diff --git a/src/lib.rs b/src/lib.rs
index bbdacda..fc01fa2 100644
--- a/src/lib.rs
+++ b/src/lib.rs
@@ -241,6 +241,7 @@ pub fn parse_path(path: &PathBuf, config: ParseConfig) -> anyhow::Result<ParseOutput>
+    let mut chromium_events: Vec<serde_json::Value> = Vec::new();
     while let Some((lineno, line)) = iter.next() {
         bytes_read += line.len() as u64;
@@ -438,6 +439,10 @@ pub fn parse_path(path: &PathBuf, config: ParseConfig) -> anyhow::Result<ParseOutput>
@@ ... @@ pub fn parse_path(path: &PathBuf, config: ParseConfig) -> anyhow::Result<ParseOutput>
     if stats.fail_parser > 0 {
         eprintln!(
@@ -485,6 +495,7 @@ pub fn parse_path(path: &PathBuf, config: ParseConfig) -> anyhow::Result<ParseOutput>
diff --git a/src/parsers.rs b/src/parsers.rs
     fn parse<'e>(
         &self,
-        lineno: usize,
+        _lineno: usize,
         metadata: Metadata<'e>,
         _rank: Option<u32>,
-        compile_id: &Option<CompileId>,
+        _compile_id: &Option<CompileId>,
         payload: &str,
     ) -> anyhow::Result<ParserResults> {
         if let Metadata::DumpFile(metadata) = metadata {
diff --git a/src/templates.rs b/src/templates.rs
index de1e77d..73b2df1 100644
--- a/src/templates.rs
+++ b/src/templates.rs
@@ -124,6 +124,12 @@ phase generates:
 <li>Inductor will apply some post grad FX passes, producing <code>inductor_post_grad_graph</code></li>
 <li>Inductor will perform code generation, producing the final <code>inductor_output_code</code> which will be executed at runtime. This output is a valid Python program and can be directly run.</li>
 </ul>
+
+{{ if has_chromium_events }}
+<h2>Chromium Events</h2>
+PT2 generates Chromium Trace Events in JSON on specific events during compilation.
+You can download and view them in a tool like Perfetto.
+{{ endif }}
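The trace events referenced by this template block are plain Chromium Trace Event objects, exactly the kind visible in the test log below: "ph" is "B" (begin) or "E" (end), "ts" is microseconds, and a top-level JSON array of them is already a valid trace file. A minimal sketch of what an aggregated chromium_events.json holds (illustrative Rust, not tlparse's actual code; the variable name is an assumption):

    use serde_json::{json, Value};

    fn main() {
        // Two paired events of the shape emitted by torch/_dynamo/utils.py:
        // a "B" event opens a span and a matching "E" event closes it.
        let mut chromium_events: Vec<Value> = Vec::new();
        chromium_events.push(json!({
            "name": "_compile.compile_inner",
            "ts": 1722977751347358.2,
            "args": null,
            "ph": "B",
            "pid": 0
        }));
        chromium_events.push(json!({
            "name": "_compile.compile_inner",
            "ts": 1722977751355078.8,
            "args": null,
            "ph": "E",
            "pid": 0
        }));
        // A JSON array of such objects can be opened directly in Perfetto
        // or chrome://tracing.
        println!("{}", serde_json::to_string_pretty(&chromium_events).unwrap());
    }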

 
 Build products below:
 
@@ -139,6 +145,7 @@ Build products below:
 {{ endfor }}
+{{ if has_unknown_stack_trie }}
 <h2>Unknown stacks</h2>
 
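The {{ if has_chromium_events }} and {{ if has_unknown_stack_trie }} blocks above are conditionals in the tinytemplate syntax these templates use, keyed off boolean fields of the context the index page is rendered with (see has_chromium_events on IndexContext below). A minimal sketch of the mechanism, assuming the tinytemplate crate and a stand-in context struct rather than tlparse's actual IndexContext:

    use serde::Serialize;
    use tinytemplate::TinyTemplate;

    #[derive(Serialize)]
    struct Context {
        has_chromium_events: bool,
    }

    fn main() -> Result<(), Box<dyn std::error::Error>> {
        let mut tt = TinyTemplate::new();
        // `{{ if ... }}` renders its body only when the context field is true.
        tt.add_template(
            "index",
            "{{ if has_chromium_events }}<h2>Chromium Events</h2>{{ endif }}",
        )?;
        let html = tt.render("index", &Context { has_chromium_events: true })?;
        assert_eq!(html, "<h2>Chromium Events</h2>");
        Ok(())
    }

The section only shows up in the rendered index when the parser actually saw chromium_event entries in the log.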
diff --git a/src/types.rs b/src/types.rs
index 439c06a..8f54b45 100644
--- a/src/types.rs
+++ b/src/types.rs
@@ -452,6 +452,7 @@ pub struct Envelope {
     pub describe_tensor: Option<TensorDesc>,
     pub describe_source: Option<SourceDesc>,
     pub dump_file: Option<DumpFileMetadata>,
+    pub chromium_event: Option<EmptyMetadata>,
     #[serde(flatten)]
     pub _other: FxHashMap<String, serde_json::Value>,
 }
@@ -561,6 +562,7 @@ pub struct IndexContext {
     pub has_unknown_stack_trie: bool,
     pub num_breaks: usize,
     pub custom_header_html: String,
+    pub has_chromium_events: bool,
 }
 
 #[derive(Debug, Serialize)]
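Each line of the test log below is one JSON envelope of this shape: typically exactly one of the optional metadata fields is populated, and has_payload carries the MD5 of the indented payload lines that follow. A minimal sketch of how a chromium_event line deserializes (illustrative field types, not tlparse's exact definitions):

    use serde::Deserialize;
    use std::collections::HashMap;

    #[derive(Debug, Deserialize)]
    struct Envelope {
        // Present (as an empty object) on chromium_event lines; the trace
        // event JSON itself arrives in the payload lines underneath.
        chromium_event: Option<HashMap<String, serde_json::Value>>,
        // MD5 checksum of the payload that follows this envelope line.
        has_payload: Option<String>,
        // Catch-all (frame_id, attempt, ...) so unknown keys don't fail the parse.
        #[serde(flatten)]
        other: HashMap<String, serde_json::Value>,
    }

    fn main() -> serde_json::Result<()> {
        let line = r#"{"chromium_event": {}, "has_payload": "987ad3072722930cb3b501d122b778d3"}"#;
        let env: Envelope = serde_json::from_str(line)?;
        assert!(env.chromium_event.is_some());
        assert_eq!(env.has_payload.as_deref(), Some("987ad3072722930cb3b501d122b778d3"));
        Ok(())
    }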
diff --git a/tests/inputs/chromium_nanogpt_cache_miss.log b/tests/inputs/chromium_nanogpt_cache_miss.log
new file mode 100644
index 0000000..eb36139
--- /dev/null
+++ b/tests/inputs/chromium_nanogpt_cache_miss.log
@@ -0,0 +1,54323 @@
+V0806 13:55:51.342000 4107173 torch/_logging/structured.py:22] {"str": ["/data/users/jjwu/a/pytorch/torch/_dynamo/convert_frame.py", 0]}
+V0806 13:55:51.343000 4107173 torch/_logging/structured.py:22] {"str": ["/data/users/jjwu/a/pytorch/benchmarks/dynamo/torchbench.py", 1]}
+V0806 13:55:51.347000 4107173 torch/_logging/structured.py:22] {"str": ["/data/users/jjwu/a/pytorch/benchmarks/dynamo/common.py", 2]}
+V0806 13:55:51.347000 4107173 torch/_logging/structured.py:22] {"str": ["/data/users/jjwu/a/pytorch/torch/_dynamo/eval_frame.py", 3]}
+V0806 13:55:51.347000 4107173 torch/_dynamo/convert_frame.py:869] {"dynamo_start": {"stack": [{"line": 460, "name": "<module>", "filename": 1}, {"line": 456, "name": "torchbench_main", "filename": 1}, {"line": 4086, "name": "main", "filename": 2}, {"line": 4018, "name": "process_entry", "filename": 2}, {"line": 4680, "name": "run", "filename": 2}, {"line": 3378, "name": "run_one_model", "filename": 2}, {"line": 3209, "name": "run_performance_test", "filename": 2}, {"line": 3141, "name": "warmup", "filename": 2}, {"line": 464, "name": "_fn", "filename": 3}, {"line": 436, "name": "forward_and_backward_pass", "filename": 1}]}, "frame_id": 0, "frame_compile_id": 0, "attempt": 0}
+V0806 13:55:51.347000 4107173 torch/_dynamo/utils.py:824] {"chromium_event": {}, "has_payload": "987ad3072722930cb3b501d122b778d3"}
+ {
+   "name": "_compile.compile_inner",
+   "ts": 1722977751347358.2,
+   "args": null,
+   "ph": "B",
+   "pid": 0
+ }
+V0806 13:55:51.347000 4107173 torch/_dynamo/utils.py:824] {"chromium_event": {}, "has_payload": "b620def9ef9967d776f34efe72aa81c2"}
+ {
+   "name": "entire_frame_compile",
+   "ts": 1722977751347454.2,
+   "args": null,
+   "ph": "B",
+   "pid": 0
+ }
+V0806 13:55:51.354000 4107173 torch/_dynamo/guards.py:2195] {"dynamo_cpp_guards_str": {}, "frame_id": 0, "frame_compile_id": 0, "attempt": 1, "has_payload": "7a0a797a9f90400fd9294a20d8172183"}
+
+ TREE_GUARD_MANAGER:
+ +- RootGuardManager
+ | +- DEFAULT_DEVICE: utils_device.CURRENT_DEVICE == None  # _dynamo/output_graph.py:462 in init_ambient_guards
+ | +- GLOBAL_STATE: ___check_global_state()
+ | +- GuardManager: source=L['inputs'], accessed_by=DictGetItemGuardAccessor(inputs)
+ | | +- TYPE_MATCH: ___check_type_id(L['inputs'], 94206128741824)
+ | | +- LENGTH_CHECK: len(L['inputs']) == 1
+ | +- GuardManager: source=G, accessed_by=GlobalsGuardAccessor
+ | | +- GuardManager: source=G['clone_inputs'], accessed_by=DictGetItemGuardAccessor(clone_inputs)
+ | | | +- ID_MATCH: ___check_obj_id(G['clone_inputs'], 140561895812272)
+
+V0806 13:55:51.355000 4107173 torch/_dynamo/utils.py:824] {"chromium_event": {}, "has_payload": "a1b3b6de20677769e59e37a7f0b2ff44"}
+ {
+   "name": "entire_frame_compile",
+   "ts": 1722977751355009.0,
+   "args": null,
+   "ph": "E",
+   "pid": 0
+ }
+V0806 13:55:51.355000 4107173 torch/_dynamo/utils.py:824] {"chromium_event": {}, "has_payload": "29faf91a8e7dc2fd407f0cd8c9138e40"}
+ {
+   "name": "_compile.compile_inner",
+   "ts": 1722977751355078.8,
+   "args": null,
+   "ph": "E",
+   "pid": 0
+ }
+V0806 13:55:51.355000 4107173 torch/_dynamo/utils.py:779] {"compilation_metrics": {"compile_id": "0/0", "frame_key": "1", "co_name": "forward_and_backward_pass", "co_filename": "/data/users/jjwu/a/pytorch/benchmarks/dynamo/torchbench.py", "co_firstlineno": 436, "cache_size": 0, "accumulated_cache_size": 0, "guard_count": 7, "shape_env_guard_count": 0, "graph_op_count": 0, "graph_node_count": 0, "graph_input_count": 0, "start_time": 1722977751.3473148, "entire_frame_compile_time_s": 0.007780313491821289, "backend_compile_time_s": null, "inductor_compile_time_s": null, "code_gen_time_s": null, "fail_type": null, "fail_reason": null, "fail_user_frame_filename": null, "fail_user_frame_lineno": null, "non_compliant_ops": [], "compliant_custom_ops": [], "restart_reasons": ["'skip function clone_inputs in file /data/users/jjwu/a/pytorch/torch/_dynamo/utils.py'"], "dynamo_time_before_restart_s": 0.0032989978790283203, "has_guarded_code": true}, "frame_id": 0, "frame_compile_id": 0, "attempt": 1}
+V0806 13:55:51.355000 4107173 torch/_dynamo/convert_frame.py:869] {"dynamo_start": {"stack": [{"line": 460, "name": "<module>", "filename": 1}, {"line": 456, "name": "torchbench_main", "filename": 1}, {"line": 4086, "name": "main", "filename": 2}, {"line": 4018, "name": "process_entry", "filename": 2}, {"line": 4680, "name": "run", "filename": 2}, {"line": 3378, "name": "run_one_model", "filename": 2}, {"line": 3209, "name": "run_performance_test", "filename": 2}, {"line": 3141, "name": "warmup", "filename": 2}, {"line": 464, "name": "_fn", "filename": 3}, {"line": 437, "name": "forward_and_backward_pass", "filename": 1}, {"line": 437, "name": "torch_dynamo_resume_in_forward_and_backward_pass_at_437", "filename": 1}]}, "frame_id": 1, "frame_compile_id": 0, "attempt": 0}
+V0806 13:55:51.356000 4107173 torch/_dynamo/utils.py:824] {"chromium_event": {}, "has_payload": "852a1ce2f4400e662ec69b85aded0d21"}
+ {
+   "name": "_compile.compile_inner",
+   "ts": 1722977751356089.5,
+   "args": null,
+   "ph": "B",
+   "pid": 0
+ }
+V0806 13:55:51.356000 4107173 torch/_dynamo/utils.py:824] {"chromium_event": {}, "has_payload": "bf233ab38e77bab91b061da69473da43"}
+ {
+   "name": "entire_frame_compile",
+   "ts": 1722977751356160.5,
+   "args": null,
+   "ph": "B",
+   "pid": 0
+ }
+V0806 13:55:51.362000 4107173 torch/_dynamo/guards.py:2195] {"dynamo_cpp_guards_str": {}, "frame_id": 1, "frame_compile_id": 0, "attempt": 1, "has_payload": "b5d988194b7254d348ea6c2f189958d2"}
+
+ TREE_GUARD_MANAGER:
+ +- RootGuardManager
+ | +- DEFAULT_DEVICE: utils_device.CURRENT_DEVICE == None  # _dynamo/output_graph.py:462 in init_ambient_guards
+ | +- GLOBAL_STATE: ___check_global_state()
+ | +- GuardManager: source=L['mod'], accessed_by=DictGetItemGuardAccessor(mod)
+ | | +- TYPE_MATCH: ___check_type_id(L['mod'], 94206531299328)
+ | +- GuardManager: source=L['self'], accessed_by=DictGetItemGuardAccessor(self)
+ | | +- TYPE_MATCH: ___check_type_id(L['self'], 94206130500624)
+ | +- GuardManager: source=L['___stack0'], accessed_by=DictGetItemGuardAccessor(___stack0)
+ | | +- TYPE_MATCH: ___check_type_id(L['___stack0'], 94206128766016)
+ | | +- LENGTH_CHECK: len(L['___stack0']) == 1
+
+V0806 13:55:51.362000 4107173 torch/_dynamo/utils.py:824] {"chromium_event": {}, "has_payload": "59b4092dd5f70a1dae78d9cdde0701a6"}
+ {
+   "name": "entire_frame_compile",
+   "ts": 1722977751362494.5,
+   "args": null,
+   "ph": "E",
+   "pid": 0
+ }
+V0806 13:55:51.362000 4107173 torch/_dynamo/utils.py:824] {"chromium_event": {}, "has_payload": "e19fed31c91f5fe9e56a180c1627d3e8"}
+ {
+   "name": "_compile.compile_inner",
+   "ts": 1722977751362564.2,
+   "args": null,
+   "ph": "E",
+   "pid": 0
+ }
+V0806 13:55:51.362000 4107173 torch/_dynamo/utils.py:779] {"compilation_metrics": {"compile_id": "1/0", "frame_key": "2", "co_name": "torch_dynamo_resume_in_forward_and_backward_pass_at_437", "co_filename": "/data/users/jjwu/a/pytorch/benchmarks/dynamo/torchbench.py", "co_firstlineno": 437, "cache_size": 0, "accumulated_cache_size": 0, "guard_count": 8, "shape_env_guard_count": 0, "graph_op_count": 0, "graph_node_count": 0, "graph_input_count": 0, "start_time": 1722977751.3560677, "entire_frame_compile_time_s": 0.006527423858642578, "backend_compile_time_s": null, "inductor_compile_time_s": null, "code_gen_time_s": null, "fail_type": null, "fail_reason": null, "fail_user_frame_filename": null, "fail_user_frame_lineno": null, "non_compliant_ops": [], "compliant_custom_ops": [], "restart_reasons": ["'inline in skipfiles: BenchmarkRunner.optimizer_zero_grad | inner /data/users/jjwu/a/pytorch/torch/_compile.py, skipped according trace_rules.lookup SKIP_DIRS'"], "dynamo_time_before_restart_s": 0.001367807388305664, "has_guarded_code": true}, "frame_id": 1, "frame_compile_id": 0, "attempt": 1}
+V0806 13:55:51.363000 4107173 torch/_dynamo/convert_frame.py:869] {"dynamo_start": {"stack": [{"line": 460, "name": "<module>", "filename": 1}, {"line": 456, "name": "torchbench_main", "filename": 1}, {"line": 4086, "name": "main", "filename": 2}, {"line": 4018, "name": "process_entry", "filename": 2}, {"line": 4680, "name": "run", "filename": 2}, {"line": 3378, "name": "run_one_model", "filename": 2}, {"line": 3209, "name": "run_performance_test", "filename": 2}, {"line": 3141, "name": "warmup", "filename": 2}, {"line": 464, "name": "_fn", "filename": 3}, {"line": 437, "name": "forward_and_backward_pass", "filename": 1}, {"line": 438, "name": "torch_dynamo_resume_in_forward_and_backward_pass_at_437", "filename": 1}, {"line": 438, "name": "torch_dynamo_resume_in_forward_and_backward_pass_at_438", "filename": 1}]}, "frame_id": 2, "frame_compile_id": 0, "attempt": 0}
+V0806 13:55:51.363000 4107173 torch/_dynamo/utils.py:824] {"chromium_event": {}, "has_payload": "c96559249d58820ff5a3a4d087e4c79e"}
+ {
+   "name": "_compile.compile_inner",
+   "ts": 1722977751363510.8,
+   "args": null,
+   "ph": "B",
+   "pid": 0
+ }
+V0806 13:55:51.363000 4107173 torch/_dynamo/utils.py:824] {"chromium_event": {}, "has_payload": "213a253af88886478de090f7196d7e90"}
+ {
+   "name": "entire_frame_compile",
+   "ts": 1722977751363579.2,
+   "args": null,
+   "ph": "B",
+   "pid": 0
+ }
+V0806 13:55:51.366000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 0, "describer_id": 4, "size": 760}, "frame_id": 2, "frame_compile_id": 0, "attempt": 0}
+V0806 13:55:51.367000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 0, "ndim": 2, "dtype": "torch.int64", "device": "device(type='cuda', index=0)", "size": [1, 64], "is_leaf": true, "stride": [64, 1], "storage": 0, "view_func": "", "describer_id": 4}, "frame_id": 2, "frame_compile_id": 0, "attempt": 0}
+V0806 13:55:51.367000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 4, "id": 0, "source": "L['cloned_inputs'][0]"}, "frame_id": 2, "frame_compile_id": 0, "attempt": 0}
+V0806 13:55:51.373000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 1, "describer_id": 4, "size": 154533888}, "frame_id": 2, "frame_compile_id": 0, "attempt": 0}
+V0806 13:55:51.373000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 2, "ndim": 2, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [50304, 768], "is_leaf": true, "requires_grad": true, "is_parameter": true, "stride": [768, 1], "storage": 1, "view_func": "", "describer_id": 4}, "frame_id": 2, "frame_compile_id": 0, "attempt": 0}
+V0806 13:55:51.373000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 4, "id": 2, "source": "L['mod']._modules['transformer']._modules['wte']._parameters['weight']"}, "frame_id": 2, "frame_compile_id": 0, "attempt": 0}
+V0806 13:55:51.376000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 2, "describer_id": 4, "size": 3145728}, "frame_id": 2, "frame_compile_id": 0, "attempt": 0}
+V0806 13:55:51.376000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 3, "ndim": 2, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [1024, 768], "is_leaf": true, "requires_grad": true, "is_parameter": true, "stride": [768, 1], "storage": 2, "view_func": "", "describer_id": 4}, "frame_id": 2, "frame_compile_id": 0, "attempt": 0}
+V0806 13:55:51.377000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 4, "id": 3, "source": "L['mod']._modules['transformer']._modules['wpe']._parameters['weight']"}, "frame_id": 2, "frame_compile_id": 0, "attempt": 0}
+V0806 13:55:51.386000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 3, "describer_id": 4, "size": 3072}, "frame_id": 2, "frame_compile_id": 0, "attempt": 0}
+V0806 13:55:51.386000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 6, "ndim": 1, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [768], "is_leaf": true, "requires_grad": true, "is_parameter": true, "stride": [1], "storage": 3, "view_func": "", "describer_id": 4}, "frame_id": 2, "frame_compile_id": 0, "attempt": 0}
+V0806 13:55:51.386000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 4, "id": 6, "source": "L['mod']._modules['transformer']._modules['h']._modules['0']._modules['ln_1']._parameters['weight']"}, "frame_id": 2, "frame_compile_id": 0, "attempt": 0}
+V0806 13:55:51.387000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 4, "describer_id": 4, "size": 3072}, "frame_id": 2, "frame_compile_id": 0, "attempt": 0}
+V0806 13:55:51.387000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 7, "ndim": 1, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [768], "is_leaf": true, "requires_grad": true, "is_parameter": true, "stride": [1], "storage": 4, "view_func": "", "describer_id": 4}, "frame_id": 2, "frame_compile_id": 0, "attempt": 0}
+V0806 13:55:51.387000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 4, "id": 7, "source": "L['mod']._modules['transformer']._modules['h']._modules['0']._modules['ln_1']._parameters['bias']"}, "frame_id": 2, "frame_compile_id": 0, "attempt": 0}
+V0806 13:55:51.392000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 5, "describer_id": 4, "size": 7077888}, "frame_id": 2, "frame_compile_id": 0, "attempt": 0}
+V0806 13:55:51.392000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 11, "ndim": 2, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [2304, 768], "is_leaf": true, "requires_grad": true, "is_parameter": true, "stride": [768, 1], "storage": 5, "view_func": "", "describer_id": 4}, "frame_id": 2, "frame_compile_id": 0, "attempt": 0}
+V0806 13:55:51.392000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 4, "id": 11, "source": "L['mod']._modules['transformer']._modules['h']._modules['0']._modules['attn']._modules['c_attn']._parameters['weight']"}, "frame_id": 2, "frame_compile_id": 0, "attempt": 0}
+V0806 13:55:51.393000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 6, "describer_id": 4, "size": 9216}, "frame_id": 2, "frame_compile_id": 0, "attempt": 0}
+V0806 13:55:51.393000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 12, "ndim": 1, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [2304], "is_leaf": true, "requires_grad": true, "is_parameter": true, "stride": [1], "storage": 6, "view_func": "", "describer_id": 4}, "frame_id": 2, "frame_compile_id": 0, "attempt": 0}
+V0806 13:55:51.393000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 4, "id": 12, "source": "L['mod']._modules['transformer']._modules['h']._modules['0']._modules['attn']._modules['c_attn']._parameters['bias']"}, "frame_id": 2, "frame_compile_id": 0, "attempt": 0}
+V0806 13:55:51.410000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 7, "describer_id": 4, "size": 2359296}, "frame_id": 2, "frame_compile_id": 0, "attempt": 0}
+V0806 13:55:51.410000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 36, "ndim": 2, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [768, 768], "is_leaf": true, "requires_grad": true, "is_parameter": true, "stride": [768, 1], "storage": 7, "view_func": "", "describer_id": 4}, "frame_id": 2, "frame_compile_id": 0, "attempt": 0}
+V0806 13:55:51.410000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 4, "id": 36, "source": "L['mod']._modules['transformer']._modules['h']._modules['0']._modules['attn']._modules['c_proj']._parameters['weight']"}, "frame_id": 2, "frame_compile_id": 0, "attempt": 0}
+V0806 13:55:51.411000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 8, "describer_id": 4, "size": 3072}, "frame_id": 2, "frame_compile_id": 0, "attempt": 0}
+V0806 13:55:51.411000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 37, "ndim": 1, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [768], "is_leaf": true, "requires_grad": true, "is_parameter": true, "stride": [1], "storage": 8, "view_func": "", "describer_id": 4}, "frame_id": 2, "frame_compile_id": 0, "attempt": 0}
+V0806 13:55:51.411000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 4, "id": 37, "source": "L['mod']._modules['transformer']._modules['h']._modules['0']._modules['attn']._modules['c_proj']._parameters['bias']"}, "frame_id": 2, "frame_compile_id": 0, "attempt": 0}
+V0806 13:55:51.417000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 9, "describer_id": 4, "size": 3072}, "frame_id": 2, "frame_compile_id": 0, "attempt": 0}
+V0806 13:55:51.417000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 44, "ndim": 1, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [768], "is_leaf": true, "requires_grad": true, "is_parameter": true, "stride": [1], "storage": 9, "view_func": "", "describer_id": 4}, "frame_id": 2, "frame_compile_id": 0, "attempt": 0}
+V0806 13:55:51.417000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 4, "id": 44, "source": "L['mod']._modules['transformer']._modules['h']._modules['0']._modules['ln_2']._parameters['weight']"}, "frame_id": 2, "frame_compile_id": 0, "attempt": 0}
+V0806 13:55:51.418000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 10, "describer_id": 4, "size": 3072}, "frame_id": 2, "frame_compile_id": 0, "attempt": 0}
+V0806 13:55:51.418000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 45, "ndim": 1, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [768], "is_leaf": true, "requires_grad": true, "is_parameter": true, "stride": [1], "storage": 10, "view_func": "", "describer_id": 4}, "frame_id": 2, "frame_compile_id": 0, "attempt": 0}
+V0806 13:55:51.418000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 4, "id": 45, "source": "L['mod']._modules['transformer']._modules['h']._modules['0']._modules['ln_2']._parameters['bias']"}, "frame_id": 2, "frame_compile_id": 0, "attempt": 0}
+V0806 13:55:51.422000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 11, "describer_id": 4, "size": 9437184}, "frame_id": 2, "frame_compile_id": 0, "attempt": 0}
+V0806 13:55:51.422000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 49, "ndim": 2, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [3072, 768], "is_leaf": true, "requires_grad": true, "is_parameter": true, "stride": [768, 1], "storage": 11, "view_func": "", "describer_id": 4}, "frame_id": 2, "frame_compile_id": 0, "attempt": 0}
+V0806 13:55:51.422000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 4, "id": 49, "source": "L['mod']._modules['transformer']._modules['h']._modules['0']._modules['mlp']._modules['c_fc']._parameters['weight']"}, "frame_id": 2, "frame_compile_id": 0, "attempt": 0}
+V0806 13:55:51.423000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 12, "describer_id": 4, "size": 12288}, "frame_id": 2, "frame_compile_id": 0, "attempt": 0}
+V0806 13:55:51.423000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 50, "ndim": 1, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [3072], "is_leaf": true, "requires_grad": true, "is_parameter": true, "stride": [1], "storage": 12, "view_func": "", "describer_id": 4}, "frame_id": 2, "frame_compile_id": 0, "attempt": 0}
+V0806 13:55:51.423000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 4, "id": 50, "source": "L['mod']._modules['transformer']._modules['h']._modules['0']._modules['mlp']._modules['c_fc']._parameters['bias']"}, "frame_id": 2, "frame_compile_id": 0, "attempt": 0}
+V0806 13:55:51.434000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 13, "describer_id": 4, "size": 9437184}, "frame_id": 2, "frame_compile_id": 0, "attempt": 0}
+V0806 13:55:51.434000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 66, "ndim": 2, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [768, 3072], "is_leaf": true, "requires_grad": true, "is_parameter": true, "stride": [3072, 1], "storage": 13, "view_func": "", "describer_id": 4}, "frame_id": 2, "frame_compile_id": 0, "attempt": 0}
+V0806 13:55:51.434000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 4, "id": 66, "source": "L['mod']._modules['transformer']._modules['h']._modules['0']._modules['mlp']._modules['c_proj']._parameters['weight']"}, "frame_id": 2, "frame_compile_id": 0, "attempt": 0}
+V0806 13:55:51.435000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 14, "describer_id": 4, "size": 3072}, "frame_id": 2, "frame_compile_id": 0, "attempt": 0}
+V0806 13:55:51.435000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 67, "ndim": 1, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [768], "is_leaf": true, "requires_grad": true, "is_parameter": true, "stride": [1], "storage": 14, "view_func": "", "describer_id": 4}, "frame_id": 2, "frame_compile_id": 0, "attempt": 0}
+V0806 13:55:51.435000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 4, "id": 67, "source": "L['mod']._modules['transformer']._modules['h']._modules['0']._modules['mlp']._modules['c_proj']._parameters['bias']"}, "frame_id": 2, "frame_compile_id": 0, "attempt": 0}
+V0806 13:55:51.441000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 15, "describer_id": 4, "size": 3072}, "frame_id": 2, "frame_compile_id": 0, "attempt": 0}
+V0806 13:55:51.441000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 71, "ndim": 1, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [768], "is_leaf": true, "requires_grad": true, "is_parameter": true, "stride": [1], "storage": 15, "view_func": "", "describer_id": 4}, "frame_id": 2, "frame_compile_id": 0, "attempt": 0}
+V0806 13:55:51.441000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 4, "id": 71, "source": "L['mod']._modules['transformer']._modules['h']._modules['1']._modules['ln_1']._parameters['weight']"}, "frame_id": 2, "frame_compile_id": 0, "attempt": 0}
+V0806 13:55:51.442000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 16, "describer_id": 4, "size": 3072}, "frame_id": 2, "frame_compile_id": 0, "attempt": 0}
+V0806 13:55:51.442000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 72, "ndim": 1, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [768], "is_leaf": true, "requires_grad": true, "is_parameter": true, "stride": [1], "storage": 16, "view_func": "", "describer_id": 4}, "frame_id": 2, "frame_compile_id": 0, "attempt": 0}
+V0806 13:55:51.442000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 4, "id": 72, "source": "L['mod']._modules['transformer']._modules['h']._modules['1']._modules['ln_1']._parameters['bias']"}, "frame_id": 2, "frame_compile_id": 0, "attempt": 0}
+V0806 13:55:51.447000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 17, "describer_id": 4, "size": 7077888}, "frame_id": 2, "frame_compile_id": 0, "attempt": 0}
+V0806 13:55:51.447000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 76, "ndim": 2, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [2304, 768], "is_leaf": true, "requires_grad": true, "is_parameter": true, "stride": [768, 1], "storage": 17, "view_func": "", "describer_id": 4}, "frame_id": 2, "frame_compile_id": 0, "attempt": 0}
+V0806 13:55:51.447000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 4, "id": 76, "source": "L['mod']._modules['transformer']._modules['h']._modules['1']._modules['attn']._modules['c_attn']._parameters['weight']"}, "frame_id": 2, "frame_compile_id": 0, "attempt": 0}
+V0806 13:55:51.448000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 18, "describer_id": 4, "size": 9216}, "frame_id": 2, "frame_compile_id": 0, "attempt": 0}
+V0806 13:55:51.448000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 77, "ndim": 1, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [2304], "is_leaf": true, "requires_grad": true, "is_parameter": true, "stride": [1], "storage": 18, "view_func": "", "describer_id": 4}, "frame_id": 2, "frame_compile_id": 0, "attempt": 0}
+V0806 13:55:51.448000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 4, "id": 77, "source": "L['mod']._modules['transformer']._modules['h']._modules['1']._modules['attn']._modules['c_attn']._parameters['bias']"}, "frame_id": 2, "frame_compile_id": 0, "attempt": 0}
+V0806 13:55:51.457000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 19, "describer_id": 4, "size": 2359296}, "frame_id": 2, "frame_compile_id": 0, "attempt": 0}
+V0806 13:55:51.457000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 78, "ndim": 2, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [768, 768], "is_leaf": true, "requires_grad": true, "is_parameter": true, "stride": [768, 1], "storage": 19, "view_func": "", "describer_id": 4}, "frame_id": 2, "frame_compile_id": 0, "attempt": 0}
+V0806 13:55:51.457000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 4, "id": 78, "source": "L['mod']._modules['transformer']._modules['h']._modules['1']._modules['attn']._modules['c_proj']._parameters['weight']"}, "frame_id": 2, "frame_compile_id": 0, "attempt": 0}
+V0806 13:55:51.458000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 20, "describer_id": 4, "size": 3072}, "frame_id": 2, "frame_compile_id": 0, "attempt": 0}
+V0806 13:55:51.458000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 79, "ndim": 1, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [768], "is_leaf": true, "requires_grad": true, "is_parameter": true, "stride": [1], "storage": 20, "view_func": "", "describer_id": 4}, "frame_id": 2, "frame_compile_id": 0, "attempt": 0}
+V0806 13:55:51.458000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 4, "id": 79, "source": "L['mod']._modules['transformer']._modules['h']._modules['1']._modules['attn']._modules['c_proj']._parameters['bias']"}, "frame_id": 2, "frame_compile_id": 0, "attempt": 0}
+V0806 13:55:51.462000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 21, "describer_id": 4, "size": 3072}, "frame_id": 2, "frame_compile_id": 0, "attempt": 0}
+V0806 13:55:51.462000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 80, "ndim": 1, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [768], "is_leaf": true, "requires_grad": true, "is_parameter": true, "stride": [1], "storage": 21, "view_func": "", "describer_id": 4}, "frame_id": 2, "frame_compile_id": 0, "attempt": 0}
+V0806 13:55:51.462000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 4, "id": 80, "source": "L['mod']._modules['transformer']._modules['h']._modules['1']._modules['ln_2']._parameters['weight']"}, "frame_id": 2, "frame_compile_id": 0, "attempt": 0}
+V0806 13:55:51.463000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 22, "describer_id": 4, "size": 3072}, "frame_id": 2, "frame_compile_id": 0, "attempt": 0}
+V0806 13:55:51.463000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 81, "ndim": 1, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [768], "is_leaf": true, "requires_grad": true, "is_parameter": true, "stride": [1], "storage": 22, "view_func": "", "describer_id": 4}, "frame_id": 2, "frame_compile_id": 0, "attempt": 0}
+V0806 13:55:51.463000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 4, "id": 81, "source": "L['mod']._modules['transformer']._modules['h']._modules['1']._modules['ln_2']._parameters['bias']"}, "frame_id": 2, "frame_compile_id": 0, "attempt": 0}
+V0806 13:55:51.466000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 23, "describer_id": 4, "size": 9437184}, "frame_id": 2, "frame_compile_id": 0, "attempt": 0}
+V0806 13:55:51.467000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 85, "ndim": 2, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [3072, 768], "is_leaf": true, "requires_grad": true, "is_parameter": true, "stride": [768, 1], "storage": 23, "view_func": "", "describer_id": 4}, "frame_id": 2, "frame_compile_id": 0, "attempt": 0}
+V0806 13:55:51.467000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 4, "id": 85, "source": "L['mod']._modules['transformer']._modules['h']._modules['1']._modules['mlp']._modules['c_fc']._parameters['weight']"}, "frame_id": 2, "frame_compile_id": 0, "attempt": 0}
+V0806 13:55:51.467000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 24, "describer_id": 4, "size": 12288}, "frame_id": 2, "frame_compile_id": 0, "attempt": 0}
+V0806 13:55:51.467000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 86, "ndim": 1, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [3072], "is_leaf": true, "requires_grad": true, "is_parameter": true, "stride": [1], "storage": 24, "view_func": "", "describer_id": 4}, "frame_id": 2, "frame_compile_id": 0, "attempt": 0}
+V0806 13:55:51.467000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 4, "id": 86, "source": "L['mod']._modules['transformer']._modules['h']._modules['1']._modules['mlp']._modules['c_fc']._parameters['bias']"}, "frame_id": 2, "frame_compile_id": 0, "attempt": 0}
+V0806 13:55:51.473000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 25, "describer_id": 4, "size": 9437184}, "frame_id": 2, "frame_compile_id": 0, "attempt": 0}
+V0806 13:55:51.473000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 87, "ndim": 2, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [768, 3072], "is_leaf": true, "requires_grad": true, "is_parameter": true, "stride": [3072, 1], "storage": 25, "view_func": "", "describer_id": 4}, "frame_id": 2, "frame_compile_id": 0, "attempt": 0}
+V0806 13:55:51.473000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 4, "id": 87, "source": "L['mod']._modules['transformer']._modules['h']._modules['1']._modules['mlp']._modules['c_proj']._parameters['weight']"}, "frame_id": 2, "frame_compile_id": 0, "attempt": 0}
+V0806 13:55:51.473000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 26, "describer_id": 4, "size": 3072}, "frame_id": 2, "frame_compile_id": 0, "attempt": 0}
+V0806 13:55:51.474000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 88, "ndim": 1, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [768], "is_leaf": true, "requires_grad": true, "is_parameter": true, "stride": [1], "storage": 26, "view_func": "", "describer_id": 4}, "frame_id": 2, "frame_compile_id": 0, "attempt": 0}
+V0806 13:55:51.474000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 4, "id": 88, "source": "L['mod']._modules['transformer']._modules['h']._modules['1']._modules['mlp']._modules['c_proj']._parameters['bias']"}, "frame_id": 2, "frame_compile_id": 0, "attempt": 0}
+V0806 13:55:51.479000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 27, "describer_id": 4, "size": 3072}, "frame_id": 2, "frame_compile_id": 0, "attempt": 0}
+V0806 13:55:51.479000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 89, "ndim": 1, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [768], "is_leaf": true, "requires_grad": true, "is_parameter": true, "stride": [1], "storage": 27, "view_func": "", "describer_id": 4}, "frame_id": 2, "frame_compile_id": 0, "attempt": 0}
+V0806 13:55:51.479000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 4, "id": 89, "source": "L['mod']._modules['transformer']._modules['h']._modules['2']._modules['ln_1']._parameters['weight']"}, "frame_id": 2, "frame_compile_id": 0, "attempt": 0}
+V0806 13:55:51.479000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 28, "describer_id": 4, "size": 3072}, "frame_id": 2, "frame_compile_id": 0, "attempt": 0}
+V0806 13:55:51.480000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 90, "ndim": 1, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [768], "is_leaf": true, "requires_grad": true, "is_parameter": true, "stride": [1], "storage": 28, "view_func": "", "describer_id": 4}, "frame_id": 2, "frame_compile_id": 0, "attempt": 0}
+V0806 13:55:51.480000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 4, "id": 90, "source": "L['mod']._modules['transformer']._modules['h']._modules['2']._modules['ln_1']._parameters['bias']"}, "frame_id": 2, "frame_compile_id": 0, "attempt": 0}
+V0806 13:55:51.484000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 29, "describer_id": 4, "size": 7077888}, "frame_id": 2, "frame_compile_id": 0, "attempt": 0}
+V0806 13:55:51.484000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 94, "ndim": 2, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [2304, 768], "is_leaf": true, "requires_grad": true, "is_parameter": true, "stride": [768, 1], "storage": 29, "view_func": "", "describer_id": 4}, "frame_id": 2, "frame_compile_id": 0, "attempt": 0}
+V0806 13:55:51.485000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 4, "id": 94, "source": "L['mod']._modules['transformer']._modules['h']._modules['2']._modules['attn']._modules['c_attn']._parameters['weight']"}, "frame_id": 2, "frame_compile_id": 0, "attempt": 0}
+V0806 13:55:51.485000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 30, "describer_id": 4, "size": 9216}, "frame_id": 2, "frame_compile_id": 0, "attempt": 0}
+V0806 13:55:51.485000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 95, "ndim": 1, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [2304], "is_leaf": true, "requires_grad": true, "is_parameter": true, "stride": [1], "storage": 30, "view_func": "", "describer_id": 4}, "frame_id": 2, "frame_compile_id": 0, "attempt": 0}
+V0806 13:55:51.485000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 4, "id": 95, "source": "L['mod']._modules['transformer']._modules['h']._modules['2']._modules['attn']._modules['c_attn']._parameters['bias']"}, "frame_id": 2, "frame_compile_id": 0, "attempt": 0}
+V0806 13:55:51.494000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 31, "describer_id": 4, "size": 2359296}, "frame_id": 2, "frame_compile_id": 0, "attempt": 0}
+V0806 13:55:51.495000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 96, "ndim": 2, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [768, 768], "is_leaf": true, "requires_grad": true, "is_parameter": true, "stride": [768, 1], "storage": 31, "view_func": "", "describer_id": 4}, "frame_id": 2, "frame_compile_id": 0, "attempt": 0}
+V0806 13:55:51.495000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 4, "id": 96, "source": "L['mod']._modules['transformer']._modules['h']._modules['2']._modules['attn']._modules['c_proj']._parameters['weight']"}, "frame_id": 2, "frame_compile_id": 0, "attempt": 0}
+V0806 13:55:51.495000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 32, "describer_id": 4, "size": 3072}, "frame_id": 2, "frame_compile_id": 0, "attempt": 0}
+V0806 13:55:51.495000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 97, "ndim": 1, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [768], "is_leaf": true, "requires_grad": true, "is_parameter": true, "stride": [1], "storage": 32, "view_func": "", "describer_id": 4}, "frame_id": 2, "frame_compile_id": 0, "attempt": 0}
+V0806 13:55:51.496000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 4, "id": 97, "source": "L['mod']._modules['transformer']._modules['h']._modules['2']._modules['attn']._modules['c_proj']._parameters['bias']"}, "frame_id": 2, "frame_compile_id": 0, "attempt": 0}
+V0806 13:55:51.499000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 33, "describer_id": 4, "size": 3072}, "frame_id": 2, "frame_compile_id": 0, "attempt": 0}
+V0806 13:55:51.499000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 98, "ndim": 1, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [768], "is_leaf": true, "requires_grad": true, "is_parameter": true, "stride": [1], "storage": 33, "view_func": "", "describer_id": 4}, "frame_id": 2, "frame_compile_id": 0, "attempt": 0}
+V0806 13:55:51.500000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 4, "id": 98, "source": "L['mod']._modules['transformer']._modules['h']._modules['2']._modules['ln_2']._parameters['weight']"}, "frame_id": 2, "frame_compile_id": 0, "attempt": 0}
+V0806 13:55:51.500000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 34, "describer_id": 4, "size": 3072}, "frame_id": 2, "frame_compile_id": 0, "attempt": 0}
+V0806 13:55:51.500000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 99, "ndim": 1, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [768], "is_leaf": true, "requires_grad": true, "is_parameter": true, "stride": [1], "storage": 34, "view_func": "", "describer_id": 4}, "frame_id": 2, "frame_compile_id": 0, "attempt": 0}
+V0806 13:55:51.500000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 4, "id": 99, "source": "L['mod']._modules['transformer']._modules['h']._modules['2']._modules['ln_2']._parameters['bias']"}, "frame_id": 2, "frame_compile_id": 0, "attempt": 0}
+V0806 13:55:51.504000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 35, "describer_id": 4, "size": 9437184}, "frame_id": 2, "frame_compile_id": 0, "attempt": 0}
+V0806 13:55:51.504000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 103, "ndim": 2, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [3072, 768], "is_leaf": true, "requires_grad": true, "is_parameter": true, "stride": [768, 1], "storage": 35, "view_func": "", "describer_id": 4}, "frame_id": 2, "frame_compile_id": 0, "attempt": 0}
+V0806 13:55:51.504000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 4, "id": 103, "source": "L['mod']._modules['transformer']._modules['h']._modules['2']._modules['mlp']._modules['c_fc']._parameters['weight']"}, "frame_id": 2, "frame_compile_id": 0, "attempt": 0}
+V0806 13:55:51.505000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 36, "describer_id": 4, "size": 12288}, "frame_id": 2, "frame_compile_id": 0, "attempt": 0}
+V0806 13:55:51.505000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 104, "ndim": 1, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [3072], "is_leaf": true, "requires_grad": true, "is_parameter": true, "stride": [1], "storage": 36, "view_func": "", "describer_id": 4}, "frame_id": 2, "frame_compile_id": 0, "attempt": 0}
+V0806 13:55:51.505000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 4, "id": 104, "source": "L['mod']._modules['transformer']._modules['h']._modules['2']._modules['mlp']._modules['c_fc']._parameters['bias']"}, "frame_id": 2, "frame_compile_id": 0, "attempt": 0}
+V0806 13:55:51.510000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 37, "describer_id": 4, "size": 9437184}, "frame_id": 2, "frame_compile_id": 0, "attempt": 0}
+V0806 13:55:51.510000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 105, "ndim": 2, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [768, 3072], "is_leaf": true, "requires_grad": true, "is_parameter": true, "stride": [3072, 1], "storage": 37, "view_func": "", "describer_id": 4}, "frame_id": 2, "frame_compile_id": 0, "attempt": 0}
+V0806 13:55:51.510000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 4, "id": 105, "source": "L['mod']._modules['transformer']._modules['h']._modules['2']._modules['mlp']._modules['c_proj']._parameters['weight']"}, "frame_id": 2, "frame_compile_id": 0, "attempt": 0}
+V0806 13:55:51.511000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 38, "describer_id": 4, "size": 3072}, "frame_id": 2, "frame_compile_id": 0, "attempt": 0}
+V0806 13:55:51.511000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 106, "ndim": 1, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [768], "is_leaf": true, "requires_grad": true, "is_parameter": true, "stride": [1], "storage": 38, "view_func": "", "describer_id": 4}, "frame_id": 2, "frame_compile_id": 0, "attempt": 0}
+V0806 13:55:51.511000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 4, "id": 106, "source": "L['mod']._modules['transformer']._modules['h']._modules['2']._modules['mlp']._modules['c_proj']._parameters['bias']"}, "frame_id": 2, "frame_compile_id": 0, "attempt": 0}
+V0806 13:55:51.516000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 39, "describer_id": 4, "size": 3072}, "frame_id": 2, "frame_compile_id": 0, "attempt": 0}
+V0806 13:55:51.516000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 107, "ndim": 1, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [768], "is_leaf": true, "requires_grad": true, "is_parameter": true, "stride": [1], "storage": 39, "view_func": "", "describer_id": 4}, "frame_id": 2, "frame_compile_id": 0, "attempt": 0}
+V0806 13:55:51.516000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 4, "id": 107, "source": "L['mod']._modules['transformer']._modules['h']._modules['3']._modules['ln_1']._parameters['weight']"}, "frame_id": 2, "frame_compile_id": 0, "attempt": 0}
+V0806 13:55:51.517000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 40, "describer_id": 4, "size": 3072}, "frame_id": 2, "frame_compile_id": 0, "attempt": 0}
+V0806 13:55:51.517000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 108, "ndim": 1, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [768], "is_leaf": true, "requires_grad": true, "is_parameter": true, "stride": [1], "storage": 40, "view_func": "", "describer_id": 4}, "frame_id": 2, "frame_compile_id": 0, "attempt": 0}
+V0806 13:55:51.517000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 4, "id": 108, "source": "L['mod']._modules['transformer']._modules['h']._modules['3']._modules['ln_1']._parameters['bias']"}, "frame_id": 2, "frame_compile_id": 0, "attempt": 0}
+V0806 13:55:51.521000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 41, "describer_id": 4, "size": 7077888}, "frame_id": 2, "frame_compile_id": 0, "attempt": 0}
+V0806 13:55:51.522000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 112, "ndim": 2, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [2304, 768], "is_leaf": true, "requires_grad": true, "is_parameter": true, "stride": [768, 1], "storage": 41, "view_func": "", "describer_id": 4}, "frame_id": 2, "frame_compile_id": 0, "attempt": 0}
+V0806 13:55:51.522000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 4, "id": 112, "source": "L['mod']._modules['transformer']._modules['h']._modules['3']._modules['attn']._modules['c_attn']._parameters['weight']"}, "frame_id": 2, "frame_compile_id": 0, "attempt": 0}
+V0806 13:55:51.522000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 42, "describer_id": 4, "size": 9216}, "frame_id": 2, "frame_compile_id": 0, "attempt": 0}
+V0806 13:55:51.522000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 113, "ndim": 1, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [2304], "is_leaf": true, "requires_grad": true, "is_parameter": true, "stride": [1], "storage": 42, "view_func": "", "describer_id": 4}, "frame_id": 2, "frame_compile_id": 0, "attempt": 0}
+V0806 13:55:51.523000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 4, "id": 113, "source": "L['mod']._modules['transformer']._modules['h']._modules['3']._modules['attn']._modules['c_attn']._parameters['bias']"}, "frame_id": 2, "frame_compile_id": 0, "attempt": 0}
+V0806 13:55:51.532000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 43, "describer_id": 4, "size": 2359296}, "frame_id": 2, "frame_compile_id": 0, "attempt": 0}
+V0806 13:55:51.532000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 114, "ndim": 2, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [768, 768], "is_leaf": true, "requires_grad": true, "is_parameter": true, "stride": [768, 1], "storage": 43, "view_func": "", "describer_id": 4}, "frame_id": 2, "frame_compile_id": 0, "attempt": 0}
+V0806 13:55:51.532000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 4, "id": 114, "source": "L['mod']._modules['transformer']._modules['h']._modules['3']._modules['attn']._modules['c_proj']._parameters['weight']"}, "frame_id": 2, "frame_compile_id": 0, "attempt": 0}
+V0806 13:55:51.532000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 44, "describer_id": 4, "size": 3072}, "frame_id": 2, "frame_compile_id": 0, "attempt": 0}
+V0806 13:55:51.533000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 115, "ndim": 1, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [768], "is_leaf": true, "requires_grad": true, "is_parameter": true, "stride": [1], "storage": 44, "view_func": "", "describer_id": 4}, "frame_id": 2, "frame_compile_id": 0, "attempt": 0}
+V0806 13:55:51.533000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 4, "id": 115, "source": "L['mod']._modules['transformer']._modules['h']._modules['3']._modules['attn']._modules['c_proj']._parameters['bias']"}, "frame_id": 2, "frame_compile_id": 0, "attempt": 0}
+V0806 13:55:51.536000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 45, "describer_id": 4, "size": 3072}, "frame_id": 2, "frame_compile_id": 0, "attempt": 0}
+V0806 13:55:51.537000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 116, "ndim": 1, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [768], "is_leaf": true, "requires_grad": true, "is_parameter": true, "stride": [1], "storage": 45, "view_func": "", "describer_id": 4}, "frame_id": 2, "frame_compile_id": 0, "attempt": 0}
+V0806 13:55:51.537000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 4, "id": 116, "source": "L['mod']._modules['transformer']._modules['h']._modules['3']._modules['ln_2']._parameters['weight']"}, "frame_id": 2, "frame_compile_id": 0, "attempt": 0}
+V0806 13:55:51.537000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 46, "describer_id": 4, "size": 3072}, "frame_id": 2, "frame_compile_id": 0, "attempt": 0}
+V0806 13:55:51.538000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 117, "ndim": 1, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [768], "is_leaf": true, "requires_grad": true, "is_parameter": true, "stride": [1], "storage": 46, "view_func": "", "describer_id": 4}, "frame_id": 2, "frame_compile_id": 0, "attempt": 0}
+V0806 13:55:51.538000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 4, "id": 117, "source": "L['mod']._modules['transformer']._modules['h']._modules['3']._modules['ln_2']._parameters['bias']"}, "frame_id": 2, "frame_compile_id": 0, "attempt": 0}
+V0806 13:55:51.541000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 47, "describer_id": 4, "size": 9437184}, "frame_id": 2, "frame_compile_id": 0, "attempt": 0}
+V0806 13:55:51.541000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 121, "ndim": 2, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [3072, 768], "is_leaf": true, "requires_grad": true, "is_parameter": true, "stride": [768, 1], "storage": 47, "view_func": "", "describer_id": 4}, "frame_id": 2, "frame_compile_id": 0, "attempt": 0}
+V0806 13:55:51.541000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 4, "id": 121, "source": "L['mod']._modules['transformer']._modules['h']._modules['3']._modules['mlp']._modules['c_fc']._parameters['weight']"}, "frame_id": 2, "frame_compile_id": 0, "attempt": 0}
+V0806 13:55:51.542000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 48, "describer_id": 4, "size": 12288}, "frame_id": 2, "frame_compile_id": 0, "attempt": 0}
+V0806 13:55:51.542000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 122, "ndim": 1, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [3072], "is_leaf": true, "requires_grad": true, "is_parameter": true, "stride": [1], "storage": 48, "view_func": "", "describer_id": 4}, "frame_id": 2, "frame_compile_id": 0, "attempt": 0}
+V0806 13:55:51.542000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 4, "id": 122, "source": "L['mod']._modules['transformer']._modules['h']._modules['3']._modules['mlp']._modules['c_fc']._parameters['bias']"}, "frame_id": 2, "frame_compile_id": 0, "attempt": 0}
+V0806 13:55:51.547000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 49, "describer_id": 4, "size": 9437184}, "frame_id": 2, "frame_compile_id": 0, "attempt": 0}
+V0806 13:55:51.548000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 123, "ndim": 2, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [768, 3072], "is_leaf": true, "requires_grad": true, "is_parameter": true, "stride": [3072, 1], "storage": 49, "view_func": "", "describer_id": 4}, "frame_id": 2, "frame_compile_id": 0, "attempt": 0}
+V0806 13:55:51.548000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 4, "id": 123, "source": "L['mod']._modules['transformer']._modules['h']._modules['3']._modules['mlp']._modules['c_proj']._parameters['weight']"}, "frame_id": 2, "frame_compile_id": 0, "attempt": 0}
+V0806 13:55:51.548000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 50, "describer_id": 4, "size": 3072}, "frame_id": 2, "frame_compile_id": 0, "attempt": 0}
+V0806 13:55:51.548000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 124, "ndim": 1, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [768], "is_leaf": true, "requires_grad": true, "is_parameter": true, "stride": [1], "storage": 50, "view_func": "", "describer_id": 4}, "frame_id": 2, "frame_compile_id": 0, "attempt": 0}
+V0806 13:55:51.548000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 4, "id": 124, "source": "L['mod']._modules['transformer']._modules['h']._modules['3']._modules['mlp']._modules['c_proj']._parameters['bias']"}, "frame_id": 2, "frame_compile_id": 0, "attempt": 0}
+V0806 13:55:51.553000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 51, "describer_id": 4, "size": 3072}, "frame_id": 2, "frame_compile_id": 0, "attempt": 0}
+V0806 13:55:51.553000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 125, "ndim": 1, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [768], "is_leaf": true, "requires_grad": true, "is_parameter": true, "stride": [1], "storage": 51, "view_func": "", "describer_id": 4}, "frame_id": 2, "frame_compile_id": 0, "attempt": 0}
+V0806 13:55:51.554000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 4, "id": 125, "source": "L['mod']._modules['transformer']._modules['h']._modules['4']._modules['ln_1']._parameters['weight']"}, "frame_id": 2, "frame_compile_id": 0, "attempt": 0}
+V0806 13:55:51.554000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 52, "describer_id": 4, "size": 3072}, "frame_id": 2, "frame_compile_id": 0, "attempt": 0}
+V0806 13:55:51.554000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 126, "ndim": 1, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [768], "is_leaf": true, "requires_grad": true, "is_parameter": true, "stride": [1], "storage": 52, "view_func": "", "describer_id": 4}, "frame_id": 2, "frame_compile_id": 0, "attempt": 0}
+V0806 13:55:51.554000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 4, "id": 126, "source": "L['mod']._modules['transformer']._modules['h']._modules['4']._modules['ln_1']._parameters['bias']"}, "frame_id": 2, "frame_compile_id": 0, "attempt": 0}
+V0806 13:55:51.559000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 53, "describer_id": 4, "size": 7077888}, "frame_id": 2, "frame_compile_id": 0, "attempt": 0}
+V0806 13:55:51.559000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 130, "ndim": 2, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [2304, 768], "is_leaf": true, "requires_grad": true, "is_parameter": true, "stride": [768, 1], "storage": 53, "view_func": "", "describer_id": 4}, "frame_id": 2, "frame_compile_id": 0, "attempt": 0}
+V0806 13:55:51.559000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 4, "id": 130, "source": "L['mod']._modules['transformer']._modules['h']._modules['4']._modules['attn']._modules['c_attn']._parameters['weight']"}, "frame_id": 2, "frame_compile_id": 0, "attempt": 0}
+V0806 13:55:51.559000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 54, "describer_id": 4, "size": 9216}, "frame_id": 2, "frame_compile_id": 0, "attempt": 0}
+V0806 13:55:51.560000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 131, "ndim": 1, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [2304], "is_leaf": true, "requires_grad": true, "is_parameter": true, "stride": [1], "storage": 54, "view_func": "", "describer_id": 4}, "frame_id": 2, "frame_compile_id": 0, "attempt": 0}
+V0806 13:55:51.560000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 4, "id": 131, "source": "L['mod']._modules['transformer']._modules['h']._modules['4']._modules['attn']._modules['c_attn']._parameters['bias']"}, "frame_id": 2, "frame_compile_id": 0, "attempt": 0}
+V0806 13:55:51.569000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 55, "describer_id": 4, "size": 2359296}, "frame_id": 2, "frame_compile_id": 0, "attempt": 0}
+V0806 13:55:51.569000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 132, "ndim": 2, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [768, 768], "is_leaf": true, "requires_grad": true, "is_parameter": true, "stride": [768, 1], "storage": 55, "view_func": "", "describer_id": 4}, "frame_id": 2, "frame_compile_id": 0, "attempt": 0}
+V0806 13:55:51.569000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 4, "id": 132, "source": "L['mod']._modules['transformer']._modules['h']._modules['4']._modules['attn']._modules['c_proj']._parameters['weight']"}, "frame_id": 2, "frame_compile_id": 0, "attempt": 0}
+V0806 13:55:51.570000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 56, "describer_id": 4, "size": 3072}, "frame_id": 2, "frame_compile_id": 0, "attempt": 0}
+V0806 13:55:51.570000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 133, "ndim": 1, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [768], "is_leaf": true, "requires_grad": true, "is_parameter": true, "stride": [1], "storage": 56, "view_func": "", "describer_id": 4}, "frame_id": 2, "frame_compile_id": 0, "attempt": 0}
+V0806 13:55:51.570000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 4, "id": 133, "source": "L['mod']._modules['transformer']._modules['h']._modules['4']._modules['attn']._modules['c_proj']._parameters['bias']"}, "frame_id": 2, "frame_compile_id": 0, "attempt": 0}
+V0806 13:55:51.574000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 57, "describer_id": 4, "size": 3072}, "frame_id": 2, "frame_compile_id": 0, "attempt": 0}
+V0806 13:55:51.574000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 134, "ndim": 1, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [768], "is_leaf": true, "requires_grad": true, "is_parameter": true, "stride": [1], "storage": 57, "view_func": "", "describer_id": 4}, "frame_id": 2, "frame_compile_id": 0, "attempt": 0}
+V0806 13:55:51.574000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 4, "id": 134, "source": "L['mod']._modules['transformer']._modules['h']._modules['4']._modules['ln_2']._parameters['weight']"}, "frame_id": 2, "frame_compile_id": 0, "attempt": 0}
+V0806 13:55:51.574000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 58, "describer_id": 4, "size": 3072}, "frame_id": 2, "frame_compile_id": 0, "attempt": 0}
+V0806 13:55:51.575000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 135, "ndim": 1, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [768], "is_leaf": true, "requires_grad": true, "is_parameter": true, "stride": [1], "storage": 58, "view_func": "", "describer_id": 4}, "frame_id": 2, "frame_compile_id": 0, "attempt": 0}
+V0806 13:55:51.575000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 4, "id": 135, "source": "L['mod']._modules['transformer']._modules['h']._modules['4']._modules['ln_2']._parameters['bias']"}, "frame_id": 2, "frame_compile_id": 0, "attempt": 0}
+V0806 13:55:51.578000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 59, "describer_id": 4, "size": 9437184}, "frame_id": 2, "frame_compile_id": 0, "attempt": 0}
+V0806 13:55:51.578000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 139, "ndim": 2, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [3072, 768], "is_leaf": true, "requires_grad": true, "is_parameter": true, "stride": [768, 1], "storage": 59, "view_func": "", "describer_id": 4}, "frame_id": 2, "frame_compile_id": 0, "attempt": 0}
+V0806 13:55:51.578000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 4, "id": 139, "source": "L['mod']._modules['transformer']._modules['h']._modules['4']._modules['mlp']._modules['c_fc']._parameters['weight']"}, "frame_id": 2, "frame_compile_id": 0, "attempt": 0}
+V0806 13:55:51.579000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 60, "describer_id": 4, "size": 12288}, "frame_id": 2, "frame_compile_id": 0, "attempt": 0}
+V0806 13:55:51.579000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 140, "ndim": 1, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [3072], "is_leaf": true, "requires_grad": true, "is_parameter": true, "stride": [1], "storage": 60, "view_func": "", "describer_id": 4}, "frame_id": 2, "frame_compile_id": 0, "attempt": 0}
+V0806 13:55:51.579000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 4, "id": 140, "source": "L['mod']._modules['transformer']._modules['h']._modules['4']._modules['mlp']._modules['c_fc']._parameters['bias']"}, "frame_id": 2, "frame_compile_id": 0, "attempt": 0}
+V0806 13:55:51.584000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 61, "describer_id": 4, "size": 9437184}, "frame_id": 2, "frame_compile_id": 0, "attempt": 0}
+V0806 13:55:51.585000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 141, "ndim": 2, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [768, 3072], "is_leaf": true, "requires_grad": true, "is_parameter": true, "stride": [3072, 1], "storage": 61, "view_func": "", "describer_id": 4}, "frame_id": 2, "frame_compile_id": 0, "attempt": 0}
+V0806 13:55:51.585000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 4, "id": 141, "source": "L['mod']._modules['transformer']._modules['h']._modules['4']._modules['mlp']._modules['c_proj']._parameters['weight']"}, "frame_id": 2, "frame_compile_id": 0, "attempt": 0}
+V0806 13:55:51.585000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 62, "describer_id": 4, "size": 3072}, "frame_id": 2, "frame_compile_id": 0, "attempt": 0}
+V0806 13:55:51.585000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 142, "ndim": 1, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [768], "is_leaf": true, "requires_grad": true, "is_parameter": true, "stride": [1], "storage": 62, "view_func": "", "describer_id": 4}, "frame_id": 2, "frame_compile_id": 0, "attempt": 0}
+V0806 13:55:51.585000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 4, "id": 142,
"source": "L['mod']._modules['transformer']._modules['h']._modules['4']._modules['mlp']._modules['c_proj']._parameters['bias']"}, "frame_id": 2, "frame_compile_id": 0, "attempt": 0} +V0806 13:55:51.591000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 63, "describer_id": 4, "size": 3072}, "frame_id": 2, "frame_compile_id": 0, "attempt": 0} +V0806 13:55:51.591000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 143, "ndim": 1, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [768], "is_leaf": true, "requires_grad": true, "is_parameter": true, "stride": [1], "storage": 63, "view_func": "", "describer_id": 4}, "frame_id": 2, "frame_compile_id": 0, "attempt": 0} +V0806 13:55:51.591000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 4, "id": 143, "source": "L['mod']._modules['transformer']._modules['h']._modules['5']._modules['ln_1']._parameters['weight']"}, "frame_id": 2, "frame_compile_id": 0, "attempt": 0} +V0806 13:55:51.592000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 64, "describer_id": 4, "size": 3072}, "frame_id": 2, "frame_compile_id": 0, "attempt": 0} +V0806 13:55:51.592000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 144, "ndim": 1, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [768], "is_leaf": true, "requires_grad": true, "is_parameter": true, "stride": [1], "storage": 64, "view_func": "", "describer_id": 4}, "frame_id": 2, "frame_compile_id": 0, "attempt": 0} +V0806 13:55:51.592000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 4, "id": 144, "source": "L['mod']._modules['transformer']._modules['h']._modules['5']._modules['ln_1']._parameters['bias']"}, "frame_id": 2, "frame_compile_id": 0, "attempt": 0} +V0806 13:55:51.596000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 65, "describer_id": 4, "size": 7077888}, "frame_id": 2, "frame_compile_id": 0, "attempt": 0} +V0806 13:55:51.596000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 148, "ndim": 2, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [2304, 768], "is_leaf": true, "requires_grad": true, "is_parameter": true, "stride": [768, 1], "storage": 65, "view_func": "", "describer_id": 4}, "frame_id": 2, "frame_compile_id": 0, "attempt": 0} +V0806 13:55:51.596000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 4, "id": 148, "source": "L['mod']._modules['transformer']._modules['h']._modules['5']._modules['attn']._modules['c_attn']._parameters['weight']"}, "frame_id": 2, "frame_compile_id": 0, "attempt": 0} +V0806 13:55:51.597000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 66, "describer_id": 4, "size": 9216}, "frame_id": 2, "frame_compile_id": 0, "attempt": 0} +V0806 13:55:51.597000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 149, "ndim": 1, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [2304], "is_leaf": true, "requires_grad": true, "is_parameter": true, "stride": [1], "storage": 66, "view_func": "", "describer_id": 4}, "frame_id": 2, "frame_compile_id": 0, "attempt": 0} +V0806 13:55:51.597000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 4, "id": 149, "source": 
"L['mod']._modules['transformer']._modules['h']._modules['5']._modules['attn']._modules['c_attn']._parameters['bias']"}, "frame_id": 2, "frame_compile_id": 0, "attempt": 0} +V0806 13:55:51.606000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 67, "describer_id": 4, "size": 2359296}, "frame_id": 2, "frame_compile_id": 0, "attempt": 0} +V0806 13:55:51.606000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 150, "ndim": 2, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [768, 768], "is_leaf": true, "requires_grad": true, "is_parameter": true, "stride": [768, 1], "storage": 67, "view_func": "", "describer_id": 4}, "frame_id": 2, "frame_compile_id": 0, "attempt": 0} +V0806 13:55:51.606000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 4, "id": 150, "source": "L['mod']._modules['transformer']._modules['h']._modules['5']._modules['attn']._modules['c_proj']._parameters['weight']"}, "frame_id": 2, "frame_compile_id": 0, "attempt": 0} +V0806 13:55:51.607000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 68, "describer_id": 4, "size": 3072}, "frame_id": 2, "frame_compile_id": 0, "attempt": 0} +V0806 13:55:51.607000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 151, "ndim": 1, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [768], "is_leaf": true, "requires_grad": true, "is_parameter": true, "stride": [1], "storage": 68, "view_func": "", "describer_id": 4}, "frame_id": 2, "frame_compile_id": 0, "attempt": 0} +V0806 13:55:51.607000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 4, "id": 151, "source": "L['mod']._modules['transformer']._modules['h']._modules['5']._modules['attn']._modules['c_proj']._parameters['bias']"}, "frame_id": 2, "frame_compile_id": 0, "attempt": 0} +V0806 13:55:51.611000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 69, "describer_id": 4, "size": 3072}, "frame_id": 2, "frame_compile_id": 0, "attempt": 0} +V0806 13:55:51.611000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 152, "ndim": 1, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [768], "is_leaf": true, "requires_grad": true, "is_parameter": true, "stride": [1], "storage": 69, "view_func": "", "describer_id": 4}, "frame_id": 2, "frame_compile_id": 0, "attempt": 0} +V0806 13:55:51.611000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 4, "id": 152, "source": "L['mod']._modules['transformer']._modules['h']._modules['5']._modules['ln_2']._parameters['weight']"}, "frame_id": 2, "frame_compile_id": 0, "attempt": 0} +V0806 13:55:51.612000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 70, "describer_id": 4, "size": 3072}, "frame_id": 2, "frame_compile_id": 0, "attempt": 0} +V0806 13:55:51.612000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 153, "ndim": 1, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [768], "is_leaf": true, "requires_grad": true, "is_parameter": true, "stride": [1], "storage": 70, "view_func": "", "describer_id": 4}, "frame_id": 2, "frame_compile_id": 0, "attempt": 0} +V0806 13:55:51.612000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 4, "id": 153, "source": 
"L['mod']._modules['transformer']._modules['h']._modules['5']._modules['ln_2']._parameters['bias']"}, "frame_id": 2, "frame_compile_id": 0, "attempt": 0} +V0806 13:55:51.616000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 71, "describer_id": 4, "size": 9437184}, "frame_id": 2, "frame_compile_id": 0, "attempt": 0} +V0806 13:55:51.616000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 157, "ndim": 2, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [3072, 768], "is_leaf": true, "requires_grad": true, "is_parameter": true, "stride": [768, 1], "storage": 71, "view_func": "", "describer_id": 4}, "frame_id": 2, "frame_compile_id": 0, "attempt": 0} +V0806 13:55:51.616000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 4, "id": 157, "source": "L['mod']._modules['transformer']._modules['h']._modules['5']._modules['mlp']._modules['c_fc']._parameters['weight']"}, "frame_id": 2, "frame_compile_id": 0, "attempt": 0} +V0806 13:55:51.617000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 72, "describer_id": 4, "size": 12288}, "frame_id": 2, "frame_compile_id": 0, "attempt": 0} +V0806 13:55:51.617000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 158, "ndim": 1, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [3072], "is_leaf": true, "requires_grad": true, "is_parameter": true, "stride": [1], "storage": 72, "view_func": "", "describer_id": 4}, "frame_id": 2, "frame_compile_id": 0, "attempt": 0} +V0806 13:55:51.617000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 4, "id": 158, "source": "L['mod']._modules['transformer']._modules['h']._modules['5']._modules['mlp']._modules['c_fc']._parameters['bias']"}, "frame_id": 2, "frame_compile_id": 0, "attempt": 0} +V0806 13:55:51.622000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 73, "describer_id": 4, "size": 9437184}, "frame_id": 2, "frame_compile_id": 0, "attempt": 0} +V0806 13:55:51.622000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 159, "ndim": 2, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [768, 3072], "is_leaf": true, "requires_grad": true, "is_parameter": true, "stride": [3072, 1], "storage": 73, "view_func": "", "describer_id": 4}, "frame_id": 2, "frame_compile_id": 0, "attempt": 0} +V0806 13:55:51.622000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 4, "id": 159, "source": "L['mod']._modules['transformer']._modules['h']._modules['5']._modules['mlp']._modules['c_proj']._parameters['weight']"}, "frame_id": 2, "frame_compile_id": 0, "attempt": 0} +V0806 13:55:51.623000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 74, "describer_id": 4, "size": 3072}, "frame_id": 2, "frame_compile_id": 0, "attempt": 0} +V0806 13:55:51.623000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 160, "ndim": 1, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [768], "is_leaf": true, "requires_grad": true, "is_parameter": true, "stride": [1], "storage": 74, "view_func": "", "describer_id": 4}, "frame_id": 2, "frame_compile_id": 0, "attempt": 0} +V0806 13:55:51.623000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 4, "id": 160, "source": 
"L['mod']._modules['transformer']._modules['h']._modules['5']._modules['mlp']._modules['c_proj']._parameters['bias']"}, "frame_id": 2, "frame_compile_id": 0, "attempt": 0} +V0806 13:55:51.628000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 75, "describer_id": 4, "size": 3072}, "frame_id": 2, "frame_compile_id": 0, "attempt": 0} +V0806 13:55:51.628000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 161, "ndim": 1, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [768], "is_leaf": true, "requires_grad": true, "is_parameter": true, "stride": [1], "storage": 75, "view_func": "", "describer_id": 4}, "frame_id": 2, "frame_compile_id": 0, "attempt": 0} +V0806 13:55:51.628000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 4, "id": 161, "source": "L['mod']._modules['transformer']._modules['h']._modules['6']._modules['ln_1']._parameters['weight']"}, "frame_id": 2, "frame_compile_id": 0, "attempt": 0} +V0806 13:55:51.629000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 76, "describer_id": 4, "size": 3072}, "frame_id": 2, "frame_compile_id": 0, "attempt": 0} +V0806 13:55:51.629000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 162, "ndim": 1, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [768], "is_leaf": true, "requires_grad": true, "is_parameter": true, "stride": [1], "storage": 76, "view_func": "", "describer_id": 4}, "frame_id": 2, "frame_compile_id": 0, "attempt": 0} +V0806 13:55:51.629000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 4, "id": 162, "source": "L['mod']._modules['transformer']._modules['h']._modules['6']._modules['ln_1']._parameters['bias']"}, "frame_id": 2, "frame_compile_id": 0, "attempt": 0} +V0806 13:55:51.633000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 77, "describer_id": 4, "size": 7077888}, "frame_id": 2, "frame_compile_id": 0, "attempt": 0} +V0806 13:55:51.633000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 166, "ndim": 2, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [2304, 768], "is_leaf": true, "requires_grad": true, "is_parameter": true, "stride": [768, 1], "storage": 77, "view_func": "", "describer_id": 4}, "frame_id": 2, "frame_compile_id": 0, "attempt": 0} +V0806 13:55:51.633000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 4, "id": 166, "source": "L['mod']._modules['transformer']._modules['h']._modules['6']._modules['attn']._modules['c_attn']._parameters['weight']"}, "frame_id": 2, "frame_compile_id": 0, "attempt": 0} +V0806 13:55:51.634000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 78, "describer_id": 4, "size": 9216}, "frame_id": 2, "frame_compile_id": 0, "attempt": 0} +V0806 13:55:51.634000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 167, "ndim": 1, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [2304], "is_leaf": true, "requires_grad": true, "is_parameter": true, "stride": [1], "storage": 78, "view_func": "", "describer_id": 4}, "frame_id": 2, "frame_compile_id": 0, "attempt": 0} +V0806 13:55:51.634000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 4, "id": 167, "source": 
"L['mod']._modules['transformer']._modules['h']._modules['6']._modules['attn']._modules['c_attn']._parameters['bias']"}, "frame_id": 2, "frame_compile_id": 0, "attempt": 0} +V0806 13:55:51.643000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 79, "describer_id": 4, "size": 2359296}, "frame_id": 2, "frame_compile_id": 0, "attempt": 0} +V0806 13:55:51.643000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 168, "ndim": 2, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [768, 768], "is_leaf": true, "requires_grad": true, "is_parameter": true, "stride": [768, 1], "storage": 79, "view_func": "", "describer_id": 4}, "frame_id": 2, "frame_compile_id": 0, "attempt": 0} +V0806 13:55:51.643000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 4, "id": 168, "source": "L['mod']._modules['transformer']._modules['h']._modules['6']._modules['attn']._modules['c_proj']._parameters['weight']"}, "frame_id": 2, "frame_compile_id": 0, "attempt": 0} +V0806 13:55:51.644000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 80, "describer_id": 4, "size": 3072}, "frame_id": 2, "frame_compile_id": 0, "attempt": 0} +V0806 13:55:51.644000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 169, "ndim": 1, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [768], "is_leaf": true, "requires_grad": true, "is_parameter": true, "stride": [1], "storage": 80, "view_func": "", "describer_id": 4}, "frame_id": 2, "frame_compile_id": 0, "attempt": 0} +V0806 13:55:51.644000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 4, "id": 169, "source": "L['mod']._modules['transformer']._modules['h']._modules['6']._modules['attn']._modules['c_proj']._parameters['bias']"}, "frame_id": 2, "frame_compile_id": 0, "attempt": 0} +V0806 13:55:51.648000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 81, "describer_id": 4, "size": 3072}, "frame_id": 2, "frame_compile_id": 0, "attempt": 0} +V0806 13:55:51.648000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 170, "ndim": 1, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [768], "is_leaf": true, "requires_grad": true, "is_parameter": true, "stride": [1], "storage": 81, "view_func": "", "describer_id": 4}, "frame_id": 2, "frame_compile_id": 0, "attempt": 0} +V0806 13:55:51.648000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 4, "id": 170, "source": "L['mod']._modules['transformer']._modules['h']._modules['6']._modules['ln_2']._parameters['weight']"}, "frame_id": 2, "frame_compile_id": 0, "attempt": 0} +V0806 13:55:51.649000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 82, "describer_id": 4, "size": 3072}, "frame_id": 2, "frame_compile_id": 0, "attempt": 0} +V0806 13:55:51.649000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 171, "ndim": 1, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [768], "is_leaf": true, "requires_grad": true, "is_parameter": true, "stride": [1], "storage": 82, "view_func": "", "describer_id": 4}, "frame_id": 2, "frame_compile_id": 0, "attempt": 0} +V0806 13:55:51.649000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 4, "id": 171, "source": 
"L['mod']._modules['transformer']._modules['h']._modules['6']._modules['ln_2']._parameters['bias']"}, "frame_id": 2, "frame_compile_id": 0, "attempt": 0} +V0806 13:55:51.653000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 83, "describer_id": 4, "size": 9437184}, "frame_id": 2, "frame_compile_id": 0, "attempt": 0} +V0806 13:55:51.653000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 175, "ndim": 2, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [3072, 768], "is_leaf": true, "requires_grad": true, "is_parameter": true, "stride": [768, 1], "storage": 83, "view_func": "", "describer_id": 4}, "frame_id": 2, "frame_compile_id": 0, "attempt": 0} +V0806 13:55:51.653000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 4, "id": 175, "source": "L['mod']._modules['transformer']._modules['h']._modules['6']._modules['mlp']._modules['c_fc']._parameters['weight']"}, "frame_id": 2, "frame_compile_id": 0, "attempt": 0} +V0806 13:55:51.654000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 84, "describer_id": 4, "size": 12288}, "frame_id": 2, "frame_compile_id": 0, "attempt": 0} +V0806 13:55:51.654000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 176, "ndim": 1, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [3072], "is_leaf": true, "requires_grad": true, "is_parameter": true, "stride": [1], "storage": 84, "view_func": "", "describer_id": 4}, "frame_id": 2, "frame_compile_id": 0, "attempt": 0} +V0806 13:55:51.654000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 4, "id": 176, "source": "L['mod']._modules['transformer']._modules['h']._modules['6']._modules['mlp']._modules['c_fc']._parameters['bias']"}, "frame_id": 2, "frame_compile_id": 0, "attempt": 0} +V0806 13:55:51.659000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 85, "describer_id": 4, "size": 9437184}, "frame_id": 2, "frame_compile_id": 0, "attempt": 0} +V0806 13:55:51.659000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 177, "ndim": 2, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [768, 3072], "is_leaf": true, "requires_grad": true, "is_parameter": true, "stride": [3072, 1], "storage": 85, "view_func": "", "describer_id": 4}, "frame_id": 2, "frame_compile_id": 0, "attempt": 0} +V0806 13:55:51.659000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 4, "id": 177, "source": "L['mod']._modules['transformer']._modules['h']._modules['6']._modules['mlp']._modules['c_proj']._parameters['weight']"}, "frame_id": 2, "frame_compile_id": 0, "attempt": 0} +V0806 13:55:51.660000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 86, "describer_id": 4, "size": 3072}, "frame_id": 2, "frame_compile_id": 0, "attempt": 0} +V0806 13:55:51.660000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 178, "ndim": 1, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [768], "is_leaf": true, "requires_grad": true, "is_parameter": true, "stride": [1], "storage": 86, "view_func": "", "describer_id": 4}, "frame_id": 2, "frame_compile_id": 0, "attempt": 0} +V0806 13:55:51.660000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 4, "id": 178, "source": 
"L['mod']._modules['transformer']._modules['h']._modules['6']._modules['mlp']._modules['c_proj']._parameters['bias']"}, "frame_id": 2, "frame_compile_id": 0, "attempt": 0} +V0806 13:55:51.665000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 87, "describer_id": 4, "size": 3072}, "frame_id": 2, "frame_compile_id": 0, "attempt": 0} +V0806 13:55:51.665000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 179, "ndim": 1, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [768], "is_leaf": true, "requires_grad": true, "is_parameter": true, "stride": [1], "storage": 87, "view_func": "", "describer_id": 4}, "frame_id": 2, "frame_compile_id": 0, "attempt": 0} +V0806 13:55:51.665000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 4, "id": 179, "source": "L['mod']._modules['transformer']._modules['h']._modules['7']._modules['ln_1']._parameters['weight']"}, "frame_id": 2, "frame_compile_id": 0, "attempt": 0} +V0806 13:55:51.666000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 88, "describer_id": 4, "size": 3072}, "frame_id": 2, "frame_compile_id": 0, "attempt": 0} +V0806 13:55:51.666000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 180, "ndim": 1, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [768], "is_leaf": true, "requires_grad": true, "is_parameter": true, "stride": [1], "storage": 88, "view_func": "", "describer_id": 4}, "frame_id": 2, "frame_compile_id": 0, "attempt": 0} +V0806 13:55:51.666000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 4, "id": 180, "source": "L['mod']._modules['transformer']._modules['h']._modules['7']._modules['ln_1']._parameters['bias']"}, "frame_id": 2, "frame_compile_id": 0, "attempt": 0} +V0806 13:55:51.670000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 89, "describer_id": 4, "size": 7077888}, "frame_id": 2, "frame_compile_id": 0, "attempt": 0} +V0806 13:55:51.670000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 184, "ndim": 2, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [2304, 768], "is_leaf": true, "requires_grad": true, "is_parameter": true, "stride": [768, 1], "storage": 89, "view_func": "", "describer_id": 4}, "frame_id": 2, "frame_compile_id": 0, "attempt": 0} +V0806 13:55:51.670000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 4, "id": 184, "source": "L['mod']._modules['transformer']._modules['h']._modules['7']._modules['attn']._modules['c_attn']._parameters['weight']"}, "frame_id": 2, "frame_compile_id": 0, "attempt": 0} +V0806 13:55:51.671000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 90, "describer_id": 4, "size": 9216}, "frame_id": 2, "frame_compile_id": 0, "attempt": 0} +V0806 13:55:51.671000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 185, "ndim": 1, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [2304], "is_leaf": true, "requires_grad": true, "is_parameter": true, "stride": [1], "storage": 90, "view_func": "", "describer_id": 4}, "frame_id": 2, "frame_compile_id": 0, "attempt": 0} +V0806 13:55:51.671000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 4, "id": 185, "source": 
"L['mod']._modules['transformer']._modules['h']._modules['7']._modules['attn']._modules['c_attn']._parameters['bias']"}, "frame_id": 2, "frame_compile_id": 0, "attempt": 0} +V0806 13:55:51.680000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 91, "describer_id": 4, "size": 2359296}, "frame_id": 2, "frame_compile_id": 0, "attempt": 0} +V0806 13:55:51.680000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 186, "ndim": 2, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [768, 768], "is_leaf": true, "requires_grad": true, "is_parameter": true, "stride": [768, 1], "storage": 91, "view_func": "", "describer_id": 4}, "frame_id": 2, "frame_compile_id": 0, "attempt": 0} +V0806 13:55:51.681000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 4, "id": 186, "source": "L['mod']._modules['transformer']._modules['h']._modules['7']._modules['attn']._modules['c_proj']._parameters['weight']"}, "frame_id": 2, "frame_compile_id": 0, "attempt": 0} +V0806 13:55:51.681000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 92, "describer_id": 4, "size": 3072}, "frame_id": 2, "frame_compile_id": 0, "attempt": 0} +V0806 13:55:51.681000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 187, "ndim": 1, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [768], "is_leaf": true, "requires_grad": true, "is_parameter": true, "stride": [1], "storage": 92, "view_func": "", "describer_id": 4}, "frame_id": 2, "frame_compile_id": 0, "attempt": 0} +V0806 13:55:51.681000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 4, "id": 187, "source": "L['mod']._modules['transformer']._modules['h']._modules['7']._modules['attn']._modules['c_proj']._parameters['bias']"}, "frame_id": 2, "frame_compile_id": 0, "attempt": 0} +V0806 13:55:51.686000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 93, "describer_id": 4, "size": 3072}, "frame_id": 2, "frame_compile_id": 0, "attempt": 0} +V0806 13:55:51.686000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 188, "ndim": 1, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [768], "is_leaf": true, "requires_grad": true, "is_parameter": true, "stride": [1], "storage": 93, "view_func": "", "describer_id": 4}, "frame_id": 2, "frame_compile_id": 0, "attempt": 0} +V0806 13:55:51.686000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 4, "id": 188, "source": "L['mod']._modules['transformer']._modules['h']._modules['7']._modules['ln_2']._parameters['weight']"}, "frame_id": 2, "frame_compile_id": 0, "attempt": 0} +V0806 13:55:51.687000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 94, "describer_id": 4, "size": 3072}, "frame_id": 2, "frame_compile_id": 0, "attempt": 0} +V0806 13:55:51.687000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 189, "ndim": 1, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [768], "is_leaf": true, "requires_grad": true, "is_parameter": true, "stride": [1], "storage": 94, "view_func": "", "describer_id": 4}, "frame_id": 2, "frame_compile_id": 0, "attempt": 0} +V0806 13:55:51.687000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 4, "id": 189, "source": 
"L['mod']._modules['transformer']._modules['h']._modules['7']._modules['ln_2']._parameters['bias']"}, "frame_id": 2, "frame_compile_id": 0, "attempt": 0} +V0806 13:55:51.690000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 95, "describer_id": 4, "size": 9437184}, "frame_id": 2, "frame_compile_id": 0, "attempt": 0} +V0806 13:55:51.691000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 193, "ndim": 2, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [3072, 768], "is_leaf": true, "requires_grad": true, "is_parameter": true, "stride": [768, 1], "storage": 95, "view_func": "", "describer_id": 4}, "frame_id": 2, "frame_compile_id": 0, "attempt": 0} +V0806 13:55:51.691000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 4, "id": 193, "source": "L['mod']._modules['transformer']._modules['h']._modules['7']._modules['mlp']._modules['c_fc']._parameters['weight']"}, "frame_id": 2, "frame_compile_id": 0, "attempt": 0} +V0806 13:55:51.691000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 96, "describer_id": 4, "size": 12288}, "frame_id": 2, "frame_compile_id": 0, "attempt": 0} +V0806 13:55:51.691000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 194, "ndim": 1, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [3072], "is_leaf": true, "requires_grad": true, "is_parameter": true, "stride": [1], "storage": 96, "view_func": "", "describer_id": 4}, "frame_id": 2, "frame_compile_id": 0, "attempt": 0} +V0806 13:55:51.691000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 4, "id": 194, "source": "L['mod']._modules['transformer']._modules['h']._modules['7']._modules['mlp']._modules['c_fc']._parameters['bias']"}, "frame_id": 2, "frame_compile_id": 0, "attempt": 0} +V0806 13:55:51.697000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 97, "describer_id": 4, "size": 9437184}, "frame_id": 2, "frame_compile_id": 0, "attempt": 0} +V0806 13:55:51.697000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 195, "ndim": 2, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [768, 3072], "is_leaf": true, "requires_grad": true, "is_parameter": true, "stride": [3072, 1], "storage": 97, "view_func": "", "describer_id": 4}, "frame_id": 2, "frame_compile_id": 0, "attempt": 0} +V0806 13:55:51.697000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 4, "id": 195, "source": "L['mod']._modules['transformer']._modules['h']._modules['7']._modules['mlp']._modules['c_proj']._parameters['weight']"}, "frame_id": 2, "frame_compile_id": 0, "attempt": 0} +V0806 13:55:51.697000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 98, "describer_id": 4, "size": 3072}, "frame_id": 2, "frame_compile_id": 0, "attempt": 0} +V0806 13:55:51.698000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 196, "ndim": 1, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [768], "is_leaf": true, "requires_grad": true, "is_parameter": true, "stride": [1], "storage": 98, "view_func": "", "describer_id": 4}, "frame_id": 2, "frame_compile_id": 0, "attempt": 0} +V0806 13:55:51.698000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 4, "id": 196, "source": 
"L['mod']._modules['transformer']._modules['h']._modules['7']._modules['mlp']._modules['c_proj']._parameters['bias']"}, "frame_id": 2, "frame_compile_id": 0, "attempt": 0} +V0806 13:55:51.702000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 99, "describer_id": 4, "size": 3072}, "frame_id": 2, "frame_compile_id": 0, "attempt": 0} +V0806 13:55:51.703000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 197, "ndim": 1, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [768], "is_leaf": true, "requires_grad": true, "is_parameter": true, "stride": [1], "storage": 99, "view_func": "", "describer_id": 4}, "frame_id": 2, "frame_compile_id": 0, "attempt": 0} +V0806 13:55:51.703000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 4, "id": 197, "source": "L['mod']._modules['transformer']._modules['h']._modules['8']._modules['ln_1']._parameters['weight']"}, "frame_id": 2, "frame_compile_id": 0, "attempt": 0} +V0806 13:55:51.703000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 100, "describer_id": 4, "size": 3072}, "frame_id": 2, "frame_compile_id": 0, "attempt": 0} +V0806 13:55:51.704000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 198, "ndim": 1, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [768], "is_leaf": true, "requires_grad": true, "is_parameter": true, "stride": [1], "storage": 100, "view_func": "", "describer_id": 4}, "frame_id": 2, "frame_compile_id": 0, "attempt": 0} +V0806 13:55:51.704000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 4, "id": 198, "source": "L['mod']._modules['transformer']._modules['h']._modules['8']._modules['ln_1']._parameters['bias']"}, "frame_id": 2, "frame_compile_id": 0, "attempt": 0} +V0806 13:55:51.708000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 101, "describer_id": 4, "size": 7077888}, "frame_id": 2, "frame_compile_id": 0, "attempt": 0} +V0806 13:55:51.708000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 202, "ndim": 2, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [2304, 768], "is_leaf": true, "requires_grad": true, "is_parameter": true, "stride": [768, 1], "storage": 101, "view_func": "", "describer_id": 4}, "frame_id": 2, "frame_compile_id": 0, "attempt": 0} +V0806 13:55:51.708000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 4, "id": 202, "source": "L['mod']._modules['transformer']._modules['h']._modules['8']._modules['attn']._modules['c_attn']._parameters['weight']"}, "frame_id": 2, "frame_compile_id": 0, "attempt": 0} +V0806 13:55:51.709000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 102, "describer_id": 4, "size": 9216}, "frame_id": 2, "frame_compile_id": 0, "attempt": 0} +V0806 13:55:51.709000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 203, "ndim": 1, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [2304], "is_leaf": true, "requires_grad": true, "is_parameter": true, "stride": [1], "storage": 102, "view_func": "", "describer_id": 4}, "frame_id": 2, "frame_compile_id": 0, "attempt": 0} +V0806 13:55:51.709000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 4, "id": 203, "source": 
"L['mod']._modules['transformer']._modules['h']._modules['8']._modules['attn']._modules['c_attn']._parameters['bias']"}, "frame_id": 2, "frame_compile_id": 0, "attempt": 0} +V0806 13:55:51.718000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 103, "describer_id": 4, "size": 2359296}, "frame_id": 2, "frame_compile_id": 0, "attempt": 0} +V0806 13:55:51.718000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 204, "ndim": 2, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [768, 768], "is_leaf": true, "requires_grad": true, "is_parameter": true, "stride": [768, 1], "storage": 103, "view_func": "", "describer_id": 4}, "frame_id": 2, "frame_compile_id": 0, "attempt": 0} +V0806 13:55:51.718000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 4, "id": 204, "source": "L['mod']._modules['transformer']._modules['h']._modules['8']._modules['attn']._modules['c_proj']._parameters['weight']"}, "frame_id": 2, "frame_compile_id": 0, "attempt": 0} +V0806 13:55:51.719000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 104, "describer_id": 4, "size": 3072}, "frame_id": 2, "frame_compile_id": 0, "attempt": 0} +V0806 13:55:51.719000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 205, "ndim": 1, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [768], "is_leaf": true, "requires_grad": true, "is_parameter": true, "stride": [1], "storage": 104, "view_func": "", "describer_id": 4}, "frame_id": 2, "frame_compile_id": 0, "attempt": 0} +V0806 13:55:51.719000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 4, "id": 205, "source": "L['mod']._modules['transformer']._modules['h']._modules['8']._modules['attn']._modules['c_proj']._parameters['bias']"}, "frame_id": 2, "frame_compile_id": 0, "attempt": 0} +V0806 13:55:51.723000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 105, "describer_id": 4, "size": 3072}, "frame_id": 2, "frame_compile_id": 0, "attempt": 0} +V0806 13:55:51.723000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 206, "ndim": 1, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [768], "is_leaf": true, "requires_grad": true, "is_parameter": true, "stride": [1], "storage": 105, "view_func": "", "describer_id": 4}, "frame_id": 2, "frame_compile_id": 0, "attempt": 0} +V0806 13:55:51.723000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 4, "id": 206, "source": "L['mod']._modules['transformer']._modules['h']._modules['8']._modules['ln_2']._parameters['weight']"}, "frame_id": 2, "frame_compile_id": 0, "attempt": 0} +V0806 13:55:51.724000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 106, "describer_id": 4, "size": 3072}, "frame_id": 2, "frame_compile_id": 0, "attempt": 0} +V0806 13:55:51.724000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 207, "ndim": 1, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [768], "is_leaf": true, "requires_grad": true, "is_parameter": true, "stride": [1], "storage": 106, "view_func": "", "describer_id": 4}, "frame_id": 2, "frame_compile_id": 0, "attempt": 0} +V0806 13:55:51.724000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 4, "id": 207, "source": 
"L['mod']._modules['transformer']._modules['h']._modules['8']._modules['ln_2']._parameters['bias']"}, "frame_id": 2, "frame_compile_id": 0, "attempt": 0} +V0806 13:55:51.727000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 107, "describer_id": 4, "size": 9437184}, "frame_id": 2, "frame_compile_id": 0, "attempt": 0} +V0806 13:55:51.728000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 211, "ndim": 2, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [3072, 768], "is_leaf": true, "requires_grad": true, "is_parameter": true, "stride": [768, 1], "storage": 107, "view_func": "", "describer_id": 4}, "frame_id": 2, "frame_compile_id": 0, "attempt": 0} +V0806 13:55:51.728000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 4, "id": 211, "source": "L['mod']._modules['transformer']._modules['h']._modules['8']._modules['mlp']._modules['c_fc']._parameters['weight']"}, "frame_id": 2, "frame_compile_id": 0, "attempt": 0} +V0806 13:55:51.728000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 108, "describer_id": 4, "size": 12288}, "frame_id": 2, "frame_compile_id": 0, "attempt": 0} +V0806 13:55:51.728000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 212, "ndim": 1, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [3072], "is_leaf": true, "requires_grad": true, "is_parameter": true, "stride": [1], "storage": 108, "view_func": "", "describer_id": 4}, "frame_id": 2, "frame_compile_id": 0, "attempt": 0} +V0806 13:55:51.729000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 4, "id": 212, "source": "L['mod']._modules['transformer']._modules['h']._modules['8']._modules['mlp']._modules['c_fc']._parameters['bias']"}, "frame_id": 2, "frame_compile_id": 0, "attempt": 0} +V0806 13:55:51.734000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 109, "describer_id": 4, "size": 9437184}, "frame_id": 2, "frame_compile_id": 0, "attempt": 0} +V0806 13:55:51.734000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 213, "ndim": 2, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [768, 3072], "is_leaf": true, "requires_grad": true, "is_parameter": true, "stride": [3072, 1], "storage": 109, "view_func": "", "describer_id": 4}, "frame_id": 2, "frame_compile_id": 0, "attempt": 0} +V0806 13:55:51.734000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 4, "id": 213, "source": "L['mod']._modules['transformer']._modules['h']._modules['8']._modules['mlp']._modules['c_proj']._parameters['weight']"}, "frame_id": 2, "frame_compile_id": 0, "attempt": 0} +V0806 13:55:51.734000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 110, "describer_id": 4, "size": 3072}, "frame_id": 2, "frame_compile_id": 0, "attempt": 0} +V0806 13:55:51.735000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 214, "ndim": 1, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [768], "is_leaf": true, "requires_grad": true, "is_parameter": true, "stride": [1], "storage": 110, "view_func": "", "describer_id": 4}, "frame_id": 2, "frame_compile_id": 0, "attempt": 0} +V0806 13:55:51.735000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 4, "id": 214, "source": 
"L['mod']._modules['transformer']._modules['h']._modules['8']._modules['mlp']._modules['c_proj']._parameters['bias']"}, "frame_id": 2, "frame_compile_id": 0, "attempt": 0} +V0806 13:55:51.740000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 111, "describer_id": 4, "size": 3072}, "frame_id": 2, "frame_compile_id": 0, "attempt": 0} +V0806 13:55:51.740000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 215, "ndim": 1, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [768], "is_leaf": true, "requires_grad": true, "is_parameter": true, "stride": [1], "storage": 111, "view_func": "", "describer_id": 4}, "frame_id": 2, "frame_compile_id": 0, "attempt": 0} +V0806 13:55:51.740000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 4, "id": 215, "source": "L['mod']._modules['transformer']._modules['h']._modules['9']._modules['ln_1']._parameters['weight']"}, "frame_id": 2, "frame_compile_id": 0, "attempt": 0} +V0806 13:55:51.740000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 112, "describer_id": 4, "size": 3072}, "frame_id": 2, "frame_compile_id": 0, "attempt": 0} +V0806 13:55:51.741000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 216, "ndim": 1, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [768], "is_leaf": true, "requires_grad": true, "is_parameter": true, "stride": [1], "storage": 112, "view_func": "", "describer_id": 4}, "frame_id": 2, "frame_compile_id": 0, "attempt": 0} +V0806 13:55:51.741000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 4, "id": 216, "source": "L['mod']._modules['transformer']._modules['h']._modules['9']._modules['ln_1']._parameters['bias']"}, "frame_id": 2, "frame_compile_id": 0, "attempt": 0} +V0806 13:55:51.745000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 113, "describer_id": 4, "size": 7077888}, "frame_id": 2, "frame_compile_id": 0, "attempt": 0} +V0806 13:55:51.745000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 220, "ndim": 2, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [2304, 768], "is_leaf": true, "requires_grad": true, "is_parameter": true, "stride": [768, 1], "storage": 113, "view_func": "", "describer_id": 4}, "frame_id": 2, "frame_compile_id": 0, "attempt": 0} +V0806 13:55:51.745000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 4, "id": 220, "source": "L['mod']._modules['transformer']._modules['h']._modules['9']._modules['attn']._modules['c_attn']._parameters['weight']"}, "frame_id": 2, "frame_compile_id": 0, "attempt": 0} +V0806 13:55:51.746000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 114, "describer_id": 4, "size": 9216}, "frame_id": 2, "frame_compile_id": 0, "attempt": 0} +V0806 13:55:51.746000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 221, "ndim": 1, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [2304], "is_leaf": true, "requires_grad": true, "is_parameter": true, "stride": [1], "storage": 114, "view_func": "", "describer_id": 4}, "frame_id": 2, "frame_compile_id": 0, "attempt": 0} +V0806 13:55:51.746000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 4, "id": 221, "source": 
"L['mod']._modules['transformer']._modules['h']._modules['9']._modules['attn']._modules['c_attn']._parameters['bias']"}, "frame_id": 2, "frame_compile_id": 0, "attempt": 0} +V0806 13:55:51.755000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 115, "describer_id": 4, "size": 2359296}, "frame_id": 2, "frame_compile_id": 0, "attempt": 0} +V0806 13:55:51.755000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 222, "ndim": 2, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [768, 768], "is_leaf": true, "requires_grad": true, "is_parameter": true, "stride": [768, 1], "storage": 115, "view_func": "", "describer_id": 4}, "frame_id": 2, "frame_compile_id": 0, "attempt": 0} +V0806 13:55:51.755000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 4, "id": 222, "source": "L['mod']._modules['transformer']._modules['h']._modules['9']._modules['attn']._modules['c_proj']._parameters['weight']"}, "frame_id": 2, "frame_compile_id": 0, "attempt": 0} +V0806 13:55:51.756000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 116, "describer_id": 4, "size": 3072}, "frame_id": 2, "frame_compile_id": 0, "attempt": 0} +V0806 13:55:51.756000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 223, "ndim": 1, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [768], "is_leaf": true, "requires_grad": true, "is_parameter": true, "stride": [1], "storage": 116, "view_func": "", "describer_id": 4}, "frame_id": 2, "frame_compile_id": 0, "attempt": 0} +V0806 13:55:51.756000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 4, "id": 223, "source": "L['mod']._modules['transformer']._modules['h']._modules['9']._modules['attn']._modules['c_proj']._parameters['bias']"}, "frame_id": 2, "frame_compile_id": 0, "attempt": 0} +V0806 13:55:51.760000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 117, "describer_id": 4, "size": 3072}, "frame_id": 2, "frame_compile_id": 0, "attempt": 0} +V0806 13:55:51.760000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 224, "ndim": 1, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [768], "is_leaf": true, "requires_grad": true, "is_parameter": true, "stride": [1], "storage": 117, "view_func": "", "describer_id": 4}, "frame_id": 2, "frame_compile_id": 0, "attempt": 0} +V0806 13:55:51.760000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 4, "id": 224, "source": "L['mod']._modules['transformer']._modules['h']._modules['9']._modules['ln_2']._parameters['weight']"}, "frame_id": 2, "frame_compile_id": 0, "attempt": 0} +V0806 13:55:51.761000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 118, "describer_id": 4, "size": 3072}, "frame_id": 2, "frame_compile_id": 0, "attempt": 0} +V0806 13:55:51.761000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 225, "ndim": 1, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [768], "is_leaf": true, "requires_grad": true, "is_parameter": true, "stride": [1], "storage": 118, "view_func": "", "describer_id": 4}, "frame_id": 2, "frame_compile_id": 0, "attempt": 0} +V0806 13:55:51.761000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 4, "id": 225, "source": 
"L['mod']._modules['transformer']._modules['h']._modules['9']._modules['ln_2']._parameters['bias']"}, "frame_id": 2, "frame_compile_id": 0, "attempt": 0} +V0806 13:55:51.765000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 119, "describer_id": 4, "size": 9437184}, "frame_id": 2, "frame_compile_id": 0, "attempt": 0} +V0806 13:55:51.765000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 229, "ndim": 2, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [3072, 768], "is_leaf": true, "requires_grad": true, "is_parameter": true, "stride": [768, 1], "storage": 119, "view_func": "", "describer_id": 4}, "frame_id": 2, "frame_compile_id": 0, "attempt": 0} +V0806 13:55:51.765000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 4, "id": 229, "source": "L['mod']._modules['transformer']._modules['h']._modules['9']._modules['mlp']._modules['c_fc']._parameters['weight']"}, "frame_id": 2, "frame_compile_id": 0, "attempt": 0} +V0806 13:55:51.765000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 120, "describer_id": 4, "size": 12288}, "frame_id": 2, "frame_compile_id": 0, "attempt": 0} +V0806 13:55:51.766000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 230, "ndim": 1, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [3072], "is_leaf": true, "requires_grad": true, "is_parameter": true, "stride": [1], "storage": 120, "view_func": "", "describer_id": 4}, "frame_id": 2, "frame_compile_id": 0, "attempt": 0} +V0806 13:55:51.766000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 4, "id": 230, "source": "L['mod']._modules['transformer']._modules['h']._modules['9']._modules['mlp']._modules['c_fc']._parameters['bias']"}, "frame_id": 2, "frame_compile_id": 0, "attempt": 0} +V0806 13:55:51.771000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 121, "describer_id": 4, "size": 9437184}, "frame_id": 2, "frame_compile_id": 0, "attempt": 0} +V0806 13:55:51.771000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 231, "ndim": 2, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [768, 3072], "is_leaf": true, "requires_grad": true, "is_parameter": true, "stride": [3072, 1], "storage": 121, "view_func": "", "describer_id": 4}, "frame_id": 2, "frame_compile_id": 0, "attempt": 0} +V0806 13:55:51.771000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 4, "id": 231, "source": "L['mod']._modules['transformer']._modules['h']._modules['9']._modules['mlp']._modules['c_proj']._parameters['weight']"}, "frame_id": 2, "frame_compile_id": 0, "attempt": 0} +V0806 13:55:51.772000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 122, "describer_id": 4, "size": 3072}, "frame_id": 2, "frame_compile_id": 0, "attempt": 0} +V0806 13:55:51.772000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 232, "ndim": 1, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [768], "is_leaf": true, "requires_grad": true, "is_parameter": true, "stride": [1], "storage": 122, "view_func": "", "describer_id": 4}, "frame_id": 2, "frame_compile_id": 0, "attempt": 0} +V0806 13:55:51.772000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 4, "id": 232, "source": 
"L['mod']._modules['transformer']._modules['h']._modules['9']._modules['mlp']._modules['c_proj']._parameters['bias']"}, "frame_id": 2, "frame_compile_id": 0, "attempt": 0} +V0806 13:55:51.777000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 123, "describer_id": 4, "size": 3072}, "frame_id": 2, "frame_compile_id": 0, "attempt": 0} +V0806 13:55:51.777000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 233, "ndim": 1, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [768], "is_leaf": true, "requires_grad": true, "is_parameter": true, "stride": [1], "storage": 123, "view_func": "", "describer_id": 4}, "frame_id": 2, "frame_compile_id": 0, "attempt": 0} +V0806 13:55:51.777000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 4, "id": 233, "source": "L['mod']._modules['transformer']._modules['h']._modules['10']._modules['ln_1']._parameters['weight']"}, "frame_id": 2, "frame_compile_id": 0, "attempt": 0} +V0806 13:55:51.778000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 124, "describer_id": 4, "size": 3072}, "frame_id": 2, "frame_compile_id": 0, "attempt": 0} +V0806 13:55:51.778000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 234, "ndim": 1, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [768], "is_leaf": true, "requires_grad": true, "is_parameter": true, "stride": [1], "storage": 124, "view_func": "", "describer_id": 4}, "frame_id": 2, "frame_compile_id": 0, "attempt": 0} +V0806 13:55:51.778000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 4, "id": 234, "source": "L['mod']._modules['transformer']._modules['h']._modules['10']._modules['ln_1']._parameters['bias']"}, "frame_id": 2, "frame_compile_id": 0, "attempt": 0} +V0806 13:55:51.782000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 125, "describer_id": 4, "size": 7077888}, "frame_id": 2, "frame_compile_id": 0, "attempt": 0} +V0806 13:55:51.783000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 238, "ndim": 2, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [2304, 768], "is_leaf": true, "requires_grad": true, "is_parameter": true, "stride": [768, 1], "storage": 125, "view_func": "", "describer_id": 4}, "frame_id": 2, "frame_compile_id": 0, "attempt": 0} +V0806 13:55:51.783000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 4, "id": 238, "source": "L['mod']._modules['transformer']._modules['h']._modules['10']._modules['attn']._modules['c_attn']._parameters['weight']"}, "frame_id": 2, "frame_compile_id": 0, "attempt": 0} +V0806 13:55:51.783000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 126, "describer_id": 4, "size": 9216}, "frame_id": 2, "frame_compile_id": 0, "attempt": 0} +V0806 13:55:51.783000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 239, "ndim": 1, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [2304], "is_leaf": true, "requires_grad": true, "is_parameter": true, "stride": [1], "storage": 126, "view_func": "", "describer_id": 4}, "frame_id": 2, "frame_compile_id": 0, "attempt": 0} +V0806 13:55:51.784000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 4, "id": 239, "source": 
"L['mod']._modules['transformer']._modules['h']._modules['10']._modules['attn']._modules['c_attn']._parameters['bias']"}, "frame_id": 2, "frame_compile_id": 0, "attempt": 0} +V0806 13:55:51.792000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 127, "describer_id": 4, "size": 2359296}, "frame_id": 2, "frame_compile_id": 0, "attempt": 0} +V0806 13:55:51.793000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 240, "ndim": 2, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [768, 768], "is_leaf": true, "requires_grad": true, "is_parameter": true, "stride": [768, 1], "storage": 127, "view_func": "", "describer_id": 4}, "frame_id": 2, "frame_compile_id": 0, "attempt": 0} +V0806 13:55:51.793000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 4, "id": 240, "source": "L['mod']._modules['transformer']._modules['h']._modules['10']._modules['attn']._modules['c_proj']._parameters['weight']"}, "frame_id": 2, "frame_compile_id": 0, "attempt": 0} +V0806 13:55:51.793000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 128, "describer_id": 4, "size": 3072}, "frame_id": 2, "frame_compile_id": 0, "attempt": 0} +V0806 13:55:51.793000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 241, "ndim": 1, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [768], "is_leaf": true, "requires_grad": true, "is_parameter": true, "stride": [1], "storage": 128, "view_func": "", "describer_id": 4}, "frame_id": 2, "frame_compile_id": 0, "attempt": 0} +V0806 13:55:51.793000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 4, "id": 241, "source": "L['mod']._modules['transformer']._modules['h']._modules['10']._modules['attn']._modules['c_proj']._parameters['bias']"}, "frame_id": 2, "frame_compile_id": 0, "attempt": 0} +V0806 13:55:51.797000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 129, "describer_id": 4, "size": 3072}, "frame_id": 2, "frame_compile_id": 0, "attempt": 0} +V0806 13:55:51.797000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 242, "ndim": 1, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [768], "is_leaf": true, "requires_grad": true, "is_parameter": true, "stride": [1], "storage": 129, "view_func": "", "describer_id": 4}, "frame_id": 2, "frame_compile_id": 0, "attempt": 0} +V0806 13:55:51.797000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 4, "id": 242, "source": "L['mod']._modules['transformer']._modules['h']._modules['10']._modules['ln_2']._parameters['weight']"}, "frame_id": 2, "frame_compile_id": 0, "attempt": 0} +V0806 13:55:51.798000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 130, "describer_id": 4, "size": 3072}, "frame_id": 2, "frame_compile_id": 0, "attempt": 0} +V0806 13:55:51.798000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 243, "ndim": 1, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [768], "is_leaf": true, "requires_grad": true, "is_parameter": true, "stride": [1], "storage": 130, "view_func": "", "describer_id": 4}, "frame_id": 2, "frame_compile_id": 0, "attempt": 0} +V0806 13:55:51.798000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 4, "id": 243, "source": 
"L['mod']._modules['transformer']._modules['h']._modules['10']._modules['ln_2']._parameters['bias']"}, "frame_id": 2, "frame_compile_id": 0, "attempt": 0} +V0806 13:55:51.802000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 131, "describer_id": 4, "size": 9437184}, "frame_id": 2, "frame_compile_id": 0, "attempt": 0} +V0806 13:55:51.802000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 247, "ndim": 2, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [3072, 768], "is_leaf": true, "requires_grad": true, "is_parameter": true, "stride": [768, 1], "storage": 131, "view_func": "", "describer_id": 4}, "frame_id": 2, "frame_compile_id": 0, "attempt": 0} +V0806 13:55:51.802000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 4, "id": 247, "source": "L['mod']._modules['transformer']._modules['h']._modules['10']._modules['mlp']._modules['c_fc']._parameters['weight']"}, "frame_id": 2, "frame_compile_id": 0, "attempt": 0} +V0806 13:55:51.803000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 132, "describer_id": 4, "size": 12288}, "frame_id": 2, "frame_compile_id": 0, "attempt": 0} +V0806 13:55:51.803000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 248, "ndim": 1, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [3072], "is_leaf": true, "requires_grad": true, "is_parameter": true, "stride": [1], "storage": 132, "view_func": "", "describer_id": 4}, "frame_id": 2, "frame_compile_id": 0, "attempt": 0} +V0806 13:55:51.803000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 4, "id": 248, "source": "L['mod']._modules['transformer']._modules['h']._modules['10']._modules['mlp']._modules['c_fc']._parameters['bias']"}, "frame_id": 2, "frame_compile_id": 0, "attempt": 0} +V0806 13:55:51.808000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 133, "describer_id": 4, "size": 9437184}, "frame_id": 2, "frame_compile_id": 0, "attempt": 0} +V0806 13:55:51.808000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 249, "ndim": 2, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [768, 3072], "is_leaf": true, "requires_grad": true, "is_parameter": true, "stride": [3072, 1], "storage": 133, "view_func": "", "describer_id": 4}, "frame_id": 2, "frame_compile_id": 0, "attempt": 0} +V0806 13:55:51.808000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 4, "id": 249, "source": "L['mod']._modules['transformer']._modules['h']._modules['10']._modules['mlp']._modules['c_proj']._parameters['weight']"}, "frame_id": 2, "frame_compile_id": 0, "attempt": 0} +V0806 13:55:51.809000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 134, "describer_id": 4, "size": 3072}, "frame_id": 2, "frame_compile_id": 0, "attempt": 0} +V0806 13:55:51.809000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 250, "ndim": 1, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [768], "is_leaf": true, "requires_grad": true, "is_parameter": true, "stride": [1], "storage": 134, "view_func": "", "describer_id": 4}, "frame_id": 2, "frame_compile_id": 0, "attempt": 0} +V0806 13:55:51.809000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 4, "id": 250, "source": 
"L['mod']._modules['transformer']._modules['h']._modules['10']._modules['mlp']._modules['c_proj']._parameters['bias']"}, "frame_id": 2, "frame_compile_id": 0, "attempt": 0} +V0806 13:55:51.814000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 135, "describer_id": 4, "size": 3072}, "frame_id": 2, "frame_compile_id": 0, "attempt": 0} +V0806 13:55:51.814000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 251, "ndim": 1, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [768], "is_leaf": true, "requires_grad": true, "is_parameter": true, "stride": [1], "storage": 135, "view_func": "", "describer_id": 4}, "frame_id": 2, "frame_compile_id": 0, "attempt": 0} +V0806 13:55:51.814000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 4, "id": 251, "source": "L['mod']._modules['transformer']._modules['h']._modules['11']._modules['ln_1']._parameters['weight']"}, "frame_id": 2, "frame_compile_id": 0, "attempt": 0} +V0806 13:55:51.815000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 136, "describer_id": 4, "size": 3072}, "frame_id": 2, "frame_compile_id": 0, "attempt": 0} +V0806 13:55:51.815000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 252, "ndim": 1, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [768], "is_leaf": true, "requires_grad": true, "is_parameter": true, "stride": [1], "storage": 136, "view_func": "", "describer_id": 4}, "frame_id": 2, "frame_compile_id": 0, "attempt": 0} +V0806 13:55:51.815000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 4, "id": 252, "source": "L['mod']._modules['transformer']._modules['h']._modules['11']._modules['ln_1']._parameters['bias']"}, "frame_id": 2, "frame_compile_id": 0, "attempt": 0} +V0806 13:55:51.819000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 137, "describer_id": 4, "size": 7077888}, "frame_id": 2, "frame_compile_id": 0, "attempt": 0} +V0806 13:55:51.819000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 256, "ndim": 2, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [2304, 768], "is_leaf": true, "requires_grad": true, "is_parameter": true, "stride": [768, 1], "storage": 137, "view_func": "", "describer_id": 4}, "frame_id": 2, "frame_compile_id": 0, "attempt": 0} +V0806 13:55:51.820000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 4, "id": 256, "source": "L['mod']._modules['transformer']._modules['h']._modules['11']._modules['attn']._modules['c_attn']._parameters['weight']"}, "frame_id": 2, "frame_compile_id": 0, "attempt": 0} +V0806 13:55:51.820000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 138, "describer_id": 4, "size": 9216}, "frame_id": 2, "frame_compile_id": 0, "attempt": 0} +V0806 13:55:51.820000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 257, "ndim": 1, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [2304], "is_leaf": true, "requires_grad": true, "is_parameter": true, "stride": [1], "storage": 138, "view_func": "", "describer_id": 4}, "frame_id": 2, "frame_compile_id": 0, "attempt": 0} +V0806 13:55:51.820000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 4, "id": 257, "source": 
"L['mod']._modules['transformer']._modules['h']._modules['11']._modules['attn']._modules['c_attn']._parameters['bias']"}, "frame_id": 2, "frame_compile_id": 0, "attempt": 0} +V0806 13:55:51.829000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 139, "describer_id": 4, "size": 2359296}, "frame_id": 2, "frame_compile_id": 0, "attempt": 0} +V0806 13:55:51.829000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 258, "ndim": 2, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [768, 768], "is_leaf": true, "requires_grad": true, "is_parameter": true, "stride": [768, 1], "storage": 139, "view_func": "", "describer_id": 4}, "frame_id": 2, "frame_compile_id": 0, "attempt": 0} +V0806 13:55:51.829000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 4, "id": 258, "source": "L['mod']._modules['transformer']._modules['h']._modules['11']._modules['attn']._modules['c_proj']._parameters['weight']"}, "frame_id": 2, "frame_compile_id": 0, "attempt": 0} +V0806 13:55:51.830000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 140, "describer_id": 4, "size": 3072}, "frame_id": 2, "frame_compile_id": 0, "attempt": 0} +V0806 13:55:51.830000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 259, "ndim": 1, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [768], "is_leaf": true, "requires_grad": true, "is_parameter": true, "stride": [1], "storage": 140, "view_func": "", "describer_id": 4}, "frame_id": 2, "frame_compile_id": 0, "attempt": 0} +V0806 13:55:51.830000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 4, "id": 259, "source": "L['mod']._modules['transformer']._modules['h']._modules['11']._modules['attn']._modules['c_proj']._parameters['bias']"}, "frame_id": 2, "frame_compile_id": 0, "attempt": 0} +V0806 13:55:51.834000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 141, "describer_id": 4, "size": 3072}, "frame_id": 2, "frame_compile_id": 0, "attempt": 0} +V0806 13:55:51.834000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 260, "ndim": 1, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [768], "is_leaf": true, "requires_grad": true, "is_parameter": true, "stride": [1], "storage": 141, "view_func": "", "describer_id": 4}, "frame_id": 2, "frame_compile_id": 0, "attempt": 0} +V0806 13:55:51.834000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 4, "id": 260, "source": "L['mod']._modules['transformer']._modules['h']._modules['11']._modules['ln_2']._parameters['weight']"}, "frame_id": 2, "frame_compile_id": 0, "attempt": 0} +V0806 13:55:51.835000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 142, "describer_id": 4, "size": 3072}, "frame_id": 2, "frame_compile_id": 0, "attempt": 0} +V0806 13:55:51.835000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 261, "ndim": 1, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [768], "is_leaf": true, "requires_grad": true, "is_parameter": true, "stride": [1], "storage": 142, "view_func": "", "describer_id": 4}, "frame_id": 2, "frame_compile_id": 0, "attempt": 0} +V0806 13:55:51.835000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 4, "id": 261, "source": 
"L['mod']._modules['transformer']._modules['h']._modules['11']._modules['ln_2']._parameters['bias']"}, "frame_id": 2, "frame_compile_id": 0, "attempt": 0} +V0806 13:55:51.839000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 143, "describer_id": 4, "size": 9437184}, "frame_id": 2, "frame_compile_id": 0, "attempt": 0} +V0806 13:55:51.839000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 265, "ndim": 2, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [3072, 768], "is_leaf": true, "requires_grad": true, "is_parameter": true, "stride": [768, 1], "storage": 143, "view_func": "", "describer_id": 4}, "frame_id": 2, "frame_compile_id": 0, "attempt": 0} +V0806 13:55:51.839000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 4, "id": 265, "source": "L['mod']._modules['transformer']._modules['h']._modules['11']._modules['mlp']._modules['c_fc']._parameters['weight']"}, "frame_id": 2, "frame_compile_id": 0, "attempt": 0} +V0806 13:55:51.840000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 144, "describer_id": 4, "size": 12288}, "frame_id": 2, "frame_compile_id": 0, "attempt": 0} +V0806 13:55:51.840000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 266, "ndim": 1, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [3072], "is_leaf": true, "requires_grad": true, "is_parameter": true, "stride": [1], "storage": 144, "view_func": "", "describer_id": 4}, "frame_id": 2, "frame_compile_id": 0, "attempt": 0} +V0806 13:55:51.840000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 4, "id": 266, "source": "L['mod']._modules['transformer']._modules['h']._modules['11']._modules['mlp']._modules['c_fc']._parameters['bias']"}, "frame_id": 2, "frame_compile_id": 0, "attempt": 0} +V0806 13:55:51.845000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 145, "describer_id": 4, "size": 9437184}, "frame_id": 2, "frame_compile_id": 0, "attempt": 0} +V0806 13:55:51.845000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 267, "ndim": 2, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [768, 3072], "is_leaf": true, "requires_grad": true, "is_parameter": true, "stride": [3072, 1], "storage": 145, "view_func": "", "describer_id": 4}, "frame_id": 2, "frame_compile_id": 0, "attempt": 0} +V0806 13:55:51.845000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 4, "id": 267, "source": "L['mod']._modules['transformer']._modules['h']._modules['11']._modules['mlp']._modules['c_proj']._parameters['weight']"}, "frame_id": 2, "frame_compile_id": 0, "attempt": 0} +V0806 13:55:51.846000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 146, "describer_id": 4, "size": 3072}, "frame_id": 2, "frame_compile_id": 0, "attempt": 0} +V0806 13:55:51.846000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 268, "ndim": 1, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [768], "is_leaf": true, "requires_grad": true, "is_parameter": true, "stride": [1], "storage": 146, "view_func": "", "describer_id": 4}, "frame_id": 2, "frame_compile_id": 0, "attempt": 0} +V0806 13:55:51.846000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 4, "id": 268, "source": 
"L['mod']._modules['transformer']._modules['h']._modules['11']._modules['mlp']._modules['c_proj']._parameters['bias']"}, "frame_id": 2, "frame_compile_id": 0, "attempt": 0} +V0806 13:55:51.850000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 147, "describer_id": 4, "size": 3072}, "frame_id": 2, "frame_compile_id": 0, "attempt": 0} +V0806 13:55:51.850000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 269, "ndim": 1, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [768], "is_leaf": true, "requires_grad": true, "is_parameter": true, "stride": [1], "storage": 147, "view_func": "", "describer_id": 4}, "frame_id": 2, "frame_compile_id": 0, "attempt": 0} +V0806 13:55:51.850000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 4, "id": 269, "source": "L['mod']._modules['transformer']._modules['ln_f']._parameters['weight']"}, "frame_id": 2, "frame_compile_id": 0, "attempt": 0} +V0806 13:55:51.851000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 148, "describer_id": 4, "size": 3072}, "frame_id": 2, "frame_compile_id": 0, "attempt": 0} +V0806 13:55:51.851000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 270, "ndim": 1, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [768], "is_leaf": true, "requires_grad": true, "is_parameter": true, "stride": [1], "storage": 148, "view_func": "", "describer_id": 4}, "frame_id": 2, "frame_compile_id": 0, "attempt": 0} +V0806 13:55:51.851000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 4, "id": 270, "source": "L['mod']._modules['transformer']._modules['ln_f']._parameters['bias']"}, "frame_id": 2, "frame_compile_id": 0, "attempt": 0} +V0806 13:55:51.863000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 0, "describer_id": 5, "size": 760}, "frame_id": 2, "frame_compile_id": 0, "attempt": 1} +V0806 13:55:51.863000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 0, "ndim": 2, "dtype": "torch.int64", "device": "device(type='cuda', index=0)", "size": [1, 64], "is_leaf": true, "stride": [64, 1], "storage": 0, "view_func": "", "describer_id": 5}, "frame_id": 2, "frame_compile_id": 0, "attempt": 1} +V0806 13:55:51.863000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 5, "id": 0, "source": "L['cloned_inputs'][0]"}, "frame_id": 2, "frame_compile_id": 0, "attempt": 1} +V0806 13:55:51.868000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 1, "describer_id": 5, "size": 154533888}, "frame_id": 2, "frame_compile_id": 0, "attempt": 1} +V0806 13:55:51.868000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 1, "ndim": 2, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [50304, 768], "is_leaf": true, "requires_grad": true, "is_parameter": true, "stride": [768, 1], "storage": 1, "view_func": "", "describer_id": 5}, "frame_id": 2, "frame_compile_id": 0, "attempt": 1} +V0806 13:55:51.868000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 5, "id": 1, "source": "L['mod']._modules['transformer']._modules['wte']._parameters['weight']"}, "frame_id": 2, "frame_compile_id": 0, "attempt": 1} +V0806 13:55:51.870000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 2, "describer_id": 5, "size": 3145728}, "frame_id": 2, "frame_compile_id": 0, "attempt": 1} 
+V0806 13:55:51.870000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 2, "ndim": 2, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [1024, 768], "is_leaf": true, "requires_grad": true, "is_parameter": true, "stride": [768, 1], "storage": 2, "view_func": "", "describer_id": 5}, "frame_id": 2, "frame_compile_id": 0, "attempt": 1} +V0806 13:55:51.870000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 5, "id": 2, "source": "L['mod']._modules['transformer']._modules['wpe']._parameters['weight']"}, "frame_id": 2, "frame_compile_id": 0, "attempt": 1} +V0806 13:55:51.877000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 3, "describer_id": 5, "size": 3072}, "frame_id": 2, "frame_compile_id": 0, "attempt": 1} +V0806 13:55:51.877000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 3, "ndim": 1, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [768], "is_leaf": true, "requires_grad": true, "is_parameter": true, "stride": [1], "storage": 3, "view_func": "", "describer_id": 5}, "frame_id": 2, "frame_compile_id": 0, "attempt": 1} +V0806 13:55:51.877000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 5, "id": 3, "source": "L['mod']._modules['transformer']._modules['h']._modules['0']._modules['ln_1']._parameters['weight']"}, "frame_id": 2, "frame_compile_id": 0, "attempt": 1} +V0806 13:55:51.878000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 4, "describer_id": 5, "size": 3072}, "frame_id": 2, "frame_compile_id": 0, "attempt": 1} +V0806 13:55:51.878000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 4, "ndim": 1, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [768], "is_leaf": true, "requires_grad": true, "is_parameter": true, "stride": [1], "storage": 4, "view_func": "", "describer_id": 5}, "frame_id": 2, "frame_compile_id": 0, "attempt": 1} +V0806 13:55:51.878000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 5, "id": 4, "source": "L['mod']._modules['transformer']._modules['h']._modules['0']._modules['ln_1']._parameters['bias']"}, "frame_id": 2, "frame_compile_id": 0, "attempt": 1} +V0806 13:55:51.883000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 5, "describer_id": 5, "size": 7077888}, "frame_id": 2, "frame_compile_id": 0, "attempt": 1} +V0806 13:55:51.883000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 8, "ndim": 2, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [2304, 768], "is_leaf": true, "requires_grad": true, "is_parameter": true, "stride": [768, 1], "storage": 5, "view_func": "", "describer_id": 5}, "frame_id": 2, "frame_compile_id": 0, "attempt": 1} +V0806 13:55:51.883000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 5, "id": 8, "source": "L['mod']._modules['transformer']._modules['h']._modules['0']._modules['attn']._modules['c_attn']._parameters['weight']"}, "frame_id": 2, "frame_compile_id": 0, "attempt": 1} +V0806 13:55:51.884000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 6, "describer_id": 5, "size": 9216}, "frame_id": 2, "frame_compile_id": 0, "attempt": 1} +V0806 13:55:51.884000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 9, "ndim": 1, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", 
"size": [2304], "is_leaf": true, "requires_grad": true, "is_parameter": true, "stride": [1], "storage": 6, "view_func": "", "describer_id": 5}, "frame_id": 2, "frame_compile_id": 0, "attempt": 1} +V0806 13:55:51.884000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 5, "id": 9, "source": "L['mod']._modules['transformer']._modules['h']._modules['0']._modules['attn']._modules['c_attn']._parameters['bias']"}, "frame_id": 2, "frame_compile_id": 0, "attempt": 1} +V0806 13:55:51.893000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 7, "describer_id": 5, "size": 2359296}, "frame_id": 2, "frame_compile_id": 0, "attempt": 1} +V0806 13:55:51.893000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 10, "ndim": 2, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [768, 768], "is_leaf": true, "requires_grad": true, "is_parameter": true, "stride": [768, 1], "storage": 7, "view_func": "", "describer_id": 5}, "frame_id": 2, "frame_compile_id": 0, "attempt": 1} +V0806 13:55:51.893000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 5, "id": 10, "source": "L['mod']._modules['transformer']._modules['h']._modules['0']._modules['attn']._modules['c_proj']._parameters['weight']"}, "frame_id": 2, "frame_compile_id": 0, "attempt": 1} +V0806 13:55:51.894000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 8, "describer_id": 5, "size": 3072}, "frame_id": 2, "frame_compile_id": 0, "attempt": 1} +V0806 13:55:51.894000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 11, "ndim": 1, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [768], "is_leaf": true, "requires_grad": true, "is_parameter": true, "stride": [1], "storage": 8, "view_func": "", "describer_id": 5}, "frame_id": 2, "frame_compile_id": 0, "attempt": 1} +V0806 13:55:51.894000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 5, "id": 11, "source": "L['mod']._modules['transformer']._modules['h']._modules['0']._modules['attn']._modules['c_proj']._parameters['bias']"}, "frame_id": 2, "frame_compile_id": 0, "attempt": 1} +V0806 13:55:51.898000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 9, "describer_id": 5, "size": 3072}, "frame_id": 2, "frame_compile_id": 0, "attempt": 1} +V0806 13:55:51.898000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 12, "ndim": 1, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [768], "is_leaf": true, "requires_grad": true, "is_parameter": true, "stride": [1], "storage": 9, "view_func": "", "describer_id": 5}, "frame_id": 2, "frame_compile_id": 0, "attempt": 1} +V0806 13:55:51.898000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 5, "id": 12, "source": "L['mod']._modules['transformer']._modules['h']._modules['0']._modules['ln_2']._parameters['weight']"}, "frame_id": 2, "frame_compile_id": 0, "attempt": 1} +V0806 13:55:51.899000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 10, "describer_id": 5, "size": 3072}, "frame_id": 2, "frame_compile_id": 0, "attempt": 1} +V0806 13:55:51.899000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 13, "ndim": 1, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [768], "is_leaf": true, "requires_grad": true, "is_parameter": true, "stride": [1], "storage": 10, 
"view_func": "", "describer_id": 5}, "frame_id": 2, "frame_compile_id": 0, "attempt": 1} +V0806 13:55:51.899000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 5, "id": 13, "source": "L['mod']._modules['transformer']._modules['h']._modules['0']._modules['ln_2']._parameters['bias']"}, "frame_id": 2, "frame_compile_id": 0, "attempt": 1} +V0806 13:55:51.902000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 11, "describer_id": 5, "size": 9437184}, "frame_id": 2, "frame_compile_id": 0, "attempt": 1} +V0806 13:55:51.903000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 17, "ndim": 2, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [3072, 768], "is_leaf": true, "requires_grad": true, "is_parameter": true, "stride": [768, 1], "storage": 11, "view_func": "", "describer_id": 5}, "frame_id": 2, "frame_compile_id": 0, "attempt": 1} +V0806 13:55:51.903000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 5, "id": 17, "source": "L['mod']._modules['transformer']._modules['h']._modules['0']._modules['mlp']._modules['c_fc']._parameters['weight']"}, "frame_id": 2, "frame_compile_id": 0, "attempt": 1} +V0806 13:55:51.903000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 12, "describer_id": 5, "size": 12288}, "frame_id": 2, "frame_compile_id": 0, "attempt": 1} +V0806 13:55:51.904000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 18, "ndim": 1, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [3072], "is_leaf": true, "requires_grad": true, "is_parameter": true, "stride": [1], "storage": 12, "view_func": "", "describer_id": 5}, "frame_id": 2, "frame_compile_id": 0, "attempt": 1} +V0806 13:55:51.904000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 5, "id": 18, "source": "L['mod']._modules['transformer']._modules['h']._modules['0']._modules['mlp']._modules['c_fc']._parameters['bias']"}, "frame_id": 2, "frame_compile_id": 0, "attempt": 1} +V0806 13:55:51.909000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 13, "describer_id": 5, "size": 9437184}, "frame_id": 2, "frame_compile_id": 0, "attempt": 1} +V0806 13:55:51.909000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 19, "ndim": 2, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [768, 3072], "is_leaf": true, "requires_grad": true, "is_parameter": true, "stride": [3072, 1], "storage": 13, "view_func": "", "describer_id": 5}, "frame_id": 2, "frame_compile_id": 0, "attempt": 1} +V0806 13:55:51.909000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 5, "id": 19, "source": "L['mod']._modules['transformer']._modules['h']._modules['0']._modules['mlp']._modules['c_proj']._parameters['weight']"}, "frame_id": 2, "frame_compile_id": 0, "attempt": 1} +V0806 13:55:51.910000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 14, "describer_id": 5, "size": 3072}, "frame_id": 2, "frame_compile_id": 0, "attempt": 1} +V0806 13:55:51.910000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 20, "ndim": 1, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [768], "is_leaf": true, "requires_grad": true, "is_parameter": true, "stride": [1], "storage": 14, "view_func": "", "describer_id": 5}, "frame_id": 2, "frame_compile_id": 0, "attempt": 1} +V0806 
13:55:51.910000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 5, "id": 20, "source": "L['mod']._modules['transformer']._modules['h']._modules['0']._modules['mlp']._modules['c_proj']._parameters['bias']"}, "frame_id": 2, "frame_compile_id": 0, "attempt": 1} +V0806 13:55:51.915000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 15, "describer_id": 5, "size": 3072}, "frame_id": 2, "frame_compile_id": 0, "attempt": 1} +V0806 13:55:51.915000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 21, "ndim": 1, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [768], "is_leaf": true, "requires_grad": true, "is_parameter": true, "stride": [1], "storage": 15, "view_func": "", "describer_id": 5}, "frame_id": 2, "frame_compile_id": 0, "attempt": 1} +V0806 13:55:51.915000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 5, "id": 21, "source": "L['mod']._modules['transformer']._modules['h']._modules['1']._modules['ln_1']._parameters['weight']"}, "frame_id": 2, "frame_compile_id": 0, "attempt": 1} +V0806 13:55:51.916000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 16, "describer_id": 5, "size": 3072}, "frame_id": 2, "frame_compile_id": 0, "attempt": 1} +V0806 13:55:51.916000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 22, "ndim": 1, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [768], "is_leaf": true, "requires_grad": true, "is_parameter": true, "stride": [1], "storage": 16, "view_func": "", "describer_id": 5}, "frame_id": 2, "frame_compile_id": 0, "attempt": 1} +V0806 13:55:51.916000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 5, "id": 22, "source": "L['mod']._modules['transformer']._modules['h']._modules['1']._modules['ln_1']._parameters['bias']"}, "frame_id": 2, "frame_compile_id": 0, "attempt": 1} +V0806 13:55:51.920000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 17, "describer_id": 5, "size": 7077888}, "frame_id": 2, "frame_compile_id": 0, "attempt": 1} +V0806 13:55:51.920000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 26, "ndim": 2, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [2304, 768], "is_leaf": true, "requires_grad": true, "is_parameter": true, "stride": [768, 1], "storage": 17, "view_func": "", "describer_id": 5}, "frame_id": 2, "frame_compile_id": 0, "attempt": 1} +V0806 13:55:51.920000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 5, "id": 26, "source": "L['mod']._modules['transformer']._modules['h']._modules['1']._modules['attn']._modules['c_attn']._parameters['weight']"}, "frame_id": 2, "frame_compile_id": 0, "attempt": 1} +V0806 13:55:51.921000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 18, "describer_id": 5, "size": 9216}, "frame_id": 2, "frame_compile_id": 0, "attempt": 1} +V0806 13:55:51.921000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 27, "ndim": 1, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [2304], "is_leaf": true, "requires_grad": true, "is_parameter": true, "stride": [1], "storage": 18, "view_func": "", "describer_id": 5}, "frame_id": 2, "frame_compile_id": 0, "attempt": 1} +V0806 13:55:51.921000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 5, "id": 27, "source": 
"L['mod']._modules['transformer']._modules['h']._modules['1']._modules['attn']._modules['c_attn']._parameters['bias']"}, "frame_id": 2, "frame_compile_id": 0, "attempt": 1} +V0806 13:55:51.930000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 19, "describer_id": 5, "size": 2359296}, "frame_id": 2, "frame_compile_id": 0, "attempt": 1} +V0806 13:55:51.930000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 28, "ndim": 2, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [768, 768], "is_leaf": true, "requires_grad": true, "is_parameter": true, "stride": [768, 1], "storage": 19, "view_func": "", "describer_id": 5}, "frame_id": 2, "frame_compile_id": 0, "attempt": 1} +V0806 13:55:51.930000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 5, "id": 28, "source": "L['mod']._modules['transformer']._modules['h']._modules['1']._modules['attn']._modules['c_proj']._parameters['weight']"}, "frame_id": 2, "frame_compile_id": 0, "attempt": 1} +V0806 13:55:51.931000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 20, "describer_id": 5, "size": 3072}, "frame_id": 2, "frame_compile_id": 0, "attempt": 1} +V0806 13:55:51.931000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 29, "ndim": 1, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [768], "is_leaf": true, "requires_grad": true, "is_parameter": true, "stride": [1], "storage": 20, "view_func": "", "describer_id": 5}, "frame_id": 2, "frame_compile_id": 0, "attempt": 1} +V0806 13:55:51.931000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 5, "id": 29, "source": "L['mod']._modules['transformer']._modules['h']._modules['1']._modules['attn']._modules['c_proj']._parameters['bias']"}, "frame_id": 2, "frame_compile_id": 0, "attempt": 1} +V0806 13:55:51.935000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 21, "describer_id": 5, "size": 3072}, "frame_id": 2, "frame_compile_id": 0, "attempt": 1} +V0806 13:55:51.935000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 30, "ndim": 1, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [768], "is_leaf": true, "requires_grad": true, "is_parameter": true, "stride": [1], "storage": 21, "view_func": "", "describer_id": 5}, "frame_id": 2, "frame_compile_id": 0, "attempt": 1} +V0806 13:55:51.935000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 5, "id": 30, "source": "L['mod']._modules['transformer']._modules['h']._modules['1']._modules['ln_2']._parameters['weight']"}, "frame_id": 2, "frame_compile_id": 0, "attempt": 1} +V0806 13:55:51.936000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 22, "describer_id": 5, "size": 3072}, "frame_id": 2, "frame_compile_id": 0, "attempt": 1} +V0806 13:55:51.936000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 31, "ndim": 1, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [768], "is_leaf": true, "requires_grad": true, "is_parameter": true, "stride": [1], "storage": 22, "view_func": "", "describer_id": 5}, "frame_id": 2, "frame_compile_id": 0, "attempt": 1} +V0806 13:55:51.936000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 5, "id": 31, "source": "L['mod']._modules['transformer']._modules['h']._modules['1']._modules['ln_2']._parameters['bias']"}, 
"frame_id": 2, "frame_compile_id": 0, "attempt": 1} +V0806 13:55:51.940000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 23, "describer_id": 5, "size": 9437184}, "frame_id": 2, "frame_compile_id": 0, "attempt": 1} +V0806 13:55:51.940000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 35, "ndim": 2, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [3072, 768], "is_leaf": true, "requires_grad": true, "is_parameter": true, "stride": [768, 1], "storage": 23, "view_func": "", "describer_id": 5}, "frame_id": 2, "frame_compile_id": 0, "attempt": 1} +V0806 13:55:51.940000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 5, "id": 35, "source": "L['mod']._modules['transformer']._modules['h']._modules['1']._modules['mlp']._modules['c_fc']._parameters['weight']"}, "frame_id": 2, "frame_compile_id": 0, "attempt": 1} +V0806 13:55:51.941000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 24, "describer_id": 5, "size": 12288}, "frame_id": 2, "frame_compile_id": 0, "attempt": 1} +V0806 13:55:51.941000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 36, "ndim": 1, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [3072], "is_leaf": true, "requires_grad": true, "is_parameter": true, "stride": [1], "storage": 24, "view_func": "", "describer_id": 5}, "frame_id": 2, "frame_compile_id": 0, "attempt": 1} +V0806 13:55:51.941000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 5, "id": 36, "source": "L['mod']._modules['transformer']._modules['h']._modules['1']._modules['mlp']._modules['c_fc']._parameters['bias']"}, "frame_id": 2, "frame_compile_id": 0, "attempt": 1} +V0806 13:55:51.946000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 25, "describer_id": 5, "size": 9437184}, "frame_id": 2, "frame_compile_id": 0, "attempt": 1} +V0806 13:55:51.946000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 37, "ndim": 2, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [768, 3072], "is_leaf": true, "requires_grad": true, "is_parameter": true, "stride": [3072, 1], "storage": 25, "view_func": "", "describer_id": 5}, "frame_id": 2, "frame_compile_id": 0, "attempt": 1} +V0806 13:55:51.946000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 5, "id": 37, "source": "L['mod']._modules['transformer']._modules['h']._modules['1']._modules['mlp']._modules['c_proj']._parameters['weight']"}, "frame_id": 2, "frame_compile_id": 0, "attempt": 1} +V0806 13:55:51.947000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 26, "describer_id": 5, "size": 3072}, "frame_id": 2, "frame_compile_id": 0, "attempt": 1} +V0806 13:55:51.947000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 38, "ndim": 1, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [768], "is_leaf": true, "requires_grad": true, "is_parameter": true, "stride": [1], "storage": 26, "view_func": "", "describer_id": 5}, "frame_id": 2, "frame_compile_id": 0, "attempt": 1} +V0806 13:55:51.947000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 5, "id": 38, "source": "L['mod']._modules['transformer']._modules['h']._modules['1']._modules['mlp']._modules['c_proj']._parameters['bias']"}, "frame_id": 2, "frame_compile_id": 0, "attempt": 1} +V0806 13:55:51.952000 
4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 27, "describer_id": 5, "size": 3072}, "frame_id": 2, "frame_compile_id": 0, "attempt": 1} +V0806 13:55:51.952000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 39, "ndim": 1, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [768], "is_leaf": true, "requires_grad": true, "is_parameter": true, "stride": [1], "storage": 27, "view_func": "", "describer_id": 5}, "frame_id": 2, "frame_compile_id": 0, "attempt": 1} +V0806 13:55:51.952000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 5, "id": 39, "source": "L['mod']._modules['transformer']._modules['h']._modules['2']._modules['ln_1']._parameters['weight']"}, "frame_id": 2, "frame_compile_id": 0, "attempt": 1} +V0806 13:55:51.953000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 28, "describer_id": 5, "size": 3072}, "frame_id": 2, "frame_compile_id": 0, "attempt": 1} +V0806 13:55:51.953000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 40, "ndim": 1, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [768], "is_leaf": true, "requires_grad": true, "is_parameter": true, "stride": [1], "storage": 28, "view_func": "", "describer_id": 5}, "frame_id": 2, "frame_compile_id": 0, "attempt": 1} +V0806 13:55:51.953000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 5, "id": 40, "source": "L['mod']._modules['transformer']._modules['h']._modules['2']._modules['ln_1']._parameters['bias']"}, "frame_id": 2, "frame_compile_id": 0, "attempt": 1} +V0806 13:55:51.957000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 29, "describer_id": 5, "size": 7077888}, "frame_id": 2, "frame_compile_id": 0, "attempt": 1} +V0806 13:55:51.957000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 44, "ndim": 2, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [2304, 768], "is_leaf": true, "requires_grad": true, "is_parameter": true, "stride": [768, 1], "storage": 29, "view_func": "", "describer_id": 5}, "frame_id": 2, "frame_compile_id": 0, "attempt": 1} +V0806 13:55:51.957000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 5, "id": 44, "source": "L['mod']._modules['transformer']._modules['h']._modules['2']._modules['attn']._modules['c_attn']._parameters['weight']"}, "frame_id": 2, "frame_compile_id": 0, "attempt": 1} +V0806 13:55:51.958000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 30, "describer_id": 5, "size": 9216}, "frame_id": 2, "frame_compile_id": 0, "attempt": 1} +V0806 13:55:51.958000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 45, "ndim": 1, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [2304], "is_leaf": true, "requires_grad": true, "is_parameter": true, "stride": [1], "storage": 30, "view_func": "", "describer_id": 5}, "frame_id": 2, "frame_compile_id": 0, "attempt": 1} +V0806 13:55:51.958000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 5, "id": 45, "source": "L['mod']._modules['transformer']._modules['h']._modules['2']._modules['attn']._modules['c_attn']._parameters['bias']"}, "frame_id": 2, "frame_compile_id": 0, "attempt": 1} +V0806 13:55:51.967000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 31, "describer_id": 5, "size": 2359296}, "frame_id": 
2, "frame_compile_id": 0, "attempt": 1} +V0806 13:55:51.967000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 46, "ndim": 2, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [768, 768], "is_leaf": true, "requires_grad": true, "is_parameter": true, "stride": [768, 1], "storage": 31, "view_func": "", "describer_id": 5}, "frame_id": 2, "frame_compile_id": 0, "attempt": 1} +V0806 13:55:51.967000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 5, "id": 46, "source": "L['mod']._modules['transformer']._modules['h']._modules['2']._modules['attn']._modules['c_proj']._parameters['weight']"}, "frame_id": 2, "frame_compile_id": 0, "attempt": 1} +V0806 13:55:51.968000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 32, "describer_id": 5, "size": 3072}, "frame_id": 2, "frame_compile_id": 0, "attempt": 1} +V0806 13:55:51.968000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 47, "ndim": 1, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [768], "is_leaf": true, "requires_grad": true, "is_parameter": true, "stride": [1], "storage": 32, "view_func": "", "describer_id": 5}, "frame_id": 2, "frame_compile_id": 0, "attempt": 1} +V0806 13:55:51.968000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 5, "id": 47, "source": "L['mod']._modules['transformer']._modules['h']._modules['2']._modules['attn']._modules['c_proj']._parameters['bias']"}, "frame_id": 2, "frame_compile_id": 0, "attempt": 1} +V0806 13:55:51.972000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 33, "describer_id": 5, "size": 3072}, "frame_id": 2, "frame_compile_id": 0, "attempt": 1} +V0806 13:55:51.972000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 48, "ndim": 1, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [768], "is_leaf": true, "requires_grad": true, "is_parameter": true, "stride": [1], "storage": 33, "view_func": "", "describer_id": 5}, "frame_id": 2, "frame_compile_id": 0, "attempt": 1} +V0806 13:55:51.972000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 5, "id": 48, "source": "L['mod']._modules['transformer']._modules['h']._modules['2']._modules['ln_2']._parameters['weight']"}, "frame_id": 2, "frame_compile_id": 0, "attempt": 1} +V0806 13:55:51.973000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 34, "describer_id": 5, "size": 3072}, "frame_id": 2, "frame_compile_id": 0, "attempt": 1} +V0806 13:55:51.973000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 49, "ndim": 1, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [768], "is_leaf": true, "requires_grad": true, "is_parameter": true, "stride": [1], "storage": 34, "view_func": "", "describer_id": 5}, "frame_id": 2, "frame_compile_id": 0, "attempt": 1} +V0806 13:55:51.973000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 5, "id": 49, "source": "L['mod']._modules['transformer']._modules['h']._modules['2']._modules['ln_2']._parameters['bias']"}, "frame_id": 2, "frame_compile_id": 0, "attempt": 1} +V0806 13:55:51.977000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 35, "describer_id": 5, "size": 9437184}, "frame_id": 2, "frame_compile_id": 0, "attempt": 1} +V0806 13:55:51.977000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": 
{"id": 53, "ndim": 2, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [3072, 768], "is_leaf": true, "requires_grad": true, "is_parameter": true, "stride": [768, 1], "storage": 35, "view_func": "", "describer_id": 5}, "frame_id": 2, "frame_compile_id": 0, "attempt": 1} +V0806 13:55:51.977000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 5, "id": 53, "source": "L['mod']._modules['transformer']._modules['h']._modules['2']._modules['mlp']._modules['c_fc']._parameters['weight']"}, "frame_id": 2, "frame_compile_id": 0, "attempt": 1} +V0806 13:55:51.978000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 36, "describer_id": 5, "size": 12288}, "frame_id": 2, "frame_compile_id": 0, "attempt": 1} +V0806 13:55:51.978000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 54, "ndim": 1, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [3072], "is_leaf": true, "requires_grad": true, "is_parameter": true, "stride": [1], "storage": 36, "view_func": "", "describer_id": 5}, "frame_id": 2, "frame_compile_id": 0, "attempt": 1} +V0806 13:55:51.978000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 5, "id": 54, "source": "L['mod']._modules['transformer']._modules['h']._modules['2']._modules['mlp']._modules['c_fc']._parameters['bias']"}, "frame_id": 2, "frame_compile_id": 0, "attempt": 1} +V0806 13:55:51.983000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 37, "describer_id": 5, "size": 9437184}, "frame_id": 2, "frame_compile_id": 0, "attempt": 1} +V0806 13:55:51.983000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 55, "ndim": 2, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [768, 3072], "is_leaf": true, "requires_grad": true, "is_parameter": true, "stride": [3072, 1], "storage": 37, "view_func": "", "describer_id": 5}, "frame_id": 2, "frame_compile_id": 0, "attempt": 1} +V0806 13:55:51.983000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 5, "id": 55, "source": "L['mod']._modules['transformer']._modules['h']._modules['2']._modules['mlp']._modules['c_proj']._parameters['weight']"}, "frame_id": 2, "frame_compile_id": 0, "attempt": 1} +V0806 13:55:51.984000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 38, "describer_id": 5, "size": 3072}, "frame_id": 2, "frame_compile_id": 0, "attempt": 1} +V0806 13:55:51.984000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 56, "ndim": 1, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [768], "is_leaf": true, "requires_grad": true, "is_parameter": true, "stride": [1], "storage": 38, "view_func": "", "describer_id": 5}, "frame_id": 2, "frame_compile_id": 0, "attempt": 1} +V0806 13:55:51.984000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 5, "id": 56, "source": "L['mod']._modules['transformer']._modules['h']._modules['2']._modules['mlp']._modules['c_proj']._parameters['bias']"}, "frame_id": 2, "frame_compile_id": 0, "attempt": 1} +V0806 13:55:51.989000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 39, "describer_id": 5, "size": 3072}, "frame_id": 2, "frame_compile_id": 0, "attempt": 1} +V0806 13:55:51.990000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 57, "ndim": 1, "dtype": "torch.float32", "device": "device(type='cuda', 
index=0)", "size": [768], "is_leaf": true, "requires_grad": true, "is_parameter": true, "stride": [1], "storage": 39, "view_func": "", "describer_id": 5}, "frame_id": 2, "frame_compile_id": 0, "attempt": 1} +V0806 13:55:51.990000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 5, "id": 57, "source": "L['mod']._modules['transformer']._modules['h']._modules['3']._modules['ln_1']._parameters['weight']"}, "frame_id": 2, "frame_compile_id": 0, "attempt": 1} +V0806 13:55:51.990000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 40, "describer_id": 5, "size": 3072}, "frame_id": 2, "frame_compile_id": 0, "attempt": 1} +V0806 13:55:51.991000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 58, "ndim": 1, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [768], "is_leaf": true, "requires_grad": true, "is_parameter": true, "stride": [1], "storage": 40, "view_func": "", "describer_id": 5}, "frame_id": 2, "frame_compile_id": 0, "attempt": 1} +V0806 13:55:51.991000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 5, "id": 58, "source": "L['mod']._modules['transformer']._modules['h']._modules['3']._modules['ln_1']._parameters['bias']"}, "frame_id": 2, "frame_compile_id": 0, "attempt": 1} +V0806 13:55:51.995000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 41, "describer_id": 5, "size": 7077888}, "frame_id": 2, "frame_compile_id": 0, "attempt": 1} +V0806 13:55:51.995000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 62, "ndim": 2, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [2304, 768], "is_leaf": true, "requires_grad": true, "is_parameter": true, "stride": [768, 1], "storage": 41, "view_func": "", "describer_id": 5}, "frame_id": 2, "frame_compile_id": 0, "attempt": 1} +V0806 13:55:51.995000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 5, "id": 62, "source": "L['mod']._modules['transformer']._modules['h']._modules['3']._modules['attn']._modules['c_attn']._parameters['weight']"}, "frame_id": 2, "frame_compile_id": 0, "attempt": 1} +V0806 13:55:51.996000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 42, "describer_id": 5, "size": 9216}, "frame_id": 2, "frame_compile_id": 0, "attempt": 1} +V0806 13:55:51.996000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 63, "ndim": 1, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [2304], "is_leaf": true, "requires_grad": true, "is_parameter": true, "stride": [1], "storage": 42, "view_func": "", "describer_id": 5}, "frame_id": 2, "frame_compile_id": 0, "attempt": 1} +V0806 13:55:51.996000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 5, "id": 63, "source": "L['mod']._modules['transformer']._modules['h']._modules['3']._modules['attn']._modules['c_attn']._parameters['bias']"}, "frame_id": 2, "frame_compile_id": 0, "attempt": 1} +V0806 13:55:52.005000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 43, "describer_id": 5, "size": 2359296}, "frame_id": 2, "frame_compile_id": 0, "attempt": 1} +V0806 13:55:52.005000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 64, "ndim": 2, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [768, 768], "is_leaf": true, "requires_grad": true, "is_parameter": true, "stride": [768, 1], 
"storage": 43, "view_func": "", "describer_id": 5}, "frame_id": 2, "frame_compile_id": 0, "attempt": 1} +V0806 13:55:52.005000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 5, "id": 64, "source": "L['mod']._modules['transformer']._modules['h']._modules['3']._modules['attn']._modules['c_proj']._parameters['weight']"}, "frame_id": 2, "frame_compile_id": 0, "attempt": 1} +V0806 13:55:52.006000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 44, "describer_id": 5, "size": 3072}, "frame_id": 2, "frame_compile_id": 0, "attempt": 1} +V0806 13:55:52.006000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 65, "ndim": 1, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [768], "is_leaf": true, "requires_grad": true, "is_parameter": true, "stride": [1], "storage": 44, "view_func": "", "describer_id": 5}, "frame_id": 2, "frame_compile_id": 0, "attempt": 1} +V0806 13:55:52.006000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 5, "id": 65, "source": "L['mod']._modules['transformer']._modules['h']._modules['3']._modules['attn']._modules['c_proj']._parameters['bias']"}, "frame_id": 2, "frame_compile_id": 0, "attempt": 1} +V0806 13:55:52.010000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 45, "describer_id": 5, "size": 3072}, "frame_id": 2, "frame_compile_id": 0, "attempt": 1} +V0806 13:55:52.010000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 66, "ndim": 1, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [768], "is_leaf": true, "requires_grad": true, "is_parameter": true, "stride": [1], "storage": 45, "view_func": "", "describer_id": 5}, "frame_id": 2, "frame_compile_id": 0, "attempt": 1} +V0806 13:55:52.010000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 5, "id": 66, "source": "L['mod']._modules['transformer']._modules['h']._modules['3']._modules['ln_2']._parameters['weight']"}, "frame_id": 2, "frame_compile_id": 0, "attempt": 1} +V0806 13:55:52.011000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 46, "describer_id": 5, "size": 3072}, "frame_id": 2, "frame_compile_id": 0, "attempt": 1} +V0806 13:55:52.011000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 67, "ndim": 1, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [768], "is_leaf": true, "requires_grad": true, "is_parameter": true, "stride": [1], "storage": 46, "view_func": "", "describer_id": 5}, "frame_id": 2, "frame_compile_id": 0, "attempt": 1} +V0806 13:55:52.011000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 5, "id": 67, "source": "L['mod']._modules['transformer']._modules['h']._modules['3']._modules['ln_2']._parameters['bias']"}, "frame_id": 2, "frame_compile_id": 0, "attempt": 1} +V0806 13:55:52.014000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 47, "describer_id": 5, "size": 9437184}, "frame_id": 2, "frame_compile_id": 0, "attempt": 1} +V0806 13:55:52.015000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 71, "ndim": 2, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [3072, 768], "is_leaf": true, "requires_grad": true, "is_parameter": true, "stride": [768, 1], "storage": 47, "view_func": "", "describer_id": 5}, "frame_id": 2, "frame_compile_id": 0, "attempt": 1} +V0806 13:55:52.015000 
4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 5, "id": 71, "source": "L['mod']._modules['transformer']._modules['h']._modules['3']._modules['mlp']._modules['c_fc']._parameters['weight']"}, "frame_id": 2, "frame_compile_id": 0, "attempt": 1} +V0806 13:55:52.015000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 48, "describer_id": 5, "size": 12288}, "frame_id": 2, "frame_compile_id": 0, "attempt": 1} +V0806 13:55:52.016000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 72, "ndim": 1, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [3072], "is_leaf": true, "requires_grad": true, "is_parameter": true, "stride": [1], "storage": 48, "view_func": "", "describer_id": 5}, "frame_id": 2, "frame_compile_id": 0, "attempt": 1} +V0806 13:55:52.016000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 5, "id": 72, "source": "L['mod']._modules['transformer']._modules['h']._modules['3']._modules['mlp']._modules['c_fc']._parameters['bias']"}, "frame_id": 2, "frame_compile_id": 0, "attempt": 1} +V0806 13:55:52.021000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 49, "describer_id": 5, "size": 9437184}, "frame_id": 2, "frame_compile_id": 0, "attempt": 1} +V0806 13:55:52.021000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 73, "ndim": 2, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [768, 3072], "is_leaf": true, "requires_grad": true, "is_parameter": true, "stride": [3072, 1], "storage": 49, "view_func": "", "describer_id": 5}, "frame_id": 2, "frame_compile_id": 0, "attempt": 1} +V0806 13:55:52.021000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 5, "id": 73, "source": "L['mod']._modules['transformer']._modules['h']._modules['3']._modules['mlp']._modules['c_proj']._parameters['weight']"}, "frame_id": 2, "frame_compile_id": 0, "attempt": 1} +V0806 13:55:52.022000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 50, "describer_id": 5, "size": 3072}, "frame_id": 2, "frame_compile_id": 0, "attempt": 1} +V0806 13:55:52.022000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 74, "ndim": 1, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [768], "is_leaf": true, "requires_grad": true, "is_parameter": true, "stride": [1], "storage": 50, "view_func": "", "describer_id": 5}, "frame_id": 2, "frame_compile_id": 0, "attempt": 1} +V0806 13:55:52.022000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 5, "id": 74, "source": "L['mod']._modules['transformer']._modules['h']._modules['3']._modules['mlp']._modules['c_proj']._parameters['bias']"}, "frame_id": 2, "frame_compile_id": 0, "attempt": 1} +V0806 13:55:52.027000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 51, "describer_id": 5, "size": 3072}, "frame_id": 2, "frame_compile_id": 0, "attempt": 1} +V0806 13:55:52.027000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 75, "ndim": 1, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [768], "is_leaf": true, "requires_grad": true, "is_parameter": true, "stride": [1], "storage": 51, "view_func": "", "describer_id": 5}, "frame_id": 2, "frame_compile_id": 0, "attempt": 1} +V0806 13:55:52.027000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 5, "id": 
75, "source": "L['mod']._modules['transformer']._modules['h']._modules['4']._modules['ln_1']._parameters['weight']"}, "frame_id": 2, "frame_compile_id": 0, "attempt": 1} +V0806 13:55:52.028000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 52, "describer_id": 5, "size": 3072}, "frame_id": 2, "frame_compile_id": 0, "attempt": 1} +V0806 13:55:52.028000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 76, "ndim": 1, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [768], "is_leaf": true, "requires_grad": true, "is_parameter": true, "stride": [1], "storage": 52, "view_func": "", "describer_id": 5}, "frame_id": 2, "frame_compile_id": 0, "attempt": 1} +V0806 13:55:52.028000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 5, "id": 76, "source": "L['mod']._modules['transformer']._modules['h']._modules['4']._modules['ln_1']._parameters['bias']"}, "frame_id": 2, "frame_compile_id": 0, "attempt": 1} +V0806 13:55:52.032000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 53, "describer_id": 5, "size": 7077888}, "frame_id": 2, "frame_compile_id": 0, "attempt": 1} +V0806 13:55:52.032000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 80, "ndim": 2, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [2304, 768], "is_leaf": true, "requires_grad": true, "is_parameter": true, "stride": [768, 1], "storage": 53, "view_func": "", "describer_id": 5}, "frame_id": 2, "frame_compile_id": 0, "attempt": 1} +V0806 13:55:52.032000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 5, "id": 80, "source": "L['mod']._modules['transformer']._modules['h']._modules['4']._modules['attn']._modules['c_attn']._parameters['weight']"}, "frame_id": 2, "frame_compile_id": 0, "attempt": 1} +V0806 13:55:52.033000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 54, "describer_id": 5, "size": 9216}, "frame_id": 2, "frame_compile_id": 0, "attempt": 1} +V0806 13:55:52.033000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 81, "ndim": 1, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [2304], "is_leaf": true, "requires_grad": true, "is_parameter": true, "stride": [1], "storage": 54, "view_func": "", "describer_id": 5}, "frame_id": 2, "frame_compile_id": 0, "attempt": 1} +V0806 13:55:52.033000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 5, "id": 81, "source": "L['mod']._modules['transformer']._modules['h']._modules['4']._modules['attn']._modules['c_attn']._parameters['bias']"}, "frame_id": 2, "frame_compile_id": 0, "attempt": 1} +V0806 13:55:52.042000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 55, "describer_id": 5, "size": 2359296}, "frame_id": 2, "frame_compile_id": 0, "attempt": 1} +V0806 13:55:52.042000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 82, "ndim": 2, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [768, 768], "is_leaf": true, "requires_grad": true, "is_parameter": true, "stride": [768, 1], "storage": 55, "view_func": "", "describer_id": 5}, "frame_id": 2, "frame_compile_id": 0, "attempt": 1} +V0806 13:55:52.042000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 5, "id": 82, "source": 
"L['mod']._modules['transformer']._modules['h']._modules['4']._modules['attn']._modules['c_proj']._parameters['weight']"}, "frame_id": 2, "frame_compile_id": 0, "attempt": 1} +V0806 13:55:52.043000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 56, "describer_id": 5, "size": 3072}, "frame_id": 2, "frame_compile_id": 0, "attempt": 1} +V0806 13:55:52.043000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 83, "ndim": 1, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [768], "is_leaf": true, "requires_grad": true, "is_parameter": true, "stride": [1], "storage": 56, "view_func": "", "describer_id": 5}, "frame_id": 2, "frame_compile_id": 0, "attempt": 1} +V0806 13:55:52.043000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 5, "id": 83, "source": "L['mod']._modules['transformer']._modules['h']._modules['4']._modules['attn']._modules['c_proj']._parameters['bias']"}, "frame_id": 2, "frame_compile_id": 0, "attempt": 1} +V0806 13:55:52.047000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 57, "describer_id": 5, "size": 3072}, "frame_id": 2, "frame_compile_id": 0, "attempt": 1} +V0806 13:55:52.047000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 84, "ndim": 1, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [768], "is_leaf": true, "requires_grad": true, "is_parameter": true, "stride": [1], "storage": 57, "view_func": "", "describer_id": 5}, "frame_id": 2, "frame_compile_id": 0, "attempt": 1} +V0806 13:55:52.047000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 5, "id": 84, "source": "L['mod']._modules['transformer']._modules['h']._modules['4']._modules['ln_2']._parameters['weight']"}, "frame_id": 2, "frame_compile_id": 0, "attempt": 1} +V0806 13:55:52.048000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 58, "describer_id": 5, "size": 3072}, "frame_id": 2, "frame_compile_id": 0, "attempt": 1} +V0806 13:55:52.048000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 85, "ndim": 1, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [768], "is_leaf": true, "requires_grad": true, "is_parameter": true, "stride": [1], "storage": 58, "view_func": "", "describer_id": 5}, "frame_id": 2, "frame_compile_id": 0, "attempt": 1} +V0806 13:55:52.048000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 5, "id": 85, "source": "L['mod']._modules['transformer']._modules['h']._modules['4']._modules['ln_2']._parameters['bias']"}, "frame_id": 2, "frame_compile_id": 0, "attempt": 1} +V0806 13:55:52.051000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 59, "describer_id": 5, "size": 9437184}, "frame_id": 2, "frame_compile_id": 0, "attempt": 1} +V0806 13:55:52.052000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 89, "ndim": 2, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [3072, 768], "is_leaf": true, "requires_grad": true, "is_parameter": true, "stride": [768, 1], "storage": 59, "view_func": "", "describer_id": 5}, "frame_id": 2, "frame_compile_id": 0, "attempt": 1} +V0806 13:55:52.052000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 5, "id": 89, "source": "L['mod']._modules['transformer']._modules['h']._modules['4']._modules['mlp']._modules['c_fc']._parameters['weight']"}, 
"frame_id": 2, "frame_compile_id": 0, "attempt": 1} +V0806 13:55:52.052000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 60, "describer_id": 5, "size": 12288}, "frame_id": 2, "frame_compile_id": 0, "attempt": 1} +V0806 13:55:52.052000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 90, "ndim": 1, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [3072], "is_leaf": true, "requires_grad": true, "is_parameter": true, "stride": [1], "storage": 60, "view_func": "", "describer_id": 5}, "frame_id": 2, "frame_compile_id": 0, "attempt": 1} +V0806 13:55:52.053000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 5, "id": 90, "source": "L['mod']._modules['transformer']._modules['h']._modules['4']._modules['mlp']._modules['c_fc']._parameters['bias']"}, "frame_id": 2, "frame_compile_id": 0, "attempt": 1} +V0806 13:55:52.058000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 61, "describer_id": 5, "size": 9437184}, "frame_id": 2, "frame_compile_id": 0, "attempt": 1} +V0806 13:55:52.058000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 91, "ndim": 2, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [768, 3072], "is_leaf": true, "requires_grad": true, "is_parameter": true, "stride": [3072, 1], "storage": 61, "view_func": "", "describer_id": 5}, "frame_id": 2, "frame_compile_id": 0, "attempt": 1} +V0806 13:55:52.058000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 5, "id": 91, "source": "L['mod']._modules['transformer']._modules['h']._modules['4']._modules['mlp']._modules['c_proj']._parameters['weight']"}, "frame_id": 2, "frame_compile_id": 0, "attempt": 1} +V0806 13:55:52.058000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 62, "describer_id": 5, "size": 3072}, "frame_id": 2, "frame_compile_id": 0, "attempt": 1} +V0806 13:55:52.059000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 92, "ndim": 1, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [768], "is_leaf": true, "requires_grad": true, "is_parameter": true, "stride": [1], "storage": 62, "view_func": "", "describer_id": 5}, "frame_id": 2, "frame_compile_id": 0, "attempt": 1} +V0806 13:55:52.059000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 5, "id": 92, "source": "L['mod']._modules['transformer']._modules['h']._modules['4']._modules['mlp']._modules['c_proj']._parameters['bias']"}, "frame_id": 2, "frame_compile_id": 0, "attempt": 1} +V0806 13:55:52.063000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 63, "describer_id": 5, "size": 3072}, "frame_id": 2, "frame_compile_id": 0, "attempt": 1} +V0806 13:55:52.064000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 93, "ndim": 1, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [768], "is_leaf": true, "requires_grad": true, "is_parameter": true, "stride": [1], "storage": 63, "view_func": "", "describer_id": 5}, "frame_id": 2, "frame_compile_id": 0, "attempt": 1} +V0806 13:55:52.064000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 5, "id": 93, "source": "L['mod']._modules['transformer']._modules['h']._modules['5']._modules['ln_1']._parameters['weight']"}, "frame_id": 2, "frame_compile_id": 0, "attempt": 1} +V0806 13:55:52.064000 4107173 
torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 64, "describer_id": 5, "size": 3072}, "frame_id": 2, "frame_compile_id": 0, "attempt": 1} +V0806 13:55:52.065000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 94, "ndim": 1, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [768], "is_leaf": true, "requires_grad": true, "is_parameter": true, "stride": [1], "storage": 64, "view_func": "", "describer_id": 5}, "frame_id": 2, "frame_compile_id": 0, "attempt": 1} +V0806 13:55:52.065000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 5, "id": 94, "source": "L['mod']._modules['transformer']._modules['h']._modules['5']._modules['ln_1']._parameters['bias']"}, "frame_id": 2, "frame_compile_id": 0, "attempt": 1} +V0806 13:55:52.069000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 65, "describer_id": 5, "size": 7077888}, "frame_id": 2, "frame_compile_id": 0, "attempt": 1} +V0806 13:55:52.069000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 98, "ndim": 2, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [2304, 768], "is_leaf": true, "requires_grad": true, "is_parameter": true, "stride": [768, 1], "storage": 65, "view_func": "", "describer_id": 5}, "frame_id": 2, "frame_compile_id": 0, "attempt": 1} +V0806 13:55:52.069000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 5, "id": 98, "source": "L['mod']._modules['transformer']._modules['h']._modules['5']._modules['attn']._modules['c_attn']._parameters['weight']"}, "frame_id": 2, "frame_compile_id": 0, "attempt": 1} +V0806 13:55:52.070000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 66, "describer_id": 5, "size": 9216}, "frame_id": 2, "frame_compile_id": 0, "attempt": 1} +V0806 13:55:52.070000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 99, "ndim": 1, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [2304], "is_leaf": true, "requires_grad": true, "is_parameter": true, "stride": [1], "storage": 66, "view_func": "", "describer_id": 5}, "frame_id": 2, "frame_compile_id": 0, "attempt": 1} +V0806 13:55:52.070000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 5, "id": 99, "source": "L['mod']._modules['transformer']._modules['h']._modules['5']._modules['attn']._modules['c_attn']._parameters['bias']"}, "frame_id": 2, "frame_compile_id": 0, "attempt": 1} +V0806 13:55:52.079000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 67, "describer_id": 5, "size": 2359296}, "frame_id": 2, "frame_compile_id": 0, "attempt": 1} +V0806 13:55:52.079000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 100, "ndim": 2, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [768, 768], "is_leaf": true, "requires_grad": true, "is_parameter": true, "stride": [768, 1], "storage": 67, "view_func": "", "describer_id": 5}, "frame_id": 2, "frame_compile_id": 0, "attempt": 1} +V0806 13:55:52.079000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 5, "id": 100, "source": "L['mod']._modules['transformer']._modules['h']._modules['5']._modules['attn']._modules['c_proj']._parameters['weight']"}, "frame_id": 2, "frame_compile_id": 0, "attempt": 1} +V0806 13:55:52.080000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 68, "describer_id": 5, 
"size": 3072}, "frame_id": 2, "frame_compile_id": 0, "attempt": 1} +V0806 13:55:52.080000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 101, "ndim": 1, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [768], "is_leaf": true, "requires_grad": true, "is_parameter": true, "stride": [1], "storage": 68, "view_func": "", "describer_id": 5}, "frame_id": 2, "frame_compile_id": 0, "attempt": 1} +V0806 13:55:52.080000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 5, "id": 101, "source": "L['mod']._modules['transformer']._modules['h']._modules['5']._modules['attn']._modules['c_proj']._parameters['bias']"}, "frame_id": 2, "frame_compile_id": 0, "attempt": 1} +V0806 13:55:52.084000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 69, "describer_id": 5, "size": 3072}, "frame_id": 2, "frame_compile_id": 0, "attempt": 1} +V0806 13:55:52.084000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 102, "ndim": 1, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [768], "is_leaf": true, "requires_grad": true, "is_parameter": true, "stride": [1], "storage": 69, "view_func": "", "describer_id": 5}, "frame_id": 2, "frame_compile_id": 0, "attempt": 1} +V0806 13:55:52.084000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 5, "id": 102, "source": "L['mod']._modules['transformer']._modules['h']._modules['5']._modules['ln_2']._parameters['weight']"}, "frame_id": 2, "frame_compile_id": 0, "attempt": 1} +V0806 13:55:52.085000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 70, "describer_id": 5, "size": 3072}, "frame_id": 2, "frame_compile_id": 0, "attempt": 1} +V0806 13:55:52.085000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 103, "ndim": 1, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [768], "is_leaf": true, "requires_grad": true, "is_parameter": true, "stride": [1], "storage": 70, "view_func": "", "describer_id": 5}, "frame_id": 2, "frame_compile_id": 0, "attempt": 1} +V0806 13:55:52.085000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 5, "id": 103, "source": "L['mod']._modules['transformer']._modules['h']._modules['5']._modules['ln_2']._parameters['bias']"}, "frame_id": 2, "frame_compile_id": 0, "attempt": 1} +V0806 13:55:52.088000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 71, "describer_id": 5, "size": 9437184}, "frame_id": 2, "frame_compile_id": 0, "attempt": 1} +V0806 13:55:52.088000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 107, "ndim": 2, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [3072, 768], "is_leaf": true, "requires_grad": true, "is_parameter": true, "stride": [768, 1], "storage": 71, "view_func": "", "describer_id": 5}, "frame_id": 2, "frame_compile_id": 0, "attempt": 1} +V0806 13:55:52.089000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 5, "id": 107, "source": "L['mod']._modules['transformer']._modules['h']._modules['5']._modules['mlp']._modules['c_fc']._parameters['weight']"}, "frame_id": 2, "frame_compile_id": 0, "attempt": 1} +V0806 13:55:52.089000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 72, "describer_id": 5, "size": 12288}, "frame_id": 2, "frame_compile_id": 0, "attempt": 1} +V0806 13:55:52.089000 4107173 
torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 108, "ndim": 1, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [3072], "is_leaf": true, "requires_grad": true, "is_parameter": true, "stride": [1], "storage": 72, "view_func": "", "describer_id": 5}, "frame_id": 2, "frame_compile_id": 0, "attempt": 1} +V0806 13:55:52.089000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 5, "id": 108, "source": "L['mod']._modules['transformer']._modules['h']._modules['5']._modules['mlp']._modules['c_fc']._parameters['bias']"}, "frame_id": 2, "frame_compile_id": 0, "attempt": 1} +V0806 13:55:52.094000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 73, "describer_id": 5, "size": 9437184}, "frame_id": 2, "frame_compile_id": 0, "attempt": 1} +V0806 13:55:52.095000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 109, "ndim": 2, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [768, 3072], "is_leaf": true, "requires_grad": true, "is_parameter": true, "stride": [3072, 1], "storage": 73, "view_func": "", "describer_id": 5}, "frame_id": 2, "frame_compile_id": 0, "attempt": 1} +V0806 13:55:52.095000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 5, "id": 109, "source": "L['mod']._modules['transformer']._modules['h']._modules['5']._modules['mlp']._modules['c_proj']._parameters['weight']"}, "frame_id": 2, "frame_compile_id": 0, "attempt": 1} +V0806 13:55:52.095000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 74, "describer_id": 5, "size": 3072}, "frame_id": 2, "frame_compile_id": 0, "attempt": 1} +V0806 13:55:52.095000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 110, "ndim": 1, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [768], "is_leaf": true, "requires_grad": true, "is_parameter": true, "stride": [1], "storage": 74, "view_func": "", "describer_id": 5}, "frame_id": 2, "frame_compile_id": 0, "attempt": 1} +V0806 13:55:52.096000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 5, "id": 110, "source": "L['mod']._modules['transformer']._modules['h']._modules['5']._modules['mlp']._modules['c_proj']._parameters['bias']"}, "frame_id": 2, "frame_compile_id": 0, "attempt": 1} +V0806 13:55:52.101000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 75, "describer_id": 5, "size": 3072}, "frame_id": 2, "frame_compile_id": 0, "attempt": 1} +V0806 13:55:52.101000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 111, "ndim": 1, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [768], "is_leaf": true, "requires_grad": true, "is_parameter": true, "stride": [1], "storage": 75, "view_func": "", "describer_id": 5}, "frame_id": 2, "frame_compile_id": 0, "attempt": 1} +V0806 13:55:52.101000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 5, "id": 111, "source": "L['mod']._modules['transformer']._modules['h']._modules['6']._modules['ln_1']._parameters['weight']"}, "frame_id": 2, "frame_compile_id": 0, "attempt": 1} +V0806 13:55:52.102000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 76, "describer_id": 5, "size": 3072}, "frame_id": 2, "frame_compile_id": 0, "attempt": 1} +V0806 13:55:52.102000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 112, "ndim": 1, "dtype": 
"torch.float32", "device": "device(type='cuda', index=0)", "size": [768], "is_leaf": true, "requires_grad": true, "is_parameter": true, "stride": [1], "storage": 76, "view_func": "", "describer_id": 5}, "frame_id": 2, "frame_compile_id": 0, "attempt": 1} +V0806 13:55:52.102000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 5, "id": 112, "source": "L['mod']._modules['transformer']._modules['h']._modules['6']._modules['ln_1']._parameters['bias']"}, "frame_id": 2, "frame_compile_id": 0, "attempt": 1} +V0806 13:55:52.106000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 77, "describer_id": 5, "size": 7077888}, "frame_id": 2, "frame_compile_id": 0, "attempt": 1} +V0806 13:55:52.106000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 116, "ndim": 2, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [2304, 768], "is_leaf": true, "requires_grad": true, "is_parameter": true, "stride": [768, 1], "storage": 77, "view_func": "", "describer_id": 5}, "frame_id": 2, "frame_compile_id": 0, "attempt": 1} +V0806 13:55:52.106000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 5, "id": 116, "source": "L['mod']._modules['transformer']._modules['h']._modules['6']._modules['attn']._modules['c_attn']._parameters['weight']"}, "frame_id": 2, "frame_compile_id": 0, "attempt": 1} +V0806 13:55:52.107000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 78, "describer_id": 5, "size": 9216}, "frame_id": 2, "frame_compile_id": 0, "attempt": 1} +V0806 13:55:52.107000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 117, "ndim": 1, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [2304], "is_leaf": true, "requires_grad": true, "is_parameter": true, "stride": [1], "storage": 78, "view_func": "", "describer_id": 5}, "frame_id": 2, "frame_compile_id": 0, "attempt": 1} +V0806 13:55:52.107000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 5, "id": 117, "source": "L['mod']._modules['transformer']._modules['h']._modules['6']._modules['attn']._modules['c_attn']._parameters['bias']"}, "frame_id": 2, "frame_compile_id": 0, "attempt": 1} +V0806 13:55:52.116000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 79, "describer_id": 5, "size": 2359296}, "frame_id": 2, "frame_compile_id": 0, "attempt": 1} +V0806 13:55:52.116000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 118, "ndim": 2, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [768, 768], "is_leaf": true, "requires_grad": true, "is_parameter": true, "stride": [768, 1], "storage": 79, "view_func": "", "describer_id": 5}, "frame_id": 2, "frame_compile_id": 0, "attempt": 1} +V0806 13:55:52.116000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 5, "id": 118, "source": "L['mod']._modules['transformer']._modules['h']._modules['6']._modules['attn']._modules['c_proj']._parameters['weight']"}, "frame_id": 2, "frame_compile_id": 0, "attempt": 1} +V0806 13:55:52.117000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 80, "describer_id": 5, "size": 3072}, "frame_id": 2, "frame_compile_id": 0, "attempt": 1} +V0806 13:55:52.117000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 119, "ndim": 1, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [768], "is_leaf": 
true, "requires_grad": true, "is_parameter": true, "stride": [1], "storage": 80, "view_func": "", "describer_id": 5}, "frame_id": 2, "frame_compile_id": 0, "attempt": 1} +V0806 13:55:52.117000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 5, "id": 119, "source": "L['mod']._modules['transformer']._modules['h']._modules['6']._modules['attn']._modules['c_proj']._parameters['bias']"}, "frame_id": 2, "frame_compile_id": 0, "attempt": 1} +V0806 13:55:52.121000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 81, "describer_id": 5, "size": 3072}, "frame_id": 2, "frame_compile_id": 0, "attempt": 1} +V0806 13:55:52.121000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 120, "ndim": 1, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [768], "is_leaf": true, "requires_grad": true, "is_parameter": true, "stride": [1], "storage": 81, "view_func": "", "describer_id": 5}, "frame_id": 2, "frame_compile_id": 0, "attempt": 1} +V0806 13:55:52.121000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 5, "id": 120, "source": "L['mod']._modules['transformer']._modules['h']._modules['6']._modules['ln_2']._parameters['weight']"}, "frame_id": 2, "frame_compile_id": 0, "attempt": 1} +V0806 13:55:52.122000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 82, "describer_id": 5, "size": 3072}, "frame_id": 2, "frame_compile_id": 0, "attempt": 1} +V0806 13:55:52.122000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 121, "ndim": 1, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [768], "is_leaf": true, "requires_grad": true, "is_parameter": true, "stride": [1], "storage": 82, "view_func": "", "describer_id": 5}, "frame_id": 2, "frame_compile_id": 0, "attempt": 1} +V0806 13:55:52.122000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 5, "id": 121, "source": "L['mod']._modules['transformer']._modules['h']._modules['6']._modules['ln_2']._parameters['bias']"}, "frame_id": 2, "frame_compile_id": 0, "attempt": 1} +V0806 13:55:52.126000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 83, "describer_id": 5, "size": 9437184}, "frame_id": 2, "frame_compile_id": 0, "attempt": 1} +V0806 13:55:52.126000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 125, "ndim": 2, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [3072, 768], "is_leaf": true, "requires_grad": true, "is_parameter": true, "stride": [768, 1], "storage": 83, "view_func": "", "describer_id": 5}, "frame_id": 2, "frame_compile_id": 0, "attempt": 1} +V0806 13:55:52.126000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 5, "id": 125, "source": "L['mod']._modules['transformer']._modules['h']._modules['6']._modules['mlp']._modules['c_fc']._parameters['weight']"}, "frame_id": 2, "frame_compile_id": 0, "attempt": 1} +V0806 13:55:52.127000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 84, "describer_id": 5, "size": 12288}, "frame_id": 2, "frame_compile_id": 0, "attempt": 1} +V0806 13:55:52.127000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 126, "ndim": 1, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [3072], "is_leaf": true, "requires_grad": true, "is_parameter": true, "stride": [1], "storage": 84, "view_func": "", "describer_id": 5}, 
"frame_id": 2, "frame_compile_id": 0, "attempt": 1} +V0806 13:55:52.127000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 5, "id": 126, "source": "L['mod']._modules['transformer']._modules['h']._modules['6']._modules['mlp']._modules['c_fc']._parameters['bias']"}, "frame_id": 2, "frame_compile_id": 0, "attempt": 1} +V0806 13:55:52.132000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 85, "describer_id": 5, "size": 9437184}, "frame_id": 2, "frame_compile_id": 0, "attempt": 1} +V0806 13:55:52.132000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 127, "ndim": 2, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [768, 3072], "is_leaf": true, "requires_grad": true, "is_parameter": true, "stride": [3072, 1], "storage": 85, "view_func": "", "describer_id": 5}, "frame_id": 2, "frame_compile_id": 0, "attempt": 1} +V0806 13:55:52.132000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 5, "id": 127, "source": "L['mod']._modules['transformer']._modules['h']._modules['6']._modules['mlp']._modules['c_proj']._parameters['weight']"}, "frame_id": 2, "frame_compile_id": 0, "attempt": 1} +V0806 13:55:52.133000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 86, "describer_id": 5, "size": 3072}, "frame_id": 2, "frame_compile_id": 0, "attempt": 1} +V0806 13:55:52.133000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 128, "ndim": 1, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [768], "is_leaf": true, "requires_grad": true, "is_parameter": true, "stride": [1], "storage": 86, "view_func": "", "describer_id": 5}, "frame_id": 2, "frame_compile_id": 0, "attempt": 1} +V0806 13:55:52.133000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 5, "id": 128, "source": "L['mod']._modules['transformer']._modules['h']._modules['6']._modules['mlp']._modules['c_proj']._parameters['bias']"}, "frame_id": 2, "frame_compile_id": 0, "attempt": 1} +V0806 13:55:52.138000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 87, "describer_id": 5, "size": 3072}, "frame_id": 2, "frame_compile_id": 0, "attempt": 1} +V0806 13:55:52.138000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 129, "ndim": 1, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [768], "is_leaf": true, "requires_grad": true, "is_parameter": true, "stride": [1], "storage": 87, "view_func": "", "describer_id": 5}, "frame_id": 2, "frame_compile_id": 0, "attempt": 1} +V0806 13:55:52.138000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 5, "id": 129, "source": "L['mod']._modules['transformer']._modules['h']._modules['7']._modules['ln_1']._parameters['weight']"}, "frame_id": 2, "frame_compile_id": 0, "attempt": 1} +V0806 13:55:52.139000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 88, "describer_id": 5, "size": 3072}, "frame_id": 2, "frame_compile_id": 0, "attempt": 1} +V0806 13:55:52.139000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 130, "ndim": 1, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [768], "is_leaf": true, "requires_grad": true, "is_parameter": true, "stride": [1], "storage": 88, "view_func": "", "describer_id": 5}, "frame_id": 2, "frame_compile_id": 0, "attempt": 1} +V0806 13:55:52.139000 4107173 
torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 5, "id": 130, "source": "L['mod']._modules['transformer']._modules['h']._modules['7']._modules['ln_1']._parameters['bias']"}, "frame_id": 2, "frame_compile_id": 0, "attempt": 1} +V0806 13:55:52.143000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 89, "describer_id": 5, "size": 7077888}, "frame_id": 2, "frame_compile_id": 0, "attempt": 1} +V0806 13:55:52.143000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 134, "ndim": 2, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [2304, 768], "is_leaf": true, "requires_grad": true, "is_parameter": true, "stride": [768, 1], "storage": 89, "view_func": "", "describer_id": 5}, "frame_id": 2, "frame_compile_id": 0, "attempt": 1} +V0806 13:55:52.143000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 5, "id": 134, "source": "L['mod']._modules['transformer']._modules['h']._modules['7']._modules['attn']._modules['c_attn']._parameters['weight']"}, "frame_id": 2, "frame_compile_id": 0, "attempt": 1} +V0806 13:55:52.144000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 90, "describer_id": 5, "size": 9216}, "frame_id": 2, "frame_compile_id": 0, "attempt": 1} +V0806 13:55:52.144000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 135, "ndim": 1, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [2304], "is_leaf": true, "requires_grad": true, "is_parameter": true, "stride": [1], "storage": 90, "view_func": "", "describer_id": 5}, "frame_id": 2, "frame_compile_id": 0, "attempt": 1} +V0806 13:55:52.144000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 5, "id": 135, "source": "L['mod']._modules['transformer']._modules['h']._modules['7']._modules['attn']._modules['c_attn']._parameters['bias']"}, "frame_id": 2, "frame_compile_id": 0, "attempt": 1} +V0806 13:55:52.153000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 91, "describer_id": 5, "size": 2359296}, "frame_id": 2, "frame_compile_id": 0, "attempt": 1} +V0806 13:55:52.153000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 136, "ndim": 2, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [768, 768], "is_leaf": true, "requires_grad": true, "is_parameter": true, "stride": [768, 1], "storage": 91, "view_func": "", "describer_id": 5}, "frame_id": 2, "frame_compile_id": 0, "attempt": 1} +V0806 13:55:52.153000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 5, "id": 136, "source": "L['mod']._modules['transformer']._modules['h']._modules['7']._modules['attn']._modules['c_proj']._parameters['weight']"}, "frame_id": 2, "frame_compile_id": 0, "attempt": 1} +V0806 13:55:52.154000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 92, "describer_id": 5, "size": 3072}, "frame_id": 2, "frame_compile_id": 0, "attempt": 1} +V0806 13:55:52.154000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 137, "ndim": 1, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [768], "is_leaf": true, "requires_grad": true, "is_parameter": true, "stride": [1], "storage": 92, "view_func": "", "describer_id": 5}, "frame_id": 2, "frame_compile_id": 0, "attempt": 1} +V0806 13:55:52.154000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 5, "id": 
137, "source": "L['mod']._modules['transformer']._modules['h']._modules['7']._modules['attn']._modules['c_proj']._parameters['bias']"}, "frame_id": 2, "frame_compile_id": 0, "attempt": 1} +V0806 13:55:52.158000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 93, "describer_id": 5, "size": 3072}, "frame_id": 2, "frame_compile_id": 0, "attempt": 1} +V0806 13:55:52.158000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 138, "ndim": 1, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [768], "is_leaf": true, "requires_grad": true, "is_parameter": true, "stride": [1], "storage": 93, "view_func": "", "describer_id": 5}, "frame_id": 2, "frame_compile_id": 0, "attempt": 1} +V0806 13:55:52.158000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 5, "id": 138, "source": "L['mod']._modules['transformer']._modules['h']._modules['7']._modules['ln_2']._parameters['weight']"}, "frame_id": 2, "frame_compile_id": 0, "attempt": 1} +V0806 13:55:52.159000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 94, "describer_id": 5, "size": 3072}, "frame_id": 2, "frame_compile_id": 0, "attempt": 1} +V0806 13:55:52.159000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 139, "ndim": 1, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [768], "is_leaf": true, "requires_grad": true, "is_parameter": true, "stride": [1], "storage": 94, "view_func": "", "describer_id": 5}, "frame_id": 2, "frame_compile_id": 0, "attempt": 1} +V0806 13:55:52.159000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 5, "id": 139, "source": "L['mod']._modules['transformer']._modules['h']._modules['7']._modules['ln_2']._parameters['bias']"}, "frame_id": 2, "frame_compile_id": 0, "attempt": 1} +V0806 13:55:52.163000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 95, "describer_id": 5, "size": 9437184}, "frame_id": 2, "frame_compile_id": 0, "attempt": 1} +V0806 13:55:52.163000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 143, "ndim": 2, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [3072, 768], "is_leaf": true, "requires_grad": true, "is_parameter": true, "stride": [768, 1], "storage": 95, "view_func": "", "describer_id": 5}, "frame_id": 2, "frame_compile_id": 0, "attempt": 1} +V0806 13:55:52.163000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 5, "id": 143, "source": "L['mod']._modules['transformer']._modules['h']._modules['7']._modules['mlp']._modules['c_fc']._parameters['weight']"}, "frame_id": 2, "frame_compile_id": 0, "attempt": 1} +V0806 13:55:52.164000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 96, "describer_id": 5, "size": 12288}, "frame_id": 2, "frame_compile_id": 0, "attempt": 1} +V0806 13:55:52.164000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 144, "ndim": 1, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [3072], "is_leaf": true, "requires_grad": true, "is_parameter": true, "stride": [1], "storage": 96, "view_func": "", "describer_id": 5}, "frame_id": 2, "frame_compile_id": 0, "attempt": 1} +V0806 13:55:52.164000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 5, "id": 144, "source": 
"L['mod']._modules['transformer']._modules['h']._modules['7']._modules['mlp']._modules['c_fc']._parameters['bias']"}, "frame_id": 2, "frame_compile_id": 0, "attempt": 1} +V0806 13:55:52.169000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 97, "describer_id": 5, "size": 9437184}, "frame_id": 2, "frame_compile_id": 0, "attempt": 1} +V0806 13:55:52.169000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 145, "ndim": 2, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [768, 3072], "is_leaf": true, "requires_grad": true, "is_parameter": true, "stride": [3072, 1], "storage": 97, "view_func": "", "describer_id": 5}, "frame_id": 2, "frame_compile_id": 0, "attempt": 1} +V0806 13:55:52.169000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 5, "id": 145, "source": "L['mod']._modules['transformer']._modules['h']._modules['7']._modules['mlp']._modules['c_proj']._parameters['weight']"}, "frame_id": 2, "frame_compile_id": 0, "attempt": 1} +V0806 13:55:52.170000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 98, "describer_id": 5, "size": 3072}, "frame_id": 2, "frame_compile_id": 0, "attempt": 1} +V0806 13:55:52.170000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 146, "ndim": 1, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [768], "is_leaf": true, "requires_grad": true, "is_parameter": true, "stride": [1], "storage": 98, "view_func": "", "describer_id": 5}, "frame_id": 2, "frame_compile_id": 0, "attempt": 1} +V0806 13:55:52.170000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 5, "id": 146, "source": "L['mod']._modules['transformer']._modules['h']._modules['7']._modules['mlp']._modules['c_proj']._parameters['bias']"}, "frame_id": 2, "frame_compile_id": 0, "attempt": 1} +V0806 13:55:52.175000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 99, "describer_id": 5, "size": 3072}, "frame_id": 2, "frame_compile_id": 0, "attempt": 1} +V0806 13:55:52.175000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 147, "ndim": 1, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [768], "is_leaf": true, "requires_grad": true, "is_parameter": true, "stride": [1], "storage": 99, "view_func": "", "describer_id": 5}, "frame_id": 2, "frame_compile_id": 0, "attempt": 1} +V0806 13:55:52.175000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 5, "id": 147, "source": "L['mod']._modules['transformer']._modules['h']._modules['8']._modules['ln_1']._parameters['weight']"}, "frame_id": 2, "frame_compile_id": 0, "attempt": 1} +V0806 13:55:52.176000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 100, "describer_id": 5, "size": 3072}, "frame_id": 2, "frame_compile_id": 0, "attempt": 1} +V0806 13:55:52.176000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 148, "ndim": 1, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [768], "is_leaf": true, "requires_grad": true, "is_parameter": true, "stride": [1], "storage": 100, "view_func": "", "describer_id": 5}, "frame_id": 2, "frame_compile_id": 0, "attempt": 1} +V0806 13:55:52.176000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 5, "id": 148, "source": "L['mod']._modules['transformer']._modules['h']._modules['8']._modules['ln_1']._parameters['bias']"}, 
"frame_id": 2, "frame_compile_id": 0, "attempt": 1} +V0806 13:55:52.180000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 101, "describer_id": 5, "size": 7077888}, "frame_id": 2, "frame_compile_id": 0, "attempt": 1} +V0806 13:55:52.180000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 152, "ndim": 2, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [2304, 768], "is_leaf": true, "requires_grad": true, "is_parameter": true, "stride": [768, 1], "storage": 101, "view_func": "", "describer_id": 5}, "frame_id": 2, "frame_compile_id": 0, "attempt": 1} +V0806 13:55:52.180000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 5, "id": 152, "source": "L['mod']._modules['transformer']._modules['h']._modules['8']._modules['attn']._modules['c_attn']._parameters['weight']"}, "frame_id": 2, "frame_compile_id": 0, "attempt": 1} +V0806 13:55:52.181000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 102, "describer_id": 5, "size": 9216}, "frame_id": 2, "frame_compile_id": 0, "attempt": 1} +V0806 13:55:52.181000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 153, "ndim": 1, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [2304], "is_leaf": true, "requires_grad": true, "is_parameter": true, "stride": [1], "storage": 102, "view_func": "", "describer_id": 5}, "frame_id": 2, "frame_compile_id": 0, "attempt": 1} +V0806 13:55:52.181000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 5, "id": 153, "source": "L['mod']._modules['transformer']._modules['h']._modules['8']._modules['attn']._modules['c_attn']._parameters['bias']"}, "frame_id": 2, "frame_compile_id": 0, "attempt": 1} +V0806 13:55:52.190000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 103, "describer_id": 5, "size": 2359296}, "frame_id": 2, "frame_compile_id": 0, "attempt": 1} +V0806 13:55:52.190000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 154, "ndim": 2, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [768, 768], "is_leaf": true, "requires_grad": true, "is_parameter": true, "stride": [768, 1], "storage": 103, "view_func": "", "describer_id": 5}, "frame_id": 2, "frame_compile_id": 0, "attempt": 1} +V0806 13:55:52.190000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 5, "id": 154, "source": "L['mod']._modules['transformer']._modules['h']._modules['8']._modules['attn']._modules['c_proj']._parameters['weight']"}, "frame_id": 2, "frame_compile_id": 0, "attempt": 1} +V0806 13:55:52.191000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 104, "describer_id": 5, "size": 3072}, "frame_id": 2, "frame_compile_id": 0, "attempt": 1} +V0806 13:55:52.191000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 155, "ndim": 1, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [768], "is_leaf": true, "requires_grad": true, "is_parameter": true, "stride": [1], "storage": 104, "view_func": "", "describer_id": 5}, "frame_id": 2, "frame_compile_id": 0, "attempt": 1} +V0806 13:55:52.191000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 5, "id": 155, "source": "L['mod']._modules['transformer']._modules['h']._modules['8']._modules['attn']._modules['c_proj']._parameters['bias']"}, "frame_id": 2, "frame_compile_id": 0, "attempt": 1} +V0806 
13:55:52.195000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 105, "describer_id": 5, "size": 3072}, "frame_id": 2, "frame_compile_id": 0, "attempt": 1} +V0806 13:55:52.195000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 156, "ndim": 1, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [768], "is_leaf": true, "requires_grad": true, "is_parameter": true, "stride": [1], "storage": 105, "view_func": "", "describer_id": 5}, "frame_id": 2, "frame_compile_id": 0, "attempt": 1} +V0806 13:55:52.195000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 5, "id": 156, "source": "L['mod']._modules['transformer']._modules['h']._modules['8']._modules['ln_2']._parameters['weight']"}, "frame_id": 2, "frame_compile_id": 0, "attempt": 1} +V0806 13:55:52.196000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 106, "describer_id": 5, "size": 3072}, "frame_id": 2, "frame_compile_id": 0, "attempt": 1} +V0806 13:55:52.196000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 157, "ndim": 1, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [768], "is_leaf": true, "requires_grad": true, "is_parameter": true, "stride": [1], "storage": 106, "view_func": "", "describer_id": 5}, "frame_id": 2, "frame_compile_id": 0, "attempt": 1} +V0806 13:55:52.196000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 5, "id": 157, "source": "L['mod']._modules['transformer']._modules['h']._modules['8']._modules['ln_2']._parameters['bias']"}, "frame_id": 2, "frame_compile_id": 0, "attempt": 1} +V0806 13:55:52.200000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 107, "describer_id": 5, "size": 9437184}, "frame_id": 2, "frame_compile_id": 0, "attempt": 1} +V0806 13:55:52.200000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 161, "ndim": 2, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [3072, 768], "is_leaf": true, "requires_grad": true, "is_parameter": true, "stride": [768, 1], "storage": 107, "view_func": "", "describer_id": 5}, "frame_id": 2, "frame_compile_id": 0, "attempt": 1} +V0806 13:55:52.200000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 5, "id": 161, "source": "L['mod']._modules['transformer']._modules['h']._modules['8']._modules['mlp']._modules['c_fc']._parameters['weight']"}, "frame_id": 2, "frame_compile_id": 0, "attempt": 1} +V0806 13:55:52.201000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 108, "describer_id": 5, "size": 12288}, "frame_id": 2, "frame_compile_id": 0, "attempt": 1} +V0806 13:55:52.201000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 162, "ndim": 1, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [3072], "is_leaf": true, "requires_grad": true, "is_parameter": true, "stride": [1], "storage": 108, "view_func": "", "describer_id": 5}, "frame_id": 2, "frame_compile_id": 0, "attempt": 1} +V0806 13:55:52.201000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 5, "id": 162, "source": "L['mod']._modules['transformer']._modules['h']._modules['8']._modules['mlp']._modules['c_fc']._parameters['bias']"}, "frame_id": 2, "frame_compile_id": 0, "attempt": 1} +V0806 13:55:52.206000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 109, "describer_id": 5, 
"size": 9437184}, "frame_id": 2, "frame_compile_id": 0, "attempt": 1} +V0806 13:55:52.206000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 163, "ndim": 2, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [768, 3072], "is_leaf": true, "requires_grad": true, "is_parameter": true, "stride": [3072, 1], "storage": 109, "view_func": "", "describer_id": 5}, "frame_id": 2, "frame_compile_id": 0, "attempt": 1} +V0806 13:55:52.206000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 5, "id": 163, "source": "L['mod']._modules['transformer']._modules['h']._modules['8']._modules['mlp']._modules['c_proj']._parameters['weight']"}, "frame_id": 2, "frame_compile_id": 0, "attempt": 1} +V0806 13:55:52.207000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 110, "describer_id": 5, "size": 3072}, "frame_id": 2, "frame_compile_id": 0, "attempt": 1} +V0806 13:55:52.207000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 164, "ndim": 1, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [768], "is_leaf": true, "requires_grad": true, "is_parameter": true, "stride": [1], "storage": 110, "view_func": "", "describer_id": 5}, "frame_id": 2, "frame_compile_id": 0, "attempt": 1} +V0806 13:55:52.207000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 5, "id": 164, "source": "L['mod']._modules['transformer']._modules['h']._modules['8']._modules['mlp']._modules['c_proj']._parameters['bias']"}, "frame_id": 2, "frame_compile_id": 0, "attempt": 1} +V0806 13:55:52.212000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 111, "describer_id": 5, "size": 3072}, "frame_id": 2, "frame_compile_id": 0, "attempt": 1} +V0806 13:55:52.212000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 165, "ndim": 1, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [768], "is_leaf": true, "requires_grad": true, "is_parameter": true, "stride": [1], "storage": 111, "view_func": "", "describer_id": 5}, "frame_id": 2, "frame_compile_id": 0, "attempt": 1} +V0806 13:55:52.212000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 5, "id": 165, "source": "L['mod']._modules['transformer']._modules['h']._modules['9']._modules['ln_1']._parameters['weight']"}, "frame_id": 2, "frame_compile_id": 0, "attempt": 1} +V0806 13:55:52.213000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 112, "describer_id": 5, "size": 3072}, "frame_id": 2, "frame_compile_id": 0, "attempt": 1} +V0806 13:55:52.213000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 166, "ndim": 1, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [768], "is_leaf": true, "requires_grad": true, "is_parameter": true, "stride": [1], "storage": 112, "view_func": "", "describer_id": 5}, "frame_id": 2, "frame_compile_id": 0, "attempt": 1} +V0806 13:55:52.213000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 5, "id": 166, "source": "L['mod']._modules['transformer']._modules['h']._modules['9']._modules['ln_1']._parameters['bias']"}, "frame_id": 2, "frame_compile_id": 0, "attempt": 1} +V0806 13:55:52.217000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 113, "describer_id": 5, "size": 7077888}, "frame_id": 2, "frame_compile_id": 0, "attempt": 1} +V0806 13:55:52.218000 4107173 
torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 170, "ndim": 2, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [2304, 768], "is_leaf": true, "requires_grad": true, "is_parameter": true, "stride": [768, 1], "storage": 113, "view_func": "", "describer_id": 5}, "frame_id": 2, "frame_compile_id": 0, "attempt": 1} +V0806 13:55:52.218000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 5, "id": 170, "source": "L['mod']._modules['transformer']._modules['h']._modules['9']._modules['attn']._modules['c_attn']._parameters['weight']"}, "frame_id": 2, "frame_compile_id": 0, "attempt": 1} +V0806 13:55:52.218000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 114, "describer_id": 5, "size": 9216}, "frame_id": 2, "frame_compile_id": 0, "attempt": 1} +V0806 13:55:52.219000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 171, "ndim": 1, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [2304], "is_leaf": true, "requires_grad": true, "is_parameter": true, "stride": [1], "storage": 114, "view_func": "", "describer_id": 5}, "frame_id": 2, "frame_compile_id": 0, "attempt": 1} +V0806 13:55:52.219000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 5, "id": 171, "source": "L['mod']._modules['transformer']._modules['h']._modules['9']._modules['attn']._modules['c_attn']._parameters['bias']"}, "frame_id": 2, "frame_compile_id": 0, "attempt": 1} +V0806 13:55:52.227000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 115, "describer_id": 5, "size": 2359296}, "frame_id": 2, "frame_compile_id": 0, "attempt": 1} +V0806 13:55:52.228000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 172, "ndim": 2, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [768, 768], "is_leaf": true, "requires_grad": true, "is_parameter": true, "stride": [768, 1], "storage": 115, "view_func": "", "describer_id": 5}, "frame_id": 2, "frame_compile_id": 0, "attempt": 1} +V0806 13:55:52.228000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 5, "id": 172, "source": "L['mod']._modules['transformer']._modules['h']._modules['9']._modules['attn']._modules['c_proj']._parameters['weight']"}, "frame_id": 2, "frame_compile_id": 0, "attempt": 1} +V0806 13:55:52.228000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 116, "describer_id": 5, "size": 3072}, "frame_id": 2, "frame_compile_id": 0, "attempt": 1} +V0806 13:55:52.228000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 173, "ndim": 1, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [768], "is_leaf": true, "requires_grad": true, "is_parameter": true, "stride": [1], "storage": 116, "view_func": "", "describer_id": 5}, "frame_id": 2, "frame_compile_id": 0, "attempt": 1} +V0806 13:55:52.228000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 5, "id": 173, "source": "L['mod']._modules['transformer']._modules['h']._modules['9']._modules['attn']._modules['c_proj']._parameters['bias']"}, "frame_id": 2, "frame_compile_id": 0, "attempt": 1} +V0806 13:55:52.232000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 117, "describer_id": 5, "size": 3072}, "frame_id": 2, "frame_compile_id": 0, "attempt": 1} +V0806 13:55:52.232000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": 
{"id": 174, "ndim": 1, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [768], "is_leaf": true, "requires_grad": true, "is_parameter": true, "stride": [1], "storage": 117, "view_func": "", "describer_id": 5}, "frame_id": 2, "frame_compile_id": 0, "attempt": 1} +V0806 13:55:52.232000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 5, "id": 174, "source": "L['mod']._modules['transformer']._modules['h']._modules['9']._modules['ln_2']._parameters['weight']"}, "frame_id": 2, "frame_compile_id": 0, "attempt": 1} +V0806 13:55:52.233000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 118, "describer_id": 5, "size": 3072}, "frame_id": 2, "frame_compile_id": 0, "attempt": 1} +V0806 13:55:52.233000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 175, "ndim": 1, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [768], "is_leaf": true, "requires_grad": true, "is_parameter": true, "stride": [1], "storage": 118, "view_func": "", "describer_id": 5}, "frame_id": 2, "frame_compile_id": 0, "attempt": 1} +V0806 13:55:52.233000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 5, "id": 175, "source": "L['mod']._modules['transformer']._modules['h']._modules['9']._modules['ln_2']._parameters['bias']"}, "frame_id": 2, "frame_compile_id": 0, "attempt": 1} +V0806 13:55:52.237000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 119, "describer_id": 5, "size": 9437184}, "frame_id": 2, "frame_compile_id": 0, "attempt": 1} +V0806 13:55:52.237000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 179, "ndim": 2, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [3072, 768], "is_leaf": true, "requires_grad": true, "is_parameter": true, "stride": [768, 1], "storage": 119, "view_func": "", "describer_id": 5}, "frame_id": 2, "frame_compile_id": 0, "attempt": 1} +V0806 13:55:52.237000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 5, "id": 179, "source": "L['mod']._modules['transformer']._modules['h']._modules['9']._modules['mlp']._modules['c_fc']._parameters['weight']"}, "frame_id": 2, "frame_compile_id": 0, "attempt": 1} +V0806 13:55:52.238000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 120, "describer_id": 5, "size": 12288}, "frame_id": 2, "frame_compile_id": 0, "attempt": 1} +V0806 13:55:52.238000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 180, "ndim": 1, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [3072], "is_leaf": true, "requires_grad": true, "is_parameter": true, "stride": [1], "storage": 120, "view_func": "", "describer_id": 5}, "frame_id": 2, "frame_compile_id": 0, "attempt": 1} +V0806 13:55:52.238000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 5, "id": 180, "source": "L['mod']._modules['transformer']._modules['h']._modules['9']._modules['mlp']._modules['c_fc']._parameters['bias']"}, "frame_id": 2, "frame_compile_id": 0, "attempt": 1} +V0806 13:55:52.243000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 121, "describer_id": 5, "size": 9437184}, "frame_id": 2, "frame_compile_id": 0, "attempt": 1} +V0806 13:55:52.243000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 181, "ndim": 2, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [768, 3072], 
"is_leaf": true, "requires_grad": true, "is_parameter": true, "stride": [3072, 1], "storage": 121, "view_func": "", "describer_id": 5}, "frame_id": 2, "frame_compile_id": 0, "attempt": 1} +V0806 13:55:52.243000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 5, "id": 181, "source": "L['mod']._modules['transformer']._modules['h']._modules['9']._modules['mlp']._modules['c_proj']._parameters['weight']"}, "frame_id": 2, "frame_compile_id": 0, "attempt": 1} +V0806 13:55:52.244000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 122, "describer_id": 5, "size": 3072}, "frame_id": 2, "frame_compile_id": 0, "attempt": 1} +V0806 13:55:52.244000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 182, "ndim": 1, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [768], "is_leaf": true, "requires_grad": true, "is_parameter": true, "stride": [1], "storage": 122, "view_func": "", "describer_id": 5}, "frame_id": 2, "frame_compile_id": 0, "attempt": 1} +V0806 13:55:52.244000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 5, "id": 182, "source": "L['mod']._modules['transformer']._modules['h']._modules['9']._modules['mlp']._modules['c_proj']._parameters['bias']"}, "frame_id": 2, "frame_compile_id": 0, "attempt": 1} +V0806 13:55:52.249000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 123, "describer_id": 5, "size": 3072}, "frame_id": 2, "frame_compile_id": 0, "attempt": 1} +V0806 13:55:52.249000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 183, "ndim": 1, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [768], "is_leaf": true, "requires_grad": true, "is_parameter": true, "stride": [1], "storage": 123, "view_func": "", "describer_id": 5}, "frame_id": 2, "frame_compile_id": 0, "attempt": 1} +V0806 13:55:52.249000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 5, "id": 183, "source": "L['mod']._modules['transformer']._modules['h']._modules['10']._modules['ln_1']._parameters['weight']"}, "frame_id": 2, "frame_compile_id": 0, "attempt": 1} +V0806 13:55:52.250000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 124, "describer_id": 5, "size": 3072}, "frame_id": 2, "frame_compile_id": 0, "attempt": 1} +V0806 13:55:52.250000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 184, "ndim": 1, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [768], "is_leaf": true, "requires_grad": true, "is_parameter": true, "stride": [1], "storage": 124, "view_func": "", "describer_id": 5}, "frame_id": 2, "frame_compile_id": 0, "attempt": 1} +V0806 13:55:52.250000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 5, "id": 184, "source": "L['mod']._modules['transformer']._modules['h']._modules['10']._modules['ln_1']._parameters['bias']"}, "frame_id": 2, "frame_compile_id": 0, "attempt": 1} +V0806 13:55:52.254000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 125, "describer_id": 5, "size": 7077888}, "frame_id": 2, "frame_compile_id": 0, "attempt": 1} +V0806 13:55:52.254000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 188, "ndim": 2, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [2304, 768], "is_leaf": true, "requires_grad": true, "is_parameter": true, "stride": [768, 1], "storage": 125, 
"view_func": "", "describer_id": 5}, "frame_id": 2, "frame_compile_id": 0, "attempt": 1} +V0806 13:55:52.254000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 5, "id": 188, "source": "L['mod']._modules['transformer']._modules['h']._modules['10']._modules['attn']._modules['c_attn']._parameters['weight']"}, "frame_id": 2, "frame_compile_id": 0, "attempt": 1} +V0806 13:55:52.255000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 126, "describer_id": 5, "size": 9216}, "frame_id": 2, "frame_compile_id": 0, "attempt": 1} +V0806 13:55:52.255000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 189, "ndim": 1, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [2304], "is_leaf": true, "requires_grad": true, "is_parameter": true, "stride": [1], "storage": 126, "view_func": "", "describer_id": 5}, "frame_id": 2, "frame_compile_id": 0, "attempt": 1} +V0806 13:55:52.255000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 5, "id": 189, "source": "L['mod']._modules['transformer']._modules['h']._modules['10']._modules['attn']._modules['c_attn']._parameters['bias']"}, "frame_id": 2, "frame_compile_id": 0, "attempt": 1} +V0806 13:55:52.264000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 127, "describer_id": 5, "size": 2359296}, "frame_id": 2, "frame_compile_id": 0, "attempt": 1} +V0806 13:55:52.264000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 190, "ndim": 2, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [768, 768], "is_leaf": true, "requires_grad": true, "is_parameter": true, "stride": [768, 1], "storage": 127, "view_func": "", "describer_id": 5}, "frame_id": 2, "frame_compile_id": 0, "attempt": 1} +V0806 13:55:52.264000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 5, "id": 190, "source": "L['mod']._modules['transformer']._modules['h']._modules['10']._modules['attn']._modules['c_proj']._parameters['weight']"}, "frame_id": 2, "frame_compile_id": 0, "attempt": 1} +V0806 13:55:52.265000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 128, "describer_id": 5, "size": 3072}, "frame_id": 2, "frame_compile_id": 0, "attempt": 1} +V0806 13:55:52.265000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 191, "ndim": 1, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [768], "is_leaf": true, "requires_grad": true, "is_parameter": true, "stride": [1], "storage": 128, "view_func": "", "describer_id": 5}, "frame_id": 2, "frame_compile_id": 0, "attempt": 1} +V0806 13:55:52.265000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 5, "id": 191, "source": "L['mod']._modules['transformer']._modules['h']._modules['10']._modules['attn']._modules['c_proj']._parameters['bias']"}, "frame_id": 2, "frame_compile_id": 0, "attempt": 1} +V0806 13:55:52.269000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 129, "describer_id": 5, "size": 3072}, "frame_id": 2, "frame_compile_id": 0, "attempt": 1} +V0806 13:55:52.269000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 192, "ndim": 1, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [768], "is_leaf": true, "requires_grad": true, "is_parameter": true, "stride": [1], "storage": 129, "view_func": "", "describer_id": 5}, "frame_id": 2, 
"frame_compile_id": 0, "attempt": 1} +V0806 13:55:52.269000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 5, "id": 192, "source": "L['mod']._modules['transformer']._modules['h']._modules['10']._modules['ln_2']._parameters['weight']"}, "frame_id": 2, "frame_compile_id": 0, "attempt": 1} +V0806 13:55:52.270000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 130, "describer_id": 5, "size": 3072}, "frame_id": 2, "frame_compile_id": 0, "attempt": 1} +V0806 13:55:52.270000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 193, "ndim": 1, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [768], "is_leaf": true, "requires_grad": true, "is_parameter": true, "stride": [1], "storage": 130, "view_func": "", "describer_id": 5}, "frame_id": 2, "frame_compile_id": 0, "attempt": 1} +V0806 13:55:52.270000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 5, "id": 193, "source": "L['mod']._modules['transformer']._modules['h']._modules['10']._modules['ln_2']._parameters['bias']"}, "frame_id": 2, "frame_compile_id": 0, "attempt": 1} +V0806 13:55:52.274000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 131, "describer_id": 5, "size": 9437184}, "frame_id": 2, "frame_compile_id": 0, "attempt": 1} +V0806 13:55:52.274000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 197, "ndim": 2, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [3072, 768], "is_leaf": true, "requires_grad": true, "is_parameter": true, "stride": [768, 1], "storage": 131, "view_func": "", "describer_id": 5}, "frame_id": 2, "frame_compile_id": 0, "attempt": 1} +V0806 13:55:52.274000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 5, "id": 197, "source": "L['mod']._modules['transformer']._modules['h']._modules['10']._modules['mlp']._modules['c_fc']._parameters['weight']"}, "frame_id": 2, "frame_compile_id": 0, "attempt": 1} +V0806 13:55:52.275000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 132, "describer_id": 5, "size": 12288}, "frame_id": 2, "frame_compile_id": 0, "attempt": 1} +V0806 13:55:52.275000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 198, "ndim": 1, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [3072], "is_leaf": true, "requires_grad": true, "is_parameter": true, "stride": [1], "storage": 132, "view_func": "", "describer_id": 5}, "frame_id": 2, "frame_compile_id": 0, "attempt": 1} +V0806 13:55:52.275000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 5, "id": 198, "source": "L['mod']._modules['transformer']._modules['h']._modules['10']._modules['mlp']._modules['c_fc']._parameters['bias']"}, "frame_id": 2, "frame_compile_id": 0, "attempt": 1} +V0806 13:55:52.280000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 133, "describer_id": 5, "size": 9437184}, "frame_id": 2, "frame_compile_id": 0, "attempt": 1} +V0806 13:55:52.280000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 199, "ndim": 2, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [768, 3072], "is_leaf": true, "requires_grad": true, "is_parameter": true, "stride": [3072, 1], "storage": 133, "view_func": "", "describer_id": 5}, "frame_id": 2, "frame_compile_id": 0, "attempt": 1} +V0806 13:55:52.280000 4107173 
torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 5, "id": 199, "source": "L['mod']._modules['transformer']._modules['h']._modules['10']._modules['mlp']._modules['c_proj']._parameters['weight']"}, "frame_id": 2, "frame_compile_id": 0, "attempt": 1} +V0806 13:55:52.281000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 134, "describer_id": 5, "size": 3072}, "frame_id": 2, "frame_compile_id": 0, "attempt": 1} +V0806 13:55:52.281000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 200, "ndim": 1, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [768], "is_leaf": true, "requires_grad": true, "is_parameter": true, "stride": [1], "storage": 134, "view_func": "", "describer_id": 5}, "frame_id": 2, "frame_compile_id": 0, "attempt": 1} +V0806 13:55:52.281000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 5, "id": 200, "source": "L['mod']._modules['transformer']._modules['h']._modules['10']._modules['mlp']._modules['c_proj']._parameters['bias']"}, "frame_id": 2, "frame_compile_id": 0, "attempt": 1} +V0806 13:55:52.286000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 135, "describer_id": 5, "size": 3072}, "frame_id": 2, "frame_compile_id": 0, "attempt": 1} +V0806 13:55:52.286000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 201, "ndim": 1, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [768], "is_leaf": true, "requires_grad": true, "is_parameter": true, "stride": [1], "storage": 135, "view_func": "", "describer_id": 5}, "frame_id": 2, "frame_compile_id": 0, "attempt": 1} +V0806 13:55:52.286000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 5, "id": 201, "source": "L['mod']._modules['transformer']._modules['h']._modules['11']._modules['ln_1']._parameters['weight']"}, "frame_id": 2, "frame_compile_id": 0, "attempt": 1} +V0806 13:55:52.287000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 136, "describer_id": 5, "size": 3072}, "frame_id": 2, "frame_compile_id": 0, "attempt": 1} +V0806 13:55:52.287000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 202, "ndim": 1, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [768], "is_leaf": true, "requires_grad": true, "is_parameter": true, "stride": [1], "storage": 136, "view_func": "", "describer_id": 5}, "frame_id": 2, "frame_compile_id": 0, "attempt": 1} +V0806 13:55:52.287000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 5, "id": 202, "source": "L['mod']._modules['transformer']._modules['h']._modules['11']._modules['ln_1']._parameters['bias']"}, "frame_id": 2, "frame_compile_id": 0, "attempt": 1} +V0806 13:55:52.291000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 137, "describer_id": 5, "size": 7077888}, "frame_id": 2, "frame_compile_id": 0, "attempt": 1} +V0806 13:55:52.291000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 206, "ndim": 2, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [2304, 768], "is_leaf": true, "requires_grad": true, "is_parameter": true, "stride": [768, 1], "storage": 137, "view_func": "", "describer_id": 5}, "frame_id": 2, "frame_compile_id": 0, "attempt": 1} +V0806 13:55:52.291000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 5, "id": 206, "source": 
"L['mod']._modules['transformer']._modules['h']._modules['11']._modules['attn']._modules['c_attn']._parameters['weight']"}, "frame_id": 2, "frame_compile_id": 0, "attempt": 1} +V0806 13:55:52.292000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 138, "describer_id": 5, "size": 9216}, "frame_id": 2, "frame_compile_id": 0, "attempt": 1} +V0806 13:55:52.292000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 207, "ndim": 1, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [2304], "is_leaf": true, "requires_grad": true, "is_parameter": true, "stride": [1], "storage": 138, "view_func": "", "describer_id": 5}, "frame_id": 2, "frame_compile_id": 0, "attempt": 1} +V0806 13:55:52.292000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 5, "id": 207, "source": "L['mod']._modules['transformer']._modules['h']._modules['11']._modules['attn']._modules['c_attn']._parameters['bias']"}, "frame_id": 2, "frame_compile_id": 0, "attempt": 1} +V0806 13:55:52.301000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 139, "describer_id": 5, "size": 2359296}, "frame_id": 2, "frame_compile_id": 0, "attempt": 1} +V0806 13:55:52.301000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 208, "ndim": 2, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [768, 768], "is_leaf": true, "requires_grad": true, "is_parameter": true, "stride": [768, 1], "storage": 139, "view_func": "", "describer_id": 5}, "frame_id": 2, "frame_compile_id": 0, "attempt": 1} +V0806 13:55:52.301000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 5, "id": 208, "source": "L['mod']._modules['transformer']._modules['h']._modules['11']._modules['attn']._modules['c_proj']._parameters['weight']"}, "frame_id": 2, "frame_compile_id": 0, "attempt": 1} +V0806 13:55:52.302000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 140, "describer_id": 5, "size": 3072}, "frame_id": 2, "frame_compile_id": 0, "attempt": 1} +V0806 13:55:52.302000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 209, "ndim": 1, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [768], "is_leaf": true, "requires_grad": true, "is_parameter": true, "stride": [1], "storage": 140, "view_func": "", "describer_id": 5}, "frame_id": 2, "frame_compile_id": 0, "attempt": 1} +V0806 13:55:52.302000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 5, "id": 209, "source": "L['mod']._modules['transformer']._modules['h']._modules['11']._modules['attn']._modules['c_proj']._parameters['bias']"}, "frame_id": 2, "frame_compile_id": 0, "attempt": 1} +V0806 13:55:52.306000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 141, "describer_id": 5, "size": 3072}, "frame_id": 2, "frame_compile_id": 0, "attempt": 1} +V0806 13:55:52.306000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 210, "ndim": 1, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [768], "is_leaf": true, "requires_grad": true, "is_parameter": true, "stride": [1], "storage": 141, "view_func": "", "describer_id": 5}, "frame_id": 2, "frame_compile_id": 0, "attempt": 1} +V0806 13:55:52.306000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 5, "id": 210, "source": 
"L['mod']._modules['transformer']._modules['h']._modules['11']._modules['ln_2']._parameters['weight']"}, "frame_id": 2, "frame_compile_id": 0, "attempt": 1} +V0806 13:55:52.307000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 142, "describer_id": 5, "size": 3072}, "frame_id": 2, "frame_compile_id": 0, "attempt": 1} +V0806 13:55:52.307000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 211, "ndim": 1, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [768], "is_leaf": true, "requires_grad": true, "is_parameter": true, "stride": [1], "storage": 142, "view_func": "", "describer_id": 5}, "frame_id": 2, "frame_compile_id": 0, "attempt": 1} +V0806 13:55:52.307000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 5, "id": 211, "source": "L['mod']._modules['transformer']._modules['h']._modules['11']._modules['ln_2']._parameters['bias']"}, "frame_id": 2, "frame_compile_id": 0, "attempt": 1} +V0806 13:55:52.310000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 143, "describer_id": 5, "size": 9437184}, "frame_id": 2, "frame_compile_id": 0, "attempt": 1} +V0806 13:55:52.310000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 215, "ndim": 2, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [3072, 768], "is_leaf": true, "requires_grad": true, "is_parameter": true, "stride": [768, 1], "storage": 143, "view_func": "", "describer_id": 5}, "frame_id": 2, "frame_compile_id": 0, "attempt": 1} +V0806 13:55:52.311000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 5, "id": 215, "source": "L['mod']._modules['transformer']._modules['h']._modules['11']._modules['mlp']._modules['c_fc']._parameters['weight']"}, "frame_id": 2, "frame_compile_id": 0, "attempt": 1} +V0806 13:55:52.311000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 144, "describer_id": 5, "size": 12288}, "frame_id": 2, "frame_compile_id": 0, "attempt": 1} +V0806 13:55:52.311000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 216, "ndim": 1, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [3072], "is_leaf": true, "requires_grad": true, "is_parameter": true, "stride": [1], "storage": 144, "view_func": "", "describer_id": 5}, "frame_id": 2, "frame_compile_id": 0, "attempt": 1} +V0806 13:55:52.312000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 5, "id": 216, "source": "L['mod']._modules['transformer']._modules['h']._modules['11']._modules['mlp']._modules['c_fc']._parameters['bias']"}, "frame_id": 2, "frame_compile_id": 0, "attempt": 1} +V0806 13:55:52.316000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 145, "describer_id": 5, "size": 9437184}, "frame_id": 2, "frame_compile_id": 0, "attempt": 1} +V0806 13:55:52.317000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 217, "ndim": 2, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [768, 3072], "is_leaf": true, "requires_grad": true, "is_parameter": true, "stride": [3072, 1], "storage": 145, "view_func": "", "describer_id": 5}, "frame_id": 2, "frame_compile_id": 0, "attempt": 1} +V0806 13:55:52.317000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 5, "id": 217, "source": 
"L['mod']._modules['transformer']._modules['h']._modules['11']._modules['mlp']._modules['c_proj']._parameters['weight']"}, "frame_id": 2, "frame_compile_id": 0, "attempt": 1} +V0806 13:55:52.317000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 146, "describer_id": 5, "size": 3072}, "frame_id": 2, "frame_compile_id": 0, "attempt": 1} +V0806 13:55:52.318000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 218, "ndim": 1, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [768], "is_leaf": true, "requires_grad": true, "is_parameter": true, "stride": [1], "storage": 146, "view_func": "", "describer_id": 5}, "frame_id": 2, "frame_compile_id": 0, "attempt": 1} +V0806 13:55:52.318000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 5, "id": 218, "source": "L['mod']._modules['transformer']._modules['h']._modules['11']._modules['mlp']._modules['c_proj']._parameters['bias']"}, "frame_id": 2, "frame_compile_id": 0, "attempt": 1} +V0806 13:55:52.321000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 147, "describer_id": 5, "size": 3072}, "frame_id": 2, "frame_compile_id": 0, "attempt": 1} +V0806 13:55:52.322000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 219, "ndim": 1, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [768], "is_leaf": true, "requires_grad": true, "is_parameter": true, "stride": [1], "storage": 147, "view_func": "", "describer_id": 5}, "frame_id": 2, "frame_compile_id": 0, "attempt": 1} +V0806 13:55:52.322000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 5, "id": 219, "source": "L['mod']._modules['transformer']._modules['ln_f']._parameters['weight']"}, "frame_id": 2, "frame_compile_id": 0, "attempt": 1} +V0806 13:55:52.322000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 148, "describer_id": 5, "size": 3072}, "frame_id": 2, "frame_compile_id": 0, "attempt": 1} +V0806 13:55:52.323000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 220, "ndim": 1, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [768], "is_leaf": true, "requires_grad": true, "is_parameter": true, "stride": [1], "storage": 148, "view_func": "", "describer_id": 5}, "frame_id": 2, "frame_compile_id": 0, "attempt": 1} +V0806 13:55:52.323000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 5, "id": 220, "source": "L['mod']._modules['transformer']._modules['ln_f']._parameters['bias']"}, "frame_id": 2, "frame_compile_id": 0, "attempt": 1} +V0806 13:55:52.360000 4107173 torch/_dynamo/output_graph.py:1337] {"dynamo_output_graph": {"sizes": {"l_cloned_inputs_0_": [1, 64], "l_mod_modules_transformer_modules_wte_parameters_weight_": [50304, 768], "l_mod_modules_transformer_modules_wpe_parameters_weight_": [1024, 768], "l_mod_modules_transformer_modules_h_modules_0_modules_ln_1_parameters_weight_": [768], "l_mod_modules_transformer_modules_h_modules_0_modules_ln_1_parameters_bias_": [768], "l_mod_modules_transformer_modules_h_modules_0_modules_attn_modules_c_attn_parameters_weight_": [2304, 768], "l_mod_modules_transformer_modules_h_modules_0_modules_attn_modules_c_attn_parameters_bias_": [2304], "l_mod_modules_transformer_modules_h_modules_0_modules_attn_modules_c_proj_parameters_weight_": [768, 768], "l_mod_modules_transformer_modules_h_modules_0_modules_attn_modules_c_proj_parameters_bias_": [768], 
"l_mod_modules_transformer_modules_h_modules_0_modules_ln_2_parameters_weight_": [768], "l_mod_modules_transformer_modules_h_modules_0_modules_ln_2_parameters_bias_": [768], "l_mod_modules_transformer_modules_h_modules_0_modules_mlp_modules_c_fc_parameters_weight_": [3072, 768], "l_mod_modules_transformer_modules_h_modules_0_modules_mlp_modules_c_fc_parameters_bias_": [3072], "l_mod_modules_transformer_modules_h_modules_0_modules_mlp_modules_c_proj_parameters_weight_": [768, 3072], "l_mod_modules_transformer_modules_h_modules_0_modules_mlp_modules_c_proj_parameters_bias_": [768], "l_mod_modules_transformer_modules_h_modules_1_modules_ln_1_parameters_weight_": [768], "l_mod_modules_transformer_modules_h_modules_1_modules_ln_1_parameters_bias_": [768], "l_mod_modules_transformer_modules_h_modules_1_modules_attn_modules_c_attn_parameters_weight_": [2304, 768], "l_mod_modules_transformer_modules_h_modules_1_modules_attn_modules_c_attn_parameters_bias_": [2304], "l_mod_modules_transformer_modules_h_modules_1_modules_attn_modules_c_proj_parameters_weight_": [768, 768], "l_mod_modules_transformer_modules_h_modules_1_modules_attn_modules_c_proj_parameters_bias_": [768], "l_mod_modules_transformer_modules_h_modules_1_modules_ln_2_parameters_weight_": [768], "l_mod_modules_transformer_modules_h_modules_1_modules_ln_2_parameters_bias_": [768], "l_mod_modules_transformer_modules_h_modules_1_modules_mlp_modules_c_fc_parameters_weight_": [3072, 768], "l_mod_modules_transformer_modules_h_modules_1_modules_mlp_modules_c_fc_parameters_bias_": [3072], "l_mod_modules_transformer_modules_h_modules_1_modules_mlp_modules_c_proj_parameters_weight_": [768, 3072], "l_mod_modules_transformer_modules_h_modules_1_modules_mlp_modules_c_proj_parameters_bias_": [768], "l_mod_modules_transformer_modules_h_modules_2_modules_ln_1_parameters_weight_": [768], "l_mod_modules_transformer_modules_h_modules_2_modules_ln_1_parameters_bias_": [768], "l_mod_modules_transformer_modules_h_modules_2_modules_attn_modules_c_attn_parameters_weight_": [2304, 768], "l_mod_modules_transformer_modules_h_modules_2_modules_attn_modules_c_attn_parameters_bias_": [2304], "l_mod_modules_transformer_modules_h_modules_2_modules_attn_modules_c_proj_parameters_weight_": [768, 768], "l_mod_modules_transformer_modules_h_modules_2_modules_attn_modules_c_proj_parameters_bias_": [768], "l_mod_modules_transformer_modules_h_modules_2_modules_ln_2_parameters_weight_": [768], "l_mod_modules_transformer_modules_h_modules_2_modules_ln_2_parameters_bias_": [768], "l_mod_modules_transformer_modules_h_modules_2_modules_mlp_modules_c_fc_parameters_weight_": [3072, 768], "l_mod_modules_transformer_modules_h_modules_2_modules_mlp_modules_c_fc_parameters_bias_": [3072], "l_mod_modules_transformer_modules_h_modules_2_modules_mlp_modules_c_proj_parameters_weight_": [768, 3072], "l_mod_modules_transformer_modules_h_modules_2_modules_mlp_modules_c_proj_parameters_bias_": [768], "l_mod_modules_transformer_modules_h_modules_3_modules_ln_1_parameters_weight_": [768], "l_mod_modules_transformer_modules_h_modules_3_modules_ln_1_parameters_bias_": [768], "l_mod_modules_transformer_modules_h_modules_3_modules_attn_modules_c_attn_parameters_weight_": [2304, 768], "l_mod_modules_transformer_modules_h_modules_3_modules_attn_modules_c_attn_parameters_bias_": [2304], "l_mod_modules_transformer_modules_h_modules_3_modules_attn_modules_c_proj_parameters_weight_": [768, 768], "l_mod_modules_transformer_modules_h_modules_3_modules_attn_modules_c_proj_parameters_bias_": [768], 
"l_mod_modules_transformer_modules_h_modules_3_modules_ln_2_parameters_weight_": [768], "l_mod_modules_transformer_modules_h_modules_3_modules_ln_2_parameters_bias_": [768], "l_mod_modules_transformer_modules_h_modules_3_modules_mlp_modules_c_fc_parameters_weight_": [3072, 768], "l_mod_modules_transformer_modules_h_modules_3_modules_mlp_modules_c_fc_parameters_bias_": [3072], "l_mod_modules_transformer_modules_h_modules_3_modules_mlp_modules_c_proj_parameters_weight_": [768, 3072], "l_mod_modules_transformer_modules_h_modules_3_modules_mlp_modules_c_proj_parameters_bias_": [768], "l_mod_modules_transformer_modules_h_modules_4_modules_ln_1_parameters_weight_": [768], "l_mod_modules_transformer_modules_h_modules_4_modules_ln_1_parameters_bias_": [768], "l_mod_modules_transformer_modules_h_modules_4_modules_attn_modules_c_attn_parameters_weight_": [2304, 768], "l_mod_modules_transformer_modules_h_modules_4_modules_attn_modules_c_attn_parameters_bias_": [2304], "l_mod_modules_transformer_modules_h_modules_4_modules_attn_modules_c_proj_parameters_weight_": [768, 768], "l_mod_modules_transformer_modules_h_modules_4_modules_attn_modules_c_proj_parameters_bias_": [768], "l_mod_modules_transformer_modules_h_modules_4_modules_ln_2_parameters_weight_": [768], "l_mod_modules_transformer_modules_h_modules_4_modules_ln_2_parameters_bias_": [768], "l_mod_modules_transformer_modules_h_modules_4_modules_mlp_modules_c_fc_parameters_weight_": [3072, 768], "l_mod_modules_transformer_modules_h_modules_4_modules_mlp_modules_c_fc_parameters_bias_": [3072], "l_mod_modules_transformer_modules_h_modules_4_modules_mlp_modules_c_proj_parameters_weight_": [768, 3072], "l_mod_modules_transformer_modules_h_modules_4_modules_mlp_modules_c_proj_parameters_bias_": [768], "l_mod_modules_transformer_modules_h_modules_5_modules_ln_1_parameters_weight_": [768], "l_mod_modules_transformer_modules_h_modules_5_modules_ln_1_parameters_bias_": [768], "l_mod_modules_transformer_modules_h_modules_5_modules_attn_modules_c_attn_parameters_weight_": [2304, 768], "l_mod_modules_transformer_modules_h_modules_5_modules_attn_modules_c_attn_parameters_bias_": [2304], "l_mod_modules_transformer_modules_h_modules_5_modules_attn_modules_c_proj_parameters_weight_": [768, 768], "l_mod_modules_transformer_modules_h_modules_5_modules_attn_modules_c_proj_parameters_bias_": [768], "l_mod_modules_transformer_modules_h_modules_5_modules_ln_2_parameters_weight_": [768], "l_mod_modules_transformer_modules_h_modules_5_modules_ln_2_parameters_bias_": [768], "l_mod_modules_transformer_modules_h_modules_5_modules_mlp_modules_c_fc_parameters_weight_": [3072, 768], "l_mod_modules_transformer_modules_h_modules_5_modules_mlp_modules_c_fc_parameters_bias_": [3072], "l_mod_modules_transformer_modules_h_modules_5_modules_mlp_modules_c_proj_parameters_weight_": [768, 3072], "l_mod_modules_transformer_modules_h_modules_5_modules_mlp_modules_c_proj_parameters_bias_": [768], "l_mod_modules_transformer_modules_h_modules_6_modules_ln_1_parameters_weight_": [768], "l_mod_modules_transformer_modules_h_modules_6_modules_ln_1_parameters_bias_": [768], "l_mod_modules_transformer_modules_h_modules_6_modules_attn_modules_c_attn_parameters_weight_": [2304, 768], "l_mod_modules_transformer_modules_h_modules_6_modules_attn_modules_c_attn_parameters_bias_": [2304], "l_mod_modules_transformer_modules_h_modules_6_modules_attn_modules_c_proj_parameters_weight_": [768, 768], "l_mod_modules_transformer_modules_h_modules_6_modules_attn_modules_c_proj_parameters_bias_": [768], 
"l_mod_modules_transformer_modules_h_modules_6_modules_ln_2_parameters_weight_": [768], "l_mod_modules_transformer_modules_h_modules_6_modules_ln_2_parameters_bias_": [768], "l_mod_modules_transformer_modules_h_modules_6_modules_mlp_modules_c_fc_parameters_weight_": [3072, 768], "l_mod_modules_transformer_modules_h_modules_6_modules_mlp_modules_c_fc_parameters_bias_": [3072], "l_mod_modules_transformer_modules_h_modules_6_modules_mlp_modules_c_proj_parameters_weight_": [768, 3072], "l_mod_modules_transformer_modules_h_modules_6_modules_mlp_modules_c_proj_parameters_bias_": [768], "l_mod_modules_transformer_modules_h_modules_7_modules_ln_1_parameters_weight_": [768], "l_mod_modules_transformer_modules_h_modules_7_modules_ln_1_parameters_bias_": [768], "l_mod_modules_transformer_modules_h_modules_7_modules_attn_modules_c_attn_parameters_weight_": [2304, 768], "l_mod_modules_transformer_modules_h_modules_7_modules_attn_modules_c_attn_parameters_bias_": [2304], "l_mod_modules_transformer_modules_h_modules_7_modules_attn_modules_c_proj_parameters_weight_": [768, 768], "l_mod_modules_transformer_modules_h_modules_7_modules_attn_modules_c_proj_parameters_bias_": [768], "l_mod_modules_transformer_modules_h_modules_7_modules_ln_2_parameters_weight_": [768], "l_mod_modules_transformer_modules_h_modules_7_modules_ln_2_parameters_bias_": [768], "l_mod_modules_transformer_modules_h_modules_7_modules_mlp_modules_c_fc_parameters_weight_": [3072, 768], "l_mod_modules_transformer_modules_h_modules_7_modules_mlp_modules_c_fc_parameters_bias_": [3072], "l_mod_modules_transformer_modules_h_modules_7_modules_mlp_modules_c_proj_parameters_weight_": [768, 3072], "l_mod_modules_transformer_modules_h_modules_7_modules_mlp_modules_c_proj_parameters_bias_": [768], "l_mod_modules_transformer_modules_h_modules_8_modules_ln_1_parameters_weight_": [768], "l_mod_modules_transformer_modules_h_modules_8_modules_ln_1_parameters_bias_": [768], "l_mod_modules_transformer_modules_h_modules_8_modules_attn_modules_c_attn_parameters_weight_": [2304, 768], "l_mod_modules_transformer_modules_h_modules_8_modules_attn_modules_c_attn_parameters_bias_": [2304], "l_mod_modules_transformer_modules_h_modules_8_modules_attn_modules_c_proj_parameters_weight_": [768, 768], "l_mod_modules_transformer_modules_h_modules_8_modules_attn_modules_c_proj_parameters_bias_": [768], "l_mod_modules_transformer_modules_h_modules_8_modules_ln_2_parameters_weight_": [768], "l_mod_modules_transformer_modules_h_modules_8_modules_ln_2_parameters_bias_": [768], "l_mod_modules_transformer_modules_h_modules_8_modules_mlp_modules_c_fc_parameters_weight_": [3072, 768], "l_mod_modules_transformer_modules_h_modules_8_modules_mlp_modules_c_fc_parameters_bias_": [3072], "l_mod_modules_transformer_modules_h_modules_8_modules_mlp_modules_c_proj_parameters_weight_": [768, 3072], "l_mod_modules_transformer_modules_h_modules_8_modules_mlp_modules_c_proj_parameters_bias_": [768], "l_mod_modules_transformer_modules_h_modules_9_modules_ln_1_parameters_weight_": [768], "l_mod_modules_transformer_modules_h_modules_9_modules_ln_1_parameters_bias_": [768], "l_mod_modules_transformer_modules_h_modules_9_modules_attn_modules_c_attn_parameters_weight_": [2304, 768], "l_mod_modules_transformer_modules_h_modules_9_modules_attn_modules_c_attn_parameters_bias_": [2304], "l_mod_modules_transformer_modules_h_modules_9_modules_attn_modules_c_proj_parameters_weight_": [768, 768], "l_mod_modules_transformer_modules_h_modules_9_modules_attn_modules_c_proj_parameters_bias_": [768], 
"l_mod_modules_transformer_modules_h_modules_9_modules_ln_2_parameters_weight_": [768], "l_mod_modules_transformer_modules_h_modules_9_modules_ln_2_parameters_bias_": [768], "l_mod_modules_transformer_modules_h_modules_9_modules_mlp_modules_c_fc_parameters_weight_": [3072, 768], "l_mod_modules_transformer_modules_h_modules_9_modules_mlp_modules_c_fc_parameters_bias_": [3072], "l_mod_modules_transformer_modules_h_modules_9_modules_mlp_modules_c_proj_parameters_weight_": [768, 3072], "l_mod_modules_transformer_modules_h_modules_9_modules_mlp_modules_c_proj_parameters_bias_": [768], "l_mod_modules_transformer_modules_h_modules_10_modules_ln_1_parameters_weight_": [768], "l_mod_modules_transformer_modules_h_modules_10_modules_ln_1_parameters_bias_": [768], "l_mod_modules_transformer_modules_h_modules_10_modules_attn_modules_c_attn_parameters_weight_": [2304, 768], "l_mod_modules_transformer_modules_h_modules_10_modules_attn_modules_c_attn_parameters_bias_": [2304], "l_mod_modules_transformer_modules_h_modules_10_modules_attn_modules_c_proj_parameters_weight_": [768, 768], "l_mod_modules_transformer_modules_h_modules_10_modules_attn_modules_c_proj_parameters_bias_": [768], "l_mod_modules_transformer_modules_h_modules_10_modules_ln_2_parameters_weight_": [768], "l_mod_modules_transformer_modules_h_modules_10_modules_ln_2_parameters_bias_": [768], "l_mod_modules_transformer_modules_h_modules_10_modules_mlp_modules_c_fc_parameters_weight_": [3072, 768], "l_mod_modules_transformer_modules_h_modules_10_modules_mlp_modules_c_fc_parameters_bias_": [3072], "l_mod_modules_transformer_modules_h_modules_10_modules_mlp_modules_c_proj_parameters_weight_": [768, 3072], "l_mod_modules_transformer_modules_h_modules_10_modules_mlp_modules_c_proj_parameters_bias_": [768], "l_mod_modules_transformer_modules_h_modules_11_modules_ln_1_parameters_weight_": [768], "l_mod_modules_transformer_modules_h_modules_11_modules_ln_1_parameters_bias_": [768], "l_mod_modules_transformer_modules_h_modules_11_modules_attn_modules_c_attn_parameters_weight_": [2304, 768], "l_mod_modules_transformer_modules_h_modules_11_modules_attn_modules_c_attn_parameters_bias_": [2304], "l_mod_modules_transformer_modules_h_modules_11_modules_attn_modules_c_proj_parameters_weight_": [768, 768], "l_mod_modules_transformer_modules_h_modules_11_modules_attn_modules_c_proj_parameters_bias_": [768], "l_mod_modules_transformer_modules_h_modules_11_modules_ln_2_parameters_weight_": [768], "l_mod_modules_transformer_modules_h_modules_11_modules_ln_2_parameters_bias_": [768], "l_mod_modules_transformer_modules_h_modules_11_modules_mlp_modules_c_fc_parameters_weight_": [3072, 768], "l_mod_modules_transformer_modules_h_modules_11_modules_mlp_modules_c_fc_parameters_bias_": [3072], "l_mod_modules_transformer_modules_h_modules_11_modules_mlp_modules_c_proj_parameters_weight_": [768, 3072], "l_mod_modules_transformer_modules_h_modules_11_modules_mlp_modules_c_proj_parameters_bias_": [768], "l_mod_modules_transformer_modules_ln_f_parameters_weight_": [768], "l_mod_modules_transformer_modules_ln_f_parameters_bias_": [768], "arange": [64], "pos": [1, 64], "tok_emb": [1, 64, 768], "pos_emb": [1, 64, 768], "add": [1, 64, 768], "x": [1, 64, 768], "layer_norm": [1, 64, 768], "linear": [1, 64, 2304], "q": [1, 64, 768], "k": [1, 64, 768], "v": [1, 64, 768], "view": [1, 64, 12, 64], "k_1": [1, 12, 64, 64], "view_1": [1, 64, 12, 64], "q_1": [1, 12, 64, 64], "view_2": [1, 64, 12, 64], "v_1": [1, 12, 64, 64], "y": [1, 12, 64, 64], "transpose_3": [1, 64, 12, 64], 
"contiguous": [1, 64, 12, 64], "y_1": [1, 64, 768], "linear_1": [1, 64, 768], "y_2": [1, 64, 768], "x_1": [1, 64, 768], "layer_norm_1": [1, 64, 768], "x_2": [1, 64, 3072], "mul": [1, 64, 3072], "pow_1": [1, 64, 3072], "mul_1": [1, 64, 3072], "add_2": [1, 64, 3072], "mul_2": [1, 64, 3072], "tanh": [1, 64, 3072], "add_3": [1, 64, 3072], "x_3": [1, 64, 3072], "x_4": [1, 64, 768], "x_5": [1, 64, 768], "x_6": [1, 64, 768], "layer_norm_2": [1, 64, 768], "linear_4": [1, 64, 2304], "q_2": [1, 64, 768], "k_2": [1, 64, 768], "v_2": [1, 64, 768], "view_4": [1, 64, 12, 64], "k_3": [1, 12, 64, 64], "view_5": [1, 64, 12, 64], "q_3": [1, 12, 64, 64], "view_6": [1, 64, 12, 64], "v_3": [1, 12, 64, 64], "y_3": [1, 12, 64, 64], "transpose_7": [1, 64, 12, 64], "contiguous_1": [1, 64, 12, 64], "y_4": [1, 64, 768], "linear_5": [1, 64, 768], "y_5": [1, 64, 768], "x_7": [1, 64, 768], "layer_norm_3": [1, 64, 768], "x_8": [1, 64, 3072], "mul_4": [1, 64, 3072], "pow_2": [1, 64, 3072], "mul_5": [1, 64, 3072], "add_6": [1, 64, 3072], "mul_6": [1, 64, 3072], "tanh_1": [1, 64, 3072], "add_7": [1, 64, 3072], "x_9": [1, 64, 3072], "x_10": [1, 64, 768], "x_11": [1, 64, 768], "x_12": [1, 64, 768], "layer_norm_4": [1, 64, 768], "linear_8": [1, 64, 2304], "q_4": [1, 64, 768], "k_4": [1, 64, 768], "v_4": [1, 64, 768], "view_8": [1, 64, 12, 64], "k_5": [1, 12, 64, 64], "view_9": [1, 64, 12, 64], "q_5": [1, 12, 64, 64], "view_10": [1, 64, 12, 64], "v_5": [1, 12, 64, 64], "y_6": [1, 12, 64, 64], "transpose_11": [1, 64, 12, 64], "contiguous_2": [1, 64, 12, 64], "y_7": [1, 64, 768], "linear_9": [1, 64, 768], "y_8": [1, 64, 768], "x_13": [1, 64, 768], "layer_norm_5": [1, 64, 768], "x_14": [1, 64, 3072], "mul_8": [1, 64, 3072], "pow_3": [1, 64, 3072], "mul_9": [1, 64, 3072], "add_10": [1, 64, 3072], "mul_10": [1, 64, 3072], "tanh_2": [1, 64, 3072], "add_11": [1, 64, 3072], "x_15": [1, 64, 3072], "x_16": [1, 64, 768], "x_17": [1, 64, 768], "x_18": [1, 64, 768], "layer_norm_6": [1, 64, 768], "linear_12": [1, 64, 2304], "q_6": [1, 64, 768], "k_6": [1, 64, 768], "v_6": [1, 64, 768], "view_12": [1, 64, 12, 64], "k_7": [1, 12, 64, 64], "view_13": [1, 64, 12, 64], "q_7": [1, 12, 64, 64], "view_14": [1, 64, 12, 64], "v_7": [1, 12, 64, 64], "y_9": [1, 12, 64, 64], "transpose_15": [1, 64, 12, 64], "contiguous_3": [1, 64, 12, 64], "y_10": [1, 64, 768], "linear_13": [1, 64, 768], "y_11": [1, 64, 768], "x_19": [1, 64, 768], "layer_norm_7": [1, 64, 768], "x_20": [1, 64, 3072], "mul_12": [1, 64, 3072], "pow_4": [1, 64, 3072], "mul_13": [1, 64, 3072], "add_14": [1, 64, 3072], "mul_14": [1, 64, 3072], "tanh_3": [1, 64, 3072], "add_15": [1, 64, 3072], "x_21": [1, 64, 3072], "x_22": [1, 64, 768], "x_23": [1, 64, 768], "x_24": [1, 64, 768], "layer_norm_8": [1, 64, 768], "linear_16": [1, 64, 2304], "q_8": [1, 64, 768], "k_8": [1, 64, 768], "v_8": [1, 64, 768], "view_16": [1, 64, 12, 64], "k_9": [1, 12, 64, 64], "view_17": [1, 64, 12, 64], "q_9": [1, 12, 64, 64], "view_18": [1, 64, 12, 64], "v_9": [1, 12, 64, 64], "y_12": [1, 12, 64, 64], "transpose_19": [1, 64, 12, 64], "contiguous_4": [1, 64, 12, 64], "y_13": [1, 64, 768], "linear_17": [1, 64, 768], "y_14": [1, 64, 768], "x_25": [1, 64, 768], "layer_norm_9": [1, 64, 768], "x_26": [1, 64, 3072], "mul_16": [1, 64, 3072], "pow_5": [1, 64, 3072], "mul_17": [1, 64, 3072], "add_18": [1, 64, 3072], "mul_18": [1, 64, 3072], "tanh_4": [1, 64, 3072], "add_19": [1, 64, 3072], "x_27": [1, 64, 3072], "x_28": [1, 64, 768], "x_29": [1, 64, 768], "x_30": [1, 64, 768], "layer_norm_10": [1, 64, 768], "linear_20": [1, 64, 
2304], "q_10": [1, 64, 768], "k_10": [1, 64, 768], "v_10": [1, 64, 768], "view_20": [1, 64, 12, 64], "k_11": [1, 12, 64, 64], "view_21": [1, 64, 12, 64], "q_11": [1, 12, 64, 64], "view_22": [1, 64, 12, 64], "v_11": [1, 12, 64, 64], "y_15": [1, 12, 64, 64], "transpose_23": [1, 64, 12, 64], "contiguous_5": [1, 64, 12, 64], "y_16": [1, 64, 768], "linear_21": [1, 64, 768], "y_17": [1, 64, 768], "x_31": [1, 64, 768], "layer_norm_11": [1, 64, 768], "x_32": [1, 64, 3072], "mul_20": [1, 64, 3072], "pow_6": [1, 64, 3072], "mul_21": [1, 64, 3072], "add_22": [1, 64, 3072], "mul_22": [1, 64, 3072], "tanh_5": [1, 64, 3072], "add_23": [1, 64, 3072], "x_33": [1, 64, 3072], "x_34": [1, 64, 768], "x_35": [1, 64, 768], "x_36": [1, 64, 768], "layer_norm_12": [1, 64, 768], "linear_24": [1, 64, 2304], "q_12": [1, 64, 768], "k_12": [1, 64, 768], "v_12": [1, 64, 768], "view_24": [1, 64, 12, 64], "k_13": [1, 12, 64, 64], "view_25": [1, 64, 12, 64], "q_13": [1, 12, 64, 64], "view_26": [1, 64, 12, 64], "v_13": [1, 12, 64, 64], "y_18": [1, 12, 64, 64], "transpose_27": [1, 64, 12, 64], "contiguous_6": [1, 64, 12, 64], "y_19": [1, 64, 768], "linear_25": [1, 64, 768], "y_20": [1, 64, 768], "x_37": [1, 64, 768], "layer_norm_13": [1, 64, 768], "x_38": [1, 64, 3072], "mul_24": [1, 64, 3072], "pow_7": [1, 64, 3072], "mul_25": [1, 64, 3072], "add_26": [1, 64, 3072], "mul_26": [1, 64, 3072], "tanh_6": [1, 64, 3072], "add_27": [1, 64, 3072], "x_39": [1, 64, 3072], "x_40": [1, 64, 768], "x_41": [1, 64, 768], "x_42": [1, 64, 768], "layer_norm_14": [1, 64, 768], "linear_28": [1, 64, 2304], "q_14": [1, 64, 768], "k_14": [1, 64, 768], "v_14": [1, 64, 768], "view_28": [1, 64, 12, 64], "k_15": [1, 12, 64, 64], "view_29": [1, 64, 12, 64], "q_15": [1, 12, 64, 64], "view_30": [1, 64, 12, 64], "v_15": [1, 12, 64, 64], "y_21": [1, 12, 64, 64], "transpose_31": [1, 64, 12, 64], "contiguous_7": [1, 64, 12, 64], "y_22": [1, 64, 768], "linear_29": [1, 64, 768], "y_23": [1, 64, 768], "x_43": [1, 64, 768], "layer_norm_15": [1, 64, 768], "x_44": [1, 64, 3072], "mul_28": [1, 64, 3072], "pow_8": [1, 64, 3072], "mul_29": [1, 64, 3072], "add_30": [1, 64, 3072], "mul_30": [1, 64, 3072], "tanh_7": [1, 64, 3072], "add_31": [1, 64, 3072], "x_45": [1, 64, 3072], "x_46": [1, 64, 768], "x_47": [1, 64, 768], "x_48": [1, 64, 768], "layer_norm_16": [1, 64, 768], "linear_32": [1, 64, 2304], "q_16": [1, 64, 768], "k_16": [1, 64, 768], "v_16": [1, 64, 768], "view_32": [1, 64, 12, 64], "k_17": [1, 12, 64, 64], "view_33": [1, 64, 12, 64], "q_17": [1, 12, 64, 64], "view_34": [1, 64, 12, 64], "v_17": [1, 12, 64, 64], "y_24": [1, 12, 64, 64], "transpose_35": [1, 64, 12, 64], "contiguous_8": [1, 64, 12, 64], "y_25": [1, 64, 768], "linear_33": [1, 64, 768], "y_26": [1, 64, 768], "x_49": [1, 64, 768], "layer_norm_17": [1, 64, 768], "x_50": [1, 64, 3072], "mul_32": [1, 64, 3072], "pow_9": [1, 64, 3072], "mul_33": [1, 64, 3072], "add_34": [1, 64, 3072], "mul_34": [1, 64, 3072], "tanh_8": [1, 64, 3072], "add_35": [1, 64, 3072], "x_51": [1, 64, 3072], "x_52": [1, 64, 768], "x_53": [1, 64, 768], "x_54": [1, 64, 768], "layer_norm_18": [1, 64, 768], "linear_36": [1, 64, 2304], "q_18": [1, 64, 768], "k_18": [1, 64, 768], "v_18": [1, 64, 768], "view_36": [1, 64, 12, 64], "k_19": [1, 12, 64, 64], "view_37": [1, 64, 12, 64], "q_19": [1, 12, 64, 64], "view_38": [1, 64, 12, 64], "v_19": [1, 12, 64, 64], "y_27": [1, 12, 64, 64], "transpose_39": [1, 64, 12, 64], "contiguous_9": [1, 64, 12, 64], "y_28": [1, 64, 768], "linear_37": [1, 64, 768], "y_29": [1, 64, 768], "x_55": [1, 64, 
768], "layer_norm_19": [1, 64, 768], "x_56": [1, 64, 3072], "mul_36": [1, 64, 3072], "pow_10": [1, 64, 3072], "mul_37": [1, 64, 3072], "add_38": [1, 64, 3072], "mul_38": [1, 64, 3072], "tanh_9": [1, 64, 3072], "add_39": [1, 64, 3072], "x_57": [1, 64, 3072], "x_58": [1, 64, 768], "x_59": [1, 64, 768], "x_60": [1, 64, 768], "layer_norm_20": [1, 64, 768], "linear_40": [1, 64, 2304], "q_20": [1, 64, 768], "k_20": [1, 64, 768], "v_20": [1, 64, 768], "view_40": [1, 64, 12, 64], "k_21": [1, 12, 64, 64], "view_41": [1, 64, 12, 64], "q_21": [1, 12, 64, 64], "view_42": [1, 64, 12, 64], "v_21": [1, 12, 64, 64], "y_30": [1, 12, 64, 64], "transpose_43": [1, 64, 12, 64], "contiguous_10": [1, 64, 12, 64], "y_31": [1, 64, 768], "linear_41": [1, 64, 768], "y_32": [1, 64, 768], "x_61": [1, 64, 768], "layer_norm_21": [1, 64, 768], "x_62": [1, 64, 3072], "mul_40": [1, 64, 3072], "pow_11": [1, 64, 3072], "mul_41": [1, 64, 3072], "add_42": [1, 64, 3072], "mul_42": [1, 64, 3072], "tanh_10": [1, 64, 3072], "add_43": [1, 64, 3072], "x_63": [1, 64, 3072], "x_64": [1, 64, 768], "x_65": [1, 64, 768], "x_66": [1, 64, 768], "layer_norm_22": [1, 64, 768], "linear_44": [1, 64, 2304], "q_22": [1, 64, 768], "k_22": [1, 64, 768], "v_22": [1, 64, 768], "view_44": [1, 64, 12, 64], "k_23": [1, 12, 64, 64], "view_45": [1, 64, 12, 64], "q_23": [1, 12, 64, 64], "view_46": [1, 64, 12, 64], "v_23": [1, 12, 64, 64], "y_33": [1, 12, 64, 64], "transpose_47": [1, 64, 12, 64], "contiguous_11": [1, 64, 12, 64], "y_34": [1, 64, 768], "linear_45": [1, 64, 768], "y_35": [1, 64, 768], "x_67": [1, 64, 768], "layer_norm_23": [1, 64, 768], "x_68": [1, 64, 3072], "mul_44": [1, 64, 3072], "pow_12": [1, 64, 3072], "mul_45": [1, 64, 3072], "add_46": [1, 64, 3072], "mul_46": [1, 64, 3072], "tanh_11": [1, 64, 3072], "add_47": [1, 64, 3072], "x_69": [1, 64, 3072], "x_70": [1, 64, 768], "x_71": [1, 64, 768], "x_72": [1, 64, 768], "x_73": [1, 64, 768], "getitem_36": [1, 1, 768], "logits": [1, 1, 50304]}}, "frame_id": 2, "frame_compile_id": 0, "attempt": 1, "has_payload": "e1533188987bb53e01424902ec0e389a"} + class GraphModule(torch.nn.Module): + def forward(self, L_cloned_inputs_0_: "i64[1, 64][64, 1]cuda:0", L_mod_modules_transformer_modules_wte_parameters_weight_: "f32[50304, 768][768, 1]cuda:0", L_mod_modules_transformer_modules_wpe_parameters_weight_: "f32[1024, 768][768, 1]cuda:0", L_mod_modules_transformer_modules_h_modules_0_modules_ln_1_parameters_weight_: "f32[768][1]cuda:0", L_mod_modules_transformer_modules_h_modules_0_modules_ln_1_parameters_bias_: "f32[768][1]cuda:0", L_mod_modules_transformer_modules_h_modules_0_modules_attn_modules_c_attn_parameters_weight_: "f32[2304, 768][768, 1]cuda:0", L_mod_modules_transformer_modules_h_modules_0_modules_attn_modules_c_attn_parameters_bias_: "f32[2304][1]cuda:0", L_mod_modules_transformer_modules_h_modules_0_modules_attn_modules_c_proj_parameters_weight_: "f32[768, 768][768, 1]cuda:0", L_mod_modules_transformer_modules_h_modules_0_modules_attn_modules_c_proj_parameters_bias_: "f32[768][1]cuda:0", L_mod_modules_transformer_modules_h_modules_0_modules_ln_2_parameters_weight_: "f32[768][1]cuda:0", L_mod_modules_transformer_modules_h_modules_0_modules_ln_2_parameters_bias_: "f32[768][1]cuda:0", L_mod_modules_transformer_modules_h_modules_0_modules_mlp_modules_c_fc_parameters_weight_: "f32[3072, 768][768, 1]cuda:0", L_mod_modules_transformer_modules_h_modules_0_modules_mlp_modules_c_fc_parameters_bias_: "f32[3072][1]cuda:0", 
L_mod_modules_transformer_modules_h_modules_0_modules_mlp_modules_c_proj_parameters_weight_: "f32[768, 3072][3072, 1]cuda:0", L_mod_modules_transformer_modules_h_modules_0_modules_mlp_modules_c_proj_parameters_bias_: "f32[768][1]cuda:0", L_mod_modules_transformer_modules_h_modules_1_modules_ln_1_parameters_weight_: "f32[768][1]cuda:0", L_mod_modules_transformer_modules_h_modules_1_modules_ln_1_parameters_bias_: "f32[768][1]cuda:0", L_mod_modules_transformer_modules_h_modules_1_modules_attn_modules_c_attn_parameters_weight_: "f32[2304, 768][768, 1]cuda:0", L_mod_modules_transformer_modules_h_modules_1_modules_attn_modules_c_attn_parameters_bias_: "f32[2304][1]cuda:0", L_mod_modules_transformer_modules_h_modules_1_modules_attn_modules_c_proj_parameters_weight_: "f32[768, 768][768, 1]cuda:0", L_mod_modules_transformer_modules_h_modules_1_modules_attn_modules_c_proj_parameters_bias_: "f32[768][1]cuda:0", L_mod_modules_transformer_modules_h_modules_1_modules_ln_2_parameters_weight_: "f32[768][1]cuda:0", L_mod_modules_transformer_modules_h_modules_1_modules_ln_2_parameters_bias_: "f32[768][1]cuda:0", L_mod_modules_transformer_modules_h_modules_1_modules_mlp_modules_c_fc_parameters_weight_: "f32[3072, 768][768, 1]cuda:0", L_mod_modules_transformer_modules_h_modules_1_modules_mlp_modules_c_fc_parameters_bias_: "f32[3072][1]cuda:0", L_mod_modules_transformer_modules_h_modules_1_modules_mlp_modules_c_proj_parameters_weight_: "f32[768, 3072][3072, 1]cuda:0", L_mod_modules_transformer_modules_h_modules_1_modules_mlp_modules_c_proj_parameters_bias_: "f32[768][1]cuda:0", L_mod_modules_transformer_modules_h_modules_2_modules_ln_1_parameters_weight_: "f32[768][1]cuda:0", L_mod_modules_transformer_modules_h_modules_2_modules_ln_1_parameters_bias_: "f32[768][1]cuda:0", L_mod_modules_transformer_modules_h_modules_2_modules_attn_modules_c_attn_parameters_weight_: "f32[2304, 768][768, 1]cuda:0", L_mod_modules_transformer_modules_h_modules_2_modules_attn_modules_c_attn_parameters_bias_: "f32[2304][1]cuda:0", L_mod_modules_transformer_modules_h_modules_2_modules_attn_modules_c_proj_parameters_weight_: "f32[768, 768][768, 1]cuda:0", L_mod_modules_transformer_modules_h_modules_2_modules_attn_modules_c_proj_parameters_bias_: "f32[768][1]cuda:0", L_mod_modules_transformer_modules_h_modules_2_modules_ln_2_parameters_weight_: "f32[768][1]cuda:0", L_mod_modules_transformer_modules_h_modules_2_modules_ln_2_parameters_bias_: "f32[768][1]cuda:0", L_mod_modules_transformer_modules_h_modules_2_modules_mlp_modules_c_fc_parameters_weight_: "f32[3072, 768][768, 1]cuda:0", L_mod_modules_transformer_modules_h_modules_2_modules_mlp_modules_c_fc_parameters_bias_: "f32[3072][1]cuda:0", L_mod_modules_transformer_modules_h_modules_2_modules_mlp_modules_c_proj_parameters_weight_: "f32[768, 3072][3072, 1]cuda:0", L_mod_modules_transformer_modules_h_modules_2_modules_mlp_modules_c_proj_parameters_bias_: "f32[768][1]cuda:0", L_mod_modules_transformer_modules_h_modules_3_modules_ln_1_parameters_weight_: "f32[768][1]cuda:0", L_mod_modules_transformer_modules_h_modules_3_modules_ln_1_parameters_bias_: "f32[768][1]cuda:0", L_mod_modules_transformer_modules_h_modules_3_modules_attn_modules_c_attn_parameters_weight_: "f32[2304, 768][768, 1]cuda:0", L_mod_modules_transformer_modules_h_modules_3_modules_attn_modules_c_attn_parameters_bias_: "f32[2304][1]cuda:0", L_mod_modules_transformer_modules_h_modules_3_modules_attn_modules_c_proj_parameters_weight_: "f32[768, 768][768, 1]cuda:0", 
L_mod_modules_transformer_modules_h_modules_3_modules_attn_modules_c_proj_parameters_bias_: "f32[768][1]cuda:0", L_mod_modules_transformer_modules_h_modules_3_modules_ln_2_parameters_weight_: "f32[768][1]cuda:0", L_mod_modules_transformer_modules_h_modules_3_modules_ln_2_parameters_bias_: "f32[768][1]cuda:0", L_mod_modules_transformer_modules_h_modules_3_modules_mlp_modules_c_fc_parameters_weight_: "f32[3072, 768][768, 1]cuda:0", L_mod_modules_transformer_modules_h_modules_3_modules_mlp_modules_c_fc_parameters_bias_: "f32[3072][1]cuda:0", L_mod_modules_transformer_modules_h_modules_3_modules_mlp_modules_c_proj_parameters_weight_: "f32[768, 3072][3072, 1]cuda:0", L_mod_modules_transformer_modules_h_modules_3_modules_mlp_modules_c_proj_parameters_bias_: "f32[768][1]cuda:0", L_mod_modules_transformer_modules_h_modules_4_modules_ln_1_parameters_weight_: "f32[768][1]cuda:0", L_mod_modules_transformer_modules_h_modules_4_modules_ln_1_parameters_bias_: "f32[768][1]cuda:0", L_mod_modules_transformer_modules_h_modules_4_modules_attn_modules_c_attn_parameters_weight_: "f32[2304, 768][768, 1]cuda:0", L_mod_modules_transformer_modules_h_modules_4_modules_attn_modules_c_attn_parameters_bias_: "f32[2304][1]cuda:0", L_mod_modules_transformer_modules_h_modules_4_modules_attn_modules_c_proj_parameters_weight_: "f32[768, 768][768, 1]cuda:0", L_mod_modules_transformer_modules_h_modules_4_modules_attn_modules_c_proj_parameters_bias_: "f32[768][1]cuda:0", L_mod_modules_transformer_modules_h_modules_4_modules_ln_2_parameters_weight_: "f32[768][1]cuda:0", L_mod_modules_transformer_modules_h_modules_4_modules_ln_2_parameters_bias_: "f32[768][1]cuda:0", L_mod_modules_transformer_modules_h_modules_4_modules_mlp_modules_c_fc_parameters_weight_: "f32[3072, 768][768, 1]cuda:0", L_mod_modules_transformer_modules_h_modules_4_modules_mlp_modules_c_fc_parameters_bias_: "f32[3072][1]cuda:0", L_mod_modules_transformer_modules_h_modules_4_modules_mlp_modules_c_proj_parameters_weight_: "f32[768, 3072][3072, 1]cuda:0", L_mod_modules_transformer_modules_h_modules_4_modules_mlp_modules_c_proj_parameters_bias_: "f32[768][1]cuda:0", L_mod_modules_transformer_modules_h_modules_5_modules_ln_1_parameters_weight_: "f32[768][1]cuda:0", L_mod_modules_transformer_modules_h_modules_5_modules_ln_1_parameters_bias_: "f32[768][1]cuda:0", L_mod_modules_transformer_modules_h_modules_5_modules_attn_modules_c_attn_parameters_weight_: "f32[2304, 768][768, 1]cuda:0", L_mod_modules_transformer_modules_h_modules_5_modules_attn_modules_c_attn_parameters_bias_: "f32[2304][1]cuda:0", L_mod_modules_transformer_modules_h_modules_5_modules_attn_modules_c_proj_parameters_weight_: "f32[768, 768][768, 1]cuda:0", L_mod_modules_transformer_modules_h_modules_5_modules_attn_modules_c_proj_parameters_bias_: "f32[768][1]cuda:0", L_mod_modules_transformer_modules_h_modules_5_modules_ln_2_parameters_weight_: "f32[768][1]cuda:0", L_mod_modules_transformer_modules_h_modules_5_modules_ln_2_parameters_bias_: "f32[768][1]cuda:0", L_mod_modules_transformer_modules_h_modules_5_modules_mlp_modules_c_fc_parameters_weight_: "f32[3072, 768][768, 1]cuda:0", L_mod_modules_transformer_modules_h_modules_5_modules_mlp_modules_c_fc_parameters_bias_: "f32[3072][1]cuda:0", L_mod_modules_transformer_modules_h_modules_5_modules_mlp_modules_c_proj_parameters_weight_: "f32[768, 3072][3072, 1]cuda:0", L_mod_modules_transformer_modules_h_modules_5_modules_mlp_modules_c_proj_parameters_bias_: "f32[768][1]cuda:0", L_mod_modules_transformer_modules_h_modules_6_modules_ln_1_parameters_weight_: 
"f32[768][1]cuda:0", L_mod_modules_transformer_modules_h_modules_6_modules_ln_1_parameters_bias_: "f32[768][1]cuda:0", L_mod_modules_transformer_modules_h_modules_6_modules_attn_modules_c_attn_parameters_weight_: "f32[2304, 768][768, 1]cuda:0", L_mod_modules_transformer_modules_h_modules_6_modules_attn_modules_c_attn_parameters_bias_: "f32[2304][1]cuda:0", L_mod_modules_transformer_modules_h_modules_6_modules_attn_modules_c_proj_parameters_weight_: "f32[768, 768][768, 1]cuda:0", L_mod_modules_transformer_modules_h_modules_6_modules_attn_modules_c_proj_parameters_bias_: "f32[768][1]cuda:0", L_mod_modules_transformer_modules_h_modules_6_modules_ln_2_parameters_weight_: "f32[768][1]cuda:0", L_mod_modules_transformer_modules_h_modules_6_modules_ln_2_parameters_bias_: "f32[768][1]cuda:0", L_mod_modules_transformer_modules_h_modules_6_modules_mlp_modules_c_fc_parameters_weight_: "f32[3072, 768][768, 1]cuda:0", L_mod_modules_transformer_modules_h_modules_6_modules_mlp_modules_c_fc_parameters_bias_: "f32[3072][1]cuda:0", L_mod_modules_transformer_modules_h_modules_6_modules_mlp_modules_c_proj_parameters_weight_: "f32[768, 3072][3072, 1]cuda:0", L_mod_modules_transformer_modules_h_modules_6_modules_mlp_modules_c_proj_parameters_bias_: "f32[768][1]cuda:0", L_mod_modules_transformer_modules_h_modules_7_modules_ln_1_parameters_weight_: "f32[768][1]cuda:0", L_mod_modules_transformer_modules_h_modules_7_modules_ln_1_parameters_bias_: "f32[768][1]cuda:0", L_mod_modules_transformer_modules_h_modules_7_modules_attn_modules_c_attn_parameters_weight_: "f32[2304, 768][768, 1]cuda:0", L_mod_modules_transformer_modules_h_modules_7_modules_attn_modules_c_attn_parameters_bias_: "f32[2304][1]cuda:0", L_mod_modules_transformer_modules_h_modules_7_modules_attn_modules_c_proj_parameters_weight_: "f32[768, 768][768, 1]cuda:0", L_mod_modules_transformer_modules_h_modules_7_modules_attn_modules_c_proj_parameters_bias_: "f32[768][1]cuda:0", L_mod_modules_transformer_modules_h_modules_7_modules_ln_2_parameters_weight_: "f32[768][1]cuda:0", L_mod_modules_transformer_modules_h_modules_7_modules_ln_2_parameters_bias_: "f32[768][1]cuda:0", L_mod_modules_transformer_modules_h_modules_7_modules_mlp_modules_c_fc_parameters_weight_: "f32[3072, 768][768, 1]cuda:0", L_mod_modules_transformer_modules_h_modules_7_modules_mlp_modules_c_fc_parameters_bias_: "f32[3072][1]cuda:0", L_mod_modules_transformer_modules_h_modules_7_modules_mlp_modules_c_proj_parameters_weight_: "f32[768, 3072][3072, 1]cuda:0", L_mod_modules_transformer_modules_h_modules_7_modules_mlp_modules_c_proj_parameters_bias_: "f32[768][1]cuda:0", L_mod_modules_transformer_modules_h_modules_8_modules_ln_1_parameters_weight_: "f32[768][1]cuda:0", L_mod_modules_transformer_modules_h_modules_8_modules_ln_1_parameters_bias_: "f32[768][1]cuda:0", L_mod_modules_transformer_modules_h_modules_8_modules_attn_modules_c_attn_parameters_weight_: "f32[2304, 768][768, 1]cuda:0", L_mod_modules_transformer_modules_h_modules_8_modules_attn_modules_c_attn_parameters_bias_: "f32[2304][1]cuda:0", L_mod_modules_transformer_modules_h_modules_8_modules_attn_modules_c_proj_parameters_weight_: "f32[768, 768][768, 1]cuda:0", L_mod_modules_transformer_modules_h_modules_8_modules_attn_modules_c_proj_parameters_bias_: "f32[768][1]cuda:0", L_mod_modules_transformer_modules_h_modules_8_modules_ln_2_parameters_weight_: "f32[768][1]cuda:0", L_mod_modules_transformer_modules_h_modules_8_modules_ln_2_parameters_bias_: "f32[768][1]cuda:0", 
L_mod_modules_transformer_modules_h_modules_8_modules_mlp_modules_c_fc_parameters_weight_: "f32[3072, 768][768, 1]cuda:0", L_mod_modules_transformer_modules_h_modules_8_modules_mlp_modules_c_fc_parameters_bias_: "f32[3072][1]cuda:0", L_mod_modules_transformer_modules_h_modules_8_modules_mlp_modules_c_proj_parameters_weight_: "f32[768, 3072][3072, 1]cuda:0", L_mod_modules_transformer_modules_h_modules_8_modules_mlp_modules_c_proj_parameters_bias_: "f32[768][1]cuda:0", L_mod_modules_transformer_modules_h_modules_9_modules_ln_1_parameters_weight_: "f32[768][1]cuda:0", L_mod_modules_transformer_modules_h_modules_9_modules_ln_1_parameters_bias_: "f32[768][1]cuda:0", L_mod_modules_transformer_modules_h_modules_9_modules_attn_modules_c_attn_parameters_weight_: "f32[2304, 768][768, 1]cuda:0", L_mod_modules_transformer_modules_h_modules_9_modules_attn_modules_c_attn_parameters_bias_: "f32[2304][1]cuda:0", L_mod_modules_transformer_modules_h_modules_9_modules_attn_modules_c_proj_parameters_weight_: "f32[768, 768][768, 1]cuda:0", L_mod_modules_transformer_modules_h_modules_9_modules_attn_modules_c_proj_parameters_bias_: "f32[768][1]cuda:0", L_mod_modules_transformer_modules_h_modules_9_modules_ln_2_parameters_weight_: "f32[768][1]cuda:0", L_mod_modules_transformer_modules_h_modules_9_modules_ln_2_parameters_bias_: "f32[768][1]cuda:0", L_mod_modules_transformer_modules_h_modules_9_modules_mlp_modules_c_fc_parameters_weight_: "f32[3072, 768][768, 1]cuda:0", L_mod_modules_transformer_modules_h_modules_9_modules_mlp_modules_c_fc_parameters_bias_: "f32[3072][1]cuda:0", L_mod_modules_transformer_modules_h_modules_9_modules_mlp_modules_c_proj_parameters_weight_: "f32[768, 3072][3072, 1]cuda:0", L_mod_modules_transformer_modules_h_modules_9_modules_mlp_modules_c_proj_parameters_bias_: "f32[768][1]cuda:0", L_mod_modules_transformer_modules_h_modules_10_modules_ln_1_parameters_weight_: "f32[768][1]cuda:0", L_mod_modules_transformer_modules_h_modules_10_modules_ln_1_parameters_bias_: "f32[768][1]cuda:0", L_mod_modules_transformer_modules_h_modules_10_modules_attn_modules_c_attn_parameters_weight_: "f32[2304, 768][768, 1]cuda:0", L_mod_modules_transformer_modules_h_modules_10_modules_attn_modules_c_attn_parameters_bias_: "f32[2304][1]cuda:0", L_mod_modules_transformer_modules_h_modules_10_modules_attn_modules_c_proj_parameters_weight_: "f32[768, 768][768, 1]cuda:0", L_mod_modules_transformer_modules_h_modules_10_modules_attn_modules_c_proj_parameters_bias_: "f32[768][1]cuda:0", L_mod_modules_transformer_modules_h_modules_10_modules_ln_2_parameters_weight_: "f32[768][1]cuda:0", L_mod_modules_transformer_modules_h_modules_10_modules_ln_2_parameters_bias_: "f32[768][1]cuda:0", L_mod_modules_transformer_modules_h_modules_10_modules_mlp_modules_c_fc_parameters_weight_: "f32[3072, 768][768, 1]cuda:0", L_mod_modules_transformer_modules_h_modules_10_modules_mlp_modules_c_fc_parameters_bias_: "f32[3072][1]cuda:0", L_mod_modules_transformer_modules_h_modules_10_modules_mlp_modules_c_proj_parameters_weight_: "f32[768, 3072][3072, 1]cuda:0", L_mod_modules_transformer_modules_h_modules_10_modules_mlp_modules_c_proj_parameters_bias_: "f32[768][1]cuda:0", L_mod_modules_transformer_modules_h_modules_11_modules_ln_1_parameters_weight_: "f32[768][1]cuda:0", L_mod_modules_transformer_modules_h_modules_11_modules_ln_1_parameters_bias_: "f32[768][1]cuda:0", L_mod_modules_transformer_modules_h_modules_11_modules_attn_modules_c_attn_parameters_weight_: "f32[2304, 768][768, 1]cuda:0", 
L_mod_modules_transformer_modules_h_modules_11_modules_attn_modules_c_attn_parameters_bias_: "f32[2304][1]cuda:0", L_mod_modules_transformer_modules_h_modules_11_modules_attn_modules_c_proj_parameters_weight_: "f32[768, 768][768, 1]cuda:0", L_mod_modules_transformer_modules_h_modules_11_modules_attn_modules_c_proj_parameters_bias_: "f32[768][1]cuda:0", L_mod_modules_transformer_modules_h_modules_11_modules_ln_2_parameters_weight_: "f32[768][1]cuda:0", L_mod_modules_transformer_modules_h_modules_11_modules_ln_2_parameters_bias_: "f32[768][1]cuda:0", L_mod_modules_transformer_modules_h_modules_11_modules_mlp_modules_c_fc_parameters_weight_: "f32[3072, 768][768, 1]cuda:0", L_mod_modules_transformer_modules_h_modules_11_modules_mlp_modules_c_fc_parameters_bias_: "f32[3072][1]cuda:0", L_mod_modules_transformer_modules_h_modules_11_modules_mlp_modules_c_proj_parameters_weight_: "f32[768, 3072][3072, 1]cuda:0", L_mod_modules_transformer_modules_h_modules_11_modules_mlp_modules_c_proj_parameters_bias_: "f32[768][1]cuda:0", L_mod_modules_transformer_modules_ln_f_parameters_weight_: "f32[768][1]cuda:0", L_mod_modules_transformer_modules_ln_f_parameters_bias_: "f32[768][1]cuda:0"): + l_cloned_inputs_0_ = L_cloned_inputs_0_ + l_mod_modules_transformer_modules_wte_parameters_weight_ = L_mod_modules_transformer_modules_wte_parameters_weight_ + l_mod_modules_transformer_modules_wpe_parameters_weight_ = L_mod_modules_transformer_modules_wpe_parameters_weight_ + l_mod_modules_transformer_modules_h_modules_0_modules_ln_1_parameters_weight_ = L_mod_modules_transformer_modules_h_modules_0_modules_ln_1_parameters_weight_ + l_mod_modules_transformer_modules_h_modules_0_modules_ln_1_parameters_bias_ = L_mod_modules_transformer_modules_h_modules_0_modules_ln_1_parameters_bias_ + l_mod_modules_transformer_modules_h_modules_0_modules_attn_modules_c_attn_parameters_weight_ = L_mod_modules_transformer_modules_h_modules_0_modules_attn_modules_c_attn_parameters_weight_ + l_mod_modules_transformer_modules_h_modules_0_modules_attn_modules_c_attn_parameters_bias_ = L_mod_modules_transformer_modules_h_modules_0_modules_attn_modules_c_attn_parameters_bias_ + l_mod_modules_transformer_modules_h_modules_0_modules_attn_modules_c_proj_parameters_weight_ = L_mod_modules_transformer_modules_h_modules_0_modules_attn_modules_c_proj_parameters_weight_ + l_mod_modules_transformer_modules_h_modules_0_modules_attn_modules_c_proj_parameters_bias_ = L_mod_modules_transformer_modules_h_modules_0_modules_attn_modules_c_proj_parameters_bias_ + l_mod_modules_transformer_modules_h_modules_0_modules_ln_2_parameters_weight_ = L_mod_modules_transformer_modules_h_modules_0_modules_ln_2_parameters_weight_ + l_mod_modules_transformer_modules_h_modules_0_modules_ln_2_parameters_bias_ = L_mod_modules_transformer_modules_h_modules_0_modules_ln_2_parameters_bias_ + l_mod_modules_transformer_modules_h_modules_0_modules_mlp_modules_c_fc_parameters_weight_ = L_mod_modules_transformer_modules_h_modules_0_modules_mlp_modules_c_fc_parameters_weight_ + l_mod_modules_transformer_modules_h_modules_0_modules_mlp_modules_c_fc_parameters_bias_ = L_mod_modules_transformer_modules_h_modules_0_modules_mlp_modules_c_fc_parameters_bias_ + l_mod_modules_transformer_modules_h_modules_0_modules_mlp_modules_c_proj_parameters_weight_ = L_mod_modules_transformer_modules_h_modules_0_modules_mlp_modules_c_proj_parameters_weight_ + l_mod_modules_transformer_modules_h_modules_0_modules_mlp_modules_c_proj_parameters_bias_ = 
L_mod_modules_transformer_modules_h_modules_0_modules_mlp_modules_c_proj_parameters_bias_ + l_mod_modules_transformer_modules_h_modules_1_modules_ln_1_parameters_weight_ = L_mod_modules_transformer_modules_h_modules_1_modules_ln_1_parameters_weight_ + l_mod_modules_transformer_modules_h_modules_1_modules_ln_1_parameters_bias_ = L_mod_modules_transformer_modules_h_modules_1_modules_ln_1_parameters_bias_ + l_mod_modules_transformer_modules_h_modules_1_modules_attn_modules_c_attn_parameters_weight_ = L_mod_modules_transformer_modules_h_modules_1_modules_attn_modules_c_attn_parameters_weight_ + l_mod_modules_transformer_modules_h_modules_1_modules_attn_modules_c_attn_parameters_bias_ = L_mod_modules_transformer_modules_h_modules_1_modules_attn_modules_c_attn_parameters_bias_ + l_mod_modules_transformer_modules_h_modules_1_modules_attn_modules_c_proj_parameters_weight_ = L_mod_modules_transformer_modules_h_modules_1_modules_attn_modules_c_proj_parameters_weight_ + l_mod_modules_transformer_modules_h_modules_1_modules_attn_modules_c_proj_parameters_bias_ = L_mod_modules_transformer_modules_h_modules_1_modules_attn_modules_c_proj_parameters_bias_ + l_mod_modules_transformer_modules_h_modules_1_modules_ln_2_parameters_weight_ = L_mod_modules_transformer_modules_h_modules_1_modules_ln_2_parameters_weight_ + l_mod_modules_transformer_modules_h_modules_1_modules_ln_2_parameters_bias_ = L_mod_modules_transformer_modules_h_modules_1_modules_ln_2_parameters_bias_ + l_mod_modules_transformer_modules_h_modules_1_modules_mlp_modules_c_fc_parameters_weight_ = L_mod_modules_transformer_modules_h_modules_1_modules_mlp_modules_c_fc_parameters_weight_ + l_mod_modules_transformer_modules_h_modules_1_modules_mlp_modules_c_fc_parameters_bias_ = L_mod_modules_transformer_modules_h_modules_1_modules_mlp_modules_c_fc_parameters_bias_ + l_mod_modules_transformer_modules_h_modules_1_modules_mlp_modules_c_proj_parameters_weight_ = L_mod_modules_transformer_modules_h_modules_1_modules_mlp_modules_c_proj_parameters_weight_ + l_mod_modules_transformer_modules_h_modules_1_modules_mlp_modules_c_proj_parameters_bias_ = L_mod_modules_transformer_modules_h_modules_1_modules_mlp_modules_c_proj_parameters_bias_ + l_mod_modules_transformer_modules_h_modules_2_modules_ln_1_parameters_weight_ = L_mod_modules_transformer_modules_h_modules_2_modules_ln_1_parameters_weight_ + l_mod_modules_transformer_modules_h_modules_2_modules_ln_1_parameters_bias_ = L_mod_modules_transformer_modules_h_modules_2_modules_ln_1_parameters_bias_ + l_mod_modules_transformer_modules_h_modules_2_modules_attn_modules_c_attn_parameters_weight_ = L_mod_modules_transformer_modules_h_modules_2_modules_attn_modules_c_attn_parameters_weight_ + l_mod_modules_transformer_modules_h_modules_2_modules_attn_modules_c_attn_parameters_bias_ = L_mod_modules_transformer_modules_h_modules_2_modules_attn_modules_c_attn_parameters_bias_ + l_mod_modules_transformer_modules_h_modules_2_modules_attn_modules_c_proj_parameters_weight_ = L_mod_modules_transformer_modules_h_modules_2_modules_attn_modules_c_proj_parameters_weight_ + l_mod_modules_transformer_modules_h_modules_2_modules_attn_modules_c_proj_parameters_bias_ = L_mod_modules_transformer_modules_h_modules_2_modules_attn_modules_c_proj_parameters_bias_ + l_mod_modules_transformer_modules_h_modules_2_modules_ln_2_parameters_weight_ = L_mod_modules_transformer_modules_h_modules_2_modules_ln_2_parameters_weight_ + l_mod_modules_transformer_modules_h_modules_2_modules_ln_2_parameters_bias_ = 
L_mod_modules_transformer_modules_h_modules_2_modules_ln_2_parameters_bias_ + l_mod_modules_transformer_modules_h_modules_2_modules_mlp_modules_c_fc_parameters_weight_ = L_mod_modules_transformer_modules_h_modules_2_modules_mlp_modules_c_fc_parameters_weight_ + l_mod_modules_transformer_modules_h_modules_2_modules_mlp_modules_c_fc_parameters_bias_ = L_mod_modules_transformer_modules_h_modules_2_modules_mlp_modules_c_fc_parameters_bias_ + l_mod_modules_transformer_modules_h_modules_2_modules_mlp_modules_c_proj_parameters_weight_ = L_mod_modules_transformer_modules_h_modules_2_modules_mlp_modules_c_proj_parameters_weight_ + l_mod_modules_transformer_modules_h_modules_2_modules_mlp_modules_c_proj_parameters_bias_ = L_mod_modules_transformer_modules_h_modules_2_modules_mlp_modules_c_proj_parameters_bias_ + l_mod_modules_transformer_modules_h_modules_3_modules_ln_1_parameters_weight_ = L_mod_modules_transformer_modules_h_modules_3_modules_ln_1_parameters_weight_ + l_mod_modules_transformer_modules_h_modules_3_modules_ln_1_parameters_bias_ = L_mod_modules_transformer_modules_h_modules_3_modules_ln_1_parameters_bias_ + l_mod_modules_transformer_modules_h_modules_3_modules_attn_modules_c_attn_parameters_weight_ = L_mod_modules_transformer_modules_h_modules_3_modules_attn_modules_c_attn_parameters_weight_ + l_mod_modules_transformer_modules_h_modules_3_modules_attn_modules_c_attn_parameters_bias_ = L_mod_modules_transformer_modules_h_modules_3_modules_attn_modules_c_attn_parameters_bias_ + l_mod_modules_transformer_modules_h_modules_3_modules_attn_modules_c_proj_parameters_weight_ = L_mod_modules_transformer_modules_h_modules_3_modules_attn_modules_c_proj_parameters_weight_ + l_mod_modules_transformer_modules_h_modules_3_modules_attn_modules_c_proj_parameters_bias_ = L_mod_modules_transformer_modules_h_modules_3_modules_attn_modules_c_proj_parameters_bias_ + l_mod_modules_transformer_modules_h_modules_3_modules_ln_2_parameters_weight_ = L_mod_modules_transformer_modules_h_modules_3_modules_ln_2_parameters_weight_ + l_mod_modules_transformer_modules_h_modules_3_modules_ln_2_parameters_bias_ = L_mod_modules_transformer_modules_h_modules_3_modules_ln_2_parameters_bias_ + l_mod_modules_transformer_modules_h_modules_3_modules_mlp_modules_c_fc_parameters_weight_ = L_mod_modules_transformer_modules_h_modules_3_modules_mlp_modules_c_fc_parameters_weight_ + l_mod_modules_transformer_modules_h_modules_3_modules_mlp_modules_c_fc_parameters_bias_ = L_mod_modules_transformer_modules_h_modules_3_modules_mlp_modules_c_fc_parameters_bias_ + l_mod_modules_transformer_modules_h_modules_3_modules_mlp_modules_c_proj_parameters_weight_ = L_mod_modules_transformer_modules_h_modules_3_modules_mlp_modules_c_proj_parameters_weight_ + l_mod_modules_transformer_modules_h_modules_3_modules_mlp_modules_c_proj_parameters_bias_ = L_mod_modules_transformer_modules_h_modules_3_modules_mlp_modules_c_proj_parameters_bias_ + l_mod_modules_transformer_modules_h_modules_4_modules_ln_1_parameters_weight_ = L_mod_modules_transformer_modules_h_modules_4_modules_ln_1_parameters_weight_ + l_mod_modules_transformer_modules_h_modules_4_modules_ln_1_parameters_bias_ = L_mod_modules_transformer_modules_h_modules_4_modules_ln_1_parameters_bias_ + l_mod_modules_transformer_modules_h_modules_4_modules_attn_modules_c_attn_parameters_weight_ = L_mod_modules_transformer_modules_h_modules_4_modules_attn_modules_c_attn_parameters_weight_ + l_mod_modules_transformer_modules_h_modules_4_modules_attn_modules_c_attn_parameters_bias_ = 
L_mod_modules_transformer_modules_h_modules_4_modules_attn_modules_c_attn_parameters_bias_ + l_mod_modules_transformer_modules_h_modules_4_modules_attn_modules_c_proj_parameters_weight_ = L_mod_modules_transformer_modules_h_modules_4_modules_attn_modules_c_proj_parameters_weight_ + l_mod_modules_transformer_modules_h_modules_4_modules_attn_modules_c_proj_parameters_bias_ = L_mod_modules_transformer_modules_h_modules_4_modules_attn_modules_c_proj_parameters_bias_ + l_mod_modules_transformer_modules_h_modules_4_modules_ln_2_parameters_weight_ = L_mod_modules_transformer_modules_h_modules_4_modules_ln_2_parameters_weight_ + l_mod_modules_transformer_modules_h_modules_4_modules_ln_2_parameters_bias_ = L_mod_modules_transformer_modules_h_modules_4_modules_ln_2_parameters_bias_ + l_mod_modules_transformer_modules_h_modules_4_modules_mlp_modules_c_fc_parameters_weight_ = L_mod_modules_transformer_modules_h_modules_4_modules_mlp_modules_c_fc_parameters_weight_ + l_mod_modules_transformer_modules_h_modules_4_modules_mlp_modules_c_fc_parameters_bias_ = L_mod_modules_transformer_modules_h_modules_4_modules_mlp_modules_c_fc_parameters_bias_ + l_mod_modules_transformer_modules_h_modules_4_modules_mlp_modules_c_proj_parameters_weight_ = L_mod_modules_transformer_modules_h_modules_4_modules_mlp_modules_c_proj_parameters_weight_ + l_mod_modules_transformer_modules_h_modules_4_modules_mlp_modules_c_proj_parameters_bias_ = L_mod_modules_transformer_modules_h_modules_4_modules_mlp_modules_c_proj_parameters_bias_ + l_mod_modules_transformer_modules_h_modules_5_modules_ln_1_parameters_weight_ = L_mod_modules_transformer_modules_h_modules_5_modules_ln_1_parameters_weight_ + l_mod_modules_transformer_modules_h_modules_5_modules_ln_1_parameters_bias_ = L_mod_modules_transformer_modules_h_modules_5_modules_ln_1_parameters_bias_ + l_mod_modules_transformer_modules_h_modules_5_modules_attn_modules_c_attn_parameters_weight_ = L_mod_modules_transformer_modules_h_modules_5_modules_attn_modules_c_attn_parameters_weight_ + l_mod_modules_transformer_modules_h_modules_5_modules_attn_modules_c_attn_parameters_bias_ = L_mod_modules_transformer_modules_h_modules_5_modules_attn_modules_c_attn_parameters_bias_ + l_mod_modules_transformer_modules_h_modules_5_modules_attn_modules_c_proj_parameters_weight_ = L_mod_modules_transformer_modules_h_modules_5_modules_attn_modules_c_proj_parameters_weight_ + l_mod_modules_transformer_modules_h_modules_5_modules_attn_modules_c_proj_parameters_bias_ = L_mod_modules_transformer_modules_h_modules_5_modules_attn_modules_c_proj_parameters_bias_ + l_mod_modules_transformer_modules_h_modules_5_modules_ln_2_parameters_weight_ = L_mod_modules_transformer_modules_h_modules_5_modules_ln_2_parameters_weight_ + l_mod_modules_transformer_modules_h_modules_5_modules_ln_2_parameters_bias_ = L_mod_modules_transformer_modules_h_modules_5_modules_ln_2_parameters_bias_ + l_mod_modules_transformer_modules_h_modules_5_modules_mlp_modules_c_fc_parameters_weight_ = L_mod_modules_transformer_modules_h_modules_5_modules_mlp_modules_c_fc_parameters_weight_ + l_mod_modules_transformer_modules_h_modules_5_modules_mlp_modules_c_fc_parameters_bias_ = L_mod_modules_transformer_modules_h_modules_5_modules_mlp_modules_c_fc_parameters_bias_ + l_mod_modules_transformer_modules_h_modules_5_modules_mlp_modules_c_proj_parameters_weight_ = L_mod_modules_transformer_modules_h_modules_5_modules_mlp_modules_c_proj_parameters_weight_ + l_mod_modules_transformer_modules_h_modules_5_modules_mlp_modules_c_proj_parameters_bias_ = 
L_mod_modules_transformer_modules_h_modules_5_modules_mlp_modules_c_proj_parameters_bias_ + l_mod_modules_transformer_modules_h_modules_6_modules_ln_1_parameters_weight_ = L_mod_modules_transformer_modules_h_modules_6_modules_ln_1_parameters_weight_ + l_mod_modules_transformer_modules_h_modules_6_modules_ln_1_parameters_bias_ = L_mod_modules_transformer_modules_h_modules_6_modules_ln_1_parameters_bias_ + l_mod_modules_transformer_modules_h_modules_6_modules_attn_modules_c_attn_parameters_weight_ = L_mod_modules_transformer_modules_h_modules_6_modules_attn_modules_c_attn_parameters_weight_ + l_mod_modules_transformer_modules_h_modules_6_modules_attn_modules_c_attn_parameters_bias_ = L_mod_modules_transformer_modules_h_modules_6_modules_attn_modules_c_attn_parameters_bias_ + l_mod_modules_transformer_modules_h_modules_6_modules_attn_modules_c_proj_parameters_weight_ = L_mod_modules_transformer_modules_h_modules_6_modules_attn_modules_c_proj_parameters_weight_ + l_mod_modules_transformer_modules_h_modules_6_modules_attn_modules_c_proj_parameters_bias_ = L_mod_modules_transformer_modules_h_modules_6_modules_attn_modules_c_proj_parameters_bias_ + l_mod_modules_transformer_modules_h_modules_6_modules_ln_2_parameters_weight_ = L_mod_modules_transformer_modules_h_modules_6_modules_ln_2_parameters_weight_ + l_mod_modules_transformer_modules_h_modules_6_modules_ln_2_parameters_bias_ = L_mod_modules_transformer_modules_h_modules_6_modules_ln_2_parameters_bias_ + l_mod_modules_transformer_modules_h_modules_6_modules_mlp_modules_c_fc_parameters_weight_ = L_mod_modules_transformer_modules_h_modules_6_modules_mlp_modules_c_fc_parameters_weight_ + l_mod_modules_transformer_modules_h_modules_6_modules_mlp_modules_c_fc_parameters_bias_ = L_mod_modules_transformer_modules_h_modules_6_modules_mlp_modules_c_fc_parameters_bias_ + l_mod_modules_transformer_modules_h_modules_6_modules_mlp_modules_c_proj_parameters_weight_ = L_mod_modules_transformer_modules_h_modules_6_modules_mlp_modules_c_proj_parameters_weight_ + l_mod_modules_transformer_modules_h_modules_6_modules_mlp_modules_c_proj_parameters_bias_ = L_mod_modules_transformer_modules_h_modules_6_modules_mlp_modules_c_proj_parameters_bias_ + l_mod_modules_transformer_modules_h_modules_7_modules_ln_1_parameters_weight_ = L_mod_modules_transformer_modules_h_modules_7_modules_ln_1_parameters_weight_ + l_mod_modules_transformer_modules_h_modules_7_modules_ln_1_parameters_bias_ = L_mod_modules_transformer_modules_h_modules_7_modules_ln_1_parameters_bias_ + l_mod_modules_transformer_modules_h_modules_7_modules_attn_modules_c_attn_parameters_weight_ = L_mod_modules_transformer_modules_h_modules_7_modules_attn_modules_c_attn_parameters_weight_ + l_mod_modules_transformer_modules_h_modules_7_modules_attn_modules_c_attn_parameters_bias_ = L_mod_modules_transformer_modules_h_modules_7_modules_attn_modules_c_attn_parameters_bias_ + l_mod_modules_transformer_modules_h_modules_7_modules_attn_modules_c_proj_parameters_weight_ = L_mod_modules_transformer_modules_h_modules_7_modules_attn_modules_c_proj_parameters_weight_ + l_mod_modules_transformer_modules_h_modules_7_modules_attn_modules_c_proj_parameters_bias_ = L_mod_modules_transformer_modules_h_modules_7_modules_attn_modules_c_proj_parameters_bias_ + l_mod_modules_transformer_modules_h_modules_7_modules_ln_2_parameters_weight_ = L_mod_modules_transformer_modules_h_modules_7_modules_ln_2_parameters_weight_ + l_mod_modules_transformer_modules_h_modules_7_modules_ln_2_parameters_bias_ = 
L_mod_modules_transformer_modules_h_modules_7_modules_ln_2_parameters_bias_ + l_mod_modules_transformer_modules_h_modules_7_modules_mlp_modules_c_fc_parameters_weight_ = L_mod_modules_transformer_modules_h_modules_7_modules_mlp_modules_c_fc_parameters_weight_ + l_mod_modules_transformer_modules_h_modules_7_modules_mlp_modules_c_fc_parameters_bias_ = L_mod_modules_transformer_modules_h_modules_7_modules_mlp_modules_c_fc_parameters_bias_ + l_mod_modules_transformer_modules_h_modules_7_modules_mlp_modules_c_proj_parameters_weight_ = L_mod_modules_transformer_modules_h_modules_7_modules_mlp_modules_c_proj_parameters_weight_ + l_mod_modules_transformer_modules_h_modules_7_modules_mlp_modules_c_proj_parameters_bias_ = L_mod_modules_transformer_modules_h_modules_7_modules_mlp_modules_c_proj_parameters_bias_ + l_mod_modules_transformer_modules_h_modules_8_modules_ln_1_parameters_weight_ = L_mod_modules_transformer_modules_h_modules_8_modules_ln_1_parameters_weight_ + l_mod_modules_transformer_modules_h_modules_8_modules_ln_1_parameters_bias_ = L_mod_modules_transformer_modules_h_modules_8_modules_ln_1_parameters_bias_ + l_mod_modules_transformer_modules_h_modules_8_modules_attn_modules_c_attn_parameters_weight_ = L_mod_modules_transformer_modules_h_modules_8_modules_attn_modules_c_attn_parameters_weight_ + l_mod_modules_transformer_modules_h_modules_8_modules_attn_modules_c_attn_parameters_bias_ = L_mod_modules_transformer_modules_h_modules_8_modules_attn_modules_c_attn_parameters_bias_ + l_mod_modules_transformer_modules_h_modules_8_modules_attn_modules_c_proj_parameters_weight_ = L_mod_modules_transformer_modules_h_modules_8_modules_attn_modules_c_proj_parameters_weight_ + l_mod_modules_transformer_modules_h_modules_8_modules_attn_modules_c_proj_parameters_bias_ = L_mod_modules_transformer_modules_h_modules_8_modules_attn_modules_c_proj_parameters_bias_ + l_mod_modules_transformer_modules_h_modules_8_modules_ln_2_parameters_weight_ = L_mod_modules_transformer_modules_h_modules_8_modules_ln_2_parameters_weight_ + l_mod_modules_transformer_modules_h_modules_8_modules_ln_2_parameters_bias_ = L_mod_modules_transformer_modules_h_modules_8_modules_ln_2_parameters_bias_ + l_mod_modules_transformer_modules_h_modules_8_modules_mlp_modules_c_fc_parameters_weight_ = L_mod_modules_transformer_modules_h_modules_8_modules_mlp_modules_c_fc_parameters_weight_ + l_mod_modules_transformer_modules_h_modules_8_modules_mlp_modules_c_fc_parameters_bias_ = L_mod_modules_transformer_modules_h_modules_8_modules_mlp_modules_c_fc_parameters_bias_ + l_mod_modules_transformer_modules_h_modules_8_modules_mlp_modules_c_proj_parameters_weight_ = L_mod_modules_transformer_modules_h_modules_8_modules_mlp_modules_c_proj_parameters_weight_ + l_mod_modules_transformer_modules_h_modules_8_modules_mlp_modules_c_proj_parameters_bias_ = L_mod_modules_transformer_modules_h_modules_8_modules_mlp_modules_c_proj_parameters_bias_ + l_mod_modules_transformer_modules_h_modules_9_modules_ln_1_parameters_weight_ = L_mod_modules_transformer_modules_h_modules_9_modules_ln_1_parameters_weight_ + l_mod_modules_transformer_modules_h_modules_9_modules_ln_1_parameters_bias_ = L_mod_modules_transformer_modules_h_modules_9_modules_ln_1_parameters_bias_ + l_mod_modules_transformer_modules_h_modules_9_modules_attn_modules_c_attn_parameters_weight_ = L_mod_modules_transformer_modules_h_modules_9_modules_attn_modules_c_attn_parameters_weight_ + l_mod_modules_transformer_modules_h_modules_9_modules_attn_modules_c_attn_parameters_bias_ = 
L_mod_modules_transformer_modules_h_modules_9_modules_attn_modules_c_attn_parameters_bias_ + l_mod_modules_transformer_modules_h_modules_9_modules_attn_modules_c_proj_parameters_weight_ = L_mod_modules_transformer_modules_h_modules_9_modules_attn_modules_c_proj_parameters_weight_ + l_mod_modules_transformer_modules_h_modules_9_modules_attn_modules_c_proj_parameters_bias_ = L_mod_modules_transformer_modules_h_modules_9_modules_attn_modules_c_proj_parameters_bias_ + l_mod_modules_transformer_modules_h_modules_9_modules_ln_2_parameters_weight_ = L_mod_modules_transformer_modules_h_modules_9_modules_ln_2_parameters_weight_ + l_mod_modules_transformer_modules_h_modules_9_modules_ln_2_parameters_bias_ = L_mod_modules_transformer_modules_h_modules_9_modules_ln_2_parameters_bias_ + l_mod_modules_transformer_modules_h_modules_9_modules_mlp_modules_c_fc_parameters_weight_ = L_mod_modules_transformer_modules_h_modules_9_modules_mlp_modules_c_fc_parameters_weight_ + l_mod_modules_transformer_modules_h_modules_9_modules_mlp_modules_c_fc_parameters_bias_ = L_mod_modules_transformer_modules_h_modules_9_modules_mlp_modules_c_fc_parameters_bias_ + l_mod_modules_transformer_modules_h_modules_9_modules_mlp_modules_c_proj_parameters_weight_ = L_mod_modules_transformer_modules_h_modules_9_modules_mlp_modules_c_proj_parameters_weight_ + l_mod_modules_transformer_modules_h_modules_9_modules_mlp_modules_c_proj_parameters_bias_ = L_mod_modules_transformer_modules_h_modules_9_modules_mlp_modules_c_proj_parameters_bias_ + l_mod_modules_transformer_modules_h_modules_10_modules_ln_1_parameters_weight_ = L_mod_modules_transformer_modules_h_modules_10_modules_ln_1_parameters_weight_ + l_mod_modules_transformer_modules_h_modules_10_modules_ln_1_parameters_bias_ = L_mod_modules_transformer_modules_h_modules_10_modules_ln_1_parameters_bias_ + l_mod_modules_transformer_modules_h_modules_10_modules_attn_modules_c_attn_parameters_weight_ = L_mod_modules_transformer_modules_h_modules_10_modules_attn_modules_c_attn_parameters_weight_ + l_mod_modules_transformer_modules_h_modules_10_modules_attn_modules_c_attn_parameters_bias_ = L_mod_modules_transformer_modules_h_modules_10_modules_attn_modules_c_attn_parameters_bias_ + l_mod_modules_transformer_modules_h_modules_10_modules_attn_modules_c_proj_parameters_weight_ = L_mod_modules_transformer_modules_h_modules_10_modules_attn_modules_c_proj_parameters_weight_ + l_mod_modules_transformer_modules_h_modules_10_modules_attn_modules_c_proj_parameters_bias_ = L_mod_modules_transformer_modules_h_modules_10_modules_attn_modules_c_proj_parameters_bias_ + l_mod_modules_transformer_modules_h_modules_10_modules_ln_2_parameters_weight_ = L_mod_modules_transformer_modules_h_modules_10_modules_ln_2_parameters_weight_ + l_mod_modules_transformer_modules_h_modules_10_modules_ln_2_parameters_bias_ = L_mod_modules_transformer_modules_h_modules_10_modules_ln_2_parameters_bias_ + l_mod_modules_transformer_modules_h_modules_10_modules_mlp_modules_c_fc_parameters_weight_ = L_mod_modules_transformer_modules_h_modules_10_modules_mlp_modules_c_fc_parameters_weight_ + l_mod_modules_transformer_modules_h_modules_10_modules_mlp_modules_c_fc_parameters_bias_ = L_mod_modules_transformer_modules_h_modules_10_modules_mlp_modules_c_fc_parameters_bias_ + l_mod_modules_transformer_modules_h_modules_10_modules_mlp_modules_c_proj_parameters_weight_ = L_mod_modules_transformer_modules_h_modules_10_modules_mlp_modules_c_proj_parameters_weight_ + 
l_mod_modules_transformer_modules_h_modules_10_modules_mlp_modules_c_proj_parameters_bias_ = L_mod_modules_transformer_modules_h_modules_10_modules_mlp_modules_c_proj_parameters_bias_ + l_mod_modules_transformer_modules_h_modules_11_modules_ln_1_parameters_weight_ = L_mod_modules_transformer_modules_h_modules_11_modules_ln_1_parameters_weight_ + l_mod_modules_transformer_modules_h_modules_11_modules_ln_1_parameters_bias_ = L_mod_modules_transformer_modules_h_modules_11_modules_ln_1_parameters_bias_ + l_mod_modules_transformer_modules_h_modules_11_modules_attn_modules_c_attn_parameters_weight_ = L_mod_modules_transformer_modules_h_modules_11_modules_attn_modules_c_attn_parameters_weight_ + l_mod_modules_transformer_modules_h_modules_11_modules_attn_modules_c_attn_parameters_bias_ = L_mod_modules_transformer_modules_h_modules_11_modules_attn_modules_c_attn_parameters_bias_ + l_mod_modules_transformer_modules_h_modules_11_modules_attn_modules_c_proj_parameters_weight_ = L_mod_modules_transformer_modules_h_modules_11_modules_attn_modules_c_proj_parameters_weight_ + l_mod_modules_transformer_modules_h_modules_11_modules_attn_modules_c_proj_parameters_bias_ = L_mod_modules_transformer_modules_h_modules_11_modules_attn_modules_c_proj_parameters_bias_ + l_mod_modules_transformer_modules_h_modules_11_modules_ln_2_parameters_weight_ = L_mod_modules_transformer_modules_h_modules_11_modules_ln_2_parameters_weight_ + l_mod_modules_transformer_modules_h_modules_11_modules_ln_2_parameters_bias_ = L_mod_modules_transformer_modules_h_modules_11_modules_ln_2_parameters_bias_ + l_mod_modules_transformer_modules_h_modules_11_modules_mlp_modules_c_fc_parameters_weight_ = L_mod_modules_transformer_modules_h_modules_11_modules_mlp_modules_c_fc_parameters_weight_ + l_mod_modules_transformer_modules_h_modules_11_modules_mlp_modules_c_fc_parameters_bias_ = L_mod_modules_transformer_modules_h_modules_11_modules_mlp_modules_c_fc_parameters_bias_ + l_mod_modules_transformer_modules_h_modules_11_modules_mlp_modules_c_proj_parameters_weight_ = L_mod_modules_transformer_modules_h_modules_11_modules_mlp_modules_c_proj_parameters_weight_ + l_mod_modules_transformer_modules_h_modules_11_modules_mlp_modules_c_proj_parameters_bias_ = L_mod_modules_transformer_modules_h_modules_11_modules_mlp_modules_c_proj_parameters_bias_ + l_mod_modules_transformer_modules_ln_f_parameters_weight_ = L_mod_modules_transformer_modules_ln_f_parameters_weight_ + l_mod_modules_transformer_modules_ln_f_parameters_bias_ = L_mod_modules_transformer_modules_ln_f_parameters_bias_ + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:223 in forward, code: pos = torch.arange(0, t, dtype=torch.long, device=device).unsqueeze( + arange: "i64[64][1]cuda:0" = torch.arange(0, 64, dtype = torch.int64, device = device(type='cuda', index=0)) + pos: "i64[1, 64][64, 1]cuda:0" = arange.unsqueeze(0); arange = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:228 in forward, code: tok_emb = self.transformer.wte(idx) # token embeddings of shape (b, t, n_embd) + tok_emb: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.nn.functional.embedding(l_cloned_inputs_0_, l_mod_modules_transformer_modules_wte_parameters_weight_, None, None, 2.0, False, False); l_cloned_inputs_0_ = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:229 in forward, code: pos_emb = self.transformer.wpe( + pos_emb: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.nn.functional.embedding(pos, 
l_mod_modules_transformer_modules_wpe_parameters_weight_, None, None, 2.0, False, False); pos = l_mod_modules_transformer_modules_wpe_parameters_weight_ = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:232 in forward, code: x = self.transformer.drop(tok_emb + pos_emb) + add: "f32[1, 64, 768][49152, 768, 1]cuda:0" = tok_emb + pos_emb; tok_emb = pos_emb = None + x: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.nn.functional.dropout(add, 0.0, True, False); add = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:43 in forward, code: return F.layer_norm(input, self.weight.shape, self.weight, self.bias, 1e-5) + layer_norm: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.nn.functional.layer_norm(x, (768,), l_mod_modules_transformer_modules_h_modules_0_modules_ln_1_parameters_weight_, l_mod_modules_transformer_modules_h_modules_0_modules_ln_1_parameters_bias_, 1e-05); l_mod_modules_transformer_modules_h_modules_0_modules_ln_1_parameters_weight_ = l_mod_modules_transformer_modules_h_modules_0_modules_ln_1_parameters_bias_ = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:82 in forward, code: q, k, v = self.c_attn(x).split(self.n_embd, dim=2) + linear: "f32[1, 64, 2304][147456, 2304, 1]cuda:0" = torch._C._nn.linear(layer_norm, l_mod_modules_transformer_modules_h_modules_0_modules_attn_modules_c_attn_parameters_weight_, l_mod_modules_transformer_modules_h_modules_0_modules_attn_modules_c_attn_parameters_bias_); layer_norm = l_mod_modules_transformer_modules_h_modules_0_modules_attn_modules_c_attn_parameters_weight_ = l_mod_modules_transformer_modules_h_modules_0_modules_attn_modules_c_attn_parameters_bias_ = None + split = linear.split(768, dim = 2); linear = None + q: "f32[1, 64, 768][147456, 2304, 1]cuda:0" = split[0] + k: "f32[1, 64, 768][147456, 2304, 1]cuda:0" = split[1] + v: "f32[1, 64, 768][147456, 2304, 1]cuda:0" = split[2]; split = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:83 in forward, code: k = k.view(B, T, self.n_head, C // self.n_head).transpose( + view: "f32[1, 64, 12, 64][147456, 2304, 64, 1]cuda:0" = k.view(1, 64, 12, 64); k = None + k_1: "f32[1, 12, 64, 64][147456, 64, 2304, 1]cuda:0" = view.transpose(1, 2); view = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:86 in forward, code: q = q.view(B, T, self.n_head, C // self.n_head).transpose( + view_1: "f32[1, 64, 12, 64][147456, 2304, 64, 1]cuda:0" = q.view(1, 64, 12, 64); q = None + q_1: "f32[1, 12, 64, 64][147456, 64, 2304, 1]cuda:0" = view_1.transpose(1, 2); view_1 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:89 in forward, code: v = v.view(B, T, self.n_head, C // self.n_head).transpose( + view_2: "f32[1, 64, 12, 64][147456, 2304, 64, 1]cuda:0" = v.view(1, 64, 12, 64); v = None + v_1: "f32[1, 12, 64, 64][147456, 64, 2304, 1]cuda:0" = view_2.transpose(1, 2); view_2 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:96 in forward, code: y = torch.nn.functional.scaled_dot_product_attention( + y: "f32[1, 12, 64, 64][49152, 64, 768, 1]cuda:0" = torch._C._nn.scaled_dot_product_attention(q_1, k_1, v_1, attn_mask = None, dropout_p = 0.0, is_causal = True); q_1 = k_1 = v_1 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:112 in forward, code: y.transpose(1, 2).contiguous().view(B, T, C) + 
transpose_3: "f32[1, 64, 12, 64][49152, 768, 64, 1]cuda:0" = y.transpose(1, 2); y = None + contiguous: "f32[1, 64, 12, 64][49152, 768, 64, 1]cuda:0" = transpose_3.contiguous(); transpose_3 = None + y_1: "f32[1, 64, 768][49152, 768, 1]cuda:0" = contiguous.view(1, 64, 768); contiguous = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:116 in forward, code: y = self.resid_dropout(self.c_proj(y)) + linear_1: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch._C._nn.linear(y_1, l_mod_modules_transformer_modules_h_modules_0_modules_attn_modules_c_proj_parameters_weight_, l_mod_modules_transformer_modules_h_modules_0_modules_attn_modules_c_proj_parameters_bias_); y_1 = l_mod_modules_transformer_modules_h_modules_0_modules_attn_modules_c_proj_parameters_weight_ = l_mod_modules_transformer_modules_h_modules_0_modules_attn_modules_c_proj_parameters_bias_ = None + y_2: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.nn.functional.dropout(linear_1, 0.0, True, False); linear_1 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:144 in forward, code: x = x + self.attn(self.ln_1(x)) + x_1: "f32[1, 64, 768][49152, 768, 1]cuda:0" = x + y_2; x = y_2 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:43 in forward, code: return F.layer_norm(input, self.weight.shape, self.weight, self.bias, 1e-5) + layer_norm_1: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.nn.functional.layer_norm(x_1, (768,), l_mod_modules_transformer_modules_h_modules_0_modules_ln_2_parameters_weight_, l_mod_modules_transformer_modules_h_modules_0_modules_ln_2_parameters_bias_, 1e-05); l_mod_modules_transformer_modules_h_modules_0_modules_ln_2_parameters_weight_ = l_mod_modules_transformer_modules_h_modules_0_modules_ln_2_parameters_bias_ = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:128 in forward, code: x = self.c_fc(x) + x_2: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch._C._nn.linear(layer_norm_1, l_mod_modules_transformer_modules_h_modules_0_modules_mlp_modules_c_fc_parameters_weight_, l_mod_modules_transformer_modules_h_modules_0_modules_mlp_modules_c_fc_parameters_bias_); layer_norm_1 = l_mod_modules_transformer_modules_h_modules_0_modules_mlp_modules_c_fc_parameters_weight_ = l_mod_modules_transformer_modules_h_modules_0_modules_mlp_modules_c_fc_parameters_bias_ = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:25 in new_gelu, code: 0.5 + mul: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = 0.5 * x_2 + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:29 in new_gelu, code: + torch.tanh(math.sqrt(2.0 / math.pi) * (x + 0.044715 * torch.pow(x, 3.0))) + pow_1: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.pow(x_2, 3.0) + mul_1: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = 0.044715 * pow_1; pow_1 = None + add_2: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = x_2 + mul_1; x_2 = mul_1 = None + mul_2: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = 0.7978845608028654 * add_2; add_2 = None + tanh: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.tanh(mul_2); mul_2 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:28 in new_gelu, code: 1.0 + add_3: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = 1.0 + tanh; tanh = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:25 in new_gelu, code: 0.5 + x_3: "f32[1, 64, 3072][196608, 3072, 
1]cuda:0" = mul * add_3; mul = add_3 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:130 in forward, code: x = self.c_proj(x) + x_4: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch._C._nn.linear(x_3, l_mod_modules_transformer_modules_h_modules_0_modules_mlp_modules_c_proj_parameters_weight_, l_mod_modules_transformer_modules_h_modules_0_modules_mlp_modules_c_proj_parameters_bias_); x_3 = l_mod_modules_transformer_modules_h_modules_0_modules_mlp_modules_c_proj_parameters_weight_ = l_mod_modules_transformer_modules_h_modules_0_modules_mlp_modules_c_proj_parameters_bias_ = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:131 in forward, code: x = self.dropout(x) + x_5: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.nn.functional.dropout(x_4, 0.0, True, False); x_4 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:145 in forward, code: x = x + self.mlp(self.ln_2(x)) + x_6: "f32[1, 64, 768][49152, 768, 1]cuda:0" = x_1 + x_5; x_1 = x_5 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:43 in forward, code: return F.layer_norm(input, self.weight.shape, self.weight, self.bias, 1e-5) + layer_norm_2: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.nn.functional.layer_norm(x_6, (768,), l_mod_modules_transformer_modules_h_modules_1_modules_ln_1_parameters_weight_, l_mod_modules_transformer_modules_h_modules_1_modules_ln_1_parameters_bias_, 1e-05); l_mod_modules_transformer_modules_h_modules_1_modules_ln_1_parameters_weight_ = l_mod_modules_transformer_modules_h_modules_1_modules_ln_1_parameters_bias_ = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:82 in forward, code: q, k, v = self.c_attn(x).split(self.n_embd, dim=2) + linear_4: "f32[1, 64, 2304][147456, 2304, 1]cuda:0" = torch._C._nn.linear(layer_norm_2, l_mod_modules_transformer_modules_h_modules_1_modules_attn_modules_c_attn_parameters_weight_, l_mod_modules_transformer_modules_h_modules_1_modules_attn_modules_c_attn_parameters_bias_); layer_norm_2 = l_mod_modules_transformer_modules_h_modules_1_modules_attn_modules_c_attn_parameters_weight_ = l_mod_modules_transformer_modules_h_modules_1_modules_attn_modules_c_attn_parameters_bias_ = None + split_1 = linear_4.split(768, dim = 2); linear_4 = None + q_2: "f32[1, 64, 768][147456, 2304, 1]cuda:0" = split_1[0] + k_2: "f32[1, 64, 768][147456, 2304, 1]cuda:0" = split_1[1] + v_2: "f32[1, 64, 768][147456, 2304, 1]cuda:0" = split_1[2]; split_1 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:83 in forward, code: k = k.view(B, T, self.n_head, C // self.n_head).transpose( + view_4: "f32[1, 64, 12, 64][147456, 2304, 64, 1]cuda:0" = k_2.view(1, 64, 12, 64); k_2 = None + k_3: "f32[1, 12, 64, 64][147456, 64, 2304, 1]cuda:0" = view_4.transpose(1, 2); view_4 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:86 in forward, code: q = q.view(B, T, self.n_head, C // self.n_head).transpose( + view_5: "f32[1, 64, 12, 64][147456, 2304, 64, 1]cuda:0" = q_2.view(1, 64, 12, 64); q_2 = None + q_3: "f32[1, 12, 64, 64][147456, 64, 2304, 1]cuda:0" = view_5.transpose(1, 2); view_5 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:89 in forward, code: v = v.view(B, T, self.n_head, C // self.n_head).transpose( + view_6: "f32[1, 64, 12, 64][147456, 2304, 64, 1]cuda:0" = v_2.view(1, 64, 12, 64); v_2 = 
None + v_3: "f32[1, 12, 64, 64][147456, 64, 2304, 1]cuda:0" = view_6.transpose(1, 2); view_6 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:96 in forward, code: y = torch.nn.functional.scaled_dot_product_attention( + y_3: "f32[1, 12, 64, 64][49152, 64, 768, 1]cuda:0" = torch._C._nn.scaled_dot_product_attention(q_3, k_3, v_3, attn_mask = None, dropout_p = 0.0, is_causal = True); q_3 = k_3 = v_3 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:112 in forward, code: y.transpose(1, 2).contiguous().view(B, T, C) + transpose_7: "f32[1, 64, 12, 64][49152, 768, 64, 1]cuda:0" = y_3.transpose(1, 2); y_3 = None + contiguous_1: "f32[1, 64, 12, 64][49152, 768, 64, 1]cuda:0" = transpose_7.contiguous(); transpose_7 = None + y_4: "f32[1, 64, 768][49152, 768, 1]cuda:0" = contiguous_1.view(1, 64, 768); contiguous_1 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:116 in forward, code: y = self.resid_dropout(self.c_proj(y)) + linear_5: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch._C._nn.linear(y_4, l_mod_modules_transformer_modules_h_modules_1_modules_attn_modules_c_proj_parameters_weight_, l_mod_modules_transformer_modules_h_modules_1_modules_attn_modules_c_proj_parameters_bias_); y_4 = l_mod_modules_transformer_modules_h_modules_1_modules_attn_modules_c_proj_parameters_weight_ = l_mod_modules_transformer_modules_h_modules_1_modules_attn_modules_c_proj_parameters_bias_ = None + y_5: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.nn.functional.dropout(linear_5, 0.0, True, False); linear_5 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:144 in forward, code: x = x + self.attn(self.ln_1(x)) + x_7: "f32[1, 64, 768][49152, 768, 1]cuda:0" = x_6 + y_5; x_6 = y_5 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:43 in forward, code: return F.layer_norm(input, self.weight.shape, self.weight, self.bias, 1e-5) + layer_norm_3: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.nn.functional.layer_norm(x_7, (768,), l_mod_modules_transformer_modules_h_modules_1_modules_ln_2_parameters_weight_, l_mod_modules_transformer_modules_h_modules_1_modules_ln_2_parameters_bias_, 1e-05); l_mod_modules_transformer_modules_h_modules_1_modules_ln_2_parameters_weight_ = l_mod_modules_transformer_modules_h_modules_1_modules_ln_2_parameters_bias_ = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:128 in forward, code: x = self.c_fc(x) + x_8: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch._C._nn.linear(layer_norm_3, l_mod_modules_transformer_modules_h_modules_1_modules_mlp_modules_c_fc_parameters_weight_, l_mod_modules_transformer_modules_h_modules_1_modules_mlp_modules_c_fc_parameters_bias_); layer_norm_3 = l_mod_modules_transformer_modules_h_modules_1_modules_mlp_modules_c_fc_parameters_weight_ = l_mod_modules_transformer_modules_h_modules_1_modules_mlp_modules_c_fc_parameters_bias_ = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:25 in new_gelu, code: 0.5 + mul_4: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = 0.5 * x_8 + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:29 in new_gelu, code: + torch.tanh(math.sqrt(2.0 / math.pi) * (x + 0.044715 * torch.pow(x, 3.0))) + pow_2: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.pow(x_8, 3.0) + mul_5: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = 0.044715 * pow_2; 
pow_2 = None
+ add_6: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = x_8 + mul_5; x_8 = mul_5 = None
+ mul_6: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = 0.7978845608028654 * add_6; add_6 = None
+ tanh_1: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.tanh(mul_6); mul_6 = None
+
+ # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:28 in new_gelu, code: 1.0
+ add_7: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = 1.0 + tanh_1; tanh_1 = None
+
+ # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:25 in new_gelu, code: 0.5
+ x_9: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = mul_4 * add_7; mul_4 = add_7 = None
+
+ # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:130 in forward, code: x = self.c_proj(x)
+ x_10: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch._C._nn.linear(x_9, l_mod_modules_transformer_modules_h_modules_1_modules_mlp_modules_c_proj_parameters_weight_, l_mod_modules_transformer_modules_h_modules_1_modules_mlp_modules_c_proj_parameters_bias_); x_9 = l_mod_modules_transformer_modules_h_modules_1_modules_mlp_modules_c_proj_parameters_weight_ = l_mod_modules_transformer_modules_h_modules_1_modules_mlp_modules_c_proj_parameters_bias_ = None
+
+ # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:131 in forward, code: x = self.dropout(x)
+ x_11: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.nn.functional.dropout(x_10, 0.0, True, False); x_10 = None
+
+ # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:145 in forward, code: x = x + self.mlp(self.ln_2(x))
+ x_12: "f32[1, 64, 768][49152, 768, 1]cuda:0" = x_7 + x_11; x_7 = x_11 = None
+
+ # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:43 in forward, code: return F.layer_norm(input, self.weight.shape, self.weight, self.bias, 1e-5)
+ layer_norm_4: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.nn.functional.layer_norm(x_12, (768,), l_mod_modules_transformer_modules_h_modules_2_modules_ln_1_parameters_weight_, l_mod_modules_transformer_modules_h_modules_2_modules_ln_1_parameters_bias_, 1e-05); l_mod_modules_transformer_modules_h_modules_2_modules_ln_1_parameters_weight_ = l_mod_modules_transformer_modules_h_modules_2_modules_ln_1_parameters_bias_ = None
+
+ # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:82 in forward, code: q, k, v = self.c_attn(x).split(self.n_embd, dim=2)
+ linear_8: "f32[1, 64, 2304][147456, 2304, 1]cuda:0" = torch._C._nn.linear(layer_norm_4, l_mod_modules_transformer_modules_h_modules_2_modules_attn_modules_c_attn_parameters_weight_, l_mod_modules_transformer_modules_h_modules_2_modules_attn_modules_c_attn_parameters_bias_); layer_norm_4 = l_mod_modules_transformer_modules_h_modules_2_modules_attn_modules_c_attn_parameters_weight_ = l_mod_modules_transformer_modules_h_modules_2_modules_attn_modules_c_attn_parameters_bias_ = None
+ split_2 = linear_8.split(768, dim = 2); linear_8 = None
+ q_4: "f32[1, 64, 768][147456, 2304, 1]cuda:0" = split_2[0]
+ k_4: "f32[1, 64, 768][147456, 2304, 1]cuda:0" = split_2[1]
+ v_4: "f32[1, 64, 768][147456, 2304, 1]cuda:0" = split_2[2]; split_2 = None
+
+ # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:83 in forward, code: k = k.view(B, T, self.n_head, C // self.n_head).transpose(
+ view_8: "f32[1, 64, 12, 64][147456, 2304, 64, 1]cuda:0" = k_4.view(1, 64, 12, 64); k_4 = None
+ k_5: "f32[1, 12, 64, 64][147456, 64, 2304, 1]cuda:0" = view_8.transpose(1, 2); view_8 = None
+
+ # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:86 in forward, code: q = q.view(B, T, self.n_head, C // self.n_head).transpose(
+ view_9: "f32[1, 64, 12, 64][147456, 2304, 64, 1]cuda:0" = q_4.view(1, 64, 12, 64); q_4 = None
+ q_5: "f32[1, 12, 64, 64][147456, 64, 2304, 1]cuda:0" = view_9.transpose(1, 2); view_9 = None
+
+ # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:89 in forward, code: v = v.view(B, T, self.n_head, C // self.n_head).transpose(
+ view_10: "f32[1, 64, 12, 64][147456, 2304, 64, 1]cuda:0" = v_4.view(1, 64, 12, 64); v_4 = None
+ v_5: "f32[1, 12, 64, 64][147456, 64, 2304, 1]cuda:0" = view_10.transpose(1, 2); view_10 = None
+
+ # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:96 in forward, code: y = torch.nn.functional.scaled_dot_product_attention(
+ y_6: "f32[1, 12, 64, 64][49152, 64, 768, 1]cuda:0" = torch._C._nn.scaled_dot_product_attention(q_5, k_5, v_5, attn_mask = None, dropout_p = 0.0, is_causal = True); q_5 = k_5 = v_5 = None
+
+ # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:112 in forward, code: y.transpose(1, 2).contiguous().view(B, T, C)
+ transpose_11: "f32[1, 64, 12, 64][49152, 768, 64, 1]cuda:0" = y_6.transpose(1, 2); y_6 = None
+ contiguous_2: "f32[1, 64, 12, 64][49152, 768, 64, 1]cuda:0" = transpose_11.contiguous(); transpose_11 = None
+ y_7: "f32[1, 64, 768][49152, 768, 1]cuda:0" = contiguous_2.view(1, 64, 768); contiguous_2 = None
+
+ # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:116 in forward, code: y = self.resid_dropout(self.c_proj(y))
+ linear_9: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch._C._nn.linear(y_7, l_mod_modules_transformer_modules_h_modules_2_modules_attn_modules_c_proj_parameters_weight_, l_mod_modules_transformer_modules_h_modules_2_modules_attn_modules_c_proj_parameters_bias_); y_7 = l_mod_modules_transformer_modules_h_modules_2_modules_attn_modules_c_proj_parameters_weight_ = l_mod_modules_transformer_modules_h_modules_2_modules_attn_modules_c_proj_parameters_bias_ = None
+ y_8: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.nn.functional.dropout(linear_9, 0.0, True, False); linear_9 = None
+
+ # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:144 in forward, code: x = x + self.attn(self.ln_1(x))
+ x_13: "f32[1, 64, 768][49152, 768, 1]cuda:0" = x_12 + y_8; x_12 = y_8 = None
+
+ # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:43 in forward, code: return F.layer_norm(input, self.weight.shape, self.weight, self.bias, 1e-5)
+ layer_norm_5: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.nn.functional.layer_norm(x_13, (768,), l_mod_modules_transformer_modules_h_modules_2_modules_ln_2_parameters_weight_, l_mod_modules_transformer_modules_h_modules_2_modules_ln_2_parameters_bias_, 1e-05); l_mod_modules_transformer_modules_h_modules_2_modules_ln_2_parameters_weight_ = l_mod_modules_transformer_modules_h_modules_2_modules_ln_2_parameters_bias_ = None
+
+ # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:128 in forward, code: x = self.c_fc(x)
+ x_14: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch._C._nn.linear(layer_norm_5, l_mod_modules_transformer_modules_h_modules_2_modules_mlp_modules_c_fc_parameters_weight_, l_mod_modules_transformer_modules_h_modules_2_modules_mlp_modules_c_fc_parameters_bias_); layer_norm_5 = l_mod_modules_transformer_modules_h_modules_2_modules_mlp_modules_c_fc_parameters_weight_ = l_mod_modules_transformer_modules_h_modules_2_modules_mlp_modules_c_fc_parameters_bias_ = None
+
+ # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:25 in new_gelu, code: 0.5
+ mul_8: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = 0.5 * x_14
+
+ # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:29 in new_gelu, code: + torch.tanh(math.sqrt(2.0 / math.pi) * (x + 0.044715 * torch.pow(x, 3.0)))
+ pow_3: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.pow(x_14, 3.0)
+ mul_9: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = 0.044715 * pow_3; pow_3 = None
+ add_10: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = x_14 + mul_9; x_14 = mul_9 = None
+ mul_10: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = 0.7978845608028654 * add_10; add_10 = None
+ tanh_2: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.tanh(mul_10); mul_10 = None
+
+ # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:28 in new_gelu, code: 1.0
+ add_11: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = 1.0 + tanh_2; tanh_2 = None
+
+ # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:25 in new_gelu, code: 0.5
+ x_15: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = mul_8 * add_11; mul_8 = add_11 = None
+
+ # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:130 in forward, code: x = self.c_proj(x)
+ x_16: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch._C._nn.linear(x_15, l_mod_modules_transformer_modules_h_modules_2_modules_mlp_modules_c_proj_parameters_weight_, l_mod_modules_transformer_modules_h_modules_2_modules_mlp_modules_c_proj_parameters_bias_); x_15 = l_mod_modules_transformer_modules_h_modules_2_modules_mlp_modules_c_proj_parameters_weight_ = l_mod_modules_transformer_modules_h_modules_2_modules_mlp_modules_c_proj_parameters_bias_ = None
+
+ # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:131 in forward, code: x = self.dropout(x)
+ x_17: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.nn.functional.dropout(x_16, 0.0, True, False); x_16 = None
+
+ # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:145 in forward, code: x = x + self.mlp(self.ln_2(x))
+ x_18: "f32[1, 64, 768][49152, 768, 1]cuda:0" = x_13 + x_17; x_13 = x_17 = None
+
+ # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:43 in forward, code: return F.layer_norm(input, self.weight.shape, self.weight, self.bias, 1e-5)
+ layer_norm_6: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.nn.functional.layer_norm(x_18, (768,), l_mod_modules_transformer_modules_h_modules_3_modules_ln_1_parameters_weight_, l_mod_modules_transformer_modules_h_modules_3_modules_ln_1_parameters_bias_, 1e-05); l_mod_modules_transformer_modules_h_modules_3_modules_ln_1_parameters_weight_ = l_mod_modules_transformer_modules_h_modules_3_modules_ln_1_parameters_bias_ = None
+
+ # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:82 in forward, code: q, k, v = self.c_attn(x).split(self.n_embd, dim=2)
+ linear_12: "f32[1, 64, 2304][147456, 2304, 1]cuda:0" = torch._C._nn.linear(layer_norm_6, l_mod_modules_transformer_modules_h_modules_3_modules_attn_modules_c_attn_parameters_weight_, l_mod_modules_transformer_modules_h_modules_3_modules_attn_modules_c_attn_parameters_bias_); layer_norm_6 = l_mod_modules_transformer_modules_h_modules_3_modules_attn_modules_c_attn_parameters_weight_ = l_mod_modules_transformer_modules_h_modules_3_modules_attn_modules_c_attn_parameters_bias_ = None
+ split_3 = linear_12.split(768, dim = 2); linear_12 = None
+ q_6: "f32[1, 64, 768][147456, 2304, 1]cuda:0" = split_3[0]
+ k_6: "f32[1, 64, 768][147456, 2304, 1]cuda:0" = split_3[1]
+ v_6: "f32[1, 64, 768][147456, 2304, 1]cuda:0" = split_3[2]; split_3 = None
+
+ # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:83 in forward, code: k = k.view(B, T, self.n_head, C // self.n_head).transpose(
+ view_12: "f32[1, 64, 12, 64][147456, 2304, 64, 1]cuda:0" = k_6.view(1, 64, 12, 64); k_6 = None
+ k_7: "f32[1, 12, 64, 64][147456, 64, 2304, 1]cuda:0" = view_12.transpose(1, 2); view_12 = None
+
+ # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:86 in forward, code: q = q.view(B, T, self.n_head, C // self.n_head).transpose(
+ view_13: "f32[1, 64, 12, 64][147456, 2304, 64, 1]cuda:0" = q_6.view(1, 64, 12, 64); q_6 = None
+ q_7: "f32[1, 12, 64, 64][147456, 64, 2304, 1]cuda:0" = view_13.transpose(1, 2); view_13 = None
+
+ # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:89 in forward, code: v = v.view(B, T, self.n_head, C // self.n_head).transpose(
+ view_14: "f32[1, 64, 12, 64][147456, 2304, 64, 1]cuda:0" = v_6.view(1, 64, 12, 64); v_6 = None
+ v_7: "f32[1, 12, 64, 64][147456, 64, 2304, 1]cuda:0" = view_14.transpose(1, 2); view_14 = None
+
+ # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:96 in forward, code: y = torch.nn.functional.scaled_dot_product_attention(
+ y_9: "f32[1, 12, 64, 64][49152, 64, 768, 1]cuda:0" = torch._C._nn.scaled_dot_product_attention(q_7, k_7, v_7, attn_mask = None, dropout_p = 0.0, is_causal = True); q_7 = k_7 = v_7 = None
+
+ # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:112 in forward, code: y.transpose(1, 2).contiguous().view(B, T, C)
+ transpose_15: "f32[1, 64, 12, 64][49152, 768, 64, 1]cuda:0" = y_9.transpose(1, 2); y_9 = None
+ contiguous_3: "f32[1, 64, 12, 64][49152, 768, 64, 1]cuda:0" = transpose_15.contiguous(); transpose_15 = None
+ y_10: "f32[1, 64, 768][49152, 768, 1]cuda:0" = contiguous_3.view(1, 64, 768); contiguous_3 = None
+
+ # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:116 in forward, code: y = self.resid_dropout(self.c_proj(y))
+ linear_13: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch._C._nn.linear(y_10, l_mod_modules_transformer_modules_h_modules_3_modules_attn_modules_c_proj_parameters_weight_, l_mod_modules_transformer_modules_h_modules_3_modules_attn_modules_c_proj_parameters_bias_); y_10 = l_mod_modules_transformer_modules_h_modules_3_modules_attn_modules_c_proj_parameters_weight_ = l_mod_modules_transformer_modules_h_modules_3_modules_attn_modules_c_proj_parameters_bias_ = None
+ y_11: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.nn.functional.dropout(linear_13, 0.0, True, False); linear_13 = None
+
+ # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:144 in forward, code: x = x + self.attn(self.ln_1(x))
+ x_19: "f32[1, 64, 768][49152, 768, 1]cuda:0" = x_18 + y_11; x_18 = y_11 = None
+
+ # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:43 in forward, code: return F.layer_norm(input, self.weight.shape, self.weight, self.bias, 1e-5)
+ layer_norm_7: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.nn.functional.layer_norm(x_19, (768,), l_mod_modules_transformer_modules_h_modules_3_modules_ln_2_parameters_weight_, l_mod_modules_transformer_modules_h_modules_3_modules_ln_2_parameters_bias_, 1e-05); l_mod_modules_transformer_modules_h_modules_3_modules_ln_2_parameters_weight_ = l_mod_modules_transformer_modules_h_modules_3_modules_ln_2_parameters_bias_ = None
+
+ # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:128 in forward, code: x = self.c_fc(x)
+ x_20: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch._C._nn.linear(layer_norm_7, l_mod_modules_transformer_modules_h_modules_3_modules_mlp_modules_c_fc_parameters_weight_, l_mod_modules_transformer_modules_h_modules_3_modules_mlp_modules_c_fc_parameters_bias_); layer_norm_7 = l_mod_modules_transformer_modules_h_modules_3_modules_mlp_modules_c_fc_parameters_weight_ = l_mod_modules_transformer_modules_h_modules_3_modules_mlp_modules_c_fc_parameters_bias_ = None
+
+ # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:25 in new_gelu, code: 0.5
+ mul_12: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = 0.5 * x_20
+
+ # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:29 in new_gelu, code: + torch.tanh(math.sqrt(2.0 / math.pi) * (x + 0.044715 * torch.pow(x, 3.0)))
+ pow_4: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.pow(x_20, 3.0)
+ mul_13: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = 0.044715 * pow_4; pow_4 = None
+ add_14: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = x_20 + mul_13; x_20 = mul_13 = None
+ mul_14: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = 0.7978845608028654 * add_14; add_14 = None
+ tanh_3: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.tanh(mul_14); mul_14 = None
+
+ # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:28 in new_gelu, code: 1.0
+ add_15: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = 1.0 + tanh_3; tanh_3 = None
+
+ # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:25 in new_gelu, code: 0.5
+ x_21: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = mul_12 * add_15; mul_12 = add_15 = None
+
+ # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:130 in forward, code: x = self.c_proj(x)
+ x_22: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch._C._nn.linear(x_21, l_mod_modules_transformer_modules_h_modules_3_modules_mlp_modules_c_proj_parameters_weight_, l_mod_modules_transformer_modules_h_modules_3_modules_mlp_modules_c_proj_parameters_bias_); x_21 = l_mod_modules_transformer_modules_h_modules_3_modules_mlp_modules_c_proj_parameters_weight_ = l_mod_modules_transformer_modules_h_modules_3_modules_mlp_modules_c_proj_parameters_bias_ = None
+
+ # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:131 in forward, code: x = self.dropout(x)
+ x_23: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.nn.functional.dropout(x_22, 0.0, True, False); x_22 = None
+
+ # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:145 in forward, code: x = x + self.mlp(self.ln_2(x))
+ x_24: "f32[1, 64, 768][49152, 768, 1]cuda:0" = x_19 + x_23; x_19 = x_23 = None
+
+ # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:43 in forward, code: return F.layer_norm(input, self.weight.shape, self.weight, self.bias, 1e-5)
+ layer_norm_8: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.nn.functional.layer_norm(x_24, (768,), l_mod_modules_transformer_modules_h_modules_4_modules_ln_1_parameters_weight_, l_mod_modules_transformer_modules_h_modules_4_modules_ln_1_parameters_bias_, 1e-05); l_mod_modules_transformer_modules_h_modules_4_modules_ln_1_parameters_weight_ = l_mod_modules_transformer_modules_h_modules_4_modules_ln_1_parameters_bias_ = None
+
+ # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:82 in forward, code: q, k, v = self.c_attn(x).split(self.n_embd, dim=2)
+ linear_16: "f32[1, 64, 2304][147456, 2304, 1]cuda:0" = torch._C._nn.linear(layer_norm_8, l_mod_modules_transformer_modules_h_modules_4_modules_attn_modules_c_attn_parameters_weight_, l_mod_modules_transformer_modules_h_modules_4_modules_attn_modules_c_attn_parameters_bias_); layer_norm_8 = l_mod_modules_transformer_modules_h_modules_4_modules_attn_modules_c_attn_parameters_weight_ = l_mod_modules_transformer_modules_h_modules_4_modules_attn_modules_c_attn_parameters_bias_ = None
+ split_4 = linear_16.split(768, dim = 2); linear_16 = None
+ q_8: "f32[1, 64, 768][147456, 2304, 1]cuda:0" = split_4[0]
+ k_8: "f32[1, 64, 768][147456, 2304, 1]cuda:0" = split_4[1]
+ v_8: "f32[1, 64, 768][147456, 2304, 1]cuda:0" = split_4[2]; split_4 = None
+
+ # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:83 in forward, code: k = k.view(B, T, self.n_head, C // self.n_head).transpose(
+ view_16: "f32[1, 64, 12, 64][147456, 2304, 64, 1]cuda:0" = k_8.view(1, 64, 12, 64); k_8 = None
+ k_9: "f32[1, 12, 64, 64][147456, 64, 2304, 1]cuda:0" = view_16.transpose(1, 2); view_16 = None
+
+ # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:86 in forward, code: q = q.view(B, T, self.n_head, C // self.n_head).transpose(
+ view_17: "f32[1, 64, 12, 64][147456, 2304, 64, 1]cuda:0" = q_8.view(1, 64, 12, 64); q_8 = None
+ q_9: "f32[1, 12, 64, 64][147456, 64, 2304, 1]cuda:0" = view_17.transpose(1, 2); view_17 = None
+
+ # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:89 in forward, code: v = v.view(B, T, self.n_head, C // self.n_head).transpose(
+ view_18: "f32[1, 64, 12, 64][147456, 2304, 64, 1]cuda:0" = v_8.view(1, 64, 12, 64); v_8 = None
+ v_9: "f32[1, 12, 64, 64][147456, 64, 2304, 1]cuda:0" = view_18.transpose(1, 2); view_18 = None
+
+ # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:96 in forward, code: y = torch.nn.functional.scaled_dot_product_attention(
+ y_12: "f32[1, 12, 64, 64][49152, 64, 768, 1]cuda:0" = torch._C._nn.scaled_dot_product_attention(q_9, k_9, v_9, attn_mask = None, dropout_p = 0.0, is_causal = True); q_9 = k_9 = v_9 = None
+
+ # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:112 in forward, code: y.transpose(1, 2).contiguous().view(B, T, C)
+ transpose_19: "f32[1, 64, 12, 64][49152, 768, 64, 1]cuda:0" = y_12.transpose(1, 2); y_12 = None
+ contiguous_4: "f32[1, 64, 12, 64][49152, 768, 64, 1]cuda:0" = transpose_19.contiguous(); transpose_19 = None
+ y_13: "f32[1, 64, 768][49152, 768, 1]cuda:0" = contiguous_4.view(1, 64, 768); contiguous_4 = None
+
+ # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:116 in forward, code: y = self.resid_dropout(self.c_proj(y))
+ linear_17: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch._C._nn.linear(y_13, l_mod_modules_transformer_modules_h_modules_4_modules_attn_modules_c_proj_parameters_weight_, l_mod_modules_transformer_modules_h_modules_4_modules_attn_modules_c_proj_parameters_bias_); y_13 = l_mod_modules_transformer_modules_h_modules_4_modules_attn_modules_c_proj_parameters_weight_ = l_mod_modules_transformer_modules_h_modules_4_modules_attn_modules_c_proj_parameters_bias_ = None
+ y_14: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.nn.functional.dropout(linear_17, 0.0, True, False); linear_17 = None
+
+ # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:144 in forward, code: x = x + self.attn(self.ln_1(x))
+ x_25: "f32[1, 64, 768][49152, 768, 1]cuda:0" = x_24 + y_14; x_24 = y_14 = None
+
+ # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:43 in forward, code: return F.layer_norm(input, self.weight.shape, self.weight, self.bias, 1e-5)
+ layer_norm_9: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.nn.functional.layer_norm(x_25, (768,), l_mod_modules_transformer_modules_h_modules_4_modules_ln_2_parameters_weight_, l_mod_modules_transformer_modules_h_modules_4_modules_ln_2_parameters_bias_, 1e-05); l_mod_modules_transformer_modules_h_modules_4_modules_ln_2_parameters_weight_ = l_mod_modules_transformer_modules_h_modules_4_modules_ln_2_parameters_bias_ = None
+
+ # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:128 in forward, code: x = self.c_fc(x)
+ x_26: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch._C._nn.linear(layer_norm_9, l_mod_modules_transformer_modules_h_modules_4_modules_mlp_modules_c_fc_parameters_weight_, l_mod_modules_transformer_modules_h_modules_4_modules_mlp_modules_c_fc_parameters_bias_); layer_norm_9 = l_mod_modules_transformer_modules_h_modules_4_modules_mlp_modules_c_fc_parameters_weight_ = l_mod_modules_transformer_modules_h_modules_4_modules_mlp_modules_c_fc_parameters_bias_ = None
+
+ # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:25 in new_gelu, code: 0.5
+ mul_16: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = 0.5 * x_26
+
+ # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:29 in new_gelu, code: + torch.tanh(math.sqrt(2.0 / math.pi) * (x + 0.044715 * torch.pow(x, 3.0)))
+ pow_5: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.pow(x_26, 3.0)
+ mul_17: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = 0.044715 * pow_5; pow_5 = None
+ add_18: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = x_26 + mul_17; x_26 = mul_17 = None
+ mul_18: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = 0.7978845608028654 * add_18; add_18 = None
+ tanh_4: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.tanh(mul_18); mul_18 = None
+
+ # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:28 in new_gelu, code: 1.0
+ add_19: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = 1.0 + tanh_4; tanh_4 = None
+
+ # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:25 in new_gelu, code: 0.5
+ x_27: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = mul_16 * add_19; mul_16 = add_19 = None
+
+ # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:130 in forward, code: x = self.c_proj(x)
+ x_28: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch._C._nn.linear(x_27, l_mod_modules_transformer_modules_h_modules_4_modules_mlp_modules_c_proj_parameters_weight_, l_mod_modules_transformer_modules_h_modules_4_modules_mlp_modules_c_proj_parameters_bias_); x_27 = l_mod_modules_transformer_modules_h_modules_4_modules_mlp_modules_c_proj_parameters_weight_ = l_mod_modules_transformer_modules_h_modules_4_modules_mlp_modules_c_proj_parameters_bias_ = None
+
+ # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:131 in forward, code: x = self.dropout(x)
+ x_29: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.nn.functional.dropout(x_28, 0.0, True, False); x_28 = None
+
+ # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:145 in forward, code: x = x + self.mlp(self.ln_2(x))
+ x_30: "f32[1, 64, 768][49152, 768, 1]cuda:0" = x_25 + x_29; x_25 = x_29 = None
+
+ # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:43 in forward, code: return F.layer_norm(input, self.weight.shape, self.weight, self.bias, 1e-5)
+ layer_norm_10: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.nn.functional.layer_norm(x_30, (768,), l_mod_modules_transformer_modules_h_modules_5_modules_ln_1_parameters_weight_, l_mod_modules_transformer_modules_h_modules_5_modules_ln_1_parameters_bias_, 1e-05); l_mod_modules_transformer_modules_h_modules_5_modules_ln_1_parameters_weight_ = l_mod_modules_transformer_modules_h_modules_5_modules_ln_1_parameters_bias_ = None
+
+ # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:82 in forward, code: q, k, v = self.c_attn(x).split(self.n_embd, dim=2)
+ linear_20: "f32[1, 64, 2304][147456, 2304, 1]cuda:0" = torch._C._nn.linear(layer_norm_10, l_mod_modules_transformer_modules_h_modules_5_modules_attn_modules_c_attn_parameters_weight_, l_mod_modules_transformer_modules_h_modules_5_modules_attn_modules_c_attn_parameters_bias_); layer_norm_10 = l_mod_modules_transformer_modules_h_modules_5_modules_attn_modules_c_attn_parameters_weight_ = l_mod_modules_transformer_modules_h_modules_5_modules_attn_modules_c_attn_parameters_bias_ = None
+ split_5 = linear_20.split(768, dim = 2); linear_20 = None
+ q_10: "f32[1, 64, 768][147456, 2304, 1]cuda:0" = split_5[0]
+ k_10: "f32[1, 64, 768][147456, 2304, 1]cuda:0" = split_5[1]
+ v_10: "f32[1, 64, 768][147456, 2304, 1]cuda:0" = split_5[2]; split_5 = None
+
+ # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:83 in forward, code: k = k.view(B, T, self.n_head, C // self.n_head).transpose(
+ view_20: "f32[1, 64, 12, 64][147456, 2304, 64, 1]cuda:0" = k_10.view(1, 64, 12, 64); k_10 = None
+ k_11: "f32[1, 12, 64, 64][147456, 64, 2304, 1]cuda:0" = view_20.transpose(1, 2); view_20 = None
+
+ # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:86 in forward, code: q = q.view(B, T, self.n_head, C // self.n_head).transpose(
+ view_21: "f32[1, 64, 12, 64][147456, 2304, 64, 1]cuda:0" = q_10.view(1, 64, 12, 64); q_10 = None
+ q_11: "f32[1, 12, 64, 64][147456, 64, 2304, 1]cuda:0" = view_21.transpose(1, 2); view_21 = None
+
+ # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:89 in forward, code: v = v.view(B, T, self.n_head, C // self.n_head).transpose(
+ view_22: "f32[1, 64, 12, 64][147456, 2304, 64, 1]cuda:0" = v_10.view(1, 64, 12, 64); v_10 = None
+ v_11: "f32[1, 12, 64, 64][147456, 64, 2304, 1]cuda:0" = view_22.transpose(1, 2); view_22 = None
+
+ # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:96 in forward, code: y = torch.nn.functional.scaled_dot_product_attention(
+ y_15: "f32[1, 12, 64, 64][49152, 64, 768, 1]cuda:0" = torch._C._nn.scaled_dot_product_attention(q_11, k_11, v_11, attn_mask = None, dropout_p = 0.0, is_causal = True); q_11 = k_11 = v_11 = None
+
+ # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:112 in forward, code: y.transpose(1, 2).contiguous().view(B, T, C)
+ transpose_23: "f32[1, 64, 12, 64][49152, 768, 64, 1]cuda:0" = y_15.transpose(1, 2); y_15 = None
+ contiguous_5: "f32[1, 64, 12, 64][49152, 768, 64, 1]cuda:0" = transpose_23.contiguous(); transpose_23 = None
+ y_16: "f32[1, 64, 768][49152, 768, 1]cuda:0" = contiguous_5.view(1, 64, 768); contiguous_5 = None
+
+ # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:116 in forward, code: y = self.resid_dropout(self.c_proj(y))
+ linear_21: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch._C._nn.linear(y_16, l_mod_modules_transformer_modules_h_modules_5_modules_attn_modules_c_proj_parameters_weight_, l_mod_modules_transformer_modules_h_modules_5_modules_attn_modules_c_proj_parameters_bias_); y_16 = l_mod_modules_transformer_modules_h_modules_5_modules_attn_modules_c_proj_parameters_weight_ = l_mod_modules_transformer_modules_h_modules_5_modules_attn_modules_c_proj_parameters_bias_ = None
+ y_17: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.nn.functional.dropout(linear_21, 0.0, True, False); linear_21 = None
+
+ # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:144 in forward, code: x = x + self.attn(self.ln_1(x))
+ x_31: "f32[1, 64, 768][49152, 768, 1]cuda:0" = x_30 + y_17; x_30 = y_17 = None
+
+ # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:43 in forward, code: return F.layer_norm(input, self.weight.shape, self.weight, self.bias, 1e-5)
+ layer_norm_11: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.nn.functional.layer_norm(x_31, (768,), l_mod_modules_transformer_modules_h_modules_5_modules_ln_2_parameters_weight_, l_mod_modules_transformer_modules_h_modules_5_modules_ln_2_parameters_bias_, 1e-05); l_mod_modules_transformer_modules_h_modules_5_modules_ln_2_parameters_weight_ = l_mod_modules_transformer_modules_h_modules_5_modules_ln_2_parameters_bias_ = None
+
+ # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:128 in forward, code: x = self.c_fc(x)
+ x_32: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch._C._nn.linear(layer_norm_11, l_mod_modules_transformer_modules_h_modules_5_modules_mlp_modules_c_fc_parameters_weight_, l_mod_modules_transformer_modules_h_modules_5_modules_mlp_modules_c_fc_parameters_bias_); layer_norm_11 = l_mod_modules_transformer_modules_h_modules_5_modules_mlp_modules_c_fc_parameters_weight_ = l_mod_modules_transformer_modules_h_modules_5_modules_mlp_modules_c_fc_parameters_bias_ = None
+
+ # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:25 in new_gelu, code: 0.5
+ mul_20: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = 0.5 * x_32
+
+ # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:29 in new_gelu, code: + torch.tanh(math.sqrt(2.0 / math.pi) * (x + 0.044715 * torch.pow(x, 3.0)))
+ pow_6: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.pow(x_32, 3.0)
+ mul_21: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = 0.044715 * pow_6; pow_6 = None
+ add_22: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = x_32 + mul_21; x_32 = mul_21 = None
+ mul_22: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = 0.7978845608028654 * add_22; add_22 = None
+ tanh_5: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.tanh(mul_22); mul_22 = None
+
+ # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:28 in new_gelu, code: 1.0
+ add_23: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = 1.0 + tanh_5; tanh_5 = None
+
+ # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:25 in new_gelu, code: 0.5
+ x_33: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = mul_20 * add_23; mul_20 = add_23 = None
+
+ # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:130 in forward, code: x = self.c_proj(x)
+ x_34: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch._C._nn.linear(x_33, l_mod_modules_transformer_modules_h_modules_5_modules_mlp_modules_c_proj_parameters_weight_, l_mod_modules_transformer_modules_h_modules_5_modules_mlp_modules_c_proj_parameters_bias_); x_33 = l_mod_modules_transformer_modules_h_modules_5_modules_mlp_modules_c_proj_parameters_weight_ = l_mod_modules_transformer_modules_h_modules_5_modules_mlp_modules_c_proj_parameters_bias_ = None
+
+ # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:131 in forward, code: x = self.dropout(x)
+ x_35: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.nn.functional.dropout(x_34, 0.0, True, False); x_34 = None
+
+ # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:145 in forward, code: x = x + self.mlp(self.ln_2(x))
+ x_36: "f32[1, 64, 768][49152, 768, 1]cuda:0" = x_31 + x_35; x_31 = x_35 = None
+
+ # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:43 in forward, code: return F.layer_norm(input, self.weight.shape, self.weight, self.bias, 1e-5)
+ layer_norm_12: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.nn.functional.layer_norm(x_36, (768,), l_mod_modules_transformer_modules_h_modules_6_modules_ln_1_parameters_weight_, l_mod_modules_transformer_modules_h_modules_6_modules_ln_1_parameters_bias_, 1e-05); l_mod_modules_transformer_modules_h_modules_6_modules_ln_1_parameters_weight_ = l_mod_modules_transformer_modules_h_modules_6_modules_ln_1_parameters_bias_ = None
+
+ # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:82 in forward, code: q, k, v = self.c_attn(x).split(self.n_embd, dim=2)
+ linear_24: "f32[1, 64, 2304][147456, 2304, 1]cuda:0" = torch._C._nn.linear(layer_norm_12, l_mod_modules_transformer_modules_h_modules_6_modules_attn_modules_c_attn_parameters_weight_, l_mod_modules_transformer_modules_h_modules_6_modules_attn_modules_c_attn_parameters_bias_); layer_norm_12 = l_mod_modules_transformer_modules_h_modules_6_modules_attn_modules_c_attn_parameters_weight_ = l_mod_modules_transformer_modules_h_modules_6_modules_attn_modules_c_attn_parameters_bias_ = None
+ split_6 = linear_24.split(768, dim = 2); linear_24 = None
+ q_12: "f32[1, 64, 768][147456, 2304, 1]cuda:0" = split_6[0]
+ k_12: "f32[1, 64, 768][147456, 2304, 1]cuda:0" = split_6[1]
+ v_12: "f32[1, 64, 768][147456, 2304, 1]cuda:0" = split_6[2]; split_6 = None
+
+ # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:83 in forward, code: k = k.view(B, T, self.n_head, C // self.n_head).transpose(
+ view_24: "f32[1, 64, 12, 64][147456, 2304, 64, 1]cuda:0" = k_12.view(1, 64, 12, 64); k_12 = None
+ k_13: "f32[1, 12, 64, 64][147456, 64, 2304, 1]cuda:0" = view_24.transpose(1, 2); view_24 = None
+
+ # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:86 in forward, code: q = q.view(B, T, self.n_head, C // self.n_head).transpose(
+ view_25: "f32[1, 64, 12, 64][147456, 2304, 64, 1]cuda:0" = q_12.view(1, 64, 12, 64); q_12 = None
+ q_13: "f32[1, 12, 64, 64][147456, 64, 2304, 1]cuda:0" = view_25.transpose(1, 2); view_25 = None
+
+ # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:89 in forward, code: v = v.view(B, T, self.n_head, C // self.n_head).transpose(
+ view_26: "f32[1, 64, 12, 64][147456, 2304, 64, 1]cuda:0" = v_12.view(1, 64, 12, 64); v_12 = None
+ v_13: "f32[1, 12, 64, 64][147456, 64, 2304, 1]cuda:0" = view_26.transpose(1, 2); view_26 = None
+
+ # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:96 in forward, code: y = torch.nn.functional.scaled_dot_product_attention(
+ y_18: "f32[1, 12, 64, 64][49152, 64, 768, 1]cuda:0" = torch._C._nn.scaled_dot_product_attention(q_13, k_13, v_13, attn_mask = None, dropout_p = 0.0, is_causal = True); q_13 = k_13 = v_13 = None
+
+ # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:112 in forward, code: y.transpose(1, 2).contiguous().view(B, T, C)
+ transpose_27: "f32[1, 64, 12, 64][49152, 768, 64, 1]cuda:0" = y_18.transpose(1, 2); y_18 = None
+ contiguous_6: "f32[1, 64, 12, 64][49152, 768, 64, 1]cuda:0" = transpose_27.contiguous(); transpose_27 = None
+ y_19: "f32[1, 64, 768][49152, 768, 1]cuda:0" = contiguous_6.view(1, 64, 768); contiguous_6 = None
+
+ # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:116 in forward, code: y = self.resid_dropout(self.c_proj(y))
+ linear_25: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch._C._nn.linear(y_19, l_mod_modules_transformer_modules_h_modules_6_modules_attn_modules_c_proj_parameters_weight_, l_mod_modules_transformer_modules_h_modules_6_modules_attn_modules_c_proj_parameters_bias_); y_19 = l_mod_modules_transformer_modules_h_modules_6_modules_attn_modules_c_proj_parameters_weight_ = l_mod_modules_transformer_modules_h_modules_6_modules_attn_modules_c_proj_parameters_bias_ = None
+ y_20: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.nn.functional.dropout(linear_25, 0.0, True, False); linear_25 = None
+
+ # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:144 in forward, code: x = x + self.attn(self.ln_1(x))
+ x_37: "f32[1, 64, 768][49152, 768, 1]cuda:0" = x_36 + y_20; x_36 = y_20 = None
+
+ # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:43 in forward, code: return F.layer_norm(input, self.weight.shape, self.weight, self.bias, 1e-5)
+ layer_norm_13: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.nn.functional.layer_norm(x_37, (768,), l_mod_modules_transformer_modules_h_modules_6_modules_ln_2_parameters_weight_, l_mod_modules_transformer_modules_h_modules_6_modules_ln_2_parameters_bias_, 1e-05); l_mod_modules_transformer_modules_h_modules_6_modules_ln_2_parameters_weight_ = l_mod_modules_transformer_modules_h_modules_6_modules_ln_2_parameters_bias_ = None
+
+ # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:128 in forward, code: x = self.c_fc(x)
+ x_38: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch._C._nn.linear(layer_norm_13, l_mod_modules_transformer_modules_h_modules_6_modules_mlp_modules_c_fc_parameters_weight_, l_mod_modules_transformer_modules_h_modules_6_modules_mlp_modules_c_fc_parameters_bias_); layer_norm_13 = l_mod_modules_transformer_modules_h_modules_6_modules_mlp_modules_c_fc_parameters_weight_ = l_mod_modules_transformer_modules_h_modules_6_modules_mlp_modules_c_fc_parameters_bias_ = None
+
+ # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:25 in new_gelu, code: 0.5
+ mul_24: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = 0.5 * x_38
+
+ # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:29 in new_gelu, code: + torch.tanh(math.sqrt(2.0 / math.pi) * (x + 0.044715 * torch.pow(x, 3.0)))
+ pow_7: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.pow(x_38, 3.0)
+ mul_25: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = 0.044715 * pow_7; pow_7 = None
+ add_26: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = x_38 + mul_25; x_38 = mul_25 = None
+ mul_26: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = 0.7978845608028654 * add_26; add_26 = None
+ tanh_6: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.tanh(mul_26); mul_26 = None
+
+ # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:28 in new_gelu, code: 1.0
+ add_27: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = 1.0 + tanh_6; tanh_6 = None
+
+ # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:25 in new_gelu, code: 0.5
+ x_39: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = mul_24 * add_27; mul_24 = add_27 = None
+
+ # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:130 in forward, code: x = self.c_proj(x)
+ x_40: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch._C._nn.linear(x_39, l_mod_modules_transformer_modules_h_modules_6_modules_mlp_modules_c_proj_parameters_weight_, l_mod_modules_transformer_modules_h_modules_6_modules_mlp_modules_c_proj_parameters_bias_); x_39 = l_mod_modules_transformer_modules_h_modules_6_modules_mlp_modules_c_proj_parameters_weight_ = l_mod_modules_transformer_modules_h_modules_6_modules_mlp_modules_c_proj_parameters_bias_ = None
+
+ # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:131 in forward, code: x = self.dropout(x)
+ x_41: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.nn.functional.dropout(x_40, 0.0, True, False); x_40 = None
+
+ # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:145 in forward, code: x = x + self.mlp(self.ln_2(x))
+ x_42: "f32[1, 64, 768][49152, 768, 1]cuda:0" = x_37 + x_41; x_37 = x_41 = None
+
+ # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:43 in forward, code: return F.layer_norm(input, self.weight.shape, self.weight, self.bias, 1e-5)
+ layer_norm_14: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.nn.functional.layer_norm(x_42, (768,), l_mod_modules_transformer_modules_h_modules_7_modules_ln_1_parameters_weight_, l_mod_modules_transformer_modules_h_modules_7_modules_ln_1_parameters_bias_, 1e-05); l_mod_modules_transformer_modules_h_modules_7_modules_ln_1_parameters_weight_ = l_mod_modules_transformer_modules_h_modules_7_modules_ln_1_parameters_bias_ = None
+
+ # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:82 in forward, code: q, k, v = self.c_attn(x).split(self.n_embd, dim=2)
+ linear_28: "f32[1, 64, 2304][147456, 2304, 1]cuda:0" = torch._C._nn.linear(layer_norm_14, l_mod_modules_transformer_modules_h_modules_7_modules_attn_modules_c_attn_parameters_weight_, l_mod_modules_transformer_modules_h_modules_7_modules_attn_modules_c_attn_parameters_bias_); layer_norm_14 = l_mod_modules_transformer_modules_h_modules_7_modules_attn_modules_c_attn_parameters_weight_ = l_mod_modules_transformer_modules_h_modules_7_modules_attn_modules_c_attn_parameters_bias_ = None
+ split_7 = linear_28.split(768, dim = 2); linear_28 = None
+ q_14: "f32[1, 64, 768][147456, 2304, 1]cuda:0" = split_7[0]
+ k_14: "f32[1, 64, 768][147456, 2304, 1]cuda:0" = split_7[1]
+ v_14: "f32[1, 64, 768][147456, 2304, 1]cuda:0" = split_7[2]; split_7 = None
+
+ # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:83 in forward, code: k = k.view(B, T, self.n_head, C // self.n_head).transpose(
+ view_28: "f32[1, 64, 12, 64][147456, 2304, 64, 1]cuda:0" = k_14.view(1, 64, 12, 64); k_14 = None
+ k_15: "f32[1, 12, 64, 64][147456, 64, 2304, 1]cuda:0" = view_28.transpose(1, 2); view_28 = None
+
+ # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:86 in forward, code: q = q.view(B, T, self.n_head, C // self.n_head).transpose(
+ view_29: "f32[1, 64, 12, 64][147456, 2304, 64, 1]cuda:0" = q_14.view(1, 64, 12, 64); q_14 = None
+ q_15: "f32[1, 12, 64, 64][147456, 64, 2304, 1]cuda:0" = view_29.transpose(1, 2); view_29 = None
+
+ # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:89 in forward, code: v = v.view(B, T, self.n_head, C // self.n_head).transpose(
+ view_30: "f32[1, 64, 12, 64][147456, 2304, 64, 1]cuda:0" = v_14.view(1, 64, 12, 64); v_14 = None
+ v_15: "f32[1, 12, 64, 64][147456, 64, 2304, 1]cuda:0" = view_30.transpose(1, 2); view_30 = None
+
+ # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:96 in forward, code: y = torch.nn.functional.scaled_dot_product_attention(
+ y_21: "f32[1, 12, 64, 64][49152, 64, 768, 1]cuda:0" = torch._C._nn.scaled_dot_product_attention(q_15, k_15, v_15, attn_mask = None, dropout_p = 0.0, is_causal = True); q_15 = k_15 = v_15 = None
+
+ # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:112 in forward, code: y.transpose(1, 2).contiguous().view(B, T, C)
+ transpose_31: "f32[1, 64, 12, 64][49152, 768, 64, 1]cuda:0" = y_21.transpose(1, 2); y_21 = None
+ contiguous_7: "f32[1, 64, 12, 64][49152, 768, 64, 1]cuda:0" = transpose_31.contiguous(); transpose_31 = None
+ y_22: "f32[1, 64, 768][49152, 768, 1]cuda:0" = contiguous_7.view(1, 64, 768); contiguous_7 = None
+
+ # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:116 in forward, code: y = self.resid_dropout(self.c_proj(y))
+ linear_29: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch._C._nn.linear(y_22, l_mod_modules_transformer_modules_h_modules_7_modules_attn_modules_c_proj_parameters_weight_, l_mod_modules_transformer_modules_h_modules_7_modules_attn_modules_c_proj_parameters_bias_); y_22 = l_mod_modules_transformer_modules_h_modules_7_modules_attn_modules_c_proj_parameters_weight_ = l_mod_modules_transformer_modules_h_modules_7_modules_attn_modules_c_proj_parameters_bias_ = None
+ y_23: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.nn.functional.dropout(linear_29, 0.0, True, False); linear_29 = None
+
+ # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:144 in forward, code: x = x + self.attn(self.ln_1(x))
+ x_43: "f32[1, 64, 768][49152, 768, 1]cuda:0" = x_42 + y_23; x_42 = y_23 = None
+
+ # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:43 in forward, code: return F.layer_norm(input, self.weight.shape, self.weight, self.bias, 1e-5)
+ layer_norm_15: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.nn.functional.layer_norm(x_43, (768,), l_mod_modules_transformer_modules_h_modules_7_modules_ln_2_parameters_weight_, l_mod_modules_transformer_modules_h_modules_7_modules_ln_2_parameters_bias_, 1e-05); l_mod_modules_transformer_modules_h_modules_7_modules_ln_2_parameters_weight_ = l_mod_modules_transformer_modules_h_modules_7_modules_ln_2_parameters_bias_ = None
+
+ # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:128 in forward, code: x = self.c_fc(x)
+ x_44: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch._C._nn.linear(layer_norm_15, l_mod_modules_transformer_modules_h_modules_7_modules_mlp_modules_c_fc_parameters_weight_, l_mod_modules_transformer_modules_h_modules_7_modules_mlp_modules_c_fc_parameters_bias_); layer_norm_15 = l_mod_modules_transformer_modules_h_modules_7_modules_mlp_modules_c_fc_parameters_weight_ = l_mod_modules_transformer_modules_h_modules_7_modules_mlp_modules_c_fc_parameters_bias_ = None
+
+ # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:25 in new_gelu, code: 0.5
+ mul_28: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = 0.5 * x_44
+
+ # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:29 in new_gelu, code: + torch.tanh(math.sqrt(2.0 / math.pi) * (x + 0.044715 * torch.pow(x, 3.0)))
+ pow_8: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.pow(x_44, 3.0)
+ mul_29: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = 0.044715 * pow_8; pow_8 = None
+ add_30: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = x_44 + mul_29; x_44 = mul_29 = None
+ mul_30: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = 0.7978845608028654 * add_30; add_30 = None
+ tanh_7: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.tanh(mul_30); mul_30 = None
+
+ # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:28 in new_gelu, code: 1.0
+ add_31: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = 1.0 + tanh_7; tanh_7 = None
+
+ # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:25 in new_gelu, code: 0.5
+ x_45: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = mul_28 * add_31; mul_28 = add_31 = None
+
+ # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:130 in forward, code: x = self.c_proj(x)
+ x_46: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch._C._nn.linear(x_45, l_mod_modules_transformer_modules_h_modules_7_modules_mlp_modules_c_proj_parameters_weight_, l_mod_modules_transformer_modules_h_modules_7_modules_mlp_modules_c_proj_parameters_bias_); x_45 = l_mod_modules_transformer_modules_h_modules_7_modules_mlp_modules_c_proj_parameters_weight_ = l_mod_modules_transformer_modules_h_modules_7_modules_mlp_modules_c_proj_parameters_bias_ = None
+
+ # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:131 in forward, code: x = self.dropout(x)
+ x_47: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.nn.functional.dropout(x_46, 0.0, True, False); x_46 = None
+
+ # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:145 in forward, code: x = x + self.mlp(self.ln_2(x))
+ x_48: "f32[1, 64, 768][49152, 768, 1]cuda:0" = x_43 + x_47; x_43 = x_47 = None
+
+ # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:43 in forward, code: return F.layer_norm(input, self.weight.shape, self.weight, self.bias, 1e-5)
+ layer_norm_16: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.nn.functional.layer_norm(x_48, (768,), l_mod_modules_transformer_modules_h_modules_8_modules_ln_1_parameters_weight_, l_mod_modules_transformer_modules_h_modules_8_modules_ln_1_parameters_bias_, 1e-05); l_mod_modules_transformer_modules_h_modules_8_modules_ln_1_parameters_weight_ = l_mod_modules_transformer_modules_h_modules_8_modules_ln_1_parameters_bias_ = None
+
+ # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:82 in forward, code: q, k, v = self.c_attn(x).split(self.n_embd, dim=2)
+ linear_32: "f32[1, 64, 2304][147456, 2304, 1]cuda:0" = torch._C._nn.linear(layer_norm_16, l_mod_modules_transformer_modules_h_modules_8_modules_attn_modules_c_attn_parameters_weight_, l_mod_modules_transformer_modules_h_modules_8_modules_attn_modules_c_attn_parameters_bias_); layer_norm_16 = l_mod_modules_transformer_modules_h_modules_8_modules_attn_modules_c_attn_parameters_weight_ = l_mod_modules_transformer_modules_h_modules_8_modules_attn_modules_c_attn_parameters_bias_ = None
+ split_8 = linear_32.split(768, dim = 2); linear_32 = None
+ q_16: "f32[1, 64, 768][147456, 2304, 1]cuda:0" = split_8[0]
+ k_16: "f32[1, 64, 768][147456, 2304, 1]cuda:0" = split_8[1]
+ v_16: "f32[1, 64, 768][147456, 2304, 1]cuda:0" = split_8[2]; split_8 = None
+
+ # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:83 in forward, code: k = k.view(B, T, self.n_head, C // self.n_head).transpose(
+ view_32: "f32[1, 64, 12, 64][147456, 2304, 64, 1]cuda:0" = k_16.view(1, 64, 12, 64); k_16 = None
+ k_17: "f32[1, 12, 64, 64][147456, 64, 2304, 1]cuda:0" = view_32.transpose(1, 2); view_32 = None
+
+ # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:86 in forward, code: q = q.view(B, T, self.n_head, C // self.n_head).transpose(
+ view_33: "f32[1, 64, 12, 64][147456, 2304, 64, 1]cuda:0" = q_16.view(1, 64, 12, 64); q_16 = None
+ q_17: "f32[1, 12, 64, 64][147456, 64, 2304, 1]cuda:0" = view_33.transpose(1, 2); view_33 = None
+
+ # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:89 in forward, code: v = v.view(B, T, self.n_head, C // self.n_head).transpose(
+ view_34: "f32[1, 64, 12, 64][147456, 2304, 64, 1]cuda:0" = v_16.view(1, 64, 12, 64); v_16 = None
+ v_17: "f32[1, 12, 64, 64][147456, 64, 2304, 1]cuda:0" = view_34.transpose(1, 2); view_34 = None
+
+ # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:96 in forward, code: y = torch.nn.functional.scaled_dot_product_attention(
+ y_24: "f32[1, 12, 64, 64][49152, 64, 768, 1]cuda:0" = torch._C._nn.scaled_dot_product_attention(q_17, k_17, v_17, attn_mask = None, dropout_p = 0.0, is_causal = True); q_17 = k_17 = v_17 = None
+
+ # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:112 in forward, code: y.transpose(1, 2).contiguous().view(B, T, C)
+ transpose_35: "f32[1, 64, 12, 64][49152, 768, 64, 1]cuda:0" = y_24.transpose(1, 2); y_24 = None
+ contiguous_8: "f32[1, 64, 12, 64][49152, 768, 64, 1]cuda:0" = transpose_35.contiguous(); transpose_35 = None
+ y_25: "f32[1, 64, 768][49152, 768, 1]cuda:0" = contiguous_8.view(1, 64, 768); contiguous_8 = None
+
+ # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:116 in forward, code: y = self.resid_dropout(self.c_proj(y))
+ linear_33: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch._C._nn.linear(y_25, l_mod_modules_transformer_modules_h_modules_8_modules_attn_modules_c_proj_parameters_weight_, l_mod_modules_transformer_modules_h_modules_8_modules_attn_modules_c_proj_parameters_bias_); y_25 = l_mod_modules_transformer_modules_h_modules_8_modules_attn_modules_c_proj_parameters_weight_ = l_mod_modules_transformer_modules_h_modules_8_modules_attn_modules_c_proj_parameters_bias_ = None
+ y_26: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.nn.functional.dropout(linear_33, 0.0, True, False); linear_33 = None
+
+ # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:144 in forward, code: x = x + self.attn(self.ln_1(x))
+ x_49: "f32[1, 64, 768][49152, 768, 1]cuda:0" = x_48 + y_26; x_48 = y_26 = None
+
+ # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:43 in forward, code: return F.layer_norm(input, self.weight.shape, self.weight, self.bias, 1e-5)
+ layer_norm_17: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.nn.functional.layer_norm(x_49, (768,), l_mod_modules_transformer_modules_h_modules_8_modules_ln_2_parameters_weight_, l_mod_modules_transformer_modules_h_modules_8_modules_ln_2_parameters_bias_, 1e-05); l_mod_modules_transformer_modules_h_modules_8_modules_ln_2_parameters_weight_ = l_mod_modules_transformer_modules_h_modules_8_modules_ln_2_parameters_bias_ = None
+
+ # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:128 in forward, code: x = self.c_fc(x)
+ x_50: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch._C._nn.linear(layer_norm_17, l_mod_modules_transformer_modules_h_modules_8_modules_mlp_modules_c_fc_parameters_weight_, l_mod_modules_transformer_modules_h_modules_8_modules_mlp_modules_c_fc_parameters_bias_); layer_norm_17 = l_mod_modules_transformer_modules_h_modules_8_modules_mlp_modules_c_fc_parameters_weight_ = l_mod_modules_transformer_modules_h_modules_8_modules_mlp_modules_c_fc_parameters_bias_ = None
+
+ # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:25 in new_gelu, code: 0.5
+ mul_32: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = 0.5 * x_50
+
+ # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:29 in new_gelu, code: + torch.tanh(math.sqrt(2.0 / math.pi) * (x + 0.044715 * torch.pow(x, 3.0)))
+ pow_9: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.pow(x_50, 3.0)
+ mul_33: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = 0.044715 * pow_9; pow_9 = None
+ add_34: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = x_50 + mul_33; x_50 = mul_33 = None
+ mul_34: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = 0.7978845608028654 * add_34; add_34 = None
+ tanh_8: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.tanh(mul_34); mul_34 = None
+
+ # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:28 in new_gelu, code: 1.0
+ add_35: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = 1.0 + tanh_8; tanh_8 = None
+
+ # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:25 in new_gelu, code: 0.5
+ x_51: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = mul_32 * add_35; mul_32 = add_35 = None
+
+ # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:130 in forward, code: x = self.c_proj(x)
+ x_52: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch._C._nn.linear(x_51, l_mod_modules_transformer_modules_h_modules_8_modules_mlp_modules_c_proj_parameters_weight_, l_mod_modules_transformer_modules_h_modules_8_modules_mlp_modules_c_proj_parameters_bias_); x_51 = l_mod_modules_transformer_modules_h_modules_8_modules_mlp_modules_c_proj_parameters_weight_ = l_mod_modules_transformer_modules_h_modules_8_modules_mlp_modules_c_proj_parameters_bias_ = None
+
+ # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:131 in forward, code: x = self.dropout(x)
+ x_53: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.nn.functional.dropout(x_52, 0.0, True, False); x_52 = None
+
+ # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:145 in forward, code: x = x + self.mlp(self.ln_2(x))
+ x_54: "f32[1, 64, 768][49152, 768, 1]cuda:0" = x_49 + x_53; x_49 = x_53 = None
+
+ # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:43 in forward, code: return F.layer_norm(input, self.weight.shape, self.weight, self.bias, 1e-5)
+ layer_norm_18: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.nn.functional.layer_norm(x_54, (768,), l_mod_modules_transformer_modules_h_modules_9_modules_ln_1_parameters_weight_, l_mod_modules_transformer_modules_h_modules_9_modules_ln_1_parameters_bias_, 1e-05); l_mod_modules_transformer_modules_h_modules_9_modules_ln_1_parameters_weight_ = l_mod_modules_transformer_modules_h_modules_9_modules_ln_1_parameters_bias_ = None
+
+ # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:82 in forward, code: q, k, v = self.c_attn(x).split(self.n_embd, dim=2)
+ linear_36: "f32[1, 64, 2304][147456, 2304, 1]cuda:0" = torch._C._nn.linear(layer_norm_18, l_mod_modules_transformer_modules_h_modules_9_modules_attn_modules_c_attn_parameters_weight_, l_mod_modules_transformer_modules_h_modules_9_modules_attn_modules_c_attn_parameters_bias_); layer_norm_18 = l_mod_modules_transformer_modules_h_modules_9_modules_attn_modules_c_attn_parameters_weight_ = l_mod_modules_transformer_modules_h_modules_9_modules_attn_modules_c_attn_parameters_bias_ = None
+ split_9 = linear_36.split(768, dim = 2); linear_36 = None
+ q_18: "f32[1, 64, 768][147456, 2304, 1]cuda:0" = split_9[0]
+ k_18: "f32[1, 64, 768][147456, 2304, 1]cuda:0" = split_9[1]
+ v_18: "f32[1, 64, 768][147456, 2304, 1]cuda:0" = split_9[2]; split_9 = None
+
+ # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:83 in forward, code: k = k.view(B, T, self.n_head, C // self.n_head).transpose(
+ view_36: "f32[1, 64, 12, 64][147456, 2304, 64, 1]cuda:0" = k_18.view(1, 64, 12, 64); k_18 = None
+ k_19: "f32[1, 12, 64, 64][147456, 64, 2304, 1]cuda:0" = view_36.transpose(1, 2); view_36 = None
+
+ # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:86 in forward, code: q = q.view(B, T, self.n_head, C // self.n_head).transpose(
+ view_37: "f32[1, 64, 12, 64][147456, 2304, 64, 1]cuda:0" = q_18.view(1, 64, 12, 64); q_18 = None
+ q_19: "f32[1, 12, 64, 64][147456, 64, 2304, 1]cuda:0" = view_37.transpose(1, 2); view_37 = None
+
+ # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:89 in forward, code: v = v.view(B, T, self.n_head, C // self.n_head).transpose(
+ view_38: "f32[1, 64, 12, 64][147456, 2304, 64, 1]cuda:0" = v_18.view(1, 64, 12, 64); v_18 = None
+ v_19: "f32[1, 12, 64, 64][147456, 64, 2304, 1]cuda:0" = view_38.transpose(1, 2); view_38 = None
+
+ # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:96 in forward, code: y = torch.nn.functional.scaled_dot_product_attention(
+ y_27: "f32[1, 12, 64, 64][49152, 64, 768, 1]cuda:0" = torch._C._nn.scaled_dot_product_attention(q_19, k_19, v_19, attn_mask = None, dropout_p = 0.0, is_causal = True); q_19 = k_19 = v_19 = None
+
+ # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:112 in forward, code: y.transpose(1, 2).contiguous().view(B, T, C)
+ transpose_39: "f32[1, 64, 12, 64][49152, 768, 64, 1]cuda:0" = y_27.transpose(1, 2); y_27 = None
+ contiguous_9: "f32[1, 64, 12, 64][49152, 768, 64, 1]cuda:0" = transpose_39.contiguous(); transpose_39 = None
+ y_28: "f32[1, 64, 768][49152, 768, 1]cuda:0" = contiguous_9.view(1, 64, 768); contiguous_9 = None
+
+ # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:116 in forward, code: y = self.resid_dropout(self.c_proj(y))
+ linear_37: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch._C._nn.linear(y_28, l_mod_modules_transformer_modules_h_modules_9_modules_attn_modules_c_proj_parameters_weight_, l_mod_modules_transformer_modules_h_modules_9_modules_attn_modules_c_proj_parameters_bias_); y_28 = l_mod_modules_transformer_modules_h_modules_9_modules_attn_modules_c_proj_parameters_weight_ = l_mod_modules_transformer_modules_h_modules_9_modules_attn_modules_c_proj_parameters_bias_ = None
+ y_29: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.nn.functional.dropout(linear_37, 0.0, True, False); linear_37 = None
+
+ # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:144 in forward, code: x = x + self.attn(self.ln_1(x))
+ x_55: "f32[1, 64, 768][49152, 768, 1]cuda:0" = x_54 + y_29; x_54 = y_29 = None
+
+ # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:43 in forward, code: return F.layer_norm(input, self.weight.shape, self.weight, self.bias, 1e-5)
+ layer_norm_19: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.nn.functional.layer_norm(x_55, (768,), l_mod_modules_transformer_modules_h_modules_9_modules_ln_2_parameters_weight_, l_mod_modules_transformer_modules_h_modules_9_modules_ln_2_parameters_bias_, 1e-05); l_mod_modules_transformer_modules_h_modules_9_modules_ln_2_parameters_weight_ = l_mod_modules_transformer_modules_h_modules_9_modules_ln_2_parameters_bias_ = None
+
+ # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:128 in forward, code: x = self.c_fc(x)
+ x_56: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch._C._nn.linear(layer_norm_19, l_mod_modules_transformer_modules_h_modules_9_modules_mlp_modules_c_fc_parameters_weight_, l_mod_modules_transformer_modules_h_modules_9_modules_mlp_modules_c_fc_parameters_bias_); layer_norm_19 = l_mod_modules_transformer_modules_h_modules_9_modules_mlp_modules_c_fc_parameters_weight_ = l_mod_modules_transformer_modules_h_modules_9_modules_mlp_modules_c_fc_parameters_bias_ = None
+
+ # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:25 in new_gelu, code: 0.5
+ mul_36: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = 0.5 * x_56
+
+ # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:29 in new_gelu, code: + torch.tanh(math.sqrt(2.0 / math.pi) * (x + 0.044715 * torch.pow(x, 3.0)))
+ pow_10: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.pow(x_56, 3.0)
+ mul_37: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = 0.044715 * pow_10; pow_10 = None
+ add_38: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = x_56 + mul_37; x_56 = mul_37 = None
+ mul_38: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = 0.7978845608028654 * add_38; add_38 = None
+ tanh_9: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.tanh(mul_38); mul_38 = None
+
+ # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:28 in new_gelu, code: 1.0
+ add_39: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = 1.0 + tanh_9; tanh_9 = None
+
+ # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:25 in new_gelu, code: 0.5
+ x_57: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = mul_36 * add_39; mul_36 = add_39 = None
+
+ # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:130 in forward, code: x = self.c_proj(x)
+ x_58: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch._C._nn.linear(x_57, l_mod_modules_transformer_modules_h_modules_9_modules_mlp_modules_c_proj_parameters_weight_, l_mod_modules_transformer_modules_h_modules_9_modules_mlp_modules_c_proj_parameters_bias_); x_57 = l_mod_modules_transformer_modules_h_modules_9_modules_mlp_modules_c_proj_parameters_weight_ = l_mod_modules_transformer_modules_h_modules_9_modules_mlp_modules_c_proj_parameters_bias_ = None
+
+ # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:131 in forward, code: x = self.dropout(x)
+ x_59: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.nn.functional.dropout(x_58, 0.0, True, False); x_58 = None
+
+ # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:145 in forward, code: x = x + self.mlp(self.ln_2(x))
+ x_60: "f32[1, 64, 768][49152, 768, 1]cuda:0" = x_55 + x_59; x_55 = x_59 = None
+
+ # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:43 in forward, code: return F.layer_norm(input, self.weight.shape, self.weight, self.bias, 1e-5)
+ layer_norm_20: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.nn.functional.layer_norm(x_60, (768,), l_mod_modules_transformer_modules_h_modules_10_modules_ln_1_parameters_weight_, l_mod_modules_transformer_modules_h_modules_10_modules_ln_1_parameters_bias_, 1e-05); l_mod_modules_transformer_modules_h_modules_10_modules_ln_1_parameters_weight_ = l_mod_modules_transformer_modules_h_modules_10_modules_ln_1_parameters_bias_ = None
+
+ # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:82 in forward, code: q, k, v = self.c_attn(x).split(self.n_embd, dim=2)
+ linear_40: "f32[1, 64, 2304][147456, 2304, 1]cuda:0" = torch._C._nn.linear(layer_norm_20, l_mod_modules_transformer_modules_h_modules_10_modules_attn_modules_c_attn_parameters_weight_, l_mod_modules_transformer_modules_h_modules_10_modules_attn_modules_c_attn_parameters_bias_); layer_norm_20 = l_mod_modules_transformer_modules_h_modules_10_modules_attn_modules_c_attn_parameters_weight_ = l_mod_modules_transformer_modules_h_modules_10_modules_attn_modules_c_attn_parameters_bias_ = None
+ split_10 = linear_40.split(768, dim = 2); linear_40 = None
+ q_20: "f32[1, 64, 768][147456, 2304, 1]cuda:0" = split_10[0]
+ k_20: "f32[1, 64, 768][147456, 2304, 1]cuda:0" = split_10[1]
+ v_20: "f32[1, 64, 768][147456, 2304, 1]cuda:0" = split_10[2]; split_10 = None
+
+ # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:83 in forward, code: k = k.view(B, T, self.n_head, C // self.n_head).transpose(
+ view_40: "f32[1, 64, 12, 64][147456, 2304, 64, 1]cuda:0" = k_20.view(1, 64, 12, 64); k_20 = None
+ k_21: "f32[1, 12, 64, 64][147456, 64, 2304, 1]cuda:0" = view_40.transpose(1, 2); view_40 = None
+
+ # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:86 in forward, code: q = q.view(B, T, self.n_head, C // self.n_head).transpose(
+ view_41: "f32[1, 64, 12, 64][147456, 2304, 64, 1]cuda:0" = q_20.view(1, 64, 12, 64); q_20 = None
+ q_21: "f32[1, 12, 64, 64][147456, 64, 2304, 1]cuda:0" = view_41.transpose(1, 2); view_41 = None
+
+ # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:89 in forward, code: v = v.view(B, T, self.n_head, C // self.n_head).transpose(
+ view_42: "f32[1, 64, 12, 64][147456, 2304, 64, 1]cuda:0" = v_20.view(1, 64, 12, 64); v_20 = None
+ v_21: "f32[1, 12, 64, 64][147456, 64, 2304, 1]cuda:0" = view_42.transpose(1, 2); view_42 = None
+
+ # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:96 in forward, code: y = torch.nn.functional.scaled_dot_product_attention(
+ y_30: "f32[1, 12, 64, 64][49152, 64, 768, 1]cuda:0" = torch._C._nn.scaled_dot_product_attention(q_21, k_21, v_21, attn_mask = None, dropout_p = 0.0, is_causal = True); q_21 = k_21 = v_21 = None
+
+ # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:112 in forward, code: y.transpose(1, 2).contiguous().view(B, T, C)
+ transpose_43: "f32[1, 64, 12, 64][49152, 768, 64, 1]cuda:0" = y_30.transpose(1, 2); y_30 = None
+ contiguous_10: "f32[1, 64, 12, 64][49152, 768, 64, 1]cuda:0" = transpose_43.contiguous(); transpose_43 = None
+ y_31: "f32[1, 64, 768][49152, 768, 1]cuda:0" = contiguous_10.view(1, 64, 768); contiguous_10 = None
+
+ # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:116 in forward, code: y = self.resid_dropout(self.c_proj(y))
+ linear_41: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch._C._nn.linear(y_31, l_mod_modules_transformer_modules_h_modules_10_modules_attn_modules_c_proj_parameters_weight_, l_mod_modules_transformer_modules_h_modules_10_modules_attn_modules_c_proj_parameters_bias_); y_31 = l_mod_modules_transformer_modules_h_modules_10_modules_attn_modules_c_proj_parameters_weight_ = l_mod_modules_transformer_modules_h_modules_10_modules_attn_modules_c_proj_parameters_bias_ = None
+ y_32: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.nn.functional.dropout(linear_41, 0.0, True, False); linear_41 = None
+
+ # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:144 in forward, code: x = x + self.attn(self.ln_1(x))
+ x_61: "f32[1, 64, 768][49152, 768, 1]cuda:0" = x_60 + y_32; x_60 = y_32 = None
+
+ # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:43 in forward, code: return F.layer_norm(input, self.weight.shape, self.weight, self.bias, 1e-5)
+ layer_norm_21: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.nn.functional.layer_norm(x_61, (768,), l_mod_modules_transformer_modules_h_modules_10_modules_ln_2_parameters_weight_, l_mod_modules_transformer_modules_h_modules_10_modules_ln_2_parameters_bias_, 1e-05); l_mod_modules_transformer_modules_h_modules_10_modules_ln_2_parameters_weight_ = l_mod_modules_transformer_modules_h_modules_10_modules_ln_2_parameters_bias_ = None
+
+ # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:128 in forward, code: x = self.c_fc(x)
+ x_62: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch._C._nn.linear(layer_norm_21, l_mod_modules_transformer_modules_h_modules_10_modules_mlp_modules_c_fc_parameters_weight_, l_mod_modules_transformer_modules_h_modules_10_modules_mlp_modules_c_fc_parameters_bias_); layer_norm_21 = l_mod_modules_transformer_modules_h_modules_10_modules_mlp_modules_c_fc_parameters_weight_ = l_mod_modules_transformer_modules_h_modules_10_modules_mlp_modules_c_fc_parameters_bias_ = None
+
+ # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:25 in new_gelu, code: 0.5
+ mul_40: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = 0.5 * x_62
+
+ # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:29 in new_gelu, code: + torch.tanh(math.sqrt(2.0 / math.pi) * (x + 0.044715 * torch.pow(x, 3.0)))
+ pow_11: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.pow(x_62, 3.0)
+ mul_41: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = 0.044715 * pow_11; pow_11 = None
+ add_42: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = x_62 + mul_41; x_62 = mul_41 = None
+ mul_42: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = 0.7978845608028654 * add_42; add_42 = None
+ tanh_10: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.tanh(mul_42); mul_42 = None
+
+ # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:28 in new_gelu, code: 1.0
+ add_43: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = 1.0 + tanh_10; tanh_10 = None
+
+ # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:25 in new_gelu, code: 0.5
+ x_63: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = mul_40 * add_43; mul_40 = add_43 = None
+
+ # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:130 in forward, code: x = self.c_proj(x)
+ x_64: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch._C._nn.linear(x_63, l_mod_modules_transformer_modules_h_modules_10_modules_mlp_modules_c_proj_parameters_weight_, l_mod_modules_transformer_modules_h_modules_10_modules_mlp_modules_c_proj_parameters_bias_); x_63 = l_mod_modules_transformer_modules_h_modules_10_modules_mlp_modules_c_proj_parameters_weight_ = l_mod_modules_transformer_modules_h_modules_10_modules_mlp_modules_c_proj_parameters_bias_ = None
+
+ # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:131 in forward, code: x = self.dropout(x)
+ x_65: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.nn.functional.dropout(x_64, 0.0, True, False); x_64 = None
+
+ # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:145 in forward, code: x = x + self.mlp(self.ln_2(x))
+ x_66: "f32[1, 64, 768][49152, 768, 1]cuda:0" = x_61 + x_65; x_61 = x_65 = None
+
+ # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:43 in forward, code: return F.layer_norm(input, self.weight.shape, self.weight, self.bias, 1e-5)
+ layer_norm_22: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.nn.functional.layer_norm(x_66, (768,), l_mod_modules_transformer_modules_h_modules_11_modules_ln_1_parameters_weight_, l_mod_modules_transformer_modules_h_modules_11_modules_ln_1_parameters_bias_, 1e-05); l_mod_modules_transformer_modules_h_modules_11_modules_ln_1_parameters_weight_ = l_mod_modules_transformer_modules_h_modules_11_modules_ln_1_parameters_bias_ = None
+
+ # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:82 in forward, code: q, k, v = self.c_attn(x).split(self.n_embd, dim=2)
+ linear_44: "f32[1, 64, 2304][147456, 2304, 1]cuda:0" = torch._C._nn.linear(layer_norm_22, l_mod_modules_transformer_modules_h_modules_11_modules_attn_modules_c_attn_parameters_weight_, l_mod_modules_transformer_modules_h_modules_11_modules_attn_modules_c_attn_parameters_bias_); layer_norm_22 = l_mod_modules_transformer_modules_h_modules_11_modules_attn_modules_c_attn_parameters_weight_ = l_mod_modules_transformer_modules_h_modules_11_modules_attn_modules_c_attn_parameters_bias_ = None
+ split_11 = linear_44.split(768, dim = 2); linear_44 = None
+ q_22: "f32[1, 64, 768][147456, 2304, 1]cuda:0" = split_11[0]
+ k_22: "f32[1, 64, 768][147456, 2304, 1]cuda:0" = split_11[1]
+ v_22: "f32[1, 64, 768][147456, 2304, 1]cuda:0" = split_11[2]; split_11 = None
+
+ # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:83 in forward, code: k = k.view(B, T, self.n_head, C // self.n_head).transpose(
+ view_44: "f32[1, 64, 12, 64][147456, 2304, 64, 1]cuda:0" = k_22.view(1, 64, 12, 64); k_22 = None
+ k_23: "f32[1, 12, 64, 64][147456, 64, 2304, 1]cuda:0" = view_44.transpose(1, 2); view_44 = None
+
+ # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:86 in forward, code: q = q.view(B, T, self.n_head, C // self.n_head).transpose(
+ view_45: "f32[1, 64, 12, 64][147456, 2304, 64, 1]cuda:0" = 
q_22.view(1, 64, 12, 64); q_22 = None + q_23: "f32[1, 12, 64, 64][147456, 64, 2304, 1]cuda:0" = view_45.transpose(1, 2); view_45 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:89 in forward, code: v = v.view(B, T, self.n_head, C // self.n_head).transpose( + view_46: "f32[1, 64, 12, 64][147456, 2304, 64, 1]cuda:0" = v_22.view(1, 64, 12, 64); v_22 = None + v_23: "f32[1, 12, 64, 64][147456, 64, 2304, 1]cuda:0" = view_46.transpose(1, 2); view_46 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:96 in forward, code: y = torch.nn.functional.scaled_dot_product_attention( + y_33: "f32[1, 12, 64, 64][49152, 64, 768, 1]cuda:0" = torch._C._nn.scaled_dot_product_attention(q_23, k_23, v_23, attn_mask = None, dropout_p = 0.0, is_causal = True); q_23 = k_23 = v_23 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:112 in forward, code: y.transpose(1, 2).contiguous().view(B, T, C) + transpose_47: "f32[1, 64, 12, 64][49152, 768, 64, 1]cuda:0" = y_33.transpose(1, 2); y_33 = None + contiguous_11: "f32[1, 64, 12, 64][49152, 768, 64, 1]cuda:0" = transpose_47.contiguous(); transpose_47 = None + y_34: "f32[1, 64, 768][49152, 768, 1]cuda:0" = contiguous_11.view(1, 64, 768); contiguous_11 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:116 in forward, code: y = self.resid_dropout(self.c_proj(y)) + linear_45: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch._C._nn.linear(y_34, l_mod_modules_transformer_modules_h_modules_11_modules_attn_modules_c_proj_parameters_weight_, l_mod_modules_transformer_modules_h_modules_11_modules_attn_modules_c_proj_parameters_bias_); y_34 = l_mod_modules_transformer_modules_h_modules_11_modules_attn_modules_c_proj_parameters_weight_ = l_mod_modules_transformer_modules_h_modules_11_modules_attn_modules_c_proj_parameters_bias_ = None + y_35: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.nn.functional.dropout(linear_45, 0.0, True, False); linear_45 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:144 in forward, code: x = x + self.attn(self.ln_1(x)) + x_67: "f32[1, 64, 768][49152, 768, 1]cuda:0" = x_66 + y_35; x_66 = y_35 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:43 in forward, code: return F.layer_norm(input, self.weight.shape, self.weight, self.bias, 1e-5) + layer_norm_23: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.nn.functional.layer_norm(x_67, (768,), l_mod_modules_transformer_modules_h_modules_11_modules_ln_2_parameters_weight_, l_mod_modules_transformer_modules_h_modules_11_modules_ln_2_parameters_bias_, 1e-05); l_mod_modules_transformer_modules_h_modules_11_modules_ln_2_parameters_weight_ = l_mod_modules_transformer_modules_h_modules_11_modules_ln_2_parameters_bias_ = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:128 in forward, code: x = self.c_fc(x) + x_68: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch._C._nn.linear(layer_norm_23, l_mod_modules_transformer_modules_h_modules_11_modules_mlp_modules_c_fc_parameters_weight_, l_mod_modules_transformer_modules_h_modules_11_modules_mlp_modules_c_fc_parameters_bias_); layer_norm_23 = l_mod_modules_transformer_modules_h_modules_11_modules_mlp_modules_c_fc_parameters_weight_ = l_mod_modules_transformer_modules_h_modules_11_modules_mlp_modules_c_fc_parameters_bias_ = None + + # File: 
/data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:25 in new_gelu, code: 0.5 + mul_44: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = 0.5 * x_68 + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:29 in new_gelu, code: + torch.tanh(math.sqrt(2.0 / math.pi) * (x + 0.044715 * torch.pow(x, 3.0))) + pow_12: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.pow(x_68, 3.0) + mul_45: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = 0.044715 * pow_12; pow_12 = None + add_46: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = x_68 + mul_45; x_68 = mul_45 = None + mul_46: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = 0.7978845608028654 * add_46; add_46 = None + tanh_11: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.tanh(mul_46); mul_46 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:28 in new_gelu, code: 1.0 + add_47: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = 1.0 + tanh_11; tanh_11 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:25 in new_gelu, code: 0.5 + x_69: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = mul_44 * add_47; mul_44 = add_47 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:130 in forward, code: x = self.c_proj(x) + x_70: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch._C._nn.linear(x_69, l_mod_modules_transformer_modules_h_modules_11_modules_mlp_modules_c_proj_parameters_weight_, l_mod_modules_transformer_modules_h_modules_11_modules_mlp_modules_c_proj_parameters_bias_); x_69 = l_mod_modules_transformer_modules_h_modules_11_modules_mlp_modules_c_proj_parameters_weight_ = l_mod_modules_transformer_modules_h_modules_11_modules_mlp_modules_c_proj_parameters_bias_ = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:131 in forward, code: x = self.dropout(x) + x_71: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.nn.functional.dropout(x_70, 0.0, True, False); x_70 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:145 in forward, code: x = x + self.mlp(self.ln_2(x)) + x_72: "f32[1, 64, 768][49152, 768, 1]cuda:0" = x_67 + x_71; x_67 = x_71 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:43 in forward, code: return F.layer_norm(input, self.weight.shape, self.weight, self.bias, 1e-5) + x_73: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.nn.functional.layer_norm(x_72, (768,), l_mod_modules_transformer_modules_ln_f_parameters_weight_, l_mod_modules_transformer_modules_ln_f_parameters_bias_, 1e-05); x_72 = l_mod_modules_transformer_modules_ln_f_parameters_weight_ = l_mod_modules_transformer_modules_ln_f_parameters_bias_ = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:239 in forward, code: x[:, [-1], :] + getitem_36: "f32[1, 1, 768][768, 768, 1]cuda:0" = x_73[(slice(None, None, None), [-1], slice(None, None, None))]; x_73 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:238 in forward, code: logits = self.lm_head( + logits: "f32[1, 1, 50304][50304, 50304, 1]cuda:0" = torch._C._nn.linear(getitem_36, l_mod_modules_transformer_modules_wte_parameters_weight_, None); getitem_36 = l_mod_modules_transformer_modules_wte_parameters_weight_ = None + return (logits,) + +V0806 13:55:52.361000 4107173 torch/_dynamo/utils.py:824] {"chromium_event": {}, "has_payload": "7a6c1c6e18c552462ba4cf41955a7a9a"} + { + "name": 
"OutputGraph.call_user_compiler", + "ts": 1722977752361387.8, + "args": null, + "ph": "B", + "pid": 0 + } +V0806 13:55:52.361000 4107173 torch/_dynamo/utils.py:824] {"chromium_event": {}, "has_payload": "25d80fbdb85f51f969591ec89c5dc032"} + { + "name": "backend_compile", + "ts": 1722977752361482.5, + "args": null, + "ph": "B", + "pid": 0 + } +V0806 13:55:53.010000 4107173 torch/_dynamo/utils.py:824] {"chromium_event": {}, "has_payload": "ad297a89c328174de70a3ac272334587"} + { + "name": "create_aot_dispatcher_function", + "ts": 1722977753010003.5, + "args": null, + "ph": "B", + "pid": 0 + } +V0806 13:55:54.633000 4107173 torch/_functorch/_aot_autograd/jit_compile_runtime_wrappers.py:345] {"aot_joint_graph": {}, "frame_id": 2, "frame_compile_id": 0, "attempt": 1, "has_payload": "cee09115c6728ba1c8bb07bd7fcda343"} + class joint_helper(torch.nn.Module): + def forward(self, primals, tangents): + primals_1: "i64[1, 64][64, 1]cuda:0"; primals_2: "f32[50304, 768][768, 1]cuda:0"; primals_3: "f32[1024, 768][768, 1]cuda:0"; primals_4: "f32[768][1]cuda:0"; primals_5: "f32[768][1]cuda:0"; primals_6: "f32[2304, 768][768, 1]cuda:0"; primals_7: "f32[2304][1]cuda:0"; primals_8: "f32[768, 768][768, 1]cuda:0"; primals_9: "f32[768][1]cuda:0"; primals_10: "f32[768][1]cuda:0"; primals_11: "f32[768][1]cuda:0"; primals_12: "f32[3072, 768][768, 1]cuda:0"; primals_13: "f32[3072][1]cuda:0"; primals_14: "f32[768, 3072][3072, 1]cuda:0"; primals_15: "f32[768][1]cuda:0"; primals_16: "f32[768][1]cuda:0"; primals_17: "f32[768][1]cuda:0"; primals_18: "f32[2304, 768][768, 1]cuda:0"; primals_19: "f32[2304][1]cuda:0"; primals_20: "f32[768, 768][768, 1]cuda:0"; primals_21: "f32[768][1]cuda:0"; primals_22: "f32[768][1]cuda:0"; primals_23: "f32[768][1]cuda:0"; primals_24: "f32[3072, 768][768, 1]cuda:0"; primals_25: "f32[3072][1]cuda:0"; primals_26: "f32[768, 3072][3072, 1]cuda:0"; primals_27: "f32[768][1]cuda:0"; primals_28: "f32[768][1]cuda:0"; primals_29: "f32[768][1]cuda:0"; primals_30: "f32[2304, 768][768, 1]cuda:0"; primals_31: "f32[2304][1]cuda:0"; primals_32: "f32[768, 768][768, 1]cuda:0"; primals_33: "f32[768][1]cuda:0"; primals_34: "f32[768][1]cuda:0"; primals_35: "f32[768][1]cuda:0"; primals_36: "f32[3072, 768][768, 1]cuda:0"; primals_37: "f32[3072][1]cuda:0"; primals_38: "f32[768, 3072][3072, 1]cuda:0"; primals_39: "f32[768][1]cuda:0"; primals_40: "f32[768][1]cuda:0"; primals_41: "f32[768][1]cuda:0"; primals_42: "f32[2304, 768][768, 1]cuda:0"; primals_43: "f32[2304][1]cuda:0"; primals_44: "f32[768, 768][768, 1]cuda:0"; primals_45: "f32[768][1]cuda:0"; primals_46: "f32[768][1]cuda:0"; primals_47: "f32[768][1]cuda:0"; primals_48: "f32[3072, 768][768, 1]cuda:0"; primals_49: "f32[3072][1]cuda:0"; primals_50: "f32[768, 3072][3072, 1]cuda:0"; primals_51: "f32[768][1]cuda:0"; primals_52: "f32[768][1]cuda:0"; primals_53: "f32[768][1]cuda:0"; primals_54: "f32[2304, 768][768, 1]cuda:0"; primals_55: "f32[2304][1]cuda:0"; primals_56: "f32[768, 768][768, 1]cuda:0"; primals_57: "f32[768][1]cuda:0"; primals_58: "f32[768][1]cuda:0"; primals_59: "f32[768][1]cuda:0"; primals_60: "f32[3072, 768][768, 1]cuda:0"; primals_61: "f32[3072][1]cuda:0"; primals_62: "f32[768, 3072][3072, 1]cuda:0"; primals_63: "f32[768][1]cuda:0"; primals_64: "f32[768][1]cuda:0"; primals_65: "f32[768][1]cuda:0"; primals_66: "f32[2304, 768][768, 1]cuda:0"; primals_67: "f32[2304][1]cuda:0"; primals_68: "f32[768, 768][768, 1]cuda:0"; primals_69: "f32[768][1]cuda:0"; primals_70: "f32[768][1]cuda:0"; primals_71: "f32[768][1]cuda:0"; primals_72: "f32[3072, 768][768, 
+
+ primals_1, primals_2, primals_3, primals_4, primals_5, primals_6, primals_7, primals_8, primals_9, primals_10, primals_11, primals_12, primals_13, primals_14, primals_15, primals_16, primals_17, primals_18, primals_19, primals_20, primals_21, primals_22, primals_23, primals_24, primals_25, primals_26, primals_27, primals_28, primals_29, primals_30, primals_31, primals_32, primals_33, primals_34, primals_35, primals_36, primals_37, primals_38, primals_39, primals_40, primals_41, primals_42, primals_43, primals_44, primals_45, primals_46, primals_47, primals_48, primals_49, primals_50, primals_51, primals_52, primals_53, primals_54, primals_55, primals_56, primals_57, primals_58, primals_59, primals_60, primals_61, primals_62, primals_63, primals_64, primals_65, primals_66, primals_67, primals_68, primals_69, primals_70, primals_71, primals_72, primals_73, primals_74, primals_75, primals_76, primals_77, primals_78, primals_79, primals_80, primals_81, primals_82, primals_83, primals_84, primals_85, primals_86, primals_87, primals_88, primals_89, primals_90, primals_91, primals_92, primals_93, primals_94, primals_95, primals_96, primals_97, primals_98, primals_99, primals_100, primals_101, primals_102, primals_103, primals_104, primals_105, primals_106, primals_107, primals_108, primals_109, primals_110, primals_111, primals_112, primals_113, primals_114, primals_115, primals_116, primals_117, primals_118, primals_119, primals_120, primals_121, primals_122, primals_123, primals_124, primals_125, primals_126, primals_127, primals_128, primals_129, primals_130, primals_131, primals_132, primals_133, primals_134, primals_135, primals_136, primals_137, primals_138, primals_139, primals_140, primals_141, primals_142, primals_143, primals_144, primals_145, primals_146, primals_147, primals_148, primals_149, tangents_1, = fx_pytree.tree_flatten_spec([primals, tangents], self._in_spec)
+ # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:223 in forward, code: pos = torch.arange(0, t, dtype=torch.long, device=device).unsqueeze(
+ iota: "i64[64][1]cuda:0" = torch.ops.prims.iota.default(64, start = 0, step = 1, dtype = torch.int64, device = device(type='cuda', index=0), requires_grad = False)
+ unsqueeze: "i64[1, 64][64, 1]cuda:0" = torch.ops.aten.unsqueeze.default(iota, 0); iota = None
+
+ # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:228 in forward, code: tok_emb = self.transformer.wte(idx) # token embeddings of shape (b, t, n_embd)
+ embedding: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.embedding.default(primals_2, primals_1)
+
+ # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:229 in forward, code: pos_emb = self.transformer.wpe(
+ embedding_1: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.embedding.default(primals_3, unsqueeze); primals_3 = None
+
+ # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:232 in forward, code: x = self.transformer.drop(tok_emb + pos_emb)
+ add: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.add.Tensor(embedding, embedding_1); embedding = embedding_1 = None
+ clone: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.clone.default(add); add = None
+
+ # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:43 in forward, code: return F.layer_norm(input, self.weight.shape, self.weight, self.bias, 1e-5)
+ var_mean = torch.ops.aten.var_mean.correction(clone, [2], correction = 0, keepdim = True)
+ getitem: "f32[1, 64, 1][64, 1, 1]cuda:0" = var_mean[0]
+ getitem_1: "f32[1, 64, 1][64, 1, 1]cuda:0" = var_mean[1]; var_mean = None
+ add_1: "f32[1, 64, 1][64, 1, 1]cuda:0" = torch.ops.aten.add.Tensor(getitem, 1e-05); getitem = None
+ rsqrt: "f32[1, 64, 1][64, 1, 1]cuda:0" = torch.ops.aten.rsqrt.default(add_1); add_1 = None
+ sub: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.sub.Tensor(clone, getitem_1)
+ mul: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.mul.Tensor(sub, rsqrt); sub = None
+ mul_1: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.mul.Tensor(mul, primals_4); mul = None
+ add_2: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.add.Tensor(mul_1, primals_5); mul_1 = primals_5 = None
+
+ # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:82 in forward, code: q, k, v = self.c_attn(x).split(self.n_embd, dim=2)
+ view: "f32[64, 768][768, 1]cuda:0" = torch.ops.aten.view.default(add_2, [64, 768]); add_2 = None
+ permute: "f32[768, 2304][1, 768]cuda:0" = torch.ops.aten.permute.default(primals_6, [1, 0]); primals_6 = None
+ addmm: "f32[64, 2304][2304, 1]cuda:0" = torch.ops.aten.addmm.default(primals_7, view, permute); primals_7 = None
+ view_1: "f32[1, 64, 2304][147456, 2304, 1]cuda:0" = torch.ops.aten.view.default(addmm, [1, 64, 2304]); addmm = None
+ split = torch.ops.aten.split.Tensor(view_1, 768, 2); view_1 = None
+ getitem_2: "f32[1, 64, 768][147456, 2304, 1]cuda:0" = split[0]
+ getitem_3: "f32[1, 64, 768][147456, 2304, 1]cuda:0" = split[1]
+ getitem_4: "f32[1, 64, 768][147456, 2304, 1]cuda:0" = split[2]; split = None
+
+ # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:83 in forward, code: k = k.view(B, T, self.n_head, C // self.n_head).transpose(
+ view_2: "f32[1, 64, 12, 64][147456, 2304, 64, 1]cuda:0" = torch.ops.aten.view.default(getitem_3, [1, 64, 12, 64]); getitem_3 = None
+ permute_1: "f32[1, 12, 64, 64][147456, 64, 2304, 1]cuda:0" = torch.ops.aten.permute.default(view_2, [0, 2, 1, 3]); view_2 = None
+
+ # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:86 in forward, code: q = q.view(B, T, self.n_head, C // self.n_head).transpose(
+ view_3: "f32[1, 64, 12, 64][147456, 2304, 64, 1]cuda:0" = torch.ops.aten.view.default(getitem_2, [1, 64, 12, 64]); getitem_2 = None
+ permute_2: "f32[1, 12, 64, 64][147456, 64, 2304, 1]cuda:0" = torch.ops.aten.permute.default(view_3, [0, 2, 1, 3]); view_3 = None
+
+ # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:89 in forward, code: v = v.view(B, T, self.n_head, C // self.n_head).transpose(
+ view_4: "f32[1, 64, 12, 64][147456, 2304, 64, 1]cuda:0" = torch.ops.aten.view.default(getitem_4, [1, 64, 12, 64]); getitem_4 = None
+ permute_3: "f32[1, 12, 64, 64][147456, 64, 2304, 1]cuda:0" = torch.ops.aten.permute.default(view_4, [0, 2, 1, 3]); view_4 = None
+
+ # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:96 in forward, code: y = torch.nn.functional.scaled_dot_product_attention(
+ _scaled_dot_product_efficient_attention = torch.ops.aten._scaled_dot_product_efficient_attention.default(permute_2, permute_1, permute_3, None, True, 0.0, True)
+ getitem_5: "f32[1, 12, 64, 64][49152, 64, 768, 1]cuda:0" = _scaled_dot_product_efficient_attention[0]
+ getitem_6: "f32[1, 12, 64][768, 64, 1]cuda:0" = _scaled_dot_product_efficient_attention[1]
+ getitem_7: "i64[][]cuda:0" = _scaled_dot_product_efficient_attention[2]
+ getitem_8: "i64[][]cuda:0" = _scaled_dot_product_efficient_attention[3]; _scaled_dot_product_efficient_attention = None
+ alias: "f32[1, 12, 64, 64][49152, 64, 768, 1]cuda:0" = torch.ops.aten.alias.default(getitem_5)
+ alias_1: "f32[1, 12, 64, 64][49152, 64, 768, 1]cuda:0" = torch.ops.aten.alias.default(alias); alias = None
+
+ # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:112 in forward, code: y.transpose(1, 2).contiguous().view(B, T, C)
+ permute_4: "f32[1, 64, 12, 64][49152, 768, 64, 1]cuda:0" = torch.ops.aten.permute.default(getitem_5, [0, 2, 1, 3]); getitem_5 = None
+ view_5: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.view.default(permute_4, [1, 64, 768]); permute_4 = None
+
+ # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:116 in forward, code: y = self.resid_dropout(self.c_proj(y))
+ view_6: "f32[64, 768][768, 1]cuda:0" = torch.ops.aten.view.default(view_5, [64, 768]); view_5 = None
+ permute_5: "f32[768, 768][1, 768]cuda:0" = torch.ops.aten.permute.default(primals_8, [1, 0]); primals_8 = None
+ addmm_1: "f32[64, 768][768, 1]cuda:0" = torch.ops.aten.addmm.default(primals_9, view_6, permute_5); primals_9 = None
+ view_7: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.view.default(addmm_1, [1, 64, 768]); addmm_1 = None
+ clone_1: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.clone.default(view_7); view_7 = None
+
+ # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:144 in forward, code: x = x + self.attn(self.ln_1(x))
+ add_3: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.add.Tensor(clone, clone_1); clone_1 = None
+
+ # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:43 in forward, code: return F.layer_norm(input, self.weight.shape, self.weight, self.bias, 1e-5)
+ var_mean_1 = torch.ops.aten.var_mean.correction(add_3, [2], correction = 0, keepdim = True)
+ getitem_9: "f32[1, 64, 1][64, 1, 1]cuda:0" = var_mean_1[0]
+ getitem_10: "f32[1, 64, 1][64, 1, 1]cuda:0" = var_mean_1[1]; var_mean_1 = None
+ add_4: "f32[1, 64, 1][64, 1, 1]cuda:0" = torch.ops.aten.add.Tensor(getitem_9, 1e-05); getitem_9 = None
+ rsqrt_1: "f32[1, 64, 1][64, 1, 1]cuda:0" = torch.ops.aten.rsqrt.default(add_4); add_4 = None
+ sub_1: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.sub.Tensor(add_3, getitem_10)
+ mul_2: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.mul.Tensor(sub_1, rsqrt_1); sub_1 = None
+ mul_3: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.mul.Tensor(mul_2, primals_10); mul_2 = None
+ add_5: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.add.Tensor(mul_3, primals_11); mul_3 = primals_11 = None
+
+ # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:128 in forward, code: x = self.c_fc(x)
+ view_8: "f32[64, 768][768, 1]cuda:0" = torch.ops.aten.view.default(add_5, [64, 768]); add_5 = None
+ permute_6: "f32[768, 3072][1, 768]cuda:0" = torch.ops.aten.permute.default(primals_12, [1, 0]); primals_12 = None
+ addmm_2: "f32[64, 3072][3072, 1]cuda:0" = torch.ops.aten.addmm.default(primals_13, view_8, permute_6); primals_13 = None
+ view_9: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.view.default(addmm_2, [1, 64, 3072]); addmm_2 = None
+
+ # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:25 in new_gelu, code: 0.5
+ mul_4: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.mul.Tensor(view_9, 0.5)
+
+ # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:29 in new_gelu, code: + torch.tanh(math.sqrt(2.0 / math.pi) * (x + 0.044715 * torch.pow(x, 3.0)))
+ pow_1: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.pow.Tensor_Scalar(view_9, 3.0)
+ mul_5: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.mul.Tensor(pow_1, 0.044715); pow_1 = None
+ add_6: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.add.Tensor(view_9, mul_5); mul_5 = None
+ mul_6: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.mul.Tensor(add_6, 0.7978845608028654); add_6 = None
+ tanh: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.tanh.default(mul_6); mul_6 = None
+ alias_2: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.alias.default(tanh)
+ alias_3: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.alias.default(alias_2); alias_2 = None
+
+ # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:28 in new_gelu, code: 1.0
+ add_7: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.add.Tensor(tanh, 1.0); tanh = None
+
+ # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:25 in new_gelu, code: 0.5
+ mul_7: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.mul.Tensor(mul_4, add_7)
+
+ # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:130 in forward, code: x = self.c_proj(x)
+ view_10: "f32[64, 3072][3072, 1]cuda:0" = torch.ops.aten.view.default(mul_7, [64, 3072]); mul_7 = None
+ permute_7: "f32[3072, 768][1, 3072]cuda:0" = torch.ops.aten.permute.default(primals_14, [1, 0]); primals_14 = None
+ addmm_3: "f32[64, 768][768, 1]cuda:0" = torch.ops.aten.addmm.default(primals_15, view_10, permute_7); primals_15 = None
+ view_11: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.view.default(addmm_3, [1, 64, 768]); addmm_3 = None
+
+ # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:131 in forward, code: x = self.dropout(x)
+ clone_2: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.clone.default(view_11); view_11 = None
+
+ # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:145 in forward, code: x = x + self.mlp(self.ln_2(x))
+ add_8: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.add.Tensor(add_3, clone_2); clone_2 = None
+
+ # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:43 in forward, code: return F.layer_norm(input, self.weight.shape, self.weight, self.bias, 1e-5)
+ var_mean_2 = torch.ops.aten.var_mean.correction(add_8, [2], correction = 0, keepdim = True)
+ getitem_11: "f32[1, 64, 1][64, 1, 1]cuda:0" = var_mean_2[0]
+ getitem_12: "f32[1, 64, 1][64, 1, 1]cuda:0" = var_mean_2[1]; var_mean_2 = None
+ add_9: "f32[1, 64, 1][64, 1, 1]cuda:0" = torch.ops.aten.add.Tensor(getitem_11, 1e-05); getitem_11 = None
+ rsqrt_2: "f32[1, 64, 1][64, 1, 1]cuda:0" = torch.ops.aten.rsqrt.default(add_9); add_9 = None
+ sub_2: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.sub.Tensor(add_8, getitem_12)
+ mul_8: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.mul.Tensor(sub_2, rsqrt_2); sub_2 = None
+ mul_9: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.mul.Tensor(mul_8, primals_16); mul_8 = None
+ add_10: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.add.Tensor(mul_9, primals_17); mul_9 = primals_17 = None
+
+ # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:82 in forward, code: q, k, v = self.c_attn(x).split(self.n_embd, dim=2)
+ view_12: "f32[64, 768][768, 1]cuda:0" = torch.ops.aten.view.default(add_10, [64, 768]); add_10 = None
+ permute_8: "f32[768, 2304][1, 768]cuda:0" = torch.ops.aten.permute.default(primals_18, [1, 0]); primals_18 = None
+ addmm_4: "f32[64, 2304][2304, 1]cuda:0" = torch.ops.aten.addmm.default(primals_19, view_12, permute_8); primals_19 = None
+ view_13: "f32[1, 64, 2304][147456, 2304, 1]cuda:0" = torch.ops.aten.view.default(addmm_4, [1, 64, 2304]); addmm_4 = None
+ split_1 = torch.ops.aten.split.Tensor(view_13, 768, 2); view_13 = None
+ getitem_13: "f32[1, 64, 768][147456, 2304, 1]cuda:0" = split_1[0]
+ getitem_14: "f32[1, 64, 768][147456, 2304, 1]cuda:0" = split_1[1]
+ getitem_15: "f32[1, 64, 768][147456, 2304, 1]cuda:0" = split_1[2]; split_1 = None
+
+ # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:83 in forward, code: k = k.view(B, T, self.n_head, C // self.n_head).transpose(
+ view_14: "f32[1, 64, 12, 64][147456, 2304, 64, 1]cuda:0" = torch.ops.aten.view.default(getitem_14, [1, 64, 12, 64]); getitem_14 = None
+ permute_9: "f32[1, 12, 64, 64][147456, 64, 2304, 1]cuda:0" = torch.ops.aten.permute.default(view_14, [0, 2, 1, 3]); view_14 = None
+
+ # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:86 in forward, code: q = q.view(B, T, self.n_head, C // self.n_head).transpose(
+ view_15: "f32[1, 64, 12, 64][147456, 2304, 64, 1]cuda:0" = torch.ops.aten.view.default(getitem_13, [1, 64, 12, 64]); getitem_13 = None
+ permute_10: "f32[1, 12, 64, 64][147456, 64, 2304, 1]cuda:0" = torch.ops.aten.permute.default(view_15, [0, 2, 1, 3]); view_15 = None
+
+ # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:89 in forward, code: v = v.view(B, T, self.n_head, C // self.n_head).transpose(
+ view_16: "f32[1, 64, 12, 64][147456, 2304, 64, 1]cuda:0" = torch.ops.aten.view.default(getitem_15, [1, 64, 12, 64]); getitem_15 = None
+ permute_11: "f32[1, 12, 64, 64][147456, 64, 2304, 1]cuda:0" = torch.ops.aten.permute.default(view_16, [0, 2, 1, 3]); view_16 = None
+
+ # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:96 in forward, code: y = torch.nn.functional.scaled_dot_product_attention(
+ _scaled_dot_product_efficient_attention_1 = torch.ops.aten._scaled_dot_product_efficient_attention.default(permute_10, permute_9, permute_11, None, True, 0.0, True)
+ getitem_16: "f32[1, 12, 64, 64][49152, 64, 768, 1]cuda:0" = _scaled_dot_product_efficient_attention_1[0]
+ getitem_17: "f32[1, 12, 64][768, 64, 1]cuda:0" = _scaled_dot_product_efficient_attention_1[1]
+ getitem_18: "i64[][]cuda:0" = _scaled_dot_product_efficient_attention_1[2]
+ getitem_19: "i64[][]cuda:0" = _scaled_dot_product_efficient_attention_1[3]; _scaled_dot_product_efficient_attention_1 = None
+ alias_4: "f32[1, 12, 64, 64][49152, 64, 768, 1]cuda:0" = torch.ops.aten.alias.default(getitem_16)
+ alias_5: "f32[1, 12, 64, 64][49152, 64, 768, 1]cuda:0" = torch.ops.aten.alias.default(alias_4); alias_4 = None
+
+ # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:112 in forward, code: y.transpose(1, 2).contiguous().view(B, T, C)
+ permute_12: "f32[1, 64, 12, 64][49152, 768, 64, 1]cuda:0" = torch.ops.aten.permute.default(getitem_16, [0, 2, 1, 3]); getitem_16 = None
+ view_17: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.view.default(permute_12, [1, 64, 768]); permute_12 = None
+
+ # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:116 in forward, code: y = self.resid_dropout(self.c_proj(y))
+ view_18: "f32[64, 768][768, 1]cuda:0" = torch.ops.aten.view.default(view_17, [64, 768]); view_17 = None
+ permute_13: "f32[768, 768][1, 768]cuda:0" = torch.ops.aten.permute.default(primals_20, [1, 0]); primals_20 = None
+ addmm_5: "f32[64, 768][768, 1]cuda:0" = torch.ops.aten.addmm.default(primals_21, view_18, permute_13); primals_21 = None
+ view_19: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.view.default(addmm_5, [1, 64, 768]); addmm_5 = None
+ clone_3: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.clone.default(view_19); view_19 = None
+
+ # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:144 in forward, code: x = x + self.attn(self.ln_1(x))
+ add_11: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.add.Tensor(add_8, clone_3); clone_3 = None
+
+ # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:43 in forward, code: return F.layer_norm(input, self.weight.shape, self.weight, self.bias, 1e-5)
+ var_mean_3 = torch.ops.aten.var_mean.correction(add_11, [2], correction = 0, keepdim = True)
+ getitem_20: "f32[1, 64, 1][64, 1, 1]cuda:0" = var_mean_3[0]
+ getitem_21: "f32[1, 64, 1][64, 1, 1]cuda:0" = var_mean_3[1]; var_mean_3 = None
+ add_12: "f32[1, 64, 1][64, 1, 1]cuda:0" = torch.ops.aten.add.Tensor(getitem_20, 1e-05); getitem_20 = None
+ rsqrt_3: "f32[1, 64, 1][64, 1, 1]cuda:0" = torch.ops.aten.rsqrt.default(add_12); add_12 = None
+ sub_3: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.sub.Tensor(add_11, getitem_21)
+ mul_10: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.mul.Tensor(sub_3, rsqrt_3); sub_3 = None
+ mul_11: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.mul.Tensor(mul_10, primals_22); mul_10 = None
+ add_13: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.add.Tensor(mul_11, primals_23); mul_11 = primals_23 = None
+
+ # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:128 in forward, code: x = self.c_fc(x)
+ view_20: "f32[64, 768][768, 1]cuda:0" = torch.ops.aten.view.default(add_13, [64, 768]); add_13 = None
+ permute_14: "f32[768, 3072][1, 768]cuda:0" = torch.ops.aten.permute.default(primals_24, [1, 0]); primals_24 = None
+ addmm_6: "f32[64, 3072][3072, 1]cuda:0" = torch.ops.aten.addmm.default(primals_25, view_20, permute_14); primals_25 = None
+ view_21: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.view.default(addmm_6, [1, 64, 3072]); addmm_6 = None
+
+ # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:25 in new_gelu, code: 0.5
+ mul_12: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.mul.Tensor(view_21, 0.5)
+
+ # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:29 in new_gelu, code: + torch.tanh(math.sqrt(2.0 / math.pi) * (x + 0.044715 * torch.pow(x, 3.0)))
+ pow_2: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.pow.Tensor_Scalar(view_21, 3.0)
+ mul_13: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.mul.Tensor(pow_2, 0.044715); pow_2 = None
+ add_14: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.add.Tensor(view_21, mul_13); mul_13 = None
+ mul_14: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.mul.Tensor(add_14, 0.7978845608028654); add_14 = None
+ tanh_1: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.tanh.default(mul_14); mul_14 = None
+ alias_6: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.alias.default(tanh_1)
+ alias_7: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.alias.default(alias_6); alias_6 = None
+
+ # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:28 in new_gelu, code: 1.0
+ add_15: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.add.Tensor(tanh_1, 1.0); tanh_1 = None
+
+ # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:25 in new_gelu, code: 0.5
+ mul_15: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.mul.Tensor(mul_12, add_15)
+
+ # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:130 in forward, code: x = self.c_proj(x)
+ view_22: "f32[64, 3072][3072, 1]cuda:0" = torch.ops.aten.view.default(mul_15, [64, 3072]); mul_15 = None
+ permute_15: "f32[3072, 768][1, 3072]cuda:0" = torch.ops.aten.permute.default(primals_26, [1, 0]); primals_26 = None
+ addmm_7: "f32[64, 768][768, 1]cuda:0" = torch.ops.aten.addmm.default(primals_27, view_22, permute_15); primals_27 = None
+ view_23: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.view.default(addmm_7, [1, 64, 768]); addmm_7 = None
+
+ # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:131 in forward, code: x = self.dropout(x)
+ clone_4: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.clone.default(view_23); view_23 = None
+
+ # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:145 in forward, code: x = x + self.mlp(self.ln_2(x))
+ add_16: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.add.Tensor(add_11, clone_4); clone_4 = None
+
+ # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:43 in forward, code: return F.layer_norm(input, self.weight.shape, self.weight, self.bias, 1e-5)
+ var_mean_4 = torch.ops.aten.var_mean.correction(add_16, [2], correction = 0, keepdim = True)
+ getitem_22: "f32[1, 64, 1][64, 1, 1]cuda:0" = var_mean_4[0]
+ getitem_23: "f32[1, 64, 1][64, 1, 1]cuda:0" = var_mean_4[1]; var_mean_4 = None
+ add_17: "f32[1, 64, 1][64, 1, 1]cuda:0" = torch.ops.aten.add.Tensor(getitem_22, 1e-05); getitem_22 = None
+ rsqrt_4: "f32[1, 64, 1][64, 1, 1]cuda:0" = torch.ops.aten.rsqrt.default(add_17); add_17 = None
+ sub_4: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.sub.Tensor(add_16, getitem_23)
+ mul_16: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.mul.Tensor(sub_4, rsqrt_4); sub_4 = None
+ mul_17: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.mul.Tensor(mul_16, primals_28); mul_16 = None
+ add_18: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.add.Tensor(mul_17, primals_29); mul_17 = primals_29 = None
+
+ # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:82 in forward, code: q, k, v = self.c_attn(x).split(self.n_embd, dim=2)
+ view_24: "f32[64, 768][768, 1]cuda:0" = torch.ops.aten.view.default(add_18, [64, 768]); add_18 = None
+ permute_16: "f32[768, 2304][1, 768]cuda:0" = torch.ops.aten.permute.default(primals_30, [1, 0]); primals_30 = None
+ addmm_8: "f32[64, 2304][2304, 1]cuda:0" = torch.ops.aten.addmm.default(primals_31, view_24, permute_16); primals_31 = None
+ view_25: "f32[1, 64, 2304][147456, 2304, 1]cuda:0" = torch.ops.aten.view.default(addmm_8, [1, 64, 2304]); addmm_8 = None
+ split_2 = torch.ops.aten.split.Tensor(view_25, 768, 2); view_25 = None
+ getitem_24: "f32[1, 64, 768][147456, 2304, 1]cuda:0" = split_2[0]
+ getitem_25: "f32[1, 64, 768][147456, 2304, 1]cuda:0" = split_2[1]
+ getitem_26: "f32[1, 64, 768][147456, 2304, 1]cuda:0" = split_2[2]; split_2 = None
+
+ # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:83 in forward, code: k = k.view(B, T, self.n_head, C // self.n_head).transpose(
+ view_26: "f32[1, 64, 12, 64][147456, 2304, 64, 1]cuda:0" = torch.ops.aten.view.default(getitem_25, [1, 64, 12, 64]); getitem_25 = None
+ permute_17: "f32[1, 12, 64, 64][147456, 64, 2304, 1]cuda:0" = torch.ops.aten.permute.default(view_26, [0, 2, 1, 3]); view_26 = None
+
+ # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:86 in forward, code: q = q.view(B, T, self.n_head, C // self.n_head).transpose(
+ view_27: "f32[1, 64, 12, 64][147456, 2304, 64, 1]cuda:0" = torch.ops.aten.view.default(getitem_24, [1, 64, 12, 64]); getitem_24 = None
+ permute_18: "f32[1, 12, 64, 64][147456, 64, 2304, 1]cuda:0" = torch.ops.aten.permute.default(view_27, [0, 2, 1, 3]); view_27 = None
+
+ # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:89 in forward, code: v = v.view(B, T, self.n_head, C // self.n_head).transpose(
+ view_28: "f32[1, 64, 12, 64][147456, 2304, 64, 1]cuda:0" = torch.ops.aten.view.default(getitem_26, [1, 64, 12, 64]); getitem_26 = None
+ permute_19: "f32[1, 12, 64, 64][147456, 64, 2304, 1]cuda:0" = torch.ops.aten.permute.default(view_28, [0, 2, 1, 3]); view_28 = None
+
+ # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:96 in forward, code: y = torch.nn.functional.scaled_dot_product_attention(
+ _scaled_dot_product_efficient_attention_2 = torch.ops.aten._scaled_dot_product_efficient_attention.default(permute_18, permute_17, permute_19, None, True, 0.0, True)
+ getitem_27: "f32[1, 12, 64, 64][49152, 64, 768, 1]cuda:0" = _scaled_dot_product_efficient_attention_2[0]
+ getitem_28: "f32[1, 12, 64][768, 64, 1]cuda:0" = _scaled_dot_product_efficient_attention_2[1]
+ getitem_29: "i64[][]cuda:0" = _scaled_dot_product_efficient_attention_2[2]
+ getitem_30: "i64[][]cuda:0" = _scaled_dot_product_efficient_attention_2[3]; _scaled_dot_product_efficient_attention_2 = None
+ alias_8: "f32[1, 12, 64, 64][49152, 64, 768, 1]cuda:0" = torch.ops.aten.alias.default(getitem_27)
+ alias_9: "f32[1, 12, 64, 64][49152, 64, 768, 1]cuda:0" = torch.ops.aten.alias.default(alias_8); alias_8 = None
+
+ # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:112 in forward, code: y.transpose(1, 2).contiguous().view(B, T, C)
+ permute_20: "f32[1, 64, 12, 64][49152, 768, 64, 1]cuda:0" = torch.ops.aten.permute.default(getitem_27, [0, 2, 1, 3]); getitem_27 = None
+ view_29: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.view.default(permute_20, [1, 64, 768]); permute_20 = None
+
+ # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:116 in forward, code: y = self.resid_dropout(self.c_proj(y))
+ view_30: "f32[64, 768][768, 1]cuda:0" = torch.ops.aten.view.default(view_29, [64, 768]); view_29 = None
+ permute_21: "f32[768, 768][1, 768]cuda:0" = torch.ops.aten.permute.default(primals_32, [1, 0]); primals_32 = None
+ addmm_9: "f32[64, 768][768, 1]cuda:0" = torch.ops.aten.addmm.default(primals_33, view_30, permute_21); primals_33 = None
+ view_31: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.view.default(addmm_9, [1, 64, 768]); addmm_9 = None
+ clone_5: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.clone.default(view_31); view_31 = None
+
+ # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:144 in forward, code: x = x + self.attn(self.ln_1(x))
+ add_19: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.add.Tensor(add_16, clone_5); clone_5 = None
+
+ # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:43 in forward, code: return F.layer_norm(input, self.weight.shape, self.weight, self.bias, 1e-5)
+ var_mean_5 = torch.ops.aten.var_mean.correction(add_19, [2], correction = 0, keepdim = True)
+ getitem_31: "f32[1, 64, 1][64, 1, 1]cuda:0" = var_mean_5[0]
+ getitem_32: "f32[1, 64, 1][64, 1, 1]cuda:0" = var_mean_5[1]; var_mean_5 = None
+ add_20: "f32[1, 64, 1][64, 1, 1]cuda:0" = torch.ops.aten.add.Tensor(getitem_31, 1e-05); getitem_31 = None
+ rsqrt_5: "f32[1, 64, 1][64, 1, 1]cuda:0" = torch.ops.aten.rsqrt.default(add_20); add_20 = None
+ sub_5: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.sub.Tensor(add_19, getitem_32)
+ mul_18: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.mul.Tensor(sub_5, rsqrt_5); sub_5 = None
+ mul_19: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.mul.Tensor(mul_18, primals_34); mul_18 = None
+ add_21: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.add.Tensor(mul_19, primals_35); mul_19 = primals_35 = None
+
+ # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:128 in forward, code: x = self.c_fc(x)
+ view_32: "f32[64, 768][768, 1]cuda:0" = torch.ops.aten.view.default(add_21, [64, 768]); add_21 = None
+ permute_22: "f32[768, 3072][1, 768]cuda:0" = torch.ops.aten.permute.default(primals_36, [1, 0]); primals_36 = None
+ addmm_10: "f32[64, 3072][3072, 1]cuda:0" = torch.ops.aten.addmm.default(primals_37, view_32, permute_22); primals_37 = None
+ view_33: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.view.default(addmm_10, [1, 64, 3072]); addmm_10 = None
+
+ # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:25 in new_gelu, code: 0.5
+ mul_20: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.mul.Tensor(view_33, 0.5)
+
+ # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:29 in new_gelu, code: + torch.tanh(math.sqrt(2.0 / math.pi) * (x + 0.044715 * torch.pow(x, 3.0)))
+ pow_3: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.pow.Tensor_Scalar(view_33, 3.0)
+ mul_21: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.mul.Tensor(pow_3, 0.044715); pow_3 = None
+ add_22: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.add.Tensor(view_33, mul_21); mul_21 = None
+ mul_22: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.mul.Tensor(add_22, 0.7978845608028654); add_22 = None
+ tanh_2: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.tanh.default(mul_22); mul_22 = None
+ alias_10: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.alias.default(tanh_2)
+ alias_11: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.alias.default(alias_10); alias_10 = None
+
+ # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:28 in new_gelu, code: 1.0
+ add_23: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.add.Tensor(tanh_2, 1.0); tanh_2 = None
+
+ # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:25 in new_gelu, code: 0.5
+ mul_23: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.mul.Tensor(mul_20, add_23)
+
+ # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:130 in forward, code: x = self.c_proj(x)
+ view_34: "f32[64, 3072][3072, 1]cuda:0" = torch.ops.aten.view.default(mul_23, [64, 3072]); mul_23 = None
+ permute_23: "f32[3072, 768][1, 3072]cuda:0" = torch.ops.aten.permute.default(primals_38, [1, 0]); primals_38 = None
+ addmm_11: "f32[64, 768][768, 1]cuda:0" = torch.ops.aten.addmm.default(primals_39, view_34, permute_23); primals_39 = None
+ view_35: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.view.default(addmm_11, [1, 64, 768]); addmm_11 = None
+
+ # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:131 in forward, code: x = self.dropout(x)
+ clone_6: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.clone.default(view_35); view_35 = None
+
+ # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:145 in forward, code: x = x + self.mlp(self.ln_2(x))
+ add_24: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.add.Tensor(add_19, clone_6); clone_6 = None
+
+ # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:43 in forward, code: return F.layer_norm(input, self.weight.shape, self.weight, self.bias, 1e-5)
+ var_mean_6 = torch.ops.aten.var_mean.correction(add_24, [2], correction = 0, keepdim = True)
+ getitem_33: "f32[1, 64, 1][64, 1, 1]cuda:0" = var_mean_6[0]
+ getitem_34: "f32[1, 64, 1][64, 1, 1]cuda:0" = var_mean_6[1]; var_mean_6 = None
+ add_25: "f32[1, 64, 1][64, 1, 1]cuda:0" = torch.ops.aten.add.Tensor(getitem_33, 1e-05); getitem_33 = None
+ rsqrt_6: "f32[1, 64, 1][64, 1, 1]cuda:0" = torch.ops.aten.rsqrt.default(add_25); add_25 = None
+ sub_6: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.sub.Tensor(add_24, getitem_34)
+ mul_24: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.mul.Tensor(sub_6, rsqrt_6); sub_6 = None
+ mul_25: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.mul.Tensor(mul_24, primals_40); mul_24 = None
+ add_26: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.add.Tensor(mul_25, primals_41); mul_25 = primals_41 = None
+
+ # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:82 in forward, code: q, k, v = self.c_attn(x).split(self.n_embd, dim=2)
+ view_36: "f32[64, 768][768, 1]cuda:0" = torch.ops.aten.view.default(add_26, [64, 768]); add_26 = None
+ permute_24: "f32[768, 2304][1, 768]cuda:0" = torch.ops.aten.permute.default(primals_42, [1, 0]); primals_42 = None
+ addmm_12: "f32[64, 2304][2304, 1]cuda:0" = torch.ops.aten.addmm.default(primals_43, view_36, permute_24); primals_43 = None
+ view_37: "f32[1, 64, 2304][147456, 2304, 1]cuda:0" = torch.ops.aten.view.default(addmm_12, [1, 64, 2304]); addmm_12 = None
+ split_3 = torch.ops.aten.split.Tensor(view_37, 768, 2); view_37 = None
+ getitem_35: "f32[1, 64, 768][147456, 2304, 1]cuda:0" = split_3[0]
+ getitem_36: "f32[1, 64, 768][147456, 2304, 1]cuda:0" = split_3[1]
+ getitem_37: "f32[1, 64, 768][147456, 2304, 1]cuda:0" = split_3[2]; split_3 = None
+
+ # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:83 in forward, code: k = k.view(B, T, self.n_head, C // self.n_head).transpose(
+ view_38: "f32[1, 64, 12, 64][147456, 2304, 64, 1]cuda:0" = torch.ops.aten.view.default(getitem_36, [1, 64, 12, 64]); getitem_36 = None
+ permute_25: "f32[1, 12, 64, 64][147456, 64, 2304, 1]cuda:0" = torch.ops.aten.permute.default(view_38, [0, 2, 1, 3]); view_38 = None
+
+ # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:86 in forward, code: q = q.view(B, T, self.n_head, C // self.n_head).transpose(
+ view_39: "f32[1, 64, 12, 64][147456, 2304, 64, 1]cuda:0" = torch.ops.aten.view.default(getitem_35, [1, 64, 12, 64]); getitem_35 = None
+ permute_26: "f32[1, 12, 64, 64][147456, 64, 2304, 1]cuda:0" = torch.ops.aten.permute.default(view_39, [0, 2, 1, 3]); view_39 = None
+
+ # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:89 in forward, code: v = v.view(B, T, self.n_head, C // self.n_head).transpose(
+ view_40: "f32[1, 64, 12, 64][147456, 2304, 64, 1]cuda:0" = torch.ops.aten.view.default(getitem_37, [1, 64, 12, 64]); getitem_37 = None
+ permute_27: "f32[1, 12, 64, 64][147456, 64, 2304, 1]cuda:0" = torch.ops.aten.permute.default(view_40, [0, 2, 1, 3]); view_40 = None
+
+ # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:96 in forward, code: y = torch.nn.functional.scaled_dot_product_attention(
+ _scaled_dot_product_efficient_attention_3 = torch.ops.aten._scaled_dot_product_efficient_attention.default(permute_26, permute_25, permute_27, None, True, 0.0, True)
+ getitem_38: "f32[1, 12, 64, 64][49152, 64, 768, 1]cuda:0" = _scaled_dot_product_efficient_attention_3[0]
+ getitem_39: "f32[1, 12, 64][768, 64, 1]cuda:0" = _scaled_dot_product_efficient_attention_3[1]
+ getitem_40: "i64[][]cuda:0" = _scaled_dot_product_efficient_attention_3[2]
+ getitem_41: "i64[][]cuda:0" = _scaled_dot_product_efficient_attention_3[3]; _scaled_dot_product_efficient_attention_3 = None
+ alias_12: "f32[1, 12, 64, 64][49152, 64, 768, 1]cuda:0" = torch.ops.aten.alias.default(getitem_38)
+ alias_13: "f32[1, 12, 64, 64][49152, 64, 768, 1]cuda:0" = torch.ops.aten.alias.default(alias_12); alias_12 = None
+
+ # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:112 in forward, code: y.transpose(1, 2).contiguous().view(B, T, C)
+ permute_28: "f32[1, 64, 12, 64][49152, 768, 64, 1]cuda:0" = torch.ops.aten.permute.default(getitem_38, [0, 2, 1, 3]); getitem_38 = None
+ view_41: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.view.default(permute_28, [1, 64, 768]); permute_28 = None
+
+ # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:116 in forward, code: y = self.resid_dropout(self.c_proj(y))
+ view_42: "f32[64, 768][768, 1]cuda:0" = torch.ops.aten.view.default(view_41, [64, 768]); view_41 = None
+ permute_29: "f32[768, 768][1, 768]cuda:0" = torch.ops.aten.permute.default(primals_44, [1, 0]); primals_44 = None
+ addmm_13: "f32[64, 768][768, 1]cuda:0" = torch.ops.aten.addmm.default(primals_45, view_42, permute_29); primals_45 = None
+ view_43: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.view.default(addmm_13, [1, 64, 768]); addmm_13 = None
+ clone_7: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.clone.default(view_43); view_43 = None
+
+ # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:144 in forward, code: x = x + self.attn(self.ln_1(x))
+ add_27: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.add.Tensor(add_24, clone_7); clone_7 = None
+
+ # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:43 in forward, code: return F.layer_norm(input, self.weight.shape, self.weight, self.bias, 1e-5)
+ var_mean_7 = torch.ops.aten.var_mean.correction(add_27, [2], correction = 0, keepdim = True)
+ getitem_42: "f32[1, 64, 1][64, 1, 1]cuda:0" = var_mean_7[0]
+ getitem_43: "f32[1, 64, 1][64, 1, 1]cuda:0" = var_mean_7[1]; var_mean_7 = None
+ add_28: "f32[1, 64, 1][64, 1, 1]cuda:0" = torch.ops.aten.add.Tensor(getitem_42, 1e-05); getitem_42 = None
+ rsqrt_7: "f32[1, 64, 1][64, 1, 1]cuda:0" = torch.ops.aten.rsqrt.default(add_28); add_28 = None
+ sub_7: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.sub.Tensor(add_27, getitem_43)
+ mul_26: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.mul.Tensor(sub_7, rsqrt_7); sub_7 = None
+ mul_27: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.mul.Tensor(mul_26, primals_46); mul_26 = None
+ add_29: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.add.Tensor(mul_27, primals_47); mul_27 = primals_47 = None
+
+ # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:128 in forward, code: x = self.c_fc(x)
+ view_44: "f32[64, 768][768, 1]cuda:0" = torch.ops.aten.view.default(add_29, [64, 768]); add_29 = None
+ permute_30: "f32[768, 3072][1, 768]cuda:0" = torch.ops.aten.permute.default(primals_48, [1, 0]); primals_48 = None
+ addmm_14: "f32[64, 3072][3072, 1]cuda:0" = torch.ops.aten.addmm.default(primals_49, view_44, permute_30); primals_49 = None
+ view_45: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.view.default(addmm_14, [1, 64, 3072]); addmm_14 = None
+
+ # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:25 in new_gelu, code: 0.5
+ mul_28: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.mul.Tensor(view_45, 0.5)
+
+ # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:29 in new_gelu, code: + torch.tanh(math.sqrt(2.0 / math.pi) * (x + 0.044715 * torch.pow(x, 3.0)))
+ pow_4: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.pow.Tensor_Scalar(view_45, 3.0)
+ mul_29: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.mul.Tensor(pow_4, 0.044715); pow_4 = None
+ add_30: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.add.Tensor(view_45, mul_29); mul_29 = None
+ mul_30: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.mul.Tensor(add_30, 0.7978845608028654); add_30 = None
+ tanh_3: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.tanh.default(mul_30); mul_30 = None
+ alias_14: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.alias.default(tanh_3)
+ alias_15: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.alias.default(alias_14); alias_14 = None
+
+ # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:28 in new_gelu, code: 1.0
+ add_31: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.add.Tensor(tanh_3, 1.0); tanh_3 = None
+
+ # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:25 in new_gelu, code: 0.5
+ mul_31: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.mul.Tensor(mul_28, add_31)
+
+ # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:130 in forward, code: x = self.c_proj(x)
+ view_46: "f32[64, 3072][3072, 1]cuda:0" = torch.ops.aten.view.default(mul_31, [64, 3072]); mul_31 = None
+ permute_31: "f32[3072, 768][1, 3072]cuda:0" = torch.ops.aten.permute.default(primals_50, [1, 0]); primals_50 = None
+ addmm_15: "f32[64, 768][768, 1]cuda:0" = torch.ops.aten.addmm.default(primals_51, view_46, permute_31); primals_51 = None
+ view_47: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.view.default(addmm_15, [1, 64, 768]); addmm_15 = None
+
+ # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:131 in forward, code: x = self.dropout(x)
+ clone_8: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.clone.default(view_47); view_47 = None
+
+ # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:145 in forward, code: x = x + self.mlp(self.ln_2(x))
+ add_32: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.add.Tensor(add_27, clone_8); clone_8 = None
+
+ # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:43 in forward, code: return F.layer_norm(input, self.weight.shape, self.weight, self.bias, 1e-5)
+ var_mean_8 = torch.ops.aten.var_mean.correction(add_32, [2], correction = 0, keepdim = True)
+ getitem_44: "f32[1, 64, 1][64, 1, 1]cuda:0"
= var_mean_8[0] + getitem_45: "f32[1, 64, 1][64, 1, 1]cuda:0" = var_mean_8[1]; var_mean_8 = None + add_33: "f32[1, 64, 1][64, 1, 1]cuda:0" = torch.ops.aten.add.Tensor(getitem_44, 1e-05); getitem_44 = None + rsqrt_8: "f32[1, 64, 1][64, 1, 1]cuda:0" = torch.ops.aten.rsqrt.default(add_33); add_33 = None + sub_8: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.sub.Tensor(add_32, getitem_45) + mul_32: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.mul.Tensor(sub_8, rsqrt_8); sub_8 = None + mul_33: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.mul.Tensor(mul_32, primals_52); mul_32 = None + add_34: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.add.Tensor(mul_33, primals_53); mul_33 = primals_53 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:82 in forward, code: q, k, v = self.c_attn(x).split(self.n_embd, dim=2) + view_48: "f32[64, 768][768, 1]cuda:0" = torch.ops.aten.view.default(add_34, [64, 768]); add_34 = None + permute_32: "f32[768, 2304][1, 768]cuda:0" = torch.ops.aten.permute.default(primals_54, [1, 0]); primals_54 = None + addmm_16: "f32[64, 2304][2304, 1]cuda:0" = torch.ops.aten.addmm.default(primals_55, view_48, permute_32); primals_55 = None + view_49: "f32[1, 64, 2304][147456, 2304, 1]cuda:0" = torch.ops.aten.view.default(addmm_16, [1, 64, 2304]); addmm_16 = None + split_4 = torch.ops.aten.split.Tensor(view_49, 768, 2); view_49 = None + getitem_46: "f32[1, 64, 768][147456, 2304, 1]cuda:0" = split_4[0] + getitem_47: "f32[1, 64, 768][147456, 2304, 1]cuda:0" = split_4[1] + getitem_48: "f32[1, 64, 768][147456, 2304, 1]cuda:0" = split_4[2]; split_4 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:83 in forward, code: k = k.view(B, T, self.n_head, C // self.n_head).transpose( + view_50: "f32[1, 64, 12, 64][147456, 2304, 64, 1]cuda:0" = torch.ops.aten.view.default(getitem_47, [1, 64, 12, 64]); getitem_47 = None + permute_33: "f32[1, 12, 64, 64][147456, 64, 2304, 1]cuda:0" = torch.ops.aten.permute.default(view_50, [0, 2, 1, 3]); view_50 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:86 in forward, code: q = q.view(B, T, self.n_head, C // self.n_head).transpose( + view_51: "f32[1, 64, 12, 64][147456, 2304, 64, 1]cuda:0" = torch.ops.aten.view.default(getitem_46, [1, 64, 12, 64]); getitem_46 = None + permute_34: "f32[1, 12, 64, 64][147456, 64, 2304, 1]cuda:0" = torch.ops.aten.permute.default(view_51, [0, 2, 1, 3]); view_51 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:89 in forward, code: v = v.view(B, T, self.n_head, C // self.n_head).transpose( + view_52: "f32[1, 64, 12, 64][147456, 2304, 64, 1]cuda:0" = torch.ops.aten.view.default(getitem_48, [1, 64, 12, 64]); getitem_48 = None + permute_35: "f32[1, 12, 64, 64][147456, 64, 2304, 1]cuda:0" = torch.ops.aten.permute.default(view_52, [0, 2, 1, 3]); view_52 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:96 in forward, code: y = torch.nn.functional.scaled_dot_product_attention( + _scaled_dot_product_efficient_attention_4 = torch.ops.aten._scaled_dot_product_efficient_attention.default(permute_34, permute_33, permute_35, None, True, 0.0, True) + getitem_49: "f32[1, 12, 64, 64][49152, 64, 768, 1]cuda:0" = _scaled_dot_product_efficient_attention_4[0] + getitem_50: "f32[1, 12, 64][768, 64, 1]cuda:0" = _scaled_dot_product_efficient_attention_4[1] + getitem_51: "i64[][]cuda:0" = 
_scaled_dot_product_efficient_attention_4[2] + getitem_52: "i64[][]cuda:0" = _scaled_dot_product_efficient_attention_4[3]; _scaled_dot_product_efficient_attention_4 = None + alias_16: "f32[1, 12, 64, 64][49152, 64, 768, 1]cuda:0" = torch.ops.aten.alias.default(getitem_49) + alias_17: "f32[1, 12, 64, 64][49152, 64, 768, 1]cuda:0" = torch.ops.aten.alias.default(alias_16); alias_16 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:112 in forward, code: y.transpose(1, 2).contiguous().view(B, T, C) + permute_36: "f32[1, 64, 12, 64][49152, 768, 64, 1]cuda:0" = torch.ops.aten.permute.default(getitem_49, [0, 2, 1, 3]); getitem_49 = None + view_53: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.view.default(permute_36, [1, 64, 768]); permute_36 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:116 in forward, code: y = self.resid_dropout(self.c_proj(y)) + view_54: "f32[64, 768][768, 1]cuda:0" = torch.ops.aten.view.default(view_53, [64, 768]); view_53 = None + permute_37: "f32[768, 768][1, 768]cuda:0" = torch.ops.aten.permute.default(primals_56, [1, 0]); primals_56 = None + addmm_17: "f32[64, 768][768, 1]cuda:0" = torch.ops.aten.addmm.default(primals_57, view_54, permute_37); primals_57 = None + view_55: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.view.default(addmm_17, [1, 64, 768]); addmm_17 = None + clone_9: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.clone.default(view_55); view_55 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:144 in forward, code: x = x + self.attn(self.ln_1(x)) + add_35: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.add.Tensor(add_32, clone_9); clone_9 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:43 in forward, code: return F.layer_norm(input, self.weight.shape, self.weight, self.bias, 1e-5) + var_mean_9 = torch.ops.aten.var_mean.correction(add_35, [2], correction = 0, keepdim = True) + getitem_53: "f32[1, 64, 1][64, 1, 1]cuda:0" = var_mean_9[0] + getitem_54: "f32[1, 64, 1][64, 1, 1]cuda:0" = var_mean_9[1]; var_mean_9 = None + add_36: "f32[1, 64, 1][64, 1, 1]cuda:0" = torch.ops.aten.add.Tensor(getitem_53, 1e-05); getitem_53 = None + rsqrt_9: "f32[1, 64, 1][64, 1, 1]cuda:0" = torch.ops.aten.rsqrt.default(add_36); add_36 = None + sub_9: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.sub.Tensor(add_35, getitem_54) + mul_34: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.mul.Tensor(sub_9, rsqrt_9); sub_9 = None + mul_35: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.mul.Tensor(mul_34, primals_58); mul_34 = None + add_37: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.add.Tensor(mul_35, primals_59); mul_35 = primals_59 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:128 in forward, code: x = self.c_fc(x) + view_56: "f32[64, 768][768, 1]cuda:0" = torch.ops.aten.view.default(add_37, [64, 768]); add_37 = None + permute_38: "f32[768, 3072][1, 768]cuda:0" = torch.ops.aten.permute.default(primals_60, [1, 0]); primals_60 = None + addmm_18: "f32[64, 3072][3072, 1]cuda:0" = torch.ops.aten.addmm.default(primals_61, view_56, permute_38); primals_61 = None + view_57: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.view.default(addmm_18, [1, 64, 3072]); addmm_18 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:25 in new_gelu, code: 0.5 + mul_36: 
"f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.mul.Tensor(view_57, 0.5) + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:29 in new_gelu, code: + torch.tanh(math.sqrt(2.0 / math.pi) * (x + 0.044715 * torch.pow(x, 3.0))) + pow_5: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.pow.Tensor_Scalar(view_57, 3.0) + mul_37: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.mul.Tensor(pow_5, 0.044715); pow_5 = None + add_38: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.add.Tensor(view_57, mul_37); mul_37 = None + mul_38: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.mul.Tensor(add_38, 0.7978845608028654); add_38 = None + tanh_4: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.tanh.default(mul_38); mul_38 = None + alias_18: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.alias.default(tanh_4) + alias_19: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.alias.default(alias_18); alias_18 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:28 in new_gelu, code: 1.0 + add_39: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.add.Tensor(tanh_4, 1.0); tanh_4 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:25 in new_gelu, code: 0.5 + mul_39: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.mul.Tensor(mul_36, add_39) + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:130 in forward, code: x = self.c_proj(x) + view_58: "f32[64, 3072][3072, 1]cuda:0" = torch.ops.aten.view.default(mul_39, [64, 3072]); mul_39 = None + permute_39: "f32[3072, 768][1, 3072]cuda:0" = torch.ops.aten.permute.default(primals_62, [1, 0]); primals_62 = None + addmm_19: "f32[64, 768][768, 1]cuda:0" = torch.ops.aten.addmm.default(primals_63, view_58, permute_39); primals_63 = None + view_59: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.view.default(addmm_19, [1, 64, 768]); addmm_19 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:131 in forward, code: x = self.dropout(x) + clone_10: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.clone.default(view_59); view_59 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:145 in forward, code: x = x + self.mlp(self.ln_2(x)) + add_40: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.add.Tensor(add_35, clone_10); clone_10 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:43 in forward, code: return F.layer_norm(input, self.weight.shape, self.weight, self.bias, 1e-5) + var_mean_10 = torch.ops.aten.var_mean.correction(add_40, [2], correction = 0, keepdim = True) + getitem_55: "f32[1, 64, 1][64, 1, 1]cuda:0" = var_mean_10[0] + getitem_56: "f32[1, 64, 1][64, 1, 1]cuda:0" = var_mean_10[1]; var_mean_10 = None + add_41: "f32[1, 64, 1][64, 1, 1]cuda:0" = torch.ops.aten.add.Tensor(getitem_55, 1e-05); getitem_55 = None + rsqrt_10: "f32[1, 64, 1][64, 1, 1]cuda:0" = torch.ops.aten.rsqrt.default(add_41); add_41 = None + sub_10: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.sub.Tensor(add_40, getitem_56) + mul_40: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.mul.Tensor(sub_10, rsqrt_10); sub_10 = None + mul_41: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.mul.Tensor(mul_40, primals_64); mul_40 = None + add_42: "f32[1, 64, 768][49152, 768, 1]cuda:0" = 
torch.ops.aten.add.Tensor(mul_41, primals_65); mul_41 = primals_65 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:82 in forward, code: q, k, v = self.c_attn(x).split(self.n_embd, dim=2) + view_60: "f32[64, 768][768, 1]cuda:0" = torch.ops.aten.view.default(add_42, [64, 768]); add_42 = None + permute_40: "f32[768, 2304][1, 768]cuda:0" = torch.ops.aten.permute.default(primals_66, [1, 0]); primals_66 = None + addmm_20: "f32[64, 2304][2304, 1]cuda:0" = torch.ops.aten.addmm.default(primals_67, view_60, permute_40); primals_67 = None + view_61: "f32[1, 64, 2304][147456, 2304, 1]cuda:0" = torch.ops.aten.view.default(addmm_20, [1, 64, 2304]); addmm_20 = None + split_5 = torch.ops.aten.split.Tensor(view_61, 768, 2); view_61 = None + getitem_57: "f32[1, 64, 768][147456, 2304, 1]cuda:0" = split_5[0] + getitem_58: "f32[1, 64, 768][147456, 2304, 1]cuda:0" = split_5[1] + getitem_59: "f32[1, 64, 768][147456, 2304, 1]cuda:0" = split_5[2]; split_5 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:83 in forward, code: k = k.view(B, T, self.n_head, C // self.n_head).transpose( + view_62: "f32[1, 64, 12, 64][147456, 2304, 64, 1]cuda:0" = torch.ops.aten.view.default(getitem_58, [1, 64, 12, 64]); getitem_58 = None + permute_41: "f32[1, 12, 64, 64][147456, 64, 2304, 1]cuda:0" = torch.ops.aten.permute.default(view_62, [0, 2, 1, 3]); view_62 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:86 in forward, code: q = q.view(B, T, self.n_head, C // self.n_head).transpose( + view_63: "f32[1, 64, 12, 64][147456, 2304, 64, 1]cuda:0" = torch.ops.aten.view.default(getitem_57, [1, 64, 12, 64]); getitem_57 = None + permute_42: "f32[1, 12, 64, 64][147456, 64, 2304, 1]cuda:0" = torch.ops.aten.permute.default(view_63, [0, 2, 1, 3]); view_63 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:89 in forward, code: v = v.view(B, T, self.n_head, C // self.n_head).transpose( + view_64: "f32[1, 64, 12, 64][147456, 2304, 64, 1]cuda:0" = torch.ops.aten.view.default(getitem_59, [1, 64, 12, 64]); getitem_59 = None + permute_43: "f32[1, 12, 64, 64][147456, 64, 2304, 1]cuda:0" = torch.ops.aten.permute.default(view_64, [0, 2, 1, 3]); view_64 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:96 in forward, code: y = torch.nn.functional.scaled_dot_product_attention( + _scaled_dot_product_efficient_attention_5 = torch.ops.aten._scaled_dot_product_efficient_attention.default(permute_42, permute_41, permute_43, None, True, 0.0, True) + getitem_60: "f32[1, 12, 64, 64][49152, 64, 768, 1]cuda:0" = _scaled_dot_product_efficient_attention_5[0] + getitem_61: "f32[1, 12, 64][768, 64, 1]cuda:0" = _scaled_dot_product_efficient_attention_5[1] + getitem_62: "i64[][]cuda:0" = _scaled_dot_product_efficient_attention_5[2] + getitem_63: "i64[][]cuda:0" = _scaled_dot_product_efficient_attention_5[3]; _scaled_dot_product_efficient_attention_5 = None + alias_20: "f32[1, 12, 64, 64][49152, 64, 768, 1]cuda:0" = torch.ops.aten.alias.default(getitem_60) + alias_21: "f32[1, 12, 64, 64][49152, 64, 768, 1]cuda:0" = torch.ops.aten.alias.default(alias_20); alias_20 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:112 in forward, code: y.transpose(1, 2).contiguous().view(B, T, C) + permute_44: "f32[1, 64, 12, 64][49152, 768, 64, 1]cuda:0" = torch.ops.aten.permute.default(getitem_60, [0, 2, 1, 3]); getitem_60 = None + 
view_65: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.view.default(permute_44, [1, 64, 768]); permute_44 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:116 in forward, code: y = self.resid_dropout(self.c_proj(y)) + view_66: "f32[64, 768][768, 1]cuda:0" = torch.ops.aten.view.default(view_65, [64, 768]); view_65 = None + permute_45: "f32[768, 768][1, 768]cuda:0" = torch.ops.aten.permute.default(primals_68, [1, 0]); primals_68 = None + addmm_21: "f32[64, 768][768, 1]cuda:0" = torch.ops.aten.addmm.default(primals_69, view_66, permute_45); primals_69 = None + view_67: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.view.default(addmm_21, [1, 64, 768]); addmm_21 = None + clone_11: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.clone.default(view_67); view_67 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:144 in forward, code: x = x + self.attn(self.ln_1(x)) + add_43: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.add.Tensor(add_40, clone_11); clone_11 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:43 in forward, code: return F.layer_norm(input, self.weight.shape, self.weight, self.bias, 1e-5) + var_mean_11 = torch.ops.aten.var_mean.correction(add_43, [2], correction = 0, keepdim = True) + getitem_64: "f32[1, 64, 1][64, 1, 1]cuda:0" = var_mean_11[0] + getitem_65: "f32[1, 64, 1][64, 1, 1]cuda:0" = var_mean_11[1]; var_mean_11 = None + add_44: "f32[1, 64, 1][64, 1, 1]cuda:0" = torch.ops.aten.add.Tensor(getitem_64, 1e-05); getitem_64 = None + rsqrt_11: "f32[1, 64, 1][64, 1, 1]cuda:0" = torch.ops.aten.rsqrt.default(add_44); add_44 = None + sub_11: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.sub.Tensor(add_43, getitem_65) + mul_42: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.mul.Tensor(sub_11, rsqrt_11); sub_11 = None + mul_43: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.mul.Tensor(mul_42, primals_70); mul_42 = None + add_45: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.add.Tensor(mul_43, primals_71); mul_43 = primals_71 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:128 in forward, code: x = self.c_fc(x) + view_68: "f32[64, 768][768, 1]cuda:0" = torch.ops.aten.view.default(add_45, [64, 768]); add_45 = None + permute_46: "f32[768, 3072][1, 768]cuda:0" = torch.ops.aten.permute.default(primals_72, [1, 0]); primals_72 = None + addmm_22: "f32[64, 3072][3072, 1]cuda:0" = torch.ops.aten.addmm.default(primals_73, view_68, permute_46); primals_73 = None + view_69: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.view.default(addmm_22, [1, 64, 3072]); addmm_22 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:25 in new_gelu, code: 0.5 + mul_44: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.mul.Tensor(view_69, 0.5) + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:29 in new_gelu, code: + torch.tanh(math.sqrt(2.0 / math.pi) * (x + 0.044715 * torch.pow(x, 3.0))) + pow_6: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.pow.Tensor_Scalar(view_69, 3.0) + mul_45: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.mul.Tensor(pow_6, 0.044715); pow_6 = None + add_46: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.add.Tensor(view_69, mul_45); mul_45 = None + mul_46: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = 
torch.ops.aten.mul.Tensor(add_46, 0.7978845608028654); add_46 = None + tanh_5: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.tanh.default(mul_46); mul_46 = None + alias_22: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.alias.default(tanh_5) + alias_23: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.alias.default(alias_22); alias_22 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:28 in new_gelu, code: 1.0 + add_47: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.add.Tensor(tanh_5, 1.0); tanh_5 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:25 in new_gelu, code: 0.5 + mul_47: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.mul.Tensor(mul_44, add_47) + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:130 in forward, code: x = self.c_proj(x) + view_70: "f32[64, 3072][3072, 1]cuda:0" = torch.ops.aten.view.default(mul_47, [64, 3072]); mul_47 = None + permute_47: "f32[3072, 768][1, 3072]cuda:0" = torch.ops.aten.permute.default(primals_74, [1, 0]); primals_74 = None + addmm_23: "f32[64, 768][768, 1]cuda:0" = torch.ops.aten.addmm.default(primals_75, view_70, permute_47); primals_75 = None + view_71: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.view.default(addmm_23, [1, 64, 768]); addmm_23 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:131 in forward, code: x = self.dropout(x) + clone_12: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.clone.default(view_71); view_71 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:145 in forward, code: x = x + self.mlp(self.ln_2(x)) + add_48: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.add.Tensor(add_43, clone_12); clone_12 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:43 in forward, code: return F.layer_norm(input, self.weight.shape, self.weight, self.bias, 1e-5) + var_mean_12 = torch.ops.aten.var_mean.correction(add_48, [2], correction = 0, keepdim = True) + getitem_66: "f32[1, 64, 1][64, 1, 1]cuda:0" = var_mean_12[0] + getitem_67: "f32[1, 64, 1][64, 1, 1]cuda:0" = var_mean_12[1]; var_mean_12 = None + add_49: "f32[1, 64, 1][64, 1, 1]cuda:0" = torch.ops.aten.add.Tensor(getitem_66, 1e-05); getitem_66 = None + rsqrt_12: "f32[1, 64, 1][64, 1, 1]cuda:0" = torch.ops.aten.rsqrt.default(add_49); add_49 = None + sub_12: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.sub.Tensor(add_48, getitem_67) + mul_48: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.mul.Tensor(sub_12, rsqrt_12); sub_12 = None + mul_49: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.mul.Tensor(mul_48, primals_76); mul_48 = None + add_50: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.add.Tensor(mul_49, primals_77); mul_49 = primals_77 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:82 in forward, code: q, k, v = self.c_attn(x).split(self.n_embd, dim=2) + view_72: "f32[64, 768][768, 1]cuda:0" = torch.ops.aten.view.default(add_50, [64, 768]); add_50 = None + permute_48: "f32[768, 2304][1, 768]cuda:0" = torch.ops.aten.permute.default(primals_78, [1, 0]); primals_78 = None + addmm_24: "f32[64, 2304][2304, 1]cuda:0" = torch.ops.aten.addmm.default(primals_79, view_72, permute_48); primals_79 = None + view_73: "f32[1, 64, 2304][147456, 2304, 1]cuda:0" = 
torch.ops.aten.view.default(addmm_24, [1, 64, 2304]); addmm_24 = None + split_6 = torch.ops.aten.split.Tensor(view_73, 768, 2); view_73 = None + getitem_68: "f32[1, 64, 768][147456, 2304, 1]cuda:0" = split_6[0] + getitem_69: "f32[1, 64, 768][147456, 2304, 1]cuda:0" = split_6[1] + getitem_70: "f32[1, 64, 768][147456, 2304, 1]cuda:0" = split_6[2]; split_6 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:83 in forward, code: k = k.view(B, T, self.n_head, C // self.n_head).transpose( + view_74: "f32[1, 64, 12, 64][147456, 2304, 64, 1]cuda:0" = torch.ops.aten.view.default(getitem_69, [1, 64, 12, 64]); getitem_69 = None + permute_49: "f32[1, 12, 64, 64][147456, 64, 2304, 1]cuda:0" = torch.ops.aten.permute.default(view_74, [0, 2, 1, 3]); view_74 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:86 in forward, code: q = q.view(B, T, self.n_head, C // self.n_head).transpose( + view_75: "f32[1, 64, 12, 64][147456, 2304, 64, 1]cuda:0" = torch.ops.aten.view.default(getitem_68, [1, 64, 12, 64]); getitem_68 = None + permute_50: "f32[1, 12, 64, 64][147456, 64, 2304, 1]cuda:0" = torch.ops.aten.permute.default(view_75, [0, 2, 1, 3]); view_75 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:89 in forward, code: v = v.view(B, T, self.n_head, C // self.n_head).transpose( + view_76: "f32[1, 64, 12, 64][147456, 2304, 64, 1]cuda:0" = torch.ops.aten.view.default(getitem_70, [1, 64, 12, 64]); getitem_70 = None + permute_51: "f32[1, 12, 64, 64][147456, 64, 2304, 1]cuda:0" = torch.ops.aten.permute.default(view_76, [0, 2, 1, 3]); view_76 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:96 in forward, code: y = torch.nn.functional.scaled_dot_product_attention( + _scaled_dot_product_efficient_attention_6 = torch.ops.aten._scaled_dot_product_efficient_attention.default(permute_50, permute_49, permute_51, None, True, 0.0, True) + getitem_71: "f32[1, 12, 64, 64][49152, 64, 768, 1]cuda:0" = _scaled_dot_product_efficient_attention_6[0] + getitem_72: "f32[1, 12, 64][768, 64, 1]cuda:0" = _scaled_dot_product_efficient_attention_6[1] + getitem_73: "i64[][]cuda:0" = _scaled_dot_product_efficient_attention_6[2] + getitem_74: "i64[][]cuda:0" = _scaled_dot_product_efficient_attention_6[3]; _scaled_dot_product_efficient_attention_6 = None + alias_24: "f32[1, 12, 64, 64][49152, 64, 768, 1]cuda:0" = torch.ops.aten.alias.default(getitem_71) + alias_25: "f32[1, 12, 64, 64][49152, 64, 768, 1]cuda:0" = torch.ops.aten.alias.default(alias_24); alias_24 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:112 in forward, code: y.transpose(1, 2).contiguous().view(B, T, C) + permute_52: "f32[1, 64, 12, 64][49152, 768, 64, 1]cuda:0" = torch.ops.aten.permute.default(getitem_71, [0, 2, 1, 3]); getitem_71 = None + view_77: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.view.default(permute_52, [1, 64, 768]); permute_52 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:116 in forward, code: y = self.resid_dropout(self.c_proj(y)) + view_78: "f32[64, 768][768, 1]cuda:0" = torch.ops.aten.view.default(view_77, [64, 768]); view_77 = None + permute_53: "f32[768, 768][1, 768]cuda:0" = torch.ops.aten.permute.default(primals_80, [1, 0]); primals_80 = None + addmm_25: "f32[64, 768][768, 1]cuda:0" = torch.ops.aten.addmm.default(primals_81, view_78, permute_53); primals_81 = None + view_79: "f32[1, 64, 
768][49152, 768, 1]cuda:0" = torch.ops.aten.view.default(addmm_25, [1, 64, 768]); addmm_25 = None + clone_13: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.clone.default(view_79); view_79 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:144 in forward, code: x = x + self.attn(self.ln_1(x)) + add_51: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.add.Tensor(add_48, clone_13); clone_13 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:43 in forward, code: return F.layer_norm(input, self.weight.shape, self.weight, self.bias, 1e-5) + var_mean_13 = torch.ops.aten.var_mean.correction(add_51, [2], correction = 0, keepdim = True) + getitem_75: "f32[1, 64, 1][64, 1, 1]cuda:0" = var_mean_13[0] + getitem_76: "f32[1, 64, 1][64, 1, 1]cuda:0" = var_mean_13[1]; var_mean_13 = None + add_52: "f32[1, 64, 1][64, 1, 1]cuda:0" = torch.ops.aten.add.Tensor(getitem_75, 1e-05); getitem_75 = None + rsqrt_13: "f32[1, 64, 1][64, 1, 1]cuda:0" = torch.ops.aten.rsqrt.default(add_52); add_52 = None + sub_13: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.sub.Tensor(add_51, getitem_76) + mul_50: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.mul.Tensor(sub_13, rsqrt_13); sub_13 = None + mul_51: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.mul.Tensor(mul_50, primals_82); mul_50 = None + add_53: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.add.Tensor(mul_51, primals_83); mul_51 = primals_83 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:128 in forward, code: x = self.c_fc(x) + view_80: "f32[64, 768][768, 1]cuda:0" = torch.ops.aten.view.default(add_53, [64, 768]); add_53 = None + permute_54: "f32[768, 3072][1, 768]cuda:0" = torch.ops.aten.permute.default(primals_84, [1, 0]); primals_84 = None + addmm_26: "f32[64, 3072][3072, 1]cuda:0" = torch.ops.aten.addmm.default(primals_85, view_80, permute_54); primals_85 = None + view_81: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.view.default(addmm_26, [1, 64, 3072]); addmm_26 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:25 in new_gelu, code: 0.5 + mul_52: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.mul.Tensor(view_81, 0.5) + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:29 in new_gelu, code: + torch.tanh(math.sqrt(2.0 / math.pi) * (x + 0.044715 * torch.pow(x, 3.0))) + pow_7: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.pow.Tensor_Scalar(view_81, 3.0) + mul_53: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.mul.Tensor(pow_7, 0.044715); pow_7 = None + add_54: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.add.Tensor(view_81, mul_53); mul_53 = None + mul_54: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.mul.Tensor(add_54, 0.7978845608028654); add_54 = None + tanh_6: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.tanh.default(mul_54); mul_54 = None + alias_26: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.alias.default(tanh_6) + alias_27: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.alias.default(alias_26); alias_26 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:28 in new_gelu, code: 1.0 + add_55: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.add.Tensor(tanh_6, 1.0); tanh_6 = None + + # File: 
/data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:25 in new_gelu, code: 0.5 + mul_55: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.mul.Tensor(mul_52, add_55) + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:130 in forward, code: x = self.c_proj(x) + view_82: "f32[64, 3072][3072, 1]cuda:0" = torch.ops.aten.view.default(mul_55, [64, 3072]); mul_55 = None + permute_55: "f32[3072, 768][1, 3072]cuda:0" = torch.ops.aten.permute.default(primals_86, [1, 0]); primals_86 = None + addmm_27: "f32[64, 768][768, 1]cuda:0" = torch.ops.aten.addmm.default(primals_87, view_82, permute_55); primals_87 = None + view_83: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.view.default(addmm_27, [1, 64, 768]); addmm_27 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:131 in forward, code: x = self.dropout(x) + clone_14: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.clone.default(view_83); view_83 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:145 in forward, code: x = x + self.mlp(self.ln_2(x)) + add_56: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.add.Tensor(add_51, clone_14); clone_14 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:43 in forward, code: return F.layer_norm(input, self.weight.shape, self.weight, self.bias, 1e-5) + var_mean_14 = torch.ops.aten.var_mean.correction(add_56, [2], correction = 0, keepdim = True) + getitem_77: "f32[1, 64, 1][64, 1, 1]cuda:0" = var_mean_14[0] + getitem_78: "f32[1, 64, 1][64, 1, 1]cuda:0" = var_mean_14[1]; var_mean_14 = None + add_57: "f32[1, 64, 1][64, 1, 1]cuda:0" = torch.ops.aten.add.Tensor(getitem_77, 1e-05); getitem_77 = None + rsqrt_14: "f32[1, 64, 1][64, 1, 1]cuda:0" = torch.ops.aten.rsqrt.default(add_57); add_57 = None + sub_14: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.sub.Tensor(add_56, getitem_78) + mul_56: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.mul.Tensor(sub_14, rsqrt_14); sub_14 = None + mul_57: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.mul.Tensor(mul_56, primals_88); mul_56 = None + add_58: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.add.Tensor(mul_57, primals_89); mul_57 = primals_89 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:82 in forward, code: q, k, v = self.c_attn(x).split(self.n_embd, dim=2) + view_84: "f32[64, 768][768, 1]cuda:0" = torch.ops.aten.view.default(add_58, [64, 768]); add_58 = None + permute_56: "f32[768, 2304][1, 768]cuda:0" = torch.ops.aten.permute.default(primals_90, [1, 0]); primals_90 = None + addmm_28: "f32[64, 2304][2304, 1]cuda:0" = torch.ops.aten.addmm.default(primals_91, view_84, permute_56); primals_91 = None + view_85: "f32[1, 64, 2304][147456, 2304, 1]cuda:0" = torch.ops.aten.view.default(addmm_28, [1, 64, 2304]); addmm_28 = None + split_7 = torch.ops.aten.split.Tensor(view_85, 768, 2); view_85 = None + getitem_79: "f32[1, 64, 768][147456, 2304, 1]cuda:0" = split_7[0] + getitem_80: "f32[1, 64, 768][147456, 2304, 1]cuda:0" = split_7[1] + getitem_81: "f32[1, 64, 768][147456, 2304, 1]cuda:0" = split_7[2]; split_7 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:83 in forward, code: k = k.view(B, T, self.n_head, C // self.n_head).transpose( + view_86: "f32[1, 64, 12, 64][147456, 2304, 64, 1]cuda:0" = torch.ops.aten.view.default(getitem_80, [1, 64, 12, 64]); 
getitem_80 = None + permute_57: "f32[1, 12, 64, 64][147456, 64, 2304, 1]cuda:0" = torch.ops.aten.permute.default(view_86, [0, 2, 1, 3]); view_86 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:86 in forward, code: q = q.view(B, T, self.n_head, C // self.n_head).transpose( + view_87: "f32[1, 64, 12, 64][147456, 2304, 64, 1]cuda:0" = torch.ops.aten.view.default(getitem_79, [1, 64, 12, 64]); getitem_79 = None + permute_58: "f32[1, 12, 64, 64][147456, 64, 2304, 1]cuda:0" = torch.ops.aten.permute.default(view_87, [0, 2, 1, 3]); view_87 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:89 in forward, code: v = v.view(B, T, self.n_head, C // self.n_head).transpose( + view_88: "f32[1, 64, 12, 64][147456, 2304, 64, 1]cuda:0" = torch.ops.aten.view.default(getitem_81, [1, 64, 12, 64]); getitem_81 = None + permute_59: "f32[1, 12, 64, 64][147456, 64, 2304, 1]cuda:0" = torch.ops.aten.permute.default(view_88, [0, 2, 1, 3]); view_88 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:96 in forward, code: y = torch.nn.functional.scaled_dot_product_attention( + _scaled_dot_product_efficient_attention_7 = torch.ops.aten._scaled_dot_product_efficient_attention.default(permute_58, permute_57, permute_59, None, True, 0.0, True) + getitem_82: "f32[1, 12, 64, 64][49152, 64, 768, 1]cuda:0" = _scaled_dot_product_efficient_attention_7[0] + getitem_83: "f32[1, 12, 64][768, 64, 1]cuda:0" = _scaled_dot_product_efficient_attention_7[1] + getitem_84: "i64[][]cuda:0" = _scaled_dot_product_efficient_attention_7[2] + getitem_85: "i64[][]cuda:0" = _scaled_dot_product_efficient_attention_7[3]; _scaled_dot_product_efficient_attention_7 = None + alias_28: "f32[1, 12, 64, 64][49152, 64, 768, 1]cuda:0" = torch.ops.aten.alias.default(getitem_82) + alias_29: "f32[1, 12, 64, 64][49152, 64, 768, 1]cuda:0" = torch.ops.aten.alias.default(alias_28); alias_28 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:112 in forward, code: y.transpose(1, 2).contiguous().view(B, T, C) + permute_60: "f32[1, 64, 12, 64][49152, 768, 64, 1]cuda:0" = torch.ops.aten.permute.default(getitem_82, [0, 2, 1, 3]); getitem_82 = None + view_89: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.view.default(permute_60, [1, 64, 768]); permute_60 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:116 in forward, code: y = self.resid_dropout(self.c_proj(y)) + view_90: "f32[64, 768][768, 1]cuda:0" = torch.ops.aten.view.default(view_89, [64, 768]); view_89 = None + permute_61: "f32[768, 768][1, 768]cuda:0" = torch.ops.aten.permute.default(primals_92, [1, 0]); primals_92 = None + addmm_29: "f32[64, 768][768, 1]cuda:0" = torch.ops.aten.addmm.default(primals_93, view_90, permute_61); primals_93 = None + view_91: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.view.default(addmm_29, [1, 64, 768]); addmm_29 = None + clone_15: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.clone.default(view_91); view_91 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:144 in forward, code: x = x + self.attn(self.ln_1(x)) + add_59: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.add.Tensor(add_56, clone_15); clone_15 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:43 in forward, code: return F.layer_norm(input, self.weight.shape, self.weight, self.bias, 1e-5) + 
var_mean_15 = torch.ops.aten.var_mean.correction(add_59, [2], correction = 0, keepdim = True) + getitem_86: "f32[1, 64, 1][64, 1, 1]cuda:0" = var_mean_15[0] + getitem_87: "f32[1, 64, 1][64, 1, 1]cuda:0" = var_mean_15[1]; var_mean_15 = None + add_60: "f32[1, 64, 1][64, 1, 1]cuda:0" = torch.ops.aten.add.Tensor(getitem_86, 1e-05); getitem_86 = None + rsqrt_15: "f32[1, 64, 1][64, 1, 1]cuda:0" = torch.ops.aten.rsqrt.default(add_60); add_60 = None + sub_15: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.sub.Tensor(add_59, getitem_87) + mul_58: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.mul.Tensor(sub_15, rsqrt_15); sub_15 = None + mul_59: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.mul.Tensor(mul_58, primals_94); mul_58 = None + add_61: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.add.Tensor(mul_59, primals_95); mul_59 = primals_95 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:128 in forward, code: x = self.c_fc(x) + view_92: "f32[64, 768][768, 1]cuda:0" = torch.ops.aten.view.default(add_61, [64, 768]); add_61 = None + permute_62: "f32[768, 3072][1, 768]cuda:0" = torch.ops.aten.permute.default(primals_96, [1, 0]); primals_96 = None + addmm_30: "f32[64, 3072][3072, 1]cuda:0" = torch.ops.aten.addmm.default(primals_97, view_92, permute_62); primals_97 = None + view_93: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.view.default(addmm_30, [1, 64, 3072]); addmm_30 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:25 in new_gelu, code: 0.5 + mul_60: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.mul.Tensor(view_93, 0.5) + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:29 in new_gelu, code: + torch.tanh(math.sqrt(2.0 / math.pi) * (x + 0.044715 * torch.pow(x, 3.0))) + pow_8: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.pow.Tensor_Scalar(view_93, 3.0) + mul_61: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.mul.Tensor(pow_8, 0.044715); pow_8 = None + add_62: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.add.Tensor(view_93, mul_61); mul_61 = None + mul_62: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.mul.Tensor(add_62, 0.7978845608028654); add_62 = None + tanh_7: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.tanh.default(mul_62); mul_62 = None + alias_30: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.alias.default(tanh_7) + alias_31: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.alias.default(alias_30); alias_30 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:28 in new_gelu, code: 1.0 + add_63: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.add.Tensor(tanh_7, 1.0); tanh_7 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:25 in new_gelu, code: 0.5 + mul_63: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.mul.Tensor(mul_60, add_63) + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:130 in forward, code: x = self.c_proj(x) + view_94: "f32[64, 3072][3072, 1]cuda:0" = torch.ops.aten.view.default(mul_63, [64, 3072]); mul_63 = None + permute_63: "f32[3072, 768][1, 3072]cuda:0" = torch.ops.aten.permute.default(primals_98, [1, 0]); primals_98 = None + addmm_31: "f32[64, 768][768, 1]cuda:0" = torch.ops.aten.addmm.default(primals_99, view_94, permute_63); primals_99 = None + view_95: "f32[1, 64, 
768][49152, 768, 1]cuda:0" = torch.ops.aten.view.default(addmm_31, [1, 64, 768]); addmm_31 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:131 in forward, code: x = self.dropout(x) + clone_16: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.clone.default(view_95); view_95 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:145 in forward, code: x = x + self.mlp(self.ln_2(x)) + add_64: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.add.Tensor(add_59, clone_16); clone_16 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:43 in forward, code: return F.layer_norm(input, self.weight.shape, self.weight, self.bias, 1e-5) + var_mean_16 = torch.ops.aten.var_mean.correction(add_64, [2], correction = 0, keepdim = True) + getitem_88: "f32[1, 64, 1][64, 1, 1]cuda:0" = var_mean_16[0] + getitem_89: "f32[1, 64, 1][64, 1, 1]cuda:0" = var_mean_16[1]; var_mean_16 = None + add_65: "f32[1, 64, 1][64, 1, 1]cuda:0" = torch.ops.aten.add.Tensor(getitem_88, 1e-05); getitem_88 = None + rsqrt_16: "f32[1, 64, 1][64, 1, 1]cuda:0" = torch.ops.aten.rsqrt.default(add_65); add_65 = None + sub_16: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.sub.Tensor(add_64, getitem_89) + mul_64: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.mul.Tensor(sub_16, rsqrt_16); sub_16 = None + mul_65: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.mul.Tensor(mul_64, primals_100); mul_64 = None + add_66: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.add.Tensor(mul_65, primals_101); mul_65 = primals_101 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:82 in forward, code: q, k, v = self.c_attn(x).split(self.n_embd, dim=2) + view_96: "f32[64, 768][768, 1]cuda:0" = torch.ops.aten.view.default(add_66, [64, 768]); add_66 = None + permute_64: "f32[768, 2304][1, 768]cuda:0" = torch.ops.aten.permute.default(primals_102, [1, 0]); primals_102 = None + addmm_32: "f32[64, 2304][2304, 1]cuda:0" = torch.ops.aten.addmm.default(primals_103, view_96, permute_64); primals_103 = None + view_97: "f32[1, 64, 2304][147456, 2304, 1]cuda:0" = torch.ops.aten.view.default(addmm_32, [1, 64, 2304]); addmm_32 = None + split_8 = torch.ops.aten.split.Tensor(view_97, 768, 2); view_97 = None + getitem_90: "f32[1, 64, 768][147456, 2304, 1]cuda:0" = split_8[0] + getitem_91: "f32[1, 64, 768][147456, 2304, 1]cuda:0" = split_8[1] + getitem_92: "f32[1, 64, 768][147456, 2304, 1]cuda:0" = split_8[2]; split_8 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:83 in forward, code: k = k.view(B, T, self.n_head, C // self.n_head).transpose( + view_98: "f32[1, 64, 12, 64][147456, 2304, 64, 1]cuda:0" = torch.ops.aten.view.default(getitem_91, [1, 64, 12, 64]); getitem_91 = None + permute_65: "f32[1, 12, 64, 64][147456, 64, 2304, 1]cuda:0" = torch.ops.aten.permute.default(view_98, [0, 2, 1, 3]); view_98 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:86 in forward, code: q = q.view(B, T, self.n_head, C // self.n_head).transpose( + view_99: "f32[1, 64, 12, 64][147456, 2304, 64, 1]cuda:0" = torch.ops.aten.view.default(getitem_90, [1, 64, 12, 64]); getitem_90 = None + permute_66: "f32[1, 12, 64, 64][147456, 64, 2304, 1]cuda:0" = torch.ops.aten.permute.default(view_99, [0, 2, 1, 3]); view_99 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:89 in 
forward, code: v = v.view(B, T, self.n_head, C // self.n_head).transpose( + view_100: "f32[1, 64, 12, 64][147456, 2304, 64, 1]cuda:0" = torch.ops.aten.view.default(getitem_92, [1, 64, 12, 64]); getitem_92 = None + permute_67: "f32[1, 12, 64, 64][147456, 64, 2304, 1]cuda:0" = torch.ops.aten.permute.default(view_100, [0, 2, 1, 3]); view_100 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:96 in forward, code: y = torch.nn.functional.scaled_dot_product_attention( + _scaled_dot_product_efficient_attention_8 = torch.ops.aten._scaled_dot_product_efficient_attention.default(permute_66, permute_65, permute_67, None, True, 0.0, True) + getitem_93: "f32[1, 12, 64, 64][49152, 64, 768, 1]cuda:0" = _scaled_dot_product_efficient_attention_8[0] + getitem_94: "f32[1, 12, 64][768, 64, 1]cuda:0" = _scaled_dot_product_efficient_attention_8[1] + getitem_95: "i64[][]cuda:0" = _scaled_dot_product_efficient_attention_8[2] + getitem_96: "i64[][]cuda:0" = _scaled_dot_product_efficient_attention_8[3]; _scaled_dot_product_efficient_attention_8 = None + alias_32: "f32[1, 12, 64, 64][49152, 64, 768, 1]cuda:0" = torch.ops.aten.alias.default(getitem_93) + alias_33: "f32[1, 12, 64, 64][49152, 64, 768, 1]cuda:0" = torch.ops.aten.alias.default(alias_32); alias_32 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:112 in forward, code: y.transpose(1, 2).contiguous().view(B, T, C) + permute_68: "f32[1, 64, 12, 64][49152, 768, 64, 1]cuda:0" = torch.ops.aten.permute.default(getitem_93, [0, 2, 1, 3]); getitem_93 = None + view_101: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.view.default(permute_68, [1, 64, 768]); permute_68 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:116 in forward, code: y = self.resid_dropout(self.c_proj(y)) + view_102: "f32[64, 768][768, 1]cuda:0" = torch.ops.aten.view.default(view_101, [64, 768]); view_101 = None + permute_69: "f32[768, 768][1, 768]cuda:0" = torch.ops.aten.permute.default(primals_104, [1, 0]); primals_104 = None + addmm_33: "f32[64, 768][768, 1]cuda:0" = torch.ops.aten.addmm.default(primals_105, view_102, permute_69); primals_105 = None + view_103: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.view.default(addmm_33, [1, 64, 768]); addmm_33 = None + clone_17: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.clone.default(view_103); view_103 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:144 in forward, code: x = x + self.attn(self.ln_1(x)) + add_67: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.add.Tensor(add_64, clone_17); clone_17 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:43 in forward, code: return F.layer_norm(input, self.weight.shape, self.weight, self.bias, 1e-5) + var_mean_17 = torch.ops.aten.var_mean.correction(add_67, [2], correction = 0, keepdim = True) + getitem_97: "f32[1, 64, 1][64, 1, 1]cuda:0" = var_mean_17[0] + getitem_98: "f32[1, 64, 1][64, 1, 1]cuda:0" = var_mean_17[1]; var_mean_17 = None + add_68: "f32[1, 64, 1][64, 1, 1]cuda:0" = torch.ops.aten.add.Tensor(getitem_97, 1e-05); getitem_97 = None + rsqrt_17: "f32[1, 64, 1][64, 1, 1]cuda:0" = torch.ops.aten.rsqrt.default(add_68); add_68 = None + sub_17: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.sub.Tensor(add_67, getitem_98) + mul_66: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.mul.Tensor(sub_17, rsqrt_17); sub_17 = None + mul_67: "f32[1, 
64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.mul.Tensor(mul_66, primals_106); mul_66 = None + add_69: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.add.Tensor(mul_67, primals_107); mul_67 = primals_107 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:128 in forward, code: x = self.c_fc(x) + view_104: "f32[64, 768][768, 1]cuda:0" = torch.ops.aten.view.default(add_69, [64, 768]); add_69 = None + permute_70: "f32[768, 3072][1, 768]cuda:0" = torch.ops.aten.permute.default(primals_108, [1, 0]); primals_108 = None + addmm_34: "f32[64, 3072][3072, 1]cuda:0" = torch.ops.aten.addmm.default(primals_109, view_104, permute_70); primals_109 = None + view_105: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.view.default(addmm_34, [1, 64, 3072]); addmm_34 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:25 in new_gelu, code: 0.5 + mul_68: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.mul.Tensor(view_105, 0.5) + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:29 in new_gelu, code: + torch.tanh(math.sqrt(2.0 / math.pi) * (x + 0.044715 * torch.pow(x, 3.0))) + pow_9: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.pow.Tensor_Scalar(view_105, 3.0) + mul_69: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.mul.Tensor(pow_9, 0.044715); pow_9 = None + add_70: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.add.Tensor(view_105, mul_69); mul_69 = None + mul_70: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.mul.Tensor(add_70, 0.7978845608028654); add_70 = None + tanh_8: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.tanh.default(mul_70); mul_70 = None + alias_34: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.alias.default(tanh_8) + alias_35: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.alias.default(alias_34); alias_34 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:28 in new_gelu, code: 1.0 + add_71: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.add.Tensor(tanh_8, 1.0); tanh_8 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:25 in new_gelu, code: 0.5 + mul_71: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.mul.Tensor(mul_68, add_71) + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:130 in forward, code: x = self.c_proj(x) + view_106: "f32[64, 3072][3072, 1]cuda:0" = torch.ops.aten.view.default(mul_71, [64, 3072]); mul_71 = None + permute_71: "f32[3072, 768][1, 3072]cuda:0" = torch.ops.aten.permute.default(primals_110, [1, 0]); primals_110 = None + addmm_35: "f32[64, 768][768, 1]cuda:0" = torch.ops.aten.addmm.default(primals_111, view_106, permute_71); primals_111 = None + view_107: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.view.default(addmm_35, [1, 64, 768]); addmm_35 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:131 in forward, code: x = self.dropout(x) + clone_18: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.clone.default(view_107); view_107 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:145 in forward, code: x = x + self.mlp(self.ln_2(x)) + add_72: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.add.Tensor(add_67, clone_18); clone_18 = None + + # File: 
/data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:43 in forward, code: return F.layer_norm(input, self.weight.shape, self.weight, self.bias, 1e-5)
+ var_mean_18 = torch.ops.aten.var_mean.correction(add_72, [2], correction = 0, keepdim = True)
+ getitem_99: "f32[1, 64, 1][64, 1, 1]cuda:0" = var_mean_18[0]
+ getitem_100: "f32[1, 64, 1][64, 1, 1]cuda:0" = var_mean_18[1]; var_mean_18 = None
+ add_73: "f32[1, 64, 1][64, 1, 1]cuda:0" = torch.ops.aten.add.Tensor(getitem_99, 1e-05); getitem_99 = None
+ rsqrt_18: "f32[1, 64, 1][64, 1, 1]cuda:0" = torch.ops.aten.rsqrt.default(add_73); add_73 = None
+ sub_18: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.sub.Tensor(add_72, getitem_100)
+ mul_72: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.mul.Tensor(sub_18, rsqrt_18); sub_18 = None
+ mul_73: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.mul.Tensor(mul_72, primals_112); mul_72 = None
+ add_74: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.add.Tensor(mul_73, primals_113); mul_73 = primals_113 = None
+
+ # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:82 in forward, code: q, k, v = self.c_attn(x).split(self.n_embd, dim=2)
+ view_108: "f32[64, 768][768, 1]cuda:0" = torch.ops.aten.view.default(add_74, [64, 768]); add_74 = None
+ permute_72: "f32[768, 2304][1, 768]cuda:0" = torch.ops.aten.permute.default(primals_114, [1, 0]); primals_114 = None
+ addmm_36: "f32[64, 2304][2304, 1]cuda:0" = torch.ops.aten.addmm.default(primals_115, view_108, permute_72); primals_115 = None
+ view_109: "f32[1, 64, 2304][147456, 2304, 1]cuda:0" = torch.ops.aten.view.default(addmm_36, [1, 64, 2304]); addmm_36 = None
+ split_9 = torch.ops.aten.split.Tensor(view_109, 768, 2); view_109 = None
+ getitem_101: "f32[1, 64, 768][147456, 2304, 1]cuda:0" = split_9[0]
+ getitem_102: "f32[1, 64, 768][147456, 2304, 1]cuda:0" = split_9[1]
+ getitem_103: "f32[1, 64, 768][147456, 2304, 1]cuda:0" = split_9[2]; split_9 = None
+
+ # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:83 in forward, code: k = k.view(B, T, self.n_head, C // self.n_head).transpose(
+ view_110: "f32[1, 64, 12, 64][147456, 2304, 64, 1]cuda:0" = torch.ops.aten.view.default(getitem_102, [1, 64, 12, 64]); getitem_102 = None
+ permute_73: "f32[1, 12, 64, 64][147456, 64, 2304, 1]cuda:0" = torch.ops.aten.permute.default(view_110, [0, 2, 1, 3]); view_110 = None
+
+ # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:86 in forward, code: q = q.view(B, T, self.n_head, C // self.n_head).transpose(
+ view_111: "f32[1, 64, 12, 64][147456, 2304, 64, 1]cuda:0" = torch.ops.aten.view.default(getitem_101, [1, 64, 12, 64]); getitem_101 = None
+ permute_74: "f32[1, 12, 64, 64][147456, 64, 2304, 1]cuda:0" = torch.ops.aten.permute.default(view_111, [0, 2, 1, 3]); view_111 = None
+
+ # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:89 in forward, code: v = v.view(B, T, self.n_head, C // self.n_head).transpose(
+ view_112: "f32[1, 64, 12, 64][147456, 2304, 64, 1]cuda:0" = torch.ops.aten.view.default(getitem_103, [1, 64, 12, 64]); getitem_103 = None
+ permute_75: "f32[1, 12, 64, 64][147456, 64, 2304, 1]cuda:0" = torch.ops.aten.permute.default(view_112, [0, 2, 1, 3]); view_112 = None
+
+ # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:96 in forward, code: y = torch.nn.functional.scaled_dot_product_attention(
+ _scaled_dot_product_efficient_attention_9 = torch.ops.aten._scaled_dot_product_efficient_attention.default(permute_74, permute_73, permute_75, None, True, 0.0, True)
+ getitem_104: "f32[1, 12, 64, 64][49152, 64, 768, 1]cuda:0" = _scaled_dot_product_efficient_attention_9[0]
+ getitem_105: "f32[1, 12, 64][768, 64, 1]cuda:0" = _scaled_dot_product_efficient_attention_9[1]
+ getitem_106: "i64[][]cuda:0" = _scaled_dot_product_efficient_attention_9[2]
+ getitem_107: "i64[][]cuda:0" = _scaled_dot_product_efficient_attention_9[3]; _scaled_dot_product_efficient_attention_9 = None
+ alias_36: "f32[1, 12, 64, 64][49152, 64, 768, 1]cuda:0" = torch.ops.aten.alias.default(getitem_104)
+ alias_37: "f32[1, 12, 64, 64][49152, 64, 768, 1]cuda:0" = torch.ops.aten.alias.default(alias_36); alias_36 = None
+
+ # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:112 in forward, code: y.transpose(1, 2).contiguous().view(B, T, C)
+ permute_76: "f32[1, 64, 12, 64][49152, 768, 64, 1]cuda:0" = torch.ops.aten.permute.default(getitem_104, [0, 2, 1, 3]); getitem_104 = None
+ view_113: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.view.default(permute_76, [1, 64, 768]); permute_76 = None
+
+ # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:116 in forward, code: y = self.resid_dropout(self.c_proj(y))
+ view_114: "f32[64, 768][768, 1]cuda:0" = torch.ops.aten.view.default(view_113, [64, 768]); view_113 = None
+ permute_77: "f32[768, 768][1, 768]cuda:0" = torch.ops.aten.permute.default(primals_116, [1, 0]); primals_116 = None
+ addmm_37: "f32[64, 768][768, 1]cuda:0" = torch.ops.aten.addmm.default(primals_117, view_114, permute_77); primals_117 = None
+ view_115: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.view.default(addmm_37, [1, 64, 768]); addmm_37 = None
+ clone_19: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.clone.default(view_115); view_115 = None
+
+ # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:144 in forward, code: x = x + self.attn(self.ln_1(x))
+ add_75: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.add.Tensor(add_72, clone_19); clone_19 = None
+
+ # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:43 in forward, code: return F.layer_norm(input, self.weight.shape, self.weight, self.bias, 1e-5)
+ var_mean_19 = torch.ops.aten.var_mean.correction(add_75, [2], correction = 0, keepdim = True)
+ getitem_108: "f32[1, 64, 1][64, 1, 1]cuda:0" = var_mean_19[0]
+ getitem_109: "f32[1, 64, 1][64, 1, 1]cuda:0" = var_mean_19[1]; var_mean_19 = None
+ add_76: "f32[1, 64, 1][64, 1, 1]cuda:0" = torch.ops.aten.add.Tensor(getitem_108, 1e-05); getitem_108 = None
+ rsqrt_19: "f32[1, 64, 1][64, 1, 1]cuda:0" = torch.ops.aten.rsqrt.default(add_76); add_76 = None
+ sub_19: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.sub.Tensor(add_75, getitem_109)
+ mul_74: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.mul.Tensor(sub_19, rsqrt_19); sub_19 = None
+ mul_75: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.mul.Tensor(mul_74, primals_118); mul_74 = None
+ add_77: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.add.Tensor(mul_75, primals_119); mul_75 = primals_119 = None
+
+ # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:128 in forward, code: x = self.c_fc(x)
+ view_116: "f32[64, 768][768, 1]cuda:0" = torch.ops.aten.view.default(add_77, [64, 768]); add_77 = None
+ permute_78: "f32[768, 3072][1, 768]cuda:0" = torch.ops.aten.permute.default(primals_120, [1, 0]); primals_120 = None
+ addmm_38: "f32[64, 3072][3072, 1]cuda:0" = torch.ops.aten.addmm.default(primals_121, view_116, permute_78); primals_121 = None
+ view_117: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.view.default(addmm_38, [1, 64, 3072]); addmm_38 = None
+
+ # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:25 in new_gelu, code: 0.5
+ mul_76: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.mul.Tensor(view_117, 0.5)
+
+ # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:29 in new_gelu, code: + torch.tanh(math.sqrt(2.0 / math.pi) * (x + 0.044715 * torch.pow(x, 3.0)))
+ pow_10: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.pow.Tensor_Scalar(view_117, 3.0)
+ mul_77: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.mul.Tensor(pow_10, 0.044715); pow_10 = None
+ add_78: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.add.Tensor(view_117, mul_77); mul_77 = None
+ mul_78: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.mul.Tensor(add_78, 0.7978845608028654); add_78 = None
+ tanh_9: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.tanh.default(mul_78); mul_78 = None
+ alias_38: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.alias.default(tanh_9)
+ alias_39: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.alias.default(alias_38); alias_38 = None
+
+ # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:28 in new_gelu, code: 1.0
+ add_79: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.add.Tensor(tanh_9, 1.0); tanh_9 = None
+
+ # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:25 in new_gelu, code: 0.5
+ mul_79: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.mul.Tensor(mul_76, add_79)
+
+ # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:130 in forward, code: x = self.c_proj(x)
+ view_118: "f32[64, 3072][3072, 1]cuda:0" = torch.ops.aten.view.default(mul_79, [64, 3072]); mul_79 = None
+ permute_79: "f32[3072, 768][1, 3072]cuda:0" = torch.ops.aten.permute.default(primals_122, [1, 0]); primals_122 = None
+ addmm_39: "f32[64, 768][768, 1]cuda:0" = torch.ops.aten.addmm.default(primals_123, view_118, permute_79); primals_123 = None
+ view_119: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.view.default(addmm_39, [1, 64, 768]); addmm_39 = None
+
+ # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:131 in forward, code: x = self.dropout(x)
+ clone_20: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.clone.default(view_119); view_119 = None
+
+ # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:145 in forward, code: x = x + self.mlp(self.ln_2(x))
+ add_80: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.add.Tensor(add_75, clone_20); clone_20 = None
+
+ # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:43 in forward, code: return F.layer_norm(input, self.weight.shape, self.weight, self.bias, 1e-5)
+ var_mean_20 = torch.ops.aten.var_mean.correction(add_80, [2], correction = 0, keepdim = True)
+ getitem_110: "f32[1, 64, 1][64, 1, 1]cuda:0" = var_mean_20[0]
+ getitem_111: "f32[1, 64, 1][64, 1, 1]cuda:0" = var_mean_20[1]; var_mean_20 = None
+ add_81: "f32[1, 64, 1][64, 1, 1]cuda:0" = torch.ops.aten.add.Tensor(getitem_110, 1e-05); getitem_110 = None
+ rsqrt_20: "f32[1, 64, 1][64, 1, 1]cuda:0" = torch.ops.aten.rsqrt.default(add_81); add_81 = None
+ sub_20: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.sub.Tensor(add_80, getitem_111)
+ mul_80: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.mul.Tensor(sub_20, rsqrt_20); sub_20 = None
+ mul_81: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.mul.Tensor(mul_80, primals_124); mul_80 = None
+ add_82: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.add.Tensor(mul_81, primals_125); mul_81 = primals_125 = None
+
+ # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:82 in forward, code: q, k, v = self.c_attn(x).split(self.n_embd, dim=2)
+ view_120: "f32[64, 768][768, 1]cuda:0" = torch.ops.aten.view.default(add_82, [64, 768]); add_82 = None
+ permute_80: "f32[768, 2304][1, 768]cuda:0" = torch.ops.aten.permute.default(primals_126, [1, 0]); primals_126 = None
+ addmm_40: "f32[64, 2304][2304, 1]cuda:0" = torch.ops.aten.addmm.default(primals_127, view_120, permute_80); primals_127 = None
+ view_121: "f32[1, 64, 2304][147456, 2304, 1]cuda:0" = torch.ops.aten.view.default(addmm_40, [1, 64, 2304]); addmm_40 = None
+ split_10 = torch.ops.aten.split.Tensor(view_121, 768, 2); view_121 = None
+ getitem_112: "f32[1, 64, 768][147456, 2304, 1]cuda:0" = split_10[0]
+ getitem_113: "f32[1, 64, 768][147456, 2304, 1]cuda:0" = split_10[1]
+ getitem_114: "f32[1, 64, 768][147456, 2304, 1]cuda:0" = split_10[2]; split_10 = None
+
+ # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:83 in forward, code: k = k.view(B, T, self.n_head, C // self.n_head).transpose(
+ view_122: "f32[1, 64, 12, 64][147456, 2304, 64, 1]cuda:0" = torch.ops.aten.view.default(getitem_113, [1, 64, 12, 64]); getitem_113 = None
+ permute_81: "f32[1, 12, 64, 64][147456, 64, 2304, 1]cuda:0" = torch.ops.aten.permute.default(view_122, [0, 2, 1, 3]); view_122 = None
+
+ # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:86 in forward, code: q = q.view(B, T, self.n_head, C // self.n_head).transpose(
+ view_123: "f32[1, 64, 12, 64][147456, 2304, 64, 1]cuda:0" = torch.ops.aten.view.default(getitem_112, [1, 64, 12, 64]); getitem_112 = None
+ permute_82: "f32[1, 12, 64, 64][147456, 64, 2304, 1]cuda:0" = torch.ops.aten.permute.default(view_123, [0, 2, 1, 3]); view_123 = None
+
+ # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:89 in forward, code: v = v.view(B, T, self.n_head, C // self.n_head).transpose(
+ view_124: "f32[1, 64, 12, 64][147456, 2304, 64, 1]cuda:0" = torch.ops.aten.view.default(getitem_114, [1, 64, 12, 64]); getitem_114 = None
+ permute_83: "f32[1, 12, 64, 64][147456, 64, 2304, 1]cuda:0" = torch.ops.aten.permute.default(view_124, [0, 2, 1, 3]); view_124 = None
+
+ # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:96 in forward, code: y = torch.nn.functional.scaled_dot_product_attention(
+ _scaled_dot_product_efficient_attention_10 = torch.ops.aten._scaled_dot_product_efficient_attention.default(permute_82, permute_81, permute_83, None, True, 0.0, True)
+ getitem_115: "f32[1, 12, 64, 64][49152, 64, 768, 1]cuda:0" = _scaled_dot_product_efficient_attention_10[0]
+ getitem_116: "f32[1, 12, 64][768, 64, 1]cuda:0" = _scaled_dot_product_efficient_attention_10[1]
+ getitem_117: "i64[][]cuda:0" = _scaled_dot_product_efficient_attention_10[2]
+ getitem_118: "i64[][]cuda:0" = _scaled_dot_product_efficient_attention_10[3]; _scaled_dot_product_efficient_attention_10 = None
+ alias_40: "f32[1, 12, 64, 64][49152, 64, 768, 1]cuda:0" = torch.ops.aten.alias.default(getitem_115)
+ alias_41: "f32[1, 12, 64, 64][49152, 64, 768, 1]cuda:0" = torch.ops.aten.alias.default(alias_40); alias_40 = None
+
+ # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:112 in forward, code: y.transpose(1, 2).contiguous().view(B, T, C)
+ permute_84: "f32[1, 64, 12, 64][49152, 768, 64, 1]cuda:0" = torch.ops.aten.permute.default(getitem_115, [0, 2, 1, 3]); getitem_115 = None
+ view_125: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.view.default(permute_84, [1, 64, 768]); permute_84 = None
+
+ # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:116 in forward, code: y = self.resid_dropout(self.c_proj(y))
+ view_126: "f32[64, 768][768, 1]cuda:0" = torch.ops.aten.view.default(view_125, [64, 768]); view_125 = None
+ permute_85: "f32[768, 768][1, 768]cuda:0" = torch.ops.aten.permute.default(primals_128, [1, 0]); primals_128 = None
+ addmm_41: "f32[64, 768][768, 1]cuda:0" = torch.ops.aten.addmm.default(primals_129, view_126, permute_85); primals_129 = None
+ view_127: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.view.default(addmm_41, [1, 64, 768]); addmm_41 = None
+ clone_21: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.clone.default(view_127); view_127 = None
+
+ # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:144 in forward, code: x = x + self.attn(self.ln_1(x))
+ add_83: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.add.Tensor(add_80, clone_21); clone_21 = None
+
+ # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:43 in forward, code: return F.layer_norm(input, self.weight.shape, self.weight, self.bias, 1e-5)
+ var_mean_21 = torch.ops.aten.var_mean.correction(add_83, [2], correction = 0, keepdim = True)
+ getitem_119: "f32[1, 64, 1][64, 1, 1]cuda:0" = var_mean_21[0]
+ getitem_120: "f32[1, 64, 1][64, 1, 1]cuda:0" = var_mean_21[1]; var_mean_21 = None
+ add_84: "f32[1, 64, 1][64, 1, 1]cuda:0" = torch.ops.aten.add.Tensor(getitem_119, 1e-05); getitem_119 = None
+ rsqrt_21: "f32[1, 64, 1][64, 1, 1]cuda:0" = torch.ops.aten.rsqrt.default(add_84); add_84 = None
+ sub_21: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.sub.Tensor(add_83, getitem_120)
+ mul_82: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.mul.Tensor(sub_21, rsqrt_21); sub_21 = None
+ mul_83: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.mul.Tensor(mul_82, primals_130); mul_82 = None
+ add_85: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.add.Tensor(mul_83, primals_131); mul_83 = primals_131 = None
+
+ # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:128 in forward, code: x = self.c_fc(x)
+ view_128: "f32[64, 768][768, 1]cuda:0" = torch.ops.aten.view.default(add_85, [64, 768]); add_85 = None
+ permute_86: "f32[768, 3072][1, 768]cuda:0" = torch.ops.aten.permute.default(primals_132, [1, 0]); primals_132 = None
+ addmm_42: "f32[64, 3072][3072, 1]cuda:0" = torch.ops.aten.addmm.default(primals_133, view_128, permute_86); primals_133 = None
+ view_129: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.view.default(addmm_42, [1, 64, 3072]); addmm_42 = None
+
+ # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:25 in new_gelu, code: 0.5
+ mul_84: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.mul.Tensor(view_129, 0.5)
+
+ # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:29 in new_gelu, code: + torch.tanh(math.sqrt(2.0 / math.pi) * (x + 0.044715 * torch.pow(x, 3.0)))
+ pow_11: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.pow.Tensor_Scalar(view_129, 3.0)
+ mul_85: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.mul.Tensor(pow_11, 0.044715); pow_11 = None
+ add_86: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.add.Tensor(view_129, mul_85); mul_85 = None
+ mul_86: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.mul.Tensor(add_86, 0.7978845608028654); add_86 = None
+ tanh_10: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.tanh.default(mul_86); mul_86 = None
+ alias_42: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.alias.default(tanh_10)
+ alias_43: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.alias.default(alias_42); alias_42 = None
+
+ # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:28 in new_gelu, code: 1.0
+ add_87: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.add.Tensor(tanh_10, 1.0); tanh_10 = None
+
+ # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:25 in new_gelu, code: 0.5
+ mul_87: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.mul.Tensor(mul_84, add_87)
+
+ # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:130 in forward, code: x = self.c_proj(x)
+ view_130: "f32[64, 3072][3072, 1]cuda:0" = torch.ops.aten.view.default(mul_87, [64, 3072]); mul_87 = None
+ permute_87: "f32[3072, 768][1, 3072]cuda:0" = torch.ops.aten.permute.default(primals_134, [1, 0]); primals_134 = None
+ addmm_43: "f32[64, 768][768, 1]cuda:0" = torch.ops.aten.addmm.default(primals_135, view_130, permute_87); primals_135 = None
+ view_131: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.view.default(addmm_43, [1, 64, 768]); addmm_43 = None
+
+ # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:131 in forward, code: x = self.dropout(x)
+ clone_22: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.clone.default(view_131); view_131 = None
+
+ # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:145 in forward, code: x = x + self.mlp(self.ln_2(x))
+ add_88: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.add.Tensor(add_83, clone_22); clone_22 = None
+
+ # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:43 in forward, code: return F.layer_norm(input, self.weight.shape, self.weight, self.bias, 1e-5)
+ var_mean_22 = torch.ops.aten.var_mean.correction(add_88, [2], correction = 0, keepdim = True)
+ getitem_121: "f32[1, 64, 1][64, 1, 1]cuda:0" = var_mean_22[0]
+ getitem_122: "f32[1, 64, 1][64, 1, 1]cuda:0" = var_mean_22[1]; var_mean_22 = None
+ add_89: "f32[1, 64, 1][64, 1, 1]cuda:0" = torch.ops.aten.add.Tensor(getitem_121, 1e-05); getitem_121 = None
+ rsqrt_22: "f32[1, 64, 1][64, 1, 1]cuda:0" = torch.ops.aten.rsqrt.default(add_89); add_89 = None
+ sub_22: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.sub.Tensor(add_88, getitem_122)
+ mul_88: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.mul.Tensor(sub_22, rsqrt_22); sub_22 = None
+ mul_89: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.mul.Tensor(mul_88, primals_136); mul_88 = None
+ add_90: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.add.Tensor(mul_89, primals_137); mul_89 = primals_137 = None
+
+ # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:82 in forward, code: q, k, v = self.c_attn(x).split(self.n_embd, dim=2)
+ view_132: "f32[64, 768][768, 1]cuda:0" = torch.ops.aten.view.default(add_90, [64, 768]); add_90 = None
+ permute_88: "f32[768, 2304][1, 768]cuda:0" = torch.ops.aten.permute.default(primals_138, [1, 0]); primals_138 = None
+ addmm_44: "f32[64, 2304][2304, 1]cuda:0" = torch.ops.aten.addmm.default(primals_139, view_132, permute_88); primals_139 = None
+ view_133: "f32[1, 64, 2304][147456, 2304, 1]cuda:0" = torch.ops.aten.view.default(addmm_44, [1, 64, 2304]); addmm_44 = None
+ split_11 = torch.ops.aten.split.Tensor(view_133, 768, 2); view_133 = None
+ getitem_123: "f32[1, 64, 768][147456, 2304, 1]cuda:0" = split_11[0]
+ getitem_124: "f32[1, 64, 768][147456, 2304, 1]cuda:0" = split_11[1]
+ getitem_125: "f32[1, 64, 768][147456, 2304, 1]cuda:0" = split_11[2]; split_11 = None
+
+ # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:83 in forward, code: k = k.view(B, T, self.n_head, C // self.n_head).transpose(
+ view_134: "f32[1, 64, 12, 64][147456, 2304, 64, 1]cuda:0" = torch.ops.aten.view.default(getitem_124, [1, 64, 12, 64]); getitem_124 = None
+ permute_89: "f32[1, 12, 64, 64][147456, 64, 2304, 1]cuda:0" = torch.ops.aten.permute.default(view_134, [0, 2, 1, 3]); view_134 = None
+
+ # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:86 in forward, code: q = q.view(B, T, self.n_head, C // self.n_head).transpose(
+ view_135: "f32[1, 64, 12, 64][147456, 2304, 64, 1]cuda:0" = torch.ops.aten.view.default(getitem_123, [1, 64, 12, 64]); getitem_123 = None
+ permute_90: "f32[1, 12, 64, 64][147456, 64, 2304, 1]cuda:0" = torch.ops.aten.permute.default(view_135, [0, 2, 1, 3]); view_135 = None
+
+ # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:89 in forward, code: v = v.view(B, T, self.n_head, C // self.n_head).transpose(
+ view_136: "f32[1, 64, 12, 64][147456, 2304, 64, 1]cuda:0" = torch.ops.aten.view.default(getitem_125, [1, 64, 12, 64]); getitem_125 = None
+ permute_91: "f32[1, 12, 64, 64][147456, 64, 2304, 1]cuda:0" = torch.ops.aten.permute.default(view_136, [0, 2, 1, 3]); view_136 = None
+
+ # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:96 in forward, code: y = torch.nn.functional.scaled_dot_product_attention(
+ _scaled_dot_product_efficient_attention_11 = torch.ops.aten._scaled_dot_product_efficient_attention.default(permute_90, permute_89, permute_91, None, True, 0.0, True)
+ getitem_126: "f32[1, 12, 64, 64][49152, 64, 768, 1]cuda:0" = _scaled_dot_product_efficient_attention_11[0]
+ getitem_127: "f32[1, 12, 64][768, 64, 1]cuda:0" = _scaled_dot_product_efficient_attention_11[1]
+ getitem_128: "i64[][]cuda:0" = _scaled_dot_product_efficient_attention_11[2]
+ getitem_129: "i64[][]cuda:0" = _scaled_dot_product_efficient_attention_11[3]; _scaled_dot_product_efficient_attention_11 = None
+ alias_44: "f32[1, 12, 64, 64][49152, 64, 768, 1]cuda:0" = torch.ops.aten.alias.default(getitem_126)
+ alias_45: "f32[1, 12, 64, 64][49152, 64, 768, 1]cuda:0" = torch.ops.aten.alias.default(alias_44); alias_44 = None
+
+ # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:112 in forward, code: y.transpose(1, 2).contiguous().view(B, T, C)
+ permute_92: "f32[1, 64, 12, 64][49152, 768, 64, 1]cuda:0" = torch.ops.aten.permute.default(getitem_126, [0, 2, 1, 3]); getitem_126 = None
+ view_137: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.view.default(permute_92, [1, 64, 768]); permute_92 = None
+
+ # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:116 in forward, code: y = self.resid_dropout(self.c_proj(y))
+ view_138: "f32[64, 768][768, 1]cuda:0" = torch.ops.aten.view.default(view_137, [64, 768]); view_137 = None
+ permute_93: "f32[768, 768][1, 768]cuda:0" = torch.ops.aten.permute.default(primals_140, [1, 0]); primals_140 = None
+ addmm_45: "f32[64, 768][768, 1]cuda:0" = torch.ops.aten.addmm.default(primals_141, view_138, permute_93); primals_141 = None
+ view_139: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.view.default(addmm_45, [1, 64, 768]); addmm_45 = None
+ clone_23: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.clone.default(view_139); view_139 = None
+
+ # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:144 in forward, code: x = x + self.attn(self.ln_1(x))
+ add_91: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.add.Tensor(add_88, clone_23); clone_23 = None
+
+ # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:43 in forward, code: return F.layer_norm(input, self.weight.shape, self.weight, self.bias, 1e-5)
+ var_mean_23 = torch.ops.aten.var_mean.correction(add_91, [2], correction = 0, keepdim = True)
+ getitem_130: "f32[1, 64, 1][64, 1, 1]cuda:0" = var_mean_23[0]
+ getitem_131: "f32[1, 64, 1][64, 1, 1]cuda:0" = var_mean_23[1]; var_mean_23 = None
+ add_92: "f32[1, 64, 1][64, 1, 1]cuda:0" = torch.ops.aten.add.Tensor(getitem_130, 1e-05); getitem_130 = None
+ rsqrt_23: "f32[1, 64, 1][64, 1, 1]cuda:0" = torch.ops.aten.rsqrt.default(add_92); add_92 = None
+ sub_23: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.sub.Tensor(add_91, getitem_131)
+ mul_90: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.mul.Tensor(sub_23, rsqrt_23); sub_23 = None
+ mul_91: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.mul.Tensor(mul_90, primals_142); mul_90 = None
+ add_93: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.add.Tensor(mul_91, primals_143); mul_91 = primals_143 = None
+
+ # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:128 in forward, code: x = self.c_fc(x)
+ view_140: "f32[64, 768][768, 1]cuda:0" = torch.ops.aten.view.default(add_93, [64, 768]); add_93 = None
+ permute_94: "f32[768, 3072][1, 768]cuda:0" = torch.ops.aten.permute.default(primals_144, [1, 0]); primals_144 = None
+ addmm_46: "f32[64, 3072][3072, 1]cuda:0" = torch.ops.aten.addmm.default(primals_145, view_140, permute_94); primals_145 = None
+ view_141: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.view.default(addmm_46, [1, 64, 3072]); addmm_46 = None
+
+ # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:25 in new_gelu, code: 0.5
+ mul_92: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.mul.Tensor(view_141, 0.5)
+
+ # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:29 in new_gelu, code: + torch.tanh(math.sqrt(2.0 / math.pi) * (x + 0.044715 * torch.pow(x, 3.0)))
+ pow_12: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.pow.Tensor_Scalar(view_141, 3.0)
+ mul_93: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.mul.Tensor(pow_12, 0.044715); pow_12 = None
+ add_94: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.add.Tensor(view_141, mul_93); mul_93 = None
+ mul_94: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.mul.Tensor(add_94, 0.7978845608028654); add_94 = None
+ tanh_11: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.tanh.default(mul_94); mul_94 = None
+ alias_46: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.alias.default(tanh_11)
+ alias_47: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.alias.default(alias_46); alias_46 = None
+
+ # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:28 in new_gelu, code: 1.0
+ add_95: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.add.Tensor(tanh_11, 1.0); tanh_11 = None
+
+ # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:25 in new_gelu, code: 0.5
+ mul_95: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.mul.Tensor(mul_92, add_95)
+
+ # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:130 in forward, code: x = self.c_proj(x)
+ view_142: "f32[64, 3072][3072, 1]cuda:0" = torch.ops.aten.view.default(mul_95, [64, 3072]); mul_95 = None
+ permute_95: "f32[3072, 768][1, 3072]cuda:0" = torch.ops.aten.permute.default(primals_146, [1, 0]); primals_146 = None
+ addmm_47: "f32[64, 768][768, 1]cuda:0" = torch.ops.aten.addmm.default(primals_147, view_142, permute_95); primals_147 = None
+ view_143: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.view.default(addmm_47, [1, 64, 768]); addmm_47 = None
+
+ # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:131 in forward, code: x = self.dropout(x)
+ clone_24: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.clone.default(view_143); view_143 = None
+
+ # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:145 in forward, code: x = x + self.mlp(self.ln_2(x))
+ add_96: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.add.Tensor(add_91, clone_24); clone_24 = None
+
+ # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:43 in forward, code: return F.layer_norm(input, self.weight.shape, self.weight, self.bias, 1e-5)
+ var_mean_24 = torch.ops.aten.var_mean.correction(add_96, [2], correction = 0, keepdim = True)
+ getitem_132: "f32[1, 64, 1][64, 1, 1]cuda:0" = var_mean_24[0]
+ getitem_133: "f32[1, 64, 1][64, 1, 1]cuda:0" = var_mean_24[1]; var_mean_24 = None
+ add_97: "f32[1, 64, 1][64, 1, 1]cuda:0" = torch.ops.aten.add.Tensor(getitem_132, 1e-05); getitem_132 = None
+ rsqrt_24: "f32[1, 64, 1][64, 1, 1]cuda:0" = torch.ops.aten.rsqrt.default(add_97); add_97 = None
+ sub_24: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.sub.Tensor(add_96, getitem_133)
+ mul_96: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.mul.Tensor(sub_24, rsqrt_24); sub_24 = None
+ mul_97: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.mul.Tensor(mul_96, primals_148); mul_96 = None
+ add_98: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.add.Tensor(mul_97, primals_149); mul_97 = primals_149 = None
+
+ # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:239 in forward, code: x[:, [-1], :]
+ slice_1: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.slice.Tensor(add_98, 0, 0, 9223372036854775807); add_98 = None
+ _tensor_constant0 = self._tensor_constant0
+ lift_fresh_copy: "i64[1][1]cuda:0" = torch.ops.aten.lift_fresh_copy.default(_tensor_constant0); _tensor_constant0 = None
+ slice_2: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.slice.Tensor(slice_1, 2, 0, 9223372036854775807); slice_1 = None
+ index: "f32[1, 1, 768][768, 768, 1]cuda:0" = torch.ops.aten.index.Tensor(slice_2, [None, lift_fresh_copy]); slice_2 = None
+
+ # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:238 in forward, code: logits = self.lm_head(
+ permute_96: "f32[768, 50304][1, 768]cuda:0" = torch.ops.aten.permute.default(primals_2, [1, 0]); primals_2 = None
+ view_144: "f32[1, 768][768, 1]cuda:0" = torch.ops.aten.view.default(index, [1, 768]); index = None
+ mm: "f32[1, 50304][50304, 1]cuda:0" = torch.ops.aten.mm.default(view_144, permute_96)
+ view_145: "f32[1, 1, 50304][50304, 50304, 1]cuda:0" = torch.ops.aten.view.default(mm, [1, 1, 50304]); mm = None
+ view_146: "f32[1, 50304][50304, 1]cuda:0" = torch.ops.aten.view.default(tangents_1, [1, 50304]); tangents_1 = None
+ permute_97: "f32[50304, 1][1, 50304]cuda:0" = torch.ops.aten.permute.default(view_146, [1, 0])
+ mm_1: "f32[50304, 768][768, 1]cuda:0" = torch.ops.aten.mm.default(permute_97, view_144); permute_97 = view_144 = None
+ permute_98: "f32[768, 50304][1, 768]cuda:0" = torch.ops.aten.permute.default(mm_1, [1, 0]); mm_1 = None
+ permute_99: "f32[50304, 768][768, 1]cuda:0" = torch.ops.aten.permute.default(permute_96, [1, 0]); permute_96 = None
+ mm_2: "f32[1, 768][768, 1]cuda:0" = torch.ops.aten.mm.default(view_146, permute_99); view_146 = permute_99 = None
+ view_147: "f32[1, 1, 768][768, 768, 1]cuda:0" = torch.ops.aten.view.default(mm_2, [1, 1, 768]); mm_2 = None
+ permute_100: "f32[50304, 768][768, 1]cuda:0" = torch.ops.aten.permute.default(permute_98, [1, 0]); permute_98 = None
+
+ # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:239 in forward, code: x[:, [-1], :]
+ full: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.full.default([1, 64, 768], 0, dtype = torch.float32, layout = torch.strided, device = device(type='cuda', index=0), pin_memory = False)
+ index_put: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.index_put.default(full, [None, lift_fresh_copy], view_147, True); full = lift_fresh_copy = view_147 = None
+ full_1: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.full.default([1, 64, 768], 0, dtype = torch.float32, layout = torch.strided, device = device(type='cuda', index=0), pin_memory = False)
+ slice_scatter: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.slice_scatter.default(full_1, index_put, 2, 0, 9223372036854775807); full_1 = index_put = None
+ full_2: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.full.default([1, 64, 768], 0, dtype = torch.float32, layout = torch.strided, device = device(type='cuda', index=0), pin_memory = False)
+ slice_scatter_1: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.slice_scatter.default(full_2, slice_scatter, 0, 0, 9223372036854775807); full_2 = slice_scatter = None
+
+ # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:43 in forward, code: return F.layer_norm(input, self.weight.shape, self.weight, self.bias, 1e-5)
+ sub_25: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.sub.Tensor(add_96, getitem_133); add_96 = getitem_133 = None
+ mul_98: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.mul.Tensor(sub_25, rsqrt_24); sub_25 = None
+ mul_99: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.mul.Tensor(slice_scatter_1, primals_148); primals_148 = None
+ mul_100: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.mul.Tensor(mul_99, 768)
+ sum_1: "f32[1, 64, 1][64, 1, 1]cuda:0" = torch.ops.aten.sum.dim_IntList(mul_99, [2], True)
+ mul_101: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.mul.Tensor(mul_99, mul_98); mul_99 = None
+ sum_2: "f32[1, 64, 1][64, 1, 1]cuda:0" = torch.ops.aten.sum.dim_IntList(mul_101, [2], True); mul_101 = None
+ mul_102: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.mul.Tensor(mul_98, sum_2); sum_2 = None
+ sub_26: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.sub.Tensor(mul_100, sum_1); mul_100 = sum_1 = None
+ sub_27: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.sub.Tensor(sub_26, mul_102); sub_26 = mul_102 = None
+ div: "f32[1, 64, 1][64, 1, 1]cuda:0" = torch.ops.aten.div.Tensor(rsqrt_24, 768); rsqrt_24 = None
+ mul_103: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.mul.Tensor(div, sub_27); div = sub_27 = None
+ mul_104: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.mul.Tensor(slice_scatter_1, mul_98); mul_98 = None
+ sum_3: "f32[768][1]cuda:0" = torch.ops.aten.sum.dim_IntList(mul_104, [0, 1]); mul_104 = None
+ sum_4: "f32[768][1]cuda:0" = torch.ops.aten.sum.dim_IntList(slice_scatter_1, [0, 1]); slice_scatter_1 = None
+
+ # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:130 in forward, code: x = self.c_proj(x)
+ view_148: "f32[64, 768][768, 1]cuda:0" = torch.ops.aten.view.default(mul_103, [64, 768])
+ permute_101: "f32[768, 3072][3072, 1]cuda:0" = torch.ops.aten.permute.default(permute_95, [1, 0]); permute_95 = None
+ mm_3: "f32[64, 3072][3072, 1]cuda:0" = torch.ops.aten.mm.default(view_148, permute_101); permute_101 = None
+ permute_102: "f32[768, 64][1, 768]cuda:0" = torch.ops.aten.permute.default(view_148, [1, 0])
+ mm_4: "f32[768, 3072][3072, 1]cuda:0" = torch.ops.aten.mm.default(permute_102, view_142); permute_102 = view_142 = None
+ permute_103: "f32[3072, 768][1, 3072]cuda:0" = torch.ops.aten.permute.default(mm_4, [1, 0]); mm_4 = None
+ sum_5: "f32[1, 768][768, 1]cuda:0" = torch.ops.aten.sum.dim_IntList(view_148, [0], True); view_148 = None
+ view_149: "f32[768][1]cuda:0" = torch.ops.aten.view.default(sum_5, [768]); sum_5 = None
+ permute_104: "f32[768, 3072][3072, 1]cuda:0" = torch.ops.aten.permute.default(permute_103, [1, 0]); permute_103 = None
+ view_150: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.view.default(mm_3, [1, 64, 3072]); mm_3 = None
+
+ # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:25 in new_gelu, code: 0.5
+ mul_105: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.mul.Tensor(view_150, mul_92); mul_92 = None
+ mul_106: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.mul.Tensor(view_150, add_95); view_150 = add_95 = None
+
+ # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:29 in new_gelu, code: + torch.tanh(math.sqrt(2.0 / math.pi) * (x + 0.044715 * torch.pow(x, 3.0)))
+ alias_48: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.alias.default(alias_47); alias_47 = None
+ alias_49: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.alias.default(alias_48); alias_48 = None
+ mul_107: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.mul.Tensor(alias_49, alias_49); alias_49 = None
+ sub_28: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.sub.Tensor(1, mul_107); mul_107 = None
+ mul_108: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.mul.Tensor(mul_105, sub_28); mul_105 = sub_28 = None
+ mul_109: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.mul.Tensor(mul_108, 0.7978845608028654); mul_108 = None
+ mul_110: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.mul.Tensor(mul_109, 0.044715)
+ pow_13: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.pow.Tensor_Scalar(view_141, 2.0); view_141 = None
+ mul_111: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.mul.Scalar(pow_13, 3.0); pow_13 = None
+ mul_112: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.mul.Tensor(mul_110, mul_111); mul_110 = mul_111 = None
+
+ # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:29 in new_gelu, code: + torch.tanh(math.sqrt(2.0 / math.pi) * (x + 0.044715 * torch.pow(x, 3.0)))
+ add_99: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.add.Tensor(mul_109, mul_112); mul_109 = mul_112 = None
+
+ # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:25 in new_gelu, code: 0.5
+ mul_113: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.mul.Tensor(mul_106, 0.5); mul_106 = None
+
+ # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:25 in new_gelu, code: 0.5
+ add_100: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.add.Tensor(add_99, mul_113); add_99 = mul_113 = None
+
+ # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:128 in forward, code: x = self.c_fc(x)
+ view_151: "f32[64, 3072][3072, 1]cuda:0" = torch.ops.aten.view.default(add_100, [64, 3072]); add_100 = None
+ permute_105: "f32[3072, 768][768, 1]cuda:0" = torch.ops.aten.permute.default(permute_94, [1, 0]); permute_94 = None
+ mm_5: "f32[64, 768][768, 1]cuda:0" = torch.ops.aten.mm.default(view_151, permute_105); permute_105 = None
+ permute_106: "f32[3072, 64][1, 3072]cuda:0" = torch.ops.aten.permute.default(view_151, [1, 0])
+ mm_6: "f32[3072, 768][768, 1]cuda:0" = torch.ops.aten.mm.default(permute_106, view_140); permute_106 = view_140 = None
+ permute_107: "f32[768, 3072][1, 768]cuda:0" = torch.ops.aten.permute.default(mm_6, [1, 0]); mm_6 = None
+ sum_6: "f32[1, 3072][3072, 1]cuda:0" = torch.ops.aten.sum.dim_IntList(view_151, [0], True); view_151 = None
+ view_152: "f32[3072][1]cuda:0" = torch.ops.aten.view.default(sum_6, [3072]); sum_6 = None
+ permute_108: "f32[3072, 768][768, 1]cuda:0" = torch.ops.aten.permute.default(permute_107, [1, 0]); permute_107 = None
+ view_153: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.view.default(mm_5, [1, 64, 768]); mm_5 = None
+
+ # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:43 in forward, code: return F.layer_norm(input, self.weight.shape, self.weight, self.bias, 1e-5)
+ sub_29: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.sub.Tensor(add_91, getitem_131); add_91 = getitem_131 = None
+ mul_114: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.mul.Tensor(sub_29, rsqrt_23); sub_29 = None
+ mul_115: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.mul.Tensor(view_153, primals_142); primals_142 = None
+ mul_116: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.mul.Tensor(mul_115, 768)
+ sum_7: "f32[1, 64, 1][64, 1, 1]cuda:0" = torch.ops.aten.sum.dim_IntList(mul_115, [2], True)
+ mul_117: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.mul.Tensor(mul_115, mul_114); mul_115 = None
+ sum_8: "f32[1, 64, 1][64, 1, 1]cuda:0" = torch.ops.aten.sum.dim_IntList(mul_117, [2], True); mul_117 = None
+ mul_118: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.mul.Tensor(mul_114, sum_8); sum_8 = None
+ sub_30: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.sub.Tensor(mul_116, sum_7); mul_116 = sum_7 = None
+ sub_31: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.sub.Tensor(sub_30, mul_118); sub_30 = mul_118 = None
+ div_1: "f32[1, 64, 1][64, 1, 1]cuda:0" = torch.ops.aten.div.Tensor(rsqrt_23, 768); rsqrt_23 = None
+ mul_119: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.mul.Tensor(div_1, sub_31); div_1 = sub_31 = None
+ mul_120: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.mul.Tensor(view_153, mul_114); mul_114 = None
+ sum_9: "f32[768][1]cuda:0" = torch.ops.aten.sum.dim_IntList(mul_120, [0, 1]); mul_120 = None
+ sum_10: "f32[768][1]cuda:0" = torch.ops.aten.sum.dim_IntList(view_153, [0, 1]); view_153 = None
+
+ # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:43 in forward, code: return F.layer_norm(input, self.weight.shape, self.weight, self.bias, 1e-5)
+ add_101: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.add.Tensor(mul_103, mul_119); mul_103 = mul_119 = None
+
+ # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:116 in forward, code: y = self.resid_dropout(self.c_proj(y))
+ view_154: "f32[64, 768][768, 1]cuda:0" = torch.ops.aten.view.default(add_101, [64, 768])
+ permute_109: "f32[768, 768][768, 1]cuda:0" = torch.ops.aten.permute.default(permute_93, [1, 0]); permute_93 = None
+ mm_7: "f32[64, 768][768, 1]cuda:0" = torch.ops.aten.mm.default(view_154, permute_109); permute_109 = None
+ permute_110: "f32[768, 64][1, 768]cuda:0" = torch.ops.aten.permute.default(view_154, [1, 0])
+ mm_8: "f32[768, 768][768, 1]cuda:0" = torch.ops.aten.mm.default(permute_110, view_138); permute_110 = view_138 = None
+ permute_111: "f32[768, 768][1, 768]cuda:0" = torch.ops.aten.permute.default(mm_8, [1, 0]); mm_8 = None
+ sum_11: "f32[1, 768][768, 1]cuda:0" = torch.ops.aten.sum.dim_IntList(view_154, [0], True); view_154 = None
+ view_155: "f32[768][1]cuda:0" = torch.ops.aten.view.default(sum_11, [768]); sum_11 = None
+ permute_112: "f32[768, 768][768, 1]cuda:0" = torch.ops.aten.permute.default(permute_111, [1, 0]); permute_111 = None
+ view_156: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.view.default(mm_7, [1, 64, 768]); mm_7 = None
+
+ # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:112 in forward, code: y.transpose(1, 2).contiguous().view(B, T, C)
+ view_157: "f32[1, 64, 12, 64][49152, 768, 64, 1]cuda:0" = torch.ops.aten.view.default(view_156, [1, 64, 12, 64]); view_156 = None
+ permute_113: "f32[1, 12, 64, 64][49152, 64, 768, 1]cuda:0" = torch.ops.aten.permute.default(view_157, [0, 2, 1, 3]); view_157 = None
+
+ # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:96 in forward, code: y = torch.nn.functional.scaled_dot_product_attention(
+ alias_50: "f32[1, 12, 64, 64][49152, 64, 768, 1]cuda:0" = torch.ops.aten.alias.default(alias_45); alias_45 = None
+ alias_51: "f32[1, 12, 64, 64][49152, 64, 768, 1]cuda:0" = torch.ops.aten.alias.default(alias_50); alias_50 = None
+ _scaled_dot_product_efficient_attention_backward = torch.ops.aten._scaled_dot_product_efficient_attention_backward.default(permute_113, permute_90, permute_89, permute_91, None, alias_51, getitem_127, getitem_128, getitem_129, 0.0, [True, True, True, False], True); permute_113 = permute_90 = permute_89 = permute_91 = alias_51 = getitem_127 = getitem_128 = getitem_129 = None
+ getitem_134: "f32[1, 12, 64, 64][49152, 64, 768, 1]cuda:0" = _scaled_dot_product_efficient_attention_backward[0]
+ getitem_135: "f32[1, 12, 64, 64][49152, 64, 768, 1]cuda:0" = _scaled_dot_product_efficient_attention_backward[1]
+ getitem_136: "f32[1, 12, 64, 64][49152, 64, 768, 1]cuda:0" = _scaled_dot_product_efficient_attention_backward[2]; _scaled_dot_product_efficient_attention_backward = None
+
+ # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:89 in forward, code: v = v.view(B, T, self.n_head, C // self.n_head).transpose(
+ permute_114: "f32[1, 64, 12, 64][49152, 768, 64, 1]cuda:0" = torch.ops.aten.permute.default(getitem_136, [0, 2, 1, 3]); getitem_136 = None
+ view_158: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.view.default(permute_114, [1, 64, 768]); permute_114 = None
+
+ # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:86 in forward, code: q = q.view(B, T, self.n_head, C // self.n_head).transpose(
+ permute_115: "f32[1, 64, 12, 64][49152, 768, 64, 1]cuda:0" = torch.ops.aten.permute.default(getitem_134, [0, 2, 1, 3]); getitem_134 = None
+ view_159: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.view.default(permute_115, [1, 64, 768]); permute_115 = None
+
+ # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:83 in forward, code: k = k.view(B, T, self.n_head, C // self.n_head).transpose(
+ permute_116: "f32[1, 64, 12, 64][49152, 768, 64, 1]cuda:0" = torch.ops.aten.permute.default(getitem_135, [0, 2, 1, 3]); getitem_135 = None
+ view_160: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.view.default(permute_116, [1, 64, 768]); permute_116 = None
+
+ # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:82 in forward, code: q, k, v = self.c_attn(x).split(self.n_embd, dim=2)
+ cat: "f32[1, 64, 2304][147456, 2304, 1]cuda:0" = torch.ops.aten.cat.default([view_159, view_160, view_158], 2); view_159 = view_160 = view_158 = None
+ view_161: "f32[64, 2304][2304, 1]cuda:0" = torch.ops.aten.view.default(cat, [64, 2304]); cat = None
+ permute_117: "f32[2304, 768][768, 1]cuda:0" = torch.ops.aten.permute.default(permute_88, [1, 0]); permute_88 = None
+ mm_9: "f32[64, 768][768, 1]cuda:0" = torch.ops.aten.mm.default(view_161, permute_117); permute_117 = None
+ permute_118: "f32[2304, 64][1, 2304]cuda:0" = torch.ops.aten.permute.default(view_161, [1, 0])
+ mm_10: "f32[2304, 768][768, 1]cuda:0" = torch.ops.aten.mm.default(permute_118, view_132); permute_118 = view_132 = None
+ permute_119: "f32[768, 2304][1, 768]cuda:0" = torch.ops.aten.permute.default(mm_10, [1, 0]); mm_10 = None
+ sum_12: "f32[1, 2304][2304, 1]cuda:0" = torch.ops.aten.sum.dim_IntList(view_161, [0], True); view_161 = None
+ view_162: "f32[2304][1]cuda:0" = torch.ops.aten.view.default(sum_12, [2304]); sum_12 = None
+ permute_120: "f32[2304, 768][768, 1]cuda:0" = torch.ops.aten.permute.default(permute_119, [1, 0]); permute_119 = None
+ view_163: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.view.default(mm_9, [1, 64, 768]); mm_9 = None
+
+ # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:43 in forward, code: return F.layer_norm(input, self.weight.shape, self.weight, self.bias, 1e-5)
+ sub_32: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.sub.Tensor(add_88, getitem_122); add_88 = getitem_122 = None
+ mul_121: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.mul.Tensor(sub_32, rsqrt_22); sub_32 = None
+ mul_122: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.mul.Tensor(view_163, primals_136); primals_136 = None
+ mul_123: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.mul.Tensor(mul_122, 768)
+ sum_13: "f32[1, 64, 1][64, 1, 1]cuda:0" = torch.ops.aten.sum.dim_IntList(mul_122, [2], True)
+ mul_124: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.mul.Tensor(mul_122, mul_121); mul_122 = None
+ sum_14: "f32[1, 64, 1][64, 1, 1]cuda:0" = torch.ops.aten.sum.dim_IntList(mul_124, [2], True); mul_124 = None
+ mul_125: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.mul.Tensor(mul_121, sum_14); sum_14 = None
+ sub_33: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.sub.Tensor(mul_123, sum_13); mul_123 = sum_13 = None
+ sub_34: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.sub.Tensor(sub_33, mul_125); sub_33 = mul_125 = None
+ div_2: "f32[1, 64, 1][64, 1, 1]cuda:0" = torch.ops.aten.div.Tensor(rsqrt_22, 768); rsqrt_22 = None
+ mul_126: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.mul.Tensor(div_2, sub_34); div_2 = sub_34 = None
+ mul_127: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.mul.Tensor(view_163, mul_121); mul_121 = None
+ sum_15: "f32[768][1]cuda:0" = torch.ops.aten.sum.dim_IntList(mul_127, [0, 1]); mul_127 = None
+ sum_16: "f32[768][1]cuda:0" = torch.ops.aten.sum.dim_IntList(view_163, [0, 1]); view_163 = None
+
+ # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:43 in forward, code: return F.layer_norm(input, self.weight.shape, self.weight, self.bias, 1e-5)
+ add_102: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.add.Tensor(add_101, mul_126); add_101 = mul_126 = None
+
+ # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:130 in forward, code: x = self.c_proj(x)
+ view_164: "f32[64, 768][768, 1]cuda:0" = torch.ops.aten.view.default(add_102, [64, 768])
+ permute_121: "f32[768, 3072][3072, 1]cuda:0" = torch.ops.aten.permute.default(permute_87, [1, 0]); permute_87 = None
+ mm_11: "f32[64, 3072][3072, 1]cuda:0" = torch.ops.aten.mm.default(view_164, permute_121); permute_121 = None
+ permute_122: "f32[768, 64][1, 768]cuda:0" = torch.ops.aten.permute.default(view_164, [1, 0])
+ mm_12: "f32[768, 3072][3072, 1]cuda:0" = torch.ops.aten.mm.default(permute_122, view_130); permute_122 = view_130 = None
+ permute_123: "f32[3072, 768][1, 3072]cuda:0" = torch.ops.aten.permute.default(mm_12, [1, 0]); mm_12 = None
+ sum_17: "f32[1, 768][768, 1]cuda:0" = torch.ops.aten.sum.dim_IntList(view_164, [0], True); view_164 = None
+ view_165: "f32[768][1]cuda:0" = torch.ops.aten.view.default(sum_17, [768]); sum_17 = None
+ permute_124: "f32[768, 3072][3072, 1]cuda:0" = torch.ops.aten.permute.default(permute_123, [1, 0]); permute_123 = None
+ view_166: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.view.default(mm_11, [1, 64, 3072]); mm_11 = None
+
+ # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:25 in new_gelu, code: 0.5
+ mul_128: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.mul.Tensor(view_166, mul_84); mul_84 = None
+ mul_129: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.mul.Tensor(view_166, add_87); view_166 = add_87 = None
+
+ # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:29 in new_gelu, code: + torch.tanh(math.sqrt(2.0 / math.pi) * (x + 0.044715 * torch.pow(x, 3.0)))
+ alias_52: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.alias.default(alias_43); alias_43 = None
+ alias_53: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.alias.default(alias_52); alias_52 = None
+ mul_130: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.mul.Tensor(alias_53, alias_53); alias_53 = None
+ sub_35: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.sub.Tensor(1, mul_130); mul_130 = None
+ mul_131: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.mul.Tensor(mul_128, sub_35); mul_128 = sub_35 = None
+ mul_132: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.mul.Tensor(mul_131, 0.7978845608028654); mul_131 = None
+ mul_133: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.mul.Tensor(mul_132, 0.044715)
+ pow_14: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.pow.Tensor_Scalar(view_129, 2.0); view_129 = None
+ mul_134: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.mul.Scalar(pow_14, 3.0); pow_14 = None
+ mul_135: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.mul.Tensor(mul_133, mul_134); mul_133 = mul_134 = None
+
+ # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:29 in new_gelu, code: + torch.tanh(math.sqrt(2.0 / math.pi) * (x + 0.044715 * torch.pow(x, 3.0)))
+ add_103: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.add.Tensor(mul_132, mul_135); mul_132 = mul_135 = None
+
+ # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:25 in new_gelu, code: 0.5
+ mul_136: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.mul.Tensor(mul_129, 0.5); mul_129 = None
+
+ # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:25 in new_gelu, code: 0.5
+ add_104: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.add.Tensor(add_103, mul_136); add_103 = mul_136 = None
+
+ # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:128 in forward, code: x = self.c_fc(x)
+ view_167: "f32[64, 3072][3072, 1]cuda:0" = torch.ops.aten.view.default(add_104, [64, 3072]); add_104 = None
+ permute_125: "f32[3072, 768][768, 1]cuda:0" = torch.ops.aten.permute.default(permute_86, [1, 0]); permute_86 = None
+ mm_13: "f32[64, 768][768, 1]cuda:0" = torch.ops.aten.mm.default(view_167, permute_125); permute_125 = None
+ permute_126: "f32[3072, 64][1, 3072]cuda:0" = torch.ops.aten.permute.default(view_167, [1, 0])
+ mm_14: "f32[3072, 768][768, 1]cuda:0" = torch.ops.aten.mm.default(permute_126, view_128); permute_126 = view_128 = None
+ permute_127: "f32[768, 3072][1, 768]cuda:0" = torch.ops.aten.permute.default(mm_14, [1, 0]); mm_14 = None
+ sum_18: "f32[1, 3072][3072, 1]cuda:0" = torch.ops.aten.sum.dim_IntList(view_167, [0], True); view_167 = None
+ view_168: "f32[3072][1]cuda:0" = torch.ops.aten.view.default(sum_18, [3072]); sum_18 = None
+ permute_128: "f32[3072, 768][768, 1]cuda:0" = torch.ops.aten.permute.default(permute_127, [1, 0]); permute_127 = None
+ view_169: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.view.default(mm_13, [1, 64, 768]); mm_13 = None
+
+ # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:43 in forward, code: return F.layer_norm(input, self.weight.shape, self.weight, self.bias, 1e-5)
+ sub_36: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.sub.Tensor(add_83, getitem_120); add_83 = getitem_120 = None
+ mul_137: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.mul.Tensor(sub_36, rsqrt_21); sub_36 = None
+ mul_138: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.mul.Tensor(view_169, primals_130); primals_130 = None
+ mul_139: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.mul.Tensor(mul_138, 768)
+ sum_19: "f32[1, 64, 1][64, 1, 1]cuda:0" = torch.ops.aten.sum.dim_IntList(mul_138, [2], True)
+ mul_140: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.mul.Tensor(mul_138, mul_137); mul_138 = None
+ sum_20: "f32[1, 64, 1][64, 1, 1]cuda:0" = torch.ops.aten.sum.dim_IntList(mul_140, [2], True); mul_140 = None
+ mul_141: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.mul.Tensor(mul_137, sum_20); sum_20 = None
+ sub_37: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.sub.Tensor(mul_139, sum_19); mul_139 = sum_19 = None
+ sub_38: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.sub.Tensor(sub_37, mul_141); sub_37 = mul_141 = None
+ div_3: "f32[1, 64, 1][64, 1, 1]cuda:0" = torch.ops.aten.div.Tensor(rsqrt_21, 768); rsqrt_21 = None
+ mul_142: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.mul.Tensor(div_3, sub_38); div_3 = sub_38 = None
+ mul_143: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.mul.Tensor(view_169, mul_137); mul_137 = None
+ sum_21: "f32[768][1]cuda:0" = torch.ops.aten.sum.dim_IntList(mul_143, [0, 1]); mul_143 = None
+ sum_22: "f32[768][1]cuda:0" = torch.ops.aten.sum.dim_IntList(view_169, [0, 1]); view_169 = None
+
+ # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:43 in forward, code: return F.layer_norm(input, self.weight.shape, self.weight, self.bias, 1e-5)
+ add_105: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.add.Tensor(add_102, mul_142); add_102 = mul_142 = None
+
+ # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:116 in forward, code: y = self.resid_dropout(self.c_proj(y))
+ view_170: "f32[64, 768][768, 1]cuda:0" = torch.ops.aten.view.default(add_105, [64, 768])
+ permute_129: "f32[768, 768][768, 1]cuda:0" = torch.ops.aten.permute.default(permute_85, [1, 0]); permute_85 = None
+ mm_15: "f32[64, 768][768, 1]cuda:0" = torch.ops.aten.mm.default(view_170, permute_129); permute_129 = None
+ permute_130: "f32[768, 64][1, 768]cuda:0" = torch.ops.aten.permute.default(view_170, [1, 0])
+ mm_16: "f32[768, 768][768, 1]cuda:0" = torch.ops.aten.mm.default(permute_130, view_126); permute_130 = view_126 = None
+ permute_131: "f32[768, 768][1, 768]cuda:0" = torch.ops.aten.permute.default(mm_16, [1, 0]); mm_16 = None
+ sum_23: "f32[1, 768][768, 1]cuda:0" = torch.ops.aten.sum.dim_IntList(view_170, [0], True); view_170 = None
+ view_171: "f32[768][1]cuda:0" = torch.ops.aten.view.default(sum_23, [768]); sum_23 = None
+ permute_132: "f32[768, 768][768, 1]cuda:0" = torch.ops.aten.permute.default(permute_131, [1, 0]); permute_131 = None
+ view_172: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.view.default(mm_15, [1, 64, 768]); mm_15 = None
+
+ # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:112 in forward, code: y.transpose(1, 2).contiguous().view(B, T, C)
+ view_173: "f32[1, 64, 12, 64][49152, 768, 64, 1]cuda:0" = torch.ops.aten.view.default(view_172, [1, 64, 12, 64]); view_172 = None
+ permute_133: "f32[1, 12, 64, 64][49152, 64, 768, 1]cuda:0" = torch.ops.aten.permute.default(view_173, [0, 2, 1, 3]); view_173 = None
+
+ # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:96 in forward, code: y = torch.nn.functional.scaled_dot_product_attention(
+ alias_54: "f32[1, 12, 64, 64][49152, 64, 768, 1]cuda:0" = torch.ops.aten.alias.default(alias_41); alias_41 = None
+ alias_55: "f32[1, 12, 64, 64][49152, 64, 768, 1]cuda:0" = torch.ops.aten.alias.default(alias_54); alias_54 = None
+ _scaled_dot_product_efficient_attention_backward_1 = torch.ops.aten._scaled_dot_product_efficient_attention_backward.default(permute_133, permute_82, permute_81, permute_83, None, alias_55, getitem_116, getitem_117, getitem_118, 0.0, [True, True, True, False], True); permute_133 = permute_82 = permute_81 = permute_83 = alias_55 = getitem_116 = getitem_117 = getitem_118 = None
+ getitem_138: "f32[1, 12, 64, 64][49152, 64, 768, 1]cuda:0" = _scaled_dot_product_efficient_attention_backward_1[0]
+ getitem_139: "f32[1, 12, 64, 64][49152, 64, 768, 1]cuda:0" = _scaled_dot_product_efficient_attention_backward_1[1]
+ getitem_140: "f32[1, 12, 64, 64][49152, 64, 768, 1]cuda:0" = _scaled_dot_product_efficient_attention_backward_1[2]; _scaled_dot_product_efficient_attention_backward_1 = None
+
+ # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:89 in forward, code: v = v.view(B, T, self.n_head, C // self.n_head).transpose(
+ permute_134: "f32[1, 64, 12, 64][49152, 768, 64, 1]cuda:0" = torch.ops.aten.permute.default(getitem_140, [0, 2, 1, 3]); getitem_140 = None
+ view_174: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.view.default(permute_134, [1, 64, 768]); permute_134 = None
+
+ # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:86 in forward, code: q = q.view(B, T, self.n_head, C // self.n_head).transpose(
+ permute_135: "f32[1, 64, 12, 64][49152, 768, 64, 1]cuda:0" = torch.ops.aten.permute.default(getitem_138, [0, 2, 1, 3]); getitem_138 = None
+ view_175: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.view.default(permute_135, [1, 64, 768]); permute_135 = None
+
+ # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:83 in forward, code: k = k.view(B, T, self.n_head, C // self.n_head).transpose(
+ permute_136: "f32[1, 64, 12, 64][49152, 768, 64, 1]cuda:0" = torch.ops.aten.permute.default(getitem_139, [0, 2, 1, 3]); getitem_139 = None
+ view_176: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.view.default(permute_136, [1, 64, 768]); permute_136 = None
+
+ # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:82 in forward, code: q, k, v = self.c_attn(x).split(self.n_embd, dim=2)
+ cat_1: "f32[1, 64, 2304][147456, 2304, 1]cuda:0" = torch.ops.aten.cat.default([view_175, view_176, view_174], 2); view_175 = view_176 = view_174 = None
+ view_177: "f32[64, 2304][2304, 1]cuda:0" = torch.ops.aten.view.default(cat_1, [64, 2304]); cat_1 = None
+ permute_137: "f32[2304, 768][768, 1]cuda:0" = torch.ops.aten.permute.default(permute_80, [1, 0]); permute_80 = None
+ mm_17: "f32[64, 768][768, 1]cuda:0" = torch.ops.aten.mm.default(view_177, permute_137); permute_137 = None
+ permute_138: "f32[2304, 64][1, 2304]cuda:0" = torch.ops.aten.permute.default(view_177, [1, 0])
+ mm_18: "f32[2304, 768][768, 1]cuda:0" = torch.ops.aten.mm.default(permute_138, view_120); permute_138 = view_120 = None
+ permute_139: "f32[768, 2304][1, 768]cuda:0" = torch.ops.aten.permute.default(mm_18, [1, 0]); mm_18 = None
+ sum_24: "f32[1, 2304][2304, 1]cuda:0" = torch.ops.aten.sum.dim_IntList(view_177, [0], True); view_177 = None
+ view_178: "f32[2304][1]cuda:0" = torch.ops.aten.view.default(sum_24, [2304]); sum_24 = None
+ permute_140: "f32[2304, 768][768, 1]cuda:0" = torch.ops.aten.permute.default(permute_139, [1, 0]); permute_139 = None
+ view_179: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.view.default(mm_17, [1, 64, 768]); mm_17 = None
+
+ # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:43 in forward, code: return F.layer_norm(input, self.weight.shape, self.weight, self.bias, 1e-5)
+ sub_39: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.sub.Tensor(add_80, getitem_111); add_80 = getitem_111 = None
+ mul_144: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.mul.Tensor(sub_39, rsqrt_20); sub_39 = None
+ mul_145: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.mul.Tensor(view_179, primals_124); primals_124 = None
+ mul_146: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.mul.Tensor(mul_145, 768)
+ sum_25: "f32[1, 64, 1][64, 1, 1]cuda:0" = torch.ops.aten.sum.dim_IntList(mul_145, [2], True)
+ mul_147: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.mul.Tensor(mul_145, mul_144); mul_145 = None
+ sum_26: "f32[1, 64, 1][64, 1, 1]cuda:0" = torch.ops.aten.sum.dim_IntList(mul_147, [2], True); mul_147 = None
+ mul_148: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.mul.Tensor(mul_144, sum_26); sum_26 = None
+ sub_40: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.sub.Tensor(mul_146, sum_25); mul_146 = sum_25 = None
+ sub_41: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.sub.Tensor(sub_40, mul_148); sub_40 = mul_148 = None
+ div_4: "f32[1, 64, 1][64, 1, 1]cuda:0" = torch.ops.aten.div.Tensor(rsqrt_20, 768); rsqrt_20 = None
+ mul_149: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.mul.Tensor(div_4, sub_41); div_4 = sub_41 = None
+ mul_150: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.mul.Tensor(view_179, mul_144); mul_144 = None
+ sum_27: "f32[768][1]cuda:0" = torch.ops.aten.sum.dim_IntList(mul_150, [0, 1]); mul_150 = None
+ sum_28: "f32[768][1]cuda:0" = torch.ops.aten.sum.dim_IntList(view_179, [0, 1]); view_179 = None
+
+ # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:43 in forward, code: return F.layer_norm(input, self.weight.shape, self.weight, self.bias, 1e-5)
+ add_106: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.add.Tensor(add_105, mul_149); add_105 = mul_149 = None
+
+ # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:130 in forward, code: x = self.c_proj(x)
+ view_180: "f32[64, 768][768, 1]cuda:0" = torch.ops.aten.view.default(add_106, [64, 768])
+ permute_141: "f32[768, 3072][3072, 1]cuda:0" = torch.ops.aten.permute.default(permute_79, [1, 0]); permute_79 = None
+ mm_19: "f32[64, 3072][3072, 1]cuda:0" = torch.ops.aten.mm.default(view_180, permute_141); permute_141 = None
+ permute_142: "f32[768, 64][1, 768]cuda:0" = torch.ops.aten.permute.default(view_180, [1, 0])
+ mm_20: "f32[768, 3072][3072, 1]cuda:0" = torch.ops.aten.mm.default(permute_142, view_118); permute_142 = view_118 = None
+ permute_143: "f32[3072, 768][1, 3072]cuda:0" = torch.ops.aten.permute.default(mm_20, [1, 0]); mm_20 = None
+ sum_29: "f32[1, 768][768, 1]cuda:0" = torch.ops.aten.sum.dim_IntList(view_180, [0], True); view_180 = None
+ view_181: "f32[768][1]cuda:0" = torch.ops.aten.view.default(sum_29, [768]); sum_29 = None
+ permute_144: "f32[768, 3072][3072, 1]cuda:0" = torch.ops.aten.permute.default(permute_143, [1, 0]); permute_143 = None
+ view_182: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.view.default(mm_19, [1, 64, 3072]); mm_19 = None
+
+ # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:25 in new_gelu, code: 0.5
+ mul_151: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.mul.Tensor(view_182, mul_76); mul_76 = None
+ mul_152: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.mul.Tensor(view_182, add_79); view_182 = add_79 = None
+
+ # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:29 in new_gelu, code: + torch.tanh(math.sqrt(2.0 / math.pi) * (x + 0.044715 * torch.pow(x, 3.0)))
+ alias_56: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.alias.default(alias_39); alias_39 = None
+ alias_57: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.alias.default(alias_56); alias_56 = None
+ mul_153: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.mul.Tensor(alias_57, alias_57); alias_57 = None
+ sub_42: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.sub.Tensor(1, mul_153); mul_153 = None
+ mul_154: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.mul.Tensor(mul_151, sub_42); mul_151 = sub_42 = None
+ mul_155: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.mul.Tensor(mul_154, 0.7978845608028654); mul_154 = None
+ mul_156: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.mul.Tensor(mul_155, 0.044715)
+ pow_15: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.pow.Tensor_Scalar(view_117, 2.0); view_117 = None
+ mul_157: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.mul.Scalar(pow_15, 3.0); pow_15 = None
+ mul_158: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.mul.Tensor(mul_156, mul_157); mul_156 = mul_157 = None
+
+ # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:29 in new_gelu, code: + torch.tanh(math.sqrt(2.0 / math.pi) * (x + 0.044715 * torch.pow(x, 3.0)))
+ add_107: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.add.Tensor(mul_155, mul_158); mul_155 = mul_158 = None
+
+ # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:25 in new_gelu, code: 0.5
+ mul_159: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.mul.Tensor(mul_152, 0.5); mul_152 = None
+
+ # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:25 in new_gelu, code: 0.5
+ add_108: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.add.Tensor(add_107, mul_159); add_107 = mul_159 = None
+
+ # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:128 in forward, code: x = self.c_fc(x)
+ view_183: "f32[64, 3072][3072, 1]cuda:0" = torch.ops.aten.view.default(add_108, [64, 3072]); add_108 = None
+ permute_145: "f32[3072, 768][768, 1]cuda:0" = torch.ops.aten.permute.default(permute_78, [1, 0]); permute_78 = None
+ mm_21: "f32[64, 768][768, 1]cuda:0" = torch.ops.aten.mm.default(view_183, permute_145); permute_145 = None
+ permute_146: "f32[3072, 64][1, 3072]cuda:0" = torch.ops.aten.permute.default(view_183, [1, 0])
+ mm_22: "f32[3072, 768][768, 1]cuda:0" = torch.ops.aten.mm.default(permute_146, view_116); permute_146 = view_116 = None
+ permute_147: "f32[768, 3072][1, 768]cuda:0" = torch.ops.aten.permute.default(mm_22, [1, 0]); mm_22 = None
+ sum_30: "f32[1, 3072][3072, 1]cuda:0" = torch.ops.aten.sum.dim_IntList(view_183, [0], True); view_183 = None
+ view_184: "f32[3072][1]cuda:0" = torch.ops.aten.view.default(sum_30, [3072]); sum_30 = None
+ permute_148: "f32[3072, 768][768, 1]cuda:0" = torch.ops.aten.permute.default(permute_147, [1, 0]); permute_147 = None
+ view_185: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.view.default(mm_21, [1, 64, 768]); mm_21 = None
+
+ # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:43 in forward, code: return F.layer_norm(input, self.weight.shape, self.weight, self.bias, 1e-5)
+ sub_43: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.sub.Tensor(add_75, getitem_109); add_75 = getitem_109 = None
+ mul_160: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.mul.Tensor(sub_43, rsqrt_19); sub_43 = None
+ mul_161: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.mul.Tensor(view_185, primals_118); primals_118 = None
+ mul_162: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.mul.Tensor(mul_161, 768)
+ sum_31: "f32[1, 64, 1][64, 1, 1]cuda:0" = torch.ops.aten.sum.dim_IntList(mul_161, [2], True)
+ mul_163: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.mul.Tensor(mul_161, mul_160); mul_161 = None
+ sum_32: "f32[1, 64, 1][64, 1, 1]cuda:0" = torch.ops.aten.sum.dim_IntList(mul_163, [2], True); mul_163 = None
+ mul_164: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.mul.Tensor(mul_160, sum_32); sum_32 = None
+ sub_44: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.sub.Tensor(mul_162, sum_31); mul_162 = sum_31 = None
+ sub_45: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.sub.Tensor(sub_44, mul_164); sub_44 = mul_164 = None
+ div_5: "f32[1, 64, 1][64, 1, 1]cuda:0" = torch.ops.aten.div.Tensor(rsqrt_19, 768); rsqrt_19 = None
+ mul_165: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.mul.Tensor(div_5, sub_45); div_5 = sub_45 = None
+ mul_166: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.mul.Tensor(view_185, mul_160); mul_160 = None
+ sum_33: "f32[768][1]cuda:0" = torch.ops.aten.sum.dim_IntList(mul_166, [0, 1]); mul_166 = None
+ sum_34: "f32[768][1]cuda:0" = torch.ops.aten.sum.dim_IntList(view_185, [0, 1]); view_185 = None
+
+ # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:43 in forward, code: return F.layer_norm(input, self.weight.shape, self.weight, self.bias, 1e-5)
+ add_109: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.add.Tensor(add_106, mul_165); add_106 = mul_165 = None
+
+ # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:116 in forward, code: y = self.resid_dropout(self.c_proj(y))
+ view_186: "f32[64, 768][768, 1]cuda:0" = torch.ops.aten.view.default(add_109, [64, 768])
+ permute_149: "f32[768, 768][768, 1]cuda:0" = torch.ops.aten.permute.default(permute_77, [1, 0]); permute_77 = None
+ mm_23: "f32[64, 768][768, 1]cuda:0" = torch.ops.aten.mm.default(view_186, permute_149); permute_149 = None
+ permute_150: "f32[768, 64][1, 768]cuda:0" = torch.ops.aten.permute.default(view_186, [1, 0])
+ mm_24: "f32[768, 768][768, 1]cuda:0" = torch.ops.aten.mm.default(permute_150, view_114); permute_150 = view_114 = None
+ permute_151: "f32[768, 768][1, 768]cuda:0" = torch.ops.aten.permute.default(mm_24, [1, 0]); mm_24 = None
+ sum_35: "f32[1, 768][768, 1]cuda:0" = torch.ops.aten.sum.dim_IntList(view_186, [0], True); view_186 = None
+ view_187: "f32[768][1]cuda:0" = torch.ops.aten.view.default(sum_35, [768]); sum_35 = None
+ permute_152: "f32[768, 768][768, 1]cuda:0" = torch.ops.aten.permute.default(permute_151, [1, 0]); permute_151 = None
+ view_188: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.view.default(mm_23, [1, 64, 768]); mm_23 = None
+
+ # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:112 in forward, code: y.transpose(1, 2).contiguous().view(B, T, C)
+ view_189: "f32[1, 64, 12, 64][49152, 768, 64, 1]cuda:0" = torch.ops.aten.view.default(view_188, [1, 64, 12, 64]); view_188 = None
+ permute_153: "f32[1, 12, 64, 64][49152, 64, 768, 1]cuda:0" = torch.ops.aten.permute.default(view_189, [0, 2, 1, 3]); view_189 = None
+
+ # File: 
/data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:96 in forward, code: y = torch.nn.functional.scaled_dot_product_attention(
+ alias_58: "f32[1, 12, 64, 64][49152, 64, 768, 1]cuda:0" = torch.ops.aten.alias.default(alias_37); alias_37 = None
+ alias_59: "f32[1, 12, 64, 64][49152, 64, 768, 1]cuda:0" = torch.ops.aten.alias.default(alias_58); alias_58 = None
+ _scaled_dot_product_efficient_attention_backward_2 = torch.ops.aten._scaled_dot_product_efficient_attention_backward.default(permute_153, permute_74, permute_73, permute_75, None, alias_59, getitem_105, getitem_106, getitem_107, 0.0, [True, True, True, False], True); permute_153 = permute_74 = permute_73 = permute_75 = alias_59 = getitem_105 = getitem_106 = getitem_107 = None
+ getitem_142: "f32[1, 12, 64, 64][49152, 64, 768, 1]cuda:0" = _scaled_dot_product_efficient_attention_backward_2[0]
+ getitem_143: "f32[1, 12, 64, 64][49152, 64, 768, 1]cuda:0" = _scaled_dot_product_efficient_attention_backward_2[1]
+ getitem_144: "f32[1, 12, 64, 64][49152, 64, 768, 1]cuda:0" = _scaled_dot_product_efficient_attention_backward_2[2]; _scaled_dot_product_efficient_attention_backward_2 = None
+
+ # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:89 in forward, code: v = v.view(B, T, self.n_head, C // self.n_head).transpose(
+ permute_154: "f32[1, 64, 12, 64][49152, 768, 64, 1]cuda:0" = torch.ops.aten.permute.default(getitem_144, [0, 2, 1, 3]); getitem_144 = None
+ view_190: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.view.default(permute_154, [1, 64, 768]); permute_154 = None
+
+ # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:86 in forward, code: q = q.view(B, T, self.n_head, C // self.n_head).transpose(
+ permute_155: "f32[1, 64, 12, 64][49152, 768, 64, 1]cuda:0" = torch.ops.aten.permute.default(getitem_142, [0, 2, 1, 3]); getitem_142 = None
+ view_191: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.view.default(permute_155, [1, 64, 768]); permute_155 = None
+
+ # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:83 in forward, code: k = k.view(B, T, self.n_head, C // self.n_head).transpose(
+ permute_156: "f32[1, 64, 12, 64][49152, 768, 64, 1]cuda:0" = torch.ops.aten.permute.default(getitem_143, [0, 2, 1, 3]); getitem_143 = None
+ view_192: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.view.default(permute_156, [1, 64, 768]); permute_156 = None
+
+ # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:82 in forward, code: q, k, v = self.c_attn(x).split(self.n_embd, dim=2)
+ cat_2: "f32[1, 64, 2304][147456, 2304, 1]cuda:0" = torch.ops.aten.cat.default([view_191, view_192, view_190], 2); view_191 = view_192 = view_190 = None
+ view_193: "f32[64, 2304][2304, 1]cuda:0" = torch.ops.aten.view.default(cat_2, [64, 2304]); cat_2 = None
+ permute_157: "f32[2304, 768][768, 1]cuda:0" = torch.ops.aten.permute.default(permute_72, [1, 0]); permute_72 = None
+ mm_25: "f32[64, 768][768, 1]cuda:0" = torch.ops.aten.mm.default(view_193, permute_157); permute_157 = None
+ permute_158: "f32[2304, 64][1, 2304]cuda:0" = torch.ops.aten.permute.default(view_193, [1, 0])
+ mm_26: "f32[2304, 768][768, 1]cuda:0" = torch.ops.aten.mm.default(permute_158, view_108); permute_158 = view_108 = None
+ permute_159: "f32[768, 2304][1, 768]cuda:0" = torch.ops.aten.permute.default(mm_26, [1, 0]); mm_26 = None
+ sum_36: "f32[1, 2304][2304, 1]cuda:0" = torch.ops.aten.sum.dim_IntList(view_193, [0], True); view_193 = None
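+
+ # [annotation added in review, not part of the captured trace] The block above is
+ # the recurring attention backward pattern in this dump: _scaled_dot_product_efficient_attention_backward
+ # returns (dq, dk, dv), the permute/view pairs undo the head-split transposes from
+ # the forward pass, and cat_2 repacks them into the fused c_attn output layout.
+ # mm_25, mm_26 and sum_36 are then the standard backward of a Linear layer
+ # y = x @ W.T + b; a minimal PyTorch sketch of the same math (hypothetical names):
+ #   grad_x = grad_y @ W          # mm_25, with W un-transposed via permute_157
+ #   grad_W = grad_y.T @ x        # mm_26 (layout round-tripped by the adjacent permutes)
+ #   grad_b = grad_y.sum(dim=0)   # sum_36, flattened to [2304] just below
+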
+ view_194: "f32[2304][1]cuda:0" = torch.ops.aten.view.default(sum_36, [2304]); sum_36 = None + permute_160: "f32[2304, 768][768, 1]cuda:0" = torch.ops.aten.permute.default(permute_159, [1, 0]); permute_159 = None + view_195: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.view.default(mm_25, [1, 64, 768]); mm_25 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:43 in forward, code: return F.layer_norm(input, self.weight.shape, self.weight, self.bias, 1e-5) + sub_46: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.sub.Tensor(add_72, getitem_100); add_72 = getitem_100 = None + mul_167: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.mul.Tensor(sub_46, rsqrt_18); sub_46 = None + mul_168: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.mul.Tensor(view_195, primals_112); primals_112 = None + mul_169: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.mul.Tensor(mul_168, 768) + sum_37: "f32[1, 64, 1][64, 1, 1]cuda:0" = torch.ops.aten.sum.dim_IntList(mul_168, [2], True) + mul_170: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.mul.Tensor(mul_168, mul_167); mul_168 = None + sum_38: "f32[1, 64, 1][64, 1, 1]cuda:0" = torch.ops.aten.sum.dim_IntList(mul_170, [2], True); mul_170 = None + mul_171: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.mul.Tensor(mul_167, sum_38); sum_38 = None + sub_47: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.sub.Tensor(mul_169, sum_37); mul_169 = sum_37 = None + sub_48: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.sub.Tensor(sub_47, mul_171); sub_47 = mul_171 = None + div_6: "f32[1, 64, 1][64, 1, 1]cuda:0" = torch.ops.aten.div.Tensor(rsqrt_18, 768); rsqrt_18 = None + mul_172: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.mul.Tensor(div_6, sub_48); div_6 = sub_48 = None + mul_173: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.mul.Tensor(view_195, mul_167); mul_167 = None + sum_39: "f32[768][1]cuda:0" = torch.ops.aten.sum.dim_IntList(mul_173, [0, 1]); mul_173 = None + sum_40: "f32[768][1]cuda:0" = torch.ops.aten.sum.dim_IntList(view_195, [0, 1]); view_195 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:43 in forward, code: return F.layer_norm(input, self.weight.shape, self.weight, self.bias, 1e-5) + add_110: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.add.Tensor(add_109, mul_172); add_109 = mul_172 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:130 in forward, code: x = self.c_proj(x) + view_196: "f32[64, 768][768, 1]cuda:0" = torch.ops.aten.view.default(add_110, [64, 768]) + permute_161: "f32[768, 3072][3072, 1]cuda:0" = torch.ops.aten.permute.default(permute_71, [1, 0]); permute_71 = None + mm_27: "f32[64, 3072][3072, 1]cuda:0" = torch.ops.aten.mm.default(view_196, permute_161); permute_161 = None + permute_162: "f32[768, 64][1, 768]cuda:0" = torch.ops.aten.permute.default(view_196, [1, 0]) + mm_28: "f32[768, 3072][3072, 1]cuda:0" = torch.ops.aten.mm.default(permute_162, view_106); permute_162 = view_106 = None + permute_163: "f32[3072, 768][1, 3072]cuda:0" = torch.ops.aten.permute.default(mm_28, [1, 0]); mm_28 = None + sum_41: "f32[1, 768][768, 1]cuda:0" = torch.ops.aten.sum.dim_IntList(view_196, [0], True); view_196 = None + view_197: "f32[768][1]cuda:0" = torch.ops.aten.view.default(sum_41, [768]); sum_41 = None + permute_164: "f32[768, 3072][3072, 1]cuda:0" = torch.ops.aten.permute.default(permute_163, [1, 0]); permute_163 = None + 
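+ # [annotation added in review, not part of the captured trace] The sub_46 … mul_172
+ # sequence above is the decomposed layer_norm backward. Writing N = 768 (n_embd),
+ # xhat = (x - mean) * rsqrt (mul_167) and g = grad_out * weight (mul_168), the input
+ # gradient assembled by sub_47/sub_48/mul_172 is the textbook formula:
+ #   dx = (rsqrt / N) * (N*g - g.sum(-1, keepdim=True) - xhat * (g * xhat).sum(-1, keepdim=True))
+ # and sum_39 = (grad_out * xhat).sum((0, 1)) is dweight, sum_40 = grad_out.sum((0, 1)) is dbias.
+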
view_198: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.view.default(mm_27, [1, 64, 3072]); mm_27 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:25 in new_gelu, code: 0.5 + mul_174: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.mul.Tensor(view_198, mul_68); mul_68 = None + mul_175: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.mul.Tensor(view_198, add_71); view_198 = add_71 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:29 in new_gelu, code: + torch.tanh(math.sqrt(2.0 / math.pi) * (x + 0.044715 * torch.pow(x, 3.0))) + alias_60: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.alias.default(alias_35); alias_35 = None + alias_61: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.alias.default(alias_60); alias_60 = None + mul_176: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.mul.Tensor(alias_61, alias_61); alias_61 = None + sub_49: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.sub.Tensor(1, mul_176); mul_176 = None + mul_177: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.mul.Tensor(mul_174, sub_49); mul_174 = sub_49 = None + mul_178: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.mul.Tensor(mul_177, 0.7978845608028654); mul_177 = None + mul_179: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.mul.Tensor(mul_178, 0.044715) + pow_16: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.pow.Tensor_Scalar(view_105, 2.0); view_105 = None + mul_180: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.mul.Scalar(pow_16, 3.0); pow_16 = None + mul_181: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.mul.Tensor(mul_179, mul_180); mul_179 = mul_180 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:29 in new_gelu, code: + torch.tanh(math.sqrt(2.0 / math.pi) * (x + 0.044715 * torch.pow(x, 3.0))) + add_111: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.add.Tensor(mul_178, mul_181); mul_178 = mul_181 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:25 in new_gelu, code: 0.5 + mul_182: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.mul.Tensor(mul_175, 0.5); mul_175 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:25 in new_gelu, code: 0.5 + add_112: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.add.Tensor(add_111, mul_182); add_111 = mul_182 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:128 in forward, code: x = self.c_fc(x) + view_199: "f32[64, 3072][3072, 1]cuda:0" = torch.ops.aten.view.default(add_112, [64, 3072]); add_112 = None + permute_165: "f32[3072, 768][768, 1]cuda:0" = torch.ops.aten.permute.default(permute_70, [1, 0]); permute_70 = None + mm_29: "f32[64, 768][768, 1]cuda:0" = torch.ops.aten.mm.default(view_199, permute_165); permute_165 = None + permute_166: "f32[3072, 64][1, 3072]cuda:0" = torch.ops.aten.permute.default(view_199, [1, 0]) + mm_30: "f32[3072, 768][768, 1]cuda:0" = torch.ops.aten.mm.default(permute_166, view_104); permute_166 = view_104 = None + permute_167: "f32[768, 3072][1, 768]cuda:0" = torch.ops.aten.permute.default(mm_30, [1, 0]); mm_30 = None + sum_42: "f32[1, 3072][3072, 1]cuda:0" = torch.ops.aten.sum.dim_IntList(view_199, [0], True); view_199 = None + view_200: "f32[3072][1]cuda:0" = torch.ops.aten.view.default(sum_42, [3072]); sum_42 
= None + permute_168: "f32[3072, 768][768, 1]cuda:0" = torch.ops.aten.permute.default(permute_167, [1, 0]); permute_167 = None + view_201: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.view.default(mm_29, [1, 64, 768]); mm_29 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:43 in forward, code: return F.layer_norm(input, self.weight.shape, self.weight, self.bias, 1e-5) + sub_50: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.sub.Tensor(add_67, getitem_98); add_67 = getitem_98 = None + mul_183: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.mul.Tensor(sub_50, rsqrt_17); sub_50 = None + mul_184: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.mul.Tensor(view_201, primals_106); primals_106 = None + mul_185: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.mul.Tensor(mul_184, 768) + sum_43: "f32[1, 64, 1][64, 1, 1]cuda:0" = torch.ops.aten.sum.dim_IntList(mul_184, [2], True) + mul_186: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.mul.Tensor(mul_184, mul_183); mul_184 = None + sum_44: "f32[1, 64, 1][64, 1, 1]cuda:0" = torch.ops.aten.sum.dim_IntList(mul_186, [2], True); mul_186 = None + mul_187: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.mul.Tensor(mul_183, sum_44); sum_44 = None + sub_51: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.sub.Tensor(mul_185, sum_43); mul_185 = sum_43 = None + sub_52: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.sub.Tensor(sub_51, mul_187); sub_51 = mul_187 = None + div_7: "f32[1, 64, 1][64, 1, 1]cuda:0" = torch.ops.aten.div.Tensor(rsqrt_17, 768); rsqrt_17 = None + mul_188: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.mul.Tensor(div_7, sub_52); div_7 = sub_52 = None + mul_189: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.mul.Tensor(view_201, mul_183); mul_183 = None + sum_45: "f32[768][1]cuda:0" = torch.ops.aten.sum.dim_IntList(mul_189, [0, 1]); mul_189 = None + sum_46: "f32[768][1]cuda:0" = torch.ops.aten.sum.dim_IntList(view_201, [0, 1]); view_201 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:43 in forward, code: return F.layer_norm(input, self.weight.shape, self.weight, self.bias, 1e-5) + add_113: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.add.Tensor(add_110, mul_188); add_110 = mul_188 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:116 in forward, code: y = self.resid_dropout(self.c_proj(y)) + view_202: "f32[64, 768][768, 1]cuda:0" = torch.ops.aten.view.default(add_113, [64, 768]) + permute_169: "f32[768, 768][768, 1]cuda:0" = torch.ops.aten.permute.default(permute_69, [1, 0]); permute_69 = None + mm_31: "f32[64, 768][768, 1]cuda:0" = torch.ops.aten.mm.default(view_202, permute_169); permute_169 = None + permute_170: "f32[768, 64][1, 768]cuda:0" = torch.ops.aten.permute.default(view_202, [1, 0]) + mm_32: "f32[768, 768][768, 1]cuda:0" = torch.ops.aten.mm.default(permute_170, view_102); permute_170 = view_102 = None + permute_171: "f32[768, 768][1, 768]cuda:0" = torch.ops.aten.permute.default(mm_32, [1, 0]); mm_32 = None + sum_47: "f32[1, 768][768, 1]cuda:0" = torch.ops.aten.sum.dim_IntList(view_202, [0], True); view_202 = None + view_203: "f32[768][1]cuda:0" = torch.ops.aten.view.default(sum_47, [768]); sum_47 = None + permute_172: "f32[768, 768][768, 1]cuda:0" = torch.ops.aten.permute.default(permute_171, [1, 0]); permute_171 = None + view_204: "f32[1, 64, 768][49152, 768, 1]cuda:0" = 
torch.ops.aten.view.default(mm_31, [1, 64, 768]); mm_31 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:112 in forward, code: y.transpose(1, 2).contiguous().view(B, T, C) + view_205: "f32[1, 64, 12, 64][49152, 768, 64, 1]cuda:0" = torch.ops.aten.view.default(view_204, [1, 64, 12, 64]); view_204 = None + permute_173: "f32[1, 12, 64, 64][49152, 64, 768, 1]cuda:0" = torch.ops.aten.permute.default(view_205, [0, 2, 1, 3]); view_205 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:96 in forward, code: y = torch.nn.functional.scaled_dot_product_attention( + alias_62: "f32[1, 12, 64, 64][49152, 64, 768, 1]cuda:0" = torch.ops.aten.alias.default(alias_33); alias_33 = None + alias_63: "f32[1, 12, 64, 64][49152, 64, 768, 1]cuda:0" = torch.ops.aten.alias.default(alias_62); alias_62 = None + _scaled_dot_product_efficient_attention_backward_3 = torch.ops.aten._scaled_dot_product_efficient_attention_backward.default(permute_173, permute_66, permute_65, permute_67, None, alias_63, getitem_94, getitem_95, getitem_96, 0.0, [True, True, True, False], True); permute_173 = permute_66 = permute_65 = permute_67 = alias_63 = getitem_94 = getitem_95 = getitem_96 = None + getitem_146: "f32[1, 12, 64, 64][49152, 64, 768, 1]cuda:0" = _scaled_dot_product_efficient_attention_backward_3[0] + getitem_147: "f32[1, 12, 64, 64][49152, 64, 768, 1]cuda:0" = _scaled_dot_product_efficient_attention_backward_3[1] + getitem_148: "f32[1, 12, 64, 64][49152, 64, 768, 1]cuda:0" = _scaled_dot_product_efficient_attention_backward_3[2]; _scaled_dot_product_efficient_attention_backward_3 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:89 in forward, code: v = v.view(B, T, self.n_head, C // self.n_head).transpose( + permute_174: "f32[1, 64, 12, 64][49152, 768, 64, 1]cuda:0" = torch.ops.aten.permute.default(getitem_148, [0, 2, 1, 3]); getitem_148 = None + view_206: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.view.default(permute_174, [1, 64, 768]); permute_174 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:86 in forward, code: q = q.view(B, T, self.n_head, C // self.n_head).transpose( + permute_175: "f32[1, 64, 12, 64][49152, 768, 64, 1]cuda:0" = torch.ops.aten.permute.default(getitem_146, [0, 2, 1, 3]); getitem_146 = None + view_207: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.view.default(permute_175, [1, 64, 768]); permute_175 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:83 in forward, code: k = k.view(B, T, self.n_head, C // self.n_head).transpose( + permute_176: "f32[1, 64, 12, 64][49152, 768, 64, 1]cuda:0" = torch.ops.aten.permute.default(getitem_147, [0, 2, 1, 3]); getitem_147 = None + view_208: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.view.default(permute_176, [1, 64, 768]); permute_176 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:82 in forward, code: q, k, v = self.c_attn(x).split(self.n_embd, dim=2) + cat_3: "f32[1, 64, 2304][147456, 2304, 1]cuda:0" = torch.ops.aten.cat.default([view_207, view_208, view_206], 2); view_207 = view_208 = view_206 = None + view_209: "f32[64, 2304][2304, 1]cuda:0" = torch.ops.aten.view.default(cat_3, [64, 2304]); cat_3 = None + permute_177: "f32[2304, 768][768, 1]cuda:0" = torch.ops.aten.permute.default(permute_64, [1, 0]); permute_64 = None + mm_33: "f32[64, 768][768, 1]cuda:0" = 
torch.ops.aten.mm.default(view_209, permute_177); permute_177 = None + permute_178: "f32[2304, 64][1, 2304]cuda:0" = torch.ops.aten.permute.default(view_209, [1, 0]) + mm_34: "f32[2304, 768][768, 1]cuda:0" = torch.ops.aten.mm.default(permute_178, view_96); permute_178 = view_96 = None + permute_179: "f32[768, 2304][1, 768]cuda:0" = torch.ops.aten.permute.default(mm_34, [1, 0]); mm_34 = None + sum_48: "f32[1, 2304][2304, 1]cuda:0" = torch.ops.aten.sum.dim_IntList(view_209, [0], True); view_209 = None + view_210: "f32[2304][1]cuda:0" = torch.ops.aten.view.default(sum_48, [2304]); sum_48 = None + permute_180: "f32[2304, 768][768, 1]cuda:0" = torch.ops.aten.permute.default(permute_179, [1, 0]); permute_179 = None + view_211: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.view.default(mm_33, [1, 64, 768]); mm_33 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:43 in forward, code: return F.layer_norm(input, self.weight.shape, self.weight, self.bias, 1e-5) + sub_53: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.sub.Tensor(add_64, getitem_89); add_64 = getitem_89 = None + mul_190: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.mul.Tensor(sub_53, rsqrt_16); sub_53 = None + mul_191: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.mul.Tensor(view_211, primals_100); primals_100 = None + mul_192: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.mul.Tensor(mul_191, 768) + sum_49: "f32[1, 64, 1][64, 1, 1]cuda:0" = torch.ops.aten.sum.dim_IntList(mul_191, [2], True) + mul_193: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.mul.Tensor(mul_191, mul_190); mul_191 = None + sum_50: "f32[1, 64, 1][64, 1, 1]cuda:0" = torch.ops.aten.sum.dim_IntList(mul_193, [2], True); mul_193 = None + mul_194: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.mul.Tensor(mul_190, sum_50); sum_50 = None + sub_54: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.sub.Tensor(mul_192, sum_49); mul_192 = sum_49 = None + sub_55: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.sub.Tensor(sub_54, mul_194); sub_54 = mul_194 = None + div_8: "f32[1, 64, 1][64, 1, 1]cuda:0" = torch.ops.aten.div.Tensor(rsqrt_16, 768); rsqrt_16 = None + mul_195: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.mul.Tensor(div_8, sub_55); div_8 = sub_55 = None + mul_196: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.mul.Tensor(view_211, mul_190); mul_190 = None + sum_51: "f32[768][1]cuda:0" = torch.ops.aten.sum.dim_IntList(mul_196, [0, 1]); mul_196 = None + sum_52: "f32[768][1]cuda:0" = torch.ops.aten.sum.dim_IntList(view_211, [0, 1]); view_211 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:43 in forward, code: return F.layer_norm(input, self.weight.shape, self.weight, self.bias, 1e-5) + add_114: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.add.Tensor(add_113, mul_195); add_113 = mul_195 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:130 in forward, code: x = self.c_proj(x) + view_212: "f32[64, 768][768, 1]cuda:0" = torch.ops.aten.view.default(add_114, [64, 768]) + permute_181: "f32[768, 3072][3072, 1]cuda:0" = torch.ops.aten.permute.default(permute_63, [1, 0]); permute_63 = None + mm_35: "f32[64, 3072][3072, 1]cuda:0" = torch.ops.aten.mm.default(view_212, permute_181); permute_181 = None + permute_182: "f32[768, 64][1, 768]cuda:0" = torch.ops.aten.permute.default(view_212, [1, 0]) + mm_36: "f32[768, 3072][3072, 1]cuda:0" = 
torch.ops.aten.mm.default(permute_182, view_94); permute_182 = view_94 = None + permute_183: "f32[3072, 768][1, 3072]cuda:0" = torch.ops.aten.permute.default(mm_36, [1, 0]); mm_36 = None + sum_53: "f32[1, 768][768, 1]cuda:0" = torch.ops.aten.sum.dim_IntList(view_212, [0], True); view_212 = None + view_213: "f32[768][1]cuda:0" = torch.ops.aten.view.default(sum_53, [768]); sum_53 = None + permute_184: "f32[768, 3072][3072, 1]cuda:0" = torch.ops.aten.permute.default(permute_183, [1, 0]); permute_183 = None + view_214: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.view.default(mm_35, [1, 64, 3072]); mm_35 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:25 in new_gelu, code: 0.5 + mul_197: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.mul.Tensor(view_214, mul_60); mul_60 = None + mul_198: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.mul.Tensor(view_214, add_63); view_214 = add_63 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:29 in new_gelu, code: + torch.tanh(math.sqrt(2.0 / math.pi) * (x + 0.044715 * torch.pow(x, 3.0))) + alias_64: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.alias.default(alias_31); alias_31 = None + alias_65: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.alias.default(alias_64); alias_64 = None + mul_199: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.mul.Tensor(alias_65, alias_65); alias_65 = None + sub_56: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.sub.Tensor(1, mul_199); mul_199 = None + mul_200: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.mul.Tensor(mul_197, sub_56); mul_197 = sub_56 = None + mul_201: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.mul.Tensor(mul_200, 0.7978845608028654); mul_200 = None + mul_202: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.mul.Tensor(mul_201, 0.044715) + pow_17: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.pow.Tensor_Scalar(view_93, 2.0); view_93 = None + mul_203: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.mul.Scalar(pow_17, 3.0); pow_17 = None + mul_204: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.mul.Tensor(mul_202, mul_203); mul_202 = mul_203 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:29 in new_gelu, code: + torch.tanh(math.sqrt(2.0 / math.pi) * (x + 0.044715 * torch.pow(x, 3.0))) + add_115: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.add.Tensor(mul_201, mul_204); mul_201 = mul_204 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:25 in new_gelu, code: 0.5 + mul_205: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.mul.Tensor(mul_198, 0.5); mul_198 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:25 in new_gelu, code: 0.5 + add_116: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.add.Tensor(add_115, mul_205); add_115 = mul_205 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:128 in forward, code: x = self.c_fc(x) + view_215: "f32[64, 3072][3072, 1]cuda:0" = torch.ops.aten.view.default(add_116, [64, 3072]); add_116 = None + permute_185: "f32[3072, 768][768, 1]cuda:0" = torch.ops.aten.permute.default(permute_62, [1, 0]); permute_62 = None + mm_37: "f32[64, 768][768, 1]cuda:0" = torch.ops.aten.mm.default(view_215, permute_185); permute_185 = None + permute_186: 
"f32[3072, 64][1, 3072]cuda:0" = torch.ops.aten.permute.default(view_215, [1, 0]) + mm_38: "f32[3072, 768][768, 1]cuda:0" = torch.ops.aten.mm.default(permute_186, view_92); permute_186 = view_92 = None + permute_187: "f32[768, 3072][1, 768]cuda:0" = torch.ops.aten.permute.default(mm_38, [1, 0]); mm_38 = None + sum_54: "f32[1, 3072][3072, 1]cuda:0" = torch.ops.aten.sum.dim_IntList(view_215, [0], True); view_215 = None + view_216: "f32[3072][1]cuda:0" = torch.ops.aten.view.default(sum_54, [3072]); sum_54 = None + permute_188: "f32[3072, 768][768, 1]cuda:0" = torch.ops.aten.permute.default(permute_187, [1, 0]); permute_187 = None + view_217: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.view.default(mm_37, [1, 64, 768]); mm_37 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:43 in forward, code: return F.layer_norm(input, self.weight.shape, self.weight, self.bias, 1e-5) + sub_57: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.sub.Tensor(add_59, getitem_87); add_59 = getitem_87 = None + mul_206: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.mul.Tensor(sub_57, rsqrt_15); sub_57 = None + mul_207: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.mul.Tensor(view_217, primals_94); primals_94 = None + mul_208: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.mul.Tensor(mul_207, 768) + sum_55: "f32[1, 64, 1][64, 1, 1]cuda:0" = torch.ops.aten.sum.dim_IntList(mul_207, [2], True) + mul_209: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.mul.Tensor(mul_207, mul_206); mul_207 = None + sum_56: "f32[1, 64, 1][64, 1, 1]cuda:0" = torch.ops.aten.sum.dim_IntList(mul_209, [2], True); mul_209 = None + mul_210: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.mul.Tensor(mul_206, sum_56); sum_56 = None + sub_58: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.sub.Tensor(mul_208, sum_55); mul_208 = sum_55 = None + sub_59: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.sub.Tensor(sub_58, mul_210); sub_58 = mul_210 = None + div_9: "f32[1, 64, 1][64, 1, 1]cuda:0" = torch.ops.aten.div.Tensor(rsqrt_15, 768); rsqrt_15 = None + mul_211: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.mul.Tensor(div_9, sub_59); div_9 = sub_59 = None + mul_212: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.mul.Tensor(view_217, mul_206); mul_206 = None + sum_57: "f32[768][1]cuda:0" = torch.ops.aten.sum.dim_IntList(mul_212, [0, 1]); mul_212 = None + sum_58: "f32[768][1]cuda:0" = torch.ops.aten.sum.dim_IntList(view_217, [0, 1]); view_217 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:43 in forward, code: return F.layer_norm(input, self.weight.shape, self.weight, self.bias, 1e-5) + add_117: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.add.Tensor(add_114, mul_211); add_114 = mul_211 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:116 in forward, code: y = self.resid_dropout(self.c_proj(y)) + view_218: "f32[64, 768][768, 1]cuda:0" = torch.ops.aten.view.default(add_117, [64, 768]) + permute_189: "f32[768, 768][768, 1]cuda:0" = torch.ops.aten.permute.default(permute_61, [1, 0]); permute_61 = None + mm_39: "f32[64, 768][768, 1]cuda:0" = torch.ops.aten.mm.default(view_218, permute_189); permute_189 = None + permute_190: "f32[768, 64][1, 768]cuda:0" = torch.ops.aten.permute.default(view_218, [1, 0]) + mm_40: "f32[768, 768][768, 1]cuda:0" = torch.ops.aten.mm.default(permute_190, view_90); permute_190 = view_90 = None + 
permute_191: "f32[768, 768][1, 768]cuda:0" = torch.ops.aten.permute.default(mm_40, [1, 0]); mm_40 = None + sum_59: "f32[1, 768][768, 1]cuda:0" = torch.ops.aten.sum.dim_IntList(view_218, [0], True); view_218 = None + view_219: "f32[768][1]cuda:0" = torch.ops.aten.view.default(sum_59, [768]); sum_59 = None + permute_192: "f32[768, 768][768, 1]cuda:0" = torch.ops.aten.permute.default(permute_191, [1, 0]); permute_191 = None + view_220: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.view.default(mm_39, [1, 64, 768]); mm_39 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:112 in forward, code: y.transpose(1, 2).contiguous().view(B, T, C) + view_221: "f32[1, 64, 12, 64][49152, 768, 64, 1]cuda:0" = torch.ops.aten.view.default(view_220, [1, 64, 12, 64]); view_220 = None + permute_193: "f32[1, 12, 64, 64][49152, 64, 768, 1]cuda:0" = torch.ops.aten.permute.default(view_221, [0, 2, 1, 3]); view_221 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:96 in forward, code: y = torch.nn.functional.scaled_dot_product_attention( + alias_66: "f32[1, 12, 64, 64][49152, 64, 768, 1]cuda:0" = torch.ops.aten.alias.default(alias_29); alias_29 = None + alias_67: "f32[1, 12, 64, 64][49152, 64, 768, 1]cuda:0" = torch.ops.aten.alias.default(alias_66); alias_66 = None + _scaled_dot_product_efficient_attention_backward_4 = torch.ops.aten._scaled_dot_product_efficient_attention_backward.default(permute_193, permute_58, permute_57, permute_59, None, alias_67, getitem_83, getitem_84, getitem_85, 0.0, [True, True, True, False], True); permute_193 = permute_58 = permute_57 = permute_59 = alias_67 = getitem_83 = getitem_84 = getitem_85 = None + getitem_150: "f32[1, 12, 64, 64][49152, 64, 768, 1]cuda:0" = _scaled_dot_product_efficient_attention_backward_4[0] + getitem_151: "f32[1, 12, 64, 64][49152, 64, 768, 1]cuda:0" = _scaled_dot_product_efficient_attention_backward_4[1] + getitem_152: "f32[1, 12, 64, 64][49152, 64, 768, 1]cuda:0" = _scaled_dot_product_efficient_attention_backward_4[2]; _scaled_dot_product_efficient_attention_backward_4 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:89 in forward, code: v = v.view(B, T, self.n_head, C // self.n_head).transpose( + permute_194: "f32[1, 64, 12, 64][49152, 768, 64, 1]cuda:0" = torch.ops.aten.permute.default(getitem_152, [0, 2, 1, 3]); getitem_152 = None + view_222: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.view.default(permute_194, [1, 64, 768]); permute_194 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:86 in forward, code: q = q.view(B, T, self.n_head, C // self.n_head).transpose( + permute_195: "f32[1, 64, 12, 64][49152, 768, 64, 1]cuda:0" = torch.ops.aten.permute.default(getitem_150, [0, 2, 1, 3]); getitem_150 = None + view_223: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.view.default(permute_195, [1, 64, 768]); permute_195 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:83 in forward, code: k = k.view(B, T, self.n_head, C // self.n_head).transpose( + permute_196: "f32[1, 64, 12, 64][49152, 768, 64, 1]cuda:0" = torch.ops.aten.permute.default(getitem_151, [0, 2, 1, 3]); getitem_151 = None + view_224: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.view.default(permute_196, [1, 64, 768]); permute_196 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:82 in forward, code: q, k, v = 
self.c_attn(x).split(self.n_embd, dim=2) + cat_4: "f32[1, 64, 2304][147456, 2304, 1]cuda:0" = torch.ops.aten.cat.default([view_223, view_224, view_222], 2); view_223 = view_224 = view_222 = None + view_225: "f32[64, 2304][2304, 1]cuda:0" = torch.ops.aten.view.default(cat_4, [64, 2304]); cat_4 = None + permute_197: "f32[2304, 768][768, 1]cuda:0" = torch.ops.aten.permute.default(permute_56, [1, 0]); permute_56 = None + mm_41: "f32[64, 768][768, 1]cuda:0" = torch.ops.aten.mm.default(view_225, permute_197); permute_197 = None + permute_198: "f32[2304, 64][1, 2304]cuda:0" = torch.ops.aten.permute.default(view_225, [1, 0]) + mm_42: "f32[2304, 768][768, 1]cuda:0" = torch.ops.aten.mm.default(permute_198, view_84); permute_198 = view_84 = None + permute_199: "f32[768, 2304][1, 768]cuda:0" = torch.ops.aten.permute.default(mm_42, [1, 0]); mm_42 = None + sum_60: "f32[1, 2304][2304, 1]cuda:0" = torch.ops.aten.sum.dim_IntList(view_225, [0], True); view_225 = None + view_226: "f32[2304][1]cuda:0" = torch.ops.aten.view.default(sum_60, [2304]); sum_60 = None + permute_200: "f32[2304, 768][768, 1]cuda:0" = torch.ops.aten.permute.default(permute_199, [1, 0]); permute_199 = None + view_227: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.view.default(mm_41, [1, 64, 768]); mm_41 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:43 in forward, code: return F.layer_norm(input, self.weight.shape, self.weight, self.bias, 1e-5) + sub_60: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.sub.Tensor(add_56, getitem_78); add_56 = getitem_78 = None + mul_213: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.mul.Tensor(sub_60, rsqrt_14); sub_60 = None + mul_214: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.mul.Tensor(view_227, primals_88); primals_88 = None + mul_215: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.mul.Tensor(mul_214, 768) + sum_61: "f32[1, 64, 1][64, 1, 1]cuda:0" = torch.ops.aten.sum.dim_IntList(mul_214, [2], True) + mul_216: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.mul.Tensor(mul_214, mul_213); mul_214 = None + sum_62: "f32[1, 64, 1][64, 1, 1]cuda:0" = torch.ops.aten.sum.dim_IntList(mul_216, [2], True); mul_216 = None + mul_217: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.mul.Tensor(mul_213, sum_62); sum_62 = None + sub_61: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.sub.Tensor(mul_215, sum_61); mul_215 = sum_61 = None + sub_62: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.sub.Tensor(sub_61, mul_217); sub_61 = mul_217 = None + div_10: "f32[1, 64, 1][64, 1, 1]cuda:0" = torch.ops.aten.div.Tensor(rsqrt_14, 768); rsqrt_14 = None + mul_218: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.mul.Tensor(div_10, sub_62); div_10 = sub_62 = None + mul_219: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.mul.Tensor(view_227, mul_213); mul_213 = None + sum_63: "f32[768][1]cuda:0" = torch.ops.aten.sum.dim_IntList(mul_219, [0, 1]); mul_219 = None + sum_64: "f32[768][1]cuda:0" = torch.ops.aten.sum.dim_IntList(view_227, [0, 1]); view_227 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:43 in forward, code: return F.layer_norm(input, self.weight.shape, self.weight, self.bias, 1e-5) + add_118: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.add.Tensor(add_117, mul_218); add_117 = mul_218 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:130 in forward, code: x = self.c_proj(x) + 
view_228: "f32[64, 768][768, 1]cuda:0" = torch.ops.aten.view.default(add_118, [64, 768]) + permute_201: "f32[768, 3072][3072, 1]cuda:0" = torch.ops.aten.permute.default(permute_55, [1, 0]); permute_55 = None + mm_43: "f32[64, 3072][3072, 1]cuda:0" = torch.ops.aten.mm.default(view_228, permute_201); permute_201 = None + permute_202: "f32[768, 64][1, 768]cuda:0" = torch.ops.aten.permute.default(view_228, [1, 0]) + mm_44: "f32[768, 3072][3072, 1]cuda:0" = torch.ops.aten.mm.default(permute_202, view_82); permute_202 = view_82 = None + permute_203: "f32[3072, 768][1, 3072]cuda:0" = torch.ops.aten.permute.default(mm_44, [1, 0]); mm_44 = None + sum_65: "f32[1, 768][768, 1]cuda:0" = torch.ops.aten.sum.dim_IntList(view_228, [0], True); view_228 = None + view_229: "f32[768][1]cuda:0" = torch.ops.aten.view.default(sum_65, [768]); sum_65 = None + permute_204: "f32[768, 3072][3072, 1]cuda:0" = torch.ops.aten.permute.default(permute_203, [1, 0]); permute_203 = None + view_230: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.view.default(mm_43, [1, 64, 3072]); mm_43 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:25 in new_gelu, code: 0.5 + mul_220: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.mul.Tensor(view_230, mul_52); mul_52 = None + mul_221: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.mul.Tensor(view_230, add_55); view_230 = add_55 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:29 in new_gelu, code: + torch.tanh(math.sqrt(2.0 / math.pi) * (x + 0.044715 * torch.pow(x, 3.0))) + alias_68: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.alias.default(alias_27); alias_27 = None + alias_69: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.alias.default(alias_68); alias_68 = None + mul_222: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.mul.Tensor(alias_69, alias_69); alias_69 = None + sub_63: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.sub.Tensor(1, mul_222); mul_222 = None + mul_223: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.mul.Tensor(mul_220, sub_63); mul_220 = sub_63 = None + mul_224: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.mul.Tensor(mul_223, 0.7978845608028654); mul_223 = None + mul_225: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.mul.Tensor(mul_224, 0.044715) + pow_18: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.pow.Tensor_Scalar(view_81, 2.0); view_81 = None + mul_226: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.mul.Scalar(pow_18, 3.0); pow_18 = None + mul_227: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.mul.Tensor(mul_225, mul_226); mul_225 = mul_226 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:29 in new_gelu, code: + torch.tanh(math.sqrt(2.0 / math.pi) * (x + 0.044715 * torch.pow(x, 3.0))) + add_119: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.add.Tensor(mul_224, mul_227); mul_224 = mul_227 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:25 in new_gelu, code: 0.5 + mul_228: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.mul.Tensor(mul_221, 0.5); mul_221 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:25 in new_gelu, code: 0.5 + add_120: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.add.Tensor(add_119, mul_228); add_119 = mul_228 = None + + # File: 
/data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:128 in forward, code: x = self.c_fc(x) + view_231: "f32[64, 3072][3072, 1]cuda:0" = torch.ops.aten.view.default(add_120, [64, 3072]); add_120 = None + permute_205: "f32[3072, 768][768, 1]cuda:0" = torch.ops.aten.permute.default(permute_54, [1, 0]); permute_54 = None + mm_45: "f32[64, 768][768, 1]cuda:0" = torch.ops.aten.mm.default(view_231, permute_205); permute_205 = None + permute_206: "f32[3072, 64][1, 3072]cuda:0" = torch.ops.aten.permute.default(view_231, [1, 0]) + mm_46: "f32[3072, 768][768, 1]cuda:0" = torch.ops.aten.mm.default(permute_206, view_80); permute_206 = view_80 = None + permute_207: "f32[768, 3072][1, 768]cuda:0" = torch.ops.aten.permute.default(mm_46, [1, 0]); mm_46 = None + sum_66: "f32[1, 3072][3072, 1]cuda:0" = torch.ops.aten.sum.dim_IntList(view_231, [0], True); view_231 = None + view_232: "f32[3072][1]cuda:0" = torch.ops.aten.view.default(sum_66, [3072]); sum_66 = None + permute_208: "f32[3072, 768][768, 1]cuda:0" = torch.ops.aten.permute.default(permute_207, [1, 0]); permute_207 = None + view_233: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.view.default(mm_45, [1, 64, 768]); mm_45 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:43 in forward, code: return F.layer_norm(input, self.weight.shape, self.weight, self.bias, 1e-5) + sub_64: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.sub.Tensor(add_51, getitem_76); add_51 = getitem_76 = None + mul_229: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.mul.Tensor(sub_64, rsqrt_13); sub_64 = None + mul_230: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.mul.Tensor(view_233, primals_82); primals_82 = None + mul_231: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.mul.Tensor(mul_230, 768) + sum_67: "f32[1, 64, 1][64, 1, 1]cuda:0" = torch.ops.aten.sum.dim_IntList(mul_230, [2], True) + mul_232: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.mul.Tensor(mul_230, mul_229); mul_230 = None + sum_68: "f32[1, 64, 1][64, 1, 1]cuda:0" = torch.ops.aten.sum.dim_IntList(mul_232, [2], True); mul_232 = None + mul_233: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.mul.Tensor(mul_229, sum_68); sum_68 = None + sub_65: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.sub.Tensor(mul_231, sum_67); mul_231 = sum_67 = None + sub_66: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.sub.Tensor(sub_65, mul_233); sub_65 = mul_233 = None + div_11: "f32[1, 64, 1][64, 1, 1]cuda:0" = torch.ops.aten.div.Tensor(rsqrt_13, 768); rsqrt_13 = None + mul_234: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.mul.Tensor(div_11, sub_66); div_11 = sub_66 = None + mul_235: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.mul.Tensor(view_233, mul_229); mul_229 = None + sum_69: "f32[768][1]cuda:0" = torch.ops.aten.sum.dim_IntList(mul_235, [0, 1]); mul_235 = None + sum_70: "f32[768][1]cuda:0" = torch.ops.aten.sum.dim_IntList(view_233, [0, 1]); view_233 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:43 in forward, code: return F.layer_norm(input, self.weight.shape, self.weight, self.bias, 1e-5) + add_121: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.add.Tensor(add_118, mul_234); add_118 = mul_234 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:116 in forward, code: y = self.resid_dropout(self.c_proj(y)) + view_234: "f32[64, 768][768, 1]cuda:0" = 
torch.ops.aten.view.default(add_121, [64, 768]) + permute_209: "f32[768, 768][768, 1]cuda:0" = torch.ops.aten.permute.default(permute_53, [1, 0]); permute_53 = None + mm_47: "f32[64, 768][768, 1]cuda:0" = torch.ops.aten.mm.default(view_234, permute_209); permute_209 = None + permute_210: "f32[768, 64][1, 768]cuda:0" = torch.ops.aten.permute.default(view_234, [1, 0]) + mm_48: "f32[768, 768][768, 1]cuda:0" = torch.ops.aten.mm.default(permute_210, view_78); permute_210 = view_78 = None + permute_211: "f32[768, 768][1, 768]cuda:0" = torch.ops.aten.permute.default(mm_48, [1, 0]); mm_48 = None + sum_71: "f32[1, 768][768, 1]cuda:0" = torch.ops.aten.sum.dim_IntList(view_234, [0], True); view_234 = None + view_235: "f32[768][1]cuda:0" = torch.ops.aten.view.default(sum_71, [768]); sum_71 = None + permute_212: "f32[768, 768][768, 1]cuda:0" = torch.ops.aten.permute.default(permute_211, [1, 0]); permute_211 = None + view_236: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.view.default(mm_47, [1, 64, 768]); mm_47 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:112 in forward, code: y.transpose(1, 2).contiguous().view(B, T, C) + view_237: "f32[1, 64, 12, 64][49152, 768, 64, 1]cuda:0" = torch.ops.aten.view.default(view_236, [1, 64, 12, 64]); view_236 = None + permute_213: "f32[1, 12, 64, 64][49152, 64, 768, 1]cuda:0" = torch.ops.aten.permute.default(view_237, [0, 2, 1, 3]); view_237 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:96 in forward, code: y = torch.nn.functional.scaled_dot_product_attention( + alias_70: "f32[1, 12, 64, 64][49152, 64, 768, 1]cuda:0" = torch.ops.aten.alias.default(alias_25); alias_25 = None + alias_71: "f32[1, 12, 64, 64][49152, 64, 768, 1]cuda:0" = torch.ops.aten.alias.default(alias_70); alias_70 = None + _scaled_dot_product_efficient_attention_backward_5 = torch.ops.aten._scaled_dot_product_efficient_attention_backward.default(permute_213, permute_50, permute_49, permute_51, None, alias_71, getitem_72, getitem_73, getitem_74, 0.0, [True, True, True, False], True); permute_213 = permute_50 = permute_49 = permute_51 = alias_71 = getitem_72 = getitem_73 = getitem_74 = None + getitem_154: "f32[1, 12, 64, 64][49152, 64, 768, 1]cuda:0" = _scaled_dot_product_efficient_attention_backward_5[0] + getitem_155: "f32[1, 12, 64, 64][49152, 64, 768, 1]cuda:0" = _scaled_dot_product_efficient_attention_backward_5[1] + getitem_156: "f32[1, 12, 64, 64][49152, 64, 768, 1]cuda:0" = _scaled_dot_product_efficient_attention_backward_5[2]; _scaled_dot_product_efficient_attention_backward_5 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:89 in forward, code: v = v.view(B, T, self.n_head, C // self.n_head).transpose( + permute_214: "f32[1, 64, 12, 64][49152, 768, 64, 1]cuda:0" = torch.ops.aten.permute.default(getitem_156, [0, 2, 1, 3]); getitem_156 = None + view_238: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.view.default(permute_214, [1, 64, 768]); permute_214 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:86 in forward, code: q = q.view(B, T, self.n_head, C // self.n_head).transpose( + permute_215: "f32[1, 64, 12, 64][49152, 768, 64, 1]cuda:0" = torch.ops.aten.permute.default(getitem_154, [0, 2, 1, 3]); getitem_154 = None + view_239: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.view.default(permute_215, [1, 64, 768]); permute_215 = None + + # File: 
/data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:83 in forward, code: k = k.view(B, T, self.n_head, C // self.n_head).transpose( + permute_216: "f32[1, 64, 12, 64][49152, 768, 64, 1]cuda:0" = torch.ops.aten.permute.default(getitem_155, [0, 2, 1, 3]); getitem_155 = None + view_240: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.view.default(permute_216, [1, 64, 768]); permute_216 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:82 in forward, code: q, k, v = self.c_attn(x).split(self.n_embd, dim=2) + cat_5: "f32[1, 64, 2304][147456, 2304, 1]cuda:0" = torch.ops.aten.cat.default([view_239, view_240, view_238], 2); view_239 = view_240 = view_238 = None + view_241: "f32[64, 2304][2304, 1]cuda:0" = torch.ops.aten.view.default(cat_5, [64, 2304]); cat_5 = None + permute_217: "f32[2304, 768][768, 1]cuda:0" = torch.ops.aten.permute.default(permute_48, [1, 0]); permute_48 = None + mm_49: "f32[64, 768][768, 1]cuda:0" = torch.ops.aten.mm.default(view_241, permute_217); permute_217 = None + permute_218: "f32[2304, 64][1, 2304]cuda:0" = torch.ops.aten.permute.default(view_241, [1, 0]) + mm_50: "f32[2304, 768][768, 1]cuda:0" = torch.ops.aten.mm.default(permute_218, view_72); permute_218 = view_72 = None + permute_219: "f32[768, 2304][1, 768]cuda:0" = torch.ops.aten.permute.default(mm_50, [1, 0]); mm_50 = None + sum_72: "f32[1, 2304][2304, 1]cuda:0" = torch.ops.aten.sum.dim_IntList(view_241, [0], True); view_241 = None + view_242: "f32[2304][1]cuda:0" = torch.ops.aten.view.default(sum_72, [2304]); sum_72 = None + permute_220: "f32[2304, 768][768, 1]cuda:0" = torch.ops.aten.permute.default(permute_219, [1, 0]); permute_219 = None + view_243: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.view.default(mm_49, [1, 64, 768]); mm_49 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:43 in forward, code: return F.layer_norm(input, self.weight.shape, self.weight, self.bias, 1e-5) + sub_67: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.sub.Tensor(add_48, getitem_67); add_48 = getitem_67 = None + mul_236: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.mul.Tensor(sub_67, rsqrt_12); sub_67 = None + mul_237: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.mul.Tensor(view_243, primals_76); primals_76 = None + mul_238: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.mul.Tensor(mul_237, 768) + sum_73: "f32[1, 64, 1][64, 1, 1]cuda:0" = torch.ops.aten.sum.dim_IntList(mul_237, [2], True) + mul_239: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.mul.Tensor(mul_237, mul_236); mul_237 = None + sum_74: "f32[1, 64, 1][64, 1, 1]cuda:0" = torch.ops.aten.sum.dim_IntList(mul_239, [2], True); mul_239 = None + mul_240: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.mul.Tensor(mul_236, sum_74); sum_74 = None + sub_68: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.sub.Tensor(mul_238, sum_73); mul_238 = sum_73 = None + sub_69: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.sub.Tensor(sub_68, mul_240); sub_68 = mul_240 = None + div_12: "f32[1, 64, 1][64, 1, 1]cuda:0" = torch.ops.aten.div.Tensor(rsqrt_12, 768); rsqrt_12 = None + mul_241: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.mul.Tensor(div_12, sub_69); div_12 = sub_69 = None + mul_242: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.mul.Tensor(view_243, mul_236); mul_236 = None + sum_75: "f32[768][1]cuda:0" = torch.ops.aten.sum.dim_IntList(mul_242, [0, 1]); mul_242 = 
None + sum_76: "f32[768][1]cuda:0" = torch.ops.aten.sum.dim_IntList(view_243, [0, 1]); view_243 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:43 in forward, code: return F.layer_norm(input, self.weight.shape, self.weight, self.bias, 1e-5) + add_122: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.add.Tensor(add_121, mul_241); add_121 = mul_241 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:130 in forward, code: x = self.c_proj(x) + view_244: "f32[64, 768][768, 1]cuda:0" = torch.ops.aten.view.default(add_122, [64, 768]) + permute_221: "f32[768, 3072][3072, 1]cuda:0" = torch.ops.aten.permute.default(permute_47, [1, 0]); permute_47 = None + mm_51: "f32[64, 3072][3072, 1]cuda:0" = torch.ops.aten.mm.default(view_244, permute_221); permute_221 = None + permute_222: "f32[768, 64][1, 768]cuda:0" = torch.ops.aten.permute.default(view_244, [1, 0]) + mm_52: "f32[768, 3072][3072, 1]cuda:0" = torch.ops.aten.mm.default(permute_222, view_70); permute_222 = view_70 = None + permute_223: "f32[3072, 768][1, 3072]cuda:0" = torch.ops.aten.permute.default(mm_52, [1, 0]); mm_52 = None + sum_77: "f32[1, 768][768, 1]cuda:0" = torch.ops.aten.sum.dim_IntList(view_244, [0], True); view_244 = None + view_245: "f32[768][1]cuda:0" = torch.ops.aten.view.default(sum_77, [768]); sum_77 = None + permute_224: "f32[768, 3072][3072, 1]cuda:0" = torch.ops.aten.permute.default(permute_223, [1, 0]); permute_223 = None + view_246: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.view.default(mm_51, [1, 64, 3072]); mm_51 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:25 in new_gelu, code: 0.5 + mul_243: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.mul.Tensor(view_246, mul_44); mul_44 = None + mul_244: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.mul.Tensor(view_246, add_47); view_246 = add_47 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:29 in new_gelu, code: + torch.tanh(math.sqrt(2.0 / math.pi) * (x + 0.044715 * torch.pow(x, 3.0))) + alias_72: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.alias.default(alias_23); alias_23 = None + alias_73: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.alias.default(alias_72); alias_72 = None + mul_245: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.mul.Tensor(alias_73, alias_73); alias_73 = None + sub_70: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.sub.Tensor(1, mul_245); mul_245 = None + mul_246: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.mul.Tensor(mul_243, sub_70); mul_243 = sub_70 = None + mul_247: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.mul.Tensor(mul_246, 0.7978845608028654); mul_246 = None + mul_248: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.mul.Tensor(mul_247, 0.044715) + pow_19: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.pow.Tensor_Scalar(view_69, 2.0); view_69 = None + mul_249: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.mul.Scalar(pow_19, 3.0); pow_19 = None + mul_250: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.mul.Tensor(mul_248, mul_249); mul_248 = mul_249 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:29 in new_gelu, code: + torch.tanh(math.sqrt(2.0 / math.pi) * (x + 0.044715 * torch.pow(x, 3.0))) + add_123: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = 
torch.ops.aten.add.Tensor(mul_247, mul_250); mul_247 = mul_250 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:25 in new_gelu, code: 0.5 + mul_251: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.mul.Tensor(mul_244, 0.5); mul_244 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:25 in new_gelu, code: 0.5 + add_124: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.add.Tensor(add_123, mul_251); add_123 = mul_251 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:128 in forward, code: x = self.c_fc(x) + view_247: "f32[64, 3072][3072, 1]cuda:0" = torch.ops.aten.view.default(add_124, [64, 3072]); add_124 = None + permute_225: "f32[3072, 768][768, 1]cuda:0" = torch.ops.aten.permute.default(permute_46, [1, 0]); permute_46 = None + mm_53: "f32[64, 768][768, 1]cuda:0" = torch.ops.aten.mm.default(view_247, permute_225); permute_225 = None + permute_226: "f32[3072, 64][1, 3072]cuda:0" = torch.ops.aten.permute.default(view_247, [1, 0]) + mm_54: "f32[3072, 768][768, 1]cuda:0" = torch.ops.aten.mm.default(permute_226, view_68); permute_226 = view_68 = None + permute_227: "f32[768, 3072][1, 768]cuda:0" = torch.ops.aten.permute.default(mm_54, [1, 0]); mm_54 = None + sum_78: "f32[1, 3072][3072, 1]cuda:0" = torch.ops.aten.sum.dim_IntList(view_247, [0], True); view_247 = None + view_248: "f32[3072][1]cuda:0" = torch.ops.aten.view.default(sum_78, [3072]); sum_78 = None + permute_228: "f32[3072, 768][768, 1]cuda:0" = torch.ops.aten.permute.default(permute_227, [1, 0]); permute_227 = None + view_249: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.view.default(mm_53, [1, 64, 768]); mm_53 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:43 in forward, code: return F.layer_norm(input, self.weight.shape, self.weight, self.bias, 1e-5) + sub_71: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.sub.Tensor(add_43, getitem_65); add_43 = getitem_65 = None + mul_252: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.mul.Tensor(sub_71, rsqrt_11); sub_71 = None + mul_253: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.mul.Tensor(view_249, primals_70); primals_70 = None + mul_254: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.mul.Tensor(mul_253, 768) + sum_79: "f32[1, 64, 1][64, 1, 1]cuda:0" = torch.ops.aten.sum.dim_IntList(mul_253, [2], True) + mul_255: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.mul.Tensor(mul_253, mul_252); mul_253 = None + sum_80: "f32[1, 64, 1][64, 1, 1]cuda:0" = torch.ops.aten.sum.dim_IntList(mul_255, [2], True); mul_255 = None + mul_256: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.mul.Tensor(mul_252, sum_80); sum_80 = None + sub_72: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.sub.Tensor(mul_254, sum_79); mul_254 = sum_79 = None + sub_73: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.sub.Tensor(sub_72, mul_256); sub_72 = mul_256 = None + div_13: "f32[1, 64, 1][64, 1, 1]cuda:0" = torch.ops.aten.div.Tensor(rsqrt_11, 768); rsqrt_11 = None + mul_257: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.mul.Tensor(div_13, sub_73); div_13 = sub_73 = None + mul_258: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.mul.Tensor(view_249, mul_252); mul_252 = None + sum_81: "f32[768][1]cuda:0" = torch.ops.aten.sum.dim_IntList(mul_258, [0, 1]); mul_258 = None + sum_82: "f32[768][1]cuda:0" = torch.ops.aten.sum.dim_IntList(view_249, 
[0, 1]); view_249 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:43 in forward, code: return F.layer_norm(input, self.weight.shape, self.weight, self.bias, 1e-5) + add_125: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.add.Tensor(add_122, mul_257); add_122 = mul_257 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:116 in forward, code: y = self.resid_dropout(self.c_proj(y)) + view_250: "f32[64, 768][768, 1]cuda:0" = torch.ops.aten.view.default(add_125, [64, 768]) + permute_229: "f32[768, 768][768, 1]cuda:0" = torch.ops.aten.permute.default(permute_45, [1, 0]); permute_45 = None + mm_55: "f32[64, 768][768, 1]cuda:0" = torch.ops.aten.mm.default(view_250, permute_229); permute_229 = None + permute_230: "f32[768, 64][1, 768]cuda:0" = torch.ops.aten.permute.default(view_250, [1, 0]) + mm_56: "f32[768, 768][768, 1]cuda:0" = torch.ops.aten.mm.default(permute_230, view_66); permute_230 = view_66 = None + permute_231: "f32[768, 768][1, 768]cuda:0" = torch.ops.aten.permute.default(mm_56, [1, 0]); mm_56 = None + sum_83: "f32[1, 768][768, 1]cuda:0" = torch.ops.aten.sum.dim_IntList(view_250, [0], True); view_250 = None + view_251: "f32[768][1]cuda:0" = torch.ops.aten.view.default(sum_83, [768]); sum_83 = None + permute_232: "f32[768, 768][768, 1]cuda:0" = torch.ops.aten.permute.default(permute_231, [1, 0]); permute_231 = None + view_252: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.view.default(mm_55, [1, 64, 768]); mm_55 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:112 in forward, code: y.transpose(1, 2).contiguous().view(B, T, C) + view_253: "f32[1, 64, 12, 64][49152, 768, 64, 1]cuda:0" = torch.ops.aten.view.default(view_252, [1, 64, 12, 64]); view_252 = None + permute_233: "f32[1, 12, 64, 64][49152, 64, 768, 1]cuda:0" = torch.ops.aten.permute.default(view_253, [0, 2, 1, 3]); view_253 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:96 in forward, code: y = torch.nn.functional.scaled_dot_product_attention( + alias_74: "f32[1, 12, 64, 64][49152, 64, 768, 1]cuda:0" = torch.ops.aten.alias.default(alias_21); alias_21 = None + alias_75: "f32[1, 12, 64, 64][49152, 64, 768, 1]cuda:0" = torch.ops.aten.alias.default(alias_74); alias_74 = None + _scaled_dot_product_efficient_attention_backward_6 = torch.ops.aten._scaled_dot_product_efficient_attention_backward.default(permute_233, permute_42, permute_41, permute_43, None, alias_75, getitem_61, getitem_62, getitem_63, 0.0, [True, True, True, False], True); permute_233 = permute_42 = permute_41 = permute_43 = alias_75 = getitem_61 = getitem_62 = getitem_63 = None + getitem_158: "f32[1, 12, 64, 64][49152, 64, 768, 1]cuda:0" = _scaled_dot_product_efficient_attention_backward_6[0] + getitem_159: "f32[1, 12, 64, 64][49152, 64, 768, 1]cuda:0" = _scaled_dot_product_efficient_attention_backward_6[1] + getitem_160: "f32[1, 12, 64, 64][49152, 64, 768, 1]cuda:0" = _scaled_dot_product_efficient_attention_backward_6[2]; _scaled_dot_product_efficient_attention_backward_6 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:89 in forward, code: v = v.view(B, T, self.n_head, C // self.n_head).transpose( + permute_234: "f32[1, 64, 12, 64][49152, 768, 64, 1]cuda:0" = torch.ops.aten.permute.default(getitem_160, [0, 2, 1, 3]); getitem_160 = None + view_254: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.view.default(permute_234, [1, 64, 
768]); permute_234 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:86 in forward, code: q = q.view(B, T, self.n_head, C // self.n_head).transpose( + permute_235: "f32[1, 64, 12, 64][49152, 768, 64, 1]cuda:0" = torch.ops.aten.permute.default(getitem_158, [0, 2, 1, 3]); getitem_158 = None + view_255: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.view.default(permute_235, [1, 64, 768]); permute_235 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:83 in forward, code: k = k.view(B, T, self.n_head, C // self.n_head).transpose( + permute_236: "f32[1, 64, 12, 64][49152, 768, 64, 1]cuda:0" = torch.ops.aten.permute.default(getitem_159, [0, 2, 1, 3]); getitem_159 = None + view_256: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.view.default(permute_236, [1, 64, 768]); permute_236 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:82 in forward, code: q, k, v = self.c_attn(x).split(self.n_embd, dim=2) + cat_6: "f32[1, 64, 2304][147456, 2304, 1]cuda:0" = torch.ops.aten.cat.default([view_255, view_256, view_254], 2); view_255 = view_256 = view_254 = None + view_257: "f32[64, 2304][2304, 1]cuda:0" = torch.ops.aten.view.default(cat_6, [64, 2304]); cat_6 = None + permute_237: "f32[2304, 768][768, 1]cuda:0" = torch.ops.aten.permute.default(permute_40, [1, 0]); permute_40 = None + mm_57: "f32[64, 768][768, 1]cuda:0" = torch.ops.aten.mm.default(view_257, permute_237); permute_237 = None + permute_238: "f32[2304, 64][1, 2304]cuda:0" = torch.ops.aten.permute.default(view_257, [1, 0]) + mm_58: "f32[2304, 768][768, 1]cuda:0" = torch.ops.aten.mm.default(permute_238, view_60); permute_238 = view_60 = None + permute_239: "f32[768, 2304][1, 768]cuda:0" = torch.ops.aten.permute.default(mm_58, [1, 0]); mm_58 = None + sum_84: "f32[1, 2304][2304, 1]cuda:0" = torch.ops.aten.sum.dim_IntList(view_257, [0], True); view_257 = None + view_258: "f32[2304][1]cuda:0" = torch.ops.aten.view.default(sum_84, [2304]); sum_84 = None + permute_240: "f32[2304, 768][768, 1]cuda:0" = torch.ops.aten.permute.default(permute_239, [1, 0]); permute_239 = None + view_259: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.view.default(mm_57, [1, 64, 768]); mm_57 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:43 in forward, code: return F.layer_norm(input, self.weight.shape, self.weight, self.bias, 1e-5) + sub_74: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.sub.Tensor(add_40, getitem_56); add_40 = getitem_56 = None + mul_259: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.mul.Tensor(sub_74, rsqrt_10); sub_74 = None + mul_260: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.mul.Tensor(view_259, primals_64); primals_64 = None + mul_261: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.mul.Tensor(mul_260, 768) + sum_85: "f32[1, 64, 1][64, 1, 1]cuda:0" = torch.ops.aten.sum.dim_IntList(mul_260, [2], True) + mul_262: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.mul.Tensor(mul_260, mul_259); mul_260 = None + sum_86: "f32[1, 64, 1][64, 1, 1]cuda:0" = torch.ops.aten.sum.dim_IntList(mul_262, [2], True); mul_262 = None + mul_263: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.mul.Tensor(mul_259, sum_86); sum_86 = None + sub_75: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.sub.Tensor(mul_261, sum_85); mul_261 = sum_85 = None + sub_76: "f32[1, 64, 768][49152, 768, 1]cuda:0" = 
torch.ops.aten.sub.Tensor(sub_75, mul_263); sub_75 = mul_263 = None + div_14: "f32[1, 64, 1][64, 1, 1]cuda:0" = torch.ops.aten.div.Tensor(rsqrt_10, 768); rsqrt_10 = None + mul_264: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.mul.Tensor(div_14, sub_76); div_14 = sub_76 = None + mul_265: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.mul.Tensor(view_259, mul_259); mul_259 = None + sum_87: "f32[768][1]cuda:0" = torch.ops.aten.sum.dim_IntList(mul_265, [0, 1]); mul_265 = None + sum_88: "f32[768][1]cuda:0" = torch.ops.aten.sum.dim_IntList(view_259, [0, 1]); view_259 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:43 in forward, code: return F.layer_norm(input, self.weight.shape, self.weight, self.bias, 1e-5) + add_126: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.add.Tensor(add_125, mul_264); add_125 = mul_264 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:130 in forward, code: x = self.c_proj(x) + view_260: "f32[64, 768][768, 1]cuda:0" = torch.ops.aten.view.default(add_126, [64, 768]) + permute_241: "f32[768, 3072][3072, 1]cuda:0" = torch.ops.aten.permute.default(permute_39, [1, 0]); permute_39 = None + mm_59: "f32[64, 3072][3072, 1]cuda:0" = torch.ops.aten.mm.default(view_260, permute_241); permute_241 = None + permute_242: "f32[768, 64][1, 768]cuda:0" = torch.ops.aten.permute.default(view_260, [1, 0]) + mm_60: "f32[768, 3072][3072, 1]cuda:0" = torch.ops.aten.mm.default(permute_242, view_58); permute_242 = view_58 = None + permute_243: "f32[3072, 768][1, 3072]cuda:0" = torch.ops.aten.permute.default(mm_60, [1, 0]); mm_60 = None + sum_89: "f32[1, 768][768, 1]cuda:0" = torch.ops.aten.sum.dim_IntList(view_260, [0], True); view_260 = None + view_261: "f32[768][1]cuda:0" = torch.ops.aten.view.default(sum_89, [768]); sum_89 = None + permute_244: "f32[768, 3072][3072, 1]cuda:0" = torch.ops.aten.permute.default(permute_243, [1, 0]); permute_243 = None + view_262: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.view.default(mm_59, [1, 64, 3072]); mm_59 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:25 in new_gelu, code: 0.5 + mul_266: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.mul.Tensor(view_262, mul_36); mul_36 = None + mul_267: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.mul.Tensor(view_262, add_39); view_262 = add_39 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:29 in new_gelu, code: + torch.tanh(math.sqrt(2.0 / math.pi) * (x + 0.044715 * torch.pow(x, 3.0))) + alias_76: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.alias.default(alias_19); alias_19 = None + alias_77: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.alias.default(alias_76); alias_76 = None + mul_268: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.mul.Tensor(alias_77, alias_77); alias_77 = None + sub_77: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.sub.Tensor(1, mul_268); mul_268 = None + mul_269: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.mul.Tensor(mul_266, sub_77); mul_266 = sub_77 = None + mul_270: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.mul.Tensor(mul_269, 0.7978845608028654); mul_269 = None + mul_271: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.mul.Tensor(mul_270, 0.044715) + pow_20: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.pow.Tensor_Scalar(view_57, 2.0); view_57 
= None + mul_272: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.mul.Scalar(pow_20, 3.0); pow_20 = None + mul_273: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.mul.Tensor(mul_271, mul_272); mul_271 = mul_272 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:29 in new_gelu, code: + torch.tanh(math.sqrt(2.0 / math.pi) * (x + 0.044715 * torch.pow(x, 3.0))) + add_127: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.add.Tensor(mul_270, mul_273); mul_270 = mul_273 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:25 in new_gelu, code: 0.5 + mul_274: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.mul.Tensor(mul_267, 0.5); mul_267 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:25 in new_gelu, code: 0.5 + add_128: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.add.Tensor(add_127, mul_274); add_127 = mul_274 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:128 in forward, code: x = self.c_fc(x) + view_263: "f32[64, 3072][3072, 1]cuda:0" = torch.ops.aten.view.default(add_128, [64, 3072]); add_128 = None + permute_245: "f32[3072, 768][768, 1]cuda:0" = torch.ops.aten.permute.default(permute_38, [1, 0]); permute_38 = None + mm_61: "f32[64, 768][768, 1]cuda:0" = torch.ops.aten.mm.default(view_263, permute_245); permute_245 = None + permute_246: "f32[3072, 64][1, 3072]cuda:0" = torch.ops.aten.permute.default(view_263, [1, 0]) + mm_62: "f32[3072, 768][768, 1]cuda:0" = torch.ops.aten.mm.default(permute_246, view_56); permute_246 = view_56 = None + permute_247: "f32[768, 3072][1, 768]cuda:0" = torch.ops.aten.permute.default(mm_62, [1, 0]); mm_62 = None + sum_90: "f32[1, 3072][3072, 1]cuda:0" = torch.ops.aten.sum.dim_IntList(view_263, [0], True); view_263 = None + view_264: "f32[3072][1]cuda:0" = torch.ops.aten.view.default(sum_90, [3072]); sum_90 = None + permute_248: "f32[3072, 768][768, 1]cuda:0" = torch.ops.aten.permute.default(permute_247, [1, 0]); permute_247 = None + view_265: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.view.default(mm_61, [1, 64, 768]); mm_61 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:43 in forward, code: return F.layer_norm(input, self.weight.shape, self.weight, self.bias, 1e-5) + sub_78: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.sub.Tensor(add_35, getitem_54); add_35 = getitem_54 = None + mul_275: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.mul.Tensor(sub_78, rsqrt_9); sub_78 = None + mul_276: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.mul.Tensor(view_265, primals_58); primals_58 = None + mul_277: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.mul.Tensor(mul_276, 768) + sum_91: "f32[1, 64, 1][64, 1, 1]cuda:0" = torch.ops.aten.sum.dim_IntList(mul_276, [2], True) + mul_278: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.mul.Tensor(mul_276, mul_275); mul_276 = None + sum_92: "f32[1, 64, 1][64, 1, 1]cuda:0" = torch.ops.aten.sum.dim_IntList(mul_278, [2], True); mul_278 = None + mul_279: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.mul.Tensor(mul_275, sum_92); sum_92 = None + sub_79: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.sub.Tensor(mul_277, sum_91); mul_277 = sum_91 = None + sub_80: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.sub.Tensor(sub_79, mul_279); sub_79 = mul_279 = None + div_15: "f32[1, 64, 
1][64, 1, 1]cuda:0" = torch.ops.aten.div.Tensor(rsqrt_9, 768); rsqrt_9 = None + mul_280: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.mul.Tensor(div_15, sub_80); div_15 = sub_80 = None + mul_281: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.mul.Tensor(view_265, mul_275); mul_275 = None + sum_93: "f32[768][1]cuda:0" = torch.ops.aten.sum.dim_IntList(mul_281, [0, 1]); mul_281 = None + sum_94: "f32[768][1]cuda:0" = torch.ops.aten.sum.dim_IntList(view_265, [0, 1]); view_265 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:43 in forward, code: return F.layer_norm(input, self.weight.shape, self.weight, self.bias, 1e-5) + add_129: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.add.Tensor(add_126, mul_280); add_126 = mul_280 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:116 in forward, code: y = self.resid_dropout(self.c_proj(y)) + view_266: "f32[64, 768][768, 1]cuda:0" = torch.ops.aten.view.default(add_129, [64, 768]) + permute_249: "f32[768, 768][768, 1]cuda:0" = torch.ops.aten.permute.default(permute_37, [1, 0]); permute_37 = None + mm_63: "f32[64, 768][768, 1]cuda:0" = torch.ops.aten.mm.default(view_266, permute_249); permute_249 = None + permute_250: "f32[768, 64][1, 768]cuda:0" = torch.ops.aten.permute.default(view_266, [1, 0]) + mm_64: "f32[768, 768][768, 1]cuda:0" = torch.ops.aten.mm.default(permute_250, view_54); permute_250 = view_54 = None + permute_251: "f32[768, 768][1, 768]cuda:0" = torch.ops.aten.permute.default(mm_64, [1, 0]); mm_64 = None + sum_95: "f32[1, 768][768, 1]cuda:0" = torch.ops.aten.sum.dim_IntList(view_266, [0], True); view_266 = None + view_267: "f32[768][1]cuda:0" = torch.ops.aten.view.default(sum_95, [768]); sum_95 = None + permute_252: "f32[768, 768][768, 1]cuda:0" = torch.ops.aten.permute.default(permute_251, [1, 0]); permute_251 = None + view_268: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.view.default(mm_63, [1, 64, 768]); mm_63 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:112 in forward, code: y.transpose(1, 2).contiguous().view(B, T, C) + view_269: "f32[1, 64, 12, 64][49152, 768, 64, 1]cuda:0" = torch.ops.aten.view.default(view_268, [1, 64, 12, 64]); view_268 = None + permute_253: "f32[1, 12, 64, 64][49152, 64, 768, 1]cuda:0" = torch.ops.aten.permute.default(view_269, [0, 2, 1, 3]); view_269 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:96 in forward, code: y = torch.nn.functional.scaled_dot_product_attention( + alias_78: "f32[1, 12, 64, 64][49152, 64, 768, 1]cuda:0" = torch.ops.aten.alias.default(alias_17); alias_17 = None + alias_79: "f32[1, 12, 64, 64][49152, 64, 768, 1]cuda:0" = torch.ops.aten.alias.default(alias_78); alias_78 = None + _scaled_dot_product_efficient_attention_backward_7 = torch.ops.aten._scaled_dot_product_efficient_attention_backward.default(permute_253, permute_34, permute_33, permute_35, None, alias_79, getitem_50, getitem_51, getitem_52, 0.0, [True, True, True, False], True); permute_253 = permute_34 = permute_33 = permute_35 = alias_79 = getitem_50 = getitem_51 = getitem_52 = None + getitem_162: "f32[1, 12, 64, 64][49152, 64, 768, 1]cuda:0" = _scaled_dot_product_efficient_attention_backward_7[0] + getitem_163: "f32[1, 12, 64, 64][49152, 64, 768, 1]cuda:0" = _scaled_dot_product_efficient_attention_backward_7[1] + getitem_164: "f32[1, 12, 64, 64][49152, 64, 768, 1]cuda:0" = 
_scaled_dot_product_efficient_attention_backward_7[2]; _scaled_dot_product_efficient_attention_backward_7 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:89 in forward, code: v = v.view(B, T, self.n_head, C // self.n_head).transpose( + permute_254: "f32[1, 64, 12, 64][49152, 768, 64, 1]cuda:0" = torch.ops.aten.permute.default(getitem_164, [0, 2, 1, 3]); getitem_164 = None + view_270: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.view.default(permute_254, [1, 64, 768]); permute_254 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:86 in forward, code: q = q.view(B, T, self.n_head, C // self.n_head).transpose( + permute_255: "f32[1, 64, 12, 64][49152, 768, 64, 1]cuda:0" = torch.ops.aten.permute.default(getitem_162, [0, 2, 1, 3]); getitem_162 = None + view_271: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.view.default(permute_255, [1, 64, 768]); permute_255 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:83 in forward, code: k = k.view(B, T, self.n_head, C // self.n_head).transpose( + permute_256: "f32[1, 64, 12, 64][49152, 768, 64, 1]cuda:0" = torch.ops.aten.permute.default(getitem_163, [0, 2, 1, 3]); getitem_163 = None + view_272: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.view.default(permute_256, [1, 64, 768]); permute_256 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:82 in forward, code: q, k, v = self.c_attn(x).split(self.n_embd, dim=2) + cat_7: "f32[1, 64, 2304][147456, 2304, 1]cuda:0" = torch.ops.aten.cat.default([view_271, view_272, view_270], 2); view_271 = view_272 = view_270 = None + view_273: "f32[64, 2304][2304, 1]cuda:0" = torch.ops.aten.view.default(cat_7, [64, 2304]); cat_7 = None + permute_257: "f32[2304, 768][768, 1]cuda:0" = torch.ops.aten.permute.default(permute_32, [1, 0]); permute_32 = None + mm_65: "f32[64, 768][768, 1]cuda:0" = torch.ops.aten.mm.default(view_273, permute_257); permute_257 = None + permute_258: "f32[2304, 64][1, 2304]cuda:0" = torch.ops.aten.permute.default(view_273, [1, 0]) + mm_66: "f32[2304, 768][768, 1]cuda:0" = torch.ops.aten.mm.default(permute_258, view_48); permute_258 = view_48 = None + permute_259: "f32[768, 2304][1, 768]cuda:0" = torch.ops.aten.permute.default(mm_66, [1, 0]); mm_66 = None + sum_96: "f32[1, 2304][2304, 1]cuda:0" = torch.ops.aten.sum.dim_IntList(view_273, [0], True); view_273 = None + view_274: "f32[2304][1]cuda:0" = torch.ops.aten.view.default(sum_96, [2304]); sum_96 = None + permute_260: "f32[2304, 768][768, 1]cuda:0" = torch.ops.aten.permute.default(permute_259, [1, 0]); permute_259 = None + view_275: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.view.default(mm_65, [1, 64, 768]); mm_65 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:43 in forward, code: return F.layer_norm(input, self.weight.shape, self.weight, self.bias, 1e-5) + sub_81: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.sub.Tensor(add_32, getitem_45); add_32 = getitem_45 = None + mul_282: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.mul.Tensor(sub_81, rsqrt_8); sub_81 = None + mul_283: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.mul.Tensor(view_275, primals_52); primals_52 = None + mul_284: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.mul.Tensor(mul_283, 768) + sum_97: "f32[1, 64, 1][64, 1, 1]cuda:0" = torch.ops.aten.sum.dim_IntList(mul_283, [2], True) + 
mul_285: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.mul.Tensor(mul_283, mul_282); mul_283 = None + sum_98: "f32[1, 64, 1][64, 1, 1]cuda:0" = torch.ops.aten.sum.dim_IntList(mul_285, [2], True); mul_285 = None + mul_286: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.mul.Tensor(mul_282, sum_98); sum_98 = None + sub_82: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.sub.Tensor(mul_284, sum_97); mul_284 = sum_97 = None + sub_83: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.sub.Tensor(sub_82, mul_286); sub_82 = mul_286 = None + div_16: "f32[1, 64, 1][64, 1, 1]cuda:0" = torch.ops.aten.div.Tensor(rsqrt_8, 768); rsqrt_8 = None + mul_287: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.mul.Tensor(div_16, sub_83); div_16 = sub_83 = None + mul_288: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.mul.Tensor(view_275, mul_282); mul_282 = None + sum_99: "f32[768][1]cuda:0" = torch.ops.aten.sum.dim_IntList(mul_288, [0, 1]); mul_288 = None + sum_100: "f32[768][1]cuda:0" = torch.ops.aten.sum.dim_IntList(view_275, [0, 1]); view_275 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:43 in forward, code: return F.layer_norm(input, self.weight.shape, self.weight, self.bias, 1e-5) + add_130: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.add.Tensor(add_129, mul_287); add_129 = mul_287 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:130 in forward, code: x = self.c_proj(x) + view_276: "f32[64, 768][768, 1]cuda:0" = torch.ops.aten.view.default(add_130, [64, 768]) + permute_261: "f32[768, 3072][3072, 1]cuda:0" = torch.ops.aten.permute.default(permute_31, [1, 0]); permute_31 = None + mm_67: "f32[64, 3072][3072, 1]cuda:0" = torch.ops.aten.mm.default(view_276, permute_261); permute_261 = None + permute_262: "f32[768, 64][1, 768]cuda:0" = torch.ops.aten.permute.default(view_276, [1, 0]) + mm_68: "f32[768, 3072][3072, 1]cuda:0" = torch.ops.aten.mm.default(permute_262, view_46); permute_262 = view_46 = None + permute_263: "f32[3072, 768][1, 3072]cuda:0" = torch.ops.aten.permute.default(mm_68, [1, 0]); mm_68 = None + sum_101: "f32[1, 768][768, 1]cuda:0" = torch.ops.aten.sum.dim_IntList(view_276, [0], True); view_276 = None + view_277: "f32[768][1]cuda:0" = torch.ops.aten.view.default(sum_101, [768]); sum_101 = None + permute_264: "f32[768, 3072][3072, 1]cuda:0" = torch.ops.aten.permute.default(permute_263, [1, 0]); permute_263 = None + view_278: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.view.default(mm_67, [1, 64, 3072]); mm_67 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:25 in new_gelu, code: 0.5 + mul_289: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.mul.Tensor(view_278, mul_28); mul_28 = None + mul_290: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.mul.Tensor(view_278, add_31); view_278 = add_31 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:29 in new_gelu, code: + torch.tanh(math.sqrt(2.0 / math.pi) * (x + 0.044715 * torch.pow(x, 3.0))) + alias_80: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.alias.default(alias_15); alias_15 = None + alias_81: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.alias.default(alias_80); alias_80 = None + mul_291: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.mul.Tensor(alias_81, alias_81); alias_81 = None + sub_84: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = 
torch.ops.aten.sub.Tensor(1, mul_291); mul_291 = None + mul_292: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.mul.Tensor(mul_289, sub_84); mul_289 = sub_84 = None + mul_293: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.mul.Tensor(mul_292, 0.7978845608028654); mul_292 = None + mul_294: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.mul.Tensor(mul_293, 0.044715) + pow_21: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.pow.Tensor_Scalar(view_45, 2.0); view_45 = None + mul_295: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.mul.Scalar(pow_21, 3.0); pow_21 = None + mul_296: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.mul.Tensor(mul_294, mul_295); mul_294 = mul_295 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:29 in new_gelu, code: + torch.tanh(math.sqrt(2.0 / math.pi) * (x + 0.044715 * torch.pow(x, 3.0))) + add_131: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.add.Tensor(mul_293, mul_296); mul_293 = mul_296 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:25 in new_gelu, code: 0.5 + mul_297: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.mul.Tensor(mul_290, 0.5); mul_290 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:25 in new_gelu, code: 0.5 + add_132: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.add.Tensor(add_131, mul_297); add_131 = mul_297 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:128 in forward, code: x = self.c_fc(x) + view_279: "f32[64, 3072][3072, 1]cuda:0" = torch.ops.aten.view.default(add_132, [64, 3072]); add_132 = None + permute_265: "f32[3072, 768][768, 1]cuda:0" = torch.ops.aten.permute.default(permute_30, [1, 0]); permute_30 = None + mm_69: "f32[64, 768][768, 1]cuda:0" = torch.ops.aten.mm.default(view_279, permute_265); permute_265 = None + permute_266: "f32[3072, 64][1, 3072]cuda:0" = torch.ops.aten.permute.default(view_279, [1, 0]) + mm_70: "f32[3072, 768][768, 1]cuda:0" = torch.ops.aten.mm.default(permute_266, view_44); permute_266 = view_44 = None + permute_267: "f32[768, 3072][1, 768]cuda:0" = torch.ops.aten.permute.default(mm_70, [1, 0]); mm_70 = None + sum_102: "f32[1, 3072][3072, 1]cuda:0" = torch.ops.aten.sum.dim_IntList(view_279, [0], True); view_279 = None + view_280: "f32[3072][1]cuda:0" = torch.ops.aten.view.default(sum_102, [3072]); sum_102 = None + permute_268: "f32[3072, 768][768, 1]cuda:0" = torch.ops.aten.permute.default(permute_267, [1, 0]); permute_267 = None + view_281: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.view.default(mm_69, [1, 64, 768]); mm_69 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:43 in forward, code: return F.layer_norm(input, self.weight.shape, self.weight, self.bias, 1e-5) + sub_85: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.sub.Tensor(add_27, getitem_43); add_27 = getitem_43 = None + mul_298: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.mul.Tensor(sub_85, rsqrt_7); sub_85 = None + mul_299: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.mul.Tensor(view_281, primals_46); primals_46 = None + mul_300: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.mul.Tensor(mul_299, 768) + sum_103: "f32[1, 64, 1][64, 1, 1]cuda:0" = torch.ops.aten.sum.dim_IntList(mul_299, [2], True) + mul_301: "f32[1, 64, 768][49152, 768, 1]cuda:0" = 
torch.ops.aten.mul.Tensor(mul_299, mul_298); mul_299 = None + sum_104: "f32[1, 64, 1][64, 1, 1]cuda:0" = torch.ops.aten.sum.dim_IntList(mul_301, [2], True); mul_301 = None + mul_302: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.mul.Tensor(mul_298, sum_104); sum_104 = None + sub_86: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.sub.Tensor(mul_300, sum_103); mul_300 = sum_103 = None + sub_87: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.sub.Tensor(sub_86, mul_302); sub_86 = mul_302 = None + div_17: "f32[1, 64, 1][64, 1, 1]cuda:0" = torch.ops.aten.div.Tensor(rsqrt_7, 768); rsqrt_7 = None + mul_303: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.mul.Tensor(div_17, sub_87); div_17 = sub_87 = None + mul_304: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.mul.Tensor(view_281, mul_298); mul_298 = None + sum_105: "f32[768][1]cuda:0" = torch.ops.aten.sum.dim_IntList(mul_304, [0, 1]); mul_304 = None + sum_106: "f32[768][1]cuda:0" = torch.ops.aten.sum.dim_IntList(view_281, [0, 1]); view_281 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:43 in forward, code: return F.layer_norm(input, self.weight.shape, self.weight, self.bias, 1e-5) + add_133: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.add.Tensor(add_130, mul_303); add_130 = mul_303 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:116 in forward, code: y = self.resid_dropout(self.c_proj(y)) + view_282: "f32[64, 768][768, 1]cuda:0" = torch.ops.aten.view.default(add_133, [64, 768]) + permute_269: "f32[768, 768][768, 1]cuda:0" = torch.ops.aten.permute.default(permute_29, [1, 0]); permute_29 = None + mm_71: "f32[64, 768][768, 1]cuda:0" = torch.ops.aten.mm.default(view_282, permute_269); permute_269 = None + permute_270: "f32[768, 64][1, 768]cuda:0" = torch.ops.aten.permute.default(view_282, [1, 0]) + mm_72: "f32[768, 768][768, 1]cuda:0" = torch.ops.aten.mm.default(permute_270, view_42); permute_270 = view_42 = None + permute_271: "f32[768, 768][1, 768]cuda:0" = torch.ops.aten.permute.default(mm_72, [1, 0]); mm_72 = None + sum_107: "f32[1, 768][768, 1]cuda:0" = torch.ops.aten.sum.dim_IntList(view_282, [0], True); view_282 = None + view_283: "f32[768][1]cuda:0" = torch.ops.aten.view.default(sum_107, [768]); sum_107 = None + permute_272: "f32[768, 768][768, 1]cuda:0" = torch.ops.aten.permute.default(permute_271, [1, 0]); permute_271 = None + view_284: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.view.default(mm_71, [1, 64, 768]); mm_71 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:112 in forward, code: y.transpose(1, 2).contiguous().view(B, T, C) + view_285: "f32[1, 64, 12, 64][49152, 768, 64, 1]cuda:0" = torch.ops.aten.view.default(view_284, [1, 64, 12, 64]); view_284 = None + permute_273: "f32[1, 12, 64, 64][49152, 64, 768, 1]cuda:0" = torch.ops.aten.permute.default(view_285, [0, 2, 1, 3]); view_285 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:96 in forward, code: y = torch.nn.functional.scaled_dot_product_attention( + alias_82: "f32[1, 12, 64, 64][49152, 64, 768, 1]cuda:0" = torch.ops.aten.alias.default(alias_13); alias_13 = None + alias_83: "f32[1, 12, 64, 64][49152, 64, 768, 1]cuda:0" = torch.ops.aten.alias.default(alias_82); alias_82 = None + _scaled_dot_product_efficient_attention_backward_8 = torch.ops.aten._scaled_dot_product_efficient_attention_backward.default(permute_273, permute_26, permute_25, 
permute_27, None, alias_83, getitem_39, getitem_40, getitem_41, 0.0, [True, True, True, False], True); permute_273 = permute_26 = permute_25 = permute_27 = alias_83 = getitem_39 = getitem_40 = getitem_41 = None + getitem_166: "f32[1, 12, 64, 64][49152, 64, 768, 1]cuda:0" = _scaled_dot_product_efficient_attention_backward_8[0] + getitem_167: "f32[1, 12, 64, 64][49152, 64, 768, 1]cuda:0" = _scaled_dot_product_efficient_attention_backward_8[1] + getitem_168: "f32[1, 12, 64, 64][49152, 64, 768, 1]cuda:0" = _scaled_dot_product_efficient_attention_backward_8[2]; _scaled_dot_product_efficient_attention_backward_8 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:89 in forward, code: v = v.view(B, T, self.n_head, C // self.n_head).transpose( + permute_274: "f32[1, 64, 12, 64][49152, 768, 64, 1]cuda:0" = torch.ops.aten.permute.default(getitem_168, [0, 2, 1, 3]); getitem_168 = None + view_286: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.view.default(permute_274, [1, 64, 768]); permute_274 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:86 in forward, code: q = q.view(B, T, self.n_head, C // self.n_head).transpose( + permute_275: "f32[1, 64, 12, 64][49152, 768, 64, 1]cuda:0" = torch.ops.aten.permute.default(getitem_166, [0, 2, 1, 3]); getitem_166 = None + view_287: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.view.default(permute_275, [1, 64, 768]); permute_275 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:83 in forward, code: k = k.view(B, T, self.n_head, C // self.n_head).transpose( + permute_276: "f32[1, 64, 12, 64][49152, 768, 64, 1]cuda:0" = torch.ops.aten.permute.default(getitem_167, [0, 2, 1, 3]); getitem_167 = None + view_288: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.view.default(permute_276, [1, 64, 768]); permute_276 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:82 in forward, code: q, k, v = self.c_attn(x).split(self.n_embd, dim=2) + cat_8: "f32[1, 64, 2304][147456, 2304, 1]cuda:0" = torch.ops.aten.cat.default([view_287, view_288, view_286], 2); view_287 = view_288 = view_286 = None + view_289: "f32[64, 2304][2304, 1]cuda:0" = torch.ops.aten.view.default(cat_8, [64, 2304]); cat_8 = None + permute_277: "f32[2304, 768][768, 1]cuda:0" = torch.ops.aten.permute.default(permute_24, [1, 0]); permute_24 = None + mm_73: "f32[64, 768][768, 1]cuda:0" = torch.ops.aten.mm.default(view_289, permute_277); permute_277 = None + permute_278: "f32[2304, 64][1, 2304]cuda:0" = torch.ops.aten.permute.default(view_289, [1, 0]) + mm_74: "f32[2304, 768][768, 1]cuda:0" = torch.ops.aten.mm.default(permute_278, view_36); permute_278 = view_36 = None + permute_279: "f32[768, 2304][1, 768]cuda:0" = torch.ops.aten.permute.default(mm_74, [1, 0]); mm_74 = None + sum_108: "f32[1, 2304][2304, 1]cuda:0" = torch.ops.aten.sum.dim_IntList(view_289, [0], True); view_289 = None + view_290: "f32[2304][1]cuda:0" = torch.ops.aten.view.default(sum_108, [2304]); sum_108 = None + permute_280: "f32[2304, 768][768, 1]cuda:0" = torch.ops.aten.permute.default(permute_279, [1, 0]); permute_279 = None + view_291: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.view.default(mm_73, [1, 64, 768]); mm_73 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:43 in forward, code: return F.layer_norm(input, self.weight.shape, self.weight, self.bias, 1e-5) + sub_88: "f32[1, 64, 768][49152, 
768, 1]cuda:0" = torch.ops.aten.sub.Tensor(add_24, getitem_34); add_24 = getitem_34 = None + mul_305: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.mul.Tensor(sub_88, rsqrt_6); sub_88 = None + mul_306: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.mul.Tensor(view_291, primals_40); primals_40 = None + mul_307: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.mul.Tensor(mul_306, 768) + sum_109: "f32[1, 64, 1][64, 1, 1]cuda:0" = torch.ops.aten.sum.dim_IntList(mul_306, [2], True) + mul_308: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.mul.Tensor(mul_306, mul_305); mul_306 = None + sum_110: "f32[1, 64, 1][64, 1, 1]cuda:0" = torch.ops.aten.sum.dim_IntList(mul_308, [2], True); mul_308 = None + mul_309: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.mul.Tensor(mul_305, sum_110); sum_110 = None + sub_89: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.sub.Tensor(mul_307, sum_109); mul_307 = sum_109 = None + sub_90: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.sub.Tensor(sub_89, mul_309); sub_89 = mul_309 = None + div_18: "f32[1, 64, 1][64, 1, 1]cuda:0" = torch.ops.aten.div.Tensor(rsqrt_6, 768); rsqrt_6 = None + mul_310: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.mul.Tensor(div_18, sub_90); div_18 = sub_90 = None + mul_311: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.mul.Tensor(view_291, mul_305); mul_305 = None + sum_111: "f32[768][1]cuda:0" = torch.ops.aten.sum.dim_IntList(mul_311, [0, 1]); mul_311 = None + sum_112: "f32[768][1]cuda:0" = torch.ops.aten.sum.dim_IntList(view_291, [0, 1]); view_291 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:43 in forward, code: return F.layer_norm(input, self.weight.shape, self.weight, self.bias, 1e-5) + add_134: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.add.Tensor(add_133, mul_310); add_133 = mul_310 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:130 in forward, code: x = self.c_proj(x) + view_292: "f32[64, 768][768, 1]cuda:0" = torch.ops.aten.view.default(add_134, [64, 768]) + permute_281: "f32[768, 3072][3072, 1]cuda:0" = torch.ops.aten.permute.default(permute_23, [1, 0]); permute_23 = None + mm_75: "f32[64, 3072][3072, 1]cuda:0" = torch.ops.aten.mm.default(view_292, permute_281); permute_281 = None + permute_282: "f32[768, 64][1, 768]cuda:0" = torch.ops.aten.permute.default(view_292, [1, 0]) + mm_76: "f32[768, 3072][3072, 1]cuda:0" = torch.ops.aten.mm.default(permute_282, view_34); permute_282 = view_34 = None + permute_283: "f32[3072, 768][1, 3072]cuda:0" = torch.ops.aten.permute.default(mm_76, [1, 0]); mm_76 = None + sum_113: "f32[1, 768][768, 1]cuda:0" = torch.ops.aten.sum.dim_IntList(view_292, [0], True); view_292 = None + view_293: "f32[768][1]cuda:0" = torch.ops.aten.view.default(sum_113, [768]); sum_113 = None + permute_284: "f32[768, 3072][3072, 1]cuda:0" = torch.ops.aten.permute.default(permute_283, [1, 0]); permute_283 = None + view_294: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.view.default(mm_75, [1, 64, 3072]); mm_75 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:25 in new_gelu, code: 0.5 + mul_312: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.mul.Tensor(view_294, mul_20); mul_20 = None + mul_313: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.mul.Tensor(view_294, add_23); view_294 = add_23 = None + + # File: 
/data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:29 in new_gelu, code: + torch.tanh(math.sqrt(2.0 / math.pi) * (x + 0.044715 * torch.pow(x, 3.0))) + alias_84: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.alias.default(alias_11); alias_11 = None + alias_85: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.alias.default(alias_84); alias_84 = None + mul_314: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.mul.Tensor(alias_85, alias_85); alias_85 = None + sub_91: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.sub.Tensor(1, mul_314); mul_314 = None + mul_315: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.mul.Tensor(mul_312, sub_91); mul_312 = sub_91 = None + mul_316: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.mul.Tensor(mul_315, 0.7978845608028654); mul_315 = None + mul_317: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.mul.Tensor(mul_316, 0.044715) + pow_22: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.pow.Tensor_Scalar(view_33, 2.0); view_33 = None + mul_318: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.mul.Scalar(pow_22, 3.0); pow_22 = None + mul_319: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.mul.Tensor(mul_317, mul_318); mul_317 = mul_318 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:29 in new_gelu, code: + torch.tanh(math.sqrt(2.0 / math.pi) * (x + 0.044715 * torch.pow(x, 3.0))) + add_135: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.add.Tensor(mul_316, mul_319); mul_316 = mul_319 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:25 in new_gelu, code: 0.5 + mul_320: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.mul.Tensor(mul_313, 0.5); mul_313 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:25 in new_gelu, code: 0.5 + add_136: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.add.Tensor(add_135, mul_320); add_135 = mul_320 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:128 in forward, code: x = self.c_fc(x) + view_295: "f32[64, 3072][3072, 1]cuda:0" = torch.ops.aten.view.default(add_136, [64, 3072]); add_136 = None + permute_285: "f32[3072, 768][768, 1]cuda:0" = torch.ops.aten.permute.default(permute_22, [1, 0]); permute_22 = None + mm_77: "f32[64, 768][768, 1]cuda:0" = torch.ops.aten.mm.default(view_295, permute_285); permute_285 = None + permute_286: "f32[3072, 64][1, 3072]cuda:0" = torch.ops.aten.permute.default(view_295, [1, 0]) + mm_78: "f32[3072, 768][768, 1]cuda:0" = torch.ops.aten.mm.default(permute_286, view_32); permute_286 = view_32 = None + permute_287: "f32[768, 3072][1, 768]cuda:0" = torch.ops.aten.permute.default(mm_78, [1, 0]); mm_78 = None + sum_114: "f32[1, 3072][3072, 1]cuda:0" = torch.ops.aten.sum.dim_IntList(view_295, [0], True); view_295 = None + view_296: "f32[3072][1]cuda:0" = torch.ops.aten.view.default(sum_114, [3072]); sum_114 = None + permute_288: "f32[3072, 768][768, 1]cuda:0" = torch.ops.aten.permute.default(permute_287, [1, 0]); permute_287 = None + view_297: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.view.default(mm_77, [1, 64, 768]); mm_77 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:43 in forward, code: return F.layer_norm(input, self.weight.shape, self.weight, self.bias, 1e-5) + sub_92: "f32[1, 64, 768][49152, 768, 1]cuda:0" = 
torch.ops.aten.sub.Tensor(add_19, getitem_32); add_19 = getitem_32 = None + mul_321: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.mul.Tensor(sub_92, rsqrt_5); sub_92 = None + mul_322: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.mul.Tensor(view_297, primals_34); primals_34 = None + mul_323: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.mul.Tensor(mul_322, 768) + sum_115: "f32[1, 64, 1][64, 1, 1]cuda:0" = torch.ops.aten.sum.dim_IntList(mul_322, [2], True) + mul_324: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.mul.Tensor(mul_322, mul_321); mul_322 = None + sum_116: "f32[1, 64, 1][64, 1, 1]cuda:0" = torch.ops.aten.sum.dim_IntList(mul_324, [2], True); mul_324 = None + mul_325: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.mul.Tensor(mul_321, sum_116); sum_116 = None + sub_93: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.sub.Tensor(mul_323, sum_115); mul_323 = sum_115 = None + sub_94: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.sub.Tensor(sub_93, mul_325); sub_93 = mul_325 = None + div_19: "f32[1, 64, 1][64, 1, 1]cuda:0" = torch.ops.aten.div.Tensor(rsqrt_5, 768); rsqrt_5 = None + mul_326: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.mul.Tensor(div_19, sub_94); div_19 = sub_94 = None + mul_327: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.mul.Tensor(view_297, mul_321); mul_321 = None + sum_117: "f32[768][1]cuda:0" = torch.ops.aten.sum.dim_IntList(mul_327, [0, 1]); mul_327 = None + sum_118: "f32[768][1]cuda:0" = torch.ops.aten.sum.dim_IntList(view_297, [0, 1]); view_297 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:43 in forward, code: return F.layer_norm(input, self.weight.shape, self.weight, self.bias, 1e-5) + add_137: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.add.Tensor(add_134, mul_326); add_134 = mul_326 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:116 in forward, code: y = self.resid_dropout(self.c_proj(y)) + view_298: "f32[64, 768][768, 1]cuda:0" = torch.ops.aten.view.default(add_137, [64, 768]) + permute_289: "f32[768, 768][768, 1]cuda:0" = torch.ops.aten.permute.default(permute_21, [1, 0]); permute_21 = None + mm_79: "f32[64, 768][768, 1]cuda:0" = torch.ops.aten.mm.default(view_298, permute_289); permute_289 = None + permute_290: "f32[768, 64][1, 768]cuda:0" = torch.ops.aten.permute.default(view_298, [1, 0]) + mm_80: "f32[768, 768][768, 1]cuda:0" = torch.ops.aten.mm.default(permute_290, view_30); permute_290 = view_30 = None + permute_291: "f32[768, 768][1, 768]cuda:0" = torch.ops.aten.permute.default(mm_80, [1, 0]); mm_80 = None + sum_119: "f32[1, 768][768, 1]cuda:0" = torch.ops.aten.sum.dim_IntList(view_298, [0], True); view_298 = None + view_299: "f32[768][1]cuda:0" = torch.ops.aten.view.default(sum_119, [768]); sum_119 = None + permute_292: "f32[768, 768][768, 1]cuda:0" = torch.ops.aten.permute.default(permute_291, [1, 0]); permute_291 = None + view_300: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.view.default(mm_79, [1, 64, 768]); mm_79 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:112 in forward, code: y.transpose(1, 2).contiguous().view(B, T, C) + view_301: "f32[1, 64, 12, 64][49152, 768, 64, 1]cuda:0" = torch.ops.aten.view.default(view_300, [1, 64, 12, 64]); view_300 = None + permute_293: "f32[1, 12, 64, 64][49152, 64, 768, 1]cuda:0" = torch.ops.aten.permute.default(view_301, [0, 2, 1, 3]); view_301 = None + + # File: 
/data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:96 in forward, code: y = torch.nn.functional.scaled_dot_product_attention( + alias_86: "f32[1, 12, 64, 64][49152, 64, 768, 1]cuda:0" = torch.ops.aten.alias.default(alias_9); alias_9 = None + alias_87: "f32[1, 12, 64, 64][49152, 64, 768, 1]cuda:0" = torch.ops.aten.alias.default(alias_86); alias_86 = None + _scaled_dot_product_efficient_attention_backward_9 = torch.ops.aten._scaled_dot_product_efficient_attention_backward.default(permute_293, permute_18, permute_17, permute_19, None, alias_87, getitem_28, getitem_29, getitem_30, 0.0, [True, True, True, False], True); permute_293 = permute_18 = permute_17 = permute_19 = alias_87 = getitem_28 = getitem_29 = getitem_30 = None + getitem_170: "f32[1, 12, 64, 64][49152, 64, 768, 1]cuda:0" = _scaled_dot_product_efficient_attention_backward_9[0] + getitem_171: "f32[1, 12, 64, 64][49152, 64, 768, 1]cuda:0" = _scaled_dot_product_efficient_attention_backward_9[1] + getitem_172: "f32[1, 12, 64, 64][49152, 64, 768, 1]cuda:0" = _scaled_dot_product_efficient_attention_backward_9[2]; _scaled_dot_product_efficient_attention_backward_9 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:89 in forward, code: v = v.view(B, T, self.n_head, C // self.n_head).transpose( + permute_294: "f32[1, 64, 12, 64][49152, 768, 64, 1]cuda:0" = torch.ops.aten.permute.default(getitem_172, [0, 2, 1, 3]); getitem_172 = None + view_302: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.view.default(permute_294, [1, 64, 768]); permute_294 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:86 in forward, code: q = q.view(B, T, self.n_head, C // self.n_head).transpose( + permute_295: "f32[1, 64, 12, 64][49152, 768, 64, 1]cuda:0" = torch.ops.aten.permute.default(getitem_170, [0, 2, 1, 3]); getitem_170 = None + view_303: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.view.default(permute_295, [1, 64, 768]); permute_295 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:83 in forward, code: k = k.view(B, T, self.n_head, C // self.n_head).transpose( + permute_296: "f32[1, 64, 12, 64][49152, 768, 64, 1]cuda:0" = torch.ops.aten.permute.default(getitem_171, [0, 2, 1, 3]); getitem_171 = None + view_304: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.view.default(permute_296, [1, 64, 768]); permute_296 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:82 in forward, code: q, k, v = self.c_attn(x).split(self.n_embd, dim=2) + cat_9: "f32[1, 64, 2304][147456, 2304, 1]cuda:0" = torch.ops.aten.cat.default([view_303, view_304, view_302], 2); view_303 = view_304 = view_302 = None + view_305: "f32[64, 2304][2304, 1]cuda:0" = torch.ops.aten.view.default(cat_9, [64, 2304]); cat_9 = None + permute_297: "f32[2304, 768][768, 1]cuda:0" = torch.ops.aten.permute.default(permute_16, [1, 0]); permute_16 = None + mm_81: "f32[64, 768][768, 1]cuda:0" = torch.ops.aten.mm.default(view_305, permute_297); permute_297 = None + permute_298: "f32[2304, 64][1, 2304]cuda:0" = torch.ops.aten.permute.default(view_305, [1, 0]) + mm_82: "f32[2304, 768][768, 1]cuda:0" = torch.ops.aten.mm.default(permute_298, view_24); permute_298 = view_24 = None + permute_299: "f32[768, 2304][1, 768]cuda:0" = torch.ops.aten.permute.default(mm_82, [1, 0]); mm_82 = None + sum_120: "f32[1, 2304][2304, 1]cuda:0" = torch.ops.aten.sum.dim_IntList(view_305, [0], True); view_305 = None + 
view_306: "f32[2304][1]cuda:0" = torch.ops.aten.view.default(sum_120, [2304]); sum_120 = None + permute_300: "f32[2304, 768][768, 1]cuda:0" = torch.ops.aten.permute.default(permute_299, [1, 0]); permute_299 = None + view_307: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.view.default(mm_81, [1, 64, 768]); mm_81 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:43 in forward, code: return F.layer_norm(input, self.weight.shape, self.weight, self.bias, 1e-5) + sub_95: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.sub.Tensor(add_16, getitem_23); add_16 = getitem_23 = None + mul_328: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.mul.Tensor(sub_95, rsqrt_4); sub_95 = None + mul_329: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.mul.Tensor(view_307, primals_28); primals_28 = None + mul_330: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.mul.Tensor(mul_329, 768) + sum_121: "f32[1, 64, 1][64, 1, 1]cuda:0" = torch.ops.aten.sum.dim_IntList(mul_329, [2], True) + mul_331: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.mul.Tensor(mul_329, mul_328); mul_329 = None + sum_122: "f32[1, 64, 1][64, 1, 1]cuda:0" = torch.ops.aten.sum.dim_IntList(mul_331, [2], True); mul_331 = None + mul_332: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.mul.Tensor(mul_328, sum_122); sum_122 = None + sub_96: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.sub.Tensor(mul_330, sum_121); mul_330 = sum_121 = None + sub_97: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.sub.Tensor(sub_96, mul_332); sub_96 = mul_332 = None + div_20: "f32[1, 64, 1][64, 1, 1]cuda:0" = torch.ops.aten.div.Tensor(rsqrt_4, 768); rsqrt_4 = None + mul_333: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.mul.Tensor(div_20, sub_97); div_20 = sub_97 = None + mul_334: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.mul.Tensor(view_307, mul_328); mul_328 = None + sum_123: "f32[768][1]cuda:0" = torch.ops.aten.sum.dim_IntList(mul_334, [0, 1]); mul_334 = None + sum_124: "f32[768][1]cuda:0" = torch.ops.aten.sum.dim_IntList(view_307, [0, 1]); view_307 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:43 in forward, code: return F.layer_norm(input, self.weight.shape, self.weight, self.bias, 1e-5) + add_138: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.add.Tensor(add_137, mul_333); add_137 = mul_333 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:130 in forward, code: x = self.c_proj(x) + view_308: "f32[64, 768][768, 1]cuda:0" = torch.ops.aten.view.default(add_138, [64, 768]) + permute_301: "f32[768, 3072][3072, 1]cuda:0" = torch.ops.aten.permute.default(permute_15, [1, 0]); permute_15 = None + mm_83: "f32[64, 3072][3072, 1]cuda:0" = torch.ops.aten.mm.default(view_308, permute_301); permute_301 = None + permute_302: "f32[768, 64][1, 768]cuda:0" = torch.ops.aten.permute.default(view_308, [1, 0]) + mm_84: "f32[768, 3072][3072, 1]cuda:0" = torch.ops.aten.mm.default(permute_302, view_22); permute_302 = view_22 = None + permute_303: "f32[3072, 768][1, 3072]cuda:0" = torch.ops.aten.permute.default(mm_84, [1, 0]); mm_84 = None + sum_125: "f32[1, 768][768, 1]cuda:0" = torch.ops.aten.sum.dim_IntList(view_308, [0], True); view_308 = None + view_309: "f32[768][1]cuda:0" = torch.ops.aten.view.default(sum_125, [768]); sum_125 = None + permute_304: "f32[768, 3072][3072, 1]cuda:0" = torch.ops.aten.permute.default(permute_303, [1, 0]); permute_303 = 
None + view_310: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.view.default(mm_83, [1, 64, 3072]); mm_83 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:25 in new_gelu, code: 0.5 + mul_335: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.mul.Tensor(view_310, mul_12); mul_12 = None + mul_336: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.mul.Tensor(view_310, add_15); view_310 = add_15 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:29 in new_gelu, code: + torch.tanh(math.sqrt(2.0 / math.pi) * (x + 0.044715 * torch.pow(x, 3.0))) + alias_88: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.alias.default(alias_7); alias_7 = None + alias_89: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.alias.default(alias_88); alias_88 = None + mul_337: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.mul.Tensor(alias_89, alias_89); alias_89 = None + sub_98: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.sub.Tensor(1, mul_337); mul_337 = None + mul_338: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.mul.Tensor(mul_335, sub_98); mul_335 = sub_98 = None + mul_339: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.mul.Tensor(mul_338, 0.7978845608028654); mul_338 = None + mul_340: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.mul.Tensor(mul_339, 0.044715) + pow_23: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.pow.Tensor_Scalar(view_21, 2.0); view_21 = None + mul_341: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.mul.Scalar(pow_23, 3.0); pow_23 = None + mul_342: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.mul.Tensor(mul_340, mul_341); mul_340 = mul_341 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:29 in new_gelu, code: + torch.tanh(math.sqrt(2.0 / math.pi) * (x + 0.044715 * torch.pow(x, 3.0))) + add_139: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.add.Tensor(mul_339, mul_342); mul_339 = mul_342 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:25 in new_gelu, code: 0.5 + mul_343: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.mul.Tensor(mul_336, 0.5); mul_336 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:25 in new_gelu, code: 0.5 + add_140: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.add.Tensor(add_139, mul_343); add_139 = mul_343 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:128 in forward, code: x = self.c_fc(x) + view_311: "f32[64, 3072][3072, 1]cuda:0" = torch.ops.aten.view.default(add_140, [64, 3072]); add_140 = None + permute_305: "f32[3072, 768][768, 1]cuda:0" = torch.ops.aten.permute.default(permute_14, [1, 0]); permute_14 = None + mm_85: "f32[64, 768][768, 1]cuda:0" = torch.ops.aten.mm.default(view_311, permute_305); permute_305 = None + permute_306: "f32[3072, 64][1, 3072]cuda:0" = torch.ops.aten.permute.default(view_311, [1, 0]) + mm_86: "f32[3072, 768][768, 1]cuda:0" = torch.ops.aten.mm.default(permute_306, view_20); permute_306 = view_20 = None + permute_307: "f32[768, 3072][1, 768]cuda:0" = torch.ops.aten.permute.default(mm_86, [1, 0]); mm_86 = None + sum_126: "f32[1, 3072][3072, 1]cuda:0" = torch.ops.aten.sum.dim_IntList(view_311, [0], True); view_311 = None + view_312: "f32[3072][1]cuda:0" = torch.ops.aten.view.default(sum_126, [3072]); 
sum_126 = None + permute_308: "f32[3072, 768][768, 1]cuda:0" = torch.ops.aten.permute.default(permute_307, [1, 0]); permute_307 = None + view_313: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.view.default(mm_85, [1, 64, 768]); mm_85 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:43 in forward, code: return F.layer_norm(input, self.weight.shape, self.weight, self.bias, 1e-5) + sub_99: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.sub.Tensor(add_11, getitem_21); add_11 = getitem_21 = None + mul_344: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.mul.Tensor(sub_99, rsqrt_3); sub_99 = None + mul_345: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.mul.Tensor(view_313, primals_22); primals_22 = None + mul_346: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.mul.Tensor(mul_345, 768) + sum_127: "f32[1, 64, 1][64, 1, 1]cuda:0" = torch.ops.aten.sum.dim_IntList(mul_345, [2], True) + mul_347: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.mul.Tensor(mul_345, mul_344); mul_345 = None + sum_128: "f32[1, 64, 1][64, 1, 1]cuda:0" = torch.ops.aten.sum.dim_IntList(mul_347, [2], True); mul_347 = None + mul_348: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.mul.Tensor(mul_344, sum_128); sum_128 = None + sub_100: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.sub.Tensor(mul_346, sum_127); mul_346 = sum_127 = None + sub_101: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.sub.Tensor(sub_100, mul_348); sub_100 = mul_348 = None + div_21: "f32[1, 64, 1][64, 1, 1]cuda:0" = torch.ops.aten.div.Tensor(rsqrt_3, 768); rsqrt_3 = None + mul_349: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.mul.Tensor(div_21, sub_101); div_21 = sub_101 = None + mul_350: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.mul.Tensor(view_313, mul_344); mul_344 = None + sum_129: "f32[768][1]cuda:0" = torch.ops.aten.sum.dim_IntList(mul_350, [0, 1]); mul_350 = None + sum_130: "f32[768][1]cuda:0" = torch.ops.aten.sum.dim_IntList(view_313, [0, 1]); view_313 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:43 in forward, code: return F.layer_norm(input, self.weight.shape, self.weight, self.bias, 1e-5) + add_141: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.add.Tensor(add_138, mul_349); add_138 = mul_349 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:116 in forward, code: y = self.resid_dropout(self.c_proj(y)) + view_314: "f32[64, 768][768, 1]cuda:0" = torch.ops.aten.view.default(add_141, [64, 768]) + permute_309: "f32[768, 768][768, 1]cuda:0" = torch.ops.aten.permute.default(permute_13, [1, 0]); permute_13 = None + mm_87: "f32[64, 768][768, 1]cuda:0" = torch.ops.aten.mm.default(view_314, permute_309); permute_309 = None + permute_310: "f32[768, 64][1, 768]cuda:0" = torch.ops.aten.permute.default(view_314, [1, 0]) + mm_88: "f32[768, 768][768, 1]cuda:0" = torch.ops.aten.mm.default(permute_310, view_18); permute_310 = view_18 = None + permute_311: "f32[768, 768][1, 768]cuda:0" = torch.ops.aten.permute.default(mm_88, [1, 0]); mm_88 = None + sum_131: "f32[1, 768][768, 1]cuda:0" = torch.ops.aten.sum.dim_IntList(view_314, [0], True); view_314 = None + view_315: "f32[768][1]cuda:0" = torch.ops.aten.view.default(sum_131, [768]); sum_131 = None + permute_312: "f32[768, 768][768, 1]cuda:0" = torch.ops.aten.permute.default(permute_311, [1, 0]); permute_311 = None + view_316: "f32[1, 64, 768][49152, 768, 1]cuda:0" = 
torch.ops.aten.view.default(mm_87, [1, 64, 768]); mm_87 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:112 in forward, code: y.transpose(1, 2).contiguous().view(B, T, C) + view_317: "f32[1, 64, 12, 64][49152, 768, 64, 1]cuda:0" = torch.ops.aten.view.default(view_316, [1, 64, 12, 64]); view_316 = None + permute_313: "f32[1, 12, 64, 64][49152, 64, 768, 1]cuda:0" = torch.ops.aten.permute.default(view_317, [0, 2, 1, 3]); view_317 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:96 in forward, code: y = torch.nn.functional.scaled_dot_product_attention( + alias_90: "f32[1, 12, 64, 64][49152, 64, 768, 1]cuda:0" = torch.ops.aten.alias.default(alias_5); alias_5 = None + alias_91: "f32[1, 12, 64, 64][49152, 64, 768, 1]cuda:0" = torch.ops.aten.alias.default(alias_90); alias_90 = None + _scaled_dot_product_efficient_attention_backward_10 = torch.ops.aten._scaled_dot_product_efficient_attention_backward.default(permute_313, permute_10, permute_9, permute_11, None, alias_91, getitem_17, getitem_18, getitem_19, 0.0, [True, True, True, False], True); permute_313 = permute_10 = permute_9 = permute_11 = alias_91 = getitem_17 = getitem_18 = getitem_19 = None + getitem_174: "f32[1, 12, 64, 64][49152, 64, 768, 1]cuda:0" = _scaled_dot_product_efficient_attention_backward_10[0] + getitem_175: "f32[1, 12, 64, 64][49152, 64, 768, 1]cuda:0" = _scaled_dot_product_efficient_attention_backward_10[1] + getitem_176: "f32[1, 12, 64, 64][49152, 64, 768, 1]cuda:0" = _scaled_dot_product_efficient_attention_backward_10[2]; _scaled_dot_product_efficient_attention_backward_10 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:89 in forward, code: v = v.view(B, T, self.n_head, C // self.n_head).transpose( + permute_314: "f32[1, 64, 12, 64][49152, 768, 64, 1]cuda:0" = torch.ops.aten.permute.default(getitem_176, [0, 2, 1, 3]); getitem_176 = None + view_318: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.view.default(permute_314, [1, 64, 768]); permute_314 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:86 in forward, code: q = q.view(B, T, self.n_head, C // self.n_head).transpose( + permute_315: "f32[1, 64, 12, 64][49152, 768, 64, 1]cuda:0" = torch.ops.aten.permute.default(getitem_174, [0, 2, 1, 3]); getitem_174 = None + view_319: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.view.default(permute_315, [1, 64, 768]); permute_315 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:83 in forward, code: k = k.view(B, T, self.n_head, C // self.n_head).transpose( + permute_316: "f32[1, 64, 12, 64][49152, 768, 64, 1]cuda:0" = torch.ops.aten.permute.default(getitem_175, [0, 2, 1, 3]); getitem_175 = None + view_320: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.view.default(permute_316, [1, 64, 768]); permute_316 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:82 in forward, code: q, k, v = self.c_attn(x).split(self.n_embd, dim=2) + cat_10: "f32[1, 64, 2304][147456, 2304, 1]cuda:0" = torch.ops.aten.cat.default([view_319, view_320, view_318], 2); view_319 = view_320 = view_318 = None + view_321: "f32[64, 2304][2304, 1]cuda:0" = torch.ops.aten.view.default(cat_10, [64, 2304]); cat_10 = None + permute_317: "f32[2304, 768][768, 1]cuda:0" = torch.ops.aten.permute.default(permute_8, [1, 0]); permute_8 = None + mm_89: "f32[64, 768][768, 1]cuda:0" = 
torch.ops.aten.mm.default(view_321, permute_317); permute_317 = None + permute_318: "f32[2304, 64][1, 2304]cuda:0" = torch.ops.aten.permute.default(view_321, [1, 0]) + mm_90: "f32[2304, 768][768, 1]cuda:0" = torch.ops.aten.mm.default(permute_318, view_12); permute_318 = view_12 = None + permute_319: "f32[768, 2304][1, 768]cuda:0" = torch.ops.aten.permute.default(mm_90, [1, 0]); mm_90 = None + sum_132: "f32[1, 2304][2304, 1]cuda:0" = torch.ops.aten.sum.dim_IntList(view_321, [0], True); view_321 = None + view_322: "f32[2304][1]cuda:0" = torch.ops.aten.view.default(sum_132, [2304]); sum_132 = None + permute_320: "f32[2304, 768][768, 1]cuda:0" = torch.ops.aten.permute.default(permute_319, [1, 0]); permute_319 = None + view_323: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.view.default(mm_89, [1, 64, 768]); mm_89 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:43 in forward, code: return F.layer_norm(input, self.weight.shape, self.weight, self.bias, 1e-5) + sub_102: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.sub.Tensor(add_8, getitem_12); add_8 = getitem_12 = None + mul_351: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.mul.Tensor(sub_102, rsqrt_2); sub_102 = None + mul_352: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.mul.Tensor(view_323, primals_16); primals_16 = None + mul_353: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.mul.Tensor(mul_352, 768) + sum_133: "f32[1, 64, 1][64, 1, 1]cuda:0" = torch.ops.aten.sum.dim_IntList(mul_352, [2], True) + mul_354: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.mul.Tensor(mul_352, mul_351); mul_352 = None + sum_134: "f32[1, 64, 1][64, 1, 1]cuda:0" = torch.ops.aten.sum.dim_IntList(mul_354, [2], True); mul_354 = None + mul_355: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.mul.Tensor(mul_351, sum_134); sum_134 = None + sub_103: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.sub.Tensor(mul_353, sum_133); mul_353 = sum_133 = None + sub_104: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.sub.Tensor(sub_103, mul_355); sub_103 = mul_355 = None + div_22: "f32[1, 64, 1][64, 1, 1]cuda:0" = torch.ops.aten.div.Tensor(rsqrt_2, 768); rsqrt_2 = None + mul_356: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.mul.Tensor(div_22, sub_104); div_22 = sub_104 = None + mul_357: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.mul.Tensor(view_323, mul_351); mul_351 = None + sum_135: "f32[768][1]cuda:0" = torch.ops.aten.sum.dim_IntList(mul_357, [0, 1]); mul_357 = None + sum_136: "f32[768][1]cuda:0" = torch.ops.aten.sum.dim_IntList(view_323, [0, 1]); view_323 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:43 in forward, code: return F.layer_norm(input, self.weight.shape, self.weight, self.bias, 1e-5) + add_142: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.add.Tensor(add_141, mul_356); add_141 = mul_356 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:130 in forward, code: x = self.c_proj(x) + view_324: "f32[64, 768][768, 1]cuda:0" = torch.ops.aten.view.default(add_142, [64, 768]) + permute_321: "f32[768, 3072][3072, 1]cuda:0" = torch.ops.aten.permute.default(permute_7, [1, 0]); permute_7 = None + mm_91: "f32[64, 3072][3072, 1]cuda:0" = torch.ops.aten.mm.default(view_324, permute_321); permute_321 = None + permute_322: "f32[768, 64][1, 768]cuda:0" = torch.ops.aten.permute.default(view_324, [1, 0]) + mm_92: "f32[768, 3072][3072, 
1]cuda:0" = torch.ops.aten.mm.default(permute_322, view_10); permute_322 = view_10 = None + permute_323: "f32[3072, 768][1, 3072]cuda:0" = torch.ops.aten.permute.default(mm_92, [1, 0]); mm_92 = None + sum_137: "f32[1, 768][768, 1]cuda:0" = torch.ops.aten.sum.dim_IntList(view_324, [0], True); view_324 = None + view_325: "f32[768][1]cuda:0" = torch.ops.aten.view.default(sum_137, [768]); sum_137 = None + permute_324: "f32[768, 3072][3072, 1]cuda:0" = torch.ops.aten.permute.default(permute_323, [1, 0]); permute_323 = None + view_326: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.view.default(mm_91, [1, 64, 3072]); mm_91 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:25 in new_gelu, code: 0.5 + mul_358: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.mul.Tensor(view_326, mul_4); mul_4 = None + mul_359: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.mul.Tensor(view_326, add_7); view_326 = add_7 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:29 in new_gelu, code: + torch.tanh(math.sqrt(2.0 / math.pi) * (x + 0.044715 * torch.pow(x, 3.0))) + alias_92: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.alias.default(alias_3); alias_3 = None + alias_93: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.alias.default(alias_92); alias_92 = None + mul_360: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.mul.Tensor(alias_93, alias_93); alias_93 = None + sub_105: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.sub.Tensor(1, mul_360); mul_360 = None + mul_361: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.mul.Tensor(mul_358, sub_105); mul_358 = sub_105 = None + mul_362: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.mul.Tensor(mul_361, 0.7978845608028654); mul_361 = None + mul_363: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.mul.Tensor(mul_362, 0.044715) + pow_24: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.pow.Tensor_Scalar(view_9, 2.0); view_9 = None + mul_364: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.mul.Scalar(pow_24, 3.0); pow_24 = None + mul_365: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.mul.Tensor(mul_363, mul_364); mul_363 = mul_364 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:29 in new_gelu, code: + torch.tanh(math.sqrt(2.0 / math.pi) * (x + 0.044715 * torch.pow(x, 3.0))) + add_143: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.add.Tensor(mul_362, mul_365); mul_362 = mul_365 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:25 in new_gelu, code: 0.5 + mul_366: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.mul.Tensor(mul_359, 0.5); mul_359 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:25 in new_gelu, code: 0.5 + add_144: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.add.Tensor(add_143, mul_366); add_143 = mul_366 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:128 in forward, code: x = self.c_fc(x) + view_327: "f32[64, 3072][3072, 1]cuda:0" = torch.ops.aten.view.default(add_144, [64, 3072]); add_144 = None + permute_325: "f32[3072, 768][768, 1]cuda:0" = torch.ops.aten.permute.default(permute_6, [1, 0]); permute_6 = None + mm_93: "f32[64, 768][768, 1]cuda:0" = torch.ops.aten.mm.default(view_327, permute_325); permute_325 = None + 
permute_326: "f32[3072, 64][1, 3072]cuda:0" = torch.ops.aten.permute.default(view_327, [1, 0]) + mm_94: "f32[3072, 768][768, 1]cuda:0" = torch.ops.aten.mm.default(permute_326, view_8); permute_326 = view_8 = None + permute_327: "f32[768, 3072][1, 768]cuda:0" = torch.ops.aten.permute.default(mm_94, [1, 0]); mm_94 = None + sum_138: "f32[1, 3072][3072, 1]cuda:0" = torch.ops.aten.sum.dim_IntList(view_327, [0], True); view_327 = None + view_328: "f32[3072][1]cuda:0" = torch.ops.aten.view.default(sum_138, [3072]); sum_138 = None + permute_328: "f32[3072, 768][768, 1]cuda:0" = torch.ops.aten.permute.default(permute_327, [1, 0]); permute_327 = None + view_329: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.view.default(mm_93, [1, 64, 768]); mm_93 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:43 in forward, code: return F.layer_norm(input, self.weight.shape, self.weight, self.bias, 1e-5) + sub_106: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.sub.Tensor(add_3, getitem_10); add_3 = getitem_10 = None + mul_367: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.mul.Tensor(sub_106, rsqrt_1); sub_106 = None + mul_368: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.mul.Tensor(view_329, primals_10); primals_10 = None + mul_369: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.mul.Tensor(mul_368, 768) + sum_139: "f32[1, 64, 1][64, 1, 1]cuda:0" = torch.ops.aten.sum.dim_IntList(mul_368, [2], True) + mul_370: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.mul.Tensor(mul_368, mul_367); mul_368 = None + sum_140: "f32[1, 64, 1][64, 1, 1]cuda:0" = torch.ops.aten.sum.dim_IntList(mul_370, [2], True); mul_370 = None + mul_371: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.mul.Tensor(mul_367, sum_140); sum_140 = None + sub_107: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.sub.Tensor(mul_369, sum_139); mul_369 = sum_139 = None + sub_108: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.sub.Tensor(sub_107, mul_371); sub_107 = mul_371 = None + div_23: "f32[1, 64, 1][64, 1, 1]cuda:0" = torch.ops.aten.div.Tensor(rsqrt_1, 768); rsqrt_1 = None + mul_372: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.mul.Tensor(div_23, sub_108); div_23 = sub_108 = None + mul_373: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.mul.Tensor(view_329, mul_367); mul_367 = None + sum_141: "f32[768][1]cuda:0" = torch.ops.aten.sum.dim_IntList(mul_373, [0, 1]); mul_373 = None + sum_142: "f32[768][1]cuda:0" = torch.ops.aten.sum.dim_IntList(view_329, [0, 1]); view_329 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:43 in forward, code: return F.layer_norm(input, self.weight.shape, self.weight, self.bias, 1e-5) + add_145: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.add.Tensor(add_142, mul_372); add_142 = mul_372 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:116 in forward, code: y = self.resid_dropout(self.c_proj(y)) + view_330: "f32[64, 768][768, 1]cuda:0" = torch.ops.aten.view.default(add_145, [64, 768]) + permute_329: "f32[768, 768][768, 1]cuda:0" = torch.ops.aten.permute.default(permute_5, [1, 0]); permute_5 = None + mm_95: "f32[64, 768][768, 1]cuda:0" = torch.ops.aten.mm.default(view_330, permute_329); permute_329 = None + permute_330: "f32[768, 64][1, 768]cuda:0" = torch.ops.aten.permute.default(view_330, [1, 0]) + mm_96: "f32[768, 768][768, 1]cuda:0" = torch.ops.aten.mm.default(permute_330, view_6); 
permute_330 = view_6 = None + permute_331: "f32[768, 768][1, 768]cuda:0" = torch.ops.aten.permute.default(mm_96, [1, 0]); mm_96 = None + sum_143: "f32[1, 768][768, 1]cuda:0" = torch.ops.aten.sum.dim_IntList(view_330, [0], True); view_330 = None + view_331: "f32[768][1]cuda:0" = torch.ops.aten.view.default(sum_143, [768]); sum_143 = None + permute_332: "f32[768, 768][768, 1]cuda:0" = torch.ops.aten.permute.default(permute_331, [1, 0]); permute_331 = None + view_332: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.view.default(mm_95, [1, 64, 768]); mm_95 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:112 in forward, code: y.transpose(1, 2).contiguous().view(B, T, C) + view_333: "f32[1, 64, 12, 64][49152, 768, 64, 1]cuda:0" = torch.ops.aten.view.default(view_332, [1, 64, 12, 64]); view_332 = None + permute_333: "f32[1, 12, 64, 64][49152, 64, 768, 1]cuda:0" = torch.ops.aten.permute.default(view_333, [0, 2, 1, 3]); view_333 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:96 in forward, code: y = torch.nn.functional.scaled_dot_product_attention( + alias_94: "f32[1, 12, 64, 64][49152, 64, 768, 1]cuda:0" = torch.ops.aten.alias.default(alias_1); alias_1 = None + alias_95: "f32[1, 12, 64, 64][49152, 64, 768, 1]cuda:0" = torch.ops.aten.alias.default(alias_94); alias_94 = None + _scaled_dot_product_efficient_attention_backward_11 = torch.ops.aten._scaled_dot_product_efficient_attention_backward.default(permute_333, permute_2, permute_1, permute_3, None, alias_95, getitem_6, getitem_7, getitem_8, 0.0, [True, True, True, False], True); permute_333 = permute_2 = permute_1 = permute_3 = alias_95 = getitem_6 = getitem_7 = getitem_8 = None + getitem_178: "f32[1, 12, 64, 64][49152, 64, 768, 1]cuda:0" = _scaled_dot_product_efficient_attention_backward_11[0] + getitem_179: "f32[1, 12, 64, 64][49152, 64, 768, 1]cuda:0" = _scaled_dot_product_efficient_attention_backward_11[1] + getitem_180: "f32[1, 12, 64, 64][49152, 64, 768, 1]cuda:0" = _scaled_dot_product_efficient_attention_backward_11[2]; _scaled_dot_product_efficient_attention_backward_11 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:89 in forward, code: v = v.view(B, T, self.n_head, C // self.n_head).transpose( + permute_334: "f32[1, 64, 12, 64][49152, 768, 64, 1]cuda:0" = torch.ops.aten.permute.default(getitem_180, [0, 2, 1, 3]); getitem_180 = None + view_334: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.view.default(permute_334, [1, 64, 768]); permute_334 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:86 in forward, code: q = q.view(B, T, self.n_head, C // self.n_head).transpose( + permute_335: "f32[1, 64, 12, 64][49152, 768, 64, 1]cuda:0" = torch.ops.aten.permute.default(getitem_178, [0, 2, 1, 3]); getitem_178 = None + view_335: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.view.default(permute_335, [1, 64, 768]); permute_335 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:83 in forward, code: k = k.view(B, T, self.n_head, C // self.n_head).transpose( + permute_336: "f32[1, 64, 12, 64][49152, 768, 64, 1]cuda:0" = torch.ops.aten.permute.default(getitem_179, [0, 2, 1, 3]); getitem_179 = None + view_336: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.view.default(permute_336, [1, 64, 768]); permute_336 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:82 in 
forward, code: q, k, v = self.c_attn(x).split(self.n_embd, dim=2) + cat_11: "f32[1, 64, 2304][147456, 2304, 1]cuda:0" = torch.ops.aten.cat.default([view_335, view_336, view_334], 2); view_335 = view_336 = view_334 = None + view_337: "f32[64, 2304][2304, 1]cuda:0" = torch.ops.aten.view.default(cat_11, [64, 2304]); cat_11 = None + permute_337: "f32[2304, 768][768, 1]cuda:0" = torch.ops.aten.permute.default(permute, [1, 0]); permute = None + mm_97: "f32[64, 768][768, 1]cuda:0" = torch.ops.aten.mm.default(view_337, permute_337); permute_337 = None + permute_338: "f32[2304, 64][1, 2304]cuda:0" = torch.ops.aten.permute.default(view_337, [1, 0]) + mm_98: "f32[2304, 768][768, 1]cuda:0" = torch.ops.aten.mm.default(permute_338, view); permute_338 = view = None + permute_339: "f32[768, 2304][1, 768]cuda:0" = torch.ops.aten.permute.default(mm_98, [1, 0]); mm_98 = None + sum_144: "f32[1, 2304][2304, 1]cuda:0" = torch.ops.aten.sum.dim_IntList(view_337, [0], True); view_337 = None + view_338: "f32[2304][1]cuda:0" = torch.ops.aten.view.default(sum_144, [2304]); sum_144 = None + permute_340: "f32[2304, 768][768, 1]cuda:0" = torch.ops.aten.permute.default(permute_339, [1, 0]); permute_339 = None + view_339: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.view.default(mm_97, [1, 64, 768]); mm_97 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:43 in forward, code: return F.layer_norm(input, self.weight.shape, self.weight, self.bias, 1e-5) + sub_109: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.sub.Tensor(clone, getitem_1); clone = getitem_1 = None + mul_374: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.mul.Tensor(sub_109, rsqrt); sub_109 = None + mul_375: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.mul.Tensor(view_339, primals_4); primals_4 = None + mul_376: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.mul.Tensor(mul_375, 768) + sum_145: "f32[1, 64, 1][64, 1, 1]cuda:0" = torch.ops.aten.sum.dim_IntList(mul_375, [2], True) + mul_377: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.mul.Tensor(mul_375, mul_374); mul_375 = None + sum_146: "f32[1, 64, 1][64, 1, 1]cuda:0" = torch.ops.aten.sum.dim_IntList(mul_377, [2], True); mul_377 = None + mul_378: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.mul.Tensor(mul_374, sum_146); sum_146 = None + sub_110: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.sub.Tensor(mul_376, sum_145); mul_376 = sum_145 = None + sub_111: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.sub.Tensor(sub_110, mul_378); sub_110 = mul_378 = None + div_24: "f32[1, 64, 1][64, 1, 1]cuda:0" = torch.ops.aten.div.Tensor(rsqrt, 768); rsqrt = None + mul_379: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.mul.Tensor(div_24, sub_111); div_24 = sub_111 = None + mul_380: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.mul.Tensor(view_339, mul_374); mul_374 = None + sum_147: "f32[768][1]cuda:0" = torch.ops.aten.sum.dim_IntList(mul_380, [0, 1]); mul_380 = None + sum_148: "f32[768][1]cuda:0" = torch.ops.aten.sum.dim_IntList(view_339, [0, 1]); view_339 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:43 in forward, code: return F.layer_norm(input, self.weight.shape, self.weight, self.bias, 1e-5) + add_146: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.add.Tensor(add_145, mul_379); add_145 = mul_379 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:229 in forward, code: 
pos_emb = self.transformer.wpe( + eq: "b8[1, 64][64, 1]cuda:0" = torch.ops.aten.eq.Scalar(unsqueeze, -1) + unsqueeze_1: "b8[1, 64, 1][64, 1, 1]cuda:0" = torch.ops.aten.unsqueeze.default(eq, -1); eq = None + scalar_tensor: "f32[][]cuda:0" = torch.ops.aten.scalar_tensor.default(0.0, dtype = torch.float32, layout = torch.strided, device = device(type='cuda', index=0)) + where: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.where.self(unsqueeze_1, scalar_tensor, add_146); unsqueeze_1 = scalar_tensor = None + full_3: "f32[1024, 768][768, 1]cuda:0" = torch.ops.aten.full.default([1024, 768], 0, dtype = torch.float32, layout = torch.strided, device = device(type='cuda', index=0), pin_memory = False) + _unsafe_index_put: "f32[1024, 768][768, 1]cuda:0" = torch.ops.aten._unsafe_index_put.default(full_3, [unsqueeze], where, True); full_3 = unsqueeze = where = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:228 in forward, code: tok_emb = self.transformer.wte(idx) # token embeddings of shape (b, t, n_embd) + eq_1: "b8[1, 64][64, 1]cuda:0" = torch.ops.aten.eq.Scalar(primals_1, -1) + unsqueeze_2: "b8[1, 64, 1][64, 1, 1]cuda:0" = torch.ops.aten.unsqueeze.default(eq_1, -1); eq_1 = None + scalar_tensor_1: "f32[][]cuda:0" = torch.ops.aten.scalar_tensor.default(0.0, dtype = torch.float32, layout = torch.strided, device = device(type='cuda', index=0)) + where_1: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.where.self(unsqueeze_2, scalar_tensor_1, add_146); unsqueeze_2 = scalar_tensor_1 = add_146 = None + full_4: "f32[50304, 768][768, 1]cuda:0" = torch.ops.aten.full.default([50304, 768], 0, dtype = torch.float32, layout = torch.strided, device = device(type='cuda', index=0), pin_memory = False) + _unsafe_index_put_1: "f32[50304, 768][768, 1]cuda:0" = torch.ops.aten._unsafe_index_put.default(full_4, [primals_1], where_1, True); full_4 = primals_1 = where_1 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:228 in forward, code: tok_emb = self.transformer.wte(idx) # token embeddings of shape (b, t, n_embd) + add_147: "f32[50304, 768][768, 1]cuda:0" = torch.ops.aten.add.Tensor(permute_100, _unsafe_index_put_1); permute_100 = _unsafe_index_put_1 = None + return pytree.tree_unflatten([view_145, None, add_147, _unsafe_index_put, sum_147, sum_148, permute_340, view_338, permute_332, view_331, sum_141, sum_142, permute_328, view_328, permute_324, view_325, sum_135, sum_136, permute_320, view_322, permute_312, view_315, sum_129, sum_130, permute_308, view_312, permute_304, view_309, sum_123, sum_124, permute_300, view_306, permute_292, view_299, sum_117, sum_118, permute_288, view_296, permute_284, view_293, sum_111, sum_112, permute_280, view_290, permute_272, view_283, sum_105, sum_106, permute_268, view_280, permute_264, view_277, sum_99, sum_100, permute_260, view_274, permute_252, view_267, sum_93, sum_94, permute_248, view_264, permute_244, view_261, sum_87, sum_88, permute_240, view_258, permute_232, view_251, sum_81, sum_82, permute_228, view_248, permute_224, view_245, sum_75, sum_76, permute_220, view_242, permute_212, view_235, sum_69, sum_70, permute_208, view_232, permute_204, view_229, sum_63, sum_64, permute_200, view_226, permute_192, view_219, sum_57, sum_58, permute_188, view_216, permute_184, view_213, sum_51, sum_52, permute_180, view_210, permute_172, view_203, sum_45, sum_46, permute_168, view_200, permute_164, view_197, sum_39, sum_40, permute_160, view_194, permute_152, view_187, sum_33, sum_34, 
permute_148, view_184, permute_144, view_181, sum_27, sum_28, permute_140, view_178, permute_132, view_171, sum_21, sum_22, permute_128, view_168, permute_124, view_165, sum_15, sum_16, permute_120, view_162, permute_112, view_155, sum_9, sum_10, permute_108, view_152, permute_104, view_149, sum_3, sum_4], self._out_spec) + +V0806 13:55:55.855000 4107173 torch/_functorch/_aot_autograd/jit_compile_runtime_wrappers.py:523] {"aot_forward_graph": {}, "frame_id": 2, "frame_compile_id": 0, "attempt": 1, "has_payload": "8c2323f7142c8f65a355535a234fd64e"} + class GraphModule(torch.nn.Module): + def forward(self, primals_1: "i64[1, 64][64, 1]cuda:0", primals_2: "f32[50304, 768][768, 1]cuda:0", primals_3: "f32[1024, 768][768, 1]cuda:0", primals_4: "f32[768][1]cuda:0", primals_5: "f32[768][1]cuda:0", primals_6: "f32[2304, 768][768, 1]cuda:0", primals_7: "f32[2304][1]cuda:0", primals_8: "f32[768, 768][768, 1]cuda:0", primals_9: "f32[768][1]cuda:0", primals_10: "f32[768][1]cuda:0", primals_11: "f32[768][1]cuda:0", primals_12: "f32[3072, 768][768, 1]cuda:0", primals_13: "f32[3072][1]cuda:0", primals_14: "f32[768, 3072][3072, 1]cuda:0", primals_15: "f32[768][1]cuda:0", primals_16: "f32[768][1]cuda:0", primals_17: "f32[768][1]cuda:0", primals_18: "f32[2304, 768][768, 1]cuda:0", primals_19: "f32[2304][1]cuda:0", primals_20: "f32[768, 768][768, 1]cuda:0", primals_21: "f32[768][1]cuda:0", primals_22: "f32[768][1]cuda:0", primals_23: "f32[768][1]cuda:0", primals_24: "f32[3072, 768][768, 1]cuda:0", primals_25: "f32[3072][1]cuda:0", primals_26: "f32[768, 3072][3072, 1]cuda:0", primals_27: "f32[768][1]cuda:0", primals_28: "f32[768][1]cuda:0", primals_29: "f32[768][1]cuda:0", primals_30: "f32[2304, 768][768, 1]cuda:0", primals_31: "f32[2304][1]cuda:0", primals_32: "f32[768, 768][768, 1]cuda:0", primals_33: "f32[768][1]cuda:0", primals_34: "f32[768][1]cuda:0", primals_35: "f32[768][1]cuda:0", primals_36: "f32[3072, 768][768, 1]cuda:0", primals_37: "f32[3072][1]cuda:0", primals_38: "f32[768, 3072][3072, 1]cuda:0", primals_39: "f32[768][1]cuda:0", primals_40: "f32[768][1]cuda:0", primals_41: "f32[768][1]cuda:0", primals_42: "f32[2304, 768][768, 1]cuda:0", primals_43: "f32[2304][1]cuda:0", primals_44: "f32[768, 768][768, 1]cuda:0", primals_45: "f32[768][1]cuda:0", primals_46: "f32[768][1]cuda:0", primals_47: "f32[768][1]cuda:0", primals_48: "f32[3072, 768][768, 1]cuda:0", primals_49: "f32[3072][1]cuda:0", primals_50: "f32[768, 3072][3072, 1]cuda:0", primals_51: "f32[768][1]cuda:0", primals_52: "f32[768][1]cuda:0", primals_53: "f32[768][1]cuda:0", primals_54: "f32[2304, 768][768, 1]cuda:0", primals_55: "f32[2304][1]cuda:0", primals_56: "f32[768, 768][768, 1]cuda:0", primals_57: "f32[768][1]cuda:0", primals_58: "f32[768][1]cuda:0", primals_59: "f32[768][1]cuda:0", primals_60: "f32[3072, 768][768, 1]cuda:0", primals_61: "f32[3072][1]cuda:0", primals_62: "f32[768, 3072][3072, 1]cuda:0", primals_63: "f32[768][1]cuda:0", primals_64: "f32[768][1]cuda:0", primals_65: "f32[768][1]cuda:0", primals_66: "f32[2304, 768][768, 1]cuda:0", primals_67: "f32[2304][1]cuda:0", primals_68: "f32[768, 768][768, 1]cuda:0", primals_69: "f32[768][1]cuda:0", primals_70: "f32[768][1]cuda:0", primals_71: "f32[768][1]cuda:0", primals_72: "f32[3072, 768][768, 1]cuda:0", primals_73: "f32[3072][1]cuda:0", primals_74: "f32[768, 3072][3072, 1]cuda:0", primals_75: "f32[768][1]cuda:0", primals_76: "f32[768][1]cuda:0", primals_77: "f32[768][1]cuda:0", primals_78: "f32[2304, 768][768, 1]cuda:0", primals_79: "f32[2304][1]cuda:0", primals_80: "f32[768, 
768][768, 1]cuda:0", primals_81: "f32[768][1]cuda:0", primals_82: "f32[768][1]cuda:0", primals_83: "f32[768][1]cuda:0", primals_84: "f32[3072, 768][768, 1]cuda:0", primals_85: "f32[3072][1]cuda:0", primals_86: "f32[768, 3072][3072, 1]cuda:0", primals_87: "f32[768][1]cuda:0", primals_88: "f32[768][1]cuda:0", primals_89: "f32[768][1]cuda:0", primals_90: "f32[2304, 768][768, 1]cuda:0", primals_91: "f32[2304][1]cuda:0", primals_92: "f32[768, 768][768, 1]cuda:0", primals_93: "f32[768][1]cuda:0", primals_94: "f32[768][1]cuda:0", primals_95: "f32[768][1]cuda:0", primals_96: "f32[3072, 768][768, 1]cuda:0", primals_97: "f32[3072][1]cuda:0", primals_98: "f32[768, 3072][3072, 1]cuda:0", primals_99: "f32[768][1]cuda:0", primals_100: "f32[768][1]cuda:0", primals_101: "f32[768][1]cuda:0", primals_102: "f32[2304, 768][768, 1]cuda:0", primals_103: "f32[2304][1]cuda:0", primals_104: "f32[768, 768][768, 1]cuda:0", primals_105: "f32[768][1]cuda:0", primals_106: "f32[768][1]cuda:0", primals_107: "f32[768][1]cuda:0", primals_108: "f32[3072, 768][768, 1]cuda:0", primals_109: "f32[3072][1]cuda:0", primals_110: "f32[768, 3072][3072, 1]cuda:0", primals_111: "f32[768][1]cuda:0", primals_112: "f32[768][1]cuda:0", primals_113: "f32[768][1]cuda:0", primals_114: "f32[2304, 768][768, 1]cuda:0", primals_115: "f32[2304][1]cuda:0", primals_116: "f32[768, 768][768, 1]cuda:0", primals_117: "f32[768][1]cuda:0", primals_118: "f32[768][1]cuda:0", primals_119: "f32[768][1]cuda:0", primals_120: "f32[3072, 768][768, 1]cuda:0", primals_121: "f32[3072][1]cuda:0", primals_122: "f32[768, 3072][3072, 1]cuda:0", primals_123: "f32[768][1]cuda:0", primals_124: "f32[768][1]cuda:0", primals_125: "f32[768][1]cuda:0", primals_126: "f32[2304, 768][768, 1]cuda:0", primals_127: "f32[2304][1]cuda:0", primals_128: "f32[768, 768][768, 1]cuda:0", primals_129: "f32[768][1]cuda:0", primals_130: "f32[768][1]cuda:0", primals_131: "f32[768][1]cuda:0", primals_132: "f32[3072, 768][768, 1]cuda:0", primals_133: "f32[3072][1]cuda:0", primals_134: "f32[768, 3072][3072, 1]cuda:0", primals_135: "f32[768][1]cuda:0", primals_136: "f32[768][1]cuda:0", primals_137: "f32[768][1]cuda:0", primals_138: "f32[2304, 768][768, 1]cuda:0", primals_139: "f32[2304][1]cuda:0", primals_140: "f32[768, 768][768, 1]cuda:0", primals_141: "f32[768][1]cuda:0", primals_142: "f32[768][1]cuda:0", primals_143: "f32[768][1]cuda:0", primals_144: "f32[3072, 768][768, 1]cuda:0", primals_145: "f32[3072][1]cuda:0", primals_146: "f32[768, 3072][3072, 1]cuda:0", primals_147: "f32[768][1]cuda:0", primals_148: "f32[768][1]cuda:0", primals_149: "f32[768][1]cuda:0"): + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:223 in forward, code: pos = torch.arange(0, t, dtype=torch.long, device=device).unsqueeze( + iota: "i64[64][1]cuda:0" = torch.ops.prims.iota.default(64, start = 0, step = 1, dtype = torch.int64, device = device(type='cuda', index=0), requires_grad = False) + unsqueeze: "i64[1, 64][64, 1]cuda:0" = torch.ops.aten.unsqueeze.default(iota, 0); iota = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:228 in forward, code: tok_emb = self.transformer.wte(idx) # token embeddings of shape (b, t, n_embd) + embedding: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.embedding.default(primals_2, primals_1) + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:229 in forward, code: pos_emb = self.transformer.wpe( + embedding_1: "f32[1, 64, 768][49152, 768, 1]cuda:0" = 
torch.ops.aten.embedding.default(primals_3, unsqueeze); primals_3 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:232 in forward, code: x = self.transformer.drop(tok_emb + pos_emb) + add: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.add.Tensor(embedding, embedding_1); embedding = embedding_1 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:43 in forward, code: return F.layer_norm(input, self.weight.shape, self.weight, self.bias, 1e-5) + var_mean = torch.ops.aten.var_mean.correction(add, [2], correction = 0, keepdim = True) + getitem: "f32[1, 64, 1][64, 1, 1]cuda:0" = var_mean[0] + getitem_1: "f32[1, 64, 1][64, 1, 1]cuda:0" = var_mean[1]; var_mean = None + add_1: "f32[1, 64, 1][64, 1, 1]cuda:0" = torch.ops.aten.add.Tensor(getitem, 1e-05); getitem = None + rsqrt: "f32[1, 64, 1][64, 1, 1]cuda:0" = torch.ops.aten.rsqrt.default(add_1); add_1 = None + sub: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.sub.Tensor(add, getitem_1); getitem_1 = None + mul: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.mul.Tensor(sub, rsqrt); sub = None + mul_1: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.mul.Tensor(mul, primals_4) + add_2: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.add.Tensor(mul_1, primals_5); mul_1 = primals_5 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:82 in forward, code: q, k, v = self.c_attn(x).split(self.n_embd, dim=2) + view: "f32[64, 768][768, 1]cuda:0" = torch.ops.aten.view.default(add_2, [64, 768]); add_2 = None + permute: "f32[768, 2304][1, 768]cuda:0" = torch.ops.aten.permute.default(primals_6, [1, 0]); primals_6 = None + addmm: "f32[64, 2304][2304, 1]cuda:0" = torch.ops.aten.addmm.default(primals_7, view, permute); primals_7 = None + view_1: "f32[1, 64, 2304][147456, 2304, 1]cuda:0" = torch.ops.aten.view.default(addmm, [1, 64, 2304]); addmm = None + split = torch.ops.aten.split.Tensor(view_1, 768, 2); view_1 = None + getitem_2: "f32[1, 64, 768][147456, 2304, 1]cuda:0" = split[0] + getitem_3: "f32[1, 64, 768][147456, 2304, 1]cuda:0" = split[1] + getitem_4: "f32[1, 64, 768][147456, 2304, 1]cuda:0" = split[2]; split = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:83 in forward, code: k = k.view(B, T, self.n_head, C // self.n_head).transpose( + view_2: "f32[1, 64, 12, 64][147456, 2304, 64, 1]cuda:0" = torch.ops.aten.view.default(getitem_3, [1, 64, 12, 64]); getitem_3 = None + permute_1: "f32[1, 12, 64, 64][147456, 64, 2304, 1]cuda:0" = torch.ops.aten.permute.default(view_2, [0, 2, 1, 3]); view_2 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:86 in forward, code: q = q.view(B, T, self.n_head, C // self.n_head).transpose( + view_3: "f32[1, 64, 12, 64][147456, 2304, 64, 1]cuda:0" = torch.ops.aten.view.default(getitem_2, [1, 64, 12, 64]); getitem_2 = None + permute_2: "f32[1, 12, 64, 64][147456, 64, 2304, 1]cuda:0" = torch.ops.aten.permute.default(view_3, [0, 2, 1, 3]); view_3 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:89 in forward, code: v = v.view(B, T, self.n_head, C // self.n_head).transpose( + view_4: "f32[1, 64, 12, 64][147456, 2304, 64, 1]cuda:0" = torch.ops.aten.view.default(getitem_4, [1, 64, 12, 64]); getitem_4 = None + permute_3: "f32[1, 12, 64, 64][147456, 64, 2304, 1]cuda:0" = torch.ops.aten.permute.default(view_4, [0, 2, 1, 3]); view_4 = None + + # File: 
/data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:96 in forward, code: y = torch.nn.functional.scaled_dot_product_attention( + _scaled_dot_product_efficient_attention = torch.ops.aten._scaled_dot_product_efficient_attention.default(permute_2, permute_1, permute_3, None, True, 0.0, True) + getitem_5: "f32[1, 12, 64, 64][49152, 64, 768, 1]cuda:0" = _scaled_dot_product_efficient_attention[0] + getitem_6: "f32[1, 12, 64][768, 64, 1]cuda:0" = _scaled_dot_product_efficient_attention[1] + getitem_7: "i64[][]cuda:0" = _scaled_dot_product_efficient_attention[2] + getitem_8: "i64[][]cuda:0" = _scaled_dot_product_efficient_attention[3]; _scaled_dot_product_efficient_attention = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:112 in forward, code: y.transpose(1, 2).contiguous().view(B, T, C) + permute_4: "f32[1, 64, 12, 64][49152, 768, 64, 1]cuda:0" = torch.ops.aten.permute.default(getitem_5, [0, 2, 1, 3]) + view_5: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.view.default(permute_4, [1, 64, 768]); permute_4 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:116 in forward, code: y = self.resid_dropout(self.c_proj(y)) + view_6: "f32[64, 768][768, 1]cuda:0" = torch.ops.aten.view.default(view_5, [64, 768]); view_5 = None + permute_5: "f32[768, 768][1, 768]cuda:0" = torch.ops.aten.permute.default(primals_8, [1, 0]); primals_8 = None + addmm_1: "f32[64, 768][768, 1]cuda:0" = torch.ops.aten.addmm.default(primals_9, view_6, permute_5); primals_9 = view_6 = None + view_7: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.view.default(addmm_1, [1, 64, 768]); addmm_1 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:144 in forward, code: x = x + self.attn(self.ln_1(x)) + add_3: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.add.Tensor(add, view_7); add = view_7 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:43 in forward, code: return F.layer_norm(input, self.weight.shape, self.weight, self.bias, 1e-5) + var_mean_1 = torch.ops.aten.var_mean.correction(add_3, [2], correction = 0, keepdim = True) + getitem_9: "f32[1, 64, 1][64, 1, 1]cuda:0" = var_mean_1[0] + getitem_10: "f32[1, 64, 1][64, 1, 1]cuda:0" = var_mean_1[1]; var_mean_1 = None + add_4: "f32[1, 64, 1][64, 1, 1]cuda:0" = torch.ops.aten.add.Tensor(getitem_9, 1e-05); getitem_9 = None + rsqrt_1: "f32[1, 64, 1][64, 1, 1]cuda:0" = torch.ops.aten.rsqrt.default(add_4); add_4 = None + sub_1: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.sub.Tensor(add_3, getitem_10); getitem_10 = None + mul_2: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.mul.Tensor(sub_1, rsqrt_1); sub_1 = None + mul_3: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.mul.Tensor(mul_2, primals_10) + add_5: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.add.Tensor(mul_3, primals_11); mul_3 = primals_11 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:128 in forward, code: x = self.c_fc(x) + view_8: "f32[64, 768][768, 1]cuda:0" = torch.ops.aten.view.default(add_5, [64, 768]); add_5 = None + permute_6: "f32[768, 3072][1, 768]cuda:0" = torch.ops.aten.permute.default(primals_12, [1, 0]); primals_12 = None + addmm_2: "f32[64, 3072][3072, 1]cuda:0" = torch.ops.aten.addmm.default(primals_13, view_8, permute_6); primals_13 = None + view_9: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = 
torch.ops.aten.view.default(addmm_2, [1, 64, 3072]) + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:25 in new_gelu, code: 0.5 + mul_4: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.mul.Tensor(view_9, 0.5) + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:29 in new_gelu, code: + torch.tanh(math.sqrt(2.0 / math.pi) * (x + 0.044715 * torch.pow(x, 3.0))) + pow_1: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.pow.Tensor_Scalar(view_9, 3.0) + mul_5: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.mul.Tensor(pow_1, 0.044715); pow_1 = None + add_6: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.add.Tensor(view_9, mul_5); view_9 = mul_5 = None + mul_6: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.mul.Tensor(add_6, 0.7978845608028654); add_6 = None + tanh: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.tanh.default(mul_6); mul_6 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:28 in new_gelu, code: 1.0 + add_7: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.add.Tensor(tanh, 1.0); tanh = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:25 in new_gelu, code: 0.5 + mul_7: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.mul.Tensor(mul_4, add_7); mul_4 = add_7 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:130 in forward, code: x = self.c_proj(x) + view_10: "f32[64, 3072][3072, 1]cuda:0" = torch.ops.aten.view.default(mul_7, [64, 3072]); mul_7 = None + permute_7: "f32[3072, 768][1, 3072]cuda:0" = torch.ops.aten.permute.default(primals_14, [1, 0]); primals_14 = None + addmm_3: "f32[64, 768][768, 1]cuda:0" = torch.ops.aten.addmm.default(primals_15, view_10, permute_7); primals_15 = None + view_11: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.view.default(addmm_3, [1, 64, 768]); addmm_3 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:145 in forward, code: x = x + self.mlp(self.ln_2(x)) + add_8: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.add.Tensor(add_3, view_11); add_3 = view_11 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:43 in forward, code: return F.layer_norm(input, self.weight.shape, self.weight, self.bias, 1e-5) + var_mean_2 = torch.ops.aten.var_mean.correction(add_8, [2], correction = 0, keepdim = True) + getitem_11: "f32[1, 64, 1][64, 1, 1]cuda:0" = var_mean_2[0] + getitem_12: "f32[1, 64, 1][64, 1, 1]cuda:0" = var_mean_2[1]; var_mean_2 = None + add_9: "f32[1, 64, 1][64, 1, 1]cuda:0" = torch.ops.aten.add.Tensor(getitem_11, 1e-05); getitem_11 = None + rsqrt_2: "f32[1, 64, 1][64, 1, 1]cuda:0" = torch.ops.aten.rsqrt.default(add_9); add_9 = None + sub_2: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.sub.Tensor(add_8, getitem_12); getitem_12 = None + mul_8: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.mul.Tensor(sub_2, rsqrt_2); sub_2 = None + mul_9: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.mul.Tensor(mul_8, primals_16) + add_10: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.add.Tensor(mul_9, primals_17); mul_9 = primals_17 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:82 in forward, code: q, k, v = self.c_attn(x).split(self.n_embd, dim=2) + view_12: "f32[64, 768][768, 1]cuda:0" = 
torch.ops.aten.view.default(add_10, [64, 768]); add_10 = None + permute_8: "f32[768, 2304][1, 768]cuda:0" = torch.ops.aten.permute.default(primals_18, [1, 0]); primals_18 = None + addmm_4: "f32[64, 2304][2304, 1]cuda:0" = torch.ops.aten.addmm.default(primals_19, view_12, permute_8); primals_19 = None + view_13: "f32[1, 64, 2304][147456, 2304, 1]cuda:0" = torch.ops.aten.view.default(addmm_4, [1, 64, 2304]); addmm_4 = None + split_1 = torch.ops.aten.split.Tensor(view_13, 768, 2); view_13 = None + getitem_13: "f32[1, 64, 768][147456, 2304, 1]cuda:0" = split_1[0] + getitem_14: "f32[1, 64, 768][147456, 2304, 1]cuda:0" = split_1[1] + getitem_15: "f32[1, 64, 768][147456, 2304, 1]cuda:0" = split_1[2]; split_1 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:83 in forward, code: k = k.view(B, T, self.n_head, C // self.n_head).transpose( + view_14: "f32[1, 64, 12, 64][147456, 2304, 64, 1]cuda:0" = torch.ops.aten.view.default(getitem_14, [1, 64, 12, 64]); getitem_14 = None + permute_9: "f32[1, 12, 64, 64][147456, 64, 2304, 1]cuda:0" = torch.ops.aten.permute.default(view_14, [0, 2, 1, 3]); view_14 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:86 in forward, code: q = q.view(B, T, self.n_head, C // self.n_head).transpose( + view_15: "f32[1, 64, 12, 64][147456, 2304, 64, 1]cuda:0" = torch.ops.aten.view.default(getitem_13, [1, 64, 12, 64]); getitem_13 = None + permute_10: "f32[1, 12, 64, 64][147456, 64, 2304, 1]cuda:0" = torch.ops.aten.permute.default(view_15, [0, 2, 1, 3]); view_15 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:89 in forward, code: v = v.view(B, T, self.n_head, C // self.n_head).transpose( + view_16: "f32[1, 64, 12, 64][147456, 2304, 64, 1]cuda:0" = torch.ops.aten.view.default(getitem_15, [1, 64, 12, 64]); getitem_15 = None + permute_11: "f32[1, 12, 64, 64][147456, 64, 2304, 1]cuda:0" = torch.ops.aten.permute.default(view_16, [0, 2, 1, 3]); view_16 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:96 in forward, code: y = torch.nn.functional.scaled_dot_product_attention( + _scaled_dot_product_efficient_attention_1 = torch.ops.aten._scaled_dot_product_efficient_attention.default(permute_10, permute_9, permute_11, None, True, 0.0, True) + getitem_16: "f32[1, 12, 64, 64][49152, 64, 768, 1]cuda:0" = _scaled_dot_product_efficient_attention_1[0] + getitem_17: "f32[1, 12, 64][768, 64, 1]cuda:0" = _scaled_dot_product_efficient_attention_1[1] + getitem_18: "i64[][]cuda:0" = _scaled_dot_product_efficient_attention_1[2] + getitem_19: "i64[][]cuda:0" = _scaled_dot_product_efficient_attention_1[3]; _scaled_dot_product_efficient_attention_1 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:112 in forward, code: y.transpose(1, 2).contiguous().view(B, T, C) + permute_12: "f32[1, 64, 12, 64][49152, 768, 64, 1]cuda:0" = torch.ops.aten.permute.default(getitem_16, [0, 2, 1, 3]) + view_17: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.view.default(permute_12, [1, 64, 768]); permute_12 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:116 in forward, code: y = self.resid_dropout(self.c_proj(y)) + view_18: "f32[64, 768][768, 1]cuda:0" = torch.ops.aten.view.default(view_17, [64, 768]); view_17 = None + permute_13: "f32[768, 768][1, 768]cuda:0" = torch.ops.aten.permute.default(primals_20, [1, 0]); primals_20 = None + addmm_5: "f32[64, 768][768, 
1]cuda:0" = torch.ops.aten.addmm.default(primals_21, view_18, permute_13); primals_21 = view_18 = None + view_19: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.view.default(addmm_5, [1, 64, 768]); addmm_5 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:144 in forward, code: x = x + self.attn(self.ln_1(x)) + add_11: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.add.Tensor(add_8, view_19); add_8 = view_19 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:43 in forward, code: return F.layer_norm(input, self.weight.shape, self.weight, self.bias, 1e-5) + var_mean_3 = torch.ops.aten.var_mean.correction(add_11, [2], correction = 0, keepdim = True) + getitem_20: "f32[1, 64, 1][64, 1, 1]cuda:0" = var_mean_3[0] + getitem_21: "f32[1, 64, 1][64, 1, 1]cuda:0" = var_mean_3[1]; var_mean_3 = None + add_12: "f32[1, 64, 1][64, 1, 1]cuda:0" = torch.ops.aten.add.Tensor(getitem_20, 1e-05); getitem_20 = None + rsqrt_3: "f32[1, 64, 1][64, 1, 1]cuda:0" = torch.ops.aten.rsqrt.default(add_12); add_12 = None + sub_3: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.sub.Tensor(add_11, getitem_21); getitem_21 = None + mul_10: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.mul.Tensor(sub_3, rsqrt_3); sub_3 = None + mul_11: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.mul.Tensor(mul_10, primals_22) + add_13: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.add.Tensor(mul_11, primals_23); mul_11 = primals_23 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:128 in forward, code: x = self.c_fc(x) + view_20: "f32[64, 768][768, 1]cuda:0" = torch.ops.aten.view.default(add_13, [64, 768]); add_13 = None + permute_14: "f32[768, 3072][1, 768]cuda:0" = torch.ops.aten.permute.default(primals_24, [1, 0]); primals_24 = None + addmm_6: "f32[64, 3072][3072, 1]cuda:0" = torch.ops.aten.addmm.default(primals_25, view_20, permute_14); primals_25 = None + view_21: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.view.default(addmm_6, [1, 64, 3072]) + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:25 in new_gelu, code: 0.5 + mul_12: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.mul.Tensor(view_21, 0.5) + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:29 in new_gelu, code: + torch.tanh(math.sqrt(2.0 / math.pi) * (x + 0.044715 * torch.pow(x, 3.0))) + pow_2: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.pow.Tensor_Scalar(view_21, 3.0) + mul_13: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.mul.Tensor(pow_2, 0.044715); pow_2 = None + add_14: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.add.Tensor(view_21, mul_13); view_21 = mul_13 = None + mul_14: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.mul.Tensor(add_14, 0.7978845608028654); add_14 = None + tanh_1: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.tanh.default(mul_14); mul_14 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:28 in new_gelu, code: 1.0 + add_15: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.add.Tensor(tanh_1, 1.0); tanh_1 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:25 in new_gelu, code: 0.5 + mul_15: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.mul.Tensor(mul_12, add_15); mul_12 = add_15 = None + + # File: 
/data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:130 in forward, code: x = self.c_proj(x)
+ view_22: "f32[64, 3072][3072, 1]cuda:0" = torch.ops.aten.view.default(mul_15, [64, 3072]); mul_15 = None
+ permute_15: "f32[3072, 768][1, 3072]cuda:0" = torch.ops.aten.permute.default(primals_26, [1, 0]); primals_26 = None
+ addmm_7: "f32[64, 768][768, 1]cuda:0" = torch.ops.aten.addmm.default(primals_27, view_22, permute_15); primals_27 = None
+ view_23: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.view.default(addmm_7, [1, 64, 768]); addmm_7 = None
+
+ # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:145 in forward, code: x = x + self.mlp(self.ln_2(x))
+ add_16: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.add.Tensor(add_11, view_23); add_11 = view_23 = None
+
+ # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:43 in forward, code: return F.layer_norm(input, self.weight.shape, self.weight, self.bias, 1e-5)
+ var_mean_4 = torch.ops.aten.var_mean.correction(add_16, [2], correction = 0, keepdim = True)
+ getitem_22: "f32[1, 64, 1][64, 1, 1]cuda:0" = var_mean_4[0]
+ getitem_23: "f32[1, 64, 1][64, 1, 1]cuda:0" = var_mean_4[1]; var_mean_4 = None
+ add_17: "f32[1, 64, 1][64, 1, 1]cuda:0" = torch.ops.aten.add.Tensor(getitem_22, 1e-05); getitem_22 = None
+ rsqrt_4: "f32[1, 64, 1][64, 1, 1]cuda:0" = torch.ops.aten.rsqrt.default(add_17); add_17 = None
+ sub_4: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.sub.Tensor(add_16, getitem_23); getitem_23 = None
+ mul_16: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.mul.Tensor(sub_4, rsqrt_4); sub_4 = None
+ mul_17: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.mul.Tensor(mul_16, primals_28)
+ add_18: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.add.Tensor(mul_17, primals_29); mul_17 = primals_29 = None
+
+ # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:82 in forward, code: q, k, v = self.c_attn(x).split(self.n_embd, dim=2)
+ view_24: "f32[64, 768][768, 1]cuda:0" = torch.ops.aten.view.default(add_18, [64, 768]); add_18 = None
+ permute_16: "f32[768, 2304][1, 768]cuda:0" = torch.ops.aten.permute.default(primals_30, [1, 0]); primals_30 = None
+ addmm_8: "f32[64, 2304][2304, 1]cuda:0" = torch.ops.aten.addmm.default(primals_31, view_24, permute_16); primals_31 = None
+ view_25: "f32[1, 64, 2304][147456, 2304, 1]cuda:0" = torch.ops.aten.view.default(addmm_8, [1, 64, 2304]); addmm_8 = None
+ split_2 = torch.ops.aten.split.Tensor(view_25, 768, 2); view_25 = None
+ getitem_24: "f32[1, 64, 768][147456, 2304, 1]cuda:0" = split_2[0]
+ getitem_25: "f32[1, 64, 768][147456, 2304, 1]cuda:0" = split_2[1]
+ getitem_26: "f32[1, 64, 768][147456, 2304, 1]cuda:0" = split_2[2]; split_2 = None
+
+ # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:83 in forward, code: k = k.view(B, T, self.n_head, C // self.n_head).transpose(
+ view_26: "f32[1, 64, 12, 64][147456, 2304, 64, 1]cuda:0" = torch.ops.aten.view.default(getitem_25, [1, 64, 12, 64]); getitem_25 = None
+ permute_17: "f32[1, 12, 64, 64][147456, 64, 2304, 1]cuda:0" = torch.ops.aten.permute.default(view_26, [0, 2, 1, 3]); view_26 = None
+
+ # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:86 in forward, code: q = q.view(B, T, self.n_head, C // self.n_head).transpose(
+ view_27: "f32[1, 64, 12, 64][147456, 2304, 64, 1]cuda:0" = torch.ops.aten.view.default(getitem_24, [1, 64, 12, 64]); getitem_24 = None
+ permute_18: "f32[1, 12, 64, 64][147456, 64, 2304, 1]cuda:0" = torch.ops.aten.permute.default(view_27, [0, 2, 1, 3]); view_27 = None
+
+ # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:89 in forward, code: v = v.view(B, T, self.n_head, C // self.n_head).transpose(
+ view_28: "f32[1, 64, 12, 64][147456, 2304, 64, 1]cuda:0" = torch.ops.aten.view.default(getitem_26, [1, 64, 12, 64]); getitem_26 = None
+ permute_19: "f32[1, 12, 64, 64][147456, 64, 2304, 1]cuda:0" = torch.ops.aten.permute.default(view_28, [0, 2, 1, 3]); view_28 = None
+
+ # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:96 in forward, code: y = torch.nn.functional.scaled_dot_product_attention(
+ _scaled_dot_product_efficient_attention_2 = torch.ops.aten._scaled_dot_product_efficient_attention.default(permute_18, permute_17, permute_19, None, True, 0.0, True)
+ getitem_27: "f32[1, 12, 64, 64][49152, 64, 768, 1]cuda:0" = _scaled_dot_product_efficient_attention_2[0]
+ getitem_28: "f32[1, 12, 64][768, 64, 1]cuda:0" = _scaled_dot_product_efficient_attention_2[1]
+ getitem_29: "i64[][]cuda:0" = _scaled_dot_product_efficient_attention_2[2]
+ getitem_30: "i64[][]cuda:0" = _scaled_dot_product_efficient_attention_2[3]; _scaled_dot_product_efficient_attention_2 = None
+
+ # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:112 in forward, code: y.transpose(1, 2).contiguous().view(B, T, C)
+ permute_20: "f32[1, 64, 12, 64][49152, 768, 64, 1]cuda:0" = torch.ops.aten.permute.default(getitem_27, [0, 2, 1, 3])
+ view_29: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.view.default(permute_20, [1, 64, 768]); permute_20 = None
+
+ # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:116 in forward, code: y = self.resid_dropout(self.c_proj(y))
+ view_30: "f32[64, 768][768, 1]cuda:0" = torch.ops.aten.view.default(view_29, [64, 768]); view_29 = None
+ permute_21: "f32[768, 768][1, 768]cuda:0" = torch.ops.aten.permute.default(primals_32, [1, 0]); primals_32 = None
+ addmm_9: "f32[64, 768][768, 1]cuda:0" = torch.ops.aten.addmm.default(primals_33, view_30, permute_21); primals_33 = view_30 = None
+ view_31: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.view.default(addmm_9, [1, 64, 768]); addmm_9 = None
+
+ # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:144 in forward, code: x = x + self.attn(self.ln_1(x))
+ add_19: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.add.Tensor(add_16, view_31); add_16 = view_31 = None
+
+ # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:43 in forward, code: return F.layer_norm(input, self.weight.shape, self.weight, self.bias, 1e-5)
+ var_mean_5 = torch.ops.aten.var_mean.correction(add_19, [2], correction = 0, keepdim = True)
+ getitem_31: "f32[1, 64, 1][64, 1, 1]cuda:0" = var_mean_5[0]
+ getitem_32: "f32[1, 64, 1][64, 1, 1]cuda:0" = var_mean_5[1]; var_mean_5 = None
+ add_20: "f32[1, 64, 1][64, 1, 1]cuda:0" = torch.ops.aten.add.Tensor(getitem_31, 1e-05); getitem_31 = None
+ rsqrt_5: "f32[1, 64, 1][64, 1, 1]cuda:0" = torch.ops.aten.rsqrt.default(add_20); add_20 = None
+ sub_5: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.sub.Tensor(add_19, getitem_32); getitem_32 = None
+ mul_18: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.mul.Tensor(sub_5, rsqrt_5); sub_5 = None
+ mul_19: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.mul.Tensor(mul_18, primals_34)
+ add_21: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.add.Tensor(mul_19, primals_35); mul_19 = primals_35 = None
+
+ # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:128 in forward, code: x = self.c_fc(x)
+ view_32: "f32[64, 768][768, 1]cuda:0" = torch.ops.aten.view.default(add_21, [64, 768]); add_21 = None
+ permute_22: "f32[768, 3072][1, 768]cuda:0" = torch.ops.aten.permute.default(primals_36, [1, 0]); primals_36 = None
+ addmm_10: "f32[64, 3072][3072, 1]cuda:0" = torch.ops.aten.addmm.default(primals_37, view_32, permute_22); primals_37 = None
+ view_33: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.view.default(addmm_10, [1, 64, 3072])
+
+ # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:25 in new_gelu, code: 0.5
+ mul_20: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.mul.Tensor(view_33, 0.5)
+
+ # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:29 in new_gelu, code: + torch.tanh(math.sqrt(2.0 / math.pi) * (x + 0.044715 * torch.pow(x, 3.0)))
+ pow_3: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.pow.Tensor_Scalar(view_33, 3.0)
+ mul_21: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.mul.Tensor(pow_3, 0.044715); pow_3 = None
+ add_22: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.add.Tensor(view_33, mul_21); view_33 = mul_21 = None
+ mul_22: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.mul.Tensor(add_22, 0.7978845608028654); add_22 = None
+ tanh_2: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.tanh.default(mul_22); mul_22 = None
+
+ # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:28 in new_gelu, code: 1.0
+ add_23: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.add.Tensor(tanh_2, 1.0); tanh_2 = None
+
+ # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:25 in new_gelu, code: 0.5
+ mul_23: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.mul.Tensor(mul_20, add_23); mul_20 = add_23 = None
+
+ # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:130 in forward, code: x = self.c_proj(x)
+ view_34: "f32[64, 3072][3072, 1]cuda:0" = torch.ops.aten.view.default(mul_23, [64, 3072]); mul_23 = None
+ permute_23: "f32[3072, 768][1, 3072]cuda:0" = torch.ops.aten.permute.default(primals_38, [1, 0]); primals_38 = None
+ addmm_11: "f32[64, 768][768, 1]cuda:0" = torch.ops.aten.addmm.default(primals_39, view_34, permute_23); primals_39 = None
+ view_35: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.view.default(addmm_11, [1, 64, 768]); addmm_11 = None
+
+ # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:145 in forward, code: x = x + self.mlp(self.ln_2(x))
+ add_24: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.add.Tensor(add_19, view_35); add_19 = view_35 = None
+
+ # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:43 in forward, code: return F.layer_norm(input, self.weight.shape, self.weight, self.bias, 1e-5)
+ var_mean_6 = torch.ops.aten.var_mean.correction(add_24, [2], correction = 0, keepdim = True)
+ getitem_33: "f32[1, 64, 1][64, 1, 1]cuda:0" = var_mean_6[0]
+ getitem_34: "f32[1, 64, 1][64, 1, 1]cuda:0" = var_mean_6[1]; var_mean_6 = None
+ add_25: "f32[1, 64, 1][64, 1, 1]cuda:0" = torch.ops.aten.add.Tensor(getitem_33, 1e-05); getitem_33 = None
+ rsqrt_6: "f32[1, 64, 1][64, 1, 1]cuda:0" = torch.ops.aten.rsqrt.default(add_25); add_25 = None
+ sub_6: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.sub.Tensor(add_24, getitem_34); getitem_34 = None
+ mul_24: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.mul.Tensor(sub_6, rsqrt_6); sub_6 = None
+ mul_25: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.mul.Tensor(mul_24, primals_40)
+ add_26: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.add.Tensor(mul_25, primals_41); mul_25 = primals_41 = None
+
+ # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:82 in forward, code: q, k, v = self.c_attn(x).split(self.n_embd, dim=2)
+ view_36: "f32[64, 768][768, 1]cuda:0" = torch.ops.aten.view.default(add_26, [64, 768]); add_26 = None
+ permute_24: "f32[768, 2304][1, 768]cuda:0" = torch.ops.aten.permute.default(primals_42, [1, 0]); primals_42 = None
+ addmm_12: "f32[64, 2304][2304, 1]cuda:0" = torch.ops.aten.addmm.default(primals_43, view_36, permute_24); primals_43 = None
+ view_37: "f32[1, 64, 2304][147456, 2304, 1]cuda:0" = torch.ops.aten.view.default(addmm_12, [1, 64, 2304]); addmm_12 = None
+ split_3 = torch.ops.aten.split.Tensor(view_37, 768, 2); view_37 = None
+ getitem_35: "f32[1, 64, 768][147456, 2304, 1]cuda:0" = split_3[0]
+ getitem_36: "f32[1, 64, 768][147456, 2304, 1]cuda:0" = split_3[1]
+ getitem_37: "f32[1, 64, 768][147456, 2304, 1]cuda:0" = split_3[2]; split_3 = None
+
+ # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:83 in forward, code: k = k.view(B, T, self.n_head, C // self.n_head).transpose(
+ view_38: "f32[1, 64, 12, 64][147456, 2304, 64, 1]cuda:0" = torch.ops.aten.view.default(getitem_36, [1, 64, 12, 64]); getitem_36 = None
+ permute_25: "f32[1, 12, 64, 64][147456, 64, 2304, 1]cuda:0" = torch.ops.aten.permute.default(view_38, [0, 2, 1, 3]); view_38 = None
+
+ # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:86 in forward, code: q = q.view(B, T, self.n_head, C // self.n_head).transpose(
+ view_39: "f32[1, 64, 12, 64][147456, 2304, 64, 1]cuda:0" = torch.ops.aten.view.default(getitem_35, [1, 64, 12, 64]); getitem_35 = None
+ permute_26: "f32[1, 12, 64, 64][147456, 64, 2304, 1]cuda:0" = torch.ops.aten.permute.default(view_39, [0, 2, 1, 3]); view_39 = None
+
+ # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:89 in forward, code: v = v.view(B, T, self.n_head, C // self.n_head).transpose(
+ view_40: "f32[1, 64, 12, 64][147456, 2304, 64, 1]cuda:0" = torch.ops.aten.view.default(getitem_37, [1, 64, 12, 64]); getitem_37 = None
+ permute_27: "f32[1, 12, 64, 64][147456, 64, 2304, 1]cuda:0" = torch.ops.aten.permute.default(view_40, [0, 2, 1, 3]); view_40 = None
+
+ # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:96 in forward, code: y = torch.nn.functional.scaled_dot_product_attention(
+ _scaled_dot_product_efficient_attention_3 = torch.ops.aten._scaled_dot_product_efficient_attention.default(permute_26, permute_25, permute_27, None, True, 0.0, True)
+ getitem_38: "f32[1, 12, 64, 64][49152, 64, 768, 1]cuda:0" = _scaled_dot_product_efficient_attention_3[0]
+ getitem_39: "f32[1, 12, 64][768, 64, 1]cuda:0" = _scaled_dot_product_efficient_attention_3[1]
+ getitem_40: "i64[][]cuda:0" = _scaled_dot_product_efficient_attention_3[2]
+ getitem_41: "i64[][]cuda:0" = _scaled_dot_product_efficient_attention_3[3]; _scaled_dot_product_efficient_attention_3 = None
+
+ # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:112 in forward, code: y.transpose(1, 2).contiguous().view(B, T, C)
+ permute_28: "f32[1, 64, 12, 64][49152, 768, 64, 1]cuda:0" = torch.ops.aten.permute.default(getitem_38, [0, 2, 1, 3])
+ view_41: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.view.default(permute_28, [1, 64, 768]); permute_28 = None
+
+ # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:116 in forward, code: y = self.resid_dropout(self.c_proj(y))
+ view_42: "f32[64, 768][768, 1]cuda:0" = torch.ops.aten.view.default(view_41, [64, 768]); view_41 = None
+ permute_29: "f32[768, 768][1, 768]cuda:0" = torch.ops.aten.permute.default(primals_44, [1, 0]); primals_44 = None
+ addmm_13: "f32[64, 768][768, 1]cuda:0" = torch.ops.aten.addmm.default(primals_45, view_42, permute_29); primals_45 = view_42 = None
+ view_43: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.view.default(addmm_13, [1, 64, 768]); addmm_13 = None
+
+ # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:144 in forward, code: x = x + self.attn(self.ln_1(x))
+ add_27: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.add.Tensor(add_24, view_43); add_24 = view_43 = None
+
+ # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:43 in forward, code: return F.layer_norm(input, self.weight.shape, self.weight, self.bias, 1e-5)
+ var_mean_7 = torch.ops.aten.var_mean.correction(add_27, [2], correction = 0, keepdim = True)
+ getitem_42: "f32[1, 64, 1][64, 1, 1]cuda:0" = var_mean_7[0]
+ getitem_43: "f32[1, 64, 1][64, 1, 1]cuda:0" = var_mean_7[1]; var_mean_7 = None
+ add_28: "f32[1, 64, 1][64, 1, 1]cuda:0" = torch.ops.aten.add.Tensor(getitem_42, 1e-05); getitem_42 = None
+ rsqrt_7: "f32[1, 64, 1][64, 1, 1]cuda:0" = torch.ops.aten.rsqrt.default(add_28); add_28 = None
+ sub_7: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.sub.Tensor(add_27, getitem_43); getitem_43 = None
+ mul_26: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.mul.Tensor(sub_7, rsqrt_7); sub_7 = None
+ mul_27: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.mul.Tensor(mul_26, primals_46)
+ add_29: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.add.Tensor(mul_27, primals_47); mul_27 = primals_47 = None
+
+ # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:128 in forward, code: x = self.c_fc(x)
+ view_44: "f32[64, 768][768, 1]cuda:0" = torch.ops.aten.view.default(add_29, [64, 768]); add_29 = None
+ permute_30: "f32[768, 3072][1, 768]cuda:0" = torch.ops.aten.permute.default(primals_48, [1, 0]); primals_48 = None
+ addmm_14: "f32[64, 3072][3072, 1]cuda:0" = torch.ops.aten.addmm.default(primals_49, view_44, permute_30); primals_49 = None
+ view_45: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.view.default(addmm_14, [1, 64, 3072])
+
+ # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:25 in new_gelu, code: 0.5
+ mul_28: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.mul.Tensor(view_45, 0.5)
+
+ # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:29 in new_gelu, code: + torch.tanh(math.sqrt(2.0 / math.pi) * (x + 0.044715 * torch.pow(x, 3.0)))
+ pow_4: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.pow.Tensor_Scalar(view_45, 3.0)
+ mul_29: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.mul.Tensor(pow_4, 0.044715); pow_4 = None
+ add_30: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.add.Tensor(view_45, mul_29); view_45 = mul_29 = None
+ mul_30: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.mul.Tensor(add_30, 0.7978845608028654); add_30 = None
+ tanh_3: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.tanh.default(mul_30); mul_30 = None
+
+ # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:28 in new_gelu, code: 1.0
+ add_31: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.add.Tensor(tanh_3, 1.0); tanh_3 = None
+
+ # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:25 in new_gelu, code: 0.5
+ mul_31: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.mul.Tensor(mul_28, add_31); mul_28 = add_31 = None
+
+ # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:130 in forward, code: x = self.c_proj(x)
+ view_46: "f32[64, 3072][3072, 1]cuda:0" = torch.ops.aten.view.default(mul_31, [64, 3072]); mul_31 = None
+ permute_31: "f32[3072, 768][1, 3072]cuda:0" = torch.ops.aten.permute.default(primals_50, [1, 0]); primals_50 = None
+ addmm_15: "f32[64, 768][768, 1]cuda:0" = torch.ops.aten.addmm.default(primals_51, view_46, permute_31); primals_51 = None
+ view_47: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.view.default(addmm_15, [1, 64, 768]); addmm_15 = None
+
+ # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:145 in forward, code: x = x + self.mlp(self.ln_2(x))
+ add_32: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.add.Tensor(add_27, view_47); add_27 = view_47 = None
+
+ # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:43 in forward, code: return F.layer_norm(input, self.weight.shape, self.weight, self.bias, 1e-5)
+ var_mean_8 = torch.ops.aten.var_mean.correction(add_32, [2], correction = 0, keepdim = True)
+ getitem_44: "f32[1, 64, 1][64, 1, 1]cuda:0" = var_mean_8[0]
+ getitem_45: "f32[1, 64, 1][64, 1, 1]cuda:0" = var_mean_8[1]; var_mean_8 = None
+ add_33: "f32[1, 64, 1][64, 1, 1]cuda:0" = torch.ops.aten.add.Tensor(getitem_44, 1e-05); getitem_44 = None
+ rsqrt_8: "f32[1, 64, 1][64, 1, 1]cuda:0" = torch.ops.aten.rsqrt.default(add_33); add_33 = None
+ sub_8: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.sub.Tensor(add_32, getitem_45); getitem_45 = None
+ mul_32: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.mul.Tensor(sub_8, rsqrt_8); sub_8 = None
+ mul_33: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.mul.Tensor(mul_32, primals_52)
+ add_34: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.add.Tensor(mul_33, primals_53); mul_33 = primals_53 = None
+
+ # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:82 in forward, code: q, k, v = self.c_attn(x).split(self.n_embd, dim=2)
+ view_48: "f32[64, 768][768, 1]cuda:0" = torch.ops.aten.view.default(add_34, [64, 768]); add_34 = None
+ permute_32: "f32[768, 2304][1, 768]cuda:0" = torch.ops.aten.permute.default(primals_54, [1, 0]); primals_54 = None
+ addmm_16: "f32[64, 2304][2304, 1]cuda:0" = torch.ops.aten.addmm.default(primals_55, view_48, permute_32); primals_55 = None
+ view_49: "f32[1, 64, 2304][147456, 2304, 1]cuda:0" = torch.ops.aten.view.default(addmm_16, [1, 64, 2304]); addmm_16 = None
+ split_4 = torch.ops.aten.split.Tensor(view_49, 768, 2); view_49 = None
+ getitem_46: "f32[1, 64, 768][147456, 2304, 1]cuda:0" = split_4[0]
+ getitem_47: "f32[1, 64, 768][147456, 2304, 1]cuda:0" = split_4[1]
+ getitem_48: "f32[1, 64, 768][147456, 2304, 1]cuda:0" = split_4[2]; split_4 = None
+
+ # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:83 in forward, code: k = k.view(B, T, self.n_head, C // self.n_head).transpose(
+ view_50: "f32[1, 64, 12, 64][147456, 2304, 64, 1]cuda:0" = torch.ops.aten.view.default(getitem_47, [1, 64, 12, 64]); getitem_47 = None
+ permute_33: "f32[1, 12, 64, 64][147456, 64, 2304, 1]cuda:0" = torch.ops.aten.permute.default(view_50, [0, 2, 1, 3]); view_50 = None
+
+ # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:86 in forward, code: q = q.view(B, T, self.n_head, C // self.n_head).transpose(
+ view_51: "f32[1, 64, 12, 64][147456, 2304, 64, 1]cuda:0" = torch.ops.aten.view.default(getitem_46, [1, 64, 12, 64]); getitem_46 = None
+ permute_34: "f32[1, 12, 64, 64][147456, 64, 2304, 1]cuda:0" = torch.ops.aten.permute.default(view_51, [0, 2, 1, 3]); view_51 = None
+
+ # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:89 in forward, code: v = v.view(B, T, self.n_head, C // self.n_head).transpose(
+ view_52: "f32[1, 64, 12, 64][147456, 2304, 64, 1]cuda:0" = torch.ops.aten.view.default(getitem_48, [1, 64, 12, 64]); getitem_48 = None
+ permute_35: "f32[1, 12, 64, 64][147456, 64, 2304, 1]cuda:0" = torch.ops.aten.permute.default(view_52, [0, 2, 1, 3]); view_52 = None
+
+ # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:96 in forward, code: y = torch.nn.functional.scaled_dot_product_attention(
+ _scaled_dot_product_efficient_attention_4 = torch.ops.aten._scaled_dot_product_efficient_attention.default(permute_34, permute_33, permute_35, None, True, 0.0, True)
+ getitem_49: "f32[1, 12, 64, 64][49152, 64, 768, 1]cuda:0" = _scaled_dot_product_efficient_attention_4[0]
+ getitem_50: "f32[1, 12, 64][768, 64, 1]cuda:0" = _scaled_dot_product_efficient_attention_4[1]
+ getitem_51: "i64[][]cuda:0" = _scaled_dot_product_efficient_attention_4[2]
+ getitem_52: "i64[][]cuda:0" = _scaled_dot_product_efficient_attention_4[3]; _scaled_dot_product_efficient_attention_4 = None
+
+ # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:112 in forward, code: y.transpose(1, 2).contiguous().view(B, T, C)
+ permute_36: "f32[1, 64, 12, 64][49152, 768, 64, 1]cuda:0" = torch.ops.aten.permute.default(getitem_49, [0, 2, 1, 3])
+ view_53: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.view.default(permute_36, [1, 64, 768]); permute_36 = None
+
+ # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:116 in forward, code: y = self.resid_dropout(self.c_proj(y))
+ view_54: "f32[64, 768][768, 1]cuda:0" = torch.ops.aten.view.default(view_53, [64, 768]); view_53 = None
+ permute_37: "f32[768, 768][1, 768]cuda:0" = torch.ops.aten.permute.default(primals_56, [1, 0]); primals_56 = None
+ addmm_17: "f32[64, 768][768, 1]cuda:0" = torch.ops.aten.addmm.default(primals_57, view_54, permute_37); primals_57 = view_54 = None
+ view_55: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.view.default(addmm_17, [1, 64, 768]); addmm_17 = None
+
+ # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:144 in forward, code: x = x + self.attn(self.ln_1(x))
+ add_35: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.add.Tensor(add_32, view_55); add_32 = view_55 = None
+
+ # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:43 in forward, code: return F.layer_norm(input, self.weight.shape, self.weight, self.bias, 1e-5)
+ var_mean_9 = torch.ops.aten.var_mean.correction(add_35, [2], correction = 0, keepdim = True)
+ getitem_53: "f32[1, 64, 1][64, 1, 1]cuda:0" = var_mean_9[0]
+ getitem_54: "f32[1, 64, 1][64, 1, 1]cuda:0" = var_mean_9[1]; var_mean_9 = None
+ add_36: "f32[1, 64, 1][64, 1, 1]cuda:0" = torch.ops.aten.add.Tensor(getitem_53, 1e-05); getitem_53 = None
+ rsqrt_9: "f32[1, 64, 1][64, 1, 1]cuda:0" = torch.ops.aten.rsqrt.default(add_36); add_36 = None
+ sub_9: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.sub.Tensor(add_35, getitem_54); getitem_54 = None
+ mul_34: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.mul.Tensor(sub_9, rsqrt_9); sub_9 = None
+ mul_35: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.mul.Tensor(mul_34, primals_58)
+ add_37: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.add.Tensor(mul_35, primals_59); mul_35 = primals_59 = None
+
+ # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:128 in forward, code: x = self.c_fc(x)
+ view_56: "f32[64, 768][768, 1]cuda:0" = torch.ops.aten.view.default(add_37, [64, 768]); add_37 = None
+ permute_38: "f32[768, 3072][1, 768]cuda:0" = torch.ops.aten.permute.default(primals_60, [1, 0]); primals_60 = None
+ addmm_18: "f32[64, 3072][3072, 1]cuda:0" = torch.ops.aten.addmm.default(primals_61, view_56, permute_38); primals_61 = None
+ view_57: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.view.default(addmm_18, [1, 64, 3072])
+
+ # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:25 in new_gelu, code: 0.5
+ mul_36: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.mul.Tensor(view_57, 0.5)
+
+ # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:29 in new_gelu, code: + torch.tanh(math.sqrt(2.0 / math.pi) * (x + 0.044715 * torch.pow(x, 3.0)))
+ pow_5: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.pow.Tensor_Scalar(view_57, 3.0)
+ mul_37: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.mul.Tensor(pow_5, 0.044715); pow_5 = None
+ add_38: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.add.Tensor(view_57, mul_37); view_57 = mul_37 = None
+ mul_38: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.mul.Tensor(add_38, 0.7978845608028654); add_38 = None
+ tanh_4: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.tanh.default(mul_38); mul_38 = None
+
+ # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:28 in new_gelu, code: 1.0
+ add_39: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.add.Tensor(tanh_4, 1.0); tanh_4 = None
+
+ # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:25 in new_gelu, code: 0.5
+ mul_39: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.mul.Tensor(mul_36, add_39); mul_36 = add_39 = None
+
+ # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:130 in forward, code: x = self.c_proj(x)
+ view_58: "f32[64, 3072][3072, 1]cuda:0" = torch.ops.aten.view.default(mul_39, [64, 3072]); mul_39 = None
+ permute_39: "f32[3072, 768][1, 3072]cuda:0" = torch.ops.aten.permute.default(primals_62, [1, 0]); primals_62 = None
+ addmm_19: "f32[64, 768][768, 1]cuda:0" = torch.ops.aten.addmm.default(primals_63, view_58, permute_39); primals_63 = None
+ view_59: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.view.default(addmm_19, [1, 64, 768]); addmm_19 = None
+
+ # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:145 in forward, code: x = x + self.mlp(self.ln_2(x))
+ add_40: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.add.Tensor(add_35, view_59); add_35 = view_59 = None
+
+ # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:43 in forward, code: return F.layer_norm(input, self.weight.shape, self.weight, self.bias, 1e-5)
+ var_mean_10 = torch.ops.aten.var_mean.correction(add_40, [2], correction = 0, keepdim = True)
+ getitem_55: "f32[1, 64, 1][64, 1, 1]cuda:0" = var_mean_10[0]
+ getitem_56: "f32[1, 64, 1][64, 1, 1]cuda:0" = var_mean_10[1]; var_mean_10 = None
+ add_41: "f32[1, 64, 1][64, 1, 1]cuda:0" = torch.ops.aten.add.Tensor(getitem_55, 1e-05); getitem_55 = None
+ rsqrt_10: "f32[1, 64, 1][64, 1, 1]cuda:0" = torch.ops.aten.rsqrt.default(add_41); add_41 = None
+ sub_10: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.sub.Tensor(add_40, getitem_56); getitem_56 = None
+ mul_40: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.mul.Tensor(sub_10, rsqrt_10); sub_10 = None
+ mul_41: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.mul.Tensor(mul_40, primals_64)
+ add_42: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.add.Tensor(mul_41, primals_65); mul_41 = primals_65 = None
+
+ # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:82 in forward, code: q, k, v = self.c_attn(x).split(self.n_embd, dim=2)
+ view_60: "f32[64, 768][768, 1]cuda:0" = torch.ops.aten.view.default(add_42, [64, 768]); add_42 = None
+ permute_40: "f32[768, 2304][1, 768]cuda:0" = torch.ops.aten.permute.default(primals_66, [1, 0]); primals_66 = None
+ addmm_20: "f32[64, 2304][2304, 1]cuda:0" = torch.ops.aten.addmm.default(primals_67, view_60, permute_40); primals_67 = None
+ view_61: "f32[1, 64, 2304][147456, 2304, 1]cuda:0" = torch.ops.aten.view.default(addmm_20, [1, 64, 2304]); addmm_20 = None
+ split_5 = torch.ops.aten.split.Tensor(view_61, 768, 2); view_61 = None
+ getitem_57: "f32[1, 64, 768][147456, 2304, 1]cuda:0" = split_5[0]
+ getitem_58: "f32[1, 64, 768][147456, 2304, 1]cuda:0" = split_5[1]
+ getitem_59: "f32[1, 64, 768][147456, 2304, 1]cuda:0" = split_5[2]; split_5 = None
+
+ # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:83 in forward, code: k = k.view(B, T, self.n_head, C // self.n_head).transpose(
+ view_62: "f32[1, 64, 12, 64][147456, 2304, 64, 1]cuda:0" = torch.ops.aten.view.default(getitem_58, [1, 64, 12, 64]); getitem_58 = None
+ permute_41: "f32[1, 12, 64, 64][147456, 64, 2304, 1]cuda:0" = torch.ops.aten.permute.default(view_62, [0, 2, 1, 3]); view_62 = None
+
+ # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:86 in forward, code: q = q.view(B, T, self.n_head, C // self.n_head).transpose(
+ view_63: "f32[1, 64, 12, 64][147456, 2304, 64, 1]cuda:0" = torch.ops.aten.view.default(getitem_57, [1, 64, 12, 64]); getitem_57 = None
+ permute_42: "f32[1, 12, 64, 64][147456, 64, 2304, 1]cuda:0" = torch.ops.aten.permute.default(view_63, [0, 2, 1, 3]); view_63 = None
+
+ # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:89 in forward, code: v = v.view(B, T, self.n_head, C // self.n_head).transpose(
+ view_64: "f32[1, 64, 12, 64][147456, 2304, 64, 1]cuda:0" = torch.ops.aten.view.default(getitem_59, [1, 64, 12, 64]); getitem_59 = None
+ permute_43: "f32[1, 12, 64, 64][147456, 64, 2304, 1]cuda:0" = torch.ops.aten.permute.default(view_64, [0, 2, 1, 3]); view_64 = None
+
+ # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:96 in forward, code: y = torch.nn.functional.scaled_dot_product_attention(
+ _scaled_dot_product_efficient_attention_5 = torch.ops.aten._scaled_dot_product_efficient_attention.default(permute_42, permute_41, permute_43, None, True, 0.0, True)
+ getitem_60: "f32[1, 12, 64, 64][49152, 64, 768, 1]cuda:0" = _scaled_dot_product_efficient_attention_5[0]
+ getitem_61: "f32[1, 12, 64][768, 64, 1]cuda:0" = _scaled_dot_product_efficient_attention_5[1]
+ getitem_62: "i64[][]cuda:0" = _scaled_dot_product_efficient_attention_5[2]
+ getitem_63: "i64[][]cuda:0" = _scaled_dot_product_efficient_attention_5[3]; _scaled_dot_product_efficient_attention_5 = None
+
+ # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:112 in forward, code: y.transpose(1, 2).contiguous().view(B, T, C)
+ permute_44: "f32[1, 64, 12, 64][49152, 768, 64, 1]cuda:0" = torch.ops.aten.permute.default(getitem_60, [0, 2, 1, 3])
+ view_65: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.view.default(permute_44, [1, 64, 768]); permute_44 = None
+
+ # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:116 in forward, code: y = self.resid_dropout(self.c_proj(y))
+ view_66: "f32[64, 768][768, 1]cuda:0" = torch.ops.aten.view.default(view_65, [64, 768]); view_65 = None
+ permute_45: "f32[768, 768][1, 768]cuda:0" = torch.ops.aten.permute.default(primals_68, [1, 0]); primals_68 = None
+ addmm_21: "f32[64, 768][768, 1]cuda:0" = torch.ops.aten.addmm.default(primals_69, view_66, permute_45); primals_69 = view_66 = None
+ view_67: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.view.default(addmm_21, [1, 64, 768]); addmm_21 = None
+
+ # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:144 in forward, code: x = x + self.attn(self.ln_1(x))
+ add_43: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.add.Tensor(add_40, view_67); add_40 = view_67 = None
+
+ # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:43 in forward, code: return F.layer_norm(input, self.weight.shape, self.weight, self.bias, 1e-5)
+ var_mean_11 = torch.ops.aten.var_mean.correction(add_43, [2], correction = 0, keepdim = True)
+ getitem_64: "f32[1, 64, 1][64, 1, 1]cuda:0" = var_mean_11[0]
+ getitem_65: "f32[1, 64, 1][64, 1, 1]cuda:0" = var_mean_11[1]; var_mean_11 = None
+ add_44: "f32[1, 64, 1][64, 1, 1]cuda:0" = torch.ops.aten.add.Tensor(getitem_64, 1e-05); getitem_64 = None
+ rsqrt_11: "f32[1, 64, 1][64, 1, 1]cuda:0" = torch.ops.aten.rsqrt.default(add_44); add_44 = None
+ sub_11: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.sub.Tensor(add_43, getitem_65); getitem_65 = None
+ mul_42: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.mul.Tensor(sub_11, rsqrt_11); sub_11 = None
+ mul_43: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.mul.Tensor(mul_42, primals_70)
+ add_45: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.add.Tensor(mul_43, primals_71); mul_43 = primals_71 = None
+
+ # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:128 in forward, code: x = self.c_fc(x)
+ view_68: "f32[64, 768][768, 1]cuda:0" = torch.ops.aten.view.default(add_45, [64, 768]); add_45 = None
+ permute_46: "f32[768, 3072][1, 768]cuda:0" = torch.ops.aten.permute.default(primals_72, [1, 0]); primals_72 = None
+ addmm_22: "f32[64, 3072][3072, 1]cuda:0" = torch.ops.aten.addmm.default(primals_73, view_68, permute_46); primals_73 = None
+ view_69: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.view.default(addmm_22, [1, 64, 3072])
+
+ # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:25 in new_gelu, code: 0.5
+ mul_44: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.mul.Tensor(view_69, 0.5)
+
+ # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:29 in new_gelu, code: + torch.tanh(math.sqrt(2.0 / math.pi) * (x + 0.044715 * torch.pow(x, 3.0)))
+ pow_6: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.pow.Tensor_Scalar(view_69, 3.0)
+ mul_45: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.mul.Tensor(pow_6, 0.044715); pow_6 = None
+ add_46: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.add.Tensor(view_69, mul_45); view_69 = mul_45 = None
+ mul_46: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.mul.Tensor(add_46, 0.7978845608028654); add_46 = None
+ tanh_5: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.tanh.default(mul_46); mul_46 = None
+
+ # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:28 in new_gelu, code: 1.0
+ add_47: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.add.Tensor(tanh_5, 1.0); tanh_5 = None
+
+ # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:25 in new_gelu, code: 0.5
+ mul_47: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.mul.Tensor(mul_44, add_47); mul_44 = add_47 = None
+
+ # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:130 in forward, code: x = self.c_proj(x)
+ view_70: "f32[64, 3072][3072, 1]cuda:0" = torch.ops.aten.view.default(mul_47, [64, 3072]); mul_47 = None
+ permute_47: "f32[3072, 768][1, 3072]cuda:0" = torch.ops.aten.permute.default(primals_74, [1, 0]); primals_74 = None
+ addmm_23: "f32[64, 768][768, 1]cuda:0" = torch.ops.aten.addmm.default(primals_75, view_70, permute_47); primals_75 = None
+ view_71: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.view.default(addmm_23, [1, 64, 768]); addmm_23 = None
+
+ # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:145 in forward, code: x = x + self.mlp(self.ln_2(x))
+ add_48: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.add.Tensor(add_43, view_71); add_43 = view_71 = None
+
+ # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:43 in forward, code: return F.layer_norm(input, self.weight.shape, self.weight, self.bias, 1e-5)
+ var_mean_12 = torch.ops.aten.var_mean.correction(add_48, [2], correction = 0, keepdim = True)
+ getitem_66: "f32[1, 64, 1][64, 1, 1]cuda:0" = var_mean_12[0]
+ getitem_67: "f32[1, 64, 1][64, 1, 1]cuda:0" = var_mean_12[1]; var_mean_12 = None
+ add_49: "f32[1, 64, 1][64, 1, 1]cuda:0" = torch.ops.aten.add.Tensor(getitem_66, 1e-05); getitem_66 = None
+ rsqrt_12: "f32[1, 64, 1][64, 1, 1]cuda:0" = torch.ops.aten.rsqrt.default(add_49); add_49 = None
+ sub_12: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.sub.Tensor(add_48, getitem_67); getitem_67 = None
+ mul_48: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.mul.Tensor(sub_12, rsqrt_12); sub_12 = None
+ mul_49: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.mul.Tensor(mul_48, primals_76)
+ add_50: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.add.Tensor(mul_49, primals_77); mul_49 = primals_77 = None
+
+ # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:82 in forward, code: q, k, v = self.c_attn(x).split(self.n_embd, dim=2)
+ view_72: "f32[64, 768][768, 1]cuda:0" = torch.ops.aten.view.default(add_50, [64, 768]); add_50 = None
+ permute_48: "f32[768, 2304][1, 768]cuda:0" = torch.ops.aten.permute.default(primals_78, [1, 0]); primals_78 = None
+ addmm_24: "f32[64, 2304][2304, 1]cuda:0" = torch.ops.aten.addmm.default(primals_79, view_72, permute_48); primals_79 = None
+ view_73: "f32[1, 64, 2304][147456, 2304, 1]cuda:0" = torch.ops.aten.view.default(addmm_24, [1, 64, 2304]); addmm_24 = None
+ split_6 = torch.ops.aten.split.Tensor(view_73, 768, 2); view_73 = None
+ getitem_68: "f32[1, 64, 768][147456, 2304, 1]cuda:0" = split_6[0]
+ getitem_69: "f32[1, 64, 768][147456, 2304, 1]cuda:0" = split_6[1]
+ getitem_70: "f32[1, 64, 768][147456, 2304, 1]cuda:0" = split_6[2]; split_6 = None
+
+ # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:83 in forward, code: k = k.view(B, T, self.n_head, C // self.n_head).transpose(
+ view_74: "f32[1, 64, 12, 64][147456, 2304, 64, 1]cuda:0" = torch.ops.aten.view.default(getitem_69, [1, 64, 12, 64]); getitem_69 = None
+ permute_49: "f32[1, 12, 64, 64][147456, 64, 2304, 1]cuda:0" = torch.ops.aten.permute.default(view_74, [0, 2, 1, 3]); view_74 = None
+
+ # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:86 in forward, code: q = q.view(B, T, self.n_head, C // self.n_head).transpose(
+ view_75: "f32[1, 64, 12, 64][147456, 2304, 64, 1]cuda:0" = torch.ops.aten.view.default(getitem_68, [1, 64, 12, 64]); getitem_68 = None
+ permute_50: "f32[1, 12, 64, 64][147456, 64, 2304, 1]cuda:0" = torch.ops.aten.permute.default(view_75, [0, 2, 1, 3]); view_75 = None
+
+ # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:89 in forward, code: v = v.view(B, T, self.n_head, C // self.n_head).transpose(
+ view_76: "f32[1, 64, 12, 64][147456, 2304, 64, 1]cuda:0" = torch.ops.aten.view.default(getitem_70, [1, 64, 12, 64]); getitem_70 = None
+ permute_51: "f32[1, 12, 64, 64][147456, 64, 2304, 1]cuda:0" = torch.ops.aten.permute.default(view_76, [0, 2, 1, 3]); view_76 = None
+
+ # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:96 in forward, code: y = torch.nn.functional.scaled_dot_product_attention(
+ _scaled_dot_product_efficient_attention_6 = torch.ops.aten._scaled_dot_product_efficient_attention.default(permute_50, permute_49, permute_51, None, True, 0.0, True)
+ getitem_71: "f32[1, 12, 64, 64][49152, 64, 768, 1]cuda:0" = _scaled_dot_product_efficient_attention_6[0]
+ getitem_72: "f32[1, 12, 64][768, 64, 1]cuda:0" = _scaled_dot_product_efficient_attention_6[1]
+ getitem_73: "i64[][]cuda:0" = _scaled_dot_product_efficient_attention_6[2]
+ getitem_74: "i64[][]cuda:0" = _scaled_dot_product_efficient_attention_6[3]; _scaled_dot_product_efficient_attention_6 = None
+
+ # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:112 in forward, code: y.transpose(1, 2).contiguous().view(B, T, C)
+ permute_52: "f32[1, 64, 12, 64][49152, 768, 64, 1]cuda:0" = torch.ops.aten.permute.default(getitem_71, [0, 2, 1, 3])
+ view_77: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.view.default(permute_52, [1, 64, 768]); permute_52 = None
+
+ # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:116 in forward, code: y = self.resid_dropout(self.c_proj(y))
+ view_78: "f32[64, 768][768, 1]cuda:0" = torch.ops.aten.view.default(view_77, [64, 768]); view_77 = None
+ permute_53: "f32[768, 768][1, 768]cuda:0" = torch.ops.aten.permute.default(primals_80, [1, 0]); primals_80 = None
+ addmm_25: "f32[64, 768][768, 1]cuda:0" = torch.ops.aten.addmm.default(primals_81, view_78, permute_53); primals_81 = view_78 = None
+ view_79: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.view.default(addmm_25, [1, 64, 768]); addmm_25 = None
+
+ # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:144 in forward, code: x = x + self.attn(self.ln_1(x))
+ add_51: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.add.Tensor(add_48, view_79); add_48 = view_79 = None
+
+ # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:43 in forward, code: return F.layer_norm(input, self.weight.shape, self.weight, self.bias, 1e-5)
+ var_mean_13 = torch.ops.aten.var_mean.correction(add_51, [2], correction = 0, keepdim = True)
+ getitem_75: "f32[1, 64, 1][64, 1, 1]cuda:0" = var_mean_13[0]
+ getitem_76: "f32[1, 64, 1][64, 1, 1]cuda:0" = var_mean_13[1]; var_mean_13 = None
+ add_52: "f32[1, 64, 1][64, 1, 1]cuda:0" = torch.ops.aten.add.Tensor(getitem_75, 1e-05); getitem_75 = None
+ rsqrt_13: "f32[1, 64, 1][64, 1, 1]cuda:0" = torch.ops.aten.rsqrt.default(add_52); add_52 = None
+ sub_13: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.sub.Tensor(add_51, getitem_76); getitem_76 = None
+ mul_50: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.mul.Tensor(sub_13, rsqrt_13); sub_13 = None
+ mul_51: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.mul.Tensor(mul_50, primals_82)
+ add_53: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.add.Tensor(mul_51, primals_83); mul_51 = primals_83 = None
+
+ # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:128 in forward, code: x = self.c_fc(x)
+ view_80: "f32[64, 768][768, 1]cuda:0" = torch.ops.aten.view.default(add_53, [64, 768]); add_53 = None
+ permute_54: "f32[768, 3072][1, 768]cuda:0" = torch.ops.aten.permute.default(primals_84, [1, 0]); primals_84 = None
+ addmm_26: "f32[64, 3072][3072, 1]cuda:0" = torch.ops.aten.addmm.default(primals_85, view_80, permute_54); primals_85 = None
+ view_81: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.view.default(addmm_26, [1, 64, 3072])
+
+ # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:25 in new_gelu, code: 0.5
+ mul_52: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.mul.Tensor(view_81, 0.5)
+
+ # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:29 in new_gelu, code: + torch.tanh(math.sqrt(2.0 / math.pi) * (x + 0.044715 * torch.pow(x, 3.0)))
+ pow_7: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.pow.Tensor_Scalar(view_81, 3.0)
+ mul_53: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.mul.Tensor(pow_7, 0.044715); pow_7 = None
+ add_54: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.add.Tensor(view_81, mul_53); view_81 = mul_53 = None
+ mul_54: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.mul.Tensor(add_54, 0.7978845608028654); add_54 = None
+ tanh_6: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.tanh.default(mul_54); mul_54 = None
+
+ # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:28 in new_gelu, code: 1.0
+ add_55: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.add.Tensor(tanh_6, 1.0); tanh_6 = None
+
+ # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:25 in new_gelu, code: 0.5
+ mul_55: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.mul.Tensor(mul_52, add_55); mul_52 = add_55 = None
+
+ # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:130 in forward, code: x = self.c_proj(x)
+ view_82: "f32[64, 3072][3072, 1]cuda:0" = torch.ops.aten.view.default(mul_55, [64, 3072]); mul_55 = None
+ permute_55: "f32[3072, 768][1, 3072]cuda:0" = torch.ops.aten.permute.default(primals_86, [1, 0]); primals_86 = None
+ addmm_27: "f32[64, 768][768, 1]cuda:0" = torch.ops.aten.addmm.default(primals_87, view_82, permute_55); primals_87 = None
+ view_83: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.view.default(addmm_27, [1, 64, 768]); addmm_27 = None
+
+ # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:145 in forward, code: x = x + self.mlp(self.ln_2(x))
+ add_56: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.add.Tensor(add_51, view_83); add_51 = view_83 = None
+
+ # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:43 in forward, code: return F.layer_norm(input, self.weight.shape, self.weight, self.bias, 1e-5)
+ var_mean_14 = torch.ops.aten.var_mean.correction(add_56, [2], correction = 0, keepdim = True)
+ getitem_77: "f32[1, 64, 1][64, 1, 1]cuda:0" = var_mean_14[0]
+ getitem_78: "f32[1, 64, 1][64, 1, 1]cuda:0" = var_mean_14[1]; var_mean_14 = None
+ add_57: "f32[1, 64, 1][64, 1, 1]cuda:0" = torch.ops.aten.add.Tensor(getitem_77, 1e-05); getitem_77 = None
+ rsqrt_14: "f32[1, 64, 1][64, 1, 1]cuda:0" = torch.ops.aten.rsqrt.default(add_57); add_57 = None
+ sub_14: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.sub.Tensor(add_56, getitem_78); getitem_78 = None
+ mul_56: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.mul.Tensor(sub_14, rsqrt_14); sub_14 = None
+ mul_57: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.mul.Tensor(mul_56, primals_88)
+ add_58: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.add.Tensor(mul_57, primals_89); mul_57 = primals_89 = None
+
+ # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:82 in forward, code: q, k, v = self.c_attn(x).split(self.n_embd, dim=2)
+ view_84: "f32[64, 768][768, 1]cuda:0" = torch.ops.aten.view.default(add_58, [64, 768]); add_58 = None
+ permute_56: "f32[768, 2304][1, 768]cuda:0" = torch.ops.aten.permute.default(primals_90, [1, 0]); primals_90 = None
+ addmm_28: "f32[64, 2304][2304, 1]cuda:0" = torch.ops.aten.addmm.default(primals_91, view_84, permute_56); primals_91 = None
+ view_85: "f32[1, 64, 2304][147456, 2304, 1]cuda:0" = torch.ops.aten.view.default(addmm_28, [1, 64, 2304]); addmm_28 = None
+ split_7 = torch.ops.aten.split.Tensor(view_85, 768, 2); view_85 = None
+ getitem_79: "f32[1, 64, 768][147456, 2304, 1]cuda:0" = split_7[0]
+ getitem_80: "f32[1, 64, 768][147456, 2304, 1]cuda:0" = split_7[1]
+ getitem_81: "f32[1, 64, 768][147456, 2304, 1]cuda:0" = split_7[2]; split_7 = None
+
+ # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:83 in forward, code: k = k.view(B, T, self.n_head, C // self.n_head).transpose(
+ view_86: "f32[1, 64, 12, 64][147456, 2304, 64, 1]cuda:0" = torch.ops.aten.view.default(getitem_80, [1, 64, 12, 64]); getitem_80 = None
+ permute_57: "f32[1, 12, 64, 64][147456, 64, 2304, 1]cuda:0" = torch.ops.aten.permute.default(view_86, [0, 2, 1, 3]); view_86 = None
+
+ # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:86 in forward, code: q = q.view(B, T, self.n_head, C // self.n_head).transpose(
+ view_87: "f32[1, 64, 12, 64][147456, 2304, 64, 1]cuda:0" = torch.ops.aten.view.default(getitem_79, [1, 64, 12, 64]); getitem_79 = None
+ permute_58: "f32[1, 12, 64, 64][147456, 64, 2304, 1]cuda:0" = torch.ops.aten.permute.default(view_87, [0, 2, 1, 3]); view_87 = None
+
+ # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:89 in forward, code: v = v.view(B, T, self.n_head, C // self.n_head).transpose(
+ view_88: "f32[1, 64, 12, 64][147456, 2304, 64, 1]cuda:0" = torch.ops.aten.view.default(getitem_81, [1, 64, 12, 64]); getitem_81 = None
+ permute_59: "f32[1, 12, 64, 64][147456, 64, 2304, 1]cuda:0" = torch.ops.aten.permute.default(view_88, [0, 2, 1, 3]); view_88 = None
+
+ # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:96 in forward, code: y = torch.nn.functional.scaled_dot_product_attention(
+ _scaled_dot_product_efficient_attention_7 = torch.ops.aten._scaled_dot_product_efficient_attention.default(permute_58, permute_57, permute_59, None, True, 0.0, True)
+ getitem_82: "f32[1, 12, 64, 64][49152, 64, 768, 1]cuda:0" = _scaled_dot_product_efficient_attention_7[0]
+ getitem_83: "f32[1, 12, 64][768, 64, 1]cuda:0" = _scaled_dot_product_efficient_attention_7[1]
+ getitem_84: "i64[][]cuda:0" = _scaled_dot_product_efficient_attention_7[2]
+ getitem_85: "i64[][]cuda:0" = _scaled_dot_product_efficient_attention_7[3]; _scaled_dot_product_efficient_attention_7 = None
+
+ # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:112 in forward, code: y.transpose(1, 2).contiguous().view(B, T, C)
+ permute_60: "f32[1, 64, 12, 64][49152, 768, 64, 1]cuda:0" = torch.ops.aten.permute.default(getitem_82, [0, 2, 1, 3])
+ view_89: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.view.default(permute_60, [1, 64, 768]); permute_60 = None
+
+ # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:116 in forward, code: y = self.resid_dropout(self.c_proj(y))
+ view_90: "f32[64, 768][768, 1]cuda:0" = torch.ops.aten.view.default(view_89, [64, 768]); view_89 = None
+ permute_61: "f32[768, 768][1, 768]cuda:0" = torch.ops.aten.permute.default(primals_92, [1, 0]); primals_92 = None
+ addmm_29: "f32[64, 768][768, 1]cuda:0" = torch.ops.aten.addmm.default(primals_93, view_90, permute_61); primals_93 = view_90 = None
+ view_91: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.view.default(addmm_29, [1, 64, 768]); addmm_29 = None
+
+ # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:144 in forward, code: x = x + self.attn(self.ln_1(x))
+ add_59: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.add.Tensor(add_56, view_91); add_56 = view_91 = None
+
+ # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:43 in forward, code: return F.layer_norm(input, self.weight.shape, self.weight, self.bias, 1e-5)
+ var_mean_15 = torch.ops.aten.var_mean.correction(add_59, [2], correction = 0, keepdim = True)
+ getitem_86: "f32[1, 64, 1][64, 1, 1]cuda:0" = var_mean_15[0]
+ getitem_87: "f32[1, 64, 1][64, 1, 1]cuda:0" = var_mean_15[1]; var_mean_15 = None
+ add_60: "f32[1, 64, 1][64, 1, 1]cuda:0" = torch.ops.aten.add.Tensor(getitem_86, 1e-05); getitem_86 = None
+ rsqrt_15: "f32[1, 64, 1][64, 1, 1]cuda:0" = torch.ops.aten.rsqrt.default(add_60); add_60 = None
+ sub_15: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.sub.Tensor(add_59, getitem_87); getitem_87 = None
+ mul_58: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.mul.Tensor(sub_15, rsqrt_15); sub_15 = None
+ mul_59: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.mul.Tensor(mul_58, primals_94)
+ add_61: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.add.Tensor(mul_59, primals_95); mul_59 = primals_95 = None
+
+ # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:128 in forward, code: x = self.c_fc(x)
+ view_92: "f32[64, 768][768, 1]cuda:0" = torch.ops.aten.view.default(add_61, [64, 768]); add_61 = None
+ permute_62: "f32[768, 3072][1, 768]cuda:0" = torch.ops.aten.permute.default(primals_96, [1, 0]); primals_96 = None
+ addmm_30: "f32[64, 3072][3072, 1]cuda:0" = torch.ops.aten.addmm.default(primals_97, view_92, permute_62); primals_97 = None
+ view_93: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.view.default(addmm_30, [1, 64, 3072])
+
+ # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:25 in new_gelu, code: 0.5
+ mul_60: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.mul.Tensor(view_93, 0.5)
+
+ # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:29 in new_gelu, code: + torch.tanh(math.sqrt(2.0 / math.pi) * (x + 0.044715 * torch.pow(x, 3.0)))
+ pow_8: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.pow.Tensor_Scalar(view_93, 3.0)
+ mul_61: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.mul.Tensor(pow_8, 0.044715); pow_8 = None
+ add_62: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.add.Tensor(view_93, mul_61); view_93 = mul_61 = None
+ mul_62: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.mul.Tensor(add_62, 0.7978845608028654); add_62 = None
+ tanh_7: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.tanh.default(mul_62); mul_62 = None
+
+ # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:28 in new_gelu, code: 1.0
+ add_63: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.add.Tensor(tanh_7, 1.0); tanh_7 = None
+
+ # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:25 in new_gelu, code: 0.5
+ mul_63: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.mul.Tensor(mul_60, add_63); mul_60 = add_63 = None
+
+ # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:130 in forward, code: x = self.c_proj(x)
+ view_94: "f32[64, 3072][3072, 1]cuda:0" = torch.ops.aten.view.default(mul_63, [64, 3072]); mul_63 = None
+ permute_63: "f32[3072, 768][1, 3072]cuda:0" = torch.ops.aten.permute.default(primals_98, [1, 0]); primals_98 = None
+ addmm_31: "f32[64, 768][768, 1]cuda:0" = torch.ops.aten.addmm.default(primals_99, view_94, permute_63); primals_99 = None
+ view_95: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.view.default(addmm_31, [1, 64, 768]); addmm_31 = None
+
+ # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:145 in forward, code: x = x + self.mlp(self.ln_2(x))
+ add_64: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.add.Tensor(add_59, view_95); add_59 = view_95 = None
+
+ # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:43 in forward, code: return F.layer_norm(input, self.weight.shape, self.weight, self.bias, 1e-5)
+ var_mean_16 = torch.ops.aten.var_mean.correction(add_64, [2], correction = 0, keepdim = True)
+ getitem_88: "f32[1, 64, 1][64, 1, 1]cuda:0" = var_mean_16[0]
+ getitem_89: "f32[1, 64, 1][64, 1, 1]cuda:0" = var_mean_16[1]; var_mean_16 = None
+ add_65: "f32[1, 64, 1][64, 1, 1]cuda:0" = torch.ops.aten.add.Tensor(getitem_88, 1e-05); getitem_88 = None
+ rsqrt_16: "f32[1, 64, 1][64, 1, 1]cuda:0" = torch.ops.aten.rsqrt.default(add_65); add_65 = None
+ sub_16: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.sub.Tensor(add_64, getitem_89); getitem_89 = None
+ mul_64: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.mul.Tensor(sub_16, rsqrt_16); sub_16 = None
+ mul_65: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.mul.Tensor(mul_64, primals_100)
+ add_66: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.add.Tensor(mul_65, primals_101); mul_65 = primals_101 = None
+
+ # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:82 in forward, code: q, k, v = self.c_attn(x).split(self.n_embd, dim=2)
+ view_96: "f32[64, 768][768, 1]cuda:0" = torch.ops.aten.view.default(add_66, [64, 768]); add_66 = None
+ permute_64: "f32[768, 2304][1, 768]cuda:0" = torch.ops.aten.permute.default(primals_102, [1, 0]); primals_102 = None
+ addmm_32: "f32[64, 2304][2304, 1]cuda:0" = torch.ops.aten.addmm.default(primals_103, view_96, permute_64); primals_103 = None
+ view_97: "f32[1, 64, 2304][147456, 2304, 1]cuda:0" = torch.ops.aten.view.default(addmm_32, [1, 64, 2304]); addmm_32 = None
+ split_8 = torch.ops.aten.split.Tensor(view_97, 768, 2); view_97 = None
+ getitem_90: "f32[1, 64, 768][147456, 2304, 1]cuda:0" = split_8[0]
+ getitem_91: "f32[1, 64, 768][147456, 2304, 1]cuda:0" = split_8[1]
+ getitem_92: "f32[1, 64, 768][147456, 2304, 1]cuda:0" = split_8[2]; split_8 = None
+
+ # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:83 in forward, code: k = k.view(B, T, self.n_head, C // self.n_head).transpose(
+ view_98: "f32[1, 64, 12, 64][147456, 2304, 64, 1]cuda:0" = torch.ops.aten.view.default(getitem_91, [1, 64, 12, 64]); getitem_91 = None
+ permute_65: "f32[1, 12, 64, 64][147456, 64, 2304, 1]cuda:0" = torch.ops.aten.permute.default(view_98, [0, 2, 1, 3]); view_98 = None
+
+ # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:86 in forward, code: q = q.view(B, T, self.n_head, C // self.n_head).transpose(
+ view_99: "f32[1, 64, 12, 64][147456, 2304, 64, 1]cuda:0" = torch.ops.aten.view.default(getitem_90, [1, 64, 12, 64]); getitem_90 = None
+ permute_66: "f32[1, 12, 64, 64][147456, 64, 2304, 1]cuda:0" = torch.ops.aten.permute.default(view_99, [0, 2, 1, 3]); view_99 = None
+
+ # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:89 in forward, code: v = v.view(B, T, self.n_head, C // self.n_head).transpose(
+ view_100: "f32[1, 64, 12, 64][147456, 2304, 64, 1]cuda:0" = torch.ops.aten.view.default(getitem_92, [1, 64, 12, 64]); getitem_92 = None
+ permute_67: "f32[1, 12, 64, 64][147456, 64, 2304, 1]cuda:0" = torch.ops.aten.permute.default(view_100, [0, 2, 1, 3]); view_100 = None
+
+ # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:96 in forward, code: y = torch.nn.functional.scaled_dot_product_attention(
+ _scaled_dot_product_efficient_attention_8 = torch.ops.aten._scaled_dot_product_efficient_attention.default(permute_66, permute_65, permute_67, None, True, 0.0, True)
+ getitem_93: "f32[1, 12, 64, 64][49152, 64, 768, 1]cuda:0" = _scaled_dot_product_efficient_attention_8[0]
+ getitem_94: "f32[1, 12, 64][768, 64, 1]cuda:0" = _scaled_dot_product_efficient_attention_8[1]
+ getitem_95: "i64[][]cuda:0" = _scaled_dot_product_efficient_attention_8[2]
+ getitem_96: "i64[][]cuda:0" = _scaled_dot_product_efficient_attention_8[3]; _scaled_dot_product_efficient_attention_8 = None
+
+ # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:112 in forward, code: y.transpose(1, 2).contiguous().view(B, T, C)
+ permute_68: "f32[1, 64, 12, 64][49152, 768, 64, 1]cuda:0" = torch.ops.aten.permute.default(getitem_93, [0, 2, 1, 3])
+ view_101: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.view.default(permute_68, [1, 64, 768]); permute_68 = None
+
+ # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:116 in forward, code: y = self.resid_dropout(self.c_proj(y))
+ view_102: "f32[64, 768][768, 1]cuda:0" = torch.ops.aten.view.default(view_101, [64, 768]); view_101 = None
+ permute_69: "f32[768, 768][1, 768]cuda:0" = torch.ops.aten.permute.default(primals_104, [1, 0]); primals_104 = None
+ addmm_33: "f32[64, 768][768, 1]cuda:0" = torch.ops.aten.addmm.default(primals_105, view_102, permute_69); primals_105 = view_102 = None
+ view_103: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.view.default(addmm_33, [1, 64, 768]); addmm_33 = None
+
+ # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:144 in forward, code: x = x + self.attn(self.ln_1(x))
+ add_67: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.add.Tensor(add_64, view_103); add_64 = view_103 = None
+
+ # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:43 in forward, code: return F.layer_norm(input, self.weight.shape, self.weight, self.bias, 1e-5)
+ var_mean_17 = torch.ops.aten.var_mean.correction(add_67, [2], correction = 0, keepdim = True)
+ getitem_97: "f32[1, 64, 1][64, 1, 1]cuda:0" = var_mean_17[0]
+ getitem_98: "f32[1, 64, 1][64, 1, 1]cuda:0" = var_mean_17[1]; var_mean_17 = None
+ add_68: "f32[1, 64, 1][64, 1, 1]cuda:0" = torch.ops.aten.add.Tensor(getitem_97, 1e-05); getitem_97 = None
+ rsqrt_17: "f32[1, 64, 1][64, 1, 1]cuda:0" = torch.ops.aten.rsqrt.default(add_68); add_68 = None
+ sub_17: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.sub.Tensor(add_67, getitem_98); getitem_98 = None
+ mul_66: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.mul.Tensor(sub_17, rsqrt_17); sub_17 = None
+ mul_67: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.mul.Tensor(mul_66, primals_106)
+ add_69: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.add.Tensor(mul_67, primals_107); mul_67 = primals_107 = None
+
+ # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:128 in forward, code: x = self.c_fc(x)
+ view_104: "f32[64, 768][768, 1]cuda:0" = torch.ops.aten.view.default(add_69, [64, 768]); add_69 = None
+ permute_70: "f32[768, 3072][1, 768]cuda:0" = torch.ops.aten.permute.default(primals_108, [1, 0]); primals_108 = None
+ addmm_34: "f32[64, 3072][3072, 1]cuda:0" = torch.ops.aten.addmm.default(primals_109, view_104, permute_70); primals_109 = None
+ view_105: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.view.default(addmm_34, [1, 64, 3072])
+
+ # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:25 in new_gelu, code: 0.5
+ mul_68: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.mul.Tensor(view_105, 0.5)
+
+ # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:29 in new_gelu, code: + torch.tanh(math.sqrt(2.0 / math.pi) * (x + 0.044715 * torch.pow(x, 3.0)))
+ pow_9: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.pow.Tensor_Scalar(view_105, 3.0)
+ mul_69: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.mul.Tensor(pow_9, 0.044715); pow_9 = None
+ add_70: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.add.Tensor(view_105, mul_69); view_105 = mul_69 = None
+ mul_70: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.mul.Tensor(add_70, 0.7978845608028654); add_70 = None
+ tanh_8: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.tanh.default(mul_70); mul_70 = None
+
+ # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:28 in new_gelu, code: 1.0
+ add_71: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.add.Tensor(tanh_8, 1.0); tanh_8 = None
+
+ # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:25 in new_gelu, code: 0.5
+ mul_71: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.mul.Tensor(mul_68, add_71); mul_68 = add_71 = None
+
+ # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:130 in forward, code: x = self.c_proj(x)
+ view_106: "f32[64, 3072][3072, 1]cuda:0" = torch.ops.aten.view.default(mul_71, [64, 3072]); mul_71 = None
+ permute_71: "f32[3072, 768][1, 3072]cuda:0" = torch.ops.aten.permute.default(primals_110, [1, 0]); primals_110 = None
+ addmm_35: "f32[64, 768][768, 1]cuda:0" = torch.ops.aten.addmm.default(primals_111, view_106, permute_71); primals_111 = None
+ view_107: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.view.default(addmm_35, [1, 64, 768]); addmm_35 = None
+
+ # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:145 in forward, code: x = x + self.mlp(self.ln_2(x))
+ add_72: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.add.Tensor(add_67, view_107); add_67 = view_107 = None
+
+ # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:43 in forward, code: return F.layer_norm(input, self.weight.shape, self.weight, self.bias, 1e-5)
+ var_mean_18 = torch.ops.aten.var_mean.correction(add_72, [2], correction = 0, keepdim = True)
+ getitem_99: "f32[1, 64, 1][64, 1, 1]cuda:0" = var_mean_18[0]
+ getitem_100: "f32[1, 64, 1][64, 1, 1]cuda:0" = var_mean_18[1]; var_mean_18 = None
+ add_73: "f32[1, 64, 1][64, 1, 1]cuda:0" = torch.ops.aten.add.Tensor(getitem_99, 1e-05); getitem_99 = None
+ rsqrt_18: "f32[1, 64, 1][64, 1, 1]cuda:0" = torch.ops.aten.rsqrt.default(add_73); add_73 = None
+ sub_18: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.sub.Tensor(add_72, getitem_100); getitem_100 = None
+ mul_72: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.mul.Tensor(sub_18, rsqrt_18); sub_18 = None
+ mul_73: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.mul.Tensor(mul_72, primals_112)
+ add_74: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.add.Tensor(mul_73, primals_113); mul_73 = primals_113 = None
+
+ # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:82 in forward, code: q, k, v = self.c_attn(x).split(self.n_embd, dim=2)
+ view_108: "f32[64, 768][768, 1]cuda:0" = torch.ops.aten.view.default(add_74, [64, 768]); add_74 = None
+ permute_72: "f32[768, 2304][1, 768]cuda:0" = torch.ops.aten.permute.default(primals_114, [1, 0]); primals_114 = None
+ addmm_36: "f32[64, 2304][2304, 1]cuda:0" = torch.ops.aten.addmm.default(primals_115, view_108, permute_72); primals_115 = None
+ view_109: "f32[1, 64, 2304][147456, 2304, 1]cuda:0" = torch.ops.aten.view.default(addmm_36, [1, 64, 2304]); addmm_36 = None
+ split_9 = torch.ops.aten.split.Tensor(view_109, 768, 2); view_109 = None
+ getitem_101: "f32[1, 64, 768][147456, 2304, 1]cuda:0" = split_9[0]
+ getitem_102: "f32[1, 64, 768][147456, 2304, 1]cuda:0" = split_9[1]
+ getitem_103: "f32[1, 64, 768][147456, 2304, 1]cuda:0" = split_9[2]; split_9 = None
+
+ # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:83 in forward, code: k = k.view(B, T, self.n_head, C // self.n_head).transpose(
+ view_110: "f32[1, 64, 12, 64][147456, 2304, 64, 1]cuda:0" = torch.ops.aten.view.default(getitem_102, [1, 64, 12, 64]); getitem_102 = None
+ permute_73: "f32[1, 12, 64, 64][147456, 64, 2304, 1]cuda:0" = torch.ops.aten.permute.default(view_110, [0, 2, 1, 3]); view_110 = None
+
+ # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:86 in forward, code: q = q.view(B, T, self.n_head, C // self.n_head).transpose(
+ view_111: "f32[1, 64, 12, 64][147456, 2304, 64, 1]cuda:0" = torch.ops.aten.view.default(getitem_101, [1, 64, 12, 64]); getitem_101 = None
+ permute_74: "f32[1, 12, 64, 64][147456, 64, 2304, 1]cuda:0" = torch.ops.aten.permute.default(view_111, [0, 2, 1, 3]); view_111 = None
+
+ # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:89 in forward, code: v = v.view(B, T, self.n_head, C // self.n_head).transpose(
+ view_112: "f32[1, 64, 12, 64][147456, 2304, 64, 1]cuda:0" = torch.ops.aten.view.default(getitem_103, [1, 64, 12, 64]); getitem_103 = None
+ permute_75: "f32[1, 12, 64, 64][147456, 64, 2304, 1]cuda:0" = torch.ops.aten.permute.default(view_112, [0, 2, 1, 3]); view_112 = None
+
+ # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:96 in forward, code: y = torch.nn.functional.scaled_dot_product_attention(
+ _scaled_dot_product_efficient_attention_9 = torch.ops.aten._scaled_dot_product_efficient_attention.default(permute_74, permute_73, permute_75, None, True, 0.0, True)
+ getitem_104: "f32[1, 12, 64, 64][49152, 64, 768, 1]cuda:0" = _scaled_dot_product_efficient_attention_9[0]
+ getitem_105: "f32[1, 12, 64][768, 64, 1]cuda:0" = _scaled_dot_product_efficient_attention_9[1]
+ getitem_106: "i64[][]cuda:0" = _scaled_dot_product_efficient_attention_9[2]
+ getitem_107: "i64[][]cuda:0" = _scaled_dot_product_efficient_attention_9[3]; _scaled_dot_product_efficient_attention_9 = None
+
+ # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:112 in forward, code: y.transpose(1, 2).contiguous().view(B, T, C)
+ permute_76: "f32[1, 64, 12, 64][49152, 768, 64, 1]cuda:0" = torch.ops.aten.permute.default(getitem_104, [0, 2, 1, 3])
+ view_113: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.view.default(permute_76, [1, 64, 768]); permute_76 = None
+
+ # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:116 in forward, code: y = self.resid_dropout(self.c_proj(y))
+ view_114: "f32[64, 768][768, 1]cuda:0" = torch.ops.aten.view.default(view_113, [64, 768]); view_113 = None
+ permute_77: "f32[768, 768][1, 768]cuda:0" = torch.ops.aten.permute.default(primals_116, [1, 0]); primals_116 = None
+ addmm_37: "f32[64, 768][768, 1]cuda:0" = torch.ops.aten.addmm.default(primals_117, view_114, permute_77); primals_117 = view_114 = None
+ view_115: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.view.default(addmm_37, [1, 64, 768]); addmm_37 = None
+
+ # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:144 in forward, code: x = x + self.attn(self.ln_1(x))
+ add_75: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.add.Tensor(add_72, view_115); add_72 = view_115 = None
+
+ # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:43 in forward, code: return F.layer_norm(input, self.weight.shape, self.weight, self.bias, 1e-5)
+ var_mean_19 = torch.ops.aten.var_mean.correction(add_75, [2], correction = 0, keepdim = True)
+ getitem_108: "f32[1, 64, 1][64, 1, 1]cuda:0" = var_mean_19[0]
+ getitem_109: "f32[1, 64, 1][64, 1, 1]cuda:0" = var_mean_19[1]; var_mean_19 = None
+ add_76: "f32[1, 64, 1][64, 1, 1]cuda:0" = torch.ops.aten.add.Tensor(getitem_108, 1e-05); getitem_108 = None
+ rsqrt_19: "f32[1, 64, 1][64, 1, 1]cuda:0" = torch.ops.aten.rsqrt.default(add_76); add_76 = None
+ sub_19: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.sub.Tensor(add_75, getitem_109); getitem_109 = None
+ mul_74: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.mul.Tensor(sub_19, rsqrt_19); sub_19 = None
+ mul_75: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.mul.Tensor(mul_74, primals_118)
+ add_77: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.add.Tensor(mul_75, primals_119); mul_75 = primals_119 = None
+
+ # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:128 in forward, code: x = self.c_fc(x)
+ view_116: "f32[64, 768][768, 1]cuda:0" = torch.ops.aten.view.default(add_77, [64, 768]); add_77 = None
+ permute_78: "f32[768, 3072][1, 768]cuda:0" = torch.ops.aten.permute.default(primals_120, [1, 0]); primals_120 = None
+ addmm_38: "f32[64, 3072][3072, 1]cuda:0" = torch.ops.aten.addmm.default(primals_121, view_116, permute_78); primals_121 = None
+ view_117: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.view.default(addmm_38, [1, 64, 3072])
+
+ # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:25 in new_gelu, code: 0.5
+ mul_76: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.mul.Tensor(view_117, 0.5)
+
+ # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:29 in new_gelu, code: + torch.tanh(math.sqrt(2.0 / math.pi) * (x + 0.044715 * torch.pow(x, 3.0)))
+ pow_10: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.pow.Tensor_Scalar(view_117, 3.0)
+ mul_77: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.mul.Tensor(pow_10, 0.044715); pow_10 = None
+ add_78: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.add.Tensor(view_117, mul_77); view_117 = mul_77 = None
+ mul_78: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.mul.Tensor(add_78, 0.7978845608028654); add_78 = None
+ tanh_9: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.tanh.default(mul_78); mul_78 = None
+
+ # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:28 in new_gelu, code: 1.0
+ add_79: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.add.Tensor(tanh_9, 1.0); tanh_9 = None
+
+ # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:25 in new_gelu, code: 0.5
+ mul_79: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.mul.Tensor(mul_76, add_79); mul_76 = add_79 = None
+
+ # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:130 in forward, code: x = self.c_proj(x)
+ view_118: "f32[64, 3072][3072, 1]cuda:0" = torch.ops.aten.view.default(mul_79, [64, 3072]); mul_79 = None
+ permute_79: "f32[3072, 768][1, 3072]cuda:0" = torch.ops.aten.permute.default(primals_122, [1, 0]); primals_122 = None
+ addmm_39: "f32[64, 768][768, 1]cuda:0" = torch.ops.aten.addmm.default(primals_123, view_118, permute_79); primals_123 = None
+
view_119: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.view.default(addmm_39, [1, 64, 768]); addmm_39 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:145 in forward, code: x = x + self.mlp(self.ln_2(x)) + add_80: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.add.Tensor(add_75, view_119); add_75 = view_119 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:43 in forward, code: return F.layer_norm(input, self.weight.shape, self.weight, self.bias, 1e-5) + var_mean_20 = torch.ops.aten.var_mean.correction(add_80, [2], correction = 0, keepdim = True) + getitem_110: "f32[1, 64, 1][64, 1, 1]cuda:0" = var_mean_20[0] + getitem_111: "f32[1, 64, 1][64, 1, 1]cuda:0" = var_mean_20[1]; var_mean_20 = None + add_81: "f32[1, 64, 1][64, 1, 1]cuda:0" = torch.ops.aten.add.Tensor(getitem_110, 1e-05); getitem_110 = None + rsqrt_20: "f32[1, 64, 1][64, 1, 1]cuda:0" = torch.ops.aten.rsqrt.default(add_81); add_81 = None + sub_20: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.sub.Tensor(add_80, getitem_111); getitem_111 = None + mul_80: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.mul.Tensor(sub_20, rsqrt_20); sub_20 = None + mul_81: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.mul.Tensor(mul_80, primals_124) + add_82: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.add.Tensor(mul_81, primals_125); mul_81 = primals_125 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:82 in forward, code: q, k, v = self.c_attn(x).split(self.n_embd, dim=2) + view_120: "f32[64, 768][768, 1]cuda:0" = torch.ops.aten.view.default(add_82, [64, 768]); add_82 = None + permute_80: "f32[768, 2304][1, 768]cuda:0" = torch.ops.aten.permute.default(primals_126, [1, 0]); primals_126 = None + addmm_40: "f32[64, 2304][2304, 1]cuda:0" = torch.ops.aten.addmm.default(primals_127, view_120, permute_80); primals_127 = None + view_121: "f32[1, 64, 2304][147456, 2304, 1]cuda:0" = torch.ops.aten.view.default(addmm_40, [1, 64, 2304]); addmm_40 = None + split_10 = torch.ops.aten.split.Tensor(view_121, 768, 2); view_121 = None + getitem_112: "f32[1, 64, 768][147456, 2304, 1]cuda:0" = split_10[0] + getitem_113: "f32[1, 64, 768][147456, 2304, 1]cuda:0" = split_10[1] + getitem_114: "f32[1, 64, 768][147456, 2304, 1]cuda:0" = split_10[2]; split_10 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:83 in forward, code: k = k.view(B, T, self.n_head, C // self.n_head).transpose( + view_122: "f32[1, 64, 12, 64][147456, 2304, 64, 1]cuda:0" = torch.ops.aten.view.default(getitem_113, [1, 64, 12, 64]); getitem_113 = None + permute_81: "f32[1, 12, 64, 64][147456, 64, 2304, 1]cuda:0" = torch.ops.aten.permute.default(view_122, [0, 2, 1, 3]); view_122 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:86 in forward, code: q = q.view(B, T, self.n_head, C // self.n_head).transpose( + view_123: "f32[1, 64, 12, 64][147456, 2304, 64, 1]cuda:0" = torch.ops.aten.view.default(getitem_112, [1, 64, 12, 64]); getitem_112 = None + permute_82: "f32[1, 12, 64, 64][147456, 64, 2304, 1]cuda:0" = torch.ops.aten.permute.default(view_123, [0, 2, 1, 3]); view_123 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:89 in forward, code: v = v.view(B, T, self.n_head, C // self.n_head).transpose( + view_124: "f32[1, 64, 12, 64][147456, 2304, 64, 1]cuda:0" = torch.ops.aten.view.default(getitem_114, 
[1, 64, 12, 64]); getitem_114 = None + permute_83: "f32[1, 12, 64, 64][147456, 64, 2304, 1]cuda:0" = torch.ops.aten.permute.default(view_124, [0, 2, 1, 3]); view_124 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:96 in forward, code: y = torch.nn.functional.scaled_dot_product_attention( + _scaled_dot_product_efficient_attention_10 = torch.ops.aten._scaled_dot_product_efficient_attention.default(permute_82, permute_81, permute_83, None, True, 0.0, True) + getitem_115: "f32[1, 12, 64, 64][49152, 64, 768, 1]cuda:0" = _scaled_dot_product_efficient_attention_10[0] + getitem_116: "f32[1, 12, 64][768, 64, 1]cuda:0" = _scaled_dot_product_efficient_attention_10[1] + getitem_117: "i64[][]cuda:0" = _scaled_dot_product_efficient_attention_10[2] + getitem_118: "i64[][]cuda:0" = _scaled_dot_product_efficient_attention_10[3]; _scaled_dot_product_efficient_attention_10 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:112 in forward, code: y.transpose(1, 2).contiguous().view(B, T, C) + permute_84: "f32[1, 64, 12, 64][49152, 768, 64, 1]cuda:0" = torch.ops.aten.permute.default(getitem_115, [0, 2, 1, 3]) + view_125: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.view.default(permute_84, [1, 64, 768]); permute_84 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:116 in forward, code: y = self.resid_dropout(self.c_proj(y)) + view_126: "f32[64, 768][768, 1]cuda:0" = torch.ops.aten.view.default(view_125, [64, 768]); view_125 = None + permute_85: "f32[768, 768][1, 768]cuda:0" = torch.ops.aten.permute.default(primals_128, [1, 0]); primals_128 = None + addmm_41: "f32[64, 768][768, 1]cuda:0" = torch.ops.aten.addmm.default(primals_129, view_126, permute_85); primals_129 = view_126 = None + view_127: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.view.default(addmm_41, [1, 64, 768]); addmm_41 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:144 in forward, code: x = x + self.attn(self.ln_1(x)) + add_83: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.add.Tensor(add_80, view_127); add_80 = view_127 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:43 in forward, code: return F.layer_norm(input, self.weight.shape, self.weight, self.bias, 1e-5) + var_mean_21 = torch.ops.aten.var_mean.correction(add_83, [2], correction = 0, keepdim = True) + getitem_119: "f32[1, 64, 1][64, 1, 1]cuda:0" = var_mean_21[0] + getitem_120: "f32[1, 64, 1][64, 1, 1]cuda:0" = var_mean_21[1]; var_mean_21 = None + add_84: "f32[1, 64, 1][64, 1, 1]cuda:0" = torch.ops.aten.add.Tensor(getitem_119, 1e-05); getitem_119 = None + rsqrt_21: "f32[1, 64, 1][64, 1, 1]cuda:0" = torch.ops.aten.rsqrt.default(add_84); add_84 = None + sub_21: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.sub.Tensor(add_83, getitem_120); getitem_120 = None + mul_82: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.mul.Tensor(sub_21, rsqrt_21); sub_21 = None + mul_83: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.mul.Tensor(mul_82, primals_130) + add_85: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.add.Tensor(mul_83, primals_131); mul_83 = primals_131 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:128 in forward, code: x = self.c_fc(x) + view_128: "f32[64, 768][768, 1]cuda:0" = torch.ops.aten.view.default(add_85, [64, 768]); add_85 = None + permute_86: "f32[768, 
3072][1, 768]cuda:0" = torch.ops.aten.permute.default(primals_132, [1, 0]); primals_132 = None + addmm_42: "f32[64, 3072][3072, 1]cuda:0" = torch.ops.aten.addmm.default(primals_133, view_128, permute_86); primals_133 = None + view_129: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.view.default(addmm_42, [1, 64, 3072]) + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:25 in new_gelu, code: 0.5 + mul_84: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.mul.Tensor(view_129, 0.5) + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:29 in new_gelu, code: + torch.tanh(math.sqrt(2.0 / math.pi) * (x + 0.044715 * torch.pow(x, 3.0))) + pow_11: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.pow.Tensor_Scalar(view_129, 3.0) + mul_85: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.mul.Tensor(pow_11, 0.044715); pow_11 = None + add_86: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.add.Tensor(view_129, mul_85); view_129 = mul_85 = None + mul_86: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.mul.Tensor(add_86, 0.7978845608028654); add_86 = None + tanh_10: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.tanh.default(mul_86); mul_86 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:28 in new_gelu, code: 1.0 + add_87: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.add.Tensor(tanh_10, 1.0); tanh_10 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:25 in new_gelu, code: 0.5 + mul_87: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.mul.Tensor(mul_84, add_87); mul_84 = add_87 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:130 in forward, code: x = self.c_proj(x) + view_130: "f32[64, 3072][3072, 1]cuda:0" = torch.ops.aten.view.default(mul_87, [64, 3072]); mul_87 = None + permute_87: "f32[3072, 768][1, 3072]cuda:0" = torch.ops.aten.permute.default(primals_134, [1, 0]); primals_134 = None + addmm_43: "f32[64, 768][768, 1]cuda:0" = torch.ops.aten.addmm.default(primals_135, view_130, permute_87); primals_135 = None + view_131: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.view.default(addmm_43, [1, 64, 768]); addmm_43 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:145 in forward, code: x = x + self.mlp(self.ln_2(x)) + add_88: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.add.Tensor(add_83, view_131); add_83 = view_131 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:43 in forward, code: return F.layer_norm(input, self.weight.shape, self.weight, self.bias, 1e-5) + var_mean_22 = torch.ops.aten.var_mean.correction(add_88, [2], correction = 0, keepdim = True) + getitem_121: "f32[1, 64, 1][64, 1, 1]cuda:0" = var_mean_22[0] + getitem_122: "f32[1, 64, 1][64, 1, 1]cuda:0" = var_mean_22[1]; var_mean_22 = None + add_89: "f32[1, 64, 1][64, 1, 1]cuda:0" = torch.ops.aten.add.Tensor(getitem_121, 1e-05); getitem_121 = None + rsqrt_22: "f32[1, 64, 1][64, 1, 1]cuda:0" = torch.ops.aten.rsqrt.default(add_89); add_89 = None + sub_22: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.sub.Tensor(add_88, getitem_122); getitem_122 = None + mul_88: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.mul.Tensor(sub_22, rsqrt_22); sub_22 = None + mul_89: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.mul.Tensor(mul_88, 
primals_136) + add_90: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.add.Tensor(mul_89, primals_137); mul_89 = primals_137 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:82 in forward, code: q, k, v = self.c_attn(x).split(self.n_embd, dim=2) + view_132: "f32[64, 768][768, 1]cuda:0" = torch.ops.aten.view.default(add_90, [64, 768]); add_90 = None + permute_88: "f32[768, 2304][1, 768]cuda:0" = torch.ops.aten.permute.default(primals_138, [1, 0]); primals_138 = None + addmm_44: "f32[64, 2304][2304, 1]cuda:0" = torch.ops.aten.addmm.default(primals_139, view_132, permute_88); primals_139 = None + view_133: "f32[1, 64, 2304][147456, 2304, 1]cuda:0" = torch.ops.aten.view.default(addmm_44, [1, 64, 2304]); addmm_44 = None + split_11 = torch.ops.aten.split.Tensor(view_133, 768, 2); view_133 = None + getitem_123: "f32[1, 64, 768][147456, 2304, 1]cuda:0" = split_11[0] + getitem_124: "f32[1, 64, 768][147456, 2304, 1]cuda:0" = split_11[1] + getitem_125: "f32[1, 64, 768][147456, 2304, 1]cuda:0" = split_11[2]; split_11 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:83 in forward, code: k = k.view(B, T, self.n_head, C // self.n_head).transpose( + view_134: "f32[1, 64, 12, 64][147456, 2304, 64, 1]cuda:0" = torch.ops.aten.view.default(getitem_124, [1, 64, 12, 64]); getitem_124 = None + permute_89: "f32[1, 12, 64, 64][147456, 64, 2304, 1]cuda:0" = torch.ops.aten.permute.default(view_134, [0, 2, 1, 3]); view_134 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:86 in forward, code: q = q.view(B, T, self.n_head, C // self.n_head).transpose( + view_135: "f32[1, 64, 12, 64][147456, 2304, 64, 1]cuda:0" = torch.ops.aten.view.default(getitem_123, [1, 64, 12, 64]); getitem_123 = None + permute_90: "f32[1, 12, 64, 64][147456, 64, 2304, 1]cuda:0" = torch.ops.aten.permute.default(view_135, [0, 2, 1, 3]); view_135 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:89 in forward, code: v = v.view(B, T, self.n_head, C // self.n_head).transpose( + view_136: "f32[1, 64, 12, 64][147456, 2304, 64, 1]cuda:0" = torch.ops.aten.view.default(getitem_125, [1, 64, 12, 64]); getitem_125 = None + permute_91: "f32[1, 12, 64, 64][147456, 64, 2304, 1]cuda:0" = torch.ops.aten.permute.default(view_136, [0, 2, 1, 3]); view_136 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:96 in forward, code: y = torch.nn.functional.scaled_dot_product_attention( + _scaled_dot_product_efficient_attention_11 = torch.ops.aten._scaled_dot_product_efficient_attention.default(permute_90, permute_89, permute_91, None, True, 0.0, True) + getitem_126: "f32[1, 12, 64, 64][49152, 64, 768, 1]cuda:0" = _scaled_dot_product_efficient_attention_11[0] + getitem_127: "f32[1, 12, 64][768, 64, 1]cuda:0" = _scaled_dot_product_efficient_attention_11[1] + getitem_128: "i64[][]cuda:0" = _scaled_dot_product_efficient_attention_11[2] + getitem_129: "i64[][]cuda:0" = _scaled_dot_product_efficient_attention_11[3]; _scaled_dot_product_efficient_attention_11 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:112 in forward, code: y.transpose(1, 2).contiguous().view(B, T, C) + permute_92: "f32[1, 64, 12, 64][49152, 768, 64, 1]cuda:0" = torch.ops.aten.permute.default(getitem_126, [0, 2, 1, 3]) + view_137: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.view.default(permute_92, [1, 64, 768]); permute_92 = None + + # 
File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:116 in forward, code: y = self.resid_dropout(self.c_proj(y)) + view_138: "f32[64, 768][768, 1]cuda:0" = torch.ops.aten.view.default(view_137, [64, 768]); view_137 = None + permute_93: "f32[768, 768][1, 768]cuda:0" = torch.ops.aten.permute.default(primals_140, [1, 0]); primals_140 = None + addmm_45: "f32[64, 768][768, 1]cuda:0" = torch.ops.aten.addmm.default(primals_141, view_138, permute_93); primals_141 = view_138 = None + view_139: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.view.default(addmm_45, [1, 64, 768]); addmm_45 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:144 in forward, code: x = x + self.attn(self.ln_1(x)) + add_91: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.add.Tensor(add_88, view_139); add_88 = view_139 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:43 in forward, code: return F.layer_norm(input, self.weight.shape, self.weight, self.bias, 1e-5) + var_mean_23 = torch.ops.aten.var_mean.correction(add_91, [2], correction = 0, keepdim = True) + getitem_130: "f32[1, 64, 1][64, 1, 1]cuda:0" = var_mean_23[0] + getitem_131: "f32[1, 64, 1][64, 1, 1]cuda:0" = var_mean_23[1]; var_mean_23 = None + add_92: "f32[1, 64, 1][64, 1, 1]cuda:0" = torch.ops.aten.add.Tensor(getitem_130, 1e-05); getitem_130 = None + rsqrt_23: "f32[1, 64, 1][64, 1, 1]cuda:0" = torch.ops.aten.rsqrt.default(add_92); add_92 = None + sub_23: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.sub.Tensor(add_91, getitem_131); getitem_131 = None + mul_90: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.mul.Tensor(sub_23, rsqrt_23); sub_23 = None + mul_91: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.mul.Tensor(mul_90, primals_142) + add_93: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.add.Tensor(mul_91, primals_143); mul_91 = primals_143 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:128 in forward, code: x = self.c_fc(x) + view_140: "f32[64, 768][768, 1]cuda:0" = torch.ops.aten.view.default(add_93, [64, 768]); add_93 = None + permute_94: "f32[768, 3072][1, 768]cuda:0" = torch.ops.aten.permute.default(primals_144, [1, 0]); primals_144 = None + addmm_46: "f32[64, 3072][3072, 1]cuda:0" = torch.ops.aten.addmm.default(primals_145, view_140, permute_94); primals_145 = None + view_141: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.view.default(addmm_46, [1, 64, 3072]) + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:25 in new_gelu, code: 0.5 + mul_92: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.mul.Tensor(view_141, 0.5) + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:29 in new_gelu, code: + torch.tanh(math.sqrt(2.0 / math.pi) * (x + 0.044715 * torch.pow(x, 3.0))) + pow_12: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.pow.Tensor_Scalar(view_141, 3.0) + mul_93: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.mul.Tensor(pow_12, 0.044715); pow_12 = None + add_94: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.add.Tensor(view_141, mul_93); view_141 = mul_93 = None + mul_94: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.mul.Tensor(add_94, 0.7978845608028654); add_94 = None + tanh_11: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.tanh.default(mul_94); mul_94 = None + + # File: 
/data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:28 in new_gelu, code: 1.0 + add_95: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.add.Tensor(tanh_11, 1.0); tanh_11 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:25 in new_gelu, code: 0.5 + mul_95: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.mul.Tensor(mul_92, add_95); mul_92 = add_95 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:130 in forward, code: x = self.c_proj(x) + view_142: "f32[64, 3072][3072, 1]cuda:0" = torch.ops.aten.view.default(mul_95, [64, 3072]); mul_95 = None + permute_95: "f32[3072, 768][1, 3072]cuda:0" = torch.ops.aten.permute.default(primals_146, [1, 0]); primals_146 = None + addmm_47: "f32[64, 768][768, 1]cuda:0" = torch.ops.aten.addmm.default(primals_147, view_142, permute_95); primals_147 = None + view_143: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.view.default(addmm_47, [1, 64, 768]); addmm_47 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:145 in forward, code: x = x + self.mlp(self.ln_2(x)) + add_96: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.add.Tensor(add_91, view_143); add_91 = view_143 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:43 in forward, code: return F.layer_norm(input, self.weight.shape, self.weight, self.bias, 1e-5) + var_mean_24 = torch.ops.aten.var_mean.correction(add_96, [2], correction = 0, keepdim = True) + getitem_132: "f32[1, 64, 1][64, 1, 1]cuda:0" = var_mean_24[0] + getitem_133: "f32[1, 64, 1][64, 1, 1]cuda:0" = var_mean_24[1]; var_mean_24 = None + add_97: "f32[1, 64, 1][64, 1, 1]cuda:0" = torch.ops.aten.add.Tensor(getitem_132, 1e-05); getitem_132 = None + rsqrt_24: "f32[1, 64, 1][64, 1, 1]cuda:0" = torch.ops.aten.rsqrt.default(add_97); add_97 = None + sub_24: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.sub.Tensor(add_96, getitem_133); add_96 = getitem_133 = None + mul_96: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.mul.Tensor(sub_24, rsqrt_24); sub_24 = None + mul_97: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.mul.Tensor(mul_96, primals_148) + add_98: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.add.Tensor(mul_97, primals_149); mul_97 = primals_149 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:239 in forward, code: x[:, [-1], :] + full_default: "i64[1][1]cuda:0" = torch.ops.aten.full.default([1], -1, dtype = torch.int64, layout = torch.strided, device = device(type='cuda', index=0), pin_memory = False) + index: "f32[1, 1, 768][768, 768, 1]cuda:0" = torch.ops.aten.index.Tensor(add_98, [None, full_default]); add_98 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:238 in forward, code: logits = self.lm_head( + permute_96: "f32[768, 50304][1, 768]cuda:0" = torch.ops.aten.permute.default(primals_2, [1, 0]); primals_2 = None + view_144: "f32[1, 768][768, 1]cuda:0" = torch.ops.aten.view.default(index, [1, 768]); index = None + mm: "f32[1, 50304][50304, 1]cuda:0" = torch.ops.aten.mm.default(view_144, permute_96) + view_145: "f32[1, 1, 50304][50304, 50304, 1]cuda:0" = torch.ops.aten.view.default(mm, [1, 1, 50304]); mm = None + permute_99: "f32[50304, 768][768, 1]cuda:0" = torch.ops.aten.permute.default(permute_96, [1, 0]); permute_96 = None + + # File: 
/data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:43 in forward, code: return F.layer_norm(input, self.weight.shape, self.weight, self.bias, 1e-5) + div: "f32[1, 64, 1][64, 1, 1]cuda:0" = torch.ops.aten.div.Tensor(rsqrt_24, 768); rsqrt_24 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:130 in forward, code: x = self.c_proj(x) + permute_101: "f32[768, 3072][3072, 1]cuda:0" = torch.ops.aten.permute.default(permute_95, [1, 0]); permute_95 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:128 in forward, code: x = self.c_fc(x) + permute_105: "f32[3072, 768][768, 1]cuda:0" = torch.ops.aten.permute.default(permute_94, [1, 0]); permute_94 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:43 in forward, code: return F.layer_norm(input, self.weight.shape, self.weight, self.bias, 1e-5) + div_1: "f32[1, 64, 1][64, 1, 1]cuda:0" = torch.ops.aten.div.Tensor(rsqrt_23, 768); rsqrt_23 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:116 in forward, code: y = self.resid_dropout(self.c_proj(y)) + permute_109: "f32[768, 768][768, 1]cuda:0" = torch.ops.aten.permute.default(permute_93, [1, 0]); permute_93 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:82 in forward, code: q, k, v = self.c_attn(x).split(self.n_embd, dim=2) + permute_117: "f32[2304, 768][768, 1]cuda:0" = torch.ops.aten.permute.default(permute_88, [1, 0]); permute_88 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:43 in forward, code: return F.layer_norm(input, self.weight.shape, self.weight, self.bias, 1e-5) + div_2: "f32[1, 64, 1][64, 1, 1]cuda:0" = torch.ops.aten.div.Tensor(rsqrt_22, 768); rsqrt_22 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:130 in forward, code: x = self.c_proj(x) + permute_121: "f32[768, 3072][3072, 1]cuda:0" = torch.ops.aten.permute.default(permute_87, [1, 0]); permute_87 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:128 in forward, code: x = self.c_fc(x) + permute_125: "f32[3072, 768][768, 1]cuda:0" = torch.ops.aten.permute.default(permute_86, [1, 0]); permute_86 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:43 in forward, code: return F.layer_norm(input, self.weight.shape, self.weight, self.bias, 1e-5) + div_3: "f32[1, 64, 1][64, 1, 1]cuda:0" = torch.ops.aten.div.Tensor(rsqrt_21, 768); rsqrt_21 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:116 in forward, code: y = self.resid_dropout(self.c_proj(y)) + permute_129: "f32[768, 768][768, 1]cuda:0" = torch.ops.aten.permute.default(permute_85, [1, 0]); permute_85 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:82 in forward, code: q, k, v = self.c_attn(x).split(self.n_embd, dim=2) + permute_137: "f32[2304, 768][768, 1]cuda:0" = torch.ops.aten.permute.default(permute_80, [1, 0]); permute_80 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:43 in forward, code: return F.layer_norm(input, self.weight.shape, self.weight, self.bias, 1e-5) + div_4: "f32[1, 64, 1][64, 1, 1]cuda:0" = torch.ops.aten.div.Tensor(rsqrt_20, 768); rsqrt_20 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:130 in forward, 
code: x = self.c_proj(x) + permute_141: "f32[768, 3072][3072, 1]cuda:0" = torch.ops.aten.permute.default(permute_79, [1, 0]); permute_79 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:128 in forward, code: x = self.c_fc(x) + permute_145: "f32[3072, 768][768, 1]cuda:0" = torch.ops.aten.permute.default(permute_78, [1, 0]); permute_78 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:43 in forward, code: return F.layer_norm(input, self.weight.shape, self.weight, self.bias, 1e-5) + div_5: "f32[1, 64, 1][64, 1, 1]cuda:0" = torch.ops.aten.div.Tensor(rsqrt_19, 768); rsqrt_19 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:116 in forward, code: y = self.resid_dropout(self.c_proj(y)) + permute_149: "f32[768, 768][768, 1]cuda:0" = torch.ops.aten.permute.default(permute_77, [1, 0]); permute_77 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:82 in forward, code: q, k, v = self.c_attn(x).split(self.n_embd, dim=2) + permute_157: "f32[2304, 768][768, 1]cuda:0" = torch.ops.aten.permute.default(permute_72, [1, 0]); permute_72 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:43 in forward, code: return F.layer_norm(input, self.weight.shape, self.weight, self.bias, 1e-5) + div_6: "f32[1, 64, 1][64, 1, 1]cuda:0" = torch.ops.aten.div.Tensor(rsqrt_18, 768); rsqrt_18 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:130 in forward, code: x = self.c_proj(x) + permute_161: "f32[768, 3072][3072, 1]cuda:0" = torch.ops.aten.permute.default(permute_71, [1, 0]); permute_71 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:128 in forward, code: x = self.c_fc(x) + permute_165: "f32[3072, 768][768, 1]cuda:0" = torch.ops.aten.permute.default(permute_70, [1, 0]); permute_70 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:43 in forward, code: return F.layer_norm(input, self.weight.shape, self.weight, self.bias, 1e-5) + div_7: "f32[1, 64, 1][64, 1, 1]cuda:0" = torch.ops.aten.div.Tensor(rsqrt_17, 768); rsqrt_17 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:116 in forward, code: y = self.resid_dropout(self.c_proj(y)) + permute_169: "f32[768, 768][768, 1]cuda:0" = torch.ops.aten.permute.default(permute_69, [1, 0]); permute_69 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:82 in forward, code: q, k, v = self.c_attn(x).split(self.n_embd, dim=2) + permute_177: "f32[2304, 768][768, 1]cuda:0" = torch.ops.aten.permute.default(permute_64, [1, 0]); permute_64 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:43 in forward, code: return F.layer_norm(input, self.weight.shape, self.weight, self.bias, 1e-5) + div_8: "f32[1, 64, 1][64, 1, 1]cuda:0" = torch.ops.aten.div.Tensor(rsqrt_16, 768); rsqrt_16 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:130 in forward, code: x = self.c_proj(x) + permute_181: "f32[768, 3072][3072, 1]cuda:0" = torch.ops.aten.permute.default(permute_63, [1, 0]); permute_63 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:128 in forward, code: x = self.c_fc(x) + permute_185: "f32[3072, 768][768, 1]cuda:0" = torch.ops.aten.permute.default(permute_62, [1, 0]); 
permute_62 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:43 in forward, code: return F.layer_norm(input, self.weight.shape, self.weight, self.bias, 1e-5) + div_9: "f32[1, 64, 1][64, 1, 1]cuda:0" = torch.ops.aten.div.Tensor(rsqrt_15, 768); rsqrt_15 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:116 in forward, code: y = self.resid_dropout(self.c_proj(y)) + permute_189: "f32[768, 768][768, 1]cuda:0" = torch.ops.aten.permute.default(permute_61, [1, 0]); permute_61 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:82 in forward, code: q, k, v = self.c_attn(x).split(self.n_embd, dim=2) + permute_197: "f32[2304, 768][768, 1]cuda:0" = torch.ops.aten.permute.default(permute_56, [1, 0]); permute_56 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:43 in forward, code: return F.layer_norm(input, self.weight.shape, self.weight, self.bias, 1e-5) + div_10: "f32[1, 64, 1][64, 1, 1]cuda:0" = torch.ops.aten.div.Tensor(rsqrt_14, 768); rsqrt_14 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:130 in forward, code: x = self.c_proj(x) + permute_201: "f32[768, 3072][3072, 1]cuda:0" = torch.ops.aten.permute.default(permute_55, [1, 0]); permute_55 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:128 in forward, code: x = self.c_fc(x) + permute_205: "f32[3072, 768][768, 1]cuda:0" = torch.ops.aten.permute.default(permute_54, [1, 0]); permute_54 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:43 in forward, code: return F.layer_norm(input, self.weight.shape, self.weight, self.bias, 1e-5) + div_11: "f32[1, 64, 1][64, 1, 1]cuda:0" = torch.ops.aten.div.Tensor(rsqrt_13, 768); rsqrt_13 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:116 in forward, code: y = self.resid_dropout(self.c_proj(y)) + permute_209: "f32[768, 768][768, 1]cuda:0" = torch.ops.aten.permute.default(permute_53, [1, 0]); permute_53 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:82 in forward, code: q, k, v = self.c_attn(x).split(self.n_embd, dim=2) + permute_217: "f32[2304, 768][768, 1]cuda:0" = torch.ops.aten.permute.default(permute_48, [1, 0]); permute_48 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:43 in forward, code: return F.layer_norm(input, self.weight.shape, self.weight, self.bias, 1e-5) + div_12: "f32[1, 64, 1][64, 1, 1]cuda:0" = torch.ops.aten.div.Tensor(rsqrt_12, 768); rsqrt_12 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:130 in forward, code: x = self.c_proj(x) + permute_221: "f32[768, 3072][3072, 1]cuda:0" = torch.ops.aten.permute.default(permute_47, [1, 0]); permute_47 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:128 in forward, code: x = self.c_fc(x) + permute_225: "f32[3072, 768][768, 1]cuda:0" = torch.ops.aten.permute.default(permute_46, [1, 0]); permute_46 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:43 in forward, code: return F.layer_norm(input, self.weight.shape, self.weight, self.bias, 1e-5) + div_13: "f32[1, 64, 1][64, 1, 1]cuda:0" = torch.ops.aten.div.Tensor(rsqrt_11, 768); rsqrt_11 = None + + # File: 
/data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:116 in forward, code: y = self.resid_dropout(self.c_proj(y)) + permute_229: "f32[768, 768][768, 1]cuda:0" = torch.ops.aten.permute.default(permute_45, [1, 0]); permute_45 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:82 in forward, code: q, k, v = self.c_attn(x).split(self.n_embd, dim=2) + permute_237: "f32[2304, 768][768, 1]cuda:0" = torch.ops.aten.permute.default(permute_40, [1, 0]); permute_40 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:43 in forward, code: return F.layer_norm(input, self.weight.shape, self.weight, self.bias, 1e-5) + div_14: "f32[1, 64, 1][64, 1, 1]cuda:0" = torch.ops.aten.div.Tensor(rsqrt_10, 768); rsqrt_10 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:130 in forward, code: x = self.c_proj(x) + permute_241: "f32[768, 3072][3072, 1]cuda:0" = torch.ops.aten.permute.default(permute_39, [1, 0]); permute_39 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:128 in forward, code: x = self.c_fc(x) + permute_245: "f32[3072, 768][768, 1]cuda:0" = torch.ops.aten.permute.default(permute_38, [1, 0]); permute_38 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:43 in forward, code: return F.layer_norm(input, self.weight.shape, self.weight, self.bias, 1e-5) + div_15: "f32[1, 64, 1][64, 1, 1]cuda:0" = torch.ops.aten.div.Tensor(rsqrt_9, 768); rsqrt_9 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:116 in forward, code: y = self.resid_dropout(self.c_proj(y)) + permute_249: "f32[768, 768][768, 1]cuda:0" = torch.ops.aten.permute.default(permute_37, [1, 0]); permute_37 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:82 in forward, code: q, k, v = self.c_attn(x).split(self.n_embd, dim=2) + permute_257: "f32[2304, 768][768, 1]cuda:0" = torch.ops.aten.permute.default(permute_32, [1, 0]); permute_32 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:43 in forward, code: return F.layer_norm(input, self.weight.shape, self.weight, self.bias, 1e-5) + div_16: "f32[1, 64, 1][64, 1, 1]cuda:0" = torch.ops.aten.div.Tensor(rsqrt_8, 768); rsqrt_8 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:130 in forward, code: x = self.c_proj(x) + permute_261: "f32[768, 3072][3072, 1]cuda:0" = torch.ops.aten.permute.default(permute_31, [1, 0]); permute_31 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:128 in forward, code: x = self.c_fc(x) + permute_265: "f32[3072, 768][768, 1]cuda:0" = torch.ops.aten.permute.default(permute_30, [1, 0]); permute_30 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:43 in forward, code: return F.layer_norm(input, self.weight.shape, self.weight, self.bias, 1e-5) + div_17: "f32[1, 64, 1][64, 1, 1]cuda:0" = torch.ops.aten.div.Tensor(rsqrt_7, 768); rsqrt_7 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:116 in forward, code: y = self.resid_dropout(self.c_proj(y)) + permute_269: "f32[768, 768][768, 1]cuda:0" = torch.ops.aten.permute.default(permute_29, [1, 0]); permute_29 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:82 in forward, code: q, k, v = 
self.c_attn(x).split(self.n_embd, dim=2) + permute_277: "f32[2304, 768][768, 1]cuda:0" = torch.ops.aten.permute.default(permute_24, [1, 0]); permute_24 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:43 in forward, code: return F.layer_norm(input, self.weight.shape, self.weight, self.bias, 1e-5) + div_18: "f32[1, 64, 1][64, 1, 1]cuda:0" = torch.ops.aten.div.Tensor(rsqrt_6, 768); rsqrt_6 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:130 in forward, code: x = self.c_proj(x) + permute_281: "f32[768, 3072][3072, 1]cuda:0" = torch.ops.aten.permute.default(permute_23, [1, 0]); permute_23 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:128 in forward, code: x = self.c_fc(x) + permute_285: "f32[3072, 768][768, 1]cuda:0" = torch.ops.aten.permute.default(permute_22, [1, 0]); permute_22 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:43 in forward, code: return F.layer_norm(input, self.weight.shape, self.weight, self.bias, 1e-5) + div_19: "f32[1, 64, 1][64, 1, 1]cuda:0" = torch.ops.aten.div.Tensor(rsqrt_5, 768); rsqrt_5 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:116 in forward, code: y = self.resid_dropout(self.c_proj(y)) + permute_289: "f32[768, 768][768, 1]cuda:0" = torch.ops.aten.permute.default(permute_21, [1, 0]); permute_21 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:82 in forward, code: q, k, v = self.c_attn(x).split(self.n_embd, dim=2) + permute_297: "f32[2304, 768][768, 1]cuda:0" = torch.ops.aten.permute.default(permute_16, [1, 0]); permute_16 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:43 in forward, code: return F.layer_norm(input, self.weight.shape, self.weight, self.bias, 1e-5) + div_20: "f32[1, 64, 1][64, 1, 1]cuda:0" = torch.ops.aten.div.Tensor(rsqrt_4, 768); rsqrt_4 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:130 in forward, code: x = self.c_proj(x) + permute_301: "f32[768, 3072][3072, 1]cuda:0" = torch.ops.aten.permute.default(permute_15, [1, 0]); permute_15 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:128 in forward, code: x = self.c_fc(x) + permute_305: "f32[3072, 768][768, 1]cuda:0" = torch.ops.aten.permute.default(permute_14, [1, 0]); permute_14 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:43 in forward, code: return F.layer_norm(input, self.weight.shape, self.weight, self.bias, 1e-5) + div_21: "f32[1, 64, 1][64, 1, 1]cuda:0" = torch.ops.aten.div.Tensor(rsqrt_3, 768); rsqrt_3 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:116 in forward, code: y = self.resid_dropout(self.c_proj(y)) + permute_309: "f32[768, 768][768, 1]cuda:0" = torch.ops.aten.permute.default(permute_13, [1, 0]); permute_13 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:82 in forward, code: q, k, v = self.c_attn(x).split(self.n_embd, dim=2) + permute_317: "f32[2304, 768][768, 1]cuda:0" = torch.ops.aten.permute.default(permute_8, [1, 0]); permute_8 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:43 in forward, code: return F.layer_norm(input, self.weight.shape, self.weight, self.bias, 1e-5) + div_22: "f32[1, 64, 1][64, 1, 
1]cuda:0" = torch.ops.aten.div.Tensor(rsqrt_2, 768); rsqrt_2 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:130 in forward, code: x = self.c_proj(x) + permute_321: "f32[768, 3072][3072, 1]cuda:0" = torch.ops.aten.permute.default(permute_7, [1, 0]); permute_7 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:128 in forward, code: x = self.c_fc(x) + permute_325: "f32[3072, 768][768, 1]cuda:0" = torch.ops.aten.permute.default(permute_6, [1, 0]); permute_6 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:43 in forward, code: return F.layer_norm(input, self.weight.shape, self.weight, self.bias, 1e-5) + div_23: "f32[1, 64, 1][64, 1, 1]cuda:0" = torch.ops.aten.div.Tensor(rsqrt_1, 768); rsqrt_1 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:116 in forward, code: y = self.resid_dropout(self.c_proj(y)) + permute_329: "f32[768, 768][768, 1]cuda:0" = torch.ops.aten.permute.default(permute_5, [1, 0]); permute_5 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:82 in forward, code: q, k, v = self.c_attn(x).split(self.n_embd, dim=2) + permute_337: "f32[2304, 768][768, 1]cuda:0" = torch.ops.aten.permute.default(permute, [1, 0]); permute = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:43 in forward, code: return F.layer_norm(input, self.weight.shape, self.weight, self.bias, 1e-5) + div_24: "f32[1, 64, 1][64, 1, 1]cuda:0" = torch.ops.aten.div.Tensor(rsqrt, 768); rsqrt = None + return (view_145, primals_1, primals_4, primals_10, primals_16, primals_22, primals_28, primals_34, primals_40, primals_46, primals_52, primals_58, primals_64, primals_70, primals_76, primals_82, primals_88, primals_94, primals_100, primals_106, primals_112, primals_118, primals_124, primals_130, primals_136, primals_142, primals_148, unsqueeze, mul, view, permute_1, permute_2, permute_3, getitem_5, getitem_6, getitem_7, getitem_8, mul_2, view_8, addmm_2, view_10, mul_8, view_12, permute_9, permute_10, permute_11, getitem_16, getitem_17, getitem_18, getitem_19, mul_10, view_20, addmm_6, view_22, mul_16, view_24, permute_17, permute_18, permute_19, getitem_27, getitem_28, getitem_29, getitem_30, mul_18, view_32, addmm_10, view_34, mul_24, view_36, permute_25, permute_26, permute_27, getitem_38, getitem_39, getitem_40, getitem_41, mul_26, view_44, addmm_14, view_46, mul_32, view_48, permute_33, permute_34, permute_35, getitem_49, getitem_50, getitem_51, getitem_52, mul_34, view_56, addmm_18, view_58, mul_40, view_60, permute_41, permute_42, permute_43, getitem_60, getitem_61, getitem_62, getitem_63, mul_42, view_68, addmm_22, view_70, mul_48, view_72, permute_49, permute_50, permute_51, getitem_71, getitem_72, getitem_73, getitem_74, mul_50, view_80, addmm_26, view_82, mul_56, view_84, permute_57, permute_58, permute_59, getitem_82, getitem_83, getitem_84, getitem_85, mul_58, view_92, addmm_30, view_94, mul_64, view_96, permute_65, permute_66, permute_67, getitem_93, getitem_94, getitem_95, getitem_96, mul_66, view_104, addmm_34, view_106, mul_72, view_108, permute_73, permute_74, permute_75, getitem_104, getitem_105, getitem_106, getitem_107, mul_74, view_116, addmm_38, view_118, mul_80, view_120, permute_81, permute_82, permute_83, getitem_115, getitem_116, getitem_117, getitem_118, mul_82, view_128, addmm_42, view_130, mul_88, view_132, permute_89, permute_90, permute_91, 
getitem_126, getitem_127, getitem_128, getitem_129, mul_90, view_140, addmm_46, view_142, mul_96, full_default, view_144, permute_99, div, permute_101, permute_105, div_1, permute_109, permute_117, div_2, permute_121, permute_125, div_3, permute_129, permute_137, div_4, permute_141, permute_145, div_5, permute_149, permute_157, div_6, permute_161, permute_165, div_7, permute_169, permute_177, div_8, permute_181, permute_185, div_9, permute_189, permute_197, div_10, permute_201, permute_205, div_11, permute_209, permute_217, div_12, permute_221, permute_225, div_13, permute_229, permute_237, div_14, permute_241, permute_245, div_15, permute_249, permute_257, div_16, permute_261, permute_265, div_17, permute_269, permute_277, div_18, permute_281, permute_285, div_19, permute_289, permute_297, div_20, permute_301, permute_305, div_21, permute_309, permute_317, div_22, permute_321, permute_325, div_23, permute_329, permute_337, div_24) + +V0806 13:55:55.886000 4107173 torch/_functorch/_aot_autograd/jit_compile_runtime_wrappers.py:529] {"aot_backward_graph": {}, "frame_id": 2, "frame_compile_id": 0, "attempt": 1, "has_payload": "06b75a7f8452a4f7c7a1286ae5e71622"} + class GraphModule(torch.nn.Module): + def forward(self, primals_1: "i64[1, 64][64, 1]cuda:0", primals_4: "f32[768][1]cuda:0", primals_10: "f32[768][1]cuda:0", primals_16: "f32[768][1]cuda:0", primals_22: "f32[768][1]cuda:0", primals_28: "f32[768][1]cuda:0", primals_34: "f32[768][1]cuda:0", primals_40: "f32[768][1]cuda:0", primals_46: "f32[768][1]cuda:0", primals_52: "f32[768][1]cuda:0", primals_58: "f32[768][1]cuda:0", primals_64: "f32[768][1]cuda:0", primals_70: "f32[768][1]cuda:0", primals_76: "f32[768][1]cuda:0", primals_82: "f32[768][1]cuda:0", primals_88: "f32[768][1]cuda:0", primals_94: "f32[768][1]cuda:0", primals_100: "f32[768][1]cuda:0", primals_106: "f32[768][1]cuda:0", primals_112: "f32[768][1]cuda:0", primals_118: "f32[768][1]cuda:0", primals_124: "f32[768][1]cuda:0", primals_130: "f32[768][1]cuda:0", primals_136: "f32[768][1]cuda:0", primals_142: "f32[768][1]cuda:0", primals_148: "f32[768][1]cuda:0", unsqueeze: "i64[1, 64][64, 1]cuda:0", mul: "f32[1, 64, 768][49152, 768, 1]cuda:0", view: "f32[64, 768][768, 1]cuda:0", permute_1: "f32[1, 12, 64, 64][147456, 64, 2304, 1]cuda:0", permute_2: "f32[1, 12, 64, 64][147456, 64, 2304, 1]cuda:0", permute_3: "f32[1, 12, 64, 64][147456, 64, 2304, 1]cuda:0", getitem_5: "f32[1, 12, 64, 64][49152, 64, 768, 1]cuda:0", getitem_6: "f32[1, 12, 64][768, 64, 1]cuda:0", getitem_7: "i64[][]cuda:0", getitem_8: "i64[][]cuda:0", mul_2: "f32[1, 64, 768][49152, 768, 1]cuda:0", view_8: "f32[64, 768][768, 1]cuda:0", addmm_2: "f32[64, 3072][3072, 1]cuda:0", view_10: "f32[64, 3072][3072, 1]cuda:0", mul_8: "f32[1, 64, 768][49152, 768, 1]cuda:0", view_12: "f32[64, 768][768, 1]cuda:0", permute_9: "f32[1, 12, 64, 64][147456, 64, 2304, 1]cuda:0", permute_10: "f32[1, 12, 64, 64][147456, 64, 2304, 1]cuda:0", permute_11: "f32[1, 12, 64, 64][147456, 64, 2304, 1]cuda:0", getitem_16: "f32[1, 12, 64, 64][49152, 64, 768, 1]cuda:0", getitem_17: "f32[1, 12, 64][768, 64, 1]cuda:0", getitem_18: "i64[][]cuda:0", getitem_19: "i64[][]cuda:0", mul_10: "f32[1, 64, 768][49152, 768, 1]cuda:0", view_20: "f32[64, 768][768, 1]cuda:0", addmm_6: "f32[64, 3072][3072, 1]cuda:0", view_22: "f32[64, 3072][3072, 1]cuda:0", mul_16: "f32[1, 64, 768][49152, 768, 1]cuda:0", view_24: "f32[64, 768][768, 1]cuda:0", permute_17: "f32[1, 12, 64, 64][147456, 64, 2304, 1]cuda:0", permute_18: "f32[1, 12, 64, 64][147456, 64, 2304, 1]cuda:0", 
permute_19: "f32[1, 12, 64, 64][147456, 64, 2304, 1]cuda:0", getitem_27: "f32[1, 12, 64, 64][49152, 64, 768, 1]cuda:0", getitem_28: "f32[1, 12, 64][768, 64, 1]cuda:0", getitem_29: "i64[][]cuda:0", getitem_30: "i64[][]cuda:0", mul_18: "f32[1, 64, 768][49152, 768, 1]cuda:0", view_32: "f32[64, 768][768, 1]cuda:0", addmm_10: "f32[64, 3072][3072, 1]cuda:0", view_34: "f32[64, 3072][3072, 1]cuda:0", mul_24: "f32[1, 64, 768][49152, 768, 1]cuda:0", view_36: "f32[64, 768][768, 1]cuda:0", permute_25: "f32[1, 12, 64, 64][147456, 64, 2304, 1]cuda:0", permute_26: "f32[1, 12, 64, 64][147456, 64, 2304, 1]cuda:0", permute_27: "f32[1, 12, 64, 64][147456, 64, 2304, 1]cuda:0", getitem_38: "f32[1, 12, 64, 64][49152, 64, 768, 1]cuda:0", getitem_39: "f32[1, 12, 64][768, 64, 1]cuda:0", getitem_40: "i64[][]cuda:0", getitem_41: "i64[][]cuda:0", mul_26: "f32[1, 64, 768][49152, 768, 1]cuda:0", view_44: "f32[64, 768][768, 1]cuda:0", addmm_14: "f32[64, 3072][3072, 1]cuda:0", view_46: "f32[64, 3072][3072, 1]cuda:0", mul_32: "f32[1, 64, 768][49152, 768, 1]cuda:0", view_48: "f32[64, 768][768, 1]cuda:0", permute_33: "f32[1, 12, 64, 64][147456, 64, 2304, 1]cuda:0", permute_34: "f32[1, 12, 64, 64][147456, 64, 2304, 1]cuda:0", permute_35: "f32[1, 12, 64, 64][147456, 64, 2304, 1]cuda:0", getitem_49: "f32[1, 12, 64, 64][49152, 64, 768, 1]cuda:0", getitem_50: "f32[1, 12, 64][768, 64, 1]cuda:0", getitem_51: "i64[][]cuda:0", getitem_52: "i64[][]cuda:0", mul_34: "f32[1, 64, 768][49152, 768, 1]cuda:0", view_56: "f32[64, 768][768, 1]cuda:0", addmm_18: "f32[64, 3072][3072, 1]cuda:0", view_58: "f32[64, 3072][3072, 1]cuda:0", mul_40: "f32[1, 64, 768][49152, 768, 1]cuda:0", view_60: "f32[64, 768][768, 1]cuda:0", permute_41: "f32[1, 12, 64, 64][147456, 64, 2304, 1]cuda:0", permute_42: "f32[1, 12, 64, 64][147456, 64, 2304, 1]cuda:0", permute_43: "f32[1, 12, 64, 64][147456, 64, 2304, 1]cuda:0", getitem_60: "f32[1, 12, 64, 64][49152, 64, 768, 1]cuda:0", getitem_61: "f32[1, 12, 64][768, 64, 1]cuda:0", getitem_62: "i64[][]cuda:0", getitem_63: "i64[][]cuda:0", mul_42: "f32[1, 64, 768][49152, 768, 1]cuda:0", view_68: "f32[64, 768][768, 1]cuda:0", addmm_22: "f32[64, 3072][3072, 1]cuda:0", view_70: "f32[64, 3072][3072, 1]cuda:0", mul_48: "f32[1, 64, 768][49152, 768, 1]cuda:0", view_72: "f32[64, 768][768, 1]cuda:0", permute_49: "f32[1, 12, 64, 64][147456, 64, 2304, 1]cuda:0", permute_50: "f32[1, 12, 64, 64][147456, 64, 2304, 1]cuda:0", permute_51: "f32[1, 12, 64, 64][147456, 64, 2304, 1]cuda:0", getitem_71: "f32[1, 12, 64, 64][49152, 64, 768, 1]cuda:0", getitem_72: "f32[1, 12, 64][768, 64, 1]cuda:0", getitem_73: "i64[][]cuda:0", getitem_74: "i64[][]cuda:0", mul_50: "f32[1, 64, 768][49152, 768, 1]cuda:0", view_80: "f32[64, 768][768, 1]cuda:0", addmm_26: "f32[64, 3072][3072, 1]cuda:0", view_82: "f32[64, 3072][3072, 1]cuda:0", mul_56: "f32[1, 64, 768][49152, 768, 1]cuda:0", view_84: "f32[64, 768][768, 1]cuda:0", permute_57: "f32[1, 12, 64, 64][147456, 64, 2304, 1]cuda:0", permute_58: "f32[1, 12, 64, 64][147456, 64, 2304, 1]cuda:0", permute_59: "f32[1, 12, 64, 64][147456, 64, 2304, 1]cuda:0", getitem_82: "f32[1, 12, 64, 64][49152, 64, 768, 1]cuda:0", getitem_83: "f32[1, 12, 64][768, 64, 1]cuda:0", getitem_84: "i64[][]cuda:0", getitem_85: "i64[][]cuda:0", mul_58: "f32[1, 64, 768][49152, 768, 1]cuda:0", view_92: "f32[64, 768][768, 1]cuda:0", addmm_30: "f32[64, 3072][3072, 1]cuda:0", view_94: "f32[64, 3072][3072, 1]cuda:0", mul_64: "f32[1, 64, 768][49152, 768, 1]cuda:0", view_96: "f32[64, 768][768, 1]cuda:0", permute_65: "f32[1, 12, 64, 64][147456, 64, 
2304, 1]cuda:0", permute_66: "f32[1, 12, 64, 64][147456, 64, 2304, 1]cuda:0", permute_67: "f32[1, 12, 64, 64][147456, 64, 2304, 1]cuda:0", getitem_93: "f32[1, 12, 64, 64][49152, 64, 768, 1]cuda:0", getitem_94: "f32[1, 12, 64][768, 64, 1]cuda:0", getitem_95: "i64[][]cuda:0", getitem_96: "i64[][]cuda:0", mul_66: "f32[1, 64, 768][49152, 768, 1]cuda:0", view_104: "f32[64, 768][768, 1]cuda:0", addmm_34: "f32[64, 3072][3072, 1]cuda:0", view_106: "f32[64, 3072][3072, 1]cuda:0", mul_72: "f32[1, 64, 768][49152, 768, 1]cuda:0", view_108: "f32[64, 768][768, 1]cuda:0", permute_73: "f32[1, 12, 64, 64][147456, 64, 2304, 1]cuda:0", permute_74: "f32[1, 12, 64, 64][147456, 64, 2304, 1]cuda:0", permute_75: "f32[1, 12, 64, 64][147456, 64, 2304, 1]cuda:0", getitem_104: "f32[1, 12, 64, 64][49152, 64, 768, 1]cuda:0", getitem_105: "f32[1, 12, 64][768, 64, 1]cuda:0", getitem_106: "i64[][]cuda:0", getitem_107: "i64[][]cuda:0", mul_74: "f32[1, 64, 768][49152, 768, 1]cuda:0", view_116: "f32[64, 768][768, 1]cuda:0", addmm_38: "f32[64, 3072][3072, 1]cuda:0", view_118: "f32[64, 3072][3072, 1]cuda:0", mul_80: "f32[1, 64, 768][49152, 768, 1]cuda:0", view_120: "f32[64, 768][768, 1]cuda:0", permute_81: "f32[1, 12, 64, 64][147456, 64, 2304, 1]cuda:0", permute_82: "f32[1, 12, 64, 64][147456, 64, 2304, 1]cuda:0", permute_83: "f32[1, 12, 64, 64][147456, 64, 2304, 1]cuda:0", getitem_115: "f32[1, 12, 64, 64][49152, 64, 768, 1]cuda:0", getitem_116: "f32[1, 12, 64][768, 64, 1]cuda:0", getitem_117: "i64[][]cuda:0", getitem_118: "i64[][]cuda:0", mul_82: "f32[1, 64, 768][49152, 768, 1]cuda:0", view_128: "f32[64, 768][768, 1]cuda:0", addmm_42: "f32[64, 3072][3072, 1]cuda:0", view_130: "f32[64, 3072][3072, 1]cuda:0", mul_88: "f32[1, 64, 768][49152, 768, 1]cuda:0", view_132: "f32[64, 768][768, 1]cuda:0", permute_89: "f32[1, 12, 64, 64][147456, 64, 2304, 1]cuda:0", permute_90: "f32[1, 12, 64, 64][147456, 64, 2304, 1]cuda:0", permute_91: "f32[1, 12, 64, 64][147456, 64, 2304, 1]cuda:0", getitem_126: "f32[1, 12, 64, 64][49152, 64, 768, 1]cuda:0", getitem_127: "f32[1, 12, 64][768, 64, 1]cuda:0", getitem_128: "i64[][]cuda:0", getitem_129: "i64[][]cuda:0", mul_90: "f32[1, 64, 768][49152, 768, 1]cuda:0", view_140: "f32[64, 768][768, 1]cuda:0", addmm_46: "f32[64, 3072][3072, 1]cuda:0", view_142: "f32[64, 3072][3072, 1]cuda:0", mul_96: "f32[1, 64, 768][49152, 768, 1]cuda:0", full_default: "i64[1][1]cuda:0", view_144: "f32[1, 768][768, 1]cuda:0", permute_99: "f32[50304, 768][768, 1]cuda:0", div: "f32[1, 64, 1][64, 1, 1]cuda:0", permute_101: "f32[768, 3072][3072, 1]cuda:0", permute_105: "f32[3072, 768][768, 1]cuda:0", div_1: "f32[1, 64, 1][64, 1, 1]cuda:0", permute_109: "f32[768, 768][768, 1]cuda:0", permute_117: "f32[2304, 768][768, 1]cuda:0", div_2: "f32[1, 64, 1][64, 1, 1]cuda:0", permute_121: "f32[768, 3072][3072, 1]cuda:0", permute_125: "f32[3072, 768][768, 1]cuda:0", div_3: "f32[1, 64, 1][64, 1, 1]cuda:0", permute_129: "f32[768, 768][768, 1]cuda:0", permute_137: "f32[2304, 768][768, 1]cuda:0", div_4: "f32[1, 64, 1][64, 1, 1]cuda:0", permute_141: "f32[768, 3072][3072, 1]cuda:0", permute_145: "f32[3072, 768][768, 1]cuda:0", div_5: "f32[1, 64, 1][64, 1, 1]cuda:0", permute_149: "f32[768, 768][768, 1]cuda:0", permute_157: "f32[2304, 768][768, 1]cuda:0", div_6: "f32[1, 64, 1][64, 1, 1]cuda:0", permute_161: "f32[768, 3072][3072, 1]cuda:0", permute_165: "f32[3072, 768][768, 1]cuda:0", div_7: "f32[1, 64, 1][64, 1, 1]cuda:0", permute_169: "f32[768, 768][768, 1]cuda:0", permute_177: "f32[2304, 768][768, 1]cuda:0", div_8: "f32[1, 64, 1][64, 1, 1]cuda:0", 
permute_181: "f32[768, 3072][3072, 1]cuda:0", permute_185: "f32[3072, 768][768, 1]cuda:0", div_9: "f32[1, 64, 1][64, 1, 1]cuda:0", permute_189: "f32[768, 768][768, 1]cuda:0", permute_197: "f32[2304, 768][768, 1]cuda:0", div_10: "f32[1, 64, 1][64, 1, 1]cuda:0", permute_201: "f32[768, 3072][3072, 1]cuda:0", permute_205: "f32[3072, 768][768, 1]cuda:0", div_11: "f32[1, 64, 1][64, 1, 1]cuda:0", permute_209: "f32[768, 768][768, 1]cuda:0", permute_217: "f32[2304, 768][768, 1]cuda:0", div_12: "f32[1, 64, 1][64, 1, 1]cuda:0", permute_221: "f32[768, 3072][3072, 1]cuda:0", permute_225: "f32[3072, 768][768, 1]cuda:0", div_13: "f32[1, 64, 1][64, 1, 1]cuda:0", permute_229: "f32[768, 768][768, 1]cuda:0", permute_237: "f32[2304, 768][768, 1]cuda:0", div_14: "f32[1, 64, 1][64, 1, 1]cuda:0", permute_241: "f32[768, 3072][3072, 1]cuda:0", permute_245: "f32[3072, 768][768, 1]cuda:0", div_15: "f32[1, 64, 1][64, 1, 1]cuda:0", permute_249: "f32[768, 768][768, 1]cuda:0", permute_257: "f32[2304, 768][768, 1]cuda:0", div_16: "f32[1, 64, 1][64, 1, 1]cuda:0", permute_261: "f32[768, 3072][3072, 1]cuda:0", permute_265: "f32[3072, 768][768, 1]cuda:0", div_17: "f32[1, 64, 1][64, 1, 1]cuda:0", permute_269: "f32[768, 768][768, 1]cuda:0", permute_277: "f32[2304, 768][768, 1]cuda:0", div_18: "f32[1, 64, 1][64, 1, 1]cuda:0", permute_281: "f32[768, 3072][3072, 1]cuda:0", permute_285: "f32[3072, 768][768, 1]cuda:0", div_19: "f32[1, 64, 1][64, 1, 1]cuda:0", permute_289: "f32[768, 768][768, 1]cuda:0", permute_297: "f32[2304, 768][768, 1]cuda:0", div_20: "f32[1, 64, 1][64, 1, 1]cuda:0", permute_301: "f32[768, 3072][3072, 1]cuda:0", permute_305: "f32[3072, 768][768, 1]cuda:0", div_21: "f32[1, 64, 1][64, 1, 1]cuda:0", permute_309: "f32[768, 768][768, 1]cuda:0", permute_317: "f32[2304, 768][768, 1]cuda:0", div_22: "f32[1, 64, 1][64, 1, 1]cuda:0", permute_321: "f32[768, 3072][3072, 1]cuda:0", permute_325: "f32[3072, 768][768, 1]cuda:0", div_23: "f32[1, 64, 1][64, 1, 1]cuda:0", permute_329: "f32[768, 768][768, 1]cuda:0", permute_337: "f32[2304, 768][768, 1]cuda:0", div_24: "f32[1, 64, 1][64, 1, 1]cuda:0", tangents_1: "f32[1, 1, 50304][50304, 50304, 1]cuda:0"): + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:238 in forward, code: logits = self.lm_head( + view_146: "f32[1, 50304][50304, 1]cuda:0" = torch.ops.aten.view.default(tangents_1, [1, 50304]); tangents_1 = None + permute_97: "f32[50304, 1][1, 50304]cuda:0" = torch.ops.aten.permute.default(view_146, [1, 0]) + mm_1: "f32[50304, 768][768, 1]cuda:0" = torch.ops.aten.mm.default(permute_97, view_144); permute_97 = view_144 = None + permute_98: "f32[768, 50304][1, 768]cuda:0" = torch.ops.aten.permute.default(mm_1, [1, 0]); mm_1 = None + mm_2: "f32[1, 768][768, 1]cuda:0" = torch.ops.aten.mm.default(view_146, permute_99); view_146 = permute_99 = None + view_147: "f32[1, 1, 768][768, 768, 1]cuda:0" = torch.ops.aten.view.default(mm_2, [1, 1, 768]); mm_2 = None + permute_100: "f32[50304, 768][768, 1]cuda:0" = torch.ops.aten.permute.default(permute_98, [1, 0]); permute_98 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:239 in forward, code: x[:, [-1], :] + full_default_1: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.full.default([1, 64, 768], 0, dtype = torch.float32, layout = torch.strided, device = device(type='cuda', index=0), pin_memory = False) + index_put: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.index_put.default(full_default_1, [None, full_default], view_147, True); full_default_1 = 
full_default = view_147 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:43 in forward, code: return F.layer_norm(input, self.weight.shape, self.weight, self.bias, 1e-5) + mul_99: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.mul.Tensor(index_put, primals_148); primals_148 = None + mul_100: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.mul.Tensor(mul_99, 768) + sum_1: "f32[1, 64, 1][64, 1, 1]cuda:0" = torch.ops.aten.sum.dim_IntList(mul_99, [2], True) + mul_101: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.mul.Tensor(mul_99, mul_96); mul_99 = None + sum_2: "f32[1, 64, 1][64, 1, 1]cuda:0" = torch.ops.aten.sum.dim_IntList(mul_101, [2], True); mul_101 = None + mul_102: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.mul.Tensor(mul_96, sum_2); sum_2 = None + sub_26: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.sub.Tensor(mul_100, sum_1); mul_100 = sum_1 = None + sub_27: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.sub.Tensor(sub_26, mul_102); sub_26 = mul_102 = None + mul_103: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.mul.Tensor(div, sub_27); div = sub_27 = None + mul_104: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.mul.Tensor(index_put, mul_96); mul_96 = None + sum_3: "f32[768][1]cuda:0" = torch.ops.aten.sum.dim_IntList(mul_104, [0, 1]); mul_104 = None + sum_4: "f32[768][1]cuda:0" = torch.ops.aten.sum.dim_IntList(index_put, [0, 1]); index_put = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:130 in forward, code: x = self.c_proj(x) + view_148: "f32[64, 768][768, 1]cuda:0" = torch.ops.aten.view.default(mul_103, [64, 768]) + mm_3: "f32[64, 3072][3072, 1]cuda:0" = torch.ops.aten.mm.default(view_148, permute_101); permute_101 = None + permute_102: "f32[768, 64][1, 768]cuda:0" = torch.ops.aten.permute.default(view_148, [1, 0]) + mm_4: "f32[768, 3072][3072, 1]cuda:0" = torch.ops.aten.mm.default(permute_102, view_142); permute_102 = view_142 = None + permute_103: "f32[3072, 768][1, 3072]cuda:0" = torch.ops.aten.permute.default(mm_4, [1, 0]); mm_4 = None + sum_5: "f32[1, 768][768, 1]cuda:0" = torch.ops.aten.sum.dim_IntList(view_148, [0], True); view_148 = None + view_149: "f32[768][1]cuda:0" = torch.ops.aten.view.default(sum_5, [768]); sum_5 = None + permute_104: "f32[768, 3072][3072, 1]cuda:0" = torch.ops.aten.permute.default(permute_103, [1, 0]); permute_103 = None + view_150: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.view.default(mm_3, [1, 64, 3072]); mm_3 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:128 in forward, code: x = self.c_fc(x) + view_141: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.view.default(addmm_46, [1, 64, 3072]); addmm_46 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:25 in new_gelu, code: 0.5 + mul_92: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.mul.Tensor(view_141, 0.5) + mul_105: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.mul.Tensor(view_150, mul_92); mul_92 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:29 in new_gelu, code: + torch.tanh(math.sqrt(2.0 / math.pi) * (x + 0.044715 * torch.pow(x, 3.0))) + pow_12: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.pow.Tensor_Scalar(view_141, 3.0) + mul_93: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.mul.Tensor(pow_12, 0.044715); pow_12 = None 
+ add_94: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.add.Tensor(view_141, mul_93); mul_93 = None + mul_94: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.mul.Tensor(add_94, 0.7978845608028654); add_94 = None + tanh_11: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.tanh.default(mul_94); mul_94 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:28 in new_gelu, code: 1.0 + add_95: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.add.Tensor(tanh_11, 1.0) + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:25 in new_gelu, code: 0.5 + mul_106: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.mul.Tensor(view_150, add_95); view_150 = add_95 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:29 in new_gelu, code: + torch.tanh(math.sqrt(2.0 / math.pi) * (x + 0.044715 * torch.pow(x, 3.0))) + mul_107: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.mul.Tensor(tanh_11, tanh_11); tanh_11 = None + sub_28: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.sub.Tensor(1, mul_107); mul_107 = None + mul_108: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.mul.Tensor(mul_105, sub_28); mul_105 = sub_28 = None + mul_109: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.mul.Tensor(mul_108, 0.7978845608028654); mul_108 = None + mul_110: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.mul.Tensor(mul_109, 0.044715) + pow_13: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.pow.Tensor_Scalar(view_141, 2.0); view_141 = None + mul_111: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.mul.Scalar(pow_13, 3.0); pow_13 = None + mul_112: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.mul.Tensor(mul_110, mul_111); mul_110 = mul_111 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:29 in new_gelu, code: + torch.tanh(math.sqrt(2.0 / math.pi) * (x + 0.044715 * torch.pow(x, 3.0))) + add_99: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.add.Tensor(mul_109, mul_112); mul_109 = mul_112 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:25 in new_gelu, code: 0.5 + mul_113: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.mul.Tensor(mul_106, 0.5); mul_106 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:25 in new_gelu, code: 0.5 + add_100: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.add.Tensor(add_99, mul_113); add_99 = mul_113 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:128 in forward, code: x = self.c_fc(x) + view_151: "f32[64, 3072][3072, 1]cuda:0" = torch.ops.aten.view.default(add_100, [64, 3072]); add_100 = None + mm_5: "f32[64, 768][768, 1]cuda:0" = torch.ops.aten.mm.default(view_151, permute_105); permute_105 = None + permute_106: "f32[3072, 64][1, 3072]cuda:0" = torch.ops.aten.permute.default(view_151, [1, 0]) + mm_6: "f32[3072, 768][768, 1]cuda:0" = torch.ops.aten.mm.default(permute_106, view_140); permute_106 = view_140 = None + permute_107: "f32[768, 3072][1, 768]cuda:0" = torch.ops.aten.permute.default(mm_6, [1, 0]); mm_6 = None + sum_6: "f32[1, 3072][3072, 1]cuda:0" = torch.ops.aten.sum.dim_IntList(view_151, [0], True); view_151 = None + view_152: "f32[3072][1]cuda:0" = torch.ops.aten.view.default(sum_6, [3072]); sum_6 = None + permute_108: "f32[3072, 
768][768, 1]cuda:0" = torch.ops.aten.permute.default(permute_107, [1, 0]); permute_107 = None + view_153: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.view.default(mm_5, [1, 64, 768]); mm_5 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:43 in forward, code: return F.layer_norm(input, self.weight.shape, self.weight, self.bias, 1e-5) + mul_115: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.mul.Tensor(view_153, primals_142); primals_142 = None + mul_116: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.mul.Tensor(mul_115, 768) + sum_7: "f32[1, 64, 1][64, 1, 1]cuda:0" = torch.ops.aten.sum.dim_IntList(mul_115, [2], True) + mul_117: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.mul.Tensor(mul_115, mul_90); mul_115 = None + sum_8: "f32[1, 64, 1][64, 1, 1]cuda:0" = torch.ops.aten.sum.dim_IntList(mul_117, [2], True); mul_117 = None + mul_118: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.mul.Tensor(mul_90, sum_8); sum_8 = None + sub_30: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.sub.Tensor(mul_116, sum_7); mul_116 = sum_7 = None + sub_31: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.sub.Tensor(sub_30, mul_118); sub_30 = mul_118 = None + mul_119: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.mul.Tensor(div_1, sub_31); div_1 = sub_31 = None + mul_120: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.mul.Tensor(view_153, mul_90); mul_90 = None + sum_9: "f32[768][1]cuda:0" = torch.ops.aten.sum.dim_IntList(mul_120, [0, 1]); mul_120 = None + sum_10: "f32[768][1]cuda:0" = torch.ops.aten.sum.dim_IntList(view_153, [0, 1]); view_153 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:43 in forward, code: return F.layer_norm(input, self.weight.shape, self.weight, self.bias, 1e-5) + add_101: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.add.Tensor(mul_103, mul_119); mul_103 = mul_119 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:116 in forward, code: y = self.resid_dropout(self.c_proj(y)) + view_154: "f32[64, 768][768, 1]cuda:0" = torch.ops.aten.view.default(add_101, [64, 768]) + mm_7: "f32[64, 768][768, 1]cuda:0" = torch.ops.aten.mm.default(view_154, permute_109); permute_109 = None + permute_110: "f32[768, 64][1, 768]cuda:0" = torch.ops.aten.permute.default(view_154, [1, 0]) + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:112 in forward, code: y.transpose(1, 2).contiguous().view(B, T, C) + permute_92: "f32[1, 64, 12, 64][49152, 768, 64, 1]cuda:0" = torch.ops.aten.permute.default(getitem_126, [0, 2, 1, 3]) + view_137: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.view.default(permute_92, [1, 64, 768]); permute_92 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:116 in forward, code: y = self.resid_dropout(self.c_proj(y)) + view_138: "f32[64, 768][768, 1]cuda:0" = torch.ops.aten.view.default(view_137, [64, 768]); view_137 = None + mm_8: "f32[768, 768][768, 1]cuda:0" = torch.ops.aten.mm.default(permute_110, view_138); permute_110 = view_138 = None + permute_111: "f32[768, 768][1, 768]cuda:0" = torch.ops.aten.permute.default(mm_8, [1, 0]); mm_8 = None + sum_11: "f32[1, 768][768, 1]cuda:0" = torch.ops.aten.sum.dim_IntList(view_154, [0], True); view_154 = None + view_155: "f32[768][1]cuda:0" = torch.ops.aten.view.default(sum_11, [768]); sum_11 = None + permute_112: "f32[768, 768][768, 1]cuda:0" = 
torch.ops.aten.permute.default(permute_111, [1, 0]); permute_111 = None + view_156: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.view.default(mm_7, [1, 64, 768]); mm_7 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:112 in forward, code: y.transpose(1, 2).contiguous().view(B, T, C) + view_157: "f32[1, 64, 12, 64][49152, 768, 64, 1]cuda:0" = torch.ops.aten.view.default(view_156, [1, 64, 12, 64]); view_156 = None + permute_113: "f32[1, 12, 64, 64][49152, 64, 768, 1]cuda:0" = torch.ops.aten.permute.default(view_157, [0, 2, 1, 3]); view_157 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:96 in forward, code: y = torch.nn.functional.scaled_dot_product_attention( + _scaled_dot_product_efficient_attention_backward = torch.ops.aten._scaled_dot_product_efficient_attention_backward.default(permute_113, permute_90, permute_89, permute_91, None, getitem_126, getitem_127, getitem_128, getitem_129, 0.0, [True, True, True, False], True); permute_113 = permute_90 = permute_89 = permute_91 = getitem_126 = getitem_127 = getitem_128 = getitem_129 = None + getitem_134: "f32[1, 12, 64, 64][49152, 64, 768, 1]cuda:0" = _scaled_dot_product_efficient_attention_backward[0] + getitem_135: "f32[1, 12, 64, 64][49152, 64, 768, 1]cuda:0" = _scaled_dot_product_efficient_attention_backward[1] + getitem_136: "f32[1, 12, 64, 64][49152, 64, 768, 1]cuda:0" = _scaled_dot_product_efficient_attention_backward[2]; _scaled_dot_product_efficient_attention_backward = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:89 in forward, code: v = v.view(B, T, self.n_head, C // self.n_head).transpose( + permute_114: "f32[1, 64, 12, 64][49152, 768, 64, 1]cuda:0" = torch.ops.aten.permute.default(getitem_136, [0, 2, 1, 3]); getitem_136 = None + view_158: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.view.default(permute_114, [1, 64, 768]); permute_114 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:86 in forward, code: q = q.view(B, T, self.n_head, C // self.n_head).transpose( + permute_115: "f32[1, 64, 12, 64][49152, 768, 64, 1]cuda:0" = torch.ops.aten.permute.default(getitem_134, [0, 2, 1, 3]); getitem_134 = None + view_159: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.view.default(permute_115, [1, 64, 768]); permute_115 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:83 in forward, code: k = k.view(B, T, self.n_head, C // self.n_head).transpose( + permute_116: "f32[1, 64, 12, 64][49152, 768, 64, 1]cuda:0" = torch.ops.aten.permute.default(getitem_135, [0, 2, 1, 3]); getitem_135 = None + view_160: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.view.default(permute_116, [1, 64, 768]); permute_116 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:82 in forward, code: q, k, v = self.c_attn(x).split(self.n_embd, dim=2) + cat: "f32[1, 64, 2304][147456, 2304, 1]cuda:0" = torch.ops.aten.cat.default([view_159, view_160, view_158], 2); view_159 = view_160 = view_158 = None + view_161: "f32[64, 2304][2304, 1]cuda:0" = torch.ops.aten.view.default(cat, [64, 2304]); cat = None + mm_9: "f32[64, 768][768, 1]cuda:0" = torch.ops.aten.mm.default(view_161, permute_117); permute_117 = None + permute_118: "f32[2304, 64][1, 2304]cuda:0" = torch.ops.aten.permute.default(view_161, [1, 0]) + mm_10: "f32[2304, 768][768, 1]cuda:0" = torch.ops.aten.mm.default(permute_118, 
view_132); permute_118 = view_132 = None + permute_119: "f32[768, 2304][1, 768]cuda:0" = torch.ops.aten.permute.default(mm_10, [1, 0]); mm_10 = None + sum_12: "f32[1, 2304][2304, 1]cuda:0" = torch.ops.aten.sum.dim_IntList(view_161, [0], True); view_161 = None + view_162: "f32[2304][1]cuda:0" = torch.ops.aten.view.default(sum_12, [2304]); sum_12 = None + permute_120: "f32[2304, 768][768, 1]cuda:0" = torch.ops.aten.permute.default(permute_119, [1, 0]); permute_119 = None + view_163: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.view.default(mm_9, [1, 64, 768]); mm_9 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:43 in forward, code: return F.layer_norm(input, self.weight.shape, self.weight, self.bias, 1e-5) + mul_122: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.mul.Tensor(view_163, primals_136); primals_136 = None + mul_123: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.mul.Tensor(mul_122, 768) + sum_13: "f32[1, 64, 1][64, 1, 1]cuda:0" = torch.ops.aten.sum.dim_IntList(mul_122, [2], True) + mul_124: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.mul.Tensor(mul_122, mul_88); mul_122 = None + sum_14: "f32[1, 64, 1][64, 1, 1]cuda:0" = torch.ops.aten.sum.dim_IntList(mul_124, [2], True); mul_124 = None + mul_125: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.mul.Tensor(mul_88, sum_14); sum_14 = None + sub_33: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.sub.Tensor(mul_123, sum_13); mul_123 = sum_13 = None + sub_34: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.sub.Tensor(sub_33, mul_125); sub_33 = mul_125 = None + mul_126: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.mul.Tensor(div_2, sub_34); div_2 = sub_34 = None + mul_127: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.mul.Tensor(view_163, mul_88); mul_88 = None + sum_15: "f32[768][1]cuda:0" = torch.ops.aten.sum.dim_IntList(mul_127, [0, 1]); mul_127 = None + sum_16: "f32[768][1]cuda:0" = torch.ops.aten.sum.dim_IntList(view_163, [0, 1]); view_163 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:43 in forward, code: return F.layer_norm(input, self.weight.shape, self.weight, self.bias, 1e-5) + add_102: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.add.Tensor(add_101, mul_126); add_101 = mul_126 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:130 in forward, code: x = self.c_proj(x) + view_164: "f32[64, 768][768, 1]cuda:0" = torch.ops.aten.view.default(add_102, [64, 768]) + mm_11: "f32[64, 3072][3072, 1]cuda:0" = torch.ops.aten.mm.default(view_164, permute_121); permute_121 = None + permute_122: "f32[768, 64][1, 768]cuda:0" = torch.ops.aten.permute.default(view_164, [1, 0]) + mm_12: "f32[768, 3072][3072, 1]cuda:0" = torch.ops.aten.mm.default(permute_122, view_130); permute_122 = view_130 = None + permute_123: "f32[3072, 768][1, 3072]cuda:0" = torch.ops.aten.permute.default(mm_12, [1, 0]); mm_12 = None + sum_17: "f32[1, 768][768, 1]cuda:0" = torch.ops.aten.sum.dim_IntList(view_164, [0], True); view_164 = None + view_165: "f32[768][1]cuda:0" = torch.ops.aten.view.default(sum_17, [768]); sum_17 = None + permute_124: "f32[768, 3072][3072, 1]cuda:0" = torch.ops.aten.permute.default(permute_123, [1, 0]); permute_123 = None + view_166: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.view.default(mm_11, [1, 64, 3072]); mm_11 = None + + # File: 
/data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:128 in forward, code: x = self.c_fc(x) + view_129: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.view.default(addmm_42, [1, 64, 3072]); addmm_42 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:25 in new_gelu, code: 0.5 + mul_84: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.mul.Tensor(view_129, 0.5) + mul_128: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.mul.Tensor(view_166, mul_84); mul_84 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:29 in new_gelu, code: + torch.tanh(math.sqrt(2.0 / math.pi) * (x + 0.044715 * torch.pow(x, 3.0))) + pow_11: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.pow.Tensor_Scalar(view_129, 3.0) + mul_85: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.mul.Tensor(pow_11, 0.044715); pow_11 = None + add_86: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.add.Tensor(view_129, mul_85); mul_85 = None + mul_86: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.mul.Tensor(add_86, 0.7978845608028654); add_86 = None + tanh_10: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.tanh.default(mul_86); mul_86 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:28 in new_gelu, code: 1.0 + add_87: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.add.Tensor(tanh_10, 1.0) + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:25 in new_gelu, code: 0.5 + mul_129: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.mul.Tensor(view_166, add_87); view_166 = add_87 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:29 in new_gelu, code: + torch.tanh(math.sqrt(2.0 / math.pi) * (x + 0.044715 * torch.pow(x, 3.0))) + mul_130: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.mul.Tensor(tanh_10, tanh_10); tanh_10 = None + sub_35: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.sub.Tensor(1, mul_130); mul_130 = None + mul_131: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.mul.Tensor(mul_128, sub_35); mul_128 = sub_35 = None + mul_132: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.mul.Tensor(mul_131, 0.7978845608028654); mul_131 = None + mul_133: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.mul.Tensor(mul_132, 0.044715) + pow_14: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.pow.Tensor_Scalar(view_129, 2.0); view_129 = None + mul_134: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.mul.Scalar(pow_14, 3.0); pow_14 = None + mul_135: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.mul.Tensor(mul_133, mul_134); mul_133 = mul_134 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:29 in new_gelu, code: + torch.tanh(math.sqrt(2.0 / math.pi) * (x + 0.044715 * torch.pow(x, 3.0))) + add_103: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.add.Tensor(mul_132, mul_135); mul_132 = mul_135 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:25 in new_gelu, code: 0.5 + mul_136: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.mul.Tensor(mul_129, 0.5); mul_129 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:25 in new_gelu, code: 0.5 + add_104: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = 
torch.ops.aten.add.Tensor(add_103, mul_136); add_103 = mul_136 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:128 in forward, code: x = self.c_fc(x) + view_167: "f32[64, 3072][3072, 1]cuda:0" = torch.ops.aten.view.default(add_104, [64, 3072]); add_104 = None + mm_13: "f32[64, 768][768, 1]cuda:0" = torch.ops.aten.mm.default(view_167, permute_125); permute_125 = None + permute_126: "f32[3072, 64][1, 3072]cuda:0" = torch.ops.aten.permute.default(view_167, [1, 0]) + mm_14: "f32[3072, 768][768, 1]cuda:0" = torch.ops.aten.mm.default(permute_126, view_128); permute_126 = view_128 = None + permute_127: "f32[768, 3072][1, 768]cuda:0" = torch.ops.aten.permute.default(mm_14, [1, 0]); mm_14 = None + sum_18: "f32[1, 3072][3072, 1]cuda:0" = torch.ops.aten.sum.dim_IntList(view_167, [0], True); view_167 = None + view_168: "f32[3072][1]cuda:0" = torch.ops.aten.view.default(sum_18, [3072]); sum_18 = None + permute_128: "f32[3072, 768][768, 1]cuda:0" = torch.ops.aten.permute.default(permute_127, [1, 0]); permute_127 = None + view_169: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.view.default(mm_13, [1, 64, 768]); mm_13 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:43 in forward, code: return F.layer_norm(input, self.weight.shape, self.weight, self.bias, 1e-5) + mul_138: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.mul.Tensor(view_169, primals_130); primals_130 = None + mul_139: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.mul.Tensor(mul_138, 768) + sum_19: "f32[1, 64, 1][64, 1, 1]cuda:0" = torch.ops.aten.sum.dim_IntList(mul_138, [2], True) + mul_140: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.mul.Tensor(mul_138, mul_82); mul_138 = None + sum_20: "f32[1, 64, 1][64, 1, 1]cuda:0" = torch.ops.aten.sum.dim_IntList(mul_140, [2], True); mul_140 = None + mul_141: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.mul.Tensor(mul_82, sum_20); sum_20 = None + sub_37: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.sub.Tensor(mul_139, sum_19); mul_139 = sum_19 = None + sub_38: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.sub.Tensor(sub_37, mul_141); sub_37 = mul_141 = None + mul_142: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.mul.Tensor(div_3, sub_38); div_3 = sub_38 = None + mul_143: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.mul.Tensor(view_169, mul_82); mul_82 = None + sum_21: "f32[768][1]cuda:0" = torch.ops.aten.sum.dim_IntList(mul_143, [0, 1]); mul_143 = None + sum_22: "f32[768][1]cuda:0" = torch.ops.aten.sum.dim_IntList(view_169, [0, 1]); view_169 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:43 in forward, code: return F.layer_norm(input, self.weight.shape, self.weight, self.bias, 1e-5) + add_105: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.add.Tensor(add_102, mul_142); add_102 = mul_142 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:116 in forward, code: y = self.resid_dropout(self.c_proj(y)) + view_170: "f32[64, 768][768, 1]cuda:0" = torch.ops.aten.view.default(add_105, [64, 768]) + mm_15: "f32[64, 768][768, 1]cuda:0" = torch.ops.aten.mm.default(view_170, permute_129); permute_129 = None + permute_130: "f32[768, 64][1, 768]cuda:0" = torch.ops.aten.permute.default(view_170, [1, 0]) + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:112 in forward, code: y.transpose(1, 2).contiguous().view(B, T, 
C) + permute_84: "f32[1, 64, 12, 64][49152, 768, 64, 1]cuda:0" = torch.ops.aten.permute.default(getitem_115, [0, 2, 1, 3]) + view_125: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.view.default(permute_84, [1, 64, 768]); permute_84 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:116 in forward, code: y = self.resid_dropout(self.c_proj(y)) + view_126: "f32[64, 768][768, 1]cuda:0" = torch.ops.aten.view.default(view_125, [64, 768]); view_125 = None + mm_16: "f32[768, 768][768, 1]cuda:0" = torch.ops.aten.mm.default(permute_130, view_126); permute_130 = view_126 = None + permute_131: "f32[768, 768][1, 768]cuda:0" = torch.ops.aten.permute.default(mm_16, [1, 0]); mm_16 = None + sum_23: "f32[1, 768][768, 1]cuda:0" = torch.ops.aten.sum.dim_IntList(view_170, [0], True); view_170 = None + view_171: "f32[768][1]cuda:0" = torch.ops.aten.view.default(sum_23, [768]); sum_23 = None + permute_132: "f32[768, 768][768, 1]cuda:0" = torch.ops.aten.permute.default(permute_131, [1, 0]); permute_131 = None + view_172: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.view.default(mm_15, [1, 64, 768]); mm_15 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:112 in forward, code: y.transpose(1, 2).contiguous().view(B, T, C) + view_173: "f32[1, 64, 12, 64][49152, 768, 64, 1]cuda:0" = torch.ops.aten.view.default(view_172, [1, 64, 12, 64]); view_172 = None + permute_133: "f32[1, 12, 64, 64][49152, 64, 768, 1]cuda:0" = torch.ops.aten.permute.default(view_173, [0, 2, 1, 3]); view_173 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:96 in forward, code: y = torch.nn.functional.scaled_dot_product_attention( + _scaled_dot_product_efficient_attention_backward_1 = torch.ops.aten._scaled_dot_product_efficient_attention_backward.default(permute_133, permute_82, permute_81, permute_83, None, getitem_115, getitem_116, getitem_117, getitem_118, 0.0, [True, True, True, False], True); permute_133 = permute_82 = permute_81 = permute_83 = getitem_115 = getitem_116 = getitem_117 = getitem_118 = None + getitem_138: "f32[1, 12, 64, 64][49152, 64, 768, 1]cuda:0" = _scaled_dot_product_efficient_attention_backward_1[0] + getitem_139: "f32[1, 12, 64, 64][49152, 64, 768, 1]cuda:0" = _scaled_dot_product_efficient_attention_backward_1[1] + getitem_140: "f32[1, 12, 64, 64][49152, 64, 768, 1]cuda:0" = _scaled_dot_product_efficient_attention_backward_1[2]; _scaled_dot_product_efficient_attention_backward_1 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:89 in forward, code: v = v.view(B, T, self.n_head, C // self.n_head).transpose( + permute_134: "f32[1, 64, 12, 64][49152, 768, 64, 1]cuda:0" = torch.ops.aten.permute.default(getitem_140, [0, 2, 1, 3]); getitem_140 = None + view_174: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.view.default(permute_134, [1, 64, 768]); permute_134 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:86 in forward, code: q = q.view(B, T, self.n_head, C // self.n_head).transpose( + permute_135: "f32[1, 64, 12, 64][49152, 768, 64, 1]cuda:0" = torch.ops.aten.permute.default(getitem_138, [0, 2, 1, 3]); getitem_138 = None + view_175: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.view.default(permute_135, [1, 64, 768]); permute_135 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:83 in forward, code: k = k.view(B, T, self.n_head, C // 
self.n_head).transpose( + permute_136: "f32[1, 64, 12, 64][49152, 768, 64, 1]cuda:0" = torch.ops.aten.permute.default(getitem_139, [0, 2, 1, 3]); getitem_139 = None + view_176: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.view.default(permute_136, [1, 64, 768]); permute_136 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:82 in forward, code: q, k, v = self.c_attn(x).split(self.n_embd, dim=2) + cat_1: "f32[1, 64, 2304][147456, 2304, 1]cuda:0" = torch.ops.aten.cat.default([view_175, view_176, view_174], 2); view_175 = view_176 = view_174 = None + view_177: "f32[64, 2304][2304, 1]cuda:0" = torch.ops.aten.view.default(cat_1, [64, 2304]); cat_1 = None + mm_17: "f32[64, 768][768, 1]cuda:0" = torch.ops.aten.mm.default(view_177, permute_137); permute_137 = None + permute_138: "f32[2304, 64][1, 2304]cuda:0" = torch.ops.aten.permute.default(view_177, [1, 0]) + mm_18: "f32[2304, 768][768, 1]cuda:0" = torch.ops.aten.mm.default(permute_138, view_120); permute_138 = view_120 = None + permute_139: "f32[768, 2304][1, 768]cuda:0" = torch.ops.aten.permute.default(mm_18, [1, 0]); mm_18 = None + sum_24: "f32[1, 2304][2304, 1]cuda:0" = torch.ops.aten.sum.dim_IntList(view_177, [0], True); view_177 = None + view_178: "f32[2304][1]cuda:0" = torch.ops.aten.view.default(sum_24, [2304]); sum_24 = None + permute_140: "f32[2304, 768][768, 1]cuda:0" = torch.ops.aten.permute.default(permute_139, [1, 0]); permute_139 = None + view_179: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.view.default(mm_17, [1, 64, 768]); mm_17 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:43 in forward, code: return F.layer_norm(input, self.weight.shape, self.weight, self.bias, 1e-5) + mul_145: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.mul.Tensor(view_179, primals_124); primals_124 = None + mul_146: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.mul.Tensor(mul_145, 768) + sum_25: "f32[1, 64, 1][64, 1, 1]cuda:0" = torch.ops.aten.sum.dim_IntList(mul_145, [2], True) + mul_147: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.mul.Tensor(mul_145, mul_80); mul_145 = None + sum_26: "f32[1, 64, 1][64, 1, 1]cuda:0" = torch.ops.aten.sum.dim_IntList(mul_147, [2], True); mul_147 = None + mul_148: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.mul.Tensor(mul_80, sum_26); sum_26 = None + sub_40: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.sub.Tensor(mul_146, sum_25); mul_146 = sum_25 = None + sub_41: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.sub.Tensor(sub_40, mul_148); sub_40 = mul_148 = None + mul_149: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.mul.Tensor(div_4, sub_41); div_4 = sub_41 = None + mul_150: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.mul.Tensor(view_179, mul_80); mul_80 = None + sum_27: "f32[768][1]cuda:0" = torch.ops.aten.sum.dim_IntList(mul_150, [0, 1]); mul_150 = None + sum_28: "f32[768][1]cuda:0" = torch.ops.aten.sum.dim_IntList(view_179, [0, 1]); view_179 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:43 in forward, code: return F.layer_norm(input, self.weight.shape, self.weight, self.bias, 1e-5) + add_106: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.add.Tensor(add_105, mul_149); add_105 = mul_149 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:130 in forward, code: x = self.c_proj(x) + view_180: "f32[64, 768][768, 1]cuda:0" = 
torch.ops.aten.view.default(add_106, [64, 768]) + mm_19: "f32[64, 3072][3072, 1]cuda:0" = torch.ops.aten.mm.default(view_180, permute_141); permute_141 = None + permute_142: "f32[768, 64][1, 768]cuda:0" = torch.ops.aten.permute.default(view_180, [1, 0]) + mm_20: "f32[768, 3072][3072, 1]cuda:0" = torch.ops.aten.mm.default(permute_142, view_118); permute_142 = view_118 = None + permute_143: "f32[3072, 768][1, 3072]cuda:0" = torch.ops.aten.permute.default(mm_20, [1, 0]); mm_20 = None + sum_29: "f32[1, 768][768, 1]cuda:0" = torch.ops.aten.sum.dim_IntList(view_180, [0], True); view_180 = None + view_181: "f32[768][1]cuda:0" = torch.ops.aten.view.default(sum_29, [768]); sum_29 = None + permute_144: "f32[768, 3072][3072, 1]cuda:0" = torch.ops.aten.permute.default(permute_143, [1, 0]); permute_143 = None + view_182: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.view.default(mm_19, [1, 64, 3072]); mm_19 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:128 in forward, code: x = self.c_fc(x) + view_117: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.view.default(addmm_38, [1, 64, 3072]); addmm_38 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:25 in new_gelu, code: 0.5 + mul_76: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.mul.Tensor(view_117, 0.5) + mul_151: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.mul.Tensor(view_182, mul_76); mul_76 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:29 in new_gelu, code: + torch.tanh(math.sqrt(2.0 / math.pi) * (x + 0.044715 * torch.pow(x, 3.0))) + pow_10: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.pow.Tensor_Scalar(view_117, 3.0) + mul_77: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.mul.Tensor(pow_10, 0.044715); pow_10 = None + add_78: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.add.Tensor(view_117, mul_77); mul_77 = None + mul_78: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.mul.Tensor(add_78, 0.7978845608028654); add_78 = None + tanh_9: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.tanh.default(mul_78); mul_78 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:28 in new_gelu, code: 1.0 + add_79: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.add.Tensor(tanh_9, 1.0) + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:25 in new_gelu, code: 0.5 + mul_152: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.mul.Tensor(view_182, add_79); view_182 = add_79 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:29 in new_gelu, code: + torch.tanh(math.sqrt(2.0 / math.pi) * (x + 0.044715 * torch.pow(x, 3.0))) + mul_153: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.mul.Tensor(tanh_9, tanh_9); tanh_9 = None + sub_42: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.sub.Tensor(1, mul_153); mul_153 = None + mul_154: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.mul.Tensor(mul_151, sub_42); mul_151 = sub_42 = None + mul_155: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.mul.Tensor(mul_154, 0.7978845608028654); mul_154 = None + mul_156: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.mul.Tensor(mul_155, 0.044715) + pow_15: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.pow.Tensor_Scalar(view_117, 2.0); view_117 = None + 
mul_157: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.mul.Scalar(pow_15, 3.0); pow_15 = None + mul_158: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.mul.Tensor(mul_156, mul_157); mul_156 = mul_157 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:29 in new_gelu, code: + torch.tanh(math.sqrt(2.0 / math.pi) * (x + 0.044715 * torch.pow(x, 3.0))) + add_107: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.add.Tensor(mul_155, mul_158); mul_155 = mul_158 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:25 in new_gelu, code: 0.5 + mul_159: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.mul.Tensor(mul_152, 0.5); mul_152 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:25 in new_gelu, code: 0.5 + add_108: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.add.Tensor(add_107, mul_159); add_107 = mul_159 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:128 in forward, code: x = self.c_fc(x) + view_183: "f32[64, 3072][3072, 1]cuda:0" = torch.ops.aten.view.default(add_108, [64, 3072]); add_108 = None + mm_21: "f32[64, 768][768, 1]cuda:0" = torch.ops.aten.mm.default(view_183, permute_145); permute_145 = None + permute_146: "f32[3072, 64][1, 3072]cuda:0" = torch.ops.aten.permute.default(view_183, [1, 0]) + mm_22: "f32[3072, 768][768, 1]cuda:0" = torch.ops.aten.mm.default(permute_146, view_116); permute_146 = view_116 = None + permute_147: "f32[768, 3072][1, 768]cuda:0" = torch.ops.aten.permute.default(mm_22, [1, 0]); mm_22 = None + sum_30: "f32[1, 3072][3072, 1]cuda:0" = torch.ops.aten.sum.dim_IntList(view_183, [0], True); view_183 = None + view_184: "f32[3072][1]cuda:0" = torch.ops.aten.view.default(sum_30, [3072]); sum_30 = None + permute_148: "f32[3072, 768][768, 1]cuda:0" = torch.ops.aten.permute.default(permute_147, [1, 0]); permute_147 = None + view_185: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.view.default(mm_21, [1, 64, 768]); mm_21 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:43 in forward, code: return F.layer_norm(input, self.weight.shape, self.weight, self.bias, 1e-5) + mul_161: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.mul.Tensor(view_185, primals_118); primals_118 = None + mul_162: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.mul.Tensor(mul_161, 768) + sum_31: "f32[1, 64, 1][64, 1, 1]cuda:0" = torch.ops.aten.sum.dim_IntList(mul_161, [2], True) + mul_163: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.mul.Tensor(mul_161, mul_74); mul_161 = None + sum_32: "f32[1, 64, 1][64, 1, 1]cuda:0" = torch.ops.aten.sum.dim_IntList(mul_163, [2], True); mul_163 = None + mul_164: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.mul.Tensor(mul_74, sum_32); sum_32 = None + sub_44: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.sub.Tensor(mul_162, sum_31); mul_162 = sum_31 = None + sub_45: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.sub.Tensor(sub_44, mul_164); sub_44 = mul_164 = None + mul_165: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.mul.Tensor(div_5, sub_45); div_5 = sub_45 = None + mul_166: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.mul.Tensor(view_185, mul_74); mul_74 = None + sum_33: "f32[768][1]cuda:0" = torch.ops.aten.sum.dim_IntList(mul_166, [0, 1]); mul_166 = None + sum_34: "f32[768][1]cuda:0" = 
torch.ops.aten.sum.dim_IntList(view_185, [0, 1]); view_185 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:43 in forward, code: return F.layer_norm(input, self.weight.shape, self.weight, self.bias, 1e-5) + add_109: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.add.Tensor(add_106, mul_165); add_106 = mul_165 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:116 in forward, code: y = self.resid_dropout(self.c_proj(y)) + view_186: "f32[64, 768][768, 1]cuda:0" = torch.ops.aten.view.default(add_109, [64, 768]) + mm_23: "f32[64, 768][768, 1]cuda:0" = torch.ops.aten.mm.default(view_186, permute_149); permute_149 = None + permute_150: "f32[768, 64][1, 768]cuda:0" = torch.ops.aten.permute.default(view_186, [1, 0]) + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:112 in forward, code: y.transpose(1, 2).contiguous().view(B, T, C) + permute_76: "f32[1, 64, 12, 64][49152, 768, 64, 1]cuda:0" = torch.ops.aten.permute.default(getitem_104, [0, 2, 1, 3]) + view_113: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.view.default(permute_76, [1, 64, 768]); permute_76 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:116 in forward, code: y = self.resid_dropout(self.c_proj(y)) + view_114: "f32[64, 768][768, 1]cuda:0" = torch.ops.aten.view.default(view_113, [64, 768]); view_113 = None + mm_24: "f32[768, 768][768, 1]cuda:0" = torch.ops.aten.mm.default(permute_150, view_114); permute_150 = view_114 = None + permute_151: "f32[768, 768][1, 768]cuda:0" = torch.ops.aten.permute.default(mm_24, [1, 0]); mm_24 = None + sum_35: "f32[1, 768][768, 1]cuda:0" = torch.ops.aten.sum.dim_IntList(view_186, [0], True); view_186 = None + view_187: "f32[768][1]cuda:0" = torch.ops.aten.view.default(sum_35, [768]); sum_35 = None + permute_152: "f32[768, 768][768, 1]cuda:0" = torch.ops.aten.permute.default(permute_151, [1, 0]); permute_151 = None + view_188: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.view.default(mm_23, [1, 64, 768]); mm_23 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:112 in forward, code: y.transpose(1, 2).contiguous().view(B, T, C) + view_189: "f32[1, 64, 12, 64][49152, 768, 64, 1]cuda:0" = torch.ops.aten.view.default(view_188, [1, 64, 12, 64]); view_188 = None + permute_153: "f32[1, 12, 64, 64][49152, 64, 768, 1]cuda:0" = torch.ops.aten.permute.default(view_189, [0, 2, 1, 3]); view_189 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:96 in forward, code: y = torch.nn.functional.scaled_dot_product_attention( + _scaled_dot_product_efficient_attention_backward_2 = torch.ops.aten._scaled_dot_product_efficient_attention_backward.default(permute_153, permute_74, permute_73, permute_75, None, getitem_104, getitem_105, getitem_106, getitem_107, 0.0, [True, True, True, False], True); permute_153 = permute_74 = permute_73 = permute_75 = getitem_104 = getitem_105 = getitem_106 = getitem_107 = None + getitem_142: "f32[1, 12, 64, 64][49152, 64, 768, 1]cuda:0" = _scaled_dot_product_efficient_attention_backward_2[0] + getitem_143: "f32[1, 12, 64, 64][49152, 64, 768, 1]cuda:0" = _scaled_dot_product_efficient_attention_backward_2[1] + getitem_144: "f32[1, 12, 64, 64][49152, 64, 768, 1]cuda:0" = _scaled_dot_product_efficient_attention_backward_2[2]; _scaled_dot_product_efficient_attention_backward_2 = None + + # File: 
/data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:89 in forward, code: v = v.view(B, T, self.n_head, C // self.n_head).transpose( + permute_154: "f32[1, 64, 12, 64][49152, 768, 64, 1]cuda:0" = torch.ops.aten.permute.default(getitem_144, [0, 2, 1, 3]); getitem_144 = None + view_190: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.view.default(permute_154, [1, 64, 768]); permute_154 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:86 in forward, code: q = q.view(B, T, self.n_head, C // self.n_head).transpose( + permute_155: "f32[1, 64, 12, 64][49152, 768, 64, 1]cuda:0" = torch.ops.aten.permute.default(getitem_142, [0, 2, 1, 3]); getitem_142 = None + view_191: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.view.default(permute_155, [1, 64, 768]); permute_155 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:83 in forward, code: k = k.view(B, T, self.n_head, C // self.n_head).transpose( + permute_156: "f32[1, 64, 12, 64][49152, 768, 64, 1]cuda:0" = torch.ops.aten.permute.default(getitem_143, [0, 2, 1, 3]); getitem_143 = None + view_192: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.view.default(permute_156, [1, 64, 768]); permute_156 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:82 in forward, code: q, k, v = self.c_attn(x).split(self.n_embd, dim=2) + cat_2: "f32[1, 64, 2304][147456, 2304, 1]cuda:0" = torch.ops.aten.cat.default([view_191, view_192, view_190], 2); view_191 = view_192 = view_190 = None + view_193: "f32[64, 2304][2304, 1]cuda:0" = torch.ops.aten.view.default(cat_2, [64, 2304]); cat_2 = None + mm_25: "f32[64, 768][768, 1]cuda:0" = torch.ops.aten.mm.default(view_193, permute_157); permute_157 = None + permute_158: "f32[2304, 64][1, 2304]cuda:0" = torch.ops.aten.permute.default(view_193, [1, 0]) + mm_26: "f32[2304, 768][768, 1]cuda:0" = torch.ops.aten.mm.default(permute_158, view_108); permute_158 = view_108 = None + permute_159: "f32[768, 2304][1, 768]cuda:0" = torch.ops.aten.permute.default(mm_26, [1, 0]); mm_26 = None + sum_36: "f32[1, 2304][2304, 1]cuda:0" = torch.ops.aten.sum.dim_IntList(view_193, [0], True); view_193 = None + view_194: "f32[2304][1]cuda:0" = torch.ops.aten.view.default(sum_36, [2304]); sum_36 = None + permute_160: "f32[2304, 768][768, 1]cuda:0" = torch.ops.aten.permute.default(permute_159, [1, 0]); permute_159 = None + view_195: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.view.default(mm_25, [1, 64, 768]); mm_25 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:43 in forward, code: return F.layer_norm(input, self.weight.shape, self.weight, self.bias, 1e-5) + mul_168: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.mul.Tensor(view_195, primals_112); primals_112 = None + mul_169: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.mul.Tensor(mul_168, 768) + sum_37: "f32[1, 64, 1][64, 1, 1]cuda:0" = torch.ops.aten.sum.dim_IntList(mul_168, [2], True) + mul_170: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.mul.Tensor(mul_168, mul_72); mul_168 = None + sum_38: "f32[1, 64, 1][64, 1, 1]cuda:0" = torch.ops.aten.sum.dim_IntList(mul_170, [2], True); mul_170 = None + mul_171: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.mul.Tensor(mul_72, sum_38); sum_38 = None + sub_47: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.sub.Tensor(mul_169, sum_37); mul_169 = sum_37 = None + sub_48: "f32[1, 64, 
768][49152, 768, 1]cuda:0" = torch.ops.aten.sub.Tensor(sub_47, mul_171); sub_47 = mul_171 = None + mul_172: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.mul.Tensor(div_6, sub_48); div_6 = sub_48 = None + mul_173: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.mul.Tensor(view_195, mul_72); mul_72 = None + sum_39: "f32[768][1]cuda:0" = torch.ops.aten.sum.dim_IntList(mul_173, [0, 1]); mul_173 = None + sum_40: "f32[768][1]cuda:0" = torch.ops.aten.sum.dim_IntList(view_195, [0, 1]); view_195 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:43 in forward, code: return F.layer_norm(input, self.weight.shape, self.weight, self.bias, 1e-5) + add_110: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.add.Tensor(add_109, mul_172); add_109 = mul_172 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:130 in forward, code: x = self.c_proj(x) + view_196: "f32[64, 768][768, 1]cuda:0" = torch.ops.aten.view.default(add_110, [64, 768]) + mm_27: "f32[64, 3072][3072, 1]cuda:0" = torch.ops.aten.mm.default(view_196, permute_161); permute_161 = None + permute_162: "f32[768, 64][1, 768]cuda:0" = torch.ops.aten.permute.default(view_196, [1, 0]) + mm_28: "f32[768, 3072][3072, 1]cuda:0" = torch.ops.aten.mm.default(permute_162, view_106); permute_162 = view_106 = None + permute_163: "f32[3072, 768][1, 3072]cuda:0" = torch.ops.aten.permute.default(mm_28, [1, 0]); mm_28 = None + sum_41: "f32[1, 768][768, 1]cuda:0" = torch.ops.aten.sum.dim_IntList(view_196, [0], True); view_196 = None + view_197: "f32[768][1]cuda:0" = torch.ops.aten.view.default(sum_41, [768]); sum_41 = None + permute_164: "f32[768, 3072][3072, 1]cuda:0" = torch.ops.aten.permute.default(permute_163, [1, 0]); permute_163 = None + view_198: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.view.default(mm_27, [1, 64, 3072]); mm_27 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:128 in forward, code: x = self.c_fc(x) + view_105: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.view.default(addmm_34, [1, 64, 3072]); addmm_34 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:25 in new_gelu, code: 0.5 + mul_68: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.mul.Tensor(view_105, 0.5) + mul_174: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.mul.Tensor(view_198, mul_68); mul_68 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:29 in new_gelu, code: + torch.tanh(math.sqrt(2.0 / math.pi) * (x + 0.044715 * torch.pow(x, 3.0))) + pow_9: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.pow.Tensor_Scalar(view_105, 3.0) + mul_69: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.mul.Tensor(pow_9, 0.044715); pow_9 = None + add_70: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.add.Tensor(view_105, mul_69); mul_69 = None + mul_70: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.mul.Tensor(add_70, 0.7978845608028654); add_70 = None + tanh_8: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.tanh.default(mul_70); mul_70 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:28 in new_gelu, code: 1.0 + add_71: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.add.Tensor(tanh_8, 1.0) + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:25 in new_gelu, code: 0.5 + mul_175: 
"f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.mul.Tensor(view_198, add_71); view_198 = add_71 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:29 in new_gelu, code: + torch.tanh(math.sqrt(2.0 / math.pi) * (x + 0.044715 * torch.pow(x, 3.0))) + mul_176: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.mul.Tensor(tanh_8, tanh_8); tanh_8 = None + sub_49: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.sub.Tensor(1, mul_176); mul_176 = None + mul_177: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.mul.Tensor(mul_174, sub_49); mul_174 = sub_49 = None + mul_178: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.mul.Tensor(mul_177, 0.7978845608028654); mul_177 = None + mul_179: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.mul.Tensor(mul_178, 0.044715) + pow_16: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.pow.Tensor_Scalar(view_105, 2.0); view_105 = None + mul_180: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.mul.Scalar(pow_16, 3.0); pow_16 = None + mul_181: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.mul.Tensor(mul_179, mul_180); mul_179 = mul_180 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:29 in new_gelu, code: + torch.tanh(math.sqrt(2.0 / math.pi) * (x + 0.044715 * torch.pow(x, 3.0))) + add_111: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.add.Tensor(mul_178, mul_181); mul_178 = mul_181 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:25 in new_gelu, code: 0.5 + mul_182: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.mul.Tensor(mul_175, 0.5); mul_175 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:25 in new_gelu, code: 0.5 + add_112: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.add.Tensor(add_111, mul_182); add_111 = mul_182 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:128 in forward, code: x = self.c_fc(x) + view_199: "f32[64, 3072][3072, 1]cuda:0" = torch.ops.aten.view.default(add_112, [64, 3072]); add_112 = None + mm_29: "f32[64, 768][768, 1]cuda:0" = torch.ops.aten.mm.default(view_199, permute_165); permute_165 = None + permute_166: "f32[3072, 64][1, 3072]cuda:0" = torch.ops.aten.permute.default(view_199, [1, 0]) + mm_30: "f32[3072, 768][768, 1]cuda:0" = torch.ops.aten.mm.default(permute_166, view_104); permute_166 = view_104 = None + permute_167: "f32[768, 3072][1, 768]cuda:0" = torch.ops.aten.permute.default(mm_30, [1, 0]); mm_30 = None + sum_42: "f32[1, 3072][3072, 1]cuda:0" = torch.ops.aten.sum.dim_IntList(view_199, [0], True); view_199 = None + view_200: "f32[3072][1]cuda:0" = torch.ops.aten.view.default(sum_42, [3072]); sum_42 = None + permute_168: "f32[3072, 768][768, 1]cuda:0" = torch.ops.aten.permute.default(permute_167, [1, 0]); permute_167 = None + view_201: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.view.default(mm_29, [1, 64, 768]); mm_29 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:43 in forward, code: return F.layer_norm(input, self.weight.shape, self.weight, self.bias, 1e-5) + mul_184: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.mul.Tensor(view_201, primals_106); primals_106 = None + mul_185: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.mul.Tensor(mul_184, 768) + sum_43: "f32[1, 64, 1][64, 1, 1]cuda:0" = 
torch.ops.aten.sum.dim_IntList(mul_184, [2], True) + mul_186: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.mul.Tensor(mul_184, mul_66); mul_184 = None + sum_44: "f32[1, 64, 1][64, 1, 1]cuda:0" = torch.ops.aten.sum.dim_IntList(mul_186, [2], True); mul_186 = None + mul_187: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.mul.Tensor(mul_66, sum_44); sum_44 = None + sub_51: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.sub.Tensor(mul_185, sum_43); mul_185 = sum_43 = None + sub_52: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.sub.Tensor(sub_51, mul_187); sub_51 = mul_187 = None + mul_188: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.mul.Tensor(div_7, sub_52); div_7 = sub_52 = None + mul_189: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.mul.Tensor(view_201, mul_66); mul_66 = None + sum_45: "f32[768][1]cuda:0" = torch.ops.aten.sum.dim_IntList(mul_189, [0, 1]); mul_189 = None + sum_46: "f32[768][1]cuda:0" = torch.ops.aten.sum.dim_IntList(view_201, [0, 1]); view_201 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:43 in forward, code: return F.layer_norm(input, self.weight.shape, self.weight, self.bias, 1e-5) + add_113: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.add.Tensor(add_110, mul_188); add_110 = mul_188 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:116 in forward, code: y = self.resid_dropout(self.c_proj(y)) + view_202: "f32[64, 768][768, 1]cuda:0" = torch.ops.aten.view.default(add_113, [64, 768]) + mm_31: "f32[64, 768][768, 1]cuda:0" = torch.ops.aten.mm.default(view_202, permute_169); permute_169 = None + permute_170: "f32[768, 64][1, 768]cuda:0" = torch.ops.aten.permute.default(view_202, [1, 0]) + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:112 in forward, code: y.transpose(1, 2).contiguous().view(B, T, C) + permute_68: "f32[1, 64, 12, 64][49152, 768, 64, 1]cuda:0" = torch.ops.aten.permute.default(getitem_93, [0, 2, 1, 3]) + view_101: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.view.default(permute_68, [1, 64, 768]); permute_68 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:116 in forward, code: y = self.resid_dropout(self.c_proj(y)) + view_102: "f32[64, 768][768, 1]cuda:0" = torch.ops.aten.view.default(view_101, [64, 768]); view_101 = None + mm_32: "f32[768, 768][768, 1]cuda:0" = torch.ops.aten.mm.default(permute_170, view_102); permute_170 = view_102 = None + permute_171: "f32[768, 768][1, 768]cuda:0" = torch.ops.aten.permute.default(mm_32, [1, 0]); mm_32 = None + sum_47: "f32[1, 768][768, 1]cuda:0" = torch.ops.aten.sum.dim_IntList(view_202, [0], True); view_202 = None + view_203: "f32[768][1]cuda:0" = torch.ops.aten.view.default(sum_47, [768]); sum_47 = None + permute_172: "f32[768, 768][768, 1]cuda:0" = torch.ops.aten.permute.default(permute_171, [1, 0]); permute_171 = None + view_204: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.view.default(mm_31, [1, 64, 768]); mm_31 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:112 in forward, code: y.transpose(1, 2).contiguous().view(B, T, C) + view_205: "f32[1, 64, 12, 64][49152, 768, 64, 1]cuda:0" = torch.ops.aten.view.default(view_204, [1, 64, 12, 64]); view_204 = None + permute_173: "f32[1, 12, 64, 64][49152, 64, 768, 1]cuda:0" = torch.ops.aten.permute.default(view_205, [0, 2, 1, 3]); view_205 = None + + # File: 
/data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:96 in forward, code: y = torch.nn.functional.scaled_dot_product_attention( + _scaled_dot_product_efficient_attention_backward_3 = torch.ops.aten._scaled_dot_product_efficient_attention_backward.default(permute_173, permute_66, permute_65, permute_67, None, getitem_93, getitem_94, getitem_95, getitem_96, 0.0, [True, True, True, False], True); permute_173 = permute_66 = permute_65 = permute_67 = getitem_93 = getitem_94 = getitem_95 = getitem_96 = None + getitem_146: "f32[1, 12, 64, 64][49152, 64, 768, 1]cuda:0" = _scaled_dot_product_efficient_attention_backward_3[0] + getitem_147: "f32[1, 12, 64, 64][49152, 64, 768, 1]cuda:0" = _scaled_dot_product_efficient_attention_backward_3[1] + getitem_148: "f32[1, 12, 64, 64][49152, 64, 768, 1]cuda:0" = _scaled_dot_product_efficient_attention_backward_3[2]; _scaled_dot_product_efficient_attention_backward_3 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:89 in forward, code: v = v.view(B, T, self.n_head, C // self.n_head).transpose( + permute_174: "f32[1, 64, 12, 64][49152, 768, 64, 1]cuda:0" = torch.ops.aten.permute.default(getitem_148, [0, 2, 1, 3]); getitem_148 = None + view_206: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.view.default(permute_174, [1, 64, 768]); permute_174 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:86 in forward, code: q = q.view(B, T, self.n_head, C // self.n_head).transpose( + permute_175: "f32[1, 64, 12, 64][49152, 768, 64, 1]cuda:0" = torch.ops.aten.permute.default(getitem_146, [0, 2, 1, 3]); getitem_146 = None + view_207: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.view.default(permute_175, [1, 64, 768]); permute_175 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:83 in forward, code: k = k.view(B, T, self.n_head, C // self.n_head).transpose( + permute_176: "f32[1, 64, 12, 64][49152, 768, 64, 1]cuda:0" = torch.ops.aten.permute.default(getitem_147, [0, 2, 1, 3]); getitem_147 = None + view_208: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.view.default(permute_176, [1, 64, 768]); permute_176 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:82 in forward, code: q, k, v = self.c_attn(x).split(self.n_embd, dim=2) + cat_3: "f32[1, 64, 2304][147456, 2304, 1]cuda:0" = torch.ops.aten.cat.default([view_207, view_208, view_206], 2); view_207 = view_208 = view_206 = None + view_209: "f32[64, 2304][2304, 1]cuda:0" = torch.ops.aten.view.default(cat_3, [64, 2304]); cat_3 = None + mm_33: "f32[64, 768][768, 1]cuda:0" = torch.ops.aten.mm.default(view_209, permute_177); permute_177 = None + permute_178: "f32[2304, 64][1, 2304]cuda:0" = torch.ops.aten.permute.default(view_209, [1, 0]) + mm_34: "f32[2304, 768][768, 1]cuda:0" = torch.ops.aten.mm.default(permute_178, view_96); permute_178 = view_96 = None + permute_179: "f32[768, 2304][1, 768]cuda:0" = torch.ops.aten.permute.default(mm_34, [1, 0]); mm_34 = None + sum_48: "f32[1, 2304][2304, 1]cuda:0" = torch.ops.aten.sum.dim_IntList(view_209, [0], True); view_209 = None + view_210: "f32[2304][1]cuda:0" = torch.ops.aten.view.default(sum_48, [2304]); sum_48 = None + permute_180: "f32[2304, 768][768, 1]cuda:0" = torch.ops.aten.permute.default(permute_179, [1, 0]); permute_179 = None + view_211: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.view.default(mm_33, [1, 64, 768]); mm_33 = None + + # File: 
/data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:43 in forward, code: return F.layer_norm(input, self.weight.shape, self.weight, self.bias, 1e-5) + mul_191: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.mul.Tensor(view_211, primals_100); primals_100 = None + mul_192: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.mul.Tensor(mul_191, 768) + sum_49: "f32[1, 64, 1][64, 1, 1]cuda:0" = torch.ops.aten.sum.dim_IntList(mul_191, [2], True) + mul_193: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.mul.Tensor(mul_191, mul_64); mul_191 = None + sum_50: "f32[1, 64, 1][64, 1, 1]cuda:0" = torch.ops.aten.sum.dim_IntList(mul_193, [2], True); mul_193 = None + mul_194: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.mul.Tensor(mul_64, sum_50); sum_50 = None + sub_54: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.sub.Tensor(mul_192, sum_49); mul_192 = sum_49 = None + sub_55: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.sub.Tensor(sub_54, mul_194); sub_54 = mul_194 = None + mul_195: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.mul.Tensor(div_8, sub_55); div_8 = sub_55 = None + mul_196: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.mul.Tensor(view_211, mul_64); mul_64 = None + sum_51: "f32[768][1]cuda:0" = torch.ops.aten.sum.dim_IntList(mul_196, [0, 1]); mul_196 = None + sum_52: "f32[768][1]cuda:0" = torch.ops.aten.sum.dim_IntList(view_211, [0, 1]); view_211 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:43 in forward, code: return F.layer_norm(input, self.weight.shape, self.weight, self.bias, 1e-5) + add_114: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.add.Tensor(add_113, mul_195); add_113 = mul_195 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:130 in forward, code: x = self.c_proj(x) + view_212: "f32[64, 768][768, 1]cuda:0" = torch.ops.aten.view.default(add_114, [64, 768]) + mm_35: "f32[64, 3072][3072, 1]cuda:0" = torch.ops.aten.mm.default(view_212, permute_181); permute_181 = None + permute_182: "f32[768, 64][1, 768]cuda:0" = torch.ops.aten.permute.default(view_212, [1, 0]) + mm_36: "f32[768, 3072][3072, 1]cuda:0" = torch.ops.aten.mm.default(permute_182, view_94); permute_182 = view_94 = None + permute_183: "f32[3072, 768][1, 3072]cuda:0" = torch.ops.aten.permute.default(mm_36, [1, 0]); mm_36 = None + sum_53: "f32[1, 768][768, 1]cuda:0" = torch.ops.aten.sum.dim_IntList(view_212, [0], True); view_212 = None + view_213: "f32[768][1]cuda:0" = torch.ops.aten.view.default(sum_53, [768]); sum_53 = None + permute_184: "f32[768, 3072][3072, 1]cuda:0" = torch.ops.aten.permute.default(permute_183, [1, 0]); permute_183 = None + view_214: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.view.default(mm_35, [1, 64, 3072]); mm_35 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:128 in forward, code: x = self.c_fc(x) + view_93: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.view.default(addmm_30, [1, 64, 3072]); addmm_30 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:25 in new_gelu, code: 0.5 + mul_60: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.mul.Tensor(view_93, 0.5) + mul_197: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.mul.Tensor(view_214, mul_60); mul_60 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:29 in new_gelu, code: + 
torch.tanh(math.sqrt(2.0 / math.pi) * (x + 0.044715 * torch.pow(x, 3.0))) + pow_8: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.pow.Tensor_Scalar(view_93, 3.0) + mul_61: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.mul.Tensor(pow_8, 0.044715); pow_8 = None + add_62: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.add.Tensor(view_93, mul_61); mul_61 = None + mul_62: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.mul.Tensor(add_62, 0.7978845608028654); add_62 = None + tanh_7: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.tanh.default(mul_62); mul_62 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:28 in new_gelu, code: 1.0 + add_63: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.add.Tensor(tanh_7, 1.0) + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:25 in new_gelu, code: 0.5 + mul_198: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.mul.Tensor(view_214, add_63); view_214 = add_63 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:29 in new_gelu, code: + torch.tanh(math.sqrt(2.0 / math.pi) * (x + 0.044715 * torch.pow(x, 3.0))) + mul_199: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.mul.Tensor(tanh_7, tanh_7); tanh_7 = None + sub_56: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.sub.Tensor(1, mul_199); mul_199 = None + mul_200: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.mul.Tensor(mul_197, sub_56); mul_197 = sub_56 = None + mul_201: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.mul.Tensor(mul_200, 0.7978845608028654); mul_200 = None + mul_202: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.mul.Tensor(mul_201, 0.044715) + pow_17: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.pow.Tensor_Scalar(view_93, 2.0); view_93 = None + mul_203: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.mul.Scalar(pow_17, 3.0); pow_17 = None + mul_204: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.mul.Tensor(mul_202, mul_203); mul_202 = mul_203 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:29 in new_gelu, code: + torch.tanh(math.sqrt(2.0 / math.pi) * (x + 0.044715 * torch.pow(x, 3.0))) + add_115: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.add.Tensor(mul_201, mul_204); mul_201 = mul_204 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:25 in new_gelu, code: 0.5 + mul_205: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.mul.Tensor(mul_198, 0.5); mul_198 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:25 in new_gelu, code: 0.5 + add_116: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.add.Tensor(add_115, mul_205); add_115 = mul_205 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:128 in forward, code: x = self.c_fc(x) + view_215: "f32[64, 3072][3072, 1]cuda:0" = torch.ops.aten.view.default(add_116, [64, 3072]); add_116 = None + mm_37: "f32[64, 768][768, 1]cuda:0" = torch.ops.aten.mm.default(view_215, permute_185); permute_185 = None + permute_186: "f32[3072, 64][1, 3072]cuda:0" = torch.ops.aten.permute.default(view_215, [1, 0]) + mm_38: "f32[3072, 768][768, 1]cuda:0" = torch.ops.aten.mm.default(permute_186, view_92); permute_186 = view_92 = None + permute_187: "f32[768, 3072][1, 768]cuda:0" = 
torch.ops.aten.permute.default(mm_38, [1, 0]); mm_38 = None + sum_54: "f32[1, 3072][3072, 1]cuda:0" = torch.ops.aten.sum.dim_IntList(view_215, [0], True); view_215 = None + view_216: "f32[3072][1]cuda:0" = torch.ops.aten.view.default(sum_54, [3072]); sum_54 = None + permute_188: "f32[3072, 768][768, 1]cuda:0" = torch.ops.aten.permute.default(permute_187, [1, 0]); permute_187 = None + view_217: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.view.default(mm_37, [1, 64, 768]); mm_37 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:43 in forward, code: return F.layer_norm(input, self.weight.shape, self.weight, self.bias, 1e-5) + mul_207: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.mul.Tensor(view_217, primals_94); primals_94 = None + mul_208: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.mul.Tensor(mul_207, 768) + sum_55: "f32[1, 64, 1][64, 1, 1]cuda:0" = torch.ops.aten.sum.dim_IntList(mul_207, [2], True) + mul_209: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.mul.Tensor(mul_207, mul_58); mul_207 = None + sum_56: "f32[1, 64, 1][64, 1, 1]cuda:0" = torch.ops.aten.sum.dim_IntList(mul_209, [2], True); mul_209 = None + mul_210: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.mul.Tensor(mul_58, sum_56); sum_56 = None + sub_58: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.sub.Tensor(mul_208, sum_55); mul_208 = sum_55 = None + sub_59: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.sub.Tensor(sub_58, mul_210); sub_58 = mul_210 = None + mul_211: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.mul.Tensor(div_9, sub_59); div_9 = sub_59 = None + mul_212: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.mul.Tensor(view_217, mul_58); mul_58 = None + sum_57: "f32[768][1]cuda:0" = torch.ops.aten.sum.dim_IntList(mul_212, [0, 1]); mul_212 = None + sum_58: "f32[768][1]cuda:0" = torch.ops.aten.sum.dim_IntList(view_217, [0, 1]); view_217 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:43 in forward, code: return F.layer_norm(input, self.weight.shape, self.weight, self.bias, 1e-5) + add_117: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.add.Tensor(add_114, mul_211); add_114 = mul_211 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:116 in forward, code: y = self.resid_dropout(self.c_proj(y)) + view_218: "f32[64, 768][768, 1]cuda:0" = torch.ops.aten.view.default(add_117, [64, 768]) + mm_39: "f32[64, 768][768, 1]cuda:0" = torch.ops.aten.mm.default(view_218, permute_189); permute_189 = None + permute_190: "f32[768, 64][1, 768]cuda:0" = torch.ops.aten.permute.default(view_218, [1, 0]) + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:112 in forward, code: y.transpose(1, 2).contiguous().view(B, T, C) + permute_60: "f32[1, 64, 12, 64][49152, 768, 64, 1]cuda:0" = torch.ops.aten.permute.default(getitem_82, [0, 2, 1, 3]) + view_89: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.view.default(permute_60, [1, 64, 768]); permute_60 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:116 in forward, code: y = self.resid_dropout(self.c_proj(y)) + view_90: "f32[64, 768][768, 1]cuda:0" = torch.ops.aten.view.default(view_89, [64, 768]); view_89 = None + mm_40: "f32[768, 768][768, 1]cuda:0" = torch.ops.aten.mm.default(permute_190, view_90); permute_190 = view_90 = None + permute_191: "f32[768, 768][1, 768]cuda:0" = 
torch.ops.aten.permute.default(mm_40, [1, 0]); mm_40 = None + sum_59: "f32[1, 768][768, 1]cuda:0" = torch.ops.aten.sum.dim_IntList(view_218, [0], True); view_218 = None + view_219: "f32[768][1]cuda:0" = torch.ops.aten.view.default(sum_59, [768]); sum_59 = None + permute_192: "f32[768, 768][768, 1]cuda:0" = torch.ops.aten.permute.default(permute_191, [1, 0]); permute_191 = None + view_220: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.view.default(mm_39, [1, 64, 768]); mm_39 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:112 in forward, code: y.transpose(1, 2).contiguous().view(B, T, C) + view_221: "f32[1, 64, 12, 64][49152, 768, 64, 1]cuda:0" = torch.ops.aten.view.default(view_220, [1, 64, 12, 64]); view_220 = None + permute_193: "f32[1, 12, 64, 64][49152, 64, 768, 1]cuda:0" = torch.ops.aten.permute.default(view_221, [0, 2, 1, 3]); view_221 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:96 in forward, code: y = torch.nn.functional.scaled_dot_product_attention( + _scaled_dot_product_efficient_attention_backward_4 = torch.ops.aten._scaled_dot_product_efficient_attention_backward.default(permute_193, permute_58, permute_57, permute_59, None, getitem_82, getitem_83, getitem_84, getitem_85, 0.0, [True, True, True, False], True); permute_193 = permute_58 = permute_57 = permute_59 = getitem_82 = getitem_83 = getitem_84 = getitem_85 = None + getitem_150: "f32[1, 12, 64, 64][49152, 64, 768, 1]cuda:0" = _scaled_dot_product_efficient_attention_backward_4[0] + getitem_151: "f32[1, 12, 64, 64][49152, 64, 768, 1]cuda:0" = _scaled_dot_product_efficient_attention_backward_4[1] + getitem_152: "f32[1, 12, 64, 64][49152, 64, 768, 1]cuda:0" = _scaled_dot_product_efficient_attention_backward_4[2]; _scaled_dot_product_efficient_attention_backward_4 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:89 in forward, code: v = v.view(B, T, self.n_head, C // self.n_head).transpose( + permute_194: "f32[1, 64, 12, 64][49152, 768, 64, 1]cuda:0" = torch.ops.aten.permute.default(getitem_152, [0, 2, 1, 3]); getitem_152 = None + view_222: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.view.default(permute_194, [1, 64, 768]); permute_194 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:86 in forward, code: q = q.view(B, T, self.n_head, C // self.n_head).transpose( + permute_195: "f32[1, 64, 12, 64][49152, 768, 64, 1]cuda:0" = torch.ops.aten.permute.default(getitem_150, [0, 2, 1, 3]); getitem_150 = None + view_223: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.view.default(permute_195, [1, 64, 768]); permute_195 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:83 in forward, code: k = k.view(B, T, self.n_head, C // self.n_head).transpose( + permute_196: "f32[1, 64, 12, 64][49152, 768, 64, 1]cuda:0" = torch.ops.aten.permute.default(getitem_151, [0, 2, 1, 3]); getitem_151 = None + view_224: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.view.default(permute_196, [1, 64, 768]); permute_196 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:82 in forward, code: q, k, v = self.c_attn(x).split(self.n_embd, dim=2) + cat_4: "f32[1, 64, 2304][147456, 2304, 1]cuda:0" = torch.ops.aten.cat.default([view_223, view_224, view_222], 2); view_223 = view_224 = view_222 = None + view_225: "f32[64, 2304][2304, 1]cuda:0" = 
torch.ops.aten.view.default(cat_4, [64, 2304]); cat_4 = None + mm_41: "f32[64, 768][768, 1]cuda:0" = torch.ops.aten.mm.default(view_225, permute_197); permute_197 = None + permute_198: "f32[2304, 64][1, 2304]cuda:0" = torch.ops.aten.permute.default(view_225, [1, 0]) + mm_42: "f32[2304, 768][768, 1]cuda:0" = torch.ops.aten.mm.default(permute_198, view_84); permute_198 = view_84 = None + permute_199: "f32[768, 2304][1, 768]cuda:0" = torch.ops.aten.permute.default(mm_42, [1, 0]); mm_42 = None + sum_60: "f32[1, 2304][2304, 1]cuda:0" = torch.ops.aten.sum.dim_IntList(view_225, [0], True); view_225 = None + view_226: "f32[2304][1]cuda:0" = torch.ops.aten.view.default(sum_60, [2304]); sum_60 = None + permute_200: "f32[2304, 768][768, 1]cuda:0" = torch.ops.aten.permute.default(permute_199, [1, 0]); permute_199 = None + view_227: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.view.default(mm_41, [1, 64, 768]); mm_41 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:43 in forward, code: return F.layer_norm(input, self.weight.shape, self.weight, self.bias, 1e-5) + mul_214: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.mul.Tensor(view_227, primals_88); primals_88 = None + mul_215: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.mul.Tensor(mul_214, 768) + sum_61: "f32[1, 64, 1][64, 1, 1]cuda:0" = torch.ops.aten.sum.dim_IntList(mul_214, [2], True) + mul_216: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.mul.Tensor(mul_214, mul_56); mul_214 = None + sum_62: "f32[1, 64, 1][64, 1, 1]cuda:0" = torch.ops.aten.sum.dim_IntList(mul_216, [2], True); mul_216 = None + mul_217: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.mul.Tensor(mul_56, sum_62); sum_62 = None + sub_61: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.sub.Tensor(mul_215, sum_61); mul_215 = sum_61 = None + sub_62: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.sub.Tensor(sub_61, mul_217); sub_61 = mul_217 = None + mul_218: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.mul.Tensor(div_10, sub_62); div_10 = sub_62 = None + mul_219: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.mul.Tensor(view_227, mul_56); mul_56 = None + sum_63: "f32[768][1]cuda:0" = torch.ops.aten.sum.dim_IntList(mul_219, [0, 1]); mul_219 = None + sum_64: "f32[768][1]cuda:0" = torch.ops.aten.sum.dim_IntList(view_227, [0, 1]); view_227 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:43 in forward, code: return F.layer_norm(input, self.weight.shape, self.weight, self.bias, 1e-5) + add_118: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.add.Tensor(add_117, mul_218); add_117 = mul_218 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:130 in forward, code: x = self.c_proj(x) + view_228: "f32[64, 768][768, 1]cuda:0" = torch.ops.aten.view.default(add_118, [64, 768]) + mm_43: "f32[64, 3072][3072, 1]cuda:0" = torch.ops.aten.mm.default(view_228, permute_201); permute_201 = None + permute_202: "f32[768, 64][1, 768]cuda:0" = torch.ops.aten.permute.default(view_228, [1, 0]) + mm_44: "f32[768, 3072][3072, 1]cuda:0" = torch.ops.aten.mm.default(permute_202, view_82); permute_202 = view_82 = None + permute_203: "f32[3072, 768][1, 3072]cuda:0" = torch.ops.aten.permute.default(mm_44, [1, 0]); mm_44 = None + sum_65: "f32[1, 768][768, 1]cuda:0" = torch.ops.aten.sum.dim_IntList(view_228, [0], True); view_228 = None + view_229: "f32[768][1]cuda:0" = torch.ops.aten.view.default(sum_65, 
[768]); sum_65 = None + permute_204: "f32[768, 3072][3072, 1]cuda:0" = torch.ops.aten.permute.default(permute_203, [1, 0]); permute_203 = None + view_230: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.view.default(mm_43, [1, 64, 3072]); mm_43 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:128 in forward, code: x = self.c_fc(x) + view_81: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.view.default(addmm_26, [1, 64, 3072]); addmm_26 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:25 in new_gelu, code: 0.5 + mul_52: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.mul.Tensor(view_81, 0.5) + mul_220: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.mul.Tensor(view_230, mul_52); mul_52 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:29 in new_gelu, code: + torch.tanh(math.sqrt(2.0 / math.pi) * (x + 0.044715 * torch.pow(x, 3.0))) + pow_7: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.pow.Tensor_Scalar(view_81, 3.0) + mul_53: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.mul.Tensor(pow_7, 0.044715); pow_7 = None + add_54: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.add.Tensor(view_81, mul_53); mul_53 = None + mul_54: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.mul.Tensor(add_54, 0.7978845608028654); add_54 = None + tanh_6: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.tanh.default(mul_54); mul_54 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:28 in new_gelu, code: 1.0 + add_55: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.add.Tensor(tanh_6, 1.0) + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:25 in new_gelu, code: 0.5 + mul_221: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.mul.Tensor(view_230, add_55); view_230 = add_55 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:29 in new_gelu, code: + torch.tanh(math.sqrt(2.0 / math.pi) * (x + 0.044715 * torch.pow(x, 3.0))) + mul_222: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.mul.Tensor(tanh_6, tanh_6); tanh_6 = None + sub_63: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.sub.Tensor(1, mul_222); mul_222 = None + mul_223: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.mul.Tensor(mul_220, sub_63); mul_220 = sub_63 = None + mul_224: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.mul.Tensor(mul_223, 0.7978845608028654); mul_223 = None + mul_225: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.mul.Tensor(mul_224, 0.044715) + pow_18: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.pow.Tensor_Scalar(view_81, 2.0); view_81 = None + mul_226: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.mul.Scalar(pow_18, 3.0); pow_18 = None + mul_227: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.mul.Tensor(mul_225, mul_226); mul_225 = mul_226 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:29 in new_gelu, code: + torch.tanh(math.sqrt(2.0 / math.pi) * (x + 0.044715 * torch.pow(x, 3.0))) + add_119: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.add.Tensor(mul_224, mul_227); mul_224 = mul_227 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:25 in new_gelu, code: 0.5 + mul_228: "f32[1, 64, 
3072][196608, 3072, 1]cuda:0" = torch.ops.aten.mul.Tensor(mul_221, 0.5); mul_221 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:25 in new_gelu, code: 0.5 + add_120: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.add.Tensor(add_119, mul_228); add_119 = mul_228 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:128 in forward, code: x = self.c_fc(x) + view_231: "f32[64, 3072][3072, 1]cuda:0" = torch.ops.aten.view.default(add_120, [64, 3072]); add_120 = None + mm_45: "f32[64, 768][768, 1]cuda:0" = torch.ops.aten.mm.default(view_231, permute_205); permute_205 = None + permute_206: "f32[3072, 64][1, 3072]cuda:0" = torch.ops.aten.permute.default(view_231, [1, 0]) + mm_46: "f32[3072, 768][768, 1]cuda:0" = torch.ops.aten.mm.default(permute_206, view_80); permute_206 = view_80 = None + permute_207: "f32[768, 3072][1, 768]cuda:0" = torch.ops.aten.permute.default(mm_46, [1, 0]); mm_46 = None + sum_66: "f32[1, 3072][3072, 1]cuda:0" = torch.ops.aten.sum.dim_IntList(view_231, [0], True); view_231 = None + view_232: "f32[3072][1]cuda:0" = torch.ops.aten.view.default(sum_66, [3072]); sum_66 = None + permute_208: "f32[3072, 768][768, 1]cuda:0" = torch.ops.aten.permute.default(permute_207, [1, 0]); permute_207 = None + view_233: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.view.default(mm_45, [1, 64, 768]); mm_45 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:43 in forward, code: return F.layer_norm(input, self.weight.shape, self.weight, self.bias, 1e-5) + mul_230: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.mul.Tensor(view_233, primals_82); primals_82 = None + mul_231: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.mul.Tensor(mul_230, 768) + sum_67: "f32[1, 64, 1][64, 1, 1]cuda:0" = torch.ops.aten.sum.dim_IntList(mul_230, [2], True) + mul_232: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.mul.Tensor(mul_230, mul_50); mul_230 = None + sum_68: "f32[1, 64, 1][64, 1, 1]cuda:0" = torch.ops.aten.sum.dim_IntList(mul_232, [2], True); mul_232 = None + mul_233: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.mul.Tensor(mul_50, sum_68); sum_68 = None + sub_65: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.sub.Tensor(mul_231, sum_67); mul_231 = sum_67 = None + sub_66: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.sub.Tensor(sub_65, mul_233); sub_65 = mul_233 = None + mul_234: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.mul.Tensor(div_11, sub_66); div_11 = sub_66 = None + mul_235: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.mul.Tensor(view_233, mul_50); mul_50 = None + sum_69: "f32[768][1]cuda:0" = torch.ops.aten.sum.dim_IntList(mul_235, [0, 1]); mul_235 = None + sum_70: "f32[768][1]cuda:0" = torch.ops.aten.sum.dim_IntList(view_233, [0, 1]); view_233 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:43 in forward, code: return F.layer_norm(input, self.weight.shape, self.weight, self.bias, 1e-5) + add_121: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.add.Tensor(add_118, mul_234); add_118 = mul_234 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:116 in forward, code: y = self.resid_dropout(self.c_proj(y)) + view_234: "f32[64, 768][768, 1]cuda:0" = torch.ops.aten.view.default(add_121, [64, 768]) + mm_47: "f32[64, 768][768, 1]cuda:0" = torch.ops.aten.mm.default(view_234, permute_209); 
permute_209 = None + permute_210: "f32[768, 64][1, 768]cuda:0" = torch.ops.aten.permute.default(view_234, [1, 0]) + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:112 in forward, code: y.transpose(1, 2).contiguous().view(B, T, C) + permute_52: "f32[1, 64, 12, 64][49152, 768, 64, 1]cuda:0" = torch.ops.aten.permute.default(getitem_71, [0, 2, 1, 3]) + view_77: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.view.default(permute_52, [1, 64, 768]); permute_52 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:116 in forward, code: y = self.resid_dropout(self.c_proj(y)) + view_78: "f32[64, 768][768, 1]cuda:0" = torch.ops.aten.view.default(view_77, [64, 768]); view_77 = None + mm_48: "f32[768, 768][768, 1]cuda:0" = torch.ops.aten.mm.default(permute_210, view_78); permute_210 = view_78 = None + permute_211: "f32[768, 768][1, 768]cuda:0" = torch.ops.aten.permute.default(mm_48, [1, 0]); mm_48 = None + sum_71: "f32[1, 768][768, 1]cuda:0" = torch.ops.aten.sum.dim_IntList(view_234, [0], True); view_234 = None + view_235: "f32[768][1]cuda:0" = torch.ops.aten.view.default(sum_71, [768]); sum_71 = None + permute_212: "f32[768, 768][768, 1]cuda:0" = torch.ops.aten.permute.default(permute_211, [1, 0]); permute_211 = None + view_236: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.view.default(mm_47, [1, 64, 768]); mm_47 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:112 in forward, code: y.transpose(1, 2).contiguous().view(B, T, C) + view_237: "f32[1, 64, 12, 64][49152, 768, 64, 1]cuda:0" = torch.ops.aten.view.default(view_236, [1, 64, 12, 64]); view_236 = None + permute_213: "f32[1, 12, 64, 64][49152, 64, 768, 1]cuda:0" = torch.ops.aten.permute.default(view_237, [0, 2, 1, 3]); view_237 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:96 in forward, code: y = torch.nn.functional.scaled_dot_product_attention( + _scaled_dot_product_efficient_attention_backward_5 = torch.ops.aten._scaled_dot_product_efficient_attention_backward.default(permute_213, permute_50, permute_49, permute_51, None, getitem_71, getitem_72, getitem_73, getitem_74, 0.0, [True, True, True, False], True); permute_213 = permute_50 = permute_49 = permute_51 = getitem_71 = getitem_72 = getitem_73 = getitem_74 = None + getitem_154: "f32[1, 12, 64, 64][49152, 64, 768, 1]cuda:0" = _scaled_dot_product_efficient_attention_backward_5[0] + getitem_155: "f32[1, 12, 64, 64][49152, 64, 768, 1]cuda:0" = _scaled_dot_product_efficient_attention_backward_5[1] + getitem_156: "f32[1, 12, 64, 64][49152, 64, 768, 1]cuda:0" = _scaled_dot_product_efficient_attention_backward_5[2]; _scaled_dot_product_efficient_attention_backward_5 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:89 in forward, code: v = v.view(B, T, self.n_head, C // self.n_head).transpose( + permute_214: "f32[1, 64, 12, 64][49152, 768, 64, 1]cuda:0" = torch.ops.aten.permute.default(getitem_156, [0, 2, 1, 3]); getitem_156 = None + view_238: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.view.default(permute_214, [1, 64, 768]); permute_214 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:86 in forward, code: q = q.view(B, T, self.n_head, C // self.n_head).transpose( + permute_215: "f32[1, 64, 12, 64][49152, 768, 64, 1]cuda:0" = torch.ops.aten.permute.default(getitem_154, [0, 2, 1, 3]); getitem_154 = None + view_239: "f32[1, 
64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.view.default(permute_215, [1, 64, 768]); permute_215 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:83 in forward, code: k = k.view(B, T, self.n_head, C // self.n_head).transpose( + permute_216: "f32[1, 64, 12, 64][49152, 768, 64, 1]cuda:0" = torch.ops.aten.permute.default(getitem_155, [0, 2, 1, 3]); getitem_155 = None + view_240: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.view.default(permute_216, [1, 64, 768]); permute_216 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:82 in forward, code: q, k, v = self.c_attn(x).split(self.n_embd, dim=2) + cat_5: "f32[1, 64, 2304][147456, 2304, 1]cuda:0" = torch.ops.aten.cat.default([view_239, view_240, view_238], 2); view_239 = view_240 = view_238 = None + view_241: "f32[64, 2304][2304, 1]cuda:0" = torch.ops.aten.view.default(cat_5, [64, 2304]); cat_5 = None + mm_49: "f32[64, 768][768, 1]cuda:0" = torch.ops.aten.mm.default(view_241, permute_217); permute_217 = None + permute_218: "f32[2304, 64][1, 2304]cuda:0" = torch.ops.aten.permute.default(view_241, [1, 0]) + mm_50: "f32[2304, 768][768, 1]cuda:0" = torch.ops.aten.mm.default(permute_218, view_72); permute_218 = view_72 = None + permute_219: "f32[768, 2304][1, 768]cuda:0" = torch.ops.aten.permute.default(mm_50, [1, 0]); mm_50 = None + sum_72: "f32[1, 2304][2304, 1]cuda:0" = torch.ops.aten.sum.dim_IntList(view_241, [0], True); view_241 = None + view_242: "f32[2304][1]cuda:0" = torch.ops.aten.view.default(sum_72, [2304]); sum_72 = None + permute_220: "f32[2304, 768][768, 1]cuda:0" = torch.ops.aten.permute.default(permute_219, [1, 0]); permute_219 = None + view_243: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.view.default(mm_49, [1, 64, 768]); mm_49 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:43 in forward, code: return F.layer_norm(input, self.weight.shape, self.weight, self.bias, 1e-5) + mul_237: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.mul.Tensor(view_243, primals_76); primals_76 = None + mul_238: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.mul.Tensor(mul_237, 768) + sum_73: "f32[1, 64, 1][64, 1, 1]cuda:0" = torch.ops.aten.sum.dim_IntList(mul_237, [2], True) + mul_239: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.mul.Tensor(mul_237, mul_48); mul_237 = None + sum_74: "f32[1, 64, 1][64, 1, 1]cuda:0" = torch.ops.aten.sum.dim_IntList(mul_239, [2], True); mul_239 = None + mul_240: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.mul.Tensor(mul_48, sum_74); sum_74 = None + sub_68: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.sub.Tensor(mul_238, sum_73); mul_238 = sum_73 = None + sub_69: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.sub.Tensor(sub_68, mul_240); sub_68 = mul_240 = None + mul_241: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.mul.Tensor(div_12, sub_69); div_12 = sub_69 = None + mul_242: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.mul.Tensor(view_243, mul_48); mul_48 = None + sum_75: "f32[768][1]cuda:0" = torch.ops.aten.sum.dim_IntList(mul_242, [0, 1]); mul_242 = None + sum_76: "f32[768][1]cuda:0" = torch.ops.aten.sum.dim_IntList(view_243, [0, 1]); view_243 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:43 in forward, code: return F.layer_norm(input, self.weight.shape, self.weight, self.bias, 1e-5) + add_122: "f32[1, 64, 768][49152, 768, 1]cuda:0" = 
torch.ops.aten.add.Tensor(add_121, mul_241); add_121 = mul_241 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:130 in forward, code: x = self.c_proj(x) + view_244: "f32[64, 768][768, 1]cuda:0" = torch.ops.aten.view.default(add_122, [64, 768]) + mm_51: "f32[64, 3072][3072, 1]cuda:0" = torch.ops.aten.mm.default(view_244, permute_221); permute_221 = None + permute_222: "f32[768, 64][1, 768]cuda:0" = torch.ops.aten.permute.default(view_244, [1, 0]) + mm_52: "f32[768, 3072][3072, 1]cuda:0" = torch.ops.aten.mm.default(permute_222, view_70); permute_222 = view_70 = None + permute_223: "f32[3072, 768][1, 3072]cuda:0" = torch.ops.aten.permute.default(mm_52, [1, 0]); mm_52 = None + sum_77: "f32[1, 768][768, 1]cuda:0" = torch.ops.aten.sum.dim_IntList(view_244, [0], True); view_244 = None + view_245: "f32[768][1]cuda:0" = torch.ops.aten.view.default(sum_77, [768]); sum_77 = None + permute_224: "f32[768, 3072][3072, 1]cuda:0" = torch.ops.aten.permute.default(permute_223, [1, 0]); permute_223 = None + view_246: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.view.default(mm_51, [1, 64, 3072]); mm_51 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:128 in forward, code: x = self.c_fc(x) + view_69: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.view.default(addmm_22, [1, 64, 3072]); addmm_22 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:25 in new_gelu, code: 0.5 + mul_44: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.mul.Tensor(view_69, 0.5) + mul_243: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.mul.Tensor(view_246, mul_44); mul_44 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:29 in new_gelu, code: + torch.tanh(math.sqrt(2.0 / math.pi) * (x + 0.044715 * torch.pow(x, 3.0))) + pow_6: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.pow.Tensor_Scalar(view_69, 3.0) + mul_45: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.mul.Tensor(pow_6, 0.044715); pow_6 = None + add_46: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.add.Tensor(view_69, mul_45); mul_45 = None + mul_46: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.mul.Tensor(add_46, 0.7978845608028654); add_46 = None + tanh_5: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.tanh.default(mul_46); mul_46 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:28 in new_gelu, code: 1.0 + add_47: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.add.Tensor(tanh_5, 1.0) + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:25 in new_gelu, code: 0.5 + mul_244: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.mul.Tensor(view_246, add_47); view_246 = add_47 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:29 in new_gelu, code: + torch.tanh(math.sqrt(2.0 / math.pi) * (x + 0.044715 * torch.pow(x, 3.0))) + mul_245: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.mul.Tensor(tanh_5, tanh_5); tanh_5 = None + sub_70: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.sub.Tensor(1, mul_245); mul_245 = None + mul_246: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.mul.Tensor(mul_243, sub_70); mul_243 = sub_70 = None + mul_247: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.mul.Tensor(mul_246, 0.7978845608028654); mul_246 = 
None + mul_248: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.mul.Tensor(mul_247, 0.044715) + pow_19: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.pow.Tensor_Scalar(view_69, 2.0); view_69 = None + mul_249: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.mul.Scalar(pow_19, 3.0); pow_19 = None + mul_250: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.mul.Tensor(mul_248, mul_249); mul_248 = mul_249 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:29 in new_gelu, code: + torch.tanh(math.sqrt(2.0 / math.pi) * (x + 0.044715 * torch.pow(x, 3.0))) + add_123: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.add.Tensor(mul_247, mul_250); mul_247 = mul_250 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:25 in new_gelu, code: 0.5 + mul_251: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.mul.Tensor(mul_244, 0.5); mul_244 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:25 in new_gelu, code: 0.5 + add_124: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.add.Tensor(add_123, mul_251); add_123 = mul_251 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:128 in forward, code: x = self.c_fc(x) + view_247: "f32[64, 3072][3072, 1]cuda:0" = torch.ops.aten.view.default(add_124, [64, 3072]); add_124 = None + mm_53: "f32[64, 768][768, 1]cuda:0" = torch.ops.aten.mm.default(view_247, permute_225); permute_225 = None + permute_226: "f32[3072, 64][1, 3072]cuda:0" = torch.ops.aten.permute.default(view_247, [1, 0]) + mm_54: "f32[3072, 768][768, 1]cuda:0" = torch.ops.aten.mm.default(permute_226, view_68); permute_226 = view_68 = None + permute_227: "f32[768, 3072][1, 768]cuda:0" = torch.ops.aten.permute.default(mm_54, [1, 0]); mm_54 = None + sum_78: "f32[1, 3072][3072, 1]cuda:0" = torch.ops.aten.sum.dim_IntList(view_247, [0], True); view_247 = None + view_248: "f32[3072][1]cuda:0" = torch.ops.aten.view.default(sum_78, [3072]); sum_78 = None + permute_228: "f32[3072, 768][768, 1]cuda:0" = torch.ops.aten.permute.default(permute_227, [1, 0]); permute_227 = None + view_249: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.view.default(mm_53, [1, 64, 768]); mm_53 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:43 in forward, code: return F.layer_norm(input, self.weight.shape, self.weight, self.bias, 1e-5) + mul_253: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.mul.Tensor(view_249, primals_70); primals_70 = None + mul_254: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.mul.Tensor(mul_253, 768) + sum_79: "f32[1, 64, 1][64, 1, 1]cuda:0" = torch.ops.aten.sum.dim_IntList(mul_253, [2], True) + mul_255: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.mul.Tensor(mul_253, mul_42); mul_253 = None + sum_80: "f32[1, 64, 1][64, 1, 1]cuda:0" = torch.ops.aten.sum.dim_IntList(mul_255, [2], True); mul_255 = None + mul_256: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.mul.Tensor(mul_42, sum_80); sum_80 = None + sub_72: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.sub.Tensor(mul_254, sum_79); mul_254 = sum_79 = None + sub_73: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.sub.Tensor(sub_72, mul_256); sub_72 = mul_256 = None + mul_257: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.mul.Tensor(div_13, sub_73); div_13 = sub_73 = None + mul_258: "f32[1, 64, 768][49152, 768, 
1]cuda:0" = torch.ops.aten.mul.Tensor(view_249, mul_42); mul_42 = None + sum_81: "f32[768][1]cuda:0" = torch.ops.aten.sum.dim_IntList(mul_258, [0, 1]); mul_258 = None + sum_82: "f32[768][1]cuda:0" = torch.ops.aten.sum.dim_IntList(view_249, [0, 1]); view_249 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:43 in forward, code: return F.layer_norm(input, self.weight.shape, self.weight, self.bias, 1e-5) + add_125: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.add.Tensor(add_122, mul_257); add_122 = mul_257 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:116 in forward, code: y = self.resid_dropout(self.c_proj(y)) + view_250: "f32[64, 768][768, 1]cuda:0" = torch.ops.aten.view.default(add_125, [64, 768]) + mm_55: "f32[64, 768][768, 1]cuda:0" = torch.ops.aten.mm.default(view_250, permute_229); permute_229 = None + permute_230: "f32[768, 64][1, 768]cuda:0" = torch.ops.aten.permute.default(view_250, [1, 0]) + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:112 in forward, code: y.transpose(1, 2).contiguous().view(B, T, C) + permute_44: "f32[1, 64, 12, 64][49152, 768, 64, 1]cuda:0" = torch.ops.aten.permute.default(getitem_60, [0, 2, 1, 3]) + view_65: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.view.default(permute_44, [1, 64, 768]); permute_44 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:116 in forward, code: y = self.resid_dropout(self.c_proj(y)) + view_66: "f32[64, 768][768, 1]cuda:0" = torch.ops.aten.view.default(view_65, [64, 768]); view_65 = None + mm_56: "f32[768, 768][768, 1]cuda:0" = torch.ops.aten.mm.default(permute_230, view_66); permute_230 = view_66 = None + permute_231: "f32[768, 768][1, 768]cuda:0" = torch.ops.aten.permute.default(mm_56, [1, 0]); mm_56 = None + sum_83: "f32[1, 768][768, 1]cuda:0" = torch.ops.aten.sum.dim_IntList(view_250, [0], True); view_250 = None + view_251: "f32[768][1]cuda:0" = torch.ops.aten.view.default(sum_83, [768]); sum_83 = None + permute_232: "f32[768, 768][768, 1]cuda:0" = torch.ops.aten.permute.default(permute_231, [1, 0]); permute_231 = None + view_252: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.view.default(mm_55, [1, 64, 768]); mm_55 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:112 in forward, code: y.transpose(1, 2).contiguous().view(B, T, C) + view_253: "f32[1, 64, 12, 64][49152, 768, 64, 1]cuda:0" = torch.ops.aten.view.default(view_252, [1, 64, 12, 64]); view_252 = None + permute_233: "f32[1, 12, 64, 64][49152, 64, 768, 1]cuda:0" = torch.ops.aten.permute.default(view_253, [0, 2, 1, 3]); view_253 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:96 in forward, code: y = torch.nn.functional.scaled_dot_product_attention( + _scaled_dot_product_efficient_attention_backward_6 = torch.ops.aten._scaled_dot_product_efficient_attention_backward.default(permute_233, permute_42, permute_41, permute_43, None, getitem_60, getitem_61, getitem_62, getitem_63, 0.0, [True, True, True, False], True); permute_233 = permute_42 = permute_41 = permute_43 = getitem_60 = getitem_61 = getitem_62 = getitem_63 = None + getitem_158: "f32[1, 12, 64, 64][49152, 64, 768, 1]cuda:0" = _scaled_dot_product_efficient_attention_backward_6[0] + getitem_159: "f32[1, 12, 64, 64][49152, 64, 768, 1]cuda:0" = _scaled_dot_product_efficient_attention_backward_6[1] + getitem_160: "f32[1, 12, 64, 64][49152, 
64, 768, 1]cuda:0" = _scaled_dot_product_efficient_attention_backward_6[2]; _scaled_dot_product_efficient_attention_backward_6 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:89 in forward, code: v = v.view(B, T, self.n_head, C // self.n_head).transpose( + permute_234: "f32[1, 64, 12, 64][49152, 768, 64, 1]cuda:0" = torch.ops.aten.permute.default(getitem_160, [0, 2, 1, 3]); getitem_160 = None + view_254: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.view.default(permute_234, [1, 64, 768]); permute_234 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:86 in forward, code: q = q.view(B, T, self.n_head, C // self.n_head).transpose( + permute_235: "f32[1, 64, 12, 64][49152, 768, 64, 1]cuda:0" = torch.ops.aten.permute.default(getitem_158, [0, 2, 1, 3]); getitem_158 = None + view_255: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.view.default(permute_235, [1, 64, 768]); permute_235 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:83 in forward, code: k = k.view(B, T, self.n_head, C // self.n_head).transpose( + permute_236: "f32[1, 64, 12, 64][49152, 768, 64, 1]cuda:0" = torch.ops.aten.permute.default(getitem_159, [0, 2, 1, 3]); getitem_159 = None + view_256: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.view.default(permute_236, [1, 64, 768]); permute_236 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:82 in forward, code: q, k, v = self.c_attn(x).split(self.n_embd, dim=2) + cat_6: "f32[1, 64, 2304][147456, 2304, 1]cuda:0" = torch.ops.aten.cat.default([view_255, view_256, view_254], 2); view_255 = view_256 = view_254 = None + view_257: "f32[64, 2304][2304, 1]cuda:0" = torch.ops.aten.view.default(cat_6, [64, 2304]); cat_6 = None + mm_57: "f32[64, 768][768, 1]cuda:0" = torch.ops.aten.mm.default(view_257, permute_237); permute_237 = None + permute_238: "f32[2304, 64][1, 2304]cuda:0" = torch.ops.aten.permute.default(view_257, [1, 0]) + mm_58: "f32[2304, 768][768, 1]cuda:0" = torch.ops.aten.mm.default(permute_238, view_60); permute_238 = view_60 = None + permute_239: "f32[768, 2304][1, 768]cuda:0" = torch.ops.aten.permute.default(mm_58, [1, 0]); mm_58 = None + sum_84: "f32[1, 2304][2304, 1]cuda:0" = torch.ops.aten.sum.dim_IntList(view_257, [0], True); view_257 = None + view_258: "f32[2304][1]cuda:0" = torch.ops.aten.view.default(sum_84, [2304]); sum_84 = None + permute_240: "f32[2304, 768][768, 1]cuda:0" = torch.ops.aten.permute.default(permute_239, [1, 0]); permute_239 = None + view_259: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.view.default(mm_57, [1, 64, 768]); mm_57 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:43 in forward, code: return F.layer_norm(input, self.weight.shape, self.weight, self.bias, 1e-5) + mul_260: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.mul.Tensor(view_259, primals_64); primals_64 = None + mul_261: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.mul.Tensor(mul_260, 768) + sum_85: "f32[1, 64, 1][64, 1, 1]cuda:0" = torch.ops.aten.sum.dim_IntList(mul_260, [2], True) + mul_262: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.mul.Tensor(mul_260, mul_40); mul_260 = None + sum_86: "f32[1, 64, 1][64, 1, 1]cuda:0" = torch.ops.aten.sum.dim_IntList(mul_262, [2], True); mul_262 = None + mul_263: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.mul.Tensor(mul_40, sum_86); sum_86 = None + 
sub_75: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.sub.Tensor(mul_261, sum_85); mul_261 = sum_85 = None + sub_76: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.sub.Tensor(sub_75, mul_263); sub_75 = mul_263 = None + mul_264: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.mul.Tensor(div_14, sub_76); div_14 = sub_76 = None + mul_265: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.mul.Tensor(view_259, mul_40); mul_40 = None + sum_87: "f32[768][1]cuda:0" = torch.ops.aten.sum.dim_IntList(mul_265, [0, 1]); mul_265 = None + sum_88: "f32[768][1]cuda:0" = torch.ops.aten.sum.dim_IntList(view_259, [0, 1]); view_259 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:43 in forward, code: return F.layer_norm(input, self.weight.shape, self.weight, self.bias, 1e-5) + add_126: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.add.Tensor(add_125, mul_264); add_125 = mul_264 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:130 in forward, code: x = self.c_proj(x) + view_260: "f32[64, 768][768, 1]cuda:0" = torch.ops.aten.view.default(add_126, [64, 768]) + mm_59: "f32[64, 3072][3072, 1]cuda:0" = torch.ops.aten.mm.default(view_260, permute_241); permute_241 = None + permute_242: "f32[768, 64][1, 768]cuda:0" = torch.ops.aten.permute.default(view_260, [1, 0]) + mm_60: "f32[768, 3072][3072, 1]cuda:0" = torch.ops.aten.mm.default(permute_242, view_58); permute_242 = view_58 = None + permute_243: "f32[3072, 768][1, 3072]cuda:0" = torch.ops.aten.permute.default(mm_60, [1, 0]); mm_60 = None + sum_89: "f32[1, 768][768, 1]cuda:0" = torch.ops.aten.sum.dim_IntList(view_260, [0], True); view_260 = None + view_261: "f32[768][1]cuda:0" = torch.ops.aten.view.default(sum_89, [768]); sum_89 = None + permute_244: "f32[768, 3072][3072, 1]cuda:0" = torch.ops.aten.permute.default(permute_243, [1, 0]); permute_243 = None + view_262: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.view.default(mm_59, [1, 64, 3072]); mm_59 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:128 in forward, code: x = self.c_fc(x) + view_57: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.view.default(addmm_18, [1, 64, 3072]); addmm_18 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:25 in new_gelu, code: 0.5 + mul_36: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.mul.Tensor(view_57, 0.5) + mul_266: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.mul.Tensor(view_262, mul_36); mul_36 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:29 in new_gelu, code: + torch.tanh(math.sqrt(2.0 / math.pi) * (x + 0.044715 * torch.pow(x, 3.0))) + pow_5: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.pow.Tensor_Scalar(view_57, 3.0) + mul_37: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.mul.Tensor(pow_5, 0.044715); pow_5 = None + add_38: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.add.Tensor(view_57, mul_37); mul_37 = None + mul_38: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.mul.Tensor(add_38, 0.7978845608028654); add_38 = None + tanh_4: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.tanh.default(mul_38); mul_38 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:28 in new_gelu, code: 1.0 + add_39: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = 
torch.ops.aten.add.Tensor(tanh_4, 1.0)
+
+ # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:25 in new_gelu, code: 0.5
+ mul_267: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.mul.Tensor(view_262, add_39); view_262 = add_39 = None
+
+ # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:29 in new_gelu, code: + torch.tanh(math.sqrt(2.0 / math.pi) * (x + 0.044715 * torch.pow(x, 3.0)))
+ mul_268: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.mul.Tensor(tanh_4, tanh_4); tanh_4 = None
+ sub_77: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.sub.Tensor(1, mul_268); mul_268 = None
+ mul_269: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.mul.Tensor(mul_266, sub_77); mul_266 = sub_77 = None
+ mul_270: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.mul.Tensor(mul_269, 0.7978845608028654); mul_269 = None
+ mul_271: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.mul.Tensor(mul_270, 0.044715)
+ pow_20: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.pow.Tensor_Scalar(view_57, 2.0); view_57 = None
+ mul_272: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.mul.Scalar(pow_20, 3.0); pow_20 = None
+ mul_273: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.mul.Tensor(mul_271, mul_272); mul_271 = mul_272 = None
+
+ # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:29 in new_gelu, code: + torch.tanh(math.sqrt(2.0 / math.pi) * (x + 0.044715 * torch.pow(x, 3.0)))
+ add_127: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.add.Tensor(mul_270, mul_273); mul_270 = mul_273 = None
+
+ # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:25 in new_gelu, code: 0.5
+ mul_274: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.mul.Tensor(mul_267, 0.5); mul_267 = None
+
+ # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:25 in new_gelu, code: 0.5
+ add_128: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.add.Tensor(add_127, mul_274); add_127 = mul_274 = None
+
+ # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:128 in forward, code: x = self.c_fc(x)
+ view_263: "f32[64, 3072][3072, 1]cuda:0" = torch.ops.aten.view.default(add_128, [64, 3072]); add_128 = None
+ mm_61: "f32[64, 768][768, 1]cuda:0" = torch.ops.aten.mm.default(view_263, permute_245); permute_245 = None
+ permute_246: "f32[3072, 64][1, 3072]cuda:0" = torch.ops.aten.permute.default(view_263, [1, 0])
+ mm_62: "f32[3072, 768][768, 1]cuda:0" = torch.ops.aten.mm.default(permute_246, view_56); permute_246 = view_56 = None
+ permute_247: "f32[768, 3072][1, 768]cuda:0" = torch.ops.aten.permute.default(mm_62, [1, 0]); mm_62 = None
+ sum_90: "f32[1, 3072][3072, 1]cuda:0" = torch.ops.aten.sum.dim_IntList(view_263, [0], True); view_263 = None
+ view_264: "f32[3072][1]cuda:0" = torch.ops.aten.view.default(sum_90, [3072]); sum_90 = None
+ permute_248: "f32[3072, 768][768, 1]cuda:0" = torch.ops.aten.permute.default(permute_247, [1, 0]); permute_247 = None
+ view_265: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.view.default(mm_61, [1, 64, 768]); mm_61 = None
+
+ # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:43 in forward, code: return F.layer_norm(input, self.weight.shape, self.weight, self.bias, 1e-5)
+ mul_276: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.mul.Tensor(view_265, primals_58); primals_58 = None
+ mul_277: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.mul.Tensor(mul_276, 768)
+ sum_91: "f32[1, 64, 1][64, 1, 1]cuda:0" = torch.ops.aten.sum.dim_IntList(mul_276, [2], True)
+ mul_278: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.mul.Tensor(mul_276, mul_34); mul_276 = None
+ sum_92: "f32[1, 64, 1][64, 1, 1]cuda:0" = torch.ops.aten.sum.dim_IntList(mul_278, [2], True); mul_278 = None
+ mul_279: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.mul.Tensor(mul_34, sum_92); sum_92 = None
+ sub_79: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.sub.Tensor(mul_277, sum_91); mul_277 = sum_91 = None
+ sub_80: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.sub.Tensor(sub_79, mul_279); sub_79 = mul_279 = None
+ mul_280: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.mul.Tensor(div_15, sub_80); div_15 = sub_80 = None
+ mul_281: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.mul.Tensor(view_265, mul_34); mul_34 = None
+ sum_93: "f32[768][1]cuda:0" = torch.ops.aten.sum.dim_IntList(mul_281, [0, 1]); mul_281 = None
+ sum_94: "f32[768][1]cuda:0" = torch.ops.aten.sum.dim_IntList(view_265, [0, 1]); view_265 = None
+
+ # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:43 in forward, code: return F.layer_norm(input, self.weight.shape, self.weight, self.bias, 1e-5)
+ add_129: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.add.Tensor(add_126, mul_280); add_126 = mul_280 = None
+
+ # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:116 in forward, code: y = self.resid_dropout(self.c_proj(y))
+ view_266: "f32[64, 768][768, 1]cuda:0" = torch.ops.aten.view.default(add_129, [64, 768])
+ mm_63: "f32[64, 768][768, 1]cuda:0" = torch.ops.aten.mm.default(view_266, permute_249); permute_249 = None
+ permute_250: "f32[768, 64][1, 768]cuda:0" = torch.ops.aten.permute.default(view_266, [1, 0])
+
+ # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:112 in forward, code: y.transpose(1, 2).contiguous().view(B, T, C)
+ permute_36: "f32[1, 64, 12, 64][49152, 768, 64, 1]cuda:0" = torch.ops.aten.permute.default(getitem_49, [0, 2, 1, 3])
+ view_53: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.view.default(permute_36, [1, 64, 768]); permute_36 = None
+
+ # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:116 in forward, code: y = self.resid_dropout(self.c_proj(y))
+ view_54: "f32[64, 768][768, 1]cuda:0" = torch.ops.aten.view.default(view_53, [64, 768]); view_53 = None
+ mm_64: "f32[768, 768][768, 1]cuda:0" = torch.ops.aten.mm.default(permute_250, view_54); permute_250 = view_54 = None
+ permute_251: "f32[768, 768][1, 768]cuda:0" = torch.ops.aten.permute.default(mm_64, [1, 0]); mm_64 = None
+ sum_95: "f32[1, 768][768, 1]cuda:0" = torch.ops.aten.sum.dim_IntList(view_266, [0], True); view_266 = None
+ view_267: "f32[768][1]cuda:0" = torch.ops.aten.view.default(sum_95, [768]); sum_95 = None
+ permute_252: "f32[768, 768][768, 1]cuda:0" = torch.ops.aten.permute.default(permute_251, [1, 0]); permute_251 = None
+ view_268: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.view.default(mm_63, [1, 64, 768]); mm_63 = None
+
+ # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:112 in forward, code: y.transpose(1, 2).contiguous().view(B, T, C)
+ view_269: "f32[1, 64, 12, 64][49152, 768, 64, 1]cuda:0" = torch.ops.aten.view.default(view_268, [1, 64, 12, 64]); view_268 = None
+ permute_253: "f32[1, 12, 64, 64][49152, 64, 768, 1]cuda:0" = torch.ops.aten.permute.default(view_269, [0, 2, 1, 3]); view_269 = None
+
+ # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:96 in forward, code: y = torch.nn.functional.scaled_dot_product_attention(
+ _scaled_dot_product_efficient_attention_backward_7 = torch.ops.aten._scaled_dot_product_efficient_attention_backward.default(permute_253, permute_34, permute_33, permute_35, None, getitem_49, getitem_50, getitem_51, getitem_52, 0.0, [True, True, True, False], True); permute_253 = permute_34 = permute_33 = permute_35 = getitem_49 = getitem_50 = getitem_51 = getitem_52 = None
+ getitem_162: "f32[1, 12, 64, 64][49152, 64, 768, 1]cuda:0" = _scaled_dot_product_efficient_attention_backward_7[0]
+ getitem_163: "f32[1, 12, 64, 64][49152, 64, 768, 1]cuda:0" = _scaled_dot_product_efficient_attention_backward_7[1]
+ getitem_164: "f32[1, 12, 64, 64][49152, 64, 768, 1]cuda:0" = _scaled_dot_product_efficient_attention_backward_7[2]; _scaled_dot_product_efficient_attention_backward_7 = None
+
+ # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:89 in forward, code: v = v.view(B, T, self.n_head, C // self.n_head).transpose(
+ permute_254: "f32[1, 64, 12, 64][49152, 768, 64, 1]cuda:0" = torch.ops.aten.permute.default(getitem_164, [0, 2, 1, 3]); getitem_164 = None
+ view_270: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.view.default(permute_254, [1, 64, 768]); permute_254 = None
+
+ # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:86 in forward, code: q = q.view(B, T, self.n_head, C // self.n_head).transpose(
+ permute_255: "f32[1, 64, 12, 64][49152, 768, 64, 1]cuda:0" = torch.ops.aten.permute.default(getitem_162, [0, 2, 1, 3]); getitem_162 = None
+ view_271: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.view.default(permute_255, [1, 64, 768]); permute_255 = None
+
+ # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:83 in forward, code: k = k.view(B, T, self.n_head, C // self.n_head).transpose(
+ permute_256: "f32[1, 64, 12, 64][49152, 768, 64, 1]cuda:0" = torch.ops.aten.permute.default(getitem_163, [0, 2, 1, 3]); getitem_163 = None
+ view_272: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.view.default(permute_256, [1, 64, 768]); permute_256 = None
+
+ # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:82 in forward, code: q, k, v = self.c_attn(x).split(self.n_embd, dim=2)
+ cat_7: "f32[1, 64, 2304][147456, 2304, 1]cuda:0" = torch.ops.aten.cat.default([view_271, view_272, view_270], 2); view_271 = view_272 = view_270 = None
+ view_273: "f32[64, 2304][2304, 1]cuda:0" = torch.ops.aten.view.default(cat_7, [64, 2304]); cat_7 = None
+ mm_65: "f32[64, 768][768, 1]cuda:0" = torch.ops.aten.mm.default(view_273, permute_257); permute_257 = None
+ permute_258: "f32[2304, 64][1, 2304]cuda:0" = torch.ops.aten.permute.default(view_273, [1, 0])
+ mm_66: "f32[2304, 768][768, 1]cuda:0" = torch.ops.aten.mm.default(permute_258, view_48); permute_258 = view_48 = None
+ permute_259: "f32[768, 2304][1, 768]cuda:0" = torch.ops.aten.permute.default(mm_66, [1, 0]); mm_66 = None
+ sum_96: "f32[1, 2304][2304, 1]cuda:0" = torch.ops.aten.sum.dim_IntList(view_273, [0], True); view_273 = None
+ view_274: "f32[2304][1]cuda:0" = torch.ops.aten.view.default(sum_96, [2304]); sum_96 = None
+ permute_260: "f32[2304, 768][768, 1]cuda:0" = torch.ops.aten.permute.default(permute_259, [1, 0]); permute_259 = None
+ view_275: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.view.default(mm_65, [1, 64, 768]); mm_65 = None
+
+ # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:43 in forward, code: return F.layer_norm(input, self.weight.shape, self.weight, self.bias, 1e-5)
+ mul_283: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.mul.Tensor(view_275, primals_52); primals_52 = None
+ mul_284: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.mul.Tensor(mul_283, 768)
+ sum_97: "f32[1, 64, 1][64, 1, 1]cuda:0" = torch.ops.aten.sum.dim_IntList(mul_283, [2], True)
+ mul_285: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.mul.Tensor(mul_283, mul_32); mul_283 = None
+ sum_98: "f32[1, 64, 1][64, 1, 1]cuda:0" = torch.ops.aten.sum.dim_IntList(mul_285, [2], True); mul_285 = None
+ mul_286: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.mul.Tensor(mul_32, sum_98); sum_98 = None
+ sub_82: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.sub.Tensor(mul_284, sum_97); mul_284 = sum_97 = None
+ sub_83: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.sub.Tensor(sub_82, mul_286); sub_82 = mul_286 = None
+ mul_287: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.mul.Tensor(div_16, sub_83); div_16 = sub_83 = None
+ mul_288: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.mul.Tensor(view_275, mul_32); mul_32 = None
+ sum_99: "f32[768][1]cuda:0" = torch.ops.aten.sum.dim_IntList(mul_288, [0, 1]); mul_288 = None
+ sum_100: "f32[768][1]cuda:0" = torch.ops.aten.sum.dim_IntList(view_275, [0, 1]); view_275 = None
+
+ # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:43 in forward, code: return F.layer_norm(input, self.weight.shape, self.weight, self.bias, 1e-5)
+ add_130: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.add.Tensor(add_129, mul_287); add_129 = mul_287 = None
+
+ # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:130 in forward, code: x = self.c_proj(x)
+ view_276: "f32[64, 768][768, 1]cuda:0" = torch.ops.aten.view.default(add_130, [64, 768])
+ mm_67: "f32[64, 3072][3072, 1]cuda:0" = torch.ops.aten.mm.default(view_276, permute_261); permute_261 = None
+ permute_262: "f32[768, 64][1, 768]cuda:0" = torch.ops.aten.permute.default(view_276, [1, 0])
+ mm_68: "f32[768, 3072][3072, 1]cuda:0" = torch.ops.aten.mm.default(permute_262, view_46); permute_262 = view_46 = None
+ permute_263: "f32[3072, 768][1, 3072]cuda:0" = torch.ops.aten.permute.default(mm_68, [1, 0]); mm_68 = None
+ sum_101: "f32[1, 768][768, 1]cuda:0" = torch.ops.aten.sum.dim_IntList(view_276, [0], True); view_276 = None
+ view_277: "f32[768][1]cuda:0" = torch.ops.aten.view.default(sum_101, [768]); sum_101 = None
+ permute_264: "f32[768, 3072][3072, 1]cuda:0" = torch.ops.aten.permute.default(permute_263, [1, 0]); permute_263 = None
+ view_278: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.view.default(mm_67, [1, 64, 3072]); mm_67 = None
+
+ # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:128 in forward, code: x = self.c_fc(x)
+ view_45: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.view.default(addmm_14, [1, 64, 3072]); addmm_14 = None
+
+ # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:25 in new_gelu, code: 0.5
+ mul_28: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.mul.Tensor(view_45, 0.5)
+ mul_289: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.mul.Tensor(view_278, mul_28); mul_28 = None
+
+ # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:29 in new_gelu, code: + torch.tanh(math.sqrt(2.0 / math.pi) * (x + 0.044715 * torch.pow(x, 3.0)))
+ pow_4: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.pow.Tensor_Scalar(view_45, 3.0)
+ mul_29: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.mul.Tensor(pow_4, 0.044715); pow_4 = None
+ add_30: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.add.Tensor(view_45, mul_29); mul_29 = None
+ mul_30: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.mul.Tensor(add_30, 0.7978845608028654); add_30 = None
+ tanh_3: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.tanh.default(mul_30); mul_30 = None
+
+ # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:28 in new_gelu, code: 1.0
+ add_31: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.add.Tensor(tanh_3, 1.0)
+
+ # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:25 in new_gelu, code: 0.5
+ mul_290: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.mul.Tensor(view_278, add_31); view_278 = add_31 = None
+
+ # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:29 in new_gelu, code: + torch.tanh(math.sqrt(2.0 / math.pi) * (x + 0.044715 * torch.pow(x, 3.0)))
+ mul_291: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.mul.Tensor(tanh_3, tanh_3); tanh_3 = None
+ sub_84: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.sub.Tensor(1, mul_291); mul_291 = None
+ mul_292: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.mul.Tensor(mul_289, sub_84); mul_289 = sub_84 = None
+ mul_293: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.mul.Tensor(mul_292, 0.7978845608028654); mul_292 = None
+ mul_294: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.mul.Tensor(mul_293, 0.044715)
+ pow_21: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.pow.Tensor_Scalar(view_45, 2.0); view_45 = None
+ mul_295: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.mul.Scalar(pow_21, 3.0); pow_21 = None
+ mul_296: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.mul.Tensor(mul_294, mul_295); mul_294 = mul_295 = None
+
+ # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:29 in new_gelu, code: + torch.tanh(math.sqrt(2.0 / math.pi) * (x + 0.044715 * torch.pow(x, 3.0)))
+ add_131: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.add.Tensor(mul_293, mul_296); mul_293 = mul_296 = None
+
+ # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:25 in new_gelu, code: 0.5
+ mul_297: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.mul.Tensor(mul_290, 0.5); mul_290 = None
+
+ # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:25 in new_gelu, code: 0.5
+ add_132: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.add.Tensor(add_131, mul_297); add_131 = mul_297 = None
+
+ # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:128 in forward, code: x = self.c_fc(x)
+ view_279: "f32[64, 3072][3072, 1]cuda:0" = torch.ops.aten.view.default(add_132, [64, 3072]); add_132 = None
+ mm_69: "f32[64, 768][768, 1]cuda:0" = torch.ops.aten.mm.default(view_279, permute_265); permute_265 = None
+ permute_266: "f32[3072, 64][1, 3072]cuda:0" = torch.ops.aten.permute.default(view_279, [1, 0])
+ mm_70: "f32[3072, 768][768, 1]cuda:0" = torch.ops.aten.mm.default(permute_266, view_44); permute_266 = view_44 = None
+ permute_267: "f32[768, 3072][1, 768]cuda:0" = torch.ops.aten.permute.default(mm_70, [1, 0]); mm_70 = None
+ sum_102: "f32[1, 3072][3072, 1]cuda:0" = torch.ops.aten.sum.dim_IntList(view_279, [0], True); view_279 = None
+ view_280: "f32[3072][1]cuda:0" = torch.ops.aten.view.default(sum_102, [3072]); sum_102 = None
+ permute_268: "f32[3072, 768][768, 1]cuda:0" = torch.ops.aten.permute.default(permute_267, [1, 0]); permute_267 = None
+ view_281: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.view.default(mm_69, [1, 64, 768]); mm_69 = None
+
+ # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:43 in forward, code: return F.layer_norm(input, self.weight.shape, self.weight, self.bias, 1e-5)
+ mul_299: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.mul.Tensor(view_281, primals_46); primals_46 = None
+ mul_300: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.mul.Tensor(mul_299, 768)
+ sum_103: "f32[1, 64, 1][64, 1, 1]cuda:0" = torch.ops.aten.sum.dim_IntList(mul_299, [2], True)
+ mul_301: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.mul.Tensor(mul_299, mul_26); mul_299 = None
+ sum_104: "f32[1, 64, 1][64, 1, 1]cuda:0" = torch.ops.aten.sum.dim_IntList(mul_301, [2], True); mul_301 = None
+ mul_302: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.mul.Tensor(mul_26, sum_104); sum_104 = None
+ sub_86: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.sub.Tensor(mul_300, sum_103); mul_300 = sum_103 = None
+ sub_87: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.sub.Tensor(sub_86, mul_302); sub_86 = mul_302 = None
+ mul_303: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.mul.Tensor(div_17, sub_87); div_17 = sub_87 = None
+ mul_304: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.mul.Tensor(view_281, mul_26); mul_26 = None
+ sum_105: "f32[768][1]cuda:0" = torch.ops.aten.sum.dim_IntList(mul_304, [0, 1]); mul_304 = None
+ sum_106: "f32[768][1]cuda:0" = torch.ops.aten.sum.dim_IntList(view_281, [0, 1]); view_281 = None
+
+ # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:43 in forward, code: return F.layer_norm(input, self.weight.shape, self.weight, self.bias, 1e-5)
+ add_133: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.add.Tensor(add_130, mul_303); add_130 = mul_303 = None
+
+ # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:116 in forward, code: y = self.resid_dropout(self.c_proj(y))
+ view_282: "f32[64, 768][768, 1]cuda:0" = torch.ops.aten.view.default(add_133, [64, 768])
+ mm_71: "f32[64, 768][768, 1]cuda:0" = torch.ops.aten.mm.default(view_282, permute_269); permute_269 = None
+ permute_270: "f32[768, 64][1, 768]cuda:0" = torch.ops.aten.permute.default(view_282, [1, 0])
+
+ # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:112 in forward, code: y.transpose(1, 2).contiguous().view(B, T, C)
+ permute_28: "f32[1, 64, 12, 64][49152, 768, 64, 1]cuda:0" = torch.ops.aten.permute.default(getitem_38, [0, 2, 1, 3])
+ view_41: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.view.default(permute_28, [1, 64, 768]); permute_28 = None
+
+ # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:116 in forward, code: y = self.resid_dropout(self.c_proj(y))
+ view_42: "f32[64, 768][768, 1]cuda:0" = torch.ops.aten.view.default(view_41, [64, 768]); view_41 = None
+ mm_72: "f32[768, 768][768, 1]cuda:0" = torch.ops.aten.mm.default(permute_270, view_42); permute_270 = view_42 = None
+ permute_271: "f32[768, 768][1, 768]cuda:0" = torch.ops.aten.permute.default(mm_72, [1, 0]); mm_72 = None
+ sum_107: "f32[1, 768][768, 1]cuda:0" = torch.ops.aten.sum.dim_IntList(view_282, [0], True); view_282 = None
+ view_283: "f32[768][1]cuda:0" = torch.ops.aten.view.default(sum_107, [768]); sum_107 = None
+ permute_272: "f32[768, 768][768, 1]cuda:0" = torch.ops.aten.permute.default(permute_271, [1, 0]); permute_271 = None
+ view_284: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.view.default(mm_71, [1, 64, 768]); mm_71 = None
+
+ # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:112 in forward, code: y.transpose(1, 2).contiguous().view(B, T, C)
+ view_285: "f32[1, 64, 12, 64][49152, 768, 64, 1]cuda:0" = torch.ops.aten.view.default(view_284, [1, 64, 12, 64]); view_284 = None
+ permute_273: "f32[1, 12, 64, 64][49152, 64, 768, 1]cuda:0" = torch.ops.aten.permute.default(view_285, [0, 2, 1, 3]); view_285 = None
+
+ # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:96 in forward, code: y = torch.nn.functional.scaled_dot_product_attention(
+ _scaled_dot_product_efficient_attention_backward_8 = torch.ops.aten._scaled_dot_product_efficient_attention_backward.default(permute_273, permute_26, permute_25, permute_27, None, getitem_38, getitem_39, getitem_40, getitem_41, 0.0, [True, True, True, False], True); permute_273 = permute_26 = permute_25 = permute_27 = getitem_38 = getitem_39 = getitem_40 = getitem_41 = None
+ getitem_166: "f32[1, 12, 64, 64][49152, 64, 768, 1]cuda:0" = _scaled_dot_product_efficient_attention_backward_8[0]
+ getitem_167: "f32[1, 12, 64, 64][49152, 64, 768, 1]cuda:0" = _scaled_dot_product_efficient_attention_backward_8[1]
+ getitem_168: "f32[1, 12, 64, 64][49152, 64, 768, 1]cuda:0" = _scaled_dot_product_efficient_attention_backward_8[2]; _scaled_dot_product_efficient_attention_backward_8 = None
+
+ # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:89 in forward, code: v = v.view(B, T, self.n_head, C // self.n_head).transpose(
+ permute_274: "f32[1, 64, 12, 64][49152, 768, 64, 1]cuda:0" = torch.ops.aten.permute.default(getitem_168, [0, 2, 1, 3]); getitem_168 = None
+ view_286: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.view.default(permute_274, [1, 64, 768]); permute_274 = None
+
+ # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:86 in forward, code: q = q.view(B, T, self.n_head, C // self.n_head).transpose(
+ permute_275: "f32[1, 64, 12, 64][49152, 768, 64, 1]cuda:0" = torch.ops.aten.permute.default(getitem_166, [0, 2, 1, 3]); getitem_166 = None
+ view_287: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.view.default(permute_275, [1, 64, 768]); permute_275 = None
+
+ # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:83 in forward, code: k = k.view(B, T, self.n_head, C // self.n_head).transpose(
+ permute_276: "f32[1, 64, 12, 64][49152, 768, 64, 1]cuda:0" = torch.ops.aten.permute.default(getitem_167, [0, 2, 1, 3]); getitem_167 = None
+ view_288: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.view.default(permute_276, [1, 64, 768]); permute_276 = None
+
+ # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:82 in forward, code: q, k, v = self.c_attn(x).split(self.n_embd, dim=2)
+ cat_8: "f32[1, 64, 2304][147456, 2304, 1]cuda:0" = torch.ops.aten.cat.default([view_287, view_288, view_286], 2); view_287 = view_288 = view_286 = None
+ view_289: "f32[64, 2304][2304, 1]cuda:0" = torch.ops.aten.view.default(cat_8, [64, 2304]); cat_8 = None
+ mm_73: "f32[64, 768][768, 1]cuda:0" = torch.ops.aten.mm.default(view_289, permute_277); permute_277 = None
+ permute_278: "f32[2304, 64][1, 2304]cuda:0" = torch.ops.aten.permute.default(view_289, [1, 0])
+ mm_74: "f32[2304, 768][768, 1]cuda:0" = torch.ops.aten.mm.default(permute_278, view_36); permute_278 = view_36 = None
+ permute_279: "f32[768, 2304][1, 768]cuda:0" = torch.ops.aten.permute.default(mm_74, [1, 0]); mm_74 = None
+ sum_108: "f32[1, 2304][2304, 1]cuda:0" = torch.ops.aten.sum.dim_IntList(view_289, [0], True); view_289 = None
+ view_290: "f32[2304][1]cuda:0" = torch.ops.aten.view.default(sum_108, [2304]); sum_108 = None
+ permute_280: "f32[2304, 768][768, 1]cuda:0" = torch.ops.aten.permute.default(permute_279, [1, 0]); permute_279 = None
+ view_291: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.view.default(mm_73, [1, 64, 768]); mm_73 = None
+
+ # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:43 in forward, code: return F.layer_norm(input, self.weight.shape, self.weight, self.bias, 1e-5)
+ mul_306: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.mul.Tensor(view_291, primals_40); primals_40 = None
+ mul_307: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.mul.Tensor(mul_306, 768)
+ sum_109: "f32[1, 64, 1][64, 1, 1]cuda:0" = torch.ops.aten.sum.dim_IntList(mul_306, [2], True)
+ mul_308: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.mul.Tensor(mul_306, mul_24); mul_306 = None
+ sum_110: "f32[1, 64, 1][64, 1, 1]cuda:0" = torch.ops.aten.sum.dim_IntList(mul_308, [2], True); mul_308 = None
+ mul_309: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.mul.Tensor(mul_24, sum_110); sum_110 = None
+ sub_89: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.sub.Tensor(mul_307, sum_109); mul_307 = sum_109 = None
+ sub_90: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.sub.Tensor(sub_89, mul_309); sub_89 = mul_309 = None
+ mul_310: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.mul.Tensor(div_18, sub_90); div_18 = sub_90 = None
+ mul_311: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.mul.Tensor(view_291, mul_24); mul_24 = None
+ sum_111: "f32[768][1]cuda:0" = torch.ops.aten.sum.dim_IntList(mul_311, [0, 1]); mul_311 = None
+ sum_112: "f32[768][1]cuda:0" = torch.ops.aten.sum.dim_IntList(view_291, [0, 1]); view_291 = None
+
+ # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:43 in forward, code: return F.layer_norm(input, self.weight.shape, self.weight, self.bias, 1e-5)
+ add_134: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.add.Tensor(add_133, mul_310); add_133 = mul_310 = None
+
+ # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:130 in forward, code: x = self.c_proj(x)
+ view_292: "f32[64, 768][768, 1]cuda:0" = torch.ops.aten.view.default(add_134, [64, 768])
+ mm_75: "f32[64, 3072][3072, 1]cuda:0" = torch.ops.aten.mm.default(view_292, permute_281); permute_281 = None
+ permute_282: "f32[768, 64][1, 768]cuda:0" = torch.ops.aten.permute.default(view_292, [1, 0])
+ mm_76: "f32[768, 3072][3072, 1]cuda:0" = torch.ops.aten.mm.default(permute_282, view_34); permute_282 = view_34 = None
+ permute_283: "f32[3072, 768][1, 3072]cuda:0" = torch.ops.aten.permute.default(mm_76, [1, 0]); mm_76 = None
+ sum_113: "f32[1, 768][768, 1]cuda:0" = torch.ops.aten.sum.dim_IntList(view_292, [0], True); view_292 = None
+ view_293: "f32[768][1]cuda:0" = torch.ops.aten.view.default(sum_113, [768]); sum_113 = None
+ permute_284: "f32[768, 3072][3072, 1]cuda:0" = torch.ops.aten.permute.default(permute_283, [1, 0]); permute_283 = None
+ view_294: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.view.default(mm_75, [1, 64, 3072]); mm_75 = None
+
+ # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:128 in forward, code: x = self.c_fc(x)
+ view_33: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.view.default(addmm_10, [1, 64, 3072]); addmm_10 = None
+
+ # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:25 in new_gelu, code: 0.5
+ mul_20: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.mul.Tensor(view_33, 0.5)
+ mul_312: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.mul.Tensor(view_294, mul_20); mul_20 = None
+
+ # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:29 in new_gelu, code: + torch.tanh(math.sqrt(2.0 / math.pi) * (x + 0.044715 * torch.pow(x, 3.0)))
+ pow_3: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.pow.Tensor_Scalar(view_33, 3.0)
+ mul_21: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.mul.Tensor(pow_3, 0.044715); pow_3 = None
+ add_22: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.add.Tensor(view_33, mul_21); mul_21 = None
+ mul_22: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.mul.Tensor(add_22, 0.7978845608028654); add_22 = None
+ tanh_2: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.tanh.default(mul_22); mul_22 = None
+
+ # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:28 in new_gelu, code: 1.0
+ add_23: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.add.Tensor(tanh_2, 1.0)
+
+ # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:25 in new_gelu, code: 0.5
+ mul_313: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.mul.Tensor(view_294, add_23); view_294 = add_23 = None
+
+ # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:29 in new_gelu, code: + torch.tanh(math.sqrt(2.0 / math.pi) * (x + 0.044715 * torch.pow(x, 3.0)))
+ mul_314: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.mul.Tensor(tanh_2, tanh_2); tanh_2 = None
+ sub_91: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.sub.Tensor(1, mul_314); mul_314 = None
+ mul_315: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.mul.Tensor(mul_312, sub_91); mul_312 = sub_91 = None
+ mul_316: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.mul.Tensor(mul_315, 0.7978845608028654); mul_315 = None
+ mul_317: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.mul.Tensor(mul_316, 0.044715)
+ pow_22: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.pow.Tensor_Scalar(view_33, 2.0); view_33 = None
+ mul_318: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.mul.Scalar(pow_22, 3.0); pow_22 = None
+ mul_319: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.mul.Tensor(mul_317, mul_318); mul_317 = mul_318 = None
+
+ # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:29 in new_gelu, code: + torch.tanh(math.sqrt(2.0 / math.pi) * (x + 0.044715 * torch.pow(x, 3.0)))
+ add_135: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.add.Tensor(mul_316, mul_319); mul_316 = mul_319 = None
+
+ # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:25 in new_gelu, code: 0.5
+ mul_320: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.mul.Tensor(mul_313, 0.5); mul_313 = None
+
+ # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:25 in new_gelu, code: 0.5
+ add_136: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.add.Tensor(add_135, mul_320); add_135 = mul_320 = None
+
+ # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:128 in forward, code: x = self.c_fc(x)
+ view_295: "f32[64, 3072][3072, 1]cuda:0" = torch.ops.aten.view.default(add_136, [64, 3072]); add_136 = None
+ mm_77: "f32[64, 768][768, 1]cuda:0" = torch.ops.aten.mm.default(view_295, permute_285); permute_285 = None
+ permute_286: "f32[3072, 64][1, 3072]cuda:0" = torch.ops.aten.permute.default(view_295, [1, 0])
+ mm_78: "f32[3072, 768][768, 1]cuda:0" = torch.ops.aten.mm.default(permute_286, view_32); permute_286 = view_32 = None
+ permute_287: "f32[768, 3072][1, 768]cuda:0" = torch.ops.aten.permute.default(mm_78, [1, 0]); mm_78 = None
+ sum_114: "f32[1, 3072][3072, 1]cuda:0" = torch.ops.aten.sum.dim_IntList(view_295, [0], True); view_295 = None
+ view_296: "f32[3072][1]cuda:0" = torch.ops.aten.view.default(sum_114, [3072]); sum_114 = None
+ permute_288: "f32[3072, 768][768, 1]cuda:0" = torch.ops.aten.permute.default(permute_287, [1, 0]); permute_287 = None
+ view_297: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.view.default(mm_77, [1, 64, 768]); mm_77 = None
+
+ # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:43 in forward, code: return F.layer_norm(input, self.weight.shape, self.weight, self.bias, 1e-5)
+ mul_322: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.mul.Tensor(view_297, primals_34); primals_34 = None
+ mul_323: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.mul.Tensor(mul_322, 768)
+ sum_115: "f32[1, 64, 1][64, 1, 1]cuda:0" = torch.ops.aten.sum.dim_IntList(mul_322, [2], True)
+ mul_324: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.mul.Tensor(mul_322, mul_18); mul_322 = None
+ sum_116: "f32[1, 64, 1][64, 1, 1]cuda:0" = torch.ops.aten.sum.dim_IntList(mul_324, [2], True); mul_324 = None
+ mul_325: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.mul.Tensor(mul_18, sum_116); sum_116 = None
+ sub_93: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.sub.Tensor(mul_323, sum_115); mul_323 = sum_115 = None
+ sub_94: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.sub.Tensor(sub_93, mul_325); sub_93 = mul_325 = None
+ mul_326: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.mul.Tensor(div_19, sub_94); div_19 = sub_94 = None
+ mul_327: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.mul.Tensor(view_297, mul_18); mul_18 = None
+ sum_117: "f32[768][1]cuda:0" = torch.ops.aten.sum.dim_IntList(mul_327, [0, 1]); mul_327 = None
+ sum_118: "f32[768][1]cuda:0" = torch.ops.aten.sum.dim_IntList(view_297, [0, 1]); view_297 = None
+
+ # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:43 in forward, code: return F.layer_norm(input, self.weight.shape, self.weight, self.bias, 1e-5)
+ add_137: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.add.Tensor(add_134, mul_326); add_134 = mul_326 = None
+
+ # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:116 in forward, code: y = self.resid_dropout(self.c_proj(y))
+ view_298: "f32[64, 768][768, 1]cuda:0" = torch.ops.aten.view.default(add_137, [64, 768])
+ mm_79: "f32[64, 768][768, 1]cuda:0" = torch.ops.aten.mm.default(view_298, permute_289); permute_289 = None
+ permute_290: "f32[768, 64][1, 768]cuda:0" = torch.ops.aten.permute.default(view_298, [1, 0])
+
+ # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:112 in forward, code: y.transpose(1, 2).contiguous().view(B, T, C)
+ permute_20: "f32[1, 64, 12, 64][49152, 768, 64, 1]cuda:0" = torch.ops.aten.permute.default(getitem_27, [0, 2, 1, 3])
+ view_29: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.view.default(permute_20, [1, 64, 768]); permute_20 = None
+
+ # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:116 in forward, code: y = self.resid_dropout(self.c_proj(y))
+ view_30: "f32[64, 768][768, 1]cuda:0" = torch.ops.aten.view.default(view_29, [64, 768]); view_29 = None
+ mm_80: "f32[768, 768][768, 1]cuda:0" = torch.ops.aten.mm.default(permute_290, view_30); permute_290 = view_30 = None
+ permute_291: "f32[768, 768][1, 768]cuda:0" = torch.ops.aten.permute.default(mm_80, [1, 0]); mm_80 = None
+ sum_119: "f32[1, 768][768, 1]cuda:0" = torch.ops.aten.sum.dim_IntList(view_298, [0], True); view_298 = None
+ view_299: "f32[768][1]cuda:0" = torch.ops.aten.view.default(sum_119, [768]); sum_119 = None
+ permute_292: "f32[768, 768][768, 1]cuda:0" = torch.ops.aten.permute.default(permute_291, [1, 0]); permute_291 = None
+ view_300: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.view.default(mm_79, [1, 64, 768]); mm_79 = None
+
+ # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:112 in forward, code: y.transpose(1, 2).contiguous().view(B, T, C)
+ view_301: "f32[1, 64, 12, 64][49152, 768, 64, 1]cuda:0" = torch.ops.aten.view.default(view_300, [1, 64, 12, 64]); view_300 = None
+ permute_293: "f32[1, 12, 64, 64][49152, 64, 768, 1]cuda:0" = torch.ops.aten.permute.default(view_301, [0, 2, 1, 3]); view_301 = None
+
+ # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:96 in forward, code: y = torch.nn.functional.scaled_dot_product_attention(
+ _scaled_dot_product_efficient_attention_backward_9 = torch.ops.aten._scaled_dot_product_efficient_attention_backward.default(permute_293, permute_18, permute_17, permute_19, None, getitem_27, getitem_28, getitem_29, getitem_30, 0.0, [True, True, True, False], True); permute_293 = permute_18 = permute_17 = permute_19 = getitem_27 = getitem_28 = getitem_29 = getitem_30 = None
+ getitem_170: "f32[1, 12, 64, 64][49152, 64, 768, 1]cuda:0" = _scaled_dot_product_efficient_attention_backward_9[0]
+ getitem_171: "f32[1, 12, 64, 64][49152, 64, 768, 1]cuda:0" = _scaled_dot_product_efficient_attention_backward_9[1]
+ getitem_172: "f32[1, 12, 64, 64][49152, 64, 768, 1]cuda:0" = _scaled_dot_product_efficient_attention_backward_9[2]; _scaled_dot_product_efficient_attention_backward_9 = None
+
+ # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:89 in forward, code: v = v.view(B, T, self.n_head, C // self.n_head).transpose(
+ permute_294: "f32[1, 64, 12, 64][49152, 768, 64, 1]cuda:0" = torch.ops.aten.permute.default(getitem_172, [0, 2, 1, 3]); getitem_172 = None
+ view_302: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.view.default(permute_294, [1, 64, 768]); permute_294 = None
+
+ # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:86 in forward, code: q = q.view(B, T, self.n_head, C // self.n_head).transpose(
+ permute_295: "f32[1, 64, 12, 64][49152, 768, 64, 1]cuda:0" = torch.ops.aten.permute.default(getitem_170, [0, 2, 1, 3]); getitem_170 = None
+ view_303: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.view.default(permute_295, [1, 64, 768]); permute_295 = None
+
+ # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:83 in forward, code: k = k.view(B, T, self.n_head, C // self.n_head).transpose(
+ permute_296: "f32[1, 64, 12, 64][49152, 768, 64, 1]cuda:0" = torch.ops.aten.permute.default(getitem_171, [0, 2, 1, 3]); getitem_171 = None
+ view_304: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.view.default(permute_296, [1, 64, 768]); permute_296 = None
+
+ # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:82 in forward, code: q, k, v = self.c_attn(x).split(self.n_embd, dim=2)
+ cat_9: "f32[1, 64, 2304][147456, 2304, 1]cuda:0" = torch.ops.aten.cat.default([view_303, view_304, view_302], 2); view_303 = view_304 = view_302 = None
+ view_305: "f32[64, 2304][2304, 1]cuda:0" = torch.ops.aten.view.default(cat_9, [64, 2304]); cat_9 = None
+ mm_81: "f32[64, 768][768, 1]cuda:0" = torch.ops.aten.mm.default(view_305, permute_297); permute_297 = None
+ permute_298: "f32[2304, 64][1, 2304]cuda:0" = torch.ops.aten.permute.default(view_305, [1, 0])
+ mm_82: "f32[2304, 768][768, 1]cuda:0" = torch.ops.aten.mm.default(permute_298, view_24); permute_298 = view_24 = None
+ permute_299: "f32[768, 2304][1, 768]cuda:0" = torch.ops.aten.permute.default(mm_82, [1, 0]); mm_82 = None
+ sum_120: "f32[1, 2304][2304, 1]cuda:0" = torch.ops.aten.sum.dim_IntList(view_305, [0], True); view_305 = None
+ view_306: "f32[2304][1]cuda:0" = torch.ops.aten.view.default(sum_120, [2304]); sum_120 = None
+ permute_300: "f32[2304, 768][768, 1]cuda:0" = torch.ops.aten.permute.default(permute_299, [1, 0]); permute_299 = None
+ view_307: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.view.default(mm_81, [1, 64, 768]); mm_81 = None
+
+ # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:43 in forward, code: return F.layer_norm(input, self.weight.shape, self.weight, self.bias, 1e-5)
+ mul_329: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.mul.Tensor(view_307, primals_28); primals_28 = None
+ mul_330: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.mul.Tensor(mul_329, 768)
+ sum_121: "f32[1, 64, 1][64, 1, 1]cuda:0" = torch.ops.aten.sum.dim_IntList(mul_329, [2], True)
+ mul_331: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.mul.Tensor(mul_329, mul_16); mul_329 = None
+ sum_122: "f32[1, 64, 1][64, 1, 1]cuda:0" = torch.ops.aten.sum.dim_IntList(mul_331, [2], True); mul_331 = None
+ mul_332: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.mul.Tensor(mul_16, sum_122); sum_122 = None
+ sub_96: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.sub.Tensor(mul_330, sum_121); mul_330 = sum_121 = None
+ sub_97: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.sub.Tensor(sub_96, mul_332); sub_96 = mul_332 = None
+ mul_333: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.mul.Tensor(div_20, sub_97); div_20 = sub_97 = None
+ mul_334: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.mul.Tensor(view_307, mul_16); mul_16 = None
+ sum_123: "f32[768][1]cuda:0" = torch.ops.aten.sum.dim_IntList(mul_334, [0, 1]); mul_334 = None
+ sum_124: "f32[768][1]cuda:0" = torch.ops.aten.sum.dim_IntList(view_307, [0, 1]); view_307 = None
+
+ # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:43 in forward, code: return F.layer_norm(input, self.weight.shape, self.weight, self.bias, 1e-5)
+ add_138: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.add.Tensor(add_137, mul_333); add_137 = mul_333 = None
+
+ # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:130 in forward, code: x = self.c_proj(x)
+ view_308: "f32[64, 768][768, 1]cuda:0" = torch.ops.aten.view.default(add_138, [64, 768])
+ mm_83: "f32[64, 3072][3072, 1]cuda:0" = torch.ops.aten.mm.default(view_308, permute_301); permute_301 = None
+ permute_302: "f32[768, 64][1, 768]cuda:0" = torch.ops.aten.permute.default(view_308, [1, 0])
+ mm_84: "f32[768, 3072][3072, 1]cuda:0" = torch.ops.aten.mm.default(permute_302, view_22); permute_302 = view_22 = None
+ permute_303: "f32[3072, 768][1, 3072]cuda:0" = torch.ops.aten.permute.default(mm_84, [1, 0]); mm_84 = None
+ sum_125: "f32[1, 768][768, 1]cuda:0" = torch.ops.aten.sum.dim_IntList(view_308, [0], True); view_308 = None
+ view_309: "f32[768][1]cuda:0" = torch.ops.aten.view.default(sum_125, [768]); sum_125 = None
+ permute_304: "f32[768, 3072][3072, 1]cuda:0" = torch.ops.aten.permute.default(permute_303, [1, 0]); permute_303 = None
+ view_310: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.view.default(mm_83, [1, 64, 3072]); mm_83 = None
+
+ # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:128 in forward, code: x = self.c_fc(x)
+ view_21: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.view.default(addmm_6, [1, 64, 3072]); addmm_6 = None
+
+ # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:25 in new_gelu, code: 0.5
+ mul_12: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.mul.Tensor(view_21, 0.5)
+ mul_335: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.mul.Tensor(view_310, mul_12); mul_12 = None
+
+ # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:29 in new_gelu, code: + torch.tanh(math.sqrt(2.0 / math.pi) * (x + 0.044715 * torch.pow(x, 3.0)))
+ pow_2: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.pow.Tensor_Scalar(view_21, 3.0)
+ mul_13: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.mul.Tensor(pow_2, 0.044715); pow_2 = None
+ add_14: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.add.Tensor(view_21, mul_13); mul_13 = None
+ mul_14: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.mul.Tensor(add_14, 0.7978845608028654); add_14 = None
+ tanh_1: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.tanh.default(mul_14); mul_14 = None
+
+ # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:28 in new_gelu, code: 1.0
+ add_15: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.add.Tensor(tanh_1, 1.0)
+
+ # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:25 in new_gelu, code: 0.5
+ mul_336: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.mul.Tensor(view_310, add_15); view_310 = add_15 = None
+
+ # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:29 in new_gelu, code: + torch.tanh(math.sqrt(2.0 / math.pi) * (x + 0.044715 * torch.pow(x, 3.0)))
+ mul_337: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.mul.Tensor(tanh_1, tanh_1); tanh_1 = None
+ sub_98: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.sub.Tensor(1, mul_337); mul_337 = None
+ mul_338: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.mul.Tensor(mul_335, sub_98); mul_335 = sub_98 = None
+ mul_339: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.mul.Tensor(mul_338, 0.7978845608028654); mul_338 = None
+ mul_340: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.mul.Tensor(mul_339, 0.044715)
+ pow_23: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.pow.Tensor_Scalar(view_21, 2.0); view_21 = None
+ mul_341: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.mul.Scalar(pow_23, 3.0); pow_23 = None
+ mul_342: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.mul.Tensor(mul_340, mul_341); mul_340 = mul_341 = None
+
+ # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:29 in new_gelu, code: + torch.tanh(math.sqrt(2.0 / math.pi) * (x + 0.044715 * torch.pow(x, 3.0)))
+ add_139: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.add.Tensor(mul_339, mul_342); mul_339 = mul_342 = None
+
+ # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:25 in new_gelu, code: 0.5
+ mul_343: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.mul.Tensor(mul_336, 0.5); mul_336 = None
+
+ # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:25 in new_gelu, code: 0.5
+ add_140: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.add.Tensor(add_139, mul_343); add_139 = mul_343 = None
+
+ # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:128 in forward, code: x = self.c_fc(x)
+ view_311: "f32[64, 3072][3072, 1]cuda:0" = torch.ops.aten.view.default(add_140, [64, 3072]); add_140 = None
+ mm_85: "f32[64, 768][768, 1]cuda:0" = torch.ops.aten.mm.default(view_311, permute_305); permute_305 = None
+ permute_306: "f32[3072, 64][1, 3072]cuda:0" = torch.ops.aten.permute.default(view_311, [1, 0])
+ mm_86: "f32[3072, 768][768, 1]cuda:0" = torch.ops.aten.mm.default(permute_306, view_20); permute_306 = view_20 = None
+ permute_307: "f32[768, 3072][1, 768]cuda:0" = torch.ops.aten.permute.default(mm_86, [1, 0]); mm_86 = None
+ sum_126: "f32[1, 3072][3072, 1]cuda:0" = torch.ops.aten.sum.dim_IntList(view_311, [0], True); view_311 = None
+ view_312: "f32[3072][1]cuda:0" = torch.ops.aten.view.default(sum_126, [3072]); sum_126 = None
+ permute_308: "f32[3072, 768][768, 1]cuda:0" = torch.ops.aten.permute.default(permute_307, [1, 0]); permute_307 = None
+ view_313: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.view.default(mm_85, [1, 64, 768]); mm_85 = None
+
+ # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:43 in forward, code: return F.layer_norm(input, self.weight.shape, self.weight, self.bias, 1e-5)
+ mul_345: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.mul.Tensor(view_313, primals_22); primals_22 = None
+ mul_346: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.mul.Tensor(mul_345, 768)
+ sum_127: "f32[1, 64, 1][64, 1, 1]cuda:0" = torch.ops.aten.sum.dim_IntList(mul_345, [2], True)
+ mul_347: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.mul.Tensor(mul_345, mul_10); mul_345 = None
+ sum_128: "f32[1, 64, 1][64, 1, 1]cuda:0" = torch.ops.aten.sum.dim_IntList(mul_347, [2], True); mul_347 = None
+ mul_348: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.mul.Tensor(mul_10, sum_128); sum_128 = None
+ sub_100: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.sub.Tensor(mul_346, sum_127); mul_346 = sum_127 = None
+ sub_101: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.sub.Tensor(sub_100, mul_348); sub_100 = mul_348 = None
+ mul_349: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.mul.Tensor(div_21, sub_101); div_21 = sub_101 = None
+ mul_350: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.mul.Tensor(view_313, mul_10); mul_10 = None
+ sum_129: "f32[768][1]cuda:0" = torch.ops.aten.sum.dim_IntList(mul_350, [0, 1]); mul_350 = None
+ sum_130: "f32[768][1]cuda:0" = torch.ops.aten.sum.dim_IntList(view_313, [0, 1]); view_313 = None
+
+ # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:43 in forward, code: return F.layer_norm(input, self.weight.shape, self.weight, self.bias, 1e-5)
+ add_141: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.add.Tensor(add_138, mul_349); add_138 = mul_349 = None
+
+ # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:116 in forward, code: y = self.resid_dropout(self.c_proj(y))
+ view_314: "f32[64, 768][768, 1]cuda:0" = torch.ops.aten.view.default(add_141, [64, 768])
+ mm_87: "f32[64, 768][768, 1]cuda:0" = torch.ops.aten.mm.default(view_314, permute_309); permute_309 = None
+ permute_310: "f32[768, 64][1, 768]cuda:0" = torch.ops.aten.permute.default(view_314, [1, 0])
+
+ # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:112 in forward, code: y.transpose(1, 2).contiguous().view(B, T, C)
+ permute_12: "f32[1, 64, 12, 64][49152, 768, 64, 1]cuda:0" = torch.ops.aten.permute.default(getitem_16, [0, 2, 1, 3])
+ view_17: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.view.default(permute_12, [1, 64, 768]); permute_12 = None
+
+ # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:116 in forward, code: y = self.resid_dropout(self.c_proj(y))
+ view_18: "f32[64, 768][768, 1]cuda:0" = torch.ops.aten.view.default(view_17, [64, 768]); view_17 = None
+ mm_88: "f32[768, 768][768, 1]cuda:0" = torch.ops.aten.mm.default(permute_310, view_18); permute_310 = view_18 = None
+ permute_311: "f32[768, 768][1, 768]cuda:0" = torch.ops.aten.permute.default(mm_88, [1, 0]); mm_88 = None
+ sum_131: "f32[1, 768][768, 1]cuda:0" = torch.ops.aten.sum.dim_IntList(view_314, [0], True); view_314 = None
+ view_315: "f32[768][1]cuda:0" = torch.ops.aten.view.default(sum_131, [768]); sum_131 = None
+ permute_312: "f32[768, 768][768, 1]cuda:0" = torch.ops.aten.permute.default(permute_311, [1, 0]); permute_311 = None
+ view_316: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.view.default(mm_87, [1, 64, 768]); mm_87 = None
+
+ # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:112 in forward, code: y.transpose(1, 2).contiguous().view(B, T, C)
+ view_317: "f32[1, 64, 12, 64][49152, 768, 64, 1]cuda:0" = torch.ops.aten.view.default(view_316, [1, 64, 12, 64]); view_316 = None
+ permute_313: "f32[1, 12, 64, 64][49152, 64, 768, 1]cuda:0" = torch.ops.aten.permute.default(view_317, [0, 2, 1, 3]); view_317 = None
+
+ # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:96 in forward, code: y = torch.nn.functional.scaled_dot_product_attention(
+ _scaled_dot_product_efficient_attention_backward_10 = torch.ops.aten._scaled_dot_product_efficient_attention_backward.default(permute_313, permute_10, permute_9, permute_11, None, getitem_16, getitem_17, getitem_18, getitem_19, 0.0, [True, True, True, False], True); permute_313 = permute_10 = permute_9 = permute_11 = getitem_16 = getitem_17 = getitem_18 = getitem_19 = None
+ getitem_174: "f32[1, 12, 64, 64][49152, 64, 768, 1]cuda:0" = _scaled_dot_product_efficient_attention_backward_10[0]
+ getitem_175: "f32[1, 12, 64, 64][49152, 64, 768, 1]cuda:0" = _scaled_dot_product_efficient_attention_backward_10[1]
+ getitem_176: "f32[1, 12, 64, 64][49152, 64, 768, 1]cuda:0" = _scaled_dot_product_efficient_attention_backward_10[2]; _scaled_dot_product_efficient_attention_backward_10 = None
+
+ # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:89 in forward, code: v = v.view(B, T, self.n_head, C // self.n_head).transpose(
+ permute_314: "f32[1, 64, 12, 64][49152, 768, 64, 1]cuda:0" = torch.ops.aten.permute.default(getitem_176, [0, 2, 1, 3]); getitem_176 = None
+ view_318: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.view.default(permute_314, [1, 64, 768]); permute_314 = None
+
+ # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:86 in forward, code: q = q.view(B, T, self.n_head, C // self.n_head).transpose(
+ permute_315: "f32[1, 64, 12, 64][49152, 768, 64, 1]cuda:0" = torch.ops.aten.permute.default(getitem_174, [0, 2, 1, 3]); getitem_174 = None
+ view_319: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.view.default(permute_315, [1, 64, 768]); permute_315 = None
+
+ # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:83 in forward, code: k = k.view(B, T, self.n_head, C // self.n_head).transpose(
+ permute_316: "f32[1, 64, 12, 64][49152, 768, 64, 1]cuda:0" = torch.ops.aten.permute.default(getitem_175, [0, 2, 1, 3]); getitem_175 = None
+ view_320: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.view.default(permute_316, [1, 64, 768]); permute_316 = None
+
+ # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:82 in forward, code: q, k, v = self.c_attn(x).split(self.n_embd, dim=2)
+ cat_10: "f32[1, 64, 2304][147456, 2304, 1]cuda:0" = torch.ops.aten.cat.default([view_319, view_320, view_318], 2); view_319 = view_320 = view_318 = None
+ view_321: "f32[64, 2304][2304, 1]cuda:0" = torch.ops.aten.view.default(cat_10, [64, 2304]); cat_10 = None
+ mm_89: "f32[64, 768][768, 1]cuda:0" = torch.ops.aten.mm.default(view_321, permute_317); permute_317 = None
+ permute_318: "f32[2304, 64][1, 2304]cuda:0" = torch.ops.aten.permute.default(view_321, [1, 0])
+ mm_90: "f32[2304, 768][768, 1]cuda:0" = torch.ops.aten.mm.default(permute_318, view_12); permute_318 = view_12 = None
+ permute_319: "f32[768, 2304][1, 768]cuda:0" = torch.ops.aten.permute.default(mm_90, [1, 0]); mm_90 = None
+ sum_132: "f32[1, 2304][2304, 1]cuda:0" = torch.ops.aten.sum.dim_IntList(view_321, [0], True); view_321 = None
+ view_322: "f32[2304][1]cuda:0" = torch.ops.aten.view.default(sum_132, [2304]); sum_132 = None
+ permute_320: "f32[2304, 768][768, 1]cuda:0" = torch.ops.aten.permute.default(permute_319, [1, 0]); permute_319 = None
+ view_323: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.view.default(mm_89, [1, 64, 768]); mm_89 = None
+
+ # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:43 in forward, code: return F.layer_norm(input, self.weight.shape, self.weight, self.bias, 1e-5)
+ mul_352: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.mul.Tensor(view_323, primals_16); primals_16 = None
+ mul_353: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.mul.Tensor(mul_352, 768)
+ sum_133: "f32[1, 64, 1][64, 1, 1]cuda:0" = torch.ops.aten.sum.dim_IntList(mul_352, [2], True)
+ mul_354: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.mul.Tensor(mul_352, mul_8); mul_352 = None
+ sum_134: "f32[1, 64, 1][64, 1, 1]cuda:0" = torch.ops.aten.sum.dim_IntList(mul_354, [2], True); mul_354 = None
+ mul_355: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.mul.Tensor(mul_8, sum_134); sum_134 = None
+ sub_103: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.sub.Tensor(mul_353, sum_133); mul_353 = sum_133 = None
+ sub_104: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.sub.Tensor(sub_103, mul_355); sub_103 = mul_355 = None
+ mul_356: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.mul.Tensor(div_22, sub_104); div_22 = sub_104 = None
+ mul_357: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.mul.Tensor(view_323, mul_8); mul_8 = None
+ sum_135: "f32[768][1]cuda:0" = torch.ops.aten.sum.dim_IntList(mul_357, [0, 1]); mul_357 = None
+ sum_136: "f32[768][1]cuda:0" = torch.ops.aten.sum.dim_IntList(view_323, [0, 1]); view_323 = None
+
+ # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:43 in forward, code: return F.layer_norm(input, self.weight.shape, self.weight, self.bias, 1e-5)
+ add_142: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.add.Tensor(add_141, mul_356); add_141 = mul_356 = None
+
+ # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:130 in forward, code: x = self.c_proj(x)
+ view_324: "f32[64, 768][768, 1]cuda:0" = torch.ops.aten.view.default(add_142, [64, 768])
+ mm_91: "f32[64, 3072][3072, 1]cuda:0" = torch.ops.aten.mm.default(view_324, permute_321); permute_321 = None
+ permute_322: "f32[768, 64][1, 768]cuda:0" = torch.ops.aten.permute.default(view_324, [1, 0])
+ mm_92: "f32[768, 3072][3072, 1]cuda:0" = torch.ops.aten.mm.default(permute_322, view_10); permute_322 = view_10 = None
+ permute_323: "f32[3072, 768][1, 3072]cuda:0" = torch.ops.aten.permute.default(mm_92, [1, 0]); mm_92 = None
+ sum_137: "f32[1, 768][768, 1]cuda:0" = torch.ops.aten.sum.dim_IntList(view_324, [0], True); view_324 = None
+ view_325: "f32[768][1]cuda:0" = torch.ops.aten.view.default(sum_137, [768]); sum_137 = None
+ permute_324: "f32[768, 3072][3072, 1]cuda:0" = torch.ops.aten.permute.default(permute_323, [1, 0]); permute_323 = None
+ view_326: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.view.default(mm_91, [1, 64, 3072]); mm_91 = None
+
+ # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:128 in forward, code: x = self.c_fc(x)
+ view_9: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.view.default(addmm_2, [1, 64, 3072]); addmm_2 = None
+
+ # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:25 in new_gelu, code: 0.5
+ mul_4: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.mul.Tensor(view_9, 0.5)
+ mul_358: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.mul.Tensor(view_326, mul_4); mul_4 = None
+
+ # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:29 in new_gelu, code: + torch.tanh(math.sqrt(2.0 / math.pi) * (x + 0.044715 * torch.pow(x, 3.0)))
+ pow_1: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.pow.Tensor_Scalar(view_9, 3.0)
+ mul_5: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.mul.Tensor(pow_1, 0.044715); pow_1 = None
+ add_6: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.add.Tensor(view_9, mul_5); mul_5 = None
+ mul_6: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.mul.Tensor(add_6, 0.7978845608028654); add_6 = None
+ tanh: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.tanh.default(mul_6); mul_6 = None
+
+ # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:28 in new_gelu, code: 1.0
+ add_7: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.add.Tensor(tanh, 1.0)
+
+ # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:25 in new_gelu, code: 0.5
+ mul_359: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.mul.Tensor(view_326, add_7); view_326 = add_7 = None
+
+ # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:29 in new_gelu, code: + torch.tanh(math.sqrt(2.0 / math.pi) * (x + 0.044715 * torch.pow(x, 3.0)))
+ mul_360: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.mul.Tensor(tanh, tanh); tanh = None
+ sub_105: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.sub.Tensor(1, mul_360); mul_360 = None
+ mul_361: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.mul.Tensor(mul_358, sub_105); mul_358 = sub_105 = None
+ mul_362: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.mul.Tensor(mul_361, 0.7978845608028654); mul_361 = None
+ mul_363: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.mul.Tensor(mul_362, 0.044715)
+ pow_24: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.pow.Tensor_Scalar(view_9, 2.0); view_9 = None
+ mul_364: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.mul.Scalar(pow_24, 3.0); pow_24 = None
+ mul_365: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.mul.Tensor(mul_363, mul_364); mul_363 = mul_364 = None
+
+ # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:29 in new_gelu, code: + torch.tanh(math.sqrt(2.0 / math.pi) * (x + 0.044715 * torch.pow(x, 3.0)))
+ add_143: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.add.Tensor(mul_362, mul_365); mul_362 = mul_365 = None
+
+ # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:25 in new_gelu, code: 0.5
+ mul_366: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.mul.Tensor(mul_359, 0.5); mul_359 = None
+
+ # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:25 in new_gelu, code: 0.5
+ add_144: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.add.Tensor(add_143, mul_366); add_143 = mul_366 = None
+
+ # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:128 in forward, code: x = self.c_fc(x)
+ view_327: "f32[64, 3072][3072, 1]cuda:0" = torch.ops.aten.view.default(add_144, [64, 3072]); add_144 = None
+ mm_93: "f32[64, 768][768, 1]cuda:0" = torch.ops.aten.mm.default(view_327, permute_325); permute_325 = None
+ permute_326: "f32[3072, 64][1, 3072]cuda:0" = torch.ops.aten.permute.default(view_327, [1, 0])
+ mm_94: "f32[3072, 768][768, 1]cuda:0" = torch.ops.aten.mm.default(permute_326, view_8); permute_326 = view_8 = None
+ permute_327: "f32[768, 3072][1, 768]cuda:0" = torch.ops.aten.permute.default(mm_94, [1, 0]); mm_94 = None
+ sum_138: "f32[1, 3072][3072, 1]cuda:0" = torch.ops.aten.sum.dim_IntList(view_327, [0], True); view_327 = None
+ view_328: "f32[3072][1]cuda:0" = torch.ops.aten.view.default(sum_138, [3072]); sum_138 = None
+ permute_328: "f32[3072, 768][768, 1]cuda:0" = torch.ops.aten.permute.default(permute_327, [1, 0]); permute_327 = None
+ view_329: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.view.default(mm_93, [1, 64, 768]); mm_93 = None
+
+ # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:43 in forward, code: return F.layer_norm(input, self.weight.shape, self.weight, self.bias, 1e-5)
+ mul_368: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.mul.Tensor(view_329, primals_10); primals_10 = None
+ mul_369: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.mul.Tensor(mul_368, 768)
+ sum_139: "f32[1, 64, 1][64, 1, 1]cuda:0" = torch.ops.aten.sum.dim_IntList(mul_368, [2], True)
+ mul_370: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.mul.Tensor(mul_368, mul_2); mul_368 = None
+ sum_140: "f32[1, 64, 1][64, 1, 1]cuda:0" = torch.ops.aten.sum.dim_IntList(mul_370, [2], True); mul_370 = None
+ mul_371: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.mul.Tensor(mul_2, sum_140); sum_140 = None
+ sub_107: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.sub.Tensor(mul_369, sum_139); mul_369 = sum_139 = None
+ sub_108: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.sub.Tensor(sub_107, mul_371); sub_107 = mul_371 = None
+ mul_372: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.mul.Tensor(div_23, sub_108); div_23 = sub_108 = None
+ mul_373: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.mul.Tensor(view_329, mul_2); mul_2 = None
+ sum_141: "f32[768][1]cuda:0" = torch.ops.aten.sum.dim_IntList(mul_373, [0, 1]); mul_373 = None
+ sum_142: "f32[768][1]cuda:0" = torch.ops.aten.sum.dim_IntList(view_329, [0, 1]); view_329 = None
+
+ # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:43 in forward, code: return F.layer_norm(input, self.weight.shape, self.weight, self.bias, 1e-5)
+ add_145: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.add.Tensor(add_142, mul_372); add_142 = mul_372 = None
+
+ # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:116 in forward, code: y = self.resid_dropout(self.c_proj(y))
+ view_330: "f32[64, 768][768, 1]cuda:0" = torch.ops.aten.view.default(add_145, [64, 768])
+ mm_95: "f32[64, 768][768, 1]cuda:0" = torch.ops.aten.mm.default(view_330, permute_329); permute_329 = None
+ permute_330: "f32[768, 64][1, 768]cuda:0" = torch.ops.aten.permute.default(view_330, [1, 0])
+
+ # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:112 in forward, code: y.transpose(1, 2).contiguous().view(B, T, C)
+ permute_4: "f32[1, 64, 12, 64][49152, 768, 64, 1]cuda:0" = torch.ops.aten.permute.default(getitem_5, [0, 2, 1, 3])
+ view_5: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.view.default(permute_4, [1, 64, 768]); permute_4 = None
+
+ # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:116 in forward, code: y = self.resid_dropout(self.c_proj(y))
+ view_6: "f32[64, 768][768, 1]cuda:0" = torch.ops.aten.view.default(view_5, [64, 768]); view_5 = None
+ mm_96: "f32[768, 768][768, 1]cuda:0" = torch.ops.aten.mm.default(permute_330, view_6); permute_330 = view_6 = None
+ permute_331: "f32[768, 768][1, 768]cuda:0" = torch.ops.aten.permute.default(mm_96, [1, 0]); mm_96 = None
+ sum_143: "f32[1, 768][768, 1]cuda:0" = torch.ops.aten.sum.dim_IntList(view_330, [0], True); view_330 = None
+ view_331: "f32[768][1]cuda:0" = torch.ops.aten.view.default(sum_143, [768]); sum_143 = None
+ permute_332: "f32[768, 768][768, 1]cuda:0" = torch.ops.aten.permute.default(permute_331, [1, 0]); permute_331 = None
+ view_332: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.view.default(mm_95, [1, 64, 768]); mm_95 = None
+
+ # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:112 in forward, code: y.transpose(1, 2).contiguous().view(B, T, C)
+ view_333: "f32[1, 64, 12, 64][49152, 768, 64, 1]cuda:0" = torch.ops.aten.view.default(view_332, [1, 64, 12, 64]); view_332 = None
+ permute_333: "f32[1, 12, 64, 64][49152, 64, 768, 1]cuda:0" = torch.ops.aten.permute.default(view_333, [0, 2, 1, 3]); view_333 = None
+
+ # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:96 in forward, code: y = torch.nn.functional.scaled_dot_product_attention(
+ _scaled_dot_product_efficient_attention_backward_11 = torch.ops.aten._scaled_dot_product_efficient_attention_backward.default(permute_333, permute_2, permute_1, permute_3, None, getitem_5, getitem_6, getitem_7, getitem_8, 0.0, [True, True, True, False], True); permute_333 = permute_2 = permute_1 = permute_3 = getitem_5 = getitem_6 = getitem_7 = getitem_8 = None
+ getitem_178: "f32[1, 12, 64, 64][49152, 64, 768, 1]cuda:0" = _scaled_dot_product_efficient_attention_backward_11[0]
+ getitem_179: "f32[1, 12, 64, 64][49152, 64, 768, 1]cuda:0" = _scaled_dot_product_efficient_attention_backward_11[1]
+ getitem_180: "f32[1, 12, 64, 64][49152, 64, 768, 1]cuda:0" = _scaled_dot_product_efficient_attention_backward_11[2]; _scaled_dot_product_efficient_attention_backward_11 = None
+
+ # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:89 in forward, code: v = v.view(B, T, self.n_head, C // self.n_head).transpose(
+ permute_334: "f32[1, 64, 12, 64][49152, 768, 64, 1]cuda:0" = torch.ops.aten.permute.default(getitem_180, [0, 2, 1, 3]); getitem_180 = None
+ view_334: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.view.default(permute_334, [1, 64, 768]); permute_334 = None
+
+ # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:86 in forward, code: q = q.view(B, T, self.n_head, C // self.n_head).transpose(
+ permute_335: "f32[1, 64, 12, 64][49152, 768, 64, 1]cuda:0" = torch.ops.aten.permute.default(getitem_178, [0, 2, 1, 3]); getitem_178 = None
+ view_335: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.view.default(permute_335, [1, 64, 768]); permute_335 = None
+
+ # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:83 in forward, code: k = k.view(B, T, self.n_head, C // self.n_head).transpose(
+ permute_336: "f32[1, 64, 12, 64][49152, 768, 64, 1]cuda:0" = torch.ops.aten.permute.default(getitem_179, [0, 2, 1, 3]); getitem_179 = None
+ view_336: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.view.default(permute_336, [1, 64, 768]); permute_336 = None
+
+ # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:82 in forward, code: q, k, v = self.c_attn(x).split(self.n_embd, dim=2)
+ cat_11: "f32[1, 64, 2304][147456, 2304, 1]cuda:0" = torch.ops.aten.cat.default([view_335, view_336, view_334], 2); view_335 = view_336 = view_334 = None
+ view_337: "f32[64, 2304][2304, 1]cuda:0" = torch.ops.aten.view.default(cat_11, [64, 2304]); cat_11 = None
+ mm_97: "f32[64, 768][768, 1]cuda:0" = torch.ops.aten.mm.default(view_337, permute_337); permute_337 = None
+ permute_338: "f32[2304, 64][1, 2304]cuda:0" = torch.ops.aten.permute.default(view_337, [1, 0])
+ mm_98: "f32[2304, 768][768, 1]cuda:0" = torch.ops.aten.mm.default(permute_338, view); permute_338 = view = None
+ permute_339: "f32[768, 2304][1, 768]cuda:0" = torch.ops.aten.permute.default(mm_98, [1, 0]); mm_98 = None
+ sum_144: "f32[1, 2304][2304, 1]cuda:0" = torch.ops.aten.sum.dim_IntList(view_337, [0], True); view_337 = None
+ view_338: "f32[2304][1]cuda:0" = torch.ops.aten.view.default(sum_144, [2304]); sum_144 = None
+ permute_340: "f32[2304, 768][768, 1]cuda:0" = torch.ops.aten.permute.default(permute_339, [1, 0]); permute_339 = None
+ view_339: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.view.default(mm_97, [1, 64, 768]); mm_97 = None
+
+ # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:43 in forward, code: return F.layer_norm(input, self.weight.shape, self.weight, self.bias, 1e-5)
+ mul_375: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.mul.Tensor(view_339, primals_4); primals_4 = None
+ mul_376: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.mul.Tensor(mul_375, 768)
+ sum_145: "f32[1, 64, 1][64, 1, 1]cuda:0" = torch.ops.aten.sum.dim_IntList(mul_375, [2], True)
+ mul_377: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.mul.Tensor(mul_375, mul); mul_375 = None
+ sum_146: "f32[1, 64, 1][64, 1, 1]cuda:0" = torch.ops.aten.sum.dim_IntList(mul_377, [2], True); mul_377 = None
+ mul_378: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.mul.Tensor(mul, sum_146); sum_146 = None
+ sub_110: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.sub.Tensor(mul_376, sum_145); mul_376 = sum_145 = None
+ sub_111: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.sub.Tensor(sub_110, mul_378); sub_110 = mul_378 = None
+ mul_379: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.mul.Tensor(div_24, sub_111); div_24 = sub_111 = None
+ mul_380: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.mul.Tensor(view_339, mul); mul = None
+ sum_147: "f32[768][1]cuda:0" = torch.ops.aten.sum.dim_IntList(mul_380, [0, 1]); mul_380 = None
+ sum_148: "f32[768][1]cuda:0" = torch.ops.aten.sum.dim_IntList(view_339, [0, 1]); view_339 = None
+
+ # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:43 in forward, code: return F.layer_norm(input, self.weight.shape, self.weight, self.bias, 1e-5)
+ add_146: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.add.Tensor(add_145, mul_379); add_145 = mul_379 = None
+
+ # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:229 in forward, code: pos_emb = self.transformer.wpe(
+ eq: "b8[1, 64][64, 1]cuda:0" = torch.ops.aten.eq.Scalar(unsqueeze, -1)
+ unsqueeze_1: "b8[1, 64, 1][64, 1, 1]cuda:0" = torch.ops.aten.unsqueeze.default(eq, -1); eq = None
+ full_default_4: "f32[][]cuda:0" = torch.ops.aten.full.default([], 0.0, dtype = torch.float32, layout = torch.strided, device = device(type='cuda', index=0), pin_memory = False)
+ where: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.where.self(unsqueeze_1, full_default_4, add_146); unsqueeze_1 = None
+ full_default_5: "f32[1024, 768][768, 1]cuda:0" = torch.ops.aten.full.default([1024, 768], 0, dtype = torch.float32, layout = torch.strided, device = device(type='cuda', index=0), pin_memory = False)
+ _unsafe_index_put: "f32[1024, 768][768, 1]cuda:0" = torch.ops.aten._unsafe_index_put.default(full_default_5, [unsqueeze], where, True); full_default_5 = unsqueeze = where = None
+
+ # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:228 in forward, code: tok_emb = self.transformer.wte(idx) # token embeddings of shape (b, t, n_embd)
+ eq_1: "b8[1, 64][64, 1]cuda:0" = torch.ops.aten.eq.Scalar(primals_1, -1)
+ unsqueeze_2: "b8[1, 64, 1][64, 1, 1]cuda:0" = torch.ops.aten.unsqueeze.default(eq_1, -1); eq_1 = None
+ where_1: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.where.self(unsqueeze_2,
full_default_4, add_146); unsqueeze_2 = full_default_4 = add_146 = None + full_default_7: "f32[50304, 768][768, 1]cuda:0" = torch.ops.aten.full.default([50304, 768], 0, dtype = torch.float32, layout = torch.strided, device = device(type='cuda', index=0), pin_memory = False) + _unsafe_index_put_1: "f32[50304, 768][768, 1]cuda:0" = torch.ops.aten._unsafe_index_put.default(full_default_7, [primals_1], where_1, True); full_default_7 = primals_1 = where_1 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:228 in forward, code: tok_emb = self.transformer.wte(idx) # token embeddings of shape (b, t, n_embd) + add_147: "f32[50304, 768][768, 1]cuda:0" = torch.ops.aten.add.Tensor(permute_100, _unsafe_index_put_1); permute_100 = _unsafe_index_put_1 = None + return (None, add_147, _unsafe_index_put, sum_147, sum_148, permute_340, view_338, permute_332, view_331, sum_141, sum_142, permute_328, view_328, permute_324, view_325, sum_135, sum_136, permute_320, view_322, permute_312, view_315, sum_129, sum_130, permute_308, view_312, permute_304, view_309, sum_123, sum_124, permute_300, view_306, permute_292, view_299, sum_117, sum_118, permute_288, view_296, permute_284, view_293, sum_111, sum_112, permute_280, view_290, permute_272, view_283, sum_105, sum_106, permute_268, view_280, permute_264, view_277, sum_99, sum_100, permute_260, view_274, permute_252, view_267, sum_93, sum_94, permute_248, view_264, permute_244, view_261, sum_87, sum_88, permute_240, view_258, permute_232, view_251, sum_81, sum_82, permute_228, view_248, permute_224, view_245, sum_75, sum_76, permute_220, view_242, permute_212, view_235, sum_69, sum_70, permute_208, view_232, permute_204, view_229, sum_63, sum_64, permute_200, view_226, permute_192, view_219, sum_57, sum_58, permute_188, view_216, permute_184, view_213, sum_51, sum_52, permute_180, view_210, permute_172, view_203, sum_45, sum_46, permute_168, view_200, permute_164, view_197, sum_39, sum_40, permute_160, view_194, permute_152, view_187, sum_33, sum_34, permute_148, view_184, permute_144, view_181, sum_27, sum_28, permute_140, view_178, permute_132, view_171, sum_21, sum_22, permute_128, view_168, permute_124, view_165, sum_15, sum_16, permute_120, view_162, permute_112, view_155, sum_9, sum_10, permute_108, view_152, permute_104, view_149, sum_3, sum_4) + +V0806 13:55:55.887000 4107173 torch/_dynamo/utils.py:824] {"chromium_event": {}, "has_payload": "059856ba64cec1d0c6ac4ed56652b6b2"} + { + "name": "compile_fx.<locals>.fw_compiler_base", + "ts": 1722977755887911.2, + "args": null, + "ph": "B", + "pid": 0 + } +V0806 13:55:55.888000 4107173 torch/_dynamo/utils.py:824] {"chromium_event": {}, "has_payload": "7483f59044110884390a2269fad6d0ef"} + { + "name": "compile_fx_inner", + "ts": 1722977755888329.5, + "args": null, + "ph": "B", + "pid": 0 + } +V0806 13:55:55.888000 4107173 torch/_dynamo/utils.py:824] {"chromium_event": {}, "has_payload": "83a9a08ca4aba301017b27afa68b083a"} + { + "name": "inductor_compile", + "ts": 1722977755888403.8, + "args": null, + "ph": "B", + "pid": 0 + } +V0806 13:55:56.409000 4107173 torch/_inductor/compile_fx.py:719] {"artifact": {"name": "fx_graph_runnable", "encoding": "string"}, "frame_id": 2, "frame_compile_id": 0, "attempt": 1, "has_payload": "b567f1d921caff1709ea228157b31270"} + + import torch + from torch import tensor, device + import torch.fx as fx + from torch._dynamo.testing import rand_strided + from math import inf + import torch._inductor.inductor_prims + + import torch._dynamo.config + import
torch._inductor.config + import torch._functorch.config + import torch.fx.experimental._config + + torch._inductor.config.triton.cudagraphs = True + torch._functorch.config.unlift_effect_tokens = True + + + + isolate_fails_code_str = None + + + + # torch version: 2.5.0a0+git6fbc72b + # torch cuda version: 12.0 + # torch git version: 6fbc72b6d764eaeb9ef896840c7996ca2a35188d + + + # CUDA Info: + # nvcc: NVIDIA (R) Cuda compiler driver + # Copyright (c) 2005-2023 NVIDIA Corporation + # Built on Fri_Jan__6_16:45:21_PST_2023 + # Cuda compilation tools, release 12.0, V12.0.140 + # Build cuda_12.0.r12.0/compiler.32267302_0 + + # GPU Hardware Info: + # NVIDIA H100 : 1 + + + from torch.nn import * + class Repro(torch.nn.Module): + def __init__(self) -> None: + super().__init__() + + + + def forward(self, primals_1, primals_2, primals_3, primals_4, primals_5, primals_6, primals_7, primals_8, primals_9, primals_10, primals_11, primals_12, primals_13, primals_14, primals_15, primals_16, primals_17, primals_18, primals_19, primals_20, primals_21, primals_22, primals_23, primals_24, primals_25, primals_26, primals_27, primals_28, primals_29, primals_30, primals_31, primals_32, primals_33, primals_34, primals_35, primals_36, primals_37, primals_38, primals_39, primals_40, primals_41, primals_42, primals_43, primals_44, primals_45, primals_46, primals_47, primals_48, primals_49, primals_50, primals_51, primals_52, primals_53, primals_54, primals_55, primals_56, primals_57, primals_58, primals_59, primals_60, primals_61, primals_62, primals_63, primals_64, primals_65, primals_66, primals_67, primals_68, primals_69, primals_70, primals_71, primals_72, primals_73, primals_74, primals_75, primals_76, primals_77, primals_78, primals_79, primals_80, primals_81, primals_82, primals_83, primals_84, primals_85, primals_86, primals_87, primals_88, primals_89, primals_90, primals_91, primals_92, primals_93, primals_94, primals_95, primals_96, primals_97, primals_98, primals_99, primals_100, primals_101, primals_102, primals_103, primals_104, primals_105, primals_106, primals_107, primals_108, primals_109, primals_110, primals_111, primals_112, primals_113, primals_114, primals_115, primals_116, primals_117, primals_118, primals_119, primals_120, primals_121, primals_122, primals_123, primals_124, primals_125, primals_126, primals_127, primals_128, primals_129, primals_130, primals_131, primals_132, primals_133, primals_134, primals_135, primals_136, primals_137, primals_138, primals_139, primals_140, primals_141, primals_142, primals_143, primals_144, primals_145, primals_146, primals_147, primals_148, primals_149): + iota = torch.ops.prims.iota.default(64, start = 0, step = 1, dtype = torch.int64, device = device(type='cuda', index=0), requires_grad = False) + unsqueeze = torch.ops.aten.unsqueeze.default(iota, 0); iota = None + embedding = torch.ops.aten.embedding.default(primals_2, primals_1) + embedding_1 = torch.ops.aten.embedding.default(primals_3, unsqueeze); primals_3 = None + add = torch.ops.aten.add.Tensor(embedding, embedding_1); embedding = embedding_1 = None + var_mean = torch.ops.aten.var_mean.correction(add, [2], correction = 0, keepdim = True) + getitem = var_mean[0] + getitem_1 = var_mean[1]; var_mean = None + add_1 = torch.ops.aten.add.Tensor(getitem, 1e-05); getitem = None + rsqrt = torch.ops.aten.rsqrt.default(add_1); add_1 = None + sub = torch.ops.aten.sub.Tensor(add, getitem_1); getitem_1 = None + mul = torch.ops.aten.mul.Tensor(sub, rsqrt); sub = None + mul_1 = torch.ops.aten.mul.Tensor(mul, 
primals_4) + add_2 = torch.ops.aten.add.Tensor(mul_1, primals_5); mul_1 = primals_5 = None + view = torch.ops.aten.view.default(add_2, [64, 768]); add_2 = None + permute = torch.ops.aten.permute.default(primals_6, [1, 0]); primals_6 = None + addmm = torch.ops.aten.addmm.default(primals_7, view, permute); primals_7 = None + view_1 = torch.ops.aten.view.default(addmm, [1, 64, 2304]); addmm = None + split = torch.ops.aten.split.Tensor(view_1, 768, 2); view_1 = None + getitem_2 = split[0] + getitem_3 = split[1] + getitem_4 = split[2]; split = None + view_2 = torch.ops.aten.view.default(getitem_3, [1, 64, 12, 64]); getitem_3 = None + permute_1 = torch.ops.aten.permute.default(view_2, [0, 2, 1, 3]); view_2 = None + view_3 = torch.ops.aten.view.default(getitem_2, [1, 64, 12, 64]); getitem_2 = None + permute_2 = torch.ops.aten.permute.default(view_3, [0, 2, 1, 3]); view_3 = None + view_4 = torch.ops.aten.view.default(getitem_4, [1, 64, 12, 64]); getitem_4 = None + permute_3 = torch.ops.aten.permute.default(view_4, [0, 2, 1, 3]); view_4 = None + _scaled_dot_product_efficient_attention = torch.ops.aten._scaled_dot_product_efficient_attention.default(permute_2, permute_1, permute_3, None, True, 0.0, True) + getitem_5 = _scaled_dot_product_efficient_attention[0] + getitem_6 = _scaled_dot_product_efficient_attention[1] + getitem_7 = _scaled_dot_product_efficient_attention[2] + getitem_8 = _scaled_dot_product_efficient_attention[3]; _scaled_dot_product_efficient_attention = None + permute_4 = torch.ops.aten.permute.default(getitem_5, [0, 2, 1, 3]) + view_5 = torch.ops.aten.view.default(permute_4, [1, 64, 768]); permute_4 = None + view_6 = torch.ops.aten.view.default(view_5, [64, 768]); view_5 = None + permute_5 = torch.ops.aten.permute.default(primals_8, [1, 0]); primals_8 = None + addmm_1 = torch.ops.aten.addmm.default(primals_9, view_6, permute_5); primals_9 = view_6 = None + view_7 = torch.ops.aten.view.default(addmm_1, [1, 64, 768]); addmm_1 = None + add_3 = torch.ops.aten.add.Tensor(add, view_7); add = view_7 = None + var_mean_1 = torch.ops.aten.var_mean.correction(add_3, [2], correction = 0, keepdim = True) + getitem_9 = var_mean_1[0] + getitem_10 = var_mean_1[1]; var_mean_1 = None + add_4 = torch.ops.aten.add.Tensor(getitem_9, 1e-05); getitem_9 = None + rsqrt_1 = torch.ops.aten.rsqrt.default(add_4); add_4 = None + sub_1 = torch.ops.aten.sub.Tensor(add_3, getitem_10); getitem_10 = None + mul_2 = torch.ops.aten.mul.Tensor(sub_1, rsqrt_1); sub_1 = None + mul_3 = torch.ops.aten.mul.Tensor(mul_2, primals_10) + add_5 = torch.ops.aten.add.Tensor(mul_3, primals_11); mul_3 = primals_11 = None + view_8 = torch.ops.aten.view.default(add_5, [64, 768]); add_5 = None + permute_6 = torch.ops.aten.permute.default(primals_12, [1, 0]); primals_12 = None + addmm_2 = torch.ops.aten.addmm.default(primals_13, view_8, permute_6); primals_13 = None + view_9 = torch.ops.aten.view.default(addmm_2, [1, 64, 3072]) + mul_4 = torch.ops.aten.mul.Tensor(view_9, 0.5) + pow_1 = torch.ops.aten.pow.Tensor_Scalar(view_9, 3.0) + mul_5 = torch.ops.aten.mul.Tensor(pow_1, 0.044715); pow_1 = None + add_6 = torch.ops.aten.add.Tensor(view_9, mul_5); view_9 = mul_5 = None + mul_6 = torch.ops.aten.mul.Tensor(add_6, 0.7978845608028654); add_6 = None + tanh = torch.ops.aten.tanh.default(mul_6); mul_6 = None + add_7 = torch.ops.aten.add.Tensor(tanh, 1.0); tanh = None + mul_7 = torch.ops.aten.mul.Tensor(mul_4, add_7); mul_4 = add_7 = None + view_10 = torch.ops.aten.view.default(mul_7, [64, 3072]); mul_7 = None + permute_7 = 
torch.ops.aten.permute.default(primals_14, [1, 0]); primals_14 = None + addmm_3 = torch.ops.aten.addmm.default(primals_15, view_10, permute_7); primals_15 = None + view_11 = torch.ops.aten.view.default(addmm_3, [1, 64, 768]); addmm_3 = None + add_8 = torch.ops.aten.add.Tensor(add_3, view_11); add_3 = view_11 = None + var_mean_2 = torch.ops.aten.var_mean.correction(add_8, [2], correction = 0, keepdim = True) + getitem_11 = var_mean_2[0] + getitem_12 = var_mean_2[1]; var_mean_2 = None + add_9 = torch.ops.aten.add.Tensor(getitem_11, 1e-05); getitem_11 = None + rsqrt_2 = torch.ops.aten.rsqrt.default(add_9); add_9 = None + sub_2 = torch.ops.aten.sub.Tensor(add_8, getitem_12); getitem_12 = None + mul_8 = torch.ops.aten.mul.Tensor(sub_2, rsqrt_2); sub_2 = None + mul_9 = torch.ops.aten.mul.Tensor(mul_8, primals_16) + add_10 = torch.ops.aten.add.Tensor(mul_9, primals_17); mul_9 = primals_17 = None + view_12 = torch.ops.aten.view.default(add_10, [64, 768]); add_10 = None + permute_8 = torch.ops.aten.permute.default(primals_18, [1, 0]); primals_18 = None + addmm_4 = torch.ops.aten.addmm.default(primals_19, view_12, permute_8); primals_19 = None + view_13 = torch.ops.aten.view.default(addmm_4, [1, 64, 2304]); addmm_4 = None + split_1 = torch.ops.aten.split.Tensor(view_13, 768, 2); view_13 = None + getitem_13 = split_1[0] + getitem_14 = split_1[1] + getitem_15 = split_1[2]; split_1 = None + view_14 = torch.ops.aten.view.default(getitem_14, [1, 64, 12, 64]); getitem_14 = None + permute_9 = torch.ops.aten.permute.default(view_14, [0, 2, 1, 3]); view_14 = None + view_15 = torch.ops.aten.view.default(getitem_13, [1, 64, 12, 64]); getitem_13 = None + permute_10 = torch.ops.aten.permute.default(view_15, [0, 2, 1, 3]); view_15 = None + view_16 = torch.ops.aten.view.default(getitem_15, [1, 64, 12, 64]); getitem_15 = None + permute_11 = torch.ops.aten.permute.default(view_16, [0, 2, 1, 3]); view_16 = None + _scaled_dot_product_efficient_attention_1 = torch.ops.aten._scaled_dot_product_efficient_attention.default(permute_10, permute_9, permute_11, None, True, 0.0, True) + getitem_16 = _scaled_dot_product_efficient_attention_1[0] + getitem_17 = _scaled_dot_product_efficient_attention_1[1] + getitem_18 = _scaled_dot_product_efficient_attention_1[2] + getitem_19 = _scaled_dot_product_efficient_attention_1[3]; _scaled_dot_product_efficient_attention_1 = None + permute_12 = torch.ops.aten.permute.default(getitem_16, [0, 2, 1, 3]) + view_17 = torch.ops.aten.view.default(permute_12, [1, 64, 768]); permute_12 = None + view_18 = torch.ops.aten.view.default(view_17, [64, 768]); view_17 = None + permute_13 = torch.ops.aten.permute.default(primals_20, [1, 0]); primals_20 = None + addmm_5 = torch.ops.aten.addmm.default(primals_21, view_18, permute_13); primals_21 = view_18 = None + view_19 = torch.ops.aten.view.default(addmm_5, [1, 64, 768]); addmm_5 = None + add_11 = torch.ops.aten.add.Tensor(add_8, view_19); add_8 = view_19 = None + var_mean_3 = torch.ops.aten.var_mean.correction(add_11, [2], correction = 0, keepdim = True) + getitem_20 = var_mean_3[0] + getitem_21 = var_mean_3[1]; var_mean_3 = None + add_12 = torch.ops.aten.add.Tensor(getitem_20, 1e-05); getitem_20 = None + rsqrt_3 = torch.ops.aten.rsqrt.default(add_12); add_12 = None + sub_3 = torch.ops.aten.sub.Tensor(add_11, getitem_21); getitem_21 = None + mul_10 = torch.ops.aten.mul.Tensor(sub_3, rsqrt_3); sub_3 = None + mul_11 = torch.ops.aten.mul.Tensor(mul_10, primals_22) + add_13 = torch.ops.aten.add.Tensor(mul_11, primals_23); mul_11 = primals_23 = None + 
view_20 = torch.ops.aten.view.default(add_13, [64, 768]); add_13 = None + permute_14 = torch.ops.aten.permute.default(primals_24, [1, 0]); primals_24 = None + addmm_6 = torch.ops.aten.addmm.default(primals_25, view_20, permute_14); primals_25 = None + view_21 = torch.ops.aten.view.default(addmm_6, [1, 64, 3072]) + mul_12 = torch.ops.aten.mul.Tensor(view_21, 0.5) + pow_2 = torch.ops.aten.pow.Tensor_Scalar(view_21, 3.0) + mul_13 = torch.ops.aten.mul.Tensor(pow_2, 0.044715); pow_2 = None + add_14 = torch.ops.aten.add.Tensor(view_21, mul_13); view_21 = mul_13 = None + mul_14 = torch.ops.aten.mul.Tensor(add_14, 0.7978845608028654); add_14 = None + tanh_1 = torch.ops.aten.tanh.default(mul_14); mul_14 = None + add_15 = torch.ops.aten.add.Tensor(tanh_1, 1.0); tanh_1 = None + mul_15 = torch.ops.aten.mul.Tensor(mul_12, add_15); mul_12 = add_15 = None + view_22 = torch.ops.aten.view.default(mul_15, [64, 3072]); mul_15 = None + permute_15 = torch.ops.aten.permute.default(primals_26, [1, 0]); primals_26 = None + addmm_7 = torch.ops.aten.addmm.default(primals_27, view_22, permute_15); primals_27 = None + view_23 = torch.ops.aten.view.default(addmm_7, [1, 64, 768]); addmm_7 = None + add_16 = torch.ops.aten.add.Tensor(add_11, view_23); add_11 = view_23 = None + var_mean_4 = torch.ops.aten.var_mean.correction(add_16, [2], correction = 0, keepdim = True) + getitem_22 = var_mean_4[0] + getitem_23 = var_mean_4[1]; var_mean_4 = None + add_17 = torch.ops.aten.add.Tensor(getitem_22, 1e-05); getitem_22 = None + rsqrt_4 = torch.ops.aten.rsqrt.default(add_17); add_17 = None + sub_4 = torch.ops.aten.sub.Tensor(add_16, getitem_23); getitem_23 = None + mul_16 = torch.ops.aten.mul.Tensor(sub_4, rsqrt_4); sub_4 = None + mul_17 = torch.ops.aten.mul.Tensor(mul_16, primals_28) + add_18 = torch.ops.aten.add.Tensor(mul_17, primals_29); mul_17 = primals_29 = None + view_24 = torch.ops.aten.view.default(add_18, [64, 768]); add_18 = None + permute_16 = torch.ops.aten.permute.default(primals_30, [1, 0]); primals_30 = None + addmm_8 = torch.ops.aten.addmm.default(primals_31, view_24, permute_16); primals_31 = None + view_25 = torch.ops.aten.view.default(addmm_8, [1, 64, 2304]); addmm_8 = None + split_2 = torch.ops.aten.split.Tensor(view_25, 768, 2); view_25 = None + getitem_24 = split_2[0] + getitem_25 = split_2[1] + getitem_26 = split_2[2]; split_2 = None + view_26 = torch.ops.aten.view.default(getitem_25, [1, 64, 12, 64]); getitem_25 = None + permute_17 = torch.ops.aten.permute.default(view_26, [0, 2, 1, 3]); view_26 = None + view_27 = torch.ops.aten.view.default(getitem_24, [1, 64, 12, 64]); getitem_24 = None + permute_18 = torch.ops.aten.permute.default(view_27, [0, 2, 1, 3]); view_27 = None + view_28 = torch.ops.aten.view.default(getitem_26, [1, 64, 12, 64]); getitem_26 = None + permute_19 = torch.ops.aten.permute.default(view_28, [0, 2, 1, 3]); view_28 = None + _scaled_dot_product_efficient_attention_2 = torch.ops.aten._scaled_dot_product_efficient_attention.default(permute_18, permute_17, permute_19, None, True, 0.0, True) + getitem_27 = _scaled_dot_product_efficient_attention_2[0] + getitem_28 = _scaled_dot_product_efficient_attention_2[1] + getitem_29 = _scaled_dot_product_efficient_attention_2[2] + getitem_30 = _scaled_dot_product_efficient_attention_2[3]; _scaled_dot_product_efficient_attention_2 = None + permute_20 = torch.ops.aten.permute.default(getitem_27, [0, 2, 1, 3]) + view_29 = torch.ops.aten.view.default(permute_20, [1, 64, 768]); permute_20 = None + view_30 = torch.ops.aten.view.default(view_29, [64, 768]); 
view_29 = None + permute_21 = torch.ops.aten.permute.default(primals_32, [1, 0]); primals_32 = None + addmm_9 = torch.ops.aten.addmm.default(primals_33, view_30, permute_21); primals_33 = view_30 = None + view_31 = torch.ops.aten.view.default(addmm_9, [1, 64, 768]); addmm_9 = None + add_19 = torch.ops.aten.add.Tensor(add_16, view_31); add_16 = view_31 = None + var_mean_5 = torch.ops.aten.var_mean.correction(add_19, [2], correction = 0, keepdim = True) + getitem_31 = var_mean_5[0] + getitem_32 = var_mean_5[1]; var_mean_5 = None + add_20 = torch.ops.aten.add.Tensor(getitem_31, 1e-05); getitem_31 = None + rsqrt_5 = torch.ops.aten.rsqrt.default(add_20); add_20 = None + sub_5 = torch.ops.aten.sub.Tensor(add_19, getitem_32); getitem_32 = None + mul_18 = torch.ops.aten.mul.Tensor(sub_5, rsqrt_5); sub_5 = None + mul_19 = torch.ops.aten.mul.Tensor(mul_18, primals_34) + add_21 = torch.ops.aten.add.Tensor(mul_19, primals_35); mul_19 = primals_35 = None + view_32 = torch.ops.aten.view.default(add_21, [64, 768]); add_21 = None + permute_22 = torch.ops.aten.permute.default(primals_36, [1, 0]); primals_36 = None + addmm_10 = torch.ops.aten.addmm.default(primals_37, view_32, permute_22); primals_37 = None + view_33 = torch.ops.aten.view.default(addmm_10, [1, 64, 3072]) + mul_20 = torch.ops.aten.mul.Tensor(view_33, 0.5) + pow_3 = torch.ops.aten.pow.Tensor_Scalar(view_33, 3.0) + mul_21 = torch.ops.aten.mul.Tensor(pow_3, 0.044715); pow_3 = None + add_22 = torch.ops.aten.add.Tensor(view_33, mul_21); view_33 = mul_21 = None + mul_22 = torch.ops.aten.mul.Tensor(add_22, 0.7978845608028654); add_22 = None + tanh_2 = torch.ops.aten.tanh.default(mul_22); mul_22 = None + add_23 = torch.ops.aten.add.Tensor(tanh_2, 1.0); tanh_2 = None + mul_23 = torch.ops.aten.mul.Tensor(mul_20, add_23); mul_20 = add_23 = None + view_34 = torch.ops.aten.view.default(mul_23, [64, 3072]); mul_23 = None + permute_23 = torch.ops.aten.permute.default(primals_38, [1, 0]); primals_38 = None + addmm_11 = torch.ops.aten.addmm.default(primals_39, view_34, permute_23); primals_39 = None + view_35 = torch.ops.aten.view.default(addmm_11, [1, 64, 768]); addmm_11 = None + add_24 = torch.ops.aten.add.Tensor(add_19, view_35); add_19 = view_35 = None + var_mean_6 = torch.ops.aten.var_mean.correction(add_24, [2], correction = 0, keepdim = True) + getitem_33 = var_mean_6[0] + getitem_34 = var_mean_6[1]; var_mean_6 = None + add_25 = torch.ops.aten.add.Tensor(getitem_33, 1e-05); getitem_33 = None + rsqrt_6 = torch.ops.aten.rsqrt.default(add_25); add_25 = None + sub_6 = torch.ops.aten.sub.Tensor(add_24, getitem_34); getitem_34 = None + mul_24 = torch.ops.aten.mul.Tensor(sub_6, rsqrt_6); sub_6 = None + mul_25 = torch.ops.aten.mul.Tensor(mul_24, primals_40) + add_26 = torch.ops.aten.add.Tensor(mul_25, primals_41); mul_25 = primals_41 = None + view_36 = torch.ops.aten.view.default(add_26, [64, 768]); add_26 = None + permute_24 = torch.ops.aten.permute.default(primals_42, [1, 0]); primals_42 = None + addmm_12 = torch.ops.aten.addmm.default(primals_43, view_36, permute_24); primals_43 = None + view_37 = torch.ops.aten.view.default(addmm_12, [1, 64, 2304]); addmm_12 = None + split_3 = torch.ops.aten.split.Tensor(view_37, 768, 2); view_37 = None + getitem_35 = split_3[0] + getitem_36 = split_3[1] + getitem_37 = split_3[2]; split_3 = None + view_38 = torch.ops.aten.view.default(getitem_36, [1, 64, 12, 64]); getitem_36 = None + permute_25 = torch.ops.aten.permute.default(view_38, [0, 2, 1, 3]); view_38 = None + view_39 = torch.ops.aten.view.default(getitem_35, [1, 
64, 12, 64]); getitem_35 = None + permute_26 = torch.ops.aten.permute.default(view_39, [0, 2, 1, 3]); view_39 = None + view_40 = torch.ops.aten.view.default(getitem_37, [1, 64, 12, 64]); getitem_37 = None + permute_27 = torch.ops.aten.permute.default(view_40, [0, 2, 1, 3]); view_40 = None + _scaled_dot_product_efficient_attention_3 = torch.ops.aten._scaled_dot_product_efficient_attention.default(permute_26, permute_25, permute_27, None, True, 0.0, True) + getitem_38 = _scaled_dot_product_efficient_attention_3[0] + getitem_39 = _scaled_dot_product_efficient_attention_3[1] + getitem_40 = _scaled_dot_product_efficient_attention_3[2] + getitem_41 = _scaled_dot_product_efficient_attention_3[3]; _scaled_dot_product_efficient_attention_3 = None + permute_28 = torch.ops.aten.permute.default(getitem_38, [0, 2, 1, 3]) + view_41 = torch.ops.aten.view.default(permute_28, [1, 64, 768]); permute_28 = None + view_42 = torch.ops.aten.view.default(view_41, [64, 768]); view_41 = None + permute_29 = torch.ops.aten.permute.default(primals_44, [1, 0]); primals_44 = None + addmm_13 = torch.ops.aten.addmm.default(primals_45, view_42, permute_29); primals_45 = view_42 = None + view_43 = torch.ops.aten.view.default(addmm_13, [1, 64, 768]); addmm_13 = None + add_27 = torch.ops.aten.add.Tensor(add_24, view_43); add_24 = view_43 = None + var_mean_7 = torch.ops.aten.var_mean.correction(add_27, [2], correction = 0, keepdim = True) + getitem_42 = var_mean_7[0] + getitem_43 = var_mean_7[1]; var_mean_7 = None + add_28 = torch.ops.aten.add.Tensor(getitem_42, 1e-05); getitem_42 = None + rsqrt_7 = torch.ops.aten.rsqrt.default(add_28); add_28 = None + sub_7 = torch.ops.aten.sub.Tensor(add_27, getitem_43); getitem_43 = None + mul_26 = torch.ops.aten.mul.Tensor(sub_7, rsqrt_7); sub_7 = None + mul_27 = torch.ops.aten.mul.Tensor(mul_26, primals_46) + add_29 = torch.ops.aten.add.Tensor(mul_27, primals_47); mul_27 = primals_47 = None + view_44 = torch.ops.aten.view.default(add_29, [64, 768]); add_29 = None + permute_30 = torch.ops.aten.permute.default(primals_48, [1, 0]); primals_48 = None + addmm_14 = torch.ops.aten.addmm.default(primals_49, view_44, permute_30); primals_49 = None + view_45 = torch.ops.aten.view.default(addmm_14, [1, 64, 3072]) + mul_28 = torch.ops.aten.mul.Tensor(view_45, 0.5) + pow_4 = torch.ops.aten.pow.Tensor_Scalar(view_45, 3.0) + mul_29 = torch.ops.aten.mul.Tensor(pow_4, 0.044715); pow_4 = None + add_30 = torch.ops.aten.add.Tensor(view_45, mul_29); view_45 = mul_29 = None + mul_30 = torch.ops.aten.mul.Tensor(add_30, 0.7978845608028654); add_30 = None + tanh_3 = torch.ops.aten.tanh.default(mul_30); mul_30 = None + add_31 = torch.ops.aten.add.Tensor(tanh_3, 1.0); tanh_3 = None + mul_31 = torch.ops.aten.mul.Tensor(mul_28, add_31); mul_28 = add_31 = None + view_46 = torch.ops.aten.view.default(mul_31, [64, 3072]); mul_31 = None + permute_31 = torch.ops.aten.permute.default(primals_50, [1, 0]); primals_50 = None + addmm_15 = torch.ops.aten.addmm.default(primals_51, view_46, permute_31); primals_51 = None + view_47 = torch.ops.aten.view.default(addmm_15, [1, 64, 768]); addmm_15 = None + add_32 = torch.ops.aten.add.Tensor(add_27, view_47); add_27 = view_47 = None + var_mean_8 = torch.ops.aten.var_mean.correction(add_32, [2], correction = 0, keepdim = True) + getitem_44 = var_mean_8[0] + getitem_45 = var_mean_8[1]; var_mean_8 = None + add_33 = torch.ops.aten.add.Tensor(getitem_44, 1e-05); getitem_44 = None + rsqrt_8 = torch.ops.aten.rsqrt.default(add_33); add_33 = None + sub_8 = torch.ops.aten.sub.Tensor(add_32, 
getitem_45); getitem_45 = None + mul_32 = torch.ops.aten.mul.Tensor(sub_8, rsqrt_8); sub_8 = None + mul_33 = torch.ops.aten.mul.Tensor(mul_32, primals_52) + add_34 = torch.ops.aten.add.Tensor(mul_33, primals_53); mul_33 = primals_53 = None + view_48 = torch.ops.aten.view.default(add_34, [64, 768]); add_34 = None + permute_32 = torch.ops.aten.permute.default(primals_54, [1, 0]); primals_54 = None + addmm_16 = torch.ops.aten.addmm.default(primals_55, view_48, permute_32); primals_55 = None + view_49 = torch.ops.aten.view.default(addmm_16, [1, 64, 2304]); addmm_16 = None + split_4 = torch.ops.aten.split.Tensor(view_49, 768, 2); view_49 = None + getitem_46 = split_4[0] + getitem_47 = split_4[1] + getitem_48 = split_4[2]; split_4 = None + view_50 = torch.ops.aten.view.default(getitem_47, [1, 64, 12, 64]); getitem_47 = None + permute_33 = torch.ops.aten.permute.default(view_50, [0, 2, 1, 3]); view_50 = None + view_51 = torch.ops.aten.view.default(getitem_46, [1, 64, 12, 64]); getitem_46 = None + permute_34 = torch.ops.aten.permute.default(view_51, [0, 2, 1, 3]); view_51 = None + view_52 = torch.ops.aten.view.default(getitem_48, [1, 64, 12, 64]); getitem_48 = None + permute_35 = torch.ops.aten.permute.default(view_52, [0, 2, 1, 3]); view_52 = None + _scaled_dot_product_efficient_attention_4 = torch.ops.aten._scaled_dot_product_efficient_attention.default(permute_34, permute_33, permute_35, None, True, 0.0, True) + getitem_49 = _scaled_dot_product_efficient_attention_4[0] + getitem_50 = _scaled_dot_product_efficient_attention_4[1] + getitem_51 = _scaled_dot_product_efficient_attention_4[2] + getitem_52 = _scaled_dot_product_efficient_attention_4[3]; _scaled_dot_product_efficient_attention_4 = None + permute_36 = torch.ops.aten.permute.default(getitem_49, [0, 2, 1, 3]) + view_53 = torch.ops.aten.view.default(permute_36, [1, 64, 768]); permute_36 = None + view_54 = torch.ops.aten.view.default(view_53, [64, 768]); view_53 = None + permute_37 = torch.ops.aten.permute.default(primals_56, [1, 0]); primals_56 = None + addmm_17 = torch.ops.aten.addmm.default(primals_57, view_54, permute_37); primals_57 = view_54 = None + view_55 = torch.ops.aten.view.default(addmm_17, [1, 64, 768]); addmm_17 = None + add_35 = torch.ops.aten.add.Tensor(add_32, view_55); add_32 = view_55 = None + var_mean_9 = torch.ops.aten.var_mean.correction(add_35, [2], correction = 0, keepdim = True) + getitem_53 = var_mean_9[0] + getitem_54 = var_mean_9[1]; var_mean_9 = None + add_36 = torch.ops.aten.add.Tensor(getitem_53, 1e-05); getitem_53 = None + rsqrt_9 = torch.ops.aten.rsqrt.default(add_36); add_36 = None + sub_9 = torch.ops.aten.sub.Tensor(add_35, getitem_54); getitem_54 = None + mul_34 = torch.ops.aten.mul.Tensor(sub_9, rsqrt_9); sub_9 = None + mul_35 = torch.ops.aten.mul.Tensor(mul_34, primals_58) + add_37 = torch.ops.aten.add.Tensor(mul_35, primals_59); mul_35 = primals_59 = None + view_56 = torch.ops.aten.view.default(add_37, [64, 768]); add_37 = None + permute_38 = torch.ops.aten.permute.default(primals_60, [1, 0]); primals_60 = None + addmm_18 = torch.ops.aten.addmm.default(primals_61, view_56, permute_38); primals_61 = None + view_57 = torch.ops.aten.view.default(addmm_18, [1, 64, 3072]) + mul_36 = torch.ops.aten.mul.Tensor(view_57, 0.5) + pow_5 = torch.ops.aten.pow.Tensor_Scalar(view_57, 3.0) + mul_37 = torch.ops.aten.mul.Tensor(pow_5, 0.044715); pow_5 = None + add_38 = torch.ops.aten.add.Tensor(view_57, mul_37); view_57 = mul_37 = None + mul_38 = torch.ops.aten.mul.Tensor(add_38, 0.7978845608028654); add_38 = None + 
tanh_4 = torch.ops.aten.tanh.default(mul_38); mul_38 = None + add_39 = torch.ops.aten.add.Tensor(tanh_4, 1.0); tanh_4 = None + mul_39 = torch.ops.aten.mul.Tensor(mul_36, add_39); mul_36 = add_39 = None + view_58 = torch.ops.aten.view.default(mul_39, [64, 3072]); mul_39 = None + permute_39 = torch.ops.aten.permute.default(primals_62, [1, 0]); primals_62 = None + addmm_19 = torch.ops.aten.addmm.default(primals_63, view_58, permute_39); primals_63 = None + view_59 = torch.ops.aten.view.default(addmm_19, [1, 64, 768]); addmm_19 = None + add_40 = torch.ops.aten.add.Tensor(add_35, view_59); add_35 = view_59 = None + var_mean_10 = torch.ops.aten.var_mean.correction(add_40, [2], correction = 0, keepdim = True) + getitem_55 = var_mean_10[0] + getitem_56 = var_mean_10[1]; var_mean_10 = None + add_41 = torch.ops.aten.add.Tensor(getitem_55, 1e-05); getitem_55 = None + rsqrt_10 = torch.ops.aten.rsqrt.default(add_41); add_41 = None + sub_10 = torch.ops.aten.sub.Tensor(add_40, getitem_56); getitem_56 = None + mul_40 = torch.ops.aten.mul.Tensor(sub_10, rsqrt_10); sub_10 = None + mul_41 = torch.ops.aten.mul.Tensor(mul_40, primals_64) + add_42 = torch.ops.aten.add.Tensor(mul_41, primals_65); mul_41 = primals_65 = None + view_60 = torch.ops.aten.view.default(add_42, [64, 768]); add_42 = None + permute_40 = torch.ops.aten.permute.default(primals_66, [1, 0]); primals_66 = None + addmm_20 = torch.ops.aten.addmm.default(primals_67, view_60, permute_40); primals_67 = None + view_61 = torch.ops.aten.view.default(addmm_20, [1, 64, 2304]); addmm_20 = None + split_5 = torch.ops.aten.split.Tensor(view_61, 768, 2); view_61 = None + getitem_57 = split_5[0] + getitem_58 = split_5[1] + getitem_59 = split_5[2]; split_5 = None + view_62 = torch.ops.aten.view.default(getitem_58, [1, 64, 12, 64]); getitem_58 = None + permute_41 = torch.ops.aten.permute.default(view_62, [0, 2, 1, 3]); view_62 = None + view_63 = torch.ops.aten.view.default(getitem_57, [1, 64, 12, 64]); getitem_57 = None + permute_42 = torch.ops.aten.permute.default(view_63, [0, 2, 1, 3]); view_63 = None + view_64 = torch.ops.aten.view.default(getitem_59, [1, 64, 12, 64]); getitem_59 = None + permute_43 = torch.ops.aten.permute.default(view_64, [0, 2, 1, 3]); view_64 = None + _scaled_dot_product_efficient_attention_5 = torch.ops.aten._scaled_dot_product_efficient_attention.default(permute_42, permute_41, permute_43, None, True, 0.0, True) + getitem_60 = _scaled_dot_product_efficient_attention_5[0] + getitem_61 = _scaled_dot_product_efficient_attention_5[1] + getitem_62 = _scaled_dot_product_efficient_attention_5[2] + getitem_63 = _scaled_dot_product_efficient_attention_5[3]; _scaled_dot_product_efficient_attention_5 = None + permute_44 = torch.ops.aten.permute.default(getitem_60, [0, 2, 1, 3]) + view_65 = torch.ops.aten.view.default(permute_44, [1, 64, 768]); permute_44 = None + view_66 = torch.ops.aten.view.default(view_65, [64, 768]); view_65 = None + permute_45 = torch.ops.aten.permute.default(primals_68, [1, 0]); primals_68 = None + addmm_21 = torch.ops.aten.addmm.default(primals_69, view_66, permute_45); primals_69 = view_66 = None + view_67 = torch.ops.aten.view.default(addmm_21, [1, 64, 768]); addmm_21 = None + add_43 = torch.ops.aten.add.Tensor(add_40, view_67); add_40 = view_67 = None + var_mean_11 = torch.ops.aten.var_mean.correction(add_43, [2], correction = 0, keepdim = True) + getitem_64 = var_mean_11[0] + getitem_65 = var_mean_11[1]; var_mean_11 = None + add_44 = torch.ops.aten.add.Tensor(getitem_64, 1e-05); getitem_64 = None + rsqrt_11 = 
torch.ops.aten.rsqrt.default(add_44); add_44 = None + sub_11 = torch.ops.aten.sub.Tensor(add_43, getitem_65); getitem_65 = None + mul_42 = torch.ops.aten.mul.Tensor(sub_11, rsqrt_11); sub_11 = None + mul_43 = torch.ops.aten.mul.Tensor(mul_42, primals_70) + add_45 = torch.ops.aten.add.Tensor(mul_43, primals_71); mul_43 = primals_71 = None + view_68 = torch.ops.aten.view.default(add_45, [64, 768]); add_45 = None + permute_46 = torch.ops.aten.permute.default(primals_72, [1, 0]); primals_72 = None + addmm_22 = torch.ops.aten.addmm.default(primals_73, view_68, permute_46); primals_73 = None + view_69 = torch.ops.aten.view.default(addmm_22, [1, 64, 3072]) + mul_44 = torch.ops.aten.mul.Tensor(view_69, 0.5) + pow_6 = torch.ops.aten.pow.Tensor_Scalar(view_69, 3.0) + mul_45 = torch.ops.aten.mul.Tensor(pow_6, 0.044715); pow_6 = None + add_46 = torch.ops.aten.add.Tensor(view_69, mul_45); view_69 = mul_45 = None + mul_46 = torch.ops.aten.mul.Tensor(add_46, 0.7978845608028654); add_46 = None + tanh_5 = torch.ops.aten.tanh.default(mul_46); mul_46 = None + add_47 = torch.ops.aten.add.Tensor(tanh_5, 1.0); tanh_5 = None + mul_47 = torch.ops.aten.mul.Tensor(mul_44, add_47); mul_44 = add_47 = None + view_70 = torch.ops.aten.view.default(mul_47, [64, 3072]); mul_47 = None + permute_47 = torch.ops.aten.permute.default(primals_74, [1, 0]); primals_74 = None + addmm_23 = torch.ops.aten.addmm.default(primals_75, view_70, permute_47); primals_75 = None + view_71 = torch.ops.aten.view.default(addmm_23, [1, 64, 768]); addmm_23 = None + add_48 = torch.ops.aten.add.Tensor(add_43, view_71); add_43 = view_71 = None + var_mean_12 = torch.ops.aten.var_mean.correction(add_48, [2], correction = 0, keepdim = True) + getitem_66 = var_mean_12[0] + getitem_67 = var_mean_12[1]; var_mean_12 = None + add_49 = torch.ops.aten.add.Tensor(getitem_66, 1e-05); getitem_66 = None + rsqrt_12 = torch.ops.aten.rsqrt.default(add_49); add_49 = None + sub_12 = torch.ops.aten.sub.Tensor(add_48, getitem_67); getitem_67 = None + mul_48 = torch.ops.aten.mul.Tensor(sub_12, rsqrt_12); sub_12 = None + mul_49 = torch.ops.aten.mul.Tensor(mul_48, primals_76) + add_50 = torch.ops.aten.add.Tensor(mul_49, primals_77); mul_49 = primals_77 = None + view_72 = torch.ops.aten.view.default(add_50, [64, 768]); add_50 = None + permute_48 = torch.ops.aten.permute.default(primals_78, [1, 0]); primals_78 = None + addmm_24 = torch.ops.aten.addmm.default(primals_79, view_72, permute_48); primals_79 = None + view_73 = torch.ops.aten.view.default(addmm_24, [1, 64, 2304]); addmm_24 = None + split_6 = torch.ops.aten.split.Tensor(view_73, 768, 2); view_73 = None + getitem_68 = split_6[0] + getitem_69 = split_6[1] + getitem_70 = split_6[2]; split_6 = None + view_74 = torch.ops.aten.view.default(getitem_69, [1, 64, 12, 64]); getitem_69 = None + permute_49 = torch.ops.aten.permute.default(view_74, [0, 2, 1, 3]); view_74 = None + view_75 = torch.ops.aten.view.default(getitem_68, [1, 64, 12, 64]); getitem_68 = None + permute_50 = torch.ops.aten.permute.default(view_75, [0, 2, 1, 3]); view_75 = None + view_76 = torch.ops.aten.view.default(getitem_70, [1, 64, 12, 64]); getitem_70 = None + permute_51 = torch.ops.aten.permute.default(view_76, [0, 2, 1, 3]); view_76 = None + _scaled_dot_product_efficient_attention_6 = torch.ops.aten._scaled_dot_product_efficient_attention.default(permute_50, permute_49, permute_51, None, True, 0.0, True) + getitem_71 = _scaled_dot_product_efficient_attention_6[0] + getitem_72 = _scaled_dot_product_efficient_attention_6[1] + getitem_73 = 
_scaled_dot_product_efficient_attention_6[2] + getitem_74 = _scaled_dot_product_efficient_attention_6[3]; _scaled_dot_product_efficient_attention_6 = None + permute_52 = torch.ops.aten.permute.default(getitem_71, [0, 2, 1, 3]) + view_77 = torch.ops.aten.view.default(permute_52, [1, 64, 768]); permute_52 = None + view_78 = torch.ops.aten.view.default(view_77, [64, 768]); view_77 = None + permute_53 = torch.ops.aten.permute.default(primals_80, [1, 0]); primals_80 = None + addmm_25 = torch.ops.aten.addmm.default(primals_81, view_78, permute_53); primals_81 = view_78 = None + view_79 = torch.ops.aten.view.default(addmm_25, [1, 64, 768]); addmm_25 = None + add_51 = torch.ops.aten.add.Tensor(add_48, view_79); add_48 = view_79 = None + var_mean_13 = torch.ops.aten.var_mean.correction(add_51, [2], correction = 0, keepdim = True) + getitem_75 = var_mean_13[0] + getitem_76 = var_mean_13[1]; var_mean_13 = None + add_52 = torch.ops.aten.add.Tensor(getitem_75, 1e-05); getitem_75 = None + rsqrt_13 = torch.ops.aten.rsqrt.default(add_52); add_52 = None + sub_13 = torch.ops.aten.sub.Tensor(add_51, getitem_76); getitem_76 = None + mul_50 = torch.ops.aten.mul.Tensor(sub_13, rsqrt_13); sub_13 = None + mul_51 = torch.ops.aten.mul.Tensor(mul_50, primals_82) + add_53 = torch.ops.aten.add.Tensor(mul_51, primals_83); mul_51 = primals_83 = None + view_80 = torch.ops.aten.view.default(add_53, [64, 768]); add_53 = None + permute_54 = torch.ops.aten.permute.default(primals_84, [1, 0]); primals_84 = None + addmm_26 = torch.ops.aten.addmm.default(primals_85, view_80, permute_54); primals_85 = None + view_81 = torch.ops.aten.view.default(addmm_26, [1, 64, 3072]) + mul_52 = torch.ops.aten.mul.Tensor(view_81, 0.5) + pow_7 = torch.ops.aten.pow.Tensor_Scalar(view_81, 3.0) + mul_53 = torch.ops.aten.mul.Tensor(pow_7, 0.044715); pow_7 = None + add_54 = torch.ops.aten.add.Tensor(view_81, mul_53); view_81 = mul_53 = None + mul_54 = torch.ops.aten.mul.Tensor(add_54, 0.7978845608028654); add_54 = None + tanh_6 = torch.ops.aten.tanh.default(mul_54); mul_54 = None + add_55 = torch.ops.aten.add.Tensor(tanh_6, 1.0); tanh_6 = None + mul_55 = torch.ops.aten.mul.Tensor(mul_52, add_55); mul_52 = add_55 = None + view_82 = torch.ops.aten.view.default(mul_55, [64, 3072]); mul_55 = None + permute_55 = torch.ops.aten.permute.default(primals_86, [1, 0]); primals_86 = None + addmm_27 = torch.ops.aten.addmm.default(primals_87, view_82, permute_55); primals_87 = None + view_83 = torch.ops.aten.view.default(addmm_27, [1, 64, 768]); addmm_27 = None + add_56 = torch.ops.aten.add.Tensor(add_51, view_83); add_51 = view_83 = None + var_mean_14 = torch.ops.aten.var_mean.correction(add_56, [2], correction = 0, keepdim = True) + getitem_77 = var_mean_14[0] + getitem_78 = var_mean_14[1]; var_mean_14 = None + add_57 = torch.ops.aten.add.Tensor(getitem_77, 1e-05); getitem_77 = None + rsqrt_14 = torch.ops.aten.rsqrt.default(add_57); add_57 = None + sub_14 = torch.ops.aten.sub.Tensor(add_56, getitem_78); getitem_78 = None + mul_56 = torch.ops.aten.mul.Tensor(sub_14, rsqrt_14); sub_14 = None + mul_57 = torch.ops.aten.mul.Tensor(mul_56, primals_88) + add_58 = torch.ops.aten.add.Tensor(mul_57, primals_89); mul_57 = primals_89 = None + view_84 = torch.ops.aten.view.default(add_58, [64, 768]); add_58 = None + permute_56 = torch.ops.aten.permute.default(primals_90, [1, 0]); primals_90 = None + addmm_28 = torch.ops.aten.addmm.default(primals_91, view_84, permute_56); primals_91 = None + view_85 = torch.ops.aten.view.default(addmm_28, [1, 64, 2304]); addmm_28 = None + 
split_7 = torch.ops.aten.split.Tensor(view_85, 768, 2); view_85 = None + getitem_79 = split_7[0] + getitem_80 = split_7[1] + getitem_81 = split_7[2]; split_7 = None + view_86 = torch.ops.aten.view.default(getitem_80, [1, 64, 12, 64]); getitem_80 = None + permute_57 = torch.ops.aten.permute.default(view_86, [0, 2, 1, 3]); view_86 = None + view_87 = torch.ops.aten.view.default(getitem_79, [1, 64, 12, 64]); getitem_79 = None + permute_58 = torch.ops.aten.permute.default(view_87, [0, 2, 1, 3]); view_87 = None + view_88 = torch.ops.aten.view.default(getitem_81, [1, 64, 12, 64]); getitem_81 = None + permute_59 = torch.ops.aten.permute.default(view_88, [0, 2, 1, 3]); view_88 = None + _scaled_dot_product_efficient_attention_7 = torch.ops.aten._scaled_dot_product_efficient_attention.default(permute_58, permute_57, permute_59, None, True, 0.0, True) + getitem_82 = _scaled_dot_product_efficient_attention_7[0] + getitem_83 = _scaled_dot_product_efficient_attention_7[1] + getitem_84 = _scaled_dot_product_efficient_attention_7[2] + getitem_85 = _scaled_dot_product_efficient_attention_7[3]; _scaled_dot_product_efficient_attention_7 = None + permute_60 = torch.ops.aten.permute.default(getitem_82, [0, 2, 1, 3]) + view_89 = torch.ops.aten.view.default(permute_60, [1, 64, 768]); permute_60 = None + view_90 = torch.ops.aten.view.default(view_89, [64, 768]); view_89 = None + permute_61 = torch.ops.aten.permute.default(primals_92, [1, 0]); primals_92 = None + addmm_29 = torch.ops.aten.addmm.default(primals_93, view_90, permute_61); primals_93 = view_90 = None + view_91 = torch.ops.aten.view.default(addmm_29, [1, 64, 768]); addmm_29 = None + add_59 = torch.ops.aten.add.Tensor(add_56, view_91); add_56 = view_91 = None + var_mean_15 = torch.ops.aten.var_mean.correction(add_59, [2], correction = 0, keepdim = True) + getitem_86 = var_mean_15[0] + getitem_87 = var_mean_15[1]; var_mean_15 = None + add_60 = torch.ops.aten.add.Tensor(getitem_86, 1e-05); getitem_86 = None + rsqrt_15 = torch.ops.aten.rsqrt.default(add_60); add_60 = None + sub_15 = torch.ops.aten.sub.Tensor(add_59, getitem_87); getitem_87 = None + mul_58 = torch.ops.aten.mul.Tensor(sub_15, rsqrt_15); sub_15 = None + mul_59 = torch.ops.aten.mul.Tensor(mul_58, primals_94) + add_61 = torch.ops.aten.add.Tensor(mul_59, primals_95); mul_59 = primals_95 = None + view_92 = torch.ops.aten.view.default(add_61, [64, 768]); add_61 = None + permute_62 = torch.ops.aten.permute.default(primals_96, [1, 0]); primals_96 = None + addmm_30 = torch.ops.aten.addmm.default(primals_97, view_92, permute_62); primals_97 = None + view_93 = torch.ops.aten.view.default(addmm_30, [1, 64, 3072]) + mul_60 = torch.ops.aten.mul.Tensor(view_93, 0.5) + pow_8 = torch.ops.aten.pow.Tensor_Scalar(view_93, 3.0) + mul_61 = torch.ops.aten.mul.Tensor(pow_8, 0.044715); pow_8 = None + add_62 = torch.ops.aten.add.Tensor(view_93, mul_61); view_93 = mul_61 = None + mul_62 = torch.ops.aten.mul.Tensor(add_62, 0.7978845608028654); add_62 = None + tanh_7 = torch.ops.aten.tanh.default(mul_62); mul_62 = None + add_63 = torch.ops.aten.add.Tensor(tanh_7, 1.0); tanh_7 = None + mul_63 = torch.ops.aten.mul.Tensor(mul_60, add_63); mul_60 = add_63 = None + view_94 = torch.ops.aten.view.default(mul_63, [64, 3072]); mul_63 = None + permute_63 = torch.ops.aten.permute.default(primals_98, [1, 0]); primals_98 = None + addmm_31 = torch.ops.aten.addmm.default(primals_99, view_94, permute_63); primals_99 = None + view_95 = torch.ops.aten.view.default(addmm_31, [1, 64, 768]); addmm_31 = None + add_64 = 
torch.ops.aten.add.Tensor(add_59, view_95); add_59 = view_95 = None + var_mean_16 = torch.ops.aten.var_mean.correction(add_64, [2], correction = 0, keepdim = True) + getitem_88 = var_mean_16[0] + getitem_89 = var_mean_16[1]; var_mean_16 = None + add_65 = torch.ops.aten.add.Tensor(getitem_88, 1e-05); getitem_88 = None + rsqrt_16 = torch.ops.aten.rsqrt.default(add_65); add_65 = None + sub_16 = torch.ops.aten.sub.Tensor(add_64, getitem_89); getitem_89 = None + mul_64 = torch.ops.aten.mul.Tensor(sub_16, rsqrt_16); sub_16 = None + mul_65 = torch.ops.aten.mul.Tensor(mul_64, primals_100) + add_66 = torch.ops.aten.add.Tensor(mul_65, primals_101); mul_65 = primals_101 = None + view_96 = torch.ops.aten.view.default(add_66, [64, 768]); add_66 = None + permute_64 = torch.ops.aten.permute.default(primals_102, [1, 0]); primals_102 = None + addmm_32 = torch.ops.aten.addmm.default(primals_103, view_96, permute_64); primals_103 = None + view_97 = torch.ops.aten.view.default(addmm_32, [1, 64, 2304]); addmm_32 = None + split_8 = torch.ops.aten.split.Tensor(view_97, 768, 2); view_97 = None + getitem_90 = split_8[0] + getitem_91 = split_8[1] + getitem_92 = split_8[2]; split_8 = None + view_98 = torch.ops.aten.view.default(getitem_91, [1, 64, 12, 64]); getitem_91 = None + permute_65 = torch.ops.aten.permute.default(view_98, [0, 2, 1, 3]); view_98 = None + view_99 = torch.ops.aten.view.default(getitem_90, [1, 64, 12, 64]); getitem_90 = None + permute_66 = torch.ops.aten.permute.default(view_99, [0, 2, 1, 3]); view_99 = None + view_100 = torch.ops.aten.view.default(getitem_92, [1, 64, 12, 64]); getitem_92 = None + permute_67 = torch.ops.aten.permute.default(view_100, [0, 2, 1, 3]); view_100 = None + _scaled_dot_product_efficient_attention_8 = torch.ops.aten._scaled_dot_product_efficient_attention.default(permute_66, permute_65, permute_67, None, True, 0.0, True) + getitem_93 = _scaled_dot_product_efficient_attention_8[0] + getitem_94 = _scaled_dot_product_efficient_attention_8[1] + getitem_95 = _scaled_dot_product_efficient_attention_8[2] + getitem_96 = _scaled_dot_product_efficient_attention_8[3]; _scaled_dot_product_efficient_attention_8 = None + permute_68 = torch.ops.aten.permute.default(getitem_93, [0, 2, 1, 3]) + view_101 = torch.ops.aten.view.default(permute_68, [1, 64, 768]); permute_68 = None + view_102 = torch.ops.aten.view.default(view_101, [64, 768]); view_101 = None + permute_69 = torch.ops.aten.permute.default(primals_104, [1, 0]); primals_104 = None + addmm_33 = torch.ops.aten.addmm.default(primals_105, view_102, permute_69); primals_105 = view_102 = None + view_103 = torch.ops.aten.view.default(addmm_33, [1, 64, 768]); addmm_33 = None + add_67 = torch.ops.aten.add.Tensor(add_64, view_103); add_64 = view_103 = None + var_mean_17 = torch.ops.aten.var_mean.correction(add_67, [2], correction = 0, keepdim = True) + getitem_97 = var_mean_17[0] + getitem_98 = var_mean_17[1]; var_mean_17 = None + add_68 = torch.ops.aten.add.Tensor(getitem_97, 1e-05); getitem_97 = None + rsqrt_17 = torch.ops.aten.rsqrt.default(add_68); add_68 = None + sub_17 = torch.ops.aten.sub.Tensor(add_67, getitem_98); getitem_98 = None + mul_66 = torch.ops.aten.mul.Tensor(sub_17, rsqrt_17); sub_17 = None + mul_67 = torch.ops.aten.mul.Tensor(mul_66, primals_106) + add_69 = torch.ops.aten.add.Tensor(mul_67, primals_107); mul_67 = primals_107 = None + view_104 = torch.ops.aten.view.default(add_69, [64, 768]); add_69 = None + permute_70 = torch.ops.aten.permute.default(primals_108, [1, 0]); primals_108 = None + addmm_34 = 
torch.ops.aten.addmm.default(primals_109, view_104, permute_70); primals_109 = None + view_105 = torch.ops.aten.view.default(addmm_34, [1, 64, 3072]) + mul_68 = torch.ops.aten.mul.Tensor(view_105, 0.5) + pow_9 = torch.ops.aten.pow.Tensor_Scalar(view_105, 3.0) + mul_69 = torch.ops.aten.mul.Tensor(pow_9, 0.044715); pow_9 = None + add_70 = torch.ops.aten.add.Tensor(view_105, mul_69); view_105 = mul_69 = None + mul_70 = torch.ops.aten.mul.Tensor(add_70, 0.7978845608028654); add_70 = None + tanh_8 = torch.ops.aten.tanh.default(mul_70); mul_70 = None + add_71 = torch.ops.aten.add.Tensor(tanh_8, 1.0); tanh_8 = None + mul_71 = torch.ops.aten.mul.Tensor(mul_68, add_71); mul_68 = add_71 = None + view_106 = torch.ops.aten.view.default(mul_71, [64, 3072]); mul_71 = None + permute_71 = torch.ops.aten.permute.default(primals_110, [1, 0]); primals_110 = None + addmm_35 = torch.ops.aten.addmm.default(primals_111, view_106, permute_71); primals_111 = None + view_107 = torch.ops.aten.view.default(addmm_35, [1, 64, 768]); addmm_35 = None + add_72 = torch.ops.aten.add.Tensor(add_67, view_107); add_67 = view_107 = None + var_mean_18 = torch.ops.aten.var_mean.correction(add_72, [2], correction = 0, keepdim = True) + getitem_99 = var_mean_18[0] + getitem_100 = var_mean_18[1]; var_mean_18 = None + add_73 = torch.ops.aten.add.Tensor(getitem_99, 1e-05); getitem_99 = None + rsqrt_18 = torch.ops.aten.rsqrt.default(add_73); add_73 = None + sub_18 = torch.ops.aten.sub.Tensor(add_72, getitem_100); getitem_100 = None + mul_72 = torch.ops.aten.mul.Tensor(sub_18, rsqrt_18); sub_18 = None + mul_73 = torch.ops.aten.mul.Tensor(mul_72, primals_112) + add_74 = torch.ops.aten.add.Tensor(mul_73, primals_113); mul_73 = primals_113 = None + view_108 = torch.ops.aten.view.default(add_74, [64, 768]); add_74 = None + permute_72 = torch.ops.aten.permute.default(primals_114, [1, 0]); primals_114 = None + addmm_36 = torch.ops.aten.addmm.default(primals_115, view_108, permute_72); primals_115 = None + view_109 = torch.ops.aten.view.default(addmm_36, [1, 64, 2304]); addmm_36 = None + split_9 = torch.ops.aten.split.Tensor(view_109, 768, 2); view_109 = None + getitem_101 = split_9[0] + getitem_102 = split_9[1] + getitem_103 = split_9[2]; split_9 = None + view_110 = torch.ops.aten.view.default(getitem_102, [1, 64, 12, 64]); getitem_102 = None + permute_73 = torch.ops.aten.permute.default(view_110, [0, 2, 1, 3]); view_110 = None + view_111 = torch.ops.aten.view.default(getitem_101, [1, 64, 12, 64]); getitem_101 = None + permute_74 = torch.ops.aten.permute.default(view_111, [0, 2, 1, 3]); view_111 = None + view_112 = torch.ops.aten.view.default(getitem_103, [1, 64, 12, 64]); getitem_103 = None + permute_75 = torch.ops.aten.permute.default(view_112, [0, 2, 1, 3]); view_112 = None + _scaled_dot_product_efficient_attention_9 = torch.ops.aten._scaled_dot_product_efficient_attention.default(permute_74, permute_73, permute_75, None, True, 0.0, True) + getitem_104 = _scaled_dot_product_efficient_attention_9[0] + getitem_105 = _scaled_dot_product_efficient_attention_9[1] + getitem_106 = _scaled_dot_product_efficient_attention_9[2] + getitem_107 = _scaled_dot_product_efficient_attention_9[3]; _scaled_dot_product_efficient_attention_9 = None + permute_76 = torch.ops.aten.permute.default(getitem_104, [0, 2, 1, 3]) + view_113 = torch.ops.aten.view.default(permute_76, [1, 64, 768]); permute_76 = None + view_114 = torch.ops.aten.view.default(view_113, [64, 768]); view_113 = None + permute_77 = torch.ops.aten.permute.default(primals_116, [1, 0]); primals_116 
= None + addmm_37 = torch.ops.aten.addmm.default(primals_117, view_114, permute_77); primals_117 = view_114 = None + view_115 = torch.ops.aten.view.default(addmm_37, [1, 64, 768]); addmm_37 = None + add_75 = torch.ops.aten.add.Tensor(add_72, view_115); add_72 = view_115 = None + var_mean_19 = torch.ops.aten.var_mean.correction(add_75, [2], correction = 0, keepdim = True) + getitem_108 = var_mean_19[0] + getitem_109 = var_mean_19[1]; var_mean_19 = None + add_76 = torch.ops.aten.add.Tensor(getitem_108, 1e-05); getitem_108 = None + rsqrt_19 = torch.ops.aten.rsqrt.default(add_76); add_76 = None + sub_19 = torch.ops.aten.sub.Tensor(add_75, getitem_109); getitem_109 = None + mul_74 = torch.ops.aten.mul.Tensor(sub_19, rsqrt_19); sub_19 = None + mul_75 = torch.ops.aten.mul.Tensor(mul_74, primals_118) + add_77 = torch.ops.aten.add.Tensor(mul_75, primals_119); mul_75 = primals_119 = None + view_116 = torch.ops.aten.view.default(add_77, [64, 768]); add_77 = None + permute_78 = torch.ops.aten.permute.default(primals_120, [1, 0]); primals_120 = None + addmm_38 = torch.ops.aten.addmm.default(primals_121, view_116, permute_78); primals_121 = None + view_117 = torch.ops.aten.view.default(addmm_38, [1, 64, 3072]) + mul_76 = torch.ops.aten.mul.Tensor(view_117, 0.5) + pow_10 = torch.ops.aten.pow.Tensor_Scalar(view_117, 3.0) + mul_77 = torch.ops.aten.mul.Tensor(pow_10, 0.044715); pow_10 = None + add_78 = torch.ops.aten.add.Tensor(view_117, mul_77); view_117 = mul_77 = None + mul_78 = torch.ops.aten.mul.Tensor(add_78, 0.7978845608028654); add_78 = None + tanh_9 = torch.ops.aten.tanh.default(mul_78); mul_78 = None + add_79 = torch.ops.aten.add.Tensor(tanh_9, 1.0); tanh_9 = None + mul_79 = torch.ops.aten.mul.Tensor(mul_76, add_79); mul_76 = add_79 = None + view_118 = torch.ops.aten.view.default(mul_79, [64, 3072]); mul_79 = None + permute_79 = torch.ops.aten.permute.default(primals_122, [1, 0]); primals_122 = None + addmm_39 = torch.ops.aten.addmm.default(primals_123, view_118, permute_79); primals_123 = None + view_119 = torch.ops.aten.view.default(addmm_39, [1, 64, 768]); addmm_39 = None + add_80 = torch.ops.aten.add.Tensor(add_75, view_119); add_75 = view_119 = None + var_mean_20 = torch.ops.aten.var_mean.correction(add_80, [2], correction = 0, keepdim = True) + getitem_110 = var_mean_20[0] + getitem_111 = var_mean_20[1]; var_mean_20 = None + add_81 = torch.ops.aten.add.Tensor(getitem_110, 1e-05); getitem_110 = None + rsqrt_20 = torch.ops.aten.rsqrt.default(add_81); add_81 = None + sub_20 = torch.ops.aten.sub.Tensor(add_80, getitem_111); getitem_111 = None + mul_80 = torch.ops.aten.mul.Tensor(sub_20, rsqrt_20); sub_20 = None + mul_81 = torch.ops.aten.mul.Tensor(mul_80, primals_124) + add_82 = torch.ops.aten.add.Tensor(mul_81, primals_125); mul_81 = primals_125 = None + view_120 = torch.ops.aten.view.default(add_82, [64, 768]); add_82 = None + permute_80 = torch.ops.aten.permute.default(primals_126, [1, 0]); primals_126 = None + addmm_40 = torch.ops.aten.addmm.default(primals_127, view_120, permute_80); primals_127 = None + view_121 = torch.ops.aten.view.default(addmm_40, [1, 64, 2304]); addmm_40 = None + split_10 = torch.ops.aten.split.Tensor(view_121, 768, 2); view_121 = None + getitem_112 = split_10[0] + getitem_113 = split_10[1] + getitem_114 = split_10[2]; split_10 = None + view_122 = torch.ops.aten.view.default(getitem_113, [1, 64, 12, 64]); getitem_113 = None + permute_81 = torch.ops.aten.permute.default(view_122, [0, 2, 1, 3]); view_122 = None + view_123 = torch.ops.aten.view.default(getitem_112, [1, 
64, 12, 64]); getitem_112 = None + permute_82 = torch.ops.aten.permute.default(view_123, [0, 2, 1, 3]); view_123 = None + view_124 = torch.ops.aten.view.default(getitem_114, [1, 64, 12, 64]); getitem_114 = None + permute_83 = torch.ops.aten.permute.default(view_124, [0, 2, 1, 3]); view_124 = None + _scaled_dot_product_efficient_attention_10 = torch.ops.aten._scaled_dot_product_efficient_attention.default(permute_82, permute_81, permute_83, None, True, 0.0, True) + getitem_115 = _scaled_dot_product_efficient_attention_10[0] + getitem_116 = _scaled_dot_product_efficient_attention_10[1] + getitem_117 = _scaled_dot_product_efficient_attention_10[2] + getitem_118 = _scaled_dot_product_efficient_attention_10[3]; _scaled_dot_product_efficient_attention_10 = None + permute_84 = torch.ops.aten.permute.default(getitem_115, [0, 2, 1, 3]) + view_125 = torch.ops.aten.view.default(permute_84, [1, 64, 768]); permute_84 = None + view_126 = torch.ops.aten.view.default(view_125, [64, 768]); view_125 = None + permute_85 = torch.ops.aten.permute.default(primals_128, [1, 0]); primals_128 = None + addmm_41 = torch.ops.aten.addmm.default(primals_129, view_126, permute_85); primals_129 = view_126 = None + view_127 = torch.ops.aten.view.default(addmm_41, [1, 64, 768]); addmm_41 = None + add_83 = torch.ops.aten.add.Tensor(add_80, view_127); add_80 = view_127 = None + var_mean_21 = torch.ops.aten.var_mean.correction(add_83, [2], correction = 0, keepdim = True) + getitem_119 = var_mean_21[0] + getitem_120 = var_mean_21[1]; var_mean_21 = None + add_84 = torch.ops.aten.add.Tensor(getitem_119, 1e-05); getitem_119 = None + rsqrt_21 = torch.ops.aten.rsqrt.default(add_84); add_84 = None + sub_21 = torch.ops.aten.sub.Tensor(add_83, getitem_120); getitem_120 = None + mul_82 = torch.ops.aten.mul.Tensor(sub_21, rsqrt_21); sub_21 = None + mul_83 = torch.ops.aten.mul.Tensor(mul_82, primals_130) + add_85 = torch.ops.aten.add.Tensor(mul_83, primals_131); mul_83 = primals_131 = None + view_128 = torch.ops.aten.view.default(add_85, [64, 768]); add_85 = None + permute_86 = torch.ops.aten.permute.default(primals_132, [1, 0]); primals_132 = None + addmm_42 = torch.ops.aten.addmm.default(primals_133, view_128, permute_86); primals_133 = None + view_129 = torch.ops.aten.view.default(addmm_42, [1, 64, 3072]) + mul_84 = torch.ops.aten.mul.Tensor(view_129, 0.5) + pow_11 = torch.ops.aten.pow.Tensor_Scalar(view_129, 3.0) + mul_85 = torch.ops.aten.mul.Tensor(pow_11, 0.044715); pow_11 = None + add_86 = torch.ops.aten.add.Tensor(view_129, mul_85); view_129 = mul_85 = None + mul_86 = torch.ops.aten.mul.Tensor(add_86, 0.7978845608028654); add_86 = None + tanh_10 = torch.ops.aten.tanh.default(mul_86); mul_86 = None + add_87 = torch.ops.aten.add.Tensor(tanh_10, 1.0); tanh_10 = None + mul_87 = torch.ops.aten.mul.Tensor(mul_84, add_87); mul_84 = add_87 = None + view_130 = torch.ops.aten.view.default(mul_87, [64, 3072]); mul_87 = None + permute_87 = torch.ops.aten.permute.default(primals_134, [1, 0]); primals_134 = None + addmm_43 = torch.ops.aten.addmm.default(primals_135, view_130, permute_87); primals_135 = None + view_131 = torch.ops.aten.view.default(addmm_43, [1, 64, 768]); addmm_43 = None + add_88 = torch.ops.aten.add.Tensor(add_83, view_131); add_83 = view_131 = None + var_mean_22 = torch.ops.aten.var_mean.correction(add_88, [2], correction = 0, keepdim = True) + getitem_121 = var_mean_22[0] + getitem_122 = var_mean_22[1]; var_mean_22 = None + add_89 = torch.ops.aten.add.Tensor(getitem_121, 1e-05); getitem_121 = None + rsqrt_22 = 
torch.ops.aten.rsqrt.default(add_89); add_89 = None + sub_22 = torch.ops.aten.sub.Tensor(add_88, getitem_122); getitem_122 = None + mul_88 = torch.ops.aten.mul.Tensor(sub_22, rsqrt_22); sub_22 = None + mul_89 = torch.ops.aten.mul.Tensor(mul_88, primals_136) + add_90 = torch.ops.aten.add.Tensor(mul_89, primals_137); mul_89 = primals_137 = None + view_132 = torch.ops.aten.view.default(add_90, [64, 768]); add_90 = None + permute_88 = torch.ops.aten.permute.default(primals_138, [1, 0]); primals_138 = None + addmm_44 = torch.ops.aten.addmm.default(primals_139, view_132, permute_88); primals_139 = None + view_133 = torch.ops.aten.view.default(addmm_44, [1, 64, 2304]); addmm_44 = None + split_11 = torch.ops.aten.split.Tensor(view_133, 768, 2); view_133 = None + getitem_123 = split_11[0] + getitem_124 = split_11[1] + getitem_125 = split_11[2]; split_11 = None + view_134 = torch.ops.aten.view.default(getitem_124, [1, 64, 12, 64]); getitem_124 = None + permute_89 = torch.ops.aten.permute.default(view_134, [0, 2, 1, 3]); view_134 = None + view_135 = torch.ops.aten.view.default(getitem_123, [1, 64, 12, 64]); getitem_123 = None + permute_90 = torch.ops.aten.permute.default(view_135, [0, 2, 1, 3]); view_135 = None + view_136 = torch.ops.aten.view.default(getitem_125, [1, 64, 12, 64]); getitem_125 = None + permute_91 = torch.ops.aten.permute.default(view_136, [0, 2, 1, 3]); view_136 = None + _scaled_dot_product_efficient_attention_11 = torch.ops.aten._scaled_dot_product_efficient_attention.default(permute_90, permute_89, permute_91, None, True, 0.0, True) + getitem_126 = _scaled_dot_product_efficient_attention_11[0] + getitem_127 = _scaled_dot_product_efficient_attention_11[1] + getitem_128 = _scaled_dot_product_efficient_attention_11[2] + getitem_129 = _scaled_dot_product_efficient_attention_11[3]; _scaled_dot_product_efficient_attention_11 = None + permute_92 = torch.ops.aten.permute.default(getitem_126, [0, 2, 1, 3]) + view_137 = torch.ops.aten.view.default(permute_92, [1, 64, 768]); permute_92 = None + view_138 = torch.ops.aten.view.default(view_137, [64, 768]); view_137 = None + permute_93 = torch.ops.aten.permute.default(primals_140, [1, 0]); primals_140 = None + addmm_45 = torch.ops.aten.addmm.default(primals_141, view_138, permute_93); primals_141 = view_138 = None + view_139 = torch.ops.aten.view.default(addmm_45, [1, 64, 768]); addmm_45 = None + add_91 = torch.ops.aten.add.Tensor(add_88, view_139); add_88 = view_139 = None + var_mean_23 = torch.ops.aten.var_mean.correction(add_91, [2], correction = 0, keepdim = True) + getitem_130 = var_mean_23[0] + getitem_131 = var_mean_23[1]; var_mean_23 = None + add_92 = torch.ops.aten.add.Tensor(getitem_130, 1e-05); getitem_130 = None + rsqrt_23 = torch.ops.aten.rsqrt.default(add_92); add_92 = None + sub_23 = torch.ops.aten.sub.Tensor(add_91, getitem_131); getitem_131 = None + mul_90 = torch.ops.aten.mul.Tensor(sub_23, rsqrt_23); sub_23 = None + mul_91 = torch.ops.aten.mul.Tensor(mul_90, primals_142) + add_93 = torch.ops.aten.add.Tensor(mul_91, primals_143); mul_91 = primals_143 = None + view_140 = torch.ops.aten.view.default(add_93, [64, 768]); add_93 = None + permute_94 = torch.ops.aten.permute.default(primals_144, [1, 0]); primals_144 = None + addmm_46 = torch.ops.aten.addmm.default(primals_145, view_140, permute_94); primals_145 = None + view_141 = torch.ops.aten.view.default(addmm_46, [1, 64, 3072]) + mul_92 = torch.ops.aten.mul.Tensor(view_141, 0.5) + pow_12 = torch.ops.aten.pow.Tensor_Scalar(view_141, 3.0) + mul_93 = 
torch.ops.aten.mul.Tensor(pow_12, 0.044715); pow_12 = None + add_94 = torch.ops.aten.add.Tensor(view_141, mul_93); view_141 = mul_93 = None + mul_94 = torch.ops.aten.mul.Tensor(add_94, 0.7978845608028654); add_94 = None + tanh_11 = torch.ops.aten.tanh.default(mul_94); mul_94 = None + add_95 = torch.ops.aten.add.Tensor(tanh_11, 1.0); tanh_11 = None + mul_95 = torch.ops.aten.mul.Tensor(mul_92, add_95); mul_92 = add_95 = None + view_142 = torch.ops.aten.view.default(mul_95, [64, 3072]); mul_95 = None + permute_95 = torch.ops.aten.permute.default(primals_146, [1, 0]); primals_146 = None + addmm_47 = torch.ops.aten.addmm.default(primals_147, view_142, permute_95); primals_147 = None + view_143 = torch.ops.aten.view.default(addmm_47, [1, 64, 768]); addmm_47 = None + add_96 = torch.ops.aten.add.Tensor(add_91, view_143); add_91 = view_143 = None + var_mean_24 = torch.ops.aten.var_mean.correction(add_96, [2], correction = 0, keepdim = True) + getitem_132 = var_mean_24[0] + getitem_133 = var_mean_24[1]; var_mean_24 = None + add_97 = torch.ops.aten.add.Tensor(getitem_132, 1e-05); getitem_132 = None + rsqrt_24 = torch.ops.aten.rsqrt.default(add_97); add_97 = None + sub_24 = torch.ops.aten.sub.Tensor(add_96, getitem_133); add_96 = getitem_133 = None + mul_96 = torch.ops.aten.mul.Tensor(sub_24, rsqrt_24); sub_24 = None + mul_97 = torch.ops.aten.mul.Tensor(mul_96, primals_148) + add_98 = torch.ops.aten.add.Tensor(mul_97, primals_149); mul_97 = primals_149 = None + full_default = torch.ops.aten.full.default([1], -1, dtype = torch.int64, layout = torch.strided, device = device(type='cuda', index=0), pin_memory = False) + index = torch.ops.aten.index.Tensor(add_98, [None, full_default]); add_98 = None + permute_96 = torch.ops.aten.permute.default(primals_2, [1, 0]); primals_2 = None + view_144 = torch.ops.aten.view.default(index, [1, 768]); index = None + mm = torch.ops.aten.mm.default(view_144, permute_96) + view_145 = torch.ops.aten.view.default(mm, [1, 1, 50304]); mm = None + permute_99 = torch.ops.aten.permute.default(permute_96, [1, 0]); permute_96 = None + div = torch.ops.aten.div.Tensor(rsqrt_24, 768); rsqrt_24 = None + permute_101 = torch.ops.aten.permute.default(permute_95, [1, 0]); permute_95 = None + permute_105 = torch.ops.aten.permute.default(permute_94, [1, 0]); permute_94 = None + div_1 = torch.ops.aten.div.Tensor(rsqrt_23, 768); rsqrt_23 = None + permute_109 = torch.ops.aten.permute.default(permute_93, [1, 0]); permute_93 = None + permute_117 = torch.ops.aten.permute.default(permute_88, [1, 0]); permute_88 = None + div_2 = torch.ops.aten.div.Tensor(rsqrt_22, 768); rsqrt_22 = None + permute_121 = torch.ops.aten.permute.default(permute_87, [1, 0]); permute_87 = None + permute_125 = torch.ops.aten.permute.default(permute_86, [1, 0]); permute_86 = None + div_3 = torch.ops.aten.div.Tensor(rsqrt_21, 768); rsqrt_21 = None + permute_129 = torch.ops.aten.permute.default(permute_85, [1, 0]); permute_85 = None + permute_137 = torch.ops.aten.permute.default(permute_80, [1, 0]); permute_80 = None + div_4 = torch.ops.aten.div.Tensor(rsqrt_20, 768); rsqrt_20 = None + permute_141 = torch.ops.aten.permute.default(permute_79, [1, 0]); permute_79 = None + permute_145 = torch.ops.aten.permute.default(permute_78, [1, 0]); permute_78 = None + div_5 = torch.ops.aten.div.Tensor(rsqrt_19, 768); rsqrt_19 = None + permute_149 = torch.ops.aten.permute.default(permute_77, [1, 0]); permute_77 = None + permute_157 = torch.ops.aten.permute.default(permute_72, [1, 0]); permute_72 = None + div_6 = 
torch.ops.aten.div.Tensor(rsqrt_18, 768); rsqrt_18 = None + permute_161 = torch.ops.aten.permute.default(permute_71, [1, 0]); permute_71 = None + permute_165 = torch.ops.aten.permute.default(permute_70, [1, 0]); permute_70 = None + div_7 = torch.ops.aten.div.Tensor(rsqrt_17, 768); rsqrt_17 = None + permute_169 = torch.ops.aten.permute.default(permute_69, [1, 0]); permute_69 = None + permute_177 = torch.ops.aten.permute.default(permute_64, [1, 0]); permute_64 = None + div_8 = torch.ops.aten.div.Tensor(rsqrt_16, 768); rsqrt_16 = None + permute_181 = torch.ops.aten.permute.default(permute_63, [1, 0]); permute_63 = None + permute_185 = torch.ops.aten.permute.default(permute_62, [1, 0]); permute_62 = None + div_9 = torch.ops.aten.div.Tensor(rsqrt_15, 768); rsqrt_15 = None + permute_189 = torch.ops.aten.permute.default(permute_61, [1, 0]); permute_61 = None + permute_197 = torch.ops.aten.permute.default(permute_56, [1, 0]); permute_56 = None + div_10 = torch.ops.aten.div.Tensor(rsqrt_14, 768); rsqrt_14 = None + permute_201 = torch.ops.aten.permute.default(permute_55, [1, 0]); permute_55 = None + permute_205 = torch.ops.aten.permute.default(permute_54, [1, 0]); permute_54 = None + div_11 = torch.ops.aten.div.Tensor(rsqrt_13, 768); rsqrt_13 = None + permute_209 = torch.ops.aten.permute.default(permute_53, [1, 0]); permute_53 = None + permute_217 = torch.ops.aten.permute.default(permute_48, [1, 0]); permute_48 = None + div_12 = torch.ops.aten.div.Tensor(rsqrt_12, 768); rsqrt_12 = None + permute_221 = torch.ops.aten.permute.default(permute_47, [1, 0]); permute_47 = None + permute_225 = torch.ops.aten.permute.default(permute_46, [1, 0]); permute_46 = None + div_13 = torch.ops.aten.div.Tensor(rsqrt_11, 768); rsqrt_11 = None + permute_229 = torch.ops.aten.permute.default(permute_45, [1, 0]); permute_45 = None + permute_237 = torch.ops.aten.permute.default(permute_40, [1, 0]); permute_40 = None + div_14 = torch.ops.aten.div.Tensor(rsqrt_10, 768); rsqrt_10 = None + permute_241 = torch.ops.aten.permute.default(permute_39, [1, 0]); permute_39 = None + permute_245 = torch.ops.aten.permute.default(permute_38, [1, 0]); permute_38 = None + div_15 = torch.ops.aten.div.Tensor(rsqrt_9, 768); rsqrt_9 = None + permute_249 = torch.ops.aten.permute.default(permute_37, [1, 0]); permute_37 = None + permute_257 = torch.ops.aten.permute.default(permute_32, [1, 0]); permute_32 = None + div_16 = torch.ops.aten.div.Tensor(rsqrt_8, 768); rsqrt_8 = None + permute_261 = torch.ops.aten.permute.default(permute_31, [1, 0]); permute_31 = None + permute_265 = torch.ops.aten.permute.default(permute_30, [1, 0]); permute_30 = None + div_17 = torch.ops.aten.div.Tensor(rsqrt_7, 768); rsqrt_7 = None + permute_269 = torch.ops.aten.permute.default(permute_29, [1, 0]); permute_29 = None + permute_277 = torch.ops.aten.permute.default(permute_24, [1, 0]); permute_24 = None + div_18 = torch.ops.aten.div.Tensor(rsqrt_6, 768); rsqrt_6 = None + permute_281 = torch.ops.aten.permute.default(permute_23, [1, 0]); permute_23 = None + permute_285 = torch.ops.aten.permute.default(permute_22, [1, 0]); permute_22 = None + div_19 = torch.ops.aten.div.Tensor(rsqrt_5, 768); rsqrt_5 = None + permute_289 = torch.ops.aten.permute.default(permute_21, [1, 0]); permute_21 = None + permute_297 = torch.ops.aten.permute.default(permute_16, [1, 0]); permute_16 = None + div_20 = torch.ops.aten.div.Tensor(rsqrt_4, 768); rsqrt_4 = None + permute_301 = torch.ops.aten.permute.default(permute_15, [1, 0]); permute_15 = None + permute_305 = 
torch.ops.aten.permute.default(permute_14, [1, 0]); permute_14 = None + div_21 = torch.ops.aten.div.Tensor(rsqrt_3, 768); rsqrt_3 = None + permute_309 = torch.ops.aten.permute.default(permute_13, [1, 0]); permute_13 = None + permute_317 = torch.ops.aten.permute.default(permute_8, [1, 0]); permute_8 = None + div_22 = torch.ops.aten.div.Tensor(rsqrt_2, 768); rsqrt_2 = None + permute_321 = torch.ops.aten.permute.default(permute_7, [1, 0]); permute_7 = None + permute_325 = torch.ops.aten.permute.default(permute_6, [1, 0]); permute_6 = None + div_23 = torch.ops.aten.div.Tensor(rsqrt_1, 768); rsqrt_1 = None + permute_329 = torch.ops.aten.permute.default(permute_5, [1, 0]); permute_5 = None + permute_337 = torch.ops.aten.permute.default(permute, [1, 0]); permute = None + div_24 = torch.ops.aten.div.Tensor(rsqrt, 768); rsqrt = None + return (view_145, primals_1, primals_4, primals_10, primals_16, primals_22, primals_28, primals_34, primals_40, primals_46, primals_52, primals_58, primals_64, primals_70, primals_76, primals_82, primals_88, primals_94, primals_100, primals_106, primals_112, primals_118, primals_124, primals_130, primals_136, primals_142, primals_148, unsqueeze, mul, view, permute_1, permute_2, permute_3, getitem_5, getitem_6, getitem_7, getitem_8, mul_2, view_8, addmm_2, view_10, mul_8, view_12, permute_9, permute_10, permute_11, getitem_16, getitem_17, getitem_18, getitem_19, mul_10, view_20, addmm_6, view_22, mul_16, view_24, permute_17, permute_18, permute_19, getitem_27, getitem_28, getitem_29, getitem_30, mul_18, view_32, addmm_10, view_34, mul_24, view_36, permute_25, permute_26, permute_27, getitem_38, getitem_39, getitem_40, getitem_41, mul_26, view_44, addmm_14, view_46, mul_32, view_48, permute_33, permute_34, permute_35, getitem_49, getitem_50, getitem_51, getitem_52, mul_34, view_56, addmm_18, view_58, mul_40, view_60, permute_41, permute_42, permute_43, getitem_60, getitem_61, getitem_62, getitem_63, mul_42, view_68, addmm_22, view_70, mul_48, view_72, permute_49, permute_50, permute_51, getitem_71, getitem_72, getitem_73, getitem_74, mul_50, view_80, addmm_26, view_82, mul_56, view_84, permute_57, permute_58, permute_59, getitem_82, getitem_83, getitem_84, getitem_85, mul_58, view_92, addmm_30, view_94, mul_64, view_96, permute_65, permute_66, permute_67, getitem_93, getitem_94, getitem_95, getitem_96, mul_66, view_104, addmm_34, view_106, mul_72, view_108, permute_73, permute_74, permute_75, getitem_104, getitem_105, getitem_106, getitem_107, mul_74, view_116, addmm_38, view_118, mul_80, view_120, permute_81, permute_82, permute_83, getitem_115, getitem_116, getitem_117, getitem_118, mul_82, view_128, addmm_42, view_130, mul_88, view_132, permute_89, permute_90, permute_91, getitem_126, getitem_127, getitem_128, getitem_129, mul_90, view_140, addmm_46, view_142, mul_96, full_default, view_144, permute_99, div, permute_101, permute_105, div_1, permute_109, permute_117, div_2, permute_121, permute_125, div_3, permute_129, permute_137, div_4, permute_141, permute_145, div_5, permute_149, permute_157, div_6, permute_161, permute_165, div_7, permute_169, permute_177, div_8, permute_181, permute_185, div_9, permute_189, permute_197, div_10, permute_201, permute_205, div_11, permute_209, permute_217, div_12, permute_221, permute_225, div_13, permute_229, permute_237, div_14, permute_241, permute_245, div_15, permute_249, permute_257, div_16, permute_261, permute_265, div_17, permute_269, permute_277, div_18, permute_281, permute_285, div_19, permute_289, permute_297, div_20, 
permute_301, permute_305, div_21, permute_309, permute_317, div_22, permute_321, permute_325, div_23, permute_329, permute_337, div_24) + + def load_args(reader): + buf0 = reader.storage(None, 512, device=device(type='cuda', index=0), dtype_hint=torch.int64) + reader.tensor(buf0, (1, 64), dtype=torch.int64, is_leaf=True) # primals_1 + buf1 = reader.storage(None, 154533888, device=device(type='cuda', index=0)) + reader.tensor(buf1, (50304, 768), is_leaf=True) # primals_2 + buf2 = reader.storage(None, 3145728, device=device(type='cuda', index=0)) + reader.tensor(buf2, (1024, 768), is_leaf=True) # primals_3 + buf3 = reader.storage(None, 3072, device=device(type='cuda', index=0)) + reader.tensor(buf3, (768,), is_leaf=True) # primals_4 + buf4 = reader.storage(None, 3072, device=device(type='cuda', index=0)) + reader.tensor(buf4, (768,), is_leaf=True) # primals_5 + buf5 = reader.storage(None, 7077888, device=device(type='cuda', index=0)) + reader.tensor(buf5, (2304, 768), is_leaf=True) # primals_6 + buf6 = reader.storage(None, 9216, device=device(type='cuda', index=0)) + reader.tensor(buf6, (2304,), is_leaf=True) # primals_7 + buf7 = reader.storage(None, 2359296, device=device(type='cuda', index=0)) + reader.tensor(buf7, (768, 768), is_leaf=True) # primals_8 + buf8 = reader.storage(None, 3072, device=device(type='cuda', index=0)) + reader.tensor(buf8, (768,), is_leaf=True) # primals_9 + buf9 = reader.storage(None, 3072, device=device(type='cuda', index=0)) + reader.tensor(buf9, (768,), is_leaf=True) # primals_10 + buf10 = reader.storage(None, 3072, device=device(type='cuda', index=0)) + reader.tensor(buf10, (768,), is_leaf=True) # primals_11 + buf11 = reader.storage(None, 9437184, device=device(type='cuda', index=0)) + reader.tensor(buf11, (3072, 768), is_leaf=True) # primals_12 + buf12 = reader.storage(None, 12288, device=device(type='cuda', index=0)) + reader.tensor(buf12, (3072,), is_leaf=True) # primals_13 + buf13 = reader.storage(None, 9437184, device=device(type='cuda', index=0)) + reader.tensor(buf13, (768, 3072), is_leaf=True) # primals_14 + buf14 = reader.storage(None, 3072, device=device(type='cuda', index=0)) + reader.tensor(buf14, (768,), is_leaf=True) # primals_15 + buf15 = reader.storage(None, 3072, device=device(type='cuda', index=0)) + reader.tensor(buf15, (768,), is_leaf=True) # primals_16 + buf16 = reader.storage(None, 3072, device=device(type='cuda', index=0)) + reader.tensor(buf16, (768,), is_leaf=True) # primals_17 + buf17 = reader.storage(None, 7077888, device=device(type='cuda', index=0)) + reader.tensor(buf17, (2304, 768), is_leaf=True) # primals_18 + buf18 = reader.storage(None, 9216, device=device(type='cuda', index=0)) + reader.tensor(buf18, (2304,), is_leaf=True) # primals_19 + buf19 = reader.storage(None, 2359296, device=device(type='cuda', index=0)) + reader.tensor(buf19, (768, 768), is_leaf=True) # primals_20 + buf20 = reader.storage(None, 3072, device=device(type='cuda', index=0)) + reader.tensor(buf20, (768,), is_leaf=True) # primals_21 + buf21 = reader.storage(None, 3072, device=device(type='cuda', index=0)) + reader.tensor(buf21, (768,), is_leaf=True) # primals_22 + buf22 = reader.storage(None, 3072, device=device(type='cuda', index=0)) + reader.tensor(buf22, (768,), is_leaf=True) # primals_23 + buf23 = reader.storage(None, 9437184, device=device(type='cuda', index=0)) + reader.tensor(buf23, (3072, 768), is_leaf=True) # primals_24 + buf24 = reader.storage(None, 12288, device=device(type='cuda', index=0)) + reader.tensor(buf24, (3072,), is_leaf=True) # 
primals_25 + buf25 = reader.storage(None, 9437184, device=device(type='cuda', index=0)) + reader.tensor(buf25, (768, 3072), is_leaf=True) # primals_26 + buf26 = reader.storage(None, 3072, device=device(type='cuda', index=0)) + reader.tensor(buf26, (768,), is_leaf=True) # primals_27 + buf27 = reader.storage(None, 3072, device=device(type='cuda', index=0)) + reader.tensor(buf27, (768,), is_leaf=True) # primals_28 + buf28 = reader.storage(None, 3072, device=device(type='cuda', index=0)) + reader.tensor(buf28, (768,), is_leaf=True) # primals_29 + buf29 = reader.storage(None, 7077888, device=device(type='cuda', index=0)) + reader.tensor(buf29, (2304, 768), is_leaf=True) # primals_30 + buf30 = reader.storage(None, 9216, device=device(type='cuda', index=0)) + reader.tensor(buf30, (2304,), is_leaf=True) # primals_31 + buf31 = reader.storage(None, 2359296, device=device(type='cuda', index=0)) + reader.tensor(buf31, (768, 768), is_leaf=True) # primals_32 + buf32 = reader.storage(None, 3072, device=device(type='cuda', index=0)) + reader.tensor(buf32, (768,), is_leaf=True) # primals_33 + buf33 = reader.storage(None, 3072, device=device(type='cuda', index=0)) + reader.tensor(buf33, (768,), is_leaf=True) # primals_34 + buf34 = reader.storage(None, 3072, device=device(type='cuda', index=0)) + reader.tensor(buf34, (768,), is_leaf=True) # primals_35 + buf35 = reader.storage(None, 9437184, device=device(type='cuda', index=0)) + reader.tensor(buf35, (3072, 768), is_leaf=True) # primals_36 + buf36 = reader.storage(None, 12288, device=device(type='cuda', index=0)) + reader.tensor(buf36, (3072,), is_leaf=True) # primals_37 + buf37 = reader.storage(None, 9437184, device=device(type='cuda', index=0)) + reader.tensor(buf37, (768, 3072), is_leaf=True) # primals_38 + buf38 = reader.storage(None, 3072, device=device(type='cuda', index=0)) + reader.tensor(buf38, (768,), is_leaf=True) # primals_39 + buf39 = reader.storage(None, 3072, device=device(type='cuda', index=0)) + reader.tensor(buf39, (768,), is_leaf=True) # primals_40 + buf40 = reader.storage(None, 3072, device=device(type='cuda', index=0)) + reader.tensor(buf40, (768,), is_leaf=True) # primals_41 + buf41 = reader.storage(None, 7077888, device=device(type='cuda', index=0)) + reader.tensor(buf41, (2304, 768), is_leaf=True) # primals_42 + buf42 = reader.storage(None, 9216, device=device(type='cuda', index=0)) + reader.tensor(buf42, (2304,), is_leaf=True) # primals_43 + buf43 = reader.storage(None, 2359296, device=device(type='cuda', index=0)) + reader.tensor(buf43, (768, 768), is_leaf=True) # primals_44 + buf44 = reader.storage(None, 3072, device=device(type='cuda', index=0)) + reader.tensor(buf44, (768,), is_leaf=True) # primals_45 + buf45 = reader.storage(None, 3072, device=device(type='cuda', index=0)) + reader.tensor(buf45, (768,), is_leaf=True) # primals_46 + buf46 = reader.storage(None, 3072, device=device(type='cuda', index=0)) + reader.tensor(buf46, (768,), is_leaf=True) # primals_47 + buf47 = reader.storage(None, 9437184, device=device(type='cuda', index=0)) + reader.tensor(buf47, (3072, 768), is_leaf=True) # primals_48 + buf48 = reader.storage(None, 12288, device=device(type='cuda', index=0)) + reader.tensor(buf48, (3072,), is_leaf=True) # primals_49 + buf49 = reader.storage(None, 9437184, device=device(type='cuda', index=0)) + reader.tensor(buf49, (768, 3072), is_leaf=True) # primals_50 + buf50 = reader.storage(None, 3072, device=device(type='cuda', index=0)) + reader.tensor(buf50, (768,), is_leaf=True) # primals_51 + buf51 = reader.storage(None, 
3072, device=device(type='cuda', index=0)) + reader.tensor(buf51, (768,), is_leaf=True) # primals_52 + buf52 = reader.storage(None, 3072, device=device(type='cuda', index=0)) + reader.tensor(buf52, (768,), is_leaf=True) # primals_53 + buf53 = reader.storage(None, 7077888, device=device(type='cuda', index=0)) + reader.tensor(buf53, (2304, 768), is_leaf=True) # primals_54 + buf54 = reader.storage(None, 9216, device=device(type='cuda', index=0)) + reader.tensor(buf54, (2304,), is_leaf=True) # primals_55 + buf55 = reader.storage(None, 2359296, device=device(type='cuda', index=0)) + reader.tensor(buf55, (768, 768), is_leaf=True) # primals_56 + buf56 = reader.storage(None, 3072, device=device(type='cuda', index=0)) + reader.tensor(buf56, (768,), is_leaf=True) # primals_57 + buf57 = reader.storage(None, 3072, device=device(type='cuda', index=0)) + reader.tensor(buf57, (768,), is_leaf=True) # primals_58 + buf58 = reader.storage(None, 3072, device=device(type='cuda', index=0)) + reader.tensor(buf58, (768,), is_leaf=True) # primals_59 + buf59 = reader.storage(None, 9437184, device=device(type='cuda', index=0)) + reader.tensor(buf59, (3072, 768), is_leaf=True) # primals_60 + buf60 = reader.storage(None, 12288, device=device(type='cuda', index=0)) + reader.tensor(buf60, (3072,), is_leaf=True) # primals_61 + buf61 = reader.storage(None, 9437184, device=device(type='cuda', index=0)) + reader.tensor(buf61, (768, 3072), is_leaf=True) # primals_62 + buf62 = reader.storage(None, 3072, device=device(type='cuda', index=0)) + reader.tensor(buf62, (768,), is_leaf=True) # primals_63 + buf63 = reader.storage(None, 3072, device=device(type='cuda', index=0)) + reader.tensor(buf63, (768,), is_leaf=True) # primals_64 + buf64 = reader.storage(None, 3072, device=device(type='cuda', index=0)) + reader.tensor(buf64, (768,), is_leaf=True) # primals_65 + buf65 = reader.storage(None, 7077888, device=device(type='cuda', index=0)) + reader.tensor(buf65, (2304, 768), is_leaf=True) # primals_66 + buf66 = reader.storage(None, 9216, device=device(type='cuda', index=0)) + reader.tensor(buf66, (2304,), is_leaf=True) # primals_67 + buf67 = reader.storage(None, 2359296, device=device(type='cuda', index=0)) + reader.tensor(buf67, (768, 768), is_leaf=True) # primals_68 + buf68 = reader.storage(None, 3072, device=device(type='cuda', index=0)) + reader.tensor(buf68, (768,), is_leaf=True) # primals_69 + buf69 = reader.storage(None, 3072, device=device(type='cuda', index=0)) + reader.tensor(buf69, (768,), is_leaf=True) # primals_70 + buf70 = reader.storage(None, 3072, device=device(type='cuda', index=0)) + reader.tensor(buf70, (768,), is_leaf=True) # primals_71 + buf71 = reader.storage(None, 9437184, device=device(type='cuda', index=0)) + reader.tensor(buf71, (3072, 768), is_leaf=True) # primals_72 + buf72 = reader.storage(None, 12288, device=device(type='cuda', index=0)) + reader.tensor(buf72, (3072,), is_leaf=True) # primals_73 + buf73 = reader.storage(None, 9437184, device=device(type='cuda', index=0)) + reader.tensor(buf73, (768, 3072), is_leaf=True) # primals_74 + buf74 = reader.storage(None, 3072, device=device(type='cuda', index=0)) + reader.tensor(buf74, (768,), is_leaf=True) # primals_75 + buf75 = reader.storage(None, 3072, device=device(type='cuda', index=0)) + reader.tensor(buf75, (768,), is_leaf=True) # primals_76 + buf76 = reader.storage(None, 3072, device=device(type='cuda', index=0)) + reader.tensor(buf76, (768,), is_leaf=True) # primals_77 + buf77 = reader.storage(None, 7077888, device=device(type='cuda', index=0)) + 
reader.tensor(buf77, (2304, 768), is_leaf=True) # primals_78 + buf78 = reader.storage(None, 9216, device=device(type='cuda', index=0)) + reader.tensor(buf78, (2304,), is_leaf=True) # primals_79 + buf79 = reader.storage(None, 2359296, device=device(type='cuda', index=0)) + reader.tensor(buf79, (768, 768), is_leaf=True) # primals_80 + buf80 = reader.storage(None, 3072, device=device(type='cuda', index=0)) + reader.tensor(buf80, (768,), is_leaf=True) # primals_81 + buf81 = reader.storage(None, 3072, device=device(type='cuda', index=0)) + reader.tensor(buf81, (768,), is_leaf=True) # primals_82 + buf82 = reader.storage(None, 3072, device=device(type='cuda', index=0)) + reader.tensor(buf82, (768,), is_leaf=True) # primals_83 + buf83 = reader.storage(None, 9437184, device=device(type='cuda', index=0)) + reader.tensor(buf83, (3072, 768), is_leaf=True) # primals_84 + buf84 = reader.storage(None, 12288, device=device(type='cuda', index=0)) + reader.tensor(buf84, (3072,), is_leaf=True) # primals_85 + buf85 = reader.storage(None, 9437184, device=device(type='cuda', index=0)) + reader.tensor(buf85, (768, 3072), is_leaf=True) # primals_86 + buf86 = reader.storage(None, 3072, device=device(type='cuda', index=0)) + reader.tensor(buf86, (768,), is_leaf=True) # primals_87 + buf87 = reader.storage(None, 3072, device=device(type='cuda', index=0)) + reader.tensor(buf87, (768,), is_leaf=True) # primals_88 + buf88 = reader.storage(None, 3072, device=device(type='cuda', index=0)) + reader.tensor(buf88, (768,), is_leaf=True) # primals_89 + buf89 = reader.storage(None, 7077888, device=device(type='cuda', index=0)) + reader.tensor(buf89, (2304, 768), is_leaf=True) # primals_90 + buf90 = reader.storage(None, 9216, device=device(type='cuda', index=0)) + reader.tensor(buf90, (2304,), is_leaf=True) # primals_91 + buf91 = reader.storage(None, 2359296, device=device(type='cuda', index=0)) + reader.tensor(buf91, (768, 768), is_leaf=True) # primals_92 + buf92 = reader.storage(None, 3072, device=device(type='cuda', index=0)) + reader.tensor(buf92, (768,), is_leaf=True) # primals_93 + buf93 = reader.storage(None, 3072, device=device(type='cuda', index=0)) + reader.tensor(buf93, (768,), is_leaf=True) # primals_94 + buf94 = reader.storage(None, 3072, device=device(type='cuda', index=0)) + reader.tensor(buf94, (768,), is_leaf=True) # primals_95 + buf95 = reader.storage(None, 9437184, device=device(type='cuda', index=0)) + reader.tensor(buf95, (3072, 768), is_leaf=True) # primals_96 + buf96 = reader.storage(None, 12288, device=device(type='cuda', index=0)) + reader.tensor(buf96, (3072,), is_leaf=True) # primals_97 + buf97 = reader.storage(None, 9437184, device=device(type='cuda', index=0)) + reader.tensor(buf97, (768, 3072), is_leaf=True) # primals_98 + buf98 = reader.storage(None, 3072, device=device(type='cuda', index=0)) + reader.tensor(buf98, (768,), is_leaf=True) # primals_99 + buf99 = reader.storage(None, 3072, device=device(type='cuda', index=0)) + reader.tensor(buf99, (768,), is_leaf=True) # primals_100 + buf100 = reader.storage(None, 3072, device=device(type='cuda', index=0)) + reader.tensor(buf100, (768,), is_leaf=True) # primals_101 + buf101 = reader.storage(None, 7077888, device=device(type='cuda', index=0)) + reader.tensor(buf101, (2304, 768), is_leaf=True) # primals_102 + buf102 = reader.storage(None, 9216, device=device(type='cuda', index=0)) + reader.tensor(buf102, (2304,), is_leaf=True) # primals_103 + buf103 = reader.storage(None, 2359296, device=device(type='cuda', index=0)) + reader.tensor(buf103, (768, 768), 
is_leaf=True) # primals_104 + buf104 = reader.storage(None, 3072, device=device(type='cuda', index=0)) + reader.tensor(buf104, (768,), is_leaf=True) # primals_105 + buf105 = reader.storage(None, 3072, device=device(type='cuda', index=0)) + reader.tensor(buf105, (768,), is_leaf=True) # primals_106 + buf106 = reader.storage(None, 3072, device=device(type='cuda', index=0)) + reader.tensor(buf106, (768,), is_leaf=True) # primals_107 + buf107 = reader.storage(None, 9437184, device=device(type='cuda', index=0)) + reader.tensor(buf107, (3072, 768), is_leaf=True) # primals_108 + buf108 = reader.storage(None, 12288, device=device(type='cuda', index=0)) + reader.tensor(buf108, (3072,), is_leaf=True) # primals_109 + buf109 = reader.storage(None, 9437184, device=device(type='cuda', index=0)) + reader.tensor(buf109, (768, 3072), is_leaf=True) # primals_110 + buf110 = reader.storage(None, 3072, device=device(type='cuda', index=0)) + reader.tensor(buf110, (768,), is_leaf=True) # primals_111 + buf111 = reader.storage(None, 3072, device=device(type='cuda', index=0)) + reader.tensor(buf111, (768,), is_leaf=True) # primals_112 + buf112 = reader.storage(None, 3072, device=device(type='cuda', index=0)) + reader.tensor(buf112, (768,), is_leaf=True) # primals_113 + buf113 = reader.storage(None, 7077888, device=device(type='cuda', index=0)) + reader.tensor(buf113, (2304, 768), is_leaf=True) # primals_114 + buf114 = reader.storage(None, 9216, device=device(type='cuda', index=0)) + reader.tensor(buf114, (2304,), is_leaf=True) # primals_115 + buf115 = reader.storage(None, 2359296, device=device(type='cuda', index=0)) + reader.tensor(buf115, (768, 768), is_leaf=True) # primals_116 + buf116 = reader.storage(None, 3072, device=device(type='cuda', index=0)) + reader.tensor(buf116, (768,), is_leaf=True) # primals_117 + buf117 = reader.storage(None, 3072, device=device(type='cuda', index=0)) + reader.tensor(buf117, (768,), is_leaf=True) # primals_118 + buf118 = reader.storage(None, 3072, device=device(type='cuda', index=0)) + reader.tensor(buf118, (768,), is_leaf=True) # primals_119 + buf119 = reader.storage(None, 9437184, device=device(type='cuda', index=0)) + reader.tensor(buf119, (3072, 768), is_leaf=True) # primals_120 + buf120 = reader.storage(None, 12288, device=device(type='cuda', index=0)) + reader.tensor(buf120, (3072,), is_leaf=True) # primals_121 + buf121 = reader.storage(None, 9437184, device=device(type='cuda', index=0)) + reader.tensor(buf121, (768, 3072), is_leaf=True) # primals_122 + buf122 = reader.storage(None, 3072, device=device(type='cuda', index=0)) + reader.tensor(buf122, (768,), is_leaf=True) # primals_123 + buf123 = reader.storage(None, 3072, device=device(type='cuda', index=0)) + reader.tensor(buf123, (768,), is_leaf=True) # primals_124 + buf124 = reader.storage(None, 3072, device=device(type='cuda', index=0)) + reader.tensor(buf124, (768,), is_leaf=True) # primals_125 + buf125 = reader.storage(None, 7077888, device=device(type='cuda', index=0)) + reader.tensor(buf125, (2304, 768), is_leaf=True) # primals_126 + buf126 = reader.storage(None, 9216, device=device(type='cuda', index=0)) + reader.tensor(buf126, (2304,), is_leaf=True) # primals_127 + buf127 = reader.storage(None, 2359296, device=device(type='cuda', index=0)) + reader.tensor(buf127, (768, 768), is_leaf=True) # primals_128 + buf128 = reader.storage(None, 3072, device=device(type='cuda', index=0)) + reader.tensor(buf128, (768,), is_leaf=True) # primals_129 + buf129 = reader.storage(None, 3072, device=device(type='cuda', index=0)) + 
reader.tensor(buf129, (768,), is_leaf=True) # primals_130 + buf130 = reader.storage(None, 3072, device=device(type='cuda', index=0)) + reader.tensor(buf130, (768,), is_leaf=True) # primals_131 + buf131 = reader.storage(None, 9437184, device=device(type='cuda', index=0)) + reader.tensor(buf131, (3072, 768), is_leaf=True) # primals_132 + buf132 = reader.storage(None, 12288, device=device(type='cuda', index=0)) + reader.tensor(buf132, (3072,), is_leaf=True) # primals_133 + buf133 = reader.storage(None, 9437184, device=device(type='cuda', index=0)) + reader.tensor(buf133, (768, 3072), is_leaf=True) # primals_134 + buf134 = reader.storage(None, 3072, device=device(type='cuda', index=0)) + reader.tensor(buf134, (768,), is_leaf=True) # primals_135 + buf135 = reader.storage(None, 3072, device=device(type='cuda', index=0)) + reader.tensor(buf135, (768,), is_leaf=True) # primals_136 + buf136 = reader.storage(None, 3072, device=device(type='cuda', index=0)) + reader.tensor(buf136, (768,), is_leaf=True) # primals_137 + buf137 = reader.storage(None, 7077888, device=device(type='cuda', index=0)) + reader.tensor(buf137, (2304, 768), is_leaf=True) # primals_138 + buf138 = reader.storage(None, 9216, device=device(type='cuda', index=0)) + reader.tensor(buf138, (2304,), is_leaf=True) # primals_139 + buf139 = reader.storage(None, 2359296, device=device(type='cuda', index=0)) + reader.tensor(buf139, (768, 768), is_leaf=True) # primals_140 + buf140 = reader.storage(None, 3072, device=device(type='cuda', index=0)) + reader.tensor(buf140, (768,), is_leaf=True) # primals_141 + buf141 = reader.storage(None, 3072, device=device(type='cuda', index=0)) + reader.tensor(buf141, (768,), is_leaf=True) # primals_142 + buf142 = reader.storage(None, 3072, device=device(type='cuda', index=0)) + reader.tensor(buf142, (768,), is_leaf=True) # primals_143 + buf143 = reader.storage(None, 9437184, device=device(type='cuda', index=0)) + reader.tensor(buf143, (3072, 768), is_leaf=True) # primals_144 + buf144 = reader.storage(None, 12288, device=device(type='cuda', index=0)) + reader.tensor(buf144, (3072,), is_leaf=True) # primals_145 + buf145 = reader.storage(None, 9437184, device=device(type='cuda', index=0)) + reader.tensor(buf145, (768, 3072), is_leaf=True) # primals_146 + buf146 = reader.storage(None, 3072, device=device(type='cuda', index=0)) + reader.tensor(buf146, (768,), is_leaf=True) # primals_147 + buf147 = reader.storage(None, 3072, device=device(type='cuda', index=0)) + reader.tensor(buf147, (768,), is_leaf=True) # primals_148 + buf148 = reader.storage(None, 3072, device=device(type='cuda', index=0)) + reader.tensor(buf148, (768,), is_leaf=True) # primals_149 + load_args._version = 0 + mod = Repro() + if __name__ == '__main__': + from torch._dynamo.repro.after_aot import run_repro + with torch.no_grad(): + run_repro(mod, load_args, accuracy=False, command='run', save_dir=None, tracing_mode='real', check_str=None) + # To run it separately, do + # mod, args = run_repro(mod, load_args, accuracy=False, command='get_args', save_dir=None, tracing_mode='real', check_str=None) + # mod(*args) +V0806 13:55:56.656000 4107173 torch/_inductor/compile_fx.py:778] {"inductor_post_grad_graph": {}, "frame_id": 2, "frame_compile_id": 0, "attempt": 1, "has_payload": "105eb069aa0795e578b78b6ad2013154"} + class GraphModule(torch.nn.Module): + def forward(self, primals_1: "i64[1, 64][64, 1]cuda:0", primals_2: "f32[50304, 768][768, 1]cuda:0", primals_3: "f32[1024, 768][768, 1]cuda:0", primals_4: "f32[768][1]cuda:0", primals_5: 
"f32[768][1]cuda:0", primals_6: "f32[2304, 768][768, 1]cuda:0", primals_7: "f32[2304][1]cuda:0", primals_8: "f32[768, 768][768, 1]cuda:0", primals_9: "f32[768][1]cuda:0", primals_10: "f32[768][1]cuda:0", primals_11: "f32[768][1]cuda:0", primals_12: "f32[3072, 768][768, 1]cuda:0", primals_13: "f32[3072][1]cuda:0", primals_14: "f32[768, 3072][3072, 1]cuda:0", primals_15: "f32[768][1]cuda:0", primals_16: "f32[768][1]cuda:0", primals_17: "f32[768][1]cuda:0", primals_18: "f32[2304, 768][768, 1]cuda:0", primals_19: "f32[2304][1]cuda:0", primals_20: "f32[768, 768][768, 1]cuda:0", primals_21: "f32[768][1]cuda:0", primals_22: "f32[768][1]cuda:0", primals_23: "f32[768][1]cuda:0", primals_24: "f32[3072, 768][768, 1]cuda:0", primals_25: "f32[3072][1]cuda:0", primals_26: "f32[768, 3072][3072, 1]cuda:0", primals_27: "f32[768][1]cuda:0", primals_28: "f32[768][1]cuda:0", primals_29: "f32[768][1]cuda:0", primals_30: "f32[2304, 768][768, 1]cuda:0", primals_31: "f32[2304][1]cuda:0", primals_32: "f32[768, 768][768, 1]cuda:0", primals_33: "f32[768][1]cuda:0", primals_34: "f32[768][1]cuda:0", primals_35: "f32[768][1]cuda:0", primals_36: "f32[3072, 768][768, 1]cuda:0", primals_37: "f32[3072][1]cuda:0", primals_38: "f32[768, 3072][3072, 1]cuda:0", primals_39: "f32[768][1]cuda:0", primals_40: "f32[768][1]cuda:0", primals_41: "f32[768][1]cuda:0", primals_42: "f32[2304, 768][768, 1]cuda:0", primals_43: "f32[2304][1]cuda:0", primals_44: "f32[768, 768][768, 1]cuda:0", primals_45: "f32[768][1]cuda:0", primals_46: "f32[768][1]cuda:0", primals_47: "f32[768][1]cuda:0", primals_48: "f32[3072, 768][768, 1]cuda:0", primals_49: "f32[3072][1]cuda:0", primals_50: "f32[768, 3072][3072, 1]cuda:0", primals_51: "f32[768][1]cuda:0", primals_52: "f32[768][1]cuda:0", primals_53: "f32[768][1]cuda:0", primals_54: "f32[2304, 768][768, 1]cuda:0", primals_55: "f32[2304][1]cuda:0", primals_56: "f32[768, 768][768, 1]cuda:0", primals_57: "f32[768][1]cuda:0", primals_58: "f32[768][1]cuda:0", primals_59: "f32[768][1]cuda:0", primals_60: "f32[3072, 768][768, 1]cuda:0", primals_61: "f32[3072][1]cuda:0", primals_62: "f32[768, 3072][3072, 1]cuda:0", primals_63: "f32[768][1]cuda:0", primals_64: "f32[768][1]cuda:0", primals_65: "f32[768][1]cuda:0", primals_66: "f32[2304, 768][768, 1]cuda:0", primals_67: "f32[2304][1]cuda:0", primals_68: "f32[768, 768][768, 1]cuda:0", primals_69: "f32[768][1]cuda:0", primals_70: "f32[768][1]cuda:0", primals_71: "f32[768][1]cuda:0", primals_72: "f32[3072, 768][768, 1]cuda:0", primals_73: "f32[3072][1]cuda:0", primals_74: "f32[768, 3072][3072, 1]cuda:0", primals_75: "f32[768][1]cuda:0", primals_76: "f32[768][1]cuda:0", primals_77: "f32[768][1]cuda:0", primals_78: "f32[2304, 768][768, 1]cuda:0", primals_79: "f32[2304][1]cuda:0", primals_80: "f32[768, 768][768, 1]cuda:0", primals_81: "f32[768][1]cuda:0", primals_82: "f32[768][1]cuda:0", primals_83: "f32[768][1]cuda:0", primals_84: "f32[3072, 768][768, 1]cuda:0", primals_85: "f32[3072][1]cuda:0", primals_86: "f32[768, 3072][3072, 1]cuda:0", primals_87: "f32[768][1]cuda:0", primals_88: "f32[768][1]cuda:0", primals_89: "f32[768][1]cuda:0", primals_90: "f32[2304, 768][768, 1]cuda:0", primals_91: "f32[2304][1]cuda:0", primals_92: "f32[768, 768][768, 1]cuda:0", primals_93: "f32[768][1]cuda:0", primals_94: "f32[768][1]cuda:0", primals_95: "f32[768][1]cuda:0", primals_96: "f32[3072, 768][768, 1]cuda:0", primals_97: "f32[3072][1]cuda:0", primals_98: "f32[768, 3072][3072, 1]cuda:0", primals_99: "f32[768][1]cuda:0", primals_100: "f32[768][1]cuda:0", primals_101: "f32[768][1]cuda:0", 
primals_102: "f32[2304, 768][768, 1]cuda:0", primals_103: "f32[2304][1]cuda:0", primals_104: "f32[768, 768][768, 1]cuda:0", primals_105: "f32[768][1]cuda:0", primals_106: "f32[768][1]cuda:0", primals_107: "f32[768][1]cuda:0", primals_108: "f32[3072, 768][768, 1]cuda:0", primals_109: "f32[3072][1]cuda:0", primals_110: "f32[768, 3072][3072, 1]cuda:0", primals_111: "f32[768][1]cuda:0", primals_112: "f32[768][1]cuda:0", primals_113: "f32[768][1]cuda:0", primals_114: "f32[2304, 768][768, 1]cuda:0", primals_115: "f32[2304][1]cuda:0", primals_116: "f32[768, 768][768, 1]cuda:0", primals_117: "f32[768][1]cuda:0", primals_118: "f32[768][1]cuda:0", primals_119: "f32[768][1]cuda:0", primals_120: "f32[3072, 768][768, 1]cuda:0", primals_121: "f32[3072][1]cuda:0", primals_122: "f32[768, 3072][3072, 1]cuda:0", primals_123: "f32[768][1]cuda:0", primals_124: "f32[768][1]cuda:0", primals_125: "f32[768][1]cuda:0", primals_126: "f32[2304, 768][768, 1]cuda:0", primals_127: "f32[2304][1]cuda:0", primals_128: "f32[768, 768][768, 1]cuda:0", primals_129: "f32[768][1]cuda:0", primals_130: "f32[768][1]cuda:0", primals_131: "f32[768][1]cuda:0", primals_132: "f32[3072, 768][768, 1]cuda:0", primals_133: "f32[3072][1]cuda:0", primals_134: "f32[768, 3072][3072, 1]cuda:0", primals_135: "f32[768][1]cuda:0", primals_136: "f32[768][1]cuda:0", primals_137: "f32[768][1]cuda:0", primals_138: "f32[2304, 768][768, 1]cuda:0", primals_139: "f32[2304][1]cuda:0", primals_140: "f32[768, 768][768, 1]cuda:0", primals_141: "f32[768][1]cuda:0", primals_142: "f32[768][1]cuda:0", primals_143: "f32[768][1]cuda:0", primals_144: "f32[3072, 768][768, 1]cuda:0", primals_145: "f32[3072][1]cuda:0", primals_146: "f32[768, 3072][3072, 1]cuda:0", primals_147: "f32[768][1]cuda:0", primals_148: "f32[768][1]cuda:0", primals_149: "f32[768][1]cuda:0"): + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:223 in forward, code: pos = torch.arange(0, t, dtype=torch.long, device=device).unsqueeze( + iota: "i64[64][1]cuda:0" = torch.ops.prims.iota.default(64, start = 0, step = 1, dtype = torch.int64, device = device(type='cuda', index=0), requires_grad = False) + unsqueeze: "i64[1, 64][64, 1]cuda:0" = torch.ops.aten.unsqueeze.default(iota, 0); iota = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:228 in forward, code: tok_emb = self.transformer.wte(idx) # token embeddings of shape (b, t, n_embd) + embedding: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.embedding.default(primals_2, primals_1) + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:229 in forward, code: pos_emb = self.transformer.wpe( + embedding_1: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.embedding.default(primals_3, unsqueeze); primals_3 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:232 in forward, code: x = self.transformer.drop(tok_emb + pos_emb) + add: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.add.Tensor(embedding, embedding_1); embedding = embedding_1 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:43 in forward, code: return F.layer_norm(input, self.weight.shape, self.weight, self.bias, 1e-5) + var_mean = torch.ops.aten.var_mean.correction(add, [2], correction = 0, keepdim = True) + getitem: "f32[1, 64, 1][64, 1, 1]cuda:0" = var_mean[0] + getitem_1: "f32[1, 64, 1][64, 1, 1]cuda:0" = var_mean[1]; var_mean = None + add_1: "f32[1, 64, 1][64, 1, 1]cuda:0" = 
torch.ops.aten.add.Tensor(getitem, 1e-05); getitem = None + rsqrt: "f32[1, 64, 1][64, 1, 1]cuda:0" = torch.ops.aten.rsqrt.default(add_1); add_1 = None + sub: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.sub.Tensor(add, getitem_1); getitem_1 = None + mul: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.mul.Tensor(sub, rsqrt); sub = None + mul_1: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.mul.Tensor(mul, primals_4) + add_2: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.add.Tensor(mul_1, primals_5); mul_1 = primals_5 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:82 in forward, code: q, k, v = self.c_attn(x).split(self.n_embd, dim=2) + view: "f32[64, 768][768, 1]cuda:0" = torch.ops.aten.reshape.default(add_2, [64, 768]); add_2 = None + permute: "f32[768, 2304][1, 768]cuda:0" = torch.ops.aten.permute.default(primals_6, [1, 0]); primals_6 = None + addmm: "f32[64, 2304][2304, 1]cuda:0" = torch.ops.aten.addmm.default(primals_7, view, permute); primals_7 = None + view_1: "f32[1, 64, 2304][147456, 2304, 1]cuda:0" = torch.ops.aten.reshape.default(addmm, [1, 64, 2304]); addmm = None + split = torch.ops.aten.split.Tensor(view_1, 768, 2); view_1 = None + getitem_2: "f32[1, 64, 768][147456, 2304, 1]cuda:0" = split[0] + getitem_3: "f32[1, 64, 768][147456, 2304, 1]cuda:0" = split[1] + getitem_4: "f32[1, 64, 768][147456, 2304, 1]cuda:0" = split[2]; split = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:83 in forward, code: k = k.view(B, T, self.n_head, C // self.n_head).transpose( + view_2: "f32[1, 64, 12, 64][147456, 2304, 64, 1]cuda:0" = torch.ops.aten.reshape.default(getitem_3, [1, 64, 12, 64]); getitem_3 = None + permute_1: "f32[1, 12, 64, 64][147456, 64, 2304, 1]cuda:0" = torch.ops.aten.permute.default(view_2, [0, 2, 1, 3]); view_2 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:86 in forward, code: q = q.view(B, T, self.n_head, C // self.n_head).transpose( + view_3: "f32[1, 64, 12, 64][147456, 2304, 64, 1]cuda:0" = torch.ops.aten.reshape.default(getitem_2, [1, 64, 12, 64]); getitem_2 = None + permute_2: "f32[1, 12, 64, 64][147456, 64, 2304, 1]cuda:0" = torch.ops.aten.permute.default(view_3, [0, 2, 1, 3]); view_3 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:89 in forward, code: v = v.view(B, T, self.n_head, C // self.n_head).transpose( + view_4: "f32[1, 64, 12, 64][147456, 2304, 64, 1]cuda:0" = torch.ops.aten.reshape.default(getitem_4, [1, 64, 12, 64]); getitem_4 = None + permute_3: "f32[1, 12, 64, 64][147456, 64, 2304, 1]cuda:0" = torch.ops.aten.permute.default(view_4, [0, 2, 1, 3]); view_4 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:96 in forward, code: y = torch.nn.functional.scaled_dot_product_attention( + _scaled_dot_product_efficient_attention = torch.ops.aten._scaled_dot_product_efficient_attention.default(permute_2, permute_1, permute_3, None, True, 0.0, True) + getitem_5: "f32[1, 12, 64, 64][49152, 64, 768, 1]cuda:0" = _scaled_dot_product_efficient_attention[0] + getitem_6: "f32[1, 12, 64][768, 64, 1]cuda:0" = _scaled_dot_product_efficient_attention[1] + getitem_7: "i64[][]cuda:0" = _scaled_dot_product_efficient_attention[2] + getitem_8: "i64[][]cuda:0" = _scaled_dot_product_efficient_attention[3]; _scaled_dot_product_efficient_attention = None + + # File: 
/data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:112 in forward, code: y.transpose(1, 2).contiguous().view(B, T, C) + permute_4: "f32[1, 64, 12, 64][49152, 768, 64, 1]cuda:0" = torch.ops.aten.permute.default(getitem_5, [0, 2, 1, 3]) + view_5: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.reshape.default(permute_4, [1, 64, 768]); permute_4 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:116 in forward, code: y = self.resid_dropout(self.c_proj(y)) + view_6: "f32[64, 768][768, 1]cuda:0" = torch.ops.aten.reshape.default(view_5, [64, 768]); view_5 = None + permute_5: "f32[768, 768][1, 768]cuda:0" = torch.ops.aten.permute.default(primals_8, [1, 0]); primals_8 = None + + # No stacktrace found for following nodes + mm_default_23: "f32[64, 768][768, 1]cuda:0" = torch.ops.aten.mm.default(view_6, permute_5); view_6 = None + add_tensor_23: "f32[64, 768][768, 1]cuda:0" = torch.ops.aten.add.Tensor(mm_default_23, primals_9); mm_default_23 = primals_9 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:116 in forward, code: y = self.resid_dropout(self.c_proj(y)) + view_7: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.reshape.default(add_tensor_23, [1, 64, 768]); add_tensor_23 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:144 in forward, code: x = x + self.attn(self.ln_1(x)) + add_3: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.add.Tensor(add, view_7); add = view_7 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:43 in forward, code: return F.layer_norm(input, self.weight.shape, self.weight, self.bias, 1e-5) + var_mean_1 = torch.ops.aten.var_mean.correction(add_3, [2], correction = 0, keepdim = True) + getitem_9: "f32[1, 64, 1][64, 1, 1]cuda:0" = var_mean_1[0] + getitem_10: "f32[1, 64, 1][64, 1, 1]cuda:0" = var_mean_1[1]; var_mean_1 = None + add_4: "f32[1, 64, 1][64, 1, 1]cuda:0" = torch.ops.aten.add.Tensor(getitem_9, 1e-05); getitem_9 = None + rsqrt_1: "f32[1, 64, 1][64, 1, 1]cuda:0" = torch.ops.aten.rsqrt.default(add_4); add_4 = None + sub_1: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.sub.Tensor(add_3, getitem_10); getitem_10 = None + mul_2: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.mul.Tensor(sub_1, rsqrt_1); sub_1 = None + mul_3: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.mul.Tensor(mul_2, primals_10) + add_5: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.add.Tensor(mul_3, primals_11); mul_3 = primals_11 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:128 in forward, code: x = self.c_fc(x) + view_8: "f32[64, 768][768, 1]cuda:0" = torch.ops.aten.reshape.default(add_5, [64, 768]); add_5 = None + permute_6: "f32[768, 3072][1, 768]cuda:0" = torch.ops.aten.permute.default(primals_12, [1, 0]); primals_12 = None + addmm_2: "f32[64, 3072][3072, 1]cuda:0" = torch.ops.aten.addmm.default(primals_13, view_8, permute_6); primals_13 = None + view_9: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.reshape.default(addmm_2, [1, 64, 3072]) + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:25 in new_gelu, code: 0.5 + mul_4: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.mul.Tensor(view_9, 0.5) + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:29 in new_gelu, code: + torch.tanh(math.sqrt(2.0 / math.pi) * (x + 
0.044715 * torch.pow(x, 3.0))) + pow_1: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.pow.Tensor_Scalar(view_9, 3.0) + mul_5: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.mul.Tensor(pow_1, 0.044715); pow_1 = None + add_6: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.add.Tensor(view_9, mul_5); view_9 = mul_5 = None + mul_6: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.mul.Tensor(add_6, 0.7978845608028654); add_6 = None + tanh: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.tanh.default(mul_6); mul_6 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:28 in new_gelu, code: 1.0 + add_7: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.add.Tensor(tanh, 1.0); tanh = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:25 in new_gelu, code: 0.5 + mul_7: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.mul.Tensor(mul_4, add_7); mul_4 = add_7 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:130 in forward, code: x = self.c_proj(x) + view_10: "f32[64, 3072][3072, 1]cuda:0" = torch.ops.aten.reshape.default(mul_7, [64, 3072]); mul_7 = None + permute_7: "f32[3072, 768][1, 3072]cuda:0" = torch.ops.aten.permute.default(primals_14, [1, 0]); primals_14 = None + + # No stacktrace found for following nodes + mm_default_22: "f32[64, 768][768, 1]cuda:0" = torch.ops.aten.mm.default(view_10, permute_7) + add_tensor_22: "f32[64, 768][768, 1]cuda:0" = torch.ops.aten.add.Tensor(mm_default_22, primals_15); mm_default_22 = primals_15 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:130 in forward, code: x = self.c_proj(x) + view_11: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.reshape.default(add_tensor_22, [1, 64, 768]); add_tensor_22 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:145 in forward, code: x = x + self.mlp(self.ln_2(x)) + add_8: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.add.Tensor(add_3, view_11); add_3 = view_11 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:43 in forward, code: return F.layer_norm(input, self.weight.shape, self.weight, self.bias, 1e-5) + var_mean_2 = torch.ops.aten.var_mean.correction(add_8, [2], correction = 0, keepdim = True) + getitem_11: "f32[1, 64, 1][64, 1, 1]cuda:0" = var_mean_2[0] + getitem_12: "f32[1, 64, 1][64, 1, 1]cuda:0" = var_mean_2[1]; var_mean_2 = None + add_9: "f32[1, 64, 1][64, 1, 1]cuda:0" = torch.ops.aten.add.Tensor(getitem_11, 1e-05); getitem_11 = None + rsqrt_2: "f32[1, 64, 1][64, 1, 1]cuda:0" = torch.ops.aten.rsqrt.default(add_9); add_9 = None + sub_2: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.sub.Tensor(add_8, getitem_12); getitem_12 = None + mul_8: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.mul.Tensor(sub_2, rsqrt_2); sub_2 = None + mul_9: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.mul.Tensor(mul_8, primals_16) + add_10: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.add.Tensor(mul_9, primals_17); mul_9 = primals_17 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:82 in forward, code: q, k, v = self.c_attn(x).split(self.n_embd, dim=2) + view_12: "f32[64, 768][768, 1]cuda:0" = torch.ops.aten.reshape.default(add_10, [64, 768]); add_10 = None + permute_8: "f32[768, 2304][1, 768]cuda:0" = 
torch.ops.aten.permute.default(primals_18, [1, 0]); primals_18 = None + addmm_4: "f32[64, 2304][2304, 1]cuda:0" = torch.ops.aten.addmm.default(primals_19, view_12, permute_8); primals_19 = None + view_13: "f32[1, 64, 2304][147456, 2304, 1]cuda:0" = torch.ops.aten.reshape.default(addmm_4, [1, 64, 2304]); addmm_4 = None + split_1 = torch.ops.aten.split.Tensor(view_13, 768, 2); view_13 = None + getitem_13: "f32[1, 64, 768][147456, 2304, 1]cuda:0" = split_1[0] + getitem_14: "f32[1, 64, 768][147456, 2304, 1]cuda:0" = split_1[1] + getitem_15: "f32[1, 64, 768][147456, 2304, 1]cuda:0" = split_1[2]; split_1 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:83 in forward, code: k = k.view(B, T, self.n_head, C // self.n_head).transpose( + view_14: "f32[1, 64, 12, 64][147456, 2304, 64, 1]cuda:0" = torch.ops.aten.reshape.default(getitem_14, [1, 64, 12, 64]); getitem_14 = None + permute_9: "f32[1, 12, 64, 64][147456, 64, 2304, 1]cuda:0" = torch.ops.aten.permute.default(view_14, [0, 2, 1, 3]); view_14 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:86 in forward, code: q = q.view(B, T, self.n_head, C // self.n_head).transpose( + view_15: "f32[1, 64, 12, 64][147456, 2304, 64, 1]cuda:0" = torch.ops.aten.reshape.default(getitem_13, [1, 64, 12, 64]); getitem_13 = None + permute_10: "f32[1, 12, 64, 64][147456, 64, 2304, 1]cuda:0" = torch.ops.aten.permute.default(view_15, [0, 2, 1, 3]); view_15 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:89 in forward, code: v = v.view(B, T, self.n_head, C // self.n_head).transpose( + view_16: "f32[1, 64, 12, 64][147456, 2304, 64, 1]cuda:0" = torch.ops.aten.reshape.default(getitem_15, [1, 64, 12, 64]); getitem_15 = None + permute_11: "f32[1, 12, 64, 64][147456, 64, 2304, 1]cuda:0" = torch.ops.aten.permute.default(view_16, [0, 2, 1, 3]); view_16 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:96 in forward, code: y = torch.nn.functional.scaled_dot_product_attention( + _scaled_dot_product_efficient_attention_1 = torch.ops.aten._scaled_dot_product_efficient_attention.default(permute_10, permute_9, permute_11, None, True, 0.0, True) + getitem_16: "f32[1, 12, 64, 64][49152, 64, 768, 1]cuda:0" = _scaled_dot_product_efficient_attention_1[0] + getitem_17: "f32[1, 12, 64][768, 64, 1]cuda:0" = _scaled_dot_product_efficient_attention_1[1] + getitem_18: "i64[][]cuda:0" = _scaled_dot_product_efficient_attention_1[2] + getitem_19: "i64[][]cuda:0" = _scaled_dot_product_efficient_attention_1[3]; _scaled_dot_product_efficient_attention_1 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:112 in forward, code: y.transpose(1, 2).contiguous().view(B, T, C) + permute_12: "f32[1, 64, 12, 64][49152, 768, 64, 1]cuda:0" = torch.ops.aten.permute.default(getitem_16, [0, 2, 1, 3]) + view_17: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.reshape.default(permute_12, [1, 64, 768]); permute_12 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:116 in forward, code: y = self.resid_dropout(self.c_proj(y)) + view_18: "f32[64, 768][768, 1]cuda:0" = torch.ops.aten.reshape.default(view_17, [64, 768]); view_17 = None + permute_13: "f32[768, 768][1, 768]cuda:0" = torch.ops.aten.permute.default(primals_20, [1, 0]); primals_20 = None + + # No stacktrace found for following nodes + mm_default_21: "f32[64, 768][768, 1]cuda:0" = 
torch.ops.aten.mm.default(view_18, permute_13); view_18 = None + add_tensor_21: "f32[64, 768][768, 1]cuda:0" = torch.ops.aten.add.Tensor(mm_default_21, primals_21); mm_default_21 = primals_21 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:116 in forward, code: y = self.resid_dropout(self.c_proj(y)) + view_19: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.reshape.default(add_tensor_21, [1, 64, 768]); add_tensor_21 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:144 in forward, code: x = x + self.attn(self.ln_1(x)) + add_11: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.add.Tensor(add_8, view_19); add_8 = view_19 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:43 in forward, code: return F.layer_norm(input, self.weight.shape, self.weight, self.bias, 1e-5) + var_mean_3 = torch.ops.aten.var_mean.correction(add_11, [2], correction = 0, keepdim = True) + getitem_20: "f32[1, 64, 1][64, 1, 1]cuda:0" = var_mean_3[0] + getitem_21: "f32[1, 64, 1][64, 1, 1]cuda:0" = var_mean_3[1]; var_mean_3 = None + add_12: "f32[1, 64, 1][64, 1, 1]cuda:0" = torch.ops.aten.add.Tensor(getitem_20, 1e-05); getitem_20 = None + rsqrt_3: "f32[1, 64, 1][64, 1, 1]cuda:0" = torch.ops.aten.rsqrt.default(add_12); add_12 = None + sub_3: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.sub.Tensor(add_11, getitem_21); getitem_21 = None + mul_10: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.mul.Tensor(sub_3, rsqrt_3); sub_3 = None + mul_11: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.mul.Tensor(mul_10, primals_22) + add_13: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.add.Tensor(mul_11, primals_23); mul_11 = primals_23 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:128 in forward, code: x = self.c_fc(x) + view_20: "f32[64, 768][768, 1]cuda:0" = torch.ops.aten.reshape.default(add_13, [64, 768]); add_13 = None + permute_14: "f32[768, 3072][1, 768]cuda:0" = torch.ops.aten.permute.default(primals_24, [1, 0]); primals_24 = None + addmm_6: "f32[64, 3072][3072, 1]cuda:0" = torch.ops.aten.addmm.default(primals_25, view_20, permute_14); primals_25 = None + view_21: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.reshape.default(addmm_6, [1, 64, 3072]) + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:25 in new_gelu, code: 0.5 + mul_12: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.mul.Tensor(view_21, 0.5) + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:29 in new_gelu, code: + torch.tanh(math.sqrt(2.0 / math.pi) * (x + 0.044715 * torch.pow(x, 3.0))) + pow_2: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.pow.Tensor_Scalar(view_21, 3.0) + mul_13: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.mul.Tensor(pow_2, 0.044715); pow_2 = None + add_14: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.add.Tensor(view_21, mul_13); view_21 = mul_13 = None + mul_14: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.mul.Tensor(add_14, 0.7978845608028654); add_14 = None + tanh_1: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.tanh.default(mul_14); mul_14 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:28 in new_gelu, code: 1.0 + add_15: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.add.Tensor(tanh_1, 1.0); tanh_1 = None 
+ + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:25 in new_gelu, code: 0.5 + mul_15: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.mul.Tensor(mul_12, add_15); mul_12 = add_15 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:130 in forward, code: x = self.c_proj(x) + view_22: "f32[64, 3072][3072, 1]cuda:0" = torch.ops.aten.reshape.default(mul_15, [64, 3072]); mul_15 = None + permute_15: "f32[3072, 768][1, 3072]cuda:0" = torch.ops.aten.permute.default(primals_26, [1, 0]); primals_26 = None + + # No stacktrace found for following nodes + mm_default_20: "f32[64, 768][768, 1]cuda:0" = torch.ops.aten.mm.default(view_22, permute_15) + add_tensor_20: "f32[64, 768][768, 1]cuda:0" = torch.ops.aten.add.Tensor(mm_default_20, primals_27); mm_default_20 = primals_27 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:130 in forward, code: x = self.c_proj(x) + view_23: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.reshape.default(add_tensor_20, [1, 64, 768]); add_tensor_20 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:145 in forward, code: x = x + self.mlp(self.ln_2(x)) + add_16: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.add.Tensor(add_11, view_23); add_11 = view_23 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:43 in forward, code: return F.layer_norm(input, self.weight.shape, self.weight, self.bias, 1e-5) + var_mean_4 = torch.ops.aten.var_mean.correction(add_16, [2], correction = 0, keepdim = True) + getitem_22: "f32[1, 64, 1][64, 1, 1]cuda:0" = var_mean_4[0] + getitem_23: "f32[1, 64, 1][64, 1, 1]cuda:0" = var_mean_4[1]; var_mean_4 = None + add_17: "f32[1, 64, 1][64, 1, 1]cuda:0" = torch.ops.aten.add.Tensor(getitem_22, 1e-05); getitem_22 = None + rsqrt_4: "f32[1, 64, 1][64, 1, 1]cuda:0" = torch.ops.aten.rsqrt.default(add_17); add_17 = None + sub_4: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.sub.Tensor(add_16, getitem_23); getitem_23 = None + mul_16: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.mul.Tensor(sub_4, rsqrt_4); sub_4 = None + mul_17: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.mul.Tensor(mul_16, primals_28) + add_18: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.add.Tensor(mul_17, primals_29); mul_17 = primals_29 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:82 in forward, code: q, k, v = self.c_attn(x).split(self.n_embd, dim=2) + view_24: "f32[64, 768][768, 1]cuda:0" = torch.ops.aten.reshape.default(add_18, [64, 768]); add_18 = None + permute_16: "f32[768, 2304][1, 768]cuda:0" = torch.ops.aten.permute.default(primals_30, [1, 0]); primals_30 = None + addmm_8: "f32[64, 2304][2304, 1]cuda:0" = torch.ops.aten.addmm.default(primals_31, view_24, permute_16); primals_31 = None + view_25: "f32[1, 64, 2304][147456, 2304, 1]cuda:0" = torch.ops.aten.reshape.default(addmm_8, [1, 64, 2304]); addmm_8 = None + split_2 = torch.ops.aten.split.Tensor(view_25, 768, 2); view_25 = None + getitem_24: "f32[1, 64, 768][147456, 2304, 1]cuda:0" = split_2[0] + getitem_25: "f32[1, 64, 768][147456, 2304, 1]cuda:0" = split_2[1] + getitem_26: "f32[1, 64, 768][147456, 2304, 1]cuda:0" = split_2[2]; split_2 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:83 in forward, code: k = k.view(B, T, self.n_head, C // self.n_head).transpose( + view_26: 
"f32[1, 64, 12, 64][147456, 2304, 64, 1]cuda:0" = torch.ops.aten.reshape.default(getitem_25, [1, 64, 12, 64]); getitem_25 = None + permute_17: "f32[1, 12, 64, 64][147456, 64, 2304, 1]cuda:0" = torch.ops.aten.permute.default(view_26, [0, 2, 1, 3]); view_26 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:86 in forward, code: q = q.view(B, T, self.n_head, C // self.n_head).transpose( + view_27: "f32[1, 64, 12, 64][147456, 2304, 64, 1]cuda:0" = torch.ops.aten.reshape.default(getitem_24, [1, 64, 12, 64]); getitem_24 = None + permute_18: "f32[1, 12, 64, 64][147456, 64, 2304, 1]cuda:0" = torch.ops.aten.permute.default(view_27, [0, 2, 1, 3]); view_27 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:89 in forward, code: v = v.view(B, T, self.n_head, C // self.n_head).transpose( + view_28: "f32[1, 64, 12, 64][147456, 2304, 64, 1]cuda:0" = torch.ops.aten.reshape.default(getitem_26, [1, 64, 12, 64]); getitem_26 = None + permute_19: "f32[1, 12, 64, 64][147456, 64, 2304, 1]cuda:0" = torch.ops.aten.permute.default(view_28, [0, 2, 1, 3]); view_28 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:96 in forward, code: y = torch.nn.functional.scaled_dot_product_attention( + _scaled_dot_product_efficient_attention_2 = torch.ops.aten._scaled_dot_product_efficient_attention.default(permute_18, permute_17, permute_19, None, True, 0.0, True) + getitem_27: "f32[1, 12, 64, 64][49152, 64, 768, 1]cuda:0" = _scaled_dot_product_efficient_attention_2[0] + getitem_28: "f32[1, 12, 64][768, 64, 1]cuda:0" = _scaled_dot_product_efficient_attention_2[1] + getitem_29: "i64[][]cuda:0" = _scaled_dot_product_efficient_attention_2[2] + getitem_30: "i64[][]cuda:0" = _scaled_dot_product_efficient_attention_2[3]; _scaled_dot_product_efficient_attention_2 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:112 in forward, code: y.transpose(1, 2).contiguous().view(B, T, C) + permute_20: "f32[1, 64, 12, 64][49152, 768, 64, 1]cuda:0" = torch.ops.aten.permute.default(getitem_27, [0, 2, 1, 3]) + view_29: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.reshape.default(permute_20, [1, 64, 768]); permute_20 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:116 in forward, code: y = self.resid_dropout(self.c_proj(y)) + view_30: "f32[64, 768][768, 1]cuda:0" = torch.ops.aten.reshape.default(view_29, [64, 768]); view_29 = None + permute_21: "f32[768, 768][1, 768]cuda:0" = torch.ops.aten.permute.default(primals_32, [1, 0]); primals_32 = None + + # No stacktrace found for following nodes + mm_default_19: "f32[64, 768][768, 1]cuda:0" = torch.ops.aten.mm.default(view_30, permute_21); view_30 = None + add_tensor_19: "f32[64, 768][768, 1]cuda:0" = torch.ops.aten.add.Tensor(mm_default_19, primals_33); mm_default_19 = primals_33 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:116 in forward, code: y = self.resid_dropout(self.c_proj(y)) + view_31: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.reshape.default(add_tensor_19, [1, 64, 768]); add_tensor_19 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:144 in forward, code: x = x + self.attn(self.ln_1(x)) + add_19: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.add.Tensor(add_16, view_31); add_16 = view_31 = None + + # File: 
/data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:43 in forward, code: return F.layer_norm(input, self.weight.shape, self.weight, self.bias, 1e-5) + var_mean_5 = torch.ops.aten.var_mean.correction(add_19, [2], correction = 0, keepdim = True) + getitem_31: "f32[1, 64, 1][64, 1, 1]cuda:0" = var_mean_5[0] + getitem_32: "f32[1, 64, 1][64, 1, 1]cuda:0" = var_mean_5[1]; var_mean_5 = None + add_20: "f32[1, 64, 1][64, 1, 1]cuda:0" = torch.ops.aten.add.Tensor(getitem_31, 1e-05); getitem_31 = None + rsqrt_5: "f32[1, 64, 1][64, 1, 1]cuda:0" = torch.ops.aten.rsqrt.default(add_20); add_20 = None + sub_5: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.sub.Tensor(add_19, getitem_32); getitem_32 = None + mul_18: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.mul.Tensor(sub_5, rsqrt_5); sub_5 = None + mul_19: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.mul.Tensor(mul_18, primals_34) + add_21: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.add.Tensor(mul_19, primals_35); mul_19 = primals_35 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:128 in forward, code: x = self.c_fc(x) + view_32: "f32[64, 768][768, 1]cuda:0" = torch.ops.aten.reshape.default(add_21, [64, 768]); add_21 = None + permute_22: "f32[768, 3072][1, 768]cuda:0" = torch.ops.aten.permute.default(primals_36, [1, 0]); primals_36 = None + addmm_10: "f32[64, 3072][3072, 1]cuda:0" = torch.ops.aten.addmm.default(primals_37, view_32, permute_22); primals_37 = None + view_33: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.reshape.default(addmm_10, [1, 64, 3072]) + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:25 in new_gelu, code: 0.5 + mul_20: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.mul.Tensor(view_33, 0.5) + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:29 in new_gelu, code: + torch.tanh(math.sqrt(2.0 / math.pi) * (x + 0.044715 * torch.pow(x, 3.0))) + pow_3: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.pow.Tensor_Scalar(view_33, 3.0) + mul_21: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.mul.Tensor(pow_3, 0.044715); pow_3 = None + add_22: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.add.Tensor(view_33, mul_21); view_33 = mul_21 = None + mul_22: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.mul.Tensor(add_22, 0.7978845608028654); add_22 = None + tanh_2: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.tanh.default(mul_22); mul_22 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:28 in new_gelu, code: 1.0 + add_23: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.add.Tensor(tanh_2, 1.0); tanh_2 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:25 in new_gelu, code: 0.5 + mul_23: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.mul.Tensor(mul_20, add_23); mul_20 = add_23 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:130 in forward, code: x = self.c_proj(x) + view_34: "f32[64, 3072][3072, 1]cuda:0" = torch.ops.aten.reshape.default(mul_23, [64, 3072]); mul_23 = None + permute_23: "f32[3072, 768][1, 3072]cuda:0" = torch.ops.aten.permute.default(primals_38, [1, 0]); primals_38 = None + + # No stacktrace found for following nodes + mm_default_18: "f32[64, 768][768, 1]cuda:0" = torch.ops.aten.mm.default(view_34, permute_23) + add_tensor_18: 
"f32[64, 768][768, 1]cuda:0" = torch.ops.aten.add.Tensor(mm_default_18, primals_39); mm_default_18 = primals_39 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:130 in forward, code: x = self.c_proj(x) + view_35: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.reshape.default(add_tensor_18, [1, 64, 768]); add_tensor_18 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:145 in forward, code: x = x + self.mlp(self.ln_2(x)) + add_24: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.add.Tensor(add_19, view_35); add_19 = view_35 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:43 in forward, code: return F.layer_norm(input, self.weight.shape, self.weight, self.bias, 1e-5) + var_mean_6 = torch.ops.aten.var_mean.correction(add_24, [2], correction = 0, keepdim = True) + getitem_33: "f32[1, 64, 1][64, 1, 1]cuda:0" = var_mean_6[0] + getitem_34: "f32[1, 64, 1][64, 1, 1]cuda:0" = var_mean_6[1]; var_mean_6 = None + add_25: "f32[1, 64, 1][64, 1, 1]cuda:0" = torch.ops.aten.add.Tensor(getitem_33, 1e-05); getitem_33 = None + rsqrt_6: "f32[1, 64, 1][64, 1, 1]cuda:0" = torch.ops.aten.rsqrt.default(add_25); add_25 = None + sub_6: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.sub.Tensor(add_24, getitem_34); getitem_34 = None + mul_24: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.mul.Tensor(sub_6, rsqrt_6); sub_6 = None + mul_25: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.mul.Tensor(mul_24, primals_40) + add_26: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.add.Tensor(mul_25, primals_41); mul_25 = primals_41 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:82 in forward, code: q, k, v = self.c_attn(x).split(self.n_embd, dim=2) + view_36: "f32[64, 768][768, 1]cuda:0" = torch.ops.aten.reshape.default(add_26, [64, 768]); add_26 = None + permute_24: "f32[768, 2304][1, 768]cuda:0" = torch.ops.aten.permute.default(primals_42, [1, 0]); primals_42 = None + addmm_12: "f32[64, 2304][2304, 1]cuda:0" = torch.ops.aten.addmm.default(primals_43, view_36, permute_24); primals_43 = None + view_37: "f32[1, 64, 2304][147456, 2304, 1]cuda:0" = torch.ops.aten.reshape.default(addmm_12, [1, 64, 2304]); addmm_12 = None + split_3 = torch.ops.aten.split.Tensor(view_37, 768, 2); view_37 = None + getitem_35: "f32[1, 64, 768][147456, 2304, 1]cuda:0" = split_3[0] + getitem_36: "f32[1, 64, 768][147456, 2304, 1]cuda:0" = split_3[1] + getitem_37: "f32[1, 64, 768][147456, 2304, 1]cuda:0" = split_3[2]; split_3 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:83 in forward, code: k = k.view(B, T, self.n_head, C // self.n_head).transpose( + view_38: "f32[1, 64, 12, 64][147456, 2304, 64, 1]cuda:0" = torch.ops.aten.reshape.default(getitem_36, [1, 64, 12, 64]); getitem_36 = None + permute_25: "f32[1, 12, 64, 64][147456, 64, 2304, 1]cuda:0" = torch.ops.aten.permute.default(view_38, [0, 2, 1, 3]); view_38 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:86 in forward, code: q = q.view(B, T, self.n_head, C // self.n_head).transpose( + view_39: "f32[1, 64, 12, 64][147456, 2304, 64, 1]cuda:0" = torch.ops.aten.reshape.default(getitem_35, [1, 64, 12, 64]); getitem_35 = None + permute_26: "f32[1, 12, 64, 64][147456, 64, 2304, 1]cuda:0" = torch.ops.aten.permute.default(view_39, [0, 2, 1, 3]); view_39 = None + + # File: 
/data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:89 in forward, code: v = v.view(B, T, self.n_head, C // self.n_head).transpose( + view_40: "f32[1, 64, 12, 64][147456, 2304, 64, 1]cuda:0" = torch.ops.aten.reshape.default(getitem_37, [1, 64, 12, 64]); getitem_37 = None + permute_27: "f32[1, 12, 64, 64][147456, 64, 2304, 1]cuda:0" = torch.ops.aten.permute.default(view_40, [0, 2, 1, 3]); view_40 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:96 in forward, code: y = torch.nn.functional.scaled_dot_product_attention( + _scaled_dot_product_efficient_attention_3 = torch.ops.aten._scaled_dot_product_efficient_attention.default(permute_26, permute_25, permute_27, None, True, 0.0, True) + getitem_38: "f32[1, 12, 64, 64][49152, 64, 768, 1]cuda:0" = _scaled_dot_product_efficient_attention_3[0] + getitem_39: "f32[1, 12, 64][768, 64, 1]cuda:0" = _scaled_dot_product_efficient_attention_3[1] + getitem_40: "i64[][]cuda:0" = _scaled_dot_product_efficient_attention_3[2] + getitem_41: "i64[][]cuda:0" = _scaled_dot_product_efficient_attention_3[3]; _scaled_dot_product_efficient_attention_3 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:112 in forward, code: y.transpose(1, 2).contiguous().view(B, T, C) + permute_28: "f32[1, 64, 12, 64][49152, 768, 64, 1]cuda:0" = torch.ops.aten.permute.default(getitem_38, [0, 2, 1, 3]) + view_41: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.reshape.default(permute_28, [1, 64, 768]); permute_28 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:116 in forward, code: y = self.resid_dropout(self.c_proj(y)) + view_42: "f32[64, 768][768, 1]cuda:0" = torch.ops.aten.reshape.default(view_41, [64, 768]); view_41 = None + permute_29: "f32[768, 768][1, 768]cuda:0" = torch.ops.aten.permute.default(primals_44, [1, 0]); primals_44 = None + + # No stacktrace found for following nodes + mm_default_17: "f32[64, 768][768, 1]cuda:0" = torch.ops.aten.mm.default(view_42, permute_29); view_42 = None + add_tensor_17: "f32[64, 768][768, 1]cuda:0" = torch.ops.aten.add.Tensor(mm_default_17, primals_45); mm_default_17 = primals_45 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:116 in forward, code: y = self.resid_dropout(self.c_proj(y)) + view_43: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.reshape.default(add_tensor_17, [1, 64, 768]); add_tensor_17 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:144 in forward, code: x = x + self.attn(self.ln_1(x)) + add_27: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.add.Tensor(add_24, view_43); add_24 = view_43 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:43 in forward, code: return F.layer_norm(input, self.weight.shape, self.weight, self.bias, 1e-5) + var_mean_7 = torch.ops.aten.var_mean.correction(add_27, [2], correction = 0, keepdim = True) + getitem_42: "f32[1, 64, 1][64, 1, 1]cuda:0" = var_mean_7[0] + getitem_43: "f32[1, 64, 1][64, 1, 1]cuda:0" = var_mean_7[1]; var_mean_7 = None + add_28: "f32[1, 64, 1][64, 1, 1]cuda:0" = torch.ops.aten.add.Tensor(getitem_42, 1e-05); getitem_42 = None + rsqrt_7: "f32[1, 64, 1][64, 1, 1]cuda:0" = torch.ops.aten.rsqrt.default(add_28); add_28 = None + sub_7: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.sub.Tensor(add_27, getitem_43); getitem_43 = None + mul_26: "f32[1, 64, 768][49152, 768, 1]cuda:0" = 
torch.ops.aten.mul.Tensor(sub_7, rsqrt_7); sub_7 = None + mul_27: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.mul.Tensor(mul_26, primals_46) + add_29: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.add.Tensor(mul_27, primals_47); mul_27 = primals_47 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:128 in forward, code: x = self.c_fc(x) + view_44: "f32[64, 768][768, 1]cuda:0" = torch.ops.aten.reshape.default(add_29, [64, 768]); add_29 = None + permute_30: "f32[768, 3072][1, 768]cuda:0" = torch.ops.aten.permute.default(primals_48, [1, 0]); primals_48 = None + addmm_14: "f32[64, 3072][3072, 1]cuda:0" = torch.ops.aten.addmm.default(primals_49, view_44, permute_30); primals_49 = None + view_45: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.reshape.default(addmm_14, [1, 64, 3072]) + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:25 in new_gelu, code: 0.5 + mul_28: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.mul.Tensor(view_45, 0.5) + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:29 in new_gelu, code: + torch.tanh(math.sqrt(2.0 / math.pi) * (x + 0.044715 * torch.pow(x, 3.0))) + pow_4: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.pow.Tensor_Scalar(view_45, 3.0) + mul_29: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.mul.Tensor(pow_4, 0.044715); pow_4 = None + add_30: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.add.Tensor(view_45, mul_29); view_45 = mul_29 = None + mul_30: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.mul.Tensor(add_30, 0.7978845608028654); add_30 = None + tanh_3: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.tanh.default(mul_30); mul_30 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:28 in new_gelu, code: 1.0 + add_31: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.add.Tensor(tanh_3, 1.0); tanh_3 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:25 in new_gelu, code: 0.5 + mul_31: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.mul.Tensor(mul_28, add_31); mul_28 = add_31 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:130 in forward, code: x = self.c_proj(x) + view_46: "f32[64, 3072][3072, 1]cuda:0" = torch.ops.aten.reshape.default(mul_31, [64, 3072]); mul_31 = None + permute_31: "f32[3072, 768][1, 3072]cuda:0" = torch.ops.aten.permute.default(primals_50, [1, 0]); primals_50 = None + + # No stacktrace found for following nodes + mm_default_16: "f32[64, 768][768, 1]cuda:0" = torch.ops.aten.mm.default(view_46, permute_31) + add_tensor_16: "f32[64, 768][768, 1]cuda:0" = torch.ops.aten.add.Tensor(mm_default_16, primals_51); mm_default_16 = primals_51 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:130 in forward, code: x = self.c_proj(x) + view_47: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.reshape.default(add_tensor_16, [1, 64, 768]); add_tensor_16 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:145 in forward, code: x = x + self.mlp(self.ln_2(x)) + add_32: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.add.Tensor(add_27, view_47); add_27 = view_47 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:43 in forward, code: return F.layer_norm(input, 
self.weight.shape, self.weight, self.bias, 1e-5) + var_mean_8 = torch.ops.aten.var_mean.correction(add_32, [2], correction = 0, keepdim = True) + getitem_44: "f32[1, 64, 1][64, 1, 1]cuda:0" = var_mean_8[0] + getitem_45: "f32[1, 64, 1][64, 1, 1]cuda:0" = var_mean_8[1]; var_mean_8 = None + add_33: "f32[1, 64, 1][64, 1, 1]cuda:0" = torch.ops.aten.add.Tensor(getitem_44, 1e-05); getitem_44 = None + rsqrt_8: "f32[1, 64, 1][64, 1, 1]cuda:0" = torch.ops.aten.rsqrt.default(add_33); add_33 = None + sub_8: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.sub.Tensor(add_32, getitem_45); getitem_45 = None + mul_32: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.mul.Tensor(sub_8, rsqrt_8); sub_8 = None + mul_33: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.mul.Tensor(mul_32, primals_52) + add_34: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.add.Tensor(mul_33, primals_53); mul_33 = primals_53 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:82 in forward, code: q, k, v = self.c_attn(x).split(self.n_embd, dim=2) + view_48: "f32[64, 768][768, 1]cuda:0" = torch.ops.aten.reshape.default(add_34, [64, 768]); add_34 = None + permute_32: "f32[768, 2304][1, 768]cuda:0" = torch.ops.aten.permute.default(primals_54, [1, 0]); primals_54 = None + addmm_16: "f32[64, 2304][2304, 1]cuda:0" = torch.ops.aten.addmm.default(primals_55, view_48, permute_32); primals_55 = None + view_49: "f32[1, 64, 2304][147456, 2304, 1]cuda:0" = torch.ops.aten.reshape.default(addmm_16, [1, 64, 2304]); addmm_16 = None + split_4 = torch.ops.aten.split.Tensor(view_49, 768, 2); view_49 = None + getitem_46: "f32[1, 64, 768][147456, 2304, 1]cuda:0" = split_4[0] + getitem_47: "f32[1, 64, 768][147456, 2304, 1]cuda:0" = split_4[1] + getitem_48: "f32[1, 64, 768][147456, 2304, 1]cuda:0" = split_4[2]; split_4 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:83 in forward, code: k = k.view(B, T, self.n_head, C // self.n_head).transpose( + view_50: "f32[1, 64, 12, 64][147456, 2304, 64, 1]cuda:0" = torch.ops.aten.reshape.default(getitem_47, [1, 64, 12, 64]); getitem_47 = None + permute_33: "f32[1, 12, 64, 64][147456, 64, 2304, 1]cuda:0" = torch.ops.aten.permute.default(view_50, [0, 2, 1, 3]); view_50 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:86 in forward, code: q = q.view(B, T, self.n_head, C // self.n_head).transpose( + view_51: "f32[1, 64, 12, 64][147456, 2304, 64, 1]cuda:0" = torch.ops.aten.reshape.default(getitem_46, [1, 64, 12, 64]); getitem_46 = None + permute_34: "f32[1, 12, 64, 64][147456, 64, 2304, 1]cuda:0" = torch.ops.aten.permute.default(view_51, [0, 2, 1, 3]); view_51 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:89 in forward, code: v = v.view(B, T, self.n_head, C // self.n_head).transpose( + view_52: "f32[1, 64, 12, 64][147456, 2304, 64, 1]cuda:0" = torch.ops.aten.reshape.default(getitem_48, [1, 64, 12, 64]); getitem_48 = None + permute_35: "f32[1, 12, 64, 64][147456, 64, 2304, 1]cuda:0" = torch.ops.aten.permute.default(view_52, [0, 2, 1, 3]); view_52 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:96 in forward, code: y = torch.nn.functional.scaled_dot_product_attention( + _scaled_dot_product_efficient_attention_4 = torch.ops.aten._scaled_dot_product_efficient_attention.default(permute_34, permute_33, permute_35, None, True, 0.0, True) + getitem_49: "f32[1, 12, 64, 64][49152, 64, 
768, 1]cuda:0" = _scaled_dot_product_efficient_attention_4[0] + getitem_50: "f32[1, 12, 64][768, 64, 1]cuda:0" = _scaled_dot_product_efficient_attention_4[1] + getitem_51: "i64[][]cuda:0" = _scaled_dot_product_efficient_attention_4[2] + getitem_52: "i64[][]cuda:0" = _scaled_dot_product_efficient_attention_4[3]; _scaled_dot_product_efficient_attention_4 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:112 in forward, code: y.transpose(1, 2).contiguous().view(B, T, C) + permute_36: "f32[1, 64, 12, 64][49152, 768, 64, 1]cuda:0" = torch.ops.aten.permute.default(getitem_49, [0, 2, 1, 3]) + view_53: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.reshape.default(permute_36, [1, 64, 768]); permute_36 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:116 in forward, code: y = self.resid_dropout(self.c_proj(y)) + view_54: "f32[64, 768][768, 1]cuda:0" = torch.ops.aten.reshape.default(view_53, [64, 768]); view_53 = None + permute_37: "f32[768, 768][1, 768]cuda:0" = torch.ops.aten.permute.default(primals_56, [1, 0]); primals_56 = None + + # No stacktrace found for following nodes + mm_default_15: "f32[64, 768][768, 1]cuda:0" = torch.ops.aten.mm.default(view_54, permute_37); view_54 = None + add_tensor_15: "f32[64, 768][768, 1]cuda:0" = torch.ops.aten.add.Tensor(mm_default_15, primals_57); mm_default_15 = primals_57 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:116 in forward, code: y = self.resid_dropout(self.c_proj(y)) + view_55: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.reshape.default(add_tensor_15, [1, 64, 768]); add_tensor_15 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:144 in forward, code: x = x + self.attn(self.ln_1(x)) + add_35: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.add.Tensor(add_32, view_55); add_32 = view_55 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:43 in forward, code: return F.layer_norm(input, self.weight.shape, self.weight, self.bias, 1e-5) + var_mean_9 = torch.ops.aten.var_mean.correction(add_35, [2], correction = 0, keepdim = True) + getitem_53: "f32[1, 64, 1][64, 1, 1]cuda:0" = var_mean_9[0] + getitem_54: "f32[1, 64, 1][64, 1, 1]cuda:0" = var_mean_9[1]; var_mean_9 = None + add_36: "f32[1, 64, 1][64, 1, 1]cuda:0" = torch.ops.aten.add.Tensor(getitem_53, 1e-05); getitem_53 = None + rsqrt_9: "f32[1, 64, 1][64, 1, 1]cuda:0" = torch.ops.aten.rsqrt.default(add_36); add_36 = None + sub_9: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.sub.Tensor(add_35, getitem_54); getitem_54 = None + mul_34: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.mul.Tensor(sub_9, rsqrt_9); sub_9 = None + mul_35: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.mul.Tensor(mul_34, primals_58) + add_37: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.add.Tensor(mul_35, primals_59); mul_35 = primals_59 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:128 in forward, code: x = self.c_fc(x) + view_56: "f32[64, 768][768, 1]cuda:0" = torch.ops.aten.reshape.default(add_37, [64, 768]); add_37 = None + permute_38: "f32[768, 3072][1, 768]cuda:0" = torch.ops.aten.permute.default(primals_60, [1, 0]); primals_60 = None + addmm_18: "f32[64, 3072][3072, 1]cuda:0" = torch.ops.aten.addmm.default(primals_61, view_56, permute_38); primals_61 = None + view_57: "f32[1, 64, 3072][196608, 3072, 
1]cuda:0" = torch.ops.aten.reshape.default(addmm_18, [1, 64, 3072]) + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:25 in new_gelu, code: 0.5 + mul_36: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.mul.Tensor(view_57, 0.5) + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:29 in new_gelu, code: + torch.tanh(math.sqrt(2.0 / math.pi) * (x + 0.044715 * torch.pow(x, 3.0))) + pow_5: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.pow.Tensor_Scalar(view_57, 3.0) + mul_37: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.mul.Tensor(pow_5, 0.044715); pow_5 = None + add_38: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.add.Tensor(view_57, mul_37); view_57 = mul_37 = None + mul_38: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.mul.Tensor(add_38, 0.7978845608028654); add_38 = None + tanh_4: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.tanh.default(mul_38); mul_38 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:28 in new_gelu, code: 1.0 + add_39: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.add.Tensor(tanh_4, 1.0); tanh_4 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:25 in new_gelu, code: 0.5 + mul_39: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.mul.Tensor(mul_36, add_39); mul_36 = add_39 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:130 in forward, code: x = self.c_proj(x) + view_58: "f32[64, 3072][3072, 1]cuda:0" = torch.ops.aten.reshape.default(mul_39, [64, 3072]); mul_39 = None + permute_39: "f32[3072, 768][1, 3072]cuda:0" = torch.ops.aten.permute.default(primals_62, [1, 0]); primals_62 = None + + # No stacktrace found for following nodes + mm_default_14: "f32[64, 768][768, 1]cuda:0" = torch.ops.aten.mm.default(view_58, permute_39) + add_tensor_14: "f32[64, 768][768, 1]cuda:0" = torch.ops.aten.add.Tensor(mm_default_14, primals_63); mm_default_14 = primals_63 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:130 in forward, code: x = self.c_proj(x) + view_59: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.reshape.default(add_tensor_14, [1, 64, 768]); add_tensor_14 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:145 in forward, code: x = x + self.mlp(self.ln_2(x)) + add_40: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.add.Tensor(add_35, view_59); add_35 = view_59 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:43 in forward, code: return F.layer_norm(input, self.weight.shape, self.weight, self.bias, 1e-5) + var_mean_10 = torch.ops.aten.var_mean.correction(add_40, [2], correction = 0, keepdim = True) + getitem_55: "f32[1, 64, 1][64, 1, 1]cuda:0" = var_mean_10[0] + getitem_56: "f32[1, 64, 1][64, 1, 1]cuda:0" = var_mean_10[1]; var_mean_10 = None + add_41: "f32[1, 64, 1][64, 1, 1]cuda:0" = torch.ops.aten.add.Tensor(getitem_55, 1e-05); getitem_55 = None + rsqrt_10: "f32[1, 64, 1][64, 1, 1]cuda:0" = torch.ops.aten.rsqrt.default(add_41); add_41 = None + sub_10: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.sub.Tensor(add_40, getitem_56); getitem_56 = None + mul_40: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.mul.Tensor(sub_10, rsqrt_10); sub_10 = None + mul_41: "f32[1, 64, 768][49152, 768, 1]cuda:0" = 
torch.ops.aten.mul.Tensor(mul_40, primals_64) + add_42: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.add.Tensor(mul_41, primals_65); mul_41 = primals_65 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:82 in forward, code: q, k, v = self.c_attn(x).split(self.n_embd, dim=2) + view_60: "f32[64, 768][768, 1]cuda:0" = torch.ops.aten.reshape.default(add_42, [64, 768]); add_42 = None + permute_40: "f32[768, 2304][1, 768]cuda:0" = torch.ops.aten.permute.default(primals_66, [1, 0]); primals_66 = None + addmm_20: "f32[64, 2304][2304, 1]cuda:0" = torch.ops.aten.addmm.default(primals_67, view_60, permute_40); primals_67 = None + view_61: "f32[1, 64, 2304][147456, 2304, 1]cuda:0" = torch.ops.aten.reshape.default(addmm_20, [1, 64, 2304]); addmm_20 = None + split_5 = torch.ops.aten.split.Tensor(view_61, 768, 2); view_61 = None + getitem_57: "f32[1, 64, 768][147456, 2304, 1]cuda:0" = split_5[0] + getitem_58: "f32[1, 64, 768][147456, 2304, 1]cuda:0" = split_5[1] + getitem_59: "f32[1, 64, 768][147456, 2304, 1]cuda:0" = split_5[2]; split_5 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:83 in forward, code: k = k.view(B, T, self.n_head, C // self.n_head).transpose( + view_62: "f32[1, 64, 12, 64][147456, 2304, 64, 1]cuda:0" = torch.ops.aten.reshape.default(getitem_58, [1, 64, 12, 64]); getitem_58 = None + permute_41: "f32[1, 12, 64, 64][147456, 64, 2304, 1]cuda:0" = torch.ops.aten.permute.default(view_62, [0, 2, 1, 3]); view_62 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:86 in forward, code: q = q.view(B, T, self.n_head, C // self.n_head).transpose( + view_63: "f32[1, 64, 12, 64][147456, 2304, 64, 1]cuda:0" = torch.ops.aten.reshape.default(getitem_57, [1, 64, 12, 64]); getitem_57 = None + permute_42: "f32[1, 12, 64, 64][147456, 64, 2304, 1]cuda:0" = torch.ops.aten.permute.default(view_63, [0, 2, 1, 3]); view_63 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:89 in forward, code: v = v.view(B, T, self.n_head, C // self.n_head).transpose( + view_64: "f32[1, 64, 12, 64][147456, 2304, 64, 1]cuda:0" = torch.ops.aten.reshape.default(getitem_59, [1, 64, 12, 64]); getitem_59 = None + permute_43: "f32[1, 12, 64, 64][147456, 64, 2304, 1]cuda:0" = torch.ops.aten.permute.default(view_64, [0, 2, 1, 3]); view_64 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:96 in forward, code: y = torch.nn.functional.scaled_dot_product_attention( + _scaled_dot_product_efficient_attention_5 = torch.ops.aten._scaled_dot_product_efficient_attention.default(permute_42, permute_41, permute_43, None, True, 0.0, True) + getitem_60: "f32[1, 12, 64, 64][49152, 64, 768, 1]cuda:0" = _scaled_dot_product_efficient_attention_5[0] + getitem_61: "f32[1, 12, 64][768, 64, 1]cuda:0" = _scaled_dot_product_efficient_attention_5[1] + getitem_62: "i64[][]cuda:0" = _scaled_dot_product_efficient_attention_5[2] + getitem_63: "i64[][]cuda:0" = _scaled_dot_product_efficient_attention_5[3]; _scaled_dot_product_efficient_attention_5 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:112 in forward, code: y.transpose(1, 2).contiguous().view(B, T, C) + permute_44: "f32[1, 64, 12, 64][49152, 768, 64, 1]cuda:0" = torch.ops.aten.permute.default(getitem_60, [0, 2, 1, 3]) + view_65: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.reshape.default(permute_44, [1, 64, 768]); permute_44 = None + + 
# File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:116 in forward, code: y = self.resid_dropout(self.c_proj(y)) + view_66: "f32[64, 768][768, 1]cuda:0" = torch.ops.aten.reshape.default(view_65, [64, 768]); view_65 = None + permute_45: "f32[768, 768][1, 768]cuda:0" = torch.ops.aten.permute.default(primals_68, [1, 0]); primals_68 = None + + # No stacktrace found for following nodes + mm_default_13: "f32[64, 768][768, 1]cuda:0" = torch.ops.aten.mm.default(view_66, permute_45); view_66 = None + add_tensor_13: "f32[64, 768][768, 1]cuda:0" = torch.ops.aten.add.Tensor(mm_default_13, primals_69); mm_default_13 = primals_69 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:116 in forward, code: y = self.resid_dropout(self.c_proj(y)) + view_67: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.reshape.default(add_tensor_13, [1, 64, 768]); add_tensor_13 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:144 in forward, code: x = x + self.attn(self.ln_1(x)) + add_43: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.add.Tensor(add_40, view_67); add_40 = view_67 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:43 in forward, code: return F.layer_norm(input, self.weight.shape, self.weight, self.bias, 1e-5) + var_mean_11 = torch.ops.aten.var_mean.correction(add_43, [2], correction = 0, keepdim = True) + getitem_64: "f32[1, 64, 1][64, 1, 1]cuda:0" = var_mean_11[0] + getitem_65: "f32[1, 64, 1][64, 1, 1]cuda:0" = var_mean_11[1]; var_mean_11 = None + add_44: "f32[1, 64, 1][64, 1, 1]cuda:0" = torch.ops.aten.add.Tensor(getitem_64, 1e-05); getitem_64 = None + rsqrt_11: "f32[1, 64, 1][64, 1, 1]cuda:0" = torch.ops.aten.rsqrt.default(add_44); add_44 = None + sub_11: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.sub.Tensor(add_43, getitem_65); getitem_65 = None + mul_42: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.mul.Tensor(sub_11, rsqrt_11); sub_11 = None + mul_43: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.mul.Tensor(mul_42, primals_70) + add_45: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.add.Tensor(mul_43, primals_71); mul_43 = primals_71 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:128 in forward, code: x = self.c_fc(x) + view_68: "f32[64, 768][768, 1]cuda:0" = torch.ops.aten.reshape.default(add_45, [64, 768]); add_45 = None + permute_46: "f32[768, 3072][1, 768]cuda:0" = torch.ops.aten.permute.default(primals_72, [1, 0]); primals_72 = None + addmm_22: "f32[64, 3072][3072, 1]cuda:0" = torch.ops.aten.addmm.default(primals_73, view_68, permute_46); primals_73 = None + view_69: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.reshape.default(addmm_22, [1, 64, 3072]) + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:25 in new_gelu, code: 0.5 + mul_44: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.mul.Tensor(view_69, 0.5) + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:29 in new_gelu, code: + torch.tanh(math.sqrt(2.0 / math.pi) * (x + 0.044715 * torch.pow(x, 3.0))) + pow_6: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.pow.Tensor_Scalar(view_69, 3.0) + mul_45: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.mul.Tensor(pow_6, 0.044715); pow_6 = None + add_46: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.add.Tensor(view_69, 
mul_45); view_69 = mul_45 = None + mul_46: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.mul.Tensor(add_46, 0.7978845608028654); add_46 = None + tanh_5: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.tanh.default(mul_46); mul_46 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:28 in new_gelu, code: 1.0 + add_47: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.add.Tensor(tanh_5, 1.0); tanh_5 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:25 in new_gelu, code: 0.5 + mul_47: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.mul.Tensor(mul_44, add_47); mul_44 = add_47 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:130 in forward, code: x = self.c_proj(x) + view_70: "f32[64, 3072][3072, 1]cuda:0" = torch.ops.aten.reshape.default(mul_47, [64, 3072]); mul_47 = None + permute_47: "f32[3072, 768][1, 3072]cuda:0" = torch.ops.aten.permute.default(primals_74, [1, 0]); primals_74 = None + + # No stacktrace found for following nodes + mm_default_12: "f32[64, 768][768, 1]cuda:0" = torch.ops.aten.mm.default(view_70, permute_47) + add_tensor_12: "f32[64, 768][768, 1]cuda:0" = torch.ops.aten.add.Tensor(mm_default_12, primals_75); mm_default_12 = primals_75 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:130 in forward, code: x = self.c_proj(x) + view_71: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.reshape.default(add_tensor_12, [1, 64, 768]); add_tensor_12 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:145 in forward, code: x = x + self.mlp(self.ln_2(x)) + add_48: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.add.Tensor(add_43, view_71); add_43 = view_71 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:43 in forward, code: return F.layer_norm(input, self.weight.shape, self.weight, self.bias, 1e-5) + var_mean_12 = torch.ops.aten.var_mean.correction(add_48, [2], correction = 0, keepdim = True) + getitem_66: "f32[1, 64, 1][64, 1, 1]cuda:0" = var_mean_12[0] + getitem_67: "f32[1, 64, 1][64, 1, 1]cuda:0" = var_mean_12[1]; var_mean_12 = None + add_49: "f32[1, 64, 1][64, 1, 1]cuda:0" = torch.ops.aten.add.Tensor(getitem_66, 1e-05); getitem_66 = None + rsqrt_12: "f32[1, 64, 1][64, 1, 1]cuda:0" = torch.ops.aten.rsqrt.default(add_49); add_49 = None + sub_12: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.sub.Tensor(add_48, getitem_67); getitem_67 = None + mul_48: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.mul.Tensor(sub_12, rsqrt_12); sub_12 = None + mul_49: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.mul.Tensor(mul_48, primals_76) + add_50: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.add.Tensor(mul_49, primals_77); mul_49 = primals_77 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:82 in forward, code: q, k, v = self.c_attn(x).split(self.n_embd, dim=2) + view_72: "f32[64, 768][768, 1]cuda:0" = torch.ops.aten.reshape.default(add_50, [64, 768]); add_50 = None + permute_48: "f32[768, 2304][1, 768]cuda:0" = torch.ops.aten.permute.default(primals_78, [1, 0]); primals_78 = None + addmm_24: "f32[64, 2304][2304, 1]cuda:0" = torch.ops.aten.addmm.default(primals_79, view_72, permute_48); primals_79 = None + view_73: "f32[1, 64, 2304][147456, 2304, 1]cuda:0" = torch.ops.aten.reshape.default(addmm_24, [1, 64, 
2304]); addmm_24 = None + split_6 = torch.ops.aten.split.Tensor(view_73, 768, 2); view_73 = None + getitem_68: "f32[1, 64, 768][147456, 2304, 1]cuda:0" = split_6[0] + getitem_69: "f32[1, 64, 768][147456, 2304, 1]cuda:0" = split_6[1] + getitem_70: "f32[1, 64, 768][147456, 2304, 1]cuda:0" = split_6[2]; split_6 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:83 in forward, code: k = k.view(B, T, self.n_head, C // self.n_head).transpose( + view_74: "f32[1, 64, 12, 64][147456, 2304, 64, 1]cuda:0" = torch.ops.aten.reshape.default(getitem_69, [1, 64, 12, 64]); getitem_69 = None + permute_49: "f32[1, 12, 64, 64][147456, 64, 2304, 1]cuda:0" = torch.ops.aten.permute.default(view_74, [0, 2, 1, 3]); view_74 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:86 in forward, code: q = q.view(B, T, self.n_head, C // self.n_head).transpose( + view_75: "f32[1, 64, 12, 64][147456, 2304, 64, 1]cuda:0" = torch.ops.aten.reshape.default(getitem_68, [1, 64, 12, 64]); getitem_68 = None + permute_50: "f32[1, 12, 64, 64][147456, 64, 2304, 1]cuda:0" = torch.ops.aten.permute.default(view_75, [0, 2, 1, 3]); view_75 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:89 in forward, code: v = v.view(B, T, self.n_head, C // self.n_head).transpose( + view_76: "f32[1, 64, 12, 64][147456, 2304, 64, 1]cuda:0" = torch.ops.aten.reshape.default(getitem_70, [1, 64, 12, 64]); getitem_70 = None + permute_51: "f32[1, 12, 64, 64][147456, 64, 2304, 1]cuda:0" = torch.ops.aten.permute.default(view_76, [0, 2, 1, 3]); view_76 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:96 in forward, code: y = torch.nn.functional.scaled_dot_product_attention( + _scaled_dot_product_efficient_attention_6 = torch.ops.aten._scaled_dot_product_efficient_attention.default(permute_50, permute_49, permute_51, None, True, 0.0, True) + getitem_71: "f32[1, 12, 64, 64][49152, 64, 768, 1]cuda:0" = _scaled_dot_product_efficient_attention_6[0] + getitem_72: "f32[1, 12, 64][768, 64, 1]cuda:0" = _scaled_dot_product_efficient_attention_6[1] + getitem_73: "i64[][]cuda:0" = _scaled_dot_product_efficient_attention_6[2] + getitem_74: "i64[][]cuda:0" = _scaled_dot_product_efficient_attention_6[3]; _scaled_dot_product_efficient_attention_6 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:112 in forward, code: y.transpose(1, 2).contiguous().view(B, T, C) + permute_52: "f32[1, 64, 12, 64][49152, 768, 64, 1]cuda:0" = torch.ops.aten.permute.default(getitem_71, [0, 2, 1, 3]) + view_77: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.reshape.default(permute_52, [1, 64, 768]); permute_52 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:116 in forward, code: y = self.resid_dropout(self.c_proj(y)) + view_78: "f32[64, 768][768, 1]cuda:0" = torch.ops.aten.reshape.default(view_77, [64, 768]); view_77 = None + permute_53: "f32[768, 768][1, 768]cuda:0" = torch.ops.aten.permute.default(primals_80, [1, 0]); primals_80 = None + + # No stacktrace found for following nodes + mm_default_11: "f32[64, 768][768, 1]cuda:0" = torch.ops.aten.mm.default(view_78, permute_53); view_78 = None + add_tensor_11: "f32[64, 768][768, 1]cuda:0" = torch.ops.aten.add.Tensor(mm_default_11, primals_81); mm_default_11 = primals_81 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:116 in forward, code: y = 
self.resid_dropout(self.c_proj(y)) + view_79: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.reshape.default(add_tensor_11, [1, 64, 768]); add_tensor_11 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:144 in forward, code: x = x + self.attn(self.ln_1(x)) + add_51: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.add.Tensor(add_48, view_79); add_48 = view_79 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:43 in forward, code: return F.layer_norm(input, self.weight.shape, self.weight, self.bias, 1e-5) + var_mean_13 = torch.ops.aten.var_mean.correction(add_51, [2], correction = 0, keepdim = True) + getitem_75: "f32[1, 64, 1][64, 1, 1]cuda:0" = var_mean_13[0] + getitem_76: "f32[1, 64, 1][64, 1, 1]cuda:0" = var_mean_13[1]; var_mean_13 = None + add_52: "f32[1, 64, 1][64, 1, 1]cuda:0" = torch.ops.aten.add.Tensor(getitem_75, 1e-05); getitem_75 = None + rsqrt_13: "f32[1, 64, 1][64, 1, 1]cuda:0" = torch.ops.aten.rsqrt.default(add_52); add_52 = None + sub_13: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.sub.Tensor(add_51, getitem_76); getitem_76 = None + mul_50: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.mul.Tensor(sub_13, rsqrt_13); sub_13 = None + mul_51: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.mul.Tensor(mul_50, primals_82) + add_53: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.add.Tensor(mul_51, primals_83); mul_51 = primals_83 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:128 in forward, code: x = self.c_fc(x) + view_80: "f32[64, 768][768, 1]cuda:0" = torch.ops.aten.reshape.default(add_53, [64, 768]); add_53 = None + permute_54: "f32[768, 3072][1, 768]cuda:0" = torch.ops.aten.permute.default(primals_84, [1, 0]); primals_84 = None + addmm_26: "f32[64, 3072][3072, 1]cuda:0" = torch.ops.aten.addmm.default(primals_85, view_80, permute_54); primals_85 = None + view_81: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.reshape.default(addmm_26, [1, 64, 3072]) + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:25 in new_gelu, code: 0.5 + mul_52: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.mul.Tensor(view_81, 0.5) + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:29 in new_gelu, code: + torch.tanh(math.sqrt(2.0 / math.pi) * (x + 0.044715 * torch.pow(x, 3.0))) + pow_7: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.pow.Tensor_Scalar(view_81, 3.0) + mul_53: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.mul.Tensor(pow_7, 0.044715); pow_7 = None + add_54: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.add.Tensor(view_81, mul_53); view_81 = mul_53 = None + mul_54: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.mul.Tensor(add_54, 0.7978845608028654); add_54 = None + tanh_6: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.tanh.default(mul_54); mul_54 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:28 in new_gelu, code: 1.0 + add_55: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.add.Tensor(tanh_6, 1.0); tanh_6 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:25 in new_gelu, code: 0.5 + mul_55: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.mul.Tensor(mul_52, add_55); mul_52 = add_55 = None + + # File: 
/data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:130 in forward, code: x = self.c_proj(x)
+ view_82: "f32[64, 3072][3072, 1]cuda:0" = torch.ops.aten.reshape.default(mul_55, [64, 3072]); mul_55 = None
+ permute_55: "f32[3072, 768][1, 3072]cuda:0" = torch.ops.aten.permute.default(primals_86, [1, 0]); primals_86 = None
+
+ # No stacktrace found for following nodes
+ mm_default_10: "f32[64, 768][768, 1]cuda:0" = torch.ops.aten.mm.default(view_82, permute_55)
+ add_tensor_10: "f32[64, 768][768, 1]cuda:0" = torch.ops.aten.add.Tensor(mm_default_10, primals_87); mm_default_10 = primals_87 = None
+
+ # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:130 in forward, code: x = self.c_proj(x)
+ view_83: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.reshape.default(add_tensor_10, [1, 64, 768]); add_tensor_10 = None
+
+ # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:145 in forward, code: x = x + self.mlp(self.ln_2(x))
+ add_56: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.add.Tensor(add_51, view_83); add_51 = view_83 = None
+
+ # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:43 in forward, code: return F.layer_norm(input, self.weight.shape, self.weight, self.bias, 1e-5)
+ var_mean_14 = torch.ops.aten.var_mean.correction(add_56, [2], correction = 0, keepdim = True)
+ getitem_77: "f32[1, 64, 1][64, 1, 1]cuda:0" = var_mean_14[0]
+ getitem_78: "f32[1, 64, 1][64, 1, 1]cuda:0" = var_mean_14[1]; var_mean_14 = None
+ add_57: "f32[1, 64, 1][64, 1, 1]cuda:0" = torch.ops.aten.add.Tensor(getitem_77, 1e-05); getitem_77 = None
+ rsqrt_14: "f32[1, 64, 1][64, 1, 1]cuda:0" = torch.ops.aten.rsqrt.default(add_57); add_57 = None
+ sub_14: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.sub.Tensor(add_56, getitem_78); getitem_78 = None
+ mul_56: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.mul.Tensor(sub_14, rsqrt_14); sub_14 = None
+ mul_57: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.mul.Tensor(mul_56, primals_88)
+ add_58: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.add.Tensor(mul_57, primals_89); mul_57 = primals_89 = None
+
+ # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:82 in forward, code: q, k, v = self.c_attn(x).split(self.n_embd, dim=2)
+ view_84: "f32[64, 768][768, 1]cuda:0" = torch.ops.aten.reshape.default(add_58, [64, 768]); add_58 = None
+ permute_56: "f32[768, 2304][1, 768]cuda:0" = torch.ops.aten.permute.default(primals_90, [1, 0]); primals_90 = None
+ addmm_28: "f32[64, 2304][2304, 1]cuda:0" = torch.ops.aten.addmm.default(primals_91, view_84, permute_56); primals_91 = None
+ view_85: "f32[1, 64, 2304][147456, 2304, 1]cuda:0" = torch.ops.aten.reshape.default(addmm_28, [1, 64, 2304]); addmm_28 = None
+ split_7 = torch.ops.aten.split.Tensor(view_85, 768, 2); view_85 = None
+ getitem_79: "f32[1, 64, 768][147456, 2304, 1]cuda:0" = split_7[0]
+ getitem_80: "f32[1, 64, 768][147456, 2304, 1]cuda:0" = split_7[1]
+ getitem_81: "f32[1, 64, 768][147456, 2304, 1]cuda:0" = split_7[2]; split_7 = None
+
+ # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:83 in forward, code: k = k.view(B, T, self.n_head, C // self.n_head).transpose(
+ view_86: "f32[1, 64, 12, 64][147456, 2304, 64, 1]cuda:0" = torch.ops.aten.reshape.default(getitem_80, [1, 64, 12, 64]); getitem_80 = None
+ permute_57: "f32[1, 12, 64, 64][147456, 64, 2304, 1]cuda:0" = torch.ops.aten.permute.default(view_86, [0, 2, 1, 3]); view_86 = None
+
+ # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:86 in forward, code: q = q.view(B, T, self.n_head, C // self.n_head).transpose(
+ view_87: "f32[1, 64, 12, 64][147456, 2304, 64, 1]cuda:0" = torch.ops.aten.reshape.default(getitem_79, [1, 64, 12, 64]); getitem_79 = None
+ permute_58: "f32[1, 12, 64, 64][147456, 64, 2304, 1]cuda:0" = torch.ops.aten.permute.default(view_87, [0, 2, 1, 3]); view_87 = None
+
+ # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:89 in forward, code: v = v.view(B, T, self.n_head, C // self.n_head).transpose(
+ view_88: "f32[1, 64, 12, 64][147456, 2304, 64, 1]cuda:0" = torch.ops.aten.reshape.default(getitem_81, [1, 64, 12, 64]); getitem_81 = None
+ permute_59: "f32[1, 12, 64, 64][147456, 64, 2304, 1]cuda:0" = torch.ops.aten.permute.default(view_88, [0, 2, 1, 3]); view_88 = None
+
+ # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:96 in forward, code: y = torch.nn.functional.scaled_dot_product_attention(
+ _scaled_dot_product_efficient_attention_7 = torch.ops.aten._scaled_dot_product_efficient_attention.default(permute_58, permute_57, permute_59, None, True, 0.0, True)
+ getitem_82: "f32[1, 12, 64, 64][49152, 64, 768, 1]cuda:0" = _scaled_dot_product_efficient_attention_7[0]
+ getitem_83: "f32[1, 12, 64][768, 64, 1]cuda:0" = _scaled_dot_product_efficient_attention_7[1]
+ getitem_84: "i64[][]cuda:0" = _scaled_dot_product_efficient_attention_7[2]
+ getitem_85: "i64[][]cuda:0" = _scaled_dot_product_efficient_attention_7[3]; _scaled_dot_product_efficient_attention_7 = None
+
+ # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:112 in forward, code: y.transpose(1, 2).contiguous().view(B, T, C)
+ permute_60: "f32[1, 64, 12, 64][49152, 768, 64, 1]cuda:0" = torch.ops.aten.permute.default(getitem_82, [0, 2, 1, 3])
+ view_89: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.reshape.default(permute_60, [1, 64, 768]); permute_60 = None
+
+ # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:116 in forward, code: y = self.resid_dropout(self.c_proj(y))
+ view_90: "f32[64, 768][768, 1]cuda:0" = torch.ops.aten.reshape.default(view_89, [64, 768]); view_89 = None
+ permute_61: "f32[768, 768][1, 768]cuda:0" = torch.ops.aten.permute.default(primals_92, [1, 0]); primals_92 = None
+
+ # No stacktrace found for following nodes
+ mm_default_9: "f32[64, 768][768, 1]cuda:0" = torch.ops.aten.mm.default(view_90, permute_61); view_90 = None
+ add_tensor_9: "f32[64, 768][768, 1]cuda:0" = torch.ops.aten.add.Tensor(mm_default_9, primals_93); mm_default_9 = primals_93 = None
+
+ # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:116 in forward, code: y = self.resid_dropout(self.c_proj(y))
+ view_91: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.reshape.default(add_tensor_9, [1, 64, 768]); add_tensor_9 = None
+
+ # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:144 in forward, code: x = x + self.attn(self.ln_1(x))
+ add_59: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.add.Tensor(add_56, view_91); add_56 = view_91 = None
+
+ # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:43 in forward, code: return F.layer_norm(input, self.weight.shape, self.weight, self.bias, 1e-5)
+ var_mean_15 = torch.ops.aten.var_mean.correction(add_59, [2], correction = 0, keepdim = True)
+ getitem_86: "f32[1, 64, 1][64, 1, 1]cuda:0" = var_mean_15[0]
+ getitem_87: "f32[1, 64, 1][64, 1, 1]cuda:0" = var_mean_15[1]; var_mean_15 = None
+ add_60: "f32[1, 64, 1][64, 1, 1]cuda:0" = torch.ops.aten.add.Tensor(getitem_86, 1e-05); getitem_86 = None
+ rsqrt_15: "f32[1, 64, 1][64, 1, 1]cuda:0" = torch.ops.aten.rsqrt.default(add_60); add_60 = None
+ sub_15: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.sub.Tensor(add_59, getitem_87); getitem_87 = None
+ mul_58: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.mul.Tensor(sub_15, rsqrt_15); sub_15 = None
+ mul_59: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.mul.Tensor(mul_58, primals_94)
+ add_61: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.add.Tensor(mul_59, primals_95); mul_59 = primals_95 = None
+
+ # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:128 in forward, code: x = self.c_fc(x)
+ view_92: "f32[64, 768][768, 1]cuda:0" = torch.ops.aten.reshape.default(add_61, [64, 768]); add_61 = None
+ permute_62: "f32[768, 3072][1, 768]cuda:0" = torch.ops.aten.permute.default(primals_96, [1, 0]); primals_96 = None
+ addmm_30: "f32[64, 3072][3072, 1]cuda:0" = torch.ops.aten.addmm.default(primals_97, view_92, permute_62); primals_97 = None
+ view_93: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.reshape.default(addmm_30, [1, 64, 3072])
+
+ # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:25 in new_gelu, code: 0.5
+ mul_60: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.mul.Tensor(view_93, 0.5)
+
+ # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:29 in new_gelu, code: + torch.tanh(math.sqrt(2.0 / math.pi) * (x + 0.044715 * torch.pow(x, 3.0)))
+ pow_8: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.pow.Tensor_Scalar(view_93, 3.0)
+ mul_61: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.mul.Tensor(pow_8, 0.044715); pow_8 = None
+ add_62: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.add.Tensor(view_93, mul_61); view_93 = mul_61 = None
+ mul_62: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.mul.Tensor(add_62, 0.7978845608028654); add_62 = None
+ tanh_7: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.tanh.default(mul_62); mul_62 = None
+
+ # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:28 in new_gelu, code: 1.0
+ add_63: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.add.Tensor(tanh_7, 1.0); tanh_7 = None
+
+ # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:25 in new_gelu, code: 0.5
+ mul_63: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.mul.Tensor(mul_60, add_63); mul_60 = add_63 = None
+
+ # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:130 in forward, code: x = self.c_proj(x)
+ view_94: "f32[64, 3072][3072, 1]cuda:0" = torch.ops.aten.reshape.default(mul_63, [64, 3072]); mul_63 = None
+ permute_63: "f32[3072, 768][1, 3072]cuda:0" = torch.ops.aten.permute.default(primals_98, [1, 0]); primals_98 = None
+
+ # No stacktrace found for following nodes
+ mm_default_8: "f32[64, 768][768, 1]cuda:0" = torch.ops.aten.mm.default(view_94, permute_63)
+ add_tensor_8: "f32[64, 768][768, 1]cuda:0" = torch.ops.aten.add.Tensor(mm_default_8, primals_99); mm_default_8 = primals_99 = None
+
+ # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:130 in forward, code: x = self.c_proj(x)
+ view_95: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.reshape.default(add_tensor_8, [1, 64, 768]); add_tensor_8 = None
+
+ # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:145 in forward, code: x = x + self.mlp(self.ln_2(x))
+ add_64: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.add.Tensor(add_59, view_95); add_59 = view_95 = None
+
+ # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:43 in forward, code: return F.layer_norm(input, self.weight.shape, self.weight, self.bias, 1e-5)
+ var_mean_16 = torch.ops.aten.var_mean.correction(add_64, [2], correction = 0, keepdim = True)
+ getitem_88: "f32[1, 64, 1][64, 1, 1]cuda:0" = var_mean_16[0]
+ getitem_89: "f32[1, 64, 1][64, 1, 1]cuda:0" = var_mean_16[1]; var_mean_16 = None
+ add_65: "f32[1, 64, 1][64, 1, 1]cuda:0" = torch.ops.aten.add.Tensor(getitem_88, 1e-05); getitem_88 = None
+ rsqrt_16: "f32[1, 64, 1][64, 1, 1]cuda:0" = torch.ops.aten.rsqrt.default(add_65); add_65 = None
+ sub_16: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.sub.Tensor(add_64, getitem_89); getitem_89 = None
+ mul_64: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.mul.Tensor(sub_16, rsqrt_16); sub_16 = None
+ mul_65: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.mul.Tensor(mul_64, primals_100)
+ add_66: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.add.Tensor(mul_65, primals_101); mul_65 = primals_101 = None
+
+ # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:82 in forward, code: q, k, v = self.c_attn(x).split(self.n_embd, dim=2)
+ view_96: "f32[64, 768][768, 1]cuda:0" = torch.ops.aten.reshape.default(add_66, [64, 768]); add_66 = None
+ permute_64: "f32[768, 2304][1, 768]cuda:0" = torch.ops.aten.permute.default(primals_102, [1, 0]); primals_102 = None
+ addmm_32: "f32[64, 2304][2304, 1]cuda:0" = torch.ops.aten.addmm.default(primals_103, view_96, permute_64); primals_103 = None
+ view_97: "f32[1, 64, 2304][147456, 2304, 1]cuda:0" = torch.ops.aten.reshape.default(addmm_32, [1, 64, 2304]); addmm_32 = None
+ split_8 = torch.ops.aten.split.Tensor(view_97, 768, 2); view_97 = None
+ getitem_90: "f32[1, 64, 768][147456, 2304, 1]cuda:0" = split_8[0]
+ getitem_91: "f32[1, 64, 768][147456, 2304, 1]cuda:0" = split_8[1]
+ getitem_92: "f32[1, 64, 768][147456, 2304, 1]cuda:0" = split_8[2]; split_8 = None
+
+ # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:83 in forward, code: k = k.view(B, T, self.n_head, C // self.n_head).transpose(
+ view_98: "f32[1, 64, 12, 64][147456, 2304, 64, 1]cuda:0" = torch.ops.aten.reshape.default(getitem_91, [1, 64, 12, 64]); getitem_91 = None
+ permute_65: "f32[1, 12, 64, 64][147456, 64, 2304, 1]cuda:0" = torch.ops.aten.permute.default(view_98, [0, 2, 1, 3]); view_98 = None
+
+ # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:86 in forward, code: q = q.view(B, T, self.n_head, C // self.n_head).transpose(
+ view_99: "f32[1, 64, 12, 64][147456, 2304, 64, 1]cuda:0" = torch.ops.aten.reshape.default(getitem_90, [1, 64, 12, 64]); getitem_90 = None
+ permute_66: "f32[1, 12, 64, 64][147456, 64, 2304, 1]cuda:0" = torch.ops.aten.permute.default(view_99, [0, 2, 1, 3]); view_99 = None
+
+ # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:89 in forward, code: v = v.view(B, T, self.n_head, C // self.n_head).transpose(
+ view_100: "f32[1, 64, 12, 64][147456, 2304, 64, 1]cuda:0" = torch.ops.aten.reshape.default(getitem_92, [1, 64, 12, 64]); getitem_92 = None
+ permute_67: "f32[1, 12, 64, 64][147456, 64, 2304, 1]cuda:0" = torch.ops.aten.permute.default(view_100, [0, 2, 1, 3]); view_100 = None
+
+ # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:96 in forward, code: y = torch.nn.functional.scaled_dot_product_attention(
+ _scaled_dot_product_efficient_attention_8 = torch.ops.aten._scaled_dot_product_efficient_attention.default(permute_66, permute_65, permute_67, None, True, 0.0, True)
+ getitem_93: "f32[1, 12, 64, 64][49152, 64, 768, 1]cuda:0" = _scaled_dot_product_efficient_attention_8[0]
+ getitem_94: "f32[1, 12, 64][768, 64, 1]cuda:0" = _scaled_dot_product_efficient_attention_8[1]
+ getitem_95: "i64[][]cuda:0" = _scaled_dot_product_efficient_attention_8[2]
+ getitem_96: "i64[][]cuda:0" = _scaled_dot_product_efficient_attention_8[3]; _scaled_dot_product_efficient_attention_8 = None
+
+ # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:112 in forward, code: y.transpose(1, 2).contiguous().view(B, T, C)
+ permute_68: "f32[1, 64, 12, 64][49152, 768, 64, 1]cuda:0" = torch.ops.aten.permute.default(getitem_93, [0, 2, 1, 3])
+ view_101: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.reshape.default(permute_68, [1, 64, 768]); permute_68 = None
+
+ # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:116 in forward, code: y = self.resid_dropout(self.c_proj(y))
+ view_102: "f32[64, 768][768, 1]cuda:0" = torch.ops.aten.reshape.default(view_101, [64, 768]); view_101 = None
+ permute_69: "f32[768, 768][1, 768]cuda:0" = torch.ops.aten.permute.default(primals_104, [1, 0]); primals_104 = None
+
+ # No stacktrace found for following nodes
+ mm_default_7: "f32[64, 768][768, 1]cuda:0" = torch.ops.aten.mm.default(view_102, permute_69); view_102 = None
+ add_tensor_7: "f32[64, 768][768, 1]cuda:0" = torch.ops.aten.add.Tensor(mm_default_7, primals_105); mm_default_7 = primals_105 = None
+
+ # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:116 in forward, code: y = self.resid_dropout(self.c_proj(y))
+ view_103: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.reshape.default(add_tensor_7, [1, 64, 768]); add_tensor_7 = None
+
+ # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:144 in forward, code: x = x + self.attn(self.ln_1(x))
+ add_67: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.add.Tensor(add_64, view_103); add_64 = view_103 = None
+
+ # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:43 in forward, code: return F.layer_norm(input, self.weight.shape, self.weight, self.bias, 1e-5)
+ var_mean_17 = torch.ops.aten.var_mean.correction(add_67, [2], correction = 0, keepdim = True)
+ getitem_97: "f32[1, 64, 1][64, 1, 1]cuda:0" = var_mean_17[0]
+ getitem_98: "f32[1, 64, 1][64, 1, 1]cuda:0" = var_mean_17[1]; var_mean_17 = None
+ add_68: "f32[1, 64, 1][64, 1, 1]cuda:0" = torch.ops.aten.add.Tensor(getitem_97, 1e-05); getitem_97 = None
+ rsqrt_17: "f32[1, 64, 1][64, 1, 1]cuda:0" = torch.ops.aten.rsqrt.default(add_68); add_68 = None
+ sub_17: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.sub.Tensor(add_67, getitem_98); getitem_98 = None
+ mul_66: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.mul.Tensor(sub_17, rsqrt_17); sub_17 = None
+ mul_67: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.mul.Tensor(mul_66, primals_106)
+ add_69: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.add.Tensor(mul_67, primals_107); mul_67 = primals_107 = None
+
+ # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:128 in forward, code: x = self.c_fc(x)
+ view_104: "f32[64, 768][768, 1]cuda:0" = torch.ops.aten.reshape.default(add_69, [64, 768]); add_69 = None
+ permute_70: "f32[768, 3072][1, 768]cuda:0" = torch.ops.aten.permute.default(primals_108, [1, 0]); primals_108 = None
+ addmm_34: "f32[64, 3072][3072, 1]cuda:0" = torch.ops.aten.addmm.default(primals_109, view_104, permute_70); primals_109 = None
+ view_105: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.reshape.default(addmm_34, [1, 64, 3072])
+
+ # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:25 in new_gelu, code: 0.5
+ mul_68: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.mul.Tensor(view_105, 0.5)
+
+ # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:29 in new_gelu, code: + torch.tanh(math.sqrt(2.0 / math.pi) * (x + 0.044715 * torch.pow(x, 3.0)))
+ pow_9: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.pow.Tensor_Scalar(view_105, 3.0)
+ mul_69: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.mul.Tensor(pow_9, 0.044715); pow_9 = None
+ add_70: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.add.Tensor(view_105, mul_69); view_105 = mul_69 = None
+ mul_70: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.mul.Tensor(add_70, 0.7978845608028654); add_70 = None
+ tanh_8: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.tanh.default(mul_70); mul_70 = None
+
+ # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:28 in new_gelu, code: 1.0
+ add_71: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.add.Tensor(tanh_8, 1.0); tanh_8 = None
+
+ # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:25 in new_gelu, code: 0.5
+ mul_71: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.mul.Tensor(mul_68, add_71); mul_68 = add_71 = None
+
+ # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:130 in forward, code: x = self.c_proj(x)
+ view_106: "f32[64, 3072][3072, 1]cuda:0" = torch.ops.aten.reshape.default(mul_71, [64, 3072]); mul_71 = None
+ permute_71: "f32[3072, 768][1, 3072]cuda:0" = torch.ops.aten.permute.default(primals_110, [1, 0]); primals_110 = None
+
+ # No stacktrace found for following nodes
+ mm_default_6: "f32[64, 768][768, 1]cuda:0" = torch.ops.aten.mm.default(view_106, permute_71)
+ add_tensor_6: "f32[64, 768][768, 1]cuda:0" = torch.ops.aten.add.Tensor(mm_default_6, primals_111); mm_default_6 = primals_111 = None
+
+ # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:130 in forward, code: x = self.c_proj(x)
+ view_107: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.reshape.default(add_tensor_6, [1, 64, 768]); add_tensor_6 = None
+
+ # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:145 in forward, code: x = x + self.mlp(self.ln_2(x))
+ add_72: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.add.Tensor(add_67, view_107); add_67 = view_107 = None
+
+ # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:43 in forward, code: return F.layer_norm(input, self.weight.shape, self.weight, self.bias, 1e-5)
+ var_mean_18 = torch.ops.aten.var_mean.correction(add_72, [2], correction = 0, keepdim = True)
+ getitem_99: "f32[1, 64, 1][64, 1, 1]cuda:0" = var_mean_18[0]
+ getitem_100: "f32[1, 64, 1][64, 1, 1]cuda:0" = var_mean_18[1]; var_mean_18 = None
+ add_73: "f32[1, 64, 1][64, 1, 1]cuda:0" = torch.ops.aten.add.Tensor(getitem_99, 1e-05); getitem_99 = None
+ rsqrt_18: "f32[1, 64, 1][64, 1, 1]cuda:0" = torch.ops.aten.rsqrt.default(add_73); add_73 = None
+ sub_18: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.sub.Tensor(add_72, getitem_100); getitem_100 = None
+ mul_72: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.mul.Tensor(sub_18, rsqrt_18); sub_18 = None
+ mul_73: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.mul.Tensor(mul_72, primals_112)
+ add_74: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.add.Tensor(mul_73, primals_113); mul_73 = primals_113 = None
+
+ # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:82 in forward, code: q, k, v = self.c_attn(x).split(self.n_embd, dim=2)
+ view_108: "f32[64, 768][768, 1]cuda:0" = torch.ops.aten.reshape.default(add_74, [64, 768]); add_74 = None
+ permute_72: "f32[768, 2304][1, 768]cuda:0" = torch.ops.aten.permute.default(primals_114, [1, 0]); primals_114 = None
+ addmm_36: "f32[64, 2304][2304, 1]cuda:0" = torch.ops.aten.addmm.default(primals_115, view_108, permute_72); primals_115 = None
+ view_109: "f32[1, 64, 2304][147456, 2304, 1]cuda:0" = torch.ops.aten.reshape.default(addmm_36, [1, 64, 2304]); addmm_36 = None
+ split_9 = torch.ops.aten.split.Tensor(view_109, 768, 2); view_109 = None
+ getitem_101: "f32[1, 64, 768][147456, 2304, 1]cuda:0" = split_9[0]
+ getitem_102: "f32[1, 64, 768][147456, 2304, 1]cuda:0" = split_9[1]
+ getitem_103: "f32[1, 64, 768][147456, 2304, 1]cuda:0" = split_9[2]; split_9 = None
+
+ # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:83 in forward, code: k = k.view(B, T, self.n_head, C // self.n_head).transpose(
+ view_110: "f32[1, 64, 12, 64][147456, 2304, 64, 1]cuda:0" = torch.ops.aten.reshape.default(getitem_102, [1, 64, 12, 64]); getitem_102 = None
+ permute_73: "f32[1, 12, 64, 64][147456, 64, 2304, 1]cuda:0" = torch.ops.aten.permute.default(view_110, [0, 2, 1, 3]); view_110 = None
+
+ # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:86 in forward, code: q = q.view(B, T, self.n_head, C // self.n_head).transpose(
+ view_111: "f32[1, 64, 12, 64][147456, 2304, 64, 1]cuda:0" = torch.ops.aten.reshape.default(getitem_101, [1, 64, 12, 64]); getitem_101 = None
+ permute_74: "f32[1, 12, 64, 64][147456, 64, 2304, 1]cuda:0" = torch.ops.aten.permute.default(view_111, [0, 2, 1, 3]); view_111 = None
+
+ # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:89 in forward, code: v = v.view(B, T, self.n_head, C // self.n_head).transpose(
+ view_112: "f32[1, 64, 12, 64][147456, 2304, 64, 1]cuda:0" = torch.ops.aten.reshape.default(getitem_103, [1, 64, 12, 64]); getitem_103 = None
+ permute_75: "f32[1, 12, 64, 64][147456, 64, 2304, 1]cuda:0" = torch.ops.aten.permute.default(view_112, [0, 2, 1, 3]); view_112 = None
+
+ # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:96 in forward, code: y = torch.nn.functional.scaled_dot_product_attention(
+ _scaled_dot_product_efficient_attention_9 = torch.ops.aten._scaled_dot_product_efficient_attention.default(permute_74, permute_73, permute_75, None, True, 0.0, True)
+ getitem_104: "f32[1, 12, 64, 64][49152, 64, 768, 1]cuda:0" = _scaled_dot_product_efficient_attention_9[0]
+ getitem_105: "f32[1, 12, 64][768, 64, 1]cuda:0" = _scaled_dot_product_efficient_attention_9[1]
+ getitem_106: "i64[][]cuda:0" = _scaled_dot_product_efficient_attention_9[2]
+ getitem_107: "i64[][]cuda:0" = _scaled_dot_product_efficient_attention_9[3]; _scaled_dot_product_efficient_attention_9 = None
+
+ # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:112 in forward, code: y.transpose(1, 2).contiguous().view(B, T, C)
+ permute_76: "f32[1, 64, 12, 64][49152, 768, 64, 1]cuda:0" = torch.ops.aten.permute.default(getitem_104, [0, 2, 1, 3])
+ view_113: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.reshape.default(permute_76, [1, 64, 768]); permute_76 = None
+
+ # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:116 in forward, code: y = self.resid_dropout(self.c_proj(y))
+ view_114: "f32[64, 768][768, 1]cuda:0" = torch.ops.aten.reshape.default(view_113, [64, 768]); view_113 = None
+ permute_77: "f32[768, 768][1, 768]cuda:0" = torch.ops.aten.permute.default(primals_116, [1, 0]); primals_116 = None
+
+ # No stacktrace found for following nodes
+ mm_default_5: "f32[64, 768][768, 1]cuda:0" = torch.ops.aten.mm.default(view_114, permute_77); view_114 = None
+ add_tensor_5: "f32[64, 768][768, 1]cuda:0" = torch.ops.aten.add.Tensor(mm_default_5, primals_117); mm_default_5 = primals_117 = None
+
+ # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:116 in forward, code: y = self.resid_dropout(self.c_proj(y))
+ view_115: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.reshape.default(add_tensor_5, [1, 64, 768]); add_tensor_5 = None
+
+ # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:144 in forward, code: x = x + self.attn(self.ln_1(x))
+ add_75: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.add.Tensor(add_72, view_115); add_72 = view_115 = None
+
+ # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:43 in forward, code: return F.layer_norm(input, self.weight.shape, self.weight, self.bias, 1e-5)
+ var_mean_19 = torch.ops.aten.var_mean.correction(add_75, [2], correction = 0, keepdim = True)
+ getitem_108: "f32[1, 64, 1][64, 1, 1]cuda:0" = var_mean_19[0]
+ getitem_109: "f32[1, 64, 1][64, 1, 1]cuda:0" = var_mean_19[1]; var_mean_19 = None
+ add_76: "f32[1, 64, 1][64, 1, 1]cuda:0" = torch.ops.aten.add.Tensor(getitem_108, 1e-05); getitem_108 = None
+ rsqrt_19: "f32[1, 64, 1][64, 1, 1]cuda:0" = torch.ops.aten.rsqrt.default(add_76); add_76 = None
+ sub_19: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.sub.Tensor(add_75, getitem_109); getitem_109 = None
+ mul_74: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.mul.Tensor(sub_19, rsqrt_19); sub_19 = None
+ mul_75: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.mul.Tensor(mul_74, primals_118)
+ add_77: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.add.Tensor(mul_75, primals_119); mul_75 = primals_119 = None
+
+ # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:128 in forward, code: x = self.c_fc(x)
+ view_116: "f32[64, 768][768, 1]cuda:0" = torch.ops.aten.reshape.default(add_77, [64, 768]); add_77 = None
+ permute_78: "f32[768, 3072][1, 768]cuda:0" = torch.ops.aten.permute.default(primals_120, [1, 0]); primals_120 = None
+ addmm_38: "f32[64, 3072][3072, 1]cuda:0" = torch.ops.aten.addmm.default(primals_121, view_116, permute_78); primals_121 = None
+ view_117: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.reshape.default(addmm_38, [1, 64, 3072])
+
+ # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:25 in new_gelu, code: 0.5
+ mul_76: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.mul.Tensor(view_117, 0.5)
+
+ # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:29 in new_gelu, code: + torch.tanh(math.sqrt(2.0 / math.pi) * (x + 0.044715 * torch.pow(x, 3.0)))
+ pow_10: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.pow.Tensor_Scalar(view_117, 3.0)
+ mul_77: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.mul.Tensor(pow_10, 0.044715); pow_10 = None
+ add_78: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.add.Tensor(view_117, mul_77); view_117 = mul_77 = None
+ mul_78: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.mul.Tensor(add_78, 0.7978845608028654); add_78 = None
+ tanh_9: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.tanh.default(mul_78); mul_78 = None
+
+ # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:28 in new_gelu, code: 1.0
+ add_79: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.add.Tensor(tanh_9, 1.0); tanh_9 = None
+
+ # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:25 in new_gelu, code: 0.5
+ mul_79: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.mul.Tensor(mul_76, add_79); mul_76 = add_79 = None
+
+ # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:130 in forward, code: x = self.c_proj(x)
+ view_118: "f32[64, 3072][3072, 1]cuda:0" = torch.ops.aten.reshape.default(mul_79, [64, 3072]); mul_79 = None
+ permute_79: "f32[3072, 768][1, 3072]cuda:0" = torch.ops.aten.permute.default(primals_122, [1, 0]); primals_122 = None
+
+ # No stacktrace found for following nodes
+ mm_default_4: "f32[64, 768][768, 1]cuda:0" = torch.ops.aten.mm.default(view_118, permute_79)
+ add_tensor_4: "f32[64, 768][768, 1]cuda:0" = torch.ops.aten.add.Tensor(mm_default_4, primals_123); mm_default_4 = primals_123 = None
+
+ # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:130 in forward, code: x = self.c_proj(x)
+ view_119: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.reshape.default(add_tensor_4, [1, 64, 768]); add_tensor_4 = None
+
+ # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:145 in forward, code: x = x + self.mlp(self.ln_2(x))
+ add_80: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.add.Tensor(add_75, view_119); add_75 = view_119 = None
+
+ # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:43 in forward, code: return F.layer_norm(input, self.weight.shape, self.weight, self.bias, 1e-5)
+ var_mean_20 = torch.ops.aten.var_mean.correction(add_80, [2], correction = 0, keepdim = True)
+ getitem_110: "f32[1, 64, 1][64, 1, 1]cuda:0" = var_mean_20[0]
+ getitem_111: "f32[1, 64, 1][64, 1, 1]cuda:0" = var_mean_20[1]; var_mean_20 = None
+ add_81: "f32[1, 64, 1][64, 1, 1]cuda:0" = torch.ops.aten.add.Tensor(getitem_110, 1e-05); getitem_110 = None
+ rsqrt_20: "f32[1, 64, 1][64, 1, 1]cuda:0" = torch.ops.aten.rsqrt.default(add_81); add_81 = None
+ sub_20: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.sub.Tensor(add_80, getitem_111); getitem_111 = None
+ mul_80: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.mul.Tensor(sub_20, rsqrt_20); sub_20 = None
+ mul_81: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.mul.Tensor(mul_80, primals_124)
+ add_82: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.add.Tensor(mul_81, primals_125); mul_81 = primals_125 = None
+
+ # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:82 in forward, code: q, k, v = self.c_attn(x).split(self.n_embd, dim=2)
+ view_120: "f32[64, 768][768, 1]cuda:0" = torch.ops.aten.reshape.default(add_82, [64, 768]); add_82 = None
+ permute_80: "f32[768, 2304][1, 768]cuda:0" = torch.ops.aten.permute.default(primals_126, [1, 0]); primals_126 = None
+ addmm_40: "f32[64, 2304][2304, 1]cuda:0" = torch.ops.aten.addmm.default(primals_127, view_120, permute_80); primals_127 = None
+ view_121: "f32[1, 64, 2304][147456, 2304, 1]cuda:0" = torch.ops.aten.reshape.default(addmm_40, [1, 64, 2304]); addmm_40 = None
+ split_10 = torch.ops.aten.split.Tensor(view_121, 768, 2); view_121 = None
+ getitem_112: "f32[1, 64, 768][147456, 2304, 1]cuda:0" = split_10[0]
+ getitem_113: "f32[1, 64, 768][147456, 2304, 1]cuda:0" = split_10[1]
+ getitem_114: "f32[1, 64, 768][147456, 2304, 1]cuda:0" = split_10[2]; split_10 = None
+
+ # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:83 in forward, code: k = k.view(B, T, self.n_head, C // self.n_head).transpose(
+ view_122: "f32[1, 64, 12, 64][147456, 2304, 64, 1]cuda:0" = torch.ops.aten.reshape.default(getitem_113, [1, 64, 12, 64]); getitem_113 = None
+ permute_81: "f32[1, 12, 64, 64][147456, 64, 2304, 1]cuda:0" = torch.ops.aten.permute.default(view_122, [0, 2, 1, 3]); view_122 = None
+
+ # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:86 in forward, code: q = q.view(B, T, self.n_head, C // self.n_head).transpose(
+ view_123: "f32[1, 64, 12, 64][147456, 2304, 64, 1]cuda:0" = torch.ops.aten.reshape.default(getitem_112, [1, 64, 12, 64]); getitem_112 = None
+ permute_82: "f32[1, 12, 64, 64][147456, 64, 2304, 1]cuda:0" = torch.ops.aten.permute.default(view_123, [0, 2, 1, 3]); view_123 = None
+
+ # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:89 in forward, code: v = v.view(B, T, self.n_head, C // self.n_head).transpose(
+ view_124: "f32[1, 64, 12, 64][147456, 2304, 64, 1]cuda:0" = torch.ops.aten.reshape.default(getitem_114, [1, 64, 12, 64]); getitem_114 = None
+ permute_83: "f32[1, 12, 64, 64][147456, 64, 2304, 1]cuda:0" = torch.ops.aten.permute.default(view_124, [0, 2, 1, 3]); view_124 = None
+
+ # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:96 in forward, code: y = torch.nn.functional.scaled_dot_product_attention(
+ _scaled_dot_product_efficient_attention_10 = torch.ops.aten._scaled_dot_product_efficient_attention.default(permute_82, permute_81, permute_83, None, True, 0.0, True)
+ getitem_115: "f32[1, 12, 64, 64][49152, 64, 768, 1]cuda:0" = _scaled_dot_product_efficient_attention_10[0]
+ getitem_116: "f32[1, 12, 64][768, 64, 1]cuda:0" = _scaled_dot_product_efficient_attention_10[1]
+ getitem_117: "i64[][]cuda:0" = _scaled_dot_product_efficient_attention_10[2]
+ getitem_118: "i64[][]cuda:0" = _scaled_dot_product_efficient_attention_10[3]; _scaled_dot_product_efficient_attention_10 = None
+
+ # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:112 in forward, code: y.transpose(1, 2).contiguous().view(B, T, C)
+ permute_84: "f32[1, 64, 12, 64][49152, 768, 64, 1]cuda:0" = torch.ops.aten.permute.default(getitem_115, [0, 2, 1, 3])
+ view_125: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.reshape.default(permute_84, [1, 64, 768]); permute_84 = None
+
+ # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:116 in forward, code: y = self.resid_dropout(self.c_proj(y))
+ view_126: "f32[64, 768][768, 1]cuda:0" = torch.ops.aten.reshape.default(view_125, [64, 768]); view_125 = None
+ permute_85: "f32[768, 768][1, 768]cuda:0" = torch.ops.aten.permute.default(primals_128, [1, 0]); primals_128 = None
+
+ # No stacktrace found for following nodes
+ mm_default_3: "f32[64, 768][768, 1]cuda:0" = torch.ops.aten.mm.default(view_126, permute_85); view_126 = None
+ add_tensor_3: "f32[64, 768][768, 1]cuda:0" = torch.ops.aten.add.Tensor(mm_default_3, primals_129); mm_default_3 = primals_129 = None
+
+ # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:116 in forward, code: y = self.resid_dropout(self.c_proj(y))
+ view_127: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.reshape.default(add_tensor_3, [1, 64, 768]); add_tensor_3 = None
+
+ # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:144 in forward, code: x = x + self.attn(self.ln_1(x))
+ add_83: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.add.Tensor(add_80, view_127); add_80 = view_127 = None
+
+ # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:43 in forward, code: return F.layer_norm(input, self.weight.shape, self.weight, self.bias, 1e-5)
+ var_mean_21 = torch.ops.aten.var_mean.correction(add_83, [2], correction = 0, keepdim = True)
+ getitem_119: "f32[1, 64, 1][64, 1, 1]cuda:0" = var_mean_21[0]
+ getitem_120: "f32[1, 64, 1][64, 1, 1]cuda:0" = var_mean_21[1]; var_mean_21 = None
+ add_84: "f32[1, 64, 1][64, 1, 1]cuda:0" = torch.ops.aten.add.Tensor(getitem_119, 1e-05); getitem_119 = None
+ rsqrt_21: "f32[1, 64, 1][64, 1, 1]cuda:0" = torch.ops.aten.rsqrt.default(add_84); add_84 = None
+ sub_21: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.sub.Tensor(add_83, getitem_120); getitem_120 = None
+ mul_82: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.mul.Tensor(sub_21, rsqrt_21); sub_21 = None
+ mul_83: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.mul.Tensor(mul_82, primals_130)
+ add_85: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.add.Tensor(mul_83, primals_131); mul_83 = primals_131 = None
+
+ # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:128 in forward, code: x = self.c_fc(x)
+ view_128: "f32[64, 768][768, 1]cuda:0" = torch.ops.aten.reshape.default(add_85, [64, 768]); add_85 = None
+ permute_86: "f32[768, 3072][1, 768]cuda:0" = torch.ops.aten.permute.default(primals_132, [1, 0]); primals_132 = None
+ addmm_42: "f32[64, 3072][3072, 1]cuda:0" = torch.ops.aten.addmm.default(primals_133, view_128, permute_86); primals_133 = None
+ view_129: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.reshape.default(addmm_42, [1, 64, 3072])
+
+ # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:25 in new_gelu, code: 0.5
+ mul_84: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.mul.Tensor(view_129, 0.5)
+
+ # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:29 in new_gelu, code: + torch.tanh(math.sqrt(2.0 / math.pi) * (x + 0.044715 * torch.pow(x, 3.0)))
+ pow_11: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.pow.Tensor_Scalar(view_129, 3.0)
+ mul_85: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.mul.Tensor(pow_11, 0.044715); pow_11 = None
+ add_86: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.add.Tensor(view_129, mul_85); view_129 = mul_85 = None
+ mul_86: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.mul.Tensor(add_86, 0.7978845608028654); add_86 = None
+ tanh_10: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.tanh.default(mul_86); mul_86 = None
+
+ # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:28 in new_gelu, code: 1.0
+ add_87: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.add.Tensor(tanh_10, 1.0); tanh_10 = None
+
+ # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:25 in new_gelu, code: 0.5
+ mul_87: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.mul.Tensor(mul_84, add_87); mul_84 = add_87 = None
+
+ # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:130 in forward, code: x = self.c_proj(x)
+ view_130: "f32[64, 3072][3072, 1]cuda:0" = torch.ops.aten.reshape.default(mul_87, [64, 3072]); mul_87 = None
+ permute_87: "f32[3072, 768][1, 3072]cuda:0" = torch.ops.aten.permute.default(primals_134, [1, 0]); primals_134 = None
+
+ # No stacktrace found for following nodes
+ mm_default_2: "f32[64, 768][768, 1]cuda:0" = torch.ops.aten.mm.default(view_130, permute_87)
+ add_tensor_2: "f32[64, 768][768, 1]cuda:0" = torch.ops.aten.add.Tensor(mm_default_2, primals_135); mm_default_2 = primals_135 = None
+
+ # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:130 in forward, code: x = self.c_proj(x)
+ view_131: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.reshape.default(add_tensor_2, [1, 64, 768]); add_tensor_2 = None
+
+ # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:145 in forward, code: x = x + self.mlp(self.ln_2(x))
+ add_88: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.add.Tensor(add_83, view_131); add_83 = view_131 = None
+
+ # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:43 in forward, code: return F.layer_norm(input, self.weight.shape, self.weight, self.bias, 1e-5)
+ var_mean_22 = torch.ops.aten.var_mean.correction(add_88, [2], correction = 0, keepdim = True)
+ getitem_121: "f32[1, 64, 1][64, 1, 1]cuda:0" = var_mean_22[0]
+ getitem_122: "f32[1, 64, 1][64, 1, 1]cuda:0" = var_mean_22[1]; var_mean_22 = None
+ add_89: "f32[1, 64, 1][64, 1, 1]cuda:0" = torch.ops.aten.add.Tensor(getitem_121, 1e-05); getitem_121 = None
+ rsqrt_22: "f32[1, 64, 1][64, 1, 1]cuda:0" = torch.ops.aten.rsqrt.default(add_89); add_89 = None
+ sub_22: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.sub.Tensor(add_88, getitem_122); getitem_122 = None
+ mul_88: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.mul.Tensor(sub_22, rsqrt_22); sub_22 = None
+ mul_89: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.mul.Tensor(mul_88, primals_136)
+ add_90: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.add.Tensor(mul_89, primals_137); mul_89 = primals_137 = None
+
+ # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:82 in forward, code: q, k, v = self.c_attn(x).split(self.n_embd, dim=2)
+ view_132: "f32[64, 768][768, 1]cuda:0" = torch.ops.aten.reshape.default(add_90, [64, 768]); add_90 = None
+ permute_88: "f32[768, 2304][1, 768]cuda:0" = torch.ops.aten.permute.default(primals_138, [1, 0]); primals_138 = None
+ addmm_44: "f32[64, 2304][2304, 1]cuda:0" = torch.ops.aten.addmm.default(primals_139, view_132, permute_88); primals_139 = None
+ view_133: "f32[1, 64, 2304][147456, 2304, 1]cuda:0" = torch.ops.aten.reshape.default(addmm_44, [1, 64, 2304]); addmm_44 = None
+ split_11 = torch.ops.aten.split.Tensor(view_133, 768, 2); view_133 = None
+ getitem_123: "f32[1, 64, 768][147456, 2304, 1]cuda:0" = split_11[0]
+ getitem_124: "f32[1, 64, 768][147456, 2304, 1]cuda:0" = split_11[1]
+ getitem_125: "f32[1, 64, 768][147456, 2304, 1]cuda:0" = split_11[2]; split_11 = None
+
+ # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:83 in forward, code: k = k.view(B, T, self.n_head, C // self.n_head).transpose(
+ view_134: "f32[1, 64, 12, 64][147456, 2304, 64, 1]cuda:0" = torch.ops.aten.reshape.default(getitem_124, [1, 64, 12, 64]); getitem_124 = None
+ permute_89: "f32[1, 12, 64, 64][147456, 64, 2304, 1]cuda:0" = torch.ops.aten.permute.default(view_134, [0, 2, 1, 3]); view_134 = None
+
+ # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:86 in forward, code: q = q.view(B, T, self.n_head, C // self.n_head).transpose(
+ view_135: "f32[1, 64, 12, 64][147456, 2304, 64, 1]cuda:0" = torch.ops.aten.reshape.default(getitem_123, [1, 64, 12, 64]); getitem_123 = None
+ permute_90: "f32[1, 12, 64, 64][147456, 64, 2304, 1]cuda:0" = torch.ops.aten.permute.default(view_135, [0, 2, 1, 3]); view_135 = None
+
+ # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:89 in forward, code: v = v.view(B, T, self.n_head, C // self.n_head).transpose(
+ view_136: "f32[1, 64, 12, 64][147456, 2304, 64, 1]cuda:0" = torch.ops.aten.reshape.default(getitem_125, [1, 64, 12, 64]); getitem_125 = None
+ permute_91: "f32[1, 12, 64, 64][147456, 64, 2304, 1]cuda:0" = torch.ops.aten.permute.default(view_136, [0, 2, 1, 3]); view_136 = None
+
+ # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:96 in forward, code: y = torch.nn.functional.scaled_dot_product_attention(
+ _scaled_dot_product_efficient_attention_11 = torch.ops.aten._scaled_dot_product_efficient_attention.default(permute_90, permute_89, permute_91, None, True, 0.0, True)
+ getitem_126: "f32[1, 12, 64, 64][49152, 64, 768, 1]cuda:0" = _scaled_dot_product_efficient_attention_11[0]
+ getitem_127: "f32[1, 12, 64][768, 64, 1]cuda:0" = _scaled_dot_product_efficient_attention_11[1]
+ getitem_128: "i64[][]cuda:0" = _scaled_dot_product_efficient_attention_11[2]
+ getitem_129: "i64[][]cuda:0" = _scaled_dot_product_efficient_attention_11[3]; _scaled_dot_product_efficient_attention_11 = None
+
+ # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:112 in forward, code: y.transpose(1, 2).contiguous().view(B, T, C)
+ permute_92: "f32[1, 64, 12, 64][49152, 768, 64, 1]cuda:0" = torch.ops.aten.permute.default(getitem_126, [0, 2, 1, 3])
+ view_137: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.reshape.default(permute_92, [1, 64, 768]); permute_92 = None
+
+ # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:116 in forward, code: y = self.resid_dropout(self.c_proj(y))
+ view_138: "f32[64, 768][768, 1]cuda:0" = torch.ops.aten.reshape.default(view_137, [64, 768]); view_137 = None
+ permute_93: "f32[768, 768][1, 768]cuda:0" = torch.ops.aten.permute.default(primals_140, [1, 0]); primals_140 = None
+
+ # No stacktrace found for following nodes
+ mm_default_1: "f32[64, 768][768, 1]cuda:0" = torch.ops.aten.mm.default(view_138, permute_93); view_138 = None
+ add_tensor_1: "f32[64, 768][768, 1]cuda:0" = torch.ops.aten.add.Tensor(mm_default_1, primals_141); mm_default_1 = primals_141 = None
+
+ # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:116 in forward, code: y = self.resid_dropout(self.c_proj(y))
+ view_139: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.reshape.default(add_tensor_1, [1, 64, 768]); add_tensor_1 = None
+
+ # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:144 in forward, code: x = x + self.attn(self.ln_1(x))
+ add_91: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.add.Tensor(add_88, view_139); add_88 = view_139 = None
+
+ # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:43 in forward, code: return F.layer_norm(input, self.weight.shape, self.weight, self.bias, 1e-5)
+ var_mean_23 = torch.ops.aten.var_mean.correction(add_91, [2], correction = 0, keepdim = True)
+ getitem_130: "f32[1, 64, 1][64, 1, 1]cuda:0" = var_mean_23[0]
+ getitem_131: "f32[1, 64, 1][64, 1, 1]cuda:0" = var_mean_23[1]; var_mean_23 = None
+ add_92: "f32[1, 64, 1][64, 1, 1]cuda:0" = torch.ops.aten.add.Tensor(getitem_130, 1e-05); getitem_130 = None
+ rsqrt_23: "f32[1, 64, 1][64, 1, 1]cuda:0" = torch.ops.aten.rsqrt.default(add_92); add_92 = None
+ sub_23: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.sub.Tensor(add_91, getitem_131); getitem_131 = None
+ mul_90: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.mul.Tensor(sub_23, rsqrt_23); sub_23 = None
+ mul_91: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.mul.Tensor(mul_90, primals_142)
+ add_93: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.add.Tensor(mul_91, primals_143); mul_91 = primals_143 = None
+
+ # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:128 in forward, code: x = self.c_fc(x)
+ view_140: "f32[64, 768][768, 1]cuda:0" = torch.ops.aten.reshape.default(add_93, [64, 768]); add_93 = None
+ permute_94: "f32[768, 3072][1, 768]cuda:0" = torch.ops.aten.permute.default(primals_144, [1, 0]); primals_144 = None
+ addmm_46: "f32[64, 3072][3072, 1]cuda:0" = torch.ops.aten.addmm.default(primals_145, view_140, permute_94); primals_145 = None
+ view_141: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.reshape.default(addmm_46, [1, 64, 3072])
+
+ # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:25 in new_gelu, code: 0.5
+ mul_92: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.mul.Tensor(view_141, 0.5)
+
+ # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:29 in new_gelu, code: + torch.tanh(math.sqrt(2.0 / math.pi) * (x + 0.044715 * torch.pow(x, 3.0)))
+ pow_12: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.pow.Tensor_Scalar(view_141, 3.0)
+ mul_93: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.mul.Tensor(pow_12, 0.044715); pow_12 = None
+ add_94: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.add.Tensor(view_141, mul_93); view_141 = mul_93 = None
+ mul_94: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.mul.Tensor(add_94, 0.7978845608028654); add_94 = None
+ tanh_11: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.tanh.default(mul_94); mul_94 = None
+
+ # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:28 in new_gelu, code: 1.0
+ add_95: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.add.Tensor(tanh_11, 1.0); tanh_11 = None
+
+ # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:25 in new_gelu, code: 0.5
+ mul_95: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.mul.Tensor(mul_92, add_95); mul_92 = add_95 = None
+
+ # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:130 in forward, code: x = self.c_proj(x)
+ view_142: "f32[64, 3072][3072, 1]cuda:0" = torch.ops.aten.reshape.default(mul_95, [64, 3072]); mul_95 = None
+ permute_95: "f32[3072, 768][1, 3072]cuda:0" = torch.ops.aten.permute.default(primals_146, [1, 0]); primals_146 = None
+
+ # No stacktrace found for following nodes
+ mm_default: "f32[64, 768][768, 1]cuda:0" = torch.ops.aten.mm.default(view_142, permute_95)
+ add_tensor: "f32[64, 768][768, 1]cuda:0" = torch.ops.aten.add.Tensor(mm_default, primals_147); mm_default = primals_147 = None
+
+ # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:130 in forward, code: x = self.c_proj(x)
+ view_143: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.reshape.default(add_tensor, [1, 64, 768]); add_tensor = None
+
+ # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:145 in forward, code: x = x + self.mlp(self.ln_2(x))
+ add_96: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.add.Tensor(add_91, view_143); add_91 = view_143 = None
+
+ # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:43 in forward, code: return F.layer_norm(input, self.weight.shape, self.weight, self.bias, 1e-5)
+ var_mean_24 = torch.ops.aten.var_mean.correction(add_96, [2], correction = 0, keepdim = True)
+ getitem_132: "f32[1, 64, 1][64, 1, 1]cuda:0" = var_mean_24[0]
+ getitem_133: "f32[1, 64, 1][64, 1, 1]cuda:0" = var_mean_24[1]; var_mean_24 = None
+ add_97: "f32[1, 64, 1][64, 1, 1]cuda:0" = torch.ops.aten.add.Tensor(getitem_132, 1e-05); getitem_132 = None
+ rsqrt_24: "f32[1, 64, 1][64, 1, 1]cuda:0" = torch.ops.aten.rsqrt.default(add_97); add_97 = None
+ sub_24: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.sub.Tensor(add_96, getitem_133); add_96 = getitem_133 = None
+ mul_96: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.mul.Tensor(sub_24, rsqrt_24); sub_24 = None
+ mul_97: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.mul.Tensor(mul_96, primals_148)
+ add_98: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.add.Tensor(mul_97, primals_149); mul_97 = primals_149 = None
+
+ # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:239 in forward, code: x[:, [-1], :]
+ full_default: "i64[1][1]cuda:0" = torch.ops.aten.full.default([1], -1, dtype = torch.int64, layout = torch.strided, device = device(type='cuda', index=0), pin_memory = False)
+ index: "f32[1, 1, 768][768, 768, 1]cuda:0" = torch.ops.aten.index.Tensor(add_98, [None, full_default]); add_98 = None
+
+ # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:238 in forward, code: logits = self.lm_head(
+ permute_96: "f32[768, 50304][1, 768]cuda:0" = torch.ops.aten.permute.default(primals_2, [1, 0]); primals_2 = None
+ view_144: "f32[1, 768][768, 1]cuda:0" = torch.ops.aten.reshape.default(index, [1, 768]); index = None
+ mm: "f32[1, 50304][50304, 1]cuda:0" = torch.ops.aten.mm.default(view_144, permute_96)
+ view_145: "f32[1, 1, 50304][50304, 50304, 1]cuda:0" = torch.ops.aten.reshape.default(mm, [1, 1, 50304]); mm = None
+ permute_99: "f32[50304, 768][768, 1]cuda:0" = torch.ops.aten.permute.default(permute_96, [1, 0]); permute_96 = None
+
+ # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:43 in forward, code: return F.layer_norm(input, self.weight.shape, self.weight, self.bias, 1e-5)
+ div: "f32[1, 64, 1][64, 1, 1]cuda:0" = torch.ops.aten.div.Tensor(rsqrt_24, 768); rsqrt_24 = None
+
+ # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:130 in forward, code: x = self.c_proj(x)
+ permute_101: "f32[768, 3072][3072, 1]cuda:0" = torch.ops.aten.permute.default(permute_95, [1, 0]); permute_95 = None
+
+ # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:128 in forward, code: x = self.c_fc(x)
+ permute_105: "f32[3072, 768][768, 1]cuda:0" = torch.ops.aten.permute.default(permute_94, [1, 0]); permute_94 = None
+
+ # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:43 in forward, code: return F.layer_norm(input, self.weight.shape, self.weight, self.bias, 1e-5)
+ div_1: "f32[1, 64, 1][64, 1, 1]cuda:0" = torch.ops.aten.div.Tensor(rsqrt_23, 768); rsqrt_23 = None
+
+ # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:116 in forward, code: y = self.resid_dropout(self.c_proj(y))
+ permute_109: "f32[768, 768][768, 1]cuda:0" = torch.ops.aten.permute.default(permute_93, [1, 0]); permute_93 = None
+
+ # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:82 in forward, code: q, k, v = self.c_attn(x).split(self.n_embd, dim=2)
+ permute_117: "f32[2304, 768][768, 1]cuda:0" = torch.ops.aten.permute.default(permute_88, [1, 0]); permute_88 = None
+
+ # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:43 in forward, code: return F.layer_norm(input, self.weight.shape, self.weight, self.bias, 1e-5)
+ div_2: "f32[1, 64, 1][64, 1, 1]cuda:0" = torch.ops.aten.div.Tensor(rsqrt_22, 768); rsqrt_22 = None
+
+ # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:130 in forward, code: x = self.c_proj(x)
+ permute_121: "f32[768, 3072][3072, 1]cuda:0" = torch.ops.aten.permute.default(permute_87, [1, 0]); permute_87 = None
+
+ # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:128 in forward, code: x = self.c_fc(x)
+ permute_125: "f32[3072, 768][768, 1]cuda:0" = torch.ops.aten.permute.default(permute_86, [1, 0]); permute_86 = None
+
+ # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:43 in forward, code: return F.layer_norm(input, self.weight.shape, self.weight, self.bias, 1e-5)
+ div_3: "f32[1, 64, 1][64, 1, 1]cuda:0" = torch.ops.aten.div.Tensor(rsqrt_21, 768); rsqrt_21 = None
+
+ # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:116 in forward, code: y = self.resid_dropout(self.c_proj(y))
+ permute_129: "f32[768, 768][768, 1]cuda:0" = torch.ops.aten.permute.default(permute_85, [1, 0]); permute_85 = None
+
+ # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:82 in forward, code: q, k, v = self.c_attn(x).split(self.n_embd, dim=2)
+ permute_137: "f32[2304, 768][768, 1]cuda:0" = torch.ops.aten.permute.default(permute_80, [1, 0]); permute_80 = None
+
+ # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:43 in forward, code: return F.layer_norm(input, self.weight.shape, self.weight, self.bias, 1e-5)
+ div_4: "f32[1, 64, 1][64, 1, 1]cuda:0" = torch.ops.aten.div.Tensor(rsqrt_20, 768); rsqrt_20 = None
+
+ # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:130 in forward, code: x = self.c_proj(x)
+ permute_141: "f32[768, 3072][3072, 1]cuda:0" = torch.ops.aten.permute.default(permute_79, [1, 0]); permute_79 = None
+
+ # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:128 in forward, code: x = self.c_fc(x)
+ permute_145: "f32[3072, 768][768, 1]cuda:0" = torch.ops.aten.permute.default(permute_78, [1, 0]); permute_78 = None
+
+ # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:43 in forward, code: return F.layer_norm(input, self.weight.shape, self.weight, self.bias, 1e-5)
+ div_5: "f32[1, 64, 1][64, 1, 1]cuda:0" = torch.ops.aten.div.Tensor(rsqrt_19, 768); rsqrt_19 = None
+
+ # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:116 in forward, code: y = self.resid_dropout(self.c_proj(y))
+ permute_149: "f32[768, 768][768, 1]cuda:0" = torch.ops.aten.permute.default(permute_77, [1, 0]); permute_77 = None
+
+ # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:82 in forward, code: q, k, v = self.c_attn(x).split(self.n_embd, dim=2)
+ permute_157: "f32[2304, 768][768, 1]cuda:0" = torch.ops.aten.permute.default(permute_72, [1, 0]); permute_72 = None
+
+ # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:43 in forward, code: return F.layer_norm(input, self.weight.shape, self.weight, self.bias, 1e-5)
+ div_6: "f32[1, 64, 1][64, 1, 1]cuda:0" = torch.ops.aten.div.Tensor(rsqrt_18, 768); rsqrt_18 = None
+
+ # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:130 in forward, code: x = self.c_proj(x)
+ permute_161: "f32[768, 3072][3072, 1]cuda:0" = torch.ops.aten.permute.default(permute_71, [1, 0]); permute_71 = None
+
+ # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:128 in forward, code: x = self.c_fc(x)
+ permute_165: "f32[3072, 768][768, 1]cuda:0" = torch.ops.aten.permute.default(permute_70, [1, 0]); permute_70 = None
+
+ # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:43 in forward, code: return F.layer_norm(input, self.weight.shape, self.weight, self.bias, 1e-5)
+ div_7: "f32[1, 64, 1][64, 1, 1]cuda:0" = torch.ops.aten.div.Tensor(rsqrt_17, 768); rsqrt_17 = None
+
+ # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:116 in forward, code: y = self.resid_dropout(self.c_proj(y))
+ permute_169: "f32[768, 768][768, 1]cuda:0" = torch.ops.aten.permute.default(permute_69, [1, 0]); permute_69 = None
+
+ # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:82 in forward, code: q, k, v = self.c_attn(x).split(self.n_embd, dim=2)
+ permute_177: "f32[2304, 768][768, 1]cuda:0" = torch.ops.aten.permute.default(permute_64, [1, 0]); permute_64 = None
+
+ # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:43 in forward, code: return F.layer_norm(input, self.weight.shape, self.weight, self.bias, 1e-5)
+ div_8: "f32[1, 64, 1][64, 1, 1]cuda:0" = torch.ops.aten.div.Tensor(rsqrt_16, 768); rsqrt_16 = None
+
+ # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:130 in forward, code: x = self.c_proj(x)
+ permute_181: "f32[768, 3072][3072, 1]cuda:0" = torch.ops.aten.permute.default(permute_63, [1, 0]); permute_63 = None
+
+ # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:128 in forward, code: x = self.c_fc(x)
+ permute_185: "f32[3072, 768][768, 1]cuda:0" = torch.ops.aten.permute.default(permute_62, [1, 0]); permute_62 = None
+
+ # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:43 in forward, code: return F.layer_norm(input, self.weight.shape, self.weight, self.bias, 1e-5)
+ div_9: "f32[1, 64, 1][64, 1, 1]cuda:0" = torch.ops.aten.div.Tensor(rsqrt_15, 768); rsqrt_15 = None
+
+ # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:116 in forward, code: y = self.resid_dropout(self.c_proj(y))
+ permute_189: "f32[768, 768][768, 1]cuda:0" = torch.ops.aten.permute.default(permute_61, [1, 0]); permute_61 = None
+
+ # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:82 in forward, code: q, k, v = self.c_attn(x).split(self.n_embd, dim=2)
+ permute_197: "f32[2304, 768][768, 1]cuda:0" = torch.ops.aten.permute.default(permute_56, [1, 0]); permute_56 = None
+
+ # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:43 in forward, code: return F.layer_norm(input, self.weight.shape, self.weight, self.bias, 1e-5)
+ div_10: "f32[1, 64, 1][64, 1, 1]cuda:0" = torch.ops.aten.div.Tensor(rsqrt_14, 768); rsqrt_14 = None
+
+ # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:130 in forward, code: x = self.c_proj(x)
+ permute_201: "f32[768, 3072][3072, 1]cuda:0" = torch.ops.aten.permute.default(permute_55, [1, 0]); permute_55 = None
+
+ # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:128 in forward, code: x = self.c_fc(x)
+ permute_205: "f32[3072, 768][768, 1]cuda:0" = torch.ops.aten.permute.default(permute_54, [1, 0]); permute_54 = None
+
+ # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:43 in forward, code: return F.layer_norm(input, self.weight.shape, self.weight, self.bias, 1e-5)
+ div_11: "f32[1, 64, 1][64, 1, 1]cuda:0" = torch.ops.aten.div.Tensor(rsqrt_13, 768); rsqrt_13 = None
+
+ # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:116 in forward, code: y = self.resid_dropout(self.c_proj(y))
+ permute_209: "f32[768, 768][768, 1]cuda:0" = torch.ops.aten.permute.default(permute_53, [1, 0]); permute_53 = None
+
+ # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:82 in forward, code: q, k, v = self.c_attn(x).split(self.n_embd, dim=2)
+ permute_217: "f32[2304, 768][768, 1]cuda:0" = torch.ops.aten.permute.default(permute_48, [1, 0]); permute_48 = None
+
+ # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:43 in forward, code: return F.layer_norm(input, self.weight.shape, self.weight, self.bias, 1e-5)
+ div_12: "f32[1, 64, 1][64, 1, 1]cuda:0" = torch.ops.aten.div.Tensor(rsqrt_12, 768); rsqrt_12 = None
+
+ # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:130 in forward, code: x = self.c_proj(x)
+ permute_221: "f32[768, 3072][3072, 1]cuda:0" = torch.ops.aten.permute.default(permute_47, [1, 0]); permute_47 = None
+
+ # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:128 in forward, code: x = self.c_fc(x)
+ permute_225: "f32[3072, 768][768, 1]cuda:0" = torch.ops.aten.permute.default(permute_46, [1, 0]); permute_46 = None
+
+ # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:43 in forward, code: return F.layer_norm(input, self.weight.shape, self.weight, self.bias, 1e-5)
+ div_13: "f32[1, 64, 1][64, 1, 1]cuda:0" = torch.ops.aten.div.Tensor(rsqrt_11, 768); rsqrt_11 = None
+
+ # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:116 in forward, code: y = self.resid_dropout(self.c_proj(y))
+ permute_229: "f32[768, 768][768, 1]cuda:0" = torch.ops.aten.permute.default(permute_45, [1, 0]); permute_45 = None
+
+ # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:82 in forward, code: q, k, v = self.c_attn(x).split(self.n_embd, dim=2)
+ permute_237: "f32[2304, 768][768, 1]cuda:0" = torch.ops.aten.permute.default(permute_40, [1, 0]); permute_40 = None
+
+ # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:43 in forward, code: return F.layer_norm(input, self.weight.shape, self.weight, self.bias, 1e-5)
+ div_14: "f32[1, 64, 1][64, 1, 1]cuda:0" = torch.ops.aten.div.Tensor(rsqrt_10, 768); rsqrt_10 = None
+
+ # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:130 in forward, code: x = self.c_proj(x)
+ permute_241: "f32[768, 3072][3072, 1]cuda:0" = torch.ops.aten.permute.default(permute_39, [1, 0]); permute_39 = None
+
+ # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:128 in forward, code: x = self.c_fc(x)
+ permute_245: "f32[3072, 768][768, 1]cuda:0" = torch.ops.aten.permute.default(permute_38, [1, 0]); permute_38 = None
+
+ # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:43 in forward, code: return F.layer_norm(input, self.weight.shape, self.weight, self.bias, 1e-5)
+ div_15: "f32[1, 64, 1][64, 1, 1]cuda:0" = torch.ops.aten.div.Tensor(rsqrt_9, 768); rsqrt_9 = None
+
+ # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:116 in forward, code: y = self.resid_dropout(self.c_proj(y))
+ permute_249: "f32[768, 768][768, 1]cuda:0" = torch.ops.aten.permute.default(permute_37, [1, 0]); permute_37 = None
+
+ # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:82 in forward, code: q, k, v = self.c_attn(x).split(self.n_embd, dim=2)
+ permute_257: "f32[2304, 768][768, 1]cuda:0" = torch.ops.aten.permute.default(permute_32, [1, 0]); permute_32 = None
+
+ # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:43 in forward, code: return F.layer_norm(input, self.weight.shape, self.weight, self.bias, 1e-5)
+ div_16: "f32[1, 64, 1][64, 1, 1]cuda:0" = torch.ops.aten.div.Tensor(rsqrt_8, 768); rsqrt_8 = None
+
+ # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:130 in forward, code: x = self.c_proj(x)
+ permute_261: "f32[768, 3072][3072, 1]cuda:0" = torch.ops.aten.permute.default(permute_31, [1, 0]); permute_31 = None
+
+ # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:128 in forward, code: x = self.c_fc(x)
+ permute_265: "f32[3072, 768][768, 1]cuda:0" = torch.ops.aten.permute.default(permute_30, [1, 0]); permute_30 = None
+
+ # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:43 in forward, code: return F.layer_norm(input, self.weight.shape, self.weight, self.bias, 1e-5)
+ div_17: "f32[1, 64, 1][64, 1, 1]cuda:0" = torch.ops.aten.div.Tensor(rsqrt_7, 768); rsqrt_7 = None
+
+ # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:116 in forward, code: y = self.resid_dropout(self.c_proj(y))
+ permute_269: "f32[768, 768][768, 1]cuda:0" = torch.ops.aten.permute.default(permute_29, [1, 0]); permute_29 = None
+
+ # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:82 in forward, code: q, k, v = self.c_attn(x).split(self.n_embd, dim=2)
+ permute_277: "f32[2304, 768][768, 1]cuda:0" = torch.ops.aten.permute.default(permute_24, [1, 0]); permute_24 = None
+
+ # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:43 in forward, code: return F.layer_norm(input, self.weight.shape, self.weight, self.bias, 1e-5)
+ div_18: "f32[1, 64, 1][64, 1, 1]cuda:0" = torch.ops.aten.div.Tensor(rsqrt_6, 768); rsqrt_6 = None
+
+ # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:130 in forward, code: x = self.c_proj(x)
+ permute_281: "f32[768, 3072][3072, 1]cuda:0" = torch.ops.aten.permute.default(permute_23, [1, 0]); permute_23 = None
+
+ # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:128 in forward, code: x = self.c_fc(x)
+ permute_285: "f32[3072, 768][768, 1]cuda:0" = torch.ops.aten.permute.default(permute_22, [1, 0]); permute_22 = None
+
+ # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:43 in forward, code: return F.layer_norm(input, self.weight.shape, self.weight, self.bias, 1e-5)
+ div_19: "f32[1, 64, 1][64, 1, 1]cuda:0" = torch.ops.aten.div.Tensor(rsqrt_5, 768); rsqrt_5 = None
+
+ # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:116 in forward, code: y = self.resid_dropout(self.c_proj(y))
+ permute_289: "f32[768, 768][768, 1]cuda:0" = torch.ops.aten.permute.default(permute_21, [1, 0]); permute_21 = None
+
+ # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:82 in forward, code: q, k, v = self.c_attn(x).split(self.n_embd, dim=2)
+ permute_297: "f32[2304, 768][768, 1]cuda:0" = torch.ops.aten.permute.default(permute_16, [1, 0]); permute_16 = None
+
+ # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:43 in forward, code: return F.layer_norm(input, self.weight.shape, self.weight, self.bias, 1e-5)
+ div_20: "f32[1, 64, 1][64, 1, 1]cuda:0" = torch.ops.aten.div.Tensor(rsqrt_4, 768); rsqrt_4 = None
+
+ # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:130 in forward, code: x = self.c_proj(x)
+ permute_301: "f32[768, 3072][3072, 1]cuda:0" = torch.ops.aten.permute.default(permute_15, [1, 0]); permute_15 = None
+
+ # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:128 in forward, code: x = self.c_fc(x)
+ permute_305: "f32[3072, 768][768, 1]cuda:0" = torch.ops.aten.permute.default(permute_14, [1, 0]); permute_14 = None
+
+ # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:43 in forward, code: return F.layer_norm(input, self.weight.shape, self.weight, self.bias, 1e-5)
+ div_21: "f32[1, 64, 1][64, 1, 1]cuda:0" = torch.ops.aten.div.Tensor(rsqrt_3, 768); rsqrt_3 = None
+
+ # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:116 in forward, code: y = self.resid_dropout(self.c_proj(y))
+ permute_309: "f32[768, 768][768, 1]cuda:0" = torch.ops.aten.permute.default(permute_13, [1, 0]); permute_13 = None
+
+ # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:82 in forward, code: q, k, v = self.c_attn(x).split(self.n_embd, dim=2)
+ permute_317: "f32[2304, 768][768, 1]cuda:0" = torch.ops.aten.permute.default(permute_8, [1, 0]); permute_8 = None
+
+ # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:43 in forward, code: return F.layer_norm(input, self.weight.shape, self.weight, self.bias, 1e-5)
+ div_22: "f32[1, 64, 1][64, 1, 1]cuda:0" = torch.ops.aten.div.Tensor(rsqrt_2, 768); rsqrt_2 = None
+
+ # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:130 in forward, code: x
= self.c_proj(x) + permute_321: "f32[768, 3072][3072, 1]cuda:0" = torch.ops.aten.permute.default(permute_7, [1, 0]); permute_7 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:128 in forward, code: x = self.c_fc(x) + permute_325: "f32[3072, 768][768, 1]cuda:0" = torch.ops.aten.permute.default(permute_6, [1, 0]); permute_6 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:43 in forward, code: return F.layer_norm(input, self.weight.shape, self.weight, self.bias, 1e-5) + div_23: "f32[1, 64, 1][64, 1, 1]cuda:0" = torch.ops.aten.div.Tensor(rsqrt_1, 768); rsqrt_1 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:116 in forward, code: y = self.resid_dropout(self.c_proj(y)) + permute_329: "f32[768, 768][768, 1]cuda:0" = torch.ops.aten.permute.default(permute_5, [1, 0]); permute_5 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:82 in forward, code: q, k, v = self.c_attn(x).split(self.n_embd, dim=2) + permute_337: "f32[2304, 768][768, 1]cuda:0" = torch.ops.aten.permute.default(permute, [1, 0]); permute = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:43 in forward, code: return F.layer_norm(input, self.weight.shape, self.weight, self.bias, 1e-5) + div_24: "f32[1, 64, 1][64, 1, 1]cuda:0" = torch.ops.aten.div.Tensor(rsqrt, 768); rsqrt = None + return (view_145, primals_1, primals_4, primals_10, primals_16, primals_22, primals_28, primals_34, primals_40, primals_46, primals_52, primals_58, primals_64, primals_70, primals_76, primals_82, primals_88, primals_94, primals_100, primals_106, primals_112, primals_118, primals_124, primals_130, primals_136, primals_142, primals_148, unsqueeze, mul, view, permute_1, permute_2, permute_3, getitem_5, getitem_6, getitem_7, getitem_8, mul_2, view_8, addmm_2, view_10, mul_8, view_12, permute_9, permute_10, permute_11, getitem_16, getitem_17, getitem_18, getitem_19, mul_10, view_20, addmm_6, view_22, mul_16, view_24, permute_17, permute_18, permute_19, getitem_27, getitem_28, getitem_29, getitem_30, mul_18, view_32, addmm_10, view_34, mul_24, view_36, permute_25, permute_26, permute_27, getitem_38, getitem_39, getitem_40, getitem_41, mul_26, view_44, addmm_14, view_46, mul_32, view_48, permute_33, permute_34, permute_35, getitem_49, getitem_50, getitem_51, getitem_52, mul_34, view_56, addmm_18, view_58, mul_40, view_60, permute_41, permute_42, permute_43, getitem_60, getitem_61, getitem_62, getitem_63, mul_42, view_68, addmm_22, view_70, mul_48, view_72, permute_49, permute_50, permute_51, getitem_71, getitem_72, getitem_73, getitem_74, mul_50, view_80, addmm_26, view_82, mul_56, view_84, permute_57, permute_58, permute_59, getitem_82, getitem_83, getitem_84, getitem_85, mul_58, view_92, addmm_30, view_94, mul_64, view_96, permute_65, permute_66, permute_67, getitem_93, getitem_94, getitem_95, getitem_96, mul_66, view_104, addmm_34, view_106, mul_72, view_108, permute_73, permute_74, permute_75, getitem_104, getitem_105, getitem_106, getitem_107, mul_74, view_116, addmm_38, view_118, mul_80, view_120, permute_81, permute_82, permute_83, getitem_115, getitem_116, getitem_117, getitem_118, mul_82, view_128, addmm_42, view_130, mul_88, view_132, permute_89, permute_90, permute_91, getitem_126, getitem_127, getitem_128, getitem_129, mul_90, view_140, addmm_46, view_142, mul_96, full_default, view_144, permute_99, div, permute_101, permute_105, div_1, 
permute_109, permute_117, div_2, permute_121, permute_125, div_3, permute_129, permute_137, div_4, permute_141, permute_145, div_5, permute_149, permute_157, div_6, permute_161, permute_165, div_7, permute_169, permute_177, div_8, permute_181, permute_185, div_9, permute_189, permute_197, div_10, permute_201, permute_205, div_11, permute_209, permute_217, div_12, permute_221, permute_225, div_13, permute_229, permute_237, div_14, permute_241, permute_245, div_15, permute_249, permute_257, div_16, permute_261, permute_265, div_17, permute_269, permute_277, div_18, permute_281, permute_285, div_19, permute_289, permute_297, div_20, permute_301, permute_305, div_21, permute_309, permute_317, div_22, permute_321, permute_325, div_23, permute_329, permute_337, div_24) + +V0806 13:55:56.662000 4107173 torch/_dynamo/utils.py:824] {"chromium_event": {}, "has_payload": "f9732de63a2678cd84f99c06235155c7"} + { + "name": "GraphLowering.run", + "ts": 1722977756662900.5, + "args": null, + "ph": "B", + "pid": 0 + } +V0806 13:55:57.358000 4107173 torch/_dynamo/utils.py:824] {"chromium_event": {}, "has_payload": "5c78e44dee7eae3e1fe0c6d03c87b6d6"} + { + "name": "GraphLowering.run", + "ts": 1722977757358803.5, + "args": null, + "ph": "E", + "pid": 0 + } +V0806 13:55:57.361000 4107173 torch/_dynamo/utils.py:824] {"chromium_event": {}, "has_payload": "24bd000f779650a5bca456fe7f33510d"} + { + "name": "GraphLowering.compile_to_module", + "ts": 1722977757361591.0, + "args": null, + "ph": "B", + "pid": 0 + } +V0806 13:55:57.361000 4107173 torch/_dynamo/utils.py:824] {"chromium_event": {}, "has_payload": "817c28f5892846ba90a4e241b47bc9aa"} + { + "name": "code_gen", + "ts": 1722977757361702.5, + "args": null, + "ph": "B", + "pid": 0 + } +V0806 13:55:57.367000 4107173 torch/_dynamo/utils.py:824] {"chromium_event": {}, "has_payload": "268013e64077861aa06897ccb5d7ed85"} + { + "name": "Scheduler.__init__", + "ts": 1722977757367030.2, + "args": null, + "ph": "B", + "pid": 0 + } +V0806 13:55:57.994000 4107173 torch/_dynamo/utils.py:824] {"chromium_event": {}, "has_payload": "5ea38044443328684bb4a7c1e7a9345b"} + { + "name": "Scheduler.__init__", + "ts": 1722977757994874.2, + "args": null, + "ph": "E", + "pid": 0 + } +V0806 13:55:57.995000 4107173 torch/_dynamo/utils.py:824] {"chromium_event": {}, "has_payload": "f1875d4b22158eb9e26e63b134b00766"} + { + "name": "Scheduler.codegen", + "ts": 1722977757995265.5, + "args": null, + "ph": "B", + "pid": 0 + } +V0806 13:55:58.621000 4107173 torch/_dynamo/utils.py:824] {"chromium_event": {}, "has_payload": "effece776be3ac8b00bd99af0f3b9e28"} + { + "name": "Scheduler.codegen", + "ts": 1722977758621719.2, + "args": null, + "ph": "E", + "pid": 0 + } +V0806 13:55:58.622000 4107173 torch/_dynamo/utils.py:824] {"chromium_event": {}, "has_payload": "d253d967fc7804a3c84fe17aa255d0d4"} + { + "name": "WrapperCodeGen.generate", + "ts": 1722977758622009.2, + "args": null, + "ph": "B", + "pid": 0 + } +V0806 13:55:58.647000 4107173 torch/_dynamo/utils.py:824] {"chromium_event": {}, "has_payload": "fb1b1c454711c8a114bd0247e8c5bc50"} + { + "name": "WrapperCodeGen.generate", + "ts": 1722977758647481.0, + "args": null, + "ph": "E", + "pid": 0 + } +V0806 13:55:58.648000 4107173 torch/_inductor/graph.py:1792] {"inductor_output_code": {"filename": "/tmp/tmp2ln889l5/6z/c6zg4h42euxwsaoxhpcfic2sgwsxsngjulbnvydlzmvlm7pswqjm.py"}, "frame_id": 2, "frame_compile_id": 0, "attempt": 1, "has_payload": "fd1ea0df8f0e030ef934fc089a8c1ff2"} + + # AOT ID: ['0_forward'] + from ctypes import c_void_p, c_long + import 
torch + import math + import random + import os + import tempfile + from math import inf, nan + from torch._inductor.hooks import run_intermediate_hooks + from torch._inductor.utils import maybe_profile + from torch._inductor.codegen.memory_planning import _align as align + + from torch import device, empty_strided + from torch._inductor.async_compile import AsyncCompile + from torch._inductor.select_algorithm import extern_kernels + from torch._inductor.codegen.multi_kernel import MultiKernelCall + + aten = torch.ops.aten + inductor_ops = torch.ops.inductor + _quantized = torch.ops._quantized + assert_size_stride = torch._C._dynamo.guards.assert_size_stride + empty_strided_cpu = torch._C._dynamo.guards._empty_strided_cpu + empty_strided_cuda = torch._C._dynamo.guards._empty_strided_cuda + reinterpret_tensor = torch._C._dynamo.guards._reinterpret_tensor + alloc_from_pool = torch.ops.inductor._alloc_from_pool + async_compile = AsyncCompile() + + + # kernel path: /tmp/tmp2ln889l5/ud/cudpuzdqzuzntk7ujry646lmmfgeazd3ik3spi6vvf2mczwqx4bc.py + # Source Nodes: [add, arange, layer_norm, pos_emb, tok_emb], Original ATen: [aten.add, aten.arange, aten.embedding, aten.native_layer_norm, aten.native_layer_norm_backward] + # add => add + # arange => iota + # layer_norm => add_1, add_2, mul, mul_1, rsqrt, sub, var_mean + # pos_emb => embedding_1 + # tok_emb => embedding + triton_red_fused_add_arange_embedding_native_layer_norm_native_layer_norm_backward_0 = async_compile.triton('triton_', ''' + import triton + import triton.language as tl + from triton.compiler.compiler import AttrsDescriptor + + from torch._inductor.runtime import triton_helpers, triton_heuristics + from torch._inductor.runtime.triton_helpers import libdevice, math as tl_math + from torch._inductor.runtime.hints import AutotuneHint, ReductionHint, TileHint, instance_descriptor, DeviceProperties + + @triton_heuristics.reduction( + size_hints=[64, 1024], + reduction_hint=ReductionHint.DEFAULT, + filename=__file__, + triton_meta={'signature': {0: '*i64', 1: '*fp32', 2: '*fp32', 3: '*fp32', 4: '*fp32', 5: '*i64', 6: '*fp32', 7: '*fp32', 8: '*fp32', 9: 'i32', 10: 'i32'}, 'device': DeviceProperties(type='cuda', index=0, cc=90, major=9, regs_per_multiprocessor=65536, max_threads_per_multi_processor=2048, multi_processor_count=132), 'constants': {}, 'configs': [AttrsDescriptor(divisible_by_16=(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10), equal_to_1=())]}, + inductor_meta={'autotune_hints': set(), 'kernel_name': 'triton_red_fused_add_arange_embedding_native_layer_norm_native_layer_norm_backward_0', 'mutated_arg_names': [], 'no_x_dim': False, 'num_load': 3, 'num_reduction': 2, 'backend_hash': 'B0714AF1B5CA55D285C0AA74C7669268EFE45FDD9B7A0A70183B8D89AC8FCAC4', 'are_deterministic_algorithms_enabled': False, 'assert_indirect_indexing': True, 'autotune_local_cache': True, 'autotune_pointwise': True, 'autotune_remote_cache': None, 'force_disable_caches': False, 'dynamic_scale_rblock': True, 'max_autotune': False, 'max_autotune_pointwise': False, 'min_split_scan_rblock': 256, 'spill_threshold': 16, 'store_cubin': False} + ) + @triton.jit + def triton_(in_ptr0, in_ptr1, in_ptr2, in_ptr3, in_ptr4, out_ptr0, out_ptr3, out_ptr4, out_ptr5, xnumel, rnumel, XBLOCK : tl.constexpr, RBLOCK : tl.constexpr): + xnumel = 64 + rnumel = 768 + xoffset = tl.program_id(0) * XBLOCK + xindex = xoffset + tl.arange(0, XBLOCK)[:, None] + xmask = xindex < xnumel + rbase = tl.arange(0, RBLOCK)[None, :] + x0 = xindex + tmp0 = x0 + tl.store(out_ptr0 + (x0), tmp0, xmask) + tmp1 = 
tl.load(in_ptr0 + (x0), xmask, eviction_policy='evict_last') + tmp11_mean = tl.zeros([XBLOCK, RBLOCK], tl.float32) + tmp11_m2 = tl.zeros([XBLOCK, RBLOCK], tl.float32) + tmp11_weight = tl.zeros([XBLOCK, RBLOCK], tl.float32) + for roffset in range(0, rnumel, RBLOCK): + rindex = roffset + rbase + rmask = rindex < rnumel + r1 = rindex + tmp2 = tl.full([XBLOCK, RBLOCK], 50304, tl.int32) + tmp3 = tmp1 + tmp2 + tmp4 = tmp1 < 0 + tmp5 = tl.where(tmp4, tmp3, tmp1) + tl.device_assert(((0 <= tmp5) & (tmp5 < 50304)) | ~(xmask), "index out of bounds: 0 <= tmp5 < 50304") + tmp7 = tl.load(in_ptr1 + (r1 + (768*tmp5)), rmask & xmask, eviction_policy='evict_last', other=0.0) + tmp8 = tl.load(in_ptr2 + (r1 + (768*tmp0)), rmask & xmask, eviction_policy='evict_last', other=0.0) + tmp9 = tmp7 + tmp8 + tmp10 = tl.broadcast_to(tmp9, [XBLOCK, RBLOCK]) + tmp11_mean_next, tmp11_m2_next, tmp11_weight_next = triton_helpers.welford_reduce( + tmp10, tmp11_mean, tmp11_m2, tmp11_weight, roffset == 0 + ) + tmp11_mean = tl.where(rmask & xmask, tmp11_mean_next, tmp11_mean) + tmp11_m2 = tl.where(rmask & xmask, tmp11_m2_next, tmp11_m2) + tmp11_weight = tl.where(rmask & xmask, tmp11_weight_next, tmp11_weight) + tmp11_tmp, tmp12_tmp, tmp13_tmp = triton_helpers.welford( + tmp11_mean, tmp11_m2, tmp11_weight, 1 + ) + tmp11 = tmp11_tmp[:, None] + tmp12 = tmp12_tmp[:, None] + tmp13 = tmp13_tmp[:, None] + for roffset in range(0, rnumel, RBLOCK): + rindex = roffset + rbase + rmask = rindex < rnumel + r1 = rindex + tmp29 = tl.load(in_ptr3 + (r1), rmask, eviction_policy='evict_last', other=0.0) + tmp31 = tl.load(in_ptr4 + (r1), rmask, eviction_policy='evict_last', other=0.0) + tmp14 = tl.full([XBLOCK, RBLOCK], 50304, tl.int32) + tmp15 = tmp1 + tmp14 + tmp16 = tmp1 < 0 + tmp17 = tl.where(tmp16, tmp15, tmp1) + tl.device_assert(((0 <= tmp17) & (tmp17 < 50304)) | ~(xmask), "index out of bounds: 0 <= tmp17 < 50304") + tmp19 = tl.load(in_ptr1 + (r1 + (768*tmp17)), rmask & xmask, eviction_policy='evict_first', other=0.0) + tmp20 = tl.load(in_ptr2 + (r1 + (768*tmp0)), rmask & xmask, eviction_policy='evict_first', other=0.0) + tmp21 = tmp19 + tmp20 + tmp22 = tmp21 - tmp11 + tmp23 = 768.0 + tmp24 = tmp12 / tmp23 + tmp25 = 1e-05 + tmp26 = tmp24 + tmp25 + tmp27 = libdevice.rsqrt(tmp26) + tmp28 = tmp22 * tmp27 + tmp30 = tmp28 * tmp29 + tmp32 = tmp30 + tmp31 + tl.store(out_ptr3 + (r1 + (768*x0)), tmp28, rmask & xmask) + tl.store(out_ptr4 + (r1 + (768*x0)), tmp32, rmask & xmask) + tmp33 = 768.0 + tmp34 = tmp12 / tmp33 + tmp35 = 1e-05 + tmp36 = tmp34 + tmp35 + tmp37 = libdevice.rsqrt(tmp36) + tmp38 = 0.0013020833333333333 + tmp39 = tmp37 * tmp38 + tl.store(out_ptr5 + (x0), tmp39, xmask) + ''', device_str='cuda') + + import triton + import triton.language as tl + from torch._inductor.runtime.triton_heuristics import grid, split_scan_grid, grid_combo_kernels, start_graph, end_graph + from torch._C import _cuda_getCurrentRawStream as get_raw_stream + + + # kernel path: /tmp/tmp2ln889l5/ny/cnyd44m4eki6trasporudk5jhoi44nw2xayxtolgkf52e37t4zoc.py + # Source Nodes: [add, layer_norm_1, pos_emb, tok_emb, x_1], Original ATen: [aten.add, aten.embedding, aten.native_layer_norm, aten.native_layer_norm_backward] + # add => add + # layer_norm_1 => add_4, add_5, mul_2, mul_3, rsqrt_1, sub_1, var_mean_1 + # pos_emb => embedding_1 + # tok_emb => embedding + # x_1 => add_3 + triton_per_fused_add_embedding_native_layer_norm_native_layer_norm_backward_1 = async_compile.triton('triton_', ''' + import triton + import triton.language as tl + from triton.compiler.compiler 
import AttrsDescriptor + + from torch._inductor.runtime import triton_helpers, triton_heuristics + from torch._inductor.runtime.triton_helpers import libdevice, math as tl_math + from torch._inductor.runtime.hints import AutotuneHint, ReductionHint, TileHint, instance_descriptor, DeviceProperties + + @triton_heuristics.persistent_reduction( + size_hints=[64, 1024], + reduction_hint=ReductionHint.INNER, + filename=__file__, + triton_meta={'signature': {0: '*fp32', 1: '*i64', 2: '*fp32', 3: '*i64', 4: '*fp32', 5: '*fp32', 6: '*fp32', 7: '*fp32', 8: '*fp32', 9: '*fp32', 10: '*fp32', 11: 'i32', 12: 'i32'}, 'device': DeviceProperties(type='cuda', index=0, cc=90, major=9, regs_per_multiprocessor=65536, max_threads_per_multi_processor=2048, multi_processor_count=132), 'constants': {}, 'configs': [AttrsDescriptor(divisible_by_16=(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12), equal_to_1=())]}, + inductor_meta={'autotune_hints': set(), 'kernel_name': 'triton_per_fused_add_embedding_native_layer_norm_native_layer_norm_backward_1', 'mutated_arg_names': ['in_out_ptr0'], 'no_x_dim': True, 'num_load': 6, 'num_reduction': 4, 'backend_hash': 'B0714AF1B5CA55D285C0AA74C7669268EFE45FDD9B7A0A70183B8D89AC8FCAC4', 'are_deterministic_algorithms_enabled': False, 'assert_indirect_indexing': True, 'autotune_local_cache': True, 'autotune_pointwise': True, 'autotune_remote_cache': None, 'force_disable_caches': False, 'dynamic_scale_rblock': True, 'max_autotune': False, 'max_autotune_pointwise': False, 'min_split_scan_rblock': 256, 'spill_threshold': 16, 'store_cubin': False} + ) + @triton.jit + def triton_(in_out_ptr0, in_ptr0, in_ptr1, in_ptr2, in_ptr3, in_ptr4, in_ptr5, in_ptr6, out_ptr2, out_ptr3, out_ptr4, xnumel, rnumel): + xnumel = 64 + XBLOCK: tl.constexpr = 1 + rnumel = 768 + RBLOCK: tl.constexpr = 1024 + xoffset = tl.program_id(0) * XBLOCK + xindex = tl.full([1], xoffset, tl.int32) + xmask = tl.full([RBLOCK], True, tl.int1) + rindex = tl.arange(0, RBLOCK)[:] + roffset = 0 + rmask = rindex < rnumel + x0 = xindex + r1 = rindex + tmp0 = tl.load(in_ptr0 + (x0), None, eviction_policy='evict_last') + tmp7 = tl.load(in_ptr2 + (x0), None, eviction_policy='evict_last') + tmp15 = tl.load(in_out_ptr0 + (r1 + (768*x0)), rmask, other=0.0) + tmp16 = tl.load(in_ptr4 + (r1), rmask, eviction_policy='evict_last', other=0.0) + tmp42 = tl.load(in_ptr5 + (r1), rmask, eviction_policy='evict_last', other=0.0) + tmp44 = tl.load(in_ptr6 + (r1), rmask, eviction_policy='evict_last', other=0.0) + tmp1 = tl.full([RBLOCK], 50304, tl.int32) + tmp2 = tmp0 + tmp1 + tmp3 = tmp0 < 0 + tmp4 = tl.where(tmp3, tmp2, tmp0) + tl.device_assert((0 <= tmp4) & (tmp4 < 50304), "index out of bounds: 0 <= tmp4 < 50304") + tmp6 = tl.load(in_ptr1 + (r1 + (768*tmp4)), rmask, other=0.0) + tmp8 = tl.full([RBLOCK], 1024, tl.int32) + tmp9 = tmp7 + tmp8 + tmp10 = tmp7 < 0 + tmp11 = tl.where(tmp10, tmp9, tmp7) + tl.device_assert((0 <= tmp11) & (tmp11 < 1024), "index out of bounds: 0 <= tmp11 < 1024") + tmp13 = tl.load(in_ptr3 + (r1 + (768*tmp11)), rmask, other=0.0) + tmp14 = tmp6 + tmp13 + tmp17 = tmp15 + tmp16 + tmp18 = tmp14 + tmp17 + tmp19 = tl.broadcast_to(tmp18, [RBLOCK]) + tmp21 = tl.where(rmask, tmp19, 0) + tmp22 = tl.broadcast_to(tmp19, [RBLOCK]) + tmp24 = tl.where(rmask, tmp22, 0) + tmp25 = triton_helpers.promote_to_tensor(tl.sum(tmp24, 0)) + tmp26 = tl.full([1], 768, tl.int32) + tmp27 = tmp26.to(tl.float32) + tmp28 = tmp25 / tmp27 + tmp29 = tmp19 - tmp28 + tmp30 = tmp29 * tmp29 + tmp31 = tl.broadcast_to(tmp30, [RBLOCK]) + tmp33 = tl.where(rmask, tmp31, 0) + 
tmp34 = triton_helpers.promote_to_tensor(tl.sum(tmp33, 0)) + tmp35 = tmp18 - tmp28 + tmp36 = 768.0 + tmp37 = tmp34 / tmp36 + tmp38 = 1e-05 + tmp39 = tmp37 + tmp38 + tmp40 = libdevice.rsqrt(tmp39) + tmp41 = tmp35 * tmp40 + tmp43 = tmp41 * tmp42 + tmp45 = tmp43 + tmp44 + tmp46 = 0.0013020833333333333 + tmp47 = tmp40 * tmp46 + tl.store(in_out_ptr0 + (r1 + (768*x0)), tmp18, rmask) + tl.store(out_ptr2 + (r1 + (768*x0)), tmp41, rmask) + tl.store(out_ptr3 + (r1 + (768*x0)), tmp45, rmask) + tl.store(out_ptr4 + (x0), tmp47, None) + ''', device_str='cuda') + + + # kernel path: /tmp/tmp2ln889l5/kg/ckgkhr4vlxuqepuczwft7i266h4lpr667eoo2cb6w36y3bmg4a4p.py + # Source Nodes: [add_2, add_3, mul, mul_1, mul_2, pow_1, tanh, x_3], Original ATen: [aten.add, aten.mul, aten.pow, aten.tanh] + # add_2 => add_6 + # add_3 => add_7 + # mul => mul_4 + # mul_1 => mul_5 + # mul_2 => mul_6 + # pow_1 => pow_1 + # tanh => tanh + # x_3 => mul_7 + triton_poi_fused_add_mul_pow_tanh_2 = async_compile.triton('triton_', ''' + import triton + import triton.language as tl + from triton.compiler.compiler import AttrsDescriptor + + from torch._inductor.runtime import triton_helpers, triton_heuristics + from torch._inductor.runtime.triton_helpers import libdevice, math as tl_math + from torch._inductor.runtime.hints import AutotuneHint, ReductionHint, TileHint, instance_descriptor, DeviceProperties + + @triton_heuristics.pointwise( + size_hints=[262144], + filename=__file__, + triton_meta={'signature': {0: '*fp32', 1: '*fp32', 2: 'i32'}, 'device': DeviceProperties(type='cuda', index=0, cc=90, major=9, regs_per_multiprocessor=65536, max_threads_per_multi_processor=2048, multi_processor_count=132), 'constants': {}, 'configs': [AttrsDescriptor(divisible_by_16=(0, 1, 2), equal_to_1=())]}, + inductor_meta={'autotune_hints': set(), 'kernel_name': 'triton_poi_fused_add_mul_pow_tanh_2', 'mutated_arg_names': [], 'no_x_dim': False, 'num_load': 1, 'num_reduction': 0, 'backend_hash': 'B0714AF1B5CA55D285C0AA74C7669268EFE45FDD9B7A0A70183B8D89AC8FCAC4', 'are_deterministic_algorithms_enabled': False, 'assert_indirect_indexing': True, 'autotune_local_cache': True, 'autotune_pointwise': True, 'autotune_remote_cache': None, 'force_disable_caches': False, 'dynamic_scale_rblock': True, 'max_autotune': False, 'max_autotune_pointwise': False, 'min_split_scan_rblock': 256, 'spill_threshold': 16, 'store_cubin': False}, + min_elem_per_thread=0 + ) + @triton.jit + def triton_(in_ptr0, out_ptr0, xnumel, XBLOCK : tl.constexpr): + xnumel = 196608 + xoffset = tl.program_id(0) * XBLOCK + xindex = xoffset + tl.arange(0, XBLOCK)[:] + xmask = tl.full([XBLOCK], True, tl.int1) + x0 = xindex + tmp0 = tl.load(in_ptr0 + (x0), None) + tmp1 = 0.5 + tmp2 = tmp0 * tmp1 + tmp3 = tmp0 * tmp0 + tmp4 = tmp3 * tmp0 + tmp5 = 0.044715 + tmp6 = tmp4 * tmp5 + tmp7 = tmp0 + tmp6 + tmp8 = 0.7978845608028654 + tmp9 = tmp7 * tmp8 + tmp10 = libdevice.tanh(tmp9) + tmp11 = 1.0 + tmp12 = tmp10 + tmp11 + tmp13 = tmp2 * tmp12 + tl.store(out_ptr0 + (x0), tmp13, None) + ''', device_str='cuda') + + + # kernel path: /tmp/tmp2ln889l5/op/copzf2xr7ackxgbya76mhoxugg5bcf7szouu3u7calkmq6tn64cz.py + # Source Nodes: [layer_norm_2, x_6], Original ATen: [aten.add, aten.native_layer_norm, aten.native_layer_norm_backward] + # layer_norm_2 => add_10, add_9, mul_8, mul_9, rsqrt_2, sub_2, var_mean_2 + # x_6 => add_8 + triton_per_fused_add_native_layer_norm_native_layer_norm_backward_3 = async_compile.triton('triton_', ''' + import triton + import triton.language as tl + from triton.compiler.compiler import 
AttrsDescriptor + + from torch._inductor.runtime import triton_helpers, triton_heuristics + from torch._inductor.runtime.triton_helpers import libdevice, math as tl_math + from torch._inductor.runtime.hints import AutotuneHint, ReductionHint, TileHint, instance_descriptor, DeviceProperties + + @triton_heuristics.persistent_reduction( + size_hints=[64, 1024], + reduction_hint=ReductionHint.INNER, + filename=__file__, + triton_meta={'signature': {0: '*fp32', 1: '*fp32', 2: '*fp32', 3: '*fp32', 4: '*fp32', 5: '*fp32', 6: '*fp32', 7: '*fp32', 8: 'i32', 9: 'i32'}, 'device': DeviceProperties(type='cuda', index=0, cc=90, major=9, regs_per_multiprocessor=65536, max_threads_per_multi_processor=2048, multi_processor_count=132), 'constants': {}, 'configs': [AttrsDescriptor(divisible_by_16=(0, 1, 2, 3, 4, 5, 6, 7, 8, 9), equal_to_1=())]}, + inductor_meta={'autotune_hints': set(), 'kernel_name': 'triton_per_fused_add_native_layer_norm_native_layer_norm_backward_3', 'mutated_arg_names': [], 'no_x_dim': True, 'num_load': 5, 'num_reduction': 4, 'backend_hash': 'B0714AF1B5CA55D285C0AA74C7669268EFE45FDD9B7A0A70183B8D89AC8FCAC4', 'are_deterministic_algorithms_enabled': False, 'assert_indirect_indexing': True, 'autotune_local_cache': True, 'autotune_pointwise': True, 'autotune_remote_cache': None, 'force_disable_caches': False, 'dynamic_scale_rblock': True, 'max_autotune': False, 'max_autotune_pointwise': False, 'min_split_scan_rblock': 256, 'spill_threshold': 16, 'store_cubin': False} + ) + @triton.jit + def triton_(in_ptr0, in_ptr1, in_ptr2, in_ptr3, in_ptr4, out_ptr2, out_ptr3, out_ptr4, xnumel, rnumel): + xnumel = 64 + XBLOCK: tl.constexpr = 1 + rnumel = 768 + RBLOCK: tl.constexpr = 1024 + xoffset = tl.program_id(0) * XBLOCK + xindex = tl.full([1], xoffset, tl.int32) + xmask = tl.full([RBLOCK], True, tl.int1) + rindex = tl.arange(0, RBLOCK)[:] + roffset = 0 + rmask = rindex < rnumel + r1 = rindex + x0 = xindex + tmp0 = tl.load(in_ptr0 + (r1 + (768*x0)), rmask, other=0.0) + tmp1 = tl.load(in_ptr1 + (r1 + (768*x0)), rmask, other=0.0) + tmp2 = tl.load(in_ptr2 + (r1), rmask, eviction_policy='evict_last', other=0.0) + tmp28 = tl.load(in_ptr3 + (r1), rmask, eviction_policy='evict_last', other=0.0) + tmp30 = tl.load(in_ptr4 + (r1), rmask, eviction_policy='evict_last', other=0.0) + tmp3 = tmp1 + tmp2 + tmp4 = tmp0 + tmp3 + tmp5 = tl.broadcast_to(tmp4, [RBLOCK]) + tmp7 = tl.where(rmask, tmp5, 0) + tmp8 = tl.broadcast_to(tmp5, [RBLOCK]) + tmp10 = tl.where(rmask, tmp8, 0) + tmp11 = triton_helpers.promote_to_tensor(tl.sum(tmp10, 0)) + tmp12 = tl.full([1], 768, tl.int32) + tmp13 = tmp12.to(tl.float32) + tmp14 = tmp11 / tmp13 + tmp15 = tmp5 - tmp14 + tmp16 = tmp15 * tmp15 + tmp17 = tl.broadcast_to(tmp16, [RBLOCK]) + tmp19 = tl.where(rmask, tmp17, 0) + tmp20 = triton_helpers.promote_to_tensor(tl.sum(tmp19, 0)) + tmp21 = tmp4 - tmp14 + tmp22 = 768.0 + tmp23 = tmp20 / tmp22 + tmp24 = 1e-05 + tmp25 = tmp23 + tmp24 + tmp26 = libdevice.rsqrt(tmp25) + tmp27 = tmp21 * tmp26 + tmp29 = tmp27 * tmp28 + tmp31 = tmp29 + tmp30 + tmp32 = 0.0013020833333333333 + tmp33 = tmp26 * tmp32 + tl.store(out_ptr2 + (r1 + (768*x0)), tmp27, rmask) + tl.store(out_ptr3 + (r1 + (768*x0)), tmp31, rmask) + tl.store(out_ptr4 + (x0), tmp33, None) + ''', device_str='cuda') + + + # kernel path: /tmp/tmp2ln889l5/ah/cahflggzhsvj3obucpotrc2sxhrjrpacfo22i6ocbqiklyoyzdzy.py + # Source Nodes: [layer_norm_3, x_6, x_7], Original ATen: [aten.add, aten.native_layer_norm, aten.native_layer_norm_backward] + # layer_norm_3 => add_12, add_13, mul_10, mul_11, rsqrt_3, 
sub_3, var_mean_3 + # x_6 => add_8 + # x_7 => add_11 + triton_per_fused_add_native_layer_norm_native_layer_norm_backward_4 = async_compile.triton('triton_', ''' + import triton + import triton.language as tl + from triton.compiler.compiler import AttrsDescriptor + + from torch._inductor.runtime import triton_helpers, triton_heuristics + from torch._inductor.runtime.triton_helpers import libdevice, math as tl_math + from torch._inductor.runtime.hints import AutotuneHint, ReductionHint, TileHint, instance_descriptor, DeviceProperties + + @triton_heuristics.persistent_reduction( + size_hints=[64, 1024], + reduction_hint=ReductionHint.INNER, + filename=__file__, + triton_meta={'signature': {0: '*fp32', 1: '*fp32', 2: '*fp32', 3: '*fp32', 4: '*fp32', 5: '*fp32', 6: '*fp32', 7: '*fp32', 8: '*fp32', 9: '*fp32', 10: 'i32', 11: 'i32'}, 'device': DeviceProperties(type='cuda', index=0, cc=90, major=9, regs_per_multiprocessor=65536, max_threads_per_multi_processor=2048, multi_processor_count=132), 'constants': {}, 'configs': [AttrsDescriptor(divisible_by_16=(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11), equal_to_1=())]}, + inductor_meta={'autotune_hints': set(), 'kernel_name': 'triton_per_fused_add_native_layer_norm_native_layer_norm_backward_4', 'mutated_arg_names': ['in_out_ptr0'], 'no_x_dim': True, 'num_load': 7, 'num_reduction': 4, 'backend_hash': 'B0714AF1B5CA55D285C0AA74C7669268EFE45FDD9B7A0A70183B8D89AC8FCAC4', 'are_deterministic_algorithms_enabled': False, 'assert_indirect_indexing': True, 'autotune_local_cache': True, 'autotune_pointwise': True, 'autotune_remote_cache': None, 'force_disable_caches': False, 'dynamic_scale_rblock': True, 'max_autotune': False, 'max_autotune_pointwise': False, 'min_split_scan_rblock': 256, 'spill_threshold': 16, 'store_cubin': False} + ) + @triton.jit + def triton_(in_out_ptr0, in_ptr0, in_ptr1, in_ptr2, in_ptr3, in_ptr4, in_ptr5, out_ptr2, out_ptr3, out_ptr4, xnumel, rnumel): + xnumel = 64 + XBLOCK: tl.constexpr = 1 + rnumel = 768 + RBLOCK: tl.constexpr = 1024 + xoffset = tl.program_id(0) * XBLOCK + xindex = tl.full([1], xoffset, tl.int32) + xmask = tl.full([RBLOCK], True, tl.int1) + rindex = tl.arange(0, RBLOCK)[:] + roffset = 0 + rmask = rindex < rnumel + r1 = rindex + x0 = xindex + tmp0 = tl.load(in_ptr0 + (r1 + (768*x0)), rmask, other=0.0) + tmp1 = tl.load(in_ptr1 + (r1 + (768*x0)), rmask, other=0.0) + tmp2 = tl.load(in_ptr2 + (r1), rmask, eviction_policy='evict_last', other=0.0) + tmp5 = tl.load(in_out_ptr0 + (r1 + (768*x0)), rmask, other=0.0) + tmp6 = tl.load(in_ptr3 + (r1), rmask, eviction_policy='evict_last', other=0.0) + tmp32 = tl.load(in_ptr4 + (r1), rmask, eviction_policy='evict_last', other=0.0) + tmp34 = tl.load(in_ptr5 + (r1), rmask, eviction_policy='evict_last', other=0.0) + tmp3 = tmp1 + tmp2 + tmp4 = tmp0 + tmp3 + tmp7 = tmp5 + tmp6 + tmp8 = tmp4 + tmp7 + tmp9 = tl.broadcast_to(tmp8, [RBLOCK]) + tmp11 = tl.where(rmask, tmp9, 0) + tmp12 = tl.broadcast_to(tmp9, [RBLOCK]) + tmp14 = tl.where(rmask, tmp12, 0) + tmp15 = triton_helpers.promote_to_tensor(tl.sum(tmp14, 0)) + tmp16 = tl.full([1], 768, tl.int32) + tmp17 = tmp16.to(tl.float32) + tmp18 = tmp15 / tmp17 + tmp19 = tmp9 - tmp18 + tmp20 = tmp19 * tmp19 + tmp21 = tl.broadcast_to(tmp20, [RBLOCK]) + tmp23 = tl.where(rmask, tmp21, 0) + tmp24 = triton_helpers.promote_to_tensor(tl.sum(tmp23, 0)) + tmp25 = tmp8 - tmp18 + tmp26 = 768.0 + tmp27 = tmp24 / tmp26 + tmp28 = 1e-05 + tmp29 = tmp27 + tmp28 + tmp30 = libdevice.rsqrt(tmp29) + tmp31 = tmp25 * tmp30 + tmp33 = tmp31 * tmp32 + tmp35 = tmp33 + tmp34 + tmp36 
= 0.0013020833333333333 + tmp37 = tmp30 * tmp36 + tl.store(in_out_ptr0 + (r1 + (768*x0)), tmp8, rmask) + tl.store(out_ptr2 + (r1 + (768*x0)), tmp31, rmask) + tl.store(out_ptr3 + (r1 + (768*x0)), tmp35, rmask) + tl.store(out_ptr4 + (x0), tmp37, None) + ''', device_str='cuda') + + + # kernel path: /tmp/tmp2ln889l5/zr/czrac7rfezxm2zkcroo5lzwzcd2xr3jtjf3s7p5cgvsqj2nr6rzf.py + # Source Nodes: [x_72, x_73], Original ATen: [aten.add, aten.native_layer_norm, aten.native_layer_norm_backward] + # x_72 => add_96 + # x_73 => add_97, mul_96, rsqrt_24, sub_24, var_mean_24 + triton_per_fused_add_native_layer_norm_native_layer_norm_backward_5 = async_compile.triton('triton_', ''' + import triton + import triton.language as tl + from triton.compiler.compiler import AttrsDescriptor + + from torch._inductor.runtime import triton_helpers, triton_heuristics + from torch._inductor.runtime.triton_helpers import libdevice, math as tl_math + from torch._inductor.runtime.hints import AutotuneHint, ReductionHint, TileHint, instance_descriptor, DeviceProperties + + @triton_heuristics.persistent_reduction( + size_hints=[64, 1024], + reduction_hint=ReductionHint.INNER, + filename=__file__, + triton_meta={'signature': {0: '*fp32', 1: '*fp32', 2: '*fp32', 3: '*fp32', 4: '*fp32', 5: 'i32', 6: 'i32'}, 'device': DeviceProperties(type='cuda', index=0, cc=90, major=9, regs_per_multiprocessor=65536, max_threads_per_multi_processor=2048, multi_processor_count=132), 'constants': {}, 'configs': [AttrsDescriptor(divisible_by_16=(0, 1, 2, 3, 4, 5, 6), equal_to_1=())]}, + inductor_meta={'autotune_hints': set(), 'kernel_name': 'triton_per_fused_add_native_layer_norm_native_layer_norm_backward_5', 'mutated_arg_names': [], 'no_x_dim': True, 'num_load': 3, 'num_reduction': 4, 'backend_hash': 'B0714AF1B5CA55D285C0AA74C7669268EFE45FDD9B7A0A70183B8D89AC8FCAC4', 'are_deterministic_algorithms_enabled': False, 'assert_indirect_indexing': True, 'autotune_local_cache': True, 'autotune_pointwise': True, 'autotune_remote_cache': None, 'force_disable_caches': False, 'dynamic_scale_rblock': True, 'max_autotune': False, 'max_autotune_pointwise': False, 'min_split_scan_rblock': 256, 'spill_threshold': 16, 'store_cubin': False} + ) + @triton.jit + def triton_(in_ptr0, in_ptr1, in_ptr2, out_ptr2, out_ptr3, xnumel, rnumel): + xnumel = 64 + XBLOCK: tl.constexpr = 1 + rnumel = 768 + RBLOCK: tl.constexpr = 1024 + xoffset = tl.program_id(0) * XBLOCK + xindex = tl.full([1], xoffset, tl.int32) + xmask = tl.full([RBLOCK], True, tl.int1) + rindex = tl.arange(0, RBLOCK)[:] + roffset = 0 + rmask = rindex < rnumel + r1 = rindex + x0 = xindex + tmp0 = tl.load(in_ptr0 + (r1 + (768*x0)), rmask, other=0.0) + tmp1 = tl.load(in_ptr1 + (r1 + (768*x0)), rmask, other=0.0) + tmp2 = tl.load(in_ptr2 + (r1), rmask, eviction_policy='evict_last', other=0.0) + tmp3 = tmp1 + tmp2 + tmp4 = tmp0 + tmp3 + tmp5 = tl.broadcast_to(tmp4, [RBLOCK]) + tmp7 = tl.where(rmask, tmp5, 0) + tmp8 = tl.broadcast_to(tmp5, [RBLOCK]) + tmp10 = tl.where(rmask, tmp8, 0) + tmp11 = triton_helpers.promote_to_tensor(tl.sum(tmp10, 0)) + tmp12 = tl.full([1], 768, tl.int32) + tmp13 = tmp12.to(tl.float32) + tmp14 = tmp11 / tmp13 + tmp15 = tmp5 - tmp14 + tmp16 = tmp15 * tmp15 + tmp17 = tl.broadcast_to(tmp16, [RBLOCK]) + tmp19 = tl.where(rmask, tmp17, 0) + tmp20 = triton_helpers.promote_to_tensor(tl.sum(tmp19, 0)) + tmp21 = tmp4 - tmp14 + tmp22 = 768.0 + tmp23 = tmp20 / tmp22 + tmp24 = 1e-05 + tmp25 = tmp23 + tmp24 + tmp26 = libdevice.rsqrt(tmp25) + tmp27 = tmp21 * tmp26 + tmp28 = 0.0013020833333333333 + tmp29 = 
tmp26 * tmp28 + tl.store(out_ptr2 + (r1 + (768*x0)), tmp27, rmask) + tl.store(out_ptr3 + (x0), tmp29, None) + ''', device_str='cuda') + + + # kernel path: /tmp/tmp2ln889l5/hj/chjeqvb72dcyhretu5gdrd5qjthvzyvtudwafwqfm72xofwxb6bo.py + # Source Nodes: [getitem_36], Original ATen: [aten.lift_fresh] + # getitem_36 => full_default + triton_poi_fused_lift_fresh_6 = async_compile.triton('triton_', ''' + import triton + import triton.language as tl + from triton.compiler.compiler import AttrsDescriptor + + from torch._inductor.runtime import triton_helpers, triton_heuristics + from torch._inductor.runtime.triton_helpers import libdevice, math as tl_math + from torch._inductor.runtime.hints import AutotuneHint, ReductionHint, TileHint, instance_descriptor, DeviceProperties + + @triton_heuristics.pointwise( + size_hints=[1], + filename=__file__, + triton_meta={'signature': {0: '*i64', 1: 'i32'}, 'device': DeviceProperties(type='cuda', index=0, cc=90, major=9, regs_per_multiprocessor=65536, max_threads_per_multi_processor=2048, multi_processor_count=132), 'constants': {1: 1}, 'configs': [AttrsDescriptor(divisible_by_16=(0,), equal_to_1=(1,))]}, + inductor_meta={'autotune_hints': set(), 'kernel_name': 'triton_poi_fused_lift_fresh_6', 'mutated_arg_names': [], 'no_x_dim': False, 'num_load': 0, 'num_reduction': 0, 'backend_hash': 'B0714AF1B5CA55D285C0AA74C7669268EFE45FDD9B7A0A70183B8D89AC8FCAC4', 'are_deterministic_algorithms_enabled': False, 'assert_indirect_indexing': True, 'autotune_local_cache': True, 'autotune_pointwise': True, 'autotune_remote_cache': None, 'force_disable_caches': False, 'dynamic_scale_rblock': True, 'max_autotune': False, 'max_autotune_pointwise': False, 'min_split_scan_rblock': 256, 'spill_threshold': 16, 'store_cubin': False}, + min_elem_per_thread=0 + ) + @triton.jit + def triton_(out_ptr0, xnumel, XBLOCK : tl.constexpr): + xnumel = 1 + xoffset = tl.program_id(0) * XBLOCK + xindex = xoffset + tl.arange(0, XBLOCK)[:] + xmask = tl.full([XBLOCK], True, tl.int1) + tmp0 = tl.full([1], -1, tl.int64) + tl.store(out_ptr0 + (tl.full([XBLOCK], 0, tl.int32)), tmp0, None) + ''', device_str='cuda') + + + # kernel path: /tmp/tmp2ln889l5/tg/ctgrjv6psw3vl6ccv5fzvtesrskw56ih646u36i3kxh3ljsmtwct.py + # Source Nodes: [getitem_36, x_73], Original ATen: [aten.index, aten.native_layer_norm] + # getitem_36 => index + # x_73 => add_98, mul_97 + triton_poi_fused_index_native_layer_norm_7 = async_compile.triton('triton_', ''' + import triton + import triton.language as tl + from triton.compiler.compiler import AttrsDescriptor + + from torch._inductor.runtime import triton_helpers, triton_heuristics + from torch._inductor.runtime.triton_helpers import libdevice, math as tl_math + from torch._inductor.runtime.hints import AutotuneHint, ReductionHint, TileHint, instance_descriptor, DeviceProperties + + @triton_heuristics.pointwise( + size_hints=[1024], + filename=__file__, + triton_meta={'signature': {0: '*fp32', 1: '*fp32', 2: '*fp32', 3: '*fp32', 4: 'i32'}, 'device': DeviceProperties(type='cuda', index=0, cc=90, major=9, regs_per_multiprocessor=65536, max_threads_per_multi_processor=2048, multi_processor_count=132), 'constants': {}, 'configs': [AttrsDescriptor(divisible_by_16=(0, 1, 2, 3, 4), equal_to_1=())]}, + inductor_meta={'autotune_hints': set(), 'kernel_name': 'triton_poi_fused_index_native_layer_norm_7', 'mutated_arg_names': [], 'no_x_dim': False, 'num_load': 3, 'num_reduction': 0, 'backend_hash': 'B0714AF1B5CA55D285C0AA74C7669268EFE45FDD9B7A0A70183B8D89AC8FCAC4', 
'are_deterministic_algorithms_enabled': False, 'assert_indirect_indexing': True, 'autotune_local_cache': True, 'autotune_pointwise': True, 'autotune_remote_cache': None, 'force_disable_caches': False, 'dynamic_scale_rblock': True, 'max_autotune': False, 'max_autotune_pointwise': False, 'min_split_scan_rblock': 256, 'spill_threshold': 16, 'store_cubin': False}, + min_elem_per_thread=0 + ) + @triton.jit + def triton_(in_ptr0, in_ptr1, in_ptr2, out_ptr0, xnumel, XBLOCK : tl.constexpr): + xnumel = 768 + xoffset = tl.program_id(0) * XBLOCK + xindex = xoffset + tl.arange(0, XBLOCK)[:] + xmask = xindex < xnumel + x0 = xindex + tmp0 = tl.load(in_ptr0 + (48384 + x0), xmask) + tmp1 = tl.load(in_ptr1 + (x0), xmask) + tmp3 = tl.load(in_ptr2 + (x0), xmask) + tmp2 = tmp0 * tmp1 + tmp4 = tmp2 + tmp3 + tl.store(out_ptr0 + (x0), tmp4, xmask) + ''', device_str='cuda') + + + async_compile.wait(globals()) + del async_compile + + def call(args): + primals_1, primals_2, primals_3, primals_4, primals_5, primals_6, primals_7, primals_8, primals_9, primals_10, primals_11, primals_12, primals_13, primals_14, primals_15, primals_16, primals_17, primals_18, primals_19, primals_20, primals_21, primals_22, primals_23, primals_24, primals_25, primals_26, primals_27, primals_28, primals_29, primals_30, primals_31, primals_32, primals_33, primals_34, primals_35, primals_36, primals_37, primals_38, primals_39, primals_40, primals_41, primals_42, primals_43, primals_44, primals_45, primals_46, primals_47, primals_48, primals_49, primals_50, primals_51, primals_52, primals_53, primals_54, primals_55, primals_56, primals_57, primals_58, primals_59, primals_60, primals_61, primals_62, primals_63, primals_64, primals_65, primals_66, primals_67, primals_68, primals_69, primals_70, primals_71, primals_72, primals_73, primals_74, primals_75, primals_76, primals_77, primals_78, primals_79, primals_80, primals_81, primals_82, primals_83, primals_84, primals_85, primals_86, primals_87, primals_88, primals_89, primals_90, primals_91, primals_92, primals_93, primals_94, primals_95, primals_96, primals_97, primals_98, primals_99, primals_100, primals_101, primals_102, primals_103, primals_104, primals_105, primals_106, primals_107, primals_108, primals_109, primals_110, primals_111, primals_112, primals_113, primals_114, primals_115, primals_116, primals_117, primals_118, primals_119, primals_120, primals_121, primals_122, primals_123, primals_124, primals_125, primals_126, primals_127, primals_128, primals_129, primals_130, primals_131, primals_132, primals_133, primals_134, primals_135, primals_136, primals_137, primals_138, primals_139, primals_140, primals_141, primals_142, primals_143, primals_144, primals_145, primals_146, primals_147, primals_148, primals_149 = args + args.clear() + assert_size_stride(primals_1, (1, 64), (64, 1)) + assert_size_stride(primals_2, (50304, 768), (768, 1)) + assert_size_stride(primals_3, (1024, 768), (768, 1)) + assert_size_stride(primals_4, (768, ), (1, )) + assert_size_stride(primals_5, (768, ), (1, )) + assert_size_stride(primals_6, (2304, 768), (768, 1)) + assert_size_stride(primals_7, (2304, ), (1, )) + assert_size_stride(primals_8, (768, 768), (768, 1)) + assert_size_stride(primals_9, (768, ), (1, )) + assert_size_stride(primals_10, (768, ), (1, )) + assert_size_stride(primals_11, (768, ), (1, )) + assert_size_stride(primals_12, (3072, 768), (768, 1)) + assert_size_stride(primals_13, (3072, ), (1, )) + assert_size_stride(primals_14, (768, 3072), (3072, 1)) + assert_size_stride(primals_15, (768, 
), (1, )) + assert_size_stride(primals_16, (768, ), (1, )) + assert_size_stride(primals_17, (768, ), (1, )) + assert_size_stride(primals_18, (2304, 768), (768, 1)) + assert_size_stride(primals_19, (2304, ), (1, )) + assert_size_stride(primals_20, (768, 768), (768, 1)) + assert_size_stride(primals_21, (768, ), (1, )) + assert_size_stride(primals_22, (768, ), (1, )) + assert_size_stride(primals_23, (768, ), (1, )) + assert_size_stride(primals_24, (3072, 768), (768, 1)) + assert_size_stride(primals_25, (3072, ), (1, )) + assert_size_stride(primals_26, (768, 3072), (3072, 1)) + assert_size_stride(primals_27, (768, ), (1, )) + assert_size_stride(primals_28, (768, ), (1, )) + assert_size_stride(primals_29, (768, ), (1, )) + assert_size_stride(primals_30, (2304, 768), (768, 1)) + assert_size_stride(primals_31, (2304, ), (1, )) + assert_size_stride(primals_32, (768, 768), (768, 1)) + assert_size_stride(primals_33, (768, ), (1, )) + assert_size_stride(primals_34, (768, ), (1, )) + assert_size_stride(primals_35, (768, ), (1, )) + assert_size_stride(primals_36, (3072, 768), (768, 1)) + assert_size_stride(primals_37, (3072, ), (1, )) + assert_size_stride(primals_38, (768, 3072), (3072, 1)) + assert_size_stride(primals_39, (768, ), (1, )) + assert_size_stride(primals_40, (768, ), (1, )) + assert_size_stride(primals_41, (768, ), (1, )) + assert_size_stride(primals_42, (2304, 768), (768, 1)) + assert_size_stride(primals_43, (2304, ), (1, )) + assert_size_stride(primals_44, (768, 768), (768, 1)) + assert_size_stride(primals_45, (768, ), (1, )) + assert_size_stride(primals_46, (768, ), (1, )) + assert_size_stride(primals_47, (768, ), (1, )) + assert_size_stride(primals_48, (3072, 768), (768, 1)) + assert_size_stride(primals_49, (3072, ), (1, )) + assert_size_stride(primals_50, (768, 3072), (3072, 1)) + assert_size_stride(primals_51, (768, ), (1, )) + assert_size_stride(primals_52, (768, ), (1, )) + assert_size_stride(primals_53, (768, ), (1, )) + assert_size_stride(primals_54, (2304, 768), (768, 1)) + assert_size_stride(primals_55, (2304, ), (1, )) + assert_size_stride(primals_56, (768, 768), (768, 1)) + assert_size_stride(primals_57, (768, ), (1, )) + assert_size_stride(primals_58, (768, ), (1, )) + assert_size_stride(primals_59, (768, ), (1, )) + assert_size_stride(primals_60, (3072, 768), (768, 1)) + assert_size_stride(primals_61, (3072, ), (1, )) + assert_size_stride(primals_62, (768, 3072), (3072, 1)) + assert_size_stride(primals_63, (768, ), (1, )) + assert_size_stride(primals_64, (768, ), (1, )) + assert_size_stride(primals_65, (768, ), (1, )) + assert_size_stride(primals_66, (2304, 768), (768, 1)) + assert_size_stride(primals_67, (2304, ), (1, )) + assert_size_stride(primals_68, (768, 768), (768, 1)) + assert_size_stride(primals_69, (768, ), (1, )) + assert_size_stride(primals_70, (768, ), (1, )) + assert_size_stride(primals_71, (768, ), (1, )) + assert_size_stride(primals_72, (3072, 768), (768, 1)) + assert_size_stride(primals_73, (3072, ), (1, )) + assert_size_stride(primals_74, (768, 3072), (3072, 1)) + assert_size_stride(primals_75, (768, ), (1, )) + assert_size_stride(primals_76, (768, ), (1, )) + assert_size_stride(primals_77, (768, ), (1, )) + assert_size_stride(primals_78, (2304, 768), (768, 1)) + assert_size_stride(primals_79, (2304, ), (1, )) + assert_size_stride(primals_80, (768, 768), (768, 1)) + assert_size_stride(primals_81, (768, ), (1, )) + assert_size_stride(primals_82, (768, ), (1, )) + assert_size_stride(primals_83, (768, ), (1, )) + assert_size_stride(primals_84, (3072, 768), 
(768, 1)) + assert_size_stride(primals_85, (3072, ), (1, )) + assert_size_stride(primals_86, (768, 3072), (3072, 1)) + assert_size_stride(primals_87, (768, ), (1, )) + assert_size_stride(primals_88, (768, ), (1, )) + assert_size_stride(primals_89, (768, ), (1, )) + assert_size_stride(primals_90, (2304, 768), (768, 1)) + assert_size_stride(primals_91, (2304, ), (1, )) + assert_size_stride(primals_92, (768, 768), (768, 1)) + assert_size_stride(primals_93, (768, ), (1, )) + assert_size_stride(primals_94, (768, ), (1, )) + assert_size_stride(primals_95, (768, ), (1, )) + assert_size_stride(primals_96, (3072, 768), (768, 1)) + assert_size_stride(primals_97, (3072, ), (1, )) + assert_size_stride(primals_98, (768, 3072), (3072, 1)) + assert_size_stride(primals_99, (768, ), (1, )) + assert_size_stride(primals_100, (768, ), (1, )) + assert_size_stride(primals_101, (768, ), (1, )) + assert_size_stride(primals_102, (2304, 768), (768, 1)) + assert_size_stride(primals_103, (2304, ), (1, )) + assert_size_stride(primals_104, (768, 768), (768, 1)) + assert_size_stride(primals_105, (768, ), (1, )) + assert_size_stride(primals_106, (768, ), (1, )) + assert_size_stride(primals_107, (768, ), (1, )) + assert_size_stride(primals_108, (3072, 768), (768, 1)) + assert_size_stride(primals_109, (3072, ), (1, )) + assert_size_stride(primals_110, (768, 3072), (3072, 1)) + assert_size_stride(primals_111, (768, ), (1, )) + assert_size_stride(primals_112, (768, ), (1, )) + assert_size_stride(primals_113, (768, ), (1, )) + assert_size_stride(primals_114, (2304, 768), (768, 1)) + assert_size_stride(primals_115, (2304, ), (1, )) + assert_size_stride(primals_116, (768, 768), (768, 1)) + assert_size_stride(primals_117, (768, ), (1, )) + assert_size_stride(primals_118, (768, ), (1, )) + assert_size_stride(primals_119, (768, ), (1, )) + assert_size_stride(primals_120, (3072, 768), (768, 1)) + assert_size_stride(primals_121, (3072, ), (1, )) + assert_size_stride(primals_122, (768, 3072), (3072, 1)) + assert_size_stride(primals_123, (768, ), (1, )) + assert_size_stride(primals_124, (768, ), (1, )) + assert_size_stride(primals_125, (768, ), (1, )) + assert_size_stride(primals_126, (2304, 768), (768, 1)) + assert_size_stride(primals_127, (2304, ), (1, )) + assert_size_stride(primals_128, (768, 768), (768, 1)) + assert_size_stride(primals_129, (768, ), (1, )) + assert_size_stride(primals_130, (768, ), (1, )) + assert_size_stride(primals_131, (768, ), (1, )) + assert_size_stride(primals_132, (3072, 768), (768, 1)) + assert_size_stride(primals_133, (3072, ), (1, )) + assert_size_stride(primals_134, (768, 3072), (3072, 1)) + assert_size_stride(primals_135, (768, ), (1, )) + assert_size_stride(primals_136, (768, ), (1, )) + assert_size_stride(primals_137, (768, ), (1, )) + assert_size_stride(primals_138, (2304, 768), (768, 1)) + assert_size_stride(primals_139, (2304, ), (1, )) + assert_size_stride(primals_140, (768, 768), (768, 1)) + assert_size_stride(primals_141, (768, ), (1, )) + assert_size_stride(primals_142, (768, ), (1, )) + assert_size_stride(primals_143, (768, ), (1, )) + assert_size_stride(primals_144, (3072, 768), (768, 1)) + assert_size_stride(primals_145, (3072, ), (1, )) + assert_size_stride(primals_146, (768, 3072), (3072, 1)) + assert_size_stride(primals_147, (768, ), (1, )) + assert_size_stride(primals_148, (768, ), (1, )) + assert_size_stride(primals_149, (768, ), (1, )) + with torch.cuda._DeviceGuard(0): + torch.cuda.set_device(0) + buf0 = empty_strided_cuda((64, ), (1, ), torch.int64) + buf4 = empty_strided_cuda((1, 
64, 768), (49152, 768, 1), torch.float32) + buf5 = empty_strided_cuda((1, 64, 768), (49152, 768, 1), torch.float32) + buf284 = empty_strided_cuda((1, 64, 1), (64, 1, 1), torch.float32) + # Source Nodes: [add, arange, layer_norm, pos_emb, tok_emb], Original ATen: [aten.add, aten.arange, aten.embedding, aten.native_layer_norm, aten.native_layer_norm_backward] + stream0 = get_raw_stream(0) + triton_red_fused_add_arange_embedding_native_layer_norm_native_layer_norm_backward_0.run(primals_1, primals_2, primals_3, primals_4, primals_5, buf0, buf4, buf5, buf284, 64, 768, grid=grid(64), stream=stream0) + del primals_5 + buf6 = empty_strided_cuda((64, 2304), (2304, 1), torch.float32) + # Source Nodes: [linear], Original ATen: [aten.addmm] + extern_kernels.addmm(primals_7, reinterpret_tensor(buf5, (64, 768), (768, 1), 0), reinterpret_tensor(primals_6, (768, 2304), (1, 768), 0), alpha=1, beta=1, out=buf6) + del primals_7 + # Source Nodes: [y], Original ATen: [aten._scaled_dot_product_efficient_attention] + buf7 = torch.ops.aten._scaled_dot_product_efficient_attention.default(reinterpret_tensor(buf6, (1, 12, 64, 64), (0, 64, 2304, 1), 0), reinterpret_tensor(buf6, (1, 12, 64, 64), (0, 64, 2304, 1), 768), reinterpret_tensor(buf6, (1, 12, 64, 64), (0, 64, 2304, 1), 1536), None, True, 0.0, True) + buf8 = buf7[0] + buf9 = buf7[1] + buf10 = buf7[2] + buf11 = buf7[3] + del buf7 + buf12 = empty_strided_cuda((64, 768), (768, 1), torch.float32) + # Source Nodes: [], Original ATen: [] + extern_kernels.mm(reinterpret_tensor(buf8, (64, 768), (768, 1), 0), reinterpret_tensor(primals_8, (768, 768), (1, 768), 0), out=buf12) + buf13 = reinterpret_tensor(buf12, (1, 64, 768), (49152, 768, 1), 0); del buf12 # reuse + buf17 = empty_strided_cuda((1, 64, 768), (49152, 768, 1), torch.float32) + buf18 = empty_strided_cuda((1, 64, 768), (49152, 768, 1), torch.float32) + buf283 = empty_strided_cuda((1, 64, 1), (64, 1, 1), torch.float32) + # Source Nodes: [add, layer_norm_1, pos_emb, tok_emb, x_1], Original ATen: [aten.add, aten.embedding, aten.native_layer_norm, aten.native_layer_norm_backward] + triton_per_fused_add_embedding_native_layer_norm_native_layer_norm_backward_1.run(buf13, primals_1, primals_2, buf0, primals_3, primals_9, primals_10, primals_11, buf17, buf18, buf283, 64, 768, grid=grid(64), stream=stream0) + del primals_11 + del primals_3 + del primals_9 + buf19 = empty_strided_cuda((64, 3072), (3072, 1), torch.float32) + # Source Nodes: [x_2], Original ATen: [aten.addmm] + extern_kernels.addmm(primals_13, reinterpret_tensor(buf18, (64, 768), (768, 1), 0), reinterpret_tensor(primals_12, (768, 3072), (1, 768), 0), alpha=1, beta=1, out=buf19) + del primals_13 + buf20 = empty_strided_cuda((1, 64, 3072), (196608, 3072, 1), torch.float32) + # Source Nodes: [add_2, add_3, mul, mul_1, mul_2, pow_1, tanh, x_3], Original ATen: [aten.add, aten.mul, aten.pow, aten.tanh] + triton_poi_fused_add_mul_pow_tanh_2.run(buf19, buf20, 196608, grid=grid(196608), stream=stream0) + buf21 = empty_strided_cuda((64, 768), (768, 1), torch.float32) + # Source Nodes: [], Original ATen: [] + extern_kernels.mm(reinterpret_tensor(buf20, (64, 3072), (3072, 1), 0), reinterpret_tensor(primals_14, (3072, 768), (1, 3072), 0), out=buf21) + buf25 = empty_strided_cuda((1, 64, 768), (49152, 768, 1), torch.float32) + buf26 = empty_strided_cuda((1, 64, 768), (49152, 768, 1), torch.float32) + buf282 = empty_strided_cuda((1, 64, 1), (64, 1, 1), torch.float32) + # Source Nodes: [layer_norm_2, x_6], Original ATen: [aten.add, aten.native_layer_norm, 
aten.native_layer_norm_backward] + triton_per_fused_add_native_layer_norm_native_layer_norm_backward_3.run(buf13, buf21, primals_15, primals_16, primals_17, buf25, buf26, buf282, 64, 768, grid=grid(64), stream=stream0) + del primals_17 + buf27 = empty_strided_cuda((64, 2304), (2304, 1), torch.float32) + # Source Nodes: [linear_4], Original ATen: [aten.addmm] + extern_kernels.addmm(primals_19, reinterpret_tensor(buf26, (64, 768), (768, 1), 0), reinterpret_tensor(primals_18, (768, 2304), (1, 768), 0), alpha=1, beta=1, out=buf27) + del primals_19 + # Source Nodes: [y_3], Original ATen: [aten._scaled_dot_product_efficient_attention] + buf28 = torch.ops.aten._scaled_dot_product_efficient_attention.default(reinterpret_tensor(buf27, (1, 12, 64, 64), (0, 64, 2304, 1), 0), reinterpret_tensor(buf27, (1, 12, 64, 64), (0, 64, 2304, 1), 768), reinterpret_tensor(buf27, (1, 12, 64, 64), (0, 64, 2304, 1), 1536), None, True, 0.0, True) + buf29 = buf28[0] + buf30 = buf28[1] + buf31 = buf28[2] + buf32 = buf28[3] + del buf28 + buf33 = empty_strided_cuda((64, 768), (768, 1), torch.float32) + # Source Nodes: [], Original ATen: [] + extern_kernels.mm(reinterpret_tensor(buf29, (64, 768), (768, 1), 0), reinterpret_tensor(primals_20, (768, 768), (1, 768), 0), out=buf33) + buf34 = reinterpret_tensor(buf33, (1, 64, 768), (49152, 768, 1), 0); del buf33 # reuse + buf38 = empty_strided_cuda((1, 64, 768), (49152, 768, 1), torch.float32) + buf39 = empty_strided_cuda((1, 64, 768), (49152, 768, 1), torch.float32) + buf281 = empty_strided_cuda((1, 64, 1), (64, 1, 1), torch.float32) + # Source Nodes: [layer_norm_3, x_6, x_7], Original ATen: [aten.add, aten.native_layer_norm, aten.native_layer_norm_backward] + triton_per_fused_add_native_layer_norm_native_layer_norm_backward_4.run(buf34, buf13, buf21, primals_15, primals_21, primals_22, primals_23, buf38, buf39, buf281, 64, 768, grid=grid(64), stream=stream0) + del primals_15 + del primals_21 + del primals_23 + buf40 = empty_strided_cuda((64, 3072), (3072, 1), torch.float32) + # Source Nodes: [x_8], Original ATen: [aten.addmm] + extern_kernels.addmm(primals_25, reinterpret_tensor(buf39, (64, 768), (768, 1), 0), reinterpret_tensor(primals_24, (768, 3072), (1, 768), 0), alpha=1, beta=1, out=buf40) + del primals_25 + buf41 = empty_strided_cuda((1, 64, 3072), (196608, 3072, 1), torch.float32) + # Source Nodes: [add_6, add_7, mul_4, mul_5, mul_6, pow_2, tanh_1, x_9], Original ATen: [aten.add, aten.mul, aten.pow, aten.tanh] + triton_poi_fused_add_mul_pow_tanh_2.run(buf40, buf41, 196608, grid=grid(196608), stream=stream0) + buf42 = buf21; del buf21 # reuse + # Source Nodes: [], Original ATen: [] + extern_kernels.mm(reinterpret_tensor(buf41, (64, 3072), (3072, 1), 0), reinterpret_tensor(primals_26, (3072, 768), (1, 3072), 0), out=buf42) + buf46 = buf13; del buf13 # reuse + buf47 = empty_strided_cuda((1, 64, 768), (49152, 768, 1), torch.float32) + buf280 = empty_strided_cuda((1, 64, 1), (64, 1, 1), torch.float32) + # Source Nodes: [layer_norm_4, x_12], Original ATen: [aten.add, aten.native_layer_norm, aten.native_layer_norm_backward] + triton_per_fused_add_native_layer_norm_native_layer_norm_backward_3.run(buf34, buf42, primals_27, primals_28, primals_29, buf46, buf47, buf280, 64, 768, grid=grid(64), stream=stream0) + del primals_29 + buf48 = empty_strided_cuda((64, 2304), (2304, 1), torch.float32) + # Source Nodes: [linear_8], Original ATen: [aten.addmm] + extern_kernels.addmm(primals_31, reinterpret_tensor(buf47, (64, 768), (768, 1), 0), reinterpret_tensor(primals_30, (768, 2304), 
(1, 768), 0), alpha=1, beta=1, out=buf48) + del primals_31 + # Source Nodes: [y_6], Original ATen: [aten._scaled_dot_product_efficient_attention] + buf49 = torch.ops.aten._scaled_dot_product_efficient_attention.default(reinterpret_tensor(buf48, (1, 12, 64, 64), (0, 64, 2304, 1), 0), reinterpret_tensor(buf48, (1, 12, 64, 64), (0, 64, 2304, 1), 768), reinterpret_tensor(buf48, (1, 12, 64, 64), (0, 64, 2304, 1), 1536), None, True, 0.0, True) + buf50 = buf49[0] + buf51 = buf49[1] + buf52 = buf49[2] + buf53 = buf49[3] + del buf49 + buf54 = empty_strided_cuda((64, 768), (768, 1), torch.float32) + # Source Nodes: [], Original ATen: [] + extern_kernels.mm(reinterpret_tensor(buf50, (64, 768), (768, 1), 0), reinterpret_tensor(primals_32, (768, 768), (1, 768), 0), out=buf54) + buf55 = reinterpret_tensor(buf54, (1, 64, 768), (49152, 768, 1), 0); del buf54 # reuse + buf59 = empty_strided_cuda((1, 64, 768), (49152, 768, 1), torch.float32) + buf60 = empty_strided_cuda((1, 64, 768), (49152, 768, 1), torch.float32) + buf279 = empty_strided_cuda((1, 64, 1), (64, 1, 1), torch.float32) + # Source Nodes: [layer_norm_5, x_12, x_13], Original ATen: [aten.add, aten.native_layer_norm, aten.native_layer_norm_backward] + triton_per_fused_add_native_layer_norm_native_layer_norm_backward_4.run(buf55, buf34, buf42, primals_27, primals_33, primals_34, primals_35, buf59, buf60, buf279, 64, 768, grid=grid(64), stream=stream0) + del primals_27 + del primals_33 + del primals_35 + buf61 = empty_strided_cuda((64, 3072), (3072, 1), torch.float32) + # Source Nodes: [x_14], Original ATen: [aten.addmm] + extern_kernels.addmm(primals_37, reinterpret_tensor(buf60, (64, 768), (768, 1), 0), reinterpret_tensor(primals_36, (768, 3072), (1, 768), 0), alpha=1, beta=1, out=buf61) + del primals_37 + buf62 = empty_strided_cuda((1, 64, 3072), (196608, 3072, 1), torch.float32) + # Source Nodes: [add_10, add_11, mul_10, mul_8, mul_9, pow_3, tanh_2, x_15], Original ATen: [aten.add, aten.mul, aten.pow, aten.tanh] + triton_poi_fused_add_mul_pow_tanh_2.run(buf61, buf62, 196608, grid=grid(196608), stream=stream0) + buf63 = buf42; del buf42 # reuse + # Source Nodes: [], Original ATen: [] + extern_kernels.mm(reinterpret_tensor(buf62, (64, 3072), (3072, 1), 0), reinterpret_tensor(primals_38, (3072, 768), (1, 3072), 0), out=buf63) + buf67 = buf34; del buf34 # reuse + buf68 = empty_strided_cuda((1, 64, 768), (49152, 768, 1), torch.float32) + buf278 = empty_strided_cuda((1, 64, 1), (64, 1, 1), torch.float32) + # Source Nodes: [layer_norm_6, x_18], Original ATen: [aten.add, aten.native_layer_norm, aten.native_layer_norm_backward] + triton_per_fused_add_native_layer_norm_native_layer_norm_backward_3.run(buf55, buf63, primals_39, primals_40, primals_41, buf67, buf68, buf278, 64, 768, grid=grid(64), stream=stream0) + del primals_41 + buf69 = empty_strided_cuda((64, 2304), (2304, 1), torch.float32) + # Source Nodes: [linear_12], Original ATen: [aten.addmm] + extern_kernels.addmm(primals_43, reinterpret_tensor(buf68, (64, 768), (768, 1), 0), reinterpret_tensor(primals_42, (768, 2304), (1, 768), 0), alpha=1, beta=1, out=buf69) + del primals_43 + # Source Nodes: [y_9], Original ATen: [aten._scaled_dot_product_efficient_attention] + buf70 = torch.ops.aten._scaled_dot_product_efficient_attention.default(reinterpret_tensor(buf69, (1, 12, 64, 64), (0, 64, 2304, 1), 0), reinterpret_tensor(buf69, (1, 12, 64, 64), (0, 64, 2304, 1), 768), reinterpret_tensor(buf69, (1, 12, 64, 64), (0, 64, 2304, 1), 1536), None, True, 0.0, True) + buf71 = buf70[0] + buf72 = buf70[1] + 
buf73 = buf70[2] + buf74 = buf70[3] + del buf70 + buf75 = empty_strided_cuda((64, 768), (768, 1), torch.float32) + # Source Nodes: [], Original ATen: [] + extern_kernels.mm(reinterpret_tensor(buf71, (64, 768), (768, 1), 0), reinterpret_tensor(primals_44, (768, 768), (1, 768), 0), out=buf75) + buf76 = reinterpret_tensor(buf75, (1, 64, 768), (49152, 768, 1), 0); del buf75 # reuse + buf80 = empty_strided_cuda((1, 64, 768), (49152, 768, 1), torch.float32) + buf81 = empty_strided_cuda((1, 64, 768), (49152, 768, 1), torch.float32) + buf277 = empty_strided_cuda((1, 64, 1), (64, 1, 1), torch.float32) + # Source Nodes: [layer_norm_7, x_18, x_19], Original ATen: [aten.add, aten.native_layer_norm, aten.native_layer_norm_backward] + triton_per_fused_add_native_layer_norm_native_layer_norm_backward_4.run(buf76, buf55, buf63, primals_39, primals_45, primals_46, primals_47, buf80, buf81, buf277, 64, 768, grid=grid(64), stream=stream0) + del primals_39 + del primals_45 + del primals_47 + buf82 = empty_strided_cuda((64, 3072), (3072, 1), torch.float32) + # Source Nodes: [x_20], Original ATen: [aten.addmm] + extern_kernels.addmm(primals_49, reinterpret_tensor(buf81, (64, 768), (768, 1), 0), reinterpret_tensor(primals_48, (768, 3072), (1, 768), 0), alpha=1, beta=1, out=buf82) + del primals_49 + buf83 = empty_strided_cuda((1, 64, 3072), (196608, 3072, 1), torch.float32) + # Source Nodes: [add_14, add_15, mul_12, mul_13, mul_14, pow_4, tanh_3, x_21], Original ATen: [aten.add, aten.mul, aten.pow, aten.tanh] + triton_poi_fused_add_mul_pow_tanh_2.run(buf82, buf83, 196608, grid=grid(196608), stream=stream0) + buf84 = buf63; del buf63 # reuse + # Source Nodes: [], Original ATen: [] + extern_kernels.mm(reinterpret_tensor(buf83, (64, 3072), (3072, 1), 0), reinterpret_tensor(primals_50, (3072, 768), (1, 3072), 0), out=buf84) + buf88 = buf55; del buf55 # reuse + buf89 = empty_strided_cuda((1, 64, 768), (49152, 768, 1), torch.float32) + buf276 = empty_strided_cuda((1, 64, 1), (64, 1, 1), torch.float32) + # Source Nodes: [layer_norm_8, x_24], Original ATen: [aten.add, aten.native_layer_norm, aten.native_layer_norm_backward] + triton_per_fused_add_native_layer_norm_native_layer_norm_backward_3.run(buf76, buf84, primals_51, primals_52, primals_53, buf88, buf89, buf276, 64, 768, grid=grid(64), stream=stream0) + del primals_53 + buf90 = empty_strided_cuda((64, 2304), (2304, 1), torch.float32) + # Source Nodes: [linear_16], Original ATen: [aten.addmm] + extern_kernels.addmm(primals_55, reinterpret_tensor(buf89, (64, 768), (768, 1), 0), reinterpret_tensor(primals_54, (768, 2304), (1, 768), 0), alpha=1, beta=1, out=buf90) + del primals_55 + # Source Nodes: [y_12], Original ATen: [aten._scaled_dot_product_efficient_attention] + buf91 = torch.ops.aten._scaled_dot_product_efficient_attention.default(reinterpret_tensor(buf90, (1, 12, 64, 64), (0, 64, 2304, 1), 0), reinterpret_tensor(buf90, (1, 12, 64, 64), (0, 64, 2304, 1), 768), reinterpret_tensor(buf90, (1, 12, 64, 64), (0, 64, 2304, 1), 1536), None, True, 0.0, True) + buf92 = buf91[0] + buf93 = buf91[1] + buf94 = buf91[2] + buf95 = buf91[3] + del buf91 + buf96 = empty_strided_cuda((64, 768), (768, 1), torch.float32) + # Source Nodes: [], Original ATen: [] + extern_kernels.mm(reinterpret_tensor(buf92, (64, 768), (768, 1), 0), reinterpret_tensor(primals_56, (768, 768), (1, 768), 0), out=buf96) + buf97 = reinterpret_tensor(buf96, (1, 64, 768), (49152, 768, 1), 0); del buf96 # reuse + buf101 = empty_strided_cuda((1, 64, 768), (49152, 768, 1), torch.float32) + buf102 = 
empty_strided_cuda((1, 64, 768), (49152, 768, 1), torch.float32) + buf275 = empty_strided_cuda((1, 64, 1), (64, 1, 1), torch.float32) + # Source Nodes: [layer_norm_9, x_24, x_25], Original ATen: [aten.add, aten.native_layer_norm, aten.native_layer_norm_backward] + triton_per_fused_add_native_layer_norm_native_layer_norm_backward_4.run(buf97, buf76, buf84, primals_51, primals_57, primals_58, primals_59, buf101, buf102, buf275, 64, 768, grid=grid(64), stream=stream0) + del primals_51 + del primals_57 + del primals_59 + buf103 = empty_strided_cuda((64, 3072), (3072, 1), torch.float32) + # Source Nodes: [x_26], Original ATen: [aten.addmm] + extern_kernels.addmm(primals_61, reinterpret_tensor(buf102, (64, 768), (768, 1), 0), reinterpret_tensor(primals_60, (768, 3072), (1, 768), 0), alpha=1, beta=1, out=buf103) + del primals_61 + buf104 = empty_strided_cuda((1, 64, 3072), (196608, 3072, 1), torch.float32) + # Source Nodes: [add_18, add_19, mul_16, mul_17, mul_18, pow_5, tanh_4, x_27], Original ATen: [aten.add, aten.mul, aten.pow, aten.tanh] + triton_poi_fused_add_mul_pow_tanh_2.run(buf103, buf104, 196608, grid=grid(196608), stream=stream0) + buf105 = buf84; del buf84 # reuse + # Source Nodes: [], Original ATen: [] + extern_kernels.mm(reinterpret_tensor(buf104, (64, 3072), (3072, 1), 0), reinterpret_tensor(primals_62, (3072, 768), (1, 3072), 0), out=buf105) + buf109 = buf76; del buf76 # reuse + buf110 = empty_strided_cuda((1, 64, 768), (49152, 768, 1), torch.float32) + buf274 = empty_strided_cuda((1, 64, 1), (64, 1, 1), torch.float32) + # Source Nodes: [layer_norm_10, x_30], Original ATen: [aten.add, aten.native_layer_norm, aten.native_layer_norm_backward] + triton_per_fused_add_native_layer_norm_native_layer_norm_backward_3.run(buf97, buf105, primals_63, primals_64, primals_65, buf109, buf110, buf274, 64, 768, grid=grid(64), stream=stream0) + del primals_65 + buf111 = empty_strided_cuda((64, 2304), (2304, 1), torch.float32) + # Source Nodes: [linear_20], Original ATen: [aten.addmm] + extern_kernels.addmm(primals_67, reinterpret_tensor(buf110, (64, 768), (768, 1), 0), reinterpret_tensor(primals_66, (768, 2304), (1, 768), 0), alpha=1, beta=1, out=buf111) + del primals_67 + # Source Nodes: [y_15], Original ATen: [aten._scaled_dot_product_efficient_attention] + buf112 = torch.ops.aten._scaled_dot_product_efficient_attention.default(reinterpret_tensor(buf111, (1, 12, 64, 64), (0, 64, 2304, 1), 0), reinterpret_tensor(buf111, (1, 12, 64, 64), (0, 64, 2304, 1), 768), reinterpret_tensor(buf111, (1, 12, 64, 64), (0, 64, 2304, 1), 1536), None, True, 0.0, True) + buf113 = buf112[0] + buf114 = buf112[1] + buf115 = buf112[2] + buf116 = buf112[3] + del buf112 + buf117 = empty_strided_cuda((64, 768), (768, 1), torch.float32) + # Source Nodes: [], Original ATen: [] + extern_kernels.mm(reinterpret_tensor(buf113, (64, 768), (768, 1), 0), reinterpret_tensor(primals_68, (768, 768), (1, 768), 0), out=buf117) + buf118 = reinterpret_tensor(buf117, (1, 64, 768), (49152, 768, 1), 0); del buf117 # reuse + buf122 = empty_strided_cuda((1, 64, 768), (49152, 768, 1), torch.float32) + buf123 = empty_strided_cuda((1, 64, 768), (49152, 768, 1), torch.float32) + buf273 = empty_strided_cuda((1, 64, 1), (64, 1, 1), torch.float32) + # Source Nodes: [layer_norm_11, x_30, x_31], Original ATen: [aten.add, aten.native_layer_norm, aten.native_layer_norm_backward] + triton_per_fused_add_native_layer_norm_native_layer_norm_backward_4.run(buf118, buf97, buf105, primals_63, primals_69, primals_70, primals_71, buf122, buf123, buf273, 64, 768, 
grid=grid(64), stream=stream0) + del primals_63 + del primals_69 + del primals_71 + buf124 = empty_strided_cuda((64, 3072), (3072, 1), torch.float32) + # Source Nodes: [x_32], Original ATen: [aten.addmm] + extern_kernels.addmm(primals_73, reinterpret_tensor(buf123, (64, 768), (768, 1), 0), reinterpret_tensor(primals_72, (768, 3072), (1, 768), 0), alpha=1, beta=1, out=buf124) + del primals_73 + buf125 = empty_strided_cuda((1, 64, 3072), (196608, 3072, 1), torch.float32) + # Source Nodes: [add_22, add_23, mul_20, mul_21, mul_22, pow_6, tanh_5, x_33], Original ATen: [aten.add, aten.mul, aten.pow, aten.tanh] + triton_poi_fused_add_mul_pow_tanh_2.run(buf124, buf125, 196608, grid=grid(196608), stream=stream0) + buf126 = reinterpret_tensor(buf97, (64, 768), (768, 1), 0); del buf97 # reuse + # Source Nodes: [], Original ATen: [] + extern_kernels.mm(reinterpret_tensor(buf125, (64, 3072), (3072, 1), 0), reinterpret_tensor(primals_74, (3072, 768), (1, 3072), 0), out=buf126) + buf130 = reinterpret_tensor(buf105, (1, 64, 768), (49152, 768, 1), 0); del buf105 # reuse + buf131 = empty_strided_cuda((1, 64, 768), (49152, 768, 1), torch.float32) + buf272 = empty_strided_cuda((1, 64, 1), (64, 1, 1), torch.float32) + # Source Nodes: [layer_norm_12, x_36], Original ATen: [aten.add, aten.native_layer_norm, aten.native_layer_norm_backward] + triton_per_fused_add_native_layer_norm_native_layer_norm_backward_3.run(buf118, buf126, primals_75, primals_76, primals_77, buf130, buf131, buf272, 64, 768, grid=grid(64), stream=stream0) + del primals_77 + buf132 = empty_strided_cuda((64, 2304), (2304, 1), torch.float32) + # Source Nodes: [linear_24], Original ATen: [aten.addmm] + extern_kernels.addmm(primals_79, reinterpret_tensor(buf131, (64, 768), (768, 1), 0), reinterpret_tensor(primals_78, (768, 2304), (1, 768), 0), alpha=1, beta=1, out=buf132) + del primals_79 + # Source Nodes: [y_18], Original ATen: [aten._scaled_dot_product_efficient_attention] + buf133 = torch.ops.aten._scaled_dot_product_efficient_attention.default(reinterpret_tensor(buf132, (1, 12, 64, 64), (0, 64, 2304, 1), 0), reinterpret_tensor(buf132, (1, 12, 64, 64), (0, 64, 2304, 1), 768), reinterpret_tensor(buf132, (1, 12, 64, 64), (0, 64, 2304, 1), 1536), None, True, 0.0, True) + buf134 = buf133[0] + buf135 = buf133[1] + buf136 = buf133[2] + buf137 = buf133[3] + del buf133 + buf138 = empty_strided_cuda((64, 768), (768, 1), torch.float32) + # Source Nodes: [], Original ATen: [] + extern_kernels.mm(reinterpret_tensor(buf134, (64, 768), (768, 1), 0), reinterpret_tensor(primals_80, (768, 768), (1, 768), 0), out=buf138) + buf139 = reinterpret_tensor(buf138, (1, 64, 768), (49152, 768, 1), 0); del buf138 # reuse + buf143 = empty_strided_cuda((1, 64, 768), (49152, 768, 1), torch.float32) + buf144 = empty_strided_cuda((1, 64, 768), (49152, 768, 1), torch.float32) + buf271 = empty_strided_cuda((1, 64, 1), (64, 1, 1), torch.float32) + # Source Nodes: [layer_norm_13, x_36, x_37], Original ATen: [aten.add, aten.native_layer_norm, aten.native_layer_norm_backward] + triton_per_fused_add_native_layer_norm_native_layer_norm_backward_4.run(buf139, buf118, buf126, primals_75, primals_81, primals_82, primals_83, buf143, buf144, buf271, 64, 768, grid=grid(64), stream=stream0) + del primals_75 + del primals_81 + del primals_83 + buf145 = empty_strided_cuda((64, 3072), (3072, 1), torch.float32) + # Source Nodes: [x_38], Original ATen: [aten.addmm] + extern_kernels.addmm(primals_85, reinterpret_tensor(buf144, (64, 768), (768, 1), 0), reinterpret_tensor(primals_84, (768, 3072), 
(1, 768), 0), alpha=1, beta=1, out=buf145) + del primals_85 + buf146 = empty_strided_cuda((1, 64, 3072), (196608, 3072, 1), torch.float32) + # Source Nodes: [add_26, add_27, mul_24, mul_25, mul_26, pow_7, tanh_6, x_39], Original ATen: [aten.add, aten.mul, aten.pow, aten.tanh] + triton_poi_fused_add_mul_pow_tanh_2.run(buf145, buf146, 196608, grid=grid(196608), stream=stream0) + buf147 = buf126; del buf126 # reuse + # Source Nodes: [], Original ATen: [] + extern_kernels.mm(reinterpret_tensor(buf146, (64, 3072), (3072, 1), 0), reinterpret_tensor(primals_86, (3072, 768), (1, 3072), 0), out=buf147) + buf151 = buf118; del buf118 # reuse + buf152 = empty_strided_cuda((1, 64, 768), (49152, 768, 1), torch.float32) + buf270 = empty_strided_cuda((1, 64, 1), (64, 1, 1), torch.float32) + # Source Nodes: [layer_norm_14, x_42], Original ATen: [aten.add, aten.native_layer_norm, aten.native_layer_norm_backward] + triton_per_fused_add_native_layer_norm_native_layer_norm_backward_3.run(buf139, buf147, primals_87, primals_88, primals_89, buf151, buf152, buf270, 64, 768, grid=grid(64), stream=stream0) + del primals_89 + buf153 = empty_strided_cuda((64, 2304), (2304, 1), torch.float32) + # Source Nodes: [linear_28], Original ATen: [aten.addmm] + extern_kernels.addmm(primals_91, reinterpret_tensor(buf152, (64, 768), (768, 1), 0), reinterpret_tensor(primals_90, (768, 2304), (1, 768), 0), alpha=1, beta=1, out=buf153) + del primals_91 + # Source Nodes: [y_21], Original ATen: [aten._scaled_dot_product_efficient_attention] + buf154 = torch.ops.aten._scaled_dot_product_efficient_attention.default(reinterpret_tensor(buf153, (1, 12, 64, 64), (0, 64, 2304, 1), 0), reinterpret_tensor(buf153, (1, 12, 64, 64), (0, 64, 2304, 1), 768), reinterpret_tensor(buf153, (1, 12, 64, 64), (0, 64, 2304, 1), 1536), None, True, 0.0, True) + buf155 = buf154[0] + buf156 = buf154[1] + buf157 = buf154[2] + buf158 = buf154[3] + del buf154 + buf159 = empty_strided_cuda((64, 768), (768, 1), torch.float32) + # Source Nodes: [], Original ATen: [] + extern_kernels.mm(reinterpret_tensor(buf155, (64, 768), (768, 1), 0), reinterpret_tensor(primals_92, (768, 768), (1, 768), 0), out=buf159) + buf160 = reinterpret_tensor(buf159, (1, 64, 768), (49152, 768, 1), 0); del buf159 # reuse + buf164 = empty_strided_cuda((1, 64, 768), (49152, 768, 1), torch.float32) + buf165 = empty_strided_cuda((1, 64, 768), (49152, 768, 1), torch.float32) + buf269 = empty_strided_cuda((1, 64, 1), (64, 1, 1), torch.float32) + # Source Nodes: [layer_norm_15, x_42, x_43], Original ATen: [aten.add, aten.native_layer_norm, aten.native_layer_norm_backward] + triton_per_fused_add_native_layer_norm_native_layer_norm_backward_4.run(buf160, buf139, buf147, primals_87, primals_93, primals_94, primals_95, buf164, buf165, buf269, 64, 768, grid=grid(64), stream=stream0) + del primals_87 + del primals_93 + del primals_95 + buf166 = empty_strided_cuda((64, 3072), (3072, 1), torch.float32) + # Source Nodes: [x_44], Original ATen: [aten.addmm] + extern_kernels.addmm(primals_97, reinterpret_tensor(buf165, (64, 768), (768, 1), 0), reinterpret_tensor(primals_96, (768, 3072), (1, 768), 0), alpha=1, beta=1, out=buf166) + del primals_97 + buf167 = empty_strided_cuda((1, 64, 3072), (196608, 3072, 1), torch.float32) + # Source Nodes: [add_30, add_31, mul_28, mul_29, mul_30, pow_8, tanh_7, x_45], Original ATen: [aten.add, aten.mul, aten.pow, aten.tanh] + triton_poi_fused_add_mul_pow_tanh_2.run(buf166, buf167, 196608, grid=grid(196608), stream=stream0) + buf168 = buf147; del buf147 # reuse + # Source 
Nodes: [], Original ATen: [] + extern_kernels.mm(reinterpret_tensor(buf167, (64, 3072), (3072, 1), 0), reinterpret_tensor(primals_98, (3072, 768), (1, 3072), 0), out=buf168) + buf172 = buf139; del buf139 # reuse + buf173 = empty_strided_cuda((1, 64, 768), (49152, 768, 1), torch.float32) + buf268 = empty_strided_cuda((1, 64, 1), (64, 1, 1), torch.float32) + # Source Nodes: [layer_norm_16, x_48], Original ATen: [aten.add, aten.native_layer_norm, aten.native_layer_norm_backward] + triton_per_fused_add_native_layer_norm_native_layer_norm_backward_3.run(buf160, buf168, primals_99, primals_100, primals_101, buf172, buf173, buf268, 64, 768, grid=grid(64), stream=stream0) + del primals_101 + buf174 = empty_strided_cuda((64, 2304), (2304, 1), torch.float32) + # Source Nodes: [linear_32], Original ATen: [aten.addmm] + extern_kernels.addmm(primals_103, reinterpret_tensor(buf173, (64, 768), (768, 1), 0), reinterpret_tensor(primals_102, (768, 2304), (1, 768), 0), alpha=1, beta=1, out=buf174) + del primals_103 + # Source Nodes: [y_24], Original ATen: [aten._scaled_dot_product_efficient_attention] + buf175 = torch.ops.aten._scaled_dot_product_efficient_attention.default(reinterpret_tensor(buf174, (1, 12, 64, 64), (0, 64, 2304, 1), 0), reinterpret_tensor(buf174, (1, 12, 64, 64), (0, 64, 2304, 1), 768), reinterpret_tensor(buf174, (1, 12, 64, 64), (0, 64, 2304, 1), 1536), None, True, 0.0, True) + buf176 = buf175[0] + buf177 = buf175[1] + buf178 = buf175[2] + buf179 = buf175[3] + del buf175 + buf180 = empty_strided_cuda((64, 768), (768, 1), torch.float32) + # Source Nodes: [], Original ATen: [] + extern_kernels.mm(reinterpret_tensor(buf176, (64, 768), (768, 1), 0), reinterpret_tensor(primals_104, (768, 768), (1, 768), 0), out=buf180) + buf181 = reinterpret_tensor(buf180, (1, 64, 768), (49152, 768, 1), 0); del buf180 # reuse + buf185 = empty_strided_cuda((1, 64, 768), (49152, 768, 1), torch.float32) + buf186 = empty_strided_cuda((1, 64, 768), (49152, 768, 1), torch.float32) + buf267 = empty_strided_cuda((1, 64, 1), (64, 1, 1), torch.float32) + # Source Nodes: [layer_norm_17, x_48, x_49], Original ATen: [aten.add, aten.native_layer_norm, aten.native_layer_norm_backward] + triton_per_fused_add_native_layer_norm_native_layer_norm_backward_4.run(buf181, buf160, buf168, primals_99, primals_105, primals_106, primals_107, buf185, buf186, buf267, 64, 768, grid=grid(64), stream=stream0) + del primals_105 + del primals_107 + del primals_99 + buf187 = empty_strided_cuda((64, 3072), (3072, 1), torch.float32) + # Source Nodes: [x_50], Original ATen: [aten.addmm] + extern_kernels.addmm(primals_109, reinterpret_tensor(buf186, (64, 768), (768, 1), 0), reinterpret_tensor(primals_108, (768, 3072), (1, 768), 0), alpha=1, beta=1, out=buf187) + del primals_109 + buf188 = empty_strided_cuda((1, 64, 3072), (196608, 3072, 1), torch.float32) + # Source Nodes: [add_34, add_35, mul_32, mul_33, mul_34, pow_9, tanh_8, x_51], Original ATen: [aten.add, aten.mul, aten.pow, aten.tanh] + triton_poi_fused_add_mul_pow_tanh_2.run(buf187, buf188, 196608, grid=grid(196608), stream=stream0) + buf189 = buf168; del buf168 # reuse + # Source Nodes: [], Original ATen: [] + extern_kernels.mm(reinterpret_tensor(buf188, (64, 3072), (3072, 1), 0), reinterpret_tensor(primals_110, (3072, 768), (1, 3072), 0), out=buf189) + buf193 = buf160; del buf160 # reuse + buf194 = empty_strided_cuda((1, 64, 768), (49152, 768, 1), torch.float32) + buf266 = empty_strided_cuda((1, 64, 1), (64, 1, 1), torch.float32) + # Source Nodes: [layer_norm_18, x_54], Original ATen: 
[aten.add, aten.native_layer_norm, aten.native_layer_norm_backward] + triton_per_fused_add_native_layer_norm_native_layer_norm_backward_3.run(buf181, buf189, primals_111, primals_112, primals_113, buf193, buf194, buf266, 64, 768, grid=grid(64), stream=stream0) + del primals_113 + buf195 = empty_strided_cuda((64, 2304), (2304, 1), torch.float32) + # Source Nodes: [linear_36], Original ATen: [aten.addmm] + extern_kernels.addmm(primals_115, reinterpret_tensor(buf194, (64, 768), (768, 1), 0), reinterpret_tensor(primals_114, (768, 2304), (1, 768), 0), alpha=1, beta=1, out=buf195) + del primals_115 + # Source Nodes: [y_27], Original ATen: [aten._scaled_dot_product_efficient_attention] + buf196 = torch.ops.aten._scaled_dot_product_efficient_attention.default(reinterpret_tensor(buf195, (1, 12, 64, 64), (0, 64, 2304, 1), 0), reinterpret_tensor(buf195, (1, 12, 64, 64), (0, 64, 2304, 1), 768), reinterpret_tensor(buf195, (1, 12, 64, 64), (0, 64, 2304, 1), 1536), None, True, 0.0, True) + buf197 = buf196[0] + buf198 = buf196[1] + buf199 = buf196[2] + buf200 = buf196[3] + del buf196 + buf201 = empty_strided_cuda((64, 768), (768, 1), torch.float32) + # Source Nodes: [], Original ATen: [] + extern_kernels.mm(reinterpret_tensor(buf197, (64, 768), (768, 1), 0), reinterpret_tensor(primals_116, (768, 768), (1, 768), 0), out=buf201) + buf202 = reinterpret_tensor(buf201, (1, 64, 768), (49152, 768, 1), 0); del buf201 # reuse + buf206 = empty_strided_cuda((1, 64, 768), (49152, 768, 1), torch.float32) + buf207 = empty_strided_cuda((1, 64, 768), (49152, 768, 1), torch.float32) + buf265 = empty_strided_cuda((1, 64, 1), (64, 1, 1), torch.float32) + # Source Nodes: [layer_norm_19, x_54, x_55], Original ATen: [aten.add, aten.native_layer_norm, aten.native_layer_norm_backward] + triton_per_fused_add_native_layer_norm_native_layer_norm_backward_4.run(buf202, buf181, buf189, primals_111, primals_117, primals_118, primals_119, buf206, buf207, buf265, 64, 768, grid=grid(64), stream=stream0) + del primals_111 + del primals_117 + del primals_119 + buf208 = empty_strided_cuda((64, 3072), (3072, 1), torch.float32) + # Source Nodes: [x_56], Original ATen: [aten.addmm] + extern_kernels.addmm(primals_121, reinterpret_tensor(buf207, (64, 768), (768, 1), 0), reinterpret_tensor(primals_120, (768, 3072), (1, 768), 0), alpha=1, beta=1, out=buf208) + del primals_121 + buf209 = empty_strided_cuda((1, 64, 3072), (196608, 3072, 1), torch.float32) + # Source Nodes: [add_38, add_39, mul_36, mul_37, mul_38, pow_10, tanh_9, x_57], Original ATen: [aten.add, aten.mul, aten.pow, aten.tanh] + triton_poi_fused_add_mul_pow_tanh_2.run(buf208, buf209, 196608, grid=grid(196608), stream=stream0) + buf210 = buf189; del buf189 # reuse + # Source Nodes: [], Original ATen: [] + extern_kernels.mm(reinterpret_tensor(buf209, (64, 3072), (3072, 1), 0), reinterpret_tensor(primals_122, (3072, 768), (1, 3072), 0), out=buf210) + buf214 = buf181; del buf181 # reuse + buf215 = empty_strided_cuda((1, 64, 768), (49152, 768, 1), torch.float32) + buf264 = empty_strided_cuda((1, 64, 1), (64, 1, 1), torch.float32) + # Source Nodes: [layer_norm_20, x_60], Original ATen: [aten.add, aten.native_layer_norm, aten.native_layer_norm_backward] + triton_per_fused_add_native_layer_norm_native_layer_norm_backward_3.run(buf202, buf210, primals_123, primals_124, primals_125, buf214, buf215, buf264, 64, 768, grid=grid(64), stream=stream0) + del primals_125 + buf216 = empty_strided_cuda((64, 2304), (2304, 1), torch.float32) + # Source Nodes: [linear_40], Original ATen: [aten.addmm] + 
extern_kernels.addmm(primals_127, reinterpret_tensor(buf215, (64, 768), (768, 1), 0), reinterpret_tensor(primals_126, (768, 2304), (1, 768), 0), alpha=1, beta=1, out=buf216) + del primals_127 + # Source Nodes: [y_30], Original ATen: [aten._scaled_dot_product_efficient_attention] + buf217 = torch.ops.aten._scaled_dot_product_efficient_attention.default(reinterpret_tensor(buf216, (1, 12, 64, 64), (0, 64, 2304, 1), 0), reinterpret_tensor(buf216, (1, 12, 64, 64), (0, 64, 2304, 1), 768), reinterpret_tensor(buf216, (1, 12, 64, 64), (0, 64, 2304, 1), 1536), None, True, 0.0, True) + buf218 = buf217[0] + buf219 = buf217[1] + buf220 = buf217[2] + buf221 = buf217[3] + del buf217 + buf222 = empty_strided_cuda((64, 768), (768, 1), torch.float32) + # Source Nodes: [], Original ATen: [] + extern_kernels.mm(reinterpret_tensor(buf218, (64, 768), (768, 1), 0), reinterpret_tensor(primals_128, (768, 768), (1, 768), 0), out=buf222) + buf223 = reinterpret_tensor(buf222, (1, 64, 768), (49152, 768, 1), 0); del buf222 # reuse + buf227 = empty_strided_cuda((1, 64, 768), (49152, 768, 1), torch.float32) + buf228 = empty_strided_cuda((1, 64, 768), (49152, 768, 1), torch.float32) + buf263 = empty_strided_cuda((1, 64, 1), (64, 1, 1), torch.float32) + # Source Nodes: [layer_norm_21, x_60, x_61], Original ATen: [aten.add, aten.native_layer_norm, aten.native_layer_norm_backward] + triton_per_fused_add_native_layer_norm_native_layer_norm_backward_4.run(buf223, buf202, buf210, primals_123, primals_129, primals_130, primals_131, buf227, buf228, buf263, 64, 768, grid=grid(64), stream=stream0) + del primals_123 + del primals_129 + del primals_131 + buf229 = empty_strided_cuda((64, 3072), (3072, 1), torch.float32) + # Source Nodes: [x_62], Original ATen: [aten.addmm] + extern_kernels.addmm(primals_133, reinterpret_tensor(buf228, (64, 768), (768, 1), 0), reinterpret_tensor(primals_132, (768, 3072), (1, 768), 0), alpha=1, beta=1, out=buf229) + del primals_133 + buf230 = empty_strided_cuda((1, 64, 3072), (196608, 3072, 1), torch.float32) + # Source Nodes: [add_42, add_43, mul_40, mul_41, mul_42, pow_11, tanh_10, x_63], Original ATen: [aten.add, aten.mul, aten.pow, aten.tanh] + triton_poi_fused_add_mul_pow_tanh_2.run(buf229, buf230, 196608, grid=grid(196608), stream=stream0) + buf231 = buf210; del buf210 # reuse + # Source Nodes: [], Original ATen: [] + extern_kernels.mm(reinterpret_tensor(buf230, (64, 3072), (3072, 1), 0), reinterpret_tensor(primals_134, (3072, 768), (1, 3072), 0), out=buf231) + buf235 = buf202; del buf202 # reuse + buf236 = empty_strided_cuda((1, 64, 768), (49152, 768, 1), torch.float32) + buf262 = empty_strided_cuda((1, 64, 1), (64, 1, 1), torch.float32) + # Source Nodes: [layer_norm_22, x_66], Original ATen: [aten.add, aten.native_layer_norm, aten.native_layer_norm_backward] + triton_per_fused_add_native_layer_norm_native_layer_norm_backward_3.run(buf223, buf231, primals_135, primals_136, primals_137, buf235, buf236, buf262, 64, 768, grid=grid(64), stream=stream0) + del primals_137 + buf237 = empty_strided_cuda((64, 2304), (2304, 1), torch.float32) + # Source Nodes: [linear_44], Original ATen: [aten.addmm] + extern_kernels.addmm(primals_139, reinterpret_tensor(buf236, (64, 768), (768, 1), 0), reinterpret_tensor(primals_138, (768, 2304), (1, 768), 0), alpha=1, beta=1, out=buf237) + del primals_139 + # Source Nodes: [y_33], Original ATen: [aten._scaled_dot_product_efficient_attention] + buf238 = torch.ops.aten._scaled_dot_product_efficient_attention.default(reinterpret_tensor(buf237, (1, 12, 64, 64), (0, 64, 2304, 
1), 0), reinterpret_tensor(buf237, (1, 12, 64, 64), (0, 64, 2304, 1), 768), reinterpret_tensor(buf237, (1, 12, 64, 64), (0, 64, 2304, 1), 1536), None, True, 0.0, True) + buf239 = buf238[0] + buf240 = buf238[1] + buf241 = buf238[2] + buf242 = buf238[3] + del buf238 + buf243 = empty_strided_cuda((64, 768), (768, 1), torch.float32) + # Source Nodes: [], Original ATen: [] + extern_kernels.mm(reinterpret_tensor(buf239, (64, 768), (768, 1), 0), reinterpret_tensor(primals_140, (768, 768), (1, 768), 0), out=buf243) + buf244 = reinterpret_tensor(buf243, (1, 64, 768), (49152, 768, 1), 0); del buf243 # reuse + buf248 = empty_strided_cuda((1, 64, 768), (49152, 768, 1), torch.float32) + buf249 = empty_strided_cuda((1, 64, 768), (49152, 768, 1), torch.float32) + buf261 = empty_strided_cuda((1, 64, 1), (64, 1, 1), torch.float32) + # Source Nodes: [layer_norm_23, x_66, x_67], Original ATen: [aten.add, aten.native_layer_norm, aten.native_layer_norm_backward] + triton_per_fused_add_native_layer_norm_native_layer_norm_backward_4.run(buf244, buf223, buf231, primals_135, primals_141, primals_142, primals_143, buf248, buf249, buf261, 64, 768, grid=grid(64), stream=stream0) + del primals_135 + del primals_141 + del primals_143 + buf250 = empty_strided_cuda((64, 3072), (3072, 1), torch.float32) + # Source Nodes: [x_68], Original ATen: [aten.addmm] + extern_kernels.addmm(primals_145, reinterpret_tensor(buf249, (64, 768), (768, 1), 0), reinterpret_tensor(primals_144, (768, 3072), (1, 768), 0), alpha=1, beta=1, out=buf250) + del primals_145 + buf251 = empty_strided_cuda((1, 64, 3072), (196608, 3072, 1), torch.float32) + # Source Nodes: [add_46, add_47, mul_44, mul_45, mul_46, pow_12, tanh_11, x_69], Original ATen: [aten.add, aten.mul, aten.pow, aten.tanh] + triton_poi_fused_add_mul_pow_tanh_2.run(buf250, buf251, 196608, grid=grid(196608), stream=stream0) + buf252 = buf231; del buf231 # reuse + # Source Nodes: [], Original ATen: [] + extern_kernels.mm(reinterpret_tensor(buf251, (64, 3072), (3072, 1), 0), reinterpret_tensor(primals_146, (3072, 768), (1, 3072), 0), out=buf252) + buf256 = buf223; del buf223 # reuse + buf260 = empty_strided_cuda((1, 64, 1), (64, 1, 1), torch.float32) + # Source Nodes: [x_72, x_73], Original ATen: [aten.add, aten.native_layer_norm, aten.native_layer_norm_backward] + triton_per_fused_add_native_layer_norm_native_layer_norm_backward_5.run(buf244, buf252, primals_147, buf256, buf260, 64, 768, grid=grid(64), stream=stream0) + del buf244 + del buf252 + del primals_147 + buf257 = empty_strided_cuda((1, ), (1, ), torch.int64) + # Source Nodes: [getitem_36], Original ATen: [aten.lift_fresh] + triton_poi_fused_lift_fresh_6.run(buf257, 1, grid=grid(1), stream=stream0) + buf258 = empty_strided_cuda((1, 1, 768), (768, 768, 1), torch.float32) + # Source Nodes: [getitem_36, x_73], Original ATen: [aten.index, aten.native_layer_norm] + triton_poi_fused_index_native_layer_norm_7.run(buf256, primals_148, primals_149, buf258, 768, grid=grid(768), stream=stream0) + del primals_149 + buf259 = empty_strided_cuda((1, 50304), (50304, 1), torch.float32) + # Source Nodes: [logits], Original ATen: [aten.mm] + extern_kernels.mm(reinterpret_tensor(buf258, (1, 768), (0, 1), 0), reinterpret_tensor(primals_2, (768, 50304), (1, 768), 0), out=buf259) + return (reinterpret_tensor(buf259, (1, 1, 50304), (50304, 50304, 1), 0), primals_1, primals_4, primals_10, primals_16, primals_22, primals_28, primals_34, primals_40, primals_46, primals_52, primals_58, primals_64, primals_70, primals_76, primals_82, primals_88, primals_94, 
primals_100, primals_106, primals_112, primals_118, primals_124, primals_130, primals_136, primals_142, primals_148, reinterpret_tensor(buf0, (1, 64), (64, 1), 0), buf4, reinterpret_tensor(buf5, (64, 768), (768, 1), 0), reinterpret_tensor(buf6, (1, 12, 64, 64), (147456, 64, 2304, 1), 768), reinterpret_tensor(buf6, (1, 12, 64, 64), (147456, 64, 2304, 1), 0), reinterpret_tensor(buf6, (1, 12, 64, 64), (147456, 64, 2304, 1), 1536), buf8, buf9, buf10, buf11, buf17, reinterpret_tensor(buf18, (64, 768), (768, 1), 0), buf19, reinterpret_tensor(buf20, (64, 3072), (3072, 1), 0), buf25, reinterpret_tensor(buf26, (64, 768), (768, 1), 0), reinterpret_tensor(buf27, (1, 12, 64, 64), (147456, 64, 2304, 1), 768), reinterpret_tensor(buf27, (1, 12, 64, 64), (147456, 64, 2304, 1), 0), reinterpret_tensor(buf27, (1, 12, 64, 64), (147456, 64, 2304, 1), 1536), buf29, buf30, buf31, buf32, buf38, reinterpret_tensor(buf39, (64, 768), (768, 1), 0), buf40, reinterpret_tensor(buf41, (64, 3072), (3072, 1), 0), buf46, reinterpret_tensor(buf47, (64, 768), (768, 1), 0), reinterpret_tensor(buf48, (1, 12, 64, 64), (147456, 64, 2304, 1), 768), reinterpret_tensor(buf48, (1, 12, 64, 64), (147456, 64, 2304, 1), 0), reinterpret_tensor(buf48, (1, 12, 64, 64), (147456, 64, 2304, 1), 1536), buf50, buf51, buf52, buf53, buf59, reinterpret_tensor(buf60, (64, 768), (768, 1), 0), buf61, reinterpret_tensor(buf62, (64, 3072), (3072, 1), 0), buf67, reinterpret_tensor(buf68, (64, 768), (768, 1), 0), reinterpret_tensor(buf69, (1, 12, 64, 64), (147456, 64, 2304, 1), 768), reinterpret_tensor(buf69, (1, 12, 64, 64), (147456, 64, 2304, 1), 0), reinterpret_tensor(buf69, (1, 12, 64, 64), (147456, 64, 2304, 1), 1536), buf71, buf72, buf73, buf74, buf80, reinterpret_tensor(buf81, (64, 768), (768, 1), 0), buf82, reinterpret_tensor(buf83, (64, 3072), (3072, 1), 0), buf88, reinterpret_tensor(buf89, (64, 768), (768, 1), 0), reinterpret_tensor(buf90, (1, 12, 64, 64), (147456, 64, 2304, 1), 768), reinterpret_tensor(buf90, (1, 12, 64, 64), (147456, 64, 2304, 1), 0), reinterpret_tensor(buf90, (1, 12, 64, 64), (147456, 64, 2304, 1), 1536), buf92, buf93, buf94, buf95, buf101, reinterpret_tensor(buf102, (64, 768), (768, 1), 0), buf103, reinterpret_tensor(buf104, (64, 3072), (3072, 1), 0), buf109, reinterpret_tensor(buf110, (64, 768), (768, 1), 0), reinterpret_tensor(buf111, (1, 12, 64, 64), (147456, 64, 2304, 1), 768), reinterpret_tensor(buf111, (1, 12, 64, 64), (147456, 64, 2304, 1), 0), reinterpret_tensor(buf111, (1, 12, 64, 64), (147456, 64, 2304, 1), 1536), buf113, buf114, buf115, buf116, buf122, reinterpret_tensor(buf123, (64, 768), (768, 1), 0), buf124, reinterpret_tensor(buf125, (64, 3072), (3072, 1), 0), buf130, reinterpret_tensor(buf131, (64, 768), (768, 1), 0), reinterpret_tensor(buf132, (1, 12, 64, 64), (147456, 64, 2304, 1), 768), reinterpret_tensor(buf132, (1, 12, 64, 64), (147456, 64, 2304, 1), 0), reinterpret_tensor(buf132, (1, 12, 64, 64), (147456, 64, 2304, 1), 1536), buf134, buf135, buf136, buf137, buf143, reinterpret_tensor(buf144, (64, 768), (768, 1), 0), buf145, reinterpret_tensor(buf146, (64, 3072), (3072, 1), 0), buf151, reinterpret_tensor(buf152, (64, 768), (768, 1), 0), reinterpret_tensor(buf153, (1, 12, 64, 64), (147456, 64, 2304, 1), 768), reinterpret_tensor(buf153, (1, 12, 64, 64), (147456, 64, 2304, 1), 0), reinterpret_tensor(buf153, (1, 12, 64, 64), (147456, 64, 2304, 1), 1536), buf155, buf156, buf157, buf158, buf164, reinterpret_tensor(buf165, (64, 768), (768, 1), 0), buf166, reinterpret_tensor(buf167, (64, 3072), (3072, 1), 0), 
buf172, reinterpret_tensor(buf173, (64, 768), (768, 1), 0), reinterpret_tensor(buf174, (1, 12, 64, 64), (147456, 64, 2304, 1), 768), reinterpret_tensor(buf174, (1, 12, 64, 64), (147456, 64, 2304, 1), 0), reinterpret_tensor(buf174, (1, 12, 64, 64), (147456, 64, 2304, 1), 1536), buf176, buf177, buf178, buf179, buf185, reinterpret_tensor(buf186, (64, 768), (768, 1), 0), buf187, reinterpret_tensor(buf188, (64, 3072), (3072, 1), 0), buf193, reinterpret_tensor(buf194, (64, 768), (768, 1), 0), reinterpret_tensor(buf195, (1, 12, 64, 64), (147456, 64, 2304, 1), 768), reinterpret_tensor(buf195, (1, 12, 64, 64), (147456, 64, 2304, 1), 0), reinterpret_tensor(buf195, (1, 12, 64, 64), (147456, 64, 2304, 1), 1536), buf197, buf198, buf199, buf200, buf206, reinterpret_tensor(buf207, (64, 768), (768, 1), 0), buf208, reinterpret_tensor(buf209, (64, 3072), (3072, 1), 0), buf214, reinterpret_tensor(buf215, (64, 768), (768, 1), 0), reinterpret_tensor(buf216, (1, 12, 64, 64), (147456, 64, 2304, 1), 768), reinterpret_tensor(buf216, (1, 12, 64, 64), (147456, 64, 2304, 1), 0), reinterpret_tensor(buf216, (1, 12, 64, 64), (147456, 64, 2304, 1), 1536), buf218, buf219, buf220, buf221, buf227, reinterpret_tensor(buf228, (64, 768), (768, 1), 0), buf229, reinterpret_tensor(buf230, (64, 3072), (3072, 1), 0), buf235, reinterpret_tensor(buf236, (64, 768), (768, 1), 0), reinterpret_tensor(buf237, (1, 12, 64, 64), (147456, 64, 2304, 1), 768), reinterpret_tensor(buf237, (1, 12, 64, 64), (147456, 64, 2304, 1), 0), reinterpret_tensor(buf237, (1, 12, 64, 64), (147456, 64, 2304, 1), 1536), buf239, buf240, buf241, buf242, buf248, reinterpret_tensor(buf249, (64, 768), (768, 1), 0), buf250, reinterpret_tensor(buf251, (64, 3072), (3072, 1), 0), buf256, buf257, reinterpret_tensor(buf258, (1, 768), (768, 1), 0), primals_2, buf260, primals_146, primals_144, buf261, primals_140, primals_138, buf262, primals_134, primals_132, buf263, primals_128, primals_126, buf264, primals_122, primals_120, buf265, primals_116, primals_114, buf266, primals_110, primals_108, buf267, primals_104, primals_102, buf268, primals_98, primals_96, buf269, primals_92, primals_90, buf270, primals_86, primals_84, buf271, primals_80, primals_78, buf272, primals_74, primals_72, buf273, primals_68, primals_66, buf274, primals_62, primals_60, buf275, primals_56, primals_54, buf276, primals_50, primals_48, buf277, primals_44, primals_42, buf278, primals_38, primals_36, buf279, primals_32, primals_30, buf280, primals_26, primals_24, buf281, primals_20, primals_18, buf282, primals_14, primals_12, buf283, primals_8, primals_6, buf284, ) + + + def benchmark_compiled_module(times=10, repeat=10): + from torch._dynamo.testing import rand_strided + from torch._inductor.utils import print_performance + primals_1 = rand_strided((1, 64), (64, 1), device='cuda:0', dtype=torch.int64) + primals_2 = rand_strided((50304, 768), (768, 1), device='cuda:0', dtype=torch.float32) + primals_3 = rand_strided((1024, 768), (768, 1), device='cuda:0', dtype=torch.float32) + primals_4 = rand_strided((768, ), (1, ), device='cuda:0', dtype=torch.float32) + primals_5 = rand_strided((768, ), (1, ), device='cuda:0', dtype=torch.float32) + primals_6 = rand_strided((2304, 768), (768, 1), device='cuda:0', dtype=torch.float32) + primals_7 = rand_strided((2304, ), (1, ), device='cuda:0', dtype=torch.float32) + primals_8 = rand_strided((768, 768), (768, 1), device='cuda:0', dtype=torch.float32) + primals_9 = rand_strided((768, ), (1, ), device='cuda:0', dtype=torch.float32) + primals_10 = rand_strided((768, ), 
(1, ), device='cuda:0', dtype=torch.float32) + primals_11 = rand_strided((768, ), (1, ), device='cuda:0', dtype=torch.float32) + primals_12 = rand_strided((3072, 768), (768, 1), device='cuda:0', dtype=torch.float32) + primals_13 = rand_strided((3072, ), (1, ), device='cuda:0', dtype=torch.float32) + primals_14 = rand_strided((768, 3072), (3072, 1), device='cuda:0', dtype=torch.float32) + primals_15 = rand_strided((768, ), (1, ), device='cuda:0', dtype=torch.float32) + primals_16 = rand_strided((768, ), (1, ), device='cuda:0', dtype=torch.float32) + primals_17 = rand_strided((768, ), (1, ), device='cuda:0', dtype=torch.float32) + primals_18 = rand_strided((2304, 768), (768, 1), device='cuda:0', dtype=torch.float32) + primals_19 = rand_strided((2304, ), (1, ), device='cuda:0', dtype=torch.float32) + primals_20 = rand_strided((768, 768), (768, 1), device='cuda:0', dtype=torch.float32) + primals_21 = rand_strided((768, ), (1, ), device='cuda:0', dtype=torch.float32) + primals_22 = rand_strided((768, ), (1, ), device='cuda:0', dtype=torch.float32) + primals_23 = rand_strided((768, ), (1, ), device='cuda:0', dtype=torch.float32) + primals_24 = rand_strided((3072, 768), (768, 1), device='cuda:0', dtype=torch.float32) + primals_25 = rand_strided((3072, ), (1, ), device='cuda:0', dtype=torch.float32) + primals_26 = rand_strided((768, 3072), (3072, 1), device='cuda:0', dtype=torch.float32) + primals_27 = rand_strided((768, ), (1, ), device='cuda:0', dtype=torch.float32) + primals_28 = rand_strided((768, ), (1, ), device='cuda:0', dtype=torch.float32) + primals_29 = rand_strided((768, ), (1, ), device='cuda:0', dtype=torch.float32) + primals_30 = rand_strided((2304, 768), (768, 1), device='cuda:0', dtype=torch.float32) + primals_31 = rand_strided((2304, ), (1, ), device='cuda:0', dtype=torch.float32) + primals_32 = rand_strided((768, 768), (768, 1), device='cuda:0', dtype=torch.float32) + primals_33 = rand_strided((768, ), (1, ), device='cuda:0', dtype=torch.float32) + primals_34 = rand_strided((768, ), (1, ), device='cuda:0', dtype=torch.float32) + primals_35 = rand_strided((768, ), (1, ), device='cuda:0', dtype=torch.float32) + primals_36 = rand_strided((3072, 768), (768, 1), device='cuda:0', dtype=torch.float32) + primals_37 = rand_strided((3072, ), (1, ), device='cuda:0', dtype=torch.float32) + primals_38 = rand_strided((768, 3072), (3072, 1), device='cuda:0', dtype=torch.float32) + primals_39 = rand_strided((768, ), (1, ), device='cuda:0', dtype=torch.float32) + primals_40 = rand_strided((768, ), (1, ), device='cuda:0', dtype=torch.float32) + primals_41 = rand_strided((768, ), (1, ), device='cuda:0', dtype=torch.float32) + primals_42 = rand_strided((2304, 768), (768, 1), device='cuda:0', dtype=torch.float32) + primals_43 = rand_strided((2304, ), (1, ), device='cuda:0', dtype=torch.float32) + primals_44 = rand_strided((768, 768), (768, 1), device='cuda:0', dtype=torch.float32) + primals_45 = rand_strided((768, ), (1, ), device='cuda:0', dtype=torch.float32) + primals_46 = rand_strided((768, ), (1, ), device='cuda:0', dtype=torch.float32) + primals_47 = rand_strided((768, ), (1, ), device='cuda:0', dtype=torch.float32) + primals_48 = rand_strided((3072, 768), (768, 1), device='cuda:0', dtype=torch.float32) + primals_49 = rand_strided((3072, ), (1, ), device='cuda:0', dtype=torch.float32) + primals_50 = rand_strided((768, 3072), (3072, 1), device='cuda:0', dtype=torch.float32) + primals_51 = rand_strided((768, ), (1, ), device='cuda:0', dtype=torch.float32) + primals_52 = rand_strided((768, ), (1, 
), device='cuda:0', dtype=torch.float32) + primals_53 = rand_strided((768, ), (1, ), device='cuda:0', dtype=torch.float32) + primals_54 = rand_strided((2304, 768), (768, 1), device='cuda:0', dtype=torch.float32) + primals_55 = rand_strided((2304, ), (1, ), device='cuda:0', dtype=torch.float32) + primals_56 = rand_strided((768, 768), (768, 1), device='cuda:0', dtype=torch.float32) + primals_57 = rand_strided((768, ), (1, ), device='cuda:0', dtype=torch.float32) + primals_58 = rand_strided((768, ), (1, ), device='cuda:0', dtype=torch.float32) + primals_59 = rand_strided((768, ), (1, ), device='cuda:0', dtype=torch.float32) + primals_60 = rand_strided((3072, 768), (768, 1), device='cuda:0', dtype=torch.float32) + primals_61 = rand_strided((3072, ), (1, ), device='cuda:0', dtype=torch.float32) + primals_62 = rand_strided((768, 3072), (3072, 1), device='cuda:0', dtype=torch.float32) + primals_63 = rand_strided((768, ), (1, ), device='cuda:0', dtype=torch.float32) + primals_64 = rand_strided((768, ), (1, ), device='cuda:0', dtype=torch.float32) + primals_65 = rand_strided((768, ), (1, ), device='cuda:0', dtype=torch.float32) + primals_66 = rand_strided((2304, 768), (768, 1), device='cuda:0', dtype=torch.float32) + primals_67 = rand_strided((2304, ), (1, ), device='cuda:0', dtype=torch.float32) + primals_68 = rand_strided((768, 768), (768, 1), device='cuda:0', dtype=torch.float32) + primals_69 = rand_strided((768, ), (1, ), device='cuda:0', dtype=torch.float32) + primals_70 = rand_strided((768, ), (1, ), device='cuda:0', dtype=torch.float32) + primals_71 = rand_strided((768, ), (1, ), device='cuda:0', dtype=torch.float32) + primals_72 = rand_strided((3072, 768), (768, 1), device='cuda:0', dtype=torch.float32) + primals_73 = rand_strided((3072, ), (1, ), device='cuda:0', dtype=torch.float32) + primals_74 = rand_strided((768, 3072), (3072, 1), device='cuda:0', dtype=torch.float32) + primals_75 = rand_strided((768, ), (1, ), device='cuda:0', dtype=torch.float32) + primals_76 = rand_strided((768, ), (1, ), device='cuda:0', dtype=torch.float32) + primals_77 = rand_strided((768, ), (1, ), device='cuda:0', dtype=torch.float32) + primals_78 = rand_strided((2304, 768), (768, 1), device='cuda:0', dtype=torch.float32) + primals_79 = rand_strided((2304, ), (1, ), device='cuda:0', dtype=torch.float32) + primals_80 = rand_strided((768, 768), (768, 1), device='cuda:0', dtype=torch.float32) + primals_81 = rand_strided((768, ), (1, ), device='cuda:0', dtype=torch.float32) + primals_82 = rand_strided((768, ), (1, ), device='cuda:0', dtype=torch.float32) + primals_83 = rand_strided((768, ), (1, ), device='cuda:0', dtype=torch.float32) + primals_84 = rand_strided((3072, 768), (768, 1), device='cuda:0', dtype=torch.float32) + primals_85 = rand_strided((3072, ), (1, ), device='cuda:0', dtype=torch.float32) + primals_86 = rand_strided((768, 3072), (3072, 1), device='cuda:0', dtype=torch.float32) + primals_87 = rand_strided((768, ), (1, ), device='cuda:0', dtype=torch.float32) + primals_88 = rand_strided((768, ), (1, ), device='cuda:0', dtype=torch.float32) + primals_89 = rand_strided((768, ), (1, ), device='cuda:0', dtype=torch.float32) + primals_90 = rand_strided((2304, 768), (768, 1), device='cuda:0', dtype=torch.float32) + primals_91 = rand_strided((2304, ), (1, ), device='cuda:0', dtype=torch.float32) + primals_92 = rand_strided((768, 768), (768, 1), device='cuda:0', dtype=torch.float32) + primals_93 = rand_strided((768, ), (1, ), device='cuda:0', dtype=torch.float32) + primals_94 = rand_strided((768, ), (1, ), 
device='cuda:0', dtype=torch.float32) + primals_95 = rand_strided((768, ), (1, ), device='cuda:0', dtype=torch.float32) + primals_96 = rand_strided((3072, 768), (768, 1), device='cuda:0', dtype=torch.float32) + primals_97 = rand_strided((3072, ), (1, ), device='cuda:0', dtype=torch.float32) + primals_98 = rand_strided((768, 3072), (3072, 1), device='cuda:0', dtype=torch.float32) + primals_99 = rand_strided((768, ), (1, ), device='cuda:0', dtype=torch.float32) + primals_100 = rand_strided((768, ), (1, ), device='cuda:0', dtype=torch.float32) + primals_101 = rand_strided((768, ), (1, ), device='cuda:0', dtype=torch.float32) + primals_102 = rand_strided((2304, 768), (768, 1), device='cuda:0', dtype=torch.float32) + primals_103 = rand_strided((2304, ), (1, ), device='cuda:0', dtype=torch.float32) + primals_104 = rand_strided((768, 768), (768, 1), device='cuda:0', dtype=torch.float32) + primals_105 = rand_strided((768, ), (1, ), device='cuda:0', dtype=torch.float32) + primals_106 = rand_strided((768, ), (1, ), device='cuda:0', dtype=torch.float32) + primals_107 = rand_strided((768, ), (1, ), device='cuda:0', dtype=torch.float32) + primals_108 = rand_strided((3072, 768), (768, 1), device='cuda:0', dtype=torch.float32) + primals_109 = rand_strided((3072, ), (1, ), device='cuda:0', dtype=torch.float32) + primals_110 = rand_strided((768, 3072), (3072, 1), device='cuda:0', dtype=torch.float32) + primals_111 = rand_strided((768, ), (1, ), device='cuda:0', dtype=torch.float32) + primals_112 = rand_strided((768, ), (1, ), device='cuda:0', dtype=torch.float32) + primals_113 = rand_strided((768, ), (1, ), device='cuda:0', dtype=torch.float32) + primals_114 = rand_strided((2304, 768), (768, 1), device='cuda:0', dtype=torch.float32) + primals_115 = rand_strided((2304, ), (1, ), device='cuda:0', dtype=torch.float32) + primals_116 = rand_strided((768, 768), (768, 1), device='cuda:0', dtype=torch.float32) + primals_117 = rand_strided((768, ), (1, ), device='cuda:0', dtype=torch.float32) + primals_118 = rand_strided((768, ), (1, ), device='cuda:0', dtype=torch.float32) + primals_119 = rand_strided((768, ), (1, ), device='cuda:0', dtype=torch.float32) + primals_120 = rand_strided((3072, 768), (768, 1), device='cuda:0', dtype=torch.float32) + primals_121 = rand_strided((3072, ), (1, ), device='cuda:0', dtype=torch.float32) + primals_122 = rand_strided((768, 3072), (3072, 1), device='cuda:0', dtype=torch.float32) + primals_123 = rand_strided((768, ), (1, ), device='cuda:0', dtype=torch.float32) + primals_124 = rand_strided((768, ), (1, ), device='cuda:0', dtype=torch.float32) + primals_125 = rand_strided((768, ), (1, ), device='cuda:0', dtype=torch.float32) + primals_126 = rand_strided((2304, 768), (768, 1), device='cuda:0', dtype=torch.float32) + primals_127 = rand_strided((2304, ), (1, ), device='cuda:0', dtype=torch.float32) + primals_128 = rand_strided((768, 768), (768, 1), device='cuda:0', dtype=torch.float32) + primals_129 = rand_strided((768, ), (1, ), device='cuda:0', dtype=torch.float32) + primals_130 = rand_strided((768, ), (1, ), device='cuda:0', dtype=torch.float32) + primals_131 = rand_strided((768, ), (1, ), device='cuda:0', dtype=torch.float32) + primals_132 = rand_strided((3072, 768), (768, 1), device='cuda:0', dtype=torch.float32) + primals_133 = rand_strided((3072, ), (1, ), device='cuda:0', dtype=torch.float32) + primals_134 = rand_strided((768, 3072), (3072, 1), device='cuda:0', dtype=torch.float32) + primals_135 = rand_strided((768, ), (1, ), device='cuda:0', dtype=torch.float32) + 
primals_136 = rand_strided((768, ), (1, ), device='cuda:0', dtype=torch.float32) + primals_137 = rand_strided((768, ), (1, ), device='cuda:0', dtype=torch.float32) + primals_138 = rand_strided((2304, 768), (768, 1), device='cuda:0', dtype=torch.float32) + primals_139 = rand_strided((2304, ), (1, ), device='cuda:0', dtype=torch.float32) + primals_140 = rand_strided((768, 768), (768, 1), device='cuda:0', dtype=torch.float32) + primals_141 = rand_strided((768, ), (1, ), device='cuda:0', dtype=torch.float32) + primals_142 = rand_strided((768, ), (1, ), device='cuda:0', dtype=torch.float32) + primals_143 = rand_strided((768, ), (1, ), device='cuda:0', dtype=torch.float32) + primals_144 = rand_strided((3072, 768), (768, 1), device='cuda:0', dtype=torch.float32) + primals_145 = rand_strided((3072, ), (1, ), device='cuda:0', dtype=torch.float32) + primals_146 = rand_strided((768, 3072), (3072, 1), device='cuda:0', dtype=torch.float32) + primals_147 = rand_strided((768, ), (1, ), device='cuda:0', dtype=torch.float32) + primals_148 = rand_strided((768, ), (1, ), device='cuda:0', dtype=torch.float32) + primals_149 = rand_strided((768, ), (1, ), device='cuda:0', dtype=torch.float32) + fn = lambda: call([primals_1, primals_2, primals_3, primals_4, primals_5, primals_6, primals_7, primals_8, primals_9, primals_10, primals_11, primals_12, primals_13, primals_14, primals_15, primals_16, primals_17, primals_18, primals_19, primals_20, primals_21, primals_22, primals_23, primals_24, primals_25, primals_26, primals_27, primals_28, primals_29, primals_30, primals_31, primals_32, primals_33, primals_34, primals_35, primals_36, primals_37, primals_38, primals_39, primals_40, primals_41, primals_42, primals_43, primals_44, primals_45, primals_46, primals_47, primals_48, primals_49, primals_50, primals_51, primals_52, primals_53, primals_54, primals_55, primals_56, primals_57, primals_58, primals_59, primals_60, primals_61, primals_62, primals_63, primals_64, primals_65, primals_66, primals_67, primals_68, primals_69, primals_70, primals_71, primals_72, primals_73, primals_74, primals_75, primals_76, primals_77, primals_78, primals_79, primals_80, primals_81, primals_82, primals_83, primals_84, primals_85, primals_86, primals_87, primals_88, primals_89, primals_90, primals_91, primals_92, primals_93, primals_94, primals_95, primals_96, primals_97, primals_98, primals_99, primals_100, primals_101, primals_102, primals_103, primals_104, primals_105, primals_106, primals_107, primals_108, primals_109, primals_110, primals_111, primals_112, primals_113, primals_114, primals_115, primals_116, primals_117, primals_118, primals_119, primals_120, primals_121, primals_122, primals_123, primals_124, primals_125, primals_126, primals_127, primals_128, primals_129, primals_130, primals_131, primals_132, primals_133, primals_134, primals_135, primals_136, primals_137, primals_138, primals_139, primals_140, primals_141, primals_142, primals_143, primals_144, primals_145, primals_146, primals_147, primals_148, primals_149]) + return print_performance(fn, times=times, repeat=repeat) + + + if __name__ == "__main__": + from torch._inductor.wrapper_benchmark import compiled_module_main + compiled_module_main('nanogpt', benchmark_compiled_module) + +V0806 13:56:00.675000 4107173 torch/_dynamo/utils.py:824] {"chromium_event": {}, "has_payload": "01eed3091dbbef1e669dd3cfc3e8bc47"} + { + "name": "code_gen", + "ts": 1722977760675133.0, + "args": null, + "ph": "E", + "pid": 0 + } +V0806 13:56:00.675000 4107173 torch/_dynamo/utils.py:824] 
{"chromium_event": {}, "has_payload": "30b06250406c3bf4a74c6efec9b9ba81"} + { + "name": "GraphLowering.compile_to_module", + "ts": 1722977760675319.2, + "args": null, + "ph": "E", + "pid": 0 + } +V0806 13:56:00.757000 4107173 torch/_dynamo/utils.py:838] {"chromium_event": {}, "has_payload": "8e46dd4165fe90419697fbe6a8a7e189"} + { + "name": "fx_graph_cache_miss", + "ts": 1722977756402576.5, + "args": { + "key": "fawswmdqdoeabru4cngdomrqdcmfg5ehi5bfp4lz3lpw74xd2r5q", + "cache_state": "miss", + "components": [ + "[7gdnkoxeguoowip7ectpux5j62uq56ccdoktbdshbvoqarzspmh] gm: GraphModule()\n\n\n\ndef forward(self, primals_1, primals_2, primals_3, primals_4, primals_5, primals_6, primals_7, primals_8, primals_9, primals_10, primals_11, primals_12, primals_13, primals_14, primals_15, primals_16, primals_17, primals_18, primals_19, primals_20, primals_21, primals_22, primals_23, primals_24, primals_25, primals_26, primals_27, primals_28, primals_29, primals_30, primals_31, primals_32, primals_33, primals_34, primals_35, primals_36, primals_37, primals_38, primals_39, primals_40, primals_41, primals_42, primals_43, primals_44, primals_45, primals_46, primals_47, primals_48, primals_49, primals_50, primals_51, primals_52, primals_53, primals_54, primals_55, primals_56, primals_57, primals_58, primals_59, primals_60, primals_61, primals_62, primals_63, primals_64, primals_65, primals_66, primals_67, primals_68, primals_69, primals_70, primals_71, primals_72, primals_73, primals_74, primals_75, primals_76, primals_77, primals_78, primals_79, primals_80, primals_81, primals_82, primals_83, primals_84, primals_85, primals_86, primals_87, primals_88, primals_89, primals_90, primals_91, primals_92, primals_93, primals_94, primals_95, primals_96, primals_97, primals_98, primals_99, primals_100, primals_101, primals_102, primals_103, primals_104, primals_105, primals_106, primals_107, primals_108, primals_109, primals_110, primals_111, primals_112, primals_113, primals_114, primals_115, primals_116, primals_117, primals_118, primals_119, primals_120, primals_121, primals_122, primals_123, primals_124, primals_125, primals_126, primals_127, primals_128, primals_129, primals_130, primals_131, primals_132, primals_133, primals_134, primals_135, primals_136, primals_137, primals_138, primals_139, primals_140, primals_141, primals_142, primals_143, primals_144, primals_145, primals_146, primals_147, primals_148, primals_149):\n iota = torch.ops.prims.iota.default(64, start = 0, step = 1, dtype = torch.int64, device = device(type='cuda', index=0), requires_grad = False)\n unsqueeze = torch.ops.aten.unsqueeze.default(iota, 0); iota = None\n embedding = torch.ops.aten.embedding.default(primals_2, primals_1)\n embedding_1 = torch.ops.aten.embedding.default(primals_3, unsqueeze); primals_3 = None\n add = torch.ops.aten.add.Tensor(embedding, embedding_1); embedding = embedding_1 = None\n var_mean = torch.ops.aten.var_mean.correction(add, [2], correction = 0, keepdim = True)\n getitem = var_mean[0]\n getitem_1 = var_mean[1]; var_mean = None\n add_1 = torch.ops.aten.add.Tensor(getitem, 1e-05); getitem = None\n rsqrt = torch.ops.aten.rsqrt.default(add_1); add_1 = None\n sub = torch.ops.aten.sub.Tensor(add, getitem_1); getitem_1 = None\n mul = torch.ops.aten.mul.Tensor(sub, rsqrt); sub = None\n mul_1 = torch.ops.aten.mul.Tensor(mul, primals_4)\n add_2 = torch.ops.aten.add.Tensor(mul_1, primals_5); mul_1 = primals_5 = None\n view = torch.ops.aten.view.default(add_2, [64, 768]); add_2 = None\n permute = 
torch.ops.aten.permute.default(primals_6, [1, 0]); primals_6 = None\n addmm = torch.ops.aten.addmm.default(primals_7, view, permute); primals_7 = None\n view_1 = torch.ops.aten.view.default(addmm, [1, 64, 2304]); addmm = None\n split = torch.ops.aten.split.Tensor(view_1, 768, 2); view_1 = None\n getitem_2 = split[0]\n getitem_3 = split[1]\n getitem_4 = split[2]; split = None\n view_2 = torch.ops.aten.view.default(getitem_3, [1, 64, 12, 64]); getitem_3 = None\n permute_1 = torch.ops.aten.permute.default(view_2, [0, 2, 1, 3]); view_2 = None\n view_3 = torch.ops.aten.view.default(getitem_2, [1, 64, 12, 64]); getitem_2 = None\n permute_2 = torch.ops.aten.permute.default(view_3, [0, 2, 1, 3]); view_3 = None\n view_4 = torch.ops.aten.view.default(getitem_4, [1, 64, 12, 64]); getitem_4 = None\n permute_3 = torch.ops.aten.permute.default(view_4, [0, 2, 1, 3]); view_4 = None\n _scaled_dot_product_efficient_attention = torch.ops.aten._scaled_dot_product_efficient_attention.default(permute_2, permute_1, permute_3, None, True, 0.0, True)\n getitem_5 = _scaled_dot_product_efficient_attention[0]\n getitem_6 = _scaled_dot_product_efficient_attention[1]\n getitem_7 = _scaled_dot_product_efficient_attention[2]\n getitem_8 = _scaled_dot_product_efficient_attention[3]; _scaled_dot_product_efficient_attention = None\n permute_4 = torch.ops.aten.permute.default(getitem_5, [0, 2, 1, 3])\n view_5 = torch.ops.aten.view.default(permute_4, [1, 64, 768]); permute_4 = None\n view_6 = torch.ops.aten.view.default(view_5, [64, 768]); view_5 = None\n permute_5 = torch.ops.aten.permute.default(primals_8, [1, 0]); primals_8 = None\n addmm_1 = torch.ops.aten.addmm.default(primals_9, view_6, permute_5); primals_9 = view_6 = None\n view_7 = torch.ops.aten.view.default(addmm_1, [1, 64, 768]); addmm_1 = None\n add_3 = torch.ops.aten.add.Tensor(add, view_7); add = view_7 = None\n var_mean_1 = torch.ops.aten.var_mean.correction(add_3, [2], correction = 0, keepdim = True)\n getitem_9 = var_mean_1[0]\n getitem_10 = var_mean_1[1]; var_mean_1 = None\n add_4 = torch.ops.aten.add.Tensor(getitem_9, 1e-05); getitem_9 = None\n rsqrt_1 = torch.ops.aten.rsqrt.default(add_4); add_4 = None\n sub_1 = torch.ops.aten.sub.Tensor(add_3, getitem_10); getitem_10 = None\n mul_2 = torch.ops.aten.mul.Tensor(sub_1, rsqrt_1); sub_1 = None\n mul_3 = torch.ops.aten.mul.Tensor(mul_2, primals_10)\n add_5 = torch.ops.aten.add.Tensor(mul_3, primals_11); mul_3 = primals_11 = None\n view_8 = torch.ops.aten.view.default(add_5, [64, 768]); add_5 = None\n permute_6 = torch.ops.aten.permute.default(primals_12, [1, 0]); primals_12 = None\n addmm_2 = torch.ops.aten.addmm.default(primals_13, view_8, permute_6); primals_13 = None\n view_9 = torch.ops.aten.view.default(addmm_2, [1, 64, 3072])\n mul_4 = torch.ops.aten.mul.Tensor(view_9, 0.5)\n pow_1 = torch.ops.aten.pow.Tensor_Scalar(view_9, 3.0)\n mul_5 = torch.ops.aten.mul.Tensor(pow_1, 0.044715); pow_1 = None\n add_6 = torch.ops.aten.add.Tensor(view_9, mul_5); view_9 = mul_5 = None\n mul_6 = torch.ops.aten.mul.Tensor(add_6, 0.7978845608028654); add_6 = None\n tanh = torch.ops.aten.tanh.default(mul_6); mul_6 = None\n add_7 = torch.ops.aten.add.Tensor(tanh, 1.0); tanh = None\n mul_7 = torch.ops.aten.mul.Tensor(mul_4, add_7); mul_4 = add_7 = None\n view_10 = torch.ops.aten.view.default(mul_7, [64, 3072]); mul_7 = None\n permute_7 = torch.ops.aten.permute.default(primals_14, [1, 0]); primals_14 = None\n addmm_3 = torch.ops.aten.addmm.default(primals_15, view_10, permute_7); primals_15 = None\n view_11 = 
torch.ops.aten.view.default(addmm_3, [1, 64, 768]); addmm_3 = None\n add_8 = torch.ops.aten.add.Tensor(add_3, view_11); add_3 = view_11 = None\n var_mean_2 = torch.ops.aten.var_mean.correction(add_8, [2], correction = 0, keepdim = True)\n getitem_11 = var_mean_2[0]\n getitem_12 = var_mean_2[1]; var_mean_2 = None\n add_9 = torch.ops.aten.add.Tensor(getitem_11, 1e-05); getitem_11 = None\n rsqrt_2 = torch.ops.aten.rsqrt.default(add_9); add_9 = None\n sub_2 = torch.ops.aten.sub.Tensor(add_8, getitem_12); getitem_12 = None\n mul_8 = torch.ops.aten.mul.Tensor(sub_2, rsqrt_2); sub_2 = None\n mul_9 = torch.ops.aten.mul.Tensor(mul_8, primals_16)\n add_10 = torch.ops.aten.add.Tensor(mul_9, primals_17); mul_9 = primals_17 = None\n view_12 = torch.ops.aten.view.default(add_10, [64, 768]); add_10 = None\n permute_8 = torch.ops.aten.permute.default(primals_18, [1, 0]); primals_18 = None\n addmm_4 = torch.ops.aten.addmm.default(primals_19, view_12, permute_8); primals_19 = None\n view_13 = torch.ops.aten.view.default(addmm_4, [1, 64, 2304]); addmm_4 = None\n split_1 = torch.ops.aten.split.Tensor(view_13, 768, 2); view_13 = None\n getitem_13 = split_1[0]\n getitem_14 = split_1[1]\n getitem_15 = split_1[2]; split_1 = None\n view_14 = torch.ops.aten.view.default(getitem_14, [1, 64, 12, 64]); getitem_14 = None\n permute_9 = torch.ops.aten.permute.default(view_14, [0, 2, 1, 3]); view_14 = None\n view_15 = torch.ops.aten.view.default(getitem_13, [1, 64, 12, 64]); getitem_13 = None\n permute_10 = torch.ops.aten.permute.default(view_15, [0, 2, 1, 3]); view_15 = None\n view_16 = torch.ops.aten.view.default(getitem_15, [1, 64, 12, 64]); getitem_15 = None\n permute_11 = torch.ops.aten.permute.default(view_16, [0, 2, 1, 3]); view_16 = None\n _scaled_dot_product_efficient_attention_1 = torch.ops.aten._scaled_dot_product_efficient_attention.default(permute_10, permute_9, permute_11, None, True, 0.0, True)\n getitem_16 = _scaled_dot_product_efficient_attention_1[0]\n getitem_17 = _scaled_dot_product_efficient_attention_1[1]\n getitem_18 = _scaled_dot_product_efficient_attention_1[2]\n getitem_19 = _scaled_dot_product_efficient_attention_1[3]; _scaled_dot_product_efficient_attention_1 = None\n permute_12 = torch.ops.aten.permute.default(getitem_16, [0, 2, 1, 3])\n view_17 = torch.ops.aten.view.default(permute_12, [1, 64, 768]); permute_12 = None\n view_18 = torch.ops.aten.view.default(view_17, [64, 768]); view_17 = None\n permute_13 = torch.ops.aten.permute.default(primals_20, [1, 0]); primals_20 = None\n addmm_5 = torch.ops.aten.addmm.default(primals_21, view_18, permute_13); primals_21 = view_18 = None\n view_19 = torch.ops.aten.view.default(addmm_5, [1, 64, 768]); addmm_5 = None\n add_11 = torch.ops.aten.add.Tensor(add_8, view_19); add_8 = view_19 = None\n var_mean_3 = torch.ops.aten.var_mean.correction(add_11, [2], correction = 0, keepdim = True)\n getitem_20 = var_mean_3[0]\n getitem_21 = var_mean_3[1]; var_mean_3 = None\n add_12 = torch.ops.aten.add.Tensor(getitem_20, 1e-05); getitem_20 = None\n rsqrt_3 = torch.ops.aten.rsqrt.default(add_12); add_12 = None\n sub_3 = torch.ops.aten.sub.Tensor(add_11, getitem_21); getitem_21 = None\n mul_10 = torch.ops.aten.mul.Tensor(sub_3, rsqrt_3); sub_3 = None\n mul_11 = torch.ops.aten.mul.Tensor(mul_10, primals_22)\n add_13 = torch.ops.aten.add.Tensor(mul_11, primals_23); mul_11 = primals_23 = None\n view_20 = torch.ops.aten.view.default(add_13, [64, 768]); add_13 = None\n permute_14 = torch.ops.aten.permute.default(primals_24, [1, 0]); primals_24 = None\n addmm_6 = 
torch.ops.aten.addmm.default(primals_25, view_20, permute_14); primals_25 = None\n view_21 = torch.ops.aten.view.default(addmm_6, [1, 64, 3072])\n mul_12 = torch.ops.aten.mul.Tensor(view_21, 0.5)\n pow_2 = torch.ops.aten.pow.Tensor_Scalar(view_21, 3.0)\n mul_13 = torch.ops.aten.mul.Tensor(pow_2, 0.044715); pow_2 = None\n add_14 = torch.ops.aten.add.Tensor(view_21, mul_13); view_21 = mul_13 = None\n mul_14 = torch.ops.aten.mul.Tensor(add_14, 0.7978845608028654); add_14 = None\n tanh_1 = torch.ops.aten.tanh.default(mul_14); mul_14 = None\n add_15 = torch.ops.aten.add.Tensor(tanh_1, 1.0); tanh_1 = None\n mul_15 = torch.ops.aten.mul.Tensor(mul_12, add_15); mul_12 = add_15 = None\n view_22 = torch.ops.aten.view.default(mul_15, [64, 3072]); mul_15 = None\n permute_15 = torch.ops.aten.permute.default(primals_26, [1, 0]); primals_26 = None\n addmm_7 = torch.ops.aten.addmm.default(primals_27, view_22, permute_15); primals_27 = None\n view_23 = torch.ops.aten.view.default(addmm_7, [1, 64, 768]); addmm_7 = None\n add_16 = torch.ops.aten.add.Tensor(add_11, view_23); add_11 = view_23 = None\n var_mean_4 = torch.ops.aten.var_mean.correction(add_16, [2], correction = 0, keepdim = True)\n getitem_22 = var_mean_4[0]\n getitem_23 = var_mean_4[1]; var_mean_4 = None\n add_17 = torch.ops.aten.add.Tensor(getitem_22, 1e-05); getitem_22 = None\n rsqrt_4 = torch.ops.aten.rsqrt.default(add_17); add_17 = None\n sub_4 = torch.ops.aten.sub.Tensor(add_16, getitem_23); getitem_23 = None\n mul_16 = torch.ops.aten.mul.Tensor(sub_4, rsqrt_4); sub_4 = None\n mul_17 = torch.ops.aten.mul.Tensor(mul_16, primals_28)\n add_18 = torch.ops.aten.add.Tensor(mul_17, primals_29); mul_17 = primals_29 = None\n view_24 = torch.ops.aten.view.default(add_18, [64, 768]); add_18 = None\n permute_16 = torch.ops.aten.permute.default(primals_30, [1, 0]); primals_30 = None\n addmm_8 = torch.ops.aten.addmm.default(primals_31, view_24, permute_16); primals_31 = None\n view_25 = torch.ops.aten.view.default(addmm_8, [1, 64, 2304]); addmm_8 = None\n split_2 = torch.ops.aten.split.Tensor(view_25, 768, 2); view_25 = None\n getitem_24 = split_2[0]\n getitem_25 = split_2[1]\n getitem_26 = split_2[2]; split_2 = None\n view_26 = torch.ops.aten.view.default(getitem_25, [1, 64, 12, 64]); getitem_25 = None\n permute_17 = torch.ops.aten.permute.default(view_26, [0, 2, 1, 3]); view_26 = None\n view_27 = torch.ops.aten.view.default(getitem_24, [1, 64, 12, 64]); getitem_24 = None\n permute_18 = torch.ops.aten.permute.default(view_27, [0, 2, 1, 3]); view_27 = None\n view_28 = torch.ops.aten.view.default(getitem_26, [1, 64, 12, 64]); getitem_26 = None\n permute_19 = torch.ops.aten.permute.default(view_28, [0, 2, 1, 3]); view_28 = None\n _scaled_dot_product_efficient_attention_2 = torch.ops.aten._scaled_dot_product_efficient_attention.default(permute_18, permute_17, permute_19, None, True, 0.0, True)\n getitem_27 = _scaled_dot_product_efficient_attention_2[0]\n getitem_28 = _scaled_dot_product_efficient_attention_2[1]\n getitem_29 = _scaled_dot_product_efficient_attention_2[2]\n getitem_30 = _scaled_dot_product_efficient_attention_2[3]; _scaled_dot_product_efficient_attention_2 = None\n permute_20 = torch.ops.aten.permute.default(getitem_27, [0, 2, 1, 3])\n view_29 = torch.ops.aten.view.default(permute_20, [1, 64, 768]); permute_20 = None\n view_30 = torch.ops.aten.view.default(view_29, [64, 768]); view_29 = None\n permute_21 = torch.ops.aten.permute.default(primals_32, [1, 0]); primals_32 = None\n addmm_9 = torch.ops.aten.addmm.default(primals_33, view_30, 
permute_21); primals_33 = view_30 = None\n view_31 = torch.ops.aten.view.default(addmm_9, [1, 64, 768]); addmm_9 = None\n add_19 = torch.ops.aten.add.Tensor(add_16, view_31); add_16 = view_31 = None\n var_mean_5 = torch.ops.aten.var_mean.correction(add_19, [2], correction = 0, keepdim = True)\n getitem_31 = var_mean_5[0]\n getitem_32 = var_mean_5[1]; var_mean_5 = None\n add_20 = torch.ops.aten.add.Tensor(getitem_31, 1e-05); getitem_31 = None\n rsqrt_5 = torch.ops.aten.rsqrt.default(add_20); add_20 = None\n sub_5 = torch.ops.aten.sub.Tensor(add_19, getitem_32); getitem_32 = None\n mul_18 = torch.ops.aten.mul.Tensor(sub_5, rsqrt_5); sub_5 = None\n mul_19 = torch.ops.aten.mul.Tensor(mul_18, primals_34)\n add_21 = torch.ops.aten.add.Tensor(mul_19, primals_35); mul_19 = primals_35 = None\n view_32 = torch.ops.aten.view.default(add_21, [64, 768]); add_21 = None\n permute_22 = torch.ops.aten.permute.default(primals_36, [1, 0]); primals_36 = None\n addmm_10 = torch.ops.aten.addmm.default(primals_37, view_32, permute_22); primals_37 = None\n view_33 = torch.ops.aten.view.default(addmm_10, [1, 64, 3072])\n mul_20 = torch.ops.aten.mul.Tensor(view_33, 0.5)\n pow_3 = torch.ops.aten.pow.Tensor_Scalar(view_33, 3.0)\n mul_21 = torch.ops.aten.mul.Tensor(pow_3, 0.044715); pow_3 = None\n add_22 = torch.ops.aten.add.Tensor(view_33, mul_21); view_33 = mul_21 = None\n mul_22 = torch.ops.aten.mul.Tensor(add_22, 0.7978845608028654); add_22 = None\n tanh_2 = torch.ops.aten.tanh.default(mul_22); mul_22 = None\n add_23 = torch.ops.aten.add.Tensor(tanh_2, 1.0); tanh_2 = None\n mul_23 = torch.ops.aten.mul.Tensor(mul_20, add_23); mul_20 = add_23 = None\n view_34 = torch.ops.aten.view.default(mul_23, [64, 3072]); mul_23 = None\n permute_23 = torch.ops.aten.permute.default(primals_38, [1, 0]); primals_38 = None\n addmm_11 = torch.ops.aten.addmm.default(primals_39, view_34, permute_23); primals_39 = None\n view_35 = torch.ops.aten.view.default(addmm_11, [1, 64, 768]); addmm_11 = None\n add_24 = torch.ops.aten.add.Tensor(add_19, view_35); add_19 = view_35 = None\n var_mean_6 = torch.ops.aten.var_mean.correction(add_24, [2], correction = 0, keepdim = True)\n getitem_33 = var_mean_6[0]\n getitem_34 = var_mean_6[1]; var_mean_6 = None\n add_25 = torch.ops.aten.add.Tensor(getitem_33, 1e-05); getitem_33 = None\n rsqrt_6 = torch.ops.aten.rsqrt.default(add_25); add_25 = None\n sub_6 = torch.ops.aten.sub.Tensor(add_24, getitem_34); getitem_34 = None\n mul_24 = torch.ops.aten.mul.Tensor(sub_6, rsqrt_6); sub_6 = None\n mul_25 = torch.ops.aten.mul.Tensor(mul_24, primals_40)\n add_26 = torch.ops.aten.add.Tensor(mul_25, primals_41); mul_25 = primals_41 = None\n view_36 = torch.ops.aten.view.default(add_26, [64, 768]); add_26 = None\n permute_24 = torch.ops.aten.permute.default(primals_42, [1, 0]); primals_42 = None\n addmm_12 = torch.ops.aten.addmm.default(primals_43, view_36, permute_24); primals_43 = None\n view_37 = torch.ops.aten.view.default(addmm_12, [1, 64, 2304]); addmm_12 = None\n split_3 = torch.ops.aten.split.Tensor(view_37, 768, 2); view_37 = None\n getitem_35 = split_3[0]\n getitem_36 = split_3[1]\n getitem_37 = split_3[2]; split_3 = None\n view_38 = torch.ops.aten.view.default(getitem_36, [1, 64, 12, 64]); getitem_36 = None\n permute_25 = torch.ops.aten.permute.default(view_38, [0, 2, 1, 3]); view_38 = None\n view_39 = torch.ops.aten.view.default(getitem_35, [1, 64, 12, 64]); getitem_35 = None\n permute_26 = torch.ops.aten.permute.default(view_39, [0, 2, 1, 3]); view_39 = None\n view_40 = 
torch.ops.aten.view.default(getitem_37, [1, 64, 12, 64]); getitem_37 = None\n permute_27 = torch.ops.aten.permute.default(view_40, [0, 2, 1, 3]); view_40 = None\n _scaled_dot_product_efficient_attention_3 = torch.ops.aten._scaled_dot_product_efficient_attention.default(permute_26, permute_25, permute_27, None, True, 0.0, True)\n getitem_38 = _scaled_dot_product_efficient_attention_3[0]\n getitem_39 = _scaled_dot_product_efficient_attention_3[1]\n getitem_40 = _scaled_dot_product_efficient_attention_3[2]\n getitem_41 = _scaled_dot_product_efficient_attention_3[3]; _scaled_dot_product_efficient_attention_3 = None\n permute_28 = torch.ops.aten.permute.default(getitem_38, [0, 2, 1, 3])\n view_41 = torch.ops.aten.view.default(permute_28, [1, 64, 768]); permute_28 = None\n view_42 = torch.ops.aten.view.default(view_41, [64, 768]); view_41 = None\n permute_29 = torch.ops.aten.permute.default(primals_44, [1, 0]); primals_44 = None\n addmm_13 = torch.ops.aten.addmm.default(primals_45, view_42, permute_29); primals_45 = view_42 = None\n view_43 = torch.ops.aten.view.default(addmm_13, [1, 64, 768]); addmm_13 = None\n add_27 = torch.ops.aten.add.Tensor(add_24, view_43); add_24 = view_43 = None\n var_mean_7 = torch.ops.aten.var_mean.correction(add_27, [2], correction = 0, keepdim = True)\n getitem_42 = var_mean_7[0]\n getitem_43 = var_mean_7[1]; var_mean_7 = None\n add_28 = torch.ops.aten.add.Tensor(getitem_42, 1e-05); getitem_42 = None\n rsqrt_7 = torch.ops.aten.rsqrt.default(add_28); add_28 = None\n sub_7 = torch.ops.aten.sub.Tensor(add_27, getitem_43); getitem_43 = None\n mul_26 = torch.ops.aten.mul.Tensor(sub_7, rsqrt_7); sub_7 = None\n mul_27 = torch.ops.aten.mul.Tensor(mul_26, primals_46)\n add_29 = torch.ops.aten.add.Tensor(mul_27, primals_47); mul_27 = primals_47 = None\n view_44 = torch.ops.aten.view.default(add_29, [64, 768]); add_29 = None\n permute_30 = torch.ops.aten.permute.default(primals_48, [1, 0]); primals_48 = None\n addmm_14 = torch.ops.aten.addmm.default(primals_49, view_44, permute_30); primals_49 = None\n view_45 = torch.ops.aten.view.default(addmm_14, [1, 64, 3072])\n mul_28 = torch.ops.aten.mul.Tensor(view_45, 0.5)\n pow_4 = torch.ops.aten.pow.Tensor_Scalar(view_45, 3.0)\n mul_29 = torch.ops.aten.mul.Tensor(pow_4, 0.044715); pow_4 = None\n add_30 = torch.ops.aten.add.Tensor(view_45, mul_29); view_45 = mul_29 = None\n mul_30 = torch.ops.aten.mul.Tensor(add_30, 0.7978845608028654); add_30 = None\n tanh_3 = torch.ops.aten.tanh.default(mul_30); mul_30 = None\n add_31 = torch.ops.aten.add.Tensor(tanh_3, 1.0); tanh_3 = None\n mul_31 = torch.ops.aten.mul.Tensor(mul_28, add_31); mul_28 = add_31 = None\n view_46 = torch.ops.aten.view.default(mul_31, [64, 3072]); mul_31 = None\n permute_31 = torch.ops.aten.permute.default(primals_50, [1, 0]); primals_50 = None\n addmm_15 = torch.ops.aten.addmm.default(primals_51, view_46, permute_31); primals_51 = None\n view_47 = torch.ops.aten.view.default(addmm_15, [1, 64, 768]); addmm_15 = None\n add_32 = torch.ops.aten.add.Tensor(add_27, view_47); add_27 = view_47 = None\n var_mean_8 = torch.ops.aten.var_mean.correction(add_32, [2], correction = 0, keepdim = True)\n getitem_44 = var_mean_8[0]\n getitem_45 = var_mean_8[1]; var_mean_8 = None\n add_33 = torch.ops.aten.add.Tensor(getitem_44, 1e-05); getitem_44 = None\n rsqrt_8 = torch.ops.aten.rsqrt.default(add_33); add_33 = None\n sub_8 = torch.ops.aten.sub.Tensor(add_32, getitem_45); getitem_45 = None\n mul_32 = torch.ops.aten.mul.Tensor(sub_8, rsqrt_8); sub_8 = None\n mul_33 = 
torch.ops.aten.mul.Tensor(mul_32, primals_52)\n add_34 = torch.ops.aten.add.Tensor(mul_33, primals_53); mul_33 = primals_53 = None\n view_48 = torch.ops.aten.view.default(add_34, [64, 768]); add_34 = None\n permute_32 = torch.ops.aten.permute.default(primals_54, [1, 0]); primals_54 = None\n addmm_16 = torch.ops.aten.addmm.default(primals_55, view_48, permute_32); primals_55 = None\n view_49 = torch.ops.aten.view.default(addmm_16, [1, 64, 2304]); addmm_16 = None\n split_4 = torch.ops.aten.split.Tensor(view_49, 768, 2); view_49 = None\n getitem_46 = split_4[0]\n getitem_47 = split_4[1]\n getitem_48 = split_4[2]; split_4 = None\n view_50 = torch.ops.aten.view.default(getitem_47, [1, 64, 12, 64]); getitem_47 = None\n permute_33 = torch.ops.aten.permute.default(view_50, [0, 2, 1, 3]); view_50 = None\n view_51 = torch.ops.aten.view.default(getitem_46, [1, 64, 12, 64]); getitem_46 = None\n permute_34 = torch.ops.aten.permute.default(view_51, [0, 2, 1, 3]); view_51 = None\n view_52 = torch.ops.aten.view.default(getitem_48, [1, 64, 12, 64]); getitem_48 = None\n permute_35 = torch.ops.aten.permute.default(view_52, [0, 2, 1, 3]); view_52 = None\n _scaled_dot_product_efficient_attention_4 = torch.ops.aten._scaled_dot_product_efficient_attention.default(permute_34, permute_33, permute_35, None, True, 0.0, True)\n getitem_49 = _scaled_dot_product_efficient_attention_4[0]\n getitem_50 = _scaled_dot_product_efficient_attention_4[1]\n getitem_51 = _scaled_dot_product_efficient_attention_4[2]\n getitem_52 = _scaled_dot_product_efficient_attention_4[3]; _scaled_dot_product_efficient_attention_4 = None\n permute_36 = torch.ops.aten.permute.default(getitem_49, [0, 2, 1, 3])\n view_53 = torch.ops.aten.view.default(permute_36, [1, 64, 768]); permute_36 = None\n view_54 = torch.ops.aten.view.default(view_53, [64, 768]); view_53 = None\n permute_37 = torch.ops.aten.permute.default(primals_56, [1, 0]); primals_56 = None\n addmm_17 = torch.ops.aten.addmm.default(primals_57, view_54, permute_37); primals_57 = view_54 = None\n view_55 = torch.ops.aten.view.default(addmm_17, [1, 64, 768]); addmm_17 = None\n add_35 = torch.ops.aten.add.Tensor(add_32, view_55); add_32 = view_55 = None\n var_mean_9 = torch.ops.aten.var_mean.correction(add_35, [2], correction = 0, keepdim = True)\n getitem_53 = var_mean_9[0]\n getitem_54 = var_mean_9[1]; var_mean_9 = None\n add_36 = torch.ops.aten.add.Tensor(getitem_53, 1e-05); getitem_53 = None\n rsqrt_9 = torch.ops.aten.rsqrt.default(add_36); add_36 = None\n sub_9 = torch.ops.aten.sub.Tensor(add_35, getitem_54); getitem_54 = None\n mul_34 = torch.ops.aten.mul.Tensor(sub_9, rsqrt_9); sub_9 = None\n mul_35 = torch.ops.aten.mul.Tensor(mul_34, primals_58)\n add_37 = torch.ops.aten.add.Tensor(mul_35, primals_59); mul_35 = primals_59 = None\n view_56 = torch.ops.aten.view.default(add_37, [64, 768]); add_37 = None\n permute_38 = torch.ops.aten.permute.default(primals_60, [1, 0]); primals_60 = None\n addmm_18 = torch.ops.aten.addmm.default(primals_61, view_56, permute_38); primals_61 = None\n view_57 = torch.ops.aten.view.default(addmm_18, [1, 64, 3072])\n mul_36 = torch.ops.aten.mul.Tensor(view_57, 0.5)\n pow_5 = torch.ops.aten.pow.Tensor_Scalar(view_57, 3.0)\n mul_37 = torch.ops.aten.mul.Tensor(pow_5, 0.044715); pow_5 = None\n add_38 = torch.ops.aten.add.Tensor(view_57, mul_37); view_57 = mul_37 = None\n mul_38 = torch.ops.aten.mul.Tensor(add_38, 0.7978845608028654); add_38 = None\n tanh_4 = torch.ops.aten.tanh.default(mul_38); mul_38 = None\n add_39 = torch.ops.aten.add.Tensor(tanh_4, 1.0); 
tanh_4 = None\n mul_39 = torch.ops.aten.mul.Tensor(mul_36, add_39); mul_36 = add_39 = None\n view_58 = torch.ops.aten.view.default(mul_39, [64, 3072]); mul_39 = None\n permute_39 = torch.ops.aten.permute.default(primals_62, [1, 0]); primals_62 = None\n addmm_19 = torch.ops.aten.addmm.default(primals_63, view_58, permute_39); primals_63 = None\n view_59 = torch.ops.aten.view.default(addmm_19, [1, 64, 768]); addmm_19 = None\n add_40 = torch.ops.aten.add.Tensor(add_35, view_59); add_35 = view_59 = None\n var_mean_10 = torch.ops.aten.var_mean.correction(add_40, [2], correction = 0, keepdim = True)\n getitem_55 = var_mean_10[0]\n getitem_56 = var_mean_10[1]; var_mean_10 = None\n add_41 = torch.ops.aten.add.Tensor(getitem_55, 1e-05); getitem_55 = None\n rsqrt_10 = torch.ops.aten.rsqrt.default(add_41); add_41 = None\n sub_10 = torch.ops.aten.sub.Tensor(add_40, getitem_56); getitem_56 = None\n mul_40 = torch.ops.aten.mul.Tensor(sub_10, rsqrt_10); sub_10 = None\n mul_41 = torch.ops.aten.mul.Tensor(mul_40, primals_64)\n add_42 = torch.ops.aten.add.Tensor(mul_41, primals_65); mul_41 = primals_65 = None\n view_60 = torch.ops.aten.view.default(add_42, [64, 768]); add_42 = None\n permute_40 = torch.ops.aten.permute.default(primals_66, [1, 0]); primals_66 = None\n addmm_20 = torch.ops.aten.addmm.default(primals_67, view_60, permute_40); primals_67 = None\n view_61 = torch.ops.aten.view.default(addmm_20, [1, 64, 2304]); addmm_20 = None\n split_5 = torch.ops.aten.split.Tensor(view_61, 768, 2); view_61 = None\n getitem_57 = split_5[0]\n getitem_58 = split_5[1]\n getitem_59 = split_5[2]; split_5 = None\n view_62 = torch.ops.aten.view.default(getitem_58, [1, 64, 12, 64]); getitem_58 = None\n permute_41 = torch.ops.aten.permute.default(view_62, [0, 2, 1, 3]); view_62 = None\n view_63 = torch.ops.aten.view.default(getitem_57, [1, 64, 12, 64]); getitem_57 = None\n permute_42 = torch.ops.aten.permute.default(view_63, [0, 2, 1, 3]); view_63 = None\n view_64 = torch.ops.aten.view.default(getitem_59, [1, 64, 12, 64]); getitem_59 = None\n permute_43 = torch.ops.aten.permute.default(view_64, [0, 2, 1, 3]); view_64 = None\n _scaled_dot_product_efficient_attention_5 = torch.ops.aten._scaled_dot_product_efficient_attention.default(permute_42, permute_41, permute_43, None, True, 0.0, True)\n getitem_60 = _scaled_dot_product_efficient_attention_5[0]\n getitem_61 = _scaled_dot_product_efficient_attention_5[1]\n getitem_62 = _scaled_dot_product_efficient_attention_5[2]\n getitem_63 = _scaled_dot_product_efficient_attention_5[3]; _scaled_dot_product_efficient_attention_5 = None\n permute_44 = torch.ops.aten.permute.default(getitem_60, [0, 2, 1, 3])\n view_65 = torch.ops.aten.view.default(permute_44, [1, 64, 768]); permute_44 = None\n view_66 = torch.ops.aten.view.default(view_65, [64, 768]); view_65 = None\n permute_45 = torch.ops.aten.permute.default(primals_68, [1, 0]); primals_68 = None\n addmm_21 = torch.ops.aten.addmm.default(primals_69, view_66, permute_45); primals_69 = view_66 = None\n view_67 = torch.ops.aten.view.default(addmm_21, [1, 64, 768]); addmm_21 = None\n add_43 = torch.ops.aten.add.Tensor(add_40, view_67); add_40 = view_67 = None\n var_mean_11 = torch.ops.aten.var_mean.correction(add_43, [2], correction = 0, keepdim = True)\n getitem_64 = var_mean_11[0]\n getitem_65 = var_mean_11[1]; var_mean_11 = None\n add_44 = torch.ops.aten.add.Tensor(getitem_64, 1e-05); getitem_64 = None\n rsqrt_11 = torch.ops.aten.rsqrt.default(add_44); add_44 = None\n sub_11 = torch.ops.aten.sub.Tensor(add_43, getitem_65); getitem_65 
= None\n mul_42 = torch.ops.aten.mul.Tensor(sub_11, rsqrt_11); sub_11 = None\n mul_43 = torch.ops.aten.mul.Tensor(mul_42, primals_70)\n add_45 = torch.ops.aten.add.Tensor(mul_43, primals_71); mul_43 = primals_71 = None\n view_68 = torch.ops.aten.view.default(add_45, [64, 768]); add_45 = None\n permute_46 = torch.ops.aten.permute.default(primals_72, [1, 0]); primals_72 = None\n addmm_22 = torch.ops.aten.addmm.default(primals_73, view_68, permute_46); primals_73 = None\n view_69 = torch.ops.aten.view.default(addmm_22, [1, 64, 3072])\n mul_44 = torch.ops.aten.mul.Tensor(view_69, 0.5)\n pow_6 = torch.ops.aten.pow.Tensor_Scalar(view_69, 3.0)\n mul_45 = torch.ops.aten.mul.Tensor(pow_6, 0.044715); pow_6 = None\n add_46 = torch.ops.aten.add.Tensor(view_69, mul_45); view_69 = mul_45 = None\n mul_46 = torch.ops.aten.mul.Tensor(add_46, 0.7978845608028654); add_46 = None\n tanh_5 = torch.ops.aten.tanh.default(mul_46); mul_46 = None\n add_47 = torch.ops.aten.add.Tensor(tanh_5, 1.0); tanh_5 = None\n mul_47 = torch.ops.aten.mul.Tensor(mul_44, add_47); mul_44 = add_47 = None\n view_70 = torch.ops.aten.view.default(mul_47, [64, 3072]); mul_47 = None\n permute_47 = torch.ops.aten.permute.default(primals_74, [1, 0]); primals_74 = None\n addmm_23 = torch.ops.aten.addmm.default(primals_75, view_70, permute_47); primals_75 = None\n view_71 = torch.ops.aten.view.default(addmm_23, [1, 64, 768]); addmm_23 = None\n add_48 = torch.ops.aten.add.Tensor(add_43, view_71); add_43 = view_71 = None\n var_mean_12 = torch.ops.aten.var_mean.correction(add_48, [2], correction = 0, keepdim = True)\n getitem_66 = var_mean_12[0]\n getitem_67 = var_mean_12[1]; var_mean_12 = None\n add_49 = torch.ops.aten.add.Tensor(getitem_66, 1e-05); getitem_66 = None\n rsqrt_12 = torch.ops.aten.rsqrt.default(add_49); add_49 = None\n sub_12 = torch.ops.aten.sub.Tensor(add_48, getitem_67); getitem_67 = None\n mul_48 = torch.ops.aten.mul.Tensor(sub_12, rsqrt_12); sub_12 = None\n mul_49 = torch.ops.aten.mul.Tensor(mul_48, primals_76)\n add_50 = torch.ops.aten.add.Tensor(mul_49, primals_77); mul_49 = primals_77 = None\n view_72 = torch.ops.aten.view.default(add_50, [64, 768]); add_50 = None\n permute_48 = torch.ops.aten.permute.default(primals_78, [1, 0]); primals_78 = None\n addmm_24 = torch.ops.aten.addmm.default(primals_79, view_72, permute_48); primals_79 = None\n view_73 = torch.ops.aten.view.default(addmm_24, [1, 64, 2304]); addmm_24 = None\n split_6 = torch.ops.aten.split.Tensor(view_73, 768, 2); view_73 = None\n getitem_68 = split_6[0]\n getitem_69 = split_6[1]\n getitem_70 = split_6[2]; split_6 = None\n view_74 = torch.ops.aten.view.default(getitem_69, [1, 64, 12, 64]); getitem_69 = None\n permute_49 = torch.ops.aten.permute.default(view_74, [0, 2, 1, 3]); view_74 = None\n view_75 = torch.ops.aten.view.default(getitem_68, [1, 64, 12, 64]); getitem_68 = None\n permute_50 = torch.ops.aten.permute.default(view_75, [0, 2, 1, 3]); view_75 = None\n view_76 = torch.ops.aten.view.default(getitem_70, [1, 64, 12, 64]); getitem_70 = None\n permute_51 = torch.ops.aten.permute.default(view_76, [0, 2, 1, 3]); view_76 = None\n _scaled_dot_product_efficient_attention_6 = torch.ops.aten._scaled_dot_product_efficient_attention.default(permute_50, permute_49, permute_51, None, True, 0.0, True)\n getitem_71 = _scaled_dot_product_efficient_attention_6[0]\n getitem_72 = _scaled_dot_product_efficient_attention_6[1]\n getitem_73 = _scaled_dot_product_efficient_attention_6[2]\n getitem_74 = _scaled_dot_product_efficient_attention_6[3]; 
_scaled_dot_product_efficient_attention_6 = None\n permute_52 = torch.ops.aten.permute.default(getitem_71, [0, 2, 1, 3])\n view_77 = torch.ops.aten.view.default(permute_52, [1, 64, 768]); permute_52 = None\n view_78 = torch.ops.aten.view.default(view_77, [64, 768]); view_77 = None\n permute_53 = torch.ops.aten.permute.default(primals_80, [1, 0]); primals_80 = None\n addmm_25 = torch.ops.aten.addmm.default(primals_81, view_78, permute_53); primals_81 = view_78 = None\n view_79 = torch.ops.aten.view.default(addmm_25, [1, 64, 768]); addmm_25 = None\n add_51 = torch.ops.aten.add.Tensor(add_48, view_79); add_48 = view_79 = None\n var_mean_13 = torch.ops.aten.var_mean.correction(add_51, [2], correction = 0, keepdim = True)\n getitem_75 = var_mean_13[0]\n getitem_76 = var_mean_13[1]; var_mean_13 = None\n add_52 = torch.ops.aten.add.Tensor(getitem_75, 1e-05); getitem_75 = None\n rsqrt_13 = torch.ops.aten.rsqrt.default(add_52); add_52 = None\n sub_13 = torch.ops.aten.sub.Tensor(add_51, getitem_76); getitem_76 = None\n mul_50 = torch.ops.aten.mul.Tensor(sub_13, rsqrt_13); sub_13 = None\n mul_51 = torch.ops.aten.mul.Tensor(mul_50, primals_82)\n add_53 = torch.ops.aten.add.Tensor(mul_51, primals_83); mul_51 = primals_83 = None\n view_80 = torch.ops.aten.view.default(add_53, [64, 768]); add_53 = None\n permute_54 = torch.ops.aten.permute.default(primals_84, [1, 0]); primals_84 = None\n addmm_26 = torch.ops.aten.addmm.default(primals_85, view_80, permute_54); primals_85 = None\n view_81 = torch.ops.aten.view.default(addmm_26, [1, 64, 3072])\n mul_52 = torch.ops.aten.mul.Tensor(view_81, 0.5)\n pow_7 = torch.ops.aten.pow.Tensor_Scalar(view_81, 3.0)\n mul_53 = torch.ops.aten.mul.Tensor(pow_7, 0.044715); pow_7 = None\n add_54 = torch.ops.aten.add.Tensor(view_81, mul_53); view_81 = mul_53 = None\n mul_54 = torch.ops.aten.mul.Tensor(add_54, 0.7978845608028654); add_54 = None\n tanh_6 = torch.ops.aten.tanh.default(mul_54); mul_54 = None\n add_55 = torch.ops.aten.add.Tensor(tanh_6, 1.0); tanh_6 = None\n mul_55 = torch.ops.aten.mul.Tensor(mul_52, add_55); mul_52 = add_55 = None\n view_82 = torch.ops.aten.view.default(mul_55, [64, 3072]); mul_55 = None\n permute_55 = torch.ops.aten.permute.default(primals_86, [1, 0]); primals_86 = None\n addmm_27 = torch.ops.aten.addmm.default(primals_87, view_82, permute_55); primals_87 = None\n view_83 = torch.ops.aten.view.default(addmm_27, [1, 64, 768]); addmm_27 = None\n add_56 = torch.ops.aten.add.Tensor(add_51, view_83); add_51 = view_83 = None\n var_mean_14 = torch.ops.aten.var_mean.correction(add_56, [2], correction = 0, keepdim = True)\n getitem_77 = var_mean_14[0]\n getitem_78 = var_mean_14[1]; var_mean_14 = None\n add_57 = torch.ops.aten.add.Tensor(getitem_77, 1e-05); getitem_77 = None\n rsqrt_14 = torch.ops.aten.rsqrt.default(add_57); add_57 = None\n sub_14 = torch.ops.aten.sub.Tensor(add_56, getitem_78); getitem_78 = None\n mul_56 = torch.ops.aten.mul.Tensor(sub_14, rsqrt_14); sub_14 = None\n mul_57 = torch.ops.aten.mul.Tensor(mul_56, primals_88)\n add_58 = torch.ops.aten.add.Tensor(mul_57, primals_89); mul_57 = primals_89 = None\n view_84 = torch.ops.aten.view.default(add_58, [64, 768]); add_58 = None\n permute_56 = torch.ops.aten.permute.default(primals_90, [1, 0]); primals_90 = None\n addmm_28 = torch.ops.aten.addmm.default(primals_91, view_84, permute_56); primals_91 = None\n view_85 = torch.ops.aten.view.default(addmm_28, [1, 64, 2304]); addmm_28 = None\n split_7 = torch.ops.aten.split.Tensor(view_85, 768, 2); view_85 = None\n getitem_79 = split_7[0]\n 
getitem_80 = split_7[1]\n getitem_81 = split_7[2]; split_7 = None\n view_86 = torch.ops.aten.view.default(getitem_80, [1, 64, 12, 64]); getitem_80 = None\n permute_57 = torch.ops.aten.permute.default(view_86, [0, 2, 1, 3]); view_86 = None\n view_87 = torch.ops.aten.view.default(getitem_79, [1, 64, 12, 64]); getitem_79 = None\n permute_58 = torch.ops.aten.permute.default(view_87, [0, 2, 1, 3]); view_87 = None\n view_88 = torch.ops.aten.view.default(getitem_81, [1, 64, 12, 64]); getitem_81 = None\n permute_59 = torch.ops.aten.permute.default(view_88, [0, 2, 1, 3]); view_88 = None\n _scaled_dot_product_efficient_attention_7 = torch.ops.aten._scaled_dot_product_efficient_attention.default(permute_58, permute_57, permute_59, None, True, 0.0, True)\n getitem_82 = _scaled_dot_product_efficient_attention_7[0]\n getitem_83 = _scaled_dot_product_efficient_attention_7[1]\n getitem_84 = _scaled_dot_product_efficient_attention_7[2]\n getitem_85 = _scaled_dot_product_efficient_attention_7[3]; _scaled_dot_product_efficient_attention_7 = None\n permute_60 = torch.ops.aten.permute.default(getitem_82, [0, 2, 1, 3])\n view_89 = torch.ops.aten.view.default(permute_60, [1, 64, 768]); permute_60 = None\n view_90 = torch.ops.aten.view.default(view_89, [64, 768]); view_89 = None\n permute_61 = torch.ops.aten.permute.default(primals_92, [1, 0]); primals_92 = None\n addmm_29 = torch.ops.aten.addmm.default(primals_93, view_90, permute_61); primals_93 = view_90 = None\n view_91 = torch.ops.aten.view.default(addmm_29, [1, 64, 768]); addmm_29 = None\n add_59 = torch.ops.aten.add.Tensor(add_56, view_91); add_56 = view_91 = None\n var_mean_15 = torch.ops.aten.var_mean.correction(add_59, [2], correction = 0, keepdim = True)\n getitem_86 = var_mean_15[0]\n getitem_87 = var_mean_15[1]; var_mean_15 = None\n add_60 = torch.ops.aten.add.Tensor(getitem_86, 1e-05); getitem_86 = None\n rsqrt_15 = torch.ops.aten.rsqrt.default(add_60); add_60 = None\n sub_15 = torch.ops.aten.sub.Tensor(add_59, getitem_87); getitem_87 = None\n mul_58 = torch.ops.aten.mul.Tensor(sub_15, rsqrt_15); sub_15 = None\n mul_59 = torch.ops.aten.mul.Tensor(mul_58, primals_94)\n add_61 = torch.ops.aten.add.Tensor(mul_59, primals_95); mul_59 = primals_95 = None\n view_92 = torch.ops.aten.view.default(add_61, [64, 768]); add_61 = None\n permute_62 = torch.ops.aten.permute.default(primals_96, [1, 0]); primals_96 = None\n addmm_30 = torch.ops.aten.addmm.default(primals_97, view_92, permute_62); primals_97 = None\n view_93 = torch.ops.aten.view.default(addmm_30, [1, 64, 3072])\n mul_60 = torch.ops.aten.mul.Tensor(view_93, 0.5)\n pow_8 = torch.ops.aten.pow.Tensor_Scalar(view_93, 3.0)\n mul_61 = torch.ops.aten.mul.Tensor(pow_8, 0.044715); pow_8 = None\n add_62 = torch.ops.aten.add.Tensor(view_93, mul_61); view_93 = mul_61 = None\n mul_62 = torch.ops.aten.mul.Tensor(add_62, 0.7978845608028654); add_62 = None\n tanh_7 = torch.ops.aten.tanh.default(mul_62); mul_62 = None\n add_63 = torch.ops.aten.add.Tensor(tanh_7, 1.0); tanh_7 = None\n mul_63 = torch.ops.aten.mul.Tensor(mul_60, add_63); mul_60 = add_63 = None\n view_94 = torch.ops.aten.view.default(mul_63, [64, 3072]); mul_63 = None\n permute_63 = torch.ops.aten.permute.default(primals_98, [1, 0]); primals_98 = None\n addmm_31 = torch.ops.aten.addmm.default(primals_99, view_94, permute_63); primals_99 = None\n view_95 = torch.ops.aten.view.default(addmm_31, [1, 64, 768]); addmm_31 = None\n add_64 = torch.ops.aten.add.Tensor(add_59, view_95); add_59 = view_95 = None\n var_mean_16 = 
torch.ops.aten.var_mean.correction(add_64, [2], correction = 0, keepdim = True)\n getitem_88 = var_mean_16[0]\n getitem_89 = var_mean_16[1]; var_mean_16 = None\n add_65 = torch.ops.aten.add.Tensor(getitem_88, 1e-05); getitem_88 = None\n rsqrt_16 = torch.ops.aten.rsqrt.default(add_65); add_65 = None\n sub_16 = torch.ops.aten.sub.Tensor(add_64, getitem_89); getitem_89 = None\n mul_64 = torch.ops.aten.mul.Tensor(sub_16, rsqrt_16); sub_16 = None\n mul_65 = torch.ops.aten.mul.Tensor(mul_64, primals_100)\n add_66 = torch.ops.aten.add.Tensor(mul_65, primals_101); mul_65 = primals_101 = None\n view_96 = torch.ops.aten.view.default(add_66, [64, 768]); add_66 = None\n permute_64 = torch.ops.aten.permute.default(primals_102, [1, 0]); primals_102 = None\n addmm_32 = torch.ops.aten.addmm.default(primals_103, view_96, permute_64); primals_103 = None\n view_97 = torch.ops.aten.view.default(addmm_32, [1, 64, 2304]); addmm_32 = None\n split_8 = torch.ops.aten.split.Tensor(view_97, 768, 2); view_97 = None\n getitem_90 = split_8[0]\n getitem_91 = split_8[1]\n getitem_92 = split_8[2]; split_8 = None\n view_98 = torch.ops.aten.view.default(getitem_91, [1, 64, 12, 64]); getitem_91 = None\n permute_65 = torch.ops.aten.permute.default(view_98, [0, 2, 1, 3]); view_98 = None\n view_99 = torch.ops.aten.view.default(getitem_90, [1, 64, 12, 64]); getitem_90 = None\n permute_66 = torch.ops.aten.permute.default(view_99, [0, 2, 1, 3]); view_99 = None\n view_100 = torch.ops.aten.view.default(getitem_92, [1, 64, 12, 64]); getitem_92 = None\n permute_67 = torch.ops.aten.permute.default(view_100, [0, 2, 1, 3]); view_100 = None\n _scaled_dot_product_efficient_attention_8 = torch.ops.aten._scaled_dot_product_efficient_attention.default(permute_66, permute_65, permute_67, None, True, 0.0, True)\n getitem_93 = _scaled_dot_product_efficient_attention_8[0]\n getitem_94 = _scaled_dot_product_efficient_attention_8[1]\n getitem_95 = _scaled_dot_product_efficient_attention_8[2]\n getitem_96 = _scaled_dot_product_efficient_attention_8[3]; _scaled_dot_product_efficient_attention_8 = None\n permute_68 = torch.ops.aten.permute.default(getitem_93, [0, 2, 1, 3])\n view_101 = torch.ops.aten.view.default(permute_68, [1, 64, 768]); permute_68 = None\n view_102 = torch.ops.aten.view.default(view_101, [64, 768]); view_101 = None\n permute_69 = torch.ops.aten.permute.default(primals_104, [1, 0]); primals_104 = None\n addmm_33 = torch.ops.aten.addmm.default(primals_105, view_102, permute_69); primals_105 = view_102 = None\n view_103 = torch.ops.aten.view.default(addmm_33, [1, 64, 768]); addmm_33 = None\n add_67 = torch.ops.aten.add.Tensor(add_64, view_103); add_64 = view_103 = None\n var_mean_17 = torch.ops.aten.var_mean.correction(add_67, [2], correction = 0, keepdim = True)\n getitem_97 = var_mean_17[0]\n getitem_98 = var_mean_17[1]; var_mean_17 = None\n add_68 = torch.ops.aten.add.Tensor(getitem_97, 1e-05); getitem_97 = None\n rsqrt_17 = torch.ops.aten.rsqrt.default(add_68); add_68 = None\n sub_17 = torch.ops.aten.sub.Tensor(add_67, getitem_98); getitem_98 = None\n mul_66 = torch.ops.aten.mul.Tensor(sub_17, rsqrt_17); sub_17 = None\n mul_67 = torch.ops.aten.mul.Tensor(mul_66, primals_106)\n add_69 = torch.ops.aten.add.Tensor(mul_67, primals_107); mul_67 = primals_107 = None\n view_104 = torch.ops.aten.view.default(add_69, [64, 768]); add_69 = None\n permute_70 = torch.ops.aten.permute.default(primals_108, [1, 0]); primals_108 = None\n addmm_34 = torch.ops.aten.addmm.default(primals_109, view_104, permute_70); primals_109 = None\n view_105 = 
torch.ops.aten.view.default(addmm_34, [1, 64, 3072])\n mul_68 = torch.ops.aten.mul.Tensor(view_105, 0.5)\n pow_9 = torch.ops.aten.pow.Tensor_Scalar(view_105, 3.0)\n mul_69 = torch.ops.aten.mul.Tensor(pow_9, 0.044715); pow_9 = None\n add_70 = torch.ops.aten.add.Tensor(view_105, mul_69); view_105 = mul_69 = None\n mul_70 = torch.ops.aten.mul.Tensor(add_70, 0.7978845608028654); add_70 = None\n tanh_8 = torch.ops.aten.tanh.default(mul_70); mul_70 = None\n add_71 = torch.ops.aten.add.Tensor(tanh_8, 1.0); tanh_8 = None\n mul_71 = torch.ops.aten.mul.Tensor(mul_68, add_71); mul_68 = add_71 = None\n view_106 = torch.ops.aten.view.default(mul_71, [64, 3072]); mul_71 = None\n permute_71 = torch.ops.aten.permute.default(primals_110, [1, 0]); primals_110 = None\n addmm_35 = torch.ops.aten.addmm.default(primals_111, view_106, permute_71); primals_111 = None\n view_107 = torch.ops.aten.view.default(addmm_35, [1, 64, 768]); addmm_35 = None\n add_72 = torch.ops.aten.add.Tensor(add_67, view_107); add_67 = view_107 = None\n var_mean_18 = torch.ops.aten.var_mean.correction(add_72, [2], correction = 0, keepdim = True)\n getitem_99 = var_mean_18[0]\n getitem_100 = var_mean_18[1]; var_mean_18 = None\n add_73 = torch.ops.aten.add.Tensor(getitem_99, 1e-05); getitem_99 = None\n rsqrt_18 = torch.ops.aten.rsqrt.default(add_73); add_73 = None\n sub_18 = torch.ops.aten.sub.Tensor(add_72, getitem_100); getitem_100 = None\n mul_72 = torch.ops.aten.mul.Tensor(sub_18, rsqrt_18); sub_18 = None\n mul_73 = torch.ops.aten.mul.Tensor(mul_72, primals_112)\n add_74 = torch.ops.aten.add.Tensor(mul_73, primals_113); mul_73 = primals_113 = None\n view_108 = torch.ops.aten.view.default(add_74, [64, 768]); add_74 = None\n permute_72 = torch.ops.aten.permute.default(primals_114, [1, 0]); primals_114 = None\n addmm_36 = torch.ops.aten.addmm.default(primals_115, view_108, permute_72); primals_115 = None\n view_109 = torch.ops.aten.view.default(addmm_36, [1, 64, 2304]); addmm_36 = None\n split_9 = torch.ops.aten.split.Tensor(view_109, 768, 2); view_109 = None\n getitem_101 = split_9[0]\n getitem_102 = split_9[1]\n getitem_103 = split_9[2]; split_9 = None\n view_110 = torch.ops.aten.view.default(getitem_102, [1, 64, 12, 64]); getitem_102 = None\n permute_73 = torch.ops.aten.permute.default(view_110, [0, 2, 1, 3]); view_110 = None\n view_111 = torch.ops.aten.view.default(getitem_101, [1, 64, 12, 64]); getitem_101 = None\n permute_74 = torch.ops.aten.permute.default(view_111, [0, 2, 1, 3]); view_111 = None\n view_112 = torch.ops.aten.view.default(getitem_103, [1, 64, 12, 64]); getitem_103 = None\n permute_75 = torch.ops.aten.permute.default(view_112, [0, 2, 1, 3]); view_112 = None\n _scaled_dot_product_efficient_attention_9 = torch.ops.aten._scaled_dot_product_efficient_attention.default(permute_74, permute_73, permute_75, None, True, 0.0, True)\n getitem_104 = _scaled_dot_product_efficient_attention_9[0]\n getitem_105 = _scaled_dot_product_efficient_attention_9[1]\n getitem_106 = _scaled_dot_product_efficient_attention_9[2]\n getitem_107 = _scaled_dot_product_efficient_attention_9[3]; _scaled_dot_product_efficient_attention_9 = None\n permute_76 = torch.ops.aten.permute.default(getitem_104, [0, 2, 1, 3])\n view_113 = torch.ops.aten.view.default(permute_76, [1, 64, 768]); permute_76 = None\n view_114 = torch.ops.aten.view.default(view_113, [64, 768]); view_113 = None\n permute_77 = torch.ops.aten.permute.default(primals_116, [1, 0]); primals_116 = None\n addmm_37 = torch.ops.aten.addmm.default(primals_117, view_114, permute_77); primals_117 
= view_114 = None\n view_115 = torch.ops.aten.view.default(addmm_37, [1, 64, 768]); addmm_37 = None\n add_75 = torch.ops.aten.add.Tensor(add_72, view_115); add_72 = view_115 = None\n var_mean_19 = torch.ops.aten.var_mean.correction(add_75, [2], correction = 0, keepdim = True)\n getitem_108 = var_mean_19[0]\n getitem_109 = var_mean_19[1]; var_mean_19 = None\n add_76 = torch.ops.aten.add.Tensor(getitem_108, 1e-05); getitem_108 = None\n rsqrt_19 = torch.ops.aten.rsqrt.default(add_76); add_76 = None\n sub_19 = torch.ops.aten.sub.Tensor(add_75, getitem_109); getitem_109 = None\n mul_74 = torch.ops.aten.mul.Tensor(sub_19, rsqrt_19); sub_19 = None\n mul_75 = torch.ops.aten.mul.Tensor(mul_74, primals_118)\n add_77 = torch.ops.aten.add.Tensor(mul_75, primals_119); mul_75 = primals_119 = None\n view_116 = torch.ops.aten.view.default(add_77, [64, 768]); add_77 = None\n permute_78 = torch.ops.aten.permute.default(primals_120, [1, 0]); primals_120 = None\n addmm_38 = torch.ops.aten.addmm.default(primals_121, view_116, permute_78); primals_121 = None\n view_117 = torch.ops.aten.view.default(addmm_38, [1, 64, 3072])\n mul_76 = torch.ops.aten.mul.Tensor(view_117, 0.5)\n pow_10 = torch.ops.aten.pow.Tensor_Scalar(view_117, 3.0)\n mul_77 = torch.ops.aten.mul.Tensor(pow_10, 0.044715); pow_10 = None\n add_78 = torch.ops.aten.add.Tensor(view_117, mul_77); view_117 = mul_77 = None\n mul_78 = torch.ops.aten.mul.Tensor(add_78, 0.7978845608028654); add_78 = None\n tanh_9 = torch.ops.aten.tanh.default(mul_78); mul_78 = None\n add_79 = torch.ops.aten.add.Tensor(tanh_9, 1.0); tanh_9 = None\n mul_79 = torch.ops.aten.mul.Tensor(mul_76, add_79); mul_76 = add_79 = None\n view_118 = torch.ops.aten.view.default(mul_79, [64, 3072]); mul_79 = None\n permute_79 = torch.ops.aten.permute.default(primals_122, [1, 0]); primals_122 = None\n addmm_39 = torch.ops.aten.addmm.default(primals_123, view_118, permute_79); primals_123 = None\n view_119 = torch.ops.aten.view.default(addmm_39, [1, 64, 768]); addmm_39 = None\n add_80 = torch.ops.aten.add.Tensor(add_75, view_119); add_75 = view_119 = None\n var_mean_20 = torch.ops.aten.var_mean.correction(add_80, [2], correction = 0, keepdim = True)\n getitem_110 = var_mean_20[0]\n getitem_111 = var_mean_20[1]; var_mean_20 = None\n add_81 = torch.ops.aten.add.Tensor(getitem_110, 1e-05); getitem_110 = None\n rsqrt_20 = torch.ops.aten.rsqrt.default(add_81); add_81 = None\n sub_20 = torch.ops.aten.sub.Tensor(add_80, getitem_111); getitem_111 = None\n mul_80 = torch.ops.aten.mul.Tensor(sub_20, rsqrt_20); sub_20 = None\n mul_81 = torch.ops.aten.mul.Tensor(mul_80, primals_124)\n add_82 = torch.ops.aten.add.Tensor(mul_81, primals_125); mul_81 = primals_125 = None\n view_120 = torch.ops.aten.view.default(add_82, [64, 768]); add_82 = None\n permute_80 = torch.ops.aten.permute.default(primals_126, [1, 0]); primals_126 = None\n addmm_40 = torch.ops.aten.addmm.default(primals_127, view_120, permute_80); primals_127 = None\n view_121 = torch.ops.aten.view.default(addmm_40, [1, 64, 2304]); addmm_40 = None\n split_10 = torch.ops.aten.split.Tensor(view_121, 768, 2); view_121 = None\n getitem_112 = split_10[0]\n getitem_113 = split_10[1]\n getitem_114 = split_10[2]; split_10 = None\n view_122 = torch.ops.aten.view.default(getitem_113, [1, 64, 12, 64]); getitem_113 = None\n permute_81 = torch.ops.aten.permute.default(view_122, [0, 2, 1, 3]); view_122 = None\n view_123 = torch.ops.aten.view.default(getitem_112, [1, 64, 12, 64]); getitem_112 = None\n permute_82 = torch.ops.aten.permute.default(view_123, [0, 2, 1, 
3]); view_123 = None\n view_124 = torch.ops.aten.view.default(getitem_114, [1, 64, 12, 64]); getitem_114 = None\n permute_83 = torch.ops.aten.permute.default(view_124, [0, 2, 1, 3]); view_124 = None\n _scaled_dot_product_efficient_attention_10 = torch.ops.aten._scaled_dot_product_efficient_attention.default(permute_82, permute_81, permute_83, None, True, 0.0, True)\n getitem_115 = _scaled_dot_product_efficient_attention_10[0]\n getitem_116 = _scaled_dot_product_efficient_attention_10[1]\n getitem_117 = _scaled_dot_product_efficient_attention_10[2]\n getitem_118 = _scaled_dot_product_efficient_attention_10[3]; _scaled_dot_product_efficient_attention_10 = None\n permute_84 = torch.ops.aten.permute.default(getitem_115, [0, 2, 1, 3])\n view_125 = torch.ops.aten.view.default(permute_84, [1, 64, 768]); permute_84 = None\n view_126 = torch.ops.aten.view.default(view_125, [64, 768]); view_125 = None\n permute_85 = torch.ops.aten.permute.default(primals_128, [1, 0]); primals_128 = None\n addmm_41 = torch.ops.aten.addmm.default(primals_129, view_126, permute_85); primals_129 = view_126 = None\n view_127 = torch.ops.aten.view.default(addmm_41, [1, 64, 768]); addmm_41 = None\n add_83 = torch.ops.aten.add.Tensor(add_80, view_127); add_80 = view_127 = None\n var_mean_21 = torch.ops.aten.var_mean.correction(add_83, [2], correction = 0, keepdim = True)\n getitem_119 = var_mean_21[0]\n getitem_120 = var_mean_21[1]; var_mean_21 = None\n add_84 = torch.ops.aten.add.Tensor(getitem_119, 1e-05); getitem_119 = None\n rsqrt_21 = torch.ops.aten.rsqrt.default(add_84); add_84 = None\n sub_21 = torch.ops.aten.sub.Tensor(add_83, getitem_120); getitem_120 = None\n mul_82 = torch.ops.aten.mul.Tensor(sub_21, rsqrt_21); sub_21 = None\n mul_83 = torch.ops.aten.mul.Tensor(mul_82, primals_130)\n add_85 = torch.ops.aten.add.Tensor(mul_83, primals_131); mul_83 = primals_131 = None\n view_128 = torch.ops.aten.view.default(add_85, [64, 768]); add_85 = None\n permute_86 = torch.ops.aten.permute.default(primals_132, [1, 0]); primals_132 = None\n addmm_42 = torch.ops.aten.addmm.default(primals_133, view_128, permute_86); primals_133 = None\n view_129 = torch.ops.aten.view.default(addmm_42, [1, 64, 3072])\n mul_84 = torch.ops.aten.mul.Tensor(view_129, 0.5)\n pow_11 = torch.ops.aten.pow.Tensor_Scalar(view_129, 3.0)\n mul_85 = torch.ops.aten.mul.Tensor(pow_11, 0.044715); pow_11 = None\n add_86 = torch.ops.aten.add.Tensor(view_129, mul_85); view_129 = mul_85 = None\n mul_86 = torch.ops.aten.mul.Tensor(add_86, 0.7978845608028654); add_86 = None\n tanh_10 = torch.ops.aten.tanh.default(mul_86); mul_86 = None\n add_87 = torch.ops.aten.add.Tensor(tanh_10, 1.0); tanh_10 = None\n mul_87 = torch.ops.aten.mul.Tensor(mul_84, add_87); mul_84 = add_87 = None\n view_130 = torch.ops.aten.view.default(mul_87, [64, 3072]); mul_87 = None\n permute_87 = torch.ops.aten.permute.default(primals_134, [1, 0]); primals_134 = None\n addmm_43 = torch.ops.aten.addmm.default(primals_135, view_130, permute_87); primals_135 = None\n view_131 = torch.ops.aten.view.default(addmm_43, [1, 64, 768]); addmm_43 = None\n add_88 = torch.ops.aten.add.Tensor(add_83, view_131); add_83 = view_131 = None\n var_mean_22 = torch.ops.aten.var_mean.correction(add_88, [2], correction = 0, keepdim = True)\n getitem_121 = var_mean_22[0]\n getitem_122 = var_mean_22[1]; var_mean_22 = None\n add_89 = torch.ops.aten.add.Tensor(getitem_121, 1e-05); getitem_121 = None\n rsqrt_22 = torch.ops.aten.rsqrt.default(add_89); add_89 = None\n sub_22 = torch.ops.aten.sub.Tensor(add_88, getitem_122); 
getitem_122 = None\n mul_88 = torch.ops.aten.mul.Tensor(sub_22, rsqrt_22); sub_22 = None\n mul_89 = torch.ops.aten.mul.Tensor(mul_88, primals_136)\n add_90 = torch.ops.aten.add.Tensor(mul_89, primals_137); mul_89 = primals_137 = None\n view_132 = torch.ops.aten.view.default(add_90, [64, 768]); add_90 = None\n permute_88 = torch.ops.aten.permute.default(primals_138, [1, 0]); primals_138 = None\n addmm_44 = torch.ops.aten.addmm.default(primals_139, view_132, permute_88); primals_139 = None\n view_133 = torch.ops.aten.view.default(addmm_44, [1, 64, 2304]); addmm_44 = None\n split_11 = torch.ops.aten.split.Tensor(view_133, 768, 2); view_133 = None\n getitem_123 = split_11[0]\n getitem_124 = split_11[1]\n getitem_125 = split_11[2]; split_11 = None\n view_134 = torch.ops.aten.view.default(getitem_124, [1, 64, 12, 64]); getitem_124 = None\n permute_89 = torch.ops.aten.permute.default(view_134, [0, 2, 1, 3]); view_134 = None\n view_135 = torch.ops.aten.view.default(getitem_123, [1, 64, 12, 64]); getitem_123 = None\n permute_90 = torch.ops.aten.permute.default(view_135, [0, 2, 1, 3]); view_135 = None\n view_136 = torch.ops.aten.view.default(getitem_125, [1, 64, 12, 64]); getitem_125 = None\n permute_91 = torch.ops.aten.permute.default(view_136, [0, 2, 1, 3]); view_136 = None\n _scaled_dot_product_efficient_attention_11 = torch.ops.aten._scaled_dot_product_efficient_attention.default(permute_90, permute_89, permute_91, None, True, 0.0, True)\n getitem_126 = _scaled_dot_product_efficient_attention_11[0]\n getitem_127 = _scaled_dot_product_efficient_attention_11[1]\n getitem_128 = _scaled_dot_product_efficient_attention_11[2]\n getitem_129 = _scaled_dot_product_efficient_attention_11[3]; _scaled_dot_product_efficient_attention_11 = None\n permute_92 = torch.ops.aten.permute.default(getitem_126, [0, 2, 1, 3])\n view_137 = torch.ops.aten.view.default(permute_92, [1, 64, 768]); permute_92 = None\n view_138 = torch.ops.aten.view.default(view_137, [64, 768]); view_137 = None\n permute_93 = torch.ops.aten.permute.default(primals_140, [1, 0]); primals_140 = None\n addmm_45 = torch.ops.aten.addmm.default(primals_141, view_138, permute_93); primals_141 = view_138 = None\n view_139 = torch.ops.aten.view.default(addmm_45, [1, 64, 768]); addmm_45 = None\n add_91 = torch.ops.aten.add.Tensor(add_88, view_139); add_88 = view_139 = None\n var_mean_23 = torch.ops.aten.var_mean.correction(add_91, [2], correction = 0, keepdim = True)\n getitem_130 = var_mean_23[0]\n getitem_131 = var_mean_23[1]; var_mean_23 = None\n add_92 = torch.ops.aten.add.Tensor(getitem_130, 1e-05); getitem_130 = None\n rsqrt_23 = torch.ops.aten.rsqrt.default(add_92); add_92 = None\n sub_23 = torch.ops.aten.sub.Tensor(add_91, getitem_131); getitem_131 = None\n mul_90 = torch.ops.aten.mul.Tensor(sub_23, rsqrt_23); sub_23 = None\n mul_91 = torch.ops.aten.mul.Tensor(mul_90, primals_142)\n add_93 = torch.ops.aten.add.Tensor(mul_91, primals_143); mul_91 = primals_143 = None\n view_140 = torch.ops.aten.view.default(add_93, [64, 768]); add_93 = None\n permute_94 = torch.ops.aten.permute.default(primals_144, [1, 0]); primals_144 = None\n addmm_46 = torch.ops.aten.addmm.default(primals_145, view_140, permute_94); primals_145 = None\n view_141 = torch.ops.aten.view.default(addmm_46, [1, 64, 3072])\n mul_92 = torch.ops.aten.mul.Tensor(view_141, 0.5)\n pow_12 = torch.ops.aten.pow.Tensor_Scalar(view_141, 3.0)\n mul_93 = torch.ops.aten.mul.Tensor(pow_12, 0.044715); pow_12 = None\n add_94 = torch.ops.aten.add.Tensor(view_141, mul_93); view_141 = mul_93 = None\n 
mul_94 = torch.ops.aten.mul.Tensor(add_94, 0.7978845608028654); add_94 = None\n tanh_11 = torch.ops.aten.tanh.default(mul_94); mul_94 = None\n add_95 = torch.ops.aten.add.Tensor(tanh_11, 1.0); tanh_11 = None\n mul_95 = torch.ops.aten.mul.Tensor(mul_92, add_95); mul_92 = add_95 = None\n view_142 = torch.ops.aten.view.default(mul_95, [64, 3072]); mul_95 = None\n permute_95 = torch.ops.aten.permute.default(primals_146, [1, 0]); primals_146 = None\n addmm_47 = torch.ops.aten.addmm.default(primals_147, view_142, permute_95); primals_147 = None\n view_143 = torch.ops.aten.view.default(addmm_47, [1, 64, 768]); addmm_47 = None\n add_96 = torch.ops.aten.add.Tensor(add_91, view_143); add_91 = view_143 = None\n var_mean_24 = torch.ops.aten.var_mean.correction(add_96, [2], correction = 0, keepdim = True)\n getitem_132 = var_mean_24[0]\n getitem_133 = var_mean_24[1]; var_mean_24 = None\n add_97 = torch.ops.aten.add.Tensor(getitem_132, 1e-05); getitem_132 = None\n rsqrt_24 = torch.ops.aten.rsqrt.default(add_97); add_97 = None\n sub_24 = torch.ops.aten.sub.Tensor(add_96, getitem_133); add_96 = getitem_133 = None\n mul_96 = torch.ops.aten.mul.Tensor(sub_24, rsqrt_24); sub_24 = None\n mul_97 = torch.ops.aten.mul.Tensor(mul_96, primals_148)\n add_98 = torch.ops.aten.add.Tensor(mul_97, primals_149); mul_97 = primals_149 = None\n full_default = torch.ops.aten.full.default([1], -1, dtype = torch.int64, layout = torch.strided, device = device(type='cuda', index=0), pin_memory = False)\n index = torch.ops.aten.index.Tensor(add_98, [None, full_default]); add_98 = None\n permute_96 = torch.ops.aten.permute.default(primals_2, [1, 0]); primals_2 = None\n view_144 = torch.ops.aten.view.default(index, [1, 768]); index = None\n mm = torch.ops.aten.mm.default(view_144, permute_96)\n view_145 = torch.ops.aten.view.default(mm, [1, 1, 50304]); mm = None\n permute_99 = torch.ops.aten.permute.default(permute_96, [1, 0]); permute_96 = None\n div = torch.ops.aten.div.Tensor(rsqrt_24, 768); rsqrt_24 = None\n permute_101 = torch.ops.aten.permute.default(permute_95, [1, 0]); permute_95 = None\n permute_105 = torch.ops.aten.permute.default(permute_94, [1, 0]); permute_94 = None\n div_1 = torch.ops.aten.div.Tensor(rsqrt_23, 768); rsqrt_23 = None\n permute_109 = torch.ops.aten.permute.default(permute_93, [1, 0]); permute_93 = None\n permute_117 = torch.ops.aten.permute.default(permute_88, [1, 0]); permute_88 = None\n div_2 = torch.ops.aten.div.Tensor(rsqrt_22, 768); rsqrt_22 = None\n permute_121 = torch.ops.aten.permute.default(permute_87, [1, 0]); permute_87 = None\n permute_125 = torch.ops.aten.permute.default(permute_86, [1, 0]); permute_86 = None\n div_3 = torch.ops.aten.div.Tensor(rsqrt_21, 768); rsqrt_21 = None\n permute_129 = torch.ops.aten.permute.default(permute_85, [1, 0]); permute_85 = None\n permute_137 = torch.ops.aten.permute.default(permute_80, [1, 0]); permute_80 = None\n div_4 = torch.ops.aten.div.Tensor(rsqrt_20, 768); rsqrt_20 = None\n permute_141 = torch.ops.aten.permute.default(permute_79, [1, 0]); permute_79 = None\n permute_145 = torch.ops.aten.permute.default(permute_78, [1, 0]); permute_78 = None\n div_5 = torch.ops.aten.div.Tensor(rsqrt_19, 768); rsqrt_19 = None\n permute_149 = torch.ops.aten.permute.default(permute_77, [1, 0]); permute_77 = None\n permute_157 = torch.ops.aten.permute.default(permute_72, [1, 0]); permute_72 = None\n div_6 = torch.ops.aten.div.Tensor(rsqrt_18, 768); rsqrt_18 = None\n permute_161 = torch.ops.aten.permute.default(permute_71, [1, 0]); permute_71 = None\n permute_165 = 
torch.ops.aten.permute.default(permute_70, [1, 0]); permute_70 = None\n div_7 = torch.ops.aten.div.Tensor(rsqrt_17, 768); rsqrt_17 = None\n permute_169 = torch.ops.aten.permute.default(permute_69, [1, 0]); permute_69 = None\n permute_177 = torch.ops.aten.permute.default(permute_64, [1, 0]); permute_64 = None\n div_8 = torch.ops.aten.div.Tensor(rsqrt_16, 768); rsqrt_16 = None\n permute_181 = torch.ops.aten.permute.default(permute_63, [1, 0]); permute_63 = None\n permute_185 = torch.ops.aten.permute.default(permute_62, [1, 0]); permute_62 = None\n div_9 = torch.ops.aten.div.Tensor(rsqrt_15, 768); rsqrt_15 = None\n permute_189 = torch.ops.aten.permute.default(permute_61, [1, 0]); permute_61 = None\n permute_197 = torch.ops.aten.permute.default(permute_56, [1, 0]); permute_56 = None\n div_10 = torch.ops.aten.div.Tensor(rsqrt_14, 768); rsqrt_14 = None\n permute_201 = torch.ops.aten.permute.default(permute_55, [1, 0]); permute_55 = None\n permute_205 = torch.ops.aten.permute.default(permute_54, [1, 0]); permute_54 = None\n div_11 = torch.ops.aten.div.Tensor(rsqrt_13, 768); rsqrt_13 = None\n permute_209 = torch.ops.aten.permute.default(permute_53, [1, 0]); permute_53 = None\n permute_217 = torch.ops.aten.permute.default(permute_48, [1, 0]); permute_48 = None\n div_12 = torch.ops.aten.div.Tensor(rsqrt_12, 768); rsqrt_12 = None\n permute_221 = torch.ops.aten.permute.default(permute_47, [1, 0]); permute_47 = None\n permute_225 = torch.ops.aten.permute.default(permute_46, [1, 0]); permute_46 = None\n div_13 = torch.ops.aten.div.Tensor(rsqrt_11, 768); rsqrt_11 = None\n permute_229 = torch.ops.aten.permute.default(permute_45, [1, 0]); permute_45 = None\n permute_237 = torch.ops.aten.permute.default(permute_40, [1, 0]); permute_40 = None\n div_14 = torch.ops.aten.div.Tensor(rsqrt_10, 768); rsqrt_10 = None\n permute_241 = torch.ops.aten.permute.default(permute_39, [1, 0]); permute_39 = None\n permute_245 = torch.ops.aten.permute.default(permute_38, [1, 0]); permute_38 = None\n div_15 = torch.ops.aten.div.Tensor(rsqrt_9, 768); rsqrt_9 = None\n permute_249 = torch.ops.aten.permute.default(permute_37, [1, 0]); permute_37 = None\n permute_257 = torch.ops.aten.permute.default(permute_32, [1, 0]); permute_32 = None\n div_16 = torch.ops.aten.div.Tensor(rsqrt_8, 768); rsqrt_8 = None\n permute_261 = torch.ops.aten.permute.default(permute_31, [1, 0]); permute_31 = None\n permute_265 = torch.ops.aten.permute.default(permute_30, [1, 0]); permute_30 = None\n div_17 = torch.ops.aten.div.Tensor(rsqrt_7, 768); rsqrt_7 = None\n permute_269 = torch.ops.aten.permute.default(permute_29, [1, 0]); permute_29 = None\n permute_277 = torch.ops.aten.permute.default(permute_24, [1, 0]); permute_24 = None\n div_18 = torch.ops.aten.div.Tensor(rsqrt_6, 768); rsqrt_6 = None\n permute_281 = torch.ops.aten.permute.default(permute_23, [1, 0]); permute_23 = None\n permute_285 = torch.ops.aten.permute.default(permute_22, [1, 0]); permute_22 = None\n div_19 = torch.ops.aten.div.Tensor(rsqrt_5, 768); rsqrt_5 = None\n permute_289 = torch.ops.aten.permute.default(permute_21, [1, 0]); permute_21 = None\n permute_297 = torch.ops.aten.permute.default(permute_16, [1, 0]); permute_16 = None\n div_20 = torch.ops.aten.div.Tensor(rsqrt_4, 768); rsqrt_4 = None\n permute_301 = torch.ops.aten.permute.default(permute_15, [1, 0]); permute_15 = None\n permute_305 = torch.ops.aten.permute.default(permute_14, [1, 0]); permute_14 = None\n div_21 = torch.ops.aten.div.Tensor(rsqrt_3, 768); rsqrt_3 = None\n permute_309 = torch.ops.aten.permute.default(permute_13, 
[1, 0]); permute_13 = None\n permute_317 = torch.ops.aten.permute.default(permute_8, [1, 0]); permute_8 = None\n div_22 = torch.ops.aten.div.Tensor(rsqrt_2, 768); rsqrt_2 = None\n permute_321 = torch.ops.aten.permute.default(permute_7, [1, 0]); permute_7 = None\n permute_325 = torch.ops.aten.permute.default(permute_6, [1, 0]); permute_6 = None\n div_23 = torch.ops.aten.div.Tensor(rsqrt_1, 768); rsqrt_1 = None\n permute_329 = torch.ops.aten.permute.default(permute_5, [1, 0]); permute_5 = None\n permute_337 = torch.ops.aten.permute.default(permute, [1, 0]); permute = None\n div_24 = torch.ops.aten.div.Tensor(rsqrt, 768); rsqrt = None\n return (view_145, primals_1, primals_4, primals_10, primals_16, primals_22, primals_28, primals_34, primals_40, primals_46, primals_52, primals_58, primals_64, primals_70, primals_76, primals_82, primals_88, primals_94, primals_100, primals_106, primals_112, primals_118, primals_124, primals_130, primals_136, primals_142, primals_148, unsqueeze, mul, view, permute_1, permute_2, permute_3, getitem_5, getitem_6, getitem_7, getitem_8, mul_2, view_8, addmm_2, view_10, mul_8, view_12, permute_9, permute_10, permute_11, getitem_16, getitem_17, getitem_18, getitem_19, mul_10, view_20, addmm_6, view_22, mul_16, view_24, permute_17, permute_18, permute_19, getitem_27, getitem_28, getitem_29, getitem_30, mul_18, view_32, addmm_10, view_34, mul_24, view_36, permute_25, permute_26, permute_27, getitem_38, getitem_39, getitem_40, getitem_41, mul_26, view_44, addmm_14, view_46, mul_32, view_48, permute_33, permute_34, permute_35, getitem_49, getitem_50, getitem_51, getitem_52, mul_34, view_56, addmm_18, view_58, mul_40, view_60, permute_41, permute_42, permute_43, getitem_60, getitem_61, getitem_62, getitem_63, mul_42, view_68, addmm_22, view_70, mul_48, view_72, permute_49, permute_50, permute_51, getitem_71, getitem_72, getitem_73, getitem_74, mul_50, view_80, addmm_26, view_82, mul_56, view_84, permute_57, permute_58, permute_59, getitem_82, getitem_83, getitem_84, getitem_85, mul_58, view_92, addmm_30, view_94, mul_64, view_96, permute_65, permute_66, permute_67, getitem_93, getitem_94, getitem_95, getitem_96, mul_66, view_104, addmm_34, view_106, mul_72, view_108, permute_73, permute_74, permute_75, getitem_104, getitem_105, getitem_106, getitem_107, mul_74, view_116, addmm_38, view_118, mul_80, view_120, permute_81, permute_82, permute_83, getitem_115, getitem_116, getitem_117, getitem_118, mul_82, view_128, addmm_42, view_130, mul_88, view_132, permute_89, permute_90, permute_91, getitem_126, getitem_127, getitem_128, getitem_129, mul_90, view_140, addmm_46, view_142, mul_96, full_default, view_144, permute_99, div, permute_101, permute_105, div_1, permute_109, permute_117, div_2, permute_121, permute_125, div_3, permute_129, permute_137, div_4, permute_141, permute_145, div_5, permute_149, permute_157, div_6, permute_161, permute_165, div_7, permute_169, permute_177, div_8, permute_181, permute_185, div_9, permute_189, permute_197, div_10, permute_201, permute_205, div_11, permute_209, permute_217, div_12, permute_221, permute_225, div_13, permute_229, permute_237, div_14, permute_241, permute_245, div_15, permute_249, permute_257, div_16, permute_261, permute_265, div_17, permute_269, permute_277, div_18, permute_281, permute_285, div_19, permute_289, permute_297, div_20, permute_301, permute_305, div_21, permute_309, permute_317, div_22, permute_321, permute_325, div_23, permute_329, permute_337, div_24)\n \n# To see more debug info, please use 
`graph_module.print_readable()`", + "[yily4oahymyyzyspnyjgkwteqzeiwe4kjdldmy3tmjumziqf7zb] example_inputs[0]: TensorMetadata(dtype=torch.int64, shape=torch.Size([1, 64]), stride=(64, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=None, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[l36txgwatro2oumaemq3fn7fzd6c7rqt7jkya4whofwgykwhyac] example_inputs[1]: TensorMetadata(dtype=torch.float32, shape=torch.Size([50304, 768]), stride=(768, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=154533888, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[j2qdbepspnv5xu5aaa4vfybenn6kcipqnlg4axmhoebbdlhdfcg] example_inputs[2]: TensorMetadata(dtype=torch.float32, shape=torch.Size([1024, 768]), stride=(768, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=3145728, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[ymueslj5cs5k2l243gcdaohltqzpminou4x5gltmu2cgjgroyyd] example_inputs[3]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=3072, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[ymueslj5cs5k2l243gcdaohltqzpminou4x5gltmu2cgjgroyyd] example_inputs[4]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=3072, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[ku6bnu4u7osvly4jsmf2w2orf22tghfytwzq6ycj3otmnq6f3wn] example_inputs[5]: TensorMetadata(dtype=torch.float32, shape=torch.Size([2304, 768]), stride=(768, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=7077888, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[57h3bzk67jreuqiikhvgdtcn2mdkz7xuwhdnrjueg4dxxn6tfzm] example_inputs[6]: TensorMetadata(dtype=torch.float32, shape=torch.Size([2304]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=9216, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[a64xnvbb6q6gew5c3aovaeqivj3hstrmxkbra3eidivx42znnuv] example_inputs[7]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768, 768]), stride=(768, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=2359296, requires_grad=False, is_quantized=False, 
is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[ymueslj5cs5k2l243gcdaohltqzpminou4x5gltmu2cgjgroyyd] example_inputs[8]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=3072, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[ymueslj5cs5k2l243gcdaohltqzpminou4x5gltmu2cgjgroyyd] example_inputs[9]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=3072, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[ymueslj5cs5k2l243gcdaohltqzpminou4x5gltmu2cgjgroyyd] example_inputs[10]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=3072, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[aafg74anlk7yqmlg6ucup4tmjwhskblh75z227dcfmwpfzw7dzm] example_inputs[11]: TensorMetadata(dtype=torch.float32, shape=torch.Size([3072, 768]), stride=(768, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=9437184, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[sefiqkkteehwsnilwpljkx7vdgondub2hwggoishzmx7o5etmjk] example_inputs[12]: TensorMetadata(dtype=torch.float32, shape=torch.Size([3072]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=12288, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[6v4azk5hvrkyq22k6mfa2l7ovaddatmim7mcvcpdku2ieirjd64] example_inputs[13]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768, 3072]), stride=(3072, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=9437184, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[ymueslj5cs5k2l243gcdaohltqzpminou4x5gltmu2cgjgroyyd] example_inputs[14]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=3072, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[ymueslj5cs5k2l243gcdaohltqzpminou4x5gltmu2cgjgroyyd] example_inputs[15]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, 
storage_bytes=3072, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[ymueslj5cs5k2l243gcdaohltqzpminou4x5gltmu2cgjgroyyd] example_inputs[16]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=3072, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[ku6bnu4u7osvly4jsmf2w2orf22tghfytwzq6ycj3otmnq6f3wn] example_inputs[17]: TensorMetadata(dtype=torch.float32, shape=torch.Size([2304, 768]), stride=(768, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=7077888, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[57h3bzk67jreuqiikhvgdtcn2mdkz7xuwhdnrjueg4dxxn6tfzm] example_inputs[18]: TensorMetadata(dtype=torch.float32, shape=torch.Size([2304]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=9216, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[a64xnvbb6q6gew5c3aovaeqivj3hstrmxkbra3eidivx42znnuv] example_inputs[19]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768, 768]), stride=(768, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=2359296, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[ymueslj5cs5k2l243gcdaohltqzpminou4x5gltmu2cgjgroyyd] example_inputs[20]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=3072, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[ymueslj5cs5k2l243gcdaohltqzpminou4x5gltmu2cgjgroyyd] example_inputs[21]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=3072, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[ymueslj5cs5k2l243gcdaohltqzpminou4x5gltmu2cgjgroyyd] example_inputs[22]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=3072, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[aafg74anlk7yqmlg6ucup4tmjwhskblh75z227dcfmwpfzw7dzm] example_inputs[23]: TensorMetadata(dtype=torch.float32, shape=torch.Size([3072, 768]), stride=(768, 1), device=device(type='cuda', index=0), 
layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=9437184, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[sefiqkkteehwsnilwpljkx7vdgondub2hwggoishzmx7o5etmjk] example_inputs[24]: TensorMetadata(dtype=torch.float32, shape=torch.Size([3072]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=12288, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[6v4azk5hvrkyq22k6mfa2l7ovaddatmim7mcvcpdku2ieirjd64] example_inputs[25]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768, 3072]), stride=(3072, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=9437184, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[ymueslj5cs5k2l243gcdaohltqzpminou4x5gltmu2cgjgroyyd] example_inputs[26]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=3072, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[ymueslj5cs5k2l243gcdaohltqzpminou4x5gltmu2cgjgroyyd] example_inputs[27]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=3072, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[ymueslj5cs5k2l243gcdaohltqzpminou4x5gltmu2cgjgroyyd] example_inputs[28]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=3072, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[ku6bnu4u7osvly4jsmf2w2orf22tghfytwzq6ycj3otmnq6f3wn] example_inputs[29]: TensorMetadata(dtype=torch.float32, shape=torch.Size([2304, 768]), stride=(768, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=7077888, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[57h3bzk67jreuqiikhvgdtcn2mdkz7xuwhdnrjueg4dxxn6tfzm] example_inputs[30]: TensorMetadata(dtype=torch.float32, shape=torch.Size([2304]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=9216, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[a64xnvbb6q6gew5c3aovaeqivj3hstrmxkbra3eidivx42znnuv] example_inputs[31]: TensorMetadata(dtype=torch.float32, 
shape=torch.Size([768, 768]), stride=(768, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=2359296, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[ymueslj5cs5k2l243gcdaohltqzpminou4x5gltmu2cgjgroyyd] example_inputs[32]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=3072, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[ymueslj5cs5k2l243gcdaohltqzpminou4x5gltmu2cgjgroyyd] example_inputs[33]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=3072, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[ymueslj5cs5k2l243gcdaohltqzpminou4x5gltmu2cgjgroyyd] example_inputs[34]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=3072, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[aafg74anlk7yqmlg6ucup4tmjwhskblh75z227dcfmwpfzw7dzm] example_inputs[35]: TensorMetadata(dtype=torch.float32, shape=torch.Size([3072, 768]), stride=(768, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=9437184, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[sefiqkkteehwsnilwpljkx7vdgondub2hwggoishzmx7o5etmjk] example_inputs[36]: TensorMetadata(dtype=torch.float32, shape=torch.Size([3072]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=12288, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[6v4azk5hvrkyq22k6mfa2l7ovaddatmim7mcvcpdku2ieirjd64] example_inputs[37]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768, 3072]), stride=(3072, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=9437184, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[ymueslj5cs5k2l243gcdaohltqzpminou4x5gltmu2cgjgroyyd] example_inputs[38]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=3072, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + 
"[ymueslj5cs5k2l243gcdaohltqzpminou4x5gltmu2cgjgroyyd] example_inputs[39]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=3072, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[ymueslj5cs5k2l243gcdaohltqzpminou4x5gltmu2cgjgroyyd] example_inputs[40]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=3072, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[ku6bnu4u7osvly4jsmf2w2orf22tghfytwzq6ycj3otmnq6f3wn] example_inputs[41]: TensorMetadata(dtype=torch.float32, shape=torch.Size([2304, 768]), stride=(768, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=7077888, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[57h3bzk67jreuqiikhvgdtcn2mdkz7xuwhdnrjueg4dxxn6tfzm] example_inputs[42]: TensorMetadata(dtype=torch.float32, shape=torch.Size([2304]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=9216, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[a64xnvbb6q6gew5c3aovaeqivj3hstrmxkbra3eidivx42znnuv] example_inputs[43]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768, 768]), stride=(768, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=2359296, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[ymueslj5cs5k2l243gcdaohltqzpminou4x5gltmu2cgjgroyyd] example_inputs[44]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=3072, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[ymueslj5cs5k2l243gcdaohltqzpminou4x5gltmu2cgjgroyyd] example_inputs[45]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=3072, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[ymueslj5cs5k2l243gcdaohltqzpminou4x5gltmu2cgjgroyyd] example_inputs[46]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=3072, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, 
is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[aafg74anlk7yqmlg6ucup4tmjwhskblh75z227dcfmwpfzw7dzm] example_inputs[47]: TensorMetadata(dtype=torch.float32, shape=torch.Size([3072, 768]), stride=(768, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=9437184, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[sefiqkkteehwsnilwpljkx7vdgondub2hwggoishzmx7o5etmjk] example_inputs[48]: TensorMetadata(dtype=torch.float32, shape=torch.Size([3072]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=12288, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[6v4azk5hvrkyq22k6mfa2l7ovaddatmim7mcvcpdku2ieirjd64] example_inputs[49]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768, 3072]), stride=(3072, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=9437184, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[ymueslj5cs5k2l243gcdaohltqzpminou4x5gltmu2cgjgroyyd] example_inputs[50]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=3072, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[ymueslj5cs5k2l243gcdaohltqzpminou4x5gltmu2cgjgroyyd] example_inputs[51]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=3072, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[ymueslj5cs5k2l243gcdaohltqzpminou4x5gltmu2cgjgroyyd] example_inputs[52]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=3072, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[ku6bnu4u7osvly4jsmf2w2orf22tghfytwzq6ycj3otmnq6f3wn] example_inputs[53]: TensorMetadata(dtype=torch.float32, shape=torch.Size([2304, 768]), stride=(768, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=7077888, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[57h3bzk67jreuqiikhvgdtcn2mdkz7xuwhdnrjueg4dxxn6tfzm] example_inputs[54]: TensorMetadata(dtype=torch.float32, shape=torch.Size([2304]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=9216, requires_grad=False, 
is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[a64xnvbb6q6gew5c3aovaeqivj3hstrmxkbra3eidivx42znnuv] example_inputs[55]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768, 768]), stride=(768, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=2359296, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[ymueslj5cs5k2l243gcdaohltqzpminou4x5gltmu2cgjgroyyd] example_inputs[56]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=3072, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[ymueslj5cs5k2l243gcdaohltqzpminou4x5gltmu2cgjgroyyd] example_inputs[57]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=3072, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[ymueslj5cs5k2l243gcdaohltqzpminou4x5gltmu2cgjgroyyd] example_inputs[58]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=3072, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[aafg74anlk7yqmlg6ucup4tmjwhskblh75z227dcfmwpfzw7dzm] example_inputs[59]: TensorMetadata(dtype=torch.float32, shape=torch.Size([3072, 768]), stride=(768, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=9437184, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[sefiqkkteehwsnilwpljkx7vdgondub2hwggoishzmx7o5etmjk] example_inputs[60]: TensorMetadata(dtype=torch.float32, shape=torch.Size([3072]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=12288, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[6v4azk5hvrkyq22k6mfa2l7ovaddatmim7mcvcpdku2ieirjd64] example_inputs[61]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768, 3072]), stride=(3072, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=9437184, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[ymueslj5cs5k2l243gcdaohltqzpminou4x5gltmu2cgjgroyyd] example_inputs[62]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, 
memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=3072, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[ymueslj5cs5k2l243gcdaohltqzpminou4x5gltmu2cgjgroyyd] example_inputs[63]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=3072, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[ymueslj5cs5k2l243gcdaohltqzpminou4x5gltmu2cgjgroyyd] example_inputs[64]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=3072, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[ku6bnu4u7osvly4jsmf2w2orf22tghfytwzq6ycj3otmnq6f3wn] example_inputs[65]: TensorMetadata(dtype=torch.float32, shape=torch.Size([2304, 768]), stride=(768, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=7077888, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[57h3bzk67jreuqiikhvgdtcn2mdkz7xuwhdnrjueg4dxxn6tfzm] example_inputs[66]: TensorMetadata(dtype=torch.float32, shape=torch.Size([2304]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=9216, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[a64xnvbb6q6gew5c3aovaeqivj3hstrmxkbra3eidivx42znnuv] example_inputs[67]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768, 768]), stride=(768, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=2359296, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[ymueslj5cs5k2l243gcdaohltqzpminou4x5gltmu2cgjgroyyd] example_inputs[68]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=3072, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[ymueslj5cs5k2l243gcdaohltqzpminou4x5gltmu2cgjgroyyd] example_inputs[69]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=3072, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[ymueslj5cs5k2l243gcdaohltqzpminou4x5gltmu2cgjgroyyd] example_inputs[70]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), stride=(1,), 
device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=3072, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[aafg74anlk7yqmlg6ucup4tmjwhskblh75z227dcfmwpfzw7dzm] example_inputs[71]: TensorMetadata(dtype=torch.float32, shape=torch.Size([3072, 768]), stride=(768, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=9437184, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[sefiqkkteehwsnilwpljkx7vdgondub2hwggoishzmx7o5etmjk] example_inputs[72]: TensorMetadata(dtype=torch.float32, shape=torch.Size([3072]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=12288, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[6v4azk5hvrkyq22k6mfa2l7ovaddatmim7mcvcpdku2ieirjd64] example_inputs[73]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768, 3072]), stride=(3072, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=9437184, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[ymueslj5cs5k2l243gcdaohltqzpminou4x5gltmu2cgjgroyyd] example_inputs[74]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=3072, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[ymueslj5cs5k2l243gcdaohltqzpminou4x5gltmu2cgjgroyyd] example_inputs[75]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=3072, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[ymueslj5cs5k2l243gcdaohltqzpminou4x5gltmu2cgjgroyyd] example_inputs[76]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=3072, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[ku6bnu4u7osvly4jsmf2w2orf22tghfytwzq6ycj3otmnq6f3wn] example_inputs[77]: TensorMetadata(dtype=torch.float32, shape=torch.Size([2304, 768]), stride=(768, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=7077888, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[57h3bzk67jreuqiikhvgdtcn2mdkz7xuwhdnrjueg4dxxn6tfzm] example_inputs[78]: 
TensorMetadata(dtype=torch.float32, shape=torch.Size([2304]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=9216, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[a64xnvbb6q6gew5c3aovaeqivj3hstrmxkbra3eidivx42znnuv] example_inputs[79]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768, 768]), stride=(768, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=2359296, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[ymueslj5cs5k2l243gcdaohltqzpminou4x5gltmu2cgjgroyyd] example_inputs[80]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=3072, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[ymueslj5cs5k2l243gcdaohltqzpminou4x5gltmu2cgjgroyyd] example_inputs[81]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=3072, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[ymueslj5cs5k2l243gcdaohltqzpminou4x5gltmu2cgjgroyyd] example_inputs[82]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=3072, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[aafg74anlk7yqmlg6ucup4tmjwhskblh75z227dcfmwpfzw7dzm] example_inputs[83]: TensorMetadata(dtype=torch.float32, shape=torch.Size([3072, 768]), stride=(768, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=9437184, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[sefiqkkteehwsnilwpljkx7vdgondub2hwggoishzmx7o5etmjk] example_inputs[84]: TensorMetadata(dtype=torch.float32, shape=torch.Size([3072]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=12288, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[6v4azk5hvrkyq22k6mfa2l7ovaddatmim7mcvcpdku2ieirjd64] example_inputs[85]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768, 3072]), stride=(3072, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=9437184, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + 
"[ymueslj5cs5k2l243gcdaohltqzpminou4x5gltmu2cgjgroyyd] example_inputs[86]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=3072, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[ymueslj5cs5k2l243gcdaohltqzpminou4x5gltmu2cgjgroyyd] example_inputs[87]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=3072, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[ymueslj5cs5k2l243gcdaohltqzpminou4x5gltmu2cgjgroyyd] example_inputs[88]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=3072, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[ku6bnu4u7osvly4jsmf2w2orf22tghfytwzq6ycj3otmnq6f3wn] example_inputs[89]: TensorMetadata(dtype=torch.float32, shape=torch.Size([2304, 768]), stride=(768, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=7077888, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[57h3bzk67jreuqiikhvgdtcn2mdkz7xuwhdnrjueg4dxxn6tfzm] example_inputs[90]: TensorMetadata(dtype=torch.float32, shape=torch.Size([2304]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=9216, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[a64xnvbb6q6gew5c3aovaeqivj3hstrmxkbra3eidivx42znnuv] example_inputs[91]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768, 768]), stride=(768, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=2359296, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[ymueslj5cs5k2l243gcdaohltqzpminou4x5gltmu2cgjgroyyd] example_inputs[92]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=3072, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[ymueslj5cs5k2l243gcdaohltqzpminou4x5gltmu2cgjgroyyd] example_inputs[93]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=3072, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, 
is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[ymueslj5cs5k2l243gcdaohltqzpminou4x5gltmu2cgjgroyyd] example_inputs[94]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=3072, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[aafg74anlk7yqmlg6ucup4tmjwhskblh75z227dcfmwpfzw7dzm] example_inputs[95]: TensorMetadata(dtype=torch.float32, shape=torch.Size([3072, 768]), stride=(768, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=9437184, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[sefiqkkteehwsnilwpljkx7vdgondub2hwggoishzmx7o5etmjk] example_inputs[96]: TensorMetadata(dtype=torch.float32, shape=torch.Size([3072]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=12288, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[6v4azk5hvrkyq22k6mfa2l7ovaddatmim7mcvcpdku2ieirjd64] example_inputs[97]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768, 3072]), stride=(3072, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=9437184, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[ymueslj5cs5k2l243gcdaohltqzpminou4x5gltmu2cgjgroyyd] example_inputs[98]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=3072, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[ymueslj5cs5k2l243gcdaohltqzpminou4x5gltmu2cgjgroyyd] example_inputs[99]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=3072, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[ymueslj5cs5k2l243gcdaohltqzpminou4x5gltmu2cgjgroyyd] example_inputs[100]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=3072, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[ku6bnu4u7osvly4jsmf2w2orf22tghfytwzq6ycj3otmnq6f3wn] example_inputs[101]: TensorMetadata(dtype=torch.float32, shape=torch.Size([2304, 768]), stride=(768, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=7077888, requires_grad=False, 
is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[57h3bzk67jreuqiikhvgdtcn2mdkz7xuwhdnrjueg4dxxn6tfzm] example_inputs[102]: TensorMetadata(dtype=torch.float32, shape=torch.Size([2304]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=9216, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[a64xnvbb6q6gew5c3aovaeqivj3hstrmxkbra3eidivx42znnuv] example_inputs[103]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768, 768]), stride=(768, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=2359296, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[ymueslj5cs5k2l243gcdaohltqzpminou4x5gltmu2cgjgroyyd] example_inputs[104]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=3072, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[ymueslj5cs5k2l243gcdaohltqzpminou4x5gltmu2cgjgroyyd] example_inputs[105]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=3072, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[ymueslj5cs5k2l243gcdaohltqzpminou4x5gltmu2cgjgroyyd] example_inputs[106]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=3072, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[aafg74anlk7yqmlg6ucup4tmjwhskblh75z227dcfmwpfzw7dzm] example_inputs[107]: TensorMetadata(dtype=torch.float32, shape=torch.Size([3072, 768]), stride=(768, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=9437184, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[sefiqkkteehwsnilwpljkx7vdgondub2hwggoishzmx7o5etmjk] example_inputs[108]: TensorMetadata(dtype=torch.float32, shape=torch.Size([3072]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=12288, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[6v4azk5hvrkyq22k6mfa2l7ovaddatmim7mcvcpdku2ieirjd64] example_inputs[109]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768, 3072]), stride=(3072, 1), device=device(type='cuda', index=0), layout=torch.strided, 
memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=9437184, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[ymueslj5cs5k2l243gcdaohltqzpminou4x5gltmu2cgjgroyyd] example_inputs[110]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=3072, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[ymueslj5cs5k2l243gcdaohltqzpminou4x5gltmu2cgjgroyyd] example_inputs[111]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=3072, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[ymueslj5cs5k2l243gcdaohltqzpminou4x5gltmu2cgjgroyyd] example_inputs[112]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=3072, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[ku6bnu4u7osvly4jsmf2w2orf22tghfytwzq6ycj3otmnq6f3wn] example_inputs[113]: TensorMetadata(dtype=torch.float32, shape=torch.Size([2304, 768]), stride=(768, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=7077888, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[57h3bzk67jreuqiikhvgdtcn2mdkz7xuwhdnrjueg4dxxn6tfzm] example_inputs[114]: TensorMetadata(dtype=torch.float32, shape=torch.Size([2304]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=9216, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[a64xnvbb6q6gew5c3aovaeqivj3hstrmxkbra3eidivx42znnuv] example_inputs[115]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768, 768]), stride=(768, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=2359296, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[ymueslj5cs5k2l243gcdaohltqzpminou4x5gltmu2cgjgroyyd] example_inputs[116]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=3072, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[ymueslj5cs5k2l243gcdaohltqzpminou4x5gltmu2cgjgroyyd] example_inputs[117]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), 
stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=3072, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[ymueslj5cs5k2l243gcdaohltqzpminou4x5gltmu2cgjgroyyd] example_inputs[118]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=3072, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[aafg74anlk7yqmlg6ucup4tmjwhskblh75z227dcfmwpfzw7dzm] example_inputs[119]: TensorMetadata(dtype=torch.float32, shape=torch.Size([3072, 768]), stride=(768, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=9437184, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[sefiqkkteehwsnilwpljkx7vdgondub2hwggoishzmx7o5etmjk] example_inputs[120]: TensorMetadata(dtype=torch.float32, shape=torch.Size([3072]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=12288, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[6v4azk5hvrkyq22k6mfa2l7ovaddatmim7mcvcpdku2ieirjd64] example_inputs[121]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768, 3072]), stride=(3072, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=9437184, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[ymueslj5cs5k2l243gcdaohltqzpminou4x5gltmu2cgjgroyyd] example_inputs[122]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=3072, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[ymueslj5cs5k2l243gcdaohltqzpminou4x5gltmu2cgjgroyyd] example_inputs[123]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=3072, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[ymueslj5cs5k2l243gcdaohltqzpminou4x5gltmu2cgjgroyyd] example_inputs[124]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=3072, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[ku6bnu4u7osvly4jsmf2w2orf22tghfytwzq6ycj3otmnq6f3wn] 
example_inputs[125]: TensorMetadata(dtype=torch.float32, shape=torch.Size([2304, 768]), stride=(768, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=7077888, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[57h3bzk67jreuqiikhvgdtcn2mdkz7xuwhdnrjueg4dxxn6tfzm] example_inputs[126]: TensorMetadata(dtype=torch.float32, shape=torch.Size([2304]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=9216, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[a64xnvbb6q6gew5c3aovaeqivj3hstrmxkbra3eidivx42znnuv] example_inputs[127]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768, 768]), stride=(768, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=2359296, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[ymueslj5cs5k2l243gcdaohltqzpminou4x5gltmu2cgjgroyyd] example_inputs[128]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=3072, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[ymueslj5cs5k2l243gcdaohltqzpminou4x5gltmu2cgjgroyyd] example_inputs[129]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=3072, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[ymueslj5cs5k2l243gcdaohltqzpminou4x5gltmu2cgjgroyyd] example_inputs[130]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=3072, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[aafg74anlk7yqmlg6ucup4tmjwhskblh75z227dcfmwpfzw7dzm] example_inputs[131]: TensorMetadata(dtype=torch.float32, shape=torch.Size([3072, 768]), stride=(768, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=9437184, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[sefiqkkteehwsnilwpljkx7vdgondub2hwggoishzmx7o5etmjk] example_inputs[132]: TensorMetadata(dtype=torch.float32, shape=torch.Size([3072]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=12288, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, 
dense_dim=None, sparse_dim=None)", + "[6v4azk5hvrkyq22k6mfa2l7ovaddatmim7mcvcpdku2ieirjd64] example_inputs[133]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768, 3072]), stride=(3072, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=9437184, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[ymueslj5cs5k2l243gcdaohltqzpminou4x5gltmu2cgjgroyyd] example_inputs[134]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=3072, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[ymueslj5cs5k2l243gcdaohltqzpminou4x5gltmu2cgjgroyyd] example_inputs[135]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=3072, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[ymueslj5cs5k2l243gcdaohltqzpminou4x5gltmu2cgjgroyyd] example_inputs[136]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=3072, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[ku6bnu4u7osvly4jsmf2w2orf22tghfytwzq6ycj3otmnq6f3wn] example_inputs[137]: TensorMetadata(dtype=torch.float32, shape=torch.Size([2304, 768]), stride=(768, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=7077888, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[57h3bzk67jreuqiikhvgdtcn2mdkz7xuwhdnrjueg4dxxn6tfzm] example_inputs[138]: TensorMetadata(dtype=torch.float32, shape=torch.Size([2304]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=9216, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[a64xnvbb6q6gew5c3aovaeqivj3hstrmxkbra3eidivx42znnuv] example_inputs[139]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768, 768]), stride=(768, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=2359296, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[ymueslj5cs5k2l243gcdaohltqzpminou4x5gltmu2cgjgroyyd] example_inputs[140]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=3072, requires_grad=False, is_quantized=False, 
is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[ymueslj5cs5k2l243gcdaohltqzpminou4x5gltmu2cgjgroyyd] example_inputs[141]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=3072, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[ymueslj5cs5k2l243gcdaohltqzpminou4x5gltmu2cgjgroyyd] example_inputs[142]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=3072, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[aafg74anlk7yqmlg6ucup4tmjwhskblh75z227dcfmwpfzw7dzm] example_inputs[143]: TensorMetadata(dtype=torch.float32, shape=torch.Size([3072, 768]), stride=(768, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=9437184, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[sefiqkkteehwsnilwpljkx7vdgondub2hwggoishzmx7o5etmjk] example_inputs[144]: TensorMetadata(dtype=torch.float32, shape=torch.Size([3072]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=12288, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[6v4azk5hvrkyq22k6mfa2l7ovaddatmim7mcvcpdku2ieirjd64] example_inputs[145]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768, 3072]), stride=(3072, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=9437184, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[ymueslj5cs5k2l243gcdaohltqzpminou4x5gltmu2cgjgroyyd] example_inputs[146]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=3072, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[ymueslj5cs5k2l243gcdaohltqzpminou4x5gltmu2cgjgroyyd] example_inputs[147]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=3072, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[ymueslj5cs5k2l243gcdaohltqzpminou4x5gltmu2cgjgroyyd] example_inputs[148]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, 
storage_offset=0, storage_bytes=3072, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] fx_kwargs[aot_mode]: False", + "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] fx_kwargs[cpp_wrapper]: False", + "[moqyx73pd52qgfefpqfwjkxjzf4hj25dyzowq4prptxxsmfw2vg] fx_kwargs[cudagraphs]: BoxedBool(value=True)", + "[tquy2we2efmowuj4wuqzcfcfdcrkzkzmwdae6hprj7fa64jpusq] fx_kwargs[extern_node_serializer]: None", + "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] fx_kwargs[is_backward]: False", + "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] fx_kwargs[is_inference]: False", + "[tquy2we2efmowuj4wuqzcfcfdcrkzkzmwdae6hprj7fa64jpusq] fx_kwargs[layout_opt]: None", + "[gqb2vspiuwox2kgd2oeoxezbk3ia6ckfpuiqza2vhvphouxwhv5] fx_kwargs[static_input_idxs]: [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76, 77, 78, 79, 80, 81, 82, 83, 84, 85, 86, 87, 88, 89, 90, 91, 92, 93, 94, 95, 96, 97, 98, 99, 100, 101, 102, 103, 104, 105, 106, 107, 108, 109, 110, 111, 112, 113, 114, 115, 116, 117, 118, 119, 120, 121, 122, 123, 124, 125, 126, 127, 128, 129, 130, 131, 132, 133, 134, 135, 136, 137, 138, 139, 140, 141, 142, 143, 144, 145, 146, 147, 148]", + "[moyibva4eclxkrvb6e7da5ve2knrozngxwjgojtfbwsd4wt762m] fx_kwargs[user_visible_outputs]: {'view_145': None}", + "[vrl5ktomgtzox5xucd3np6vug3vyj6hwwzahqijuwpmamlv7ohi] inputs_to_check[0]: 0", + "[du4vyrfyozrfxcf6kk6ma7oqwatapifazeelfsawmsiu6gjdtxp] deterministic_algorithms_settings: (False, False, True)", + "[7as26aeta7rzhgm2mxh4el36kupf55fr27327kzc2fsdiy3nexy] cuda_matmul_settings: (True, True, True)", + "[7sgsc55kfjzwis4uo5qss3whnvhbsher24yq6kx6s5kpsrkxea6] torch_version: ", + "[poglqjwowp4gnkmehjby2lvdjrwuo5tbxa2gayd6smgasl2hgsd] system_info[device]: {'name': 'NVIDIA H100'}", + "[sp37civic5nfnonkkbzih3xfntpo3gcmphcbajuw565ur3hduzs] system_info[version]: {'triton': '3.0.0+45fff310c8d412cc506d0c1c7cf83e442e39f90ca1a9dbd4346ecd342591748e57a184b0a3-f7c9bcb1b9106f4f2459ef1b70df75315076b76babe2a4ba9e0b3b5a40697757-d412cc506d0c1c7cf83e442e39f90ca1a9dbd4346ecd342591748e57a184b0a3-23d635e690d670bf61798e1259674b78c0ed5ba222ab6a455f329f27a758fc2d-e3b0c44298fc1c149afbf4c8996fb92427ae41e4649b934ca495991b7852b855-c9cccccc1377e5b8c152e8449fa73a2b3660a899bebac0581c99e4e368a61cde-e82522398a0a43b3c7db9c7fa26ab086260c0981e59942e1df54ddc669d7b78c-cf28658fa328f7f283ec4e6ccc6c48d7c2a8ddbdf5134d3eb35c9b38ce4ace44-b9d80690b3109c2aaf5ece450d62e93b37eb6ab38552089794b3bb36e36a22b3-36130a37af1b19a0dec569aa08d30b00c74c8f02b6b632999d86dea169146792-572e6cbc0b9ae0089330aa5d115d10cfc16bd45a69e3ca362c902b9c04b5280b-412a0e8de51f28c2de98b3343804b2949ede2cb593052a0eb0ba3e6a9a1812d0', 'cuda': '12.0'}", + "[zapn5tv6636hi2hdgg2j7wqiwxjbrqbqa4vrgrnuli5v34salwn] system_info[hash]: ee3e5029961cf964c22a83f0ca3586fcab4a36649a1478e6293b5382433b23d3", + "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[TYPE_CHECKING]: False", + "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[abi_compatible]: False", + "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[aggressive_fusion]: False", + "[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] 
inductor_config[allow_buffer_reuse]: True", + "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[allow_stack_allocation]: False", + "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[always_keep_tensor_constants]: False", + "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[aot_inductor.debug_compile]: False", + "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[aot_inductor.debug_dump_consts_bin]: False", + "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[aot_inductor.force_mmap_weights]: False", + "[v3hzzlv4tjgvp3pyhmzagjd25orl6n7nynoa7svlhhwk73b7u3c] inductor_config[aot_inductor.output_path]: ", + "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[aot_inductor.package]: False", + "[v3hzzlv4tjgvp3pyhmzagjd25orl6n7nynoa7svlhhwk73b7u3c] inductor_config[aot_inductor.serialized_in_spec]: ", + "[v3hzzlv4tjgvp3pyhmzagjd25orl6n7nynoa7svlhhwk73b7u3c] inductor_config[aot_inductor.serialized_out_spec]: ", + "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[aot_inductor.use_runtime_constant_folding]: False", + "[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[assert_indirect_indexing]: True", + "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[assume_aligned_inputs]: False", + "[v3hzzlv4tjgvp3pyhmzagjd25orl6n7nynoa7svlhhwk73b7u3c] inductor_config[autoheuristic_collect]: ", + "[jvchmi66fvqzlemhr5fcqorz5trfdtdalzfagtj2aolmimwqhdq] inductor_config[autoheuristic_log_path]: DEFAULT", + "[jwbrgxes7vjqumngs5hyj6gn5nytv2whnppnzngvaagfmawhkkd] inductor_config[autoheuristic_use]: mixed_mm", + "[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[autotune_fallback_to_aten]: True", + "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[autotune_in_subproc]: False", + "[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[autotune_local_cache]: True", + "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[autotune_multi_device]: False", + "[tquy2we2efmowuj4wuqzcfcfdcrkzkzmwdae6hprj7fa64jpusq] inductor_config[autotune_remote_cache]: None", + "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[b2b_gemm_pass]: False", + "[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[batch_fusion]: True", + "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[benchmark_combo_kernel]: False", + "[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[benchmark_epilogue_fusion]: True", + "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[benchmark_fusion]: False", + "[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[benchmark_harness]: True", + "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[benchmark_kernel]: False", + "[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[bw_outputs_user_visible]: True", + "[b4ha3ravs3qv237q65hpfqegbnoww7tf2ahcbu2i7xo6te5spqs] inductor_config[c_shim_version]: 2", + "[aghvyrrgwvxijco2pk5wzc3cgmmthrbmgxitiibxuuscxdwrjd3] inductor_config[combo_kernel_allow_mixed_sizes]: 1", + "[aghvyrrgwvxijco2pk5wzc3cgmmthrbmgxitiibxuuscxdwrjd3] inductor_config[combo_kernels_autotune]: 1", + "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[comment_origin]: False", + "[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[comprehensive_padding]: True", + 
"[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[compute_all_bounds]: False", + "[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[constant_and_index_propagation]: True", + "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[conv_1x1_as_mm]: False", + "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[coordinate_descent_check_all_directions]: False", + "[aghvyrrgwvxijco2pk5wzc3cgmmthrbmgxitiibxuuscxdwrjd3] inductor_config[coordinate_descent_search_radius]: 1", + "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[coordinate_descent_tuning]: False", + "[c7zj4qytmety6keurs3hsh5wn7foxp3dqx4kym2ucszzcb2ngrf] inductor_config[cpp.cxx]: (None, 'g++')", + "[yrty22bseefglnysuoec4ji7j2rnaggdj3g33zzj7avogwfmgdw] inductor_config[cpp.descriptive_names]: original_aten", + "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[cpp.dynamic_threads]: False", + "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[cpp.enable_floating_point_contract_flag]: False", + "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[cpp.enable_kernel_profile]: False", + "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[cpp.enable_unsafe_math_opt_flag]: False", + "[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[cpp.fallback_scatter_reduce_sum]: True", + "[aghvyrrgwvxijco2pk5wzc3cgmmthrbmgxitiibxuuscxdwrjd3] inductor_config[cpp.gemm_max_k_slices]: 1", + "[tquy2we2efmowuj4wuqzcfcfdcrkzkzmwdae6hprj7fa64jpusq] inductor_config[cpp.inject_log1p_bug_TESTING_ONLY]: None", + "[tquy2we2efmowuj4wuqzcfcfdcrkzkzmwdae6hprj7fa64jpusq] inductor_config[cpp.inject_relu_bug_TESTING_ONLY]: None", + "[ebt2ncs4f5y7dn7btzi76mnouepvzad474tmp5iju4wiuumjl4s] inductor_config[cpp.max_horizontal_fusion_size]: 16", + "[g7rrnbg5yonzux3cfj5ovre5lob3ayda7qcfpxjvtwmiz4uicii] inductor_config[cpp.min_chunk_size]: 4096", + "[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[cpp.no_redundant_loops]: True", + "[tquy2we2efmowuj4wuqzcfcfdcrkzkzmwdae6hprj7fa64jpusq] inductor_config[cpp.simdlen]: None", + "[sz3im5ogc6asp7g4uqocnovype63tkdexzfrniv6hn2oank3biu] inductor_config[cpp.threads]: -1", + "[tquy2we2efmowuj4wuqzcfcfdcrkzkzmwdae6hprj7fa64jpusq] inductor_config[cpp.vec_isa_ok]: None", + "[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[cpp.weight_prepack]: True", + "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[cpp_wrapper]: False", + "[bsvfcwwoczx2rlkdz2eta6doujsymyihmi46hhwk6clrrvwcb6m] inductor_config[cpu_backend]: cpp", + "[tquy2we2efmowuj4wuqzcfcfdcrkzkzmwdae6hprj7fa64jpusq] inductor_config[cuda.arch]: None", + "[tvyftmtdmezlejo2xllu7awzv4pzc4vm4fub4b3gpl5jptjkosi] inductor_config[cuda.compile_opt_level]: -O1", + "[tquy2we2efmowuj4wuqzcfcfdcrkzkzmwdae6hprj7fa64jpusq] inductor_config[cuda.cuda_cxx]: None", + "[aghvyrrgwvxijco2pk5wzc3cgmmthrbmgxitiibxuuscxdwrjd3] inductor_config[cuda.cutlass_backend_min_gemm_size]: 1", + "[tquy2we2efmowuj4wuqzcfcfdcrkzkzmwdae6hprj7fa64jpusq] inductor_config[cuda.cutlass_max_profiling_configs]: None", + "[tquy2we2efmowuj4wuqzcfcfdcrkzkzmwdae6hprj7fa64jpusq] inductor_config[cuda.cutlass_op_allowlist_regex]: None", + "[lwkz5chtpji756gurqw4foijfi7zfgljtnn5nmnvdi2skpt4mgh] inductor_config[cuda.cutlass_op_denylist_regex]: pingpong", + "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[cuda.enable_cuda_lto]: False", + 
"[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[cuda.enable_debug_info]: False", + "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[cuda.enable_ptxas_info]: False", + "[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[cuda.generate_test_runner]: True", + "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[cuda.use_fast_math]: False", + "[tquy2we2efmowuj4wuqzcfcfdcrkzkzmwdae6hprj7fa64jpusq] inductor_config[cuda.version]: None", + "[caw4ly2z672k6kjfahoxwpajp5idhhtrpgf3ma2clylcp7c7aid] inductor_config[cuda_backend]: triton", + "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[dce]: False", + "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[debug]: False", + "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[debug_fusion]: False", + "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[debug_index_asserts]: False", + "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[debug_ir_traceback]: False", + "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[decompose_mem_bound_mm]: False", + "[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[developer_warnings]: True", + "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[disable_cpp_codegen]: False", + "[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[disable_progress]: True", + "[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[dynamic_scale_rblock]: True", + "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[efficient_conv_bn_eval_fx_passes]: False", + "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[emulate_precision_casts]: False", + "[v3hzzlv4tjgvp3pyhmzagjd25orl6n7nynoa7svlhhwk73b7u3c] inductor_config[enabled_metric_tables]: ", + "[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[epilogue_fusion]: True", + "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[epilogue_fusion_first]: False", + "[lxxtoqhcoepwfokeiibd575gnxo3uzwiv4hmpomlwkpzqz3qzsh] inductor_config[estimate_op_runtime]: default", + "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[fallback_random]: False", + "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[force_disable_caches]: False", + "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[force_fuse_int_mm_with_mul]: False", + "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[force_layout_optimization]: False", + "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[force_same_precision]: False", + "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[force_shape_pad]: False", + "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[freezing]: False", + "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[freezing_discard_parameters]: False", + "[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[fx_graph_cache]: True", + "[tquy2we2efmowuj4wuqzcfcfdcrkzkzmwdae6hprj7fa64jpusq] inductor_config[fx_graph_remote_cache]: None", + "[zwmmbkdkarexuhbigurz5lfnhx64tht7fznecjkrvznh6rzivbv] inductor_config[fx_passes_numeric_check]: {'pre_grad': False, 'precision': 0.0001, 'num_iterations': 1, 'requires_optimizer': True}", + "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[generate_intermediate_hooks]: 
False", + "[tquy2we2efmowuj4wuqzcfcfdcrkzkzmwdae6hprj7fa64jpusq] inductor_config[global_cache_dir]: None", + "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[group_fusion]: False", + "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[halide.asserts]: False", + "[ljhgflgihidopsfsdcbqynv27nceykby3nutyd5jlcpq7n6e7l4] inductor_config[halide.cpu_target]: host", + "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[halide.debug]: False", + "[wx7vmsmrdpk5ue2txlywp3lj3faqmdjphs5fgg2ehzsyno7uovg] inductor_config[halide.gpu_target]: host-cuda", + "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[halide.scan_kernels]: False", + "[k5ogk6345jvklsnu7g2njqstiz2g6pm5wmqpgg3kasrmuqwjvl6] inductor_config[halide.scheduler_cpu]: Adams2019", + "[svgytlua5wcyeia7wq7e6zgh5tsueikrnzchmdmouvmkpfsc2zq] inductor_config[halide.scheduler_cuda]: Anderson2021", + "[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[implicit_fallbacks]: True", + "[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[inplace_buffers]: True", + "[5fxczt3ciyxitdhizb7sfsgn7fhpczcqsngttnt5ot2wyctk7co] inductor_config[inter_node_bw]: 25", + "[yezuzjtg4h3jjur4jwtwiehbyixa7eonq4tqsqmwqve2lvvmrem] inductor_config[intra_node_bw]: 300", + "[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[is_nightly_or_source]: True", + "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[is_predispatch]: False", + "[tquy2we2efmowuj4wuqzcfcfdcrkzkzmwdae6hprj7fa64jpusq] inductor_config[joint_custom_post_pass]: None", + "[tquy2we2efmowuj4wuqzcfcfdcrkzkzmwdae6hprj7fa64jpusq] inductor_config[joint_custom_pre_pass]: None", + "[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[joint_graph_constant_folding]: True", + "[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[keep_output_stride]: True", + "[j6c55jha5r2sdys2rwq7uqhtleea5dgjcye7nicfgft36v7xfvp] inductor_config[kernel_name_max_ops]: 10", + "[4p2fdjlvxrcw7c7fvzm5huhtqxnro4kvkx56f7p5zyrxqkwooov] inductor_config[layout_opt_default]: 1", + "[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[layout_optimization]: True", + "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[max_autotune]: False", + "[uqlsbif4zxd75vt522p52txyuguieipi2lwz5g5awt56lccqk7s] inductor_config[max_autotune_conv_backends]: ATEN,TRITON", + "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[max_autotune_gemm]: False", + "[2y7luesktjrque3nr7qtxnum2mkbeegzdrsvkm3rvdlhqboajhx] inductor_config[max_autotune_gemm_backends]: ATEN,TRITON,CPP", + "[jvchmi66fvqzlemhr5fcqorz5trfdtdalzfagtj2aolmimwqhdq] inductor_config[max_autotune_gemm_search_space]: DEFAULT", + "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[max_autotune_pointwise]: False", + "[bh33ranllcgilhgmgr3qvygzxjm6isq5iexnfm3zx6fnr2zwlp2] inductor_config[max_autotune_subproc_graceful_timeout_seconds]: 1.0", + "[iglov24t7x5ruci344aer2tm6nqshi4veuw4wxlssxtu46cx76m] inductor_config[max_autotune_subproc_result_timeout_seconds]: 60.0", + "[pwoh5aypf4fxbntdvwt67rppxorqos6xr3w7qzeun6kblbfg2ga] inductor_config[max_autotune_subproc_terminate_timeout_seconds]: 2.0", + "[aghvyrrgwvxijco2pk5wzc3cgmmthrbmgxitiibxuuscxdwrjd3] inductor_config[max_epilogue_benchmarked_choices]: 1", + "[jykiys6ynafs3zdylwa5ggq6j655mxeh42d6mtdi22gffkrmiac] inductor_config[max_fusion_size]: 64", + "[yttmfmxblgcbsvbokguzowcorrcxz5uunxtcvsbe6nijgcx45he] 
inductor_config[max_pointwise_cat_inputs]: 8", + "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[memory_planning]: False", + "[x75won4jmsgeb63pcvwr2y4eteyzzdhmf5rv6xhjppie4hx2yu5] inductor_config[memory_pool]: intermediates", + "[v2td5s4lnsvyxvaevy4chx6kc5h3mm2axazbgwimqule5zrzao7] inductor_config[mixed_mm_choice]: heuristic", + "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[nan_asserts]: False", + "[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[optimize_scatter_upon_const_tensor]: True", + "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[pad_channels_last]: False", + "[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[pattern_matcher]: True", + "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[permute_fusion]: False", + "[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[pick_loop_orders]: True", + "[tquy2we2efmowuj4wuqzcfcfdcrkzkzmwdae6hprj7fa64jpusq] inductor_config[post_grad_custom_post_pass]: None", + "[tquy2we2efmowuj4wuqzcfcfdcrkzkzmwdae6hprj7fa64jpusq] inductor_config[post_grad_custom_pre_pass]: None", + "[4bryyl4ahh5whyg3zwqebpwmjnx6w77nqgqbdjlowju6lkqtn7w] inductor_config[post_grad_fusion_options]: {}", + "[tquy2we2efmowuj4wuqzcfcfdcrkzkzmwdae6hprj7fa64jpusq] inductor_config[pre_grad_custom_pass]: None", + "[gtkv35cxmtt6tr556buxi277a67g25mjojnv32dc4bjvc7bwscw] inductor_config[pre_grad_fusion_options]: {'batch_linear': {}, 'batch_linear_lhs': {}, 'batch_layernorm': {}, 'batch_tanh': {}, 'batch_relu': {}, 'batch_sigmoid': {}}", + "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[profile_bandwidth]: False", + "[tquy2we2efmowuj4wuqzcfcfdcrkzkzmwdae6hprj7fa64jpusq] inductor_config[profile_bandwidth_output]: None", + "[v3hzzlv4tjgvp3pyhmzagjd25orl6n7nynoa7svlhhwk73b7u3c] inductor_config[profile_bandwidth_regex]: ", + "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[profiler_mark_wrapper_call]: False", + "[yttmfmxblgcbsvbokguzowcorrcxz5uunxtcvsbe6nijgcx45he] inductor_config[realize_acc_reads_threshold]: 8", + "[rr5m5hsocoyodldz7vcvaizdwvm2rt34evmqdxvng7wz3tufvo6] inductor_config[realize_opcount_threshold]: 30", + "[lkkae3meylaixfif4thncru4hjqeaislawjoghffrbwuscaagei] inductor_config[realize_reads_threshold]: 4", + "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[reorder_for_compute_comm_overlap]: False", + "[ssupi7bu3rrhdpg2jyegzncu3kg3nnhklyliqvutaxgs7y7k3dx] inductor_config[reorder_for_compute_comm_overlap_passes]: ['reorder_compute_for_overlap', 'sink_waits', 'raise_comms']", + "[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[reorder_for_locality]: True", + "[h25wqx6vliw4j5rtzzbv6latydxyei3deyg6v7wzvnzryfktuki] inductor_config[rocm.arch]: []", + "[tquy2we2efmowuj4wuqzcfcfdcrkzkzmwdae6hprj7fa64jpusq] inductor_config[rocm.ck_dir]: None", + "[oartxnko2l7d67tzwwm2otcumaut3n4wwcfgz3o377hmcveu5ft] inductor_config[rocm.ck_supported_arch]: ['gfx90a', 'gfx940', 'gfx941', 'gfx942']", + "[klfqjprnpfhcdurgvuikvc4rpd5ynkpk77toousr5h3u5roty6p] inductor_config[rocm.compile_opt_level]: -O2", + "[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[rocm.flush_denormals]: True", + "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[rocm.is_debug]: False", + "[tquy2we2efmowuj4wuqzcfcfdcrkzkzmwdae6hprj7fa64jpusq] inductor_config[rocm.n_max_profiling_configs]: None", + "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] 
inductor_config[rocm.print_kernel_resource_usage]: False", + "[tquy2we2efmowuj4wuqzcfcfdcrkzkzmwdae6hprj7fa64jpusq] inductor_config[rocm.rocm_home]: None", + "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[rocm.save_temps]: False", + "[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[rocm.use_fast_math]: True", + "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[rocm.use_preselected_instances]: False", + "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[save_args]: False", + "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[search_autotune_cache]: False", + "[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[shape_padding]: True", + "[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[size_asserts]: True", + "[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[split_cat_fx_passes]: True", + "[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[split_reductions]: True", + "[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[static_weight_shapes]: True", + "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[triton.autotune_at_compile_time]: False", + "[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[triton.autotune_cublasLt]: True", + "[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[triton.autotune_pointwise]: True", + "[tuax46wac7rfv2trf5gcps6vleo3cq44lbnrdxtprvo3ljjaddj] inductor_config[triton.cudagraph_dynamic_shape_warn_limit]: 50", + "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[triton.cudagraph_skip_dynamic_graphs]: False", + "[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[triton.cudagraph_support_input_mutation]: True", + "[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[triton.cudagraph_trees]: True", + "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[triton.cudagraph_trees_history_recording]: False", + "[ljdqgtysl3vdf7j6attlz5gmjg2ncihnveojfyubosplmkrjgra] inductor_config[triton.cudagraph_unexpected_rerecord_limit]: 128", + "[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[triton.cudagraphs]: True", + "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[triton.debug_sync_graph]: False", + "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[triton.debug_sync_kernel]: False", + "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[triton.dense_indexing]: False", + "[yrty22bseefglnysuoec4ji7j2rnaggdj3g33zzj7avogwfmgdw] inductor_config[triton.descriptive_names]: original_aten", + "[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[triton.divisible_by_16]: True", + "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[triton.fast_path_cudagraph_asserts]: False", + "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[triton.force_cudagraph_sync]: False", + "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[triton.force_cudagraphs_warmup]: False", + "[tquy2we2efmowuj4wuqzcfcfdcrkzkzmwdae6hprj7fa64jpusq] inductor_config[triton.inject_relu_bug_TESTING_ONLY]: None", + "[pr5nr4a7dthirgd2ljo3d2xakc63ywxugusu6mkmr6gmpeliyib] inductor_config[triton.max_tiles]: 2", + "[fv6slhtedtydps5s5u2etitscliblzcidyitqf7krsv4e23fzk6] inductor_config[triton.min_split_scan_rblock]: 256", + 
"[vrl5ktomgtzox5xucd3np6vug3vyj6hwwzahqijuwpmamlv7ohi] inductor_config[triton.multi_kernel]: 0", + "[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[triton.persistent_reductions]: True", + "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[triton.skip_cudagraph_warmup]: False", + "[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[triton.slow_path_cudagraph_asserts]: True", + "[ebt2ncs4f5y7dn7btzi76mnouepvzad474tmp5iju4wiuumjl4s] inductor_config[triton.spill_threshold]: 16", + "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[triton.store_cubin]: False", + "[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[triton.tiling_prevents_pointwise_fusion]: True", + "[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[triton.tiling_prevents_reduction_fusion]: True", + "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[triton.unique_kernel_names]: False", + "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[triton.use_block_ptr]: False", + "[wft6ljqsfr3x4m7fa5zuyb7cwknky4irrxz4bjr6uzr2yiopxqj] inductor_config[unbacked_symint_fallback]: 8192", + "[yttmfmxblgcbsvbokguzowcorrcxz5uunxtcvsbe6nijgcx45he] inductor_config[unroll_reductions_threshold]: 8", + "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[unsafe_ignore_unsupported_triton_autotune_args]: False", + "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[use_minimal_arrayref_interface]: False", + "[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[use_mixed_mm]: True", + "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[verbose_progress]: False", + "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[warn_mix_layout]: False", + "[hofygoznqmna6yvgsc6itdddi4hxftssgegh6wquixg2yng3a3z] inductor_config[worker_start_method]: subprocess" + ] + }, + "ph": "i", + "pid": 0, + "s": "p" + } +V0806 13:56:00.757000 4107173 torch/_inductor/codecache.py:1326] {"artifact": {"name": "fx_graph_cache_hash", "encoding": "json"}, "frame_id": 2, "frame_compile_id": 0, "attempt": 1, "has_payload": "383a721c539a14dd80b4ff9efee951aa"} + {"key": "fawswmdqdoeabru4cngdomrqdcmfg5ehi5bfp4lz3lpw74xd2r5q", "cache_state": "miss", "components": ["[7gdnkoxeguoowip7ectpux5j62uq56ccdoktbdshbvoqarzspmh] gm: GraphModule()\n\n\n\ndef forward(self, primals_1, primals_2, primals_3, primals_4, primals_5, primals_6, primals_7, primals_8, primals_9, primals_10, primals_11, primals_12, primals_13, primals_14, primals_15, primals_16, primals_17, primals_18, primals_19, primals_20, primals_21, primals_22, primals_23, primals_24, primals_25, primals_26, primals_27, primals_28, primals_29, primals_30, primals_31, primals_32, primals_33, primals_34, primals_35, primals_36, primals_37, primals_38, primals_39, primals_40, primals_41, primals_42, primals_43, primals_44, primals_45, primals_46, primals_47, primals_48, primals_49, primals_50, primals_51, primals_52, primals_53, primals_54, primals_55, primals_56, primals_57, primals_58, primals_59, primals_60, primals_61, primals_62, primals_63, primals_64, primals_65, primals_66, primals_67, primals_68, primals_69, primals_70, primals_71, primals_72, primals_73, primals_74, primals_75, primals_76, primals_77, primals_78, primals_79, primals_80, primals_81, primals_82, primals_83, primals_84, primals_85, primals_86, primals_87, primals_88, primals_89, primals_90, primals_91, primals_92, primals_93, 
primals_94, primals_95, primals_96, primals_97, primals_98, primals_99, primals_100, primals_101, primals_102, primals_103, primals_104, primals_105, primals_106, primals_107, primals_108, primals_109, primals_110, primals_111, primals_112, primals_113, primals_114, primals_115, primals_116, primals_117, primals_118, primals_119, primals_120, primals_121, primals_122, primals_123, primals_124, primals_125, primals_126, primals_127, primals_128, primals_129, primals_130, primals_131, primals_132, primals_133, primals_134, primals_135, primals_136, primals_137, primals_138, primals_139, primals_140, primals_141, primals_142, primals_143, primals_144, primals_145, primals_146, primals_147, primals_148, primals_149):\n iota = torch.ops.prims.iota.default(64, start = 0, step = 1, dtype = torch.int64, device = device(type='cuda', index=0), requires_grad = False)\n unsqueeze = torch.ops.aten.unsqueeze.default(iota, 0); iota = None\n embedding = torch.ops.aten.embedding.default(primals_2, primals_1)\n embedding_1 = torch.ops.aten.embedding.default(primals_3, unsqueeze); primals_3 = None\n add = torch.ops.aten.add.Tensor(embedding, embedding_1); embedding = embedding_1 = None\n var_mean = torch.ops.aten.var_mean.correction(add, [2], correction = 0, keepdim = True)\n getitem = var_mean[0]\n getitem_1 = var_mean[1]; var_mean = None\n add_1 = torch.ops.aten.add.Tensor(getitem, 1e-05); getitem = None\n rsqrt = torch.ops.aten.rsqrt.default(add_1); add_1 = None\n sub = torch.ops.aten.sub.Tensor(add, getitem_1); getitem_1 = None\n mul = torch.ops.aten.mul.Tensor(sub, rsqrt); sub = None\n mul_1 = torch.ops.aten.mul.Tensor(mul, primals_4)\n add_2 = torch.ops.aten.add.Tensor(mul_1, primals_5); mul_1 = primals_5 = None\n view = torch.ops.aten.view.default(add_2, [64, 768]); add_2 = None\n permute = torch.ops.aten.permute.default(primals_6, [1, 0]); primals_6 = None\n addmm = torch.ops.aten.addmm.default(primals_7, view, permute); primals_7 = None\n view_1 = torch.ops.aten.view.default(addmm, [1, 64, 2304]); addmm = None\n split = torch.ops.aten.split.Tensor(view_1, 768, 2); view_1 = None\n getitem_2 = split[0]\n getitem_3 = split[1]\n getitem_4 = split[2]; split = None\n view_2 = torch.ops.aten.view.default(getitem_3, [1, 64, 12, 64]); getitem_3 = None\n permute_1 = torch.ops.aten.permute.default(view_2, [0, 2, 1, 3]); view_2 = None\n view_3 = torch.ops.aten.view.default(getitem_2, [1, 64, 12, 64]); getitem_2 = None\n permute_2 = torch.ops.aten.permute.default(view_3, [0, 2, 1, 3]); view_3 = None\n view_4 = torch.ops.aten.view.default(getitem_4, [1, 64, 12, 64]); getitem_4 = None\n permute_3 = torch.ops.aten.permute.default(view_4, [0, 2, 1, 3]); view_4 = None\n _scaled_dot_product_efficient_attention = torch.ops.aten._scaled_dot_product_efficient_attention.default(permute_2, permute_1, permute_3, None, True, 0.0, True)\n getitem_5 = _scaled_dot_product_efficient_attention[0]\n getitem_6 = _scaled_dot_product_efficient_attention[1]\n getitem_7 = _scaled_dot_product_efficient_attention[2]\n getitem_8 = _scaled_dot_product_efficient_attention[3]; _scaled_dot_product_efficient_attention = None\n permute_4 = torch.ops.aten.permute.default(getitem_5, [0, 2, 1, 3])\n view_5 = torch.ops.aten.view.default(permute_4, [1, 64, 768]); permute_4 = None\n view_6 = torch.ops.aten.view.default(view_5, [64, 768]); view_5 = None\n permute_5 = torch.ops.aten.permute.default(primals_8, [1, 0]); primals_8 = None\n addmm_1 = torch.ops.aten.addmm.default(primals_9, view_6, permute_5); primals_9 = view_6 = None\n view_7 = 
torch.ops.aten.view.default(addmm_1, [1, 64, 768]); addmm_1 = None\n add_3 = torch.ops.aten.add.Tensor(add, view_7); add = view_7 = None\n var_mean_1 = torch.ops.aten.var_mean.correction(add_3, [2], correction = 0, keepdim = True)\n getitem_9 = var_mean_1[0]\n getitem_10 = var_mean_1[1]; var_mean_1 = None\n add_4 = torch.ops.aten.add.Tensor(getitem_9, 1e-05); getitem_9 = None\n rsqrt_1 = torch.ops.aten.rsqrt.default(add_4); add_4 = None\n sub_1 = torch.ops.aten.sub.Tensor(add_3, getitem_10); getitem_10 = None\n mul_2 = torch.ops.aten.mul.Tensor(sub_1, rsqrt_1); sub_1 = None\n mul_3 = torch.ops.aten.mul.Tensor(mul_2, primals_10)\n add_5 = torch.ops.aten.add.Tensor(mul_3, primals_11); mul_3 = primals_11 = None\n view_8 = torch.ops.aten.view.default(add_5, [64, 768]); add_5 = None\n permute_6 = torch.ops.aten.permute.default(primals_12, [1, 0]); primals_12 = None\n addmm_2 = torch.ops.aten.addmm.default(primals_13, view_8, permute_6); primals_13 = None\n view_9 = torch.ops.aten.view.default(addmm_2, [1, 64, 3072])\n mul_4 = torch.ops.aten.mul.Tensor(view_9, 0.5)\n pow_1 = torch.ops.aten.pow.Tensor_Scalar(view_9, 3.0)\n mul_5 = torch.ops.aten.mul.Tensor(pow_1, 0.044715); pow_1 = None\n add_6 = torch.ops.aten.add.Tensor(view_9, mul_5); view_9 = mul_5 = None\n mul_6 = torch.ops.aten.mul.Tensor(add_6, 0.7978845608028654); add_6 = None\n tanh = torch.ops.aten.tanh.default(mul_6); mul_6 = None\n add_7 = torch.ops.aten.add.Tensor(tanh, 1.0); tanh = None\n mul_7 = torch.ops.aten.mul.Tensor(mul_4, add_7); mul_4 = add_7 = None\n view_10 = torch.ops.aten.view.default(mul_7, [64, 3072]); mul_7 = None\n permute_7 = torch.ops.aten.permute.default(primals_14, [1, 0]); primals_14 = None\n addmm_3 = torch.ops.aten.addmm.default(primals_15, view_10, permute_7); primals_15 = None\n view_11 = torch.ops.aten.view.default(addmm_3, [1, 64, 768]); addmm_3 = None\n add_8 = torch.ops.aten.add.Tensor(add_3, view_11); add_3 = view_11 = None\n var_mean_2 = torch.ops.aten.var_mean.correction(add_8, [2], correction = 0, keepdim = True)\n getitem_11 = var_mean_2[0]\n getitem_12 = var_mean_2[1]; var_mean_2 = None\n add_9 = torch.ops.aten.add.Tensor(getitem_11, 1e-05); getitem_11 = None\n rsqrt_2 = torch.ops.aten.rsqrt.default(add_9); add_9 = None\n sub_2 = torch.ops.aten.sub.Tensor(add_8, getitem_12); getitem_12 = None\n mul_8 = torch.ops.aten.mul.Tensor(sub_2, rsqrt_2); sub_2 = None\n mul_9 = torch.ops.aten.mul.Tensor(mul_8, primals_16)\n add_10 = torch.ops.aten.add.Tensor(mul_9, primals_17); mul_9 = primals_17 = None\n view_12 = torch.ops.aten.view.default(add_10, [64, 768]); add_10 = None\n permute_8 = torch.ops.aten.permute.default(primals_18, [1, 0]); primals_18 = None\n addmm_4 = torch.ops.aten.addmm.default(primals_19, view_12, permute_8); primals_19 = None\n view_13 = torch.ops.aten.view.default(addmm_4, [1, 64, 2304]); addmm_4 = None\n split_1 = torch.ops.aten.split.Tensor(view_13, 768, 2); view_13 = None\n getitem_13 = split_1[0]\n getitem_14 = split_1[1]\n getitem_15 = split_1[2]; split_1 = None\n view_14 = torch.ops.aten.view.default(getitem_14, [1, 64, 12, 64]); getitem_14 = None\n permute_9 = torch.ops.aten.permute.default(view_14, [0, 2, 1, 3]); view_14 = None\n view_15 = torch.ops.aten.view.default(getitem_13, [1, 64, 12, 64]); getitem_13 = None\n permute_10 = torch.ops.aten.permute.default(view_15, [0, 2, 1, 3]); view_15 = None\n view_16 = torch.ops.aten.view.default(getitem_15, [1, 64, 12, 64]); getitem_15 = None\n permute_11 = torch.ops.aten.permute.default(view_16, [0, 2, 1, 3]); view_16 = None\n 
_scaled_dot_product_efficient_attention_1 = torch.ops.aten._scaled_dot_product_efficient_attention.default(permute_10, permute_9, permute_11, None, True, 0.0, True)\n getitem_16 = _scaled_dot_product_efficient_attention_1[0]\n getitem_17 = _scaled_dot_product_efficient_attention_1[1]\n getitem_18 = _scaled_dot_product_efficient_attention_1[2]\n getitem_19 = _scaled_dot_product_efficient_attention_1[3]; _scaled_dot_product_efficient_attention_1 = None\n permute_12 = torch.ops.aten.permute.default(getitem_16, [0, 2, 1, 3])\n view_17 = torch.ops.aten.view.default(permute_12, [1, 64, 768]); permute_12 = None\n view_18 = torch.ops.aten.view.default(view_17, [64, 768]); view_17 = None\n permute_13 = torch.ops.aten.permute.default(primals_20, [1, 0]); primals_20 = None\n addmm_5 = torch.ops.aten.addmm.default(primals_21, view_18, permute_13); primals_21 = view_18 = None\n view_19 = torch.ops.aten.view.default(addmm_5, [1, 64, 768]); addmm_5 = None\n add_11 = torch.ops.aten.add.Tensor(add_8, view_19); add_8 = view_19 = None\n var_mean_3 = torch.ops.aten.var_mean.correction(add_11, [2], correction = 0, keepdim = True)\n getitem_20 = var_mean_3[0]\n getitem_21 = var_mean_3[1]; var_mean_3 = None\n add_12 = torch.ops.aten.add.Tensor(getitem_20, 1e-05); getitem_20 = None\n rsqrt_3 = torch.ops.aten.rsqrt.default(add_12); add_12 = None\n sub_3 = torch.ops.aten.sub.Tensor(add_11, getitem_21); getitem_21 = None\n mul_10 = torch.ops.aten.mul.Tensor(sub_3, rsqrt_3); sub_3 = None\n mul_11 = torch.ops.aten.mul.Tensor(mul_10, primals_22)\n add_13 = torch.ops.aten.add.Tensor(mul_11, primals_23); mul_11 = primals_23 = None\n view_20 = torch.ops.aten.view.default(add_13, [64, 768]); add_13 = None\n permute_14 = torch.ops.aten.permute.default(primals_24, [1, 0]); primals_24 = None\n addmm_6 = torch.ops.aten.addmm.default(primals_25, view_20, permute_14); primals_25 = None\n view_21 = torch.ops.aten.view.default(addmm_6, [1, 64, 3072])\n mul_12 = torch.ops.aten.mul.Tensor(view_21, 0.5)\n pow_2 = torch.ops.aten.pow.Tensor_Scalar(view_21, 3.0)\n mul_13 = torch.ops.aten.mul.Tensor(pow_2, 0.044715); pow_2 = None\n add_14 = torch.ops.aten.add.Tensor(view_21, mul_13); view_21 = mul_13 = None\n mul_14 = torch.ops.aten.mul.Tensor(add_14, 0.7978845608028654); add_14 = None\n tanh_1 = torch.ops.aten.tanh.default(mul_14); mul_14 = None\n add_15 = torch.ops.aten.add.Tensor(tanh_1, 1.0); tanh_1 = None\n mul_15 = torch.ops.aten.mul.Tensor(mul_12, add_15); mul_12 = add_15 = None\n view_22 = torch.ops.aten.view.default(mul_15, [64, 3072]); mul_15 = None\n permute_15 = torch.ops.aten.permute.default(primals_26, [1, 0]); primals_26 = None\n addmm_7 = torch.ops.aten.addmm.default(primals_27, view_22, permute_15); primals_27 = None\n view_23 = torch.ops.aten.view.default(addmm_7, [1, 64, 768]); addmm_7 = None\n add_16 = torch.ops.aten.add.Tensor(add_11, view_23); add_11 = view_23 = None\n var_mean_4 = torch.ops.aten.var_mean.correction(add_16, [2], correction = 0, keepdim = True)\n getitem_22 = var_mean_4[0]\n getitem_23 = var_mean_4[1]; var_mean_4 = None\n add_17 = torch.ops.aten.add.Tensor(getitem_22, 1e-05); getitem_22 = None\n rsqrt_4 = torch.ops.aten.rsqrt.default(add_17); add_17 = None\n sub_4 = torch.ops.aten.sub.Tensor(add_16, getitem_23); getitem_23 = None\n mul_16 = torch.ops.aten.mul.Tensor(sub_4, rsqrt_4); sub_4 = None\n mul_17 = torch.ops.aten.mul.Tensor(mul_16, primals_28)\n add_18 = torch.ops.aten.add.Tensor(mul_17, primals_29); mul_17 = primals_29 = None\n view_24 = torch.ops.aten.view.default(add_18, [64, 768]); add_18 
= None\n permute_16 = torch.ops.aten.permute.default(primals_30, [1, 0]); primals_30 = None\n addmm_8 = torch.ops.aten.addmm.default(primals_31, view_24, permute_16); primals_31 = None\n view_25 = torch.ops.aten.view.default(addmm_8, [1, 64, 2304]); addmm_8 = None\n split_2 = torch.ops.aten.split.Tensor(view_25, 768, 2); view_25 = None\n getitem_24 = split_2[0]\n getitem_25 = split_2[1]\n getitem_26 = split_2[2]; split_2 = None\n view_26 = torch.ops.aten.view.default(getitem_25, [1, 64, 12, 64]); getitem_25 = None\n permute_17 = torch.ops.aten.permute.default(view_26, [0, 2, 1, 3]); view_26 = None\n view_27 = torch.ops.aten.view.default(getitem_24, [1, 64, 12, 64]); getitem_24 = None\n permute_18 = torch.ops.aten.permute.default(view_27, [0, 2, 1, 3]); view_27 = None\n view_28 = torch.ops.aten.view.default(getitem_26, [1, 64, 12, 64]); getitem_26 = None\n permute_19 = torch.ops.aten.permute.default(view_28, [0, 2, 1, 3]); view_28 = None\n _scaled_dot_product_efficient_attention_2 = torch.ops.aten._scaled_dot_product_efficient_attention.default(permute_18, permute_17, permute_19, None, True, 0.0, True)\n getitem_27 = _scaled_dot_product_efficient_attention_2[0]\n getitem_28 = _scaled_dot_product_efficient_attention_2[1]\n getitem_29 = _scaled_dot_product_efficient_attention_2[2]\n getitem_30 = _scaled_dot_product_efficient_attention_2[3]; _scaled_dot_product_efficient_attention_2 = None\n permute_20 = torch.ops.aten.permute.default(getitem_27, [0, 2, 1, 3])\n view_29 = torch.ops.aten.view.default(permute_20, [1, 64, 768]); permute_20 = None\n view_30 = torch.ops.aten.view.default(view_29, [64, 768]); view_29 = None\n permute_21 = torch.ops.aten.permute.default(primals_32, [1, 0]); primals_32 = None\n addmm_9 = torch.ops.aten.addmm.default(primals_33, view_30, permute_21); primals_33 = view_30 = None\n view_31 = torch.ops.aten.view.default(addmm_9, [1, 64, 768]); addmm_9 = None\n add_19 = torch.ops.aten.add.Tensor(add_16, view_31); add_16 = view_31 = None\n var_mean_5 = torch.ops.aten.var_mean.correction(add_19, [2], correction = 0, keepdim = True)\n getitem_31 = var_mean_5[0]\n getitem_32 = var_mean_5[1]; var_mean_5 = None\n add_20 = torch.ops.aten.add.Tensor(getitem_31, 1e-05); getitem_31 = None\n rsqrt_5 = torch.ops.aten.rsqrt.default(add_20); add_20 = None\n sub_5 = torch.ops.aten.sub.Tensor(add_19, getitem_32); getitem_32 = None\n mul_18 = torch.ops.aten.mul.Tensor(sub_5, rsqrt_5); sub_5 = None\n mul_19 = torch.ops.aten.mul.Tensor(mul_18, primals_34)\n add_21 = torch.ops.aten.add.Tensor(mul_19, primals_35); mul_19 = primals_35 = None\n view_32 = torch.ops.aten.view.default(add_21, [64, 768]); add_21 = None\n permute_22 = torch.ops.aten.permute.default(primals_36, [1, 0]); primals_36 = None\n addmm_10 = torch.ops.aten.addmm.default(primals_37, view_32, permute_22); primals_37 = None\n view_33 = torch.ops.aten.view.default(addmm_10, [1, 64, 3072])\n mul_20 = torch.ops.aten.mul.Tensor(view_33, 0.5)\n pow_3 = torch.ops.aten.pow.Tensor_Scalar(view_33, 3.0)\n mul_21 = torch.ops.aten.mul.Tensor(pow_3, 0.044715); pow_3 = None\n add_22 = torch.ops.aten.add.Tensor(view_33, mul_21); view_33 = mul_21 = None\n mul_22 = torch.ops.aten.mul.Tensor(add_22, 0.7978845608028654); add_22 = None\n tanh_2 = torch.ops.aten.tanh.default(mul_22); mul_22 = None\n add_23 = torch.ops.aten.add.Tensor(tanh_2, 1.0); tanh_2 = None\n mul_23 = torch.ops.aten.mul.Tensor(mul_20, add_23); mul_20 = add_23 = None\n view_34 = torch.ops.aten.view.default(mul_23, [64, 3072]); mul_23 = None\n permute_23 = 
torch.ops.aten.permute.default(primals_38, [1, 0]); primals_38 = None\n addmm_11 = torch.ops.aten.addmm.default(primals_39, view_34, permute_23); primals_39 = None\n view_35 = torch.ops.aten.view.default(addmm_11, [1, 64, 768]); addmm_11 = None\n add_24 = torch.ops.aten.add.Tensor(add_19, view_35); add_19 = view_35 = None\n var_mean_6 = torch.ops.aten.var_mean.correction(add_24, [2], correction = 0, keepdim = True)\n getitem_33 = var_mean_6[0]\n getitem_34 = var_mean_6[1]; var_mean_6 = None\n add_25 = torch.ops.aten.add.Tensor(getitem_33, 1e-05); getitem_33 = None\n rsqrt_6 = torch.ops.aten.rsqrt.default(add_25); add_25 = None\n sub_6 = torch.ops.aten.sub.Tensor(add_24, getitem_34); getitem_34 = None\n mul_24 = torch.ops.aten.mul.Tensor(sub_6, rsqrt_6); sub_6 = None\n mul_25 = torch.ops.aten.mul.Tensor(mul_24, primals_40)\n add_26 = torch.ops.aten.add.Tensor(mul_25, primals_41); mul_25 = primals_41 = None\n view_36 = torch.ops.aten.view.default(add_26, [64, 768]); add_26 = None\n permute_24 = torch.ops.aten.permute.default(primals_42, [1, 0]); primals_42 = None\n addmm_12 = torch.ops.aten.addmm.default(primals_43, view_36, permute_24); primals_43 = None\n view_37 = torch.ops.aten.view.default(addmm_12, [1, 64, 2304]); addmm_12 = None\n split_3 = torch.ops.aten.split.Tensor(view_37, 768, 2); view_37 = None\n getitem_35 = split_3[0]\n getitem_36 = split_3[1]\n getitem_37 = split_3[2]; split_3 = None\n view_38 = torch.ops.aten.view.default(getitem_36, [1, 64, 12, 64]); getitem_36 = None\n permute_25 = torch.ops.aten.permute.default(view_38, [0, 2, 1, 3]); view_38 = None\n view_39 = torch.ops.aten.view.default(getitem_35, [1, 64, 12, 64]); getitem_35 = None\n permute_26 = torch.ops.aten.permute.default(view_39, [0, 2, 1, 3]); view_39 = None\n view_40 = torch.ops.aten.view.default(getitem_37, [1, 64, 12, 64]); getitem_37 = None\n permute_27 = torch.ops.aten.permute.default(view_40, [0, 2, 1, 3]); view_40 = None\n _scaled_dot_product_efficient_attention_3 = torch.ops.aten._scaled_dot_product_efficient_attention.default(permute_26, permute_25, permute_27, None, True, 0.0, True)\n getitem_38 = _scaled_dot_product_efficient_attention_3[0]\n getitem_39 = _scaled_dot_product_efficient_attention_3[1]\n getitem_40 = _scaled_dot_product_efficient_attention_3[2]\n getitem_41 = _scaled_dot_product_efficient_attention_3[3]; _scaled_dot_product_efficient_attention_3 = None\n permute_28 = torch.ops.aten.permute.default(getitem_38, [0, 2, 1, 3])\n view_41 = torch.ops.aten.view.default(permute_28, [1, 64, 768]); permute_28 = None\n view_42 = torch.ops.aten.view.default(view_41, [64, 768]); view_41 = None\n permute_29 = torch.ops.aten.permute.default(primals_44, [1, 0]); primals_44 = None\n addmm_13 = torch.ops.aten.addmm.default(primals_45, view_42, permute_29); primals_45 = view_42 = None\n view_43 = torch.ops.aten.view.default(addmm_13, [1, 64, 768]); addmm_13 = None\n add_27 = torch.ops.aten.add.Tensor(add_24, view_43); add_24 = view_43 = None\n var_mean_7 = torch.ops.aten.var_mean.correction(add_27, [2], correction = 0, keepdim = True)\n getitem_42 = var_mean_7[0]\n getitem_43 = var_mean_7[1]; var_mean_7 = None\n add_28 = torch.ops.aten.add.Tensor(getitem_42, 1e-05); getitem_42 = None\n rsqrt_7 = torch.ops.aten.rsqrt.default(add_28); add_28 = None\n sub_7 = torch.ops.aten.sub.Tensor(add_27, getitem_43); getitem_43 = None\n mul_26 = torch.ops.aten.mul.Tensor(sub_7, rsqrt_7); sub_7 = None\n mul_27 = torch.ops.aten.mul.Tensor(mul_26, primals_46)\n add_29 = torch.ops.aten.add.Tensor(mul_27, primals_47); mul_27 
= primals_47 = None\n view_44 = torch.ops.aten.view.default(add_29, [64, 768]); add_29 = None\n permute_30 = torch.ops.aten.permute.default(primals_48, [1, 0]); primals_48 = None\n addmm_14 = torch.ops.aten.addmm.default(primals_49, view_44, permute_30); primals_49 = None\n view_45 = torch.ops.aten.view.default(addmm_14, [1, 64, 3072])\n mul_28 = torch.ops.aten.mul.Tensor(view_45, 0.5)\n pow_4 = torch.ops.aten.pow.Tensor_Scalar(view_45, 3.0)\n mul_29 = torch.ops.aten.mul.Tensor(pow_4, 0.044715); pow_4 = None\n add_30 = torch.ops.aten.add.Tensor(view_45, mul_29); view_45 = mul_29 = None\n mul_30 = torch.ops.aten.mul.Tensor(add_30, 0.7978845608028654); add_30 = None\n tanh_3 = torch.ops.aten.tanh.default(mul_30); mul_30 = None\n add_31 = torch.ops.aten.add.Tensor(tanh_3, 1.0); tanh_3 = None\n mul_31 = torch.ops.aten.mul.Tensor(mul_28, add_31); mul_28 = add_31 = None\n view_46 = torch.ops.aten.view.default(mul_31, [64, 3072]); mul_31 = None\n permute_31 = torch.ops.aten.permute.default(primals_50, [1, 0]); primals_50 = None\n addmm_15 = torch.ops.aten.addmm.default(primals_51, view_46, permute_31); primals_51 = None\n view_47 = torch.ops.aten.view.default(addmm_15, [1, 64, 768]); addmm_15 = None\n add_32 = torch.ops.aten.add.Tensor(add_27, view_47); add_27 = view_47 = None\n var_mean_8 = torch.ops.aten.var_mean.correction(add_32, [2], correction = 0, keepdim = True)\n getitem_44 = var_mean_8[0]\n getitem_45 = var_mean_8[1]; var_mean_8 = None\n add_33 = torch.ops.aten.add.Tensor(getitem_44, 1e-05); getitem_44 = None\n rsqrt_8 = torch.ops.aten.rsqrt.default(add_33); add_33 = None\n sub_8 = torch.ops.aten.sub.Tensor(add_32, getitem_45); getitem_45 = None\n mul_32 = torch.ops.aten.mul.Tensor(sub_8, rsqrt_8); sub_8 = None\n mul_33 = torch.ops.aten.mul.Tensor(mul_32, primals_52)\n add_34 = torch.ops.aten.add.Tensor(mul_33, primals_53); mul_33 = primals_53 = None\n view_48 = torch.ops.aten.view.default(add_34, [64, 768]); add_34 = None\n permute_32 = torch.ops.aten.permute.default(primals_54, [1, 0]); primals_54 = None\n addmm_16 = torch.ops.aten.addmm.default(primals_55, view_48, permute_32); primals_55 = None\n view_49 = torch.ops.aten.view.default(addmm_16, [1, 64, 2304]); addmm_16 = None\n split_4 = torch.ops.aten.split.Tensor(view_49, 768, 2); view_49 = None\n getitem_46 = split_4[0]\n getitem_47 = split_4[1]\n getitem_48 = split_4[2]; split_4 = None\n view_50 = torch.ops.aten.view.default(getitem_47, [1, 64, 12, 64]); getitem_47 = None\n permute_33 = torch.ops.aten.permute.default(view_50, [0, 2, 1, 3]); view_50 = None\n view_51 = torch.ops.aten.view.default(getitem_46, [1, 64, 12, 64]); getitem_46 = None\n permute_34 = torch.ops.aten.permute.default(view_51, [0, 2, 1, 3]); view_51 = None\n view_52 = torch.ops.aten.view.default(getitem_48, [1, 64, 12, 64]); getitem_48 = None\n permute_35 = torch.ops.aten.permute.default(view_52, [0, 2, 1, 3]); view_52 = None\n _scaled_dot_product_efficient_attention_4 = torch.ops.aten._scaled_dot_product_efficient_attention.default(permute_34, permute_33, permute_35, None, True, 0.0, True)\n getitem_49 = _scaled_dot_product_efficient_attention_4[0]\n getitem_50 = _scaled_dot_product_efficient_attention_4[1]\n getitem_51 = _scaled_dot_product_efficient_attention_4[2]\n getitem_52 = _scaled_dot_product_efficient_attention_4[3]; _scaled_dot_product_efficient_attention_4 = None\n permute_36 = torch.ops.aten.permute.default(getitem_49, [0, 2, 1, 3])\n view_53 = torch.ops.aten.view.default(permute_36, [1, 64, 768]); permute_36 = None\n view_54 = 
torch.ops.aten.view.default(view_53, [64, 768]); view_53 = None\n permute_37 = torch.ops.aten.permute.default(primals_56, [1, 0]); primals_56 = None\n addmm_17 = torch.ops.aten.addmm.default(primals_57, view_54, permute_37); primals_57 = view_54 = None\n view_55 = torch.ops.aten.view.default(addmm_17, [1, 64, 768]); addmm_17 = None\n add_35 = torch.ops.aten.add.Tensor(add_32, view_55); add_32 = view_55 = None\n var_mean_9 = torch.ops.aten.var_mean.correction(add_35, [2], correction = 0, keepdim = True)\n getitem_53 = var_mean_9[0]\n getitem_54 = var_mean_9[1]; var_mean_9 = None\n add_36 = torch.ops.aten.add.Tensor(getitem_53, 1e-05); getitem_53 = None\n rsqrt_9 = torch.ops.aten.rsqrt.default(add_36); add_36 = None\n sub_9 = torch.ops.aten.sub.Tensor(add_35, getitem_54); getitem_54 = None\n mul_34 = torch.ops.aten.mul.Tensor(sub_9, rsqrt_9); sub_9 = None\n mul_35 = torch.ops.aten.mul.Tensor(mul_34, primals_58)\n add_37 = torch.ops.aten.add.Tensor(mul_35, primals_59); mul_35 = primals_59 = None\n view_56 = torch.ops.aten.view.default(add_37, [64, 768]); add_37 = None\n permute_38 = torch.ops.aten.permute.default(primals_60, [1, 0]); primals_60 = None\n addmm_18 = torch.ops.aten.addmm.default(primals_61, view_56, permute_38); primals_61 = None\n view_57 = torch.ops.aten.view.default(addmm_18, [1, 64, 3072])\n mul_36 = torch.ops.aten.mul.Tensor(view_57, 0.5)\n pow_5 = torch.ops.aten.pow.Tensor_Scalar(view_57, 3.0)\n mul_37 = torch.ops.aten.mul.Tensor(pow_5, 0.044715); pow_5 = None\n add_38 = torch.ops.aten.add.Tensor(view_57, mul_37); view_57 = mul_37 = None\n mul_38 = torch.ops.aten.mul.Tensor(add_38, 0.7978845608028654); add_38 = None\n tanh_4 = torch.ops.aten.tanh.default(mul_38); mul_38 = None\n add_39 = torch.ops.aten.add.Tensor(tanh_4, 1.0); tanh_4 = None\n mul_39 = torch.ops.aten.mul.Tensor(mul_36, add_39); mul_36 = add_39 = None\n view_58 = torch.ops.aten.view.default(mul_39, [64, 3072]); mul_39 = None\n permute_39 = torch.ops.aten.permute.default(primals_62, [1, 0]); primals_62 = None\n addmm_19 = torch.ops.aten.addmm.default(primals_63, view_58, permute_39); primals_63 = None\n view_59 = torch.ops.aten.view.default(addmm_19, [1, 64, 768]); addmm_19 = None\n add_40 = torch.ops.aten.add.Tensor(add_35, view_59); add_35 = view_59 = None\n var_mean_10 = torch.ops.aten.var_mean.correction(add_40, [2], correction = 0, keepdim = True)\n getitem_55 = var_mean_10[0]\n getitem_56 = var_mean_10[1]; var_mean_10 = None\n add_41 = torch.ops.aten.add.Tensor(getitem_55, 1e-05); getitem_55 = None\n rsqrt_10 = torch.ops.aten.rsqrt.default(add_41); add_41 = None\n sub_10 = torch.ops.aten.sub.Tensor(add_40, getitem_56); getitem_56 = None\n mul_40 = torch.ops.aten.mul.Tensor(sub_10, rsqrt_10); sub_10 = None\n mul_41 = torch.ops.aten.mul.Tensor(mul_40, primals_64)\n add_42 = torch.ops.aten.add.Tensor(mul_41, primals_65); mul_41 = primals_65 = None\n view_60 = torch.ops.aten.view.default(add_42, [64, 768]); add_42 = None\n permute_40 = torch.ops.aten.permute.default(primals_66, [1, 0]); primals_66 = None\n addmm_20 = torch.ops.aten.addmm.default(primals_67, view_60, permute_40); primals_67 = None\n view_61 = torch.ops.aten.view.default(addmm_20, [1, 64, 2304]); addmm_20 = None\n split_5 = torch.ops.aten.split.Tensor(view_61, 768, 2); view_61 = None\n getitem_57 = split_5[0]\n getitem_58 = split_5[1]\n getitem_59 = split_5[2]; split_5 = None\n view_62 = torch.ops.aten.view.default(getitem_58, [1, 64, 12, 64]); getitem_58 = None\n permute_41 = torch.ops.aten.permute.default(view_62, [0, 2, 1, 3]); view_62 = 
None\n view_63 = torch.ops.aten.view.default(getitem_57, [1, 64, 12, 64]); getitem_57 = None\n permute_42 = torch.ops.aten.permute.default(view_63, [0, 2, 1, 3]); view_63 = None\n view_64 = torch.ops.aten.view.default(getitem_59, [1, 64, 12, 64]); getitem_59 = None\n permute_43 = torch.ops.aten.permute.default(view_64, [0, 2, 1, 3]); view_64 = None\n _scaled_dot_product_efficient_attention_5 = torch.ops.aten._scaled_dot_product_efficient_attention.default(permute_42, permute_41, permute_43, None, True, 0.0, True)\n getitem_60 = _scaled_dot_product_efficient_attention_5[0]\n getitem_61 = _scaled_dot_product_efficient_attention_5[1]\n getitem_62 = _scaled_dot_product_efficient_attention_5[2]\n getitem_63 = _scaled_dot_product_efficient_attention_5[3]; _scaled_dot_product_efficient_attention_5 = None\n permute_44 = torch.ops.aten.permute.default(getitem_60, [0, 2, 1, 3])\n view_65 = torch.ops.aten.view.default(permute_44, [1, 64, 768]); permute_44 = None\n view_66 = torch.ops.aten.view.default(view_65, [64, 768]); view_65 = None\n permute_45 = torch.ops.aten.permute.default(primals_68, [1, 0]); primals_68 = None\n addmm_21 = torch.ops.aten.addmm.default(primals_69, view_66, permute_45); primals_69 = view_66 = None\n view_67 = torch.ops.aten.view.default(addmm_21, [1, 64, 768]); addmm_21 = None\n add_43 = torch.ops.aten.add.Tensor(add_40, view_67); add_40 = view_67 = None\n var_mean_11 = torch.ops.aten.var_mean.correction(add_43, [2], correction = 0, keepdim = True)\n getitem_64 = var_mean_11[0]\n getitem_65 = var_mean_11[1]; var_mean_11 = None\n add_44 = torch.ops.aten.add.Tensor(getitem_64, 1e-05); getitem_64 = None\n rsqrt_11 = torch.ops.aten.rsqrt.default(add_44); add_44 = None\n sub_11 = torch.ops.aten.sub.Tensor(add_43, getitem_65); getitem_65 = None\n mul_42 = torch.ops.aten.mul.Tensor(sub_11, rsqrt_11); sub_11 = None\n mul_43 = torch.ops.aten.mul.Tensor(mul_42, primals_70)\n add_45 = torch.ops.aten.add.Tensor(mul_43, primals_71); mul_43 = primals_71 = None\n view_68 = torch.ops.aten.view.default(add_45, [64, 768]); add_45 = None\n permute_46 = torch.ops.aten.permute.default(primals_72, [1, 0]); primals_72 = None\n addmm_22 = torch.ops.aten.addmm.default(primals_73, view_68, permute_46); primals_73 = None\n view_69 = torch.ops.aten.view.default(addmm_22, [1, 64, 3072])\n mul_44 = torch.ops.aten.mul.Tensor(view_69, 0.5)\n pow_6 = torch.ops.aten.pow.Tensor_Scalar(view_69, 3.0)\n mul_45 = torch.ops.aten.mul.Tensor(pow_6, 0.044715); pow_6 = None\n add_46 = torch.ops.aten.add.Tensor(view_69, mul_45); view_69 = mul_45 = None\n mul_46 = torch.ops.aten.mul.Tensor(add_46, 0.7978845608028654); add_46 = None\n tanh_5 = torch.ops.aten.tanh.default(mul_46); mul_46 = None\n add_47 = torch.ops.aten.add.Tensor(tanh_5, 1.0); tanh_5 = None\n mul_47 = torch.ops.aten.mul.Tensor(mul_44, add_47); mul_44 = add_47 = None\n view_70 = torch.ops.aten.view.default(mul_47, [64, 3072]); mul_47 = None\n permute_47 = torch.ops.aten.permute.default(primals_74, [1, 0]); primals_74 = None\n addmm_23 = torch.ops.aten.addmm.default(primals_75, view_70, permute_47); primals_75 = None\n view_71 = torch.ops.aten.view.default(addmm_23, [1, 64, 768]); addmm_23 = None\n add_48 = torch.ops.aten.add.Tensor(add_43, view_71); add_43 = view_71 = None\n var_mean_12 = torch.ops.aten.var_mean.correction(add_48, [2], correction = 0, keepdim = True)\n getitem_66 = var_mean_12[0]\n getitem_67 = var_mean_12[1]; var_mean_12 = None\n add_49 = torch.ops.aten.add.Tensor(getitem_66, 1e-05); getitem_66 = None\n rsqrt_12 = 
torch.ops.aten.rsqrt.default(add_49); add_49 = None\n sub_12 = torch.ops.aten.sub.Tensor(add_48, getitem_67); getitem_67 = None\n mul_48 = torch.ops.aten.mul.Tensor(sub_12, rsqrt_12); sub_12 = None\n mul_49 = torch.ops.aten.mul.Tensor(mul_48, primals_76)\n add_50 = torch.ops.aten.add.Tensor(mul_49, primals_77); mul_49 = primals_77 = None\n view_72 = torch.ops.aten.view.default(add_50, [64, 768]); add_50 = None\n permute_48 = torch.ops.aten.permute.default(primals_78, [1, 0]); primals_78 = None\n addmm_24 = torch.ops.aten.addmm.default(primals_79, view_72, permute_48); primals_79 = None\n view_73 = torch.ops.aten.view.default(addmm_24, [1, 64, 2304]); addmm_24 = None\n split_6 = torch.ops.aten.split.Tensor(view_73, 768, 2); view_73 = None\n getitem_68 = split_6[0]\n getitem_69 = split_6[1]\n getitem_70 = split_6[2]; split_6 = None\n view_74 = torch.ops.aten.view.default(getitem_69, [1, 64, 12, 64]); getitem_69 = None\n permute_49 = torch.ops.aten.permute.default(view_74, [0, 2, 1, 3]); view_74 = None\n view_75 = torch.ops.aten.view.default(getitem_68, [1, 64, 12, 64]); getitem_68 = None\n permute_50 = torch.ops.aten.permute.default(view_75, [0, 2, 1, 3]); view_75 = None\n view_76 = torch.ops.aten.view.default(getitem_70, [1, 64, 12, 64]); getitem_70 = None\n permute_51 = torch.ops.aten.permute.default(view_76, [0, 2, 1, 3]); view_76 = None\n _scaled_dot_product_efficient_attention_6 = torch.ops.aten._scaled_dot_product_efficient_attention.default(permute_50, permute_49, permute_51, None, True, 0.0, True)\n getitem_71 = _scaled_dot_product_efficient_attention_6[0]\n getitem_72 = _scaled_dot_product_efficient_attention_6[1]\n getitem_73 = _scaled_dot_product_efficient_attention_6[2]\n getitem_74 = _scaled_dot_product_efficient_attention_6[3]; _scaled_dot_product_efficient_attention_6 = None\n permute_52 = torch.ops.aten.permute.default(getitem_71, [0, 2, 1, 3])\n view_77 = torch.ops.aten.view.default(permute_52, [1, 64, 768]); permute_52 = None\n view_78 = torch.ops.aten.view.default(view_77, [64, 768]); view_77 = None\n permute_53 = torch.ops.aten.permute.default(primals_80, [1, 0]); primals_80 = None\n addmm_25 = torch.ops.aten.addmm.default(primals_81, view_78, permute_53); primals_81 = view_78 = None\n view_79 = torch.ops.aten.view.default(addmm_25, [1, 64, 768]); addmm_25 = None\n add_51 = torch.ops.aten.add.Tensor(add_48, view_79); add_48 = view_79 = None\n var_mean_13 = torch.ops.aten.var_mean.correction(add_51, [2], correction = 0, keepdim = True)\n getitem_75 = var_mean_13[0]\n getitem_76 = var_mean_13[1]; var_mean_13 = None\n add_52 = torch.ops.aten.add.Tensor(getitem_75, 1e-05); getitem_75 = None\n rsqrt_13 = torch.ops.aten.rsqrt.default(add_52); add_52 = None\n sub_13 = torch.ops.aten.sub.Tensor(add_51, getitem_76); getitem_76 = None\n mul_50 = torch.ops.aten.mul.Tensor(sub_13, rsqrt_13); sub_13 = None\n mul_51 = torch.ops.aten.mul.Tensor(mul_50, primals_82)\n add_53 = torch.ops.aten.add.Tensor(mul_51, primals_83); mul_51 = primals_83 = None\n view_80 = torch.ops.aten.view.default(add_53, [64, 768]); add_53 = None\n permute_54 = torch.ops.aten.permute.default(primals_84, [1, 0]); primals_84 = None\n addmm_26 = torch.ops.aten.addmm.default(primals_85, view_80, permute_54); primals_85 = None\n view_81 = torch.ops.aten.view.default(addmm_26, [1, 64, 3072])\n mul_52 = torch.ops.aten.mul.Tensor(view_81, 0.5)\n pow_7 = torch.ops.aten.pow.Tensor_Scalar(view_81, 3.0)\n mul_53 = torch.ops.aten.mul.Tensor(pow_7, 0.044715); pow_7 = None\n add_54 = torch.ops.aten.add.Tensor(view_81, mul_53); 
view_81 = mul_53 = None\n mul_54 = torch.ops.aten.mul.Tensor(add_54, 0.7978845608028654); add_54 = None\n tanh_6 = torch.ops.aten.tanh.default(mul_54); mul_54 = None\n add_55 = torch.ops.aten.add.Tensor(tanh_6, 1.0); tanh_6 = None\n mul_55 = torch.ops.aten.mul.Tensor(mul_52, add_55); mul_52 = add_55 = None\n view_82 = torch.ops.aten.view.default(mul_55, [64, 3072]); mul_55 = None\n permute_55 = torch.ops.aten.permute.default(primals_86, [1, 0]); primals_86 = None\n addmm_27 = torch.ops.aten.addmm.default(primals_87, view_82, permute_55); primals_87 = None\n view_83 = torch.ops.aten.view.default(addmm_27, [1, 64, 768]); addmm_27 = None\n add_56 = torch.ops.aten.add.Tensor(add_51, view_83); add_51 = view_83 = None\n var_mean_14 = torch.ops.aten.var_mean.correction(add_56, [2], correction = 0, keepdim = True)\n getitem_77 = var_mean_14[0]\n getitem_78 = var_mean_14[1]; var_mean_14 = None\n add_57 = torch.ops.aten.add.Tensor(getitem_77, 1e-05); getitem_77 = None\n rsqrt_14 = torch.ops.aten.rsqrt.default(add_57); add_57 = None\n sub_14 = torch.ops.aten.sub.Tensor(add_56, getitem_78); getitem_78 = None\n mul_56 = torch.ops.aten.mul.Tensor(sub_14, rsqrt_14); sub_14 = None\n mul_57 = torch.ops.aten.mul.Tensor(mul_56, primals_88)\n add_58 = torch.ops.aten.add.Tensor(mul_57, primals_89); mul_57 = primals_89 = None\n view_84 = torch.ops.aten.view.default(add_58, [64, 768]); add_58 = None\n permute_56 = torch.ops.aten.permute.default(primals_90, [1, 0]); primals_90 = None\n addmm_28 = torch.ops.aten.addmm.default(primals_91, view_84, permute_56); primals_91 = None\n view_85 = torch.ops.aten.view.default(addmm_28, [1, 64, 2304]); addmm_28 = None\n split_7 = torch.ops.aten.split.Tensor(view_85, 768, 2); view_85 = None\n getitem_79 = split_7[0]\n getitem_80 = split_7[1]\n getitem_81 = split_7[2]; split_7 = None\n view_86 = torch.ops.aten.view.default(getitem_80, [1, 64, 12, 64]); getitem_80 = None\n permute_57 = torch.ops.aten.permute.default(view_86, [0, 2, 1, 3]); view_86 = None\n view_87 = torch.ops.aten.view.default(getitem_79, [1, 64, 12, 64]); getitem_79 = None\n permute_58 = torch.ops.aten.permute.default(view_87, [0, 2, 1, 3]); view_87 = None\n view_88 = torch.ops.aten.view.default(getitem_81, [1, 64, 12, 64]); getitem_81 = None\n permute_59 = torch.ops.aten.permute.default(view_88, [0, 2, 1, 3]); view_88 = None\n _scaled_dot_product_efficient_attention_7 = torch.ops.aten._scaled_dot_product_efficient_attention.default(permute_58, permute_57, permute_59, None, True, 0.0, True)\n getitem_82 = _scaled_dot_product_efficient_attention_7[0]\n getitem_83 = _scaled_dot_product_efficient_attention_7[1]\n getitem_84 = _scaled_dot_product_efficient_attention_7[2]\n getitem_85 = _scaled_dot_product_efficient_attention_7[3]; _scaled_dot_product_efficient_attention_7 = None\n permute_60 = torch.ops.aten.permute.default(getitem_82, [0, 2, 1, 3])\n view_89 = torch.ops.aten.view.default(permute_60, [1, 64, 768]); permute_60 = None\n view_90 = torch.ops.aten.view.default(view_89, [64, 768]); view_89 = None\n permute_61 = torch.ops.aten.permute.default(primals_92, [1, 0]); primals_92 = None\n addmm_29 = torch.ops.aten.addmm.default(primals_93, view_90, permute_61); primals_93 = view_90 = None\n view_91 = torch.ops.aten.view.default(addmm_29, [1, 64, 768]); addmm_29 = None\n add_59 = torch.ops.aten.add.Tensor(add_56, view_91); add_56 = view_91 = None\n var_mean_15 = torch.ops.aten.var_mean.correction(add_59, [2], correction = 0, keepdim = True)\n getitem_86 = var_mean_15[0]\n getitem_87 = var_mean_15[1]; 
var_mean_15 = None\n add_60 = torch.ops.aten.add.Tensor(getitem_86, 1e-05); getitem_86 = None\n rsqrt_15 = torch.ops.aten.rsqrt.default(add_60); add_60 = None\n sub_15 = torch.ops.aten.sub.Tensor(add_59, getitem_87); getitem_87 = None\n mul_58 = torch.ops.aten.mul.Tensor(sub_15, rsqrt_15); sub_15 = None\n mul_59 = torch.ops.aten.mul.Tensor(mul_58, primals_94)\n add_61 = torch.ops.aten.add.Tensor(mul_59, primals_95); mul_59 = primals_95 = None\n view_92 = torch.ops.aten.view.default(add_61, [64, 768]); add_61 = None\n permute_62 = torch.ops.aten.permute.default(primals_96, [1, 0]); primals_96 = None\n addmm_30 = torch.ops.aten.addmm.default(primals_97, view_92, permute_62); primals_97 = None\n view_93 = torch.ops.aten.view.default(addmm_30, [1, 64, 3072])\n mul_60 = torch.ops.aten.mul.Tensor(view_93, 0.5)\n pow_8 = torch.ops.aten.pow.Tensor_Scalar(view_93, 3.0)\n mul_61 = torch.ops.aten.mul.Tensor(pow_8, 0.044715); pow_8 = None\n add_62 = torch.ops.aten.add.Tensor(view_93, mul_61); view_93 = mul_61 = None\n mul_62 = torch.ops.aten.mul.Tensor(add_62, 0.7978845608028654); add_62 = None\n tanh_7 = torch.ops.aten.tanh.default(mul_62); mul_62 = None\n add_63 = torch.ops.aten.add.Tensor(tanh_7, 1.0); tanh_7 = None\n mul_63 = torch.ops.aten.mul.Tensor(mul_60, add_63); mul_60 = add_63 = None\n view_94 = torch.ops.aten.view.default(mul_63, [64, 3072]); mul_63 = None\n permute_63 = torch.ops.aten.permute.default(primals_98, [1, 0]); primals_98 = None\n addmm_31 = torch.ops.aten.addmm.default(primals_99, view_94, permute_63); primals_99 = None\n view_95 = torch.ops.aten.view.default(addmm_31, [1, 64, 768]); addmm_31 = None\n add_64 = torch.ops.aten.add.Tensor(add_59, view_95); add_59 = view_95 = None\n var_mean_16 = torch.ops.aten.var_mean.correction(add_64, [2], correction = 0, keepdim = True)\n getitem_88 = var_mean_16[0]\n getitem_89 = var_mean_16[1]; var_mean_16 = None\n add_65 = torch.ops.aten.add.Tensor(getitem_88, 1e-05); getitem_88 = None\n rsqrt_16 = torch.ops.aten.rsqrt.default(add_65); add_65 = None\n sub_16 = torch.ops.aten.sub.Tensor(add_64, getitem_89); getitem_89 = None\n mul_64 = torch.ops.aten.mul.Tensor(sub_16, rsqrt_16); sub_16 = None\n mul_65 = torch.ops.aten.mul.Tensor(mul_64, primals_100)\n add_66 = torch.ops.aten.add.Tensor(mul_65, primals_101); mul_65 = primals_101 = None\n view_96 = torch.ops.aten.view.default(add_66, [64, 768]); add_66 = None\n permute_64 = torch.ops.aten.permute.default(primals_102, [1, 0]); primals_102 = None\n addmm_32 = torch.ops.aten.addmm.default(primals_103, view_96, permute_64); primals_103 = None\n view_97 = torch.ops.aten.view.default(addmm_32, [1, 64, 2304]); addmm_32 = None\n split_8 = torch.ops.aten.split.Tensor(view_97, 768, 2); view_97 = None\n getitem_90 = split_8[0]\n getitem_91 = split_8[1]\n getitem_92 = split_8[2]; split_8 = None\n view_98 = torch.ops.aten.view.default(getitem_91, [1, 64, 12, 64]); getitem_91 = None\n permute_65 = torch.ops.aten.permute.default(view_98, [0, 2, 1, 3]); view_98 = None\n view_99 = torch.ops.aten.view.default(getitem_90, [1, 64, 12, 64]); getitem_90 = None\n permute_66 = torch.ops.aten.permute.default(view_99, [0, 2, 1, 3]); view_99 = None\n view_100 = torch.ops.aten.view.default(getitem_92, [1, 64, 12, 64]); getitem_92 = None\n permute_67 = torch.ops.aten.permute.default(view_100, [0, 2, 1, 3]); view_100 = None\n _scaled_dot_product_efficient_attention_8 = torch.ops.aten._scaled_dot_product_efficient_attention.default(permute_66, permute_65, permute_67, None, True, 0.0, True)\n getitem_93 = 
_scaled_dot_product_efficient_attention_8[0]\n getitem_94 = _scaled_dot_product_efficient_attention_8[1]\n getitem_95 = _scaled_dot_product_efficient_attention_8[2]\n getitem_96 = _scaled_dot_product_efficient_attention_8[3]; _scaled_dot_product_efficient_attention_8 = None\n permute_68 = torch.ops.aten.permute.default(getitem_93, [0, 2, 1, 3])\n view_101 = torch.ops.aten.view.default(permute_68, [1, 64, 768]); permute_68 = None\n view_102 = torch.ops.aten.view.default(view_101, [64, 768]); view_101 = None\n permute_69 = torch.ops.aten.permute.default(primals_104, [1, 0]); primals_104 = None\n addmm_33 = torch.ops.aten.addmm.default(primals_105, view_102, permute_69); primals_105 = view_102 = None\n view_103 = torch.ops.aten.view.default(addmm_33, [1, 64, 768]); addmm_33 = None\n add_67 = torch.ops.aten.add.Tensor(add_64, view_103); add_64 = view_103 = None\n var_mean_17 = torch.ops.aten.var_mean.correction(add_67, [2], correction = 0, keepdim = True)\n getitem_97 = var_mean_17[0]\n getitem_98 = var_mean_17[1]; var_mean_17 = None\n add_68 = torch.ops.aten.add.Tensor(getitem_97, 1e-05); getitem_97 = None\n rsqrt_17 = torch.ops.aten.rsqrt.default(add_68); add_68 = None\n sub_17 = torch.ops.aten.sub.Tensor(add_67, getitem_98); getitem_98 = None\n mul_66 = torch.ops.aten.mul.Tensor(sub_17, rsqrt_17); sub_17 = None\n mul_67 = torch.ops.aten.mul.Tensor(mul_66, primals_106)\n add_69 = torch.ops.aten.add.Tensor(mul_67, primals_107); mul_67 = primals_107 = None\n view_104 = torch.ops.aten.view.default(add_69, [64, 768]); add_69 = None\n permute_70 = torch.ops.aten.permute.default(primals_108, [1, 0]); primals_108 = None\n addmm_34 = torch.ops.aten.addmm.default(primals_109, view_104, permute_70); primals_109 = None\n view_105 = torch.ops.aten.view.default(addmm_34, [1, 64, 3072])\n mul_68 = torch.ops.aten.mul.Tensor(view_105, 0.5)\n pow_9 = torch.ops.aten.pow.Tensor_Scalar(view_105, 3.0)\n mul_69 = torch.ops.aten.mul.Tensor(pow_9, 0.044715); pow_9 = None\n add_70 = torch.ops.aten.add.Tensor(view_105, mul_69); view_105 = mul_69 = None\n mul_70 = torch.ops.aten.mul.Tensor(add_70, 0.7978845608028654); add_70 = None\n tanh_8 = torch.ops.aten.tanh.default(mul_70); mul_70 = None\n add_71 = torch.ops.aten.add.Tensor(tanh_8, 1.0); tanh_8 = None\n mul_71 = torch.ops.aten.mul.Tensor(mul_68, add_71); mul_68 = add_71 = None\n view_106 = torch.ops.aten.view.default(mul_71, [64, 3072]); mul_71 = None\n permute_71 = torch.ops.aten.permute.default(primals_110, [1, 0]); primals_110 = None\n addmm_35 = torch.ops.aten.addmm.default(primals_111, view_106, permute_71); primals_111 = None\n view_107 = torch.ops.aten.view.default(addmm_35, [1, 64, 768]); addmm_35 = None\n add_72 = torch.ops.aten.add.Tensor(add_67, view_107); add_67 = view_107 = None\n var_mean_18 = torch.ops.aten.var_mean.correction(add_72, [2], correction = 0, keepdim = True)\n getitem_99 = var_mean_18[0]\n getitem_100 = var_mean_18[1]; var_mean_18 = None\n add_73 = torch.ops.aten.add.Tensor(getitem_99, 1e-05); getitem_99 = None\n rsqrt_18 = torch.ops.aten.rsqrt.default(add_73); add_73 = None\n sub_18 = torch.ops.aten.sub.Tensor(add_72, getitem_100); getitem_100 = None\n mul_72 = torch.ops.aten.mul.Tensor(sub_18, rsqrt_18); sub_18 = None\n mul_73 = torch.ops.aten.mul.Tensor(mul_72, primals_112)\n add_74 = torch.ops.aten.add.Tensor(mul_73, primals_113); mul_73 = primals_113 = None\n view_108 = torch.ops.aten.view.default(add_74, [64, 768]); add_74 = None\n permute_72 = torch.ops.aten.permute.default(primals_114, [1, 0]); primals_114 = None\n addmm_36 = 
torch.ops.aten.addmm.default(primals_115, view_108, permute_72); primals_115 = None\n view_109 = torch.ops.aten.view.default(addmm_36, [1, 64, 2304]); addmm_36 = None\n split_9 = torch.ops.aten.split.Tensor(view_109, 768, 2); view_109 = None\n getitem_101 = split_9[0]\n getitem_102 = split_9[1]\n getitem_103 = split_9[2]; split_9 = None\n view_110 = torch.ops.aten.view.default(getitem_102, [1, 64, 12, 64]); getitem_102 = None\n permute_73 = torch.ops.aten.permute.default(view_110, [0, 2, 1, 3]); view_110 = None\n view_111 = torch.ops.aten.view.default(getitem_101, [1, 64, 12, 64]); getitem_101 = None\n permute_74 = torch.ops.aten.permute.default(view_111, [0, 2, 1, 3]); view_111 = None\n view_112 = torch.ops.aten.view.default(getitem_103, [1, 64, 12, 64]); getitem_103 = None\n permute_75 = torch.ops.aten.permute.default(view_112, [0, 2, 1, 3]); view_112 = None\n _scaled_dot_product_efficient_attention_9 = torch.ops.aten._scaled_dot_product_efficient_attention.default(permute_74, permute_73, permute_75, None, True, 0.0, True)\n getitem_104 = _scaled_dot_product_efficient_attention_9[0]\n getitem_105 = _scaled_dot_product_efficient_attention_9[1]\n getitem_106 = _scaled_dot_product_efficient_attention_9[2]\n getitem_107 = _scaled_dot_product_efficient_attention_9[3]; _scaled_dot_product_efficient_attention_9 = None\n permute_76 = torch.ops.aten.permute.default(getitem_104, [0, 2, 1, 3])\n view_113 = torch.ops.aten.view.default(permute_76, [1, 64, 768]); permute_76 = None\n view_114 = torch.ops.aten.view.default(view_113, [64, 768]); view_113 = None\n permute_77 = torch.ops.aten.permute.default(primals_116, [1, 0]); primals_116 = None\n addmm_37 = torch.ops.aten.addmm.default(primals_117, view_114, permute_77); primals_117 = view_114 = None\n view_115 = torch.ops.aten.view.default(addmm_37, [1, 64, 768]); addmm_37 = None\n add_75 = torch.ops.aten.add.Tensor(add_72, view_115); add_72 = view_115 = None\n var_mean_19 = torch.ops.aten.var_mean.correction(add_75, [2], correction = 0, keepdim = True)\n getitem_108 = var_mean_19[0]\n getitem_109 = var_mean_19[1]; var_mean_19 = None\n add_76 = torch.ops.aten.add.Tensor(getitem_108, 1e-05); getitem_108 = None\n rsqrt_19 = torch.ops.aten.rsqrt.default(add_76); add_76 = None\n sub_19 = torch.ops.aten.sub.Tensor(add_75, getitem_109); getitem_109 = None\n mul_74 = torch.ops.aten.mul.Tensor(sub_19, rsqrt_19); sub_19 = None\n mul_75 = torch.ops.aten.mul.Tensor(mul_74, primals_118)\n add_77 = torch.ops.aten.add.Tensor(mul_75, primals_119); mul_75 = primals_119 = None\n view_116 = torch.ops.aten.view.default(add_77, [64, 768]); add_77 = None\n permute_78 = torch.ops.aten.permute.default(primals_120, [1, 0]); primals_120 = None\n addmm_38 = torch.ops.aten.addmm.default(primals_121, view_116, permute_78); primals_121 = None\n view_117 = torch.ops.aten.view.default(addmm_38, [1, 64, 3072])\n mul_76 = torch.ops.aten.mul.Tensor(view_117, 0.5)\n pow_10 = torch.ops.aten.pow.Tensor_Scalar(view_117, 3.0)\n mul_77 = torch.ops.aten.mul.Tensor(pow_10, 0.044715); pow_10 = None\n add_78 = torch.ops.aten.add.Tensor(view_117, mul_77); view_117 = mul_77 = None\n mul_78 = torch.ops.aten.mul.Tensor(add_78, 0.7978845608028654); add_78 = None\n tanh_9 = torch.ops.aten.tanh.default(mul_78); mul_78 = None\n add_79 = torch.ops.aten.add.Tensor(tanh_9, 1.0); tanh_9 = None\n mul_79 = torch.ops.aten.mul.Tensor(mul_76, add_79); mul_76 = add_79 = None\n view_118 = torch.ops.aten.view.default(mul_79, [64, 3072]); mul_79 = None\n permute_79 = torch.ops.aten.permute.default(primals_122, [1, 
0]); primals_122 = None\n addmm_39 = torch.ops.aten.addmm.default(primals_123, view_118, permute_79); primals_123 = None\n view_119 = torch.ops.aten.view.default(addmm_39, [1, 64, 768]); addmm_39 = None\n add_80 = torch.ops.aten.add.Tensor(add_75, view_119); add_75 = view_119 = None\n var_mean_20 = torch.ops.aten.var_mean.correction(add_80, [2], correction = 0, keepdim = True)\n getitem_110 = var_mean_20[0]\n getitem_111 = var_mean_20[1]; var_mean_20 = None\n add_81 = torch.ops.aten.add.Tensor(getitem_110, 1e-05); getitem_110 = None\n rsqrt_20 = torch.ops.aten.rsqrt.default(add_81); add_81 = None\n sub_20 = torch.ops.aten.sub.Tensor(add_80, getitem_111); getitem_111 = None\n mul_80 = torch.ops.aten.mul.Tensor(sub_20, rsqrt_20); sub_20 = None\n mul_81 = torch.ops.aten.mul.Tensor(mul_80, primals_124)\n add_82 = torch.ops.aten.add.Tensor(mul_81, primals_125); mul_81 = primals_125 = None\n view_120 = torch.ops.aten.view.default(add_82, [64, 768]); add_82 = None\n permute_80 = torch.ops.aten.permute.default(primals_126, [1, 0]); primals_126 = None\n addmm_40 = torch.ops.aten.addmm.default(primals_127, view_120, permute_80); primals_127 = None\n view_121 = torch.ops.aten.view.default(addmm_40, [1, 64, 2304]); addmm_40 = None\n split_10 = torch.ops.aten.split.Tensor(view_121, 768, 2); view_121 = None\n getitem_112 = split_10[0]\n getitem_113 = split_10[1]\n getitem_114 = split_10[2]; split_10 = None\n view_122 = torch.ops.aten.view.default(getitem_113, [1, 64, 12, 64]); getitem_113 = None\n permute_81 = torch.ops.aten.permute.default(view_122, [0, 2, 1, 3]); view_122 = None\n view_123 = torch.ops.aten.view.default(getitem_112, [1, 64, 12, 64]); getitem_112 = None\n permute_82 = torch.ops.aten.permute.default(view_123, [0, 2, 1, 3]); view_123 = None\n view_124 = torch.ops.aten.view.default(getitem_114, [1, 64, 12, 64]); getitem_114 = None\n permute_83 = torch.ops.aten.permute.default(view_124, [0, 2, 1, 3]); view_124 = None\n _scaled_dot_product_efficient_attention_10 = torch.ops.aten._scaled_dot_product_efficient_attention.default(permute_82, permute_81, permute_83, None, True, 0.0, True)\n getitem_115 = _scaled_dot_product_efficient_attention_10[0]\n getitem_116 = _scaled_dot_product_efficient_attention_10[1]\n getitem_117 = _scaled_dot_product_efficient_attention_10[2]\n getitem_118 = _scaled_dot_product_efficient_attention_10[3]; _scaled_dot_product_efficient_attention_10 = None\n permute_84 = torch.ops.aten.permute.default(getitem_115, [0, 2, 1, 3])\n view_125 = torch.ops.aten.view.default(permute_84, [1, 64, 768]); permute_84 = None\n view_126 = torch.ops.aten.view.default(view_125, [64, 768]); view_125 = None\n permute_85 = torch.ops.aten.permute.default(primals_128, [1, 0]); primals_128 = None\n addmm_41 = torch.ops.aten.addmm.default(primals_129, view_126, permute_85); primals_129 = view_126 = None\n view_127 = torch.ops.aten.view.default(addmm_41, [1, 64, 768]); addmm_41 = None\n add_83 = torch.ops.aten.add.Tensor(add_80, view_127); add_80 = view_127 = None\n var_mean_21 = torch.ops.aten.var_mean.correction(add_83, [2], correction = 0, keepdim = True)\n getitem_119 = var_mean_21[0]\n getitem_120 = var_mean_21[1]; var_mean_21 = None\n add_84 = torch.ops.aten.add.Tensor(getitem_119, 1e-05); getitem_119 = None\n rsqrt_21 = torch.ops.aten.rsqrt.default(add_84); add_84 = None\n sub_21 = torch.ops.aten.sub.Tensor(add_83, getitem_120); getitem_120 = None\n mul_82 = torch.ops.aten.mul.Tensor(sub_21, rsqrt_21); sub_21 = None\n mul_83 = torch.ops.aten.mul.Tensor(mul_82, primals_130)\n add_85 = 
torch.ops.aten.add.Tensor(mul_83, primals_131); mul_83 = primals_131 = None\n view_128 = torch.ops.aten.view.default(add_85, [64, 768]); add_85 = None\n permute_86 = torch.ops.aten.permute.default(primals_132, [1, 0]); primals_132 = None\n addmm_42 = torch.ops.aten.addmm.default(primals_133, view_128, permute_86); primals_133 = None\n view_129 = torch.ops.aten.view.default(addmm_42, [1, 64, 3072])\n mul_84 = torch.ops.aten.mul.Tensor(view_129, 0.5)\n pow_11 = torch.ops.aten.pow.Tensor_Scalar(view_129, 3.0)\n mul_85 = torch.ops.aten.mul.Tensor(pow_11, 0.044715); pow_11 = None\n add_86 = torch.ops.aten.add.Tensor(view_129, mul_85); view_129 = mul_85 = None\n mul_86 = torch.ops.aten.mul.Tensor(add_86, 0.7978845608028654); add_86 = None\n tanh_10 = torch.ops.aten.tanh.default(mul_86); mul_86 = None\n add_87 = torch.ops.aten.add.Tensor(tanh_10, 1.0); tanh_10 = None\n mul_87 = torch.ops.aten.mul.Tensor(mul_84, add_87); mul_84 = add_87 = None\n view_130 = torch.ops.aten.view.default(mul_87, [64, 3072]); mul_87 = None\n permute_87 = torch.ops.aten.permute.default(primals_134, [1, 0]); primals_134 = None\n addmm_43 = torch.ops.aten.addmm.default(primals_135, view_130, permute_87); primals_135 = None\n view_131 = torch.ops.aten.view.default(addmm_43, [1, 64, 768]); addmm_43 = None\n add_88 = torch.ops.aten.add.Tensor(add_83, view_131); add_83 = view_131 = None\n var_mean_22 = torch.ops.aten.var_mean.correction(add_88, [2], correction = 0, keepdim = True)\n getitem_121 = var_mean_22[0]\n getitem_122 = var_mean_22[1]; var_mean_22 = None\n add_89 = torch.ops.aten.add.Tensor(getitem_121, 1e-05); getitem_121 = None\n rsqrt_22 = torch.ops.aten.rsqrt.default(add_89); add_89 = None\n sub_22 = torch.ops.aten.sub.Tensor(add_88, getitem_122); getitem_122 = None\n mul_88 = torch.ops.aten.mul.Tensor(sub_22, rsqrt_22); sub_22 = None\n mul_89 = torch.ops.aten.mul.Tensor(mul_88, primals_136)\n add_90 = torch.ops.aten.add.Tensor(mul_89, primals_137); mul_89 = primals_137 = None\n view_132 = torch.ops.aten.view.default(add_90, [64, 768]); add_90 = None\n permute_88 = torch.ops.aten.permute.default(primals_138, [1, 0]); primals_138 = None\n addmm_44 = torch.ops.aten.addmm.default(primals_139, view_132, permute_88); primals_139 = None\n view_133 = torch.ops.aten.view.default(addmm_44, [1, 64, 2304]); addmm_44 = None\n split_11 = torch.ops.aten.split.Tensor(view_133, 768, 2); view_133 = None\n getitem_123 = split_11[0]\n getitem_124 = split_11[1]\n getitem_125 = split_11[2]; split_11 = None\n view_134 = torch.ops.aten.view.default(getitem_124, [1, 64, 12, 64]); getitem_124 = None\n permute_89 = torch.ops.aten.permute.default(view_134, [0, 2, 1, 3]); view_134 = None\n view_135 = torch.ops.aten.view.default(getitem_123, [1, 64, 12, 64]); getitem_123 = None\n permute_90 = torch.ops.aten.permute.default(view_135, [0, 2, 1, 3]); view_135 = None\n view_136 = torch.ops.aten.view.default(getitem_125, [1, 64, 12, 64]); getitem_125 = None\n permute_91 = torch.ops.aten.permute.default(view_136, [0, 2, 1, 3]); view_136 = None\n _scaled_dot_product_efficient_attention_11 = torch.ops.aten._scaled_dot_product_efficient_attention.default(permute_90, permute_89, permute_91, None, True, 0.0, True)\n getitem_126 = _scaled_dot_product_efficient_attention_11[0]\n getitem_127 = _scaled_dot_product_efficient_attention_11[1]\n getitem_128 = _scaled_dot_product_efficient_attention_11[2]\n getitem_129 = _scaled_dot_product_efficient_attention_11[3]; _scaled_dot_product_efficient_attention_11 = None\n permute_92 = 
torch.ops.aten.permute.default(getitem_126, [0, 2, 1, 3])\n view_137 = torch.ops.aten.view.default(permute_92, [1, 64, 768]); permute_92 = None\n view_138 = torch.ops.aten.view.default(view_137, [64, 768]); view_137 = None\n permute_93 = torch.ops.aten.permute.default(primals_140, [1, 0]); primals_140 = None\n addmm_45 = torch.ops.aten.addmm.default(primals_141, view_138, permute_93); primals_141 = view_138 = None\n view_139 = torch.ops.aten.view.default(addmm_45, [1, 64, 768]); addmm_45 = None\n add_91 = torch.ops.aten.add.Tensor(add_88, view_139); add_88 = view_139 = None\n var_mean_23 = torch.ops.aten.var_mean.correction(add_91, [2], correction = 0, keepdim = True)\n getitem_130 = var_mean_23[0]\n getitem_131 = var_mean_23[1]; var_mean_23 = None\n add_92 = torch.ops.aten.add.Tensor(getitem_130, 1e-05); getitem_130 = None\n rsqrt_23 = torch.ops.aten.rsqrt.default(add_92); add_92 = None\n sub_23 = torch.ops.aten.sub.Tensor(add_91, getitem_131); getitem_131 = None\n mul_90 = torch.ops.aten.mul.Tensor(sub_23, rsqrt_23); sub_23 = None\n mul_91 = torch.ops.aten.mul.Tensor(mul_90, primals_142)\n add_93 = torch.ops.aten.add.Tensor(mul_91, primals_143); mul_91 = primals_143 = None\n view_140 = torch.ops.aten.view.default(add_93, [64, 768]); add_93 = None\n permute_94 = torch.ops.aten.permute.default(primals_144, [1, 0]); primals_144 = None\n addmm_46 = torch.ops.aten.addmm.default(primals_145, view_140, permute_94); primals_145 = None\n view_141 = torch.ops.aten.view.default(addmm_46, [1, 64, 3072])\n mul_92 = torch.ops.aten.mul.Tensor(view_141, 0.5)\n pow_12 = torch.ops.aten.pow.Tensor_Scalar(view_141, 3.0)\n mul_93 = torch.ops.aten.mul.Tensor(pow_12, 0.044715); pow_12 = None\n add_94 = torch.ops.aten.add.Tensor(view_141, mul_93); view_141 = mul_93 = None\n mul_94 = torch.ops.aten.mul.Tensor(add_94, 0.7978845608028654); add_94 = None\n tanh_11 = torch.ops.aten.tanh.default(mul_94); mul_94 = None\n add_95 = torch.ops.aten.add.Tensor(tanh_11, 1.0); tanh_11 = None\n mul_95 = torch.ops.aten.mul.Tensor(mul_92, add_95); mul_92 = add_95 = None\n view_142 = torch.ops.aten.view.default(mul_95, [64, 3072]); mul_95 = None\n permute_95 = torch.ops.aten.permute.default(primals_146, [1, 0]); primals_146 = None\n addmm_47 = torch.ops.aten.addmm.default(primals_147, view_142, permute_95); primals_147 = None\n view_143 = torch.ops.aten.view.default(addmm_47, [1, 64, 768]); addmm_47 = None\n add_96 = torch.ops.aten.add.Tensor(add_91, view_143); add_91 = view_143 = None\n var_mean_24 = torch.ops.aten.var_mean.correction(add_96, [2], correction = 0, keepdim = True)\n getitem_132 = var_mean_24[0]\n getitem_133 = var_mean_24[1]; var_mean_24 = None\n add_97 = torch.ops.aten.add.Tensor(getitem_132, 1e-05); getitem_132 = None\n rsqrt_24 = torch.ops.aten.rsqrt.default(add_97); add_97 = None\n sub_24 = torch.ops.aten.sub.Tensor(add_96, getitem_133); add_96 = getitem_133 = None\n mul_96 = torch.ops.aten.mul.Tensor(sub_24, rsqrt_24); sub_24 = None\n mul_97 = torch.ops.aten.mul.Tensor(mul_96, primals_148)\n add_98 = torch.ops.aten.add.Tensor(mul_97, primals_149); mul_97 = primals_149 = None\n full_default = torch.ops.aten.full.default([1], -1, dtype = torch.int64, layout = torch.strided, device = device(type='cuda', index=0), pin_memory = False)\n index = torch.ops.aten.index.Tensor(add_98, [None, full_default]); add_98 = None\n permute_96 = torch.ops.aten.permute.default(primals_2, [1, 0]); primals_2 = None\n view_144 = torch.ops.aten.view.default(index, [1, 768]); index = None\n mm = torch.ops.aten.mm.default(view_144, 
permute_96)\n view_145 = torch.ops.aten.view.default(mm, [1, 1, 50304]); mm = None\n permute_99 = torch.ops.aten.permute.default(permute_96, [1, 0]); permute_96 = None\n div = torch.ops.aten.div.Tensor(rsqrt_24, 768); rsqrt_24 = None\n permute_101 = torch.ops.aten.permute.default(permute_95, [1, 0]); permute_95 = None\n permute_105 = torch.ops.aten.permute.default(permute_94, [1, 0]); permute_94 = None\n div_1 = torch.ops.aten.div.Tensor(rsqrt_23, 768); rsqrt_23 = None\n permute_109 = torch.ops.aten.permute.default(permute_93, [1, 0]); permute_93 = None\n permute_117 = torch.ops.aten.permute.default(permute_88, [1, 0]); permute_88 = None\n div_2 = torch.ops.aten.div.Tensor(rsqrt_22, 768); rsqrt_22 = None\n permute_121 = torch.ops.aten.permute.default(permute_87, [1, 0]); permute_87 = None\n permute_125 = torch.ops.aten.permute.default(permute_86, [1, 0]); permute_86 = None\n div_3 = torch.ops.aten.div.Tensor(rsqrt_21, 768); rsqrt_21 = None\n permute_129 = torch.ops.aten.permute.default(permute_85, [1, 0]); permute_85 = None\n permute_137 = torch.ops.aten.permute.default(permute_80, [1, 0]); permute_80 = None\n div_4 = torch.ops.aten.div.Tensor(rsqrt_20, 768); rsqrt_20 = None\n permute_141 = torch.ops.aten.permute.default(permute_79, [1, 0]); permute_79 = None\n permute_145 = torch.ops.aten.permute.default(permute_78, [1, 0]); permute_78 = None\n div_5 = torch.ops.aten.div.Tensor(rsqrt_19, 768); rsqrt_19 = None\n permute_149 = torch.ops.aten.permute.default(permute_77, [1, 0]); permute_77 = None\n permute_157 = torch.ops.aten.permute.default(permute_72, [1, 0]); permute_72 = None\n div_6 = torch.ops.aten.div.Tensor(rsqrt_18, 768); rsqrt_18 = None\n permute_161 = torch.ops.aten.permute.default(permute_71, [1, 0]); permute_71 = None\n permute_165 = torch.ops.aten.permute.default(permute_70, [1, 0]); permute_70 = None\n div_7 = torch.ops.aten.div.Tensor(rsqrt_17, 768); rsqrt_17 = None\n permute_169 = torch.ops.aten.permute.default(permute_69, [1, 0]); permute_69 = None\n permute_177 = torch.ops.aten.permute.default(permute_64, [1, 0]); permute_64 = None\n div_8 = torch.ops.aten.div.Tensor(rsqrt_16, 768); rsqrt_16 = None\n permute_181 = torch.ops.aten.permute.default(permute_63, [1, 0]); permute_63 = None\n permute_185 = torch.ops.aten.permute.default(permute_62, [1, 0]); permute_62 = None\n div_9 = torch.ops.aten.div.Tensor(rsqrt_15, 768); rsqrt_15 = None\n permute_189 = torch.ops.aten.permute.default(permute_61, [1, 0]); permute_61 = None\n permute_197 = torch.ops.aten.permute.default(permute_56, [1, 0]); permute_56 = None\n div_10 = torch.ops.aten.div.Tensor(rsqrt_14, 768); rsqrt_14 = None\n permute_201 = torch.ops.aten.permute.default(permute_55, [1, 0]); permute_55 = None\n permute_205 = torch.ops.aten.permute.default(permute_54, [1, 0]); permute_54 = None\n div_11 = torch.ops.aten.div.Tensor(rsqrt_13, 768); rsqrt_13 = None\n permute_209 = torch.ops.aten.permute.default(permute_53, [1, 0]); permute_53 = None\n permute_217 = torch.ops.aten.permute.default(permute_48, [1, 0]); permute_48 = None\n div_12 = torch.ops.aten.div.Tensor(rsqrt_12, 768); rsqrt_12 = None\n permute_221 = torch.ops.aten.permute.default(permute_47, [1, 0]); permute_47 = None\n permute_225 = torch.ops.aten.permute.default(permute_46, [1, 0]); permute_46 = None\n div_13 = torch.ops.aten.div.Tensor(rsqrt_11, 768); rsqrt_11 = None\n permute_229 = torch.ops.aten.permute.default(permute_45, [1, 0]); permute_45 = None\n permute_237 = torch.ops.aten.permute.default(permute_40, [1, 0]); permute_40 = None\n div_14 = 
torch.ops.aten.div.Tensor(rsqrt_10, 768); rsqrt_10 = None\n permute_241 = torch.ops.aten.permute.default(permute_39, [1, 0]); permute_39 = None\n permute_245 = torch.ops.aten.permute.default(permute_38, [1, 0]); permute_38 = None\n div_15 = torch.ops.aten.div.Tensor(rsqrt_9, 768); rsqrt_9 = None\n permute_249 = torch.ops.aten.permute.default(permute_37, [1, 0]); permute_37 = None\n permute_257 = torch.ops.aten.permute.default(permute_32, [1, 0]); permute_32 = None\n div_16 = torch.ops.aten.div.Tensor(rsqrt_8, 768); rsqrt_8 = None\n permute_261 = torch.ops.aten.permute.default(permute_31, [1, 0]); permute_31 = None\n permute_265 = torch.ops.aten.permute.default(permute_30, [1, 0]); permute_30 = None\n div_17 = torch.ops.aten.div.Tensor(rsqrt_7, 768); rsqrt_7 = None\n permute_269 = torch.ops.aten.permute.default(permute_29, [1, 0]); permute_29 = None\n permute_277 = torch.ops.aten.permute.default(permute_24, [1, 0]); permute_24 = None\n div_18 = torch.ops.aten.div.Tensor(rsqrt_6, 768); rsqrt_6 = None\n permute_281 = torch.ops.aten.permute.default(permute_23, [1, 0]); permute_23 = None\n permute_285 = torch.ops.aten.permute.default(permute_22, [1, 0]); permute_22 = None\n div_19 = torch.ops.aten.div.Tensor(rsqrt_5, 768); rsqrt_5 = None\n permute_289 = torch.ops.aten.permute.default(permute_21, [1, 0]); permute_21 = None\n permute_297 = torch.ops.aten.permute.default(permute_16, [1, 0]); permute_16 = None\n div_20 = torch.ops.aten.div.Tensor(rsqrt_4, 768); rsqrt_4 = None\n permute_301 = torch.ops.aten.permute.default(permute_15, [1, 0]); permute_15 = None\n permute_305 = torch.ops.aten.permute.default(permute_14, [1, 0]); permute_14 = None\n div_21 = torch.ops.aten.div.Tensor(rsqrt_3, 768); rsqrt_3 = None\n permute_309 = torch.ops.aten.permute.default(permute_13, [1, 0]); permute_13 = None\n permute_317 = torch.ops.aten.permute.default(permute_8, [1, 0]); permute_8 = None\n div_22 = torch.ops.aten.div.Tensor(rsqrt_2, 768); rsqrt_2 = None\n permute_321 = torch.ops.aten.permute.default(permute_7, [1, 0]); permute_7 = None\n permute_325 = torch.ops.aten.permute.default(permute_6, [1, 0]); permute_6 = None\n div_23 = torch.ops.aten.div.Tensor(rsqrt_1, 768); rsqrt_1 = None\n permute_329 = torch.ops.aten.permute.default(permute_5, [1, 0]); permute_5 = None\n permute_337 = torch.ops.aten.permute.default(permute, [1, 0]); permute = None\n div_24 = torch.ops.aten.div.Tensor(rsqrt, 768); rsqrt = None\n return (view_145, primals_1, primals_4, primals_10, primals_16, primals_22, primals_28, primals_34, primals_40, primals_46, primals_52, primals_58, primals_64, primals_70, primals_76, primals_82, primals_88, primals_94, primals_100, primals_106, primals_112, primals_118, primals_124, primals_130, primals_136, primals_142, primals_148, unsqueeze, mul, view, permute_1, permute_2, permute_3, getitem_5, getitem_6, getitem_7, getitem_8, mul_2, view_8, addmm_2, view_10, mul_8, view_12, permute_9, permute_10, permute_11, getitem_16, getitem_17, getitem_18, getitem_19, mul_10, view_20, addmm_6, view_22, mul_16, view_24, permute_17, permute_18, permute_19, getitem_27, getitem_28, getitem_29, getitem_30, mul_18, view_32, addmm_10, view_34, mul_24, view_36, permute_25, permute_26, permute_27, getitem_38, getitem_39, getitem_40, getitem_41, mul_26, view_44, addmm_14, view_46, mul_32, view_48, permute_33, permute_34, permute_35, getitem_49, getitem_50, getitem_51, getitem_52, mul_34, view_56, addmm_18, view_58, mul_40, view_60, permute_41, permute_42, permute_43, getitem_60, getitem_61, getitem_62, getitem_63, mul_42, 
view_68, addmm_22, view_70, mul_48, view_72, permute_49, permute_50, permute_51, getitem_71, getitem_72, getitem_73, getitem_74, mul_50, view_80, addmm_26, view_82, mul_56, view_84, permute_57, permute_58, permute_59, getitem_82, getitem_83, getitem_84, getitem_85, mul_58, view_92, addmm_30, view_94, mul_64, view_96, permute_65, permute_66, permute_67, getitem_93, getitem_94, getitem_95, getitem_96, mul_66, view_104, addmm_34, view_106, mul_72, view_108, permute_73, permute_74, permute_75, getitem_104, getitem_105, getitem_106, getitem_107, mul_74, view_116, addmm_38, view_118, mul_80, view_120, permute_81, permute_82, permute_83, getitem_115, getitem_116, getitem_117, getitem_118, mul_82, view_128, addmm_42, view_130, mul_88, view_132, permute_89, permute_90, permute_91, getitem_126, getitem_127, getitem_128, getitem_129, mul_90, view_140, addmm_46, view_142, mul_96, full_default, view_144, permute_99, div, permute_101, permute_105, div_1, permute_109, permute_117, div_2, permute_121, permute_125, div_3, permute_129, permute_137, div_4, permute_141, permute_145, div_5, permute_149, permute_157, div_6, permute_161, permute_165, div_7, permute_169, permute_177, div_8, permute_181, permute_185, div_9, permute_189, permute_197, div_10, permute_201, permute_205, div_11, permute_209, permute_217, div_12, permute_221, permute_225, div_13, permute_229, permute_237, div_14, permute_241, permute_245, div_15, permute_249, permute_257, div_16, permute_261, permute_265, div_17, permute_269, permute_277, div_18, permute_281, permute_285, div_19, permute_289, permute_297, div_20, permute_301, permute_305, div_21, permute_309, permute_317, div_22, permute_321, permute_325, div_23, permute_329, permute_337, div_24)\n \n# To see more debug info, please use `graph_module.print_readable()`", "[yily4oahymyyzyspnyjgkwteqzeiwe4kjdldmy3tmjumziqf7zb] example_inputs[0]: TensorMetadata(dtype=torch.int64, shape=torch.Size([1, 64]), stride=(64, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=None, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[l36txgwatro2oumaemq3fn7fzd6c7rqt7jkya4whofwgykwhyac] example_inputs[1]: TensorMetadata(dtype=torch.float32, shape=torch.Size([50304, 768]), stride=(768, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=154533888, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[j2qdbepspnv5xu5aaa4vfybenn6kcipqnlg4axmhoebbdlhdfcg] example_inputs[2]: TensorMetadata(dtype=torch.float32, shape=torch.Size([1024, 768]), stride=(768, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=3145728, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[ymueslj5cs5k2l243gcdaohltqzpminou4x5gltmu2cgjgroyyd] example_inputs[3]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=3072, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, 
is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[ymueslj5cs5k2l243gcdaohltqzpminou4x5gltmu2cgjgroyyd] example_inputs[4]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=3072, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[ku6bnu4u7osvly4jsmf2w2orf22tghfytwzq6ycj3otmnq6f3wn] example_inputs[5]: TensorMetadata(dtype=torch.float32, shape=torch.Size([2304, 768]), stride=(768, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=7077888, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[57h3bzk67jreuqiikhvgdtcn2mdkz7xuwhdnrjueg4dxxn6tfzm] example_inputs[6]: TensorMetadata(dtype=torch.float32, shape=torch.Size([2304]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=9216, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[a64xnvbb6q6gew5c3aovaeqivj3hstrmxkbra3eidivx42znnuv] example_inputs[7]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768, 768]), stride=(768, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=2359296, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[ymueslj5cs5k2l243gcdaohltqzpminou4x5gltmu2cgjgroyyd] example_inputs[8]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=3072, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[ymueslj5cs5k2l243gcdaohltqzpminou4x5gltmu2cgjgroyyd] example_inputs[9]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=3072, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[ymueslj5cs5k2l243gcdaohltqzpminou4x5gltmu2cgjgroyyd] example_inputs[10]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=3072, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[aafg74anlk7yqmlg6ucup4tmjwhskblh75z227dcfmwpfzw7dzm] example_inputs[11]: TensorMetadata(dtype=torch.float32, shape=torch.Size([3072, 768]), stride=(768, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=9437184, requires_grad=False, is_quantized=False, 
is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[sefiqkkteehwsnilwpljkx7vdgondub2hwggoishzmx7o5etmjk] example_inputs[12]: TensorMetadata(dtype=torch.float32, shape=torch.Size([3072]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=12288, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[6v4azk5hvrkyq22k6mfa2l7ovaddatmim7mcvcpdku2ieirjd64] example_inputs[13]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768, 3072]), stride=(3072, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=9437184, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[ymueslj5cs5k2l243gcdaohltqzpminou4x5gltmu2cgjgroyyd] example_inputs[14]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=3072, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[ymueslj5cs5k2l243gcdaohltqzpminou4x5gltmu2cgjgroyyd] example_inputs[15]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=3072, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[ymueslj5cs5k2l243gcdaohltqzpminou4x5gltmu2cgjgroyyd] example_inputs[16]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=3072, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[ku6bnu4u7osvly4jsmf2w2orf22tghfytwzq6ycj3otmnq6f3wn] example_inputs[17]: TensorMetadata(dtype=torch.float32, shape=torch.Size([2304, 768]), stride=(768, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=7077888, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[57h3bzk67jreuqiikhvgdtcn2mdkz7xuwhdnrjueg4dxxn6tfzm] example_inputs[18]: TensorMetadata(dtype=torch.float32, shape=torch.Size([2304]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=9216, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[a64xnvbb6q6gew5c3aovaeqivj3hstrmxkbra3eidivx42znnuv] example_inputs[19]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768, 768]), stride=(768, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, 
storage_bytes=2359296, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[ymueslj5cs5k2l243gcdaohltqzpminou4x5gltmu2cgjgroyyd] example_inputs[20]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=3072, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[ymueslj5cs5k2l243gcdaohltqzpminou4x5gltmu2cgjgroyyd] example_inputs[21]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=3072, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[ymueslj5cs5k2l243gcdaohltqzpminou4x5gltmu2cgjgroyyd] example_inputs[22]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=3072, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[aafg74anlk7yqmlg6ucup4tmjwhskblh75z227dcfmwpfzw7dzm] example_inputs[23]: TensorMetadata(dtype=torch.float32, shape=torch.Size([3072, 768]), stride=(768, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=9437184, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[sefiqkkteehwsnilwpljkx7vdgondub2hwggoishzmx7o5etmjk] example_inputs[24]: TensorMetadata(dtype=torch.float32, shape=torch.Size([3072]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=12288, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[6v4azk5hvrkyq22k6mfa2l7ovaddatmim7mcvcpdku2ieirjd64] example_inputs[25]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768, 3072]), stride=(3072, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=9437184, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[ymueslj5cs5k2l243gcdaohltqzpminou4x5gltmu2cgjgroyyd] example_inputs[26]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=3072, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[ymueslj5cs5k2l243gcdaohltqzpminou4x5gltmu2cgjgroyyd] example_inputs[27]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, 
memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=3072, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[ymueslj5cs5k2l243gcdaohltqzpminou4x5gltmu2cgjgroyyd] example_inputs[28]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=3072, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[ku6bnu4u7osvly4jsmf2w2orf22tghfytwzq6ycj3otmnq6f3wn] example_inputs[29]: TensorMetadata(dtype=torch.float32, shape=torch.Size([2304, 768]), stride=(768, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=7077888, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[57h3bzk67jreuqiikhvgdtcn2mdkz7xuwhdnrjueg4dxxn6tfzm] example_inputs[30]: TensorMetadata(dtype=torch.float32, shape=torch.Size([2304]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=9216, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[a64xnvbb6q6gew5c3aovaeqivj3hstrmxkbra3eidivx42znnuv] example_inputs[31]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768, 768]), stride=(768, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=2359296, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[ymueslj5cs5k2l243gcdaohltqzpminou4x5gltmu2cgjgroyyd] example_inputs[32]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=3072, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[ymueslj5cs5k2l243gcdaohltqzpminou4x5gltmu2cgjgroyyd] example_inputs[33]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=3072, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[ymueslj5cs5k2l243gcdaohltqzpminou4x5gltmu2cgjgroyyd] example_inputs[34]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=3072, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[aafg74anlk7yqmlg6ucup4tmjwhskblh75z227dcfmwpfzw7dzm] example_inputs[35]: TensorMetadata(dtype=torch.float32, shape=torch.Size([3072, 768]), stride=(768, 1), 
device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=9437184, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[sefiqkkteehwsnilwpljkx7vdgondub2hwggoishzmx7o5etmjk] example_inputs[36]: TensorMetadata(dtype=torch.float32, shape=torch.Size([3072]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=12288, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[6v4azk5hvrkyq22k6mfa2l7ovaddatmim7mcvcpdku2ieirjd64] example_inputs[37]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768, 3072]), stride=(3072, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=9437184, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[ymueslj5cs5k2l243gcdaohltqzpminou4x5gltmu2cgjgroyyd] example_inputs[38]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=3072, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[ymueslj5cs5k2l243gcdaohltqzpminou4x5gltmu2cgjgroyyd] example_inputs[39]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=3072, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[ymueslj5cs5k2l243gcdaohltqzpminou4x5gltmu2cgjgroyyd] example_inputs[40]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=3072, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[ku6bnu4u7osvly4jsmf2w2orf22tghfytwzq6ycj3otmnq6f3wn] example_inputs[41]: TensorMetadata(dtype=torch.float32, shape=torch.Size([2304, 768]), stride=(768, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=7077888, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[57h3bzk67jreuqiikhvgdtcn2mdkz7xuwhdnrjueg4dxxn6tfzm] example_inputs[42]: TensorMetadata(dtype=torch.float32, shape=torch.Size([2304]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=9216, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[a64xnvbb6q6gew5c3aovaeqivj3hstrmxkbra3eidivx42znnuv] example_inputs[43]: 
TensorMetadata(dtype=torch.float32, shape=torch.Size([768, 768]), stride=(768, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=2359296, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[ymueslj5cs5k2l243gcdaohltqzpminou4x5gltmu2cgjgroyyd] example_inputs[44]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=3072, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[ymueslj5cs5k2l243gcdaohltqzpminou4x5gltmu2cgjgroyyd] example_inputs[45]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=3072, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[ymueslj5cs5k2l243gcdaohltqzpminou4x5gltmu2cgjgroyyd] example_inputs[46]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=3072, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[aafg74anlk7yqmlg6ucup4tmjwhskblh75z227dcfmwpfzw7dzm] example_inputs[47]: TensorMetadata(dtype=torch.float32, shape=torch.Size([3072, 768]), stride=(768, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=9437184, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[sefiqkkteehwsnilwpljkx7vdgondub2hwggoishzmx7o5etmjk] example_inputs[48]: TensorMetadata(dtype=torch.float32, shape=torch.Size([3072]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=12288, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[6v4azk5hvrkyq22k6mfa2l7ovaddatmim7mcvcpdku2ieirjd64] example_inputs[49]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768, 3072]), stride=(3072, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=9437184, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[ymueslj5cs5k2l243gcdaohltqzpminou4x5gltmu2cgjgroyyd] example_inputs[50]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=3072, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", 
"[ymueslj5cs5k2l243gcdaohltqzpminou4x5gltmu2cgjgroyyd] example_inputs[51]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=3072, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[ymueslj5cs5k2l243gcdaohltqzpminou4x5gltmu2cgjgroyyd] example_inputs[52]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=3072, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[ku6bnu4u7osvly4jsmf2w2orf22tghfytwzq6ycj3otmnq6f3wn] example_inputs[53]: TensorMetadata(dtype=torch.float32, shape=torch.Size([2304, 768]), stride=(768, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=7077888, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[57h3bzk67jreuqiikhvgdtcn2mdkz7xuwhdnrjueg4dxxn6tfzm] example_inputs[54]: TensorMetadata(dtype=torch.float32, shape=torch.Size([2304]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=9216, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[a64xnvbb6q6gew5c3aovaeqivj3hstrmxkbra3eidivx42znnuv] example_inputs[55]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768, 768]), stride=(768, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=2359296, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[ymueslj5cs5k2l243gcdaohltqzpminou4x5gltmu2cgjgroyyd] example_inputs[56]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=3072, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[ymueslj5cs5k2l243gcdaohltqzpminou4x5gltmu2cgjgroyyd] example_inputs[57]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=3072, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[ymueslj5cs5k2l243gcdaohltqzpminou4x5gltmu2cgjgroyyd] example_inputs[58]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=3072, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, 
is_coalesced=None, dense_dim=None, sparse_dim=None)", "[aafg74anlk7yqmlg6ucup4tmjwhskblh75z227dcfmwpfzw7dzm] example_inputs[59]: TensorMetadata(dtype=torch.float32, shape=torch.Size([3072, 768]), stride=(768, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=9437184, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[sefiqkkteehwsnilwpljkx7vdgondub2hwggoishzmx7o5etmjk] example_inputs[60]: TensorMetadata(dtype=torch.float32, shape=torch.Size([3072]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=12288, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[6v4azk5hvrkyq22k6mfa2l7ovaddatmim7mcvcpdku2ieirjd64] example_inputs[61]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768, 3072]), stride=(3072, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=9437184, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[ymueslj5cs5k2l243gcdaohltqzpminou4x5gltmu2cgjgroyyd] example_inputs[62]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=3072, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[ymueslj5cs5k2l243gcdaohltqzpminou4x5gltmu2cgjgroyyd] example_inputs[63]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=3072, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[ymueslj5cs5k2l243gcdaohltqzpminou4x5gltmu2cgjgroyyd] example_inputs[64]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=3072, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[ku6bnu4u7osvly4jsmf2w2orf22tghfytwzq6ycj3otmnq6f3wn] example_inputs[65]: TensorMetadata(dtype=torch.float32, shape=torch.Size([2304, 768]), stride=(768, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=7077888, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[57h3bzk67jreuqiikhvgdtcn2mdkz7xuwhdnrjueg4dxxn6tfzm] example_inputs[66]: TensorMetadata(dtype=torch.float32, shape=torch.Size([2304]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=9216, requires_grad=False, is_quantized=False, 
is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[a64xnvbb6q6gew5c3aovaeqivj3hstrmxkbra3eidivx42znnuv] example_inputs[67]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768, 768]), stride=(768, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=2359296, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[ymueslj5cs5k2l243gcdaohltqzpminou4x5gltmu2cgjgroyyd] example_inputs[68]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=3072, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[ymueslj5cs5k2l243gcdaohltqzpminou4x5gltmu2cgjgroyyd] example_inputs[69]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=3072, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[ymueslj5cs5k2l243gcdaohltqzpminou4x5gltmu2cgjgroyyd] example_inputs[70]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=3072, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[aafg74anlk7yqmlg6ucup4tmjwhskblh75z227dcfmwpfzw7dzm] example_inputs[71]: TensorMetadata(dtype=torch.float32, shape=torch.Size([3072, 768]), stride=(768, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=9437184, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[sefiqkkteehwsnilwpljkx7vdgondub2hwggoishzmx7o5etmjk] example_inputs[72]: TensorMetadata(dtype=torch.float32, shape=torch.Size([3072]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=12288, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[6v4azk5hvrkyq22k6mfa2l7ovaddatmim7mcvcpdku2ieirjd64] example_inputs[73]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768, 3072]), stride=(3072, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=9437184, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[ymueslj5cs5k2l243gcdaohltqzpminou4x5gltmu2cgjgroyyd] example_inputs[74]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, 
storage_bytes=3072, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[ymueslj5cs5k2l243gcdaohltqzpminou4x5gltmu2cgjgroyyd] example_inputs[75]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=3072, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[ymueslj5cs5k2l243gcdaohltqzpminou4x5gltmu2cgjgroyyd] example_inputs[76]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=3072, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[ku6bnu4u7osvly4jsmf2w2orf22tghfytwzq6ycj3otmnq6f3wn] example_inputs[77]: TensorMetadata(dtype=torch.float32, shape=torch.Size([2304, 768]), stride=(768, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=7077888, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[57h3bzk67jreuqiikhvgdtcn2mdkz7xuwhdnrjueg4dxxn6tfzm] example_inputs[78]: TensorMetadata(dtype=torch.float32, shape=torch.Size([2304]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=9216, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[a64xnvbb6q6gew5c3aovaeqivj3hstrmxkbra3eidivx42znnuv] example_inputs[79]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768, 768]), stride=(768, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=2359296, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[ymueslj5cs5k2l243gcdaohltqzpminou4x5gltmu2cgjgroyyd] example_inputs[80]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=3072, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[ymueslj5cs5k2l243gcdaohltqzpminou4x5gltmu2cgjgroyyd] example_inputs[81]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=3072, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[ymueslj5cs5k2l243gcdaohltqzpminou4x5gltmu2cgjgroyyd] example_inputs[82]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, 
memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=3072, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[aafg74anlk7yqmlg6ucup4tmjwhskblh75z227dcfmwpfzw7dzm] example_inputs[83]: TensorMetadata(dtype=torch.float32, shape=torch.Size([3072, 768]), stride=(768, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=9437184, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[sefiqkkteehwsnilwpljkx7vdgondub2hwggoishzmx7o5etmjk] example_inputs[84]: TensorMetadata(dtype=torch.float32, shape=torch.Size([3072]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=12288, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[6v4azk5hvrkyq22k6mfa2l7ovaddatmim7mcvcpdku2ieirjd64] example_inputs[85]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768, 3072]), stride=(3072, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=9437184, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[ymueslj5cs5k2l243gcdaohltqzpminou4x5gltmu2cgjgroyyd] example_inputs[86]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=3072, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[ymueslj5cs5k2l243gcdaohltqzpminou4x5gltmu2cgjgroyyd] example_inputs[87]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=3072, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[ymueslj5cs5k2l243gcdaohltqzpminou4x5gltmu2cgjgroyyd] example_inputs[88]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=3072, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[ku6bnu4u7osvly4jsmf2w2orf22tghfytwzq6ycj3otmnq6f3wn] example_inputs[89]: TensorMetadata(dtype=torch.float32, shape=torch.Size([2304, 768]), stride=(768, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=7077888, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[57h3bzk67jreuqiikhvgdtcn2mdkz7xuwhdnrjueg4dxxn6tfzm] example_inputs[90]: TensorMetadata(dtype=torch.float32, shape=torch.Size([2304]), stride=(1,), 
device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=9216, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[a64xnvbb6q6gew5c3aovaeqivj3hstrmxkbra3eidivx42znnuv] example_inputs[91]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768, 768]), stride=(768, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=2359296, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[ymueslj5cs5k2l243gcdaohltqzpminou4x5gltmu2cgjgroyyd] example_inputs[92]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=3072, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[ymueslj5cs5k2l243gcdaohltqzpminou4x5gltmu2cgjgroyyd] example_inputs[93]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=3072, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[ymueslj5cs5k2l243gcdaohltqzpminou4x5gltmu2cgjgroyyd] example_inputs[94]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=3072, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[aafg74anlk7yqmlg6ucup4tmjwhskblh75z227dcfmwpfzw7dzm] example_inputs[95]: TensorMetadata(dtype=torch.float32, shape=torch.Size([3072, 768]), stride=(768, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=9437184, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[sefiqkkteehwsnilwpljkx7vdgondub2hwggoishzmx7o5etmjk] example_inputs[96]: TensorMetadata(dtype=torch.float32, shape=torch.Size([3072]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=12288, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[6v4azk5hvrkyq22k6mfa2l7ovaddatmim7mcvcpdku2ieirjd64] example_inputs[97]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768, 3072]), stride=(3072, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=9437184, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[ymueslj5cs5k2l243gcdaohltqzpminou4x5gltmu2cgjgroyyd] example_inputs[98]: 
TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=3072, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[ymueslj5cs5k2l243gcdaohltqzpminou4x5gltmu2cgjgroyyd] example_inputs[99]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=3072, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[ymueslj5cs5k2l243gcdaohltqzpminou4x5gltmu2cgjgroyyd] example_inputs[100]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=3072, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[ku6bnu4u7osvly4jsmf2w2orf22tghfytwzq6ycj3otmnq6f3wn] example_inputs[101]: TensorMetadata(dtype=torch.float32, shape=torch.Size([2304, 768]), stride=(768, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=7077888, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[57h3bzk67jreuqiikhvgdtcn2mdkz7xuwhdnrjueg4dxxn6tfzm] example_inputs[102]: TensorMetadata(dtype=torch.float32, shape=torch.Size([2304]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=9216, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[a64xnvbb6q6gew5c3aovaeqivj3hstrmxkbra3eidivx42znnuv] example_inputs[103]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768, 768]), stride=(768, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=2359296, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[ymueslj5cs5k2l243gcdaohltqzpminou4x5gltmu2cgjgroyyd] example_inputs[104]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=3072, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[ymueslj5cs5k2l243gcdaohltqzpminou4x5gltmu2cgjgroyyd] example_inputs[105]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=3072, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", 
"[ymueslj5cs5k2l243gcdaohltqzpminou4x5gltmu2cgjgroyyd] example_inputs[106]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=3072, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[aafg74anlk7yqmlg6ucup4tmjwhskblh75z227dcfmwpfzw7dzm] example_inputs[107]: TensorMetadata(dtype=torch.float32, shape=torch.Size([3072, 768]), stride=(768, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=9437184, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[sefiqkkteehwsnilwpljkx7vdgondub2hwggoishzmx7o5etmjk] example_inputs[108]: TensorMetadata(dtype=torch.float32, shape=torch.Size([3072]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=12288, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[6v4azk5hvrkyq22k6mfa2l7ovaddatmim7mcvcpdku2ieirjd64] example_inputs[109]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768, 3072]), stride=(3072, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=9437184, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[ymueslj5cs5k2l243gcdaohltqzpminou4x5gltmu2cgjgroyyd] example_inputs[110]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=3072, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[ymueslj5cs5k2l243gcdaohltqzpminou4x5gltmu2cgjgroyyd] example_inputs[111]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=3072, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[ymueslj5cs5k2l243gcdaohltqzpminou4x5gltmu2cgjgroyyd] example_inputs[112]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=3072, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[ku6bnu4u7osvly4jsmf2w2orf22tghfytwzq6ycj3otmnq6f3wn] example_inputs[113]: TensorMetadata(dtype=torch.float32, shape=torch.Size([2304, 768]), stride=(768, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=7077888, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, 
is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[57h3bzk67jreuqiikhvgdtcn2mdkz7xuwhdnrjueg4dxxn6tfzm] example_inputs[114]: TensorMetadata(dtype=torch.float32, shape=torch.Size([2304]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=9216, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[a64xnvbb6q6gew5c3aovaeqivj3hstrmxkbra3eidivx42znnuv] example_inputs[115]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768, 768]), stride=(768, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=2359296, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[ymueslj5cs5k2l243gcdaohltqzpminou4x5gltmu2cgjgroyyd] example_inputs[116]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=3072, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[ymueslj5cs5k2l243gcdaohltqzpminou4x5gltmu2cgjgroyyd] example_inputs[117]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=3072, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[ymueslj5cs5k2l243gcdaohltqzpminou4x5gltmu2cgjgroyyd] example_inputs[118]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=3072, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[aafg74anlk7yqmlg6ucup4tmjwhskblh75z227dcfmwpfzw7dzm] example_inputs[119]: TensorMetadata(dtype=torch.float32, shape=torch.Size([3072, 768]), stride=(768, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=9437184, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[sefiqkkteehwsnilwpljkx7vdgondub2hwggoishzmx7o5etmjk] example_inputs[120]: TensorMetadata(dtype=torch.float32, shape=torch.Size([3072]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=12288, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[6v4azk5hvrkyq22k6mfa2l7ovaddatmim7mcvcpdku2ieirjd64] example_inputs[121]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768, 3072]), stride=(3072, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=9437184, requires_grad=False, 
is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[ymueslj5cs5k2l243gcdaohltqzpminou4x5gltmu2cgjgroyyd] example_inputs[122]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=3072, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[ymueslj5cs5k2l243gcdaohltqzpminou4x5gltmu2cgjgroyyd] example_inputs[123]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=3072, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[ymueslj5cs5k2l243gcdaohltqzpminou4x5gltmu2cgjgroyyd] example_inputs[124]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=3072, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[ku6bnu4u7osvly4jsmf2w2orf22tghfytwzq6ycj3otmnq6f3wn] example_inputs[125]: TensorMetadata(dtype=torch.float32, shape=torch.Size([2304, 768]), stride=(768, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=7077888, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[57h3bzk67jreuqiikhvgdtcn2mdkz7xuwhdnrjueg4dxxn6tfzm] example_inputs[126]: TensorMetadata(dtype=torch.float32, shape=torch.Size([2304]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=9216, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[a64xnvbb6q6gew5c3aovaeqivj3hstrmxkbra3eidivx42znnuv] example_inputs[127]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768, 768]), stride=(768, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=2359296, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[ymueslj5cs5k2l243gcdaohltqzpminou4x5gltmu2cgjgroyyd] example_inputs[128]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=3072, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[ymueslj5cs5k2l243gcdaohltqzpminou4x5gltmu2cgjgroyyd] example_inputs[129]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, 
storage_offset=0, storage_bytes=3072, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[ymueslj5cs5k2l243gcdaohltqzpminou4x5gltmu2cgjgroyyd] example_inputs[130]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=3072, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[aafg74anlk7yqmlg6ucup4tmjwhskblh75z227dcfmwpfzw7dzm] example_inputs[131]: TensorMetadata(dtype=torch.float32, shape=torch.Size([3072, 768]), stride=(768, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=9437184, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[sefiqkkteehwsnilwpljkx7vdgondub2hwggoishzmx7o5etmjk] example_inputs[132]: TensorMetadata(dtype=torch.float32, shape=torch.Size([3072]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=12288, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[6v4azk5hvrkyq22k6mfa2l7ovaddatmim7mcvcpdku2ieirjd64] example_inputs[133]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768, 3072]), stride=(3072, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=9437184, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[ymueslj5cs5k2l243gcdaohltqzpminou4x5gltmu2cgjgroyyd] example_inputs[134]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=3072, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[ymueslj5cs5k2l243gcdaohltqzpminou4x5gltmu2cgjgroyyd] example_inputs[135]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=3072, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[ymueslj5cs5k2l243gcdaohltqzpminou4x5gltmu2cgjgroyyd] example_inputs[136]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=3072, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[ku6bnu4u7osvly4jsmf2w2orf22tghfytwzq6ycj3otmnq6f3wn] example_inputs[137]: TensorMetadata(dtype=torch.float32, shape=torch.Size([2304, 768]), stride=(768, 1), device=device(type='cuda', index=0), 
layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=7077888, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[57h3bzk67jreuqiikhvgdtcn2mdkz7xuwhdnrjueg4dxxn6tfzm] example_inputs[138]: TensorMetadata(dtype=torch.float32, shape=torch.Size([2304]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=9216, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[a64xnvbb6q6gew5c3aovaeqivj3hstrmxkbra3eidivx42znnuv] example_inputs[139]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768, 768]), stride=(768, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=2359296, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[ymueslj5cs5k2l243gcdaohltqzpminou4x5gltmu2cgjgroyyd] example_inputs[140]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=3072, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[ymueslj5cs5k2l243gcdaohltqzpminou4x5gltmu2cgjgroyyd] example_inputs[141]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=3072, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[ymueslj5cs5k2l243gcdaohltqzpminou4x5gltmu2cgjgroyyd] example_inputs[142]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=3072, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[aafg74anlk7yqmlg6ucup4tmjwhskblh75z227dcfmwpfzw7dzm] example_inputs[143]: TensorMetadata(dtype=torch.float32, shape=torch.Size([3072, 768]), stride=(768, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=9437184, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[sefiqkkteehwsnilwpljkx7vdgondub2hwggoishzmx7o5etmjk] example_inputs[144]: TensorMetadata(dtype=torch.float32, shape=torch.Size([3072]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=12288, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[6v4azk5hvrkyq22k6mfa2l7ovaddatmim7mcvcpdku2ieirjd64] example_inputs[145]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768, 
3072]), stride=(3072, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=9437184, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[ymueslj5cs5k2l243gcdaohltqzpminou4x5gltmu2cgjgroyyd] example_inputs[146]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=3072, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[ymueslj5cs5k2l243gcdaohltqzpminou4x5gltmu2cgjgroyyd] example_inputs[147]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=3072, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[ymueslj5cs5k2l243gcdaohltqzpminou4x5gltmu2cgjgroyyd] example_inputs[148]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=3072, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] fx_kwargs[aot_mode]: False", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] fx_kwargs[cpp_wrapper]: False", "[moqyx73pd52qgfefpqfwjkxjzf4hj25dyzowq4prptxxsmfw2vg] fx_kwargs[cudagraphs]: BoxedBool(value=True)", "[tquy2we2efmowuj4wuqzcfcfdcrkzkzmwdae6hprj7fa64jpusq] fx_kwargs[extern_node_serializer]: None", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] fx_kwargs[is_backward]: False", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] fx_kwargs[is_inference]: False", "[tquy2we2efmowuj4wuqzcfcfdcrkzkzmwdae6hprj7fa64jpusq] fx_kwargs[layout_opt]: None", "[gqb2vspiuwox2kgd2oeoxezbk3ia6ckfpuiqza2vhvphouxwhv5] fx_kwargs[static_input_idxs]: [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76, 77, 78, 79, 80, 81, 82, 83, 84, 85, 86, 87, 88, 89, 90, 91, 92, 93, 94, 95, 96, 97, 98, 99, 100, 101, 102, 103, 104, 105, 106, 107, 108, 109, 110, 111, 112, 113, 114, 115, 116, 117, 118, 119, 120, 121, 122, 123, 124, 125, 126, 127, 128, 129, 130, 131, 132, 133, 134, 135, 136, 137, 138, 139, 140, 141, 142, 143, 144, 145, 146, 147, 148]", "[moyibva4eclxkrvb6e7da5ve2knrozngxwjgojtfbwsd4wt762m] fx_kwargs[user_visible_outputs]: {'view_145': None}", "[vrl5ktomgtzox5xucd3np6vug3vyj6hwwzahqijuwpmamlv7ohi] inputs_to_check[0]: 0", "[du4vyrfyozrfxcf6kk6ma7oqwatapifazeelfsawmsiu6gjdtxp] deterministic_algorithms_settings: (False, False, True)", "[7as26aeta7rzhgm2mxh4el36kupf55fr27327kzc2fsdiy3nexy] cuda_matmul_settings: (True, True, True)", "[7sgsc55kfjzwis4uo5qss3whnvhbsher24yq6kx6s5kpsrkxea6] torch_version: ", "[poglqjwowp4gnkmehjby2lvdjrwuo5tbxa2gayd6smgasl2hgsd] 
system_info[device]: {'name': 'NVIDIA H100'}", "[sp37civic5nfnonkkbzih3xfntpo3gcmphcbajuw565ur3hduzs] system_info[version]: {'triton': '3.0.0+45fff310c8d412cc506d0c1c7cf83e442e39f90ca1a9dbd4346ecd342591748e57a184b0a3-f7c9bcb1b9106f4f2459ef1b70df75315076b76babe2a4ba9e0b3b5a40697757-d412cc506d0c1c7cf83e442e39f90ca1a9dbd4346ecd342591748e57a184b0a3-23d635e690d670bf61798e1259674b78c0ed5ba222ab6a455f329f27a758fc2d-e3b0c44298fc1c149afbf4c8996fb92427ae41e4649b934ca495991b7852b855-c9cccccc1377e5b8c152e8449fa73a2b3660a899bebac0581c99e4e368a61cde-e82522398a0a43b3c7db9c7fa26ab086260c0981e59942e1df54ddc669d7b78c-cf28658fa328f7f283ec4e6ccc6c48d7c2a8ddbdf5134d3eb35c9b38ce4ace44-b9d80690b3109c2aaf5ece450d62e93b37eb6ab38552089794b3bb36e36a22b3-36130a37af1b19a0dec569aa08d30b00c74c8f02b6b632999d86dea169146792-572e6cbc0b9ae0089330aa5d115d10cfc16bd45a69e3ca362c902b9c04b5280b-412a0e8de51f28c2de98b3343804b2949ede2cb593052a0eb0ba3e6a9a1812d0', 'cuda': '12.0'}", "[zapn5tv6636hi2hdgg2j7wqiwxjbrqbqa4vrgrnuli5v34salwn] system_info[hash]: ee3e5029961cf964c22a83f0ca3586fcab4a36649a1478e6293b5382433b23d3", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[TYPE_CHECKING]: False", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[abi_compatible]: False", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[aggressive_fusion]: False", "[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[allow_buffer_reuse]: True", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[allow_stack_allocation]: False", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[always_keep_tensor_constants]: False", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[aot_inductor.debug_compile]: False", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[aot_inductor.debug_dump_consts_bin]: False", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[aot_inductor.force_mmap_weights]: False", "[v3hzzlv4tjgvp3pyhmzagjd25orl6n7nynoa7svlhhwk73b7u3c] inductor_config[aot_inductor.output_path]: ", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[aot_inductor.package]: False", "[v3hzzlv4tjgvp3pyhmzagjd25orl6n7nynoa7svlhhwk73b7u3c] inductor_config[aot_inductor.serialized_in_spec]: ", "[v3hzzlv4tjgvp3pyhmzagjd25orl6n7nynoa7svlhhwk73b7u3c] inductor_config[aot_inductor.serialized_out_spec]: ", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[aot_inductor.use_runtime_constant_folding]: False", "[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[assert_indirect_indexing]: True", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[assume_aligned_inputs]: False", "[v3hzzlv4tjgvp3pyhmzagjd25orl6n7nynoa7svlhhwk73b7u3c] inductor_config[autoheuristic_collect]: ", "[jvchmi66fvqzlemhr5fcqorz5trfdtdalzfagtj2aolmimwqhdq] inductor_config[autoheuristic_log_path]: DEFAULT", "[jwbrgxes7vjqumngs5hyj6gn5nytv2whnppnzngvaagfmawhkkd] inductor_config[autoheuristic_use]: mixed_mm", "[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[autotune_fallback_to_aten]: True", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[autotune_in_subproc]: False", "[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[autotune_local_cache]: True", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[autotune_multi_device]: False", 
"[tquy2we2efmowuj4wuqzcfcfdcrkzkzmwdae6hprj7fa64jpusq] inductor_config[autotune_remote_cache]: None", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[b2b_gemm_pass]: False", "[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[batch_fusion]: True", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[benchmark_combo_kernel]: False", "[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[benchmark_epilogue_fusion]: True", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[benchmark_fusion]: False", "[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[benchmark_harness]: True", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[benchmark_kernel]: False", "[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[bw_outputs_user_visible]: True", "[b4ha3ravs3qv237q65hpfqegbnoww7tf2ahcbu2i7xo6te5spqs] inductor_config[c_shim_version]: 2", "[aghvyrrgwvxijco2pk5wzc3cgmmthrbmgxitiibxuuscxdwrjd3] inductor_config[combo_kernel_allow_mixed_sizes]: 1", "[aghvyrrgwvxijco2pk5wzc3cgmmthrbmgxitiibxuuscxdwrjd3] inductor_config[combo_kernels_autotune]: 1", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[comment_origin]: False", "[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[comprehensive_padding]: True", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[compute_all_bounds]: False", "[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[constant_and_index_propagation]: True", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[conv_1x1_as_mm]: False", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[coordinate_descent_check_all_directions]: False", "[aghvyrrgwvxijco2pk5wzc3cgmmthrbmgxitiibxuuscxdwrjd3] inductor_config[coordinate_descent_search_radius]: 1", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[coordinate_descent_tuning]: False", "[c7zj4qytmety6keurs3hsh5wn7foxp3dqx4kym2ucszzcb2ngrf] inductor_config[cpp.cxx]: (None, 'g++')", "[yrty22bseefglnysuoec4ji7j2rnaggdj3g33zzj7avogwfmgdw] inductor_config[cpp.descriptive_names]: original_aten", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[cpp.dynamic_threads]: False", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[cpp.enable_floating_point_contract_flag]: False", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[cpp.enable_kernel_profile]: False", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[cpp.enable_unsafe_math_opt_flag]: False", "[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[cpp.fallback_scatter_reduce_sum]: True", "[aghvyrrgwvxijco2pk5wzc3cgmmthrbmgxitiibxuuscxdwrjd3] inductor_config[cpp.gemm_max_k_slices]: 1", "[tquy2we2efmowuj4wuqzcfcfdcrkzkzmwdae6hprj7fa64jpusq] inductor_config[cpp.inject_log1p_bug_TESTING_ONLY]: None", "[tquy2we2efmowuj4wuqzcfcfdcrkzkzmwdae6hprj7fa64jpusq] inductor_config[cpp.inject_relu_bug_TESTING_ONLY]: None", "[ebt2ncs4f5y7dn7btzi76mnouepvzad474tmp5iju4wiuumjl4s] inductor_config[cpp.max_horizontal_fusion_size]: 16", "[g7rrnbg5yonzux3cfj5ovre5lob3ayda7qcfpxjvtwmiz4uicii] inductor_config[cpp.min_chunk_size]: 4096", "[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[cpp.no_redundant_loops]: True", "[tquy2we2efmowuj4wuqzcfcfdcrkzkzmwdae6hprj7fa64jpusq] inductor_config[cpp.simdlen]: None", 
"[sz3im5ogc6asp7g4uqocnovype63tkdexzfrniv6hn2oank3biu] inductor_config[cpp.threads]: -1", "[tquy2we2efmowuj4wuqzcfcfdcrkzkzmwdae6hprj7fa64jpusq] inductor_config[cpp.vec_isa_ok]: None", "[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[cpp.weight_prepack]: True", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[cpp_wrapper]: False", "[bsvfcwwoczx2rlkdz2eta6doujsymyihmi46hhwk6clrrvwcb6m] inductor_config[cpu_backend]: cpp", "[tquy2we2efmowuj4wuqzcfcfdcrkzkzmwdae6hprj7fa64jpusq] inductor_config[cuda.arch]: None", "[tvyftmtdmezlejo2xllu7awzv4pzc4vm4fub4b3gpl5jptjkosi] inductor_config[cuda.compile_opt_level]: -O1", "[tquy2we2efmowuj4wuqzcfcfdcrkzkzmwdae6hprj7fa64jpusq] inductor_config[cuda.cuda_cxx]: None", "[aghvyrrgwvxijco2pk5wzc3cgmmthrbmgxitiibxuuscxdwrjd3] inductor_config[cuda.cutlass_backend_min_gemm_size]: 1", "[tquy2we2efmowuj4wuqzcfcfdcrkzkzmwdae6hprj7fa64jpusq] inductor_config[cuda.cutlass_max_profiling_configs]: None", "[tquy2we2efmowuj4wuqzcfcfdcrkzkzmwdae6hprj7fa64jpusq] inductor_config[cuda.cutlass_op_allowlist_regex]: None", "[lwkz5chtpji756gurqw4foijfi7zfgljtnn5nmnvdi2skpt4mgh] inductor_config[cuda.cutlass_op_denylist_regex]: pingpong", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[cuda.enable_cuda_lto]: False", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[cuda.enable_debug_info]: False", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[cuda.enable_ptxas_info]: False", "[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[cuda.generate_test_runner]: True", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[cuda.use_fast_math]: False", "[tquy2we2efmowuj4wuqzcfcfdcrkzkzmwdae6hprj7fa64jpusq] inductor_config[cuda.version]: None", "[caw4ly2z672k6kjfahoxwpajp5idhhtrpgf3ma2clylcp7c7aid] inductor_config[cuda_backend]: triton", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[dce]: False", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[debug]: False", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[debug_fusion]: False", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[debug_index_asserts]: False", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[debug_ir_traceback]: False", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[decompose_mem_bound_mm]: False", "[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[developer_warnings]: True", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[disable_cpp_codegen]: False", "[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[disable_progress]: True", "[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[dynamic_scale_rblock]: True", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[efficient_conv_bn_eval_fx_passes]: False", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[emulate_precision_casts]: False", "[v3hzzlv4tjgvp3pyhmzagjd25orl6n7nynoa7svlhhwk73b7u3c] inductor_config[enabled_metric_tables]: ", "[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[epilogue_fusion]: True", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[epilogue_fusion_first]: False", "[lxxtoqhcoepwfokeiibd575gnxo3uzwiv4hmpomlwkpzqz3qzsh] inductor_config[estimate_op_runtime]: default", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] 
inductor_config[fallback_random]: False", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[force_disable_caches]: False", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[force_fuse_int_mm_with_mul]: False", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[force_layout_optimization]: False", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[force_same_precision]: False", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[force_shape_pad]: False", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[freezing]: False", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[freezing_discard_parameters]: False", "[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[fx_graph_cache]: True", "[tquy2we2efmowuj4wuqzcfcfdcrkzkzmwdae6hprj7fa64jpusq] inductor_config[fx_graph_remote_cache]: None", "[zwmmbkdkarexuhbigurz5lfnhx64tht7fznecjkrvznh6rzivbv] inductor_config[fx_passes_numeric_check]: {'pre_grad': False, 'precision': 0.0001, 'num_iterations': 1, 'requires_optimizer': True}", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[generate_intermediate_hooks]: False", "[tquy2we2efmowuj4wuqzcfcfdcrkzkzmwdae6hprj7fa64jpusq] inductor_config[global_cache_dir]: None", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[group_fusion]: False", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[halide.asserts]: False", "[ljhgflgihidopsfsdcbqynv27nceykby3nutyd5jlcpq7n6e7l4] inductor_config[halide.cpu_target]: host", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[halide.debug]: False", "[wx7vmsmrdpk5ue2txlywp3lj3faqmdjphs5fgg2ehzsyno7uovg] inductor_config[halide.gpu_target]: host-cuda", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[halide.scan_kernels]: False", "[k5ogk6345jvklsnu7g2njqstiz2g6pm5wmqpgg3kasrmuqwjvl6] inductor_config[halide.scheduler_cpu]: Adams2019", "[svgytlua5wcyeia7wq7e6zgh5tsueikrnzchmdmouvmkpfsc2zq] inductor_config[halide.scheduler_cuda]: Anderson2021", "[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[implicit_fallbacks]: True", "[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[inplace_buffers]: True", "[5fxczt3ciyxitdhizb7sfsgn7fhpczcqsngttnt5ot2wyctk7co] inductor_config[inter_node_bw]: 25", "[yezuzjtg4h3jjur4jwtwiehbyixa7eonq4tqsqmwqve2lvvmrem] inductor_config[intra_node_bw]: 300", "[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[is_nightly_or_source]: True", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[is_predispatch]: False", "[tquy2we2efmowuj4wuqzcfcfdcrkzkzmwdae6hprj7fa64jpusq] inductor_config[joint_custom_post_pass]: None", "[tquy2we2efmowuj4wuqzcfcfdcrkzkzmwdae6hprj7fa64jpusq] inductor_config[joint_custom_pre_pass]: None", "[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[joint_graph_constant_folding]: True", "[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[keep_output_stride]: True", "[j6c55jha5r2sdys2rwq7uqhtleea5dgjcye7nicfgft36v7xfvp] inductor_config[kernel_name_max_ops]: 10", "[4p2fdjlvxrcw7c7fvzm5huhtqxnro4kvkx56f7p5zyrxqkwooov] inductor_config[layout_opt_default]: 1", "[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[layout_optimization]: True", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[max_autotune]: False", 
"[uqlsbif4zxd75vt522p52txyuguieipi2lwz5g5awt56lccqk7s] inductor_config[max_autotune_conv_backends]: ATEN,TRITON", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[max_autotune_gemm]: False", "[2y7luesktjrque3nr7qtxnum2mkbeegzdrsvkm3rvdlhqboajhx] inductor_config[max_autotune_gemm_backends]: ATEN,TRITON,CPP", "[jvchmi66fvqzlemhr5fcqorz5trfdtdalzfagtj2aolmimwqhdq] inductor_config[max_autotune_gemm_search_space]: DEFAULT", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[max_autotune_pointwise]: False", "[bh33ranllcgilhgmgr3qvygzxjm6isq5iexnfm3zx6fnr2zwlp2] inductor_config[max_autotune_subproc_graceful_timeout_seconds]: 1.0", "[iglov24t7x5ruci344aer2tm6nqshi4veuw4wxlssxtu46cx76m] inductor_config[max_autotune_subproc_result_timeout_seconds]: 60.0", "[pwoh5aypf4fxbntdvwt67rppxorqos6xr3w7qzeun6kblbfg2ga] inductor_config[max_autotune_subproc_terminate_timeout_seconds]: 2.0", "[aghvyrrgwvxijco2pk5wzc3cgmmthrbmgxitiibxuuscxdwrjd3] inductor_config[max_epilogue_benchmarked_choices]: 1", "[jykiys6ynafs3zdylwa5ggq6j655mxeh42d6mtdi22gffkrmiac] inductor_config[max_fusion_size]: 64", "[yttmfmxblgcbsvbokguzowcorrcxz5uunxtcvsbe6nijgcx45he] inductor_config[max_pointwise_cat_inputs]: 8", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[memory_planning]: False", "[x75won4jmsgeb63pcvwr2y4eteyzzdhmf5rv6xhjppie4hx2yu5] inductor_config[memory_pool]: intermediates", "[v2td5s4lnsvyxvaevy4chx6kc5h3mm2axazbgwimqule5zrzao7] inductor_config[mixed_mm_choice]: heuristic", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[nan_asserts]: False", "[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[optimize_scatter_upon_const_tensor]: True", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[pad_channels_last]: False", "[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[pattern_matcher]: True", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[permute_fusion]: False", "[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[pick_loop_orders]: True", "[tquy2we2efmowuj4wuqzcfcfdcrkzkzmwdae6hprj7fa64jpusq] inductor_config[post_grad_custom_post_pass]: None", "[tquy2we2efmowuj4wuqzcfcfdcrkzkzmwdae6hprj7fa64jpusq] inductor_config[post_grad_custom_pre_pass]: None", "[4bryyl4ahh5whyg3zwqebpwmjnx6w77nqgqbdjlowju6lkqtn7w] inductor_config[post_grad_fusion_options]: {}", "[tquy2we2efmowuj4wuqzcfcfdcrkzkzmwdae6hprj7fa64jpusq] inductor_config[pre_grad_custom_pass]: None", "[gtkv35cxmtt6tr556buxi277a67g25mjojnv32dc4bjvc7bwscw] inductor_config[pre_grad_fusion_options]: {'batch_linear': {}, 'batch_linear_lhs': {}, 'batch_layernorm': {}, 'batch_tanh': {}, 'batch_relu': {}, 'batch_sigmoid': {}}", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[profile_bandwidth]: False", "[tquy2we2efmowuj4wuqzcfcfdcrkzkzmwdae6hprj7fa64jpusq] inductor_config[profile_bandwidth_output]: None", "[v3hzzlv4tjgvp3pyhmzagjd25orl6n7nynoa7svlhhwk73b7u3c] inductor_config[profile_bandwidth_regex]: ", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[profiler_mark_wrapper_call]: False", "[yttmfmxblgcbsvbokguzowcorrcxz5uunxtcvsbe6nijgcx45he] inductor_config[realize_acc_reads_threshold]: 8", "[rr5m5hsocoyodldz7vcvaizdwvm2rt34evmqdxvng7wz3tufvo6] inductor_config[realize_opcount_threshold]: 30", "[lkkae3meylaixfif4thncru4hjqeaislawjoghffrbwuscaagei] inductor_config[realize_reads_threshold]: 4", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] 
inductor_config[reorder_for_compute_comm_overlap]: False", "[ssupi7bu3rrhdpg2jyegzncu3kg3nnhklyliqvutaxgs7y7k3dx] inductor_config[reorder_for_compute_comm_overlap_passes]: ['reorder_compute_for_overlap', 'sink_waits', 'raise_comms']", "[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[reorder_for_locality]: True", "[h25wqx6vliw4j5rtzzbv6latydxyei3deyg6v7wzvnzryfktuki] inductor_config[rocm.arch]: []", "[tquy2we2efmowuj4wuqzcfcfdcrkzkzmwdae6hprj7fa64jpusq] inductor_config[rocm.ck_dir]: None", "[oartxnko2l7d67tzwwm2otcumaut3n4wwcfgz3o377hmcveu5ft] inductor_config[rocm.ck_supported_arch]: ['gfx90a', 'gfx940', 'gfx941', 'gfx942']", "[klfqjprnpfhcdurgvuikvc4rpd5ynkpk77toousr5h3u5roty6p] inductor_config[rocm.compile_opt_level]: -O2", "[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[rocm.flush_denormals]: True", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[rocm.is_debug]: False", "[tquy2we2efmowuj4wuqzcfcfdcrkzkzmwdae6hprj7fa64jpusq] inductor_config[rocm.n_max_profiling_configs]: None", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[rocm.print_kernel_resource_usage]: False", "[tquy2we2efmowuj4wuqzcfcfdcrkzkzmwdae6hprj7fa64jpusq] inductor_config[rocm.rocm_home]: None", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[rocm.save_temps]: False", "[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[rocm.use_fast_math]: True", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[rocm.use_preselected_instances]: False", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[save_args]: False", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[search_autotune_cache]: False", "[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[shape_padding]: True", "[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[size_asserts]: True", "[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[split_cat_fx_passes]: True", "[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[split_reductions]: True", "[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[static_weight_shapes]: True", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[triton.autotune_at_compile_time]: False", "[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[triton.autotune_cublasLt]: True", "[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[triton.autotune_pointwise]: True", "[tuax46wac7rfv2trf5gcps6vleo3cq44lbnrdxtprvo3ljjaddj] inductor_config[triton.cudagraph_dynamic_shape_warn_limit]: 50", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[triton.cudagraph_skip_dynamic_graphs]: False", "[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[triton.cudagraph_support_input_mutation]: True", "[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[triton.cudagraph_trees]: True", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[triton.cudagraph_trees_history_recording]: False", "[ljdqgtysl3vdf7j6attlz5gmjg2ncihnveojfyubosplmkrjgra] inductor_config[triton.cudagraph_unexpected_rerecord_limit]: 128", "[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[triton.cudagraphs]: True", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[triton.debug_sync_graph]: False", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] 
inductor_config[triton.debug_sync_kernel]: False", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[triton.dense_indexing]: False", "[yrty22bseefglnysuoec4ji7j2rnaggdj3g33zzj7avogwfmgdw] inductor_config[triton.descriptive_names]: original_aten", "[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[triton.divisible_by_16]: True", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[triton.fast_path_cudagraph_asserts]: False", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[triton.force_cudagraph_sync]: False", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[triton.force_cudagraphs_warmup]: False", "[tquy2we2efmowuj4wuqzcfcfdcrkzkzmwdae6hprj7fa64jpusq] inductor_config[triton.inject_relu_bug_TESTING_ONLY]: None", "[pr5nr4a7dthirgd2ljo3d2xakc63ywxugusu6mkmr6gmpeliyib] inductor_config[triton.max_tiles]: 2", "[fv6slhtedtydps5s5u2etitscliblzcidyitqf7krsv4e23fzk6] inductor_config[triton.min_split_scan_rblock]: 256", "[vrl5ktomgtzox5xucd3np6vug3vyj6hwwzahqijuwpmamlv7ohi] inductor_config[triton.multi_kernel]: 0", "[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[triton.persistent_reductions]: True", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[triton.skip_cudagraph_warmup]: False", "[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[triton.slow_path_cudagraph_asserts]: True", "[ebt2ncs4f5y7dn7btzi76mnouepvzad474tmp5iju4wiuumjl4s] inductor_config[triton.spill_threshold]: 16", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[triton.store_cubin]: False", "[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[triton.tiling_prevents_pointwise_fusion]: True", "[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[triton.tiling_prevents_reduction_fusion]: True", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[triton.unique_kernel_names]: False", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[triton.use_block_ptr]: False", "[wft6ljqsfr3x4m7fa5zuyb7cwknky4irrxz4bjr6uzr2yiopxqj] inductor_config[unbacked_symint_fallback]: 8192", "[yttmfmxblgcbsvbokguzowcorrcxz5uunxtcvsbe6nijgcx45he] inductor_config[unroll_reductions_threshold]: 8", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[unsafe_ignore_unsupported_triton_autotune_args]: False", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[use_minimal_arrayref_interface]: False", "[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[use_mixed_mm]: True", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[verbose_progress]: False", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[warn_mix_layout]: False", "[hofygoznqmna6yvgsc6itdddi4hxftssgegh6wquixg2yng3a3z] inductor_config[worker_start_method]: subprocess"]} +V0806 13:56:00.761000 4107173 torch/_dynamo/utils.py:824] {"chromium_event": {}, "has_payload": "38efd6f35d8d14d5151e15991476a39a"} + { + "name": "inductor_compile", + "ts": 1722977760761464.0, + "args": null, + "ph": "E", + "pid": 0 + } +V0806 13:56:00.761000 4107173 torch/_dynamo/utils.py:824] {"chromium_event": {}, "has_payload": "8cfe0861d6e86b59ec82f05bd122f742"} + { + "name": "compile_fx_inner", + "ts": 1722977760761566.5, + "args": null, + "ph": "E", + "pid": 0 + } +V0806 13:56:00.761000 4107173 torch/_dynamo/utils.py:824] {"chromium_event": {}, "has_payload": 
"730fc8316778651443b74058f01841e6"} + { + "name": "compile_fx..fw_compiler_base", + "ts": 1722977760761706.8, + "args": null, + "ph": "E", + "pid": 0 + } +V0806 13:56:00.764000 4107173 torch/_dynamo/utils.py:824] {"chromium_event": {}, "has_payload": "fbafd3f40aba7f04b0dafd48adfab1af"} + { + "name": "create_aot_dispatcher_function", + "ts": 1722977760764617.2, + "args": null, + "ph": "E", + "pid": 0 + } +V0806 13:56:00.764000 4107173 torch/_dynamo/utils.py:824] {"chromium_event": {}, "has_payload": "886313546297de520526a20c4afcc203"} + { + "name": "backend_compile", + "ts": 1722977760764839.8, + "args": null, + "ph": "E", + "pid": 0 + } +V0806 13:56:00.764000 4107173 torch/_dynamo/utils.py:824] {"chromium_event": {}, "has_payload": "535d95fac8886817ed3f41158501e8fe"} + { + "name": "OutputGraph.call_user_compiler", + "ts": 1722977760764914.2, + "args": null, + "ph": "E", + "pid": 0 + } +V0806 13:56:01.004000 4107173 torch/_dynamo/guards.py:2195] {"dynamo_cpp_guards_str": {}, "frame_id": 2, "frame_compile_id": 0, "attempt": 1, "has_payload": "634e39b71251a8c4dda8e4e18aea5906"} + + TREE_GUARD_MANAGER: + +- RootGuardManager + | +- DEFAULT_DEVICE: utils_device.CURRENT_DEVICE == None # _dynamo/output_graph.py:462 in init_ambient_guards + | +- GLOBAL_STATE: ___check_global_state() + | +- GuardManager: source=L['mod'], accessed_by=DictGetItemGuardAccessor(mod) + | | +- TYPE_MATCH: ___check_type_id(L['mod'], 94206531299328) + | | +- GuardManager: source=L['mod'].__dict__, accessed_by=GetGenericDictGuardAccessor + | | | +- DICT_CONTAINS: not ___dict_contains('forward', L['mod'].__dict__) + | | | +- DictSubclassGuardManager: source=L['mod']._forward_hooks, accessed_by=DictGetItemGuardAccessor(_forward_hooks) + | | | +- DictSubclassGuardManager: source=L['mod']._forward_pre_hooks, accessed_by=DictGetItemGuardAccessor(_forward_pre_hooks) + | | | +- GuardManager: source=L['mod'].config, accessed_by=DictGetItemGuardAccessor(config) + | | | | +- TYPE_MATCH: ___check_type_id(L['mod'].config, 94206531296000) + | | | | +- GuardManager: source=L['mod'].config.block_size, accessed_by=GetAttrGuardAccessor(block_size) + | | | | | +- EQUALS_MATCH: L['mod'].config.block_size == 1024 + | | | +- GuardManager: source=L['mod']._modules, accessed_by=DictGetItemGuardAccessor(_modules) + | | | | +- DICT_LENGTH: len(L['mod']._modules) == 2 + | | | | +- GuardManager: source=L['mod']._modules['transformer'], accessed_by=DictGetItemGuardAccessor(transformer) + | | | | | +- TYPE_MATCH: ___check_type_id(L['mod']._modules['transformer'], 94206198915872) + | | | | | +- GuardManager: source=L['mod']._modules['transformer'].__dict__, accessed_by=GetGenericDictGuardAccessor + | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules, accessed_by=DictGetItemGuardAccessor(_modules) + | | | | | | | +- DICT_LENGTH: len(L['mod']._modules['transformer']._modules) == 5 + | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['wte'], accessed_by=DictGetItemGuardAccessor(wte) + | | | | | | | | +- TYPE_MATCH: ___check_type_id(L['mod']._modules['transformer']._modules['wte'], 94206200083792) + | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['wte'].__dict__, accessed_by=GetGenericDictGuardAccessor + | | | | | | | | | +- DICT_CONTAINS: not ___dict_contains('forward', L['mod']._modules['transformer']._modules['wte'].__dict__) + | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['wte'].sparse, 
accessed_by=DictGetItemGuardAccessor(sparse)
+ | | | | | | | | | | +- ID_MATCH: ___check_obj_id(L['mod']._modules['transformer']._modules['wte'].sparse, 94206128801376)
+ | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['wte'].max_norm, accessed_by=DictGetItemGuardAccessor(max_norm)
+ | | | | | | | | | | +- ID_MATCH: ___check_obj_id(L['mod']._modules['transformer']._modules['wte'].max_norm, 94206128752608)
+ | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['wte'].norm_type, accessed_by=DictGetItemGuardAccessor(norm_type)
+ | | | | | | | | | | +- EQUALS_MATCH: L['mod']._modules['transformer']._modules['wte'].norm_type == 2.0
+ | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['wte']._parameters, accessed_by=DictGetItemGuardAccessor(_parameters)
+ | | | | | | | | | | +- DICT_LENGTH: len(L['mod']._modules['transformer']._modules['wte']._parameters) == 1
+ | | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['wte']._parameters['weight'], accessed_by=DictGetItemGuardAccessor(weight)
+ | | | | | | | | | | | +- TENSOR_MATCH: check_tensor(L['mod']._modules['transformer']._modules['wte']._parameters['weight'], Parameter, DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), torch.float32, device=0, requires_grad=True, size=[50304, 768], stride=[768, 1])
+ | | | | | | | | | | | +- OBJECT_ALIASING: L['mod']._modules['transformer']._modules['wte']._parameters['weight'] is L['mod']._modules['lm_head']._parameters['weight']
+ | | | | | | | | | | | +- NO_TENSOR_ALIASING: check_no_aliasing(L['cloned_inputs'][0], L['mod']._modules['transformer']._modules['ln_f']._parameters['bias'], L['mod']._modules['transformer']._modules['wpe']._parameters['weight'], L['mod']._modules['transformer']._modules['wte']._parameters['weight'], L['mod']._modules['transformer']._modules['ln_f']._parameters['weight'], L['mod']._modules['transformer']._modules['h']._modules['0']._modules['ln_1']._parameters['bias'], L['mod']._modules['transformer']._modules['h']._modules['0']._modules['ln_2']._parameters['bias'], L['mod']._modules['transformer']._modules['h']._modules['1']._modules['ln_1']._parameters['bias'], L['mod']._modules['transformer']._modules['h']._modules['1']._modules['ln_2']._parameters['bias'], L['mod']._modules['transformer']._modules['h']._modules['2']._modules['ln_1']._parameters['bias'], L['mod']._modules['transformer']._modules['h']._modules['2']._modules['ln_2']._parameters['bias'], L['mod']._modules['transformer']._modules['h']._modules['3']._modules['ln_1']._parameters['bias'], L['mod']._modules['transformer']._modules['h']._modules['3']._modules['ln_2']._parameters['bias'], L['mod']._modules['transformer']._modules['h']._modules['4']._modules['ln_1']._parameters['bias'], L['mod']._modules['transformer']._modules['h']._modules['4']._modules['ln_2']._parameters['bias'], L['mod']._modules['transformer']._modules['h']._modules['5']._modules['ln_1']._parameters['bias'], L['mod']._modules['transformer']._modules['h']._modules['5']._modules['ln_2']._parameters['bias'], L['mod']._modules['transformer']._modules['h']._modules['6']._modules['ln_1']._parameters['bias'], L['mod']._modules['transformer']._modules['h']._modules['6']._modules['ln_2']._parameters['bias'], L['mod']._modules['transformer']._modules['h']._modules['7']._modules['ln_1']._parameters['bias'], L['mod']._modules['transformer']._modules['h']._modules['7']._modules['ln_2']._parameters['bias'],
L['mod']._modules['transformer']._modules['h']._modules['8']._modules['ln_1']._parameters['bias'], L['mod']._modules['transformer']._modules['h']._modules['8']._modules['ln_2']._parameters['bias'], L['mod']._modules['transformer']._modules['h']._modules['9']._modules['ln_1']._parameters['bias'], L['mod']._modules['transformer']._modules['h']._modules['9']._modules['ln_2']._parameters['bias'], L['mod']._modules['transformer']._modules['h']._modules['10']._modules['ln_1']._parameters['bias'], L['mod']._modules['transformer']._modules['h']._modules['10']._modules['ln_2']._parameters['bias'], L['mod']._modules['transformer']._modules['h']._modules['11']._modules['ln_1']._parameters['bias'], L['mod']._modules['transformer']._modules['h']._modules['11']._modules['ln_2']._parameters['bias'], L['mod']._modules['transformer']._modules['h']._modules['0']._modules['ln_1']._parameters['weight'], L['mod']._modules['transformer']._modules['h']._modules['0']._modules['ln_2']._parameters['weight'], L['mod']._modules['transformer']._modules['h']._modules['1']._modules['ln_1']._parameters['weight'], L['mod']._modules['transformer']._modules['h']._modules['1']._modules['ln_2']._parameters['weight'], L['mod']._modules['transformer']._modules['h']._modules['2']._modules['ln_1']._parameters['weight'], L['mod']._modules['transformer']._modules['h']._modules['2']._modules['ln_2']._parameters['weight'], L['mod']._modules['transformer']._modules['h']._modules['3']._modules['ln_1']._parameters['weight'], L['mod']._modules['transformer']._modules['h']._modules['3']._modules['ln_2']._parameters['weight'], L['mod']._modules['transformer']._modules['h']._modules['4']._modules['ln_1']._parameters['weight'], L['mod']._modules['transformer']._modules['h']._modules['4']._modules['ln_2']._parameters['weight'], L['mod']._modules['transformer']._modules['h']._modules['5']._modules['ln_1']._parameters['weight'], L['mod']._modules['transformer']._modules['h']._modules['5']._modules['ln_2']._parameters['weight'], L['mod']._modules['transformer']._modules['h']._modules['6']._modules['ln_1']._parameters['weight'], L['mod']._modules['transformer']._modules['h']._modules['6']._modules['ln_2']._parameters['weight'], L['mod']._modules['transformer']._modules['h']._modules['7']._modules['ln_1']._parameters['weight'], L['mod']._modules['transformer']._modules['h']._modules['7']._modules['ln_2']._parameters['weight'], L['mod']._modules['transformer']._modules['h']._modules['8']._modules['ln_1']._parameters['weight'], L['mod']._modules['transformer']._modules['h']._modules['8']._modules['ln_2']._parameters['weight'], L['mod']._modules['transformer']._modules['h']._modules['9']._modules['ln_1']._parameters['weight'], L['mod']._modules['transformer']._modules['h']._modules['9']._modules['ln_2']._parameters['weight'], L['mod']._modules['transformer']._modules['h']._modules['10']._modules['ln_1']._parameters['weight'], L['mod']._modules['transformer']._modules['h']._modules['10']._modules['ln_2']._parameters['weight'], L['mod']._modules['transformer']._modules['h']._modules['11']._modules['ln_1']._parameters['weight'], L['mod']._modules['transformer']._modules['h']._modules['11']._modules['ln_2']._parameters['weight'], L['mod']._modules['transformer']._modules['h']._modules['0']._modules['mlp']._modules['c_fc']._parameters['bias'], L['mod']._modules['transformer']._modules['h']._modules['1']._modules['mlp']._modules['c_fc']._parameters['bias'], 
L['mod']._modules['transformer']._modules['h']._modules['2']._modules['mlp']._modules['c_fc']._parameters['bias'], L['mod']._modules['transformer']._modules['h']._modules['3']._modules['mlp']._modules['c_fc']._parameters['bias'], L['mod']._modules['transformer']._modules['h']._modules['4']._modules['mlp']._modules['c_fc']._parameters['bias'], L['mod']._modules['transformer']._modules['h']._modules['5']._modules['mlp']._modules['c_fc']._parameters['bias'], L['mod']._modules['transformer']._modules['h']._modules['6']._modules['mlp']._modules['c_fc']._parameters['bias'], L['mod']._modules['transformer']._modules['h']._modules['7']._modules['mlp']._modules['c_fc']._parameters['bias'], L['mod']._modules['transformer']._modules['h']._modules['8']._modules['mlp']._modules['c_fc']._parameters['bias'], L['mod']._modules['transformer']._modules['h']._modules['9']._modules['mlp']._modules['c_fc']._parameters['bias'], L['mod']._modules['transformer']._modules['h']._modules['10']._modules['mlp']._modules['c_fc']._parameters['bias'], L['mod']._modules['transformer']._modules['h']._modules['11']._modules['mlp']._modules['c_fc']._parameters['bias'], L['mod']._modules['transformer']._modules['h']._modules['0']._modules['mlp']._modules['c_fc']._parameters['weight'], L['mod']._modules['transformer']._modules['h']._modules['0']._modules['mlp']._modules['c_proj']._parameters['bias'], L['mod']._modules['transformer']._modules['h']._modules['1']._modules['mlp']._modules['c_fc']._parameters['weight'], L['mod']._modules['transformer']._modules['h']._modules['1']._modules['mlp']._modules['c_proj']._parameters['bias'], L['mod']._modules['transformer']._modules['h']._modules['2']._modules['mlp']._modules['c_fc']._parameters['weight'], L['mod']._modules['transformer']._modules['h']._modules['2']._modules['mlp']._modules['c_proj']._parameters['bias'], L['mod']._modules['transformer']._modules['h']._modules['3']._modules['mlp']._modules['c_fc']._parameters['weight'], L['mod']._modules['transformer']._modules['h']._modules['3']._modules['mlp']._modules['c_proj']._parameters['bias'], L['mod']._modules['transformer']._modules['h']._modules['4']._modules['mlp']._modules['c_fc']._parameters['weight'], L['mod']._modules['transformer']._modules['h']._modules['4']._modules['mlp']._modules['c_proj']._parameters['bias'], L['mod']._modules['transformer']._modules['h']._modules['5']._modules['mlp']._modules['c_fc']._parameters['weight'], L['mod']._modules['transformer']._modules['h']._modules['5']._modules['mlp']._modules['c_proj']._parameters['bias'], L['mod']._modules['transformer']._modules['h']._modules['6']._modules['mlp']._modules['c_fc']._parameters['weight'], L['mod']._modules['transformer']._modules['h']._modules['6']._modules['mlp']._modules['c_proj']._parameters['bias'], L['mod']._modules['transformer']._modules['h']._modules['7']._modules['mlp']._modules['c_fc']._parameters['weight'], L['mod']._modules['transformer']._modules['h']._modules['7']._modules['mlp']._modules['c_proj']._parameters['bias'], L['mod']._modules['transformer']._modules['h']._modules['8']._modules['mlp']._modules['c_fc']._parameters['weight'], L['mod']._modules['transformer']._modules['h']._modules['8']._modules['mlp']._modules['c_proj']._parameters['bias'], L['mod']._modules['transformer']._modules['h']._modules['9']._modules['mlp']._modules['c_fc']._parameters['weight'], L['mod']._modules['transformer']._modules['h']._modules['9']._modules['mlp']._modules['c_proj']._parameters['bias'], 
L['mod']._modules['transformer']._modules['h']._modules['0']._modules['attn']._modules['c_attn']._parameters['bias'], L['mod']._modules['transformer']._modules['h']._modules['0']._modules['attn']._modules['c_proj']._parameters['bias'], L['mod']._modules['transformer']._modules['h']._modules['1']._modules['attn']._modules['c_attn']._parameters['bias'], L['mod']._modules['transformer']._modules['h']._modules['1']._modules['attn']._modules['c_proj']._parameters['bias'], L['mod']._modules['transformer']._modules['h']._modules['10']._modules['mlp']._modules['c_fc']._parameters['weight'], L['mod']._modules['transformer']._modules['h']._modules['10']._modules['mlp']._modules['c_proj']._parameters['bias'], L['mod']._modules['transformer']._modules['h']._modules['11']._modules['mlp']._modules['c_fc']._parameters['weight'], L['mod']._modules['transformer']._modules['h']._modules['11']._modules['mlp']._modules['c_proj']._parameters['bias'], L['mod']._modules['transformer']._modules['h']._modules['2']._modules['attn']._modules['c_attn']._parameters['bias'], L['mod']._modules['transformer']._modules['h']._modules['2']._modules['attn']._modules['c_proj']._parameters['bias'], L['mod']._modules['transformer']._modules['h']._modules['3']._modules['attn']._modules['c_attn']._parameters['bias'], L['mod']._modules['transformer']._modules['h']._modules['3']._modules['attn']._modules['c_proj']._parameters['bias'], L['mod']._modules['transformer']._modules['h']._modules['4']._modules['attn']._modules['c_attn']._parameters['bias'], L['mod']._modules['transformer']._modules['h']._modules['4']._modules['attn']._modules['c_proj']._parameters['bias'], L['mod']._modules['transformer']._modules['h']._modules['5']._modules['attn']._modules['c_attn']._parameters['bias'], L['mod']._modules['transformer']._modules['h']._modules['5']._modules['attn']._modules['c_proj']._parameters['bias'], L['mod']._modules['transformer']._modules['h']._modules['6']._modules['attn']._modules['c_attn']._parameters['bias'], L['mod']._modules['transformer']._modules['h']._modules['6']._modules['attn']._modules['c_proj']._parameters['bias'], L['mod']._modules['transformer']._modules['h']._modules['7']._modules['attn']._modules['c_attn']._parameters['bias'], L['mod']._modules['transformer']._modules['h']._modules['7']._modules['attn']._modules['c_proj']._parameters['bias'], L['mod']._modules['transformer']._modules['h']._modules['8']._modules['attn']._modules['c_attn']._parameters['bias'], L['mod']._modules['transformer']._modules['h']._modules['8']._modules['attn']._modules['c_proj']._parameters['bias'], L['mod']._modules['transformer']._modules['h']._modules['9']._modules['attn']._modules['c_attn']._parameters['bias'], L['mod']._modules['transformer']._modules['h']._modules['9']._modules['attn']._modules['c_proj']._parameters['bias'], L['mod']._modules['transformer']._modules['h']._modules['0']._modules['mlp']._modules['c_proj']._parameters['weight'], L['mod']._modules['transformer']._modules['h']._modules['1']._modules['mlp']._modules['c_proj']._parameters['weight'], L['mod']._modules['transformer']._modules['h']._modules['10']._modules['attn']._modules['c_attn']._parameters['bias'], L['mod']._modules['transformer']._modules['h']._modules['10']._modules['attn']._modules['c_proj']._parameters['bias'], L['mod']._modules['transformer']._modules['h']._modules['11']._modules['attn']._modules['c_attn']._parameters['bias'], L['mod']._modules['transformer']._modules['h']._modules['11']._modules['attn']._modules['c_proj']._parameters['bias'], 
L['mod']._modules['transformer']._modules['h']._modules['2']._modules['mlp']._modules['c_proj']._parameters['weight'], L['mod']._modules['transformer']._modules['h']._modules['3']._modules['mlp']._modules['c_proj']._parameters['weight'], L['mod']._modules['transformer']._modules['h']._modules['4']._modules['mlp']._modules['c_proj']._parameters['weight'], L['mod']._modules['transformer']._modules['h']._modules['5']._modules['mlp']._modules['c_proj']._parameters['weight'], L['mod']._modules['transformer']._modules['h']._modules['6']._modules['mlp']._modules['c_proj']._parameters['weight'], L['mod']._modules['transformer']._modules['h']._modules['7']._modules['mlp']._modules['c_proj']._parameters['weight'], L['mod']._modules['transformer']._modules['h']._modules['8']._modules['mlp']._modules['c_proj']._parameters['weight'], L['mod']._modules['transformer']._modules['h']._modules['9']._modules['mlp']._modules['c_proj']._parameters['weight'], L['mod']._modules['transformer']._modules['h']._modules['0']._modules['attn']._modules['c_attn']._parameters['weight'], L['mod']._modules['transformer']._modules['h']._modules['0']._modules['attn']._modules['c_proj']._parameters['weight'], L['mod']._modules['transformer']._modules['h']._modules['1']._modules['attn']._modules['c_attn']._parameters['weight'], L['mod']._modules['transformer']._modules['h']._modules['1']._modules['attn']._modules['c_proj']._parameters['weight'], L['mod']._modules['transformer']._modules['h']._modules['10']._modules['mlp']._modules['c_proj']._parameters['weight'], L['mod']._modules['transformer']._modules['h']._modules['11']._modules['mlp']._modules['c_proj']._parameters['weight'], L['mod']._modules['transformer']._modules['h']._modules['2']._modules['attn']._modules['c_attn']._parameters['weight'], L['mod']._modules['transformer']._modules['h']._modules['2']._modules['attn']._modules['c_proj']._parameters['weight'], L['mod']._modules['transformer']._modules['h']._modules['3']._modules['attn']._modules['c_attn']._parameters['weight'], L['mod']._modules['transformer']._modules['h']._modules['3']._modules['attn']._modules['c_proj']._parameters['weight'], L['mod']._modules['transformer']._modules['h']._modules['4']._modules['attn']._modules['c_attn']._parameters['weight'], L['mod']._modules['transformer']._modules['h']._modules['4']._modules['attn']._modules['c_proj']._parameters['weight'], L['mod']._modules['transformer']._modules['h']._modules['5']._modules['attn']._modules['c_attn']._parameters['weight'], L['mod']._modules['transformer']._modules['h']._modules['5']._modules['attn']._modules['c_proj']._parameters['weight'], L['mod']._modules['transformer']._modules['h']._modules['6']._modules['attn']._modules['c_attn']._parameters['weight'], L['mod']._modules['transformer']._modules['h']._modules['6']._modules['attn']._modules['c_proj']._parameters['weight'], L['mod']._modules['transformer']._modules['h']._modules['7']._modules['attn']._modules['c_attn']._parameters['weight'], L['mod']._modules['transformer']._modules['h']._modules['7']._modules['attn']._modules['c_proj']._parameters['weight'], L['mod']._modules['transformer']._modules['h']._modules['8']._modules['attn']._modules['c_attn']._parameters['weight'], L['mod']._modules['transformer']._modules['h']._modules['8']._modules['attn']._modules['c_proj']._parameters['weight'], L['mod']._modules['transformer']._modules['h']._modules['9']._modules['attn']._modules['c_attn']._parameters['weight'], 
L['mod']._modules['transformer']._modules['h']._modules['9']._modules['attn']._modules['c_proj']._parameters['weight'], L['mod']._modules['transformer']._modules['h']._modules['10']._modules['attn']._modules['c_attn']._parameters['weight'], L['mod']._modules['transformer']._modules['h']._modules['10']._modules['attn']._modules['c_proj']._parameters['weight'], L['mod']._modules['transformer']._modules['h']._modules['11']._modules['attn']._modules['c_attn']._parameters['weight'], L['mod']._modules['transformer']._modules['h']._modules['11']._modules['attn']._modules['c_proj']._parameters['weight'])
+ | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['wte'].padding_idx, accessed_by=DictGetItemGuardAccessor(padding_idx)
+ | | | | | | | | | | +- ID_MATCH: ___check_obj_id(L['mod']._modules['transformer']._modules['wte'].padding_idx, 94206128752608)
+ | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['wte'].scale_grad_by_freq, accessed_by=DictGetItemGuardAccessor(scale_grad_by_freq)
+ | | | | | | | | | | +- ID_MATCH: ___check_obj_id(L['mod']._modules['transformer']._modules['wte'].scale_grad_by_freq, 94206128801376)
+ | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['wpe'], accessed_by=DictGetItemGuardAccessor(wpe)
+ | | | | | | | | +- TYPE_MATCH: ___check_type_id(L['mod']._modules['transformer']._modules['wpe'], 94206200083792)
+ | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['wpe'].__dict__, accessed_by=GetGenericDictGuardAccessor
+ | | | | | | | | | +- DICT_CONTAINS: not ___dict_contains('forward', L['mod']._modules['transformer']._modules['wpe'].__dict__)
+ | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['wpe'].sparse, accessed_by=DictGetItemGuardAccessor(sparse)
+ | | | | | | | | | | +- ID_MATCH: ___check_obj_id(L['mod']._modules['transformer']._modules['wpe'].sparse, 94206128801376)
+ | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['wpe'].max_norm, accessed_by=DictGetItemGuardAccessor(max_norm)
+ | | | | | | | | | | +- ID_MATCH: ___check_obj_id(L['mod']._modules['transformer']._modules['wpe'].max_norm, 94206128752608)
+ | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['wpe'].norm_type, accessed_by=DictGetItemGuardAccessor(norm_type)
+ | | | | | | | | | | +- EQUALS_MATCH: L['mod']._modules['transformer']._modules['wpe'].norm_type == 2.0
+ | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['wpe']._parameters, accessed_by=DictGetItemGuardAccessor(_parameters)
+ | | | | | | | | | | +- DICT_LENGTH: len(L['mod']._modules['transformer']._modules['wpe']._parameters) == 1
+ | | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['wpe']._parameters['weight'], accessed_by=DictGetItemGuardAccessor(weight)
+ | | | | | | | | | | | +- TENSOR_MATCH: check_tensor(L['mod']._modules['transformer']._modules['wpe']._parameters['weight'], Parameter, DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), torch.float32, device=0, requires_grad=True, size=[1024, 768], stride=[768, 1])
+ | | | | | | | | | | | +- NO_TENSOR_ALIASING
+ | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['wpe'].padding_idx, accessed_by=DictGetItemGuardAccessor(padding_idx)
+ | | | | | | | | | | +- ID_MATCH: ___check_obj_id(L['mod']._modules['transformer']._modules['wpe'].padding_idx, 94206128752608)
+ | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['wpe'].scale_grad_by_freq, accessed_by=DictGetItemGuardAccessor(scale_grad_by_freq)
+ | | | | | | | | | | +- ID_MATCH: ___check_obj_id(L['mod']._modules['transformer']._modules['wpe'].scale_grad_by_freq, 94206128801376)
+ | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['drop'], accessed_by=DictGetItemGuardAccessor(drop)
+ | | | | | | | | +- TYPE_MATCH: ___check_type_id(L['mod']._modules['transformer']._modules['drop'], 94206199111456)
+ | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['drop'].__dict__, accessed_by=GetGenericDictGuardAccessor
+ | | | | | | | | | +- DICT_CONTAINS: not ___dict_contains('forward', L['mod']._modules['transformer']._modules['drop'].__dict__)
+ | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['drop'].p, accessed_by=DictGetItemGuardAccessor(p)
+ | | | | | | | | | | +- EQUALS_MATCH: L['mod']._modules['transformer']._modules['drop'].p == 0.0
+ | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['drop'].inplace, accessed_by=DictGetItemGuardAccessor(inplace)
+ | | | | | | | | | | +- ID_MATCH: ___check_obj_id(L['mod']._modules['transformer']._modules['drop'].inplace, 94206128801376)
+ | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['drop'].training, accessed_by=DictGetItemGuardAccessor(training)
+ | | | | | | | | | | +- ID_MATCH: ___check_obj_id(L['mod']._modules['transformer']._modules['drop'].training, 94206128801408)
+ | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['h'], accessed_by=DictGetItemGuardAccessor(h)
+ | | | | | | | | +- TYPE_MATCH: ___check_type_id(L['mod']._modules['transformer']._modules['h'], 94206198914912)
+ | | | | | | | | +- LENGTH_CHECK: len(L['mod']._modules['transformer']._modules['h']) == 12
+ | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['h'].__dict__, accessed_by=GetGenericDictGuardAccessor
+ | | | | | | | | | +- DictGuardManager: source=L['mod']._modules['transformer']._modules['h']._modules, accessed_by=DictGetItemGuardAccessor(_modules)
+ | | | | | | | | | | +- KeyValueManager pair at index=0
+ | | | | | | | | | | | +- KeyManager: GuardManager: source=list(L['mod']._modules['transformer']._modules['h']._modules.keys())[0]
+ | | | | | | | | | | | | +- EQUALS_MATCH: list(L['mod']._modules['transformer']._modules['h']._modules.keys())[0] == '0'
+ | | | | | | | | | | | +- ValueManager: GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['0']
+ | | | | | | | | | | | | +- TYPE_MATCH: ___check_type_id(L['mod']._modules['transformer']._modules['h']._modules['0'], 94206531295040)
+ | | | | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['0'].__dict__, accessed_by=GetGenericDictGuardAccessor
+ | | | | | | | | | | | | | +- DICT_CONTAINS: not ___dict_contains('forward', L['mod']._modules['transformer']._modules['h']._modules['0'].__dict__)
+ | | | | | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['0']._modules, accessed_by=DictGetItemGuardAccessor(_modules)
+ | | | | | | | | | | | | | | +- DICT_LENGTH: len(L['mod']._modules['transformer']._modules['h']._modules['0']._modules) == 4
+ | | | | | | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['0']._modules['ln_1'], accessed_by=DictGetItemGuardAccessor(ln_1)
+ | | | | | | | | | | | | | | | +- TYPE_MATCH: ___check_type_id(L['mod']._modules['transformer']._modules['h']._modules['0']._modules['ln_1'], 94206531292160)
+ | | | | | | | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['0']._modules['ln_1'].__dict__, accessed_by=GetGenericDictGuardAccessor
+ | | | | | | | | | | | | | | | | +- DICT_CONTAINS: not ___dict_contains('forward', L['mod']._modules['transformer']._modules['h']._modules['0']._modules['ln_1'].__dict__)
+ | | | | | | | | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['0']._modules['ln_1']._parameters, accessed_by=DictGetItemGuardAccessor(_parameters)
+ | | | | | | | | | | | | | | | | | +- DICT_LENGTH: len(L['mod']._modules['transformer']._modules['h']._modules['0']._modules['ln_1']._parameters) == 2
+ | | | | | | | | | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['0']._modules['ln_1']._parameters['weight'], accessed_by=DictGetItemGuardAccessor(weight)
+ | | | | | | | | | | | | | | | | | | +- TENSOR_MATCH: check_tensor(L['mod']._modules['transformer']._modules['h']._modules['0']._modules['ln_1']._parameters['weight'], Parameter, DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), torch.float32, device=0, requires_grad=True, size=[768], stride=[1])
+ | | | | | | | | | | | | | | | | | | +- NO_TENSOR_ALIASING
+ | | | | | | | | | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['0']._modules['ln_1']._parameters['bias'], accessed_by=DictGetItemGuardAccessor(bias)
+ | | | | | | | | | | | | | | | | | | +- TENSOR_MATCH: check_tensor(L['mod']._modules['transformer']._modules['h']._modules['0']._modules['ln_1']._parameters['bias'], Parameter, DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), torch.float32, device=0, requires_grad=True, size=[768], stride=[1])
+ | | | | | | | | | | | | | | | | | | +- NO_TENSOR_ALIASING
+ | | | | | | | | | | | | | | | | +- DictSubclassGuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['0']._modules['ln_1']._forward_hooks, accessed_by=DictGetItemGuardAccessor(_forward_hooks)
+ | | | | | | | | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['0']._modules['ln_1']._backward_hooks, accessed_by=DictGetItemGuardAccessor(_backward_hooks)
+ | | | | | | | | | | | | | | | | | +- DICT_LENGTH: not L['mod']._modules['transformer']._modules['h']._modules['0']._modules['ln_1']._backward_hooks
+ | | | | | | | | | | | | | | | | +- DictSubclassGuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['0']._modules['ln_1']._forward_pre_hooks, accessed_by=DictGetItemGuardAccessor(_forward_pre_hooks)
+ | | | | | | | | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['0']._modules['ln_1']._backward_pre_hooks, accessed_by=DictGetItemGuardAccessor(_backward_pre_hooks)
+ | | | | | | | | | | | | | | | | | +- DICT_LENGTH: not L['mod']._modules['transformer']._modules['h']._modules['0']._modules['ln_1']._backward_pre_hooks
+ | | | | | | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['0']._modules['attn'], accessed_by=DictGetItemGuardAccessor(attn)
+ | | | | | | | | | | | | | | | +- TYPE_MATCH: ___check_type_id(L['mod']._modules['transformer']._modules['h']._modules['0']._modules['attn'], 94206531293120)
+ | | | | | | | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['0']._modules['attn'].__dict__, accessed_by=GetGenericDictGuardAccessor
+ | | | | | | | | | | | | | | | | +- DICT_CONTAINS: not ___dict_contains('forward', L['mod']._modules['transformer']._modules['h']._modules['0']._modules['attn'].__dict__)
+ | | | | | | | | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['0']._modules['attn'].flash, accessed_by=DictGetItemGuardAccessor(flash)
+ | | | | | | | | | | | | | | | | | +- ID_MATCH: ___check_obj_id(L['mod']._modules['transformer']._modules['h']._modules['0']._modules['attn'].flash, 94206128801408)
+ | | | | | | | | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['0']._modules['attn'].n_embd, accessed_by=DictGetItemGuardAccessor(n_embd)
+ | | | | | | | | | | | | | | | | | +- EQUALS_MATCH: L['mod']._modules['transformer']._modules['h']._modules['0']._modules['attn'].n_embd == 768
+ | | | | | | | | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['0']._modules['attn'].n_head, accessed_by=DictGetItemGuardAccessor(n_head)
+ | | | | | | | | | | | | | | | | | +- EQUALS_MATCH: L['mod']._modules['transformer']._modules['h']._modules['0']._modules['attn'].n_head == 12
+ | | | | | | | | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['0']._modules['attn'].dropout, accessed_by=DictGetItemGuardAccessor(dropout)
+ | | | | | | | | | | | | | | | | | +- EQUALS_MATCH: L['mod']._modules['transformer']._modules['h']._modules['0']._modules['attn'].dropout == 0.0
+ | | | | | | | | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['0']._modules['attn']._modules, accessed_by=DictGetItemGuardAccessor(_modules)
+ | | | | | | | | | | | | | | | | | +- DICT_LENGTH: len(L['mod']._modules['transformer']._modules['h']._modules['0']._modules['attn']._modules) == 4
+ | | | | | | | | | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['0']._modules['attn']._modules['c_attn'], accessed_by=DictGetItemGuardAccessor(c_attn)
+ | | | | | | | | | | | | | | | | | | +- TYPE_MATCH: ___check_type_id(L['mod']._modules['transformer']._modules['h']._modules['0']._modules['attn']._modules['c_attn'], 94206198956688)
+ | | | | | | | | | | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['0']._modules['attn']._modules['c_attn'].__dict__, accessed_by=GetGenericDictGuardAccessor
+ | | | | | | | | | | | | | | | | | | | +- DICT_CONTAINS: not ___dict_contains('forward', L['mod']._modules['transformer']._modules['h']._modules['0']._modules['attn']._modules['c_attn'].__dict__)
+ | | | | | | | | | | | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['0']._modules['attn']._modules['c_attn']._parameters, accessed_by=DictGetItemGuardAccessor(_parameters)
+ | | | | | | | | | | | | | | | | | | | | +- DICT_LENGTH: len(L['mod']._modules['transformer']._modules['h']._modules['0']._modules['attn']._modules['c_attn']._parameters) == 2
+ | | | | | | | | | | | | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['0']._modules['attn']._modules['c_attn']._parameters['weight'], accessed_by=DictGetItemGuardAccessor(weight)
+ | | | | | | | | | | | | | | | | | | | | | +- TENSOR_MATCH: check_tensor(L['mod']._modules['transformer']._modules['h']._modules['0']._modules['attn']._modules['c_attn']._parameters['weight'], Parameter, DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), torch.float32, device=0, requires_grad=True, size=[2304, 768], stride=[768, 1])
+ | | | | | | | | | | | | | | | | | | | | | +- NO_TENSOR_ALIASING
+ | | | | | | | | | | | | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['0']._modules['attn']._modules['c_attn']._parameters['bias'], accessed_by=DictGetItemGuardAccessor(bias)
+ | | | | | | | | | | | | | | | | | | | | | +- TENSOR_MATCH: check_tensor(L['mod']._modules['transformer']._modules['h']._modules['0']._modules['attn']._modules['c_attn']._parameters['bias'], Parameter, DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), torch.float32, device=0, requires_grad=True, size=[2304], stride=[1])
+ | | | | | | | | | | | | | | | | | | | | | +- NO_TENSOR_ALIASING
+ | | | | | | | | | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['0']._modules['attn']._modules['c_proj'], accessed_by=DictGetItemGuardAccessor(c_proj)
+ | | | | | | | | | | | | | | | | | | +- TYPE_MATCH: ___check_type_id(L['mod']._modules['transformer']._modules['h']._modules['0']._modules['attn']._modules['c_proj'], 94206198956688)
+ | | | | | | | | | | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['0']._modules['attn']._modules['c_proj'].__dict__, accessed_by=GetGenericDictGuardAccessor
+ | | | | | | | | | | | | | | | | | | | +- DICT_CONTAINS: not ___dict_contains('forward', L['mod']._modules['transformer']._modules['h']._modules['0']._modules['attn']._modules['c_proj'].__dict__)
+ | | | | | | | | | | | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['0']._modules['attn']._modules['c_proj']._parameters, accessed_by=DictGetItemGuardAccessor(_parameters)
+ | | | | | | | | | | | | | | | | | | | | +- DICT_LENGTH: len(L['mod']._modules['transformer']._modules['h']._modules['0']._modules['attn']._modules['c_proj']._parameters) == 2
+ | | | | | | | | | | | | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['0']._modules['attn']._modules['c_proj']._parameters['weight'], accessed_by=DictGetItemGuardAccessor(weight)
+ | | | | | | | | | | | | | | | | | | | | | +- TENSOR_MATCH: check_tensor(L['mod']._modules['transformer']._modules['h']._modules['0']._modules['attn']._modules['c_proj']._parameters['weight'], Parameter, DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), torch.float32, device=0, requires_grad=True, size=[768, 768], stride=[768, 1])
+ | | | | | | | | | | | | | | | | | | | | | +- NO_TENSOR_ALIASING
+ | | | | | | | | | | | | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['0']._modules['attn']._modules['c_proj']._parameters['bias'], accessed_by=DictGetItemGuardAccessor(bias)
+ | | | | | | | | | | | | | | | | | | | | | +- TENSOR_MATCH: check_tensor(L['mod']._modules['transformer']._modules['h']._modules['0']._modules['attn']._modules['c_proj']._parameters['bias'], Parameter, DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), torch.float32, device=0, requires_grad=True, size=[768], stride=[1])
+ | | | | | | | | | | | | | | | | | | | | | +- NO_TENSOR_ALIASING
+ | | | | | | | | | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['0']._modules['attn']._modules['attn_dropout'], accessed_by=DictGetItemGuardAccessor(attn_dropout)
+ | | | | | | | | | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['0']._modules['attn']._modules['resid_dropout'], accessed_by=DictGetItemGuardAccessor(resid_dropout)
+ | | | | | | | | | | | | | | | | | | +- TYPE_MATCH: ___check_type_id(L['mod']._modules['transformer']._modules['h']._modules['0']._modules['attn']._modules['resid_dropout'], 94206199111456)
+ | | | | | | | | | | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['0']._modules['attn']._modules['resid_dropout'].__dict__, accessed_by=GetGenericDictGuardAccessor
+ | | | | | | | | | | | | | | | | | | | +- DICT_CONTAINS: not ___dict_contains('forward', L['mod']._modules['transformer']._modules['h']._modules['0']._modules['attn']._modules['resid_dropout'].__dict__)
+ | | | | | | | | | | | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['0']._modules['attn']._modules['resid_dropout'].p, accessed_by=DictGetItemGuardAccessor(p)
+ | | | | | | | | | | | | | | | | | | | | +- EQUALS_MATCH: L['mod']._modules['transformer']._modules['h']._modules['0']._modules['attn']._modules['resid_dropout'].p == 0.0
+ | | | | | | | | | | | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['0']._modules['attn']._modules['resid_dropout'].inplace, accessed_by=DictGetItemGuardAccessor(inplace)
+ | | | | | | | | | | | | | | | | | | | | +- ID_MATCH: ___check_obj_id(L['mod']._modules['transformer']._modules['h']._modules['0']._modules['attn']._modules['resid_dropout'].inplace, 94206128801376)
+ | | | | | | | | | | | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['0']._modules['attn']._modules['resid_dropout'].training, accessed_by=DictGetItemGuardAccessor(training)
+ | | | | | | | | | | | | | | | | | | | | +- ID_MATCH: ___check_obj_id(L['mod']._modules['transformer']._modules['h']._modules['0']._modules['attn']._modules['resid_dropout'].training, 94206128801408)
+ | | | | | | | | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['0']._modules['attn'].training, accessed_by=DictGetItemGuardAccessor(training)
+ | | | | | | | | | | | | | | | | | +- ID_MATCH: ___check_obj_id(L['mod']._modules['transformer']._modules['h']._modules['0']._modules['attn'].training, 94206128801408)
+ | | | | | | | | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['0']._modules['attn']._parameters, accessed_by=DictGetItemGuardAccessor(_parameters)
+ | | | | | | | | | | | | | | | | | +- DICT_LENGTH: not L['mod']._modules['transformer']._modules['h']._modules['0']._modules['attn']._parameters
+ | | | | | | | | | | | | | | | | +- DictSubclassGuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['0']._modules['attn']._forward_hooks, accessed_by=DictGetItemGuardAccessor(_forward_hooks)
+ | | | | | | | | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['0']._modules['attn']._backward_hooks, accessed_by=DictGetItemGuardAccessor(_backward_hooks)
+ | | | | | | | | | | | | | | | | | +- DICT_LENGTH: not
L['mod']._modules['transformer']._modules['h']._modules['0']._modules['attn']._backward_hooks
+ | | | | | | | | | | | | | | | | +- DictSubclassGuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['0']._modules['attn']._forward_pre_hooks, accessed_by=DictGetItemGuardAccessor(_forward_pre_hooks)
+ | | | | | | | | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['0']._modules['attn']._backward_pre_hooks, accessed_by=DictGetItemGuardAccessor(_backward_pre_hooks)
+ | | | | | | | | | | | | | | | | | +- DICT_LENGTH: not L['mod']._modules['transformer']._modules['h']._modules['0']._modules['attn']._backward_pre_hooks
+ | | | | | | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['0']._modules['ln_2'], accessed_by=DictGetItemGuardAccessor(ln_2)
+ | | | | | | | | | | | | | | | +- TYPE_MATCH: ___check_type_id(L['mod']._modules['transformer']._modules['h']._modules['0']._modules['ln_2'], 94206531292160)
+ | | | | | | | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['0']._modules['ln_2'].__dict__, accessed_by=GetGenericDictGuardAccessor
+ | | | | | | | | | | | | | | | | +- DICT_CONTAINS: not ___dict_contains('forward', L['mod']._modules['transformer']._modules['h']._modules['0']._modules['ln_2'].__dict__)
+ | | | | | | | | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['0']._modules['ln_2']._parameters, accessed_by=DictGetItemGuardAccessor(_parameters)
+ | | | | | | | | | | | | | | | | | +- DICT_LENGTH: len(L['mod']._modules['transformer']._modules['h']._modules['0']._modules['ln_2']._parameters) == 2
+ | | | | | | | | | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['0']._modules['ln_2']._parameters['weight'], accessed_by=DictGetItemGuardAccessor(weight)
+ | | | | | | | | | | | | | | | | | | +- TENSOR_MATCH: check_tensor(L['mod']._modules['transformer']._modules['h']._modules['0']._modules['ln_2']._parameters['weight'], Parameter, DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), torch.float32, device=0, requires_grad=True, size=[768], stride=[1])
+ | | | | | | | | | | | | | | | | | | +- NO_TENSOR_ALIASING
+ | | | | | | | | | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['0']._modules['ln_2']._parameters['bias'], accessed_by=DictGetItemGuardAccessor(bias)
+ | | | | | | | | | | | | | | | | | | +- TENSOR_MATCH: check_tensor(L['mod']._modules['transformer']._modules['h']._modules['0']._modules['ln_2']._parameters['bias'], Parameter, DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), torch.float32, device=0, requires_grad=True, size=[768], stride=[1])
+ | | | | | | | | | | | | | | | | | | +- NO_TENSOR_ALIASING
+ | | | | | | | | | | | | | | | | +- DictSubclassGuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['0']._modules['ln_2']._forward_hooks, accessed_by=DictGetItemGuardAccessor(_forward_hooks)
+ | | | | | | | | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['0']._modules['ln_2']._backward_hooks, accessed_by=DictGetItemGuardAccessor(_backward_hooks)
+ | | | | | | | | | | | | | | | | | +- DICT_LENGTH: not L['mod']._modules['transformer']._modules['h']._modules['0']._modules['ln_2']._backward_hooks
+ | | | | | | | | | | | | | | | | +- DictSubclassGuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['0']._modules['ln_2']._forward_pre_hooks, accessed_by=DictGetItemGuardAccessor(_forward_pre_hooks)
+ | | | | | | | | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['0']._modules['ln_2']._backward_pre_hooks, accessed_by=DictGetItemGuardAccessor(_backward_pre_hooks)
+ | | | | | | | | | | | | | | | | | +- DICT_LENGTH: not L['mod']._modules['transformer']._modules['h']._modules['0']._modules['ln_2']._backward_pre_hooks
+ | | | | | | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['0']._modules['mlp'], accessed_by=DictGetItemGuardAccessor(mlp)
+ | | | | | | | | | | | | | | | +- TYPE_MATCH: ___check_type_id(L['mod']._modules['transformer']._modules['h']._modules['0']._modules['mlp'], 94206531294080)
+ | | | | | | | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['0']._modules['mlp'].__dict__, accessed_by=GetGenericDictGuardAccessor
+ | | | | | | | | | | | | | | | | +- DICT_CONTAINS: not ___dict_contains('forward', L['mod']._modules['transformer']._modules['h']._modules['0']._modules['mlp'].__dict__)
+ | | | | | | | | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['0']._modules['mlp']._modules, accessed_by=DictGetItemGuardAccessor(_modules)
+ | | | | | | | | | | | | | | | | | +- DICT_LENGTH: len(L['mod']._modules['transformer']._modules['h']._modules['0']._modules['mlp']._modules) == 3
+ | | | | | | | | | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['0']._modules['mlp']._modules['c_fc'], accessed_by=DictGetItemGuardAccessor(c_fc)
+ | | | | | | | | | | | | | | | | | | +- TYPE_MATCH: ___check_type_id(L['mod']._modules['transformer']._modules['h']._modules['0']._modules['mlp']._modules['c_fc'], 94206198956688)
+ | | | | | | | | | | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['0']._modules['mlp']._modules['c_fc'].__dict__, accessed_by=GetGenericDictGuardAccessor
+ | | | | | | | | | | | | | | | | | | | +- DICT_CONTAINS: not ___dict_contains('forward', L['mod']._modules['transformer']._modules['h']._modules['0']._modules['mlp']._modules['c_fc'].__dict__)
+ | | | | | | | | | | | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['0']._modules['mlp']._modules['c_fc']._parameters, accessed_by=DictGetItemGuardAccessor(_parameters)
+ | | | | | | | | | | | | | | | | | | | | +- DICT_LENGTH: len(L['mod']._modules['transformer']._modules['h']._modules['0']._modules['mlp']._modules['c_fc']._parameters) == 2
+ | | | | | | | | | | | | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['0']._modules['mlp']._modules['c_fc']._parameters['weight'], accessed_by=DictGetItemGuardAccessor(weight)
+ | | | | | | | | | | | | | | | | | | | | | +- TENSOR_MATCH: check_tensor(L['mod']._modules['transformer']._modules['h']._modules['0']._modules['mlp']._modules['c_fc']._parameters['weight'], Parameter, DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), torch.float32, device=0, requires_grad=True, size=[3072, 768], stride=[768, 1])
+ | | | | | | | | | | | | | | | | | | | | | +- NO_TENSOR_ALIASING
+ | | | | | | | | | | | | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['0']._modules['mlp']._modules['c_fc']._parameters['bias'], accessed_by=DictGetItemGuardAccessor(bias)
+ | | | | | | | | | | | | | | | | | | | | | +- TENSOR_MATCH: check_tensor(L['mod']._modules['transformer']._modules['h']._modules['0']._modules['mlp']._modules['c_fc']._parameters['bias'], Parameter, DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), torch.float32, device=0, requires_grad=True, size=[3072], stride=[1])
+ | | | | | | | | | | | | | | | | | | | | | +- NO_TENSOR_ALIASING
+ | | | | | | | | | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['0']._modules['mlp']._modules['c_proj'], accessed_by=DictGetItemGuardAccessor(c_proj)
+ | | | | | | | | | | | | | | | | | | +- TYPE_MATCH: ___check_type_id(L['mod']._modules['transformer']._modules['h']._modules['0']._modules['mlp']._modules['c_proj'], 94206198956688)
+ | | | | | | | | | | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['0']._modules['mlp']._modules['c_proj'].__dict__, accessed_by=GetGenericDictGuardAccessor
+ | | | | | | | | | | | | | | | | | | | +- DICT_CONTAINS: not ___dict_contains('forward', L['mod']._modules['transformer']._modules['h']._modules['0']._modules['mlp']._modules['c_proj'].__dict__)
+ | | | | | | | | | | | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['0']._modules['mlp']._modules['c_proj']._parameters, accessed_by=DictGetItemGuardAccessor(_parameters)
+ | | | | | | | | | | | | | | | | | | | | +- DICT_LENGTH: len(L['mod']._modules['transformer']._modules['h']._modules['0']._modules['mlp']._modules['c_proj']._parameters) == 2
+ | | | | | | | | | | | | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['0']._modules['mlp']._modules['c_proj']._parameters['weight'], accessed_by=DictGetItemGuardAccessor(weight)
+ | | | | | | | | | | | | | | | | | | | | | +- TENSOR_MATCH: check_tensor(L['mod']._modules['transformer']._modules['h']._modules['0']._modules['mlp']._modules['c_proj']._parameters['weight'], Parameter, DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), torch.float32, device=0, requires_grad=True, size=[768, 3072], stride=[3072, 1])
+ | | | | | | | | | | | | | | | | | | | | | +- NO_TENSOR_ALIASING
+ | | | | | | | | | | | | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['0']._modules['mlp']._modules['c_proj']._parameters['bias'], accessed_by=DictGetItemGuardAccessor(bias)
+ | | | | | | | | | | | | | | | | | | | | | +- TENSOR_MATCH: check_tensor(L['mod']._modules['transformer']._modules['h']._modules['0']._modules['mlp']._modules['c_proj']._parameters['bias'], Parameter, DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), torch.float32, device=0, requires_grad=True, size=[768], stride=[1])
+ | | | | | | | | | | | | | | | | | | | | | +- NO_TENSOR_ALIASING
+ | | | | | | | | | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['0']._modules['mlp']._modules['dropout'], accessed_by=DictGetItemGuardAccessor(dropout)
+ | | | | | | | | | | | | | | | | | | +- TYPE_MATCH: ___check_type_id(L['mod']._modules['transformer']._modules['h']._modules['0']._modules['mlp']._modules['dropout'], 94206199111456)
+ | | | | | | | | | | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['0']._modules['mlp']._modules['dropout'].__dict__, accessed_by=GetGenericDictGuardAccessor
+ | | | | | | | | | | | | | | | | | | | +- DICT_CONTAINS: not ___dict_contains('forward', L['mod']._modules['transformer']._modules['h']._modules['0']._modules['mlp']._modules['dropout'].__dict__)
+ | | | | | | | | | | | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['0']._modules['mlp']._modules['dropout'].p, accessed_by=DictGetItemGuardAccessor(p)
+ | | | | | | | | | | | | | | | | | | | | +- EQUALS_MATCH: L['mod']._modules['transformer']._modules['h']._modules['0']._modules['mlp']._modules['dropout'].p == 0.0
+ | | | | | | | | | | | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['0']._modules['mlp']._modules['dropout'].inplace, accessed_by=DictGetItemGuardAccessor(inplace)
+ | | | | | | | | | | | | | | | | | | | | +- ID_MATCH: ___check_obj_id(L['mod']._modules['transformer']._modules['h']._modules['0']._modules['mlp']._modules['dropout'].inplace, 94206128801376)
+ | | | | | | | | | | | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['0']._modules['mlp']._modules['dropout'].training, accessed_by=DictGetItemGuardAccessor(training)
+ | | | | | | | | | | | | | | | | | | | | +- ID_MATCH: ___check_obj_id(L['mod']._modules['transformer']._modules['h']._modules['0']._modules['mlp']._modules['dropout'].training, 94206128801408)
+ | | | | | | | | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['0']._modules['mlp']._parameters, accessed_by=DictGetItemGuardAccessor(_parameters)
+ | | | | | | | | | | | | | | | | | +- DICT_LENGTH: not L['mod']._modules['transformer']._modules['h']._modules['0']._modules['mlp']._parameters
+ | | | | | | | | | | | | | | | | +- DictSubclassGuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['0']._modules['mlp']._forward_hooks, accessed_by=DictGetItemGuardAccessor(_forward_hooks)
+ | | | | | | | | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['0']._modules['mlp']._backward_hooks, accessed_by=DictGetItemGuardAccessor(_backward_hooks)
+ | | | | | | | | | | | | | | | | | +- DICT_LENGTH: not L['mod']._modules['transformer']._modules['h']._modules['0']._modules['mlp']._backward_hooks
+ | | | | | | | | | | | | | | | | +- DictSubclassGuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['0']._modules['mlp']._forward_pre_hooks, accessed_by=DictGetItemGuardAccessor(_forward_pre_hooks)
+ | | | | | | | | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['0']._modules['mlp']._backward_pre_hooks, accessed_by=DictGetItemGuardAccessor(_backward_pre_hooks)
+ | | | | | | | | | | | | | | | | | +- DICT_LENGTH: not L['mod']._modules['transformer']._modules['h']._modules['0']._modules['mlp']._backward_pre_hooks
+ | | | | | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['0']._parameters, accessed_by=DictGetItemGuardAccessor(_parameters)
+ | | | | | | | | | | | | | | +- DICT_LENGTH: not L['mod']._modules['transformer']._modules['h']._modules['0']._parameters
+ | | | | | | | | | | | | | +- DictSubclassGuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['0']._forward_hooks, accessed_by=DictGetItemGuardAccessor(_forward_hooks)
+ | | | | | | |
| | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['0']._backward_hooks, accessed_by=DictGetItemGuardAccessor(_backward_hooks) + | | | | | | | | | | | | | | +- DICT_LENGTH: not L['mod']._modules['transformer']._modules['h']._modules['0']._backward_hooks + | | | | | | | | | | | | | +- DictSubclassGuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['0']._forward_pre_hooks, accessed_by=DictGetItemGuardAccessor(_forward_pre_hooks) + | | | | | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['0']._backward_pre_hooks, accessed_by=DictGetItemGuardAccessor(_backward_pre_hooks) + | | | | | | | | | | | | | | +- DICT_LENGTH: not L['mod']._modules['transformer']._modules['h']._modules['0']._backward_pre_hooks + | | | | | | | | | | +- KeyValueManager pair at index=1 + | | | | | | | | | | | +- KeyManager: GuardManager: source=list(L['mod']._modules['transformer']._modules['h']._modules.keys())[1] + | | | | | | | | | | | | +- EQUALS_MATCH: list(L['mod']._modules['transformer']._modules['h']._modules.keys())[1] == '1' + | | | | | | | | | | | +- ValueManager: GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['1'] + | | | | | | | | | | | | +- TYPE_MATCH: ___check_type_id(L['mod']._modules['transformer']._modules['h']._modules['1'], 94206531295040) + | | | | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['1'].__dict__, accessed_by=GetGenericDictGuardAccessor + | | | | | | | | | | | | | +- DICT_CONTAINS: not ___dict_contains('forward', L['mod']._modules['transformer']._modules['h']._modules['1'].__dict__) + | | | | | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['1']._modules, accessed_by=DictGetItemGuardAccessor(_modules) + | | | | | | | | | | | | | | +- DICT_LENGTH: len(L['mod']._modules['transformer']._modules['h']._modules['1']._modules) == 4 + | | | | | | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['1']._modules['ln_1'], accessed_by=DictGetItemGuardAccessor(ln_1) + | | | | | | | | | | | | | | | +- TYPE_MATCH: ___check_type_id(L['mod']._modules['transformer']._modules['h']._modules['1']._modules['ln_1'], 94206531292160) + | | | | | | | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['1']._modules['ln_1'].__dict__, accessed_by=GetGenericDictGuardAccessor + | | | | | | | | | | | | | | | | +- DICT_CONTAINS: not ___dict_contains('forward', L['mod']._modules['transformer']._modules['h']._modules['1']._modules['ln_1'].__dict__) + | | | | | | | | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['1']._modules['ln_1']._parameters, accessed_by=DictGetItemGuardAccessor(_parameters) + | | | | | | | | | | | | | | | | | +- DICT_LENGTH: len(L['mod']._modules['transformer']._modules['h']._modules['1']._modules['ln_1']._parameters) == 2 + | | | | | | | | | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['1']._modules['ln_1']._parameters['weight'], accessed_by=DictGetItemGuardAccessor(weight) + | | | | | | | | | | | | | | | | | | +- TENSOR_MATCH: check_tensor(L['mod']._modules['transformer']._modules['h']._modules['1']._modules['ln_1']._parameters['weight'], Parameter, DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), torch.float32, device=0, requires_grad=True, size=[768], 
stride=[1]) + | | | | | | | | | | | | | | | | | | +- NO_TENSOR_ALIASING + | | | | | | | | | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['1']._modules['ln_1']._parameters['bias'], accessed_by=DictGetItemGuardAccessor(bias) + | | | | | | | | | | | | | | | | | | +- TENSOR_MATCH: check_tensor(L['mod']._modules['transformer']._modules['h']._modules['1']._modules['ln_1']._parameters['bias'], Parameter, DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), torch.float32, device=0, requires_grad=True, size=[768], stride=[1]) + | | | | | | | | | | | | | | | | | | +- NO_TENSOR_ALIASING + | | | | | | | | | | | | | | | | +- DictSubclassGuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['1']._modules['ln_1']._forward_hooks, accessed_by=DictGetItemGuardAccessor(_forward_hooks) + | | | | | | | | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['1']._modules['ln_1']._backward_hooks, accessed_by=DictGetItemGuardAccessor(_backward_hooks) + | | | | | | | | | | | | | | | | | +- DICT_LENGTH: not L['mod']._modules['transformer']._modules['h']._modules['1']._modules['ln_1']._backward_hooks + | | | | | | | | | | | | | | | | +- DictSubclassGuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['1']._modules['ln_1']._forward_pre_hooks, accessed_by=DictGetItemGuardAccessor(_forward_pre_hooks) + | | | | | | | | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['1']._modules['ln_1']._backward_pre_hooks, accessed_by=DictGetItemGuardAccessor(_backward_pre_hooks) + | | | | | | | | | | | | | | | | | +- DICT_LENGTH: not L['mod']._modules['transformer']._modules['h']._modules['1']._modules['ln_1']._backward_pre_hooks + | | | | | | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['1']._modules['attn'], accessed_by=DictGetItemGuardAccessor(attn) + | | | | | | | | | | | | | | | +- TYPE_MATCH: ___check_type_id(L['mod']._modules['transformer']._modules['h']._modules['1']._modules['attn'], 94206531293120) + | | | | | | | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['1']._modules['attn'].__dict__, accessed_by=GetGenericDictGuardAccessor + | | | | | | | | | | | | | | | | +- DICT_CONTAINS: not ___dict_contains('forward', L['mod']._modules['transformer']._modules['h']._modules['1']._modules['attn'].__dict__) + | | | | | | | | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['1']._modules['attn'].flash, accessed_by=DictGetItemGuardAccessor(flash) + | | | | | | | | | | | | | | | | | +- ID_MATCH: ___check_obj_id(L['mod']._modules['transformer']._modules['h']._modules['1']._modules['attn'].flash, 94206128801408) + | | | | | | | | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['1']._modules['attn'].n_embd, accessed_by=DictGetItemGuardAccessor(n_embd) + | | | | | | | | | | | | | | | | | +- EQUALS_MATCH: L['mod']._modules['transformer']._modules['h']._modules['1']._modules['attn'].n_embd == 768 + | | | | | | | | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['1']._modules['attn'].n_head, accessed_by=DictGetItemGuardAccessor(n_head) + | | | | | | | | | | | | | | | | | +- EQUALS_MATCH: L['mod']._modules['transformer']._modules['h']._modules['1']._modules['attn'].n_head == 12 + | | | | | | | | | | | | 
| | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['1']._modules['attn'].dropout, accessed_by=DictGetItemGuardAccessor(dropout) + | | | | | | | | | | | | | | | | | +- EQUALS_MATCH: L['mod']._modules['transformer']._modules['h']._modules['1']._modules['attn'].dropout == 0.0 + | | | | | | | | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['1']._modules['attn']._modules, accessed_by=DictGetItemGuardAccessor(_modules) + | | | | | | | | | | | | | | | | | +- DICT_LENGTH: len(L['mod']._modules['transformer']._modules['h']._modules['1']._modules['attn']._modules) == 4 + | | | | | | | | | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['1']._modules['attn']._modules['c_attn'], accessed_by=DictGetItemGuardAccessor(c_attn) + | | | | | | | | | | | | | | | | | | +- TYPE_MATCH: ___check_type_id(L['mod']._modules['transformer']._modules['h']._modules['1']._modules['attn']._modules['c_attn'], 94206198956688) + | | | | | | | | | | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['1']._modules['attn']._modules['c_attn'].__dict__, accessed_by=GetGenericDictGuardAccessor + | | | | | | | | | | | | | | | | | | | +- DICT_CONTAINS: not ___dict_contains('forward', L['mod']._modules['transformer']._modules['h']._modules['1']._modules['attn']._modules['c_attn'].__dict__) + | | | | | | | | | | | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['1']._modules['attn']._modules['c_attn']._parameters, accessed_by=DictGetItemGuardAccessor(_parameters) + | | | | | | | | | | | | | | | | | | | | +- DICT_LENGTH: len(L['mod']._modules['transformer']._modules['h']._modules['1']._modules['attn']._modules['c_attn']._parameters) == 2 + | | | | | | | | | | | | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['1']._modules['attn']._modules['c_attn']._parameters['weight'], accessed_by=DictGetItemGuardAccessor(weight) + | | | | | | | | | | | | | | | | | | | | | +- TENSOR_MATCH: check_tensor(L['mod']._modules['transformer']._modules['h']._modules['1']._modules['attn']._modules['c_attn']._parameters['weight'], Parameter, DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), torch.float32, device=0, requires_grad=True, size=[2304, 768], stride=[768, 1]) + | | | | | | | | | | | | | | | | | | | | | +- NO_TENSOR_ALIASING + | | | | | | | | | | | | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['1']._modules['attn']._modules['c_attn']._parameters['bias'], accessed_by=DictGetItemGuardAccessor(bias) + | | | | | | | | | | | | | | | | | | | | | +- TENSOR_MATCH: check_tensor(L['mod']._modules['transformer']._modules['h']._modules['1']._modules['attn']._modules['c_attn']._parameters['bias'], Parameter, DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), torch.float32, device=0, requires_grad=True, size=[2304], stride=[1]) + | | | | | | | | | | | | | | | | | | | | | +- NO_TENSOR_ALIASING + | | | | | | | | | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['1']._modules['attn']._modules['c_proj'], accessed_by=DictGetItemGuardAccessor(c_proj) + | | | | | | | | | | | | | | | | | | +- TYPE_MATCH: ___check_type_id(L['mod']._modules['transformer']._modules['h']._modules['1']._modules['attn']._modules['c_proj'], 94206198956688) + | | | | | | | | | | | | | | | | | 
| +- GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['1']._modules['attn']._modules['c_proj'].__dict__, accessed_by=GetGenericDictGuardAccessor + | | | | | | | | | | | | | | | | | | | +- DICT_CONTAINS: not ___dict_contains('forward', L['mod']._modules['transformer']._modules['h']._modules['1']._modules['attn']._modules['c_proj'].__dict__) + | | | | | | | | | | | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['1']._modules['attn']._modules['c_proj']._parameters, accessed_by=DictGetItemGuardAccessor(_parameters) + | | | | | | | | | | | | | | | | | | | | +- DICT_LENGTH: len(L['mod']._modules['transformer']._modules['h']._modules['1']._modules['attn']._modules['c_proj']._parameters) == 2 + | | | | | | | | | | | | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['1']._modules['attn']._modules['c_proj']._parameters['weight'], accessed_by=DictGetItemGuardAccessor(weight) + | | | | | | | | | | | | | | | | | | | | | +- TENSOR_MATCH: check_tensor(L['mod']._modules['transformer']._modules['h']._modules['1']._modules['attn']._modules['c_proj']._parameters['weight'], Parameter, DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), torch.float32, device=0, requires_grad=True, size=[768, 768], stride=[768, 1]) + | | | | | | | | | | | | | | | | | | | | | +- NO_TENSOR_ALIASING + | | | | | | | | | | | | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['1']._modules['attn']._modules['c_proj']._parameters['bias'], accessed_by=DictGetItemGuardAccessor(bias) + | | | | | | | | | | | | | | | | | | | | | +- TENSOR_MATCH: check_tensor(L['mod']._modules['transformer']._modules['h']._modules['1']._modules['attn']._modules['c_proj']._parameters['bias'], Parameter, DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), torch.float32, device=0, requires_grad=True, size=[768], stride=[1]) + | | | | | | | | | | | | | | | | | | | | | +- NO_TENSOR_ALIASING + | | | | | | | | | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['1']._modules['attn']._modules['attn_dropout'], accessed_by=DictGetItemGuardAccessor(attn_dropout) + | | | | | | | | | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['1']._modules['attn']._modules['resid_dropout'], accessed_by=DictGetItemGuardAccessor(resid_dropout) + | | | | | | | | | | | | | | | | | | +- TYPE_MATCH: ___check_type_id(L['mod']._modules['transformer']._modules['h']._modules['1']._modules['attn']._modules['resid_dropout'], 94206199111456) + | | | | | | | | | | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['1']._modules['attn']._modules['resid_dropout'].__dict__, accessed_by=GetGenericDictGuardAccessor + | | | | | | | | | | | | | | | | | | | +- DICT_CONTAINS: not ___dict_contains('forward', L['mod']._modules['transformer']._modules['h']._modules['1']._modules['attn']._modules['resid_dropout'].__dict__) + | | | | | | | | | | | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['1']._modules['attn']._modules['resid_dropout'].p, accessed_by=DictGetItemGuardAccessor(p) + | | | | | | | | | | | | | | | | | | | | +- EQUALS_MATCH: L['mod']._modules['transformer']._modules['h']._modules['1']._modules['attn']._modules['resid_dropout'].p == 0.0 + | | | | | | | | | | | | | | | | | | | +- GuardManager: 
source=L['mod']._modules['transformer']._modules['h']._modules['1']._modules['attn']._modules['resid_dropout'].inplace, accessed_by=DictGetItemGuardAccessor(inplace) + | | | | | | | | | | | | | | | | | | | | +- ID_MATCH: ___check_obj_id(L['mod']._modules['transformer']._modules['h']._modules['1']._modules['attn']._modules['resid_dropout'].inplace, 94206128801376) + | | | | | | | | | | | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['1']._modules['attn']._modules['resid_dropout'].training, accessed_by=DictGetItemGuardAccessor(training) + | | | | | | | | | | | | | | | | | | | | +- ID_MATCH: ___check_obj_id(L['mod']._modules['transformer']._modules['h']._modules['1']._modules['attn']._modules['resid_dropout'].training, 94206128801408) + | | | | | | | | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['1']._modules['attn'].training, accessed_by=DictGetItemGuardAccessor(training) + | | | | | | | | | | | | | | | | | +- ID_MATCH: ___check_obj_id(L['mod']._modules['transformer']._modules['h']._modules['1']._modules['attn'].training, 94206128801408) + | | | | | | | | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['1']._modules['attn']._parameters, accessed_by=DictGetItemGuardAccessor(_parameters) + | | | | | | | | | | | | | | | | | +- DICT_LENGTH: not L['mod']._modules['transformer']._modules['h']._modules['1']._modules['attn']._parameters + | | | | | | | | | | | | | | | | +- DictSubclassGuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['1']._modules['attn']._forward_hooks, accessed_by=DictGetItemGuardAccessor(_forward_hooks) + | | | | | | | | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['1']._modules['attn']._backward_hooks, accessed_by=DictGetItemGuardAccessor(_backward_hooks) + | | | | | | | | | | | | | | | | | +- DICT_LENGTH: not L['mod']._modules['transformer']._modules['h']._modules['1']._modules['attn']._backward_hooks + | | | | | | | | | | | | | | | | +- DictSubclassGuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['1']._modules['attn']._forward_pre_hooks, accessed_by=DictGetItemGuardAccessor(_forward_pre_hooks) + | | | | | | | | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['1']._modules['attn']._backward_pre_hooks, accessed_by=DictGetItemGuardAccessor(_backward_pre_hooks) + | | | | | | | | | | | | | | | | | +- DICT_LENGTH: not L['mod']._modules['transformer']._modules['h']._modules['1']._modules['attn']._backward_pre_hooks + | | | | | | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['1']._modules['ln_2'], accessed_by=DictGetItemGuardAccessor(ln_2) + | | | | | | | | | | | | | | | +- TYPE_MATCH: ___check_type_id(L['mod']._modules['transformer']._modules['h']._modules['1']._modules['ln_2'], 94206531292160) + | | | | | | | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['1']._modules['ln_2'].__dict__, accessed_by=GetGenericDictGuardAccessor + | | | | | | | | | | | | | | | | +- DICT_CONTAINS: not ___dict_contains('forward', L['mod']._modules['transformer']._modules['h']._modules['1']._modules['ln_2'].__dict__) + | | | | | | | | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['1']._modules['ln_2']._parameters, 
accessed_by=DictGetItemGuardAccessor(_parameters) + | | | | | | | | | | | | | | | | | +- DICT_LENGTH: len(L['mod']._modules['transformer']._modules['h']._modules['1']._modules['ln_2']._parameters) == 2 + | | | | | | | | | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['1']._modules['ln_2']._parameters['weight'], accessed_by=DictGetItemGuardAccessor(weight) + | | | | | | | | | | | | | | | | | | +- TENSOR_MATCH: check_tensor(L['mod']._modules['transformer']._modules['h']._modules['1']._modules['ln_2']._parameters['weight'], Parameter, DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), torch.float32, device=0, requires_grad=True, size=[768], stride=[1]) + | | | | | | | | | | | | | | | | | | +- NO_TENSOR_ALIASING + | | | | | | | | | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['1']._modules['ln_2']._parameters['bias'], accessed_by=DictGetItemGuardAccessor(bias) + | | | | | | | | | | | | | | | | | | +- TENSOR_MATCH: check_tensor(L['mod']._modules['transformer']._modules['h']._modules['1']._modules['ln_2']._parameters['bias'], Parameter, DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), torch.float32, device=0, requires_grad=True, size=[768], stride=[1]) + | | | | | | | | | | | | | | | | | | +- NO_TENSOR_ALIASING + | | | | | | | | | | | | | | | | +- DictSubclassGuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['1']._modules['ln_2']._forward_hooks, accessed_by=DictGetItemGuardAccessor(_forward_hooks) + | | | | | | | | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['1']._modules['ln_2']._backward_hooks, accessed_by=DictGetItemGuardAccessor(_backward_hooks) + | | | | | | | | | | | | | | | | | +- DICT_LENGTH: not L['mod']._modules['transformer']._modules['h']._modules['1']._modules['ln_2']._backward_hooks + | | | | | | | | | | | | | | | | +- DictSubclassGuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['1']._modules['ln_2']._forward_pre_hooks, accessed_by=DictGetItemGuardAccessor(_forward_pre_hooks) + | | | | | | | | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['1']._modules['ln_2']._backward_pre_hooks, accessed_by=DictGetItemGuardAccessor(_backward_pre_hooks) + | | | | | | | | | | | | | | | | | +- DICT_LENGTH: not L['mod']._modules['transformer']._modules['h']._modules['1']._modules['ln_2']._backward_pre_hooks + | | | | | | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['1']._modules['mlp'], accessed_by=DictGetItemGuardAccessor(mlp) + | | | | | | | | | | | | | | | +- TYPE_MATCH: ___check_type_id(L['mod']._modules['transformer']._modules['h']._modules['1']._modules['mlp'], 94206531294080) + | | | | | | | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['1']._modules['mlp'].__dict__, accessed_by=GetGenericDictGuardAccessor + | | | | | | | | | | | | | | | | +- DICT_CONTAINS: not ___dict_contains('forward', L['mod']._modules['transformer']._modules['h']._modules['1']._modules['mlp'].__dict__) + | | | | | | | | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['1']._modules['mlp']._modules, accessed_by=DictGetItemGuardAccessor(_modules) + | | | | | | | | | | | | | | | | | +- DICT_LENGTH: len(L['mod']._modules['transformer']._modules['h']._modules['1']._modules['mlp']._modules) == 
3 + | | | | | | | | | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['1']._modules['mlp']._modules['c_fc'], accessed_by=DictGetItemGuardAccessor(c_fc) + | | | | | | | | | | | | | | | | | | +- TYPE_MATCH: ___check_type_id(L['mod']._modules['transformer']._modules['h']._modules['1']._modules['mlp']._modules['c_fc'], 94206198956688) + | | | | | | | | | | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['1']._modules['mlp']._modules['c_fc'].__dict__, accessed_by=GetGenericDictGuardAccessor + | | | | | | | | | | | | | | | | | | | +- DICT_CONTAINS: not ___dict_contains('forward', L['mod']._modules['transformer']._modules['h']._modules['1']._modules['mlp']._modules['c_fc'].__dict__) + | | | | | | | | | | | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['1']._modules['mlp']._modules['c_fc']._parameters, accessed_by=DictGetItemGuardAccessor(_parameters) + | | | | | | | | | | | | | | | | | | | | +- DICT_LENGTH: len(L['mod']._modules['transformer']._modules['h']._modules['1']._modules['mlp']._modules['c_fc']._parameters) == 2 + | | | | | | | | | | | | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['1']._modules['mlp']._modules['c_fc']._parameters['weight'], accessed_by=DictGetItemGuardAccessor(weight) + | | | | | | | | | | | | | | | | | | | | | +- TENSOR_MATCH: check_tensor(L['mod']._modules['transformer']._modules['h']._modules['1']._modules['mlp']._modules['c_fc']._parameters['weight'], Parameter, DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), torch.float32, device=0, requires_grad=True, size=[3072, 768], stride=[768, 1]) + | | | | | | | | | | | | | | | | | | | | | +- NO_TENSOR_ALIASING + | | | | | | | | | | | | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['1']._modules['mlp']._modules['c_fc']._parameters['bias'], accessed_by=DictGetItemGuardAccessor(bias) + | | | | | | | | | | | | | | | | | | | | | +- TENSOR_MATCH: check_tensor(L['mod']._modules['transformer']._modules['h']._modules['1']._modules['mlp']._modules['c_fc']._parameters['bias'], Parameter, DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), torch.float32, device=0, requires_grad=True, size=[3072], stride=[1]) + | | | | | | | | | | | | | | | | | | | | | +- NO_TENSOR_ALIASING + | | | | | | | | | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['1']._modules['mlp']._modules['c_proj'], accessed_by=DictGetItemGuardAccessor(c_proj) + | | | | | | | | | | | | | | | | | | +- TYPE_MATCH: ___check_type_id(L['mod']._modules['transformer']._modules['h']._modules['1']._modules['mlp']._modules['c_proj'], 94206198956688) + | | | | | | | | | | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['1']._modules['mlp']._modules['c_proj'].__dict__, accessed_by=GetGenericDictGuardAccessor + | | | | | | | | | | | | | | | | | | | +- DICT_CONTAINS: not ___dict_contains('forward', L['mod']._modules['transformer']._modules['h']._modules['1']._modules['mlp']._modules['c_proj'].__dict__) + | | | | | | | | | | | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['1']._modules['mlp']._modules['c_proj']._parameters, accessed_by=DictGetItemGuardAccessor(_parameters) + | | | | | | | | | | | | | | | | | | | | +- DICT_LENGTH: 
len(L['mod']._modules['transformer']._modules['h']._modules['1']._modules['mlp']._modules['c_proj']._parameters) == 2 + | | | | | | | | | | | | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['1']._modules['mlp']._modules['c_proj']._parameters['weight'], accessed_by=DictGetItemGuardAccessor(weight) + | | | | | | | | | | | | | | | | | | | | | +- TENSOR_MATCH: check_tensor(L['mod']._modules['transformer']._modules['h']._modules['1']._modules['mlp']._modules['c_proj']._parameters['weight'], Parameter, DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), torch.float32, device=0, requires_grad=True, size=[768, 3072], stride=[3072, 1]) + | | | | | | | | | | | | | | | | | | | | | +- NO_TENSOR_ALIASING + | | | | | | | | | | | | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['1']._modules['mlp']._modules['c_proj']._parameters['bias'], accessed_by=DictGetItemGuardAccessor(bias) + | | | | | | | | | | | | | | | | | | | | | +- TENSOR_MATCH: check_tensor(L['mod']._modules['transformer']._modules['h']._modules['1']._modules['mlp']._modules['c_proj']._parameters['bias'], Parameter, DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), torch.float32, device=0, requires_grad=True, size=[768], stride=[1]) + | | | | | | | | | | | | | | | | | | | | | +- NO_TENSOR_ALIASING + | | | | | | | | | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['1']._modules['mlp']._modules['dropout'], accessed_by=DictGetItemGuardAccessor(dropout) + | | | | | | | | | | | | | | | | | | +- TYPE_MATCH: ___check_type_id(L['mod']._modules['transformer']._modules['h']._modules['1']._modules['mlp']._modules['dropout'], 94206199111456) + | | | | | | | | | | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['1']._modules['mlp']._modules['dropout'].__dict__, accessed_by=GetGenericDictGuardAccessor + | | | | | | | | | | | | | | | | | | | +- DICT_CONTAINS: not ___dict_contains('forward', L['mod']._modules['transformer']._modules['h']._modules['1']._modules['mlp']._modules['dropout'].__dict__) + | | | | | | | | | | | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['1']._modules['mlp']._modules['dropout'].p, accessed_by=DictGetItemGuardAccessor(p) + | | | | | | | | | | | | | | | | | | | | +- EQUALS_MATCH: L['mod']._modules['transformer']._modules['h']._modules['1']._modules['mlp']._modules['dropout'].p == 0.0 + | | | | | | | | | | | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['1']._modules['mlp']._modules['dropout'].inplace, accessed_by=DictGetItemGuardAccessor(inplace) + | | | | | | | | | | | | | | | | | | | | +- ID_MATCH: ___check_obj_id(L['mod']._modules['transformer']._modules['h']._modules['1']._modules['mlp']._modules['dropout'].inplace, 94206128801376) + | | | | | | | | | | | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['1']._modules['mlp']._modules['dropout'].training, accessed_by=DictGetItemGuardAccessor(training) + | | | | | | | | | | | | | | | | | | | | +- ID_MATCH: ___check_obj_id(L['mod']._modules['transformer']._modules['h']._modules['1']._modules['mlp']._modules['dropout'].training, 94206128801408) + | | | | | | | | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['1']._modules['mlp']._parameters, 
accessed_by=DictGetItemGuardAccessor(_parameters) + | | | | | | | | | | | | | | | | | +- DICT_LENGTH: not L['mod']._modules['transformer']._modules['h']._modules['1']._modules['mlp']._parameters + | | | | | | | | | | | | | | | | +- DictSubclassGuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['1']._modules['mlp']._forward_hooks, accessed_by=DictGetItemGuardAccessor(_forward_hooks) + | | | | | | | | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['1']._modules['mlp']._backward_hooks, accessed_by=DictGetItemGuardAccessor(_backward_hooks) + | | | | | | | | | | | | | | | | | +- DICT_LENGTH: not L['mod']._modules['transformer']._modules['h']._modules['1']._modules['mlp']._backward_hooks + | | | | | | | | | | | | | | | | +- DictSubclassGuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['1']._modules['mlp']._forward_pre_hooks, accessed_by=DictGetItemGuardAccessor(_forward_pre_hooks) + | | | | | | | | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['1']._modules['mlp']._backward_pre_hooks, accessed_by=DictGetItemGuardAccessor(_backward_pre_hooks) + | | | | | | | | | | | | | | | | | +- DICT_LENGTH: not L['mod']._modules['transformer']._modules['h']._modules['1']._modules['mlp']._backward_pre_hooks + | | | | | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['1']._parameters, accessed_by=DictGetItemGuardAccessor(_parameters) + | | | | | | | | | | | | | | +- DICT_LENGTH: not L['mod']._modules['transformer']._modules['h']._modules['1']._parameters + | | | | | | | | | | | | | +- DictSubclassGuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['1']._forward_hooks, accessed_by=DictGetItemGuardAccessor(_forward_hooks) + | | | | | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['1']._backward_hooks, accessed_by=DictGetItemGuardAccessor(_backward_hooks) + | | | | | | | | | | | | | | +- DICT_LENGTH: not L['mod']._modules['transformer']._modules['h']._modules['1']._backward_hooks + | | | | | | | | | | | | | +- DictSubclassGuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['1']._forward_pre_hooks, accessed_by=DictGetItemGuardAccessor(_forward_pre_hooks) + | | | | | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['1']._backward_pre_hooks, accessed_by=DictGetItemGuardAccessor(_backward_pre_hooks) + | | | | | | | | | | | | | | +- DICT_LENGTH: not L['mod']._modules['transformer']._modules['h']._modules['1']._backward_pre_hooks + | | | | | | | | | | +- KeyValueManager pair at index=2 + | | | | | | | | | | | +- KeyManager: GuardManager: source=list(L['mod']._modules['transformer']._modules['h']._modules.keys())[2] + | | | | | | | | | | | | +- EQUALS_MATCH: list(L['mod']._modules['transformer']._modules['h']._modules.keys())[2] == '2' + | | | | | | | | | | | +- ValueManager: GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['2'] + | | | | | | | | | | | | +- TYPE_MATCH: ___check_type_id(L['mod']._modules['transformer']._modules['h']._modules['2'], 94206531295040) + | | | | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['2'].__dict__, accessed_by=GetGenericDictGuardAccessor + | | | | | | | | | | | | | +- DICT_CONTAINS: not ___dict_contains('forward', 
L['mod']._modules['transformer']._modules['h']._modules['2'].__dict__) + | | | | | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['2']._modules, accessed_by=DictGetItemGuardAccessor(_modules) + | | | | | | | | | | | | | | +- DICT_LENGTH: len(L['mod']._modules['transformer']._modules['h']._modules['2']._modules) == 4 + | | | | | | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['2']._modules['ln_1'], accessed_by=DictGetItemGuardAccessor(ln_1) + | | | | | | | | | | | | | | | +- TYPE_MATCH: ___check_type_id(L['mod']._modules['transformer']._modules['h']._modules['2']._modules['ln_1'], 94206531292160) + | | | | | | | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['2']._modules['ln_1'].__dict__, accessed_by=GetGenericDictGuardAccessor + | | | | | | | | | | | | | | | | +- DICT_CONTAINS: not ___dict_contains('forward', L['mod']._modules['transformer']._modules['h']._modules['2']._modules['ln_1'].__dict__) + | | | | | | | | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['2']._modules['ln_1']._parameters, accessed_by=DictGetItemGuardAccessor(_parameters) + | | | | | | | | | | | | | | | | | +- DICT_LENGTH: len(L['mod']._modules['transformer']._modules['h']._modules['2']._modules['ln_1']._parameters) == 2 + | | | | | | | | | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['2']._modules['ln_1']._parameters['weight'], accessed_by=DictGetItemGuardAccessor(weight) + | | | | | | | | | | | | | | | | | | +- TENSOR_MATCH: check_tensor(L['mod']._modules['transformer']._modules['h']._modules['2']._modules['ln_1']._parameters['weight'], Parameter, DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), torch.float32, device=0, requires_grad=True, size=[768], stride=[1]) + | | | | | | | | | | | | | | | | | | +- NO_TENSOR_ALIASING + | | | | | | | | | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['2']._modules['ln_1']._parameters['bias'], accessed_by=DictGetItemGuardAccessor(bias) + | | | | | | | | | | | | | | | | | | +- TENSOR_MATCH: check_tensor(L['mod']._modules['transformer']._modules['h']._modules['2']._modules['ln_1']._parameters['bias'], Parameter, DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), torch.float32, device=0, requires_grad=True, size=[768], stride=[1]) + | | | | | | | | | | | | | | | | | | +- NO_TENSOR_ALIASING + | | | | | | | | | | | | | | | | +- DictSubclassGuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['2']._modules['ln_1']._forward_hooks, accessed_by=DictGetItemGuardAccessor(_forward_hooks) + | | | | | | | | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['2']._modules['ln_1']._backward_hooks, accessed_by=DictGetItemGuardAccessor(_backward_hooks) + | | | | | | | | | | | | | | | | | +- DICT_LENGTH: not L['mod']._modules['transformer']._modules['h']._modules['2']._modules['ln_1']._backward_hooks + | | | | | | | | | | | | | | | | +- DictSubclassGuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['2']._modules['ln_1']._forward_pre_hooks, accessed_by=DictGetItemGuardAccessor(_forward_pre_hooks) + | | | | | | | | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['2']._modules['ln_1']._backward_pre_hooks, 
accessed_by=DictGetItemGuardAccessor(_backward_pre_hooks) + | | | | | | | | | | | | | | | | | +- DICT_LENGTH: not L['mod']._modules['transformer']._modules['h']._modules['2']._modules['ln_1']._backward_pre_hooks + | | | | | | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['2']._modules['attn'], accessed_by=DictGetItemGuardAccessor(attn) + | | | | | | | | | | | | | | | +- TYPE_MATCH: ___check_type_id(L['mod']._modules['transformer']._modules['h']._modules['2']._modules['attn'], 94206531293120) + | | | | | | | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['2']._modules['attn'].__dict__, accessed_by=GetGenericDictGuardAccessor + | | | | | | | | | | | | | | | | +- DICT_CONTAINS: not ___dict_contains('forward', L['mod']._modules['transformer']._modules['h']._modules['2']._modules['attn'].__dict__) + | | | | | | | | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['2']._modules['attn'].flash, accessed_by=DictGetItemGuardAccessor(flash) + | | | | | | | | | | | | | | | | | +- ID_MATCH: ___check_obj_id(L['mod']._modules['transformer']._modules['h']._modules['2']._modules['attn'].flash, 94206128801408) + | | | | | | | | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['2']._modules['attn'].n_embd, accessed_by=DictGetItemGuardAccessor(n_embd) + | | | | | | | | | | | | | | | | | +- EQUALS_MATCH: L['mod']._modules['transformer']._modules['h']._modules['2']._modules['attn'].n_embd == 768 + | | | | | | | | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['2']._modules['attn'].n_head, accessed_by=DictGetItemGuardAccessor(n_head) + | | | | | | | | | | | | | | | | | +- EQUALS_MATCH: L['mod']._modules['transformer']._modules['h']._modules['2']._modules['attn'].n_head == 12 + | | | | | | | | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['2']._modules['attn'].dropout, accessed_by=DictGetItemGuardAccessor(dropout) + | | | | | | | | | | | | | | | | | +- EQUALS_MATCH: L['mod']._modules['transformer']._modules['h']._modules['2']._modules['attn'].dropout == 0.0 + | | | | | | | | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['2']._modules['attn']._modules, accessed_by=DictGetItemGuardAccessor(_modules) + | | | | | | | | | | | | | | | | | +- DICT_LENGTH: len(L['mod']._modules['transformer']._modules['h']._modules['2']._modules['attn']._modules) == 4 + | | | | | | | | | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['2']._modules['attn']._modules['c_attn'], accessed_by=DictGetItemGuardAccessor(c_attn) + | | | | | | | | | | | | | | | | | | +- TYPE_MATCH: ___check_type_id(L['mod']._modules['transformer']._modules['h']._modules['2']._modules['attn']._modules['c_attn'], 94206198956688) + | | | | | | | | | | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['2']._modules['attn']._modules['c_attn'].__dict__, accessed_by=GetGenericDictGuardAccessor + | | | | | | | | | | | | | | | | | | | +- DICT_CONTAINS: not ___dict_contains('forward', L['mod']._modules['transformer']._modules['h']._modules['2']._modules['attn']._modules['c_attn'].__dict__) + | | | | | | | | | | | | | | | | | | | +- GuardManager: 
source=L['mod']._modules['transformer']._modules['h']._modules['2']._modules['attn']._modules['c_attn']._parameters, accessed_by=DictGetItemGuardAccessor(_parameters) + | | | | | | | | | | | | | | | | | | | | +- DICT_LENGTH: len(L['mod']._modules['transformer']._modules['h']._modules['2']._modules['attn']._modules['c_attn']._parameters) == 2 + | | | | | | | | | | | | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['2']._modules['attn']._modules['c_attn']._parameters['weight'], accessed_by=DictGetItemGuardAccessor(weight) + | | | | | | | | | | | | | | | | | | | | | +- TENSOR_MATCH: check_tensor(L['mod']._modules['transformer']._modules['h']._modules['2']._modules['attn']._modules['c_attn']._parameters['weight'], Parameter, DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), torch.float32, device=0, requires_grad=True, size=[2304, 768], stride=[768, 1]) + | | | | | | | | | | | | | | | | | | | | | +- NO_TENSOR_ALIASING + | | | | | | | | | | | | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['2']._modules['attn']._modules['c_attn']._parameters['bias'], accessed_by=DictGetItemGuardAccessor(bias) + | | | | | | | | | | | | | | | | | | | | | +- TENSOR_MATCH: check_tensor(L['mod']._modules['transformer']._modules['h']._modules['2']._modules['attn']._modules['c_attn']._parameters['bias'], Parameter, DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), torch.float32, device=0, requires_grad=True, size=[2304], stride=[1]) + | | | | | | | | | | | | | | | | | | | | | +- NO_TENSOR_ALIASING + | | | | | | | | | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['2']._modules['attn']._modules['c_proj'], accessed_by=DictGetItemGuardAccessor(c_proj) + | | | | | | | | | | | | | | | | | | +- TYPE_MATCH: ___check_type_id(L['mod']._modules['transformer']._modules['h']._modules['2']._modules['attn']._modules['c_proj'], 94206198956688) + | | | | | | | | | | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['2']._modules['attn']._modules['c_proj'].__dict__, accessed_by=GetGenericDictGuardAccessor + | | | | | | | | | | | | | | | | | | | +- DICT_CONTAINS: not ___dict_contains('forward', L['mod']._modules['transformer']._modules['h']._modules['2']._modules['attn']._modules['c_proj'].__dict__) + | | | | | | | | | | | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['2']._modules['attn']._modules['c_proj']._parameters, accessed_by=DictGetItemGuardAccessor(_parameters) + | | | | | | | | | | | | | | | | | | | | +- DICT_LENGTH: len(L['mod']._modules['transformer']._modules['h']._modules['2']._modules['attn']._modules['c_proj']._parameters) == 2 + | | | | | | | | | | | | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['2']._modules['attn']._modules['c_proj']._parameters['weight'], accessed_by=DictGetItemGuardAccessor(weight) + | | | | | | | | | | | | | | | | | | | | | +- TENSOR_MATCH: check_tensor(L['mod']._modules['transformer']._modules['h']._modules['2']._modules['attn']._modules['c_proj']._parameters['weight'], Parameter, DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), torch.float32, device=0, requires_grad=True, size=[768, 768], stride=[768, 1]) + | | | | | | | | | | | | | | | | | | | | | +- NO_TENSOR_ALIASING + | | | | | | | | | | | | | | | | | | | | +- GuardManager: 
source=L['mod']._modules['transformer']._modules['h']._modules['2']._modules['attn']._modules['c_proj']._parameters['bias'], accessed_by=DictGetItemGuardAccessor(bias) + | | | | | | | | | | | | | | | | | | | | | +- TENSOR_MATCH: check_tensor(L['mod']._modules['transformer']._modules['h']._modules['2']._modules['attn']._modules['c_proj']._parameters['bias'], Parameter, DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), torch.float32, device=0, requires_grad=True, size=[768], stride=[1]) + | | | | | | | | | | | | | | | | | | | | | +- NO_TENSOR_ALIASING + | | | | | | | | | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['2']._modules['attn']._modules['attn_dropout'], accessed_by=DictGetItemGuardAccessor(attn_dropout) + | | | | | | | | | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['2']._modules['attn']._modules['resid_dropout'], accessed_by=DictGetItemGuardAccessor(resid_dropout) + | | | | | | | | | | | | | | | | | | +- TYPE_MATCH: ___check_type_id(L['mod']._modules['transformer']._modules['h']._modules['2']._modules['attn']._modules['resid_dropout'], 94206199111456) + | | | | | | | | | | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['2']._modules['attn']._modules['resid_dropout'].__dict__, accessed_by=GetGenericDictGuardAccessor + | | | | | | | | | | | | | | | | | | | +- DICT_CONTAINS: not ___dict_contains('forward', L['mod']._modules['transformer']._modules['h']._modules['2']._modules['attn']._modules['resid_dropout'].__dict__) + | | | | | | | | | | | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['2']._modules['attn']._modules['resid_dropout'].p, accessed_by=DictGetItemGuardAccessor(p) + | | | | | | | | | | | | | | | | | | | | +- EQUALS_MATCH: L['mod']._modules['transformer']._modules['h']._modules['2']._modules['attn']._modules['resid_dropout'].p == 0.0 + | | | | | | | | | | | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['2']._modules['attn']._modules['resid_dropout'].inplace, accessed_by=DictGetItemGuardAccessor(inplace) + | | | | | | | | | | | | | | | | | | | | +- ID_MATCH: ___check_obj_id(L['mod']._modules['transformer']._modules['h']._modules['2']._modules['attn']._modules['resid_dropout'].inplace, 94206128801376) + | | | | | | | | | | | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['2']._modules['attn']._modules['resid_dropout'].training, accessed_by=DictGetItemGuardAccessor(training) + | | | | | | | | | | | | | | | | | | | | +- ID_MATCH: ___check_obj_id(L['mod']._modules['transformer']._modules['h']._modules['2']._modules['attn']._modules['resid_dropout'].training, 94206128801408) + | | | | | | | | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['2']._modules['attn'].training, accessed_by=DictGetItemGuardAccessor(training) + | | | | | | | | | | | | | | | | | +- ID_MATCH: ___check_obj_id(L['mod']._modules['transformer']._modules['h']._modules['2']._modules['attn'].training, 94206128801408) + | | | | | | | | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['2']._modules['attn']._parameters, accessed_by=DictGetItemGuardAccessor(_parameters) + | | | | | | | | | | | | | | | | | +- DICT_LENGTH: not 
L['mod']._modules['transformer']._modules['h']._modules['2']._modules['attn']._parameters + | | | | | | | | | | | | | | | | +- DictSubclassGuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['2']._modules['attn']._forward_hooks, accessed_by=DictGetItemGuardAccessor(_forward_hooks) + | | | | | | | | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['2']._modules['attn']._backward_hooks, accessed_by=DictGetItemGuardAccessor(_backward_hooks) + | | | | | | | | | | | | | | | | | +- DICT_LENGTH: not L['mod']._modules['transformer']._modules['h']._modules['2']._modules['attn']._backward_hooks + | | | | | | | | | | | | | | | | +- DictSubclassGuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['2']._modules['attn']._forward_pre_hooks, accessed_by=DictGetItemGuardAccessor(_forward_pre_hooks) + | | | | | | | | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['2']._modules['attn']._backward_pre_hooks, accessed_by=DictGetItemGuardAccessor(_backward_pre_hooks) + | | | | | | | | | | | | | | | | | +- DICT_LENGTH: not L['mod']._modules['transformer']._modules['h']._modules['2']._modules['attn']._backward_pre_hooks + | | | | | | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['2']._modules['ln_2'], accessed_by=DictGetItemGuardAccessor(ln_2) + | | | | | | | | | | | | | | | +- TYPE_MATCH: ___check_type_id(L['mod']._modules['transformer']._modules['h']._modules['2']._modules['ln_2'], 94206531292160) + | | | | | | | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['2']._modules['ln_2'].__dict__, accessed_by=GetGenericDictGuardAccessor + | | | | | | | | | | | | | | | | +- DICT_CONTAINS: not ___dict_contains('forward', L['mod']._modules['transformer']._modules['h']._modules['2']._modules['ln_2'].__dict__) + | | | | | | | | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['2']._modules['ln_2']._parameters, accessed_by=DictGetItemGuardAccessor(_parameters) + | | | | | | | | | | | | | | | | | +- DICT_LENGTH: len(L['mod']._modules['transformer']._modules['h']._modules['2']._modules['ln_2']._parameters) == 2 + | | | | | | | | | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['2']._modules['ln_2']._parameters['weight'], accessed_by=DictGetItemGuardAccessor(weight) + | | | | | | | | | | | | | | | | | | +- TENSOR_MATCH: check_tensor(L['mod']._modules['transformer']._modules['h']._modules['2']._modules['ln_2']._parameters['weight'], Parameter, DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), torch.float32, device=0, requires_grad=True, size=[768], stride=[1]) + | | | | | | | | | | | | | | | | | | +- NO_TENSOR_ALIASING + | | | | | | | | | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['2']._modules['ln_2']._parameters['bias'], accessed_by=DictGetItemGuardAccessor(bias) + | | | | | | | | | | | | | | | | | | +- TENSOR_MATCH: check_tensor(L['mod']._modules['transformer']._modules['h']._modules['2']._modules['ln_2']._parameters['bias'], Parameter, DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), torch.float32, device=0, requires_grad=True, size=[768], stride=[1]) + | | | | | | | | | | | | | | | | | | +- NO_TENSOR_ALIASING + | | | | | | | | | | | | | | | | +- DictSubclassGuardManager: 
source=L['mod']._modules['transformer']._modules['h']._modules['2']._modules['ln_2']._forward_hooks, accessed_by=DictGetItemGuardAccessor(_forward_hooks) + | | | | | | | | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['2']._modules['ln_2']._backward_hooks, accessed_by=DictGetItemGuardAccessor(_backward_hooks) + | | | | | | | | | | | | | | | | | +- DICT_LENGTH: not L['mod']._modules['transformer']._modules['h']._modules['2']._modules['ln_2']._backward_hooks + | | | | | | | | | | | | | | | | +- DictSubclassGuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['2']._modules['ln_2']._forward_pre_hooks, accessed_by=DictGetItemGuardAccessor(_forward_pre_hooks) + | | | | | | | | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['2']._modules['ln_2']._backward_pre_hooks, accessed_by=DictGetItemGuardAccessor(_backward_pre_hooks) + | | | | | | | | | | | | | | | | | +- DICT_LENGTH: not L['mod']._modules['transformer']._modules['h']._modules['2']._modules['ln_2']._backward_pre_hooks + | | | | | | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['2']._modules['mlp'], accessed_by=DictGetItemGuardAccessor(mlp) + | | | | | | | | | | | | | | | +- TYPE_MATCH: ___check_type_id(L['mod']._modules['transformer']._modules['h']._modules['2']._modules['mlp'], 94206531294080) + | | | | | | | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['2']._modules['mlp'].__dict__, accessed_by=GetGenericDictGuardAccessor + | | | | | | | | | | | | | | | | +- DICT_CONTAINS: not ___dict_contains('forward', L['mod']._modules['transformer']._modules['h']._modules['2']._modules['mlp'].__dict__) + | | | | | | | | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['2']._modules['mlp']._modules, accessed_by=DictGetItemGuardAccessor(_modules) + | | | | | | | | | | | | | | | | | +- DICT_LENGTH: len(L['mod']._modules['transformer']._modules['h']._modules['2']._modules['mlp']._modules) == 3 + | | | | | | | | | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['2']._modules['mlp']._modules['c_fc'], accessed_by=DictGetItemGuardAccessor(c_fc) + | | | | | | | | | | | | | | | | | | +- TYPE_MATCH: ___check_type_id(L['mod']._modules['transformer']._modules['h']._modules['2']._modules['mlp']._modules['c_fc'], 94206198956688) + | | | | | | | | | | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['2']._modules['mlp']._modules['c_fc'].__dict__, accessed_by=GetGenericDictGuardAccessor + | | | | | | | | | | | | | | | | | | | +- DICT_CONTAINS: not ___dict_contains('forward', L['mod']._modules['transformer']._modules['h']._modules['2']._modules['mlp']._modules['c_fc'].__dict__) + | | | | | | | | | | | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['2']._modules['mlp']._modules['c_fc']._parameters, accessed_by=DictGetItemGuardAccessor(_parameters) + | | | | | | | | | | | | | | | | | | | | +- DICT_LENGTH: len(L['mod']._modules['transformer']._modules['h']._modules['2']._modules['mlp']._modules['c_fc']._parameters) == 2 + | | | | | | | | | | | | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['2']._modules['mlp']._modules['c_fc']._parameters['weight'], accessed_by=DictGetItemGuardAccessor(weight) + | | | | | | | | | | 
| | | | | | | | | | | +- TENSOR_MATCH: check_tensor(L['mod']._modules['transformer']._modules['h']._modules['2']._modules['mlp']._modules['c_fc']._parameters['weight'], Parameter, DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), torch.float32, device=0, requires_grad=True, size=[3072, 768], stride=[768, 1]) + | | | | | | | | | | | | | | | | | | | | | +- NO_TENSOR_ALIASING + | | | | | | | | | | | | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['2']._modules['mlp']._modules['c_fc']._parameters['bias'], accessed_by=DictGetItemGuardAccessor(bias) + | | | | | | | | | | | | | | | | | | | | | +- TENSOR_MATCH: check_tensor(L['mod']._modules['transformer']._modules['h']._modules['2']._modules['mlp']._modules['c_fc']._parameters['bias'], Parameter, DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), torch.float32, device=0, requires_grad=True, size=[3072], stride=[1]) + | | | | | | | | | | | | | | | | | | | | | +- NO_TENSOR_ALIASING + | | | | | | | | | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['2']._modules['mlp']._modules['c_proj'], accessed_by=DictGetItemGuardAccessor(c_proj) + | | | | | | | | | | | | | | | | | | +- TYPE_MATCH: ___check_type_id(L['mod']._modules['transformer']._modules['h']._modules['2']._modules['mlp']._modules['c_proj'], 94206198956688) + | | | | | | | | | | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['2']._modules['mlp']._modules['c_proj'].__dict__, accessed_by=GetGenericDictGuardAccessor + | | | | | | | | | | | | | | | | | | | +- DICT_CONTAINS: not ___dict_contains('forward', L['mod']._modules['transformer']._modules['h']._modules['2']._modules['mlp']._modules['c_proj'].__dict__) + | | | | | | | | | | | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['2']._modules['mlp']._modules['c_proj']._parameters, accessed_by=DictGetItemGuardAccessor(_parameters) + | | | | | | | | | | | | | | | | | | | | +- DICT_LENGTH: len(L['mod']._modules['transformer']._modules['h']._modules['2']._modules['mlp']._modules['c_proj']._parameters) == 2 + | | | | | | | | | | | | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['2']._modules['mlp']._modules['c_proj']._parameters['weight'], accessed_by=DictGetItemGuardAccessor(weight) + | | | | | | | | | | | | | | | | | | | | | +- TENSOR_MATCH: check_tensor(L['mod']._modules['transformer']._modules['h']._modules['2']._modules['mlp']._modules['c_proj']._parameters['weight'], Parameter, DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), torch.float32, device=0, requires_grad=True, size=[768, 3072], stride=[3072, 1]) + | | | | | | | | | | | | | | | | | | | | | +- NO_TENSOR_ALIASING + | | | | | | | | | | | | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['2']._modules['mlp']._modules['c_proj']._parameters['bias'], accessed_by=DictGetItemGuardAccessor(bias) + | | | | | | | | | | | | | | | | | | | | | +- TENSOR_MATCH: check_tensor(L['mod']._modules['transformer']._modules['h']._modules['2']._modules['mlp']._modules['c_proj']._parameters['bias'], Parameter, DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), torch.float32, device=0, requires_grad=True, size=[768], stride=[1]) + | | | | | | | | | | | | | | | | | | | | | +- NO_TENSOR_ALIASING + | | | | | | | | | | | | | | | | | +- GuardManager: 
source=L['mod']._modules['transformer']._modules['h']._modules['2']._modules['mlp']._modules['dropout'], accessed_by=DictGetItemGuardAccessor(dropout) + | | | | | | | | | | | | | | | | | | +- TYPE_MATCH: ___check_type_id(L['mod']._modules['transformer']._modules['h']._modules['2']._modules['mlp']._modules['dropout'], 94206199111456) + | | | | | | | | | | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['2']._modules['mlp']._modules['dropout'].__dict__, accessed_by=GetGenericDictGuardAccessor + | | | | | | | | | | | | | | | | | | | +- DICT_CONTAINS: not ___dict_contains('forward', L['mod']._modules['transformer']._modules['h']._modules['2']._modules['mlp']._modules['dropout'].__dict__) + | | | | | | | | | | | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['2']._modules['mlp']._modules['dropout'].p, accessed_by=DictGetItemGuardAccessor(p) + | | | | | | | | | | | | | | | | | | | | +- EQUALS_MATCH: L['mod']._modules['transformer']._modules['h']._modules['2']._modules['mlp']._modules['dropout'].p == 0.0 + | | | | | | | | | | | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['2']._modules['mlp']._modules['dropout'].inplace, accessed_by=DictGetItemGuardAccessor(inplace) + | | | | | | | | | | | | | | | | | | | | +- ID_MATCH: ___check_obj_id(L['mod']._modules['transformer']._modules['h']._modules['2']._modules['mlp']._modules['dropout'].inplace, 94206128801376) + | | | | | | | | | | | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['2']._modules['mlp']._modules['dropout'].training, accessed_by=DictGetItemGuardAccessor(training) + | | | | | | | | | | | | | | | | | | | | +- ID_MATCH: ___check_obj_id(L['mod']._modules['transformer']._modules['h']._modules['2']._modules['mlp']._modules['dropout'].training, 94206128801408) + | | | | | | | | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['2']._modules['mlp']._parameters, accessed_by=DictGetItemGuardAccessor(_parameters) + | | | | | | | | | | | | | | | | | +- DICT_LENGTH: not L['mod']._modules['transformer']._modules['h']._modules['2']._modules['mlp']._parameters + | | | | | | | | | | | | | | | | +- DictSubclassGuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['2']._modules['mlp']._forward_hooks, accessed_by=DictGetItemGuardAccessor(_forward_hooks) + | | | | | | | | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['2']._modules['mlp']._backward_hooks, accessed_by=DictGetItemGuardAccessor(_backward_hooks) + | | | | | | | | | | | | | | | | | +- DICT_LENGTH: not L['mod']._modules['transformer']._modules['h']._modules['2']._modules['mlp']._backward_hooks + | | | | | | | | | | | | | | | | +- DictSubclassGuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['2']._modules['mlp']._forward_pre_hooks, accessed_by=DictGetItemGuardAccessor(_forward_pre_hooks) + | | | | | | | | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['2']._modules['mlp']._backward_pre_hooks, accessed_by=DictGetItemGuardAccessor(_backward_pre_hooks) + | | | | | | | | | | | | | | | | | +- DICT_LENGTH: not L['mod']._modules['transformer']._modules['h']._modules['2']._modules['mlp']._backward_pre_hooks + | | | | | | | | | | | | | +- GuardManager: 
source=L['mod']._modules['transformer']._modules['h']._modules['2']._parameters, accessed_by=DictGetItemGuardAccessor(_parameters) + | | | | | | | | | | | | | | +- DICT_LENGTH: not L['mod']._modules['transformer']._modules['h']._modules['2']._parameters + | | | | | | | | | | | | | +- DictSubclassGuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['2']._forward_hooks, accessed_by=DictGetItemGuardAccessor(_forward_hooks) + | | | | | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['2']._backward_hooks, accessed_by=DictGetItemGuardAccessor(_backward_hooks) + | | | | | | | | | | | | | | +- DICT_LENGTH: not L['mod']._modules['transformer']._modules['h']._modules['2']._backward_hooks + | | | | | | | | | | | | | +- DictSubclassGuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['2']._forward_pre_hooks, accessed_by=DictGetItemGuardAccessor(_forward_pre_hooks) + | | | | | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['2']._backward_pre_hooks, accessed_by=DictGetItemGuardAccessor(_backward_pre_hooks) + | | | | | | | | | | | | | | +- DICT_LENGTH: not L['mod']._modules['transformer']._modules['h']._modules['2']._backward_pre_hooks + | | | | | | | | | | +- KeyValueManager pair at index=3 + | | | | | | | | | | | +- KeyManager: GuardManager: source=list(L['mod']._modules['transformer']._modules['h']._modules.keys())[3] + | | | | | | | | | | | | +- EQUALS_MATCH: list(L['mod']._modules['transformer']._modules['h']._modules.keys())[3] == '3' + | | | | | | | | | | | +- ValueManager: GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['3'] + | | | | | | | | | | | | +- TYPE_MATCH: ___check_type_id(L['mod']._modules['transformer']._modules['h']._modules['3'], 94206531295040) + | | | | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['3'].__dict__, accessed_by=GetGenericDictGuardAccessor + | | | | | | | | | | | | | +- DICT_CONTAINS: not ___dict_contains('forward', L['mod']._modules['transformer']._modules['h']._modules['3'].__dict__) + | | | | | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['3']._modules, accessed_by=DictGetItemGuardAccessor(_modules) + | | | | | | | | | | | | | | +- DICT_LENGTH: len(L['mod']._modules['transformer']._modules['h']._modules['3']._modules) == 4 + | | | | | | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['3']._modules['ln_1'], accessed_by=DictGetItemGuardAccessor(ln_1) + | | | | | | | | | | | | | | | +- TYPE_MATCH: ___check_type_id(L['mod']._modules['transformer']._modules['h']._modules['3']._modules['ln_1'], 94206531292160) + | | | | | | | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['3']._modules['ln_1'].__dict__, accessed_by=GetGenericDictGuardAccessor + | | | | | | | | | | | | | | | | +- DICT_CONTAINS: not ___dict_contains('forward', L['mod']._modules['transformer']._modules['h']._modules['3']._modules['ln_1'].__dict__) + | | | | | | | | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['3']._modules['ln_1']._parameters, accessed_by=DictGetItemGuardAccessor(_parameters) + | | | | | | | | | | | | | | | | | +- DICT_LENGTH: len(L['mod']._modules['transformer']._modules['h']._modules['3']._modules['ln_1']._parameters) == 2 + | | | | | | | | | | | | | | | | | +- GuardManager: 
source=L['mod']._modules['transformer']._modules['h']._modules['3']._modules['ln_1']._parameters['weight'], accessed_by=DictGetItemGuardAccessor(weight) + | | | | | | | | | | | | | | | | | | +- TENSOR_MATCH: check_tensor(L['mod']._modules['transformer']._modules['h']._modules['3']._modules['ln_1']._parameters['weight'], Parameter, DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), torch.float32, device=0, requires_grad=True, size=[768], stride=[1]) + | | | | | | | | | | | | | | | | | | +- NO_TENSOR_ALIASING + | | | | | | | | | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['3']._modules['ln_1']._parameters['bias'], accessed_by=DictGetItemGuardAccessor(bias) + | | | | | | | | | | | | | | | | | | +- TENSOR_MATCH: check_tensor(L['mod']._modules['transformer']._modules['h']._modules['3']._modules['ln_1']._parameters['bias'], Parameter, DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), torch.float32, device=0, requires_grad=True, size=[768], stride=[1]) + | | | | | | | | | | | | | | | | | | +- NO_TENSOR_ALIASING + | | | | | | | | | | | | | | | | +- DictSubclassGuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['3']._modules['ln_1']._forward_hooks, accessed_by=DictGetItemGuardAccessor(_forward_hooks) + | | | | | | | | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['3']._modules['ln_1']._backward_hooks, accessed_by=DictGetItemGuardAccessor(_backward_hooks) + | | | | | | | | | | | | | | | | | +- DICT_LENGTH: not L['mod']._modules['transformer']._modules['h']._modules['3']._modules['ln_1']._backward_hooks + | | | | | | | | | | | | | | | | +- DictSubclassGuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['3']._modules['ln_1']._forward_pre_hooks, accessed_by=DictGetItemGuardAccessor(_forward_pre_hooks) + | | | | | | | | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['3']._modules['ln_1']._backward_pre_hooks, accessed_by=DictGetItemGuardAccessor(_backward_pre_hooks) + | | | | | | | | | | | | | | | | | +- DICT_LENGTH: not L['mod']._modules['transformer']._modules['h']._modules['3']._modules['ln_1']._backward_pre_hooks + | | | | | | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['3']._modules['attn'], accessed_by=DictGetItemGuardAccessor(attn) + | | | | | | | | | | | | | | | +- TYPE_MATCH: ___check_type_id(L['mod']._modules['transformer']._modules['h']._modules['3']._modules['attn'], 94206531293120) + | | | | | | | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['3']._modules['attn'].__dict__, accessed_by=GetGenericDictGuardAccessor + | | | | | | | | | | | | | | | | +- DICT_CONTAINS: not ___dict_contains('forward', L['mod']._modules['transformer']._modules['h']._modules['3']._modules['attn'].__dict__) + | | | | | | | | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['3']._modules['attn'].flash, accessed_by=DictGetItemGuardAccessor(flash) + | | | | | | | | | | | | | | | | | +- ID_MATCH: ___check_obj_id(L['mod']._modules['transformer']._modules['h']._modules['3']._modules['attn'].flash, 94206128801408) + | | | | | | | | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['3']._modules['attn'].n_embd, accessed_by=DictGetItemGuardAccessor(n_embd) + | | | | | | | | | | | | | | | | | +- 
EQUALS_MATCH: L['mod']._modules['transformer']._modules['h']._modules['3']._modules['attn'].n_embd == 768 + | | | | | | | | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['3']._modules['attn'].n_head, accessed_by=DictGetItemGuardAccessor(n_head) + | | | | | | | | | | | | | | | | | +- EQUALS_MATCH: L['mod']._modules['transformer']._modules['h']._modules['3']._modules['attn'].n_head == 12 + | | | | | | | | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['3']._modules['attn'].dropout, accessed_by=DictGetItemGuardAccessor(dropout) + | | | | | | | | | | | | | | | | | +- EQUALS_MATCH: L['mod']._modules['transformer']._modules['h']._modules['3']._modules['attn'].dropout == 0.0 + | | | | | | | | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['3']._modules['attn']._modules, accessed_by=DictGetItemGuardAccessor(_modules) + | | | | | | | | | | | | | | | | | +- DICT_LENGTH: len(L['mod']._modules['transformer']._modules['h']._modules['3']._modules['attn']._modules) == 4 + | | | | | | | | | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['3']._modules['attn']._modules['c_attn'], accessed_by=DictGetItemGuardAccessor(c_attn) + | | | | | | | | | | | | | | | | | | +- TYPE_MATCH: ___check_type_id(L['mod']._modules['transformer']._modules['h']._modules['3']._modules['attn']._modules['c_attn'], 94206198956688) + | | | | | | | | | | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['3']._modules['attn']._modules['c_attn'].__dict__, accessed_by=GetGenericDictGuardAccessor + | | | | | | | | | | | | | | | | | | | +- DICT_CONTAINS: not ___dict_contains('forward', L['mod']._modules['transformer']._modules['h']._modules['3']._modules['attn']._modules['c_attn'].__dict__) + | | | | | | | | | | | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['3']._modules['attn']._modules['c_attn']._parameters, accessed_by=DictGetItemGuardAccessor(_parameters) + | | | | | | | | | | | | | | | | | | | | +- DICT_LENGTH: len(L['mod']._modules['transformer']._modules['h']._modules['3']._modules['attn']._modules['c_attn']._parameters) == 2 + | | | | | | | | | | | | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['3']._modules['attn']._modules['c_attn']._parameters['weight'], accessed_by=DictGetItemGuardAccessor(weight) + | | | | | | | | | | | | | | | | | | | | | +- TENSOR_MATCH: check_tensor(L['mod']._modules['transformer']._modules['h']._modules['3']._modules['attn']._modules['c_attn']._parameters['weight'], Parameter, DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), torch.float32, device=0, requires_grad=True, size=[2304, 768], stride=[768, 1]) + | | | | | | | | | | | | | | | | | | | | | +- NO_TENSOR_ALIASING + | | | | | | | | | | | | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['3']._modules['attn']._modules['c_attn']._parameters['bias'], accessed_by=DictGetItemGuardAccessor(bias) + | | | | | | | | | | | | | | | | | | | | | +- TENSOR_MATCH: check_tensor(L['mod']._modules['transformer']._modules['h']._modules['3']._modules['attn']._modules['c_attn']._parameters['bias'], Parameter, DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), torch.float32, device=0, requires_grad=True, size=[2304], stride=[1]) + | | | | | | | | | | | 
| | | | | | | | | | +- NO_TENSOR_ALIASING + | | | | | | | | | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['3']._modules['attn']._modules['c_proj'], accessed_by=DictGetItemGuardAccessor(c_proj) + | | | | | | | | | | | | | | | | | | +- TYPE_MATCH: ___check_type_id(L['mod']._modules['transformer']._modules['h']._modules['3']._modules['attn']._modules['c_proj'], 94206198956688) + | | | | | | | | | | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['3']._modules['attn']._modules['c_proj'].__dict__, accessed_by=GetGenericDictGuardAccessor + | | | | | | | | | | | | | | | | | | | +- DICT_CONTAINS: not ___dict_contains('forward', L['mod']._modules['transformer']._modules['h']._modules['3']._modules['attn']._modules['c_proj'].__dict__) + | | | | | | | | | | | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['3']._modules['attn']._modules['c_proj']._parameters, accessed_by=DictGetItemGuardAccessor(_parameters) + | | | | | | | | | | | | | | | | | | | | +- DICT_LENGTH: len(L['mod']._modules['transformer']._modules['h']._modules['3']._modules['attn']._modules['c_proj']._parameters) == 2 + | | | | | | | | | | | | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['3']._modules['attn']._modules['c_proj']._parameters['weight'], accessed_by=DictGetItemGuardAccessor(weight) + | | | | | | | | | | | | | | | | | | | | | +- TENSOR_MATCH: check_tensor(L['mod']._modules['transformer']._modules['h']._modules['3']._modules['attn']._modules['c_proj']._parameters['weight'], Parameter, DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), torch.float32, device=0, requires_grad=True, size=[768, 768], stride=[768, 1]) + | | | | | | | | | | | | | | | | | | | | | +- NO_TENSOR_ALIASING + | | | | | | | | | | | | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['3']._modules['attn']._modules['c_proj']._parameters['bias'], accessed_by=DictGetItemGuardAccessor(bias) + | | | | | | | | | | | | | | | | | | | | | +- TENSOR_MATCH: check_tensor(L['mod']._modules['transformer']._modules['h']._modules['3']._modules['attn']._modules['c_proj']._parameters['bias'], Parameter, DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), torch.float32, device=0, requires_grad=True, size=[768], stride=[1]) + | | | | | | | | | | | | | | | | | | | | | +- NO_TENSOR_ALIASING + | | | | | | | | | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['3']._modules['attn']._modules['attn_dropout'], accessed_by=DictGetItemGuardAccessor(attn_dropout) + | | | | | | | | | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['3']._modules['attn']._modules['resid_dropout'], accessed_by=DictGetItemGuardAccessor(resid_dropout) + | | | | | | | | | | | | | | | | | | +- TYPE_MATCH: ___check_type_id(L['mod']._modules['transformer']._modules['h']._modules['3']._modules['attn']._modules['resid_dropout'], 94206199111456) + | | | | | | | | | | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['3']._modules['attn']._modules['resid_dropout'].__dict__, accessed_by=GetGenericDictGuardAccessor + | | | | | | | | | | | | | | | | | | | +- DICT_CONTAINS: not ___dict_contains('forward', 
L['mod']._modules['transformer']._modules['h']._modules['3']._modules['attn']._modules['resid_dropout'].__dict__) + | | | | | | | | | | | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['3']._modules['attn']._modules['resid_dropout'].p, accessed_by=DictGetItemGuardAccessor(p) + | | | | | | | | | | | | | | | | | | | | +- EQUALS_MATCH: L['mod']._modules['transformer']._modules['h']._modules['3']._modules['attn']._modules['resid_dropout'].p == 0.0 + | | | | | | | | | | | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['3']._modules['attn']._modules['resid_dropout'].inplace, accessed_by=DictGetItemGuardAccessor(inplace) + | | | | | | | | | | | | | | | | | | | | +- ID_MATCH: ___check_obj_id(L['mod']._modules['transformer']._modules['h']._modules['3']._modules['attn']._modules['resid_dropout'].inplace, 94206128801376) + | | | | | | | | | | | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['3']._modules['attn']._modules['resid_dropout'].training, accessed_by=DictGetItemGuardAccessor(training) + | | | | | | | | | | | | | | | | | | | | +- ID_MATCH: ___check_obj_id(L['mod']._modules['transformer']._modules['h']._modules['3']._modules['attn']._modules['resid_dropout'].training, 94206128801408) + | | | | | | | | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['3']._modules['attn'].training, accessed_by=DictGetItemGuardAccessor(training) + | | | | | | | | | | | | | | | | | +- ID_MATCH: ___check_obj_id(L['mod']._modules['transformer']._modules['h']._modules['3']._modules['attn'].training, 94206128801408) + | | | | | | | | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['3']._modules['attn']._parameters, accessed_by=DictGetItemGuardAccessor(_parameters) + | | | | | | | | | | | | | | | | | +- DICT_LENGTH: not L['mod']._modules['transformer']._modules['h']._modules['3']._modules['attn']._parameters + | | | | | | | | | | | | | | | | +- DictSubclassGuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['3']._modules['attn']._forward_hooks, accessed_by=DictGetItemGuardAccessor(_forward_hooks) + | | | | | | | | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['3']._modules['attn']._backward_hooks, accessed_by=DictGetItemGuardAccessor(_backward_hooks) + | | | | | | | | | | | | | | | | | +- DICT_LENGTH: not L['mod']._modules['transformer']._modules['h']._modules['3']._modules['attn']._backward_hooks + | | | | | | | | | | | | | | | | +- DictSubclassGuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['3']._modules['attn']._forward_pre_hooks, accessed_by=DictGetItemGuardAccessor(_forward_pre_hooks) + | | | | | | | | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['3']._modules['attn']._backward_pre_hooks, accessed_by=DictGetItemGuardAccessor(_backward_pre_hooks) + | | | | | | | | | | | | | | | | | +- DICT_LENGTH: not L['mod']._modules['transformer']._modules['h']._modules['3']._modules['attn']._backward_pre_hooks + | | | | | | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['3']._modules['ln_2'], accessed_by=DictGetItemGuardAccessor(ln_2) + | | | | | | | | | | | | | | | +- TYPE_MATCH: ___check_type_id(L['mod']._modules['transformer']._modules['h']._modules['3']._modules['ln_2'], 
94206531292160) + | | | | | | | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['3']._modules['ln_2'].__dict__, accessed_by=GetGenericDictGuardAccessor + | | | | | | | | | | | | | | | | +- DICT_CONTAINS: not ___dict_contains('forward', L['mod']._modules['transformer']._modules['h']._modules['3']._modules['ln_2'].__dict__) + | | | | | | | | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['3']._modules['ln_2']._parameters, accessed_by=DictGetItemGuardAccessor(_parameters) + | | | | | | | | | | | | | | | | | +- DICT_LENGTH: len(L['mod']._modules['transformer']._modules['h']._modules['3']._modules['ln_2']._parameters) == 2 + | | | | | | | | | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['3']._modules['ln_2']._parameters['weight'], accessed_by=DictGetItemGuardAccessor(weight) + | | | | | | | | | | | | | | | | | | +- TENSOR_MATCH: check_tensor(L['mod']._modules['transformer']._modules['h']._modules['3']._modules['ln_2']._parameters['weight'], Parameter, DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), torch.float32, device=0, requires_grad=True, size=[768], stride=[1]) + | | | | | | | | | | | | | | | | | | +- NO_TENSOR_ALIASING + | | | | | | | | | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['3']._modules['ln_2']._parameters['bias'], accessed_by=DictGetItemGuardAccessor(bias) + | | | | | | | | | | | | | | | | | | +- TENSOR_MATCH: check_tensor(L['mod']._modules['transformer']._modules['h']._modules['3']._modules['ln_2']._parameters['bias'], Parameter, DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), torch.float32, device=0, requires_grad=True, size=[768], stride=[1]) + | | | | | | | | | | | | | | | | | | +- NO_TENSOR_ALIASING + | | | | | | | | | | | | | | | | +- DictSubclassGuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['3']._modules['ln_2']._forward_hooks, accessed_by=DictGetItemGuardAccessor(_forward_hooks) + | | | | | | | | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['3']._modules['ln_2']._backward_hooks, accessed_by=DictGetItemGuardAccessor(_backward_hooks) + | | | | | | | | | | | | | | | | | +- DICT_LENGTH: not L['mod']._modules['transformer']._modules['h']._modules['3']._modules['ln_2']._backward_hooks + | | | | | | | | | | | | | | | | +- DictSubclassGuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['3']._modules['ln_2']._forward_pre_hooks, accessed_by=DictGetItemGuardAccessor(_forward_pre_hooks) + | | | | | | | | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['3']._modules['ln_2']._backward_pre_hooks, accessed_by=DictGetItemGuardAccessor(_backward_pre_hooks) + | | | | | | | | | | | | | | | | | +- DICT_LENGTH: not L['mod']._modules['transformer']._modules['h']._modules['3']._modules['ln_2']._backward_pre_hooks + | | | | | | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['3']._modules['mlp'], accessed_by=DictGetItemGuardAccessor(mlp) + | | | | | | | | | | | | | | | +- TYPE_MATCH: ___check_type_id(L['mod']._modules['transformer']._modules['h']._modules['3']._modules['mlp'], 94206531294080) + | | | | | | | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['3']._modules['mlp'].__dict__, 
accessed_by=GetGenericDictGuardAccessor + | | | | | | | | | | | | | | | | +- DICT_CONTAINS: not ___dict_contains('forward', L['mod']._modules['transformer']._modules['h']._modules['3']._modules['mlp'].__dict__) + | | | | | | | | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['3']._modules['mlp']._modules, accessed_by=DictGetItemGuardAccessor(_modules) + | | | | | | | | | | | | | | | | | +- DICT_LENGTH: len(L['mod']._modules['transformer']._modules['h']._modules['3']._modules['mlp']._modules) == 3 + | | | | | | | | | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['3']._modules['mlp']._modules['c_fc'], accessed_by=DictGetItemGuardAccessor(c_fc) + | | | | | | | | | | | | | | | | | | +- TYPE_MATCH: ___check_type_id(L['mod']._modules['transformer']._modules['h']._modules['3']._modules['mlp']._modules['c_fc'], 94206198956688) + | | | | | | | | | | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['3']._modules['mlp']._modules['c_fc'].__dict__, accessed_by=GetGenericDictGuardAccessor + | | | | | | | | | | | | | | | | | | | +- DICT_CONTAINS: not ___dict_contains('forward', L['mod']._modules['transformer']._modules['h']._modules['3']._modules['mlp']._modules['c_fc'].__dict__) + | | | | | | | | | | | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['3']._modules['mlp']._modules['c_fc']._parameters, accessed_by=DictGetItemGuardAccessor(_parameters) + | | | | | | | | | | | | | | | | | | | | +- DICT_LENGTH: len(L['mod']._modules['transformer']._modules['h']._modules['3']._modules['mlp']._modules['c_fc']._parameters) == 2 + | | | | | | | | | | | | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['3']._modules['mlp']._modules['c_fc']._parameters['weight'], accessed_by=DictGetItemGuardAccessor(weight) + | | | | | | | | | | | | | | | | | | | | | +- TENSOR_MATCH: check_tensor(L['mod']._modules['transformer']._modules['h']._modules['3']._modules['mlp']._modules['c_fc']._parameters['weight'], Parameter, DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), torch.float32, device=0, requires_grad=True, size=[3072, 768], stride=[768, 1]) + | | | | | | | | | | | | | | | | | | | | | +- NO_TENSOR_ALIASING + | | | | | | | | | | | | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['3']._modules['mlp']._modules['c_fc']._parameters['bias'], accessed_by=DictGetItemGuardAccessor(bias) + | | | | | | | | | | | | | | | | | | | | | +- TENSOR_MATCH: check_tensor(L['mod']._modules['transformer']._modules['h']._modules['3']._modules['mlp']._modules['c_fc']._parameters['bias'], Parameter, DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), torch.float32, device=0, requires_grad=True, size=[3072], stride=[1]) + | | | | | | | | | | | | | | | | | | | | | +- NO_TENSOR_ALIASING + | | | | | | | | | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['3']._modules['mlp']._modules['c_proj'], accessed_by=DictGetItemGuardAccessor(c_proj) + | | | | | | | | | | | | | | | | | | +- TYPE_MATCH: ___check_type_id(L['mod']._modules['transformer']._modules['h']._modules['3']._modules['mlp']._modules['c_proj'], 94206198956688) + | | | | | | | | | | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['3']._modules['mlp']._modules['c_proj'].__dict__, 
accessed_by=GetGenericDictGuardAccessor + | | | | | | | | | | | | | | | | | | | +- DICT_CONTAINS: not ___dict_contains('forward', L['mod']._modules['transformer']._modules['h']._modules['3']._modules['mlp']._modules['c_proj'].__dict__) + | | | | | | | | | | | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['3']._modules['mlp']._modules['c_proj']._parameters, accessed_by=DictGetItemGuardAccessor(_parameters) + | | | | | | | | | | | | | | | | | | | | +- DICT_LENGTH: len(L['mod']._modules['transformer']._modules['h']._modules['3']._modules['mlp']._modules['c_proj']._parameters) == 2 + | | | | | | | | | | | | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['3']._modules['mlp']._modules['c_proj']._parameters['weight'], accessed_by=DictGetItemGuardAccessor(weight) + | | | | | | | | | | | | | | | | | | | | | +- TENSOR_MATCH: check_tensor(L['mod']._modules['transformer']._modules['h']._modules['3']._modules['mlp']._modules['c_proj']._parameters['weight'], Parameter, DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), torch.float32, device=0, requires_grad=True, size=[768, 3072], stride=[3072, 1]) + | | | | | | | | | | | | | | | | | | | | | +- NO_TENSOR_ALIASING + | | | | | | | | | | | | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['3']._modules['mlp']._modules['c_proj']._parameters['bias'], accessed_by=DictGetItemGuardAccessor(bias) + | | | | | | | | | | | | | | | | | | | | | +- TENSOR_MATCH: check_tensor(L['mod']._modules['transformer']._modules['h']._modules['3']._modules['mlp']._modules['c_proj']._parameters['bias'], Parameter, DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), torch.float32, device=0, requires_grad=True, size=[768], stride=[1]) + | | | | | | | | | | | | | | | | | | | | | +- NO_TENSOR_ALIASING + | | | | | | | | | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['3']._modules['mlp']._modules['dropout'], accessed_by=DictGetItemGuardAccessor(dropout) + | | | | | | | | | | | | | | | | | | +- TYPE_MATCH: ___check_type_id(L['mod']._modules['transformer']._modules['h']._modules['3']._modules['mlp']._modules['dropout'], 94206199111456) + | | | | | | | | | | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['3']._modules['mlp']._modules['dropout'].__dict__, accessed_by=GetGenericDictGuardAccessor + | | | | | | | | | | | | | | | | | | | +- DICT_CONTAINS: not ___dict_contains('forward', L['mod']._modules['transformer']._modules['h']._modules['3']._modules['mlp']._modules['dropout'].__dict__) + | | | | | | | | | | | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['3']._modules['mlp']._modules['dropout'].p, accessed_by=DictGetItemGuardAccessor(p) + | | | | | | | | | | | | | | | | | | | | +- EQUALS_MATCH: L['mod']._modules['transformer']._modules['h']._modules['3']._modules['mlp']._modules['dropout'].p == 0.0 + | | | | | | | | | | | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['3']._modules['mlp']._modules['dropout'].inplace, accessed_by=DictGetItemGuardAccessor(inplace) + | | | | | | | | | | | | | | | | | | | | +- ID_MATCH: ___check_obj_id(L['mod']._modules['transformer']._modules['h']._modules['3']._modules['mlp']._modules['dropout'].inplace, 94206128801376) + | | | | | | | | | | | | | | | | | | | +- GuardManager: 
source=L['mod']._modules['transformer']._modules['h']._modules['3']._modules['mlp']._modules['dropout'].training, accessed_by=DictGetItemGuardAccessor(training) + | | | | | | | | | | | | | | | | | | | | +- ID_MATCH: ___check_obj_id(L['mod']._modules['transformer']._modules['h']._modules['3']._modules['mlp']._modules['dropout'].training, 94206128801408) + | | | | | | | | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['3']._modules['mlp']._parameters, accessed_by=DictGetItemGuardAccessor(_parameters) + | | | | | | | | | | | | | | | | | +- DICT_LENGTH: not L['mod']._modules['transformer']._modules['h']._modules['3']._modules['mlp']._parameters + | | | | | | | | | | | | | | | | +- DictSubclassGuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['3']._modules['mlp']._forward_hooks, accessed_by=DictGetItemGuardAccessor(_forward_hooks) + | | | | | | | | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['3']._modules['mlp']._backward_hooks, accessed_by=DictGetItemGuardAccessor(_backward_hooks) + | | | | | | | | | | | | | | | | | +- DICT_LENGTH: not L['mod']._modules['transformer']._modules['h']._modules['3']._modules['mlp']._backward_hooks + | | | | | | | | | | | | | | | | +- DictSubclassGuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['3']._modules['mlp']._forward_pre_hooks, accessed_by=DictGetItemGuardAccessor(_forward_pre_hooks) + | | | | | | | | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['3']._modules['mlp']._backward_pre_hooks, accessed_by=DictGetItemGuardAccessor(_backward_pre_hooks) + | | | | | | | | | | | | | | | | | +- DICT_LENGTH: not L['mod']._modules['transformer']._modules['h']._modules['3']._modules['mlp']._backward_pre_hooks + | | | | | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['3']._parameters, accessed_by=DictGetItemGuardAccessor(_parameters) + | | | | | | | | | | | | | | +- DICT_LENGTH: not L['mod']._modules['transformer']._modules['h']._modules['3']._parameters + | | | | | | | | | | | | | +- DictSubclassGuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['3']._forward_hooks, accessed_by=DictGetItemGuardAccessor(_forward_hooks) + | | | | | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['3']._backward_hooks, accessed_by=DictGetItemGuardAccessor(_backward_hooks) + | | | | | | | | | | | | | | +- DICT_LENGTH: not L['mod']._modules['transformer']._modules['h']._modules['3']._backward_hooks + | | | | | | | | | | | | | +- DictSubclassGuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['3']._forward_pre_hooks, accessed_by=DictGetItemGuardAccessor(_forward_pre_hooks) + | | | | | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['3']._backward_pre_hooks, accessed_by=DictGetItemGuardAccessor(_backward_pre_hooks) + | | | | | | | | | | | | | | +- DICT_LENGTH: not L['mod']._modules['transformer']._modules['h']._modules['3']._backward_pre_hooks + | | | | | | | | | | +- KeyValueManager pair at index=4 + | | | | | | | | | | | +- KeyManager: GuardManager: source=list(L['mod']._modules['transformer']._modules['h']._modules.keys())[4] + | | | | | | | | | | | | +- EQUALS_MATCH: list(L['mod']._modules['transformer']._modules['h']._modules.keys())[4] == '4' + | | | | | | | | | | | +- ValueManager: GuardManager: 
source=L['mod']._modules['transformer']._modules['h']._modules['4'] + | | | | | | | | | | | | +- TYPE_MATCH: ___check_type_id(L['mod']._modules['transformer']._modules['h']._modules['4'], 94206531295040) + | | | | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['4'].__dict__, accessed_by=GetGenericDictGuardAccessor + | | | | | | | | | | | | | +- DICT_CONTAINS: not ___dict_contains('forward', L['mod']._modules['transformer']._modules['h']._modules['4'].__dict__) + | | | | | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['4']._modules, accessed_by=DictGetItemGuardAccessor(_modules) + | | | | | | | | | | | | | | +- DICT_LENGTH: len(L['mod']._modules['transformer']._modules['h']._modules['4']._modules) == 4 + | | | | | | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['4']._modules['ln_1'], accessed_by=DictGetItemGuardAccessor(ln_1) + | | | | | | | | | | | | | | | +- TYPE_MATCH: ___check_type_id(L['mod']._modules['transformer']._modules['h']._modules['4']._modules['ln_1'], 94206531292160) + | | | | | | | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['4']._modules['ln_1'].__dict__, accessed_by=GetGenericDictGuardAccessor + | | | | | | | | | | | | | | | | +- DICT_CONTAINS: not ___dict_contains('forward', L['mod']._modules['transformer']._modules['h']._modules['4']._modules['ln_1'].__dict__) + | | | | | | | | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['4']._modules['ln_1']._parameters, accessed_by=DictGetItemGuardAccessor(_parameters) + | | | | | | | | | | | | | | | | | +- DICT_LENGTH: len(L['mod']._modules['transformer']._modules['h']._modules['4']._modules['ln_1']._parameters) == 2 + | | | | | | | | | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['4']._modules['ln_1']._parameters['weight'], accessed_by=DictGetItemGuardAccessor(weight) + | | | | | | | | | | | | | | | | | | +- TENSOR_MATCH: check_tensor(L['mod']._modules['transformer']._modules['h']._modules['4']._modules['ln_1']._parameters['weight'], Parameter, DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), torch.float32, device=0, requires_grad=True, size=[768], stride=[1]) + | | | | | | | | | | | | | | | | | | +- NO_TENSOR_ALIASING + | | | | | | | | | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['4']._modules['ln_1']._parameters['bias'], accessed_by=DictGetItemGuardAccessor(bias) + | | | | | | | | | | | | | | | | | | +- TENSOR_MATCH: check_tensor(L['mod']._modules['transformer']._modules['h']._modules['4']._modules['ln_1']._parameters['bias'], Parameter, DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), torch.float32, device=0, requires_grad=True, size=[768], stride=[1]) + | | | | | | | | | | | | | | | | | | +- NO_TENSOR_ALIASING + | | | | | | | | | | | | | | | | +- DictSubclassGuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['4']._modules['ln_1']._forward_hooks, accessed_by=DictGetItemGuardAccessor(_forward_hooks) + | | | | | | | | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['4']._modules['ln_1']._backward_hooks, accessed_by=DictGetItemGuardAccessor(_backward_hooks) + | | | | | | | | | | | | | | | | | +- DICT_LENGTH: not 
L['mod']._modules['transformer']._modules['h']._modules['4']._modules['ln_1']._backward_hooks + | | | | | | | | | | | | | | | | +- DictSubclassGuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['4']._modules['ln_1']._forward_pre_hooks, accessed_by=DictGetItemGuardAccessor(_forward_pre_hooks) + | | | | | | | | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['4']._modules['ln_1']._backward_pre_hooks, accessed_by=DictGetItemGuardAccessor(_backward_pre_hooks) + | | | | | | | | | | | | | | | | | +- DICT_LENGTH: not L['mod']._modules['transformer']._modules['h']._modules['4']._modules['ln_1']._backward_pre_hooks + | | | | | | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['4']._modules['attn'], accessed_by=DictGetItemGuardAccessor(attn) + | | | | | | | | | | | | | | | +- TYPE_MATCH: ___check_type_id(L['mod']._modules['transformer']._modules['h']._modules['4']._modules['attn'], 94206531293120) + | | | | | | | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['4']._modules['attn'].__dict__, accessed_by=GetGenericDictGuardAccessor + | | | | | | | | | | | | | | | | +- DICT_CONTAINS: not ___dict_contains('forward', L['mod']._modules['transformer']._modules['h']._modules['4']._modules['attn'].__dict__) + | | | | | | | | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['4']._modules['attn'].flash, accessed_by=DictGetItemGuardAccessor(flash) + | | | | | | | | | | | | | | | | | +- ID_MATCH: ___check_obj_id(L['mod']._modules['transformer']._modules['h']._modules['4']._modules['attn'].flash, 94206128801408) + | | | | | | | | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['4']._modules['attn'].n_embd, accessed_by=DictGetItemGuardAccessor(n_embd) + | | | | | | | | | | | | | | | | | +- EQUALS_MATCH: L['mod']._modules['transformer']._modules['h']._modules['4']._modules['attn'].n_embd == 768 + | | | | | | | | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['4']._modules['attn'].n_head, accessed_by=DictGetItemGuardAccessor(n_head) + | | | | | | | | | | | | | | | | | +- EQUALS_MATCH: L['mod']._modules['transformer']._modules['h']._modules['4']._modules['attn'].n_head == 12 + | | | | | | | | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['4']._modules['attn'].dropout, accessed_by=DictGetItemGuardAccessor(dropout) + | | | | | | | | | | | | | | | | | +- EQUALS_MATCH: L['mod']._modules['transformer']._modules['h']._modules['4']._modules['attn'].dropout == 0.0 + | | | | | | | | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['4']._modules['attn']._modules, accessed_by=DictGetItemGuardAccessor(_modules) + | | | | | | | | | | | | | | | | | +- DICT_LENGTH: len(L['mod']._modules['transformer']._modules['h']._modules['4']._modules['attn']._modules) == 4 + | | | | | | | | | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['4']._modules['attn']._modules['c_attn'], accessed_by=DictGetItemGuardAccessor(c_attn) + | | | | | | | | | | | | | | | | | | +- TYPE_MATCH: ___check_type_id(L['mod']._modules['transformer']._modules['h']._modules['4']._modules['attn']._modules['c_attn'], 94206198956688) + | | | | | | | | | | | | | | | | | | +- GuardManager: 
source=L['mod']._modules['transformer']._modules['h']._modules['4']._modules['attn']._modules['c_attn'].__dict__, accessed_by=GetGenericDictGuardAccessor + | | | | | | | | | | | | | | | | | | | +- DICT_CONTAINS: not ___dict_contains('forward', L['mod']._modules['transformer']._modules['h']._modules['4']._modules['attn']._modules['c_attn'].__dict__) + | | | | | | | | | | | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['4']._modules['attn']._modules['c_attn']._parameters, accessed_by=DictGetItemGuardAccessor(_parameters) + | | | | | | | | | | | | | | | | | | | | +- DICT_LENGTH: len(L['mod']._modules['transformer']._modules['h']._modules['4']._modules['attn']._modules['c_attn']._parameters) == 2 + | | | | | | | | | | | | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['4']._modules['attn']._modules['c_attn']._parameters['weight'], accessed_by=DictGetItemGuardAccessor(weight) + | | | | | | | | | | | | | | | | | | | | | +- TENSOR_MATCH: check_tensor(L['mod']._modules['transformer']._modules['h']._modules['4']._modules['attn']._modules['c_attn']._parameters['weight'], Parameter, DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), torch.float32, device=0, requires_grad=True, size=[2304, 768], stride=[768, 1]) + | | | | | | | | | | | | | | | | | | | | | +- NO_TENSOR_ALIASING + | | | | | | | | | | | | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['4']._modules['attn']._modules['c_attn']._parameters['bias'], accessed_by=DictGetItemGuardAccessor(bias) + | | | | | | | | | | | | | | | | | | | | | +- TENSOR_MATCH: check_tensor(L['mod']._modules['transformer']._modules['h']._modules['4']._modules['attn']._modules['c_attn']._parameters['bias'], Parameter, DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), torch.float32, device=0, requires_grad=True, size=[2304], stride=[1]) + | | | | | | | | | | | | | | | | | | | | | +- NO_TENSOR_ALIASING + | | | | | | | | | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['4']._modules['attn']._modules['c_proj'], accessed_by=DictGetItemGuardAccessor(c_proj) + | | | | | | | | | | | | | | | | | | +- TYPE_MATCH: ___check_type_id(L['mod']._modules['transformer']._modules['h']._modules['4']._modules['attn']._modules['c_proj'], 94206198956688) + | | | | | | | | | | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['4']._modules['attn']._modules['c_proj'].__dict__, accessed_by=GetGenericDictGuardAccessor + | | | | | | | | | | | | | | | | | | | +- DICT_CONTAINS: not ___dict_contains('forward', L['mod']._modules['transformer']._modules['h']._modules['4']._modules['attn']._modules['c_proj'].__dict__) + | | | | | | | | | | | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['4']._modules['attn']._modules['c_proj']._parameters, accessed_by=DictGetItemGuardAccessor(_parameters) + | | | | | | | | | | | | | | | | | | | | +- DICT_LENGTH: len(L['mod']._modules['transformer']._modules['h']._modules['4']._modules['attn']._modules['c_proj']._parameters) == 2 + | | | | | | | | | | | | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['4']._modules['attn']._modules['c_proj']._parameters['weight'], accessed_by=DictGetItemGuardAccessor(weight) + | | | | | | | | | | | | | | | | | | | | | +- TENSOR_MATCH: 
check_tensor(L['mod']._modules['transformer']._modules['h']._modules['4']._modules['attn']._modules['c_proj']._parameters['weight'], Parameter, DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), torch.float32, device=0, requires_grad=True, size=[768, 768], stride=[768, 1]) + | | | | | | | | | | | | | | | | | | | | | +- NO_TENSOR_ALIASING + | | | | | | | | | | | | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['4']._modules['attn']._modules['c_proj']._parameters['bias'], accessed_by=DictGetItemGuardAccessor(bias) + | | | | | | | | | | | | | | | | | | | | | +- TENSOR_MATCH: check_tensor(L['mod']._modules['transformer']._modules['h']._modules['4']._modules['attn']._modules['c_proj']._parameters['bias'], Parameter, DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), torch.float32, device=0, requires_grad=True, size=[768], stride=[1]) + | | | | | | | | | | | | | | | | | | | | | +- NO_TENSOR_ALIASING + | | | | | | | | | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['4']._modules['attn']._modules['attn_dropout'], accessed_by=DictGetItemGuardAccessor(attn_dropout) + | | | | | | | | | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['4']._modules['attn']._modules['resid_dropout'], accessed_by=DictGetItemGuardAccessor(resid_dropout) + | | | | | | | | | | | | | | | | | | +- TYPE_MATCH: ___check_type_id(L['mod']._modules['transformer']._modules['h']._modules['4']._modules['attn']._modules['resid_dropout'], 94206199111456) + | | | | | | | | | | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['4']._modules['attn']._modules['resid_dropout'].__dict__, accessed_by=GetGenericDictGuardAccessor + | | | | | | | | | | | | | | | | | | | +- DICT_CONTAINS: not ___dict_contains('forward', L['mod']._modules['transformer']._modules['h']._modules['4']._modules['attn']._modules['resid_dropout'].__dict__) + | | | | | | | | | | | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['4']._modules['attn']._modules['resid_dropout'].p, accessed_by=DictGetItemGuardAccessor(p) + | | | | | | | | | | | | | | | | | | | | +- EQUALS_MATCH: L['mod']._modules['transformer']._modules['h']._modules['4']._modules['attn']._modules['resid_dropout'].p == 0.0 + | | | | | | | | | | | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['4']._modules['attn']._modules['resid_dropout'].inplace, accessed_by=DictGetItemGuardAccessor(inplace) + | | | | | | | | | | | | | | | | | | | | +- ID_MATCH: ___check_obj_id(L['mod']._modules['transformer']._modules['h']._modules['4']._modules['attn']._modules['resid_dropout'].inplace, 94206128801376) + | | | | | | | | | | | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['4']._modules['attn']._modules['resid_dropout'].training, accessed_by=DictGetItemGuardAccessor(training) + | | | | | | | | | | | | | | | | | | | | +- ID_MATCH: ___check_obj_id(L['mod']._modules['transformer']._modules['h']._modules['4']._modules['attn']._modules['resid_dropout'].training, 94206128801408) + | | | | | | | | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['4']._modules['attn'].training, accessed_by=DictGetItemGuardAccessor(training) + | | | | | | | | | | | | | | | | | +- ID_MATCH: 
___check_obj_id(L['mod']._modules['transformer']._modules['h']._modules['4']._modules['attn'].training, 94206128801408) + | | | | | | | | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['4']._modules['attn']._parameters, accessed_by=DictGetItemGuardAccessor(_parameters) + | | | | | | | | | | | | | | | | | +- DICT_LENGTH: not L['mod']._modules['transformer']._modules['h']._modules['4']._modules['attn']._parameters + | | | | | | | | | | | | | | | | +- DictSubclassGuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['4']._modules['attn']._forward_hooks, accessed_by=DictGetItemGuardAccessor(_forward_hooks) + | | | | | | | | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['4']._modules['attn']._backward_hooks, accessed_by=DictGetItemGuardAccessor(_backward_hooks) + | | | | | | | | | | | | | | | | | +- DICT_LENGTH: not L['mod']._modules['transformer']._modules['h']._modules['4']._modules['attn']._backward_hooks + | | | | | | | | | | | | | | | | +- DictSubclassGuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['4']._modules['attn']._forward_pre_hooks, accessed_by=DictGetItemGuardAccessor(_forward_pre_hooks) + | | | | | | | | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['4']._modules['attn']._backward_pre_hooks, accessed_by=DictGetItemGuardAccessor(_backward_pre_hooks) + | | | | | | | | | | | | | | | | | +- DICT_LENGTH: not L['mod']._modules['transformer']._modules['h']._modules['4']._modules['attn']._backward_pre_hooks + | | | | | | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['4']._modules['ln_2'], accessed_by=DictGetItemGuardAccessor(ln_2) + | | | | | | | | | | | | | | | +- TYPE_MATCH: ___check_type_id(L['mod']._modules['transformer']._modules['h']._modules['4']._modules['ln_2'], 94206531292160) + | | | | | | | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['4']._modules['ln_2'].__dict__, accessed_by=GetGenericDictGuardAccessor + | | | | | | | | | | | | | | | | +- DICT_CONTAINS: not ___dict_contains('forward', L['mod']._modules['transformer']._modules['h']._modules['4']._modules['ln_2'].__dict__) + | | | | | | | | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['4']._modules['ln_2']._parameters, accessed_by=DictGetItemGuardAccessor(_parameters) + | | | | | | | | | | | | | | | | | +- DICT_LENGTH: len(L['mod']._modules['transformer']._modules['h']._modules['4']._modules['ln_2']._parameters) == 2 + | | | | | | | | | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['4']._modules['ln_2']._parameters['weight'], accessed_by=DictGetItemGuardAccessor(weight) + | | | | | | | | | | | | | | | | | | +- TENSOR_MATCH: check_tensor(L['mod']._modules['transformer']._modules['h']._modules['4']._modules['ln_2']._parameters['weight'], Parameter, DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), torch.float32, device=0, requires_grad=True, size=[768], stride=[1]) + | | | | | | | | | | | | | | | | | | +- NO_TENSOR_ALIASING + | | | | | | | | | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['4']._modules['ln_2']._parameters['bias'], accessed_by=DictGetItemGuardAccessor(bias) + | | | | | | | | | | | | | | | | | | +- TENSOR_MATCH: 
check_tensor(L['mod']._modules['transformer']._modules['h']._modules['4']._modules['ln_2']._parameters['bias'], Parameter, DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), torch.float32, device=0, requires_grad=True, size=[768], stride=[1]) + | | | | | | | | | | | | | | | | | | +- NO_TENSOR_ALIASING + | | | | | | | | | | | | | | | | +- DictSubclassGuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['4']._modules['ln_2']._forward_hooks, accessed_by=DictGetItemGuardAccessor(_forward_hooks) + | | | | | | | | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['4']._modules['ln_2']._backward_hooks, accessed_by=DictGetItemGuardAccessor(_backward_hooks) + | | | | | | | | | | | | | | | | | +- DICT_LENGTH: not L['mod']._modules['transformer']._modules['h']._modules['4']._modules['ln_2']._backward_hooks + | | | | | | | | | | | | | | | | +- DictSubclassGuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['4']._modules['ln_2']._forward_pre_hooks, accessed_by=DictGetItemGuardAccessor(_forward_pre_hooks) + | | | | | | | | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['4']._modules['ln_2']._backward_pre_hooks, accessed_by=DictGetItemGuardAccessor(_backward_pre_hooks) + | | | | | | | | | | | | | | | | | +- DICT_LENGTH: not L['mod']._modules['transformer']._modules['h']._modules['4']._modules['ln_2']._backward_pre_hooks + | | | | | | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['4']._modules['mlp'], accessed_by=DictGetItemGuardAccessor(mlp) + | | | | | | | | | | | | | | | +- TYPE_MATCH: ___check_type_id(L['mod']._modules['transformer']._modules['h']._modules['4']._modules['mlp'], 94206531294080) + | | | | | | | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['4']._modules['mlp'].__dict__, accessed_by=GetGenericDictGuardAccessor + | | | | | | | | | | | | | | | | +- DICT_CONTAINS: not ___dict_contains('forward', L['mod']._modules['transformer']._modules['h']._modules['4']._modules['mlp'].__dict__) + | | | | | | | | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['4']._modules['mlp']._modules, accessed_by=DictGetItemGuardAccessor(_modules) + | | | | | | | | | | | | | | | | | +- DICT_LENGTH: len(L['mod']._modules['transformer']._modules['h']._modules['4']._modules['mlp']._modules) == 3 + | | | | | | | | | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['4']._modules['mlp']._modules['c_fc'], accessed_by=DictGetItemGuardAccessor(c_fc) + | | | | | | | | | | | | | | | | | | +- TYPE_MATCH: ___check_type_id(L['mod']._modules['transformer']._modules['h']._modules['4']._modules['mlp']._modules['c_fc'], 94206198956688) + | | | | | | | | | | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['4']._modules['mlp']._modules['c_fc'].__dict__, accessed_by=GetGenericDictGuardAccessor + | | | | | | | | | | | | | | | | | | | +- DICT_CONTAINS: not ___dict_contains('forward', L['mod']._modules['transformer']._modules['h']._modules['4']._modules['mlp']._modules['c_fc'].__dict__) + | | | | | | | | | | | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['4']._modules['mlp']._modules['c_fc']._parameters, accessed_by=DictGetItemGuardAccessor(_parameters) + | | | | | | | | | | | | | | | | | | | 
| +- DICT_LENGTH: len(L['mod']._modules['transformer']._modules['h']._modules['4']._modules['mlp']._modules['c_fc']._parameters) == 2 + | | | | | | | | | | | | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['4']._modules['mlp']._modules['c_fc']._parameters['weight'], accessed_by=DictGetItemGuardAccessor(weight) + | | | | | | | | | | | | | | | | | | | | | +- TENSOR_MATCH: check_tensor(L['mod']._modules['transformer']._modules['h']._modules['4']._modules['mlp']._modules['c_fc']._parameters['weight'], Parameter, DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), torch.float32, device=0, requires_grad=True, size=[3072, 768], stride=[768, 1]) + | | | | | | | | | | | | | | | | | | | | | +- NO_TENSOR_ALIASING + | | | | | | | | | | | | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['4']._modules['mlp']._modules['c_fc']._parameters['bias'], accessed_by=DictGetItemGuardAccessor(bias) + | | | | | | | | | | | | | | | | | | | | | +- TENSOR_MATCH: check_tensor(L['mod']._modules['transformer']._modules['h']._modules['4']._modules['mlp']._modules['c_fc']._parameters['bias'], Parameter, DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), torch.float32, device=0, requires_grad=True, size=[3072], stride=[1]) + | | | | | | | | | | | | | | | | | | | | | +- NO_TENSOR_ALIASING + | | | | | | | | | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['4']._modules['mlp']._modules['c_proj'], accessed_by=DictGetItemGuardAccessor(c_proj) + | | | | | | | | | | | | | | | | | | +- TYPE_MATCH: ___check_type_id(L['mod']._modules['transformer']._modules['h']._modules['4']._modules['mlp']._modules['c_proj'], 94206198956688) + | | | | | | | | | | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['4']._modules['mlp']._modules['c_proj'].__dict__, accessed_by=GetGenericDictGuardAccessor + | | | | | | | | | | | | | | | | | | | +- DICT_CONTAINS: not ___dict_contains('forward', L['mod']._modules['transformer']._modules['h']._modules['4']._modules['mlp']._modules['c_proj'].__dict__) + | | | | | | | | | | | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['4']._modules['mlp']._modules['c_proj']._parameters, accessed_by=DictGetItemGuardAccessor(_parameters) + | | | | | | | | | | | | | | | | | | | | +- DICT_LENGTH: len(L['mod']._modules['transformer']._modules['h']._modules['4']._modules['mlp']._modules['c_proj']._parameters) == 2 + | | | | | | | | | | | | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['4']._modules['mlp']._modules['c_proj']._parameters['weight'], accessed_by=DictGetItemGuardAccessor(weight) + | | | | | | | | | | | | | | | | | | | | | +- TENSOR_MATCH: check_tensor(L['mod']._modules['transformer']._modules['h']._modules['4']._modules['mlp']._modules['c_proj']._parameters['weight'], Parameter, DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), torch.float32, device=0, requires_grad=True, size=[768, 3072], stride=[3072, 1]) + | | | | | | | | | | | | | | | | | | | | | +- NO_TENSOR_ALIASING + | | | | | | | | | | | | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['4']._modules['mlp']._modules['c_proj']._parameters['bias'], accessed_by=DictGetItemGuardAccessor(bias) + | | | | | | | | | | | | | | | | | | | | | +- TENSOR_MATCH: 
check_tensor(L['mod']._modules['transformer']._modules['h']._modules['4']._modules['mlp']._modules['c_proj']._parameters['bias'], Parameter, DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), torch.float32, device=0, requires_grad=True, size=[768], stride=[1]) + | | | | | | | | | | | | | | | | | | | | | +- NO_TENSOR_ALIASING + | | | | | | | | | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['4']._modules['mlp']._modules['dropout'], accessed_by=DictGetItemGuardAccessor(dropout) + | | | | | | | | | | | | | | | | | | +- TYPE_MATCH: ___check_type_id(L['mod']._modules['transformer']._modules['h']._modules['4']._modules['mlp']._modules['dropout'], 94206199111456) + | | | | | | | | | | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['4']._modules['mlp']._modules['dropout'].__dict__, accessed_by=GetGenericDictGuardAccessor + | | | | | | | | | | | | | | | | | | | +- DICT_CONTAINS: not ___dict_contains('forward', L['mod']._modules['transformer']._modules['h']._modules['4']._modules['mlp']._modules['dropout'].__dict__) + | | | | | | | | | | | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['4']._modules['mlp']._modules['dropout'].p, accessed_by=DictGetItemGuardAccessor(p) + | | | | | | | | | | | | | | | | | | | | +- EQUALS_MATCH: L['mod']._modules['transformer']._modules['h']._modules['4']._modules['mlp']._modules['dropout'].p == 0.0 + | | | | | | | | | | | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['4']._modules['mlp']._modules['dropout'].inplace, accessed_by=DictGetItemGuardAccessor(inplace) + | | | | | | | | | | | | | | | | | | | | +- ID_MATCH: ___check_obj_id(L['mod']._modules['transformer']._modules['h']._modules['4']._modules['mlp']._modules['dropout'].inplace, 94206128801376) + | | | | | | | | | | | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['4']._modules['mlp']._modules['dropout'].training, accessed_by=DictGetItemGuardAccessor(training) + | | | | | | | | | | | | | | | | | | | | +- ID_MATCH: ___check_obj_id(L['mod']._modules['transformer']._modules['h']._modules['4']._modules['mlp']._modules['dropout'].training, 94206128801408) + | | | | | | | | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['4']._modules['mlp']._parameters, accessed_by=DictGetItemGuardAccessor(_parameters) + | | | | | | | | | | | | | | | | | +- DICT_LENGTH: not L['mod']._modules['transformer']._modules['h']._modules['4']._modules['mlp']._parameters + | | | | | | | | | | | | | | | | +- DictSubclassGuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['4']._modules['mlp']._forward_hooks, accessed_by=DictGetItemGuardAccessor(_forward_hooks) + | | | | | | | | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['4']._modules['mlp']._backward_hooks, accessed_by=DictGetItemGuardAccessor(_backward_hooks) + | | | | | | | | | | | | | | | | | +- DICT_LENGTH: not L['mod']._modules['transformer']._modules['h']._modules['4']._modules['mlp']._backward_hooks + | | | | | | | | | | | | | | | | +- DictSubclassGuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['4']._modules['mlp']._forward_pre_hooks, accessed_by=DictGetItemGuardAccessor(_forward_pre_hooks) + | | | | | | | | | | | | | | | | +- GuardManager: 
source=L['mod']._modules['transformer']._modules['h']._modules['4']._modules['mlp']._backward_pre_hooks, accessed_by=DictGetItemGuardAccessor(_backward_pre_hooks) + | | | | | | | | | | | | | | | | | +- DICT_LENGTH: not L['mod']._modules['transformer']._modules['h']._modules['4']._modules['mlp']._backward_pre_hooks + | | | | | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['4']._parameters, accessed_by=DictGetItemGuardAccessor(_parameters) + | | | | | | | | | | | | | | +- DICT_LENGTH: not L['mod']._modules['transformer']._modules['h']._modules['4']._parameters + | | | | | | | | | | | | | +- DictSubclassGuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['4']._forward_hooks, accessed_by=DictGetItemGuardAccessor(_forward_hooks) + | | | | | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['4']._backward_hooks, accessed_by=DictGetItemGuardAccessor(_backward_hooks) + | | | | | | | | | | | | | | +- DICT_LENGTH: not L['mod']._modules['transformer']._modules['h']._modules['4']._backward_hooks + | | | | | | | | | | | | | +- DictSubclassGuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['4']._forward_pre_hooks, accessed_by=DictGetItemGuardAccessor(_forward_pre_hooks) + | | | | | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['4']._backward_pre_hooks, accessed_by=DictGetItemGuardAccessor(_backward_pre_hooks) + | | | | | | | | | | | | | | +- DICT_LENGTH: not L['mod']._modules['transformer']._modules['h']._modules['4']._backward_pre_hooks + | | | | | | | | | | +- KeyValueManager pair at index=5 + | | | | | | | | | | | +- KeyManager: GuardManager: source=list(L['mod']._modules['transformer']._modules['h']._modules.keys())[5] + | | | | | | | | | | | | +- EQUALS_MATCH: list(L['mod']._modules['transformer']._modules['h']._modules.keys())[5] == '5' + | | | | | | | | | | | +- ValueManager: GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['5'] + | | | | | | | | | | | | +- TYPE_MATCH: ___check_type_id(L['mod']._modules['transformer']._modules['h']._modules['5'], 94206531295040) + | | | | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['5'].__dict__, accessed_by=GetGenericDictGuardAccessor + | | | | | | | | | | | | | +- DICT_CONTAINS: not ___dict_contains('forward', L['mod']._modules['transformer']._modules['h']._modules['5'].__dict__) + | | | | | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['5']._modules, accessed_by=DictGetItemGuardAccessor(_modules) + | | | | | | | | | | | | | | +- DICT_LENGTH: len(L['mod']._modules['transformer']._modules['h']._modules['5']._modules) == 4 + | | | | | | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['5']._modules['ln_1'], accessed_by=DictGetItemGuardAccessor(ln_1) + | | | | | | | | | | | | | | | +- TYPE_MATCH: ___check_type_id(L['mod']._modules['transformer']._modules['h']._modules['5']._modules['ln_1'], 94206531292160) + | | | | | | | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['5']._modules['ln_1'].__dict__, accessed_by=GetGenericDictGuardAccessor + | | | | | | | | | | | | | | | | +- DICT_CONTAINS: not ___dict_contains('forward', L['mod']._modules['transformer']._modules['h']._modules['5']._modules['ln_1'].__dict__) + | | | | | | | | | | | | | | | | +- 
GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['5']._modules['ln_1']._parameters, accessed_by=DictGetItemGuardAccessor(_parameters) + | | | | | | | | | | | | | | | | | +- DICT_LENGTH: len(L['mod']._modules['transformer']._modules['h']._modules['5']._modules['ln_1']._parameters) == 2 + | | | | | | | | | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['5']._modules['ln_1']._parameters['weight'], accessed_by=DictGetItemGuardAccessor(weight) + | | | | | | | | | | | | | | | | | | +- TENSOR_MATCH: check_tensor(L['mod']._modules['transformer']._modules['h']._modules['5']._modules['ln_1']._parameters['weight'], Parameter, DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), torch.float32, device=0, requires_grad=True, size=[768], stride=[1]) + | | | | | | | | | | | | | | | | | | +- NO_TENSOR_ALIASING + | | | | | | | | | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['5']._modules['ln_1']._parameters['bias'], accessed_by=DictGetItemGuardAccessor(bias) + | | | | | | | | | | | | | | | | | | +- TENSOR_MATCH: check_tensor(L['mod']._modules['transformer']._modules['h']._modules['5']._modules['ln_1']._parameters['bias'], Parameter, DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), torch.float32, device=0, requires_grad=True, size=[768], stride=[1]) + | | | | | | | | | | | | | | | | | | +- NO_TENSOR_ALIASING + | | | | | | | | | | | | | | | | +- DictSubclassGuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['5']._modules['ln_1']._forward_hooks, accessed_by=DictGetItemGuardAccessor(_forward_hooks) + | | | | | | | | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['5']._modules['ln_1']._backward_hooks, accessed_by=DictGetItemGuardAccessor(_backward_hooks) + | | | | | | | | | | | | | | | | | +- DICT_LENGTH: not L['mod']._modules['transformer']._modules['h']._modules['5']._modules['ln_1']._backward_hooks + | | | | | | | | | | | | | | | | +- DictSubclassGuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['5']._modules['ln_1']._forward_pre_hooks, accessed_by=DictGetItemGuardAccessor(_forward_pre_hooks) + | | | | | | | | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['5']._modules['ln_1']._backward_pre_hooks, accessed_by=DictGetItemGuardAccessor(_backward_pre_hooks) + | | | | | | | | | | | | | | | | | +- DICT_LENGTH: not L['mod']._modules['transformer']._modules['h']._modules['5']._modules['ln_1']._backward_pre_hooks + | | | | | | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['5']._modules['attn'], accessed_by=DictGetItemGuardAccessor(attn) + | | | | | | | | | | | | | | | +- TYPE_MATCH: ___check_type_id(L['mod']._modules['transformer']._modules['h']._modules['5']._modules['attn'], 94206531293120) + | | | | | | | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['5']._modules['attn'].__dict__, accessed_by=GetGenericDictGuardAccessor + | | | | | | | | | | | | | | | | +- DICT_CONTAINS: not ___dict_contains('forward', L['mod']._modules['transformer']._modules['h']._modules['5']._modules['attn'].__dict__) + | | | | | | | | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['5']._modules['attn'].flash, accessed_by=DictGetItemGuardAccessor(flash) + | | | | | | | | | | | | | | | | 
| +- ID_MATCH: ___check_obj_id(L['mod']._modules['transformer']._modules['h']._modules['5']._modules['attn'].flash, 94206128801408) + | | | | | | | | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['5']._modules['attn'].n_embd, accessed_by=DictGetItemGuardAccessor(n_embd) + | | | | | | | | | | | | | | | | | +- EQUALS_MATCH: L['mod']._modules['transformer']._modules['h']._modules['5']._modules['attn'].n_embd == 768 + | | | | | | | | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['5']._modules['attn'].n_head, accessed_by=DictGetItemGuardAccessor(n_head) + | | | | | | | | | | | | | | | | | +- EQUALS_MATCH: L['mod']._modules['transformer']._modules['h']._modules['5']._modules['attn'].n_head == 12 + | | | | | | | | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['5']._modules['attn'].dropout, accessed_by=DictGetItemGuardAccessor(dropout) + | | | | | | | | | | | | | | | | | +- EQUALS_MATCH: L['mod']._modules['transformer']._modules['h']._modules['5']._modules['attn'].dropout == 0.0 + | | | | | | | | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['5']._modules['attn']._modules, accessed_by=DictGetItemGuardAccessor(_modules) + | | | | | | | | | | | | | | | | | +- DICT_LENGTH: len(L['mod']._modules['transformer']._modules['h']._modules['5']._modules['attn']._modules) == 4 + | | | | | | | | | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['5']._modules['attn']._modules['c_attn'], accessed_by=DictGetItemGuardAccessor(c_attn) + | | | | | | | | | | | | | | | | | | +- TYPE_MATCH: ___check_type_id(L['mod']._modules['transformer']._modules['h']._modules['5']._modules['attn']._modules['c_attn'], 94206198956688) + | | | | | | | | | | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['5']._modules['attn']._modules['c_attn'].__dict__, accessed_by=GetGenericDictGuardAccessor + | | | | | | | | | | | | | | | | | | | +- DICT_CONTAINS: not ___dict_contains('forward', L['mod']._modules['transformer']._modules['h']._modules['5']._modules['attn']._modules['c_attn'].__dict__) + | | | | | | | | | | | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['5']._modules['attn']._modules['c_attn']._parameters, accessed_by=DictGetItemGuardAccessor(_parameters) + | | | | | | | | | | | | | | | | | | | | +- DICT_LENGTH: len(L['mod']._modules['transformer']._modules['h']._modules['5']._modules['attn']._modules['c_attn']._parameters) == 2 + | | | | | | | | | | | | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['5']._modules['attn']._modules['c_attn']._parameters['weight'], accessed_by=DictGetItemGuardAccessor(weight) + | | | | | | | | | | | | | | | | | | | | | +- TENSOR_MATCH: check_tensor(L['mod']._modules['transformer']._modules['h']._modules['5']._modules['attn']._modules['c_attn']._parameters['weight'], Parameter, DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), torch.float32, device=0, requires_grad=True, size=[2304, 768], stride=[768, 1]) + | | | | | | | | | | | | | | | | | | | | | +- NO_TENSOR_ALIASING + | | | | | | | | | | | | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['5']._modules['attn']._modules['c_attn']._parameters['bias'], accessed_by=DictGetItemGuardAccessor(bias) + | | 
| | | | | | | | | | | | | | | | | | | +- TENSOR_MATCH: check_tensor(L['mod']._modules['transformer']._modules['h']._modules['5']._modules['attn']._modules['c_attn']._parameters['bias'], Parameter, DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), torch.float32, device=0, requires_grad=True, size=[2304], stride=[1]) + | | | | | | | | | | | | | | | | | | | | | +- NO_TENSOR_ALIASING + | | | | | | | | | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['5']._modules['attn']._modules['c_proj'], accessed_by=DictGetItemGuardAccessor(c_proj) + | | | | | | | | | | | | | | | | | | +- TYPE_MATCH: ___check_type_id(L['mod']._modules['transformer']._modules['h']._modules['5']._modules['attn']._modules['c_proj'], 94206198956688) + | | | | | | | | | | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['5']._modules['attn']._modules['c_proj'].__dict__, accessed_by=GetGenericDictGuardAccessor + | | | | | | | | | | | | | | | | | | | +- DICT_CONTAINS: not ___dict_contains('forward', L['mod']._modules['transformer']._modules['h']._modules['5']._modules['attn']._modules['c_proj'].__dict__) + | | | | | | | | | | | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['5']._modules['attn']._modules['c_proj']._parameters, accessed_by=DictGetItemGuardAccessor(_parameters) + | | | | | | | | | | | | | | | | | | | | +- DICT_LENGTH: len(L['mod']._modules['transformer']._modules['h']._modules['5']._modules['attn']._modules['c_proj']._parameters) == 2 + | | | | | | | | | | | | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['5']._modules['attn']._modules['c_proj']._parameters['weight'], accessed_by=DictGetItemGuardAccessor(weight) + | | | | | | | | | | | | | | | | | | | | | +- TENSOR_MATCH: check_tensor(L['mod']._modules['transformer']._modules['h']._modules['5']._modules['attn']._modules['c_proj']._parameters['weight'], Parameter, DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), torch.float32, device=0, requires_grad=True, size=[768, 768], stride=[768, 1]) + | | | | | | | | | | | | | | | | | | | | | +- NO_TENSOR_ALIASING + | | | | | | | | | | | | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['5']._modules['attn']._modules['c_proj']._parameters['bias'], accessed_by=DictGetItemGuardAccessor(bias) + | | | | | | | | | | | | | | | | | | | | | +- TENSOR_MATCH: check_tensor(L['mod']._modules['transformer']._modules['h']._modules['5']._modules['attn']._modules['c_proj']._parameters['bias'], Parameter, DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), torch.float32, device=0, requires_grad=True, size=[768], stride=[1]) + | | | | | | | | | | | | | | | | | | | | | +- NO_TENSOR_ALIASING + | | | | | | | | | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['5']._modules['attn']._modules['attn_dropout'], accessed_by=DictGetItemGuardAccessor(attn_dropout) + | | | | | | | | | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['5']._modules['attn']._modules['resid_dropout'], accessed_by=DictGetItemGuardAccessor(resid_dropout) + | | | | | | | | | | | | | | | | | | +- TYPE_MATCH: ___check_type_id(L['mod']._modules['transformer']._modules['h']._modules['5']._modules['attn']._modules['resid_dropout'], 94206199111456) + | | | | | | | | | | | | | | | | | | +- GuardManager: 
source=L['mod']._modules['transformer']._modules['h']._modules['5']._modules['attn']._modules['resid_dropout'].__dict__, accessed_by=GetGenericDictGuardAccessor + | | | | | | | | | | | | | | | | | | | +- DICT_CONTAINS: not ___dict_contains('forward', L['mod']._modules['transformer']._modules['h']._modules['5']._modules['attn']._modules['resid_dropout'].__dict__) + | | | | | | | | | | | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['5']._modules['attn']._modules['resid_dropout'].p, accessed_by=DictGetItemGuardAccessor(p) + | | | | | | | | | | | | | | | | | | | | +- EQUALS_MATCH: L['mod']._modules['transformer']._modules['h']._modules['5']._modules['attn']._modules['resid_dropout'].p == 0.0 + | | | | | | | | | | | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['5']._modules['attn']._modules['resid_dropout'].inplace, accessed_by=DictGetItemGuardAccessor(inplace) + | | | | | | | | | | | | | | | | | | | | +- ID_MATCH: ___check_obj_id(L['mod']._modules['transformer']._modules['h']._modules['5']._modules['attn']._modules['resid_dropout'].inplace, 94206128801376) + | | | | | | | | | | | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['5']._modules['attn']._modules['resid_dropout'].training, accessed_by=DictGetItemGuardAccessor(training) + | | | | | | | | | | | | | | | | | | | | +- ID_MATCH: ___check_obj_id(L['mod']._modules['transformer']._modules['h']._modules['5']._modules['attn']._modules['resid_dropout'].training, 94206128801408) + | | | | | | | | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['5']._modules['attn'].training, accessed_by=DictGetItemGuardAccessor(training) + | | | | | | | | | | | | | | | | | +- ID_MATCH: ___check_obj_id(L['mod']._modules['transformer']._modules['h']._modules['5']._modules['attn'].training, 94206128801408) + | | | | | | | | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['5']._modules['attn']._parameters, accessed_by=DictGetItemGuardAccessor(_parameters) + | | | | | | | | | | | | | | | | | +- DICT_LENGTH: not L['mod']._modules['transformer']._modules['h']._modules['5']._modules['attn']._parameters + | | | | | | | | | | | | | | | | +- DictSubclassGuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['5']._modules['attn']._forward_hooks, accessed_by=DictGetItemGuardAccessor(_forward_hooks) + | | | | | | | | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['5']._modules['attn']._backward_hooks, accessed_by=DictGetItemGuardAccessor(_backward_hooks) + | | | | | | | | | | | | | | | | | +- DICT_LENGTH: not L['mod']._modules['transformer']._modules['h']._modules['5']._modules['attn']._backward_hooks + | | | | | | | | | | | | | | | | +- DictSubclassGuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['5']._modules['attn']._forward_pre_hooks, accessed_by=DictGetItemGuardAccessor(_forward_pre_hooks) + | | | | | | | | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['5']._modules['attn']._backward_pre_hooks, accessed_by=DictGetItemGuardAccessor(_backward_pre_hooks) + | | | | | | | | | | | | | | | | | +- DICT_LENGTH: not L['mod']._modules['transformer']._modules['h']._modules['5']._modules['attn']._backward_pre_hooks + | | | | | | | | | | | | | | +- GuardManager: 
source=L['mod']._modules['transformer']._modules['h']._modules['5']._modules['ln_2'], accessed_by=DictGetItemGuardAccessor(ln_2) + | | | | | | | | | | | | | | | +- TYPE_MATCH: ___check_type_id(L['mod']._modules['transformer']._modules['h']._modules['5']._modules['ln_2'], 94206531292160) + | | | | | | | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['5']._modules['ln_2'].__dict__, accessed_by=GetGenericDictGuardAccessor + | | | | | | | | | | | | | | | | +- DICT_CONTAINS: not ___dict_contains('forward', L['mod']._modules['transformer']._modules['h']._modules['5']._modules['ln_2'].__dict__) + | | | | | | | | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['5']._modules['ln_2']._parameters, accessed_by=DictGetItemGuardAccessor(_parameters) + | | | | | | | | | | | | | | | | | +- DICT_LENGTH: len(L['mod']._modules['transformer']._modules['h']._modules['5']._modules['ln_2']._parameters) == 2 + | | | | | | | | | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['5']._modules['ln_2']._parameters['weight'], accessed_by=DictGetItemGuardAccessor(weight) + | | | | | | | | | | | | | | | | | | +- TENSOR_MATCH: check_tensor(L['mod']._modules['transformer']._modules['h']._modules['5']._modules['ln_2']._parameters['weight'], Parameter, DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), torch.float32, device=0, requires_grad=True, size=[768], stride=[1]) + | | | | | | | | | | | | | | | | | | +- NO_TENSOR_ALIASING + | | | | | | | | | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['5']._modules['ln_2']._parameters['bias'], accessed_by=DictGetItemGuardAccessor(bias) + | | | | | | | | | | | | | | | | | | +- TENSOR_MATCH: check_tensor(L['mod']._modules['transformer']._modules['h']._modules['5']._modules['ln_2']._parameters['bias'], Parameter, DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), torch.float32, device=0, requires_grad=True, size=[768], stride=[1]) + | | | | | | | | | | | | | | | | | | +- NO_TENSOR_ALIASING + | | | | | | | | | | | | | | | | +- DictSubclassGuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['5']._modules['ln_2']._forward_hooks, accessed_by=DictGetItemGuardAccessor(_forward_hooks) + | | | | | | | | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['5']._modules['ln_2']._backward_hooks, accessed_by=DictGetItemGuardAccessor(_backward_hooks) + | | | | | | | | | | | | | | | | | +- DICT_LENGTH: not L['mod']._modules['transformer']._modules['h']._modules['5']._modules['ln_2']._backward_hooks + | | | | | | | | | | | | | | | | +- DictSubclassGuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['5']._modules['ln_2']._forward_pre_hooks, accessed_by=DictGetItemGuardAccessor(_forward_pre_hooks) + | | | | | | | | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['5']._modules['ln_2']._backward_pre_hooks, accessed_by=DictGetItemGuardAccessor(_backward_pre_hooks) + | | | | | | | | | | | | | | | | | +- DICT_LENGTH: not L['mod']._modules['transformer']._modules['h']._modules['5']._modules['ln_2']._backward_pre_hooks + | | | | | | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['5']._modules['mlp'], accessed_by=DictGetItemGuardAccessor(mlp) + | | | | | | | | | | | | | | | +- TYPE_MATCH: 
___check_type_id(L['mod']._modules['transformer']._modules['h']._modules['5']._modules['mlp'], 94206531294080) + | | | | | | | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['5']._modules['mlp'].__dict__, accessed_by=GetGenericDictGuardAccessor + | | | | | | | | | | | | | | | | +- DICT_CONTAINS: not ___dict_contains('forward', L['mod']._modules['transformer']._modules['h']._modules['5']._modules['mlp'].__dict__) + | | | | | | | | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['5']._modules['mlp']._modules, accessed_by=DictGetItemGuardAccessor(_modules) + | | | | | | | | | | | | | | | | | +- DICT_LENGTH: len(L['mod']._modules['transformer']._modules['h']._modules['5']._modules['mlp']._modules) == 3 + | | | | | | | | | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['5']._modules['mlp']._modules['c_fc'], accessed_by=DictGetItemGuardAccessor(c_fc) + | | | | | | | | | | | | | | | | | | +- TYPE_MATCH: ___check_type_id(L['mod']._modules['transformer']._modules['h']._modules['5']._modules['mlp']._modules['c_fc'], 94206198956688) + | | | | | | | | | | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['5']._modules['mlp']._modules['c_fc'].__dict__, accessed_by=GetGenericDictGuardAccessor + | | | | | | | | | | | | | | | | | | | +- DICT_CONTAINS: not ___dict_contains('forward', L['mod']._modules['transformer']._modules['h']._modules['5']._modules['mlp']._modules['c_fc'].__dict__) + | | | | | | | | | | | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['5']._modules['mlp']._modules['c_fc']._parameters, accessed_by=DictGetItemGuardAccessor(_parameters) + | | | | | | | | | | | | | | | | | | | | +- DICT_LENGTH: len(L['mod']._modules['transformer']._modules['h']._modules['5']._modules['mlp']._modules['c_fc']._parameters) == 2 + | | | | | | | | | | | | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['5']._modules['mlp']._modules['c_fc']._parameters['weight'], accessed_by=DictGetItemGuardAccessor(weight) + | | | | | | | | | | | | | | | | | | | | | +- TENSOR_MATCH: check_tensor(L['mod']._modules['transformer']._modules['h']._modules['5']._modules['mlp']._modules['c_fc']._parameters['weight'], Parameter, DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), torch.float32, device=0, requires_grad=True, size=[3072, 768], stride=[768, 1]) + | | | | | | | | | | | | | | | | | | | | | +- NO_TENSOR_ALIASING + | | | | | | | | | | | | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['5']._modules['mlp']._modules['c_fc']._parameters['bias'], accessed_by=DictGetItemGuardAccessor(bias) + | | | | | | | | | | | | | | | | | | | | | +- TENSOR_MATCH: check_tensor(L['mod']._modules['transformer']._modules['h']._modules['5']._modules['mlp']._modules['c_fc']._parameters['bias'], Parameter, DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), torch.float32, device=0, requires_grad=True, size=[3072], stride=[1]) + | | | | | | | | | | | | | | | | | | | | | +- NO_TENSOR_ALIASING + | | | | | | | | | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['5']._modules['mlp']._modules['c_proj'], accessed_by=DictGetItemGuardAccessor(c_proj) + | | | | | | | | | | | | | | | | | | +- TYPE_MATCH: 
___check_type_id(L['mod']._modules['transformer']._modules['h']._modules['5']._modules['mlp']._modules['c_proj'], 94206198956688) + | | | | | | | | | | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['5']._modules['mlp']._modules['c_proj'].__dict__, accessed_by=GetGenericDictGuardAccessor + | | | | | | | | | | | | | | | | | | | +- DICT_CONTAINS: not ___dict_contains('forward', L['mod']._modules['transformer']._modules['h']._modules['5']._modules['mlp']._modules['c_proj'].__dict__) + | | | | | | | | | | | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['5']._modules['mlp']._modules['c_proj']._parameters, accessed_by=DictGetItemGuardAccessor(_parameters) + | | | | | | | | | | | | | | | | | | | | +- DICT_LENGTH: len(L['mod']._modules['transformer']._modules['h']._modules['5']._modules['mlp']._modules['c_proj']._parameters) == 2 + | | | | | | | | | | | | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['5']._modules['mlp']._modules['c_proj']._parameters['weight'], accessed_by=DictGetItemGuardAccessor(weight) + | | | | | | | | | | | | | | | | | | | | | +- TENSOR_MATCH: check_tensor(L['mod']._modules['transformer']._modules['h']._modules['5']._modules['mlp']._modules['c_proj']._parameters['weight'], Parameter, DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), torch.float32, device=0, requires_grad=True, size=[768, 3072], stride=[3072, 1]) + | | | | | | | | | | | | | | | | | | | | | +- NO_TENSOR_ALIASING + | | | | | | | | | | | | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['5']._modules['mlp']._modules['c_proj']._parameters['bias'], accessed_by=DictGetItemGuardAccessor(bias) + | | | | | | | | | | | | | | | | | | | | | +- TENSOR_MATCH: check_tensor(L['mod']._modules['transformer']._modules['h']._modules['5']._modules['mlp']._modules['c_proj']._parameters['bias'], Parameter, DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), torch.float32, device=0, requires_grad=True, size=[768], stride=[1]) + | | | | | | | | | | | | | | | | | | | | | +- NO_TENSOR_ALIASING + | | | | | | | | | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['5']._modules['mlp']._modules['dropout'], accessed_by=DictGetItemGuardAccessor(dropout) + | | | | | | | | | | | | | | | | | | +- TYPE_MATCH: ___check_type_id(L['mod']._modules['transformer']._modules['h']._modules['5']._modules['mlp']._modules['dropout'], 94206199111456) + | | | | | | | | | | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['5']._modules['mlp']._modules['dropout'].__dict__, accessed_by=GetGenericDictGuardAccessor + | | | | | | | | | | | | | | | | | | | +- DICT_CONTAINS: not ___dict_contains('forward', L['mod']._modules['transformer']._modules['h']._modules['5']._modules['mlp']._modules['dropout'].__dict__) + | | | | | | | | | | | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['5']._modules['mlp']._modules['dropout'].p, accessed_by=DictGetItemGuardAccessor(p) + | | | | | | | | | | | | | | | | | | | | +- EQUALS_MATCH: L['mod']._modules['transformer']._modules['h']._modules['5']._modules['mlp']._modules['dropout'].p == 0.0 + | | | | | | | | | | | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['5']._modules['mlp']._modules['dropout'].inplace, 
accessed_by=DictGetItemGuardAccessor(inplace) + | | | | | | | | | | | | | | | | | | | | +- ID_MATCH: ___check_obj_id(L['mod']._modules['transformer']._modules['h']._modules['5']._modules['mlp']._modules['dropout'].inplace, 94206128801376) + | | | | | | | | | | | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['5']._modules['mlp']._modules['dropout'].training, accessed_by=DictGetItemGuardAccessor(training) + | | | | | | | | | | | | | | | | | | | | +- ID_MATCH: ___check_obj_id(L['mod']._modules['transformer']._modules['h']._modules['5']._modules['mlp']._modules['dropout'].training, 94206128801408) + | | | | | | | | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['5']._modules['mlp']._parameters, accessed_by=DictGetItemGuardAccessor(_parameters) + | | | | | | | | | | | | | | | | | +- DICT_LENGTH: not L['mod']._modules['transformer']._modules['h']._modules['5']._modules['mlp']._parameters + | | | | | | | | | | | | | | | | +- DictSubclassGuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['5']._modules['mlp']._forward_hooks, accessed_by=DictGetItemGuardAccessor(_forward_hooks) + | | | | | | | | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['5']._modules['mlp']._backward_hooks, accessed_by=DictGetItemGuardAccessor(_backward_hooks) + | | | | | | | | | | | | | | | | | +- DICT_LENGTH: not L['mod']._modules['transformer']._modules['h']._modules['5']._modules['mlp']._backward_hooks + | | | | | | | | | | | | | | | | +- DictSubclassGuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['5']._modules['mlp']._forward_pre_hooks, accessed_by=DictGetItemGuardAccessor(_forward_pre_hooks) + | | | | | | | | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['5']._modules['mlp']._backward_pre_hooks, accessed_by=DictGetItemGuardAccessor(_backward_pre_hooks) + | | | | | | | | | | | | | | | | | +- DICT_LENGTH: not L['mod']._modules['transformer']._modules['h']._modules['5']._modules['mlp']._backward_pre_hooks + | | | | | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['5']._parameters, accessed_by=DictGetItemGuardAccessor(_parameters) + | | | | | | | | | | | | | | +- DICT_LENGTH: not L['mod']._modules['transformer']._modules['h']._modules['5']._parameters + | | | | | | | | | | | | | +- DictSubclassGuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['5']._forward_hooks, accessed_by=DictGetItemGuardAccessor(_forward_hooks) + | | | | | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['5']._backward_hooks, accessed_by=DictGetItemGuardAccessor(_backward_hooks) + | | | | | | | | | | | | | | +- DICT_LENGTH: not L['mod']._modules['transformer']._modules['h']._modules['5']._backward_hooks + | | | | | | | | | | | | | +- DictSubclassGuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['5']._forward_pre_hooks, accessed_by=DictGetItemGuardAccessor(_forward_pre_hooks) + | | | | | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['5']._backward_pre_hooks, accessed_by=DictGetItemGuardAccessor(_backward_pre_hooks) + | | | | | | | | | | | | | | +- DICT_LENGTH: not L['mod']._modules['transformer']._modules['h']._modules['5']._backward_pre_hooks + | | | | | | | | | | +- KeyValueManager pair at index=6 + | | | | | 
| | | | | | +- KeyManager: GuardManager: source=list(L['mod']._modules['transformer']._modules['h']._modules.keys())[6] + | | | | | | | | | | | | +- EQUALS_MATCH: list(L['mod']._modules['transformer']._modules['h']._modules.keys())[6] == '6' + | | | | | | | | | | | +- ValueManager: GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['6'] + | | | | | | | | | | | | +- TYPE_MATCH: ___check_type_id(L['mod']._modules['transformer']._modules['h']._modules['6'], 94206531295040) + | | | | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['6'].__dict__, accessed_by=GetGenericDictGuardAccessor + | | | | | | | | | | | | | +- DICT_CONTAINS: not ___dict_contains('forward', L['mod']._modules['transformer']._modules['h']._modules['6'].__dict__) + | | | | | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['6']._modules, accessed_by=DictGetItemGuardAccessor(_modules) + | | | | | | | | | | | | | | +- DICT_LENGTH: len(L['mod']._modules['transformer']._modules['h']._modules['6']._modules) == 4 + | | | | | | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['6']._modules['ln_1'], accessed_by=DictGetItemGuardAccessor(ln_1) + | | | | | | | | | | | | | | | +- TYPE_MATCH: ___check_type_id(L['mod']._modules['transformer']._modules['h']._modules['6']._modules['ln_1'], 94206531292160) + | | | | | | | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['6']._modules['ln_1'].__dict__, accessed_by=GetGenericDictGuardAccessor + | | | | | | | | | | | | | | | | +- DICT_CONTAINS: not ___dict_contains('forward', L['mod']._modules['transformer']._modules['h']._modules['6']._modules['ln_1'].__dict__) + | | | | | | | | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['6']._modules['ln_1']._parameters, accessed_by=DictGetItemGuardAccessor(_parameters) + | | | | | | | | | | | | | | | | | +- DICT_LENGTH: len(L['mod']._modules['transformer']._modules['h']._modules['6']._modules['ln_1']._parameters) == 2 + | | | | | | | | | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['6']._modules['ln_1']._parameters['weight'], accessed_by=DictGetItemGuardAccessor(weight) + | | | | | | | | | | | | | | | | | | +- TENSOR_MATCH: check_tensor(L['mod']._modules['transformer']._modules['h']._modules['6']._modules['ln_1']._parameters['weight'], Parameter, DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), torch.float32, device=0, requires_grad=True, size=[768], stride=[1]) + | | | | | | | | | | | | | | | | | | +- NO_TENSOR_ALIASING + | | | | | | | | | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['6']._modules['ln_1']._parameters['bias'], accessed_by=DictGetItemGuardAccessor(bias) + | | | | | | | | | | | | | | | | | | +- TENSOR_MATCH: check_tensor(L['mod']._modules['transformer']._modules['h']._modules['6']._modules['ln_1']._parameters['bias'], Parameter, DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), torch.float32, device=0, requires_grad=True, size=[768], stride=[1]) + | | | | | | | | | | | | | | | | | | +- NO_TENSOR_ALIASING + | | | | | | | | | | | | | | | | +- DictSubclassGuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['6']._modules['ln_1']._forward_hooks, accessed_by=DictGetItemGuardAccessor(_forward_hooks) + | | | | | | | | | | | | | 
| | | +- GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['6']._modules['ln_1']._backward_hooks, accessed_by=DictGetItemGuardAccessor(_backward_hooks) + | | | | | | | | | | | | | | | | | +- DICT_LENGTH: not L['mod']._modules['transformer']._modules['h']._modules['6']._modules['ln_1']._backward_hooks + | | | | | | | | | | | | | | | | +- DictSubclassGuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['6']._modules['ln_1']._forward_pre_hooks, accessed_by=DictGetItemGuardAccessor(_forward_pre_hooks) + | | | | | | | | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['6']._modules['ln_1']._backward_pre_hooks, accessed_by=DictGetItemGuardAccessor(_backward_pre_hooks) + | | | | | | | | | | | | | | | | | +- DICT_LENGTH: not L['mod']._modules['transformer']._modules['h']._modules['6']._modules['ln_1']._backward_pre_hooks + | | | | | | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['6']._modules['attn'], accessed_by=DictGetItemGuardAccessor(attn) + | | | | | | | | | | | | | | | +- TYPE_MATCH: ___check_type_id(L['mod']._modules['transformer']._modules['h']._modules['6']._modules['attn'], 94206531293120) + | | | | | | | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['6']._modules['attn'].__dict__, accessed_by=GetGenericDictGuardAccessor + | | | | | | | | | | | | | | | | +- DICT_CONTAINS: not ___dict_contains('forward', L['mod']._modules['transformer']._modules['h']._modules['6']._modules['attn'].__dict__) + | | | | | | | | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['6']._modules['attn'].flash, accessed_by=DictGetItemGuardAccessor(flash) + | | | | | | | | | | | | | | | | | +- ID_MATCH: ___check_obj_id(L['mod']._modules['transformer']._modules['h']._modules['6']._modules['attn'].flash, 94206128801408) + | | | | | | | | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['6']._modules['attn'].n_embd, accessed_by=DictGetItemGuardAccessor(n_embd) + | | | | | | | | | | | | | | | | | +- EQUALS_MATCH: L['mod']._modules['transformer']._modules['h']._modules['6']._modules['attn'].n_embd == 768 + | | | | | | | | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['6']._modules['attn'].n_head, accessed_by=DictGetItemGuardAccessor(n_head) + | | | | | | | | | | | | | | | | | +- EQUALS_MATCH: L['mod']._modules['transformer']._modules['h']._modules['6']._modules['attn'].n_head == 12 + | | | | | | | | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['6']._modules['attn'].dropout, accessed_by=DictGetItemGuardAccessor(dropout) + | | | | | | | | | | | | | | | | | +- EQUALS_MATCH: L['mod']._modules['transformer']._modules['h']._modules['6']._modules['attn'].dropout == 0.0 + | | | | | | | | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['6']._modules['attn']._modules, accessed_by=DictGetItemGuardAccessor(_modules) + | | | | | | | | | | | | | | | | | +- DICT_LENGTH: len(L['mod']._modules['transformer']._modules['h']._modules['6']._modules['attn']._modules) == 4 + | | | | | | | | | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['6']._modules['attn']._modules['c_attn'], accessed_by=DictGetItemGuardAccessor(c_attn) + | | | | | | | | | | | | | | | | | | 
+- TYPE_MATCH: ___check_type_id(L['mod']._modules['transformer']._modules['h']._modules['6']._modules['attn']._modules['c_attn'], 94206198956688) + | | | | | | | | | | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['6']._modules['attn']._modules['c_attn'].__dict__, accessed_by=GetGenericDictGuardAccessor + | | | | | | | | | | | | | | | | | | | +- DICT_CONTAINS: not ___dict_contains('forward', L['mod']._modules['transformer']._modules['h']._modules['6']._modules['attn']._modules['c_attn'].__dict__) + | | | | | | | | | | | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['6']._modules['attn']._modules['c_attn']._parameters, accessed_by=DictGetItemGuardAccessor(_parameters) + | | | | | | | | | | | | | | | | | | | | +- DICT_LENGTH: len(L['mod']._modules['transformer']._modules['h']._modules['6']._modules['attn']._modules['c_attn']._parameters) == 2 + | | | | | | | | | | | | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['6']._modules['attn']._modules['c_attn']._parameters['weight'], accessed_by=DictGetItemGuardAccessor(weight) + | | | | | | | | | | | | | | | | | | | | | +- TENSOR_MATCH: check_tensor(L['mod']._modules['transformer']._modules['h']._modules['6']._modules['attn']._modules['c_attn']._parameters['weight'], Parameter, DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), torch.float32, device=0, requires_grad=True, size=[2304, 768], stride=[768, 1]) + | | | | | | | | | | | | | | | | | | | | | +- NO_TENSOR_ALIASING + | | | | | | | | | | | | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['6']._modules['attn']._modules['c_attn']._parameters['bias'], accessed_by=DictGetItemGuardAccessor(bias) + | | | | | | | | | | | | | | | | | | | | | +- TENSOR_MATCH: check_tensor(L['mod']._modules['transformer']._modules['h']._modules['6']._modules['attn']._modules['c_attn']._parameters['bias'], Parameter, DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), torch.float32, device=0, requires_grad=True, size=[2304], stride=[1]) + | | | | | | | | | | | | | | | | | | | | | +- NO_TENSOR_ALIASING + | | | | | | | | | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['6']._modules['attn']._modules['c_proj'], accessed_by=DictGetItemGuardAccessor(c_proj) + | | | | | | | | | | | | | | | | | | +- TYPE_MATCH: ___check_type_id(L['mod']._modules['transformer']._modules['h']._modules['6']._modules['attn']._modules['c_proj'], 94206198956688) + | | | | | | | | | | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['6']._modules['attn']._modules['c_proj'].__dict__, accessed_by=GetGenericDictGuardAccessor + | | | | | | | | | | | | | | | | | | | +- DICT_CONTAINS: not ___dict_contains('forward', L['mod']._modules['transformer']._modules['h']._modules['6']._modules['attn']._modules['c_proj'].__dict__) + | | | | | | | | | | | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['6']._modules['attn']._modules['c_proj']._parameters, accessed_by=DictGetItemGuardAccessor(_parameters) + | | | | | | | | | | | | | | | | | | | | +- DICT_LENGTH: len(L['mod']._modules['transformer']._modules['h']._modules['6']._modules['attn']._modules['c_proj']._parameters) == 2 + | | | | | | | | | | | | | | | | | | | | +- GuardManager: 
source=L['mod']._modules['transformer']._modules['h']._modules['6']._modules['attn']._modules['c_proj']._parameters['weight'], accessed_by=DictGetItemGuardAccessor(weight) + | | | | | | | | | | | | | | | | | | | | | +- TENSOR_MATCH: check_tensor(L['mod']._modules['transformer']._modules['h']._modules['6']._modules['attn']._modules['c_proj']._parameters['weight'], Parameter, DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), torch.float32, device=0, requires_grad=True, size=[768, 768], stride=[768, 1]) + | | | | | | | | | | | | | | | | | | | | | +- NO_TENSOR_ALIASING + | | | | | | | | | | | | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['6']._modules['attn']._modules['c_proj']._parameters['bias'], accessed_by=DictGetItemGuardAccessor(bias) + | | | | | | | | | | | | | | | | | | | | | +- TENSOR_MATCH: check_tensor(L['mod']._modules['transformer']._modules['h']._modules['6']._modules['attn']._modules['c_proj']._parameters['bias'], Parameter, DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), torch.float32, device=0, requires_grad=True, size=[768], stride=[1]) + | | | | | | | | | | | | | | | | | | | | | +- NO_TENSOR_ALIASING + | | | | | | | | | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['6']._modules['attn']._modules['attn_dropout'], accessed_by=DictGetItemGuardAccessor(attn_dropout) + | | | | | | | | | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['6']._modules['attn']._modules['resid_dropout'], accessed_by=DictGetItemGuardAccessor(resid_dropout) + | | | | | | | | | | | | | | | | | | +- TYPE_MATCH: ___check_type_id(L['mod']._modules['transformer']._modules['h']._modules['6']._modules['attn']._modules['resid_dropout'], 94206199111456) + | | | | | | | | | | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['6']._modules['attn']._modules['resid_dropout'].__dict__, accessed_by=GetGenericDictGuardAccessor + | | | | | | | | | | | | | | | | | | | +- DICT_CONTAINS: not ___dict_contains('forward', L['mod']._modules['transformer']._modules['h']._modules['6']._modules['attn']._modules['resid_dropout'].__dict__) + | | | | | | | | | | | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['6']._modules['attn']._modules['resid_dropout'].p, accessed_by=DictGetItemGuardAccessor(p) + | | | | | | | | | | | | | | | | | | | | +- EQUALS_MATCH: L['mod']._modules['transformer']._modules['h']._modules['6']._modules['attn']._modules['resid_dropout'].p == 0.0 + | | | | | | | | | | | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['6']._modules['attn']._modules['resid_dropout'].inplace, accessed_by=DictGetItemGuardAccessor(inplace) + | | | | | | | | | | | | | | | | | | | | +- ID_MATCH: ___check_obj_id(L['mod']._modules['transformer']._modules['h']._modules['6']._modules['attn']._modules['resid_dropout'].inplace, 94206128801376) + | | | | | | | | | | | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['6']._modules['attn']._modules['resid_dropout'].training, accessed_by=DictGetItemGuardAccessor(training) + | | | | | | | | | | | | | | | | | | | | +- ID_MATCH: ___check_obj_id(L['mod']._modules['transformer']._modules['h']._modules['6']._modules['attn']._modules['resid_dropout'].training, 94206128801408) + | | | | | | | | | | | | | | | | +- GuardManager: 
source=L['mod']._modules['transformer']._modules['h']._modules['6']._modules['attn'].training, accessed_by=DictGetItemGuardAccessor(training) + | | | | | | | | | | | | | | | | | +- ID_MATCH: ___check_obj_id(L['mod']._modules['transformer']._modules['h']._modules['6']._modules['attn'].training, 94206128801408) + | | | | | | | | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['6']._modules['attn']._parameters, accessed_by=DictGetItemGuardAccessor(_parameters) + | | | | | | | | | | | | | | | | | +- DICT_LENGTH: not L['mod']._modules['transformer']._modules['h']._modules['6']._modules['attn']._parameters + | | | | | | | | | | | | | | | | +- DictSubclassGuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['6']._modules['attn']._forward_hooks, accessed_by=DictGetItemGuardAccessor(_forward_hooks) + | | | | | | | | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['6']._modules['attn']._backward_hooks, accessed_by=DictGetItemGuardAccessor(_backward_hooks) + | | | | | | | | | | | | | | | | | +- DICT_LENGTH: not L['mod']._modules['transformer']._modules['h']._modules['6']._modules['attn']._backward_hooks + | | | | | | | | | | | | | | | | +- DictSubclassGuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['6']._modules['attn']._forward_pre_hooks, accessed_by=DictGetItemGuardAccessor(_forward_pre_hooks) + | | | | | | | | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['6']._modules['attn']._backward_pre_hooks, accessed_by=DictGetItemGuardAccessor(_backward_pre_hooks) + | | | | | | | | | | | | | | | | | +- DICT_LENGTH: not L['mod']._modules['transformer']._modules['h']._modules['6']._modules['attn']._backward_pre_hooks + | | | | | | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['6']._modules['ln_2'], accessed_by=DictGetItemGuardAccessor(ln_2) + | | | | | | | | | | | | | | | +- TYPE_MATCH: ___check_type_id(L['mod']._modules['transformer']._modules['h']._modules['6']._modules['ln_2'], 94206531292160) + | | | | | | | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['6']._modules['ln_2'].__dict__, accessed_by=GetGenericDictGuardAccessor + | | | | | | | | | | | | | | | | +- DICT_CONTAINS: not ___dict_contains('forward', L['mod']._modules['transformer']._modules['h']._modules['6']._modules['ln_2'].__dict__) + | | | | | | | | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['6']._modules['ln_2']._parameters, accessed_by=DictGetItemGuardAccessor(_parameters) + | | | | | | | | | | | | | | | | | +- DICT_LENGTH: len(L['mod']._modules['transformer']._modules['h']._modules['6']._modules['ln_2']._parameters) == 2 + | | | | | | | | | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['6']._modules['ln_2']._parameters['weight'], accessed_by=DictGetItemGuardAccessor(weight) + | | | | | | | | | | | | | | | | | | +- TENSOR_MATCH: check_tensor(L['mod']._modules['transformer']._modules['h']._modules['6']._modules['ln_2']._parameters['weight'], Parameter, DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), torch.float32, device=0, requires_grad=True, size=[768], stride=[1]) + | | | | | | | | | | | | | | | | | | +- NO_TENSOR_ALIASING + | | | | | | | | | | | | | | | | | +- GuardManager: 
source=L['mod']._modules['transformer']._modules['h']._modules['6']._modules['ln_2']._parameters['bias'], accessed_by=DictGetItemGuardAccessor(bias) + | | | | | | | | | | | | | | | | | | +- TENSOR_MATCH: check_tensor(L['mod']._modules['transformer']._modules['h']._modules['6']._modules['ln_2']._parameters['bias'], Parameter, DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), torch.float32, device=0, requires_grad=True, size=[768], stride=[1]) + | | | | | | | | | | | | | | | | | | +- NO_TENSOR_ALIASING + | | | | | | | | | | | | | | | | +- DictSubclassGuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['6']._modules['ln_2']._forward_hooks, accessed_by=DictGetItemGuardAccessor(_forward_hooks) + | | | | | | | | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['6']._modules['ln_2']._backward_hooks, accessed_by=DictGetItemGuardAccessor(_backward_hooks) + | | | | | | | | | | | | | | | | | +- DICT_LENGTH: not L['mod']._modules['transformer']._modules['h']._modules['6']._modules['ln_2']._backward_hooks + | | | | | | | | | | | | | | | | +- DictSubclassGuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['6']._modules['ln_2']._forward_pre_hooks, accessed_by=DictGetItemGuardAccessor(_forward_pre_hooks) + | | | | | | | | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['6']._modules['ln_2']._backward_pre_hooks, accessed_by=DictGetItemGuardAccessor(_backward_pre_hooks) + | | | | | | | | | | | | | | | | | +- DICT_LENGTH: not L['mod']._modules['transformer']._modules['h']._modules['6']._modules['ln_2']._backward_pre_hooks + | | | | | | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['6']._modules['mlp'], accessed_by=DictGetItemGuardAccessor(mlp) + | | | | | | | | | | | | | | | +- TYPE_MATCH: ___check_type_id(L['mod']._modules['transformer']._modules['h']._modules['6']._modules['mlp'], 94206531294080) + | | | | | | | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['6']._modules['mlp'].__dict__, accessed_by=GetGenericDictGuardAccessor + | | | | | | | | | | | | | | | | +- DICT_CONTAINS: not ___dict_contains('forward', L['mod']._modules['transformer']._modules['h']._modules['6']._modules['mlp'].__dict__) + | | | | | | | | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['6']._modules['mlp']._modules, accessed_by=DictGetItemGuardAccessor(_modules) + | | | | | | | | | | | | | | | | | +- DICT_LENGTH: len(L['mod']._modules['transformer']._modules['h']._modules['6']._modules['mlp']._modules) == 3 + | | | | | | | | | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['6']._modules['mlp']._modules['c_fc'], accessed_by=DictGetItemGuardAccessor(c_fc) + | | | | | | | | | | | | | | | | | | +- TYPE_MATCH: ___check_type_id(L['mod']._modules['transformer']._modules['h']._modules['6']._modules['mlp']._modules['c_fc'], 94206198956688) + | | | | | | | | | | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['6']._modules['mlp']._modules['c_fc'].__dict__, accessed_by=GetGenericDictGuardAccessor + | | | | | | | | | | | | | | | | | | | +- DICT_CONTAINS: not ___dict_contains('forward', L['mod']._modules['transformer']._modules['h']._modules['6']._modules['mlp']._modules['c_fc'].__dict__) + | | | | | | | | | | | | | | | | | | | +- GuardManager: 
source=L['mod']._modules['transformer']._modules['h']._modules['6']._modules['mlp']._modules['c_fc']._parameters, accessed_by=DictGetItemGuardAccessor(_parameters) + | | | | | | | | | | | | | | | | | | | | +- DICT_LENGTH: len(L['mod']._modules['transformer']._modules['h']._modules['6']._modules['mlp']._modules['c_fc']._parameters) == 2 + | | | | | | | | | | | | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['6']._modules['mlp']._modules['c_fc']._parameters['weight'], accessed_by=DictGetItemGuardAccessor(weight) + | | | | | | | | | | | | | | | | | | | | | +- TENSOR_MATCH: check_tensor(L['mod']._modules['transformer']._modules['h']._modules['6']._modules['mlp']._modules['c_fc']._parameters['weight'], Parameter, DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), torch.float32, device=0, requires_grad=True, size=[3072, 768], stride=[768, 1]) + | | | | | | | | | | | | | | | | | | | | | +- NO_TENSOR_ALIASING + | | | | | | | | | | | | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['6']._modules['mlp']._modules['c_fc']._parameters['bias'], accessed_by=DictGetItemGuardAccessor(bias) + | | | | | | | | | | | | | | | | | | | | | +- TENSOR_MATCH: check_tensor(L['mod']._modules['transformer']._modules['h']._modules['6']._modules['mlp']._modules['c_fc']._parameters['bias'], Parameter, DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), torch.float32, device=0, requires_grad=True, size=[3072], stride=[1]) + | | | | | | | | | | | | | | | | | | | | | +- NO_TENSOR_ALIASING + | | | | | | | | | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['6']._modules['mlp']._modules['c_proj'], accessed_by=DictGetItemGuardAccessor(c_proj) + | | | | | | | | | | | | | | | | | | +- TYPE_MATCH: ___check_type_id(L['mod']._modules['transformer']._modules['h']._modules['6']._modules['mlp']._modules['c_proj'], 94206198956688) + | | | | | | | | | | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['6']._modules['mlp']._modules['c_proj'].__dict__, accessed_by=GetGenericDictGuardAccessor + | | | | | | | | | | | | | | | | | | | +- DICT_CONTAINS: not ___dict_contains('forward', L['mod']._modules['transformer']._modules['h']._modules['6']._modules['mlp']._modules['c_proj'].__dict__) + | | | | | | | | | | | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['6']._modules['mlp']._modules['c_proj']._parameters, accessed_by=DictGetItemGuardAccessor(_parameters) + | | | | | | | | | | | | | | | | | | | | +- DICT_LENGTH: len(L['mod']._modules['transformer']._modules['h']._modules['6']._modules['mlp']._modules['c_proj']._parameters) == 2 + | | | | | | | | | | | | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['6']._modules['mlp']._modules['c_proj']._parameters['weight'], accessed_by=DictGetItemGuardAccessor(weight) + | | | | | | | | | | | | | | | | | | | | | +- TENSOR_MATCH: check_tensor(L['mod']._modules['transformer']._modules['h']._modules['6']._modules['mlp']._modules['c_proj']._parameters['weight'], Parameter, DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), torch.float32, device=0, requires_grad=True, size=[768, 3072], stride=[3072, 1]) + | | | | | | | | | | | | | | | | | | | | | +- NO_TENSOR_ALIASING + | | | | | | | | | | | | | | | | | | | | +- GuardManager: 
source=L['mod']._modules['transformer']._modules['h']._modules['6']._modules['mlp']._modules['c_proj']._parameters['bias'], accessed_by=DictGetItemGuardAccessor(bias) + | | | | | | | | | | | | | | | | | | | | | +- TENSOR_MATCH: check_tensor(L['mod']._modules['transformer']._modules['h']._modules['6']._modules['mlp']._modules['c_proj']._parameters['bias'], Parameter, DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), torch.float32, device=0, requires_grad=True, size=[768], stride=[1]) + | | | | | | | | | | | | | | | | | | | | | +- NO_TENSOR_ALIASING + | | | | | | | | | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['6']._modules['mlp']._modules['dropout'], accessed_by=DictGetItemGuardAccessor(dropout) + | | | | | | | | | | | | | | | | | | +- TYPE_MATCH: ___check_type_id(L['mod']._modules['transformer']._modules['h']._modules['6']._modules['mlp']._modules['dropout'], 94206199111456) + | | | | | | | | | | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['6']._modules['mlp']._modules['dropout'].__dict__, accessed_by=GetGenericDictGuardAccessor + | | | | | | | | | | | | | | | | | | | +- DICT_CONTAINS: not ___dict_contains('forward', L['mod']._modules['transformer']._modules['h']._modules['6']._modules['mlp']._modules['dropout'].__dict__) + | | | | | | | | | | | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['6']._modules['mlp']._modules['dropout'].p, accessed_by=DictGetItemGuardAccessor(p) + | | | | | | | | | | | | | | | | | | | | +- EQUALS_MATCH: L['mod']._modules['transformer']._modules['h']._modules['6']._modules['mlp']._modules['dropout'].p == 0.0 + | | | | | | | | | | | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['6']._modules['mlp']._modules['dropout'].inplace, accessed_by=DictGetItemGuardAccessor(inplace) + | | | | | | | | | | | | | | | | | | | | +- ID_MATCH: ___check_obj_id(L['mod']._modules['transformer']._modules['h']._modules['6']._modules['mlp']._modules['dropout'].inplace, 94206128801376) + | | | | | | | | | | | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['6']._modules['mlp']._modules['dropout'].training, accessed_by=DictGetItemGuardAccessor(training) + | | | | | | | | | | | | | | | | | | | | +- ID_MATCH: ___check_obj_id(L['mod']._modules['transformer']._modules['h']._modules['6']._modules['mlp']._modules['dropout'].training, 94206128801408) + | | | | | | | | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['6']._modules['mlp']._parameters, accessed_by=DictGetItemGuardAccessor(_parameters) + | | | | | | | | | | | | | | | | | +- DICT_LENGTH: not L['mod']._modules['transformer']._modules['h']._modules['6']._modules['mlp']._parameters + | | | | | | | | | | | | | | | | +- DictSubclassGuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['6']._modules['mlp']._forward_hooks, accessed_by=DictGetItemGuardAccessor(_forward_hooks) + | | | | | | | | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['6']._modules['mlp']._backward_hooks, accessed_by=DictGetItemGuardAccessor(_backward_hooks) + | | | | | | | | | | | | | | | | | +- DICT_LENGTH: not L['mod']._modules['transformer']._modules['h']._modules['6']._modules['mlp']._backward_hooks + | | | | | | | | | | | | | | | | +- DictSubclassGuardManager: 
source=L['mod']._modules['transformer']._modules['h']._modules['6']._modules['mlp']._forward_pre_hooks, accessed_by=DictGetItemGuardAccessor(_forward_pre_hooks) + | | | | | | | | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['6']._modules['mlp']._backward_pre_hooks, accessed_by=DictGetItemGuardAccessor(_backward_pre_hooks) + | | | | | | | | | | | | | | | | | +- DICT_LENGTH: not L['mod']._modules['transformer']._modules['h']._modules['6']._modules['mlp']._backward_pre_hooks + | | | | | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['6']._parameters, accessed_by=DictGetItemGuardAccessor(_parameters) + | | | | | | | | | | | | | | +- DICT_LENGTH: not L['mod']._modules['transformer']._modules['h']._modules['6']._parameters + | | | | | | | | | | | | | +- DictSubclassGuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['6']._forward_hooks, accessed_by=DictGetItemGuardAccessor(_forward_hooks) + | | | | | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['6']._backward_hooks, accessed_by=DictGetItemGuardAccessor(_backward_hooks) + | | | | | | | | | | | | | | +- DICT_LENGTH: not L['mod']._modules['transformer']._modules['h']._modules['6']._backward_hooks + | | | | | | | | | | | | | +- DictSubclassGuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['6']._forward_pre_hooks, accessed_by=DictGetItemGuardAccessor(_forward_pre_hooks) + | | | | | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['6']._backward_pre_hooks, accessed_by=DictGetItemGuardAccessor(_backward_pre_hooks) + | | | | | | | | | | | | | | +- DICT_LENGTH: not L['mod']._modules['transformer']._modules['h']._modules['6']._backward_pre_hooks + | | | | | | | | | | +- KeyValueManager pair at index=7 + | | | | | | | | | | | +- KeyManager: GuardManager: source=list(L['mod']._modules['transformer']._modules['h']._modules.keys())[7] + | | | | | | | | | | | | +- EQUALS_MATCH: list(L['mod']._modules['transformer']._modules['h']._modules.keys())[7] == '7' + | | | | | | | | | | | +- ValueManager: GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['7'] + | | | | | | | | | | | | +- TYPE_MATCH: ___check_type_id(L['mod']._modules['transformer']._modules['h']._modules['7'], 94206531295040) + | | | | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['7'].__dict__, accessed_by=GetGenericDictGuardAccessor + | | | | | | | | | | | | | +- DICT_CONTAINS: not ___dict_contains('forward', L['mod']._modules['transformer']._modules['h']._modules['7'].__dict__) + | | | | | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['7']._modules, accessed_by=DictGetItemGuardAccessor(_modules) + | | | | | | | | | | | | | | +- DICT_LENGTH: len(L['mod']._modules['transformer']._modules['h']._modules['7']._modules) == 4 + | | | | | | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['7']._modules['ln_1'], accessed_by=DictGetItemGuardAccessor(ln_1) + | | | | | | | | | | | | | | | +- TYPE_MATCH: ___check_type_id(L['mod']._modules['transformer']._modules['h']._modules['7']._modules['ln_1'], 94206531292160) + | | | | | | | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['7']._modules['ln_1'].__dict__, accessed_by=GetGenericDictGuardAccessor + 
| | | | | | | | | | | | | | | | +- DICT_CONTAINS: not ___dict_contains('forward', L['mod']._modules['transformer']._modules['h']._modules['7']._modules['ln_1'].__dict__) + | | | | | | | | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['7']._modules['ln_1']._parameters, accessed_by=DictGetItemGuardAccessor(_parameters) + | | | | | | | | | | | | | | | | | +- DICT_LENGTH: len(L['mod']._modules['transformer']._modules['h']._modules['7']._modules['ln_1']._parameters) == 2 + | | | | | | | | | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['7']._modules['ln_1']._parameters['weight'], accessed_by=DictGetItemGuardAccessor(weight) + | | | | | | | | | | | | | | | | | | +- TENSOR_MATCH: check_tensor(L['mod']._modules['transformer']._modules['h']._modules['7']._modules['ln_1']._parameters['weight'], Parameter, DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), torch.float32, device=0, requires_grad=True, size=[768], stride=[1]) + | | | | | | | | | | | | | | | | | | +- NO_TENSOR_ALIASING + | | | | | | | | | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['7']._modules['ln_1']._parameters['bias'], accessed_by=DictGetItemGuardAccessor(bias) + | | | | | | | | | | | | | | | | | | +- TENSOR_MATCH: check_tensor(L['mod']._modules['transformer']._modules['h']._modules['7']._modules['ln_1']._parameters['bias'], Parameter, DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), torch.float32, device=0, requires_grad=True, size=[768], stride=[1]) + | | | | | | | | | | | | | | | | | | +- NO_TENSOR_ALIASING + | | | | | | | | | | | | | | | | +- DictSubclassGuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['7']._modules['ln_1']._forward_hooks, accessed_by=DictGetItemGuardAccessor(_forward_hooks) + | | | | | | | | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['7']._modules['ln_1']._backward_hooks, accessed_by=DictGetItemGuardAccessor(_backward_hooks) + | | | | | | | | | | | | | | | | | +- DICT_LENGTH: not L['mod']._modules['transformer']._modules['h']._modules['7']._modules['ln_1']._backward_hooks + | | | | | | | | | | | | | | | | +- DictSubclassGuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['7']._modules['ln_1']._forward_pre_hooks, accessed_by=DictGetItemGuardAccessor(_forward_pre_hooks) + | | | | | | | | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['7']._modules['ln_1']._backward_pre_hooks, accessed_by=DictGetItemGuardAccessor(_backward_pre_hooks) + | | | | | | | | | | | | | | | | | +- DICT_LENGTH: not L['mod']._modules['transformer']._modules['h']._modules['7']._modules['ln_1']._backward_pre_hooks + | | | | | | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['7']._modules['attn'], accessed_by=DictGetItemGuardAccessor(attn) + | | | | | | | | | | | | | | | +- TYPE_MATCH: ___check_type_id(L['mod']._modules['transformer']._modules['h']._modules['7']._modules['attn'], 94206531293120) + | | | | | | | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['7']._modules['attn'].__dict__, accessed_by=GetGenericDictGuardAccessor + | | | | | | | | | | | | | | | | +- DICT_CONTAINS: not ___dict_contains('forward', L['mod']._modules['transformer']._modules['h']._modules['7']._modules['attn'].__dict__) + | | | | | | 
| | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['7']._modules['attn'].flash, accessed_by=DictGetItemGuardAccessor(flash) + | | | | | | | | | | | | | | | | | +- ID_MATCH: ___check_obj_id(L['mod']._modules['transformer']._modules['h']._modules['7']._modules['attn'].flash, 94206128801408) + | | | | | | | | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['7']._modules['attn'].n_embd, accessed_by=DictGetItemGuardAccessor(n_embd) + | | | | | | | | | | | | | | | | | +- EQUALS_MATCH: L['mod']._modules['transformer']._modules['h']._modules['7']._modules['attn'].n_embd == 768 + | | | | | | | | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['7']._modules['attn'].n_head, accessed_by=DictGetItemGuardAccessor(n_head) + | | | | | | | | | | | | | | | | | +- EQUALS_MATCH: L['mod']._modules['transformer']._modules['h']._modules['7']._modules['attn'].n_head == 12 + | | | | | | | | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['7']._modules['attn'].dropout, accessed_by=DictGetItemGuardAccessor(dropout) + | | | | | | | | | | | | | | | | | +- EQUALS_MATCH: L['mod']._modules['transformer']._modules['h']._modules['7']._modules['attn'].dropout == 0.0 + | | | | | | | | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['7']._modules['attn']._modules, accessed_by=DictGetItemGuardAccessor(_modules) + | | | | | | | | | | | | | | | | | +- DICT_LENGTH: len(L['mod']._modules['transformer']._modules['h']._modules['7']._modules['attn']._modules) == 4 + | | | | | | | | | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['7']._modules['attn']._modules['c_attn'], accessed_by=DictGetItemGuardAccessor(c_attn) + | | | | | | | | | | | | | | | | | | +- TYPE_MATCH: ___check_type_id(L['mod']._modules['transformer']._modules['h']._modules['7']._modules['attn']._modules['c_attn'], 94206198956688) + | | | | | | | | | | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['7']._modules['attn']._modules['c_attn'].__dict__, accessed_by=GetGenericDictGuardAccessor + | | | | | | | | | | | | | | | | | | | +- DICT_CONTAINS: not ___dict_contains('forward', L['mod']._modules['transformer']._modules['h']._modules['7']._modules['attn']._modules['c_attn'].__dict__) + | | | | | | | | | | | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['7']._modules['attn']._modules['c_attn']._parameters, accessed_by=DictGetItemGuardAccessor(_parameters) + | | | | | | | | | | | | | | | | | | | | +- DICT_LENGTH: len(L['mod']._modules['transformer']._modules['h']._modules['7']._modules['attn']._modules['c_attn']._parameters) == 2 + | | | | | | | | | | | | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['7']._modules['attn']._modules['c_attn']._parameters['weight'], accessed_by=DictGetItemGuardAccessor(weight) + | | | | | | | | | | | | | | | | | | | | | +- TENSOR_MATCH: check_tensor(L['mod']._modules['transformer']._modules['h']._modules['7']._modules['attn']._modules['c_attn']._parameters['weight'], Parameter, DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), torch.float32, device=0, requires_grad=True, size=[2304, 768], stride=[768, 1]) + | | | | | | | | | | | | | | | | | | | | | +- NO_TENSOR_ALIASING + | | | | | | | | | | | | 
| | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['7']._modules['attn']._modules['c_attn']._parameters['bias'], accessed_by=DictGetItemGuardAccessor(bias) + | | | | | | | | | | | | | | | | | | | | | +- TENSOR_MATCH: check_tensor(L['mod']._modules['transformer']._modules['h']._modules['7']._modules['attn']._modules['c_attn']._parameters['bias'], Parameter, DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), torch.float32, device=0, requires_grad=True, size=[2304], stride=[1]) + | | | | | | | | | | | | | | | | | | | | | +- NO_TENSOR_ALIASING + | | | | | | | | | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['7']._modules['attn']._modules['c_proj'], accessed_by=DictGetItemGuardAccessor(c_proj) + | | | | | | | | | | | | | | | | | | +- TYPE_MATCH: ___check_type_id(L['mod']._modules['transformer']._modules['h']._modules['7']._modules['attn']._modules['c_proj'], 94206198956688) + | | | | | | | | | | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['7']._modules['attn']._modules['c_proj'].__dict__, accessed_by=GetGenericDictGuardAccessor + | | | | | | | | | | | | | | | | | | | +- DICT_CONTAINS: not ___dict_contains('forward', L['mod']._modules['transformer']._modules['h']._modules['7']._modules['attn']._modules['c_proj'].__dict__) + | | | | | | | | | | | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['7']._modules['attn']._modules['c_proj']._parameters, accessed_by=DictGetItemGuardAccessor(_parameters) + | | | | | | | | | | | | | | | | | | | | +- DICT_LENGTH: len(L['mod']._modules['transformer']._modules['h']._modules['7']._modules['attn']._modules['c_proj']._parameters) == 2 + | | | | | | | | | | | | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['7']._modules['attn']._modules['c_proj']._parameters['weight'], accessed_by=DictGetItemGuardAccessor(weight) + | | | | | | | | | | | | | | | | | | | | | +- TENSOR_MATCH: check_tensor(L['mod']._modules['transformer']._modules['h']._modules['7']._modules['attn']._modules['c_proj']._parameters['weight'], Parameter, DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), torch.float32, device=0, requires_grad=True, size=[768, 768], stride=[768, 1]) + | | | | | | | | | | | | | | | | | | | | | +- NO_TENSOR_ALIASING + | | | | | | | | | | | | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['7']._modules['attn']._modules['c_proj']._parameters['bias'], accessed_by=DictGetItemGuardAccessor(bias) + | | | | | | | | | | | | | | | | | | | | | +- TENSOR_MATCH: check_tensor(L['mod']._modules['transformer']._modules['h']._modules['7']._modules['attn']._modules['c_proj']._parameters['bias'], Parameter, DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), torch.float32, device=0, requires_grad=True, size=[768], stride=[1]) + | | | | | | | | | | | | | | | | | | | | | +- NO_TENSOR_ALIASING + | | | | | | | | | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['7']._modules['attn']._modules['attn_dropout'], accessed_by=DictGetItemGuardAccessor(attn_dropout) + | | | | | | | | | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['7']._modules['attn']._modules['resid_dropout'], accessed_by=DictGetItemGuardAccessor(resid_dropout) + | | | | | | | | | | | | | | | | | | 
+- TYPE_MATCH: ___check_type_id(L['mod']._modules['transformer']._modules['h']._modules['7']._modules['attn']._modules['resid_dropout'], 94206199111456) + | | | | | | | | | | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['7']._modules['attn']._modules['resid_dropout'].__dict__, accessed_by=GetGenericDictGuardAccessor + | | | | | | | | | | | | | | | | | | | +- DICT_CONTAINS: not ___dict_contains('forward', L['mod']._modules['transformer']._modules['h']._modules['7']._modules['attn']._modules['resid_dropout'].__dict__) + | | | | | | | | | | | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['7']._modules['attn']._modules['resid_dropout'].p, accessed_by=DictGetItemGuardAccessor(p) + | | | | | | | | | | | | | | | | | | | | +- EQUALS_MATCH: L['mod']._modules['transformer']._modules['h']._modules['7']._modules['attn']._modules['resid_dropout'].p == 0.0 + | | | | | | | | | | | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['7']._modules['attn']._modules['resid_dropout'].inplace, accessed_by=DictGetItemGuardAccessor(inplace) + | | | | | | | | | | | | | | | | | | | | +- ID_MATCH: ___check_obj_id(L['mod']._modules['transformer']._modules['h']._modules['7']._modules['attn']._modules['resid_dropout'].inplace, 94206128801376) + | | | | | | | | | | | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['7']._modules['attn']._modules['resid_dropout'].training, accessed_by=DictGetItemGuardAccessor(training) + | | | | | | | | | | | | | | | | | | | | +- ID_MATCH: ___check_obj_id(L['mod']._modules['transformer']._modules['h']._modules['7']._modules['attn']._modules['resid_dropout'].training, 94206128801408) + | | | | | | | | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['7']._modules['attn'].training, accessed_by=DictGetItemGuardAccessor(training) + | | | | | | | | | | | | | | | | | +- ID_MATCH: ___check_obj_id(L['mod']._modules['transformer']._modules['h']._modules['7']._modules['attn'].training, 94206128801408) + | | | | | | | | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['7']._modules['attn']._parameters, accessed_by=DictGetItemGuardAccessor(_parameters) + | | | | | | | | | | | | | | | | | +- DICT_LENGTH: not L['mod']._modules['transformer']._modules['h']._modules['7']._modules['attn']._parameters + | | | | | | | | | | | | | | | | +- DictSubclassGuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['7']._modules['attn']._forward_hooks, accessed_by=DictGetItemGuardAccessor(_forward_hooks) + | | | | | | | | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['7']._modules['attn']._backward_hooks, accessed_by=DictGetItemGuardAccessor(_backward_hooks) + | | | | | | | | | | | | | | | | | +- DICT_LENGTH: not L['mod']._modules['transformer']._modules['h']._modules['7']._modules['attn']._backward_hooks + | | | | | | | | | | | | | | | | +- DictSubclassGuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['7']._modules['attn']._forward_pre_hooks, accessed_by=DictGetItemGuardAccessor(_forward_pre_hooks) + | | | | | | | | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['7']._modules['attn']._backward_pre_hooks, accessed_by=DictGetItemGuardAccessor(_backward_pre_hooks) + | | | | | | | | | | | | | 
| | | | +- DICT_LENGTH: not L['mod']._modules['transformer']._modules['h']._modules['7']._modules['attn']._backward_pre_hooks + | | | | | | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['7']._modules['ln_2'], accessed_by=DictGetItemGuardAccessor(ln_2) + | | | | | | | | | | | | | | | +- TYPE_MATCH: ___check_type_id(L['mod']._modules['transformer']._modules['h']._modules['7']._modules['ln_2'], 94206531292160) + | | | | | | | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['7']._modules['ln_2'].__dict__, accessed_by=GetGenericDictGuardAccessor + | | | | | | | | | | | | | | | | +- DICT_CONTAINS: not ___dict_contains('forward', L['mod']._modules['transformer']._modules['h']._modules['7']._modules['ln_2'].__dict__) + | | | | | | | | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['7']._modules['ln_2']._parameters, accessed_by=DictGetItemGuardAccessor(_parameters) + | | | | | | | | | | | | | | | | | +- DICT_LENGTH: len(L['mod']._modules['transformer']._modules['h']._modules['7']._modules['ln_2']._parameters) == 2 + | | | | | | | | | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['7']._modules['ln_2']._parameters['weight'], accessed_by=DictGetItemGuardAccessor(weight) + | | | | | | | | | | | | | | | | | | +- TENSOR_MATCH: check_tensor(L['mod']._modules['transformer']._modules['h']._modules['7']._modules['ln_2']._parameters['weight'], Parameter, DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), torch.float32, device=0, requires_grad=True, size=[768], stride=[1]) + | | | | | | | | | | | | | | | | | | +- NO_TENSOR_ALIASING + | | | | | | | | | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['7']._modules['ln_2']._parameters['bias'], accessed_by=DictGetItemGuardAccessor(bias) + | | | | | | | | | | | | | | | | | | +- TENSOR_MATCH: check_tensor(L['mod']._modules['transformer']._modules['h']._modules['7']._modules['ln_2']._parameters['bias'], Parameter, DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), torch.float32, device=0, requires_grad=True, size=[768], stride=[1]) + | | | | | | | | | | | | | | | | | | +- NO_TENSOR_ALIASING + | | | | | | | | | | | | | | | | +- DictSubclassGuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['7']._modules['ln_2']._forward_hooks, accessed_by=DictGetItemGuardAccessor(_forward_hooks) + | | | | | | | | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['7']._modules['ln_2']._backward_hooks, accessed_by=DictGetItemGuardAccessor(_backward_hooks) + | | | | | | | | | | | | | | | | | +- DICT_LENGTH: not L['mod']._modules['transformer']._modules['h']._modules['7']._modules['ln_2']._backward_hooks + | | | | | | | | | | | | | | | | +- DictSubclassGuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['7']._modules['ln_2']._forward_pre_hooks, accessed_by=DictGetItemGuardAccessor(_forward_pre_hooks) + | | | | | | | | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['7']._modules['ln_2']._backward_pre_hooks, accessed_by=DictGetItemGuardAccessor(_backward_pre_hooks) + | | | | | | | | | | | | | | | | | +- DICT_LENGTH: not L['mod']._modules['transformer']._modules['h']._modules['7']._modules['ln_2']._backward_pre_hooks + | | | | | | | | | | | | | | +- GuardManager: 
source=L['mod']._modules['transformer']._modules['h']._modules['7']._modules['mlp'], accessed_by=DictGetItemGuardAccessor(mlp) + | | | | | | | | | | | | | | | +- TYPE_MATCH: ___check_type_id(L['mod']._modules['transformer']._modules['h']._modules['7']._modules['mlp'], 94206531294080) + | | | | | | | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['7']._modules['mlp'].__dict__, accessed_by=GetGenericDictGuardAccessor + | | | | | | | | | | | | | | | | +- DICT_CONTAINS: not ___dict_contains('forward', L['mod']._modules['transformer']._modules['h']._modules['7']._modules['mlp'].__dict__) + | | | | | | | | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['7']._modules['mlp']._modules, accessed_by=DictGetItemGuardAccessor(_modules) + | | | | | | | | | | | | | | | | | +- DICT_LENGTH: len(L['mod']._modules['transformer']._modules['h']._modules['7']._modules['mlp']._modules) == 3 + | | | | | | | | | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['7']._modules['mlp']._modules['c_fc'], accessed_by=DictGetItemGuardAccessor(c_fc) + | | | | | | | | | | | | | | | | | | +- TYPE_MATCH: ___check_type_id(L['mod']._modules['transformer']._modules['h']._modules['7']._modules['mlp']._modules['c_fc'], 94206198956688) + | | | | | | | | | | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['7']._modules['mlp']._modules['c_fc'].__dict__, accessed_by=GetGenericDictGuardAccessor + | | | | | | | | | | | | | | | | | | | +- DICT_CONTAINS: not ___dict_contains('forward', L['mod']._modules['transformer']._modules['h']._modules['7']._modules['mlp']._modules['c_fc'].__dict__) + | | | | | | | | | | | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['7']._modules['mlp']._modules['c_fc']._parameters, accessed_by=DictGetItemGuardAccessor(_parameters) + | | | | | | | | | | | | | | | | | | | | +- DICT_LENGTH: len(L['mod']._modules['transformer']._modules['h']._modules['7']._modules['mlp']._modules['c_fc']._parameters) == 2 + | | | | | | | | | | | | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['7']._modules['mlp']._modules['c_fc']._parameters['weight'], accessed_by=DictGetItemGuardAccessor(weight) + | | | | | | | | | | | | | | | | | | | | | +- TENSOR_MATCH: check_tensor(L['mod']._modules['transformer']._modules['h']._modules['7']._modules['mlp']._modules['c_fc']._parameters['weight'], Parameter, DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), torch.float32, device=0, requires_grad=True, size=[3072, 768], stride=[768, 1]) + | | | | | | | | | | | | | | | | | | | | | +- NO_TENSOR_ALIASING + | | | | | | | | | | | | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['7']._modules['mlp']._modules['c_fc']._parameters['bias'], accessed_by=DictGetItemGuardAccessor(bias) + | | | | | | | | | | | | | | | | | | | | | +- TENSOR_MATCH: check_tensor(L['mod']._modules['transformer']._modules['h']._modules['7']._modules['mlp']._modules['c_fc']._parameters['bias'], Parameter, DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), torch.float32, device=0, requires_grad=True, size=[3072], stride=[1]) + | | | | | | | | | | | | | | | | | | | | | +- NO_TENSOR_ALIASING + | | | | | | | | | | | | | | | | | +- GuardManager: 
source=L['mod']._modules['transformer']._modules['h']._modules['7']._modules['mlp']._modules['c_proj'], accessed_by=DictGetItemGuardAccessor(c_proj) + | | | | | | | | | | | | | | | | | | +- TYPE_MATCH: ___check_type_id(L['mod']._modules['transformer']._modules['h']._modules['7']._modules['mlp']._modules['c_proj'], 94206198956688) + | | | | | | | | | | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['7']._modules['mlp']._modules['c_proj'].__dict__, accessed_by=GetGenericDictGuardAccessor + | | | | | | | | | | | | | | | | | | | +- DICT_CONTAINS: not ___dict_contains('forward', L['mod']._modules['transformer']._modules['h']._modules['7']._modules['mlp']._modules['c_proj'].__dict__) + | | | | | | | | | | | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['7']._modules['mlp']._modules['c_proj']._parameters, accessed_by=DictGetItemGuardAccessor(_parameters) + | | | | | | | | | | | | | | | | | | | | +- DICT_LENGTH: len(L['mod']._modules['transformer']._modules['h']._modules['7']._modules['mlp']._modules['c_proj']._parameters) == 2 + | | | | | | | | | | | | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['7']._modules['mlp']._modules['c_proj']._parameters['weight'], accessed_by=DictGetItemGuardAccessor(weight) + | | | | | | | | | | | | | | | | | | | | | +- TENSOR_MATCH: check_tensor(L['mod']._modules['transformer']._modules['h']._modules['7']._modules['mlp']._modules['c_proj']._parameters['weight'], Parameter, DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), torch.float32, device=0, requires_grad=True, size=[768, 3072], stride=[3072, 1]) + | | | | | | | | | | | | | | | | | | | | | +- NO_TENSOR_ALIASING + | | | | | | | | | | | | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['7']._modules['mlp']._modules['c_proj']._parameters['bias'], accessed_by=DictGetItemGuardAccessor(bias) + | | | | | | | | | | | | | | | | | | | | | +- TENSOR_MATCH: check_tensor(L['mod']._modules['transformer']._modules['h']._modules['7']._modules['mlp']._modules['c_proj']._parameters['bias'], Parameter, DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), torch.float32, device=0, requires_grad=True, size=[768], stride=[1]) + | | | | | | | | | | | | | | | | | | | | | +- NO_TENSOR_ALIASING + | | | | | | | | | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['7']._modules['mlp']._modules['dropout'], accessed_by=DictGetItemGuardAccessor(dropout) + | | | | | | | | | | | | | | | | | | +- TYPE_MATCH: ___check_type_id(L['mod']._modules['transformer']._modules['h']._modules['7']._modules['mlp']._modules['dropout'], 94206199111456) + | | | | | | | | | | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['7']._modules['mlp']._modules['dropout'].__dict__, accessed_by=GetGenericDictGuardAccessor + | | | | | | | | | | | | | | | | | | | +- DICT_CONTAINS: not ___dict_contains('forward', L['mod']._modules['transformer']._modules['h']._modules['7']._modules['mlp']._modules['dropout'].__dict__) + | | | | | | | | | | | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['7']._modules['mlp']._modules['dropout'].p, accessed_by=DictGetItemGuardAccessor(p) + | | | | | | | | | | | | | | | | | | | | +- EQUALS_MATCH: 
L['mod']._modules['transformer']._modules['h']._modules['7']._modules['mlp']._modules['dropout'].p == 0.0 + | | | | | | | | | | | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['7']._modules['mlp']._modules['dropout'].inplace, accessed_by=DictGetItemGuardAccessor(inplace) + | | | | | | | | | | | | | | | | | | | | +- ID_MATCH: ___check_obj_id(L['mod']._modules['transformer']._modules['h']._modules['7']._modules['mlp']._modules['dropout'].inplace, 94206128801376) + | | | | | | | | | | | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['7']._modules['mlp']._modules['dropout'].training, accessed_by=DictGetItemGuardAccessor(training) + | | | | | | | | | | | | | | | | | | | | +- ID_MATCH: ___check_obj_id(L['mod']._modules['transformer']._modules['h']._modules['7']._modules['mlp']._modules['dropout'].training, 94206128801408) + | | | | | | | | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['7']._modules['mlp']._parameters, accessed_by=DictGetItemGuardAccessor(_parameters) + | | | | | | | | | | | | | | | | | +- DICT_LENGTH: not L['mod']._modules['transformer']._modules['h']._modules['7']._modules['mlp']._parameters + | | | | | | | | | | | | | | | | +- DictSubclassGuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['7']._modules['mlp']._forward_hooks, accessed_by=DictGetItemGuardAccessor(_forward_hooks) + | | | | | | | | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['7']._modules['mlp']._backward_hooks, accessed_by=DictGetItemGuardAccessor(_backward_hooks) + | | | | | | | | | | | | | | | | | +- DICT_LENGTH: not L['mod']._modules['transformer']._modules['h']._modules['7']._modules['mlp']._backward_hooks + | | | | | | | | | | | | | | | | +- DictSubclassGuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['7']._modules['mlp']._forward_pre_hooks, accessed_by=DictGetItemGuardAccessor(_forward_pre_hooks) + | | | | | | | | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['7']._modules['mlp']._backward_pre_hooks, accessed_by=DictGetItemGuardAccessor(_backward_pre_hooks) + | | | | | | | | | | | | | | | | | +- DICT_LENGTH: not L['mod']._modules['transformer']._modules['h']._modules['7']._modules['mlp']._backward_pre_hooks + | | | | | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['7']._parameters, accessed_by=DictGetItemGuardAccessor(_parameters) + | | | | | | | | | | | | | | +- DICT_LENGTH: not L['mod']._modules['transformer']._modules['h']._modules['7']._parameters + | | | | | | | | | | | | | +- DictSubclassGuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['7']._forward_hooks, accessed_by=DictGetItemGuardAccessor(_forward_hooks) + | | | | | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['7']._backward_hooks, accessed_by=DictGetItemGuardAccessor(_backward_hooks) + | | | | | | | | | | | | | | +- DICT_LENGTH: not L['mod']._modules['transformer']._modules['h']._modules['7']._backward_hooks + | | | | | | | | | | | | | +- DictSubclassGuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['7']._forward_pre_hooks, accessed_by=DictGetItemGuardAccessor(_forward_pre_hooks) + | | | | | | | | | | | | | +- GuardManager: 
source=L['mod']._modules['transformer']._modules['h']._modules['7']._backward_pre_hooks, accessed_by=DictGetItemGuardAccessor(_backward_pre_hooks) + | | | | | | | | | | | | | | +- DICT_LENGTH: not L['mod']._modules['transformer']._modules['h']._modules['7']._backward_pre_hooks + | | | | | | | | | | +- KeyValueManager pair at index=8 + | | | | | | | | | | | +- KeyManager: GuardManager: source=list(L['mod']._modules['transformer']._modules['h']._modules.keys())[8] + | | | | | | | | | | | | +- EQUALS_MATCH: list(L['mod']._modules['transformer']._modules['h']._modules.keys())[8] == '8' + | | | | | | | | | | | +- ValueManager: GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['8'] + | | | | | | | | | | | | +- TYPE_MATCH: ___check_type_id(L['mod']._modules['transformer']._modules['h']._modules['8'], 94206531295040) + | | | | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['8'].__dict__, accessed_by=GetGenericDictGuardAccessor + | | | | | | | | | | | | | +- DICT_CONTAINS: not ___dict_contains('forward', L['mod']._modules['transformer']._modules['h']._modules['8'].__dict__) + | | | | | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['8']._modules, accessed_by=DictGetItemGuardAccessor(_modules) + | | | | | | | | | | | | | | +- DICT_LENGTH: len(L['mod']._modules['transformer']._modules['h']._modules['8']._modules) == 4 + | | | | | | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['8']._modules['ln_1'], accessed_by=DictGetItemGuardAccessor(ln_1) + | | | | | | | | | | | | | | | +- TYPE_MATCH: ___check_type_id(L['mod']._modules['transformer']._modules['h']._modules['8']._modules['ln_1'], 94206531292160) + | | | | | | | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['8']._modules['ln_1'].__dict__, accessed_by=GetGenericDictGuardAccessor + | | | | | | | | | | | | | | | | +- DICT_CONTAINS: not ___dict_contains('forward', L['mod']._modules['transformer']._modules['h']._modules['8']._modules['ln_1'].__dict__) + | | | | | | | | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['8']._modules['ln_1']._parameters, accessed_by=DictGetItemGuardAccessor(_parameters) + | | | | | | | | | | | | | | | | | +- DICT_LENGTH: len(L['mod']._modules['transformer']._modules['h']._modules['8']._modules['ln_1']._parameters) == 2 + | | | | | | | | | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['8']._modules['ln_1']._parameters['weight'], accessed_by=DictGetItemGuardAccessor(weight) + | | | | | | | | | | | | | | | | | | +- TENSOR_MATCH: check_tensor(L['mod']._modules['transformer']._modules['h']._modules['8']._modules['ln_1']._parameters['weight'], Parameter, DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), torch.float32, device=0, requires_grad=True, size=[768], stride=[1]) + | | | | | | | | | | | | | | | | | | +- NO_TENSOR_ALIASING + | | | | | | | | | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['8']._modules['ln_1']._parameters['bias'], accessed_by=DictGetItemGuardAccessor(bias) + | | | | | | | | | | | | | | | | | | +- TENSOR_MATCH: check_tensor(L['mod']._modules['transformer']._modules['h']._modules['8']._modules['ln_1']._parameters['bias'], Parameter, DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), torch.float32, device=0, 
requires_grad=True, size=[768], stride=[1]) + | | | | | | | | | | | | | | | | | | +- NO_TENSOR_ALIASING + | | | | | | | | | | | | | | | | +- DictSubclassGuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['8']._modules['ln_1']._forward_hooks, accessed_by=DictGetItemGuardAccessor(_forward_hooks) + | | | | | | | | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['8']._modules['ln_1']._backward_hooks, accessed_by=DictGetItemGuardAccessor(_backward_hooks) + | | | | | | | | | | | | | | | | | +- DICT_LENGTH: not L['mod']._modules['transformer']._modules['h']._modules['8']._modules['ln_1']._backward_hooks + | | | | | | | | | | | | | | | | +- DictSubclassGuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['8']._modules['ln_1']._forward_pre_hooks, accessed_by=DictGetItemGuardAccessor(_forward_pre_hooks) + | | | | | | | | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['8']._modules['ln_1']._backward_pre_hooks, accessed_by=DictGetItemGuardAccessor(_backward_pre_hooks) + | | | | | | | | | | | | | | | | | +- DICT_LENGTH: not L['mod']._modules['transformer']._modules['h']._modules['8']._modules['ln_1']._backward_pre_hooks + | | | | | | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['8']._modules['attn'], accessed_by=DictGetItemGuardAccessor(attn) + | | | | | | | | | | | | | | | +- TYPE_MATCH: ___check_type_id(L['mod']._modules['transformer']._modules['h']._modules['8']._modules['attn'], 94206531293120) + | | | | | | | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['8']._modules['attn'].__dict__, accessed_by=GetGenericDictGuardAccessor + | | | | | | | | | | | | | | | | +- DICT_CONTAINS: not ___dict_contains('forward', L['mod']._modules['transformer']._modules['h']._modules['8']._modules['attn'].__dict__) + | | | | | | | | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['8']._modules['attn'].flash, accessed_by=DictGetItemGuardAccessor(flash) + | | | | | | | | | | | | | | | | | +- ID_MATCH: ___check_obj_id(L['mod']._modules['transformer']._modules['h']._modules['8']._modules['attn'].flash, 94206128801408) + | | | | | | | | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['8']._modules['attn'].n_embd, accessed_by=DictGetItemGuardAccessor(n_embd) + | | | | | | | | | | | | | | | | | +- EQUALS_MATCH: L['mod']._modules['transformer']._modules['h']._modules['8']._modules['attn'].n_embd == 768 + | | | | | | | | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['8']._modules['attn'].n_head, accessed_by=DictGetItemGuardAccessor(n_head) + | | | | | | | | | | | | | | | | | +- EQUALS_MATCH: L['mod']._modules['transformer']._modules['h']._modules['8']._modules['attn'].n_head == 12 + | | | | | | | | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['8']._modules['attn'].dropout, accessed_by=DictGetItemGuardAccessor(dropout) + | | | | | | | | | | | | | | | | | +- EQUALS_MATCH: L['mod']._modules['transformer']._modules['h']._modules['8']._modules['attn'].dropout == 0.0 + | | | | | | | | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['8']._modules['attn']._modules, accessed_by=DictGetItemGuardAccessor(_modules) + | | | | | | | | | | | | | | | | | +- 
DICT_LENGTH: len(L['mod']._modules['transformer']._modules['h']._modules['8']._modules['attn']._modules) == 4 + | | | | | | | | | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['8']._modules['attn']._modules['c_attn'], accessed_by=DictGetItemGuardAccessor(c_attn) + | | | | | | | | | | | | | | | | | | +- TYPE_MATCH: ___check_type_id(L['mod']._modules['transformer']._modules['h']._modules['8']._modules['attn']._modules['c_attn'], 94206198956688) + | | | | | | | | | | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['8']._modules['attn']._modules['c_attn'].__dict__, accessed_by=GetGenericDictGuardAccessor + | | | | | | | | | | | | | | | | | | | +- DICT_CONTAINS: not ___dict_contains('forward', L['mod']._modules['transformer']._modules['h']._modules['8']._modules['attn']._modules['c_attn'].__dict__) + | | | | | | | | | | | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['8']._modules['attn']._modules['c_attn']._parameters, accessed_by=DictGetItemGuardAccessor(_parameters) + | | | | | | | | | | | | | | | | | | | | +- DICT_LENGTH: len(L['mod']._modules['transformer']._modules['h']._modules['8']._modules['attn']._modules['c_attn']._parameters) == 2 + | | | | | | | | | | | | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['8']._modules['attn']._modules['c_attn']._parameters['weight'], accessed_by=DictGetItemGuardAccessor(weight) + | | | | | | | | | | | | | | | | | | | | | +- TENSOR_MATCH: check_tensor(L['mod']._modules['transformer']._modules['h']._modules['8']._modules['attn']._modules['c_attn']._parameters['weight'], Parameter, DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), torch.float32, device=0, requires_grad=True, size=[2304, 768], stride=[768, 1]) + | | | | | | | | | | | | | | | | | | | | | +- NO_TENSOR_ALIASING + | | | | | | | | | | | | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['8']._modules['attn']._modules['c_attn']._parameters['bias'], accessed_by=DictGetItemGuardAccessor(bias) + | | | | | | | | | | | | | | | | | | | | | +- TENSOR_MATCH: check_tensor(L['mod']._modules['transformer']._modules['h']._modules['8']._modules['attn']._modules['c_attn']._parameters['bias'], Parameter, DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), torch.float32, device=0, requires_grad=True, size=[2304], stride=[1]) + | | | | | | | | | | | | | | | | | | | | | +- NO_TENSOR_ALIASING + | | | | | | | | | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['8']._modules['attn']._modules['c_proj'], accessed_by=DictGetItemGuardAccessor(c_proj) + | | | | | | | | | | | | | | | | | | +- TYPE_MATCH: ___check_type_id(L['mod']._modules['transformer']._modules['h']._modules['8']._modules['attn']._modules['c_proj'], 94206198956688) + | | | | | | | | | | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['8']._modules['attn']._modules['c_proj'].__dict__, accessed_by=GetGenericDictGuardAccessor + | | | | | | | | | | | | | | | | | | | +- DICT_CONTAINS: not ___dict_contains('forward', L['mod']._modules['transformer']._modules['h']._modules['8']._modules['attn']._modules['c_proj'].__dict__) + | | | | | | | | | | | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['8']._modules['attn']._modules['c_proj']._parameters, 
accessed_by=DictGetItemGuardAccessor(_parameters) + | | | | | | | | | | | | | | | | | | | | +- DICT_LENGTH: len(L['mod']._modules['transformer']._modules['h']._modules['8']._modules['attn']._modules['c_proj']._parameters) == 2 + | | | | | | | | | | | | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['8']._modules['attn']._modules['c_proj']._parameters['weight'], accessed_by=DictGetItemGuardAccessor(weight) + | | | | | | | | | | | | | | | | | | | | | +- TENSOR_MATCH: check_tensor(L['mod']._modules['transformer']._modules['h']._modules['8']._modules['attn']._modules['c_proj']._parameters['weight'], Parameter, DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), torch.float32, device=0, requires_grad=True, size=[768, 768], stride=[768, 1]) + | | | | | | | | | | | | | | | | | | | | | +- NO_TENSOR_ALIASING + | | | | | | | | | | | | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['8']._modules['attn']._modules['c_proj']._parameters['bias'], accessed_by=DictGetItemGuardAccessor(bias) + | | | | | | | | | | | | | | | | | | | | | +- TENSOR_MATCH: check_tensor(L['mod']._modules['transformer']._modules['h']._modules['8']._modules['attn']._modules['c_proj']._parameters['bias'], Parameter, DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), torch.float32, device=0, requires_grad=True, size=[768], stride=[1]) + | | | | | | | | | | | | | | | | | | | | | +- NO_TENSOR_ALIASING + | | | | | | | | | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['8']._modules['attn']._modules['attn_dropout'], accessed_by=DictGetItemGuardAccessor(attn_dropout) + | | | | | | | | | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['8']._modules['attn']._modules['resid_dropout'], accessed_by=DictGetItemGuardAccessor(resid_dropout) + | | | | | | | | | | | | | | | | | | +- TYPE_MATCH: ___check_type_id(L['mod']._modules['transformer']._modules['h']._modules['8']._modules['attn']._modules['resid_dropout'], 94206199111456) + | | | | | | | | | | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['8']._modules['attn']._modules['resid_dropout'].__dict__, accessed_by=GetGenericDictGuardAccessor + | | | | | | | | | | | | | | | | | | | +- DICT_CONTAINS: not ___dict_contains('forward', L['mod']._modules['transformer']._modules['h']._modules['8']._modules['attn']._modules['resid_dropout'].__dict__) + | | | | | | | | | | | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['8']._modules['attn']._modules['resid_dropout'].p, accessed_by=DictGetItemGuardAccessor(p) + | | | | | | | | | | | | | | | | | | | | +- EQUALS_MATCH: L['mod']._modules['transformer']._modules['h']._modules['8']._modules['attn']._modules['resid_dropout'].p == 0.0 + | | | | | | | | | | | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['8']._modules['attn']._modules['resid_dropout'].inplace, accessed_by=DictGetItemGuardAccessor(inplace) + | | | | | | | | | | | | | | | | | | | | +- ID_MATCH: ___check_obj_id(L['mod']._modules['transformer']._modules['h']._modules['8']._modules['attn']._modules['resid_dropout'].inplace, 94206128801376) + | | | | | | | | | | | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['8']._modules['attn']._modules['resid_dropout'].training, 
accessed_by=DictGetItemGuardAccessor(training) + | | | | | | | | | | | | | | | | | | | | +- ID_MATCH: ___check_obj_id(L['mod']._modules['transformer']._modules['h']._modules['8']._modules['attn']._modules['resid_dropout'].training, 94206128801408) + | | | | | | | | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['8']._modules['attn'].training, accessed_by=DictGetItemGuardAccessor(training) + | | | | | | | | | | | | | | | | | +- ID_MATCH: ___check_obj_id(L['mod']._modules['transformer']._modules['h']._modules['8']._modules['attn'].training, 94206128801408) + | | | | | | | | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['8']._modules['attn']._parameters, accessed_by=DictGetItemGuardAccessor(_parameters) + | | | | | | | | | | | | | | | | | +- DICT_LENGTH: not L['mod']._modules['transformer']._modules['h']._modules['8']._modules['attn']._parameters + | | | | | | | | | | | | | | | | +- DictSubclassGuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['8']._modules['attn']._forward_hooks, accessed_by=DictGetItemGuardAccessor(_forward_hooks) + | | | | | | | | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['8']._modules['attn']._backward_hooks, accessed_by=DictGetItemGuardAccessor(_backward_hooks) + | | | | | | | | | | | | | | | | | +- DICT_LENGTH: not L['mod']._modules['transformer']._modules['h']._modules['8']._modules['attn']._backward_hooks + | | | | | | | | | | | | | | | | +- DictSubclassGuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['8']._modules['attn']._forward_pre_hooks, accessed_by=DictGetItemGuardAccessor(_forward_pre_hooks) + | | | | | | | | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['8']._modules['attn']._backward_pre_hooks, accessed_by=DictGetItemGuardAccessor(_backward_pre_hooks) + | | | | | | | | | | | | | | | | | +- DICT_LENGTH: not L['mod']._modules['transformer']._modules['h']._modules['8']._modules['attn']._backward_pre_hooks + | | | | | | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['8']._modules['ln_2'], accessed_by=DictGetItemGuardAccessor(ln_2) + | | | | | | | | | | | | | | | +- TYPE_MATCH: ___check_type_id(L['mod']._modules['transformer']._modules['h']._modules['8']._modules['ln_2'], 94206531292160) + | | | | | | | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['8']._modules['ln_2'].__dict__, accessed_by=GetGenericDictGuardAccessor + | | | | | | | | | | | | | | | | +- DICT_CONTAINS: not ___dict_contains('forward', L['mod']._modules['transformer']._modules['h']._modules['8']._modules['ln_2'].__dict__) + | | | | | | | | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['8']._modules['ln_2']._parameters, accessed_by=DictGetItemGuardAccessor(_parameters) + | | | | | | | | | | | | | | | | | +- DICT_LENGTH: len(L['mod']._modules['transformer']._modules['h']._modules['8']._modules['ln_2']._parameters) == 2 + | | | | | | | | | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['8']._modules['ln_2']._parameters['weight'], accessed_by=DictGetItemGuardAccessor(weight) + | | | | | | | | | | | | | | | | | | +- TENSOR_MATCH: check_tensor(L['mod']._modules['transformer']._modules['h']._modules['8']._modules['ln_2']._parameters['weight'], Parameter, 
DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), torch.float32, device=0, requires_grad=True, size=[768], stride=[1])
+ | | | | | | | | | | | | | | | | | | +- NO_TENSOR_ALIASING
+ | | | | | | | | | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['8']._modules['ln_2']._parameters['bias'], accessed_by=DictGetItemGuardAccessor(bias)
+ | | | | | | | | | | | | | | | | | | +- TENSOR_MATCH: check_tensor(L['mod']._modules['transformer']._modules['h']._modules['8']._modules['ln_2']._parameters['bias'], Parameter, DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), torch.float32, device=0, requires_grad=True, size=[768], stride=[1])
+ | | | | | | | | | | | | | | | | | | +- NO_TENSOR_ALIASING
+ | | | | | | | | | | | | | | | | +- DictSubclassGuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['8']._modules['ln_2']._forward_hooks, accessed_by=DictGetItemGuardAccessor(_forward_hooks)
+ | | | | | | | | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['8']._modules['ln_2']._backward_hooks, accessed_by=DictGetItemGuardAccessor(_backward_hooks)
+ | | | | | | | | | | | | | | | | | +- DICT_LENGTH: not L['mod']._modules['transformer']._modules['h']._modules['8']._modules['ln_2']._backward_hooks
+ | | | | | | | | | | | | | | | | +- DictSubclassGuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['8']._modules['ln_2']._forward_pre_hooks, accessed_by=DictGetItemGuardAccessor(_forward_pre_hooks)
+ | | | | | | | | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['8']._modules['ln_2']._backward_pre_hooks, accessed_by=DictGetItemGuardAccessor(_backward_pre_hooks)
+ | | | | | | | | | | | | | | | | | +- DICT_LENGTH: not L['mod']._modules['transformer']._modules['h']._modules['8']._modules['ln_2']._backward_pre_hooks
+ | | | | | | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['8']._modules['mlp'], accessed_by=DictGetItemGuardAccessor(mlp)
+ | | | | | | | | | | | | | | | +- TYPE_MATCH: ___check_type_id(L['mod']._modules['transformer']._modules['h']._modules['8']._modules['mlp'], 94206531294080)
+ | | | | | | | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['8']._modules['mlp'].__dict__, accessed_by=GetGenericDictGuardAccessor
+ | | | | | | | | | | | | | | | | +- DICT_CONTAINS: not ___dict_contains('forward', L['mod']._modules['transformer']._modules['h']._modules['8']._modules['mlp'].__dict__)
+ | | | | | | | | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['8']._modules['mlp']._modules, accessed_by=DictGetItemGuardAccessor(_modules)
+ | | | | | | | | | | | | | | | | | +- DICT_LENGTH: len(L['mod']._modules['transformer']._modules['h']._modules['8']._modules['mlp']._modules) == 3
+ | | | | | | | | | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['8']._modules['mlp']._modules['c_fc'], accessed_by=DictGetItemGuardAccessor(c_fc)
+ | | | | | | | | | | | | | | | | | | +- TYPE_MATCH: ___check_type_id(L['mod']._modules['transformer']._modules['h']._modules['8']._modules['mlp']._modules['c_fc'], 94206198956688)
+ | | | | | | | | | | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['8']._modules['mlp']._modules['c_fc'].__dict__, accessed_by=GetGenericDictGuardAccessor
+ | | | | | | | | | | | | | | | | | | | +- DICT_CONTAINS: not ___dict_contains('forward', L['mod']._modules['transformer']._modules['h']._modules['8']._modules['mlp']._modules['c_fc'].__dict__)
+ | | | | | | | | | | | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['8']._modules['mlp']._modules['c_fc']._parameters, accessed_by=DictGetItemGuardAccessor(_parameters)
+ | | | | | | | | | | | | | | | | | | | | +- DICT_LENGTH: len(L['mod']._modules['transformer']._modules['h']._modules['8']._modules['mlp']._modules['c_fc']._parameters) == 2
+ | | | | | | | | | | | | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['8']._modules['mlp']._modules['c_fc']._parameters['weight'], accessed_by=DictGetItemGuardAccessor(weight)
+ | | | | | | | | | | | | | | | | | | | | | +- TENSOR_MATCH: check_tensor(L['mod']._modules['transformer']._modules['h']._modules['8']._modules['mlp']._modules['c_fc']._parameters['weight'], Parameter, DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), torch.float32, device=0, requires_grad=True, size=[3072, 768], stride=[768, 1])
+ | | | | | | | | | | | | | | | | | | | | | +- NO_TENSOR_ALIASING
+ | | | | | | | | | | | | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['8']._modules['mlp']._modules['c_fc']._parameters['bias'], accessed_by=DictGetItemGuardAccessor(bias)
+ | | | | | | | | | | | | | | | | | | | | | +- TENSOR_MATCH: check_tensor(L['mod']._modules['transformer']._modules['h']._modules['8']._modules['mlp']._modules['c_fc']._parameters['bias'], Parameter, DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), torch.float32, device=0, requires_grad=True, size=[3072], stride=[1])
+ | | | | | | | | | | | | | | | | | | | | | +- NO_TENSOR_ALIASING
+ | | | | | | | | | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['8']._modules['mlp']._modules['c_proj'], accessed_by=DictGetItemGuardAccessor(c_proj)
+ | | | | | | | | | | | | | | | | | | +- TYPE_MATCH: ___check_type_id(L['mod']._modules['transformer']._modules['h']._modules['8']._modules['mlp']._modules['c_proj'], 94206198956688)
+ | | | | | | | | | | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['8']._modules['mlp']._modules['c_proj'].__dict__, accessed_by=GetGenericDictGuardAccessor
+ | | | | | | | | | | | | | | | | | | | +- DICT_CONTAINS: not ___dict_contains('forward', L['mod']._modules['transformer']._modules['h']._modules['8']._modules['mlp']._modules['c_proj'].__dict__)
+ | | | | | | | | | | | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['8']._modules['mlp']._modules['c_proj']._parameters, accessed_by=DictGetItemGuardAccessor(_parameters)
+ | | | | | | | | | | | | | | | | | | | | +- DICT_LENGTH: len(L['mod']._modules['transformer']._modules['h']._modules['8']._modules['mlp']._modules['c_proj']._parameters) == 2
+ | | | | | | | | | | | | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['8']._modules['mlp']._modules['c_proj']._parameters['weight'], accessed_by=DictGetItemGuardAccessor(weight)
+ | | | | | | | | | | | | | | | | | | | | | +- TENSOR_MATCH: check_tensor(L['mod']._modules['transformer']._modules['h']._modules['8']._modules['mlp']._modules['c_proj']._parameters['weight'], Parameter, DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), torch.float32, device=0, requires_grad=True, size=[768, 3072], stride=[3072, 1])
+ | | | | | | | | | | | | | | | | | | | | | +- NO_TENSOR_ALIASING
+ | | | | | | | | | | | | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['8']._modules['mlp']._modules['c_proj']._parameters['bias'], accessed_by=DictGetItemGuardAccessor(bias)
+ | | | | | | | | | | | | | | | | | | | | | +- TENSOR_MATCH: check_tensor(L['mod']._modules['transformer']._modules['h']._modules['8']._modules['mlp']._modules['c_proj']._parameters['bias'], Parameter, DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), torch.float32, device=0, requires_grad=True, size=[768], stride=[1])
+ | | | | | | | | | | | | | | | | | | | | | +- NO_TENSOR_ALIASING
+ | | | | | | | | | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['8']._modules['mlp']._modules['dropout'], accessed_by=DictGetItemGuardAccessor(dropout)
+ | | | | | | | | | | | | | | | | | | +- TYPE_MATCH: ___check_type_id(L['mod']._modules['transformer']._modules['h']._modules['8']._modules['mlp']._modules['dropout'], 94206199111456)
+ | | | | | | | | | | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['8']._modules['mlp']._modules['dropout'].__dict__, accessed_by=GetGenericDictGuardAccessor
+ | | | | | | | | | | | | | | | | | | | +- DICT_CONTAINS: not ___dict_contains('forward', L['mod']._modules['transformer']._modules['h']._modules['8']._modules['mlp']._modules['dropout'].__dict__)
+ | | | | | | | | | | | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['8']._modules['mlp']._modules['dropout'].p, accessed_by=DictGetItemGuardAccessor(p)
+ | | | | | | | | | | | | | | | | | | | | +- EQUALS_MATCH: L['mod']._modules['transformer']._modules['h']._modules['8']._modules['mlp']._modules['dropout'].p == 0.0
+ | | | | | | | | | | | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['8']._modules['mlp']._modules['dropout'].inplace, accessed_by=DictGetItemGuardAccessor(inplace)
+ | | | | | | | | | | | | | | | | | | | | +- ID_MATCH: ___check_obj_id(L['mod']._modules['transformer']._modules['h']._modules['8']._modules['mlp']._modules['dropout'].inplace, 94206128801376)
+ | | | | | | | | | | | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['8']._modules['mlp']._modules['dropout'].training, accessed_by=DictGetItemGuardAccessor(training)
+ | | | | | | | | | | | | | | | | | | | | +- ID_MATCH: ___check_obj_id(L['mod']._modules['transformer']._modules['h']._modules['8']._modules['mlp']._modules['dropout'].training, 94206128801408)
+ | | | | | | | | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['8']._modules['mlp']._parameters, accessed_by=DictGetItemGuardAccessor(_parameters)
+ | | | | | | | | | | | | | | | | | +- DICT_LENGTH: not L['mod']._modules['transformer']._modules['h']._modules['8']._modules['mlp']._parameters
+ | | | | | | | | | | | | | | | | +- DictSubclassGuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['8']._modules['mlp']._forward_hooks, accessed_by=DictGetItemGuardAccessor(_forward_hooks)
+ | | | | | | | | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['8']._modules['mlp']._backward_hooks, accessed_by=DictGetItemGuardAccessor(_backward_hooks)
+ | | | | | | | | | | | | | | | | | +- DICT_LENGTH: not L['mod']._modules['transformer']._modules['h']._modules['8']._modules['mlp']._backward_hooks
+ | | | | | | | | | | | | | | | | +- DictSubclassGuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['8']._modules['mlp']._forward_pre_hooks, accessed_by=DictGetItemGuardAccessor(_forward_pre_hooks)
+ | | | | | | | | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['8']._modules['mlp']._backward_pre_hooks, accessed_by=DictGetItemGuardAccessor(_backward_pre_hooks)
+ | | | | | | | | | | | | | | | | | +- DICT_LENGTH: not L['mod']._modules['transformer']._modules['h']._modules['8']._modules['mlp']._backward_pre_hooks
+ | | | | | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['8']._parameters, accessed_by=DictGetItemGuardAccessor(_parameters)
+ | | | | | | | | | | | | | | +- DICT_LENGTH: not L['mod']._modules['transformer']._modules['h']._modules['8']._parameters
+ | | | | | | | | | | | | | +- DictSubclassGuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['8']._forward_hooks, accessed_by=DictGetItemGuardAccessor(_forward_hooks)
+ | | | | | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['8']._backward_hooks, accessed_by=DictGetItemGuardAccessor(_backward_hooks)
+ | | | | | | | | | | | | | | +- DICT_LENGTH: not L['mod']._modules['transformer']._modules['h']._modules['8']._backward_hooks
+ | | | | | | | | | | | | | +- DictSubclassGuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['8']._forward_pre_hooks, accessed_by=DictGetItemGuardAccessor(_forward_pre_hooks)
+ | | | | | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['8']._backward_pre_hooks, accessed_by=DictGetItemGuardAccessor(_backward_pre_hooks)
+ | | | | | | | | | | | | | | +- DICT_LENGTH: not L['mod']._modules['transformer']._modules['h']._modules['8']._backward_pre_hooks
+ | | | | | | | | | | +- KeyValueManager pair at index=9
+ | | | | | | | | | | | +- KeyManager: GuardManager: source=list(L['mod']._modules['transformer']._modules['h']._modules.keys())[9]
+ | | | | | | | | | | | | +- EQUALS_MATCH: list(L['mod']._modules['transformer']._modules['h']._modules.keys())[9] == '9'
+ | | | | | | | | | | | +- ValueManager: GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['9']
+ | | | | | | | | | | | | +- TYPE_MATCH: ___check_type_id(L['mod']._modules['transformer']._modules['h']._modules['9'], 94206531295040)
+ | | | | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['9'].__dict__, accessed_by=GetGenericDictGuardAccessor
+ | | | | | | | | | | | | | +- DICT_CONTAINS: not ___dict_contains('forward', L['mod']._modules['transformer']._modules['h']._modules['9'].__dict__)
+ | | | | | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['9']._modules, accessed_by=DictGetItemGuardAccessor(_modules)
+ | | | | | | | | | | | | | | +- DICT_LENGTH: len(L['mod']._modules['transformer']._modules['h']._modules['9']._modules) == 4
+ | | | | | | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['9']._modules['ln_1'], accessed_by=DictGetItemGuardAccessor(ln_1)
+ | | | | | | | | | | | | | | | +- TYPE_MATCH: ___check_type_id(L['mod']._modules['transformer']._modules['h']._modules['9']._modules['ln_1'], 94206531292160)
+ | | | | | | | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['9']._modules['ln_1'].__dict__, accessed_by=GetGenericDictGuardAccessor
+ | | | | | | | | | | | | | | | | +- DICT_CONTAINS: not ___dict_contains('forward', L['mod']._modules['transformer']._modules['h']._modules['9']._modules['ln_1'].__dict__)
+ | | | | | | | | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['9']._modules['ln_1']._parameters, accessed_by=DictGetItemGuardAccessor(_parameters)
+ | | | | | | | | | | | | | | | | | +- DICT_LENGTH: len(L['mod']._modules['transformer']._modules['h']._modules['9']._modules['ln_1']._parameters) == 2
+ | | | | | | | | | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['9']._modules['ln_1']._parameters['weight'], accessed_by=DictGetItemGuardAccessor(weight)
+ | | | | | | | | | | | | | | | | | | +- TENSOR_MATCH: check_tensor(L['mod']._modules['transformer']._modules['h']._modules['9']._modules['ln_1']._parameters['weight'], Parameter, DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), torch.float32, device=0, requires_grad=True, size=[768], stride=[1])
+ | | | | | | | | | | | | | | | | | | +- NO_TENSOR_ALIASING
+ | | | | | | | | | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['9']._modules['ln_1']._parameters['bias'], accessed_by=DictGetItemGuardAccessor(bias)
+ | | | | | | | | | | | | | | | | | | +- TENSOR_MATCH: check_tensor(L['mod']._modules['transformer']._modules['h']._modules['9']._modules['ln_1']._parameters['bias'], Parameter, DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), torch.float32, device=0, requires_grad=True, size=[768], stride=[1])
+ | | | | | | | | | | | | | | | | | | +- NO_TENSOR_ALIASING
+ | | | | | | | | | | | | | | | | +- DictSubclassGuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['9']._modules['ln_1']._forward_hooks, accessed_by=DictGetItemGuardAccessor(_forward_hooks)
+ | | | | | | | | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['9']._modules['ln_1']._backward_hooks, accessed_by=DictGetItemGuardAccessor(_backward_hooks)
+ | | | | | | | | | | | | | | | | | +- DICT_LENGTH: not L['mod']._modules['transformer']._modules['h']._modules['9']._modules['ln_1']._backward_hooks
+ | | | | | | | | | | | | | | | | +- DictSubclassGuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['9']._modules['ln_1']._forward_pre_hooks, accessed_by=DictGetItemGuardAccessor(_forward_pre_hooks)
+ | | | | | | | | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['9']._modules['ln_1']._backward_pre_hooks, accessed_by=DictGetItemGuardAccessor(_backward_pre_hooks)
+ | | | | | | | | | | | | | | | | | +- DICT_LENGTH: not L['mod']._modules['transformer']._modules['h']._modules['9']._modules['ln_1']._backward_pre_hooks
+ | | | | | | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['9']._modules['attn'], accessed_by=DictGetItemGuardAccessor(attn)
+ | | | | | | | | | | | | | | | +- TYPE_MATCH: ___check_type_id(L['mod']._modules['transformer']._modules['h']._modules['9']._modules['attn'], 94206531293120)
+ | | | | | | | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['9']._modules['attn'].__dict__, accessed_by=GetGenericDictGuardAccessor
+ | | | | | | | | | | | | | | | | +- DICT_CONTAINS: not ___dict_contains('forward', L['mod']._modules['transformer']._modules['h']._modules['9']._modules['attn'].__dict__)
+ | | | | | | | | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['9']._modules['attn'].flash, accessed_by=DictGetItemGuardAccessor(flash)
+ | | | | | | | | | | | | | | | | | +- ID_MATCH: ___check_obj_id(L['mod']._modules['transformer']._modules['h']._modules['9']._modules['attn'].flash, 94206128801408)
+ | | | | | | | | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['9']._modules['attn'].n_embd, accessed_by=DictGetItemGuardAccessor(n_embd)
+ | | | | | | | | | | | | | | | | | +- EQUALS_MATCH: L['mod']._modules['transformer']._modules['h']._modules['9']._modules['attn'].n_embd == 768
+ | | | | | | | | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['9']._modules['attn'].n_head, accessed_by=DictGetItemGuardAccessor(n_head)
+ | | | | | | | | | | | | | | | | | +- EQUALS_MATCH: L['mod']._modules['transformer']._modules['h']._modules['9']._modules['attn'].n_head == 12
+ | | | | | | | | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['9']._modules['attn'].dropout, accessed_by=DictGetItemGuardAccessor(dropout)
+ | | | | | | | | | | | | | | | | | +- EQUALS_MATCH: L['mod']._modules['transformer']._modules['h']._modules['9']._modules['attn'].dropout == 0.0
+ | | | | | | | | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['9']._modules['attn']._modules, accessed_by=DictGetItemGuardAccessor(_modules)
+ | | | | | | | | | | | | | | | | | +- DICT_LENGTH: len(L['mod']._modules['transformer']._modules['h']._modules['9']._modules['attn']._modules) == 4
+ | | | | | | | | | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['9']._modules['attn']._modules['c_attn'], accessed_by=DictGetItemGuardAccessor(c_attn)
+ | | | | | | | | | | | | | | | | | | +- TYPE_MATCH: ___check_type_id(L['mod']._modules['transformer']._modules['h']._modules['9']._modules['attn']._modules['c_attn'], 94206198956688)
+ | | | | | | | | | | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['9']._modules['attn']._modules['c_attn'].__dict__, accessed_by=GetGenericDictGuardAccessor
+ | | | | | | | | | | | | | | | | | | | +- DICT_CONTAINS: not ___dict_contains('forward', L['mod']._modules['transformer']._modules['h']._modules['9']._modules['attn']._modules['c_attn'].__dict__)
+ | | | | | | | | | | | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['9']._modules['attn']._modules['c_attn']._parameters, accessed_by=DictGetItemGuardAccessor(_parameters)
+ | | | | | | | | | | | | | | | | | | | | +- DICT_LENGTH: len(L['mod']._modules['transformer']._modules['h']._modules['9']._modules['attn']._modules['c_attn']._parameters) == 2
+ | | | | | | | | | | | | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['9']._modules['attn']._modules['c_attn']._parameters['weight'], accessed_by=DictGetItemGuardAccessor(weight)
+ | | | | | | | | | | | | | | | | | | | | | +- TENSOR_MATCH: check_tensor(L['mod']._modules['transformer']._modules['h']._modules['9']._modules['attn']._modules['c_attn']._parameters['weight'], Parameter, DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), torch.float32, device=0, requires_grad=True, size=[2304, 768], stride=[768, 1])
+ | | | | | | | | | | | | | | | | | | | | | +- NO_TENSOR_ALIASING
+ | | | | | | | | | | | | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['9']._modules['attn']._modules['c_attn']._parameters['bias'], accessed_by=DictGetItemGuardAccessor(bias)
+ | | | | | | | | | | | | | | | | | | | | | +- TENSOR_MATCH: check_tensor(L['mod']._modules['transformer']._modules['h']._modules['9']._modules['attn']._modules['c_attn']._parameters['bias'], Parameter, DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), torch.float32, device=0, requires_grad=True, size=[2304], stride=[1])
+ | | | | | | | | | | | | | | | | | | | | | +- NO_TENSOR_ALIASING
+ | | | | | | | | | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['9']._modules['attn']._modules['c_proj'], accessed_by=DictGetItemGuardAccessor(c_proj)
+ | | | | | | | | | | | | | | | | | | +- TYPE_MATCH: ___check_type_id(L['mod']._modules['transformer']._modules['h']._modules['9']._modules['attn']._modules['c_proj'], 94206198956688)
+ | | | | | | | | | | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['9']._modules['attn']._modules['c_proj'].__dict__, accessed_by=GetGenericDictGuardAccessor
+ | | | | | | | | | | | | | | | | | | | +- DICT_CONTAINS: not ___dict_contains('forward', L['mod']._modules['transformer']._modules['h']._modules['9']._modules['attn']._modules['c_proj'].__dict__)
+ | | | | | | | | | | | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['9']._modules['attn']._modules['c_proj']._parameters, accessed_by=DictGetItemGuardAccessor(_parameters)
+ | | | | | | | | | | | | | | | | | | | | +- DICT_LENGTH: len(L['mod']._modules['transformer']._modules['h']._modules['9']._modules['attn']._modules['c_proj']._parameters) == 2
+ | | | | | | | | | | | | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['9']._modules['attn']._modules['c_proj']._parameters['weight'], accessed_by=DictGetItemGuardAccessor(weight)
+ | | | | | | | | | | | | | | | | | | | | | +- TENSOR_MATCH: check_tensor(L['mod']._modules['transformer']._modules['h']._modules['9']._modules['attn']._modules['c_proj']._parameters['weight'], Parameter, DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), torch.float32, device=0, requires_grad=True, size=[768, 768], stride=[768, 1])
+ | | | | | | | | | | | | | | | | | | | | | +- NO_TENSOR_ALIASING
+ | | | | | | | | | | | | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['9']._modules['attn']._modules['c_proj']._parameters['bias'], accessed_by=DictGetItemGuardAccessor(bias)
+ | | | | | | | | | | | | | | | | | | | | | +- TENSOR_MATCH: check_tensor(L['mod']._modules['transformer']._modules['h']._modules['9']._modules['attn']._modules['c_proj']._parameters['bias'], Parameter, DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), torch.float32, device=0, requires_grad=True, size=[768], stride=[1])
+ | | | | | | | | | | | | | | | | | | | | | +- NO_TENSOR_ALIASING
+ | | | | | | | | | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['9']._modules['attn']._modules['attn_dropout'], accessed_by=DictGetItemGuardAccessor(attn_dropout)
+ | | | | | | | | | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['9']._modules['attn']._modules['resid_dropout'], accessed_by=DictGetItemGuardAccessor(resid_dropout)
+ | | | | | | | | | | | | | | | | | | +- TYPE_MATCH: ___check_type_id(L['mod']._modules['transformer']._modules['h']._modules['9']._modules['attn']._modules['resid_dropout'], 94206199111456)
+ | | | | | | | | | | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['9']._modules['attn']._modules['resid_dropout'].__dict__, accessed_by=GetGenericDictGuardAccessor
+ | | | | | | | | | | | | | | | | | | | +- DICT_CONTAINS: not ___dict_contains('forward', L['mod']._modules['transformer']._modules['h']._modules['9']._modules['attn']._modules['resid_dropout'].__dict__)
+ | | | | | | | | | | | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['9']._modules['attn']._modules['resid_dropout'].p, accessed_by=DictGetItemGuardAccessor(p)
+ | | | | | | | | | | | | | | | | | | | | +- EQUALS_MATCH: L['mod']._modules['transformer']._modules['h']._modules['9']._modules['attn']._modules['resid_dropout'].p == 0.0
+ | | | | | | | | | | | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['9']._modules['attn']._modules['resid_dropout'].inplace, accessed_by=DictGetItemGuardAccessor(inplace)
+ | | | | | | | | | | | | | | | | | | | | +- ID_MATCH: ___check_obj_id(L['mod']._modules['transformer']._modules['h']._modules['9']._modules['attn']._modules['resid_dropout'].inplace, 94206128801376)
+ | | | | | | | | | | | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['9']._modules['attn']._modules['resid_dropout'].training, accessed_by=DictGetItemGuardAccessor(training)
+ | | | | | | | | | | | | | | | | | | | | +- ID_MATCH: ___check_obj_id(L['mod']._modules['transformer']._modules['h']._modules['9']._modules['attn']._modules['resid_dropout'].training, 94206128801408)
+ | | | | | | | | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['9']._modules['attn'].training, accessed_by=DictGetItemGuardAccessor(training)
+ | | | | | | | | | | | | | | | | | +- ID_MATCH: ___check_obj_id(L['mod']._modules['transformer']._modules['h']._modules['9']._modules['attn'].training, 94206128801408)
+ | | | | | | | | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['9']._modules['attn']._parameters, accessed_by=DictGetItemGuardAccessor(_parameters)
+ | | | | | | | | | | | | | | | | | +- DICT_LENGTH: not L['mod']._modules['transformer']._modules['h']._modules['9']._modules['attn']._parameters
+ | | | | | | | | | | | | | | | | +- DictSubclassGuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['9']._modules['attn']._forward_hooks, accessed_by=DictGetItemGuardAccessor(_forward_hooks)
+ | | | | | | | | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['9']._modules['attn']._backward_hooks, accessed_by=DictGetItemGuardAccessor(_backward_hooks)
+ | | | | | | | | | | | | | | | | | +- DICT_LENGTH: not L['mod']._modules['transformer']._modules['h']._modules['9']._modules['attn']._backward_hooks
+ | | | | | | | | | | | | | | | | +- DictSubclassGuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['9']._modules['attn']._forward_pre_hooks, accessed_by=DictGetItemGuardAccessor(_forward_pre_hooks)
+ | | | | | | | | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['9']._modules['attn']._backward_pre_hooks, accessed_by=DictGetItemGuardAccessor(_backward_pre_hooks)
+ | | | | | | | | | | | | | | | | | +- DICT_LENGTH: not L['mod']._modules['transformer']._modules['h']._modules['9']._modules['attn']._backward_pre_hooks
+ | | | | | | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['9']._modules['ln_2'], accessed_by=DictGetItemGuardAccessor(ln_2)
+ | | | | | | | | | | | | | | | +- TYPE_MATCH: ___check_type_id(L['mod']._modules['transformer']._modules['h']._modules['9']._modules['ln_2'], 94206531292160)
+ | | | | | | | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['9']._modules['ln_2'].__dict__, accessed_by=GetGenericDictGuardAccessor
+ | | | | | | | | | | | | | | | | +- DICT_CONTAINS: not ___dict_contains('forward', L['mod']._modules['transformer']._modules['h']._modules['9']._modules['ln_2'].__dict__)
+ | | | | | | | | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['9']._modules['ln_2']._parameters, accessed_by=DictGetItemGuardAccessor(_parameters)
+ | | | | | | | | | | | | | | | | | +- DICT_LENGTH: len(L['mod']._modules['transformer']._modules['h']._modules['9']._modules['ln_2']._parameters) == 2
+ | | | | | | | | | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['9']._modules['ln_2']._parameters['weight'], accessed_by=DictGetItemGuardAccessor(weight)
+ | | | | | | | | | | | | | | | | | | +- TENSOR_MATCH: check_tensor(L['mod']._modules['transformer']._modules['h']._modules['9']._modules['ln_2']._parameters['weight'], Parameter, DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), torch.float32, device=0, requires_grad=True, size=[768], stride=[1])
+ | | | | | | | | | | | | | | | | | | +- NO_TENSOR_ALIASING
+ | | | | | | | | | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['9']._modules['ln_2']._parameters['bias'], accessed_by=DictGetItemGuardAccessor(bias)
+ | | | | | | | | | | | | | | | | | | +- TENSOR_MATCH: check_tensor(L['mod']._modules['transformer']._modules['h']._modules['9']._modules['ln_2']._parameters['bias'], Parameter, DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), torch.float32, device=0, requires_grad=True, size=[768], stride=[1])
+ | | | | | | | | | | | | | | | | | | +- NO_TENSOR_ALIASING
+ | | | | | | | | | | | | | | | | +- DictSubclassGuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['9']._modules['ln_2']._forward_hooks, accessed_by=DictGetItemGuardAccessor(_forward_hooks)
+ | | | | | | | | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['9']._modules['ln_2']._backward_hooks, accessed_by=DictGetItemGuardAccessor(_backward_hooks)
+ | | | | | | | | | | | | | | | | | +- DICT_LENGTH: not L['mod']._modules['transformer']._modules['h']._modules['9']._modules['ln_2']._backward_hooks
+ | | | | | | | | | | | | | | | | +- DictSubclassGuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['9']._modules['ln_2']._forward_pre_hooks, accessed_by=DictGetItemGuardAccessor(_forward_pre_hooks)
+ | | | | | | | | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['9']._modules['ln_2']._backward_pre_hooks, accessed_by=DictGetItemGuardAccessor(_backward_pre_hooks)
+ | | | | | | | | | | | | | | | | | +- DICT_LENGTH: not L['mod']._modules['transformer']._modules['h']._modules['9']._modules['ln_2']._backward_pre_hooks
+ | | | | | | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['9']._modules['mlp'], accessed_by=DictGetItemGuardAccessor(mlp)
+ | | | | | | | | | | | | | | | +- TYPE_MATCH: ___check_type_id(L['mod']._modules['transformer']._modules['h']._modules['9']._modules['mlp'], 94206531294080)
+ | | | | | | | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['9']._modules['mlp'].__dict__, accessed_by=GetGenericDictGuardAccessor
+ | | | | | | | | | | | | | | | | +- DICT_CONTAINS: not ___dict_contains('forward', L['mod']._modules['transformer']._modules['h']._modules['9']._modules['mlp'].__dict__)
+ | | | | | | | | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['9']._modules['mlp']._modules, accessed_by=DictGetItemGuardAccessor(_modules)
+ | | | | | | | | | | | | | | | | | +- DICT_LENGTH: len(L['mod']._modules['transformer']._modules['h']._modules['9']._modules['mlp']._modules) == 3
+ | | | | | | | | | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['9']._modules['mlp']._modules['c_fc'], accessed_by=DictGetItemGuardAccessor(c_fc)
+ | | | | | | | | | | | | | | | | | | +- TYPE_MATCH: ___check_type_id(L['mod']._modules['transformer']._modules['h']._modules['9']._modules['mlp']._modules['c_fc'], 94206198956688)
+ | | | | | | | | | | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['9']._modules['mlp']._modules['c_fc'].__dict__, accessed_by=GetGenericDictGuardAccessor
+ | | | | | | | | | | | | | | | | | | | +- DICT_CONTAINS: not ___dict_contains('forward', L['mod']._modules['transformer']._modules['h']._modules['9']._modules['mlp']._modules['c_fc'].__dict__)
+ | | | | | | | | | | | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['9']._modules['mlp']._modules['c_fc']._parameters, accessed_by=DictGetItemGuardAccessor(_parameters)
+ | | | | | | | | | | | | | | | | | | | | +- DICT_LENGTH: len(L['mod']._modules['transformer']._modules['h']._modules['9']._modules['mlp']._modules['c_fc']._parameters) == 2
+ | | | | | | | | | | | | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['9']._modules['mlp']._modules['c_fc']._parameters['weight'], accessed_by=DictGetItemGuardAccessor(weight)
+ | | | | | | | | | | | | | | | | | | | | | +- TENSOR_MATCH: check_tensor(L['mod']._modules['transformer']._modules['h']._modules['9']._modules['mlp']._modules['c_fc']._parameters['weight'], Parameter, DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), torch.float32, device=0, requires_grad=True, size=[3072, 768], stride=[768, 1])
+ | | | | | | | | | | | | | | | | | | | | | +- NO_TENSOR_ALIASING
+ | | | | | | | | | | | | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['9']._modules['mlp']._modules['c_fc']._parameters['bias'], accessed_by=DictGetItemGuardAccessor(bias)
+ | | | | | | | | | | | | | | | | | | | | | +- TENSOR_MATCH: check_tensor(L['mod']._modules['transformer']._modules['h']._modules['9']._modules['mlp']._modules['c_fc']._parameters['bias'], Parameter, DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), torch.float32, device=0, requires_grad=True, size=[3072], stride=[1])
+ | | | | | | | | | | | | | | | | | | | | | +- NO_TENSOR_ALIASING
+ | | | | | | | | | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['9']._modules['mlp']._modules['c_proj'], accessed_by=DictGetItemGuardAccessor(c_proj)
+ | | | | | | | | | | | | | | | | | | +- TYPE_MATCH: ___check_type_id(L['mod']._modules['transformer']._modules['h']._modules['9']._modules['mlp']._modules['c_proj'], 94206198956688)
+ | | | | | | | | | | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['9']._modules['mlp']._modules['c_proj'].__dict__, accessed_by=GetGenericDictGuardAccessor
+ | | | | | | | | | | | | | | | | | | | +- DICT_CONTAINS: not ___dict_contains('forward', L['mod']._modules['transformer']._modules['h']._modules['9']._modules['mlp']._modules['c_proj'].__dict__)
+ | | | | | | | | | | | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['9']._modules['mlp']._modules['c_proj']._parameters, accessed_by=DictGetItemGuardAccessor(_parameters)
+ | | | | | | | | | | | | | | | | | | | | +- DICT_LENGTH: len(L['mod']._modules['transformer']._modules['h']._modules['9']._modules['mlp']._modules['c_proj']._parameters) == 2
+ | | | | | | | | | | | | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['9']._modules['mlp']._modules['c_proj']._parameters['weight'], accessed_by=DictGetItemGuardAccessor(weight)
+ | | | | | | | | | | | | | | | | | | | | | +- TENSOR_MATCH: check_tensor(L['mod']._modules['transformer']._modules['h']._modules['9']._modules['mlp']._modules['c_proj']._parameters['weight'], Parameter, DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), torch.float32, device=0, requires_grad=True, size=[768, 3072], stride=[3072, 1])
+ | | | | | | | | | | | | | | | | | | | | | +- NO_TENSOR_ALIASING
+ | | | | | | | | | | | | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['9']._modules['mlp']._modules['c_proj']._parameters['bias'], accessed_by=DictGetItemGuardAccessor(bias)
+ | | | | | | | | | | | | | | | | | | | | | +- TENSOR_MATCH: check_tensor(L['mod']._modules['transformer']._modules['h']._modules['9']._modules['mlp']._modules['c_proj']._parameters['bias'], Parameter, DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), torch.float32, device=0, requires_grad=True, size=[768], stride=[1])
+ | | | | | | | | | | | | | | | | | | | | | +- NO_TENSOR_ALIASING
+ | | | | | | | | | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['9']._modules['mlp']._modules['dropout'], accessed_by=DictGetItemGuardAccessor(dropout)
+ | | | | | | | | | | | | | | | | | | +- TYPE_MATCH: ___check_type_id(L['mod']._modules['transformer']._modules['h']._modules['9']._modules['mlp']._modules['dropout'], 94206199111456)
+ | | | | | | | | | | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['9']._modules['mlp']._modules['dropout'].__dict__, accessed_by=GetGenericDictGuardAccessor
+ | | | | | | | | | | | | | | | | | | | +- DICT_CONTAINS: not ___dict_contains('forward', L['mod']._modules['transformer']._modules['h']._modules['9']._modules['mlp']._modules['dropout'].__dict__)
+ | | | | | | | | | | | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['9']._modules['mlp']._modules['dropout'].p, accessed_by=DictGetItemGuardAccessor(p)
+ | | | | | | | | | | | | | | | | | | | | +- EQUALS_MATCH: L['mod']._modules['transformer']._modules['h']._modules['9']._modules['mlp']._modules['dropout'].p == 0.0
+ | | | | | | | | | | | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['9']._modules['mlp']._modules['dropout'].inplace, accessed_by=DictGetItemGuardAccessor(inplace)
+ | | | | | | | | | | | | | | | | | | | | +- ID_MATCH: ___check_obj_id(L['mod']._modules['transformer']._modules['h']._modules['9']._modules['mlp']._modules['dropout'].inplace, 94206128801376)
+ | | | | | | | | | | | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['9']._modules['mlp']._modules['dropout'].training, accessed_by=DictGetItemGuardAccessor(training)
+ | | | | | | | | | | | | | | | | | | | | +- ID_MATCH: ___check_obj_id(L['mod']._modules['transformer']._modules['h']._modules['9']._modules['mlp']._modules['dropout'].training, 94206128801408)
+ | | | | | | | | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['9']._modules['mlp']._parameters, accessed_by=DictGetItemGuardAccessor(_parameters)
+ | | | | | | | | | | | | | | | | | +- DICT_LENGTH: not L['mod']._modules['transformer']._modules['h']._modules['9']._modules['mlp']._parameters
+ | | | | | | | | | | | | | | | | +- DictSubclassGuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['9']._modules['mlp']._forward_hooks, accessed_by=DictGetItemGuardAccessor(_forward_hooks)
+ | | | | | | | | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['9']._modules['mlp']._backward_hooks, accessed_by=DictGetItemGuardAccessor(_backward_hooks)
+ | | | | | | | | | | | | | | | | | +- DICT_LENGTH: not L['mod']._modules['transformer']._modules['h']._modules['9']._modules['mlp']._backward_hooks
+ | | | | | | | | | | | | | | | | +- DictSubclassGuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['9']._modules['mlp']._forward_pre_hooks, accessed_by=DictGetItemGuardAccessor(_forward_pre_hooks)
+ | | | | | | | | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['9']._modules['mlp']._backward_pre_hooks, accessed_by=DictGetItemGuardAccessor(_backward_pre_hooks)
+ | | | | | | | | | | | | | | | | | +- DICT_LENGTH: not L['mod']._modules['transformer']._modules['h']._modules['9']._modules['mlp']._backward_pre_hooks
+ | | | | | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['9']._parameters, accessed_by=DictGetItemGuardAccessor(_parameters)
+ | | | | | | | | | | | | | | +- DICT_LENGTH: not L['mod']._modules['transformer']._modules['h']._modules['9']._parameters
+ | | | | | | | | | | | | | +- DictSubclassGuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['9']._forward_hooks, accessed_by=DictGetItemGuardAccessor(_forward_hooks)
+ | | | | | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['9']._backward_hooks, accessed_by=DictGetItemGuardAccessor(_backward_hooks)
+ | | | | | | | | | | | | | | +- DICT_LENGTH: not L['mod']._modules['transformer']._modules['h']._modules['9']._backward_hooks
+ | | | | | | | | | | | | | +- DictSubclassGuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['9']._forward_pre_hooks, accessed_by=DictGetItemGuardAccessor(_forward_pre_hooks)
+ | | | | | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['9']._backward_pre_hooks, accessed_by=DictGetItemGuardAccessor(_backward_pre_hooks)
+ | | | | | | | | | | | | | | +- DICT_LENGTH: not L['mod']._modules['transformer']._modules['h']._modules['9']._backward_pre_hooks
+ | | | | | | | | | | +- KeyValueManager pair at index=10
+ | | | | | | | | | | | +- KeyManager: GuardManager: source=list(L['mod']._modules['transformer']._modules['h']._modules.keys())[10]
+ | | | | | | | | | | | | +- EQUALS_MATCH: list(L['mod']._modules['transformer']._modules['h']._modules.keys())[10] == '10'
+ | | | | | | | | | | | +- ValueManager: GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['10']
+ | | | | | | | | | | | | +- TYPE_MATCH: ___check_type_id(L['mod']._modules['transformer']._modules['h']._modules['10'], 94206531295040)
+ | | | | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['10'].__dict__, accessed_by=GetGenericDictGuardAccessor
+ | | | | | | | | | | | | | +- DICT_CONTAINS: not ___dict_contains('forward', L['mod']._modules['transformer']._modules['h']._modules['10'].__dict__)
+ | | | | | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['10']._modules, accessed_by=DictGetItemGuardAccessor(_modules)
+ | | | | | | | | | | | | | | +- DICT_LENGTH: len(L['mod']._modules['transformer']._modules['h']._modules['10']._modules) == 4
+ | | | | | | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['10']._modules['ln_1'], accessed_by=DictGetItemGuardAccessor(ln_1)
+ | | | | | | | | | | | | | | | +- TYPE_MATCH: ___check_type_id(L['mod']._modules['transformer']._modules['h']._modules['10']._modules['ln_1'], 94206531292160)
+ | | | | | | | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['10']._modules['ln_1'].__dict__, accessed_by=GetGenericDictGuardAccessor
+ | | | | | | | | | | | | | | | | +- DICT_CONTAINS: not ___dict_contains('forward', L['mod']._modules['transformer']._modules['h']._modules['10']._modules['ln_1'].__dict__)
+ | | | | | | | | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['10']._modules['ln_1']._parameters, accessed_by=DictGetItemGuardAccessor(_parameters)
+ | | | | | | | | | | | | | | | | | +- DICT_LENGTH: len(L['mod']._modules['transformer']._modules['h']._modules['10']._modules['ln_1']._parameters) == 2
+ | | | | | | | | | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['10']._modules['ln_1']._parameters['weight'], accessed_by=DictGetItemGuardAccessor(weight)
+ | | | | | | | | | | | | | | | | | | +- TENSOR_MATCH: check_tensor(L['mod']._modules['transformer']._modules['h']._modules['10']._modules['ln_1']._parameters['weight'], Parameter, DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), torch.float32, device=0, requires_grad=True, size=[768], stride=[1])
+ | | | | | | | | | | | | | | | | | | +- NO_TENSOR_ALIASING
+ | | | | | | | | | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['10']._modules['ln_1']._parameters['bias'], accessed_by=DictGetItemGuardAccessor(bias)
+ | | | | | | | | | | | | | | | | | | +- TENSOR_MATCH: check_tensor(L['mod']._modules['transformer']._modules['h']._modules['10']._modules['ln_1']._parameters['bias'], Parameter, DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), torch.float32, device=0, requires_grad=True, size=[768], stride=[1])
+ | | | | | | | | | | | | | | | | | | +- NO_TENSOR_ALIASING
+ | | | | | | | | | | | | | | | | +- DictSubclassGuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['10']._modules['ln_1']._forward_hooks, accessed_by=DictGetItemGuardAccessor(_forward_hooks)
+ | | | | | | | | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['10']._modules['ln_1']._backward_hooks, accessed_by=DictGetItemGuardAccessor(_backward_hooks)
+ | | | | | | | | | | | | | | | | | +- DICT_LENGTH: not L['mod']._modules['transformer']._modules['h']._modules['10']._modules['ln_1']._backward_hooks
+ | | | | | | | | | | | | | | | | +- DictSubclassGuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['10']._modules['ln_1']._forward_pre_hooks, accessed_by=DictGetItemGuardAccessor(_forward_pre_hooks)
+ | | | | | | | | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['10']._modules['ln_1']._backward_pre_hooks, accessed_by=DictGetItemGuardAccessor(_backward_pre_hooks)
+ | | | | | | | | | | | | | | | | | +- DICT_LENGTH: not L['mod']._modules['transformer']._modules['h']._modules['10']._modules['ln_1']._backward_pre_hooks
+ | | | | | | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['10']._modules['attn'], accessed_by=DictGetItemGuardAccessor(attn)
+ | | | | | | | | | | | | | | | +- TYPE_MATCH: ___check_type_id(L['mod']._modules['transformer']._modules['h']._modules['10']._modules['attn'], 94206531293120)
+ | | | | | | | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['10']._modules['attn'].__dict__, accessed_by=GetGenericDictGuardAccessor
+ | | | | | | | | | | | | | | | | +- DICT_CONTAINS: not ___dict_contains('forward', L['mod']._modules['transformer']._modules['h']._modules['10']._modules['attn'].__dict__)
+ | | | | | | | | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['10']._modules['attn'].flash, accessed_by=DictGetItemGuardAccessor(flash)
+ | | | | | | | | | | | | | | | | | +- ID_MATCH: ___check_obj_id(L['mod']._modules['transformer']._modules['h']._modules['10']._modules['attn'].flash, 94206128801408)
+ | | | | | | | | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['10']._modules['attn'].n_embd, accessed_by=DictGetItemGuardAccessor(n_embd)
+ | | | | | | | | | | | | | | | | | +- EQUALS_MATCH: L['mod']._modules['transformer']._modules['h']._modules['10']._modules['attn'].n_embd == 768
+ | | | | | | | | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['10']._modules['attn'].n_head, accessed_by=DictGetItemGuardAccessor(n_head)
+ | | | | | | | | | | | | | | | | | +- EQUALS_MATCH: L['mod']._modules['transformer']._modules['h']._modules['10']._modules['attn'].n_head == 12
+ | | | | | | | | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['10']._modules['attn'].dropout, accessed_by=DictGetItemGuardAccessor(dropout)
+ | | | | | | | | | | | | | | | | | +- EQUALS_MATCH: L['mod']._modules['transformer']._modules['h']._modules['10']._modules['attn'].dropout == 0.0
+ | | | | | | | | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['10']._modules['attn']._modules, accessed_by=DictGetItemGuardAccessor(_modules)
+ | | | | | | | | | | | | | | | | | +- DICT_LENGTH: len(L['mod']._modules['transformer']._modules['h']._modules['10']._modules['attn']._modules) == 4
+ | | | | | | | | | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['10']._modules['attn']._modules['c_attn'], accessed_by=DictGetItemGuardAccessor(c_attn)
+ | | | | | | | | | | | | | | | | | | +- TYPE_MATCH: ___check_type_id(L['mod']._modules['transformer']._modules['h']._modules['10']._modules['attn']._modules['c_attn'], 94206198956688)
+ | | | | | | | | | | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['10']._modules['attn']._modules['c_attn'].__dict__, accessed_by=GetGenericDictGuardAccessor
+ | | | | | | | | | | | | | | | | | | | +- DICT_CONTAINS: not ___dict_contains('forward', L['mod']._modules['transformer']._modules['h']._modules['10']._modules['attn']._modules['c_attn'].__dict__)
+ | | | | | | | | | | | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['10']._modules['attn']._modules['c_attn']._parameters, accessed_by=DictGetItemGuardAccessor(_parameters)
+ | | | | | | | | | | | | | | | | | | | | +- DICT_LENGTH: len(L['mod']._modules['transformer']._modules['h']._modules['10']._modules['attn']._modules['c_attn']._parameters) == 2
+ | | | | | | | | | | | | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['10']._modules['attn']._modules['c_attn']._parameters['weight'], accessed_by=DictGetItemGuardAccessor(weight)
+ | | | | | | | | | | | | | | | | | | | | | +- TENSOR_MATCH: check_tensor(L['mod']._modules['transformer']._modules['h']._modules['10']._modules['attn']._modules['c_attn']._parameters['weight'], Parameter, DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), torch.float32, device=0, requires_grad=True, size=[2304, 768], stride=[768, 1])
+ | | | | | | | | | | | | | | | | | | | | | +- NO_TENSOR_ALIASING
+ | | | | | | | | | | | | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['10']._modules['attn']._modules['c_attn']._parameters['bias'], accessed_by=DictGetItemGuardAccessor(bias)
+ | | | | | | | | | | | | | | | | | | | | | +- TENSOR_MATCH: check_tensor(L['mod']._modules['transformer']._modules['h']._modules['10']._modules['attn']._modules['c_attn']._parameters['bias'], Parameter, DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), torch.float32, device=0, requires_grad=True, size=[2304], stride=[1])
+ | | | | | | | | | | | | | | | | | | | | | +- NO_TENSOR_ALIASING
+ | | | | | | | | | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['10']._modules['attn']._modules['c_proj'], accessed_by=DictGetItemGuardAccessor(c_proj)
+ | | | | | | | | | | | | | | | | | | +- TYPE_MATCH: ___check_type_id(L['mod']._modules['transformer']._modules['h']._modules['10']._modules['attn']._modules['c_proj'], 94206198956688)
+ | | | | | | | | | | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['10']._modules['attn']._modules['c_proj'].__dict__, accessed_by=GetGenericDictGuardAccessor
+ | | | | | | | | | | | | | | | | | | | +- DICT_CONTAINS: not ___dict_contains('forward', L['mod']._modules['transformer']._modules['h']._modules['10']._modules['attn']._modules['c_proj'].__dict__)
+ | | | | | | | | | | | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['10']._modules['attn']._modules['c_proj']._parameters, accessed_by=DictGetItemGuardAccessor(_parameters)
+ | | | | | | | | | | | | | | | | | | | | +- DICT_LENGTH: len(L['mod']._modules['transformer']._modules['h']._modules['10']._modules['attn']._modules['c_proj']._parameters) == 2
+ | | | | | | | | | | | | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['10']._modules['attn']._modules['c_proj']._parameters['weight'], accessed_by=DictGetItemGuardAccessor(weight)
+ | | | | | | | | | | | | | | | | | | | | | +- TENSOR_MATCH: check_tensor(L['mod']._modules['transformer']._modules['h']._modules['10']._modules['attn']._modules['c_proj']._parameters['weight'], Parameter, DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), torch.float32, device=0, requires_grad=True, size=[768, 768], stride=[768, 1])
+ | | | | | | | | | | | | | | | | | | | | | +- NO_TENSOR_ALIASING
+ | | | | | | | | | | | | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['10']._modules['attn']._modules['c_proj']._parameters['bias'], accessed_by=DictGetItemGuardAccessor(bias)
+ | | | | | | | | | | | | | | | | | | | | | +- TENSOR_MATCH: check_tensor(L['mod']._modules['transformer']._modules['h']._modules['10']._modules['attn']._modules['c_proj']._parameters['bias'], Parameter, DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), torch.float32, device=0, requires_grad=True, size=[768], stride=[1])
+ | | | | | | | | | | | | | | | | | | | | | +- NO_TENSOR_ALIASING
+ | | | | | | | | | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['10']._modules['attn']._modules['attn_dropout'], accessed_by=DictGetItemGuardAccessor(attn_dropout)
+ | | | | | | | | | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['10']._modules['attn']._modules['resid_dropout'], accessed_by=DictGetItemGuardAccessor(resid_dropout)
+ | | | | | | | | | | | | | | | | | | +- TYPE_MATCH: ___check_type_id(L['mod']._modules['transformer']._modules['h']._modules['10']._modules['attn']._modules['resid_dropout'], 94206199111456)
+ | | | | | | | | | | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['10']._modules['attn']._modules['resid_dropout'].__dict__, accessed_by=GetGenericDictGuardAccessor
+ | | | | | | | | | | | | | | | | | | | +- DICT_CONTAINS: not ___dict_contains('forward', L['mod']._modules['transformer']._modules['h']._modules['10']._modules['attn']._modules['resid_dropout'].__dict__)
+ | | | | | | | | | | | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['10']._modules['attn']._modules['resid_dropout'].p, accessed_by=DictGetItemGuardAccessor(p)
+ | | | | | | | | | | | | | | | | | | | | +- EQUALS_MATCH: L['mod']._modules['transformer']._modules['h']._modules['10']._modules['attn']._modules['resid_dropout'].p == 0.0
+ | | | | | | | | | | | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['10']._modules['attn']._modules['resid_dropout'].inplace, accessed_by=DictGetItemGuardAccessor(inplace)
+ | | | | | | | | | | | | | | | | | | | | +- ID_MATCH: ___check_obj_id(L['mod']._modules['transformer']._modules['h']._modules['10']._modules['attn']._modules['resid_dropout'].inplace, 94206128801376)
+ | | | | | | | | | | | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['10']._modules['attn']._modules['resid_dropout'].training, accessed_by=DictGetItemGuardAccessor(training)
+ | | | | | | | | | | | | | | | | | | | | +- ID_MATCH: ___check_obj_id(L['mod']._modules['transformer']._modules['h']._modules['10']._modules['attn']._modules['resid_dropout'].training, 94206128801408)
+ | | | | | | | | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['10']._modules['attn'].training, accessed_by=DictGetItemGuardAccessor(training)
+ | | | | | | | | | | | | | | | | | +- ID_MATCH: ___check_obj_id(L['mod']._modules['transformer']._modules['h']._modules['10']._modules['attn'].training, 94206128801408)
+ | | | | | | | | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['10']._modules['attn']._parameters, accessed_by=DictGetItemGuardAccessor(_parameters)
+ | | | | | | | | | | | | | | | | | +- DICT_LENGTH: not L['mod']._modules['transformer']._modules['h']._modules['10']._modules['attn']._parameters
+ | | | | | | | | | | | | | | | | +- DictSubclassGuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['10']._modules['attn']._forward_hooks, accessed_by=DictGetItemGuardAccessor(_forward_hooks)
+ | | | | | | | | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['10']._modules['attn']._backward_hooks, accessed_by=DictGetItemGuardAccessor(_backward_hooks)
+ | | | | | | | | | | | | | | | | | +- DICT_LENGTH: not L['mod']._modules['transformer']._modules['h']._modules['10']._modules['attn']._backward_hooks
+ | | | | | | | | | | | | | | | | +- DictSubclassGuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['10']._modules['attn']._forward_pre_hooks, accessed_by=DictGetItemGuardAccessor(_forward_pre_hooks)
+ | | | | | | | | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['10']._modules['attn']._backward_pre_hooks, accessed_by=DictGetItemGuardAccessor(_backward_pre_hooks)
+ | | | | | | | | | | | | | | | | | +- DICT_LENGTH: not L['mod']._modules['transformer']._modules['h']._modules['10']._modules['attn']._backward_pre_hooks
+ | | | | | | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['10']._modules['ln_2'], accessed_by=DictGetItemGuardAccessor(ln_2)
+ | | | | | | | | | | | | | | | +- TYPE_MATCH: ___check_type_id(L['mod']._modules['transformer']._modules['h']._modules['10']._modules['ln_2'], 94206531292160)
+ | | | | | | | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['10']._modules['ln_2'].__dict__, accessed_by=GetGenericDictGuardAccessor
+ | | | | | | | | | | | | | | | | +- DICT_CONTAINS: not ___dict_contains('forward', L['mod']._modules['transformer']._modules['h']._modules['10']._modules['ln_2'].__dict__)
+ | | | | | | | | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['10']._modules['ln_2']._parameters, accessed_by=DictGetItemGuardAccessor(_parameters)
+ | | | | | | | | | | | | | | | | | +- DICT_LENGTH: len(L['mod']._modules['transformer']._modules['h']._modules['10']._modules['ln_2']._parameters) == 2
+ | | | | | | | | | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['10']._modules['ln_2']._parameters['weight'], accessed_by=DictGetItemGuardAccessor(weight)
+ | | | | | | | | | | | | | | | | | | +- TENSOR_MATCH: check_tensor(L['mod']._modules['transformer']._modules['h']._modules['10']._modules['ln_2']._parameters['weight'], Parameter, DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), torch.float32, device=0, requires_grad=True, size=[768], stride=[1])
+ | | | | | | | | | | | | | | | | | | +- NO_TENSOR_ALIASING
+ | | | | | | | | | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['10']._modules['ln_2']._parameters['bias'], accessed_by=DictGetItemGuardAccessor(bias)
+ | | | | | | | | | | | | | | | | | | +- TENSOR_MATCH: check_tensor(L['mod']._modules['transformer']._modules['h']._modules['10']._modules['ln_2']._parameters['bias'], Parameter, DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), torch.float32, device=0, requires_grad=True, size=[768], stride=[1])
+ | | | | | | | | | | | | | | | | | | +- NO_TENSOR_ALIASING
+ | | | | | | | | | | | | | | | | +- DictSubclassGuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['10']._modules['ln_2']._forward_hooks, accessed_by=DictGetItemGuardAccessor(_forward_hooks)
+ | | | | | | | | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['10']._modules['ln_2']._backward_hooks, accessed_by=DictGetItemGuardAccessor(_backward_hooks)
+ | | | | | | | | | | | | | | | | | +- DICT_LENGTH: not L['mod']._modules['transformer']._modules['h']._modules['10']._modules['ln_2']._backward_hooks
+ | | | | | | | | | | | | | | | | +- DictSubclassGuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['10']._modules['ln_2']._forward_pre_hooks, accessed_by=DictGetItemGuardAccessor(_forward_pre_hooks)
+ | | | | | | | | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['10']._modules['ln_2']._backward_pre_hooks, accessed_by=DictGetItemGuardAccessor(_backward_pre_hooks)
+ | | | | | | | | | | | | | | | | | +- DICT_LENGTH: not L['mod']._modules['transformer']._modules['h']._modules['10']._modules['ln_2']._backward_pre_hooks
+ | | | | | | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['10']._modules['mlp'], accessed_by=DictGetItemGuardAccessor(mlp)
+ | | | | | | | | | | | | | | | +- TYPE_MATCH: ___check_type_id(L['mod']._modules['transformer']._modules['h']._modules['10']._modules['mlp'], 94206531294080)
+ | | | | | | | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['10']._modules['mlp'].__dict__, accessed_by=GetGenericDictGuardAccessor
+ | | | | | | | | | | | | | | | | +- DICT_CONTAINS: not ___dict_contains('forward', L['mod']._modules['transformer']._modules['h']._modules['10']._modules['mlp'].__dict__)
+ | | | | | | | | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['10']._modules['mlp']._modules, accessed_by=DictGetItemGuardAccessor(_modules)
+ | | | | | | | | | | | | | | | | | +- DICT_LENGTH: len(L['mod']._modules['transformer']._modules['h']._modules['10']._modules['mlp']._modules) == 3
+ | | | | | | | | | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['10']._modules['mlp']._modules['c_fc'], accessed_by=DictGetItemGuardAccessor(c_fc)
+ | | | | | | | | | | | | | | | | | | +- TYPE_MATCH: ___check_type_id(L['mod']._modules['transformer']._modules['h']._modules['10']._modules['mlp']._modules['c_fc'], 94206198956688)
+ | | | | | | | | | | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['10']._modules['mlp']._modules['c_fc'].__dict__, accessed_by=GetGenericDictGuardAccessor
+ | | | | | | | | | | | | | | | | | | | +- DICT_CONTAINS: not ___dict_contains('forward', L['mod']._modules['transformer']._modules['h']._modules['10']._modules['mlp']._modules['c_fc'].__dict__)
+ | | | | | | | | | | | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['10']._modules['mlp']._modules['c_fc']._parameters, accessed_by=DictGetItemGuardAccessor(_parameters)
+ | | | | | | | | | | | | | | | | | | | | +- DICT_LENGTH: len(L['mod']._modules['transformer']._modules['h']._modules['10']._modules['mlp']._modules['c_fc']._parameters) == 2
+ | | | | | | | | | | | | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['10']._modules['mlp']._modules['c_fc']._parameters['weight'], accessed_by=DictGetItemGuardAccessor(weight)
+ | | | | | | | | | | | | | | | | | | | | | +- TENSOR_MATCH: check_tensor(L['mod']._modules['transformer']._modules['h']._modules['10']._modules['mlp']._modules['c_fc']._parameters['weight'], Parameter, DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), torch.float32, device=0, requires_grad=True, size=[3072, 768], stride=[768, 1])
+ | | | | | | | | | | | | | | | | | | | | | +- NO_TENSOR_ALIASING
+ | | | | | | | | | | | | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['10']._modules['mlp']._modules['c_fc']._parameters['bias'], accessed_by=DictGetItemGuardAccessor(bias)
+ | | | | | | | | | | | | | | | | | | | | | +- TENSOR_MATCH: check_tensor(L['mod']._modules['transformer']._modules['h']._modules['10']._modules['mlp']._modules['c_fc']._parameters['bias'], Parameter, DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), torch.float32, device=0, requires_grad=True, size=[3072], stride=[1])
+ | | | | | | | | | | | | | | | | | | | | | +- NO_TENSOR_ALIASING
+ | | | | | | | | | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['10']._modules['mlp']._modules['c_proj'], accessed_by=DictGetItemGuardAccessor(c_proj)
+ | | | | | | | | | | | | | | | | | | +- TYPE_MATCH: ___check_type_id(L['mod']._modules['transformer']._modules['h']._modules['10']._modules['mlp']._modules['c_proj'], 94206198956688)
+ | | | | | | | | | | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['10']._modules['mlp']._modules['c_proj'].__dict__, accessed_by=GetGenericDictGuardAccessor
+ | | | | | | | | | | | | | | | | | | | +- DICT_CONTAINS: not ___dict_contains('forward', L['mod']._modules['transformer']._modules['h']._modules['10']._modules['mlp']._modules['c_proj'].__dict__)
+ | | | | | | | | | | | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['10']._modules['mlp']._modules['c_proj']._parameters, accessed_by=DictGetItemGuardAccessor(_parameters)
+ | | | | | | | | | | | | | | | | | | | | +- DICT_LENGTH: len(L['mod']._modules['transformer']._modules['h']._modules['10']._modules['mlp']._modules['c_proj']._parameters) == 2
+ | | | | | | | | | | | | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['10']._modules['mlp']._modules['c_proj']._parameters['weight'], accessed_by=DictGetItemGuardAccessor(weight)
+ | | | | | | | | | | | | | | | | | | | | | +- TENSOR_MATCH: check_tensor(L['mod']._modules['transformer']._modules['h']._modules['10']._modules['mlp']._modules['c_proj']._parameters['weight'], Parameter, DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), torch.float32, device=0, requires_grad=True, size=[768, 3072], stride=[3072, 1])
+ | | | | | | | | | | | | | | | | | | | | | +- NO_TENSOR_ALIASING
+ | | | | | | | | | | | | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['10']._modules['mlp']._modules['c_proj']._parameters['bias'], accessed_by=DictGetItemGuardAccessor(bias)
+ | | | | | | | | | | | | | | | | | | | | | +- TENSOR_MATCH: check_tensor(L['mod']._modules['transformer']._modules['h']._modules['10']._modules['mlp']._modules['c_proj']._parameters['bias'], Parameter, DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), torch.float32, device=0, requires_grad=True, size=[768], stride=[1])
+ | | | | | | | | | | | | | | | | | | | | | +- NO_TENSOR_ALIASING
+ | | | | | | | | | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['10']._modules['mlp']._modules['dropout'], accessed_by=DictGetItemGuardAccessor(dropout)
+ | | | | | | | | | | | | | | | | | | +- TYPE_MATCH: ___check_type_id(L['mod']._modules['transformer']._modules['h']._modules['10']._modules['mlp']._modules['dropout'], 94206199111456)
+ | | | | | | | | | | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['10']._modules['mlp']._modules['dropout'].__dict__, accessed_by=GetGenericDictGuardAccessor
+ | | | | | | | | | | | | | | | | | | | +- DICT_CONTAINS: not ___dict_contains('forward', L['mod']._modules['transformer']._modules['h']._modules['10']._modules['mlp']._modules['dropout'].__dict__)
+ | | | | | | | | | | | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['10']._modules['mlp']._modules['dropout'].p, accessed_by=DictGetItemGuardAccessor(p)
+ | | | | | | | | | | | | | | | | | | | | +- EQUALS_MATCH: L['mod']._modules['transformer']._modules['h']._modules['10']._modules['mlp']._modules['dropout'].p == 0.0
+ | | | | | | | | | | | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['10']._modules['mlp']._modules['dropout'].inplace, accessed_by=DictGetItemGuardAccessor(inplace)
+ | | | | | | | | | | | | | | | | | | | | +- ID_MATCH: ___check_obj_id(L['mod']._modules['transformer']._modules['h']._modules['10']._modules['mlp']._modules['dropout'].inplace, 94206128801376)
+ | | | | | | | | | | | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['10']._modules['mlp']._modules['dropout'].training, accessed_by=DictGetItemGuardAccessor(training)
+ | | | | | | | | | | | | | | | | | | | | +- ID_MATCH: ___check_obj_id(L['mod']._modules['transformer']._modules['h']._modules['10']._modules['mlp']._modules['dropout'].training, 94206128801408)
+ | | | | | | | | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['10']._modules['mlp']._parameters, accessed_by=DictGetItemGuardAccessor(_parameters)
+ | | | | | | | | | | | | | | | | | +- DICT_LENGTH: not L['mod']._modules['transformer']._modules['h']._modules['10']._modules['mlp']._parameters
+ | | | | | | | | | | | | | | | | +- DictSubclassGuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['10']._modules['mlp']._forward_hooks, accessed_by=DictGetItemGuardAccessor(_forward_hooks)
+ | |
| | | | | | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['10']._modules['mlp']._backward_hooks, accessed_by=DictGetItemGuardAccessor(_backward_hooks) + | | | | | | | | | | | | | | | | | +- DICT_LENGTH: not L['mod']._modules['transformer']._modules['h']._modules['10']._modules['mlp']._backward_hooks + | | | | | | | | | | | | | | | | +- DictSubclassGuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['10']._modules['mlp']._forward_pre_hooks, accessed_by=DictGetItemGuardAccessor(_forward_pre_hooks) + | | | | | | | | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['10']._modules['mlp']._backward_pre_hooks, accessed_by=DictGetItemGuardAccessor(_backward_pre_hooks) + | | | | | | | | | | | | | | | | | +- DICT_LENGTH: not L['mod']._modules['transformer']._modules['h']._modules['10']._modules['mlp']._backward_pre_hooks + | | | | | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['10']._parameters, accessed_by=DictGetItemGuardAccessor(_parameters) + | | | | | | | | | | | | | | +- DICT_LENGTH: not L['mod']._modules['transformer']._modules['h']._modules['10']._parameters + | | | | | | | | | | | | | +- DictSubclassGuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['10']._forward_hooks, accessed_by=DictGetItemGuardAccessor(_forward_hooks) + | | | | | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['10']._backward_hooks, accessed_by=DictGetItemGuardAccessor(_backward_hooks) + | | | | | | | | | | | | | | +- DICT_LENGTH: not L['mod']._modules['transformer']._modules['h']._modules['10']._backward_hooks + | | | | | | | | | | | | | +- DictSubclassGuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['10']._forward_pre_hooks, accessed_by=DictGetItemGuardAccessor(_forward_pre_hooks) + | | | | | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['10']._backward_pre_hooks, accessed_by=DictGetItemGuardAccessor(_backward_pre_hooks) + | | | | | | | | | | | | | | +- DICT_LENGTH: not L['mod']._modules['transformer']._modules['h']._modules['10']._backward_pre_hooks + | | | | | | | | | | +- KeyValueManager pair at index=11 + | | | | | | | | | | | +- KeyManager: GuardManager: source=list(L['mod']._modules['transformer']._modules['h']._modules.keys())[11] + | | | | | | | | | | | | +- EQUALS_MATCH: list(L['mod']._modules['transformer']._modules['h']._modules.keys())[11] == '11' + | | | | | | | | | | | +- ValueManager: GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['11'] + | | | | | | | | | | | | +- TYPE_MATCH: ___check_type_id(L['mod']._modules['transformer']._modules['h']._modules['11'], 94206531295040) + | | | | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['11'].__dict__, accessed_by=GetGenericDictGuardAccessor + | | | | | | | | | | | | | +- DICT_CONTAINS: not ___dict_contains('forward', L['mod']._modules['transformer']._modules['h']._modules['11'].__dict__) + | | | | | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['11']._modules, accessed_by=DictGetItemGuardAccessor(_modules) + | | | | | | | | | | | | | | +- DICT_LENGTH: len(L['mod']._modules['transformer']._modules['h']._modules['11']._modules) == 4 + | | | | | | | | | | | | | | +- GuardManager: 
source=L['mod']._modules['transformer']._modules['h']._modules['11']._modules['ln_1'], accessed_by=DictGetItemGuardAccessor(ln_1) + | | | | | | | | | | | | | | | +- TYPE_MATCH: ___check_type_id(L['mod']._modules['transformer']._modules['h']._modules['11']._modules['ln_1'], 94206531292160) + | | | | | | | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['11']._modules['ln_1'].__dict__, accessed_by=GetGenericDictGuardAccessor + | | | | | | | | | | | | | | | | +- DICT_CONTAINS: not ___dict_contains('forward', L['mod']._modules['transformer']._modules['h']._modules['11']._modules['ln_1'].__dict__) + | | | | | | | | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['11']._modules['ln_1']._parameters, accessed_by=DictGetItemGuardAccessor(_parameters) + | | | | | | | | | | | | | | | | | +- DICT_LENGTH: len(L['mod']._modules['transformer']._modules['h']._modules['11']._modules['ln_1']._parameters) == 2 + | | | | | | | | | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['11']._modules['ln_1']._parameters['weight'], accessed_by=DictGetItemGuardAccessor(weight) + | | | | | | | | | | | | | | | | | | +- TENSOR_MATCH: check_tensor(L['mod']._modules['transformer']._modules['h']._modules['11']._modules['ln_1']._parameters['weight'], Parameter, DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), torch.float32, device=0, requires_grad=True, size=[768], stride=[1]) + | | | | | | | | | | | | | | | | | | +- NO_TENSOR_ALIASING + | | | | | | | | | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['11']._modules['ln_1']._parameters['bias'], accessed_by=DictGetItemGuardAccessor(bias) + | | | | | | | | | | | | | | | | | | +- TENSOR_MATCH: check_tensor(L['mod']._modules['transformer']._modules['h']._modules['11']._modules['ln_1']._parameters['bias'], Parameter, DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), torch.float32, device=0, requires_grad=True, size=[768], stride=[1]) + | | | | | | | | | | | | | | | | | | +- NO_TENSOR_ALIASING + | | | | | | | | | | | | | | | | +- DictSubclassGuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['11']._modules['ln_1']._forward_hooks, accessed_by=DictGetItemGuardAccessor(_forward_hooks) + | | | | | | | | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['11']._modules['ln_1']._backward_hooks, accessed_by=DictGetItemGuardAccessor(_backward_hooks) + | | | | | | | | | | | | | | | | | +- DICT_LENGTH: not L['mod']._modules['transformer']._modules['h']._modules['11']._modules['ln_1']._backward_hooks + | | | | | | | | | | | | | | | | +- DictSubclassGuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['11']._modules['ln_1']._forward_pre_hooks, accessed_by=DictGetItemGuardAccessor(_forward_pre_hooks) + | | | | | | | | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['11']._modules['ln_1']._backward_pre_hooks, accessed_by=DictGetItemGuardAccessor(_backward_pre_hooks) + | | | | | | | | | | | | | | | | | +- DICT_LENGTH: not L['mod']._modules['transformer']._modules['h']._modules['11']._modules['ln_1']._backward_pre_hooks + | | | | | | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['11']._modules['attn'], accessed_by=DictGetItemGuardAccessor(attn) + | | | | | | | | | | | | | | | +- 
TYPE_MATCH: ___check_type_id(L['mod']._modules['transformer']._modules['h']._modules['11']._modules['attn'], 94206531293120) + | | | | | | | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['11']._modules['attn'].__dict__, accessed_by=GetGenericDictGuardAccessor + | | | | | | | | | | | | | | | | +- DICT_CONTAINS: not ___dict_contains('forward', L['mod']._modules['transformer']._modules['h']._modules['11']._modules['attn'].__dict__) + | | | | | | | | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['11']._modules['attn'].flash, accessed_by=DictGetItemGuardAccessor(flash) + | | | | | | | | | | | | | | | | | +- ID_MATCH: ___check_obj_id(L['mod']._modules['transformer']._modules['h']._modules['11']._modules['attn'].flash, 94206128801408) + | | | | | | | | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['11']._modules['attn'].n_embd, accessed_by=DictGetItemGuardAccessor(n_embd) + | | | | | | | | | | | | | | | | | +- EQUALS_MATCH: L['mod']._modules['transformer']._modules['h']._modules['11']._modules['attn'].n_embd == 768 + | | | | | | | | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['11']._modules['attn'].n_head, accessed_by=DictGetItemGuardAccessor(n_head) + | | | | | | | | | | | | | | | | | +- EQUALS_MATCH: L['mod']._modules['transformer']._modules['h']._modules['11']._modules['attn'].n_head == 12 + | | | | | | | | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['11']._modules['attn'].dropout, accessed_by=DictGetItemGuardAccessor(dropout) + | | | | | | | | | | | | | | | | | +- EQUALS_MATCH: L['mod']._modules['transformer']._modules['h']._modules['11']._modules['attn'].dropout == 0.0 + | | | | | | | | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['11']._modules['attn']._modules, accessed_by=DictGetItemGuardAccessor(_modules) + | | | | | | | | | | | | | | | | | +- DICT_LENGTH: len(L['mod']._modules['transformer']._modules['h']._modules['11']._modules['attn']._modules) == 4 + | | | | | | | | | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['11']._modules['attn']._modules['c_attn'], accessed_by=DictGetItemGuardAccessor(c_attn) + | | | | | | | | | | | | | | | | | | +- TYPE_MATCH: ___check_type_id(L['mod']._modules['transformer']._modules['h']._modules['11']._modules['attn']._modules['c_attn'], 94206198956688) + | | | | | | | | | | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['11']._modules['attn']._modules['c_attn'].__dict__, accessed_by=GetGenericDictGuardAccessor + | | | | | | | | | | | | | | | | | | | +- DICT_CONTAINS: not ___dict_contains('forward', L['mod']._modules['transformer']._modules['h']._modules['11']._modules['attn']._modules['c_attn'].__dict__) + | | | | | | | | | | | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['11']._modules['attn']._modules['c_attn']._parameters, accessed_by=DictGetItemGuardAccessor(_parameters) + | | | | | | | | | | | | | | | | | | | | +- DICT_LENGTH: len(L['mod']._modules['transformer']._modules['h']._modules['11']._modules['attn']._modules['c_attn']._parameters) == 2 + | | | | | | | | | | | | | | | | | | | | +- GuardManager: 
+ | | | | | | | | | | | | | | | | | | | | | +- TENSOR_MATCH: check_tensor(L['mod']._modules['transformer']._modules['h']._modules['11']._modules['attn']._modules['c_attn']._parameters['weight'], Parameter, DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), torch.float32, device=0, requires_grad=True, size=[2304, 768], stride=[768, 1])
+ | | | | | | | | | | | | | | | | | | | | | +- NO_TENSOR_ALIASING
+ | | | | | | | | | | | | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['11']._modules['attn']._modules['c_attn']._parameters['bias'], accessed_by=DictGetItemGuardAccessor(bias)
+ | | | | | | | | | | | | | | | | | | | | | +- TENSOR_MATCH: check_tensor(L['mod']._modules['transformer']._modules['h']._modules['11']._modules['attn']._modules['c_attn']._parameters['bias'], Parameter, DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), torch.float32, device=0, requires_grad=True, size=[2304], stride=[1])
+ | | | | | | | | | | | | | | | | | | | | | +- NO_TENSOR_ALIASING
+ | | | | | | | | | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['11']._modules['attn']._modules['c_proj'], accessed_by=DictGetItemGuardAccessor(c_proj)
+ | | | | | | | | | | | | | | | | | | +- TYPE_MATCH: ___check_type_id(L['mod']._modules['transformer']._modules['h']._modules['11']._modules['attn']._modules['c_proj'], 94206198956688)
+ | | | | | | | | | | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['11']._modules['attn']._modules['c_proj'].__dict__, accessed_by=GetGenericDictGuardAccessor
+ | | | | | | | | | | | | | | | | | | | +- DICT_CONTAINS: not ___dict_contains('forward', L['mod']._modules['transformer']._modules['h']._modules['11']._modules['attn']._modules['c_proj'].__dict__)
+ | | | | | | | | | | | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['11']._modules['attn']._modules['c_proj']._parameters, accessed_by=DictGetItemGuardAccessor(_parameters)
+ | | | | | | | | | | | | | | | | | | | | +- DICT_LENGTH: len(L['mod']._modules['transformer']._modules['h']._modules['11']._modules['attn']._modules['c_proj']._parameters) == 2
+ | | | | | | | | | | | | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['11']._modules['attn']._modules['c_proj']._parameters['weight'], accessed_by=DictGetItemGuardAccessor(weight)
+ | | | | | | | | | | | | | | | | | | | | | +- TENSOR_MATCH: check_tensor(L['mod']._modules['transformer']._modules['h']._modules['11']._modules['attn']._modules['c_proj']._parameters['weight'], Parameter, DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), torch.float32, device=0, requires_grad=True, size=[768, 768], stride=[768, 1])
+ | | | | | | | | | | | | | | | | | | | | | +- NO_TENSOR_ALIASING
+ | | | | | | | | | | | | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['11']._modules['attn']._modules['c_proj']._parameters['bias'], accessed_by=DictGetItemGuardAccessor(bias)
+ | | | | | | | | | | | | | | | | | | | | | +- TENSOR_MATCH: check_tensor(L['mod']._modules['transformer']._modules['h']._modules['11']._modules['attn']._modules['c_proj']._parameters['bias'], Parameter, DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), torch.float32, device=0, requires_grad=True, size=[768], stride=[1])
+ | | | | | | | | | | | | | | | | | | | | | +- NO_TENSOR_ALIASING
+ | | | | | | | | | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['11']._modules['attn']._modules['attn_dropout'], accessed_by=DictGetItemGuardAccessor(attn_dropout)
+ | | | | | | | | | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['11']._modules['attn']._modules['resid_dropout'], accessed_by=DictGetItemGuardAccessor(resid_dropout)
+ | | | | | | | | | | | | | | | | | | +- TYPE_MATCH: ___check_type_id(L['mod']._modules['transformer']._modules['h']._modules['11']._modules['attn']._modules['resid_dropout'], 94206199111456)
+ | | | | | | | | | | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['11']._modules['attn']._modules['resid_dropout'].__dict__, accessed_by=GetGenericDictGuardAccessor
+ | | | | | | | | | | | | | | | | | | | +- DICT_CONTAINS: not ___dict_contains('forward', L['mod']._modules['transformer']._modules['h']._modules['11']._modules['attn']._modules['resid_dropout'].__dict__)
+ | | | | | | | | | | | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['11']._modules['attn']._modules['resid_dropout'].p, accessed_by=DictGetItemGuardAccessor(p)
+ | | | | | | | | | | | | | | | | | | | | +- EQUALS_MATCH: L['mod']._modules['transformer']._modules['h']._modules['11']._modules['attn']._modules['resid_dropout'].p == 0.0
+ | | | | | | | | | | | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['11']._modules['attn']._modules['resid_dropout'].inplace, accessed_by=DictGetItemGuardAccessor(inplace)
+ | | | | | | | | | | | | | | | | | | | | +- ID_MATCH: ___check_obj_id(L['mod']._modules['transformer']._modules['h']._modules['11']._modules['attn']._modules['resid_dropout'].inplace, 94206128801376)
+ | | | | | | | | | | | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['11']._modules['attn']._modules['resid_dropout'].training, accessed_by=DictGetItemGuardAccessor(training)
+ | | | | | | | | | | | | | | | | | | | | +- ID_MATCH: ___check_obj_id(L['mod']._modules['transformer']._modules['h']._modules['11']._modules['attn']._modules['resid_dropout'].training, 94206128801408)
+ | | | | | | | | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['11']._modules['attn'].training, accessed_by=DictGetItemGuardAccessor(training)
+ | | | | | | | | | | | | | | | | | +- ID_MATCH: ___check_obj_id(L['mod']._modules['transformer']._modules['h']._modules['11']._modules['attn'].training, 94206128801408)
+ | | | | | | | | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['11']._modules['attn']._parameters, accessed_by=DictGetItemGuardAccessor(_parameters)
+ | | | | | | | | | | | | | | | | | +- DICT_LENGTH: not L['mod']._modules['transformer']._modules['h']._modules['11']._modules['attn']._parameters
+ | | | | | | | | | | | | | | | | +- DictSubclassGuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['11']._modules['attn']._forward_hooks, accessed_by=DictGetItemGuardAccessor(_forward_hooks)
+ | | | | | | | | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['11']._modules['attn']._backward_hooks, accessed_by=DictGetItemGuardAccessor(_backward_hooks)
+ | | | | | | | | | | | | | | | | | +- DICT_LENGTH: not L['mod']._modules['transformer']._modules['h']._modules['11']._modules['attn']._backward_hooks
+ | | | | | | | | | | | | | | | | +- DictSubclassGuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['11']._modules['attn']._forward_pre_hooks, accessed_by=DictGetItemGuardAccessor(_forward_pre_hooks)
+ | | | | | | | | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['11']._modules['attn']._backward_pre_hooks, accessed_by=DictGetItemGuardAccessor(_backward_pre_hooks)
+ | | | | | | | | | | | | | | | | | +- DICT_LENGTH: not L['mod']._modules['transformer']._modules['h']._modules['11']._modules['attn']._backward_pre_hooks
+ | | | | | | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['11']._modules['ln_2'], accessed_by=DictGetItemGuardAccessor(ln_2)
+ | | | | | | | | | | | | | | | +- TYPE_MATCH: ___check_type_id(L['mod']._modules['transformer']._modules['h']._modules['11']._modules['ln_2'], 94206531292160)
+ | | | | | | | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['11']._modules['ln_2'].__dict__, accessed_by=GetGenericDictGuardAccessor
+ | | | | | | | | | | | | | | | | +- DICT_CONTAINS: not ___dict_contains('forward', L['mod']._modules['transformer']._modules['h']._modules['11']._modules['ln_2'].__dict__)
+ | | | | | | | | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['11']._modules['ln_2']._parameters, accessed_by=DictGetItemGuardAccessor(_parameters)
+ | | | | | | | | | | | | | | | | | +- DICT_LENGTH: len(L['mod']._modules['transformer']._modules['h']._modules['11']._modules['ln_2']._parameters) == 2
+ | | | | | | | | | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['11']._modules['ln_2']._parameters['weight'], accessed_by=DictGetItemGuardAccessor(weight)
+ | | | | | | | | | | | | | | | | | | +- TENSOR_MATCH: check_tensor(L['mod']._modules['transformer']._modules['h']._modules['11']._modules['ln_2']._parameters['weight'], Parameter, DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), torch.float32, device=0, requires_grad=True, size=[768], stride=[1])
+ | | | | | | | | | | | | | | | | | | +- NO_TENSOR_ALIASING
+ | | | | | | | | | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['11']._modules['ln_2']._parameters['bias'], accessed_by=DictGetItemGuardAccessor(bias)
+ | | | | | | | | | | | | | | | | | | +- TENSOR_MATCH: check_tensor(L['mod']._modules['transformer']._modules['h']._modules['11']._modules['ln_2']._parameters['bias'], Parameter, DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), torch.float32, device=0, requires_grad=True, size=[768], stride=[1])
+ | | | | | | | | | | | | | | | | | | +- NO_TENSOR_ALIASING
+ | | | | | | | | | | | | | | | | +- DictSubclassGuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['11']._modules['ln_2']._forward_hooks, accessed_by=DictGetItemGuardAccessor(_forward_hooks)
+ | | | | | | | | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['11']._modules['ln_2']._backward_hooks, accessed_by=DictGetItemGuardAccessor(_backward_hooks)
+ | | | | | | | | | | | | | | | | | +- DICT_LENGTH: not L['mod']._modules['transformer']._modules['h']._modules['11']._modules['ln_2']._backward_hooks
+ | | | | | | | | | | | | | | | | +- DictSubclassGuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['11']._modules['ln_2']._forward_pre_hooks, accessed_by=DictGetItemGuardAccessor(_forward_pre_hooks)
+ | | | | | | | | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['11']._modules['ln_2']._backward_pre_hooks, accessed_by=DictGetItemGuardAccessor(_backward_pre_hooks)
+ | | | | | | | | | | | | | | | | | +- DICT_LENGTH: not L['mod']._modules['transformer']._modules['h']._modules['11']._modules['ln_2']._backward_pre_hooks
+ | | | | | | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['11']._modules['mlp'], accessed_by=DictGetItemGuardAccessor(mlp)
+ | | | | | | | | | | | | | | | +- TYPE_MATCH: ___check_type_id(L['mod']._modules['transformer']._modules['h']._modules['11']._modules['mlp'], 94206531294080)
+ | | | | | | | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['11']._modules['mlp'].__dict__, accessed_by=GetGenericDictGuardAccessor
+ | | | | | | | | | | | | | | | | +- DICT_CONTAINS: not ___dict_contains('forward', L['mod']._modules['transformer']._modules['h']._modules['11']._modules['mlp'].__dict__)
+ | | | | | | | | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['11']._modules['mlp']._modules, accessed_by=DictGetItemGuardAccessor(_modules)
+ | | | | | | | | | | | | | | | | | +- DICT_LENGTH: len(L['mod']._modules['transformer']._modules['h']._modules['11']._modules['mlp']._modules) == 3
+ | | | | | | | | | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['11']._modules['mlp']._modules['c_fc'], accessed_by=DictGetItemGuardAccessor(c_fc)
+ | | | | | | | | | | | | | | | | | | +- TYPE_MATCH: ___check_type_id(L['mod']._modules['transformer']._modules['h']._modules['11']._modules['mlp']._modules['c_fc'], 94206198956688)
+ | | | | | | | | | | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['11']._modules['mlp']._modules['c_fc'].__dict__, accessed_by=GetGenericDictGuardAccessor
+ | | | | | | | | | | | | | | | | | | | +- DICT_CONTAINS: not ___dict_contains('forward', L['mod']._modules['transformer']._modules['h']._modules['11']._modules['mlp']._modules['c_fc'].__dict__)
+ | | | | | | | | | | | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['11']._modules['mlp']._modules['c_fc']._parameters, accessed_by=DictGetItemGuardAccessor(_parameters)
+ | | | | | | | | | | | | | | | | | | | | +- DICT_LENGTH: len(L['mod']._modules['transformer']._modules['h']._modules['11']._modules['mlp']._modules['c_fc']._parameters) == 2
+ | | | | | | | | | | | | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['11']._modules['mlp']._modules['c_fc']._parameters['weight'], accessed_by=DictGetItemGuardAccessor(weight)
+ | | | | | | | | | | | | | | | | | | | | | +- TENSOR_MATCH: check_tensor(L['mod']._modules['transformer']._modules['h']._modules['11']._modules['mlp']._modules['c_fc']._parameters['weight'], Parameter, DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), torch.float32, device=0, requires_grad=True, size=[3072, 768], stride=[768, 1])
+ | | | | | | | | | | | | | | | | | | | | | +- NO_TENSOR_ALIASING
+ | | | | | | | | | | | | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['11']._modules['mlp']._modules['c_fc']._parameters['bias'], accessed_by=DictGetItemGuardAccessor(bias)
+ | | | | | | | | | | | | | | | | | | | | | +- TENSOR_MATCH: check_tensor(L['mod']._modules['transformer']._modules['h']._modules['11']._modules['mlp']._modules['c_fc']._parameters['bias'], Parameter, DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), torch.float32, device=0, requires_grad=True, size=[3072], stride=[1])
+ | | | | | | | | | | | | | | | | | | | | | +- NO_TENSOR_ALIASING
+ | | | | | | | | | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['11']._modules['mlp']._modules['c_proj'], accessed_by=DictGetItemGuardAccessor(c_proj)
+ | | | | | | | | | | | | | | | | | | +- TYPE_MATCH: ___check_type_id(L['mod']._modules['transformer']._modules['h']._modules['11']._modules['mlp']._modules['c_proj'], 94206198956688)
+ | | | | | | | | | | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['11']._modules['mlp']._modules['c_proj'].__dict__, accessed_by=GetGenericDictGuardAccessor
+ | | | | | | | | | | | | | | | | | | | +- DICT_CONTAINS: not ___dict_contains('forward', L['mod']._modules['transformer']._modules['h']._modules['11']._modules['mlp']._modules['c_proj'].__dict__)
+ | | | | | | | | | | | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['11']._modules['mlp']._modules['c_proj']._parameters, accessed_by=DictGetItemGuardAccessor(_parameters)
+ | | | | | | | | | | | | | | | | | | | | +- DICT_LENGTH: len(L['mod']._modules['transformer']._modules['h']._modules['11']._modules['mlp']._modules['c_proj']._parameters) == 2
+ | | | | | | | | | | | | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['11']._modules['mlp']._modules['c_proj']._parameters['weight'], accessed_by=DictGetItemGuardAccessor(weight)
+ | | | | | | | | | | | | | | | | | | | | | +- TENSOR_MATCH: check_tensor(L['mod']._modules['transformer']._modules['h']._modules['11']._modules['mlp']._modules['c_proj']._parameters['weight'], Parameter, DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), torch.float32, device=0, requires_grad=True, size=[768, 3072], stride=[3072, 1])
+ | | | | | | | | | | | | | | | | | | | | | +- NO_TENSOR_ALIASING
+ | | | | | | | | | | | | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['11']._modules['mlp']._modules['c_proj']._parameters['bias'], accessed_by=DictGetItemGuardAccessor(bias)
+ | | | | | | | | | | | | | | | | | | | | | +- TENSOR_MATCH: check_tensor(L['mod']._modules['transformer']._modules['h']._modules['11']._modules['mlp']._modules['c_proj']._parameters['bias'], Parameter, DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), torch.float32, device=0, requires_grad=True, size=[768], stride=[1])
+ | | | | | | | | | | | | | | | | | | | | | +- NO_TENSOR_ALIASING
+ | | | | | | | | | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['11']._modules['mlp']._modules['dropout'], accessed_by=DictGetItemGuardAccessor(dropout)
+ | | | | | | | | | | | | | | | | | | +- TYPE_MATCH: ___check_type_id(L['mod']._modules['transformer']._modules['h']._modules['11']._modules['mlp']._modules['dropout'], 94206199111456)
+ | | | | | | | | | | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['11']._modules['mlp']._modules['dropout'].__dict__, accessed_by=GetGenericDictGuardAccessor
+ | | | | | | | | | | | | | | | | | | | +- DICT_CONTAINS: not ___dict_contains('forward', L['mod']._modules['transformer']._modules['h']._modules['11']._modules['mlp']._modules['dropout'].__dict__)
+ | | | | | | | | | | | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['11']._modules['mlp']._modules['dropout'].p, accessed_by=DictGetItemGuardAccessor(p)
+ | | | | | | | | | | | | | | | | | | | | +- EQUALS_MATCH: L['mod']._modules['transformer']._modules['h']._modules['11']._modules['mlp']._modules['dropout'].p == 0.0
+ | | | | | | | | | | | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['11']._modules['mlp']._modules['dropout'].inplace, accessed_by=DictGetItemGuardAccessor(inplace)
+ | | | | | | | | | | | | | | | | | | | | +- ID_MATCH: ___check_obj_id(L['mod']._modules['transformer']._modules['h']._modules['11']._modules['mlp']._modules['dropout'].inplace, 94206128801376)
+ | | | | | | | | | | | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['11']._modules['mlp']._modules['dropout'].training, accessed_by=DictGetItemGuardAccessor(training)
+ | | | | | | | | | | | | | | | | | | | | +- ID_MATCH: ___check_obj_id(L['mod']._modules['transformer']._modules['h']._modules['11']._modules['mlp']._modules['dropout'].training, 94206128801408)
+ | | | | | | | | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['11']._modules['mlp']._parameters, accessed_by=DictGetItemGuardAccessor(_parameters)
+ | | | | | | | | | | | | | | | | | +- DICT_LENGTH: not L['mod']._modules['transformer']._modules['h']._modules['11']._modules['mlp']._parameters
+ | | | | | | | | | | | | | | | | +- DictSubclassGuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['11']._modules['mlp']._forward_hooks, accessed_by=DictGetItemGuardAccessor(_forward_hooks)
+ | | | | | | | | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['11']._modules['mlp']._backward_hooks, accessed_by=DictGetItemGuardAccessor(_backward_hooks)
+ | | | | | | | | | | | | | | | | | +- DICT_LENGTH: not L['mod']._modules['transformer']._modules['h']._modules['11']._modules['mlp']._backward_hooks
+ | | | | | | | | | | | | | | | | +- DictSubclassGuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['11']._modules['mlp']._forward_pre_hooks, accessed_by=DictGetItemGuardAccessor(_forward_pre_hooks)
+ | | | | | | | | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['11']._modules['mlp']._backward_pre_hooks, accessed_by=DictGetItemGuardAccessor(_backward_pre_hooks)
+ | | | | | | | | | | | | | | | | | +- DICT_LENGTH: not L['mod']._modules['transformer']._modules['h']._modules['11']._modules['mlp']._backward_pre_hooks
+ | | | | | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['11']._parameters, accessed_by=DictGetItemGuardAccessor(_parameters)
+ | | | | | | | | | | | | | | +- DICT_LENGTH: not L['mod']._modules['transformer']._modules['h']._modules['11']._parameters
+ | | | | | | | | | | | | | +- DictSubclassGuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['11']._forward_hooks, accessed_by=DictGetItemGuardAccessor(_forward_hooks)
+ | | | | | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['11']._backward_hooks, accessed_by=DictGetItemGuardAccessor(_backward_hooks)
+ | | | | | | | | | | | | | | +- DICT_LENGTH: not L['mod']._modules['transformer']._modules['h']._modules['11']._backward_hooks
+ | | | | | | | | | | | | | +- DictSubclassGuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['11']._forward_pre_hooks, accessed_by=DictGetItemGuardAccessor(_forward_pre_hooks)
+ | | | | | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['11']._backward_pre_hooks, accessed_by=DictGetItemGuardAccessor(_backward_pre_hooks)
+ | | | | | | | | | | | | | | +- DICT_LENGTH: not L['mod']._modules['transformer']._modules['h']._modules['11']._backward_pre_hooks
+ | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['ln_f'], accessed_by=DictGetItemGuardAccessor(ln_f)
+ | | | | | | | | +- TYPE_MATCH: ___check_type_id(L['mod']._modules['transformer']._modules['ln_f'], 94206531292160)
+ | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['ln_f'].__dict__, accessed_by=GetGenericDictGuardAccessor
+ | | | | | | | | | +- DICT_CONTAINS: not ___dict_contains('forward', L['mod']._modules['transformer']._modules['ln_f'].__dict__)
+ | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['ln_f']._parameters, accessed_by=DictGetItemGuardAccessor(_parameters)
+ | | | | | | | | | | +- DICT_LENGTH: len(L['mod']._modules['transformer']._modules['ln_f']._parameters) == 2
+ | | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['ln_f']._parameters['weight'], accessed_by=DictGetItemGuardAccessor(weight)
+ | | | | | | | | | | | +- TENSOR_MATCH: check_tensor(L['mod']._modules['transformer']._modules['ln_f']._parameters['weight'], Parameter, DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), torch.float32, device=0, requires_grad=True, size=[768], stride=[1])
+ | | | | | | | | | | | +- NO_TENSOR_ALIASING
+ | | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['ln_f']._parameters['bias'], accessed_by=DictGetItemGuardAccessor(bias)
+ | | | | | | | | | | | +- TENSOR_MATCH: check_tensor(L['mod']._modules['transformer']._modules['ln_f']._parameters['bias'], Parameter, DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), torch.float32, device=0, requires_grad=True, size=[768], stride=[1])
+ | | | | | | | | | | | +- NO_TENSOR_ALIASING
+ | | | | | | | | | +- DictSubclassGuardManager: source=L['mod']._modules['transformer']._modules['ln_f']._forward_hooks, accessed_by=DictGetItemGuardAccessor(_forward_hooks)
+ | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['ln_f']._backward_hooks, accessed_by=DictGetItemGuardAccessor(_backward_hooks)
+ | | | | | | | | | | +- DICT_LENGTH: not L['mod']._modules['transformer']._modules['ln_f']._backward_hooks
+ | | | | | | | | | +- DictSubclassGuardManager: source=L['mod']._modules['transformer']._modules['ln_f']._forward_pre_hooks, accessed_by=DictGetItemGuardAccessor(_forward_pre_hooks)
+ | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['ln_f']._backward_pre_hooks, accessed_by=DictGetItemGuardAccessor(_backward_pre_hooks)
+ | | | | | | | | | | +- DICT_LENGTH: not L['mod']._modules['transformer']._modules['ln_f']._backward_pre_hooks
+ | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._parameters, accessed_by=DictGetItemGuardAccessor(_parameters)
+ | | | | | | | +- DICT_LENGTH: not L['mod']._modules['transformer']._parameters
+ | | | | +- GuardManager: source=L['mod']._modules['lm_head'], accessed_by=DictGetItemGuardAccessor(lm_head)
+ | | | | | +- TYPE_MATCH: ___check_type_id(L['mod']._modules['lm_head'], 94206198956688)
+ | | | | | +- GuardManager: source=L['mod']._modules['lm_head'].__dict__, accessed_by=GetGenericDictGuardAccessor
+ | | | | | | +- DICT_CONTAINS: not ___dict_contains('forward', L['mod']._modules['lm_head'].__dict__)
+ | | | | | | +- GuardManager: source=L['mod']._modules['lm_head']._parameters, accessed_by=DictGetItemGuardAccessor(_parameters)
+ | | | | | | | +- DICT_LENGTH: len(L['mod']._modules['lm_head']._parameters) == 2
+ | | | | | | | +- GuardManager: source=L['mod']._modules['lm_head']._parameters['weight'], accessed_by=DictGetItemGuardAccessor(weight)
+ | | | | | | | | +- OBJECT_ALIASING: L['mod']._modules['transformer']._modules['wte']._parameters['weight'] is L['mod']._modules['lm_head']._parameters['weight']
+ | | | | | | | +- GuardManager: source=L['mod']._modules['lm_head']._parameters['bias'], accessed_by=DictGetItemGuardAccessor(bias)
+ | | | | | | | | +- ID_MATCH: ___check_obj_id(L['mod']._modules['lm_head']._parameters['bias'], 94206128752608)
+ | | | +- GuardManager: source=L['mod']._parameters, accessed_by=DictGetItemGuardAccessor(_parameters)
+ | | | | +- DICT_LENGTH: not L['mod']._parameters
+ | | | +- GuardManager: source=L['mod']._backward_hooks, accessed_by=DictGetItemGuardAccessor(_backward_hooks)
+ | | | | +- DICT_LENGTH: not L['mod']._backward_hooks
+ | | | +- GuardManager: source=L['mod']._backward_pre_hooks, accessed_by=DictGetItemGuardAccessor(_backward_pre_hooks)
+ | | | | +- DICT_LENGTH: not L['mod']._backward_pre_hooks
+ | +- GuardManager: source=L['self'], accessed_by=DictGetItemGuardAccessor(self)
+ | | +- TYPE_MATCH: ___check_type_id(L['self'], 94206130500624)
+ | | +- GuardManager: source=L['self'].autocast, accessed_by=GetAttrGuardAccessor(autocast)
+ | | | +- ID_MATCH: ___check_obj_id(L['self'].autocast, 94206129614704)
+ | | +- GuardManager: source=L['self'].autocast_arg, accessed_by=GetAttrGuardAccessor(autocast_arg)
+ | | | +- DICT_LENGTH: not L['self'].autocast_arg
+ | +- GuardManager: source=L['cloned_inputs'], accessed_by=DictGetItemGuardAccessor(cloned_inputs)
+ | | +- TYPE_MATCH: ___check_type_id(L['cloned_inputs'], 94206128766016)
+ | | +- LENGTH_CHECK: len(L['cloned_inputs']) == 1
+ | | +- GuardManager: source=L['cloned_inputs'][0], accessed_by=ListGetItemGuardAccessor(0)
+ | | | +- TENSOR_MATCH: check_tensor(L['cloned_inputs'][0], Tensor, DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), torch.int64, device=0, requires_grad=False, size=[1, 64], stride=[64, 1])
+ | | | +- NO_HASATTR: hasattr(L['cloned_inputs'][0], '_dynamo_dynamic_indices') == False
+ | | | +- NO_TENSOR_ALIASING
+ | +- GuardManager: source=G, accessed_by=GlobalsGuardAccessor
+ | | +- GuardManager: source=G['__builtins_dict___7'], accessed_by=DictGetItemGuardAccessor(__builtins_dict___7)
+ | | | +- GuardManager: source=G['__builtins_dict___7']['dict'], accessed_by=DictGetItemGuardAccessor(dict)
+ | | | | +- ID_MATCH: ___check_obj_id(G['__builtins_dict___7']['dict'], 94206128762464)
+ | | | +- GuardManager: source=G['__builtins_dict___7']['iter'], accessed_by=DictGetItemGuardAccessor(iter)
+ | | | | +- ID_MATCH: ___check_obj_id(G['__builtins_dict___7']['iter'], 140565189726576)
+ | | | +- GuardManager: source=G['__builtins_dict___7']['isinstance'], accessed_by=DictGetItemGuardAccessor(isinstance)
+ | | | | +- ID_MATCH: ___check_obj_id(G['__builtins_dict___7']['isinstance'], 140565189726416)
+ | | +- GuardManager: source=G['__import_torch_dot_nn_dot_modules_dot_linear'], accessed_by=DictGetItemGuardAccessor(__import_torch_dot_nn_dot_modules_dot_linear)
+ | | | +- ID_MATCH: ___check_obj_id(G['__import_torch_dot_nn_dot_modules_dot_linear'], 140563315432704)
+ | | | +- GuardManager: source=G['__import_torch_dot_nn_dot_modules_dot_linear'].F, accessed_by=GetAttrGuardAccessor(F)
+ | | | | +- OBJECT_ALIASING: G['__import_torch_dot_nn_dot_modules_dot_sparse'].F is G['__import_torch_dot_nn_dot_modules_dot_linear'].F
+ | | | | +- OBJECT_ALIASING: G['__import_torch_dot_nn_dot_modules_dot_sparse'].F is G['__import_torch_dot_nn_dot_modules_dot_linear'].F
+ | | | | +- OBJECT_ALIASING: G['__import_torch_dot_nn_dot_modules_dot_sparse'].F is G['__import_torch_dot_nn_dot_modules_dot_linear'].F
+ | | | | +- OBJECT_ALIASING: G['__import_torch_dot_nn_dot_modules_dot_sparse'].F is G['__import_torch_dot_nn_dot_modules_dot_linear'].F
+ | | | | +- OBJECT_ALIASING: G['__import_torch_dot_nn_dot_modules_dot_sparse'].F is G['__import_torch_dot_nn_dot_modules_dot_linear'].F
+ | | | | +- OBJECT_ALIASING: G['__import_torch_dot_nn_dot_modules_dot_sparse'].F is G['__import_torch_dot_nn_dot_modules_dot_linear'].F
+ | | | | +- OBJECT_ALIASING: G['__import_torch_dot_nn_dot_modules_dot_sparse'].F is G['__import_torch_dot_nn_dot_modules_dot_linear'].F
+ | | | | +- OBJECT_ALIASING: G['__import_torch_dot_nn_dot_modules_dot_sparse'].F is G['__import_torch_dot_nn_dot_modules_dot_linear'].F
+ | | | | +- OBJECT_ALIASING: G['__import_torch_dot_nn_dot_modules_dot_sparse'].F is G['__import_torch_dot_nn_dot_modules_dot_linear'].F
+ | | | | +- OBJECT_ALIASING: G['__import_torch_dot_nn_dot_modules_dot_sparse'].F is G['__import_torch_dot_nn_dot_modules_dot_linear'].F
+ | | | | +- OBJECT_ALIASING: G['__import_torch_dot_nn_dot_modules_dot_sparse'].F is G['__import_torch_dot_nn_dot_modules_dot_linear'].F
+ | | | | +- OBJECT_ALIASING: G['__import_torch_dot_nn_dot_modules_dot_sparse'].F is G['__import_torch_dot_nn_dot_modules_dot_linear'].F
+ | | | | +- OBJECT_ALIASING: G['__import_torch_dot_nn_dot_modules_dot_sparse'].F is G['__import_torch_dot_nn_dot_modules_dot_linear'].F
+ | | | | +- OBJECT_ALIASING: G['__import_torch_dot_nn_dot_modules_dot_sparse'].F is G['__import_torch_dot_nn_dot_modules_dot_linear'].F
+ | | | | +- OBJECT_ALIASING: G['__import_torch_dot_nn_dot_modules_dot_sparse'].F is G['__import_torch_dot_nn_dot_modules_dot_linear'].F
+ | | | | +- OBJECT_ALIASING: G['__import_torch_dot_nn_dot_modules_dot_sparse'].F is G['__import_torch_dot_nn_dot_modules_dot_linear'].F
+ | | | | +- OBJECT_ALIASING: G['__import_torch_dot_nn_dot_modules_dot_sparse'].F is G['__import_torch_dot_nn_dot_modules_dot_linear'].F
+ | | | | +- OBJECT_ALIASING: G['__import_torch_dot_nn_dot_modules_dot_sparse'].F is G['__import_torch_dot_nn_dot_modules_dot_linear'].F
+ | | | | +- OBJECT_ALIASING: G['__import_torch_dot_nn_dot_modules_dot_sparse'].F is G['__import_torch_dot_nn_dot_modules_dot_linear'].F
+ | | | | +- OBJECT_ALIASING: G['__import_torch_dot_nn_dot_modules_dot_sparse'].F is G['__import_torch_dot_nn_dot_modules_dot_linear'].F
+ | | | | +- OBJECT_ALIASING: G['__import_torch_dot_nn_dot_modules_dot_sparse'].F is G['__import_torch_dot_nn_dot_modules_dot_linear'].F
+ | | | | +- OBJECT_ALIASING: G['__import_torch_dot_nn_dot_modules_dot_sparse'].F is G['__import_torch_dot_nn_dot_modules_dot_linear'].F
+ | | | | +- OBJECT_ALIASING: G['__import_torch_dot_nn_dot_modules_dot_sparse'].F is G['__import_torch_dot_nn_dot_modules_dot_linear'].F
+ | | | | +- OBJECT_ALIASING: G['__import_torch_dot_nn_dot_modules_dot_sparse'].F is G['__import_torch_dot_nn_dot_modules_dot_linear'].F
+ | | | | +- OBJECT_ALIASING: G['__import_torch_dot_nn_dot_modules_dot_sparse'].F is G['__import_torch_dot_nn_dot_modules_dot_linear'].F
+ | | | | +- OBJECT_ALIASING: G['__import_torch_dot_nn_dot_modules_dot_sparse'].F is G['__import_torch_dot_nn_dot_modules_dot_linear'].F
+ | | | | +- OBJECT_ALIASING: G['__import_torch_dot_nn_dot_modules_dot_sparse'].F is G['__import_torch_dot_nn_dot_modules_dot_linear'].F
+ | | | | +- OBJECT_ALIASING: G['__import_torch_dot_nn_dot_modules_dot_sparse'].F is G['__import_torch_dot_nn_dot_modules_dot_linear'].F
+ | | | | +- OBJECT_ALIASING: G['__import_torch_dot_nn_dot_modules_dot_sparse'].F is G['__import_torch_dot_nn_dot_modules_dot_linear'].F
+ | | | | +- OBJECT_ALIASING: G['__import_torch_dot_nn_dot_modules_dot_sparse'].F is G['__import_torch_dot_nn_dot_modules_dot_linear'].F
+ | | | | +- OBJECT_ALIASING: G['__import_torch_dot_nn_dot_modules_dot_sparse'].F is G['__import_torch_dot_nn_dot_modules_dot_linear'].F
+ | | | | +- OBJECT_ALIASING: G['__import_torch_dot_nn_dot_modules_dot_sparse'].F is G['__import_torch_dot_nn_dot_modules_dot_linear'].F
+ | | | | +- OBJECT_ALIASING: G['__import_torch_dot_nn_dot_modules_dot_sparse'].F is G['__import_torch_dot_nn_dot_modules_dot_linear'].F
+ | | | | +- OBJECT_ALIASING: G['__import_torch_dot_nn_dot_modules_dot_sparse'].F is G['__import_torch_dot_nn_dot_modules_dot_linear'].F
+ | | | | +- OBJECT_ALIASING: G['__import_torch_dot_nn_dot_modules_dot_sparse'].F is G['__import_torch_dot_nn_dot_modules_dot_linear'].F
+ | | | | +- OBJECT_ALIASING: G['__import_torch_dot_nn_dot_modules_dot_sparse'].F is G['__import_torch_dot_nn_dot_modules_dot_linear'].F
+ | | | | +- OBJECT_ALIASING: G['__import_torch_dot_nn_dot_modules_dot_sparse'].F is G['__import_torch_dot_nn_dot_modules_dot_linear'].F
+ | | | | +- OBJECT_ALIASING: G['__import_torch_dot_nn_dot_modules_dot_sparse'].F is G['__import_torch_dot_nn_dot_modules_dot_linear'].F
+ | | | | +- OBJECT_ALIASING: G['__import_torch_dot_nn_dot_modules_dot_sparse'].F is G['__import_torch_dot_nn_dot_modules_dot_linear'].F
+ | | | | +- OBJECT_ALIASING: G['__import_torch_dot_nn_dot_modules_dot_sparse'].F is G['__import_torch_dot_nn_dot_modules_dot_linear'].F
+ | | | | +- OBJECT_ALIASING: G['__import_torch_dot_nn_dot_modules_dot_sparse'].F is G['__import_torch_dot_nn_dot_modules_dot_linear'].F
+ | | | | +- OBJECT_ALIASING: G['__import_torch_dot_nn_dot_modules_dot_sparse'].F is G['__import_torch_dot_nn_dot_modules_dot_linear'].F
+ | | | | +- OBJECT_ALIASING: G['__import_torch_dot_nn_dot_modules_dot_sparse'].F is G['__import_torch_dot_nn_dot_modules_dot_linear'].F
+ | | | | +- OBJECT_ALIASING: G['__import_torch_dot_nn_dot_modules_dot_sparse'].F is G['__import_torch_dot_nn_dot_modules_dot_linear'].F
+ | | | | +- OBJECT_ALIASING: G['__import_torch_dot_nn_dot_modules_dot_sparse'].F is G['__import_torch_dot_nn_dot_modules_dot_linear'].F
+ | | | | +- OBJECT_ALIASING: G['__import_torch_dot_nn_dot_modules_dot_sparse'].F is G['__import_torch_dot_nn_dot_modules_dot_linear'].F
+ | | | | +- OBJECT_ALIASING: G['__import_torch_dot_nn_dot_modules_dot_sparse'].F is G['__import_torch_dot_nn_dot_modules_dot_linear'].F
+ | | | | +- OBJECT_ALIASING: G['__import_torch_dot_nn_dot_modules_dot_sparse'].F is G['__import_torch_dot_nn_dot_modules_dot_linear'].F
+ | | | | +- OBJECT_ALIASING: G['__import_torch_dot_nn_dot_modules_dot_sparse'].F is G['__import_torch_dot_nn_dot_modules_dot_linear'].F
+ | | +- GuardManager: source=G['__import_torch_dot_nn_dot_modules_dot_module'], accessed_by=DictGetItemGuardAccessor(__import_torch_dot_nn_dot_modules_dot_module)
+ | | | +- ID_MATCH: ___check_obj_id(G['__import_torch_dot_nn_dot_modules_dot_module'], 140563346519712)
+ | | | +- GuardManager: source=G['__import_torch_dot_nn_dot_modules_dot_module']._global_forward_hooks, accessed_by=GetAttrGuardAccessor(_global_forward_hooks)
+ | | | | +- DICT_LENGTH: not G['__import_torch_dot_nn_dot_modules_dot_module']._global_forward_hooks
+ | | | +- GuardManager: source=G['__import_torch_dot_nn_dot_modules_dot_module']._global_backward_hooks, accessed_by=GetAttrGuardAccessor(_global_backward_hooks)
+ | | | | +- DICT_LENGTH: not G['__import_torch_dot_nn_dot_modules_dot_module']._global_backward_hooks
+ | | | +- GuardManager: source=G['__import_torch_dot_nn_dot_modules_dot_module']._global_forward_pre_hooks, accessed_by=GetAttrGuardAccessor(_global_forward_pre_hooks)
+ | | | | +- DICT_LENGTH: not G['__import_torch_dot_nn_dot_modules_dot_module']._global_forward_pre_hooks
+ | | | +- GuardManager: source=G['__import_torch_dot_nn_dot_modules_dot_module']._global_backward_pre_hooks, accessed_by=GetAttrGuardAccessor(_global_backward_pre_hooks)
+ | | | | +- DICT_LENGTH: not G['__import_torch_dot_nn_dot_modules_dot_module']._global_backward_pre_hooks
+ | | +- GuardManager: source=G['__import_torch_dot_nn_dot_modules_dot_sparse'], accessed_by=DictGetItemGuardAccessor(__import_torch_dot_nn_dot_modules_dot_sparse)
+ | | | +- ID_MATCH: ___check_obj_id(G['__import_torch_dot_nn_dot_modules_dot_sparse'], 140563269489408)
+ | | | +- GuardManager: source=G['__import_torch_dot_nn_dot_modules_dot_sparse'].F, accessed_by=GetAttrGuardAccessor(F)
+ | | | | +- ID_MATCH: ___check_obj_id(G['__import_torch_dot_nn_dot_modules_dot_sparse'].F, 140563315434864)
+ | | | | +- OBJECT_ALIASING: G['__import_torch_dot_nn_dot_modules_dot_sparse'].F is G['__import_torch_dot_nn_dot_modules_dot_linear'].F
+ | | | | +- OBJECT_ALIASING: G['__import_torch_dot_nn_dot_modules_dot_sparse'].F is G['__import_torch_dot_nn_dot_modules_dot_linear'].F
+ | | | | +- OBJECT_ALIASING: G['__import_torch_dot_nn_dot_modules_dot_sparse'].F is G['__import_torch_dot_nn_dot_modules_dot_linear'].F
+ | | | | +- OBJECT_ALIASING: G['__import_torch_dot_nn_dot_modules_dot_sparse'].F is G['__import_torch_dot_nn_dot_modules_dot_linear'].F
+ | | | | +- OBJECT_ALIASING: G['__import_torch_dot_nn_dot_modules_dot_sparse'].F is G['__import_torch_dot_nn_dot_modules_dot_linear'].F
+ | | | | +- OBJECT_ALIASING: G['__import_torch_dot_nn_dot_modules_dot_sparse'].F is G['__import_torch_dot_nn_dot_modules_dot_linear'].F
+ | | | | +- OBJECT_ALIASING: G['__import_torch_dot_nn_dot_modules_dot_sparse'].F is G['__import_torch_dot_nn_dot_modules_dot_linear'].F
+ | | | | +- OBJECT_ALIASING: G['__import_torch_dot_nn_dot_modules_dot_sparse'].F is G['__import_torch_dot_nn_dot_modules_dot_linear'].F
+ | | | | +- OBJECT_ALIASING: G['__import_torch_dot_nn_dot_modules_dot_sparse'].F is G['__import_torch_dot_nn_dot_modules_dot_linear'].F
+ | | | | +- OBJECT_ALIASING: G['__import_torch_dot_nn_dot_modules_dot_sparse'].F is G['__import_torch_dot_nn_dot_modules_dot_linear'].F
+ | | | | +- OBJECT_ALIASING: G['__import_torch_dot_nn_dot_modules_dot_sparse'].F is G['__import_torch_dot_nn_dot_modules_dot_linear'].F
+ | | | | +- OBJECT_ALIASING: G['__import_torch_dot_nn_dot_modules_dot_sparse'].F is G['__import_torch_dot_nn_dot_modules_dot_linear'].F
+ | | | | +- OBJECT_ALIASING: G['__import_torch_dot_nn_dot_modules_dot_sparse'].F is G['__import_torch_dot_nn_dot_modules_dot_linear'].F
+ | | | | +- OBJECT_ALIASING: G['__import_torch_dot_nn_dot_modules_dot_sparse'].F is G['__import_torch_dot_nn_dot_modules_dot_linear'].F
+ | | | | +- OBJECT_ALIASING: G['__import_torch_dot_nn_dot_modules_dot_sparse'].F is G['__import_torch_dot_nn_dot_modules_dot_linear'].F
+ | | | | +- OBJECT_ALIASING: G['__import_torch_dot_nn_dot_modules_dot_sparse'].F is G['__import_torch_dot_nn_dot_modules_dot_linear'].F
+ | | | | +- OBJECT_ALIASING: G['__import_torch_dot_nn_dot_modules_dot_sparse'].F is G['__import_torch_dot_nn_dot_modules_dot_linear'].F
+ | | | | +- OBJECT_ALIASING: G['__import_torch_dot_nn_dot_modules_dot_sparse'].F is G['__import_torch_dot_nn_dot_modules_dot_linear'].F
+ | | | | +- OBJECT_ALIASING: G['__import_torch_dot_nn_dot_modules_dot_sparse'].F is G['__import_torch_dot_nn_dot_modules_dot_linear'].F
+ | | | | +- OBJECT_ALIASING: G['__import_torch_dot_nn_dot_modules_dot_sparse'].F is G['__import_torch_dot_nn_dot_modules_dot_linear'].F
+ | | | | +- OBJECT_ALIASING: G['__import_torch_dot_nn_dot_modules_dot_sparse'].F is G['__import_torch_dot_nn_dot_modules_dot_linear'].F
+ | | | | +- OBJECT_ALIASING: G['__import_torch_dot_nn_dot_modules_dot_sparse'].F is G['__import_torch_dot_nn_dot_modules_dot_linear'].F
+ | | | | +- OBJECT_ALIASING: G['__import_torch_dot_nn_dot_modules_dot_sparse'].F is G['__import_torch_dot_nn_dot_modules_dot_linear'].F
+ | | | | +- OBJECT_ALIASING: G['__import_torch_dot_nn_dot_modules_dot_sparse'].F is G['__import_torch_dot_nn_dot_modules_dot_linear'].F
+ | | | | +- OBJECT_ALIASING: G['__import_torch_dot_nn_dot_modules_dot_sparse'].F is G['__import_torch_dot_nn_dot_modules_dot_linear'].F
+ | | | | +- OBJECT_ALIASING: G['__import_torch_dot_nn_dot_modules_dot_sparse'].F is G['__import_torch_dot_nn_dot_modules_dot_linear'].F
+ | | | | +- OBJECT_ALIASING: G['__import_torch_dot_nn_dot_modules_dot_sparse'].F is G['__import_torch_dot_nn_dot_modules_dot_linear'].F
+ | | | | +- OBJECT_ALIASING: G['__import_torch_dot_nn_dot_modules_dot_sparse'].F is G['__import_torch_dot_nn_dot_modules_dot_linear'].F
+ | | | | +- OBJECT_ALIASING: G['__import_torch_dot_nn_dot_modules_dot_sparse'].F is G['__import_torch_dot_nn_dot_modules_dot_linear'].F
+ | | | | +- OBJECT_ALIASING: G['__import_torch_dot_nn_dot_modules_dot_sparse'].F is G['__import_torch_dot_nn_dot_modules_dot_linear'].F
+ | | | | +- OBJECT_ALIASING: G['__import_torch_dot_nn_dot_modules_dot_sparse'].F is G['__import_torch_dot_nn_dot_modules_dot_linear'].F
+ | | | | +- OBJECT_ALIASING: G['__import_torch_dot_nn_dot_modules_dot_sparse'].F is G['__import_torch_dot_nn_dot_modules_dot_linear'].F
+ | | | | +- OBJECT_ALIASING: G['__import_torch_dot_nn_dot_modules_dot_sparse'].F is G['__import_torch_dot_nn_dot_modules_dot_linear'].F
+ | | | | +- OBJECT_ALIASING: G['__import_torch_dot_nn_dot_modules_dot_sparse'].F is G['__import_torch_dot_nn_dot_modules_dot_linear'].F
+ | | | | +- OBJECT_ALIASING: G['__import_torch_dot_nn_dot_modules_dot_sparse'].F is G['__import_torch_dot_nn_dot_modules_dot_linear'].F
+ | | | | +- OBJECT_ALIASING: G['__import_torch_dot_nn_dot_modules_dot_sparse'].F is G['__import_torch_dot_nn_dot_modules_dot_linear'].F
+ | | | | +- OBJECT_ALIASING: G['__import_torch_dot_nn_dot_modules_dot_sparse'].F is G['__import_torch_dot_nn_dot_modules_dot_linear'].F
+ | | | | +- OBJECT_ALIASING: G['__import_torch_dot_nn_dot_modules_dot_sparse'].F is G['__import_torch_dot_nn_dot_modules_dot_linear'].F
+ | | | | +- OBJECT_ALIASING: G['__import_torch_dot_nn_dot_modules_dot_sparse'].F is G['__import_torch_dot_nn_dot_modules_dot_linear'].F
+ | | | | +- OBJECT_ALIASING: G['__import_torch_dot_nn_dot_modules_dot_sparse'].F is G['__import_torch_dot_nn_dot_modules_dot_linear'].F
+ | | | | +- OBJECT_ALIASING: G['__import_torch_dot_nn_dot_modules_dot_sparse'].F is G['__import_torch_dot_nn_dot_modules_dot_linear'].F
+ | | | | +- OBJECT_ALIASING: G['__import_torch_dot_nn_dot_modules_dot_sparse'].F is G['__import_torch_dot_nn_dot_modules_dot_linear'].F
+ | | | | +- OBJECT_ALIASING: G['__import_torch_dot_nn_dot_modules_dot_sparse'].F is G['__import_torch_dot_nn_dot_modules_dot_linear'].F
+ | | | | +- OBJECT_ALIASING: G['__import_torch_dot_nn_dot_modules_dot_sparse'].F is G['__import_torch_dot_nn_dot_modules_dot_linear'].F
+ | | | | +- OBJECT_ALIASING: G['__import_torch_dot_nn_dot_modules_dot_sparse'].F is G['__import_torch_dot_nn_dot_modules_dot_linear'].F
+ | | | | +- OBJECT_ALIASING: G['__import_torch_dot_nn_dot_modules_dot_sparse'].F is G['__import_torch_dot_nn_dot_modules_dot_linear'].F
+ | | | | +- OBJECT_ALIASING: G['__import_torch_dot_nn_dot_modules_dot_sparse'].F is G['__import_torch_dot_nn_dot_modules_dot_linear'].F
+ | | | | +- OBJECT_ALIASING: G['__import_torch_dot_nn_dot_modules_dot_sparse'].F is G['__import_torch_dot_nn_dot_modules_dot_linear'].F
+ | | | | +- OBJECT_ALIASING: G['__import_torch_dot_nn_dot_modules_dot_sparse'].F is G['__import_torch_dot_nn_dot_modules_dot_linear'].F
+ | | | | +- OBJECT_ALIASING: G['__import_torch_dot_nn_dot_modules_dot_sparse'].F is G['__import_torch_dot_nn_dot_modules_dot_dropout'].F
+ | | | | +- OBJECT_ALIASING: G['__import_torch_dot_nn_dot_modules_dot_sparse'].F is G['__import_torch_dot_nn_dot_modules_dot_dropout'].F
+ | | | | +- OBJECT_ALIASING: G['__import_torch_dot_nn_dot_modules_dot_sparse'].F is G['__import_torch_dot_nn_dot_modules_dot_dropout'].F
+ | | | | +- OBJECT_ALIASING: G['__import_torch_dot_nn_dot_modules_dot_sparse'].F is G['__import_torch_dot_nn_dot_modules_dot_dropout'].F
+ | | | | +- OBJECT_ALIASING: G['__import_torch_dot_nn_dot_modules_dot_sparse'].F is G['__import_torch_dot_nn_dot_modules_dot_dropout'].F
+ | | | | +- OBJECT_ALIASING: G['__import_torch_dot_nn_dot_modules_dot_sparse'].F is G['__import_torch_dot_nn_dot_modules_dot_dropout'].F
+ | | | | +- OBJECT_ALIASING: G['__import_torch_dot_nn_dot_modules_dot_sparse'].F is G['__import_torch_dot_nn_dot_modules_dot_dropout'].F
+ | | | | +- OBJECT_ALIASING: G['__import_torch_dot_nn_dot_modules_dot_sparse'].F is G['__import_torch_dot_nn_dot_modules_dot_dropout'].F
+ | | | | +- OBJECT_ALIASING: G['__import_torch_dot_nn_dot_modules_dot_sparse'].F is G['__import_torch_dot_nn_dot_modules_dot_dropout'].F
+ | | | | +- OBJECT_ALIASING: G['__import_torch_dot_nn_dot_modules_dot_sparse'].F is G['__import_torch_dot_nn_dot_modules_dot_dropout'].F
+ | | | | +- OBJECT_ALIASING: G['__import_torch_dot_nn_dot_modules_dot_sparse'].F is G['__import_torch_dot_nn_dot_modules_dot_dropout'].F
+ | | | | +- OBJECT_ALIASING: G['__import_torch_dot_nn_dot_modules_dot_sparse'].F is G['__import_torch_dot_nn_dot_modules_dot_dropout'].F
+ | | | | +- OBJECT_ALIASING: G['__import_torch_dot_nn_dot_modules_dot_sparse'].F is G['__import_torch_dot_nn_dot_modules_dot_dropout'].F
+ | | | | +- OBJECT_ALIASING: G['__import_torch_dot_nn_dot_modules_dot_sparse'].F is G['__import_torch_dot_nn_dot_modules_dot_dropout'].F
+ | | | | +- OBJECT_ALIASING: G['__import_torch_dot_nn_dot_modules_dot_sparse'].F is G['__import_torch_dot_nn_dot_modules_dot_dropout'].F
+ | | | | +- OBJECT_ALIASING: G['__import_torch_dot_nn_dot_modules_dot_sparse'].F is G['__import_torch_dot_nn_dot_modules_dot_dropout'].F
+ | | | | +- OBJECT_ALIASING: G['__import_torch_dot_nn_dot_modules_dot_sparse'].F is G['__import_torch_dot_nn_dot_modules_dot_dropout'].F
+ | | | | +- OBJECT_ALIASING: G['__import_torch_dot_nn_dot_modules_dot_sparse'].F is G['__import_torch_dot_nn_dot_modules_dot_dropout'].F
+ | | | | +- OBJECT_ALIASING: G['__import_torch_dot_nn_dot_modules_dot_sparse'].F is G['__import_torch_dot_nn_dot_modules_dot_dropout'].F
+ | | | | +- OBJECT_ALIASING: G['__import_torch_dot_nn_dot_modules_dot_sparse'].F is G['__import_torch_dot_nn_dot_modules_dot_dropout'].F
+ | | | | +- OBJECT_ALIASING: G['__import_torch_dot_nn_dot_modules_dot_sparse'].F is G['__import_torch_dot_nn_dot_modules_dot_dropout'].F
+ | | | | +- OBJECT_ALIASING: G['__import_torch_dot_nn_dot_modules_dot_sparse'].F is G['__import_torch_dot_nn_dot_modules_dot_dropout'].F
+ | | | | +- OBJECT_ALIASING: G['__import_torch_dot_nn_dot_modules_dot_sparse'].F is G['__import_torch_dot_nn_dot_modules_dot_dropout'].F
+ | | | | +- OBJECT_ALIASING: G['__import_torch_dot_nn_dot_modules_dot_sparse'].F is G['__import_torch_dot_nn_dot_modules_dot_dropout'].F
+ | | | | +- OBJECT_ALIASING: G['__import_torch_dot_nn_dot_modules_dot_sparse'].F is G['__import_torch_dot_nn_dot_modules_dot_dropout'].F
+ | | | | +- OBJECT_ALIASING: G['__import_torch_dot_nn_dot_modules_dot_sparse'].F is G['__import_torchbenchmark_dot_models_dot_nanogpt_dot_model'].F
+ | | | | +- OBJECT_ALIASING: G['__import_torch_dot_nn_dot_modules_dot_sparse'].F is G['__import_torchbenchmark_dot_models_dot_nanogpt_dot_model'].F
+ | | | | +- OBJECT_ALIASING: G['__import_torch_dot_nn_dot_modules_dot_sparse'].F is G['__import_torchbenchmark_dot_models_dot_nanogpt_dot_model'].F
+ | | | | +- OBJECT_ALIASING: G['__import_torch_dot_nn_dot_modules_dot_sparse'].F is G['__import_torchbenchmark_dot_models_dot_nanogpt_dot_model'].F
+ | | | | +- OBJECT_ALIASING: G['__import_torch_dot_nn_dot_modules_dot_sparse'].F is G['__import_torchbenchmark_dot_models_dot_nanogpt_dot_model'].F
+ | | | | +- OBJECT_ALIASING: G['__import_torch_dot_nn_dot_modules_dot_sparse'].F is G['__import_torchbenchmark_dot_models_dot_nanogpt_dot_model'].F
+ | | | | +- OBJECT_ALIASING: G['__import_torch_dot_nn_dot_modules_dot_sparse'].F is G['__import_torchbenchmark_dot_models_dot_nanogpt_dot_model'].F
+ | | | | +- OBJECT_ALIASING: G['__import_torch_dot_nn_dot_modules_dot_sparse'].F is G['__import_torchbenchmark_dot_models_dot_nanogpt_dot_model'].F
+ | | | | +- OBJECT_ALIASING: G['__import_torch_dot_nn_dot_modules_dot_sparse'].F is G['__import_torchbenchmark_dot_models_dot_nanogpt_dot_model'].F
+ | | | | +- OBJECT_ALIASING: G['__import_torch_dot_nn_dot_modules_dot_sparse'].F is G['__import_torchbenchmark_dot_models_dot_nanogpt_dot_model'].F
+ | | | | +- OBJECT_ALIASING: G['__import_torch_dot_nn_dot_modules_dot_sparse'].F is G['__import_torchbenchmark_dot_models_dot_nanogpt_dot_model'].F
+ | | | | +- OBJECT_ALIASING:
G['__import_torch_dot_nn_dot_modules_dot_sparse'].F is G['__import_torchbenchmark_dot_models_dot_nanogpt_dot_model'].F + | | | | +- OBJECT_ALIASING: G['__import_torch_dot_nn_dot_modules_dot_sparse'].F is G['__import_torchbenchmark_dot_models_dot_nanogpt_dot_model'].F + | | | | +- OBJECT_ALIASING: G['__import_torch_dot_nn_dot_modules_dot_sparse'].F is G['__import_torchbenchmark_dot_models_dot_nanogpt_dot_model'].F + | | | | +- OBJECT_ALIASING: G['__import_torch_dot_nn_dot_modules_dot_sparse'].F is G['__import_torchbenchmark_dot_models_dot_nanogpt_dot_model'].F + | | | | +- OBJECT_ALIASING: G['__import_torch_dot_nn_dot_modules_dot_sparse'].F is G['__import_torchbenchmark_dot_models_dot_nanogpt_dot_model'].F + | | | | +- OBJECT_ALIASING: G['__import_torch_dot_nn_dot_modules_dot_sparse'].F is G['__import_torchbenchmark_dot_models_dot_nanogpt_dot_model'].F + | | | | +- OBJECT_ALIASING: G['__import_torch_dot_nn_dot_modules_dot_sparse'].F is G['__import_torchbenchmark_dot_models_dot_nanogpt_dot_model'].F + | | | | +- OBJECT_ALIASING: G['__import_torch_dot_nn_dot_modules_dot_sparse'].F is G['__import_torchbenchmark_dot_models_dot_nanogpt_dot_model'].F + | | | | +- OBJECT_ALIASING: G['__import_torch_dot_nn_dot_modules_dot_sparse'].F is G['__import_torchbenchmark_dot_models_dot_nanogpt_dot_model'].F + | | | | +- OBJECT_ALIASING: G['__import_torch_dot_nn_dot_modules_dot_sparse'].F is G['__import_torchbenchmark_dot_models_dot_nanogpt_dot_model'].F + | | | | +- OBJECT_ALIASING: G['__import_torch_dot_nn_dot_modules_dot_sparse'].F is G['__import_torchbenchmark_dot_models_dot_nanogpt_dot_model'].F + | | | | +- OBJECT_ALIASING: G['__import_torch_dot_nn_dot_modules_dot_sparse'].F is G['__import_torchbenchmark_dot_models_dot_nanogpt_dot_model'].F + | | | | +- OBJECT_ALIASING: G['__import_torch_dot_nn_dot_modules_dot_sparse'].F is G['__import_torchbenchmark_dot_models_dot_nanogpt_dot_model'].F + | | | | +- OBJECT_ALIASING: G['__import_torch_dot_nn_dot_modules_dot_sparse'].F is G['__import_torchbenchmark_dot_models_dot_nanogpt_dot_model'].F + | | | | +- OBJECT_ALIASING: G['__import_torch_dot_nn_dot_modules_dot_sparse'].F is G['__import_torchbenchmark_dot_models_dot_nanogpt_dot_model'].torch.nn.functional + | | | | +- OBJECT_ALIASING: G['__import_torch_dot_nn_dot_modules_dot_sparse'].F is G['__import_torchbenchmark_dot_models_dot_nanogpt_dot_model'].torch.nn.functional + | | | | +- OBJECT_ALIASING: G['__import_torch_dot_nn_dot_modules_dot_sparse'].F is G['__import_torchbenchmark_dot_models_dot_nanogpt_dot_model'].torch.nn.functional + | | | | +- OBJECT_ALIASING: G['__import_torch_dot_nn_dot_modules_dot_sparse'].F is G['__import_torchbenchmark_dot_models_dot_nanogpt_dot_model'].torch.nn.functional + | | | | +- OBJECT_ALIASING: G['__import_torch_dot_nn_dot_modules_dot_sparse'].F is G['__import_torchbenchmark_dot_models_dot_nanogpt_dot_model'].torch.nn.functional + | | | | +- OBJECT_ALIASING: G['__import_torch_dot_nn_dot_modules_dot_sparse'].F is G['__import_torchbenchmark_dot_models_dot_nanogpt_dot_model'].torch.nn.functional + | | | | +- OBJECT_ALIASING: G['__import_torch_dot_nn_dot_modules_dot_sparse'].F is G['__import_torchbenchmark_dot_models_dot_nanogpt_dot_model'].torch.nn.functional + | | | | +- OBJECT_ALIASING: G['__import_torch_dot_nn_dot_modules_dot_sparse'].F is G['__import_torchbenchmark_dot_models_dot_nanogpt_dot_model'].torch.nn.functional + | | | | +- OBJECT_ALIASING: G['__import_torch_dot_nn_dot_modules_dot_sparse'].F is 
G['__import_torchbenchmark_dot_models_dot_nanogpt_dot_model'].torch.nn.functional + | | | | +- OBJECT_ALIASING: G['__import_torch_dot_nn_dot_modules_dot_sparse'].F is G['__import_torchbenchmark_dot_models_dot_nanogpt_dot_model'].torch.nn.functional + | | | | +- OBJECT_ALIASING: G['__import_torch_dot_nn_dot_modules_dot_sparse'].F is G['__import_torchbenchmark_dot_models_dot_nanogpt_dot_model'].torch.nn.functional + | | | | +- OBJECT_ALIASING: G['__import_torch_dot_nn_dot_modules_dot_sparse'].F is G['__import_torchbenchmark_dot_models_dot_nanogpt_dot_model'].torch.nn.functional + | | | | +- GuardManager: source=G['__import_torch_dot_nn_dot_modules_dot_sparse'].F.linear, accessed_by=GetAttrGuardAccessor(linear) + | | | | | +- ID_MATCH: ___check_obj_id(G['__import_torch_dot_nn_dot_modules_dot_sparse'].F.linear, 140563483168176) + | | | | +- GuardManager: source=G['__import_torch_dot_nn_dot_modules_dot_sparse'].F.dropout, accessed_by=GetAttrGuardAccessor(dropout) + | | | | | +- ID_MATCH: ___check_obj_id(G['__import_torch_dot_nn_dot_modules_dot_sparse'].F.dropout, 140563303880096) + | | | | +- GuardManager: source=G['__import_torch_dot_nn_dot_modules_dot_sparse'].F.embedding, accessed_by=GetAttrGuardAccessor(embedding) + | | | | | +- ID_MATCH: ___check_obj_id(G['__import_torch_dot_nn_dot_modules_dot_sparse'].F.embedding, 140563303884704) + | | | | +- GuardManager: source=G['__import_torch_dot_nn_dot_modules_dot_sparse'].F.layer_norm, accessed_by=GetAttrGuardAccessor(layer_norm) + | | | | | +- ID_MATCH: ___check_obj_id(G['__import_torch_dot_nn_dot_modules_dot_sparse'].F.layer_norm, 140563303885712) + | | | | +- GuardManager: source=G['__import_torch_dot_nn_dot_modules_dot_sparse'].F.scaled_dot_product_attention, accessed_by=GetAttrGuardAccessor(scaled_dot_product_attention) + | | | | | +- ID_MATCH: ___check_obj_id(G['__import_torch_dot_nn_dot_modules_dot_sparse'].F.scaled_dot_product_attention, 140563482417552) + | | +- GuardManager: source=G['__import_torch_dot_nn_dot_modules_dot_dropout'], accessed_by=DictGetItemGuardAccessor(__import_torch_dot_nn_dot_modules_dot_dropout) + | | | +- ID_MATCH: ___check_obj_id(G['__import_torch_dot_nn_dot_modules_dot_dropout'], 140563269207600) + | | | +- GuardManager: source=G['__import_torch_dot_nn_dot_modules_dot_dropout'].F, accessed_by=GetAttrGuardAccessor(F) + | | | | +- OBJECT_ALIASING: G['__import_torch_dot_nn_dot_modules_dot_sparse'].F is G['__import_torch_dot_nn_dot_modules_dot_dropout'].F + | | | | +- OBJECT_ALIASING: G['__import_torch_dot_nn_dot_modules_dot_sparse'].F is G['__import_torch_dot_nn_dot_modules_dot_dropout'].F + | | | | +- OBJECT_ALIASING: G['__import_torch_dot_nn_dot_modules_dot_sparse'].F is G['__import_torch_dot_nn_dot_modules_dot_dropout'].F + | | | | +- OBJECT_ALIASING: G['__import_torch_dot_nn_dot_modules_dot_sparse'].F is G['__import_torch_dot_nn_dot_modules_dot_dropout'].F + | | | | +- OBJECT_ALIASING: G['__import_torch_dot_nn_dot_modules_dot_sparse'].F is G['__import_torch_dot_nn_dot_modules_dot_dropout'].F + | | | | +- OBJECT_ALIASING: G['__import_torch_dot_nn_dot_modules_dot_sparse'].F is G['__import_torch_dot_nn_dot_modules_dot_dropout'].F + | | | | +- OBJECT_ALIASING: G['__import_torch_dot_nn_dot_modules_dot_sparse'].F is G['__import_torch_dot_nn_dot_modules_dot_dropout'].F + | | | | +- OBJECT_ALIASING: G['__import_torch_dot_nn_dot_modules_dot_sparse'].F is G['__import_torch_dot_nn_dot_modules_dot_dropout'].F + | | | | +- OBJECT_ALIASING: G['__import_torch_dot_nn_dot_modules_dot_sparse'].F is 
G['__import_torch_dot_nn_dot_modules_dot_dropout'].F + | | | | +- OBJECT_ALIASING: G['__import_torch_dot_nn_dot_modules_dot_sparse'].F is G['__import_torch_dot_nn_dot_modules_dot_dropout'].F + | | | | +- OBJECT_ALIASING: G['__import_torch_dot_nn_dot_modules_dot_sparse'].F is G['__import_torch_dot_nn_dot_modules_dot_dropout'].F + | | | | +- OBJECT_ALIASING: G['__import_torch_dot_nn_dot_modules_dot_sparse'].F is G['__import_torch_dot_nn_dot_modules_dot_dropout'].F + | | | | +- OBJECT_ALIASING: G['__import_torch_dot_nn_dot_modules_dot_sparse'].F is G['__import_torch_dot_nn_dot_modules_dot_dropout'].F + | | | | +- OBJECT_ALIASING: G['__import_torch_dot_nn_dot_modules_dot_sparse'].F is G['__import_torch_dot_nn_dot_modules_dot_dropout'].F + | | | | +- OBJECT_ALIASING: G['__import_torch_dot_nn_dot_modules_dot_sparse'].F is G['__import_torch_dot_nn_dot_modules_dot_dropout'].F + | | | | +- OBJECT_ALIASING: G['__import_torch_dot_nn_dot_modules_dot_sparse'].F is G['__import_torch_dot_nn_dot_modules_dot_dropout'].F + | | | | +- OBJECT_ALIASING: G['__import_torch_dot_nn_dot_modules_dot_sparse'].F is G['__import_torch_dot_nn_dot_modules_dot_dropout'].F + | | | | +- OBJECT_ALIASING: G['__import_torch_dot_nn_dot_modules_dot_sparse'].F is G['__import_torch_dot_nn_dot_modules_dot_dropout'].F + | | | | +- OBJECT_ALIASING: G['__import_torch_dot_nn_dot_modules_dot_sparse'].F is G['__import_torch_dot_nn_dot_modules_dot_dropout'].F + | | | | +- OBJECT_ALIASING: G['__import_torch_dot_nn_dot_modules_dot_sparse'].F is G['__import_torch_dot_nn_dot_modules_dot_dropout'].F + | | | | +- OBJECT_ALIASING: G['__import_torch_dot_nn_dot_modules_dot_sparse'].F is G['__import_torch_dot_nn_dot_modules_dot_dropout'].F + | | | | +- OBJECT_ALIASING: G['__import_torch_dot_nn_dot_modules_dot_sparse'].F is G['__import_torch_dot_nn_dot_modules_dot_dropout'].F + | | | | +- OBJECT_ALIASING: G['__import_torch_dot_nn_dot_modules_dot_sparse'].F is G['__import_torch_dot_nn_dot_modules_dot_dropout'].F + | | | | +- OBJECT_ALIASING: G['__import_torch_dot_nn_dot_modules_dot_sparse'].F is G['__import_torch_dot_nn_dot_modules_dot_dropout'].F + | | | | +- OBJECT_ALIASING: G['__import_torch_dot_nn_dot_modules_dot_sparse'].F is G['__import_torch_dot_nn_dot_modules_dot_dropout'].F + | | +- GuardManager: source=G['__import_torch_dot_nn_dot_modules_dot_container'], accessed_by=DictGetItemGuardAccessor(__import_torch_dot_nn_dot_modules_dot_container) + | | | +- ID_MATCH: ___check_obj_id(G['__import_torch_dot_nn_dot_modules_dot_container'], 140563268955776) + | | +- GuardManager: source=G['__import_torchbenchmark_dot_models_dot_nanogpt_dot_model'], accessed_by=DictGetItemGuardAccessor(__import_torchbenchmark_dot_models_dot_nanogpt_dot_model) + | | | +- ID_MATCH: ___check_obj_id(G['__import_torchbenchmark_dot_models_dot_nanogpt_dot_model'], 140561618535104) + | | | +- GuardManager: source=G['__import_torchbenchmark_dot_models_dot_nanogpt_dot_model'].math, accessed_by=GetAttrGuardAccessor(math) + | | | | +- ID_MATCH: ___check_obj_id(G['__import_torchbenchmark_dot_models_dot_nanogpt_dot_model'].math, 140565183540704) + | | | | +- GuardManager: source=G['__import_torchbenchmark_dot_models_dot_nanogpt_dot_model'].math.pi, accessed_by=GetAttrGuardAccessor(pi) + | | | | | +- EQUALS_MATCH: G['__import_torchbenchmark_dot_models_dot_nanogpt_dot_model'].math.pi == 3.141592653589793 + | | | | +- GuardManager: source=G['__import_torchbenchmark_dot_models_dot_nanogpt_dot_model'].math.sqrt, accessed_by=GetAttrGuardAccessor(sqrt) + | | | | | +- ID_MATCH: 
___check_obj_id(G['__import_torchbenchmark_dot_models_dot_nanogpt_dot_model'].math.sqrt, 140565184131408) + | | | +- GuardManager: source=G['__import_torchbenchmark_dot_models_dot_nanogpt_dot_model'].torch, accessed_by=GetAttrGuardAccessor(torch) + | | | | +- ID_MATCH: ___check_obj_id(G['__import_torchbenchmark_dot_models_dot_nanogpt_dot_model'].torch, 140565184683664) + | | | | +- GuardManager: source=G['__import_torchbenchmark_dot_models_dot_nanogpt_dot_model'].torch.nn, accessed_by=GetAttrGuardAccessor(nn) + | | | | | +- ID_MATCH: ___check_obj_id(G['__import_torchbenchmark_dot_models_dot_nanogpt_dot_model'].torch.nn, 140563346511472) + | | | | | +- GuardManager: source=G['__import_torchbenchmark_dot_models_dot_nanogpt_dot_model'].torch.nn.functional, accessed_by=GetAttrGuardAccessor(functional) + | | | | | | +- OBJECT_ALIASING: G['__import_torch_dot_nn_dot_modules_dot_sparse'].F is G['__import_torchbenchmark_dot_models_dot_nanogpt_dot_model'].torch.nn.functional + | | | | | | +- OBJECT_ALIASING: G['__import_torch_dot_nn_dot_modules_dot_sparse'].F is G['__import_torchbenchmark_dot_models_dot_nanogpt_dot_model'].torch.nn.functional + | | | | | | +- OBJECT_ALIASING: G['__import_torch_dot_nn_dot_modules_dot_sparse'].F is G['__import_torchbenchmark_dot_models_dot_nanogpt_dot_model'].torch.nn.functional + | | | | | | +- OBJECT_ALIASING: G['__import_torch_dot_nn_dot_modules_dot_sparse'].F is G['__import_torchbenchmark_dot_models_dot_nanogpt_dot_model'].torch.nn.functional + | | | | | | +- OBJECT_ALIASING: G['__import_torch_dot_nn_dot_modules_dot_sparse'].F is G['__import_torchbenchmark_dot_models_dot_nanogpt_dot_model'].torch.nn.functional + | | | | | | +- OBJECT_ALIASING: G['__import_torch_dot_nn_dot_modules_dot_sparse'].F is G['__import_torchbenchmark_dot_models_dot_nanogpt_dot_model'].torch.nn.functional + | | | | | | +- OBJECT_ALIASING: G['__import_torch_dot_nn_dot_modules_dot_sparse'].F is G['__import_torchbenchmark_dot_models_dot_nanogpt_dot_model'].torch.nn.functional + | | | | | | +- OBJECT_ALIASING: G['__import_torch_dot_nn_dot_modules_dot_sparse'].F is G['__import_torchbenchmark_dot_models_dot_nanogpt_dot_model'].torch.nn.functional + | | | | | | +- OBJECT_ALIASING: G['__import_torch_dot_nn_dot_modules_dot_sparse'].F is G['__import_torchbenchmark_dot_models_dot_nanogpt_dot_model'].torch.nn.functional + | | | | | | +- OBJECT_ALIASING: G['__import_torch_dot_nn_dot_modules_dot_sparse'].F is G['__import_torchbenchmark_dot_models_dot_nanogpt_dot_model'].torch.nn.functional + | | | | | | +- OBJECT_ALIASING: G['__import_torch_dot_nn_dot_modules_dot_sparse'].F is G['__import_torchbenchmark_dot_models_dot_nanogpt_dot_model'].torch.nn.functional + | | | | | | +- OBJECT_ALIASING: G['__import_torch_dot_nn_dot_modules_dot_sparse'].F is G['__import_torchbenchmark_dot_models_dot_nanogpt_dot_model'].torch.nn.functional + | | | | +- GuardManager: source=G['__import_torchbenchmark_dot_models_dot_nanogpt_dot_model'].torch.pow, accessed_by=GetAttrGuardAccessor(pow) + | | | | | +- ID_MATCH: ___check_obj_id(G['__import_torchbenchmark_dot_models_dot_nanogpt_dot_model'].torch.pow, 140565181125936) + | | | | +- GuardManager: source=G['__import_torchbenchmark_dot_models_dot_nanogpt_dot_model'].torch.long, accessed_by=GetAttrGuardAccessor(long) + | | | | | +- EQUALS_MATCH: G['__import_torchbenchmark_dot_models_dot_nanogpt_dot_model'].torch.long == torch.int64 + | | | | +- GuardManager: source=G['__import_torchbenchmark_dot_models_dot_nanogpt_dot_model'].torch.tanh, accessed_by=GetAttrGuardAccessor(tanh) + | | 
| | | +- ID_MATCH: ___check_obj_id(G['__import_torchbenchmark_dot_models_dot_nanogpt_dot_model'].torch.tanh, 140565181101168) + | | | | +- GuardManager: source=G['__import_torchbenchmark_dot_models_dot_nanogpt_dot_model'].torch.arange, accessed_by=GetAttrGuardAccessor(arange) + | | | | | +- ID_MATCH: ___check_obj_id(G['__import_torchbenchmark_dot_models_dot_nanogpt_dot_model'].torch.arange, 140565181040736) + | | | +- GuardManager: source=G['__import_torchbenchmark_dot_models_dot_nanogpt_dot_model'].new_gelu, accessed_by=GetAttrGuardAccessor(new_gelu) + | | | | +- GuardManager: source=G['__import_torchbenchmark_dot_models_dot_nanogpt_dot_model'].new_gelu.__code__, accessed_by=GetAttrGuardAccessor(__code__) + | | | | | +- ID_MATCH: ___check_obj_id(G['__import_torchbenchmark_dot_models_dot_nanogpt_dot_model'].new_gelu.__code__, 140561652999328) + | | | +- GuardManager: source=G['__import_torchbenchmark_dot_models_dot_nanogpt_dot_model'].F, accessed_by=GetAttrGuardAccessor(F) + | | | | +- OBJECT_ALIASING: G['__import_torch_dot_nn_dot_modules_dot_sparse'].F is G['__import_torchbenchmark_dot_models_dot_nanogpt_dot_model'].F + | | | | +- OBJECT_ALIASING: G['__import_torch_dot_nn_dot_modules_dot_sparse'].F is G['__import_torchbenchmark_dot_models_dot_nanogpt_dot_model'].F + | | | | +- OBJECT_ALIASING: G['__import_torch_dot_nn_dot_modules_dot_sparse'].F is G['__import_torchbenchmark_dot_models_dot_nanogpt_dot_model'].F + | | | | +- OBJECT_ALIASING: G['__import_torch_dot_nn_dot_modules_dot_sparse'].F is G['__import_torchbenchmark_dot_models_dot_nanogpt_dot_model'].F + | | | | +- OBJECT_ALIASING: G['__import_torch_dot_nn_dot_modules_dot_sparse'].F is G['__import_torchbenchmark_dot_models_dot_nanogpt_dot_model'].F + | | | | +- OBJECT_ALIASING: G['__import_torch_dot_nn_dot_modules_dot_sparse'].F is G['__import_torchbenchmark_dot_models_dot_nanogpt_dot_model'].F + | | | | +- OBJECT_ALIASING: G['__import_torch_dot_nn_dot_modules_dot_sparse'].F is G['__import_torchbenchmark_dot_models_dot_nanogpt_dot_model'].F + | | | | +- OBJECT_ALIASING: G['__import_torch_dot_nn_dot_modules_dot_sparse'].F is G['__import_torchbenchmark_dot_models_dot_nanogpt_dot_model'].F + | | | | +- OBJECT_ALIASING: G['__import_torch_dot_nn_dot_modules_dot_sparse'].F is G['__import_torchbenchmark_dot_models_dot_nanogpt_dot_model'].F + | | | | +- OBJECT_ALIASING: G['__import_torch_dot_nn_dot_modules_dot_sparse'].F is G['__import_torchbenchmark_dot_models_dot_nanogpt_dot_model'].F + | | | | +- OBJECT_ALIASING: G['__import_torch_dot_nn_dot_modules_dot_sparse'].F is G['__import_torchbenchmark_dot_models_dot_nanogpt_dot_model'].F + | | | | +- OBJECT_ALIASING: G['__import_torch_dot_nn_dot_modules_dot_sparse'].F is G['__import_torchbenchmark_dot_models_dot_nanogpt_dot_model'].F + | | | | +- OBJECT_ALIASING: G['__import_torch_dot_nn_dot_modules_dot_sparse'].F is G['__import_torchbenchmark_dot_models_dot_nanogpt_dot_model'].F + | | | | +- OBJECT_ALIASING: G['__import_torch_dot_nn_dot_modules_dot_sparse'].F is G['__import_torchbenchmark_dot_models_dot_nanogpt_dot_model'].F + | | | | +- OBJECT_ALIASING: G['__import_torch_dot_nn_dot_modules_dot_sparse'].F is G['__import_torchbenchmark_dot_models_dot_nanogpt_dot_model'].F + | | | | +- OBJECT_ALIASING: G['__import_torch_dot_nn_dot_modules_dot_sparse'].F is G['__import_torchbenchmark_dot_models_dot_nanogpt_dot_model'].F + | | | | +- OBJECT_ALIASING: G['__import_torch_dot_nn_dot_modules_dot_sparse'].F is G['__import_torchbenchmark_dot_models_dot_nanogpt_dot_model'].F + | | | | +- OBJECT_ALIASING: 
G['__import_torch_dot_nn_dot_modules_dot_sparse'].F is G['__import_torchbenchmark_dot_models_dot_nanogpt_dot_model'].F + | | | | +- OBJECT_ALIASING: G['__import_torch_dot_nn_dot_modules_dot_sparse'].F is G['__import_torchbenchmark_dot_models_dot_nanogpt_dot_model'].F + | | | | +- OBJECT_ALIASING: G['__import_torch_dot_nn_dot_modules_dot_sparse'].F is G['__import_torchbenchmark_dot_models_dot_nanogpt_dot_model'].F + | | | | +- OBJECT_ALIASING: G['__import_torch_dot_nn_dot_modules_dot_sparse'].F is G['__import_torchbenchmark_dot_models_dot_nanogpt_dot_model'].F + | | | | +- OBJECT_ALIASING: G['__import_torch_dot_nn_dot_modules_dot_sparse'].F is G['__import_torchbenchmark_dot_models_dot_nanogpt_dot_model'].F + | | | | +- OBJECT_ALIASING: G['__import_torch_dot_nn_dot_modules_dot_sparse'].F is G['__import_torchbenchmark_dot_models_dot_nanogpt_dot_model'].F + | | | | +- OBJECT_ALIASING: G['__import_torch_dot_nn_dot_modules_dot_sparse'].F is G['__import_torchbenchmark_dot_models_dot_nanogpt_dot_model'].F + | | | | +- OBJECT_ALIASING: G['__import_torch_dot_nn_dot_modules_dot_sparse'].F is G['__import_torchbenchmark_dot_models_dot_nanogpt_dot_model'].F + +V0806 13:56:01.005000 4107173 torch/_dynamo/utils.py:824] {"chromium_event": {}, "has_payload": "512cbce354a3c2247fd5ce255fa5310b"} + { + "name": "entire_frame_compile", + "ts": 1722977761005735.5, + "args": null, + "ph": "E", + "pid": 0 + } +V0806 13:56:01.005000 4107173 torch/_dynamo/utils.py:824] {"chromium_event": {}, "has_payload": "69d31a588f8918001abc5fb068f845d2"} + { + "name": "_compile.compile_inner", + "ts": 1722977761005843.8, + "args": null, + "ph": "E", + "pid": 0 + } +V0806 13:56:01.006000 4107173 torch/_dynamo/utils.py:779] {"compilation_metrics": {"compile_id": "2/0", "frame_key": "3", "co_name": "torch_dynamo_resume_in_forward_and_backward_pass_at_438", "co_filename": "/data/users/jjwu/a/pytorch/benchmarks/dynamo/torchbench.py", "co_firstlineno": 438, "cache_size": 0, "accumulated_cache_size": 0, "guard_count": 1713, "shape_env_guard_count": 0, "graph_op_count": 393, "graph_node_count": 543, "graph_input_count": 149, "start_time": 1722977751.3634944, "entire_frame_compile_time_s": 9.642390012741089, "backend_compile_time_s": 8.403583765029907, "inductor_compile_time_s": 4.873299598693848, "code_gen_time_s": 3.313795566558838, "fail_type": null, "fail_reason": null, "fail_user_frame_filename": null, "fail_user_frame_lineno": null, "non_compliant_ops": [], "compliant_custom_ops": [], "restart_reasons": ["'skip function reduce_to_scalar_loss in file /data/users/jjwu/a/pytorch/torch/_dynamo/testing.py'"], "dynamo_time_before_restart_s": 0.4969205856323242, "has_guarded_code": true}, "frame_id": 2, "frame_compile_id": 0, "attempt": 1} +V0806 13:56:01.007000 4107173 torch/_dynamo/utils.py:824] {"chromium_event": {}, "has_payload": "84f68f158e9e577b45fe78f1b10b1a9c"} + { + "name": "cudagraphify", + "ts": 1722977761007495.2, + "args": null, + "ph": "B", + "pid": 0 + } +V0806 13:56:01.007000 4107173 torch/_dynamo/utils.py:824] {"chromium_event": {}, "has_payload": "9693821b0ca26729ddde83adc52af6fb"} + { + "name": "cudagraphify", + "ts": 1722977761007762.8, + "args": null, + "ph": "E", + "pid": 0 + } +V0806 13:56:01.268000 4107173 torch/_dynamo/utils.py:824] {"chromium_event": {}, "has_payload": "0d4a8e93cf617b30c0e2c59d80cd0e78"} + { + "name": "CachingAutotuner.benchmark_all_configs", + "ts": 1722977761268522.2, + "args": null, + "ph": "B", + "pid": 0 + } +V0806 13:56:01.512000 4107173 torch/_dynamo/utils.py:824] {"chromium_event": {}, 
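The chromium_event payloads above are Chrome Trace Event Format records: a "ph": "B" entry opens a span, a later "ph": "E" entry with the same name closes it, and "ts" is a microseconds-since-epoch timestamp, so the entire_frame_compile pair for frame 2/0 brackets roughly 9.66 s of wall time, in line with entire_frame_compile_time_s in the compilation_metrics record. A minimal sketch of pairing these records, assuming the payload dicts have already been parsed out of the log; the helper names below are illustrative, not tlparse API:

    import json
    from collections import defaultdict

    def durations(events):
        # Pair "B"/"E" phases by event name; "ts" is in microseconds.
        stacks, spans = defaultdict(list), []
        for e in sorted(events, key=lambda e: e["ts"]):
            if e["ph"] == "B":
                stacks[e["name"]].append(e["ts"])
            elif e["ph"] == "E" and stacks[e["name"]]:
                start = stacks[e["name"]].pop()
                spans.append((e["name"], (e["ts"] - start) / 1e6))
        return spans  # e.g. [("entire_frame_compile", 9.658...), ...]

    def to_perfetto(events, path):
        # Chrome trace duration events want a thread id as well; a
        # constant tid is fine for this single-process log. Perfetto's
        # trace viewer loads a bare JSON array of events directly.
        for e in events:
            e.setdefault("tid", 0)
        with open(path, "w") as f:
            json.dump(events, f)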
"has_payload": "0143b142071834aec8ab7d4b288e8a91"} + { + "name": "CachingAutotuner.benchmark_all_configs", + "ts": 1722977761511918.5, + "args": null, + "ph": "E", + "pid": 0 + } +V0806 13:56:01.513000 4107173 torch/_dynamo/utils.py:824] {"chromium_event": {}, "has_payload": "5e2f289c2b278e69c80cc13db919462a"} + { + "name": "CachingAutotuner.benchmark_all_configs", + "ts": 1722977761513285.0, + "args": null, + "ph": "B", + "pid": 0 + } +V0806 13:56:01.604000 4107173 torch/_dynamo/utils.py:824] {"chromium_event": {}, "has_payload": "ae366a49f803f1a405565bf9f2c9283d"} + { + "name": "CachingAutotuner.benchmark_all_configs", + "ts": 1722977761604478.8, + "args": null, + "ph": "E", + "pid": 0 + } +V0806 13:56:01.610000 4107173 torch/_dynamo/utils.py:824] {"chromium_event": {}, "has_payload": "0125a12d93ac7f49266a1f310a5a9705"} + { + "name": "CachingAutotuner.benchmark_all_configs", + "ts": 1722977761610516.2, + "args": null, + "ph": "B", + "pid": 0 + } +V0806 13:56:01.703000 4107173 torch/_dynamo/utils.py:824] {"chromium_event": {}, "has_payload": "9a2e32e482a8b02c1de380e4f7d9c3a0"} + { + "name": "CachingAutotuner.benchmark_all_configs", + "ts": 1722977761703233.5, + "args": null, + "ph": "E", + "pid": 0 + } +V0806 13:56:01.705000 4107173 torch/_dynamo/convert_frame.py:869] {"dynamo_start": {"stack": [{"line": 460, "name": "", "filename": 1}, {"line": 456, "name": "torchbench_main", "filename": 1}, {"line": 4086, "name": "main", "filename": 2}, {"line": 4018, "name": "process_entry", "filename": 2}, {"line": 4680, "name": "run", "filename": 2}, {"line": 3378, "name": "run_one_model", "filename": 2}, {"line": 3209, "name": "run_performance_test", "filename": 2}, {"line": 3141, "name": "warmup", "filename": 2}, {"line": 464, "name": "_fn", "filename": 3}, {"line": 437, "name": "forward_and_backward_pass", "filename": 1}, {"line": 438, "name": "torch_dynamo_resume_in_forward_and_backward_pass_at_437", "filename": 1}, {"line": 444, "name": "torch_dynamo_resume_in_forward_and_backward_pass_at_438", "filename": 1}, {"line": 426, "name": "compute_loss", "filename": 1}]}, "frame_id": 3, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:01.705000 4107173 torch/_dynamo/utils.py:824] {"chromium_event": {}, "has_payload": "a345d7189414418bbe21675f688e5406"} + { + "name": "_compile.compile_inner", + "ts": 1722977761705841.2, + "args": null, + "ph": "B", + "pid": 0 + } +V0806 13:56:01.705000 4107173 torch/_dynamo/utils.py:824] {"chromium_event": {}, "has_payload": "53f1497b31d0e8e868eb5ed513c8edf3"} + { + "name": "entire_frame_compile", + "ts": 1722977761705918.5, + "args": null, + "ph": "B", + "pid": 0 + } +V0806 13:56:01.708000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 0, "describer_id": 109, "size": 201216}, "frame_id": 3, "frame_compile_id": 0, "attempt": 1} +V0806 13:56:01.708000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 0, "ndim": 3, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [1, 1, 50304], "requires_grad": true, "stride": [50304, 50304, 1], "storage": 0, "view_func": "", "describer_id": 109}, "frame_id": 3, "frame_compile_id": 0, "attempt": 1} +V0806 13:56:01.708000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 109, "id": 0, "source": "L['pred']"}, "frame_id": 3, "frame_compile_id": 0, "attempt": 1} +V0806 13:56:01.710000 4107173 torch/_dynamo/guards.py:2195] {"dynamo_cpp_guards_str": {}, "frame_id": 3, "frame_compile_id": 0, "attempt": 1, "has_payload": 
"0251979d852e8b490120b35388d9750f"} + + TREE_GUARD_MANAGER: + +- RootGuardManager + | +- DEFAULT_DEVICE: utils_device.CURRENT_DEVICE == None # _dynamo/output_graph.py:462 in init_ambient_guards + | +- GLOBAL_STATE: ___check_global_state() + | +- GuardManager: source=L['pred'], accessed_by=DictGetItemGuardAccessor(pred) + | | +- TENSOR_MATCH: check_tensor(L['pred'], Tensor, DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), torch.float32, device=0, requires_grad=True, size=[1, 1, 50304], stride=[50304, 50304, 1]) + | | +- NO_HASATTR: hasattr(L['pred'], '_dynamo_dynamic_indices') == False + | +- GuardManager: source=G, accessed_by=GlobalsGuardAccessor + | | +- GuardManager: source=G['reduce_to_scalar_loss'], accessed_by=DictGetItemGuardAccessor(reduce_to_scalar_loss) + | | | +- ID_MATCH: ___check_obj_id(G['reduce_to_scalar_loss'], 140561699517584) + +V0806 13:56:01.710000 4107173 torch/_dynamo/utils.py:824] {"chromium_event": {}, "has_payload": "e2fbb5cb8efb7e6c931fbcfb93b33000"} + { + "name": "entire_frame_compile", + "ts": 1722977761710834.2, + "args": null, + "ph": "E", + "pid": 0 + } +V0806 13:56:01.710000 4107173 torch/_dynamo/utils.py:824] {"chromium_event": {}, "has_payload": "f378dc6ebc87b79696a622fb1503e8c9"} + { + "name": "_compile.compile_inner", + "ts": 1722977761710908.5, + "args": null, + "ph": "E", + "pid": 0 + } +V0806 13:56:01.711000 4107173 torch/_dynamo/utils.py:779] {"compilation_metrics": {"compile_id": "3/0", "frame_key": "4", "co_name": "compute_loss", "co_filename": "/data/users/jjwu/a/pytorch/benchmarks/dynamo/torchbench.py", "co_firstlineno": 426, "cache_size": 0, "accumulated_cache_size": 0, "guard_count": 7, "shape_env_guard_count": 0, "graph_op_count": 0, "graph_node_count": 1, "graph_input_count": 1, "start_time": 1722977761.7058077, "entire_frame_compile_time_s": 0.005119800567626953, "backend_compile_time_s": null, "inductor_compile_time_s": null, "code_gen_time_s": null, "fail_type": null, "fail_reason": null, "fail_user_frame_filename": null, "fail_user_frame_lineno": null, "non_compliant_ops": [], "compliant_custom_ops": [], "restart_reasons": ["'skip function reduce_to_scalar_loss in file /data/users/jjwu/a/pytorch/torch/_dynamo/testing.py'"], "dynamo_time_before_restart_s": 0.0011796951293945312, "has_guarded_code": true}, "frame_id": 3, "frame_compile_id": 0, "attempt": 1} +V0806 13:56:01.711000 4107173 torch/_dynamo/convert_frame.py:869] {"dynamo_start": {"stack": [{"line": 460, "name": "", "filename": 1}, {"line": 456, "name": "torchbench_main", "filename": 1}, {"line": 4086, "name": "main", "filename": 2}, {"line": 4018, "name": "process_entry", "filename": 2}, {"line": 4680, "name": "run", "filename": 2}, {"line": 3378, "name": "run_one_model", "filename": 2}, {"line": 3209, "name": "run_performance_test", "filename": 2}, {"line": 3141, "name": "warmup", "filename": 2}, {"line": 464, "name": "_fn", "filename": 3}, {"line": 437, "name": "forward_and_backward_pass", "filename": 1}, {"line": 438, "name": "torch_dynamo_resume_in_forward_and_backward_pass_at_437", "filename": 1}, {"line": 444, "name": "torch_dynamo_resume_in_forward_and_backward_pass_at_438", "filename": 1}, {"line": 444, "name": "torch_dynamo_resume_in_forward_and_backward_pass_at_444", "filename": 1}]}, "frame_id": 4, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:01.711000 4107173 torch/_dynamo/utils.py:824] {"chromium_event": {}, "has_payload": "5563f1c7c00ed0592ebe0721d8d15aaa"} + { + "name": "_compile.compile_inner", + "ts": 1722977761711605.2, + "args": null, + 
"ph": "B", + "pid": 0 + } +V0806 13:56:01.711000 4107173 torch/_dynamo/utils.py:824] {"chromium_event": {}, "has_payload": "0d4ebb6c2078eda941d9db2c5c59c5a3"} + { + "name": "entire_frame_compile", + "ts": 1722977761711674.0, + "args": null, + "ph": "B", + "pid": 0 + } +V0806 13:56:01.712000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 0, "describer_id": 110, "size": 4}, "frame_id": 4, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:01.713000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 0, "ndim": 0, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [], "requires_grad": true, "stride": [], "storage": 0, "view_func": "", "describer_id": 110}, "frame_id": 4, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:01.713000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 110, "id": 0, "source": "L['___stack1']"}, "frame_id": 4, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:01.715000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 0, "describer_id": 111, "size": 4}, "frame_id": 4, "frame_compile_id": 0, "attempt": 1} +V0806 13:56:01.716000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 0, "ndim": 0, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [], "requires_grad": true, "stride": [], "storage": 0, "view_func": "", "describer_id": 111}, "frame_id": 4, "frame_compile_id": 0, "attempt": 1} +V0806 13:56:01.716000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 111, "id": 0, "source": "L['___stack1']"}, "frame_id": 4, "frame_compile_id": 0, "attempt": 1} +V0806 13:56:01.720000 4107173 torch/_dynamo/guards.py:2195] {"dynamo_cpp_guards_str": {}, "frame_id": 4, "frame_compile_id": 0, "attempt": 1, "has_payload": "88e89e154b64eda379f4f8faa2423abf"} + + TREE_GUARD_MANAGER: + +- RootGuardManager + | +- DEFAULT_DEVICE: utils_device.CURRENT_DEVICE == None # _dynamo/output_graph.py:462 in init_ambient_guards + | +- GLOBAL_STATE: ___check_global_state() + | +- GuardManager: source=L['self'], accessed_by=DictGetItemGuardAccessor(self) + | | +- TYPE_MATCH: ___check_type_id(L['self'], 94206130500624) + | | +- GuardManager: source=L['self'].grad_scaler, accessed_by=GetAttrGuardAccessor(grad_scaler) + | | | +- TYPE_MATCH: ___check_type_id(L['self'].grad_scaler, 94206246390304) + | +- GuardManager: source=L['___stack0'], accessed_by=DictGetItemGuardAccessor(___stack0) + | | +- ID_MATCH: ___check_obj_id(L['___stack0'], 94206129614704) + | +- GuardManager: source=L['___stack1'], accessed_by=DictGetItemGuardAccessor(___stack1) + | | +- TENSOR_MATCH: check_tensor(L['___stack1'], Tensor, DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), torch.float32, device=0, requires_grad=True, size=[], stride=[]) + | | +- NO_HASATTR: hasattr(L['___stack1'], '_dynamo_dynamic_indices') == False + +V0806 13:56:01.720000 4107173 torch/_dynamo/utils.py:824] {"chromium_event": {}, "has_payload": "779e853a676e6fc1f8bc98d08b7a37e0"} + { + "name": "entire_frame_compile", + "ts": 1722977761720908.8, + "args": null, + "ph": "E", + "pid": 0 + } +V0806 13:56:01.720000 4107173 torch/_dynamo/utils.py:824] {"chromium_event": {}, "has_payload": "7007ee210e774dfdfc169a55bde2c2e9"} + { + "name": "_compile.compile_inner", + "ts": 1722977761720981.5, + "args": null, + "ph": "E", + "pid": 0 + } +V0806 13:56:01.721000 4107173 torch/_dynamo/utils.py:779] {"compilation_metrics": {"compile_id": "4/0", 
"frame_key": "5", "co_name": "torch_dynamo_resume_in_forward_and_backward_pass_at_444", "co_filename": "/data/users/jjwu/a/pytorch/benchmarks/dynamo/torchbench.py", "co_firstlineno": 444, "cache_size": 0, "accumulated_cache_size": 0, "guard_count": 9, "shape_env_guard_count": 0, "graph_op_count": 0, "graph_node_count": 1, "graph_input_count": 1, "start_time": 1722977761.7115858, "entire_frame_compile_time_s": 0.009430170059204102, "backend_compile_time_s": null, "inductor_compile_time_s": null, "code_gen_time_s": null, "fail_type": null, "fail_reason": null, "fail_user_frame_filename": null, "fail_user_frame_lineno": null, "non_compliant_ops": [], "compliant_custom_ops": [], "restart_reasons": ["Tensor.backward"], "dynamo_time_before_restart_s": 0.0031707286834716797, "has_guarded_code": true}, "frame_id": 4, "frame_compile_id": 0, "attempt": 1} +V0806 13:56:01.722000 4107173 torch/_dynamo/utils.py:824] {"chromium_event": {}, "has_payload": "18b8fcfb792a459c0a5419875f8b2f45"} + { + "name": "compile_fx..bw_compiler", + "ts": 1722977761722201.0, + "args": null, + "ph": "B", + "pid": 0 + } +V0806 13:56:01.723000 4107173 torch/_dynamo/utils.py:824] {"chromium_event": {}, "has_payload": "dae2747e1c3069c100850462650d8e87"} + { + "name": "compile_fx_inner", + "ts": 1722977761723042.2, + "args": null, + "ph": "B", + "pid": 0 + } +V0806 13:56:01.723000 4107173 torch/_dynamo/utils.py:824] {"chromium_event": {}, "has_payload": "c375948250791b18bbfdf36bd1f57da2"} + { + "name": "inductor_compile", + "ts": 1722977761723133.0, + "args": null, + "ph": "B", + "pid": 0 + } +V0806 13:56:01.869000 4107173 torch/_inductor/compile_fx.py:719] {"artifact": {"name": "fx_graph_runnable", "encoding": "string"}, "frame_id": 2, "frame_compile_id": 0, "attempt": 1, "has_payload": "0d9dc4e9b2e5066df686c9b12e9da24f"} + + import torch + from torch import tensor, device + import torch.fx as fx + from torch._dynamo.testing import rand_strided + from math import inf + import torch._inductor.inductor_prims + + import torch._dynamo.config + import torch._inductor.config + import torch._functorch.config + import torch.fx.experimental._config + + torch._inductor.config.triton.cudagraphs = True + + + + + isolate_fails_code_str = None + + + + # torch version: 2.5.0a0+git6fbc72b + # torch cuda version: 12.0 + # torch git version: 6fbc72b6d764eaeb9ef896840c7996ca2a35188d + + + # CUDA Info: + # nvcc: NVIDIA (R) Cuda compiler driver + # Copyright (c) 2005-2023 NVIDIA Corporation + # Built on Fri_Jan__6_16:45:21_PST_2023 + # Cuda compilation tools, release 12.0, V12.0.140 + # Build cuda_12.0.r12.0/compiler.32267302_0 + + # GPU Hardware Info: + # NVIDIA H100 : 1 + + + from torch.nn import * + class Repro(torch.nn.Module): + def __init__(self) -> None: + super().__init__() + + + + def forward(self, primals_1, primals_4, primals_10, primals_16, primals_22, primals_28, primals_34, primals_40, primals_46, primals_52, primals_58, primals_64, primals_70, primals_76, primals_82, primals_88, primals_94, primals_100, primals_106, primals_112, primals_118, primals_124, primals_130, primals_136, primals_142, primals_148, unsqueeze, mul, view, permute_1, permute_2, permute_3, getitem_5, getitem_6, getitem_7, getitem_8, mul_2, view_8, addmm_2, view_10, mul_8, view_12, permute_9, permute_10, permute_11, getitem_16, getitem_17, getitem_18, getitem_19, mul_10, view_20, addmm_6, view_22, mul_16, view_24, permute_17, permute_18, permute_19, getitem_27, getitem_28, getitem_29, getitem_30, mul_18, view_32, addmm_10, view_34, mul_24, view_36, permute_25, 
permute_26, permute_27, getitem_38, getitem_39, getitem_40, getitem_41, mul_26, view_44, addmm_14, view_46, mul_32, view_48, permute_33, permute_34, permute_35, getitem_49, getitem_50, getitem_51, getitem_52, mul_34, view_56, addmm_18, view_58, mul_40, view_60, permute_41, permute_42, permute_43, getitem_60, getitem_61, getitem_62, getitem_63, mul_42, view_68, addmm_22, view_70, mul_48, view_72, permute_49, permute_50, permute_51, getitem_71, getitem_72, getitem_73, getitem_74, mul_50, view_80, addmm_26, view_82, mul_56, view_84, permute_57, permute_58, permute_59, getitem_82, getitem_83, getitem_84, getitem_85, mul_58, view_92, addmm_30, view_94, mul_64, view_96, permute_65, permute_66, permute_67, getitem_93, getitem_94, getitem_95, getitem_96, mul_66, view_104, addmm_34, view_106, mul_72, view_108, permute_73, permute_74, permute_75, getitem_104, getitem_105, getitem_106, getitem_107, mul_74, view_116, addmm_38, view_118, mul_80, view_120, permute_81, permute_82, permute_83, getitem_115, getitem_116, getitem_117, getitem_118, mul_82, view_128, addmm_42, view_130, mul_88, view_132, permute_89, permute_90, permute_91, getitem_126, getitem_127, getitem_128, getitem_129, mul_90, view_140, addmm_46, view_142, mul_96, full_default, view_144, permute_99, div, permute_101, permute_105, div_1, permute_109, permute_117, div_2, permute_121, permute_125, div_3, permute_129, permute_137, div_4, permute_141, permute_145, div_5, permute_149, permute_157, div_6, permute_161, permute_165, div_7, permute_169, permute_177, div_8, permute_181, permute_185, div_9, permute_189, permute_197, div_10, permute_201, permute_205, div_11, permute_209, permute_217, div_12, permute_221, permute_225, div_13, permute_229, permute_237, div_14, permute_241, permute_245, div_15, permute_249, permute_257, div_16, permute_261, permute_265, div_17, permute_269, permute_277, div_18, permute_281, permute_285, div_19, permute_289, permute_297, div_20, permute_301, permute_305, div_21, permute_309, permute_317, div_22, permute_321, permute_325, div_23, permute_329, permute_337, div_24, tangents_1): + view_146 = torch.ops.aten.view.default(tangents_1, [1, 50304]); tangents_1 = None + permute_97 = torch.ops.aten.permute.default(view_146, [1, 0]) + mm_1 = torch.ops.aten.mm.default(permute_97, view_144); permute_97 = view_144 = None + permute_98 = torch.ops.aten.permute.default(mm_1, [1, 0]); mm_1 = None + mm_2 = torch.ops.aten.mm.default(view_146, permute_99); view_146 = permute_99 = None + view_147 = torch.ops.aten.view.default(mm_2, [1, 1, 768]); mm_2 = None + permute_100 = torch.ops.aten.permute.default(permute_98, [1, 0]); permute_98 = None + full_default_1 = torch.ops.aten.full.default([1, 64, 768], 0, dtype = torch.float32, layout = torch.strided, device = device(type='cuda', index=0), pin_memory = False) + index_put = torch.ops.aten.index_put.default(full_default_1, [None, full_default], view_147, True); full_default_1 = full_default = view_147 = None + mul_99 = torch.ops.aten.mul.Tensor(index_put, primals_148); primals_148 = None + mul_100 = torch.ops.aten.mul.Tensor(mul_99, 768) + sum_1 = torch.ops.aten.sum.dim_IntList(mul_99, [2], True) + mul_101 = torch.ops.aten.mul.Tensor(mul_99, mul_96); mul_99 = None + sum_2 = torch.ops.aten.sum.dim_IntList(mul_101, [2], True); mul_101 = None + mul_102 = torch.ops.aten.mul.Tensor(mul_96, sum_2); sum_2 = None + sub_26 = torch.ops.aten.sub.Tensor(mul_100, sum_1); mul_100 = sum_1 = None + sub_27 = torch.ops.aten.sub.Tensor(sub_26, mul_102); sub_26 = mul_102 = None + mul_103 = 
torch.ops.aten.mul.Tensor(div, sub_27); div = sub_27 = None + mul_104 = torch.ops.aten.mul.Tensor(index_put, mul_96); mul_96 = None + sum_3 = torch.ops.aten.sum.dim_IntList(mul_104, [0, 1]); mul_104 = None + sum_4 = torch.ops.aten.sum.dim_IntList(index_put, [0, 1]); index_put = None + view_148 = torch.ops.aten.view.default(mul_103, [64, 768]) + mm_3 = torch.ops.aten.mm.default(view_148, permute_101); permute_101 = None + permute_102 = torch.ops.aten.permute.default(view_148, [1, 0]) + mm_4 = torch.ops.aten.mm.default(permute_102, view_142); permute_102 = view_142 = None + permute_103 = torch.ops.aten.permute.default(mm_4, [1, 0]); mm_4 = None + sum_5 = torch.ops.aten.sum.dim_IntList(view_148, [0], True); view_148 = None + view_149 = torch.ops.aten.view.default(sum_5, [768]); sum_5 = None + permute_104 = torch.ops.aten.permute.default(permute_103, [1, 0]); permute_103 = None + view_150 = torch.ops.aten.view.default(mm_3, [1, 64, 3072]); mm_3 = None + view_141 = torch.ops.aten.view.default(addmm_46, [1, 64, 3072]); addmm_46 = None + mul_92 = torch.ops.aten.mul.Tensor(view_141, 0.5) + mul_105 = torch.ops.aten.mul.Tensor(view_150, mul_92); mul_92 = None + pow_12 = torch.ops.aten.pow.Tensor_Scalar(view_141, 3.0) + mul_93 = torch.ops.aten.mul.Tensor(pow_12, 0.044715); pow_12 = None + add_94 = torch.ops.aten.add.Tensor(view_141, mul_93); mul_93 = None + mul_94 = torch.ops.aten.mul.Tensor(add_94, 0.7978845608028654); add_94 = None + tanh_11 = torch.ops.aten.tanh.default(mul_94); mul_94 = None + add_95 = torch.ops.aten.add.Tensor(tanh_11, 1.0) + mul_106 = torch.ops.aten.mul.Tensor(view_150, add_95); view_150 = add_95 = None + mul_107 = torch.ops.aten.mul.Tensor(tanh_11, tanh_11); tanh_11 = None + sub_28 = torch.ops.aten.sub.Tensor(1, mul_107); mul_107 = None + mul_108 = torch.ops.aten.mul.Tensor(mul_105, sub_28); mul_105 = sub_28 = None + mul_109 = torch.ops.aten.mul.Tensor(mul_108, 0.7978845608028654); mul_108 = None + mul_110 = torch.ops.aten.mul.Tensor(mul_109, 0.044715) + pow_13 = torch.ops.aten.pow.Tensor_Scalar(view_141, 2.0); view_141 = None + mul_111 = torch.ops.aten.mul.Scalar(pow_13, 3.0); pow_13 = None + mul_112 = torch.ops.aten.mul.Tensor(mul_110, mul_111); mul_110 = mul_111 = None + add_99 = torch.ops.aten.add.Tensor(mul_109, mul_112); mul_109 = mul_112 = None + mul_113 = torch.ops.aten.mul.Tensor(mul_106, 0.5); mul_106 = None + add_100 = torch.ops.aten.add.Tensor(add_99, mul_113); add_99 = mul_113 = None + view_151 = torch.ops.aten.view.default(add_100, [64, 3072]); add_100 = None + mm_5 = torch.ops.aten.mm.default(view_151, permute_105); permute_105 = None + permute_106 = torch.ops.aten.permute.default(view_151, [1, 0]) + mm_6 = torch.ops.aten.mm.default(permute_106, view_140); permute_106 = view_140 = None + permute_107 = torch.ops.aten.permute.default(mm_6, [1, 0]); mm_6 = None + sum_6 = torch.ops.aten.sum.dim_IntList(view_151, [0], True); view_151 = None + view_152 = torch.ops.aten.view.default(sum_6, [3072]); sum_6 = None + permute_108 = torch.ops.aten.permute.default(permute_107, [1, 0]); permute_107 = None + view_153 = torch.ops.aten.view.default(mm_5, [1, 64, 768]); mm_5 = None + mul_115 = torch.ops.aten.mul.Tensor(view_153, primals_142); primals_142 = None + mul_116 = torch.ops.aten.mul.Tensor(mul_115, 768) + sum_7 = torch.ops.aten.sum.dim_IntList(mul_115, [2], True) + mul_117 = torch.ops.aten.mul.Tensor(mul_115, mul_90); mul_115 = None + sum_8 = torch.ops.aten.sum.dim_IntList(mul_117, [2], True); mul_117 = None + mul_118 = torch.ops.aten.mul.Tensor(mul_90, sum_8); sum_8 = 
None + sub_30 = torch.ops.aten.sub.Tensor(mul_116, sum_7); mul_116 = sum_7 = None + sub_31 = torch.ops.aten.sub.Tensor(sub_30, mul_118); sub_30 = mul_118 = None + mul_119 = torch.ops.aten.mul.Tensor(div_1, sub_31); div_1 = sub_31 = None + mul_120 = torch.ops.aten.mul.Tensor(view_153, mul_90); mul_90 = None + sum_9 = torch.ops.aten.sum.dim_IntList(mul_120, [0, 1]); mul_120 = None + sum_10 = torch.ops.aten.sum.dim_IntList(view_153, [0, 1]); view_153 = None + add_101 = torch.ops.aten.add.Tensor(mul_103, mul_119); mul_103 = mul_119 = None + view_154 = torch.ops.aten.view.default(add_101, [64, 768]) + mm_7 = torch.ops.aten.mm.default(view_154, permute_109); permute_109 = None + permute_110 = torch.ops.aten.permute.default(view_154, [1, 0]) + permute_92 = torch.ops.aten.permute.default(getitem_126, [0, 2, 1, 3]) + view_137 = torch.ops.aten.view.default(permute_92, [1, 64, 768]); permute_92 = None + view_138 = torch.ops.aten.view.default(view_137, [64, 768]); view_137 = None + mm_8 = torch.ops.aten.mm.default(permute_110, view_138); permute_110 = view_138 = None + permute_111 = torch.ops.aten.permute.default(mm_8, [1, 0]); mm_8 = None + sum_11 = torch.ops.aten.sum.dim_IntList(view_154, [0], True); view_154 = None + view_155 = torch.ops.aten.view.default(sum_11, [768]); sum_11 = None + permute_112 = torch.ops.aten.permute.default(permute_111, [1, 0]); permute_111 = None + view_156 = torch.ops.aten.view.default(mm_7, [1, 64, 768]); mm_7 = None + view_157 = torch.ops.aten.view.default(view_156, [1, 64, 12, 64]); view_156 = None + permute_113 = torch.ops.aten.permute.default(view_157, [0, 2, 1, 3]); view_157 = None + _scaled_dot_product_efficient_attention_backward = torch.ops.aten._scaled_dot_product_efficient_attention_backward.default(permute_113, permute_90, permute_89, permute_91, None, getitem_126, getitem_127, getitem_128, getitem_129, 0.0, [True, True, True, False], True); permute_113 = permute_90 = permute_89 = permute_91 = getitem_126 = getitem_127 = getitem_128 = getitem_129 = None + getitem_134 = _scaled_dot_product_efficient_attention_backward[0] + getitem_135 = _scaled_dot_product_efficient_attention_backward[1] + getitem_136 = _scaled_dot_product_efficient_attention_backward[2]; _scaled_dot_product_efficient_attention_backward = None + permute_114 = torch.ops.aten.permute.default(getitem_136, [0, 2, 1, 3]); getitem_136 = None + view_158 = torch.ops.aten.view.default(permute_114, [1, 64, 768]); permute_114 = None + permute_115 = torch.ops.aten.permute.default(getitem_134, [0, 2, 1, 3]); getitem_134 = None + view_159 = torch.ops.aten.view.default(permute_115, [1, 64, 768]); permute_115 = None + permute_116 = torch.ops.aten.permute.default(getitem_135, [0, 2, 1, 3]); getitem_135 = None + view_160 = torch.ops.aten.view.default(permute_116, [1, 64, 768]); permute_116 = None + cat = torch.ops.aten.cat.default([view_159, view_160, view_158], 2); view_159 = view_160 = view_158 = None + view_161 = torch.ops.aten.view.default(cat, [64, 2304]); cat = None + mm_9 = torch.ops.aten.mm.default(view_161, permute_117); permute_117 = None + permute_118 = torch.ops.aten.permute.default(view_161, [1, 0]) + mm_10 = torch.ops.aten.mm.default(permute_118, view_132); permute_118 = view_132 = None + permute_119 = torch.ops.aten.permute.default(mm_10, [1, 0]); mm_10 = None + sum_12 = torch.ops.aten.sum.dim_IntList(view_161, [0], True); view_161 = None + view_162 = torch.ops.aten.view.default(sum_12, [2304]); sum_12 = None + permute_120 = torch.ops.aten.permute.default(permute_119, [1, 0]); permute_119 = None + 
view_163 = torch.ops.aten.view.default(mm_9, [1, 64, 768]); mm_9 = None
+ mul_122 = torch.ops.aten.mul.Tensor(view_163, primals_136); primals_136 = None
+ mul_123 = torch.ops.aten.mul.Tensor(mul_122, 768)
+ sum_13 = torch.ops.aten.sum.dim_IntList(mul_122, [2], True)
+ mul_124 = torch.ops.aten.mul.Tensor(mul_122, mul_88); mul_122 = None
+ sum_14 = torch.ops.aten.sum.dim_IntList(mul_124, [2], True); mul_124 = None
+ mul_125 = torch.ops.aten.mul.Tensor(mul_88, sum_14); sum_14 = None
+ sub_33 = torch.ops.aten.sub.Tensor(mul_123, sum_13); mul_123 = sum_13 = None
+ sub_34 = torch.ops.aten.sub.Tensor(sub_33, mul_125); sub_33 = mul_125 = None
+ mul_126 = torch.ops.aten.mul.Tensor(div_2, sub_34); div_2 = sub_34 = None
+ mul_127 = torch.ops.aten.mul.Tensor(view_163, mul_88); mul_88 = None
+ sum_15 = torch.ops.aten.sum.dim_IntList(mul_127, [0, 1]); mul_127 = None
+ sum_16 = torch.ops.aten.sum.dim_IntList(view_163, [0, 1]); view_163 = None
+ add_102 = torch.ops.aten.add.Tensor(add_101, mul_126); add_101 = mul_126 = None
+ view_164 = torch.ops.aten.view.default(add_102, [64, 768])
+ mm_11 = torch.ops.aten.mm.default(view_164, permute_121); permute_121 = None
+ permute_122 = torch.ops.aten.permute.default(view_164, [1, 0])
+ mm_12 = torch.ops.aten.mm.default(permute_122, view_130); permute_122 = view_130 = None
+ permute_123 = torch.ops.aten.permute.default(mm_12, [1, 0]); mm_12 = None
+ sum_17 = torch.ops.aten.sum.dim_IntList(view_164, [0], True); view_164 = None
+ view_165 = torch.ops.aten.view.default(sum_17, [768]); sum_17 = None
+ permute_124 = torch.ops.aten.permute.default(permute_123, [1, 0]); permute_123 = None
+ view_166 = torch.ops.aten.view.default(mm_11, [1, 64, 3072]); mm_11 = None
+ view_129 = torch.ops.aten.view.default(addmm_42, [1, 64, 3072]); addmm_42 = None
+ mul_84 = torch.ops.aten.mul.Tensor(view_129, 0.5)
+ mul_128 = torch.ops.aten.mul.Tensor(view_166, mul_84); mul_84 = None
+ pow_11 = torch.ops.aten.pow.Tensor_Scalar(view_129, 3.0)
+ mul_85 = torch.ops.aten.mul.Tensor(pow_11, 0.044715); pow_11 = None
+ add_86 = torch.ops.aten.add.Tensor(view_129, mul_85); mul_85 = None
+ mul_86 = torch.ops.aten.mul.Tensor(add_86, 0.7978845608028654); add_86 = None
+ tanh_10 = torch.ops.aten.tanh.default(mul_86); mul_86 = None
+ add_87 = torch.ops.aten.add.Tensor(tanh_10, 1.0)
+ mul_129 = torch.ops.aten.mul.Tensor(view_166, add_87); view_166 = add_87 = None
+ mul_130 = torch.ops.aten.mul.Tensor(tanh_10, tanh_10); tanh_10 = None
+ sub_35 = torch.ops.aten.sub.Tensor(1, mul_130); mul_130 = None
+ mul_131 = torch.ops.aten.mul.Tensor(mul_128, sub_35); mul_128 = sub_35 = None
+ mul_132 = torch.ops.aten.mul.Tensor(mul_131, 0.7978845608028654); mul_131 = None
+ mul_133 = torch.ops.aten.mul.Tensor(mul_132, 0.044715)
+ pow_14 = torch.ops.aten.pow.Tensor_Scalar(view_129, 2.0); view_129 = None
+ mul_134 = torch.ops.aten.mul.Scalar(pow_14, 3.0); pow_14 = None
+ mul_135 = torch.ops.aten.mul.Tensor(mul_133, mul_134); mul_133 = mul_134 = None
+ add_103 = torch.ops.aten.add.Tensor(mul_132, mul_135); mul_132 = mul_135 = None
+ mul_136 = torch.ops.aten.mul.Tensor(mul_129, 0.5); mul_129 = None
+ add_104 = torch.ops.aten.add.Tensor(add_103, mul_136); add_103 = mul_136 = None
+ view_167 = torch.ops.aten.view.default(add_104, [64, 3072]); add_104 = None
+ mm_13 = torch.ops.aten.mm.default(view_167, permute_125); permute_125 = None
+ permute_126 = torch.ops.aten.permute.default(view_167, [1, 0])
+ mm_14 = torch.ops.aten.mm.default(permute_126, view_128); permute_126 = view_128 = None
+ permute_127 = torch.ops.aten.permute.default(mm_14, [1, 0]); mm_14 = None
+ sum_18 = torch.ops.aten.sum.dim_IntList(view_167, [0], True); view_167 = None
+ view_168 = torch.ops.aten.view.default(sum_18, [3072]); sum_18 = None
+ permute_128 = torch.ops.aten.permute.default(permute_127, [1, 0]); permute_127 = None
+ view_169 = torch.ops.aten.view.default(mm_13, [1, 64, 768]); mm_13 = None
+ mul_138 = torch.ops.aten.mul.Tensor(view_169, primals_130); primals_130 = None
+ mul_139 = torch.ops.aten.mul.Tensor(mul_138, 768)
+ sum_19 = torch.ops.aten.sum.dim_IntList(mul_138, [2], True)
+ mul_140 = torch.ops.aten.mul.Tensor(mul_138, mul_82); mul_138 = None
+ sum_20 = torch.ops.aten.sum.dim_IntList(mul_140, [2], True); mul_140 = None
+ mul_141 = torch.ops.aten.mul.Tensor(mul_82, sum_20); sum_20 = None
+ sub_37 = torch.ops.aten.sub.Tensor(mul_139, sum_19); mul_139 = sum_19 = None
+ sub_38 = torch.ops.aten.sub.Tensor(sub_37, mul_141); sub_37 = mul_141 = None
+ mul_142 = torch.ops.aten.mul.Tensor(div_3, sub_38); div_3 = sub_38 = None
+ mul_143 = torch.ops.aten.mul.Tensor(view_169, mul_82); mul_82 = None
+ sum_21 = torch.ops.aten.sum.dim_IntList(mul_143, [0, 1]); mul_143 = None
+ sum_22 = torch.ops.aten.sum.dim_IntList(view_169, [0, 1]); view_169 = None
+ add_105 = torch.ops.aten.add.Tensor(add_102, mul_142); add_102 = mul_142 = None
+ view_170 = torch.ops.aten.view.default(add_105, [64, 768])
+ mm_15 = torch.ops.aten.mm.default(view_170, permute_129); permute_129 = None
+ permute_130 = torch.ops.aten.permute.default(view_170, [1, 0])
+ permute_84 = torch.ops.aten.permute.default(getitem_115, [0, 2, 1, 3])
+ view_125 = torch.ops.aten.view.default(permute_84, [1, 64, 768]); permute_84 = None
+ view_126 = torch.ops.aten.view.default(view_125, [64, 768]); view_125 = None
+ mm_16 = torch.ops.aten.mm.default(permute_130, view_126); permute_130 = view_126 = None
+ permute_131 = torch.ops.aten.permute.default(mm_16, [1, 0]); mm_16 = None
+ sum_23 = torch.ops.aten.sum.dim_IntList(view_170, [0], True); view_170 = None
+ view_171 = torch.ops.aten.view.default(sum_23, [768]); sum_23 = None
+ permute_132 = torch.ops.aten.permute.default(permute_131, [1, 0]); permute_131 = None
+ view_172 = torch.ops.aten.view.default(mm_15, [1, 64, 768]); mm_15 = None
+ view_173 = torch.ops.aten.view.default(view_172, [1, 64, 12, 64]); view_172 = None
+ permute_133 = torch.ops.aten.permute.default(view_173, [0, 2, 1, 3]); view_173 = None
+ _scaled_dot_product_efficient_attention_backward_1 = torch.ops.aten._scaled_dot_product_efficient_attention_backward.default(permute_133, permute_82, permute_81, permute_83, None, getitem_115, getitem_116, getitem_117, getitem_118, 0.0, [True, True, True, False], True); permute_133 = permute_82 = permute_81 = permute_83 = getitem_115 = getitem_116 = getitem_117 = getitem_118 = None
+ getitem_138 = _scaled_dot_product_efficient_attention_backward_1[0]
+ getitem_139 = _scaled_dot_product_efficient_attention_backward_1[1]
+ getitem_140 = _scaled_dot_product_efficient_attention_backward_1[2]; _scaled_dot_product_efficient_attention_backward_1 = None
+ permute_134 = torch.ops.aten.permute.default(getitem_140, [0, 2, 1, 3]); getitem_140 = None
+ view_174 = torch.ops.aten.view.default(permute_134, [1, 64, 768]); permute_134 = None
+ permute_135 = torch.ops.aten.permute.default(getitem_138, [0, 2, 1, 3]); getitem_138 = None
+ view_175 = torch.ops.aten.view.default(permute_135, [1, 64, 768]); permute_135 = None
+ permute_136 = torch.ops.aten.permute.default(getitem_139, [0, 2, 1, 3]); getitem_139 = None
+ view_176 = torch.ops.aten.view.default(permute_136, [1, 64, 768]); permute_136 = None
+ cat_1 = torch.ops.aten.cat.default([view_175, view_176, view_174], 2); view_175 = view_176 = view_174 = None
+ view_177 = torch.ops.aten.view.default(cat_1, [64, 2304]); cat_1 = None
+ mm_17 = torch.ops.aten.mm.default(view_177, permute_137); permute_137 = None
+ permute_138 = torch.ops.aten.permute.default(view_177, [1, 0])
+ mm_18 = torch.ops.aten.mm.default(permute_138, view_120); permute_138 = view_120 = None
+ permute_139 = torch.ops.aten.permute.default(mm_18, [1, 0]); mm_18 = None
+ sum_24 = torch.ops.aten.sum.dim_IntList(view_177, [0], True); view_177 = None
+ view_178 = torch.ops.aten.view.default(sum_24, [2304]); sum_24 = None
+ permute_140 = torch.ops.aten.permute.default(permute_139, [1, 0]); permute_139 = None
+ view_179 = torch.ops.aten.view.default(mm_17, [1, 64, 768]); mm_17 = None
+ mul_145 = torch.ops.aten.mul.Tensor(view_179, primals_124); primals_124 = None
+ mul_146 = torch.ops.aten.mul.Tensor(mul_145, 768)
+ sum_25 = torch.ops.aten.sum.dim_IntList(mul_145, [2], True)
+ mul_147 = torch.ops.aten.mul.Tensor(mul_145, mul_80); mul_145 = None
+ sum_26 = torch.ops.aten.sum.dim_IntList(mul_147, [2], True); mul_147 = None
+ mul_148 = torch.ops.aten.mul.Tensor(mul_80, sum_26); sum_26 = None
+ sub_40 = torch.ops.aten.sub.Tensor(mul_146, sum_25); mul_146 = sum_25 = None
+ sub_41 = torch.ops.aten.sub.Tensor(sub_40, mul_148); sub_40 = mul_148 = None
+ mul_149 = torch.ops.aten.mul.Tensor(div_4, sub_41); div_4 = sub_41 = None
+ mul_150 = torch.ops.aten.mul.Tensor(view_179, mul_80); mul_80 = None
+ sum_27 = torch.ops.aten.sum.dim_IntList(mul_150, [0, 1]); mul_150 = None
+ sum_28 = torch.ops.aten.sum.dim_IntList(view_179, [0, 1]); view_179 = None
+ add_106 = torch.ops.aten.add.Tensor(add_105, mul_149); add_105 = mul_149 = None
+ view_180 = torch.ops.aten.view.default(add_106, [64, 768])
+ mm_19 = torch.ops.aten.mm.default(view_180, permute_141); permute_141 = None
+ permute_142 = torch.ops.aten.permute.default(view_180, [1, 0])
+ mm_20 = torch.ops.aten.mm.default(permute_142, view_118); permute_142 = view_118 = None
+ permute_143 = torch.ops.aten.permute.default(mm_20, [1, 0]); mm_20 = None
+ sum_29 = torch.ops.aten.sum.dim_IntList(view_180, [0], True); view_180 = None
+ view_181 = torch.ops.aten.view.default(sum_29, [768]); sum_29 = None
+ permute_144 = torch.ops.aten.permute.default(permute_143, [1, 0]); permute_143 = None
+ view_182 = torch.ops.aten.view.default(mm_19, [1, 64, 3072]); mm_19 = None
+ view_117 = torch.ops.aten.view.default(addmm_38, [1, 64, 3072]); addmm_38 = None
+ mul_76 = torch.ops.aten.mul.Tensor(view_117, 0.5)
+ mul_151 = torch.ops.aten.mul.Tensor(view_182, mul_76); mul_76 = None
+ pow_10 = torch.ops.aten.pow.Tensor_Scalar(view_117, 3.0)
+ mul_77 = torch.ops.aten.mul.Tensor(pow_10, 0.044715); pow_10 = None
+ add_78 = torch.ops.aten.add.Tensor(view_117, mul_77); mul_77 = None
+ mul_78 = torch.ops.aten.mul.Tensor(add_78, 0.7978845608028654); add_78 = None
+ tanh_9 = torch.ops.aten.tanh.default(mul_78); mul_78 = None
+ add_79 = torch.ops.aten.add.Tensor(tanh_9, 1.0)
+ mul_152 = torch.ops.aten.mul.Tensor(view_182, add_79); view_182 = add_79 = None
+ mul_153 = torch.ops.aten.mul.Tensor(tanh_9, tanh_9); tanh_9 = None
+ sub_42 = torch.ops.aten.sub.Tensor(1, mul_153); mul_153 = None
+ mul_154 = torch.ops.aten.mul.Tensor(mul_151, sub_42); mul_151 = sub_42 = None
+ mul_155 = torch.ops.aten.mul.Tensor(mul_154, 0.7978845608028654); mul_154 = None
+ mul_156 = torch.ops.aten.mul.Tensor(mul_155, 0.044715)
+ pow_15 = torch.ops.aten.pow.Tensor_Scalar(view_117, 2.0); view_117 = None
+ mul_157 = torch.ops.aten.mul.Scalar(pow_15, 3.0); pow_15 = None
+ mul_158 = torch.ops.aten.mul.Tensor(mul_156, mul_157); mul_156 = mul_157 = None
+ add_107 = torch.ops.aten.add.Tensor(mul_155, mul_158); mul_155 = mul_158 = None
+ mul_159 = torch.ops.aten.mul.Tensor(mul_152, 0.5); mul_152 = None
+ add_108 = torch.ops.aten.add.Tensor(add_107, mul_159); add_107 = mul_159 = None
+ view_183 = torch.ops.aten.view.default(add_108, [64, 3072]); add_108 = None
+ mm_21 = torch.ops.aten.mm.default(view_183, permute_145); permute_145 = None
+ permute_146 = torch.ops.aten.permute.default(view_183, [1, 0])
+ mm_22 = torch.ops.aten.mm.default(permute_146, view_116); permute_146 = view_116 = None
+ permute_147 = torch.ops.aten.permute.default(mm_22, [1, 0]); mm_22 = None
+ sum_30 = torch.ops.aten.sum.dim_IntList(view_183, [0], True); view_183 = None
+ view_184 = torch.ops.aten.view.default(sum_30, [3072]); sum_30 = None
+ permute_148 = torch.ops.aten.permute.default(permute_147, [1, 0]); permute_147 = None
+ view_185 = torch.ops.aten.view.default(mm_21, [1, 64, 768]); mm_21 = None
+ mul_161 = torch.ops.aten.mul.Tensor(view_185, primals_118); primals_118 = None
+ mul_162 = torch.ops.aten.mul.Tensor(mul_161, 768)
+ sum_31 = torch.ops.aten.sum.dim_IntList(mul_161, [2], True)
+ mul_163 = torch.ops.aten.mul.Tensor(mul_161, mul_74); mul_161 = None
+ sum_32 = torch.ops.aten.sum.dim_IntList(mul_163, [2], True); mul_163 = None
+ mul_164 = torch.ops.aten.mul.Tensor(mul_74, sum_32); sum_32 = None
+ sub_44 = torch.ops.aten.sub.Tensor(mul_162, sum_31); mul_162 = sum_31 = None
+ sub_45 = torch.ops.aten.sub.Tensor(sub_44, mul_164); sub_44 = mul_164 = None
+ mul_165 = torch.ops.aten.mul.Tensor(div_5, sub_45); div_5 = sub_45 = None
+ mul_166 = torch.ops.aten.mul.Tensor(view_185, mul_74); mul_74 = None
+ sum_33 = torch.ops.aten.sum.dim_IntList(mul_166, [0, 1]); mul_166 = None
+ sum_34 = torch.ops.aten.sum.dim_IntList(view_185, [0, 1]); view_185 = None
+ add_109 = torch.ops.aten.add.Tensor(add_106, mul_165); add_106 = mul_165 = None
+ view_186 = torch.ops.aten.view.default(add_109, [64, 768])
+ mm_23 = torch.ops.aten.mm.default(view_186, permute_149); permute_149 = None
+ permute_150 = torch.ops.aten.permute.default(view_186, [1, 0])
+ permute_76 = torch.ops.aten.permute.default(getitem_104, [0, 2, 1, 3])
+ view_113 = torch.ops.aten.view.default(permute_76, [1, 64, 768]); permute_76 = None
+ view_114 = torch.ops.aten.view.default(view_113, [64, 768]); view_113 = None
+ mm_24 = torch.ops.aten.mm.default(permute_150, view_114); permute_150 = view_114 = None
+ permute_151 = torch.ops.aten.permute.default(mm_24, [1, 0]); mm_24 = None
+ sum_35 = torch.ops.aten.sum.dim_IntList(view_186, [0], True); view_186 = None
+ view_187 = torch.ops.aten.view.default(sum_35, [768]); sum_35 = None
+ permute_152 = torch.ops.aten.permute.default(permute_151, [1, 0]); permute_151 = None
+ view_188 = torch.ops.aten.view.default(mm_23, [1, 64, 768]); mm_23 = None
+ view_189 = torch.ops.aten.view.default(view_188, [1, 64, 12, 64]); view_188 = None
+ permute_153 = torch.ops.aten.permute.default(view_189, [0, 2, 1, 3]); view_189 = None
+ _scaled_dot_product_efficient_attention_backward_2 = torch.ops.aten._scaled_dot_product_efficient_attention_backward.default(permute_153, permute_74, permute_73, permute_75, None, getitem_104, getitem_105, getitem_106, getitem_107, 0.0, [True, True, True, False], True); permute_153 = permute_74 = permute_73 = permute_75 = getitem_104 = getitem_105 = getitem_106 = getitem_107 = None
+ getitem_142 = _scaled_dot_product_efficient_attention_backward_2[0]
+ getitem_143 = _scaled_dot_product_efficient_attention_backward_2[1]
+ getitem_144 = _scaled_dot_product_efficient_attention_backward_2[2]; _scaled_dot_product_efficient_attention_backward_2 = None
+ permute_154 = torch.ops.aten.permute.default(getitem_144, [0, 2, 1, 3]); getitem_144 = None
+ view_190 = torch.ops.aten.view.default(permute_154, [1, 64, 768]); permute_154 = None
+ permute_155 = torch.ops.aten.permute.default(getitem_142, [0, 2, 1, 3]); getitem_142 = None
+ view_191 = torch.ops.aten.view.default(permute_155, [1, 64, 768]); permute_155 = None
+ permute_156 = torch.ops.aten.permute.default(getitem_143, [0, 2, 1, 3]); getitem_143 = None
+ view_192 = torch.ops.aten.view.default(permute_156, [1, 64, 768]); permute_156 = None
+ cat_2 = torch.ops.aten.cat.default([view_191, view_192, view_190], 2); view_191 = view_192 = view_190 = None
+ view_193 = torch.ops.aten.view.default(cat_2, [64, 2304]); cat_2 = None
+ mm_25 = torch.ops.aten.mm.default(view_193, permute_157); permute_157 = None
+ permute_158 = torch.ops.aten.permute.default(view_193, [1, 0])
+ mm_26 = torch.ops.aten.mm.default(permute_158, view_108); permute_158 = view_108 = None
+ permute_159 = torch.ops.aten.permute.default(mm_26, [1, 0]); mm_26 = None
+ sum_36 = torch.ops.aten.sum.dim_IntList(view_193, [0], True); view_193 = None
+ view_194 = torch.ops.aten.view.default(sum_36, [2304]); sum_36 = None
+ permute_160 = torch.ops.aten.permute.default(permute_159, [1, 0]); permute_159 = None
+ view_195 = torch.ops.aten.view.default(mm_25, [1, 64, 768]); mm_25 = None
+ mul_168 = torch.ops.aten.mul.Tensor(view_195, primals_112); primals_112 = None
+ mul_169 = torch.ops.aten.mul.Tensor(mul_168, 768)
+ sum_37 = torch.ops.aten.sum.dim_IntList(mul_168, [2], True)
+ mul_170 = torch.ops.aten.mul.Tensor(mul_168, mul_72); mul_168 = None
+ sum_38 = torch.ops.aten.sum.dim_IntList(mul_170, [2], True); mul_170 = None
+ mul_171 = torch.ops.aten.mul.Tensor(mul_72, sum_38); sum_38 = None
+ sub_47 = torch.ops.aten.sub.Tensor(mul_169, sum_37); mul_169 = sum_37 = None
+ sub_48 = torch.ops.aten.sub.Tensor(sub_47, mul_171); sub_47 = mul_171 = None
+ mul_172 = torch.ops.aten.mul.Tensor(div_6, sub_48); div_6 = sub_48 = None
+ mul_173 = torch.ops.aten.mul.Tensor(view_195, mul_72); mul_72 = None
+ sum_39 = torch.ops.aten.sum.dim_IntList(mul_173, [0, 1]); mul_173 = None
+ sum_40 = torch.ops.aten.sum.dim_IntList(view_195, [0, 1]); view_195 = None
+ add_110 = torch.ops.aten.add.Tensor(add_109, mul_172); add_109 = mul_172 = None
+ view_196 = torch.ops.aten.view.default(add_110, [64, 768])
+ mm_27 = torch.ops.aten.mm.default(view_196, permute_161); permute_161 = None
+ permute_162 = torch.ops.aten.permute.default(view_196, [1, 0])
+ mm_28 = torch.ops.aten.mm.default(permute_162, view_106); permute_162 = view_106 = None
+ permute_163 = torch.ops.aten.permute.default(mm_28, [1, 0]); mm_28 = None
+ sum_41 = torch.ops.aten.sum.dim_IntList(view_196, [0], True); view_196 = None
+ view_197 = torch.ops.aten.view.default(sum_41, [768]); sum_41 = None
+ permute_164 = torch.ops.aten.permute.default(permute_163, [1, 0]); permute_163 = None
+ view_198 = torch.ops.aten.view.default(mm_27, [1, 64, 3072]); mm_27 = None
+ view_105 = torch.ops.aten.view.default(addmm_34, [1, 64, 3072]); addmm_34 = None
+ mul_68 = torch.ops.aten.mul.Tensor(view_105, 0.5)
+ mul_174 = torch.ops.aten.mul.Tensor(view_198, mul_68); mul_68 = None
+ pow_9 = torch.ops.aten.pow.Tensor_Scalar(view_105, 3.0)
+ mul_69 = torch.ops.aten.mul.Tensor(pow_9, 0.044715); pow_9 = None
+ add_70 = torch.ops.aten.add.Tensor(view_105, mul_69); mul_69 = None
+ mul_70 = torch.ops.aten.mul.Tensor(add_70, 0.7978845608028654); add_70 = None
+ tanh_8 = torch.ops.aten.tanh.default(mul_70); mul_70 = None
+ add_71 = torch.ops.aten.add.Tensor(tanh_8, 1.0)
+ mul_175 = torch.ops.aten.mul.Tensor(view_198, add_71); view_198 = add_71 = None
+ mul_176 = torch.ops.aten.mul.Tensor(tanh_8, tanh_8); tanh_8 = None
+ sub_49 = torch.ops.aten.sub.Tensor(1, mul_176); mul_176 = None
+ mul_177 = torch.ops.aten.mul.Tensor(mul_174, sub_49); mul_174 = sub_49 = None
+ mul_178 = torch.ops.aten.mul.Tensor(mul_177, 0.7978845608028654); mul_177 = None
+ mul_179 = torch.ops.aten.mul.Tensor(mul_178, 0.044715)
+ pow_16 = torch.ops.aten.pow.Tensor_Scalar(view_105, 2.0); view_105 = None
+ mul_180 = torch.ops.aten.mul.Scalar(pow_16, 3.0); pow_16 = None
+ mul_181 = torch.ops.aten.mul.Tensor(mul_179, mul_180); mul_179 = mul_180 = None
+ add_111 = torch.ops.aten.add.Tensor(mul_178, mul_181); mul_178 = mul_181 = None
+ mul_182 = torch.ops.aten.mul.Tensor(mul_175, 0.5); mul_175 = None
+ add_112 = torch.ops.aten.add.Tensor(add_111, mul_182); add_111 = mul_182 = None
+ view_199 = torch.ops.aten.view.default(add_112, [64, 3072]); add_112 = None
+ mm_29 = torch.ops.aten.mm.default(view_199, permute_165); permute_165 = None
+ permute_166 = torch.ops.aten.permute.default(view_199, [1, 0])
+ mm_30 = torch.ops.aten.mm.default(permute_166, view_104); permute_166 = view_104 = None
+ permute_167 = torch.ops.aten.permute.default(mm_30, [1, 0]); mm_30 = None
+ sum_42 = torch.ops.aten.sum.dim_IntList(view_199, [0], True); view_199 = None
+ view_200 = torch.ops.aten.view.default(sum_42, [3072]); sum_42 = None
+ permute_168 = torch.ops.aten.permute.default(permute_167, [1, 0]); permute_167 = None
+ view_201 = torch.ops.aten.view.default(mm_29, [1, 64, 768]); mm_29 = None
+ mul_184 = torch.ops.aten.mul.Tensor(view_201, primals_106); primals_106 = None
+ mul_185 = torch.ops.aten.mul.Tensor(mul_184, 768)
+ sum_43 = torch.ops.aten.sum.dim_IntList(mul_184, [2], True)
+ mul_186 = torch.ops.aten.mul.Tensor(mul_184, mul_66); mul_184 = None
+ sum_44 = torch.ops.aten.sum.dim_IntList(mul_186, [2], True); mul_186 = None
+ mul_187 = torch.ops.aten.mul.Tensor(mul_66, sum_44); sum_44 = None
+ sub_51 = torch.ops.aten.sub.Tensor(mul_185, sum_43); mul_185 = sum_43 = None
+ sub_52 = torch.ops.aten.sub.Tensor(sub_51, mul_187); sub_51 = mul_187 = None
+ mul_188 = torch.ops.aten.mul.Tensor(div_7, sub_52); div_7 = sub_52 = None
+ mul_189 = torch.ops.aten.mul.Tensor(view_201, mul_66); mul_66 = None
+ sum_45 = torch.ops.aten.sum.dim_IntList(mul_189, [0, 1]); mul_189 = None
+ sum_46 = torch.ops.aten.sum.dim_IntList(view_201, [0, 1]); view_201 = None
+ add_113 = torch.ops.aten.add.Tensor(add_110, mul_188); add_110 = mul_188 = None
+ view_202 = torch.ops.aten.view.default(add_113, [64, 768])
+ mm_31 = torch.ops.aten.mm.default(view_202, permute_169); permute_169 = None
+ permute_170 = torch.ops.aten.permute.default(view_202, [1, 0])
+ permute_68 = torch.ops.aten.permute.default(getitem_93, [0, 2, 1, 3])
+ view_101 = torch.ops.aten.view.default(permute_68, [1, 64, 768]); permute_68 = None
+ view_102 = torch.ops.aten.view.default(view_101, [64, 768]); view_101 = None
+ mm_32 = torch.ops.aten.mm.default(permute_170, view_102); permute_170 = view_102 = None
+ permute_171 = torch.ops.aten.permute.default(mm_32, [1, 0]); mm_32 = None
+ sum_47 = torch.ops.aten.sum.dim_IntList(view_202, [0], True); view_202 = None
+ view_203 = torch.ops.aten.view.default(sum_47, [768]); sum_47 = None
+ permute_172 = torch.ops.aten.permute.default(permute_171, [1, 0]); permute_171 = None
+ view_204 = torch.ops.aten.view.default(mm_31, [1, 64, 768]); mm_31 = None
+ view_205 = torch.ops.aten.view.default(view_204, [1, 64, 12, 64]); view_204 = None
+ permute_173 = torch.ops.aten.permute.default(view_205, [0, 2, 1, 3]); view_205 = None
+ _scaled_dot_product_efficient_attention_backward_3 = torch.ops.aten._scaled_dot_product_efficient_attention_backward.default(permute_173, permute_66, permute_65, permute_67, None, getitem_93, getitem_94, getitem_95, getitem_96, 0.0, [True, True, True, False], True); permute_173 = permute_66 = permute_65 = permute_67 = getitem_93 = getitem_94 = getitem_95 = getitem_96 = None
+ getitem_146 = _scaled_dot_product_efficient_attention_backward_3[0]
+ getitem_147 = _scaled_dot_product_efficient_attention_backward_3[1]
+ getitem_148 = _scaled_dot_product_efficient_attention_backward_3[2]; _scaled_dot_product_efficient_attention_backward_3 = None
+ permute_174 = torch.ops.aten.permute.default(getitem_148, [0, 2, 1, 3]); getitem_148 = None
+ view_206 = torch.ops.aten.view.default(permute_174, [1, 64, 768]); permute_174 = None
+ permute_175 = torch.ops.aten.permute.default(getitem_146, [0, 2, 1, 3]); getitem_146 = None
+ view_207 = torch.ops.aten.view.default(permute_175, [1, 64, 768]); permute_175 = None
+ permute_176 = torch.ops.aten.permute.default(getitem_147, [0, 2, 1, 3]); getitem_147 = None
+ view_208 = torch.ops.aten.view.default(permute_176, [1, 64, 768]); permute_176 = None
+ cat_3 = torch.ops.aten.cat.default([view_207, view_208, view_206], 2); view_207 = view_208 = view_206 = None
+ view_209 = torch.ops.aten.view.default(cat_3, [64, 2304]); cat_3 = None
+ mm_33 = torch.ops.aten.mm.default(view_209, permute_177); permute_177 = None
+ permute_178 = torch.ops.aten.permute.default(view_209, [1, 0])
+ mm_34 = torch.ops.aten.mm.default(permute_178, view_96); permute_178 = view_96 = None
+ permute_179 = torch.ops.aten.permute.default(mm_34, [1, 0]); mm_34 = None
+ sum_48 = torch.ops.aten.sum.dim_IntList(view_209, [0], True); view_209 = None
+ view_210 = torch.ops.aten.view.default(sum_48, [2304]); sum_48 = None
+ permute_180 = torch.ops.aten.permute.default(permute_179, [1, 0]); permute_179 = None
+ view_211 = torch.ops.aten.view.default(mm_33, [1, 64, 768]); mm_33 = None
+ mul_191 = torch.ops.aten.mul.Tensor(view_211, primals_100); primals_100 = None
+ mul_192 = torch.ops.aten.mul.Tensor(mul_191, 768)
+ sum_49 = torch.ops.aten.sum.dim_IntList(mul_191, [2], True)
+ mul_193 = torch.ops.aten.mul.Tensor(mul_191, mul_64); mul_191 = None
+ sum_50 = torch.ops.aten.sum.dim_IntList(mul_193, [2], True); mul_193 = None
+ mul_194 = torch.ops.aten.mul.Tensor(mul_64, sum_50); sum_50 = None
+ sub_54 = torch.ops.aten.sub.Tensor(mul_192, sum_49); mul_192 = sum_49 = None
+ sub_55 = torch.ops.aten.sub.Tensor(sub_54, mul_194); sub_54 = mul_194 = None
+ mul_195 = torch.ops.aten.mul.Tensor(div_8, sub_55); div_8 = sub_55 = None
+ mul_196 = torch.ops.aten.mul.Tensor(view_211, mul_64); mul_64 = None
+ sum_51 = torch.ops.aten.sum.dim_IntList(mul_196, [0, 1]); mul_196 = None
+ sum_52 = torch.ops.aten.sum.dim_IntList(view_211, [0, 1]); view_211 = None
+ add_114 = torch.ops.aten.add.Tensor(add_113, mul_195); add_113 = mul_195 = None
+ view_212 = torch.ops.aten.view.default(add_114, [64, 768])
+ mm_35 = torch.ops.aten.mm.default(view_212, permute_181); permute_181 = None
+ permute_182 = torch.ops.aten.permute.default(view_212, [1, 0])
+ mm_36 = torch.ops.aten.mm.default(permute_182, view_94); permute_182 = view_94 = None
+ permute_183 = torch.ops.aten.permute.default(mm_36, [1, 0]); mm_36 = None
+ sum_53 = torch.ops.aten.sum.dim_IntList(view_212, [0], True); view_212 = None
+ view_213 = torch.ops.aten.view.default(sum_53, [768]); sum_53 = None
+ permute_184 = torch.ops.aten.permute.default(permute_183, [1, 0]); permute_183 = None
+ view_214 = torch.ops.aten.view.default(mm_35, [1, 64, 3072]); mm_35 = None
+ view_93 = torch.ops.aten.view.default(addmm_30, [1, 64, 3072]); addmm_30 = None
+ mul_60 = torch.ops.aten.mul.Tensor(view_93, 0.5)
+ mul_197 = torch.ops.aten.mul.Tensor(view_214, mul_60); mul_60 = None
+ pow_8 = torch.ops.aten.pow.Tensor_Scalar(view_93, 3.0)
+ mul_61 = torch.ops.aten.mul.Tensor(pow_8, 0.044715); pow_8 = None
+ add_62 = torch.ops.aten.add.Tensor(view_93, mul_61); mul_61 = None
+ mul_62 = torch.ops.aten.mul.Tensor(add_62, 0.7978845608028654); add_62 = None
+ tanh_7 = torch.ops.aten.tanh.default(mul_62); mul_62 = None
+ add_63 = torch.ops.aten.add.Tensor(tanh_7, 1.0)
+ mul_198 = torch.ops.aten.mul.Tensor(view_214, add_63); view_214 = add_63 = None
+ mul_199 = torch.ops.aten.mul.Tensor(tanh_7, tanh_7); tanh_7 = None
+ sub_56 = torch.ops.aten.sub.Tensor(1, mul_199); mul_199 = None
+ mul_200 = torch.ops.aten.mul.Tensor(mul_197, sub_56); mul_197 = sub_56 = None
+ mul_201 = torch.ops.aten.mul.Tensor(mul_200, 0.7978845608028654); mul_200 = None
+ mul_202 = torch.ops.aten.mul.Tensor(mul_201, 0.044715)
+ pow_17 = torch.ops.aten.pow.Tensor_Scalar(view_93, 2.0); view_93 = None
+ mul_203 = torch.ops.aten.mul.Scalar(pow_17, 3.0); pow_17 = None
+ mul_204 = torch.ops.aten.mul.Tensor(mul_202, mul_203); mul_202 = mul_203 = None
+ add_115 = torch.ops.aten.add.Tensor(mul_201, mul_204); mul_201 = mul_204 = None
+ mul_205 = torch.ops.aten.mul.Tensor(mul_198, 0.5); mul_198 = None
+ add_116 = torch.ops.aten.add.Tensor(add_115, mul_205); add_115 = mul_205 = None
+ view_215 = torch.ops.aten.view.default(add_116, [64, 3072]); add_116 = None
+ mm_37 = torch.ops.aten.mm.default(view_215, permute_185); permute_185 = None
+ permute_186 = torch.ops.aten.permute.default(view_215, [1, 0])
+ mm_38 = torch.ops.aten.mm.default(permute_186, view_92); permute_186 = view_92 = None
+ permute_187 = torch.ops.aten.permute.default(mm_38, [1, 0]); mm_38 = None
+ sum_54 = torch.ops.aten.sum.dim_IntList(view_215, [0], True); view_215 = None
+ view_216 = torch.ops.aten.view.default(sum_54, [3072]); sum_54 = None
+ permute_188 = torch.ops.aten.permute.default(permute_187, [1, 0]); permute_187 = None
+ view_217 = torch.ops.aten.view.default(mm_37, [1, 64, 768]); mm_37 = None
+ mul_207 = torch.ops.aten.mul.Tensor(view_217, primals_94); primals_94 = None
+ mul_208 = torch.ops.aten.mul.Tensor(mul_207, 768)
+ sum_55 = torch.ops.aten.sum.dim_IntList(mul_207, [2], True)
+ mul_209 = torch.ops.aten.mul.Tensor(mul_207, mul_58); mul_207 = None
+ sum_56 = torch.ops.aten.sum.dim_IntList(mul_209, [2], True); mul_209 = None
+ mul_210 = torch.ops.aten.mul.Tensor(mul_58, sum_56); sum_56 = None
+ sub_58 = torch.ops.aten.sub.Tensor(mul_208, sum_55); mul_208 = sum_55 = None
+ sub_59 = torch.ops.aten.sub.Tensor(sub_58, mul_210); sub_58 = mul_210 = None
+ mul_211 = torch.ops.aten.mul.Tensor(div_9, sub_59); div_9 = sub_59 = None
+ mul_212 = torch.ops.aten.mul.Tensor(view_217, mul_58); mul_58 = None
+ sum_57 = torch.ops.aten.sum.dim_IntList(mul_212, [0, 1]); mul_212 = None
+ sum_58 = torch.ops.aten.sum.dim_IntList(view_217, [0, 1]); view_217 = None
+ add_117 = torch.ops.aten.add.Tensor(add_114, mul_211); add_114 = mul_211 = None
+ view_218 = torch.ops.aten.view.default(add_117, [64, 768])
+ mm_39 = torch.ops.aten.mm.default(view_218, permute_189); permute_189 = None
+ permute_190 = torch.ops.aten.permute.default(view_218, [1, 0])
+ permute_60 = torch.ops.aten.permute.default(getitem_82, [0, 2, 1, 3])
+ view_89 = torch.ops.aten.view.default(permute_60, [1, 64, 768]); permute_60 = None
+ view_90 = torch.ops.aten.view.default(view_89, [64, 768]); view_89 = None
+ mm_40 = torch.ops.aten.mm.default(permute_190, view_90); permute_190 = view_90 = None
+ permute_191 = torch.ops.aten.permute.default(mm_40, [1, 0]); mm_40 = None
+ sum_59 = torch.ops.aten.sum.dim_IntList(view_218, [0], True); view_218 = None
+ view_219 = torch.ops.aten.view.default(sum_59, [768]); sum_59 = None
+ permute_192 = torch.ops.aten.permute.default(permute_191, [1, 0]); permute_191 = None
+ view_220 = torch.ops.aten.view.default(mm_39, [1, 64, 768]); mm_39 = None
+ view_221 = torch.ops.aten.view.default(view_220, [1, 64, 12, 64]); view_220 = None
+ permute_193 = torch.ops.aten.permute.default(view_221, [0, 2, 1, 3]); view_221 = None
+ _scaled_dot_product_efficient_attention_backward_4 = torch.ops.aten._scaled_dot_product_efficient_attention_backward.default(permute_193, permute_58, permute_57, permute_59, None, getitem_82, getitem_83, getitem_84, getitem_85, 0.0, [True, True, True, False], True); permute_193 = permute_58 = permute_57 = permute_59 = getitem_82 = getitem_83 = getitem_84 = getitem_85 = None
+ getitem_150 = _scaled_dot_product_efficient_attention_backward_4[0]
+ getitem_151 = _scaled_dot_product_efficient_attention_backward_4[1]
+ getitem_152 = _scaled_dot_product_efficient_attention_backward_4[2]; _scaled_dot_product_efficient_attention_backward_4 = None
+ permute_194 = torch.ops.aten.permute.default(getitem_152, [0, 2, 1, 3]); getitem_152 = None
+ view_222 = torch.ops.aten.view.default(permute_194, [1, 64, 768]); permute_194 = None
+ permute_195 = torch.ops.aten.permute.default(getitem_150, [0, 2, 1, 3]); getitem_150 = None
+ view_223 = torch.ops.aten.view.default(permute_195, [1, 64, 768]); permute_195 = None
+ permute_196 = torch.ops.aten.permute.default(getitem_151, [0, 2, 1, 3]); getitem_151 = None
+ view_224 = torch.ops.aten.view.default(permute_196, [1, 64, 768]); permute_196 = None
+ cat_4 = torch.ops.aten.cat.default([view_223, view_224, view_222], 2); view_223 = view_224 = view_222 = None
+ view_225 = torch.ops.aten.view.default(cat_4, [64, 2304]); cat_4 = None
+ mm_41 = torch.ops.aten.mm.default(view_225, permute_197); permute_197 = None
+ permute_198 = torch.ops.aten.permute.default(view_225, [1, 0])
+ mm_42 = torch.ops.aten.mm.default(permute_198, view_84); permute_198 = view_84 = None
+ permute_199 = torch.ops.aten.permute.default(mm_42, [1, 0]); mm_42 = None
+ sum_60 = torch.ops.aten.sum.dim_IntList(view_225, [0], True); view_225 = None
+ view_226 = torch.ops.aten.view.default(sum_60, [2304]); sum_60 = None
+ permute_200 = torch.ops.aten.permute.default(permute_199, [1, 0]); permute_199 = None
+ view_227 = torch.ops.aten.view.default(mm_41, [1, 64, 768]); mm_41 = None
+ mul_214 = torch.ops.aten.mul.Tensor(view_227, primals_88); primals_88 = None
+ mul_215 = torch.ops.aten.mul.Tensor(mul_214, 768)
+ sum_61 = torch.ops.aten.sum.dim_IntList(mul_214, [2], True)
+ mul_216 = torch.ops.aten.mul.Tensor(mul_214, mul_56); mul_214 = None
+ sum_62 = torch.ops.aten.sum.dim_IntList(mul_216, [2], True); mul_216 = None
+ mul_217 = torch.ops.aten.mul.Tensor(mul_56, sum_62); sum_62 = None
+ sub_61 = torch.ops.aten.sub.Tensor(mul_215, sum_61); mul_215 = sum_61 = None
+ sub_62 = torch.ops.aten.sub.Tensor(sub_61, mul_217); sub_61 = mul_217 = None
+ mul_218 = torch.ops.aten.mul.Tensor(div_10, sub_62); div_10 = sub_62 = None
+ mul_219 = torch.ops.aten.mul.Tensor(view_227, mul_56); mul_56 = None
+ sum_63 = torch.ops.aten.sum.dim_IntList(mul_219, [0, 1]); mul_219 = None
+ sum_64 = torch.ops.aten.sum.dim_IntList(view_227, [0, 1]); view_227 = None
+ add_118 = torch.ops.aten.add.Tensor(add_117, mul_218); add_117 = mul_218 = None
+ view_228 = torch.ops.aten.view.default(add_118, [64, 768])
+ mm_43 = torch.ops.aten.mm.default(view_228, permute_201); permute_201 = None
+ permute_202 = torch.ops.aten.permute.default(view_228, [1, 0])
+ mm_44 = torch.ops.aten.mm.default(permute_202, view_82); permute_202 = view_82 = None
+ permute_203 = torch.ops.aten.permute.default(mm_44, [1, 0]); mm_44 = None
+ sum_65 = torch.ops.aten.sum.dim_IntList(view_228, [0], True); view_228 = None
+ view_229 = torch.ops.aten.view.default(sum_65, [768]); sum_65 = None
+ permute_204 = torch.ops.aten.permute.default(permute_203, [1, 0]); permute_203 = None
+ view_230 = torch.ops.aten.view.default(mm_43, [1, 64, 3072]); mm_43 = None
+ view_81 = torch.ops.aten.view.default(addmm_26, [1, 64, 3072]); addmm_26 = None
+ mul_52 = torch.ops.aten.mul.Tensor(view_81, 0.5)
+ mul_220 = torch.ops.aten.mul.Tensor(view_230, mul_52); mul_52 = None
+ pow_7 = torch.ops.aten.pow.Tensor_Scalar(view_81, 3.0)
+ mul_53 = torch.ops.aten.mul.Tensor(pow_7, 0.044715); pow_7 = None
+ add_54 = torch.ops.aten.add.Tensor(view_81, mul_53); mul_53 = None
+ mul_54 = torch.ops.aten.mul.Tensor(add_54, 0.7978845608028654); add_54 = None
+ tanh_6 = torch.ops.aten.tanh.default(mul_54); mul_54 = None
+ add_55 = torch.ops.aten.add.Tensor(tanh_6, 1.0)
+ mul_221 = torch.ops.aten.mul.Tensor(view_230, add_55); view_230 = add_55 = None
+ mul_222 = torch.ops.aten.mul.Tensor(tanh_6, tanh_6); tanh_6 = None
+ sub_63 = torch.ops.aten.sub.Tensor(1, mul_222); mul_222 = None
+ mul_223 = torch.ops.aten.mul.Tensor(mul_220, sub_63); mul_220 = sub_63 = None
+ mul_224 = torch.ops.aten.mul.Tensor(mul_223, 0.7978845608028654); mul_223 = None
+ mul_225 = torch.ops.aten.mul.Tensor(mul_224, 0.044715)
+ pow_18 = torch.ops.aten.pow.Tensor_Scalar(view_81, 2.0); view_81 = None
+ mul_226 = torch.ops.aten.mul.Scalar(pow_18, 3.0); pow_18 = None
+ mul_227 = torch.ops.aten.mul.Tensor(mul_225, mul_226); mul_225 = mul_226 = None
+ add_119 = torch.ops.aten.add.Tensor(mul_224, mul_227); mul_224 = mul_227 = None
+ mul_228 = torch.ops.aten.mul.Tensor(mul_221, 0.5); mul_221 = None
+ add_120 = torch.ops.aten.add.Tensor(add_119, mul_228); add_119 = mul_228 = None
+ view_231 = torch.ops.aten.view.default(add_120, [64, 3072]); add_120 = None
+ mm_45 = torch.ops.aten.mm.default(view_231, permute_205); permute_205 = None
+ permute_206 = torch.ops.aten.permute.default(view_231, [1, 0])
+ mm_46 = torch.ops.aten.mm.default(permute_206, view_80); permute_206 = view_80 = None
+ permute_207 = torch.ops.aten.permute.default(mm_46, [1, 0]); mm_46 = None
+ sum_66 = torch.ops.aten.sum.dim_IntList(view_231, [0], True); view_231 = None
+ view_232 = torch.ops.aten.view.default(sum_66, [3072]); sum_66 = None
+ permute_208 = torch.ops.aten.permute.default(permute_207, [1, 0]); permute_207 = None
+ view_233 = torch.ops.aten.view.default(mm_45, [1, 64, 768]); mm_45 = None
+ mul_230 = torch.ops.aten.mul.Tensor(view_233, primals_82); primals_82 = None
+ mul_231 = torch.ops.aten.mul.Tensor(mul_230, 768)
+ sum_67 = torch.ops.aten.sum.dim_IntList(mul_230, [2], True)
+ mul_232 = torch.ops.aten.mul.Tensor(mul_230, mul_50); mul_230 = None
+ sum_68 = torch.ops.aten.sum.dim_IntList(mul_232, [2], True); mul_232 = None
+ mul_233 = torch.ops.aten.mul.Tensor(mul_50, sum_68); sum_68 = None
+ sub_65 = torch.ops.aten.sub.Tensor(mul_231, sum_67); mul_231 = sum_67 = None
+ sub_66 = torch.ops.aten.sub.Tensor(sub_65, mul_233); sub_65 = mul_233 = None
+ mul_234 = torch.ops.aten.mul.Tensor(div_11, sub_66); div_11 = sub_66 = None
+ mul_235 = torch.ops.aten.mul.Tensor(view_233, mul_50); mul_50 = None
+ sum_69 = torch.ops.aten.sum.dim_IntList(mul_235, [0, 1]); mul_235 = None
+ sum_70 = torch.ops.aten.sum.dim_IntList(view_233, [0, 1]); view_233 = None
+ add_121 = torch.ops.aten.add.Tensor(add_118, mul_234); add_118 = mul_234 = None
+ view_234 = torch.ops.aten.view.default(add_121, [64, 768])
+ mm_47 = torch.ops.aten.mm.default(view_234, permute_209); permute_209 = None
+ permute_210 = torch.ops.aten.permute.default(view_234, [1, 0])
+ permute_52 = torch.ops.aten.permute.default(getitem_71, [0, 2, 1, 3])
+ view_77 = torch.ops.aten.view.default(permute_52, [1, 64, 768]); permute_52 = None
+ view_78 = torch.ops.aten.view.default(view_77, [64, 768]); view_77 = None
+ mm_48 = torch.ops.aten.mm.default(permute_210, view_78); permute_210 = view_78 = None
+ permute_211 = torch.ops.aten.permute.default(mm_48, [1, 0]); mm_48 = None
+ sum_71 = torch.ops.aten.sum.dim_IntList(view_234, [0], True); view_234 = None
+ view_235 = torch.ops.aten.view.default(sum_71, [768]); sum_71 = None
+ permute_212 = torch.ops.aten.permute.default(permute_211, [1, 0]); permute_211 = None
+ view_236 = torch.ops.aten.view.default(mm_47, [1, 64, 768]); mm_47 = None
+ view_237 = torch.ops.aten.view.default(view_236, [1, 64, 12, 64]); view_236 = None
+ permute_213 = torch.ops.aten.permute.default(view_237, [0, 2, 1, 3]); view_237 = None
+ _scaled_dot_product_efficient_attention_backward_5 = torch.ops.aten._scaled_dot_product_efficient_attention_backward.default(permute_213, permute_50, permute_49, permute_51, None, getitem_71, getitem_72, getitem_73, getitem_74, 0.0, [True, True, True, False], True); permute_213 = permute_50 = permute_49 = permute_51 = getitem_71 = getitem_72 = getitem_73 = getitem_74 = None
+ getitem_154 = _scaled_dot_product_efficient_attention_backward_5[0]
+ getitem_155 = _scaled_dot_product_efficient_attention_backward_5[1]
+ getitem_156 = _scaled_dot_product_efficient_attention_backward_5[2]; _scaled_dot_product_efficient_attention_backward_5 = None
+ permute_214 = torch.ops.aten.permute.default(getitem_156, [0, 2, 1, 3]); getitem_156 = None
+ view_238 = torch.ops.aten.view.default(permute_214, [1, 64, 768]); permute_214 = None
+ permute_215 = torch.ops.aten.permute.default(getitem_154, [0, 2, 1, 3]); getitem_154 = None
+ view_239 = torch.ops.aten.view.default(permute_215, [1, 64, 768]); permute_215 = None
+ permute_216 = torch.ops.aten.permute.default(getitem_155, [0, 2, 1, 3]); getitem_155 = None
+ view_240 = torch.ops.aten.view.default(permute_216, [1, 64, 768]); permute_216 = None
+ cat_5 = torch.ops.aten.cat.default([view_239, view_240, view_238], 2); view_239 = view_240 = view_238 = None
+ view_241 = torch.ops.aten.view.default(cat_5, [64, 2304]); cat_5 = None
+ mm_49 = torch.ops.aten.mm.default(view_241, permute_217); permute_217 = None
+ permute_218 = torch.ops.aten.permute.default(view_241, [1, 0])
+ mm_50 = torch.ops.aten.mm.default(permute_218, view_72); permute_218 = view_72 = None
+ permute_219 = torch.ops.aten.permute.default(mm_50, [1, 0]); mm_50 = None
+ sum_72 = torch.ops.aten.sum.dim_IntList(view_241, [0], True); view_241 = None
+ view_242 = torch.ops.aten.view.default(sum_72, [2304]); sum_72 = None
+ permute_220 = torch.ops.aten.permute.default(permute_219, [1, 0]); permute_219 = None
+ view_243 = torch.ops.aten.view.default(mm_49, [1, 64, 768]); mm_49 = None
+ mul_237 = torch.ops.aten.mul.Tensor(view_243, primals_76); primals_76 = None
+ mul_238 = torch.ops.aten.mul.Tensor(mul_237, 768)
+ sum_73 = torch.ops.aten.sum.dim_IntList(mul_237, [2], True)
+ mul_239 = torch.ops.aten.mul.Tensor(mul_237, mul_48); mul_237 = None
+ sum_74 = torch.ops.aten.sum.dim_IntList(mul_239, [2], True); mul_239 = None
+ mul_240 = torch.ops.aten.mul.Tensor(mul_48, sum_74); sum_74 = None
+ sub_68 = torch.ops.aten.sub.Tensor(mul_238, sum_73); mul_238 = sum_73 = None
+ sub_69 = torch.ops.aten.sub.Tensor(sub_68, mul_240); sub_68 = mul_240 = None
+ mul_241 = torch.ops.aten.mul.Tensor(div_12, sub_69); div_12 = sub_69 = None
+ mul_242 = torch.ops.aten.mul.Tensor(view_243, mul_48); mul_48 = None
+ sum_75 = torch.ops.aten.sum.dim_IntList(mul_242, [0, 1]); mul_242 = None
+ sum_76 = torch.ops.aten.sum.dim_IntList(view_243, [0, 1]); view_243 = None
+ add_122 = torch.ops.aten.add.Tensor(add_121, mul_241); add_121 = mul_241 = None
+ view_244 = torch.ops.aten.view.default(add_122, [64, 768])
+ mm_51 = torch.ops.aten.mm.default(view_244, permute_221); permute_221 = None
+ permute_222 = torch.ops.aten.permute.default(view_244, [1, 0])
+ mm_52 = torch.ops.aten.mm.default(permute_222, view_70); permute_222 = view_70 = None
+ permute_223 = torch.ops.aten.permute.default(mm_52, [1, 0]); mm_52 = None
+ sum_77 = torch.ops.aten.sum.dim_IntList(view_244, [0], True); view_244 = None
+ view_245 = torch.ops.aten.view.default(sum_77, [768]); sum_77 = None
+ permute_224 = torch.ops.aten.permute.default(permute_223, [1, 0]); permute_223 = None
+ view_246 = torch.ops.aten.view.default(mm_51, [1, 64, 3072]); mm_51 = None
+ view_69 = torch.ops.aten.view.default(addmm_22, [1, 64, 3072]); addmm_22 = None
+ mul_44 = torch.ops.aten.mul.Tensor(view_69, 0.5)
+ mul_243 = torch.ops.aten.mul.Tensor(view_246, mul_44); mul_44 = None
+ pow_6 = torch.ops.aten.pow.Tensor_Scalar(view_69, 3.0)
+ mul_45 = torch.ops.aten.mul.Tensor(pow_6, 0.044715); pow_6 = None
+ add_46 = torch.ops.aten.add.Tensor(view_69, mul_45); mul_45 = None
+ mul_46 = torch.ops.aten.mul.Tensor(add_46, 0.7978845608028654); add_46 = None
+ tanh_5 = torch.ops.aten.tanh.default(mul_46); mul_46 = None
+ add_47 = torch.ops.aten.add.Tensor(tanh_5, 1.0)
+ mul_244 = torch.ops.aten.mul.Tensor(view_246, add_47); view_246 = add_47 = None
+ mul_245 = torch.ops.aten.mul.Tensor(tanh_5, tanh_5); tanh_5 = None
+ sub_70 = torch.ops.aten.sub.Tensor(1, mul_245); mul_245 = None
+ mul_246 = torch.ops.aten.mul.Tensor(mul_243, sub_70); mul_243 = sub_70 = None
+ mul_247 = torch.ops.aten.mul.Tensor(mul_246, 0.7978845608028654); mul_246 = None
+ mul_248 = torch.ops.aten.mul.Tensor(mul_247, 0.044715)
+ pow_19 = torch.ops.aten.pow.Tensor_Scalar(view_69, 2.0); view_69 = None
+ mul_249 = torch.ops.aten.mul.Scalar(pow_19, 3.0); pow_19 = None
+ mul_250 = torch.ops.aten.mul.Tensor(mul_248, mul_249); mul_248 = mul_249 = None
+ add_123 = torch.ops.aten.add.Tensor(mul_247, mul_250); mul_247 = mul_250 = None
+ mul_251 = torch.ops.aten.mul.Tensor(mul_244, 0.5); mul_244 = None
+ add_124 = torch.ops.aten.add.Tensor(add_123, mul_251); add_123 = mul_251 = None
+ view_247 = torch.ops.aten.view.default(add_124, [64, 3072]); add_124 = None
+ mm_53 = torch.ops.aten.mm.default(view_247, permute_225); permute_225 = None
+ permute_226 = torch.ops.aten.permute.default(view_247, [1, 0])
+ mm_54 = torch.ops.aten.mm.default(permute_226, view_68); permute_226 = view_68 = None
+ permute_227 = torch.ops.aten.permute.default(mm_54, [1, 0]); mm_54 = None
+ sum_78 = torch.ops.aten.sum.dim_IntList(view_247, [0], True); view_247 = None
+ view_248 = torch.ops.aten.view.default(sum_78, [3072]); sum_78 = None
+ permute_228 = torch.ops.aten.permute.default(permute_227, [1, 0]); permute_227 = None
+ view_249 = torch.ops.aten.view.default(mm_53, [1, 64, 768]); mm_53 = None
+ mul_253 = torch.ops.aten.mul.Tensor(view_249, primals_70); primals_70 = None
+ mul_254 = torch.ops.aten.mul.Tensor(mul_253, 768)
+ sum_79 = torch.ops.aten.sum.dim_IntList(mul_253, [2], True)
+ mul_255 = torch.ops.aten.mul.Tensor(mul_253, mul_42); mul_253 = None
+ sum_80 = torch.ops.aten.sum.dim_IntList(mul_255, [2], True); mul_255 = None
+ mul_256 = torch.ops.aten.mul.Tensor(mul_42, sum_80); sum_80 = None
+ sub_72 = torch.ops.aten.sub.Tensor(mul_254, sum_79); mul_254 = sum_79 = None
+ sub_73 = torch.ops.aten.sub.Tensor(sub_72, mul_256); sub_72 = mul_256 = None
+ mul_257 = torch.ops.aten.mul.Tensor(div_13, sub_73); div_13 = sub_73 = None
+ mul_258 = torch.ops.aten.mul.Tensor(view_249, mul_42); mul_42 = None
+ sum_81 = torch.ops.aten.sum.dim_IntList(mul_258, [0, 1]); mul_258 = None
+ sum_82 = torch.ops.aten.sum.dim_IntList(view_249, [0, 1]); view_249 = None
+ add_125 = torch.ops.aten.add.Tensor(add_122, mul_257); add_122 = mul_257 = None
+ view_250 = torch.ops.aten.view.default(add_125, [64, 768])
+ mm_55 = torch.ops.aten.mm.default(view_250, permute_229); permute_229 = None
+ permute_230 = torch.ops.aten.permute.default(view_250, [1, 0])
+ permute_44 = torch.ops.aten.permute.default(getitem_60, [0, 2, 1, 3])
+ view_65 = torch.ops.aten.view.default(permute_44, [1, 64, 768]); permute_44 = None
+ view_66 = torch.ops.aten.view.default(view_65, [64, 768]); view_65 = None
+ mm_56 = torch.ops.aten.mm.default(permute_230, view_66); permute_230 = view_66 = None
+ permute_231 = torch.ops.aten.permute.default(mm_56, [1, 0]); mm_56 = None
+ sum_83 = torch.ops.aten.sum.dim_IntList(view_250, [0], True); view_250 = None
+ view_251 = torch.ops.aten.view.default(sum_83, [768]); sum_83 = None
+ permute_232 = torch.ops.aten.permute.default(permute_231, [1, 0]); permute_231 = None
+ view_252 = torch.ops.aten.view.default(mm_55, [1, 64, 768]); mm_55 = None
+ view_253 = torch.ops.aten.view.default(view_252, [1, 64, 12, 64]); view_252 = None
+ permute_233 = torch.ops.aten.permute.default(view_253, [0, 2, 1, 3]); view_253 = None
+ _scaled_dot_product_efficient_attention_backward_6 = torch.ops.aten._scaled_dot_product_efficient_attention_backward.default(permute_233, permute_42, permute_41, permute_43, None, getitem_60, getitem_61, getitem_62, getitem_63, 0.0, [True, True, True, False], True); permute_233 = permute_42 = permute_41 = permute_43 = getitem_60 = getitem_61 = getitem_62 = getitem_63 = None
+ getitem_158 = _scaled_dot_product_efficient_attention_backward_6[0]
+ getitem_159 = _scaled_dot_product_efficient_attention_backward_6[1]
+ getitem_160 = _scaled_dot_product_efficient_attention_backward_6[2]; _scaled_dot_product_efficient_attention_backward_6 = None
+ permute_234 = torch.ops.aten.permute.default(getitem_160, [0, 2, 1, 3]); getitem_160 = None
+ view_254 = torch.ops.aten.view.default(permute_234, [1, 64, 768]); permute_234 = None
+ permute_235 = torch.ops.aten.permute.default(getitem_158, [0, 2, 1, 3]); getitem_158 = None
+ view_255 = torch.ops.aten.view.default(permute_235, [1, 64, 768]); permute_235 = None
+ permute_236 = torch.ops.aten.permute.default(getitem_159, [0, 2, 1, 3]); getitem_159 = None
+ view_256 = torch.ops.aten.view.default(permute_236, [1, 64, 768]); permute_236 = None
+ cat_6 = torch.ops.aten.cat.default([view_255, view_256, view_254], 2); view_255 = view_256 = view_254 = None
+ view_257 = torch.ops.aten.view.default(cat_6, [64, 2304]); cat_6 = None
+ mm_57 = torch.ops.aten.mm.default(view_257, permute_237); permute_237 = None
+ permute_238 = torch.ops.aten.permute.default(view_257, [1, 0])
+ mm_58 = torch.ops.aten.mm.default(permute_238, view_60); permute_238 = view_60 = None
+ permute_239 = torch.ops.aten.permute.default(mm_58, [1, 0]); mm_58 = None
+ sum_84 = torch.ops.aten.sum.dim_IntList(view_257, [0], True); view_257 = None
+ view_258 = torch.ops.aten.view.default(sum_84, [2304]); sum_84 = None
+ permute_240 = torch.ops.aten.permute.default(permute_239, [1, 0]); permute_239 = None
+ view_259 = torch.ops.aten.view.default(mm_57, [1, 64, 768]); mm_57 = None
+ mul_260 = torch.ops.aten.mul.Tensor(view_259, primals_64); primals_64 = None
+ mul_261 = torch.ops.aten.mul.Tensor(mul_260, 768)
+ sum_85 = torch.ops.aten.sum.dim_IntList(mul_260, [2], True)
+ mul_262 = torch.ops.aten.mul.Tensor(mul_260, mul_40); mul_260 = None
+ sum_86 = torch.ops.aten.sum.dim_IntList(mul_262, [2], True); mul_262 = None
+ mul_263 = torch.ops.aten.mul.Tensor(mul_40, sum_86); sum_86 = None
+ sub_75 = torch.ops.aten.sub.Tensor(mul_261, sum_85); mul_261 = sum_85 = None
+ sub_76 = torch.ops.aten.sub.Tensor(sub_75, mul_263); sub_75 = mul_263 = None
+ mul_264 = torch.ops.aten.mul.Tensor(div_14, sub_76); div_14 = sub_76 = None
+ mul_265 = torch.ops.aten.mul.Tensor(view_259, mul_40); mul_40 = None
+ sum_87 = torch.ops.aten.sum.dim_IntList(mul_265, [0, 1]); mul_265 = None
+ sum_88 = torch.ops.aten.sum.dim_IntList(view_259, [0, 1]); view_259 = None
+ add_126 = torch.ops.aten.add.Tensor(add_125, mul_264); add_125 = mul_264 = None
+ view_260 = torch.ops.aten.view.default(add_126, [64, 768])
+ mm_59 = torch.ops.aten.mm.default(view_260, permute_241); permute_241 = None
+ permute_242 = torch.ops.aten.permute.default(view_260, [1, 0])
+ mm_60 = torch.ops.aten.mm.default(permute_242, view_58); permute_242 = view_58 = None
+ permute_243 = torch.ops.aten.permute.default(mm_60, [1, 0]); mm_60 = None
+ sum_89 = torch.ops.aten.sum.dim_IntList(view_260, [0], True); view_260 = None
+ view_261 = torch.ops.aten.view.default(sum_89, [768]); sum_89 = None
+ permute_244 = torch.ops.aten.permute.default(permute_243, [1, 0]); permute_243 = None
+ view_262 = torch.ops.aten.view.default(mm_59, [1, 64, 3072]); mm_59 = None
+ view_57 = torch.ops.aten.view.default(addmm_18, [1, 64, 3072]); addmm_18 = None
+ mul_36 = torch.ops.aten.mul.Tensor(view_57, 0.5)
+ mul_266 = torch.ops.aten.mul.Tensor(view_262, mul_36); mul_36 = None
+ pow_5 = torch.ops.aten.pow.Tensor_Scalar(view_57, 3.0)
+ mul_37 = torch.ops.aten.mul.Tensor(pow_5, 0.044715); pow_5 = None
+ add_38 = torch.ops.aten.add.Tensor(view_57, mul_37); mul_37 = None
+ mul_38 = torch.ops.aten.mul.Tensor(add_38, 0.7978845608028654); add_38 = None
+ tanh_4 = torch.ops.aten.tanh.default(mul_38); mul_38 = None
+ add_39 = torch.ops.aten.add.Tensor(tanh_4, 1.0)
+ mul_267 = torch.ops.aten.mul.Tensor(view_262, add_39); view_262 = add_39 = None
+ mul_268 = torch.ops.aten.mul.Tensor(tanh_4, tanh_4); tanh_4 = None
+ sub_77 = torch.ops.aten.sub.Tensor(1, mul_268); mul_268 = None
+ mul_269 = torch.ops.aten.mul.Tensor(mul_266, sub_77); mul_266 = sub_77 = None
+ mul_270 = torch.ops.aten.mul.Tensor(mul_269, 0.7978845608028654); mul_269 = None
+ mul_271 = torch.ops.aten.mul.Tensor(mul_270, 0.044715)
+ pow_20 = torch.ops.aten.pow.Tensor_Scalar(view_57, 2.0); view_57 = None
+ mul_272 = torch.ops.aten.mul.Scalar(pow_20, 3.0); pow_20 = None
+ mul_273 = torch.ops.aten.mul.Tensor(mul_271, mul_272); mul_271 = mul_272 = None
+ add_127 = torch.ops.aten.add.Tensor(mul_270, mul_273); mul_270 = mul_273 = None
+ mul_274 = torch.ops.aten.mul.Tensor(mul_267, 0.5); mul_267 = None
+ add_128 = torch.ops.aten.add.Tensor(add_127, mul_274); add_127 = mul_274 = None
+ view_263 = torch.ops.aten.view.default(add_128, [64, 3072]); add_128 = None
+ mm_61 = torch.ops.aten.mm.default(view_263, permute_245); permute_245 = None
+ permute_246 = torch.ops.aten.permute.default(view_263, [1, 0])
+ mm_62 = torch.ops.aten.mm.default(permute_246, view_56); permute_246 = view_56 = None
+ permute_247 = torch.ops.aten.permute.default(mm_62, [1, 0]); mm_62 = None
+ sum_90 = torch.ops.aten.sum.dim_IntList(view_263, [0], True); view_263 = None
+ view_264 = torch.ops.aten.view.default(sum_90, [3072]); sum_90 = None
+ permute_248 = torch.ops.aten.permute.default(permute_247, [1, 0]); permute_247 = None
+ view_265 = torch.ops.aten.view.default(mm_61, [1, 64, 768]); mm_61 = None
+ mul_276 = torch.ops.aten.mul.Tensor(view_265, primals_58); primals_58 = None
+ mul_277 = torch.ops.aten.mul.Tensor(mul_276, 768)
+ sum_91 = torch.ops.aten.sum.dim_IntList(mul_276, [2], True)
+ mul_278 = torch.ops.aten.mul.Tensor(mul_276, mul_34); mul_276 = None
+ sum_92 = torch.ops.aten.sum.dim_IntList(mul_278, [2], True); mul_278 = None
+ mul_279 = torch.ops.aten.mul.Tensor(mul_34, sum_92); sum_92 = None
+ sub_79 = torch.ops.aten.sub.Tensor(mul_277, sum_91); mul_277 = sum_91 = None
+ sub_80 = torch.ops.aten.sub.Tensor(sub_79, mul_279); sub_79 = mul_279 = None
+ mul_280 = torch.ops.aten.mul.Tensor(div_15, sub_80); div_15 = sub_80 = None
+ mul_281 = torch.ops.aten.mul.Tensor(view_265, mul_34); mul_34 = None
+ sum_93 = torch.ops.aten.sum.dim_IntList(mul_281, [0, 1]); mul_281 = None
+ sum_94 = torch.ops.aten.sum.dim_IntList(view_265, [0, 1]); view_265 = None
+ add_129 = torch.ops.aten.add.Tensor(add_126, mul_280); add_126 = mul_280 = None
+ view_266 = torch.ops.aten.view.default(add_129, [64, 768])
+ mm_63 = torch.ops.aten.mm.default(view_266, permute_249); permute_249 = None
+ permute_250 = torch.ops.aten.permute.default(view_266, [1, 0])
+ permute_36 = torch.ops.aten.permute.default(getitem_49, [0, 2, 1, 3])
+ view_53 = torch.ops.aten.view.default(permute_36, [1, 64, 768]); permute_36 = None
+ view_54 = torch.ops.aten.view.default(view_53, [64, 768]); view_53 = None
+ mm_64 = torch.ops.aten.mm.default(permute_250, view_54); permute_250 = view_54 = None
+ permute_251 = torch.ops.aten.permute.default(mm_64, [1, 0]); mm_64 = None
+ sum_95 = torch.ops.aten.sum.dim_IntList(view_266, [0], True); view_266 = None
+ view_267 = torch.ops.aten.view.default(sum_95, [768]); sum_95 = None
+ permute_252 = torch.ops.aten.permute.default(permute_251, [1, 0]); permute_251 = None
+ view_268 = torch.ops.aten.view.default(mm_63, [1, 64, 768]); mm_63 = None
+ view_269 = torch.ops.aten.view.default(view_268, [1, 64, 12, 64]); view_268 = None
+ permute_253 = torch.ops.aten.permute.default(view_269, [0, 2, 1, 3]); view_269 = None
+ _scaled_dot_product_efficient_attention_backward_7 = torch.ops.aten._scaled_dot_product_efficient_attention_backward.default(permute_253, permute_34, permute_33, permute_35, None, getitem_49, getitem_50, getitem_51, getitem_52, 0.0, [True, True, True, False], True); permute_253 = permute_34 = permute_33 = permute_35 = getitem_49 = getitem_50 = getitem_51 = getitem_52 = None
+ getitem_162 = _scaled_dot_product_efficient_attention_backward_7[0]
+ getitem_163 = _scaled_dot_product_efficient_attention_backward_7[1]
+ getitem_164 = _scaled_dot_product_efficient_attention_backward_7[2]; _scaled_dot_product_efficient_attention_backward_7 = None
+ permute_254 = torch.ops.aten.permute.default(getitem_164, [0, 2, 1, 3]); getitem_164 = None
+ view_270 = torch.ops.aten.view.default(permute_254, [1, 64, 768]); permute_254 = None
+ permute_255 = torch.ops.aten.permute.default(getitem_162, [0, 2, 1, 3]); getitem_162 = None
+ view_271 = torch.ops.aten.view.default(permute_255, [1, 64, 768]); permute_255 = None
+ permute_256 = torch.ops.aten.permute.default(getitem_163, [0, 2, 1, 3]); getitem_163 = None
+ view_272 = torch.ops.aten.view.default(permute_256, [1, 64, 768]); permute_256 = None
+ cat_7 = torch.ops.aten.cat.default([view_271, view_272, view_270], 2); view_271 = view_272 = view_270 = None
+ view_273 = torch.ops.aten.view.default(cat_7, [64, 2304]); cat_7 = None
+ mm_65 = torch.ops.aten.mm.default(view_273, permute_257); permute_257 = None
+ permute_258 = torch.ops.aten.permute.default(view_273, [1, 0])
+ mm_66 = torch.ops.aten.mm.default(permute_258, view_48); permute_258 = view_48 = None
+ permute_259 = torch.ops.aten.permute.default(mm_66, [1, 0]); mm_66 = None
+ sum_96 = torch.ops.aten.sum.dim_IntList(view_273, [0], True); view_273 = None
+ view_274 = torch.ops.aten.view.default(sum_96, [2304]); sum_96 = None
+ permute_260 = torch.ops.aten.permute.default(permute_259, [1, 0]); permute_259 = None
+ view_275 = torch.ops.aten.view.default(mm_65, [1, 64, 768]); mm_65 = None
+ mul_283 = torch.ops.aten.mul.Tensor(view_275, primals_52); primals_52 = None
+ mul_284 = torch.ops.aten.mul.Tensor(mul_283, 768)
+ sum_97 = torch.ops.aten.sum.dim_IntList(mul_283, [2], True)
+ mul_285 = torch.ops.aten.mul.Tensor(mul_283, mul_32); mul_283 = None
+ sum_98 = torch.ops.aten.sum.dim_IntList(mul_285, [2], True); mul_285 = None
+ mul_286 = torch.ops.aten.mul.Tensor(mul_32, sum_98); sum_98 = None
+ sub_82 = torch.ops.aten.sub.Tensor(mul_284, sum_97); mul_284 = sum_97 = None
+ sub_83 = torch.ops.aten.sub.Tensor(sub_82, mul_286); sub_82 = mul_286 = None
+ mul_287 = torch.ops.aten.mul.Tensor(div_16, sub_83); div_16 = sub_83 = None
+ mul_288 = torch.ops.aten.mul.Tensor(view_275, mul_32); mul_32 = None
+ sum_99 = torch.ops.aten.sum.dim_IntList(mul_288, [0, 1]); mul_288 = None
+ sum_100 = torch.ops.aten.sum.dim_IntList(view_275, [0, 1]); view_275 = None
+ add_130 = torch.ops.aten.add.Tensor(add_129, mul_287); add_129 = mul_287 = None
+ view_276 = torch.ops.aten.view.default(add_130, [64, 768])
+ mm_67 = torch.ops.aten.mm.default(view_276, permute_261); permute_261 = None
+ permute_262 = torch.ops.aten.permute.default(view_276, [1, 0])
+ mm_68 = torch.ops.aten.mm.default(permute_262, view_46); permute_262 = view_46 = None
+ permute_263 = torch.ops.aten.permute.default(mm_68, [1, 0]); mm_68 = None
+ sum_101 = torch.ops.aten.sum.dim_IntList(view_276, [0], True); view_276 = None
+ view_277 = torch.ops.aten.view.default(sum_101, [768]); sum_101 = None
+ permute_264 = torch.ops.aten.permute.default(permute_263, [1, 0]); permute_263 = None
+ view_278 = torch.ops.aten.view.default(mm_67, [1, 64, 3072]); mm_67 = None
+ view_45 = torch.ops.aten.view.default(addmm_14, [1, 64, 3072]); addmm_14 = None
+ mul_28 = torch.ops.aten.mul.Tensor(view_45, 0.5)
+ mul_289 = torch.ops.aten.mul.Tensor(view_278, mul_28); mul_28 = None
+ pow_4 = torch.ops.aten.pow.Tensor_Scalar(view_45, 3.0)
+ mul_29 = torch.ops.aten.mul.Tensor(pow_4, 0.044715); pow_4 = None
+ add_30 = torch.ops.aten.add.Tensor(view_45, mul_29); mul_29 = None
+ mul_30 = torch.ops.aten.mul.Tensor(add_30, 0.7978845608028654); add_30 = None
+ tanh_3 = torch.ops.aten.tanh.default(mul_30); mul_30 = None
+ add_31 = torch.ops.aten.add.Tensor(tanh_3, 1.0)
+ mul_290 = torch.ops.aten.mul.Tensor(view_278, add_31); view_278 = add_31 = None
+ mul_291 = torch.ops.aten.mul.Tensor(tanh_3, tanh_3); tanh_3 = None
+ sub_84 = torch.ops.aten.sub.Tensor(1, mul_291); mul_291 = None
+ mul_292 = torch.ops.aten.mul.Tensor(mul_289, sub_84); mul_289 = sub_84 = None
+ mul_293 = torch.ops.aten.mul.Tensor(mul_292, 0.7978845608028654); mul_292 = None
+ mul_294 = torch.ops.aten.mul.Tensor(mul_293, 0.044715)
+ pow_21 = torch.ops.aten.pow.Tensor_Scalar(view_45, 2.0); view_45 = None
+ mul_295 = torch.ops.aten.mul.Scalar(pow_21, 3.0); pow_21 = None
+ mul_296 = torch.ops.aten.mul.Tensor(mul_294, mul_295); mul_294 = mul_295 = None
+ add_131 = torch.ops.aten.add.Tensor(mul_293, mul_296); mul_293 = mul_296 = None
+ mul_297 = torch.ops.aten.mul.Tensor(mul_290, 0.5); mul_290 = None
+ add_132 = torch.ops.aten.add.Tensor(add_131, mul_297); add_131 = mul_297 = None
+ view_279 = torch.ops.aten.view.default(add_132, [64, 3072]); add_132 = None
+ mm_69 = torch.ops.aten.mm.default(view_279, permute_265); permute_265 = None
+ permute_266 = torch.ops.aten.permute.default(view_279, [1, 0])
+ mm_70 = torch.ops.aten.mm.default(permute_266, view_44); permute_266 = view_44 = None
+ permute_267 = torch.ops.aten.permute.default(mm_70, [1, 0]); mm_70 = None
+ sum_102 = torch.ops.aten.sum.dim_IntList(view_279, [0], True); view_279 = None
+ view_280 = torch.ops.aten.view.default(sum_102, [3072]); sum_102 = None
+ permute_268 = torch.ops.aten.permute.default(permute_267, [1, 0]); permute_267 = None
+ view_281 = torch.ops.aten.view.default(mm_69, [1, 64, 768]); mm_69 = None
+ mul_299 = torch.ops.aten.mul.Tensor(view_281, primals_46); primals_46 = None
+ mul_300 = torch.ops.aten.mul.Tensor(mul_299, 768)
+ sum_103 = torch.ops.aten.sum.dim_IntList(mul_299, [2], True)
+ mul_301 = torch.ops.aten.mul.Tensor(mul_299, mul_26); mul_299 = None
+ sum_104 = torch.ops.aten.sum.dim_IntList(mul_301, [2], True); mul_301 = None
+ mul_302 = torch.ops.aten.mul.Tensor(mul_26, sum_104); sum_104 = None
+ sub_86 = torch.ops.aten.sub.Tensor(mul_300, sum_103); mul_300 = sum_103 = None
+ sub_87 = torch.ops.aten.sub.Tensor(sub_86, mul_302); sub_86 = mul_302 = None
+ mul_303 = torch.ops.aten.mul.Tensor(div_17, sub_87); div_17 = sub_87 = None
+ mul_304 = torch.ops.aten.mul.Tensor(view_281, mul_26); mul_26 = None
+ sum_105 = torch.ops.aten.sum.dim_IntList(mul_304, [0, 1]); mul_304 = None
+ sum_106 = torch.ops.aten.sum.dim_IntList(view_281, [0, 1]); view_281 = None
+ add_133 = torch.ops.aten.add.Tensor(add_130, mul_303); add_130 = mul_303 = None
+ view_282 = torch.ops.aten.view.default(add_133, [64, 768])
+ mm_71 = torch.ops.aten.mm.default(view_282, permute_269); permute_269 = None
+ permute_270 = torch.ops.aten.permute.default(view_282, [1, 0])
+ permute_28 = torch.ops.aten.permute.default(getitem_38, [0, 2, 1, 3])
+ view_41 = torch.ops.aten.view.default(permute_28, [1, 64, 768]); permute_28 = None
+ view_42 = torch.ops.aten.view.default(view_41, [64, 768]); view_41 = None
+ mm_72 = torch.ops.aten.mm.default(permute_270, view_42); permute_270 = view_42 = None
+ permute_271 = torch.ops.aten.permute.default(mm_72, [1, 0]); mm_72 = None
+ sum_107 = torch.ops.aten.sum.dim_IntList(view_282, [0], True); view_282 = None
+ view_283 = torch.ops.aten.view.default(sum_107, [768]); sum_107 = None
+ permute_272 = torch.ops.aten.permute.default(permute_271, [1, 0]); permute_271 = None
+ view_284 = torch.ops.aten.view.default(mm_71, [1, 64, 768]); mm_71 = None
+ view_285 = torch.ops.aten.view.default(view_284, [1, 64, 12, 64]); view_284 = None
+ permute_273 = torch.ops.aten.permute.default(view_285, [0, 2, 1, 3]); view_285 = None
+ _scaled_dot_product_efficient_attention_backward_8 = torch.ops.aten._scaled_dot_product_efficient_attention_backward.default(permute_273, permute_26, permute_25, permute_27, None, getitem_38, getitem_39, getitem_40, getitem_41, 0.0, [True, True, True, False], True); permute_273 = permute_26 = permute_25 = permute_27 = getitem_38 = getitem_39 = getitem_40 = getitem_41 = None
+ getitem_166 = _scaled_dot_product_efficient_attention_backward_8[0]
+ getitem_167 = _scaled_dot_product_efficient_attention_backward_8[1]
+ getitem_168 = _scaled_dot_product_efficient_attention_backward_8[2]; _scaled_dot_product_efficient_attention_backward_8 = None
+ permute_274 = torch.ops.aten.permute.default(getitem_168, [0, 2, 1, 3]); getitem_168 = None
+ view_286 = torch.ops.aten.view.default(permute_274, [1, 64, 768]); permute_274 = None
+ permute_275 = torch.ops.aten.permute.default(getitem_166, [0, 2, 1, 3]); getitem_166 = None
+ view_287 = torch.ops.aten.view.default(permute_275, [1, 64, 768]); permute_275 = None
+ permute_276 = torch.ops.aten.permute.default(getitem_167, [0, 2, 1, 3]); getitem_167 = None
+ view_288 = torch.ops.aten.view.default(permute_276, [1, 64, 768]); permute_276 = None
+ cat_8 = torch.ops.aten.cat.default([view_287, view_288, view_286], 2); view_287 = view_288 = view_286 = None
+ view_289 = torch.ops.aten.view.default(cat_8, [64, 2304]); cat_8 = None
+ mm_73 = torch.ops.aten.mm.default(view_289, permute_277); permute_277 = None
+ permute_278 = torch.ops.aten.permute.default(view_289, [1, 0])
+ mm_74 = torch.ops.aten.mm.default(permute_278, view_36); permute_278 = view_36 = None
+ permute_279 = torch.ops.aten.permute.default(mm_74, [1, 0]); mm_74 = None
+ sum_108 = torch.ops.aten.sum.dim_IntList(view_289, [0], True); view_289 = None
+ view_290 = torch.ops.aten.view.default(sum_108, [2304]); sum_108 = None
+ permute_280 = torch.ops.aten.permute.default(permute_279, [1, 0]); permute_279 = None
+ view_291 = torch.ops.aten.view.default(mm_73, [1, 64, 768]); mm_73 = None
+ mul_306 = torch.ops.aten.mul.Tensor(view_291, primals_40); primals_40 = None
+ mul_307 = torch.ops.aten.mul.Tensor(mul_306, 768)
+ sum_109 = torch.ops.aten.sum.dim_IntList(mul_306, [2], True)
+ mul_308 = torch.ops.aten.mul.Tensor(mul_306, mul_24); mul_306 = None
+ sum_110 = torch.ops.aten.sum.dim_IntList(mul_308, [2], True); mul_308 = None
+ mul_309 = torch.ops.aten.mul.Tensor(mul_24, sum_110); sum_110 = None
+ sub_89 = torch.ops.aten.sub.Tensor(mul_307, sum_109); mul_307 = sum_109 = None
+ sub_90 = torch.ops.aten.sub.Tensor(sub_89, mul_309); sub_89 = mul_309 = None
+ mul_310 = torch.ops.aten.mul.Tensor(div_18, sub_90); div_18 = sub_90 = None
+ mul_311 = torch.ops.aten.mul.Tensor(view_291, mul_24); mul_24 = None
+ sum_111 = torch.ops.aten.sum.dim_IntList(mul_311, [0, 1]); mul_311 = None
+ sum_112 = torch.ops.aten.sum.dim_IntList(view_291, [0, 1]); view_291 = None
+ add_134 = torch.ops.aten.add.Tensor(add_133, mul_310); add_133 = mul_310 = None
+ view_292 = torch.ops.aten.view.default(add_134, [64, 768])
+ mm_75 = torch.ops.aten.mm.default(view_292, permute_281); permute_281 = None
+ permute_282 = torch.ops.aten.permute.default(view_292, [1, 0])
+ mm_76 = torch.ops.aten.mm.default(permute_282, view_34); permute_282 = view_34 = None
+ permute_283 = torch.ops.aten.permute.default(mm_76, [1, 0]); mm_76 = None
+ sum_113 = torch.ops.aten.sum.dim_IntList(view_292, [0], True); view_292 = None
+ view_293 = torch.ops.aten.view.default(sum_113, [768]); sum_113 = None
+ permute_284 = torch.ops.aten.permute.default(permute_283, [1, 0]); permute_283 = None
+ view_294 = torch.ops.aten.view.default(mm_75, [1, 64, 3072]); mm_75 = None
+ view_33 = torch.ops.aten.view.default(addmm_10, [1, 64, 3072]); addmm_10 = None
+ mul_20 = torch.ops.aten.mul.Tensor(view_33, 0.5)
+ mul_312 = torch.ops.aten.mul.Tensor(view_294, mul_20); mul_20 = None
+ pow_3 = torch.ops.aten.pow.Tensor_Scalar(view_33, 3.0)
+ mul_21 = torch.ops.aten.mul.Tensor(pow_3, 0.044715); pow_3 = None
+ add_22 = torch.ops.aten.add.Tensor(view_33, mul_21); mul_21 = None
+ mul_22 = torch.ops.aten.mul.Tensor(add_22, 0.7978845608028654); add_22 = None
+ tanh_2 = torch.ops.aten.tanh.default(mul_22); mul_22 = None
+ add_23 = torch.ops.aten.add.Tensor(tanh_2, 1.0)
+ mul_313 = torch.ops.aten.mul.Tensor(view_294, add_23); view_294 = add_23 = None
+ mul_314 = torch.ops.aten.mul.Tensor(tanh_2, tanh_2); tanh_2 = None
+ sub_91 = torch.ops.aten.sub.Tensor(1, mul_314); mul_314 = None
+ mul_315 = torch.ops.aten.mul.Tensor(mul_312, sub_91); mul_312 = sub_91 = None
+ mul_316 = torch.ops.aten.mul.Tensor(mul_315, 0.7978845608028654); mul_315 = None
+ mul_317 = torch.ops.aten.mul.Tensor(mul_316, 0.044715)
+ pow_22 = torch.ops.aten.pow.Tensor_Scalar(view_33, 2.0); view_33 = None
+ mul_318 = torch.ops.aten.mul.Scalar(pow_22, 3.0); pow_22 = None
+ mul_319 = torch.ops.aten.mul.Tensor(mul_317, mul_318); mul_317 = mul_318 = None
+ add_135 = torch.ops.aten.add.Tensor(mul_316, mul_319); mul_316 = mul_319 = None
+ mul_320 = torch.ops.aten.mul.Tensor(mul_313, 0.5); mul_313 = None
+ add_136 = torch.ops.aten.add.Tensor(add_135, mul_320); add_135 = mul_320 = None
+ view_295 = torch.ops.aten.view.default(add_136, [64, 3072]); add_136 = None
+ mm_77 = torch.ops.aten.mm.default(view_295, permute_285); permute_285 = None
+ permute_286 = torch.ops.aten.permute.default(view_295, [1, 0])
+ mm_78 = torch.ops.aten.mm.default(permute_286, view_32); permute_286 = view_32 = None
+ permute_287 = torch.ops.aten.permute.default(mm_78, [1, 0]); mm_78 = None
+ sum_114 = torch.ops.aten.sum.dim_IntList(view_295, [0], True); view_295 = None
+ view_296 = torch.ops.aten.view.default(sum_114, [3072]); sum_114 = None
+ permute_288 = torch.ops.aten.permute.default(permute_287, [1, 0]); permute_287 = None
+ view_297 = torch.ops.aten.view.default(mm_77, [1, 64, 768]); mm_77 = None
+ mul_322 = torch.ops.aten.mul.Tensor(view_297, primals_34); primals_34 = None
+ mul_323 = torch.ops.aten.mul.Tensor(mul_322, 768)
+ sum_115 = torch.ops.aten.sum.dim_IntList(mul_322, [2], True)
+ mul_324 = torch.ops.aten.mul.Tensor(mul_322, mul_18); mul_322 = None
+ sum_116 = torch.ops.aten.sum.dim_IntList(mul_324, [2], True); mul_324 = None
+ mul_325 = torch.ops.aten.mul.Tensor(mul_18, sum_116); sum_116 = None
+ sub_93 = torch.ops.aten.sub.Tensor(mul_323, sum_115); mul_323 = sum_115 = None
+ sub_94 = torch.ops.aten.sub.Tensor(sub_93, mul_325); sub_93 = mul_325 = None
+ mul_326 = torch.ops.aten.mul.Tensor(div_19, sub_94); div_19 = sub_94 = None
+ mul_327 = torch.ops.aten.mul.Tensor(view_297, mul_18); mul_18 = None
+ sum_117 = torch.ops.aten.sum.dim_IntList(mul_327, [0, 1]); mul_327 = None
+ sum_118 = torch.ops.aten.sum.dim_IntList(view_297, [0, 1]); view_297 = None
+ add_137 = torch.ops.aten.add.Tensor(add_134, mul_326); add_134 = mul_326 = None
+ view_298 = torch.ops.aten.view.default(add_137, [64, 768])
+ mm_79 = torch.ops.aten.mm.default(view_298, permute_289); permute_289 = None
+ permute_290 = torch.ops.aten.permute.default(view_298, [1, 0])
+ permute_20 = torch.ops.aten.permute.default(getitem_27, [0, 2, 1, 3])
+ view_29 = torch.ops.aten.view.default(permute_20, [1, 64, 768]); permute_20 = None
+ view_30 = torch.ops.aten.view.default(view_29, [64, 768]); view_29 = None
+ mm_80 = torch.ops.aten.mm.default(permute_290, view_30); permute_290 = view_30 = None
+ permute_291 = torch.ops.aten.permute.default(mm_80, [1, 0]); mm_80 = None
+ sum_119 = torch.ops.aten.sum.dim_IntList(view_298, [0], True); view_298 = None
+ view_299 = torch.ops.aten.view.default(sum_119, [768]); sum_119 = None
+ permute_292 = torch.ops.aten.permute.default(permute_291, [1, 0]); permute_291 = None
+ view_300 = torch.ops.aten.view.default(mm_79, [1, 64, 768]); mm_79 = None
+ view_301 = torch.ops.aten.view.default(view_300, [1, 64, 12, 64]); view_300 = None
+ permute_293 = torch.ops.aten.permute.default(view_301, [0, 2, 1, 3]); view_301 = None
+ _scaled_dot_product_efficient_attention_backward_9 = torch.ops.aten._scaled_dot_product_efficient_attention_backward.default(permute_293, permute_18, permute_17, permute_19, None, getitem_27, getitem_28, getitem_29, getitem_30, 0.0, [True, True, True, False], True); permute_293 = permute_18 = permute_17 = permute_19 = getitem_27 = getitem_28 = getitem_29 = getitem_30 = None
+ getitem_170 = _scaled_dot_product_efficient_attention_backward_9[0]
+ getitem_171 = _scaled_dot_product_efficient_attention_backward_9[1]
+ getitem_172 = _scaled_dot_product_efficient_attention_backward_9[2]; _scaled_dot_product_efficient_attention_backward_9 = None
+ permute_294 = torch.ops.aten.permute.default(getitem_172, [0, 2, 1, 3]); getitem_172 = None
+ view_302 = torch.ops.aten.view.default(permute_294, [1, 64, 768]); permute_294 = None
+ permute_295 = torch.ops.aten.permute.default(getitem_170, [0, 2, 1, 3]); getitem_170 = None
+ view_303 = torch.ops.aten.view.default(permute_295, [1, 64, 768]); permute_295 = None
+ permute_296 = torch.ops.aten.permute.default(getitem_171, [0, 2, 1, 3]); getitem_171 = None
+ view_304 = torch.ops.aten.view.default(permute_296, [1, 64, 768]); permute_296 = None
+ cat_9 = torch.ops.aten.cat.default([view_303, view_304, view_302], 2); view_303 = view_304 = view_302 = None
+ view_305 = torch.ops.aten.view.default(cat_9, [64, 2304]); cat_9 = None
+ mm_81 = torch.ops.aten.mm.default(view_305, permute_297); permute_297 = None
+ permute_298 = torch.ops.aten.permute.default(view_305, [1, 0])
+ mm_82 = torch.ops.aten.mm.default(permute_298, view_24); permute_298 = view_24 = None
+ permute_299 = torch.ops.aten.permute.default(mm_82, [1, 0]); mm_82 = None
+ sum_120 = torch.ops.aten.sum.dim_IntList(view_305, [0], True); view_305 = None
+ view_306 = torch.ops.aten.view.default(sum_120, [2304]); sum_120 = None
+ permute_300 =
torch.ops.aten.permute.default(permute_299, [1, 0]); permute_299 = None + view_307 = torch.ops.aten.view.default(mm_81, [1, 64, 768]); mm_81 = None + mul_329 = torch.ops.aten.mul.Tensor(view_307, primals_28); primals_28 = None + mul_330 = torch.ops.aten.mul.Tensor(mul_329, 768) + sum_121 = torch.ops.aten.sum.dim_IntList(mul_329, [2], True) + mul_331 = torch.ops.aten.mul.Tensor(mul_329, mul_16); mul_329 = None + sum_122 = torch.ops.aten.sum.dim_IntList(mul_331, [2], True); mul_331 = None + mul_332 = torch.ops.aten.mul.Tensor(mul_16, sum_122); sum_122 = None + sub_96 = torch.ops.aten.sub.Tensor(mul_330, sum_121); mul_330 = sum_121 = None + sub_97 = torch.ops.aten.sub.Tensor(sub_96, mul_332); sub_96 = mul_332 = None + mul_333 = torch.ops.aten.mul.Tensor(div_20, sub_97); div_20 = sub_97 = None + mul_334 = torch.ops.aten.mul.Tensor(view_307, mul_16); mul_16 = None + sum_123 = torch.ops.aten.sum.dim_IntList(mul_334, [0, 1]); mul_334 = None + sum_124 = torch.ops.aten.sum.dim_IntList(view_307, [0, 1]); view_307 = None + add_138 = torch.ops.aten.add.Tensor(add_137, mul_333); add_137 = mul_333 = None + view_308 = torch.ops.aten.view.default(add_138, [64, 768]) + mm_83 = torch.ops.aten.mm.default(view_308, permute_301); permute_301 = None + permute_302 = torch.ops.aten.permute.default(view_308, [1, 0]) + mm_84 = torch.ops.aten.mm.default(permute_302, view_22); permute_302 = view_22 = None + permute_303 = torch.ops.aten.permute.default(mm_84, [1, 0]); mm_84 = None + sum_125 = torch.ops.aten.sum.dim_IntList(view_308, [0], True); view_308 = None + view_309 = torch.ops.aten.view.default(sum_125, [768]); sum_125 = None + permute_304 = torch.ops.aten.permute.default(permute_303, [1, 0]); permute_303 = None + view_310 = torch.ops.aten.view.default(mm_83, [1, 64, 3072]); mm_83 = None + view_21 = torch.ops.aten.view.default(addmm_6, [1, 64, 3072]); addmm_6 = None + mul_12 = torch.ops.aten.mul.Tensor(view_21, 0.5) + mul_335 = torch.ops.aten.mul.Tensor(view_310, mul_12); mul_12 = None + pow_2 = torch.ops.aten.pow.Tensor_Scalar(view_21, 3.0) + mul_13 = torch.ops.aten.mul.Tensor(pow_2, 0.044715); pow_2 = None + add_14 = torch.ops.aten.add.Tensor(view_21, mul_13); mul_13 = None + mul_14 = torch.ops.aten.mul.Tensor(add_14, 0.7978845608028654); add_14 = None + tanh_1 = torch.ops.aten.tanh.default(mul_14); mul_14 = None + add_15 = torch.ops.aten.add.Tensor(tanh_1, 1.0) + mul_336 = torch.ops.aten.mul.Tensor(view_310, add_15); view_310 = add_15 = None + mul_337 = torch.ops.aten.mul.Tensor(tanh_1, tanh_1); tanh_1 = None + sub_98 = torch.ops.aten.sub.Tensor(1, mul_337); mul_337 = None + mul_338 = torch.ops.aten.mul.Tensor(mul_335, sub_98); mul_335 = sub_98 = None + mul_339 = torch.ops.aten.mul.Tensor(mul_338, 0.7978845608028654); mul_338 = None + mul_340 = torch.ops.aten.mul.Tensor(mul_339, 0.044715) + pow_23 = torch.ops.aten.pow.Tensor_Scalar(view_21, 2.0); view_21 = None + mul_341 = torch.ops.aten.mul.Scalar(pow_23, 3.0); pow_23 = None + mul_342 = torch.ops.aten.mul.Tensor(mul_340, mul_341); mul_340 = mul_341 = None + add_139 = torch.ops.aten.add.Tensor(mul_339, mul_342); mul_339 = mul_342 = None + mul_343 = torch.ops.aten.mul.Tensor(mul_336, 0.5); mul_336 = None + add_140 = torch.ops.aten.add.Tensor(add_139, mul_343); add_139 = mul_343 = None + view_311 = torch.ops.aten.view.default(add_140, [64, 3072]); add_140 = None + mm_85 = torch.ops.aten.mm.default(view_311, permute_305); permute_305 = None + permute_306 = torch.ops.aten.permute.default(view_311, [1, 0]) + mm_86 = torch.ops.aten.mm.default(permute_306, view_20); 
permute_306 = view_20 = None + permute_307 = torch.ops.aten.permute.default(mm_86, [1, 0]); mm_86 = None + sum_126 = torch.ops.aten.sum.dim_IntList(view_311, [0], True); view_311 = None + view_312 = torch.ops.aten.view.default(sum_126, [3072]); sum_126 = None + permute_308 = torch.ops.aten.permute.default(permute_307, [1, 0]); permute_307 = None + view_313 = torch.ops.aten.view.default(mm_85, [1, 64, 768]); mm_85 = None + mul_345 = torch.ops.aten.mul.Tensor(view_313, primals_22); primals_22 = None + mul_346 = torch.ops.aten.mul.Tensor(mul_345, 768) + sum_127 = torch.ops.aten.sum.dim_IntList(mul_345, [2], True) + mul_347 = torch.ops.aten.mul.Tensor(mul_345, mul_10); mul_345 = None + sum_128 = torch.ops.aten.sum.dim_IntList(mul_347, [2], True); mul_347 = None + mul_348 = torch.ops.aten.mul.Tensor(mul_10, sum_128); sum_128 = None + sub_100 = torch.ops.aten.sub.Tensor(mul_346, sum_127); mul_346 = sum_127 = None + sub_101 = torch.ops.aten.sub.Tensor(sub_100, mul_348); sub_100 = mul_348 = None + mul_349 = torch.ops.aten.mul.Tensor(div_21, sub_101); div_21 = sub_101 = None + mul_350 = torch.ops.aten.mul.Tensor(view_313, mul_10); mul_10 = None + sum_129 = torch.ops.aten.sum.dim_IntList(mul_350, [0, 1]); mul_350 = None + sum_130 = torch.ops.aten.sum.dim_IntList(view_313, [0, 1]); view_313 = None + add_141 = torch.ops.aten.add.Tensor(add_138, mul_349); add_138 = mul_349 = None + view_314 = torch.ops.aten.view.default(add_141, [64, 768]) + mm_87 = torch.ops.aten.mm.default(view_314, permute_309); permute_309 = None + permute_310 = torch.ops.aten.permute.default(view_314, [1, 0]) + permute_12 = torch.ops.aten.permute.default(getitem_16, [0, 2, 1, 3]) + view_17 = torch.ops.aten.view.default(permute_12, [1, 64, 768]); permute_12 = None + view_18 = torch.ops.aten.view.default(view_17, [64, 768]); view_17 = None + mm_88 = torch.ops.aten.mm.default(permute_310, view_18); permute_310 = view_18 = None + permute_311 = torch.ops.aten.permute.default(mm_88, [1, 0]); mm_88 = None + sum_131 = torch.ops.aten.sum.dim_IntList(view_314, [0], True); view_314 = None + view_315 = torch.ops.aten.view.default(sum_131, [768]); sum_131 = None + permute_312 = torch.ops.aten.permute.default(permute_311, [1, 0]); permute_311 = None + view_316 = torch.ops.aten.view.default(mm_87, [1, 64, 768]); mm_87 = None + view_317 = torch.ops.aten.view.default(view_316, [1, 64, 12, 64]); view_316 = None + permute_313 = torch.ops.aten.permute.default(view_317, [0, 2, 1, 3]); view_317 = None + _scaled_dot_product_efficient_attention_backward_10 = torch.ops.aten._scaled_dot_product_efficient_attention_backward.default(permute_313, permute_10, permute_9, permute_11, None, getitem_16, getitem_17, getitem_18, getitem_19, 0.0, [True, True, True, False], True); permute_313 = permute_10 = permute_9 = permute_11 = getitem_16 = getitem_17 = getitem_18 = getitem_19 = None + getitem_174 = _scaled_dot_product_efficient_attention_backward_10[0] + getitem_175 = _scaled_dot_product_efficient_attention_backward_10[1] + getitem_176 = _scaled_dot_product_efficient_attention_backward_10[2]; _scaled_dot_product_efficient_attention_backward_10 = None + permute_314 = torch.ops.aten.permute.default(getitem_176, [0, 2, 1, 3]); getitem_176 = None + view_318 = torch.ops.aten.view.default(permute_314, [1, 64, 768]); permute_314 = None + permute_315 = torch.ops.aten.permute.default(getitem_174, [0, 2, 1, 3]); getitem_174 = None + view_319 = torch.ops.aten.view.default(permute_315, [1, 64, 768]); permute_315 = None + permute_316 = 
torch.ops.aten.permute.default(getitem_175, [0, 2, 1, 3]); getitem_175 = None + view_320 = torch.ops.aten.view.default(permute_316, [1, 64, 768]); permute_316 = None + cat_10 = torch.ops.aten.cat.default([view_319, view_320, view_318], 2); view_319 = view_320 = view_318 = None + view_321 = torch.ops.aten.view.default(cat_10, [64, 2304]); cat_10 = None + mm_89 = torch.ops.aten.mm.default(view_321, permute_317); permute_317 = None + permute_318 = torch.ops.aten.permute.default(view_321, [1, 0]) + mm_90 = torch.ops.aten.mm.default(permute_318, view_12); permute_318 = view_12 = None + permute_319 = torch.ops.aten.permute.default(mm_90, [1, 0]); mm_90 = None + sum_132 = torch.ops.aten.sum.dim_IntList(view_321, [0], True); view_321 = None + view_322 = torch.ops.aten.view.default(sum_132, [2304]); sum_132 = None + permute_320 = torch.ops.aten.permute.default(permute_319, [1, 0]); permute_319 = None + view_323 = torch.ops.aten.view.default(mm_89, [1, 64, 768]); mm_89 = None + mul_352 = torch.ops.aten.mul.Tensor(view_323, primals_16); primals_16 = None + mul_353 = torch.ops.aten.mul.Tensor(mul_352, 768) + sum_133 = torch.ops.aten.sum.dim_IntList(mul_352, [2], True) + mul_354 = torch.ops.aten.mul.Tensor(mul_352, mul_8); mul_352 = None + sum_134 = torch.ops.aten.sum.dim_IntList(mul_354, [2], True); mul_354 = None + mul_355 = torch.ops.aten.mul.Tensor(mul_8, sum_134); sum_134 = None + sub_103 = torch.ops.aten.sub.Tensor(mul_353, sum_133); mul_353 = sum_133 = None + sub_104 = torch.ops.aten.sub.Tensor(sub_103, mul_355); sub_103 = mul_355 = None + mul_356 = torch.ops.aten.mul.Tensor(div_22, sub_104); div_22 = sub_104 = None + mul_357 = torch.ops.aten.mul.Tensor(view_323, mul_8); mul_8 = None + sum_135 = torch.ops.aten.sum.dim_IntList(mul_357, [0, 1]); mul_357 = None + sum_136 = torch.ops.aten.sum.dim_IntList(view_323, [0, 1]); view_323 = None + add_142 = torch.ops.aten.add.Tensor(add_141, mul_356); add_141 = mul_356 = None + view_324 = torch.ops.aten.view.default(add_142, [64, 768]) + mm_91 = torch.ops.aten.mm.default(view_324, permute_321); permute_321 = None + permute_322 = torch.ops.aten.permute.default(view_324, [1, 0]) + mm_92 = torch.ops.aten.mm.default(permute_322, view_10); permute_322 = view_10 = None + permute_323 = torch.ops.aten.permute.default(mm_92, [1, 0]); mm_92 = None + sum_137 = torch.ops.aten.sum.dim_IntList(view_324, [0], True); view_324 = None + view_325 = torch.ops.aten.view.default(sum_137, [768]); sum_137 = None + permute_324 = torch.ops.aten.permute.default(permute_323, [1, 0]); permute_323 = None + view_326 = torch.ops.aten.view.default(mm_91, [1, 64, 3072]); mm_91 = None + view_9 = torch.ops.aten.view.default(addmm_2, [1, 64, 3072]); addmm_2 = None + mul_4 = torch.ops.aten.mul.Tensor(view_9, 0.5) + mul_358 = torch.ops.aten.mul.Tensor(view_326, mul_4); mul_4 = None + pow_1 = torch.ops.aten.pow.Tensor_Scalar(view_9, 3.0) + mul_5 = torch.ops.aten.mul.Tensor(pow_1, 0.044715); pow_1 = None + add_6 = torch.ops.aten.add.Tensor(view_9, mul_5); mul_5 = None + mul_6 = torch.ops.aten.mul.Tensor(add_6, 0.7978845608028654); add_6 = None + tanh = torch.ops.aten.tanh.default(mul_6); mul_6 = None + add_7 = torch.ops.aten.add.Tensor(tanh, 1.0) + mul_359 = torch.ops.aten.mul.Tensor(view_326, add_7); view_326 = add_7 = None + mul_360 = torch.ops.aten.mul.Tensor(tanh, tanh); tanh = None + sub_105 = torch.ops.aten.sub.Tensor(1, mul_360); mul_360 = None + mul_361 = torch.ops.aten.mul.Tensor(mul_358, sub_105); mul_358 = sub_105 = None + mul_362 = torch.ops.aten.mul.Tensor(mul_361, 
0.7978845608028654); mul_361 = None + mul_363 = torch.ops.aten.mul.Tensor(mul_362, 0.044715) + pow_24 = torch.ops.aten.pow.Tensor_Scalar(view_9, 2.0); view_9 = None + mul_364 = torch.ops.aten.mul.Scalar(pow_24, 3.0); pow_24 = None + mul_365 = torch.ops.aten.mul.Tensor(mul_363, mul_364); mul_363 = mul_364 = None + add_143 = torch.ops.aten.add.Tensor(mul_362, mul_365); mul_362 = mul_365 = None + mul_366 = torch.ops.aten.mul.Tensor(mul_359, 0.5); mul_359 = None + add_144 = torch.ops.aten.add.Tensor(add_143, mul_366); add_143 = mul_366 = None + view_327 = torch.ops.aten.view.default(add_144, [64, 3072]); add_144 = None + mm_93 = torch.ops.aten.mm.default(view_327, permute_325); permute_325 = None + permute_326 = torch.ops.aten.permute.default(view_327, [1, 0]) + mm_94 = torch.ops.aten.mm.default(permute_326, view_8); permute_326 = view_8 = None + permute_327 = torch.ops.aten.permute.default(mm_94, [1, 0]); mm_94 = None + sum_138 = torch.ops.aten.sum.dim_IntList(view_327, [0], True); view_327 = None + view_328 = torch.ops.aten.view.default(sum_138, [3072]); sum_138 = None + permute_328 = torch.ops.aten.permute.default(permute_327, [1, 0]); permute_327 = None + view_329 = torch.ops.aten.view.default(mm_93, [1, 64, 768]); mm_93 = None + mul_368 = torch.ops.aten.mul.Tensor(view_329, primals_10); primals_10 = None + mul_369 = torch.ops.aten.mul.Tensor(mul_368, 768) + sum_139 = torch.ops.aten.sum.dim_IntList(mul_368, [2], True) + mul_370 = torch.ops.aten.mul.Tensor(mul_368, mul_2); mul_368 = None + sum_140 = torch.ops.aten.sum.dim_IntList(mul_370, [2], True); mul_370 = None + mul_371 = torch.ops.aten.mul.Tensor(mul_2, sum_140); sum_140 = None + sub_107 = torch.ops.aten.sub.Tensor(mul_369, sum_139); mul_369 = sum_139 = None + sub_108 = torch.ops.aten.sub.Tensor(sub_107, mul_371); sub_107 = mul_371 = None + mul_372 = torch.ops.aten.mul.Tensor(div_23, sub_108); div_23 = sub_108 = None + mul_373 = torch.ops.aten.mul.Tensor(view_329, mul_2); mul_2 = None + sum_141 = torch.ops.aten.sum.dim_IntList(mul_373, [0, 1]); mul_373 = None + sum_142 = torch.ops.aten.sum.dim_IntList(view_329, [0, 1]); view_329 = None + add_145 = torch.ops.aten.add.Tensor(add_142, mul_372); add_142 = mul_372 = None + view_330 = torch.ops.aten.view.default(add_145, [64, 768]) + mm_95 = torch.ops.aten.mm.default(view_330, permute_329); permute_329 = None + permute_330 = torch.ops.aten.permute.default(view_330, [1, 0]) + permute_4 = torch.ops.aten.permute.default(getitem_5, [0, 2, 1, 3]) + view_5 = torch.ops.aten.view.default(permute_4, [1, 64, 768]); permute_4 = None + view_6 = torch.ops.aten.view.default(view_5, [64, 768]); view_5 = None + mm_96 = torch.ops.aten.mm.default(permute_330, view_6); permute_330 = view_6 = None + permute_331 = torch.ops.aten.permute.default(mm_96, [1, 0]); mm_96 = None + sum_143 = torch.ops.aten.sum.dim_IntList(view_330, [0], True); view_330 = None + view_331 = torch.ops.aten.view.default(sum_143, [768]); sum_143 = None + permute_332 = torch.ops.aten.permute.default(permute_331, [1, 0]); permute_331 = None + view_332 = torch.ops.aten.view.default(mm_95, [1, 64, 768]); mm_95 = None + view_333 = torch.ops.aten.view.default(view_332, [1, 64, 12, 64]); view_332 = None + permute_333 = torch.ops.aten.permute.default(view_333, [0, 2, 1, 3]); view_333 = None + _scaled_dot_product_efficient_attention_backward_11 = torch.ops.aten._scaled_dot_product_efficient_attention_backward.default(permute_333, permute_2, permute_1, permute_3, None, getitem_5, getitem_6, getitem_7, getitem_8, 0.0, [True, True, True, False], 
True); permute_333 = permute_2 = permute_1 = permute_3 = getitem_5 = getitem_6 = getitem_7 = getitem_8 = None + getitem_178 = _scaled_dot_product_efficient_attention_backward_11[0] + getitem_179 = _scaled_dot_product_efficient_attention_backward_11[1] + getitem_180 = _scaled_dot_product_efficient_attention_backward_11[2]; _scaled_dot_product_efficient_attention_backward_11 = None + permute_334 = torch.ops.aten.permute.default(getitem_180, [0, 2, 1, 3]); getitem_180 = None + view_334 = torch.ops.aten.view.default(permute_334, [1, 64, 768]); permute_334 = None + permute_335 = torch.ops.aten.permute.default(getitem_178, [0, 2, 1, 3]); getitem_178 = None + view_335 = torch.ops.aten.view.default(permute_335, [1, 64, 768]); permute_335 = None + permute_336 = torch.ops.aten.permute.default(getitem_179, [0, 2, 1, 3]); getitem_179 = None + view_336 = torch.ops.aten.view.default(permute_336, [1, 64, 768]); permute_336 = None + cat_11 = torch.ops.aten.cat.default([view_335, view_336, view_334], 2); view_335 = view_336 = view_334 = None + view_337 = torch.ops.aten.view.default(cat_11, [64, 2304]); cat_11 = None + mm_97 = torch.ops.aten.mm.default(view_337, permute_337); permute_337 = None + permute_338 = torch.ops.aten.permute.default(view_337, [1, 0]) + mm_98 = torch.ops.aten.mm.default(permute_338, view); permute_338 = view = None + permute_339 = torch.ops.aten.permute.default(mm_98, [1, 0]); mm_98 = None + sum_144 = torch.ops.aten.sum.dim_IntList(view_337, [0], True); view_337 = None + view_338 = torch.ops.aten.view.default(sum_144, [2304]); sum_144 = None + permute_340 = torch.ops.aten.permute.default(permute_339, [1, 0]); permute_339 = None + view_339 = torch.ops.aten.view.default(mm_97, [1, 64, 768]); mm_97 = None + mul_375 = torch.ops.aten.mul.Tensor(view_339, primals_4); primals_4 = None + mul_376 = torch.ops.aten.mul.Tensor(mul_375, 768) + sum_145 = torch.ops.aten.sum.dim_IntList(mul_375, [2], True) + mul_377 = torch.ops.aten.mul.Tensor(mul_375, mul); mul_375 = None + sum_146 = torch.ops.aten.sum.dim_IntList(mul_377, [2], True); mul_377 = None + mul_378 = torch.ops.aten.mul.Tensor(mul, sum_146); sum_146 = None + sub_110 = torch.ops.aten.sub.Tensor(mul_376, sum_145); mul_376 = sum_145 = None + sub_111 = torch.ops.aten.sub.Tensor(sub_110, mul_378); sub_110 = mul_378 = None + mul_379 = torch.ops.aten.mul.Tensor(div_24, sub_111); div_24 = sub_111 = None + mul_380 = torch.ops.aten.mul.Tensor(view_339, mul); mul = None + sum_147 = torch.ops.aten.sum.dim_IntList(mul_380, [0, 1]); mul_380 = None + sum_148 = torch.ops.aten.sum.dim_IntList(view_339, [0, 1]); view_339 = None + add_146 = torch.ops.aten.add.Tensor(add_145, mul_379); add_145 = mul_379 = None + eq = torch.ops.aten.eq.Scalar(unsqueeze, -1) + unsqueeze_1 = torch.ops.aten.unsqueeze.default(eq, -1); eq = None + full_default_4 = torch.ops.aten.full.default([], 0.0, dtype = torch.float32, layout = torch.strided, device = device(type='cuda', index=0), pin_memory = False) + where = torch.ops.aten.where.self(unsqueeze_1, full_default_4, add_146); unsqueeze_1 = None + full_default_5 = torch.ops.aten.full.default([1024, 768], 0, dtype = torch.float32, layout = torch.strided, device = device(type='cuda', index=0), pin_memory = False) + _unsafe_index_put = torch.ops.aten._unsafe_index_put.default(full_default_5, [unsqueeze], where, True); full_default_5 = unsqueeze = where = None + eq_1 = torch.ops.aten.eq.Scalar(primals_1, -1) + unsqueeze_2 = torch.ops.aten.unsqueeze.default(eq_1, -1); eq_1 = None + where_1 = torch.ops.aten.where.self(unsqueeze_2, 
full_default_4, add_146); unsqueeze_2 = full_default_4 = add_146 = None + full_default_7 = torch.ops.aten.full.default([50304, 768], 0, dtype = torch.float32, layout = torch.strided, device = device(type='cuda', index=0), pin_memory = False) + _unsafe_index_put_1 = torch.ops.aten._unsafe_index_put.default(full_default_7, [primals_1], where_1, True); full_default_7 = primals_1 = where_1 = None + add_147 = torch.ops.aten.add.Tensor(permute_100, _unsafe_index_put_1); permute_100 = _unsafe_index_put_1 = None + return (None, add_147, _unsafe_index_put, sum_147, sum_148, permute_340, view_338, permute_332, view_331, sum_141, sum_142, permute_328, view_328, permute_324, view_325, sum_135, sum_136, permute_320, view_322, permute_312, view_315, sum_129, sum_130, permute_308, view_312, permute_304, view_309, sum_123, sum_124, permute_300, view_306, permute_292, view_299, sum_117, sum_118, permute_288, view_296, permute_284, view_293, sum_111, sum_112, permute_280, view_290, permute_272, view_283, sum_105, sum_106, permute_268, view_280, permute_264, view_277, sum_99, sum_100, permute_260, view_274, permute_252, view_267, sum_93, sum_94, permute_248, view_264, permute_244, view_261, sum_87, sum_88, permute_240, view_258, permute_232, view_251, sum_81, sum_82, permute_228, view_248, permute_224, view_245, sum_75, sum_76, permute_220, view_242, permute_212, view_235, sum_69, sum_70, permute_208, view_232, permute_204, view_229, sum_63, sum_64, permute_200, view_226, permute_192, view_219, sum_57, sum_58, permute_188, view_216, permute_184, view_213, sum_51, sum_52, permute_180, view_210, permute_172, view_203, sum_45, sum_46, permute_168, view_200, permute_164, view_197, sum_39, sum_40, permute_160, view_194, permute_152, view_187, sum_33, sum_34, permute_148, view_184, permute_144, view_181, sum_27, sum_28, permute_140, view_178, permute_132, view_171, sum_21, sum_22, permute_128, view_168, permute_124, view_165, sum_15, sum_16, permute_120, view_162, permute_112, view_155, sum_9, sum_10, permute_108, view_152, permute_104, view_149, sum_3, sum_4) + + def load_args(reader): + buf0 = reader.storage(None, 512, device=device(type='cuda', index=0), dtype_hint=torch.int64) + reader.tensor(buf0, (1, 64), dtype=torch.int64, is_leaf=True) # primals_1 + buf1 = reader.storage(None, 3072, device=device(type='cuda', index=0)) + reader.tensor(buf1, (768,), is_leaf=True) # primals_4 + buf2 = reader.storage(None, 3072, device=device(type='cuda', index=0)) + reader.tensor(buf2, (768,), is_leaf=True) # primals_10 + buf3 = reader.storage(None, 3072, device=device(type='cuda', index=0)) + reader.tensor(buf3, (768,), is_leaf=True) # primals_16 + buf4 = reader.storage(None, 3072, device=device(type='cuda', index=0)) + reader.tensor(buf4, (768,), is_leaf=True) # primals_22 + buf5 = reader.storage(None, 3072, device=device(type='cuda', index=0)) + reader.tensor(buf5, (768,), is_leaf=True) # primals_28 + buf6 = reader.storage(None, 3072, device=device(type='cuda', index=0)) + reader.tensor(buf6, (768,), is_leaf=True) # primals_34 + buf7 = reader.storage(None, 3072, device=device(type='cuda', index=0)) + reader.tensor(buf7, (768,), is_leaf=True) # primals_40 + buf8 = reader.storage(None, 3072, device=device(type='cuda', index=0)) + reader.tensor(buf8, (768,), is_leaf=True) # primals_46 + buf9 = reader.storage(None, 3072, device=device(type='cuda', index=0)) + reader.tensor(buf9, (768,), is_leaf=True) # primals_52 + buf10 = reader.storage(None, 3072, device=device(type='cuda', index=0)) + reader.tensor(buf10, (768,), 
is_leaf=True) # primals_58 + buf11 = reader.storage(None, 3072, device=device(type='cuda', index=0)) + reader.tensor(buf11, (768,), is_leaf=True) # primals_64 + buf12 = reader.storage(None, 3072, device=device(type='cuda', index=0)) + reader.tensor(buf12, (768,), is_leaf=True) # primals_70 + buf13 = reader.storage(None, 3072, device=device(type='cuda', index=0)) + reader.tensor(buf13, (768,), is_leaf=True) # primals_76 + buf14 = reader.storage(None, 3072, device=device(type='cuda', index=0)) + reader.tensor(buf14, (768,), is_leaf=True) # primals_82 + buf15 = reader.storage(None, 3072, device=device(type='cuda', index=0)) + reader.tensor(buf15, (768,), is_leaf=True) # primals_88 + buf16 = reader.storage(None, 3072, device=device(type='cuda', index=0)) + reader.tensor(buf16, (768,), is_leaf=True) # primals_94 + buf17 = reader.storage(None, 3072, device=device(type='cuda', index=0)) + reader.tensor(buf17, (768,), is_leaf=True) # primals_100 + buf18 = reader.storage(None, 3072, device=device(type='cuda', index=0)) + reader.tensor(buf18, (768,), is_leaf=True) # primals_106 + buf19 = reader.storage(None, 3072, device=device(type='cuda', index=0)) + reader.tensor(buf19, (768,), is_leaf=True) # primals_112 + buf20 = reader.storage(None, 3072, device=device(type='cuda', index=0)) + reader.tensor(buf20, (768,), is_leaf=True) # primals_118 + buf21 = reader.storage(None, 3072, device=device(type='cuda', index=0)) + reader.tensor(buf21, (768,), is_leaf=True) # primals_124 + buf22 = reader.storage(None, 3072, device=device(type='cuda', index=0)) + reader.tensor(buf22, (768,), is_leaf=True) # primals_130 + buf23 = reader.storage(None, 3072, device=device(type='cuda', index=0)) + reader.tensor(buf23, (768,), is_leaf=True) # primals_136 + buf24 = reader.storage(None, 3072, device=device(type='cuda', index=0)) + reader.tensor(buf24, (768,), is_leaf=True) # primals_142 + buf25 = reader.storage(None, 3072, device=device(type='cuda', index=0)) + reader.tensor(buf25, (768,), is_leaf=True) # primals_148 + buf26 = reader.storage(None, 512, device=device(type='cuda', index=0), dtype_hint=torch.int64) + reader.tensor(buf26, (1, 64), dtype=torch.int64, is_leaf=True) # unsqueeze + buf27 = reader.storage(None, 196608, device=device(type='cuda', index=0)) + reader.tensor(buf27, (1, 64, 768), is_leaf=True) # mul + buf28 = reader.storage(None, 196608, device=device(type='cuda', index=0)) + reader.tensor(buf28, (64, 768), is_leaf=True) # view + buf29 = reader.storage(None, 589824, device=device(type='cuda', index=0)) + reader.tensor(buf29, (1, 12, 64, 64), (147456, 64, 2304, 1), storage_offset=768, is_leaf=True) # permute_1 + reader.tensor(buf29, (1, 12, 64, 64), (147456, 64, 2304, 1), is_leaf=True) # permute_2 + reader.tensor(buf29, (1, 12, 64, 64), (147456, 64, 2304, 1), storage_offset=1536, is_leaf=True) # permute_3 + buf30 = reader.storage(None, 196608, device=device(type='cuda', index=0)) + reader.tensor(buf30, (1, 12, 64, 64), (49152, 64, 768, 1), is_leaf=True) # getitem_5 + buf31 = reader.storage(None, 3072, device=device(type='cuda', index=0)) + reader.tensor(buf31, (1, 12, 64), is_leaf=True) # getitem_6 + buf32 = reader.storage(None, 8, device=device(type='cuda', index=0), dtype_hint=torch.int64) + reader.tensor(buf32, (), dtype=torch.int64, is_leaf=True) # getitem_7 + buf33 = reader.storage(None, 8, device=device(type='cuda', index=0), dtype_hint=torch.int64) + reader.tensor(buf33, (), dtype=torch.int64, is_leaf=True) # getitem_8 + buf34 = reader.storage(None, 196608, device=device(type='cuda', index=0)) + 
reader.tensor(buf34, (1, 64, 768), is_leaf=True) # mul_2 + buf35 = reader.storage(None, 196608, device=device(type='cuda', index=0)) + reader.tensor(buf35, (64, 768), is_leaf=True) # view_8 + buf36 = reader.storage(None, 786432, device=device(type='cuda', index=0)) + reader.tensor(buf36, (64, 3072), is_leaf=True) # addmm_2 + buf37 = reader.storage(None, 786432, device=device(type='cuda', index=0)) + reader.tensor(buf37, (64, 3072), is_leaf=True) # view_10 + buf38 = reader.storage(None, 196608, device=device(type='cuda', index=0)) + reader.tensor(buf38, (1, 64, 768), is_leaf=True) # mul_8 + buf39 = reader.storage(None, 196608, device=device(type='cuda', index=0)) + reader.tensor(buf39, (64, 768), is_leaf=True) # view_12 + buf40 = reader.storage(None, 589824, device=device(type='cuda', index=0)) + reader.tensor(buf40, (1, 12, 64, 64), (147456, 64, 2304, 1), storage_offset=768, is_leaf=True) # permute_9 + reader.tensor(buf40, (1, 12, 64, 64), (147456, 64, 2304, 1), is_leaf=True) # permute_10 + reader.tensor(buf40, (1, 12, 64, 64), (147456, 64, 2304, 1), storage_offset=1536, is_leaf=True) # permute_11 + buf41 = reader.storage(None, 196608, device=device(type='cuda', index=0)) + reader.tensor(buf41, (1, 12, 64, 64), (49152, 64, 768, 1), is_leaf=True) # getitem_16 + buf42 = reader.storage(None, 3072, device=device(type='cuda', index=0)) + reader.tensor(buf42, (1, 12, 64), is_leaf=True) # getitem_17 + buf43 = reader.storage(None, 8, device=device(type='cuda', index=0), dtype_hint=torch.int64) + reader.tensor(buf43, (), dtype=torch.int64, is_leaf=True) # getitem_18 + buf44 = reader.storage(None, 8, device=device(type='cuda', index=0), dtype_hint=torch.int64) + reader.tensor(buf44, (), dtype=torch.int64, is_leaf=True) # getitem_19 + buf45 = reader.storage(None, 196608, device=device(type='cuda', index=0)) + reader.tensor(buf45, (1, 64, 768), is_leaf=True) # mul_10 + buf46 = reader.storage(None, 196608, device=device(type='cuda', index=0)) + reader.tensor(buf46, (64, 768), is_leaf=True) # view_20 + buf47 = reader.storage(None, 786432, device=device(type='cuda', index=0)) + reader.tensor(buf47, (64, 3072), is_leaf=True) # addmm_6 + buf48 = reader.storage(None, 786432, device=device(type='cuda', index=0)) + reader.tensor(buf48, (64, 3072), is_leaf=True) # view_22 + buf49 = reader.storage(None, 196608, device=device(type='cuda', index=0)) + reader.tensor(buf49, (1, 64, 768), is_leaf=True) # mul_16 + buf50 = reader.storage(None, 196608, device=device(type='cuda', index=0)) + reader.tensor(buf50, (64, 768), is_leaf=True) # view_24 + buf51 = reader.storage(None, 589824, device=device(type='cuda', index=0)) + reader.tensor(buf51, (1, 12, 64, 64), (147456, 64, 2304, 1), storage_offset=768, is_leaf=True) # permute_17 + reader.tensor(buf51, (1, 12, 64, 64), (147456, 64, 2304, 1), is_leaf=True) # permute_18 + reader.tensor(buf51, (1, 12, 64, 64), (147456, 64, 2304, 1), storage_offset=1536, is_leaf=True) # permute_19 + buf52 = reader.storage(None, 196608, device=device(type='cuda', index=0)) + reader.tensor(buf52, (1, 12, 64, 64), (49152, 64, 768, 1), is_leaf=True) # getitem_27 + buf53 = reader.storage(None, 3072, device=device(type='cuda', index=0)) + reader.tensor(buf53, (1, 12, 64), is_leaf=True) # getitem_28 + buf54 = reader.storage(None, 8, device=device(type='cuda', index=0), dtype_hint=torch.int64) + reader.tensor(buf54, (), dtype=torch.int64, is_leaf=True) # getitem_29 + buf55 = reader.storage(None, 8, device=device(type='cuda', index=0), dtype_hint=torch.int64) + reader.tensor(buf55, (), 
dtype=torch.int64, is_leaf=True) # getitem_30 + buf56 = reader.storage(None, 196608, device=device(type='cuda', index=0)) + reader.tensor(buf56, (1, 64, 768), is_leaf=True) # mul_18 + buf57 = reader.storage(None, 196608, device=device(type='cuda', index=0)) + reader.tensor(buf57, (64, 768), is_leaf=True) # view_32 + buf58 = reader.storage(None, 786432, device=device(type='cuda', index=0)) + reader.tensor(buf58, (64, 3072), is_leaf=True) # addmm_10 + buf59 = reader.storage(None, 786432, device=device(type='cuda', index=0)) + reader.tensor(buf59, (64, 3072), is_leaf=True) # view_34 + buf60 = reader.storage(None, 196608, device=device(type='cuda', index=0)) + reader.tensor(buf60, (1, 64, 768), is_leaf=True) # mul_24 + buf61 = reader.storage(None, 196608, device=device(type='cuda', index=0)) + reader.tensor(buf61, (64, 768), is_leaf=True) # view_36 + buf62 = reader.storage(None, 589824, device=device(type='cuda', index=0)) + reader.tensor(buf62, (1, 12, 64, 64), (147456, 64, 2304, 1), storage_offset=768, is_leaf=True) # permute_25 + reader.tensor(buf62, (1, 12, 64, 64), (147456, 64, 2304, 1), is_leaf=True) # permute_26 + reader.tensor(buf62, (1, 12, 64, 64), (147456, 64, 2304, 1), storage_offset=1536, is_leaf=True) # permute_27 + buf63 = reader.storage(None, 196608, device=device(type='cuda', index=0)) + reader.tensor(buf63, (1, 12, 64, 64), (49152, 64, 768, 1), is_leaf=True) # getitem_38 + buf64 = reader.storage(None, 3072, device=device(type='cuda', index=0)) + reader.tensor(buf64, (1, 12, 64), is_leaf=True) # getitem_39 + buf65 = reader.storage(None, 8, device=device(type='cuda', index=0), dtype_hint=torch.int64) + reader.tensor(buf65, (), dtype=torch.int64, is_leaf=True) # getitem_40 + buf66 = reader.storage(None, 8, device=device(type='cuda', index=0), dtype_hint=torch.int64) + reader.tensor(buf66, (), dtype=torch.int64, is_leaf=True) # getitem_41 + buf67 = reader.storage(None, 196608, device=device(type='cuda', index=0)) + reader.tensor(buf67, (1, 64, 768), is_leaf=True) # mul_26 + buf68 = reader.storage(None, 196608, device=device(type='cuda', index=0)) + reader.tensor(buf68, (64, 768), is_leaf=True) # view_44 + buf69 = reader.storage(None, 786432, device=device(type='cuda', index=0)) + reader.tensor(buf69, (64, 3072), is_leaf=True) # addmm_14 + buf70 = reader.storage(None, 786432, device=device(type='cuda', index=0)) + reader.tensor(buf70, (64, 3072), is_leaf=True) # view_46 + buf71 = reader.storage(None, 196608, device=device(type='cuda', index=0)) + reader.tensor(buf71, (1, 64, 768), is_leaf=True) # mul_32 + buf72 = reader.storage(None, 196608, device=device(type='cuda', index=0)) + reader.tensor(buf72, (64, 768), is_leaf=True) # view_48 + buf73 = reader.storage(None, 589824, device=device(type='cuda', index=0)) + reader.tensor(buf73, (1, 12, 64, 64), (147456, 64, 2304, 1), storage_offset=768, is_leaf=True) # permute_33 + reader.tensor(buf73, (1, 12, 64, 64), (147456, 64, 2304, 1), is_leaf=True) # permute_34 + reader.tensor(buf73, (1, 12, 64, 64), (147456, 64, 2304, 1), storage_offset=1536, is_leaf=True) # permute_35 + buf74 = reader.storage(None, 196608, device=device(type='cuda', index=0)) + reader.tensor(buf74, (1, 12, 64, 64), (49152, 64, 768, 1), is_leaf=True) # getitem_49 + buf75 = reader.storage(None, 3072, device=device(type='cuda', index=0)) + reader.tensor(buf75, (1, 12, 64), is_leaf=True) # getitem_50 + buf76 = reader.storage(None, 8, device=device(type='cuda', index=0), dtype_hint=torch.int64) + reader.tensor(buf76, (), dtype=torch.int64, is_leaf=True) # getitem_51 + 
buf77 = reader.storage(None, 8, device=device(type='cuda', index=0), dtype_hint=torch.int64) + reader.tensor(buf77, (), dtype=torch.int64, is_leaf=True) # getitem_52 + buf78 = reader.storage(None, 196608, device=device(type='cuda', index=0)) + reader.tensor(buf78, (1, 64, 768), is_leaf=True) # mul_34 + buf79 = reader.storage(None, 196608, device=device(type='cuda', index=0)) + reader.tensor(buf79, (64, 768), is_leaf=True) # view_56 + buf80 = reader.storage(None, 786432, device=device(type='cuda', index=0)) + reader.tensor(buf80, (64, 3072), is_leaf=True) # addmm_18 + buf81 = reader.storage(None, 786432, device=device(type='cuda', index=0)) + reader.tensor(buf81, (64, 3072), is_leaf=True) # view_58 + buf82 = reader.storage(None, 196608, device=device(type='cuda', index=0)) + reader.tensor(buf82, (1, 64, 768), is_leaf=True) # mul_40 + buf83 = reader.storage(None, 196608, device=device(type='cuda', index=0)) + reader.tensor(buf83, (64, 768), is_leaf=True) # view_60 + buf84 = reader.storage(None, 589824, device=device(type='cuda', index=0)) + reader.tensor(buf84, (1, 12, 64, 64), (147456, 64, 2304, 1), storage_offset=768, is_leaf=True) # permute_41 + reader.tensor(buf84, (1, 12, 64, 64), (147456, 64, 2304, 1), is_leaf=True) # permute_42 + reader.tensor(buf84, (1, 12, 64, 64), (147456, 64, 2304, 1), storage_offset=1536, is_leaf=True) # permute_43 + buf85 = reader.storage(None, 196608, device=device(type='cuda', index=0)) + reader.tensor(buf85, (1, 12, 64, 64), (49152, 64, 768, 1), is_leaf=True) # getitem_60 + buf86 = reader.storage(None, 3072, device=device(type='cuda', index=0)) + reader.tensor(buf86, (1, 12, 64), is_leaf=True) # getitem_61 + buf87 = reader.storage(None, 8, device=device(type='cuda', index=0), dtype_hint=torch.int64) + reader.tensor(buf87, (), dtype=torch.int64, is_leaf=True) # getitem_62 + buf88 = reader.storage(None, 8, device=device(type='cuda', index=0), dtype_hint=torch.int64) + reader.tensor(buf88, (), dtype=torch.int64, is_leaf=True) # getitem_63 + buf89 = reader.storage(None, 196608, device=device(type='cuda', index=0)) + reader.tensor(buf89, (1, 64, 768), is_leaf=True) # mul_42 + buf90 = reader.storage(None, 196608, device=device(type='cuda', index=0)) + reader.tensor(buf90, (64, 768), is_leaf=True) # view_68 + buf91 = reader.storage(None, 786432, device=device(type='cuda', index=0)) + reader.tensor(buf91, (64, 3072), is_leaf=True) # addmm_22 + buf92 = reader.storage(None, 786432, device=device(type='cuda', index=0)) + reader.tensor(buf92, (64, 3072), is_leaf=True) # view_70 + buf93 = reader.storage(None, 196608, device=device(type='cuda', index=0)) + reader.tensor(buf93, (1, 64, 768), is_leaf=True) # mul_48 + buf94 = reader.storage(None, 196608, device=device(type='cuda', index=0)) + reader.tensor(buf94, (64, 768), is_leaf=True) # view_72 + buf95 = reader.storage(None, 589824, device=device(type='cuda', index=0)) + reader.tensor(buf95, (1, 12, 64, 64), (147456, 64, 2304, 1), storage_offset=768, is_leaf=True) # permute_49 + reader.tensor(buf95, (1, 12, 64, 64), (147456, 64, 2304, 1), is_leaf=True) # permute_50 + reader.tensor(buf95, (1, 12, 64, 64), (147456, 64, 2304, 1), storage_offset=1536, is_leaf=True) # permute_51 + buf96 = reader.storage(None, 196608, device=device(type='cuda', index=0)) + reader.tensor(buf96, (1, 12, 64, 64), (49152, 64, 768, 1), is_leaf=True) # getitem_71 + buf97 = reader.storage(None, 3072, device=device(type='cuda', index=0)) + reader.tensor(buf97, (1, 12, 64), is_leaf=True) # getitem_72 + buf98 = reader.storage(None, 8, 
device=device(type='cuda', index=0), dtype_hint=torch.int64) + reader.tensor(buf98, (), dtype=torch.int64, is_leaf=True) # getitem_73 + buf99 = reader.storage(None, 8, device=device(type='cuda', index=0), dtype_hint=torch.int64) + reader.tensor(buf99, (), dtype=torch.int64, is_leaf=True) # getitem_74 + buf100 = reader.storage(None, 196608, device=device(type='cuda', index=0)) + reader.tensor(buf100, (1, 64, 768), is_leaf=True) # mul_50 + buf101 = reader.storage(None, 196608, device=device(type='cuda', index=0)) + reader.tensor(buf101, (64, 768), is_leaf=True) # view_80 + buf102 = reader.storage(None, 786432, device=device(type='cuda', index=0)) + reader.tensor(buf102, (64, 3072), is_leaf=True) # addmm_26 + buf103 = reader.storage(None, 786432, device=device(type='cuda', index=0)) + reader.tensor(buf103, (64, 3072), is_leaf=True) # view_82 + buf104 = reader.storage(None, 196608, device=device(type='cuda', index=0)) + reader.tensor(buf104, (1, 64, 768), is_leaf=True) # mul_56 + buf105 = reader.storage(None, 196608, device=device(type='cuda', index=0)) + reader.tensor(buf105, (64, 768), is_leaf=True) # view_84 + buf106 = reader.storage(None, 589824, device=device(type='cuda', index=0)) + reader.tensor(buf106, (1, 12, 64, 64), (147456, 64, 2304, 1), storage_offset=768, is_leaf=True) # permute_57 + reader.tensor(buf106, (1, 12, 64, 64), (147456, 64, 2304, 1), is_leaf=True) # permute_58 + reader.tensor(buf106, (1, 12, 64, 64), (147456, 64, 2304, 1), storage_offset=1536, is_leaf=True) # permute_59 + buf107 = reader.storage(None, 196608, device=device(type='cuda', index=0)) + reader.tensor(buf107, (1, 12, 64, 64), (49152, 64, 768, 1), is_leaf=True) # getitem_82 + buf108 = reader.storage(None, 3072, device=device(type='cuda', index=0)) + reader.tensor(buf108, (1, 12, 64), is_leaf=True) # getitem_83 + buf109 = reader.storage(None, 8, device=device(type='cuda', index=0), dtype_hint=torch.int64) + reader.tensor(buf109, (), dtype=torch.int64, is_leaf=True) # getitem_84 + buf110 = reader.storage(None, 8, device=device(type='cuda', index=0), dtype_hint=torch.int64) + reader.tensor(buf110, (), dtype=torch.int64, is_leaf=True) # getitem_85 + buf111 = reader.storage(None, 196608, device=device(type='cuda', index=0)) + reader.tensor(buf111, (1, 64, 768), is_leaf=True) # mul_58 + buf112 = reader.storage(None, 196608, device=device(type='cuda', index=0)) + reader.tensor(buf112, (64, 768), is_leaf=True) # view_92 + buf113 = reader.storage(None, 786432, device=device(type='cuda', index=0)) + reader.tensor(buf113, (64, 3072), is_leaf=True) # addmm_30 + buf114 = reader.storage(None, 786432, device=device(type='cuda', index=0)) + reader.tensor(buf114, (64, 3072), is_leaf=True) # view_94 + buf115 = reader.storage(None, 196608, device=device(type='cuda', index=0)) + reader.tensor(buf115, (1, 64, 768), is_leaf=True) # mul_64 + buf116 = reader.storage(None, 196608, device=device(type='cuda', index=0)) + reader.tensor(buf116, (64, 768), is_leaf=True) # view_96 + buf117 = reader.storage(None, 589824, device=device(type='cuda', index=0)) + reader.tensor(buf117, (1, 12, 64, 64), (147456, 64, 2304, 1), storage_offset=768, is_leaf=True) # permute_65 + reader.tensor(buf117, (1, 12, 64, 64), (147456, 64, 2304, 1), is_leaf=True) # permute_66 + reader.tensor(buf117, (1, 12, 64, 64), (147456, 64, 2304, 1), storage_offset=1536, is_leaf=True) # permute_67 + buf118 = reader.storage(None, 196608, device=device(type='cuda', index=0)) + reader.tensor(buf118, (1, 12, 64, 64), (49152, 64, 768, 1), is_leaf=True) # getitem_93 + buf119 = 
reader.storage(None, 3072, device=device(type='cuda', index=0)) + reader.tensor(buf119, (1, 12, 64), is_leaf=True) # getitem_94 + buf120 = reader.storage(None, 8, device=device(type='cuda', index=0), dtype_hint=torch.int64) + reader.tensor(buf120, (), dtype=torch.int64, is_leaf=True) # getitem_95 + buf121 = reader.storage(None, 8, device=device(type='cuda', index=0), dtype_hint=torch.int64) + reader.tensor(buf121, (), dtype=torch.int64, is_leaf=True) # getitem_96 + buf122 = reader.storage(None, 196608, device=device(type='cuda', index=0)) + reader.tensor(buf122, (1, 64, 768), is_leaf=True) # mul_66 + buf123 = reader.storage(None, 196608, device=device(type='cuda', index=0)) + reader.tensor(buf123, (64, 768), is_leaf=True) # view_104 + buf124 = reader.storage(None, 786432, device=device(type='cuda', index=0)) + reader.tensor(buf124, (64, 3072), is_leaf=True) # addmm_34 + buf125 = reader.storage(None, 786432, device=device(type='cuda', index=0)) + reader.tensor(buf125, (64, 3072), is_leaf=True) # view_106 + buf126 = reader.storage(None, 196608, device=device(type='cuda', index=0)) + reader.tensor(buf126, (1, 64, 768), is_leaf=True) # mul_72 + buf127 = reader.storage(None, 196608, device=device(type='cuda', index=0)) + reader.tensor(buf127, (64, 768), is_leaf=True) # view_108 + buf128 = reader.storage(None, 589824, device=device(type='cuda', index=0)) + reader.tensor(buf128, (1, 12, 64, 64), (147456, 64, 2304, 1), storage_offset=768, is_leaf=True) # permute_73 + reader.tensor(buf128, (1, 12, 64, 64), (147456, 64, 2304, 1), is_leaf=True) # permute_74 + reader.tensor(buf128, (1, 12, 64, 64), (147456, 64, 2304, 1), storage_offset=1536, is_leaf=True) # permute_75 + buf129 = reader.storage(None, 196608, device=device(type='cuda', index=0)) + reader.tensor(buf129, (1, 12, 64, 64), (49152, 64, 768, 1), is_leaf=True) # getitem_104 + buf130 = reader.storage(None, 3072, device=device(type='cuda', index=0)) + reader.tensor(buf130, (1, 12, 64), is_leaf=True) # getitem_105 + buf131 = reader.storage(None, 8, device=device(type='cuda', index=0), dtype_hint=torch.int64) + reader.tensor(buf131, (), dtype=torch.int64, is_leaf=True) # getitem_106 + buf132 = reader.storage(None, 8, device=device(type='cuda', index=0), dtype_hint=torch.int64) + reader.tensor(buf132, (), dtype=torch.int64, is_leaf=True) # getitem_107 + buf133 = reader.storage(None, 196608, device=device(type='cuda', index=0)) + reader.tensor(buf133, (1, 64, 768), is_leaf=True) # mul_74 + buf134 = reader.storage(None, 196608, device=device(type='cuda', index=0)) + reader.tensor(buf134, (64, 768), is_leaf=True) # view_116 + buf135 = reader.storage(None, 786432, device=device(type='cuda', index=0)) + reader.tensor(buf135, (64, 3072), is_leaf=True) # addmm_38 + buf136 = reader.storage(None, 786432, device=device(type='cuda', index=0)) + reader.tensor(buf136, (64, 3072), is_leaf=True) # view_118 + buf137 = reader.storage(None, 196608, device=device(type='cuda', index=0)) + reader.tensor(buf137, (1, 64, 768), is_leaf=True) # mul_80 + buf138 = reader.storage(None, 196608, device=device(type='cuda', index=0)) + reader.tensor(buf138, (64, 768), is_leaf=True) # view_120 + buf139 = reader.storage(None, 589824, device=device(type='cuda', index=0)) + reader.tensor(buf139, (1, 12, 64, 64), (147456, 64, 2304, 1), storage_offset=768, is_leaf=True) # permute_81 + reader.tensor(buf139, (1, 12, 64, 64), (147456, 64, 2304, 1), is_leaf=True) # permute_82 + reader.tensor(buf139, (1, 12, 64, 64), (147456, 64, 2304, 1), storage_offset=1536, is_leaf=True) # permute_83 + 
buf140 = reader.storage(None, 196608, device=device(type='cuda', index=0)) + reader.tensor(buf140, (1, 12, 64, 64), (49152, 64, 768, 1), is_leaf=True) # getitem_115 + buf141 = reader.storage(None, 3072, device=device(type='cuda', index=0)) + reader.tensor(buf141, (1, 12, 64), is_leaf=True) # getitem_116 + buf142 = reader.storage(None, 8, device=device(type='cuda', index=0), dtype_hint=torch.int64) + reader.tensor(buf142, (), dtype=torch.int64, is_leaf=True) # getitem_117 + buf143 = reader.storage(None, 8, device=device(type='cuda', index=0), dtype_hint=torch.int64) + reader.tensor(buf143, (), dtype=torch.int64, is_leaf=True) # getitem_118 + buf144 = reader.storage(None, 196608, device=device(type='cuda', index=0)) + reader.tensor(buf144, (1, 64, 768), is_leaf=True) # mul_82 + buf145 = reader.storage(None, 196608, device=device(type='cuda', index=0)) + reader.tensor(buf145, (64, 768), is_leaf=True) # view_128 + buf146 = reader.storage(None, 786432, device=device(type='cuda', index=0)) + reader.tensor(buf146, (64, 3072), is_leaf=True) # addmm_42 + buf147 = reader.storage(None, 786432, device=device(type='cuda', index=0)) + reader.tensor(buf147, (64, 3072), is_leaf=True) # view_130 + buf148 = reader.storage(None, 196608, device=device(type='cuda', index=0)) + reader.tensor(buf148, (1, 64, 768), is_leaf=True) # mul_88 + buf149 = reader.storage(None, 196608, device=device(type='cuda', index=0)) + reader.tensor(buf149, (64, 768), is_leaf=True) # view_132 + buf150 = reader.storage(None, 589824, device=device(type='cuda', index=0)) + reader.tensor(buf150, (1, 12, 64, 64), (147456, 64, 2304, 1), storage_offset=768, is_leaf=True) # permute_89 + reader.tensor(buf150, (1, 12, 64, 64), (147456, 64, 2304, 1), is_leaf=True) # permute_90 + reader.tensor(buf150, (1, 12, 64, 64), (147456, 64, 2304, 1), storage_offset=1536, is_leaf=True) # permute_91 + buf151 = reader.storage(None, 196608, device=device(type='cuda', index=0)) + reader.tensor(buf151, (1, 12, 64, 64), (49152, 64, 768, 1), is_leaf=True) # getitem_126 + buf152 = reader.storage(None, 3072, device=device(type='cuda', index=0)) + reader.tensor(buf152, (1, 12, 64), is_leaf=True) # getitem_127 + buf153 = reader.storage(None, 8, device=device(type='cuda', index=0), dtype_hint=torch.int64) + reader.tensor(buf153, (), dtype=torch.int64, is_leaf=True) # getitem_128 + buf154 = reader.storage(None, 8, device=device(type='cuda', index=0), dtype_hint=torch.int64) + reader.tensor(buf154, (), dtype=torch.int64, is_leaf=True) # getitem_129 + buf155 = reader.storage(None, 196608, device=device(type='cuda', index=0)) + reader.tensor(buf155, (1, 64, 768), is_leaf=True) # mul_90 + buf156 = reader.storage(None, 196608, device=device(type='cuda', index=0)) + reader.tensor(buf156, (64, 768), is_leaf=True) # view_140 + buf157 = reader.storage(None, 786432, device=device(type='cuda', index=0)) + reader.tensor(buf157, (64, 3072), is_leaf=True) # addmm_46 + buf158 = reader.storage(None, 786432, device=device(type='cuda', index=0)) + reader.tensor(buf158, (64, 3072), is_leaf=True) # view_142 + buf159 = reader.storage(None, 196608, device=device(type='cuda', index=0)) + reader.tensor(buf159, (1, 64, 768), is_leaf=True) # mul_96 + buf160 = reader.storage(None, 8, device=device(type='cuda', index=0), dtype_hint=torch.int64) + reader.tensor(buf160, (1,), dtype=torch.int64, is_leaf=True) # full_default + buf161 = reader.storage(None, 3072, device=device(type='cuda', index=0)) + reader.tensor(buf161, (1, 768), is_leaf=True) # view_144 + buf162 = reader.storage(None, 154533888, 
device=device(type='cuda', index=0)) + reader.tensor(buf162, (50304, 768), is_leaf=True) # permute_99 + buf163 = reader.storage(None, 256, device=device(type='cuda', index=0)) + reader.tensor(buf163, (1, 64, 1), is_leaf=True) # div + buf164 = reader.storage(None, 9437184, device=device(type='cuda', index=0)) + reader.tensor(buf164, (768, 3072), is_leaf=True) # permute_101 + buf165 = reader.storage(None, 9437184, device=device(type='cuda', index=0)) + reader.tensor(buf165, (3072, 768), is_leaf=True) # permute_105 + buf166 = reader.storage(None, 256, device=device(type='cuda', index=0)) + reader.tensor(buf166, (1, 64, 1), is_leaf=True) # div_1 + buf167 = reader.storage(None, 2359296, device=device(type='cuda', index=0)) + reader.tensor(buf167, (768, 768), is_leaf=True) # permute_109 + buf168 = reader.storage(None, 7077888, device=device(type='cuda', index=0)) + reader.tensor(buf168, (2304, 768), is_leaf=True) # permute_117 + buf169 = reader.storage(None, 256, device=device(type='cuda', index=0)) + reader.tensor(buf169, (1, 64, 1), is_leaf=True) # div_2 + buf170 = reader.storage(None, 9437184, device=device(type='cuda', index=0)) + reader.tensor(buf170, (768, 3072), is_leaf=True) # permute_121 + buf171 = reader.storage(None, 9437184, device=device(type='cuda', index=0)) + reader.tensor(buf171, (3072, 768), is_leaf=True) # permute_125 + buf172 = reader.storage(None, 256, device=device(type='cuda', index=0)) + reader.tensor(buf172, (1, 64, 1), is_leaf=True) # div_3 + buf173 = reader.storage(None, 2359296, device=device(type='cuda', index=0)) + reader.tensor(buf173, (768, 768), is_leaf=True) # permute_129 + buf174 = reader.storage(None, 7077888, device=device(type='cuda', index=0)) + reader.tensor(buf174, (2304, 768), is_leaf=True) # permute_137 + buf175 = reader.storage(None, 256, device=device(type='cuda', index=0)) + reader.tensor(buf175, (1, 64, 1), is_leaf=True) # div_4 + buf176 = reader.storage(None, 9437184, device=device(type='cuda', index=0)) + reader.tensor(buf176, (768, 3072), is_leaf=True) # permute_141 + buf177 = reader.storage(None, 9437184, device=device(type='cuda', index=0)) + reader.tensor(buf177, (3072, 768), is_leaf=True) # permute_145 + buf178 = reader.storage(None, 256, device=device(type='cuda', index=0)) + reader.tensor(buf178, (1, 64, 1), is_leaf=True) # div_5 + buf179 = reader.storage(None, 2359296, device=device(type='cuda', index=0)) + reader.tensor(buf179, (768, 768), is_leaf=True) # permute_149 + buf180 = reader.storage(None, 7077888, device=device(type='cuda', index=0)) + reader.tensor(buf180, (2304, 768), is_leaf=True) # permute_157 + buf181 = reader.storage(None, 256, device=device(type='cuda', index=0)) + reader.tensor(buf181, (1, 64, 1), is_leaf=True) # div_6 + buf182 = reader.storage(None, 9437184, device=device(type='cuda', index=0)) + reader.tensor(buf182, (768, 3072), is_leaf=True) # permute_161 + buf183 = reader.storage(None, 9437184, device=device(type='cuda', index=0)) + reader.tensor(buf183, (3072, 768), is_leaf=True) # permute_165 + buf184 = reader.storage(None, 256, device=device(type='cuda', index=0)) + reader.tensor(buf184, (1, 64, 1), is_leaf=True) # div_7 + buf185 = reader.storage(None, 2359296, device=device(type='cuda', index=0)) + reader.tensor(buf185, (768, 768), is_leaf=True) # permute_169 + buf186 = reader.storage(None, 7077888, device=device(type='cuda', index=0)) + reader.tensor(buf186, (2304, 768), is_leaf=True) # permute_177 + buf187 = reader.storage(None, 256, device=device(type='cuda', index=0)) + reader.tensor(buf187, (1, 64, 1), 
is_leaf=True) # div_8
+ buf188 = reader.storage(None, 9437184, device=device(type='cuda', index=0))
+ reader.tensor(buf188, (768, 3072), is_leaf=True) # permute_181
+ buf189 = reader.storage(None, 9437184, device=device(type='cuda', index=0))
+ reader.tensor(buf189, (3072, 768), is_leaf=True) # permute_185
+ buf190 = reader.storage(None, 256, device=device(type='cuda', index=0))
+ reader.tensor(buf190, (1, 64, 1), is_leaf=True) # div_9
+ buf191 = reader.storage(None, 2359296, device=device(type='cuda', index=0))
+ reader.tensor(buf191, (768, 768), is_leaf=True) # permute_189
+ buf192 = reader.storage(None, 7077888, device=device(type='cuda', index=0))
+ reader.tensor(buf192, (2304, 768), is_leaf=True) # permute_197
+ buf193 = reader.storage(None, 256, device=device(type='cuda', index=0))
+ reader.tensor(buf193, (1, 64, 1), is_leaf=True) # div_10
+ buf194 = reader.storage(None, 9437184, device=device(type='cuda', index=0))
+ reader.tensor(buf194, (768, 3072), is_leaf=True) # permute_201
+ buf195 = reader.storage(None, 9437184, device=device(type='cuda', index=0))
+ reader.tensor(buf195, (3072, 768), is_leaf=True) # permute_205
+ buf196 = reader.storage(None, 256, device=device(type='cuda', index=0))
+ reader.tensor(buf196, (1, 64, 1), is_leaf=True) # div_11
+ buf197 = reader.storage(None, 2359296, device=device(type='cuda', index=0))
+ reader.tensor(buf197, (768, 768), is_leaf=True) # permute_209
+ buf198 = reader.storage(None, 7077888, device=device(type='cuda', index=0))
+ reader.tensor(buf198, (2304, 768), is_leaf=True) # permute_217
+ buf199 = reader.storage(None, 256, device=device(type='cuda', index=0))
+ reader.tensor(buf199, (1, 64, 1), is_leaf=True) # div_12
+ buf200 = reader.storage(None, 9437184, device=device(type='cuda', index=0))
+ reader.tensor(buf200, (768, 3072), is_leaf=True) # permute_221
+ buf201 = reader.storage(None, 9437184, device=device(type='cuda', index=0))
+ reader.tensor(buf201, (3072, 768), is_leaf=True) # permute_225
+ buf202 = reader.storage(None, 256, device=device(type='cuda', index=0))
+ reader.tensor(buf202, (1, 64, 1), is_leaf=True) # div_13
+ buf203 = reader.storage(None, 2359296, device=device(type='cuda', index=0))
+ reader.tensor(buf203, (768, 768), is_leaf=True) # permute_229
+ buf204 = reader.storage(None, 7077888, device=device(type='cuda', index=0))
+ reader.tensor(buf204, (2304, 768), is_leaf=True) # permute_237
+ buf205 = reader.storage(None, 256, device=device(type='cuda', index=0))
+ reader.tensor(buf205, (1, 64, 1), is_leaf=True) # div_14
+ buf206 = reader.storage(None, 9437184, device=device(type='cuda', index=0))
+ reader.tensor(buf206, (768, 3072), is_leaf=True) # permute_241
+ buf207 = reader.storage(None, 9437184, device=device(type='cuda', index=0))
+ reader.tensor(buf207, (3072, 768), is_leaf=True) # permute_245
+ buf208 = reader.storage(None, 256, device=device(type='cuda', index=0))
+ reader.tensor(buf208, (1, 64, 1), is_leaf=True) # div_15
+ buf209 = reader.storage(None, 2359296, device=device(type='cuda', index=0))
+ reader.tensor(buf209, (768, 768), is_leaf=True) # permute_249
+ buf210 = reader.storage(None, 7077888, device=device(type='cuda', index=0))
+ reader.tensor(buf210, (2304, 768), is_leaf=True) # permute_257
+ buf211 = reader.storage(None, 256, device=device(type='cuda', index=0))
+ reader.tensor(buf211, (1, 64, 1), is_leaf=True) # div_16
+ buf212 = reader.storage(None, 9437184, device=device(type='cuda', index=0))
+ reader.tensor(buf212, (768, 3072), is_leaf=True) # permute_261
+ buf213 = reader.storage(None, 9437184, device=device(type='cuda', index=0))
+ reader.tensor(buf213, (3072, 768), is_leaf=True) # permute_265
+ buf214 = reader.storage(None, 256, device=device(type='cuda', index=0))
+ reader.tensor(buf214, (1, 64, 1), is_leaf=True) # div_17
+ buf215 = reader.storage(None, 2359296, device=device(type='cuda', index=0))
+ reader.tensor(buf215, (768, 768), is_leaf=True) # permute_269
+ buf216 = reader.storage(None, 7077888, device=device(type='cuda', index=0))
+ reader.tensor(buf216, (2304, 768), is_leaf=True) # permute_277
+ buf217 = reader.storage(None, 256, device=device(type='cuda', index=0))
+ reader.tensor(buf217, (1, 64, 1), is_leaf=True) # div_18
+ buf218 = reader.storage(None, 9437184, device=device(type='cuda', index=0))
+ reader.tensor(buf218, (768, 3072), is_leaf=True) # permute_281
+ buf219 = reader.storage(None, 9437184, device=device(type='cuda', index=0))
+ reader.tensor(buf219, (3072, 768), is_leaf=True) # permute_285
+ buf220 = reader.storage(None, 256, device=device(type='cuda', index=0))
+ reader.tensor(buf220, (1, 64, 1), is_leaf=True) # div_19
+ buf221 = reader.storage(None, 2359296, device=device(type='cuda', index=0))
+ reader.tensor(buf221, (768, 768), is_leaf=True) # permute_289
+ buf222 = reader.storage(None, 7077888, device=device(type='cuda', index=0))
+ reader.tensor(buf222, (2304, 768), is_leaf=True) # permute_297
+ buf223 = reader.storage(None, 256, device=device(type='cuda', index=0))
+ reader.tensor(buf223, (1, 64, 1), is_leaf=True) # div_20
+ buf224 = reader.storage(None, 9437184, device=device(type='cuda', index=0))
+ reader.tensor(buf224, (768, 3072), is_leaf=True) # permute_301
+ buf225 = reader.storage(None, 9437184, device=device(type='cuda', index=0))
+ reader.tensor(buf225, (3072, 768), is_leaf=True) # permute_305
+ buf226 = reader.storage(None, 256, device=device(type='cuda', index=0))
+ reader.tensor(buf226, (1, 64, 1), is_leaf=True) # div_21
+ buf227 = reader.storage(None, 2359296, device=device(type='cuda', index=0))
+ reader.tensor(buf227, (768, 768), is_leaf=True) # permute_309
+ buf228 = reader.storage(None, 7077888, device=device(type='cuda', index=0))
+ reader.tensor(buf228, (2304, 768), is_leaf=True) # permute_317
+ buf229 = reader.storage(None, 256, device=device(type='cuda', index=0))
+ reader.tensor(buf229, (1, 64, 1), is_leaf=True) # div_22
+ buf230 = reader.storage(None, 9437184, device=device(type='cuda', index=0))
+ reader.tensor(buf230, (768, 3072), is_leaf=True) # permute_321
+ buf231 = reader.storage(None, 9437184, device=device(type='cuda', index=0))
+ reader.tensor(buf231, (3072, 768), is_leaf=True) # permute_325
+ buf232 = reader.storage(None, 256, device=device(type='cuda', index=0))
+ reader.tensor(buf232, (1, 64, 1), is_leaf=True) # div_23
+ buf233 = reader.storage(None, 2359296, device=device(type='cuda', index=0))
+ reader.tensor(buf233, (768, 768), is_leaf=True) # permute_329
+ buf234 = reader.storage(None, 7077888, device=device(type='cuda', index=0))
+ reader.tensor(buf234, (2304, 768), is_leaf=True) # permute_337
+ buf235 = reader.storage(None, 256, device=device(type='cuda', index=0))
+ reader.tensor(buf235, (1, 64, 1), is_leaf=True) # div_24
+ buf236 = reader.storage(None, 201216, device=device(type='cuda', index=0))
+ reader.tensor(buf236, (1, 1, 50304), is_leaf=True) # tangents_1
+ load_args._version = 0
+ mod = Repro()
+ if __name__ == '__main__':
+ from torch._dynamo.repro.after_aot import run_repro
+ with torch.no_grad():
+ run_repro(mod, load_args, accuracy=False, command='run', save_dir=None, tracing_mode='real', check_str=None)
+ # To run it separately, do
+ # mod, args = run_repro(mod, load_args, accuracy=False, command='get_args', save_dir=None, tracing_mode='real', check_str=None)
+ # mod(*args)
+V0806 13:56:02.106000 4107173 torch/_inductor/compile_fx.py:778] {"inductor_post_grad_graph": {}, "frame_id": 2, "frame_compile_id": 0, "attempt": 1, "has_payload": "4225f8db403a314e55d0f9cb21612c99"}
+ class GraphModule(torch.nn.Module):
+ def forward(self, primals_1: "i64[1, 64][64, 1]cuda:0", primals_4: "f32[768][1]cuda:0", primals_10: "f32[768][1]cuda:0", primals_16: "f32[768][1]cuda:0", primals_22: "f32[768][1]cuda:0", primals_28: "f32[768][1]cuda:0", primals_34: "f32[768][1]cuda:0", primals_40: "f32[768][1]cuda:0", primals_46: "f32[768][1]cuda:0", primals_52: "f32[768][1]cuda:0", primals_58: "f32[768][1]cuda:0", primals_64: "f32[768][1]cuda:0", primals_70: "f32[768][1]cuda:0", primals_76: "f32[768][1]cuda:0", primals_82: "f32[768][1]cuda:0", primals_88: "f32[768][1]cuda:0", primals_94: "f32[768][1]cuda:0", primals_100: "f32[768][1]cuda:0", primals_106: "f32[768][1]cuda:0", primals_112: "f32[768][1]cuda:0", primals_118: "f32[768][1]cuda:0", primals_124: "f32[768][1]cuda:0", primals_130: "f32[768][1]cuda:0", primals_136: "f32[768][1]cuda:0", primals_142: "f32[768][1]cuda:0", primals_148: "f32[768][1]cuda:0", unsqueeze: "i64[1, 64][64, 1]cuda:0", mul: "f32[1, 64, 768][49152, 768, 1]cuda:0", view: "f32[64, 768][768, 1]cuda:0", permute_1: "f32[1, 12, 64, 64][147456, 64, 2304, 1]cuda:0", permute_2: "f32[1, 12, 64, 64][147456, 64, 2304, 1]cuda:0", permute_3: "f32[1, 12, 64, 64][147456, 64, 2304, 1]cuda:0", getitem_5: "f32[1, 12, 64, 64][49152, 64, 768, 1]cuda:0", getitem_6: "f32[1, 12, 64][768, 64, 1]cuda:0", getitem_7: "i64[][]cuda:0", getitem_8: "i64[][]cuda:0", mul_2: "f32[1, 64, 768][49152, 768, 1]cuda:0", view_8: "f32[64, 768][768, 1]cuda:0", addmm_2: "f32[64, 3072][3072, 1]cuda:0", view_10: "f32[64, 3072][3072, 1]cuda:0", mul_8: "f32[1, 64, 768][49152, 768, 1]cuda:0", view_12: "f32[64, 768][768, 1]cuda:0", permute_9: "f32[1, 12, 64, 64][147456, 64, 2304, 1]cuda:0", permute_10: "f32[1, 12, 64, 64][147456, 64, 2304, 1]cuda:0", permute_11: "f32[1, 12, 64, 64][147456, 64, 2304, 1]cuda:0", getitem_16: "f32[1, 12, 64, 64][49152, 64, 768, 1]cuda:0", getitem_17: "f32[1, 12, 64][768, 64, 1]cuda:0", getitem_18: "i64[][]cuda:0", getitem_19: "i64[][]cuda:0", mul_10: "f32[1, 64, 768][49152, 768, 1]cuda:0", view_20: "f32[64, 768][768, 1]cuda:0", addmm_6: "f32[64, 3072][3072, 1]cuda:0", view_22: "f32[64, 3072][3072, 1]cuda:0", mul_16: "f32[1, 64, 768][49152, 768, 1]cuda:0", view_24: "f32[64, 768][768, 1]cuda:0", permute_17: "f32[1, 12, 64, 64][147456, 64, 2304, 1]cuda:0", permute_18: "f32[1, 12, 64, 64][147456, 64, 2304, 1]cuda:0", permute_19: "f32[1, 12, 64, 64][147456, 64, 2304, 1]cuda:0", getitem_27: "f32[1, 12, 64, 64][49152, 64, 768, 1]cuda:0", getitem_28: "f32[1, 12, 64][768, 64, 1]cuda:0", getitem_29: "i64[][]cuda:0", getitem_30: "i64[][]cuda:0", mul_18: "f32[1, 64, 768][49152, 768, 1]cuda:0", view_32: "f32[64, 768][768, 1]cuda:0", addmm_10: "f32[64, 3072][3072, 1]cuda:0", view_34: "f32[64, 3072][3072, 1]cuda:0", mul_24: "f32[1, 64, 768][49152, 768, 1]cuda:0", view_36: "f32[64, 768][768, 1]cuda:0", permute_25: "f32[1, 12, 64, 64][147456, 64, 2304, 1]cuda:0", permute_26: "f32[1, 12, 64, 64][147456, 64, 2304, 1]cuda:0", permute_27: "f32[1, 12, 64, 64][147456, 64, 2304, 1]cuda:0", getitem_38: "f32[1, 12, 64, 64][49152, 64, 768, 1]cuda:0", getitem_39: "f32[1, 12, 64][768, 64, 1]cuda:0", getitem_40:
"i64[][]cuda:0", getitem_41: "i64[][]cuda:0", mul_26: "f32[1, 64, 768][49152, 768, 1]cuda:0", view_44: "f32[64, 768][768, 1]cuda:0", addmm_14: "f32[64, 3072][3072, 1]cuda:0", view_46: "f32[64, 3072][3072, 1]cuda:0", mul_32: "f32[1, 64, 768][49152, 768, 1]cuda:0", view_48: "f32[64, 768][768, 1]cuda:0", permute_33: "f32[1, 12, 64, 64][147456, 64, 2304, 1]cuda:0", permute_34: "f32[1, 12, 64, 64][147456, 64, 2304, 1]cuda:0", permute_35: "f32[1, 12, 64, 64][147456, 64, 2304, 1]cuda:0", getitem_49: "f32[1, 12, 64, 64][49152, 64, 768, 1]cuda:0", getitem_50: "f32[1, 12, 64][768, 64, 1]cuda:0", getitem_51: "i64[][]cuda:0", getitem_52: "i64[][]cuda:0", mul_34: "f32[1, 64, 768][49152, 768, 1]cuda:0", view_56: "f32[64, 768][768, 1]cuda:0", addmm_18: "f32[64, 3072][3072, 1]cuda:0", view_58: "f32[64, 3072][3072, 1]cuda:0", mul_40: "f32[1, 64, 768][49152, 768, 1]cuda:0", view_60: "f32[64, 768][768, 1]cuda:0", permute_41: "f32[1, 12, 64, 64][147456, 64, 2304, 1]cuda:0", permute_42: "f32[1, 12, 64, 64][147456, 64, 2304, 1]cuda:0", permute_43: "f32[1, 12, 64, 64][147456, 64, 2304, 1]cuda:0", getitem_60: "f32[1, 12, 64, 64][49152, 64, 768, 1]cuda:0", getitem_61: "f32[1, 12, 64][768, 64, 1]cuda:0", getitem_62: "i64[][]cuda:0", getitem_63: "i64[][]cuda:0", mul_42: "f32[1, 64, 768][49152, 768, 1]cuda:0", view_68: "f32[64, 768][768, 1]cuda:0", addmm_22: "f32[64, 3072][3072, 1]cuda:0", view_70: "f32[64, 3072][3072, 1]cuda:0", mul_48: "f32[1, 64, 768][49152, 768, 1]cuda:0", view_72: "f32[64, 768][768, 1]cuda:0", permute_49: "f32[1, 12, 64, 64][147456, 64, 2304, 1]cuda:0", permute_50: "f32[1, 12, 64, 64][147456, 64, 2304, 1]cuda:0", permute_51: "f32[1, 12, 64, 64][147456, 64, 2304, 1]cuda:0", getitem_71: "f32[1, 12, 64, 64][49152, 64, 768, 1]cuda:0", getitem_72: "f32[1, 12, 64][768, 64, 1]cuda:0", getitem_73: "i64[][]cuda:0", getitem_74: "i64[][]cuda:0", mul_50: "f32[1, 64, 768][49152, 768, 1]cuda:0", view_80: "f32[64, 768][768, 1]cuda:0", addmm_26: "f32[64, 3072][3072, 1]cuda:0", view_82: "f32[64, 3072][3072, 1]cuda:0", mul_56: "f32[1, 64, 768][49152, 768, 1]cuda:0", view_84: "f32[64, 768][768, 1]cuda:0", permute_57: "f32[1, 12, 64, 64][147456, 64, 2304, 1]cuda:0", permute_58: "f32[1, 12, 64, 64][147456, 64, 2304, 1]cuda:0", permute_59: "f32[1, 12, 64, 64][147456, 64, 2304, 1]cuda:0", getitem_82: "f32[1, 12, 64, 64][49152, 64, 768, 1]cuda:0", getitem_83: "f32[1, 12, 64][768, 64, 1]cuda:0", getitem_84: "i64[][]cuda:0", getitem_85: "i64[][]cuda:0", mul_58: "f32[1, 64, 768][49152, 768, 1]cuda:0", view_92: "f32[64, 768][768, 1]cuda:0", addmm_30: "f32[64, 3072][3072, 1]cuda:0", view_94: "f32[64, 3072][3072, 1]cuda:0", mul_64: "f32[1, 64, 768][49152, 768, 1]cuda:0", view_96: "f32[64, 768][768, 1]cuda:0", permute_65: "f32[1, 12, 64, 64][147456, 64, 2304, 1]cuda:0", permute_66: "f32[1, 12, 64, 64][147456, 64, 2304, 1]cuda:0", permute_67: "f32[1, 12, 64, 64][147456, 64, 2304, 1]cuda:0", getitem_93: "f32[1, 12, 64, 64][49152, 64, 768, 1]cuda:0", getitem_94: "f32[1, 12, 64][768, 64, 1]cuda:0", getitem_95: "i64[][]cuda:0", getitem_96: "i64[][]cuda:0", mul_66: "f32[1, 64, 768][49152, 768, 1]cuda:0", view_104: "f32[64, 768][768, 1]cuda:0", addmm_34: "f32[64, 3072][3072, 1]cuda:0", view_106: "f32[64, 3072][3072, 1]cuda:0", mul_72: "f32[1, 64, 768][49152, 768, 1]cuda:0", view_108: "f32[64, 768][768, 1]cuda:0", permute_73: "f32[1, 12, 64, 64][147456, 64, 2304, 1]cuda:0", permute_74: "f32[1, 12, 64, 64][147456, 64, 2304, 1]cuda:0", permute_75: "f32[1, 12, 64, 64][147456, 64, 2304, 1]cuda:0", getitem_104: "f32[1, 12, 64, 64][49152, 
64, 768, 1]cuda:0", getitem_105: "f32[1, 12, 64][768, 64, 1]cuda:0", getitem_106: "i64[][]cuda:0", getitem_107: "i64[][]cuda:0", mul_74: "f32[1, 64, 768][49152, 768, 1]cuda:0", view_116: "f32[64, 768][768, 1]cuda:0", addmm_38: "f32[64, 3072][3072, 1]cuda:0", view_118: "f32[64, 3072][3072, 1]cuda:0", mul_80: "f32[1, 64, 768][49152, 768, 1]cuda:0", view_120: "f32[64, 768][768, 1]cuda:0", permute_81: "f32[1, 12, 64, 64][147456, 64, 2304, 1]cuda:0", permute_82: "f32[1, 12, 64, 64][147456, 64, 2304, 1]cuda:0", permute_83: "f32[1, 12, 64, 64][147456, 64, 2304, 1]cuda:0", getitem_115: "f32[1, 12, 64, 64][49152, 64, 768, 1]cuda:0", getitem_116: "f32[1, 12, 64][768, 64, 1]cuda:0", getitem_117: "i64[][]cuda:0", getitem_118: "i64[][]cuda:0", mul_82: "f32[1, 64, 768][49152, 768, 1]cuda:0", view_128: "f32[64, 768][768, 1]cuda:0", addmm_42: "f32[64, 3072][3072, 1]cuda:0", view_130: "f32[64, 3072][3072, 1]cuda:0", mul_88: "f32[1, 64, 768][49152, 768, 1]cuda:0", view_132: "f32[64, 768][768, 1]cuda:0", permute_89: "f32[1, 12, 64, 64][147456, 64, 2304, 1]cuda:0", permute_90: "f32[1, 12, 64, 64][147456, 64, 2304, 1]cuda:0", permute_91: "f32[1, 12, 64, 64][147456, 64, 2304, 1]cuda:0", getitem_126: "f32[1, 12, 64, 64][49152, 64, 768, 1]cuda:0", getitem_127: "f32[1, 12, 64][768, 64, 1]cuda:0", getitem_128: "i64[][]cuda:0", getitem_129: "i64[][]cuda:0", mul_90: "f32[1, 64, 768][49152, 768, 1]cuda:0", view_140: "f32[64, 768][768, 1]cuda:0", addmm_46: "f32[64, 3072][3072, 1]cuda:0", view_142: "f32[64, 3072][3072, 1]cuda:0", mul_96: "f32[1, 64, 768][49152, 768, 1]cuda:0", full_default: "i64[1][1]cuda:0", view_144: "f32[1, 768][768, 1]cuda:0", permute_99: "f32[50304, 768][768, 1]cuda:0", div: "f32[1, 64, 1][64, 1, 1]cuda:0", permute_101: "f32[768, 3072][3072, 1]cuda:0", permute_105: "f32[3072, 768][768, 1]cuda:0", div_1: "f32[1, 64, 1][64, 1, 1]cuda:0", permute_109: "f32[768, 768][768, 1]cuda:0", permute_117: "f32[2304, 768][768, 1]cuda:0", div_2: "f32[1, 64, 1][64, 1, 1]cuda:0", permute_121: "f32[768, 3072][3072, 1]cuda:0", permute_125: "f32[3072, 768][768, 1]cuda:0", div_3: "f32[1, 64, 1][64, 1, 1]cuda:0", permute_129: "f32[768, 768][768, 1]cuda:0", permute_137: "f32[2304, 768][768, 1]cuda:0", div_4: "f32[1, 64, 1][64, 1, 1]cuda:0", permute_141: "f32[768, 3072][3072, 1]cuda:0", permute_145: "f32[3072, 768][768, 1]cuda:0", div_5: "f32[1, 64, 1][64, 1, 1]cuda:0", permute_149: "f32[768, 768][768, 1]cuda:0", permute_157: "f32[2304, 768][768, 1]cuda:0", div_6: "f32[1, 64, 1][64, 1, 1]cuda:0", permute_161: "f32[768, 3072][3072, 1]cuda:0", permute_165: "f32[3072, 768][768, 1]cuda:0", div_7: "f32[1, 64, 1][64, 1, 1]cuda:0", permute_169: "f32[768, 768][768, 1]cuda:0", permute_177: "f32[2304, 768][768, 1]cuda:0", div_8: "f32[1, 64, 1][64, 1, 1]cuda:0", permute_181: "f32[768, 3072][3072, 1]cuda:0", permute_185: "f32[3072, 768][768, 1]cuda:0", div_9: "f32[1, 64, 1][64, 1, 1]cuda:0", permute_189: "f32[768, 768][768, 1]cuda:0", permute_197: "f32[2304, 768][768, 1]cuda:0", div_10: "f32[1, 64, 1][64, 1, 1]cuda:0", permute_201: "f32[768, 3072][3072, 1]cuda:0", permute_205: "f32[3072, 768][768, 1]cuda:0", div_11: "f32[1, 64, 1][64, 1, 1]cuda:0", permute_209: "f32[768, 768][768, 1]cuda:0", permute_217: "f32[2304, 768][768, 1]cuda:0", div_12: "f32[1, 64, 1][64, 1, 1]cuda:0", permute_221: "f32[768, 3072][3072, 1]cuda:0", permute_225: "f32[3072, 768][768, 1]cuda:0", div_13: "f32[1, 64, 1][64, 1, 1]cuda:0", permute_229: "f32[768, 768][768, 1]cuda:0", permute_237: "f32[2304, 768][768, 1]cuda:0", div_14: "f32[1, 64, 1][64, 1, 1]cuda:0", 
permute_241: "f32[768, 3072][3072, 1]cuda:0", permute_245: "f32[3072, 768][768, 1]cuda:0", div_15: "f32[1, 64, 1][64, 1, 1]cuda:0", permute_249: "f32[768, 768][768, 1]cuda:0", permute_257: "f32[2304, 768][768, 1]cuda:0", div_16: "f32[1, 64, 1][64, 1, 1]cuda:0", permute_261: "f32[768, 3072][3072, 1]cuda:0", permute_265: "f32[3072, 768][768, 1]cuda:0", div_17: "f32[1, 64, 1][64, 1, 1]cuda:0", permute_269: "f32[768, 768][768, 1]cuda:0", permute_277: "f32[2304, 768][768, 1]cuda:0", div_18: "f32[1, 64, 1][64, 1, 1]cuda:0", permute_281: "f32[768, 3072][3072, 1]cuda:0", permute_285: "f32[3072, 768][768, 1]cuda:0", div_19: "f32[1, 64, 1][64, 1, 1]cuda:0", permute_289: "f32[768, 768][768, 1]cuda:0", permute_297: "f32[2304, 768][768, 1]cuda:0", div_20: "f32[1, 64, 1][64, 1, 1]cuda:0", permute_301: "f32[768, 3072][3072, 1]cuda:0", permute_305: "f32[3072, 768][768, 1]cuda:0", div_21: "f32[1, 64, 1][64, 1, 1]cuda:0", permute_309: "f32[768, 768][768, 1]cuda:0", permute_317: "f32[2304, 768][768, 1]cuda:0", div_22: "f32[1, 64, 1][64, 1, 1]cuda:0", permute_321: "f32[768, 3072][3072, 1]cuda:0", permute_325: "f32[3072, 768][768, 1]cuda:0", div_23: "f32[1, 64, 1][64, 1, 1]cuda:0", permute_329: "f32[768, 768][768, 1]cuda:0", permute_337: "f32[2304, 768][768, 1]cuda:0", div_24: "f32[1, 64, 1][64, 1, 1]cuda:0", tangents_1: "f32[1, 1, 50304][50304, 50304, 1]cuda:0"): + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:238 in forward, code: logits = self.lm_head( + view_146: "f32[1, 50304][50304, 1]cuda:0" = torch.ops.aten.reshape.default(tangents_1, [1, 50304]); tangents_1 = None + permute_97: "f32[50304, 1][1, 50304]cuda:0" = torch.ops.aten.permute.default(view_146, [1, 0]) + mm_1: "f32[50304, 768][768, 1]cuda:0" = torch.ops.aten.mm.default(permute_97, view_144); permute_97 = view_144 = None + permute_98: "f32[768, 50304][1, 768]cuda:0" = torch.ops.aten.permute.default(mm_1, [1, 0]); mm_1 = None + mm_2: "f32[1, 768][768, 1]cuda:0" = torch.ops.aten.mm.default(view_146, permute_99); view_146 = permute_99 = None + view_147: "f32[1, 1, 768][768, 768, 1]cuda:0" = torch.ops.aten.reshape.default(mm_2, [1, 1, 768]); mm_2 = None + permute_100: "f32[50304, 768][768, 1]cuda:0" = torch.ops.aten.permute.default(permute_98, [1, 0]); permute_98 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:239 in forward, code: x[:, [-1], :] + full_default_1: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.full.default([1, 64, 768], 0, dtype = torch.float32, layout = torch.strided, device = device(type='cuda', index=0), pin_memory = False) + index_put: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.index_put_.default(full_default_1, [None, full_default], view_147, True); full_default_1 = full_default = view_147 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:43 in forward, code: return F.layer_norm(input, self.weight.shape, self.weight, self.bias, 1e-5) + mul_99: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.mul.Tensor(index_put, primals_148); primals_148 = None + mul_100: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.mul.Tensor(mul_99, 768) + sum_1: "f32[1, 64, 1][64, 1, 1]cuda:0" = torch.ops.aten.sum.dim_IntList(mul_99, [2], True) + mul_101: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.mul.Tensor(mul_99, mul_96); mul_99 = None + sum_2: "f32[1, 64, 1][64, 1, 1]cuda:0" = torch.ops.aten.sum.dim_IntList(mul_101, [2], True); mul_101 = None + mul_102: "f32[1, 64, 768][49152, 768, 
1]cuda:0" = torch.ops.aten.mul.Tensor(mul_96, sum_2); sum_2 = None + sub_26: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.sub.Tensor(mul_100, sum_1); mul_100 = sum_1 = None + sub_27: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.sub.Tensor(sub_26, mul_102); sub_26 = mul_102 = None + mul_103: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.mul.Tensor(div, sub_27); div = sub_27 = None + mul_104: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.mul.Tensor(index_put, mul_96); mul_96 = None + sum_3: "f32[768][1]cuda:0" = torch.ops.aten.sum.dim_IntList(mul_104, [0, 1]); mul_104 = None + sum_4: "f32[768][1]cuda:0" = torch.ops.aten.sum.dim_IntList(index_put, [0, 1]); index_put = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:130 in forward, code: x = self.c_proj(x) + view_148: "f32[64, 768][768, 1]cuda:0" = torch.ops.aten.reshape.default(mul_103, [64, 768]) + mm_3: "f32[64, 3072][3072, 1]cuda:0" = torch.ops.aten.mm.default(view_148, permute_101); permute_101 = None + permute_102: "f32[768, 64][1, 768]cuda:0" = torch.ops.aten.permute.default(view_148, [1, 0]) + mm_4: "f32[768, 3072][3072, 1]cuda:0" = torch.ops.aten.mm.default(permute_102, view_142); permute_102 = view_142 = None + permute_103: "f32[3072, 768][1, 3072]cuda:0" = torch.ops.aten.permute.default(mm_4, [1, 0]); mm_4 = None + sum_5: "f32[1, 768][768, 1]cuda:0" = torch.ops.aten.sum.dim_IntList(view_148, [0], True); view_148 = None + view_149: "f32[768][1]cuda:0" = torch.ops.aten.reshape.default(sum_5, [768]); sum_5 = None + permute_104: "f32[768, 3072][3072, 1]cuda:0" = torch.ops.aten.permute.default(permute_103, [1, 0]); permute_103 = None + view_150: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.reshape.default(mm_3, [1, 64, 3072]); mm_3 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:128 in forward, code: x = self.c_fc(x) + view_141: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.reshape.default(addmm_46, [1, 64, 3072]); addmm_46 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:25 in new_gelu, code: 0.5 + mul_92: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.mul.Tensor(view_141, 0.5) + mul_105: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.mul.Tensor(view_150, mul_92); mul_92 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:29 in new_gelu, code: + torch.tanh(math.sqrt(2.0 / math.pi) * (x + 0.044715 * torch.pow(x, 3.0))) + pow_12: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.pow.Tensor_Scalar(view_141, 3.0) + mul_93: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.mul.Tensor(pow_12, 0.044715); pow_12 = None + add_94: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.add.Tensor(view_141, mul_93); mul_93 = None + mul_94: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.mul.Tensor(add_94, 0.7978845608028654); add_94 = None + tanh_11: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.tanh.default(mul_94); mul_94 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:28 in new_gelu, code: 1.0 + add_95: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.add.Tensor(tanh_11, 1.0) + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:25 in new_gelu, code: 0.5 + mul_106: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.mul.Tensor(view_150, add_95); 
view_150 = add_95 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:29 in new_gelu, code: + torch.tanh(math.sqrt(2.0 / math.pi) * (x + 0.044715 * torch.pow(x, 3.0))) + mul_107: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.mul.Tensor(tanh_11, tanh_11); tanh_11 = None + sub_28: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.sub.Tensor(1, mul_107); mul_107 = None + mul_108: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.mul.Tensor(mul_105, sub_28); mul_105 = sub_28 = None + mul_109: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.mul.Tensor(mul_108, 0.7978845608028654); mul_108 = None + mul_110: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.mul.Tensor(mul_109, 0.044715) + pow_13: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.pow.Tensor_Scalar(view_141, 2.0); view_141 = None + mul_111: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.mul.Scalar(pow_13, 3.0); pow_13 = None + mul_112: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.mul.Tensor(mul_110, mul_111); mul_110 = mul_111 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:29 in new_gelu, code: + torch.tanh(math.sqrt(2.0 / math.pi) * (x + 0.044715 * torch.pow(x, 3.0))) + add_99: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.add.Tensor(mul_109, mul_112); mul_109 = mul_112 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:25 in new_gelu, code: 0.5 + mul_113: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.mul.Tensor(mul_106, 0.5); mul_106 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:25 in new_gelu, code: 0.5 + add_100: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.add.Tensor(add_99, mul_113); add_99 = mul_113 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:128 in forward, code: x = self.c_fc(x) + view_151: "f32[64, 3072][3072, 1]cuda:0" = torch.ops.aten.reshape.default(add_100, [64, 3072]); add_100 = None + mm_5: "f32[64, 768][768, 1]cuda:0" = torch.ops.aten.mm.default(view_151, permute_105); permute_105 = None + permute_106: "f32[3072, 64][1, 3072]cuda:0" = torch.ops.aten.permute.default(view_151, [1, 0]) + mm_6: "f32[3072, 768][768, 1]cuda:0" = torch.ops.aten.mm.default(permute_106, view_140); permute_106 = view_140 = None + permute_107: "f32[768, 3072][1, 768]cuda:0" = torch.ops.aten.permute.default(mm_6, [1, 0]); mm_6 = None + sum_6: "f32[1, 3072][3072, 1]cuda:0" = torch.ops.aten.sum.dim_IntList(view_151, [0], True); view_151 = None + view_152: "f32[3072][1]cuda:0" = torch.ops.aten.reshape.default(sum_6, [3072]); sum_6 = None + permute_108: "f32[3072, 768][768, 1]cuda:0" = torch.ops.aten.permute.default(permute_107, [1, 0]); permute_107 = None + view_153: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.reshape.default(mm_5, [1, 64, 768]); mm_5 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:43 in forward, code: return F.layer_norm(input, self.weight.shape, self.weight, self.bias, 1e-5) + mul_115: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.mul.Tensor(view_153, primals_142); primals_142 = None + mul_116: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.mul.Tensor(mul_115, 768) + sum_7: "f32[1, 64, 1][64, 1, 1]cuda:0" = torch.ops.aten.sum.dim_IntList(mul_115, [2], True) + mul_117: "f32[1, 64, 768][49152, 768, 1]cuda:0" = 
torch.ops.aten.mul.Tensor(mul_115, mul_90); mul_115 = None + sum_8: "f32[1, 64, 1][64, 1, 1]cuda:0" = torch.ops.aten.sum.dim_IntList(mul_117, [2], True); mul_117 = None + mul_118: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.mul.Tensor(mul_90, sum_8); sum_8 = None + sub_30: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.sub.Tensor(mul_116, sum_7); mul_116 = sum_7 = None + sub_31: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.sub.Tensor(sub_30, mul_118); sub_30 = mul_118 = None + mul_119: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.mul.Tensor(div_1, sub_31); div_1 = sub_31 = None + mul_120: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.mul.Tensor(view_153, mul_90); mul_90 = None + sum_9: "f32[768][1]cuda:0" = torch.ops.aten.sum.dim_IntList(mul_120, [0, 1]); mul_120 = None + sum_10: "f32[768][1]cuda:0" = torch.ops.aten.sum.dim_IntList(view_153, [0, 1]); view_153 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:43 in forward, code: return F.layer_norm(input, self.weight.shape, self.weight, self.bias, 1e-5) + add_101: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.add.Tensor(mul_103, mul_119); mul_103 = mul_119 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:116 in forward, code: y = self.resid_dropout(self.c_proj(y)) + view_154: "f32[64, 768][768, 1]cuda:0" = torch.ops.aten.reshape.default(add_101, [64, 768]) + mm_7: "f32[64, 768][768, 1]cuda:0" = torch.ops.aten.mm.default(view_154, permute_109); permute_109 = None + permute_110: "f32[768, 64][1, 768]cuda:0" = torch.ops.aten.permute.default(view_154, [1, 0]) + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:112 in forward, code: y.transpose(1, 2).contiguous().view(B, T, C) + permute_92: "f32[1, 64, 12, 64][49152, 768, 64, 1]cuda:0" = torch.ops.aten.permute.default(getitem_126, [0, 2, 1, 3]) + view_137: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.reshape.default(permute_92, [1, 64, 768]); permute_92 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:116 in forward, code: y = self.resid_dropout(self.c_proj(y)) + view_138: "f32[64, 768][768, 1]cuda:0" = torch.ops.aten.reshape.default(view_137, [64, 768]); view_137 = None + mm_8: "f32[768, 768][768, 1]cuda:0" = torch.ops.aten.mm.default(permute_110, view_138); permute_110 = view_138 = None + permute_111: "f32[768, 768][1, 768]cuda:0" = torch.ops.aten.permute.default(mm_8, [1, 0]); mm_8 = None + sum_11: "f32[1, 768][768, 1]cuda:0" = torch.ops.aten.sum.dim_IntList(view_154, [0], True); view_154 = None + view_155: "f32[768][1]cuda:0" = torch.ops.aten.reshape.default(sum_11, [768]); sum_11 = None + permute_112: "f32[768, 768][768, 1]cuda:0" = torch.ops.aten.permute.default(permute_111, [1, 0]); permute_111 = None + view_156: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.reshape.default(mm_7, [1, 64, 768]); mm_7 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:112 in forward, code: y.transpose(1, 2).contiguous().view(B, T, C) + view_157: "f32[1, 64, 12, 64][49152, 768, 64, 1]cuda:0" = torch.ops.aten.reshape.default(view_156, [1, 64, 12, 64]); view_156 = None + permute_113: "f32[1, 12, 64, 64][49152, 64, 768, 1]cuda:0" = torch.ops.aten.permute.default(view_157, [0, 2, 1, 3]); view_157 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:96 in forward, code: y = 
torch.nn.functional.scaled_dot_product_attention( + _scaled_dot_product_efficient_attention_backward = torch.ops.aten._scaled_dot_product_efficient_attention_backward.default(permute_113, permute_90, permute_89, permute_91, None, getitem_126, getitem_127, getitem_128, getitem_129, 0.0, [True, True, True, False], True); permute_113 = permute_90 = permute_89 = permute_91 = getitem_126 = getitem_127 = getitem_128 = getitem_129 = None + getitem_134: "f32[1, 12, 64, 64][49152, 64, 768, 1]cuda:0" = _scaled_dot_product_efficient_attention_backward[0] + getitem_135: "f32[1, 12, 64, 64][49152, 64, 768, 1]cuda:0" = _scaled_dot_product_efficient_attention_backward[1] + getitem_136: "f32[1, 12, 64, 64][49152, 64, 768, 1]cuda:0" = _scaled_dot_product_efficient_attention_backward[2]; _scaled_dot_product_efficient_attention_backward = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:89 in forward, code: v = v.view(B, T, self.n_head, C // self.n_head).transpose( + permute_114: "f32[1, 64, 12, 64][49152, 768, 64, 1]cuda:0" = torch.ops.aten.permute.default(getitem_136, [0, 2, 1, 3]); getitem_136 = None + view_158: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.reshape.default(permute_114, [1, 64, 768]); permute_114 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:86 in forward, code: q = q.view(B, T, self.n_head, C // self.n_head).transpose( + permute_115: "f32[1, 64, 12, 64][49152, 768, 64, 1]cuda:0" = torch.ops.aten.permute.default(getitem_134, [0, 2, 1, 3]); getitem_134 = None + view_159: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.reshape.default(permute_115, [1, 64, 768]); permute_115 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:83 in forward, code: k = k.view(B, T, self.n_head, C // self.n_head).transpose( + permute_116: "f32[1, 64, 12, 64][49152, 768, 64, 1]cuda:0" = torch.ops.aten.permute.default(getitem_135, [0, 2, 1, 3]); getitem_135 = None + view_160: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.reshape.default(permute_116, [1, 64, 768]); permute_116 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:82 in forward, code: q, k, v = self.c_attn(x).split(self.n_embd, dim=2) + cat: "f32[1, 64, 2304][147456, 2304, 1]cuda:0" = torch.ops.aten.cat.default([view_159, view_160, view_158], 2); view_159 = view_160 = view_158 = None + view_161: "f32[64, 2304][2304, 1]cuda:0" = torch.ops.aten.reshape.default(cat, [64, 2304]); cat = None + mm_9: "f32[64, 768][768, 1]cuda:0" = torch.ops.aten.mm.default(view_161, permute_117); permute_117 = None + permute_118: "f32[2304, 64][1, 2304]cuda:0" = torch.ops.aten.permute.default(view_161, [1, 0]) + mm_10: "f32[2304, 768][768, 1]cuda:0" = torch.ops.aten.mm.default(permute_118, view_132); permute_118 = view_132 = None + permute_119: "f32[768, 2304][1, 768]cuda:0" = torch.ops.aten.permute.default(mm_10, [1, 0]); mm_10 = None + sum_12: "f32[1, 2304][2304, 1]cuda:0" = torch.ops.aten.sum.dim_IntList(view_161, [0], True); view_161 = None + view_162: "f32[2304][1]cuda:0" = torch.ops.aten.reshape.default(sum_12, [2304]); sum_12 = None + permute_120: "f32[2304, 768][768, 1]cuda:0" = torch.ops.aten.permute.default(permute_119, [1, 0]); permute_119 = None + view_163: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.reshape.default(mm_9, [1, 64, 768]); mm_9 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:43 in forward, code: return 
F.layer_norm(input, self.weight.shape, self.weight, self.bias, 1e-5) + mul_122: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.mul.Tensor(view_163, primals_136); primals_136 = None + mul_123: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.mul.Tensor(mul_122, 768) + sum_13: "f32[1, 64, 1][64, 1, 1]cuda:0" = torch.ops.aten.sum.dim_IntList(mul_122, [2], True) + mul_124: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.mul.Tensor(mul_122, mul_88); mul_122 = None + sum_14: "f32[1, 64, 1][64, 1, 1]cuda:0" = torch.ops.aten.sum.dim_IntList(mul_124, [2], True); mul_124 = None + mul_125: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.mul.Tensor(mul_88, sum_14); sum_14 = None + sub_33: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.sub.Tensor(mul_123, sum_13); mul_123 = sum_13 = None + sub_34: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.sub.Tensor(sub_33, mul_125); sub_33 = mul_125 = None + mul_126: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.mul.Tensor(div_2, sub_34); div_2 = sub_34 = None + mul_127: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.mul.Tensor(view_163, mul_88); mul_88 = None + sum_15: "f32[768][1]cuda:0" = torch.ops.aten.sum.dim_IntList(mul_127, [0, 1]); mul_127 = None + sum_16: "f32[768][1]cuda:0" = torch.ops.aten.sum.dim_IntList(view_163, [0, 1]); view_163 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:43 in forward, code: return F.layer_norm(input, self.weight.shape, self.weight, self.bias, 1e-5) + add_102: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.add.Tensor(add_101, mul_126); add_101 = mul_126 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:130 in forward, code: x = self.c_proj(x) + view_164: "f32[64, 768][768, 1]cuda:0" = torch.ops.aten.reshape.default(add_102, [64, 768]) + mm_11: "f32[64, 3072][3072, 1]cuda:0" = torch.ops.aten.mm.default(view_164, permute_121); permute_121 = None + permute_122: "f32[768, 64][1, 768]cuda:0" = torch.ops.aten.permute.default(view_164, [1, 0]) + mm_12: "f32[768, 3072][3072, 1]cuda:0" = torch.ops.aten.mm.default(permute_122, view_130); permute_122 = view_130 = None + permute_123: "f32[3072, 768][1, 3072]cuda:0" = torch.ops.aten.permute.default(mm_12, [1, 0]); mm_12 = None + sum_17: "f32[1, 768][768, 1]cuda:0" = torch.ops.aten.sum.dim_IntList(view_164, [0], True); view_164 = None + view_165: "f32[768][1]cuda:0" = torch.ops.aten.reshape.default(sum_17, [768]); sum_17 = None + permute_124: "f32[768, 3072][3072, 1]cuda:0" = torch.ops.aten.permute.default(permute_123, [1, 0]); permute_123 = None + view_166: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.reshape.default(mm_11, [1, 64, 3072]); mm_11 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:128 in forward, code: x = self.c_fc(x) + view_129: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.reshape.default(addmm_42, [1, 64, 3072]); addmm_42 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:25 in new_gelu, code: 0.5 + mul_84: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.mul.Tensor(view_129, 0.5) + mul_128: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.mul.Tensor(view_166, mul_84); mul_84 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:29 in new_gelu, code: + torch.tanh(math.sqrt(2.0 / math.pi) * (x + 0.044715 * torch.pow(x, 3.0))) + pow_11: "f32[1, 64, 
3072][196608, 3072, 1]cuda:0" = torch.ops.aten.pow.Tensor_Scalar(view_129, 3.0) + mul_85: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.mul.Tensor(pow_11, 0.044715); pow_11 = None + add_86: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.add.Tensor(view_129, mul_85); mul_85 = None + mul_86: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.mul.Tensor(add_86, 0.7978845608028654); add_86 = None + tanh_10: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.tanh.default(mul_86); mul_86 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:28 in new_gelu, code: 1.0 + add_87: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.add.Tensor(tanh_10, 1.0) + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:25 in new_gelu, code: 0.5 + mul_129: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.mul.Tensor(view_166, add_87); view_166 = add_87 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:29 in new_gelu, code: + torch.tanh(math.sqrt(2.0 / math.pi) * (x + 0.044715 * torch.pow(x, 3.0))) + mul_130: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.mul.Tensor(tanh_10, tanh_10); tanh_10 = None + sub_35: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.sub.Tensor(1, mul_130); mul_130 = None + mul_131: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.mul.Tensor(mul_128, sub_35); mul_128 = sub_35 = None + mul_132: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.mul.Tensor(mul_131, 0.7978845608028654); mul_131 = None + mul_133: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.mul.Tensor(mul_132, 0.044715) + pow_14: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.pow.Tensor_Scalar(view_129, 2.0); view_129 = None + mul_134: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.mul.Scalar(pow_14, 3.0); pow_14 = None + mul_135: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.mul.Tensor(mul_133, mul_134); mul_133 = mul_134 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:29 in new_gelu, code: + torch.tanh(math.sqrt(2.0 / math.pi) * (x + 0.044715 * torch.pow(x, 3.0))) + add_103: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.add.Tensor(mul_132, mul_135); mul_132 = mul_135 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:25 in new_gelu, code: 0.5 + mul_136: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.mul.Tensor(mul_129, 0.5); mul_129 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:25 in new_gelu, code: 0.5 + add_104: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.add.Tensor(add_103, mul_136); add_103 = mul_136 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:128 in forward, code: x = self.c_fc(x) + view_167: "f32[64, 3072][3072, 1]cuda:0" = torch.ops.aten.reshape.default(add_104, [64, 3072]); add_104 = None + mm_13: "f32[64, 768][768, 1]cuda:0" = torch.ops.aten.mm.default(view_167, permute_125); permute_125 = None + permute_126: "f32[3072, 64][1, 3072]cuda:0" = torch.ops.aten.permute.default(view_167, [1, 0]) + mm_14: "f32[3072, 768][768, 1]cuda:0" = torch.ops.aten.mm.default(permute_126, view_128); permute_126 = view_128 = None + permute_127: "f32[768, 3072][1, 768]cuda:0" = torch.ops.aten.permute.default(mm_14, [1, 0]); mm_14 = None + sum_18: "f32[1, 3072][3072, 
1]cuda:0" = torch.ops.aten.sum.dim_IntList(view_167, [0], True); view_167 = None + view_168: "f32[3072][1]cuda:0" = torch.ops.aten.reshape.default(sum_18, [3072]); sum_18 = None + permute_128: "f32[3072, 768][768, 1]cuda:0" = torch.ops.aten.permute.default(permute_127, [1, 0]); permute_127 = None + view_169: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.reshape.default(mm_13, [1, 64, 768]); mm_13 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:43 in forward, code: return F.layer_norm(input, self.weight.shape, self.weight, self.bias, 1e-5) + mul_138: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.mul.Tensor(view_169, primals_130); primals_130 = None + mul_139: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.mul.Tensor(mul_138, 768) + sum_19: "f32[1, 64, 1][64, 1, 1]cuda:0" = torch.ops.aten.sum.dim_IntList(mul_138, [2], True) + mul_140: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.mul.Tensor(mul_138, mul_82); mul_138 = None + sum_20: "f32[1, 64, 1][64, 1, 1]cuda:0" = torch.ops.aten.sum.dim_IntList(mul_140, [2], True); mul_140 = None + mul_141: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.mul.Tensor(mul_82, sum_20); sum_20 = None + sub_37: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.sub.Tensor(mul_139, sum_19); mul_139 = sum_19 = None + sub_38: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.sub.Tensor(sub_37, mul_141); sub_37 = mul_141 = None + mul_142: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.mul.Tensor(div_3, sub_38); div_3 = sub_38 = None + mul_143: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.mul.Tensor(view_169, mul_82); mul_82 = None + sum_21: "f32[768][1]cuda:0" = torch.ops.aten.sum.dim_IntList(mul_143, [0, 1]); mul_143 = None + sum_22: "f32[768][1]cuda:0" = torch.ops.aten.sum.dim_IntList(view_169, [0, 1]); view_169 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:43 in forward, code: return F.layer_norm(input, self.weight.shape, self.weight, self.bias, 1e-5) + add_105: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.add.Tensor(add_102, mul_142); add_102 = mul_142 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:116 in forward, code: y = self.resid_dropout(self.c_proj(y)) + view_170: "f32[64, 768][768, 1]cuda:0" = torch.ops.aten.reshape.default(add_105, [64, 768]) + mm_15: "f32[64, 768][768, 1]cuda:0" = torch.ops.aten.mm.default(view_170, permute_129); permute_129 = None + permute_130: "f32[768, 64][1, 768]cuda:0" = torch.ops.aten.permute.default(view_170, [1, 0]) + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:112 in forward, code: y.transpose(1, 2).contiguous().view(B, T, C) + permute_84: "f32[1, 64, 12, 64][49152, 768, 64, 1]cuda:0" = torch.ops.aten.permute.default(getitem_115, [0, 2, 1, 3]) + view_125: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.reshape.default(permute_84, [1, 64, 768]); permute_84 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:116 in forward, code: y = self.resid_dropout(self.c_proj(y)) + view_126: "f32[64, 768][768, 1]cuda:0" = torch.ops.aten.reshape.default(view_125, [64, 768]); view_125 = None + mm_16: "f32[768, 768][768, 1]cuda:0" = torch.ops.aten.mm.default(permute_130, view_126); permute_130 = view_126 = None + permute_131: "f32[768, 768][1, 768]cuda:0" = torch.ops.aten.permute.default(mm_16, [1, 0]); mm_16 = None + sum_23: "f32[1, 
768][768, 1]cuda:0" = torch.ops.aten.sum.dim_IntList(view_170, [0], True); view_170 = None + view_171: "f32[768][1]cuda:0" = torch.ops.aten.reshape.default(sum_23, [768]); sum_23 = None + permute_132: "f32[768, 768][768, 1]cuda:0" = torch.ops.aten.permute.default(permute_131, [1, 0]); permute_131 = None + view_172: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.reshape.default(mm_15, [1, 64, 768]); mm_15 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:112 in forward, code: y.transpose(1, 2).contiguous().view(B, T, C) + view_173: "f32[1, 64, 12, 64][49152, 768, 64, 1]cuda:0" = torch.ops.aten.reshape.default(view_172, [1, 64, 12, 64]); view_172 = None + permute_133: "f32[1, 12, 64, 64][49152, 64, 768, 1]cuda:0" = torch.ops.aten.permute.default(view_173, [0, 2, 1, 3]); view_173 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:96 in forward, code: y = torch.nn.functional.scaled_dot_product_attention( + _scaled_dot_product_efficient_attention_backward_1 = torch.ops.aten._scaled_dot_product_efficient_attention_backward.default(permute_133, permute_82, permute_81, permute_83, None, getitem_115, getitem_116, getitem_117, getitem_118, 0.0, [True, True, True, False], True); permute_133 = permute_82 = permute_81 = permute_83 = getitem_115 = getitem_116 = getitem_117 = getitem_118 = None + getitem_138: "f32[1, 12, 64, 64][49152, 64, 768, 1]cuda:0" = _scaled_dot_product_efficient_attention_backward_1[0] + getitem_139: "f32[1, 12, 64, 64][49152, 64, 768, 1]cuda:0" = _scaled_dot_product_efficient_attention_backward_1[1] + getitem_140: "f32[1, 12, 64, 64][49152, 64, 768, 1]cuda:0" = _scaled_dot_product_efficient_attention_backward_1[2]; _scaled_dot_product_efficient_attention_backward_1 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:89 in forward, code: v = v.view(B, T, self.n_head, C // self.n_head).transpose( + permute_134: "f32[1, 64, 12, 64][49152, 768, 64, 1]cuda:0" = torch.ops.aten.permute.default(getitem_140, [0, 2, 1, 3]); getitem_140 = None + view_174: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.reshape.default(permute_134, [1, 64, 768]); permute_134 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:86 in forward, code: q = q.view(B, T, self.n_head, C // self.n_head).transpose( + permute_135: "f32[1, 64, 12, 64][49152, 768, 64, 1]cuda:0" = torch.ops.aten.permute.default(getitem_138, [0, 2, 1, 3]); getitem_138 = None + view_175: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.reshape.default(permute_135, [1, 64, 768]); permute_135 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:83 in forward, code: k = k.view(B, T, self.n_head, C // self.n_head).transpose( + permute_136: "f32[1, 64, 12, 64][49152, 768, 64, 1]cuda:0" = torch.ops.aten.permute.default(getitem_139, [0, 2, 1, 3]); getitem_139 = None + view_176: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.reshape.default(permute_136, [1, 64, 768]); permute_136 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:82 in forward, code: q, k, v = self.c_attn(x).split(self.n_embd, dim=2) + cat_1: "f32[1, 64, 2304][147456, 2304, 1]cuda:0" = torch.ops.aten.cat.default([view_175, view_176, view_174], 2); view_175 = view_176 = view_174 = None + view_177: "f32[64, 2304][2304, 1]cuda:0" = torch.ops.aten.reshape.default(cat_1, [64, 2304]); cat_1 = None + mm_17: "f32[64, 
768][768, 1]cuda:0" = torch.ops.aten.mm.default(view_177, permute_137); permute_137 = None + permute_138: "f32[2304, 64][1, 2304]cuda:0" = torch.ops.aten.permute.default(view_177, [1, 0]) + mm_18: "f32[2304, 768][768, 1]cuda:0" = torch.ops.aten.mm.default(permute_138, view_120); permute_138 = view_120 = None + permute_139: "f32[768, 2304][1, 768]cuda:0" = torch.ops.aten.permute.default(mm_18, [1, 0]); mm_18 = None + sum_24: "f32[1, 2304][2304, 1]cuda:0" = torch.ops.aten.sum.dim_IntList(view_177, [0], True); view_177 = None + view_178: "f32[2304][1]cuda:0" = torch.ops.aten.reshape.default(sum_24, [2304]); sum_24 = None + permute_140: "f32[2304, 768][768, 1]cuda:0" = torch.ops.aten.permute.default(permute_139, [1, 0]); permute_139 = None + view_179: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.reshape.default(mm_17, [1, 64, 768]); mm_17 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:43 in forward, code: return F.layer_norm(input, self.weight.shape, self.weight, self.bias, 1e-5) + mul_145: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.mul.Tensor(view_179, primals_124); primals_124 = None + mul_146: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.mul.Tensor(mul_145, 768) + sum_25: "f32[1, 64, 1][64, 1, 1]cuda:0" = torch.ops.aten.sum.dim_IntList(mul_145, [2], True) + mul_147: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.mul.Tensor(mul_145, mul_80); mul_145 = None + sum_26: "f32[1, 64, 1][64, 1, 1]cuda:0" = torch.ops.aten.sum.dim_IntList(mul_147, [2], True); mul_147 = None + mul_148: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.mul.Tensor(mul_80, sum_26); sum_26 = None + sub_40: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.sub.Tensor(mul_146, sum_25); mul_146 = sum_25 = None + sub_41: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.sub.Tensor(sub_40, mul_148); sub_40 = mul_148 = None + mul_149: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.mul.Tensor(div_4, sub_41); div_4 = sub_41 = None + mul_150: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.mul.Tensor(view_179, mul_80); mul_80 = None + sum_27: "f32[768][1]cuda:0" = torch.ops.aten.sum.dim_IntList(mul_150, [0, 1]); mul_150 = None + sum_28: "f32[768][1]cuda:0" = torch.ops.aten.sum.dim_IntList(view_179, [0, 1]); view_179 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:43 in forward, code: return F.layer_norm(input, self.weight.shape, self.weight, self.bias, 1e-5) + add_106: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.add.Tensor(add_105, mul_149); add_105 = mul_149 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:130 in forward, code: x = self.c_proj(x) + view_180: "f32[64, 768][768, 1]cuda:0" = torch.ops.aten.reshape.default(add_106, [64, 768]) + mm_19: "f32[64, 3072][3072, 1]cuda:0" = torch.ops.aten.mm.default(view_180, permute_141); permute_141 = None + permute_142: "f32[768, 64][1, 768]cuda:0" = torch.ops.aten.permute.default(view_180, [1, 0]) + mm_20: "f32[768, 3072][3072, 1]cuda:0" = torch.ops.aten.mm.default(permute_142, view_118); permute_142 = view_118 = None + permute_143: "f32[3072, 768][1, 3072]cuda:0" = torch.ops.aten.permute.default(mm_20, [1, 0]); mm_20 = None + sum_29: "f32[1, 768][768, 1]cuda:0" = torch.ops.aten.sum.dim_IntList(view_180, [0], True); view_180 = None + view_181: "f32[768][1]cuda:0" = torch.ops.aten.reshape.default(sum_29, [768]); sum_29 = None + permute_144: "f32[768, 3072][3072, 
1]cuda:0" = torch.ops.aten.permute.default(permute_143, [1, 0]); permute_143 = None + view_182: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.reshape.default(mm_19, [1, 64, 3072]); mm_19 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:128 in forward, code: x = self.c_fc(x) + view_117: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.reshape.default(addmm_38, [1, 64, 3072]); addmm_38 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:25 in new_gelu, code: 0.5 + mul_76: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.mul.Tensor(view_117, 0.5) + mul_151: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.mul.Tensor(view_182, mul_76); mul_76 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:29 in new_gelu, code: + torch.tanh(math.sqrt(2.0 / math.pi) * (x + 0.044715 * torch.pow(x, 3.0))) + pow_10: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.pow.Tensor_Scalar(view_117, 3.0) + mul_77: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.mul.Tensor(pow_10, 0.044715); pow_10 = None + add_78: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.add.Tensor(view_117, mul_77); mul_77 = None + mul_78: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.mul.Tensor(add_78, 0.7978845608028654); add_78 = None + tanh_9: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.tanh.default(mul_78); mul_78 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:28 in new_gelu, code: 1.0 + add_79: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.add.Tensor(tanh_9, 1.0) + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:25 in new_gelu, code: 0.5 + mul_152: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.mul.Tensor(view_182, add_79); view_182 = add_79 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:29 in new_gelu, code: + torch.tanh(math.sqrt(2.0 / math.pi) * (x + 0.044715 * torch.pow(x, 3.0))) + mul_153: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.mul.Tensor(tanh_9, tanh_9); tanh_9 = None + sub_42: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.sub.Tensor(1, mul_153); mul_153 = None + mul_154: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.mul.Tensor(mul_151, sub_42); mul_151 = sub_42 = None + mul_155: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.mul.Tensor(mul_154, 0.7978845608028654); mul_154 = None + mul_156: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.mul.Tensor(mul_155, 0.044715) + pow_15: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.pow.Tensor_Scalar(view_117, 2.0); view_117 = None + mul_157: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.mul.Scalar(pow_15, 3.0); pow_15 = None + mul_158: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.mul.Tensor(mul_156, mul_157); mul_156 = mul_157 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:29 in new_gelu, code: + torch.tanh(math.sqrt(2.0 / math.pi) * (x + 0.044715 * torch.pow(x, 3.0))) + add_107: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.add.Tensor(mul_155, mul_158); mul_155 = mul_158 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:25 in new_gelu, code: 0.5 + mul_159: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = 
torch.ops.aten.mul.Tensor(mul_152, 0.5); mul_152 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:25 in new_gelu, code: 0.5 + add_108: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.add.Tensor(add_107, mul_159); add_107 = mul_159 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:128 in forward, code: x = self.c_fc(x) + view_183: "f32[64, 3072][3072, 1]cuda:0" = torch.ops.aten.reshape.default(add_108, [64, 3072]); add_108 = None + mm_21: "f32[64, 768][768, 1]cuda:0" = torch.ops.aten.mm.default(view_183, permute_145); permute_145 = None + permute_146: "f32[3072, 64][1, 3072]cuda:0" = torch.ops.aten.permute.default(view_183, [1, 0]) + mm_22: "f32[3072, 768][768, 1]cuda:0" = torch.ops.aten.mm.default(permute_146, view_116); permute_146 = view_116 = None + permute_147: "f32[768, 3072][1, 768]cuda:0" = torch.ops.aten.permute.default(mm_22, [1, 0]); mm_22 = None + sum_30: "f32[1, 3072][3072, 1]cuda:0" = torch.ops.aten.sum.dim_IntList(view_183, [0], True); view_183 = None + view_184: "f32[3072][1]cuda:0" = torch.ops.aten.reshape.default(sum_30, [3072]); sum_30 = None + permute_148: "f32[3072, 768][768, 1]cuda:0" = torch.ops.aten.permute.default(permute_147, [1, 0]); permute_147 = None + view_185: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.reshape.default(mm_21, [1, 64, 768]); mm_21 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:43 in forward, code: return F.layer_norm(input, self.weight.shape, self.weight, self.bias, 1e-5) + mul_161: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.mul.Tensor(view_185, primals_118); primals_118 = None + mul_162: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.mul.Tensor(mul_161, 768) + sum_31: "f32[1, 64, 1][64, 1, 1]cuda:0" = torch.ops.aten.sum.dim_IntList(mul_161, [2], True) + mul_163: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.mul.Tensor(mul_161, mul_74); mul_161 = None + sum_32: "f32[1, 64, 1][64, 1, 1]cuda:0" = torch.ops.aten.sum.dim_IntList(mul_163, [2], True); mul_163 = None + mul_164: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.mul.Tensor(mul_74, sum_32); sum_32 = None + sub_44: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.sub.Tensor(mul_162, sum_31); mul_162 = sum_31 = None + sub_45: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.sub.Tensor(sub_44, mul_164); sub_44 = mul_164 = None + mul_165: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.mul.Tensor(div_5, sub_45); div_5 = sub_45 = None + mul_166: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.mul.Tensor(view_185, mul_74); mul_74 = None + sum_33: "f32[768][1]cuda:0" = torch.ops.aten.sum.dim_IntList(mul_166, [0, 1]); mul_166 = None + sum_34: "f32[768][1]cuda:0" = torch.ops.aten.sum.dim_IntList(view_185, [0, 1]); view_185 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:43 in forward, code: return F.layer_norm(input, self.weight.shape, self.weight, self.bias, 1e-5) + add_109: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.add.Tensor(add_106, mul_165); add_106 = mul_165 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:116 in forward, code: y = self.resid_dropout(self.c_proj(y)) + view_186: "f32[64, 768][768, 1]cuda:0" = torch.ops.aten.reshape.default(add_109, [64, 768]) + mm_23: "f32[64, 768][768, 1]cuda:0" = torch.ops.aten.mm.default(view_186, permute_149); permute_149 = None + 
permute_150: "f32[768, 64][1, 768]cuda:0" = torch.ops.aten.permute.default(view_186, [1, 0]) + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:112 in forward, code: y.transpose(1, 2).contiguous().view(B, T, C) + permute_76: "f32[1, 64, 12, 64][49152, 768, 64, 1]cuda:0" = torch.ops.aten.permute.default(getitem_104, [0, 2, 1, 3]) + view_113: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.reshape.default(permute_76, [1, 64, 768]); permute_76 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:116 in forward, code: y = self.resid_dropout(self.c_proj(y)) + view_114: "f32[64, 768][768, 1]cuda:0" = torch.ops.aten.reshape.default(view_113, [64, 768]); view_113 = None + mm_24: "f32[768, 768][768, 1]cuda:0" = torch.ops.aten.mm.default(permute_150, view_114); permute_150 = view_114 = None + permute_151: "f32[768, 768][1, 768]cuda:0" = torch.ops.aten.permute.default(mm_24, [1, 0]); mm_24 = None + sum_35: "f32[1, 768][768, 1]cuda:0" = torch.ops.aten.sum.dim_IntList(view_186, [0], True); view_186 = None + view_187: "f32[768][1]cuda:0" = torch.ops.aten.reshape.default(sum_35, [768]); sum_35 = None + permute_152: "f32[768, 768][768, 1]cuda:0" = torch.ops.aten.permute.default(permute_151, [1, 0]); permute_151 = None + view_188: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.reshape.default(mm_23, [1, 64, 768]); mm_23 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:112 in forward, code: y.transpose(1, 2).contiguous().view(B, T, C) + view_189: "f32[1, 64, 12, 64][49152, 768, 64, 1]cuda:0" = torch.ops.aten.reshape.default(view_188, [1, 64, 12, 64]); view_188 = None + permute_153: "f32[1, 12, 64, 64][49152, 64, 768, 1]cuda:0" = torch.ops.aten.permute.default(view_189, [0, 2, 1, 3]); view_189 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:96 in forward, code: y = torch.nn.functional.scaled_dot_product_attention( + _scaled_dot_product_efficient_attention_backward_2 = torch.ops.aten._scaled_dot_product_efficient_attention_backward.default(permute_153, permute_74, permute_73, permute_75, None, getitem_104, getitem_105, getitem_106, getitem_107, 0.0, [True, True, True, False], True); permute_153 = permute_74 = permute_73 = permute_75 = getitem_104 = getitem_105 = getitem_106 = getitem_107 = None + getitem_142: "f32[1, 12, 64, 64][49152, 64, 768, 1]cuda:0" = _scaled_dot_product_efficient_attention_backward_2[0] + getitem_143: "f32[1, 12, 64, 64][49152, 64, 768, 1]cuda:0" = _scaled_dot_product_efficient_attention_backward_2[1] + getitem_144: "f32[1, 12, 64, 64][49152, 64, 768, 1]cuda:0" = _scaled_dot_product_efficient_attention_backward_2[2]; _scaled_dot_product_efficient_attention_backward_2 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:89 in forward, code: v = v.view(B, T, self.n_head, C // self.n_head).transpose( + permute_154: "f32[1, 64, 12, 64][49152, 768, 64, 1]cuda:0" = torch.ops.aten.permute.default(getitem_144, [0, 2, 1, 3]); getitem_144 = None + view_190: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.reshape.default(permute_154, [1, 64, 768]); permute_154 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:86 in forward, code: q = q.view(B, T, self.n_head, C // self.n_head).transpose( + permute_155: "f32[1, 64, 12, 64][49152, 768, 64, 1]cuda:0" = torch.ops.aten.permute.default(getitem_142, [0, 2, 1, 3]); getitem_142 = None + 
view_191: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.reshape.default(permute_155, [1, 64, 768]); permute_155 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:83 in forward, code: k = k.view(B, T, self.n_head, C // self.n_head).transpose( + permute_156: "f32[1, 64, 12, 64][49152, 768, 64, 1]cuda:0" = torch.ops.aten.permute.default(getitem_143, [0, 2, 1, 3]); getitem_143 = None + view_192: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.reshape.default(permute_156, [1, 64, 768]); permute_156 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:82 in forward, code: q, k, v = self.c_attn(x).split(self.n_embd, dim=2) + cat_2: "f32[1, 64, 2304][147456, 2304, 1]cuda:0" = torch.ops.aten.cat.default([view_191, view_192, view_190], 2); view_191 = view_192 = view_190 = None + view_193: "f32[64, 2304][2304, 1]cuda:0" = torch.ops.aten.reshape.default(cat_2, [64, 2304]); cat_2 = None + mm_25: "f32[64, 768][768, 1]cuda:0" = torch.ops.aten.mm.default(view_193, permute_157); permute_157 = None + permute_158: "f32[2304, 64][1, 2304]cuda:0" = torch.ops.aten.permute.default(view_193, [1, 0]) + mm_26: "f32[2304, 768][768, 1]cuda:0" = torch.ops.aten.mm.default(permute_158, view_108); permute_158 = view_108 = None + permute_159: "f32[768, 2304][1, 768]cuda:0" = torch.ops.aten.permute.default(mm_26, [1, 0]); mm_26 = None + sum_36: "f32[1, 2304][2304, 1]cuda:0" = torch.ops.aten.sum.dim_IntList(view_193, [0], True); view_193 = None + view_194: "f32[2304][1]cuda:0" = torch.ops.aten.reshape.default(sum_36, [2304]); sum_36 = None + permute_160: "f32[2304, 768][768, 1]cuda:0" = torch.ops.aten.permute.default(permute_159, [1, 0]); permute_159 = None + view_195: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.reshape.default(mm_25, [1, 64, 768]); mm_25 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:43 in forward, code: return F.layer_norm(input, self.weight.shape, self.weight, self.bias, 1e-5) + mul_168: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.mul.Tensor(view_195, primals_112); primals_112 = None + mul_169: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.mul.Tensor(mul_168, 768) + sum_37: "f32[1, 64, 1][64, 1, 1]cuda:0" = torch.ops.aten.sum.dim_IntList(mul_168, [2], True) + mul_170: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.mul.Tensor(mul_168, mul_72); mul_168 = None + sum_38: "f32[1, 64, 1][64, 1, 1]cuda:0" = torch.ops.aten.sum.dim_IntList(mul_170, [2], True); mul_170 = None + mul_171: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.mul.Tensor(mul_72, sum_38); sum_38 = None + sub_47: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.sub.Tensor(mul_169, sum_37); mul_169 = sum_37 = None + sub_48: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.sub.Tensor(sub_47, mul_171); sub_47 = mul_171 = None + mul_172: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.mul.Tensor(div_6, sub_48); div_6 = sub_48 = None + mul_173: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.mul.Tensor(view_195, mul_72); mul_72 = None + sum_39: "f32[768][1]cuda:0" = torch.ops.aten.sum.dim_IntList(mul_173, [0, 1]); mul_173 = None + sum_40: "f32[768][1]cuda:0" = torch.ops.aten.sum.dim_IntList(view_195, [0, 1]); view_195 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:43 in forward, code: return F.layer_norm(input, self.weight.shape, self.weight, self.bias, 1e-5) + add_110: "f32[1, 64, 
768][49152, 768, 1]cuda:0" = torch.ops.aten.add.Tensor(add_109, mul_172); add_109 = mul_172 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:130 in forward, code: x = self.c_proj(x) + view_196: "f32[64, 768][768, 1]cuda:0" = torch.ops.aten.reshape.default(add_110, [64, 768]) + mm_27: "f32[64, 3072][3072, 1]cuda:0" = torch.ops.aten.mm.default(view_196, permute_161); permute_161 = None + permute_162: "f32[768, 64][1, 768]cuda:0" = torch.ops.aten.permute.default(view_196, [1, 0]) + mm_28: "f32[768, 3072][3072, 1]cuda:0" = torch.ops.aten.mm.default(permute_162, view_106); permute_162 = view_106 = None + permute_163: "f32[3072, 768][1, 3072]cuda:0" = torch.ops.aten.permute.default(mm_28, [1, 0]); mm_28 = None + sum_41: "f32[1, 768][768, 1]cuda:0" = torch.ops.aten.sum.dim_IntList(view_196, [0], True); view_196 = None + view_197: "f32[768][1]cuda:0" = torch.ops.aten.reshape.default(sum_41, [768]); sum_41 = None + permute_164: "f32[768, 3072][3072, 1]cuda:0" = torch.ops.aten.permute.default(permute_163, [1, 0]); permute_163 = None + view_198: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.reshape.default(mm_27, [1, 64, 3072]); mm_27 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:128 in forward, code: x = self.c_fc(x) + view_105: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.reshape.default(addmm_34, [1, 64, 3072]); addmm_34 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:25 in new_gelu, code: 0.5 + mul_68: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.mul.Tensor(view_105, 0.5) + mul_174: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.mul.Tensor(view_198, mul_68); mul_68 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:29 in new_gelu, code: + torch.tanh(math.sqrt(2.0 / math.pi) * (x + 0.044715 * torch.pow(x, 3.0))) + pow_9: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.pow.Tensor_Scalar(view_105, 3.0) + mul_69: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.mul.Tensor(pow_9, 0.044715); pow_9 = None + add_70: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.add.Tensor(view_105, mul_69); mul_69 = None + mul_70: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.mul.Tensor(add_70, 0.7978845608028654); add_70 = None + tanh_8: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.tanh.default(mul_70); mul_70 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:28 in new_gelu, code: 1.0 + add_71: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.add.Tensor(tanh_8, 1.0) + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:25 in new_gelu, code: 0.5 + mul_175: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.mul.Tensor(view_198, add_71); view_198 = add_71 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:29 in new_gelu, code: + torch.tanh(math.sqrt(2.0 / math.pi) * (x + 0.044715 * torch.pow(x, 3.0))) + mul_176: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.mul.Tensor(tanh_8, tanh_8); tanh_8 = None + sub_49: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.sub.Tensor(1, mul_176); mul_176 = None + mul_177: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.mul.Tensor(mul_174, sub_49); mul_174 = sub_49 = None + mul_178: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = 
torch.ops.aten.mul.Tensor(mul_177, 0.7978845608028654); mul_177 = None + mul_179: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.mul.Tensor(mul_178, 0.044715) + pow_16: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.pow.Tensor_Scalar(view_105, 2.0); view_105 = None + mul_180: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.mul.Scalar(pow_16, 3.0); pow_16 = None + mul_181: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.mul.Tensor(mul_179, mul_180); mul_179 = mul_180 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:29 in new_gelu, code: + torch.tanh(math.sqrt(2.0 / math.pi) * (x + 0.044715 * torch.pow(x, 3.0))) + add_111: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.add.Tensor(mul_178, mul_181); mul_178 = mul_181 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:25 in new_gelu, code: 0.5 + mul_182: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.mul.Tensor(mul_175, 0.5); mul_175 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:25 in new_gelu, code: 0.5 + add_112: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.add.Tensor(add_111, mul_182); add_111 = mul_182 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:128 in forward, code: x = self.c_fc(x) + view_199: "f32[64, 3072][3072, 1]cuda:0" = torch.ops.aten.reshape.default(add_112, [64, 3072]); add_112 = None + mm_29: "f32[64, 768][768, 1]cuda:0" = torch.ops.aten.mm.default(view_199, permute_165); permute_165 = None + permute_166: "f32[3072, 64][1, 3072]cuda:0" = torch.ops.aten.permute.default(view_199, [1, 0]) + mm_30: "f32[3072, 768][768, 1]cuda:0" = torch.ops.aten.mm.default(permute_166, view_104); permute_166 = view_104 = None + permute_167: "f32[768, 3072][1, 768]cuda:0" = torch.ops.aten.permute.default(mm_30, [1, 0]); mm_30 = None + sum_42: "f32[1, 3072][3072, 1]cuda:0" = torch.ops.aten.sum.dim_IntList(view_199, [0], True); view_199 = None + view_200: "f32[3072][1]cuda:0" = torch.ops.aten.reshape.default(sum_42, [3072]); sum_42 = None + permute_168: "f32[3072, 768][768, 1]cuda:0" = torch.ops.aten.permute.default(permute_167, [1, 0]); permute_167 = None + view_201: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.reshape.default(mm_29, [1, 64, 768]); mm_29 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:43 in forward, code: return F.layer_norm(input, self.weight.shape, self.weight, self.bias, 1e-5) + mul_184: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.mul.Tensor(view_201, primals_106); primals_106 = None + mul_185: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.mul.Tensor(mul_184, 768) + sum_43: "f32[1, 64, 1][64, 1, 1]cuda:0" = torch.ops.aten.sum.dim_IntList(mul_184, [2], True) + mul_186: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.mul.Tensor(mul_184, mul_66); mul_184 = None + sum_44: "f32[1, 64, 1][64, 1, 1]cuda:0" = torch.ops.aten.sum.dim_IntList(mul_186, [2], True); mul_186 = None + mul_187: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.mul.Tensor(mul_66, sum_44); sum_44 = None + sub_51: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.sub.Tensor(mul_185, sum_43); mul_185 = sum_43 = None + sub_52: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.sub.Tensor(sub_51, mul_187); sub_51 = mul_187 = None + mul_188: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.mul.Tensor(div_7, 
sub_52); div_7 = sub_52 = None + mul_189: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.mul.Tensor(view_201, mul_66); mul_66 = None + sum_45: "f32[768][1]cuda:0" = torch.ops.aten.sum.dim_IntList(mul_189, [0, 1]); mul_189 = None + sum_46: "f32[768][1]cuda:0" = torch.ops.aten.sum.dim_IntList(view_201, [0, 1]); view_201 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:43 in forward, code: return F.layer_norm(input, self.weight.shape, self.weight, self.bias, 1e-5) + add_113: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.add.Tensor(add_110, mul_188); add_110 = mul_188 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:116 in forward, code: y = self.resid_dropout(self.c_proj(y)) + view_202: "f32[64, 768][768, 1]cuda:0" = torch.ops.aten.reshape.default(add_113, [64, 768]) + mm_31: "f32[64, 768][768, 1]cuda:0" = torch.ops.aten.mm.default(view_202, permute_169); permute_169 = None + permute_170: "f32[768, 64][1, 768]cuda:0" = torch.ops.aten.permute.default(view_202, [1, 0]) + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:112 in forward, code: y.transpose(1, 2).contiguous().view(B, T, C) + permute_68: "f32[1, 64, 12, 64][49152, 768, 64, 1]cuda:0" = torch.ops.aten.permute.default(getitem_93, [0, 2, 1, 3]) + view_101: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.reshape.default(permute_68, [1, 64, 768]); permute_68 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:116 in forward, code: y = self.resid_dropout(self.c_proj(y)) + view_102: "f32[64, 768][768, 1]cuda:0" = torch.ops.aten.reshape.default(view_101, [64, 768]); view_101 = None + mm_32: "f32[768, 768][768, 1]cuda:0" = torch.ops.aten.mm.default(permute_170, view_102); permute_170 = view_102 = None + permute_171: "f32[768, 768][1, 768]cuda:0" = torch.ops.aten.permute.default(mm_32, [1, 0]); mm_32 = None + sum_47: "f32[1, 768][768, 1]cuda:0" = torch.ops.aten.sum.dim_IntList(view_202, [0], True); view_202 = None + view_203: "f32[768][1]cuda:0" = torch.ops.aten.reshape.default(sum_47, [768]); sum_47 = None + permute_172: "f32[768, 768][768, 1]cuda:0" = torch.ops.aten.permute.default(permute_171, [1, 0]); permute_171 = None + view_204: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.reshape.default(mm_31, [1, 64, 768]); mm_31 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:112 in forward, code: y.transpose(1, 2).contiguous().view(B, T, C) + view_205: "f32[1, 64, 12, 64][49152, 768, 64, 1]cuda:0" = torch.ops.aten.reshape.default(view_204, [1, 64, 12, 64]); view_204 = None + permute_173: "f32[1, 12, 64, 64][49152, 64, 768, 1]cuda:0" = torch.ops.aten.permute.default(view_205, [0, 2, 1, 3]); view_205 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:96 in forward, code: y = torch.nn.functional.scaled_dot_product_attention( + _scaled_dot_product_efficient_attention_backward_3 = torch.ops.aten._scaled_dot_product_efficient_attention_backward.default(permute_173, permute_66, permute_65, permute_67, None, getitem_93, getitem_94, getitem_95, getitem_96, 0.0, [True, True, True, False], True); permute_173 = permute_66 = permute_65 = permute_67 = getitem_93 = getitem_94 = getitem_95 = getitem_96 = None + getitem_146: "f32[1, 12, 64, 64][49152, 64, 768, 1]cuda:0" = _scaled_dot_product_efficient_attention_backward_3[0] + getitem_147: "f32[1, 12, 64, 64][49152, 64, 768, 1]cuda:0" = 
_scaled_dot_product_efficient_attention_backward_3[1] + getitem_148: "f32[1, 12, 64, 64][49152, 64, 768, 1]cuda:0" = _scaled_dot_product_efficient_attention_backward_3[2]; _scaled_dot_product_efficient_attention_backward_3 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:89 in forward, code: v = v.view(B, T, self.n_head, C // self.n_head).transpose( + permute_174: "f32[1, 64, 12, 64][49152, 768, 64, 1]cuda:0" = torch.ops.aten.permute.default(getitem_148, [0, 2, 1, 3]); getitem_148 = None + view_206: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.reshape.default(permute_174, [1, 64, 768]); permute_174 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:86 in forward, code: q = q.view(B, T, self.n_head, C // self.n_head).transpose( + permute_175: "f32[1, 64, 12, 64][49152, 768, 64, 1]cuda:0" = torch.ops.aten.permute.default(getitem_146, [0, 2, 1, 3]); getitem_146 = None + view_207: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.reshape.default(permute_175, [1, 64, 768]); permute_175 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:83 in forward, code: k = k.view(B, T, self.n_head, C // self.n_head).transpose( + permute_176: "f32[1, 64, 12, 64][49152, 768, 64, 1]cuda:0" = torch.ops.aten.permute.default(getitem_147, [0, 2, 1, 3]); getitem_147 = None + view_208: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.reshape.default(permute_176, [1, 64, 768]); permute_176 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:82 in forward, code: q, k, v = self.c_attn(x).split(self.n_embd, dim=2) + cat_3: "f32[1, 64, 2304][147456, 2304, 1]cuda:0" = torch.ops.aten.cat.default([view_207, view_208, view_206], 2); view_207 = view_208 = view_206 = None + view_209: "f32[64, 2304][2304, 1]cuda:0" = torch.ops.aten.reshape.default(cat_3, [64, 2304]); cat_3 = None + mm_33: "f32[64, 768][768, 1]cuda:0" = torch.ops.aten.mm.default(view_209, permute_177); permute_177 = None + permute_178: "f32[2304, 64][1, 2304]cuda:0" = torch.ops.aten.permute.default(view_209, [1, 0]) + mm_34: "f32[2304, 768][768, 1]cuda:0" = torch.ops.aten.mm.default(permute_178, view_96); permute_178 = view_96 = None + permute_179: "f32[768, 2304][1, 768]cuda:0" = torch.ops.aten.permute.default(mm_34, [1, 0]); mm_34 = None + sum_48: "f32[1, 2304][2304, 1]cuda:0" = torch.ops.aten.sum.dim_IntList(view_209, [0], True); view_209 = None + view_210: "f32[2304][1]cuda:0" = torch.ops.aten.reshape.default(sum_48, [2304]); sum_48 = None + permute_180: "f32[2304, 768][768, 1]cuda:0" = torch.ops.aten.permute.default(permute_179, [1, 0]); permute_179 = None + view_211: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.reshape.default(mm_33, [1, 64, 768]); mm_33 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:43 in forward, code: return F.layer_norm(input, self.weight.shape, self.weight, self.bias, 1e-5) + mul_191: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.mul.Tensor(view_211, primals_100); primals_100 = None + mul_192: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.mul.Tensor(mul_191, 768) + sum_49: "f32[1, 64, 1][64, 1, 1]cuda:0" = torch.ops.aten.sum.dim_IntList(mul_191, [2], True) + mul_193: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.mul.Tensor(mul_191, mul_64); mul_191 = None + sum_50: "f32[1, 64, 1][64, 1, 1]cuda:0" = torch.ops.aten.sum.dim_IntList(mul_193, [2], True); mul_193 = None 
+ mul_194: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.mul.Tensor(mul_64, sum_50); sum_50 = None + sub_54: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.sub.Tensor(mul_192, sum_49); mul_192 = sum_49 = None + sub_55: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.sub.Tensor(sub_54, mul_194); sub_54 = mul_194 = None + mul_195: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.mul.Tensor(div_8, sub_55); div_8 = sub_55 = None + mul_196: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.mul.Tensor(view_211, mul_64); mul_64 = None + sum_51: "f32[768][1]cuda:0" = torch.ops.aten.sum.dim_IntList(mul_196, [0, 1]); mul_196 = None + sum_52: "f32[768][1]cuda:0" = torch.ops.aten.sum.dim_IntList(view_211, [0, 1]); view_211 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:43 in forward, code: return F.layer_norm(input, self.weight.shape, self.weight, self.bias, 1e-5) + add_114: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.add.Tensor(add_113, mul_195); add_113 = mul_195 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:130 in forward, code: x = self.c_proj(x) + view_212: "f32[64, 768][768, 1]cuda:0" = torch.ops.aten.reshape.default(add_114, [64, 768]) + mm_35: "f32[64, 3072][3072, 1]cuda:0" = torch.ops.aten.mm.default(view_212, permute_181); permute_181 = None + permute_182: "f32[768, 64][1, 768]cuda:0" = torch.ops.aten.permute.default(view_212, [1, 0]) + mm_36: "f32[768, 3072][3072, 1]cuda:0" = torch.ops.aten.mm.default(permute_182, view_94); permute_182 = view_94 = None + permute_183: "f32[3072, 768][1, 3072]cuda:0" = torch.ops.aten.permute.default(mm_36, [1, 0]); mm_36 = None + sum_53: "f32[1, 768][768, 1]cuda:0" = torch.ops.aten.sum.dim_IntList(view_212, [0], True); view_212 = None + view_213: "f32[768][1]cuda:0" = torch.ops.aten.reshape.default(sum_53, [768]); sum_53 = None + permute_184: "f32[768, 3072][3072, 1]cuda:0" = torch.ops.aten.permute.default(permute_183, [1, 0]); permute_183 = None + view_214: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.reshape.default(mm_35, [1, 64, 3072]); mm_35 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:128 in forward, code: x = self.c_fc(x) + view_93: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.reshape.default(addmm_30, [1, 64, 3072]); addmm_30 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:25 in new_gelu, code: 0.5 + mul_60: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.mul.Tensor(view_93, 0.5) + mul_197: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.mul.Tensor(view_214, mul_60); mul_60 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:29 in new_gelu, code: + torch.tanh(math.sqrt(2.0 / math.pi) * (x + 0.044715 * torch.pow(x, 3.0))) + pow_8: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.pow.Tensor_Scalar(view_93, 3.0) + mul_61: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.mul.Tensor(pow_8, 0.044715); pow_8 = None + add_62: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.add.Tensor(view_93, mul_61); mul_61 = None + mul_62: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.mul.Tensor(add_62, 0.7978845608028654); add_62 = None + tanh_7: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.tanh.default(mul_62); mul_62 = None + + # File: 
/data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:28 in new_gelu, code: 1.0 + add_63: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.add.Tensor(tanh_7, 1.0) + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:25 in new_gelu, code: 0.5 + mul_198: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.mul.Tensor(view_214, add_63); view_214 = add_63 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:29 in new_gelu, code: + torch.tanh(math.sqrt(2.0 / math.pi) * (x + 0.044715 * torch.pow(x, 3.0))) + mul_199: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.mul.Tensor(tanh_7, tanh_7); tanh_7 = None + sub_56: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.sub.Tensor(1, mul_199); mul_199 = None + mul_200: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.mul.Tensor(mul_197, sub_56); mul_197 = sub_56 = None + mul_201: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.mul.Tensor(mul_200, 0.7978845608028654); mul_200 = None + mul_202: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.mul.Tensor(mul_201, 0.044715) + pow_17: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.pow.Tensor_Scalar(view_93, 2.0); view_93 = None + mul_203: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.mul.Scalar(pow_17, 3.0); pow_17 = None + mul_204: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.mul.Tensor(mul_202, mul_203); mul_202 = mul_203 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:29 in new_gelu, code: + torch.tanh(math.sqrt(2.0 / math.pi) * (x + 0.044715 * torch.pow(x, 3.0))) + add_115: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.add.Tensor(mul_201, mul_204); mul_201 = mul_204 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:25 in new_gelu, code: 0.5 + mul_205: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.mul.Tensor(mul_198, 0.5); mul_198 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:25 in new_gelu, code: 0.5 + add_116: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.add.Tensor(add_115, mul_205); add_115 = mul_205 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:128 in forward, code: x = self.c_fc(x) + view_215: "f32[64, 3072][3072, 1]cuda:0" = torch.ops.aten.reshape.default(add_116, [64, 3072]); add_116 = None + mm_37: "f32[64, 768][768, 1]cuda:0" = torch.ops.aten.mm.default(view_215, permute_185); permute_185 = None + permute_186: "f32[3072, 64][1, 3072]cuda:0" = torch.ops.aten.permute.default(view_215, [1, 0]) + mm_38: "f32[3072, 768][768, 1]cuda:0" = torch.ops.aten.mm.default(permute_186, view_92); permute_186 = view_92 = None + permute_187: "f32[768, 3072][1, 768]cuda:0" = torch.ops.aten.permute.default(mm_38, [1, 0]); mm_38 = None + sum_54: "f32[1, 3072][3072, 1]cuda:0" = torch.ops.aten.sum.dim_IntList(view_215, [0], True); view_215 = None + view_216: "f32[3072][1]cuda:0" = torch.ops.aten.reshape.default(sum_54, [3072]); sum_54 = None + permute_188: "f32[3072, 768][768, 1]cuda:0" = torch.ops.aten.permute.default(permute_187, [1, 0]); permute_187 = None + view_217: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.reshape.default(mm_37, [1, 64, 768]); mm_37 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:43 in forward, code: return F.layer_norm(input, 
self.weight.shape, self.weight, self.bias, 1e-5) + mul_207: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.mul.Tensor(view_217, primals_94); primals_94 = None + mul_208: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.mul.Tensor(mul_207, 768) + sum_55: "f32[1, 64, 1][64, 1, 1]cuda:0" = torch.ops.aten.sum.dim_IntList(mul_207, [2], True) + mul_209: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.mul.Tensor(mul_207, mul_58); mul_207 = None + sum_56: "f32[1, 64, 1][64, 1, 1]cuda:0" = torch.ops.aten.sum.dim_IntList(mul_209, [2], True); mul_209 = None + mul_210: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.mul.Tensor(mul_58, sum_56); sum_56 = None + sub_58: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.sub.Tensor(mul_208, sum_55); mul_208 = sum_55 = None + sub_59: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.sub.Tensor(sub_58, mul_210); sub_58 = mul_210 = None + mul_211: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.mul.Tensor(div_9, sub_59); div_9 = sub_59 = None + mul_212: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.mul.Tensor(view_217, mul_58); mul_58 = None + sum_57: "f32[768][1]cuda:0" = torch.ops.aten.sum.dim_IntList(mul_212, [0, 1]); mul_212 = None + sum_58: "f32[768][1]cuda:0" = torch.ops.aten.sum.dim_IntList(view_217, [0, 1]); view_217 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:43 in forward, code: return F.layer_norm(input, self.weight.shape, self.weight, self.bias, 1e-5) + add_117: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.add.Tensor(add_114, mul_211); add_114 = mul_211 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:116 in forward, code: y = self.resid_dropout(self.c_proj(y)) + view_218: "f32[64, 768][768, 1]cuda:0" = torch.ops.aten.reshape.default(add_117, [64, 768]) + mm_39: "f32[64, 768][768, 1]cuda:0" = torch.ops.aten.mm.default(view_218, permute_189); permute_189 = None + permute_190: "f32[768, 64][1, 768]cuda:0" = torch.ops.aten.permute.default(view_218, [1, 0]) + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:112 in forward, code: y.transpose(1, 2).contiguous().view(B, T, C) + permute_60: "f32[1, 64, 12, 64][49152, 768, 64, 1]cuda:0" = torch.ops.aten.permute.default(getitem_82, [0, 2, 1, 3]) + view_89: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.reshape.default(permute_60, [1, 64, 768]); permute_60 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:116 in forward, code: y = self.resid_dropout(self.c_proj(y)) + view_90: "f32[64, 768][768, 1]cuda:0" = torch.ops.aten.reshape.default(view_89, [64, 768]); view_89 = None + mm_40: "f32[768, 768][768, 1]cuda:0" = torch.ops.aten.mm.default(permute_190, view_90); permute_190 = view_90 = None + permute_191: "f32[768, 768][1, 768]cuda:0" = torch.ops.aten.permute.default(mm_40, [1, 0]); mm_40 = None + sum_59: "f32[1, 768][768, 1]cuda:0" = torch.ops.aten.sum.dim_IntList(view_218, [0], True); view_218 = None + view_219: "f32[768][1]cuda:0" = torch.ops.aten.reshape.default(sum_59, [768]); sum_59 = None + permute_192: "f32[768, 768][768, 1]cuda:0" = torch.ops.aten.permute.default(permute_191, [1, 0]); permute_191 = None + view_220: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.reshape.default(mm_39, [1, 64, 768]); mm_39 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:112 in forward, code: y.transpose(1, 
2).contiguous().view(B, T, C) + view_221: "f32[1, 64, 12, 64][49152, 768, 64, 1]cuda:0" = torch.ops.aten.reshape.default(view_220, [1, 64, 12, 64]); view_220 = None + permute_193: "f32[1, 12, 64, 64][49152, 64, 768, 1]cuda:0" = torch.ops.aten.permute.default(view_221, [0, 2, 1, 3]); view_221 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:96 in forward, code: y = torch.nn.functional.scaled_dot_product_attention( + _scaled_dot_product_efficient_attention_backward_4 = torch.ops.aten._scaled_dot_product_efficient_attention_backward.default(permute_193, permute_58, permute_57, permute_59, None, getitem_82, getitem_83, getitem_84, getitem_85, 0.0, [True, True, True, False], True); permute_193 = permute_58 = permute_57 = permute_59 = getitem_82 = getitem_83 = getitem_84 = getitem_85 = None + getitem_150: "f32[1, 12, 64, 64][49152, 64, 768, 1]cuda:0" = _scaled_dot_product_efficient_attention_backward_4[0] + getitem_151: "f32[1, 12, 64, 64][49152, 64, 768, 1]cuda:0" = _scaled_dot_product_efficient_attention_backward_4[1] + getitem_152: "f32[1, 12, 64, 64][49152, 64, 768, 1]cuda:0" = _scaled_dot_product_efficient_attention_backward_4[2]; _scaled_dot_product_efficient_attention_backward_4 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:89 in forward, code: v = v.view(B, T, self.n_head, C // self.n_head).transpose( + permute_194: "f32[1, 64, 12, 64][49152, 768, 64, 1]cuda:0" = torch.ops.aten.permute.default(getitem_152, [0, 2, 1, 3]); getitem_152 = None + view_222: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.reshape.default(permute_194, [1, 64, 768]); permute_194 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:86 in forward, code: q = q.view(B, T, self.n_head, C // self.n_head).transpose( + permute_195: "f32[1, 64, 12, 64][49152, 768, 64, 1]cuda:0" = torch.ops.aten.permute.default(getitem_150, [0, 2, 1, 3]); getitem_150 = None + view_223: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.reshape.default(permute_195, [1, 64, 768]); permute_195 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:83 in forward, code: k = k.view(B, T, self.n_head, C // self.n_head).transpose( + permute_196: "f32[1, 64, 12, 64][49152, 768, 64, 1]cuda:0" = torch.ops.aten.permute.default(getitem_151, [0, 2, 1, 3]); getitem_151 = None + view_224: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.reshape.default(permute_196, [1, 64, 768]); permute_196 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:82 in forward, code: q, k, v = self.c_attn(x).split(self.n_embd, dim=2) + cat_4: "f32[1, 64, 2304][147456, 2304, 1]cuda:0" = torch.ops.aten.cat.default([view_223, view_224, view_222], 2); view_223 = view_224 = view_222 = None + view_225: "f32[64, 2304][2304, 1]cuda:0" = torch.ops.aten.reshape.default(cat_4, [64, 2304]); cat_4 = None + mm_41: "f32[64, 768][768, 1]cuda:0" = torch.ops.aten.mm.default(view_225, permute_197); permute_197 = None + permute_198: "f32[2304, 64][1, 2304]cuda:0" = torch.ops.aten.permute.default(view_225, [1, 0]) + mm_42: "f32[2304, 768][768, 1]cuda:0" = torch.ops.aten.mm.default(permute_198, view_84); permute_198 = view_84 = None + permute_199: "f32[768, 2304][1, 768]cuda:0" = torch.ops.aten.permute.default(mm_42, [1, 0]); mm_42 = None + sum_60: "f32[1, 2304][2304, 1]cuda:0" = torch.ops.aten.sum.dim_IntList(view_225, [0], True); view_225 = None + view_226: 
"f32[2304][1]cuda:0" = torch.ops.aten.reshape.default(sum_60, [2304]); sum_60 = None + permute_200: "f32[2304, 768][768, 1]cuda:0" = torch.ops.aten.permute.default(permute_199, [1, 0]); permute_199 = None + view_227: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.reshape.default(mm_41, [1, 64, 768]); mm_41 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:43 in forward, code: return F.layer_norm(input, self.weight.shape, self.weight, self.bias, 1e-5) + mul_214: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.mul.Tensor(view_227, primals_88); primals_88 = None + mul_215: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.mul.Tensor(mul_214, 768) + sum_61: "f32[1, 64, 1][64, 1, 1]cuda:0" = torch.ops.aten.sum.dim_IntList(mul_214, [2], True) + mul_216: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.mul.Tensor(mul_214, mul_56); mul_214 = None + sum_62: "f32[1, 64, 1][64, 1, 1]cuda:0" = torch.ops.aten.sum.dim_IntList(mul_216, [2], True); mul_216 = None + mul_217: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.mul.Tensor(mul_56, sum_62); sum_62 = None + sub_61: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.sub.Tensor(mul_215, sum_61); mul_215 = sum_61 = None + sub_62: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.sub.Tensor(sub_61, mul_217); sub_61 = mul_217 = None + mul_218: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.mul.Tensor(div_10, sub_62); div_10 = sub_62 = None + mul_219: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.mul.Tensor(view_227, mul_56); mul_56 = None + sum_63: "f32[768][1]cuda:0" = torch.ops.aten.sum.dim_IntList(mul_219, [0, 1]); mul_219 = None + sum_64: "f32[768][1]cuda:0" = torch.ops.aten.sum.dim_IntList(view_227, [0, 1]); view_227 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:43 in forward, code: return F.layer_norm(input, self.weight.shape, self.weight, self.bias, 1e-5) + add_118: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.add.Tensor(add_117, mul_218); add_117 = mul_218 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:130 in forward, code: x = self.c_proj(x) + view_228: "f32[64, 768][768, 1]cuda:0" = torch.ops.aten.reshape.default(add_118, [64, 768]) + mm_43: "f32[64, 3072][3072, 1]cuda:0" = torch.ops.aten.mm.default(view_228, permute_201); permute_201 = None + permute_202: "f32[768, 64][1, 768]cuda:0" = torch.ops.aten.permute.default(view_228, [1, 0]) + mm_44: "f32[768, 3072][3072, 1]cuda:0" = torch.ops.aten.mm.default(permute_202, view_82); permute_202 = view_82 = None + permute_203: "f32[3072, 768][1, 3072]cuda:0" = torch.ops.aten.permute.default(mm_44, [1, 0]); mm_44 = None + sum_65: "f32[1, 768][768, 1]cuda:0" = torch.ops.aten.sum.dim_IntList(view_228, [0], True); view_228 = None + view_229: "f32[768][1]cuda:0" = torch.ops.aten.reshape.default(sum_65, [768]); sum_65 = None + permute_204: "f32[768, 3072][3072, 1]cuda:0" = torch.ops.aten.permute.default(permute_203, [1, 0]); permute_203 = None + view_230: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.reshape.default(mm_43, [1, 64, 3072]); mm_43 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:128 in forward, code: x = self.c_fc(x) + view_81: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.reshape.default(addmm_26, [1, 64, 3072]); addmm_26 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:25 in 
new_gelu, code: 0.5 + mul_52: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.mul.Tensor(view_81, 0.5) + mul_220: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.mul.Tensor(view_230, mul_52); mul_52 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:29 in new_gelu, code: + torch.tanh(math.sqrt(2.0 / math.pi) * (x + 0.044715 * torch.pow(x, 3.0))) + pow_7: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.pow.Tensor_Scalar(view_81, 3.0) + mul_53: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.mul.Tensor(pow_7, 0.044715); pow_7 = None + add_54: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.add.Tensor(view_81, mul_53); mul_53 = None + mul_54: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.mul.Tensor(add_54, 0.7978845608028654); add_54 = None + tanh_6: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.tanh.default(mul_54); mul_54 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:28 in new_gelu, code: 1.0 + add_55: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.add.Tensor(tanh_6, 1.0) + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:25 in new_gelu, code: 0.5 + mul_221: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.mul.Tensor(view_230, add_55); view_230 = add_55 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:29 in new_gelu, code: + torch.tanh(math.sqrt(2.0 / math.pi) * (x + 0.044715 * torch.pow(x, 3.0))) + mul_222: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.mul.Tensor(tanh_6, tanh_6); tanh_6 = None + sub_63: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.sub.Tensor(1, mul_222); mul_222 = None + mul_223: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.mul.Tensor(mul_220, sub_63); mul_220 = sub_63 = None + mul_224: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.mul.Tensor(mul_223, 0.7978845608028654); mul_223 = None + mul_225: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.mul.Tensor(mul_224, 0.044715) + pow_18: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.pow.Tensor_Scalar(view_81, 2.0); view_81 = None + mul_226: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.mul.Scalar(pow_18, 3.0); pow_18 = None + mul_227: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.mul.Tensor(mul_225, mul_226); mul_225 = mul_226 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:29 in new_gelu, code: + torch.tanh(math.sqrt(2.0 / math.pi) * (x + 0.044715 * torch.pow(x, 3.0))) + add_119: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.add.Tensor(mul_224, mul_227); mul_224 = mul_227 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:25 in new_gelu, code: 0.5 + mul_228: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.mul.Tensor(mul_221, 0.5); mul_221 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:25 in new_gelu, code: 0.5 + add_120: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.add.Tensor(add_119, mul_228); add_119 = mul_228 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:128 in forward, code: x = self.c_fc(x) + view_231: "f32[64, 3072][3072, 1]cuda:0" = torch.ops.aten.reshape.default(add_120, [64, 3072]); add_120 = None + mm_45: "f32[64, 768][768, 1]cuda:0" = 
torch.ops.aten.mm.default(view_231, permute_205); permute_205 = None + permute_206: "f32[3072, 64][1, 3072]cuda:0" = torch.ops.aten.permute.default(view_231, [1, 0]) + mm_46: "f32[3072, 768][768, 1]cuda:0" = torch.ops.aten.mm.default(permute_206, view_80); permute_206 = view_80 = None + permute_207: "f32[768, 3072][1, 768]cuda:0" = torch.ops.aten.permute.default(mm_46, [1, 0]); mm_46 = None + sum_66: "f32[1, 3072][3072, 1]cuda:0" = torch.ops.aten.sum.dim_IntList(view_231, [0], True); view_231 = None + view_232: "f32[3072][1]cuda:0" = torch.ops.aten.reshape.default(sum_66, [3072]); sum_66 = None + permute_208: "f32[3072, 768][768, 1]cuda:0" = torch.ops.aten.permute.default(permute_207, [1, 0]); permute_207 = None + view_233: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.reshape.default(mm_45, [1, 64, 768]); mm_45 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:43 in forward, code: return F.layer_norm(input, self.weight.shape, self.weight, self.bias, 1e-5) + mul_230: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.mul.Tensor(view_233, primals_82); primals_82 = None + mul_231: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.mul.Tensor(mul_230, 768) + sum_67: "f32[1, 64, 1][64, 1, 1]cuda:0" = torch.ops.aten.sum.dim_IntList(mul_230, [2], True) + mul_232: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.mul.Tensor(mul_230, mul_50); mul_230 = None + sum_68: "f32[1, 64, 1][64, 1, 1]cuda:0" = torch.ops.aten.sum.dim_IntList(mul_232, [2], True); mul_232 = None + mul_233: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.mul.Tensor(mul_50, sum_68); sum_68 = None + sub_65: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.sub.Tensor(mul_231, sum_67); mul_231 = sum_67 = None + sub_66: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.sub.Tensor(sub_65, mul_233); sub_65 = mul_233 = None + mul_234: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.mul.Tensor(div_11, sub_66); div_11 = sub_66 = None + mul_235: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.mul.Tensor(view_233, mul_50); mul_50 = None + sum_69: "f32[768][1]cuda:0" = torch.ops.aten.sum.dim_IntList(mul_235, [0, 1]); mul_235 = None + sum_70: "f32[768][1]cuda:0" = torch.ops.aten.sum.dim_IntList(view_233, [0, 1]); view_233 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:43 in forward, code: return F.layer_norm(input, self.weight.shape, self.weight, self.bias, 1e-5) + add_121: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.add.Tensor(add_118, mul_234); add_118 = mul_234 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:116 in forward, code: y = self.resid_dropout(self.c_proj(y)) + view_234: "f32[64, 768][768, 1]cuda:0" = torch.ops.aten.reshape.default(add_121, [64, 768]) + mm_47: "f32[64, 768][768, 1]cuda:0" = torch.ops.aten.mm.default(view_234, permute_209); permute_209 = None + permute_210: "f32[768, 64][1, 768]cuda:0" = torch.ops.aten.permute.default(view_234, [1, 0]) + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:112 in forward, code: y.transpose(1, 2).contiguous().view(B, T, C) + permute_52: "f32[1, 64, 12, 64][49152, 768, 64, 1]cuda:0" = torch.ops.aten.permute.default(getitem_71, [0, 2, 1, 3]) + view_77: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.reshape.default(permute_52, [1, 64, 768]); permute_52 = None + + # File: 
/data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:116 in forward, code: y = self.resid_dropout(self.c_proj(y)) + view_78: "f32[64, 768][768, 1]cuda:0" = torch.ops.aten.reshape.default(view_77, [64, 768]); view_77 = None + mm_48: "f32[768, 768][768, 1]cuda:0" = torch.ops.aten.mm.default(permute_210, view_78); permute_210 = view_78 = None + permute_211: "f32[768, 768][1, 768]cuda:0" = torch.ops.aten.permute.default(mm_48, [1, 0]); mm_48 = None + sum_71: "f32[1, 768][768, 1]cuda:0" = torch.ops.aten.sum.dim_IntList(view_234, [0], True); view_234 = None + view_235: "f32[768][1]cuda:0" = torch.ops.aten.reshape.default(sum_71, [768]); sum_71 = None + permute_212: "f32[768, 768][768, 1]cuda:0" = torch.ops.aten.permute.default(permute_211, [1, 0]); permute_211 = None + view_236: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.reshape.default(mm_47, [1, 64, 768]); mm_47 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:112 in forward, code: y.transpose(1, 2).contiguous().view(B, T, C) + view_237: "f32[1, 64, 12, 64][49152, 768, 64, 1]cuda:0" = torch.ops.aten.reshape.default(view_236, [1, 64, 12, 64]); view_236 = None + permute_213: "f32[1, 12, 64, 64][49152, 64, 768, 1]cuda:0" = torch.ops.aten.permute.default(view_237, [0, 2, 1, 3]); view_237 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:96 in forward, code: y = torch.nn.functional.scaled_dot_product_attention( + _scaled_dot_product_efficient_attention_backward_5 = torch.ops.aten._scaled_dot_product_efficient_attention_backward.default(permute_213, permute_50, permute_49, permute_51, None, getitem_71, getitem_72, getitem_73, getitem_74, 0.0, [True, True, True, False], True); permute_213 = permute_50 = permute_49 = permute_51 = getitem_71 = getitem_72 = getitem_73 = getitem_74 = None + getitem_154: "f32[1, 12, 64, 64][49152, 64, 768, 1]cuda:0" = _scaled_dot_product_efficient_attention_backward_5[0] + getitem_155: "f32[1, 12, 64, 64][49152, 64, 768, 1]cuda:0" = _scaled_dot_product_efficient_attention_backward_5[1] + getitem_156: "f32[1, 12, 64, 64][49152, 64, 768, 1]cuda:0" = _scaled_dot_product_efficient_attention_backward_5[2]; _scaled_dot_product_efficient_attention_backward_5 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:89 in forward, code: v = v.view(B, T, self.n_head, C // self.n_head).transpose( + permute_214: "f32[1, 64, 12, 64][49152, 768, 64, 1]cuda:0" = torch.ops.aten.permute.default(getitem_156, [0, 2, 1, 3]); getitem_156 = None + view_238: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.reshape.default(permute_214, [1, 64, 768]); permute_214 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:86 in forward, code: q = q.view(B, T, self.n_head, C // self.n_head).transpose( + permute_215: "f32[1, 64, 12, 64][49152, 768, 64, 1]cuda:0" = torch.ops.aten.permute.default(getitem_154, [0, 2, 1, 3]); getitem_154 = None + view_239: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.reshape.default(permute_215, [1, 64, 768]); permute_215 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:83 in forward, code: k = k.view(B, T, self.n_head, C // self.n_head).transpose( + permute_216: "f32[1, 64, 12, 64][49152, 768, 64, 1]cuda:0" = torch.ops.aten.permute.default(getitem_155, [0, 2, 1, 3]); getitem_155 = None + view_240: "f32[1, 64, 768][49152, 768, 1]cuda:0" = 
torch.ops.aten.reshape.default(permute_216, [1, 64, 768]); permute_216 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:82 in forward, code: q, k, v = self.c_attn(x).split(self.n_embd, dim=2) + cat_5: "f32[1, 64, 2304][147456, 2304, 1]cuda:0" = torch.ops.aten.cat.default([view_239, view_240, view_238], 2); view_239 = view_240 = view_238 = None + view_241: "f32[64, 2304][2304, 1]cuda:0" = torch.ops.aten.reshape.default(cat_5, [64, 2304]); cat_5 = None + mm_49: "f32[64, 768][768, 1]cuda:0" = torch.ops.aten.mm.default(view_241, permute_217); permute_217 = None + permute_218: "f32[2304, 64][1, 2304]cuda:0" = torch.ops.aten.permute.default(view_241, [1, 0]) + mm_50: "f32[2304, 768][768, 1]cuda:0" = torch.ops.aten.mm.default(permute_218, view_72); permute_218 = view_72 = None + permute_219: "f32[768, 2304][1, 768]cuda:0" = torch.ops.aten.permute.default(mm_50, [1, 0]); mm_50 = None + sum_72: "f32[1, 2304][2304, 1]cuda:0" = torch.ops.aten.sum.dim_IntList(view_241, [0], True); view_241 = None + view_242: "f32[2304][1]cuda:0" = torch.ops.aten.reshape.default(sum_72, [2304]); sum_72 = None + permute_220: "f32[2304, 768][768, 1]cuda:0" = torch.ops.aten.permute.default(permute_219, [1, 0]); permute_219 = None + view_243: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.reshape.default(mm_49, [1, 64, 768]); mm_49 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:43 in forward, code: return F.layer_norm(input, self.weight.shape, self.weight, self.bias, 1e-5) + mul_237: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.mul.Tensor(view_243, primals_76); primals_76 = None + mul_238: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.mul.Tensor(mul_237, 768) + sum_73: "f32[1, 64, 1][64, 1, 1]cuda:0" = torch.ops.aten.sum.dim_IntList(mul_237, [2], True) + mul_239: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.mul.Tensor(mul_237, mul_48); mul_237 = None + sum_74: "f32[1, 64, 1][64, 1, 1]cuda:0" = torch.ops.aten.sum.dim_IntList(mul_239, [2], True); mul_239 = None + mul_240: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.mul.Tensor(mul_48, sum_74); sum_74 = None + sub_68: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.sub.Tensor(mul_238, sum_73); mul_238 = sum_73 = None + sub_69: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.sub.Tensor(sub_68, mul_240); sub_68 = mul_240 = None + mul_241: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.mul.Tensor(div_12, sub_69); div_12 = sub_69 = None + mul_242: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.mul.Tensor(view_243, mul_48); mul_48 = None + sum_75: "f32[768][1]cuda:0" = torch.ops.aten.sum.dim_IntList(mul_242, [0, 1]); mul_242 = None + sum_76: "f32[768][1]cuda:0" = torch.ops.aten.sum.dim_IntList(view_243, [0, 1]); view_243 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:43 in forward, code: return F.layer_norm(input, self.weight.shape, self.weight, self.bias, 1e-5) + add_122: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.add.Tensor(add_121, mul_241); add_121 = mul_241 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:130 in forward, code: x = self.c_proj(x) + view_244: "f32[64, 768][768, 1]cuda:0" = torch.ops.aten.reshape.default(add_122, [64, 768]) + mm_51: "f32[64, 3072][3072, 1]cuda:0" = torch.ops.aten.mm.default(view_244, permute_221); permute_221 = None + permute_222: "f32[768, 64][1, 768]cuda:0" = 
torch.ops.aten.permute.default(view_244, [1, 0]) + mm_52: "f32[768, 3072][3072, 1]cuda:0" = torch.ops.aten.mm.default(permute_222, view_70); permute_222 = view_70 = None + permute_223: "f32[3072, 768][1, 3072]cuda:0" = torch.ops.aten.permute.default(mm_52, [1, 0]); mm_52 = None + sum_77: "f32[1, 768][768, 1]cuda:0" = torch.ops.aten.sum.dim_IntList(view_244, [0], True); view_244 = None + view_245: "f32[768][1]cuda:0" = torch.ops.aten.reshape.default(sum_77, [768]); sum_77 = None + permute_224: "f32[768, 3072][3072, 1]cuda:0" = torch.ops.aten.permute.default(permute_223, [1, 0]); permute_223 = None + view_246: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.reshape.default(mm_51, [1, 64, 3072]); mm_51 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:128 in forward, code: x = self.c_fc(x) + view_69: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.reshape.default(addmm_22, [1, 64, 3072]); addmm_22 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:25 in new_gelu, code: 0.5 + mul_44: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.mul.Tensor(view_69, 0.5) + mul_243: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.mul.Tensor(view_246, mul_44); mul_44 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:29 in new_gelu, code: + torch.tanh(math.sqrt(2.0 / math.pi) * (x + 0.044715 * torch.pow(x, 3.0))) + pow_6: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.pow.Tensor_Scalar(view_69, 3.0) + mul_45: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.mul.Tensor(pow_6, 0.044715); pow_6 = None + add_46: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.add.Tensor(view_69, mul_45); mul_45 = None + mul_46: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.mul.Tensor(add_46, 0.7978845608028654); add_46 = None + tanh_5: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.tanh.default(mul_46); mul_46 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:28 in new_gelu, code: 1.0 + add_47: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.add.Tensor(tanh_5, 1.0) + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:25 in new_gelu, code: 0.5 + mul_244: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.mul.Tensor(view_246, add_47); view_246 = add_47 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:29 in new_gelu, code: + torch.tanh(math.sqrt(2.0 / math.pi) * (x + 0.044715 * torch.pow(x, 3.0))) + mul_245: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.mul.Tensor(tanh_5, tanh_5); tanh_5 = None + sub_70: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.sub.Tensor(1, mul_245); mul_245 = None + mul_246: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.mul.Tensor(mul_243, sub_70); mul_243 = sub_70 = None + mul_247: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.mul.Tensor(mul_246, 0.7978845608028654); mul_246 = None + mul_248: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.mul.Tensor(mul_247, 0.044715) + pow_19: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.pow.Tensor_Scalar(view_69, 2.0); view_69 = None + mul_249: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.mul.Scalar(pow_19, 3.0); pow_19 = None + mul_250: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.mul.Tensor(mul_248, mul_249); 
mul_248 = mul_249 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:29 in new_gelu, code: + torch.tanh(math.sqrt(2.0 / math.pi) * (x + 0.044715 * torch.pow(x, 3.0))) + add_123: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.add.Tensor(mul_247, mul_250); mul_247 = mul_250 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:25 in new_gelu, code: 0.5 + mul_251: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.mul.Tensor(mul_244, 0.5); mul_244 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:25 in new_gelu, code: 0.5 + add_124: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.add.Tensor(add_123, mul_251); add_123 = mul_251 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:128 in forward, code: x = self.c_fc(x) + view_247: "f32[64, 3072][3072, 1]cuda:0" = torch.ops.aten.reshape.default(add_124, [64, 3072]); add_124 = None + mm_53: "f32[64, 768][768, 1]cuda:0" = torch.ops.aten.mm.default(view_247, permute_225); permute_225 = None + permute_226: "f32[3072, 64][1, 3072]cuda:0" = torch.ops.aten.permute.default(view_247, [1, 0]) + mm_54: "f32[3072, 768][768, 1]cuda:0" = torch.ops.aten.mm.default(permute_226, view_68); permute_226 = view_68 = None + permute_227: "f32[768, 3072][1, 768]cuda:0" = torch.ops.aten.permute.default(mm_54, [1, 0]); mm_54 = None + sum_78: "f32[1, 3072][3072, 1]cuda:0" = torch.ops.aten.sum.dim_IntList(view_247, [0], True); view_247 = None + view_248: "f32[3072][1]cuda:0" = torch.ops.aten.reshape.default(sum_78, [3072]); sum_78 = None + permute_228: "f32[3072, 768][768, 1]cuda:0" = torch.ops.aten.permute.default(permute_227, [1, 0]); permute_227 = None + view_249: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.reshape.default(mm_53, [1, 64, 768]); mm_53 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:43 in forward, code: return F.layer_norm(input, self.weight.shape, self.weight, self.bias, 1e-5) + mul_253: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.mul.Tensor(view_249, primals_70); primals_70 = None + mul_254: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.mul.Tensor(mul_253, 768) + sum_79: "f32[1, 64, 1][64, 1, 1]cuda:0" = torch.ops.aten.sum.dim_IntList(mul_253, [2], True) + mul_255: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.mul.Tensor(mul_253, mul_42); mul_253 = None + sum_80: "f32[1, 64, 1][64, 1, 1]cuda:0" = torch.ops.aten.sum.dim_IntList(mul_255, [2], True); mul_255 = None + mul_256: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.mul.Tensor(mul_42, sum_80); sum_80 = None + sub_72: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.sub.Tensor(mul_254, sum_79); mul_254 = sum_79 = None + sub_73: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.sub.Tensor(sub_72, mul_256); sub_72 = mul_256 = None + mul_257: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.mul.Tensor(div_13, sub_73); div_13 = sub_73 = None + mul_258: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.mul.Tensor(view_249, mul_42); mul_42 = None + sum_81: "f32[768][1]cuda:0" = torch.ops.aten.sum.dim_IntList(mul_258, [0, 1]); mul_258 = None + sum_82: "f32[768][1]cuda:0" = torch.ops.aten.sum.dim_IntList(view_249, [0, 1]); view_249 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:43 in forward, code: return F.layer_norm(input, self.weight.shape, self.weight, 
self.bias, 1e-5) + add_125: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.add.Tensor(add_122, mul_257); add_122 = mul_257 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:116 in forward, code: y = self.resid_dropout(self.c_proj(y)) + view_250: "f32[64, 768][768, 1]cuda:0" = torch.ops.aten.reshape.default(add_125, [64, 768]) + mm_55: "f32[64, 768][768, 1]cuda:0" = torch.ops.aten.mm.default(view_250, permute_229); permute_229 = None + permute_230: "f32[768, 64][1, 768]cuda:0" = torch.ops.aten.permute.default(view_250, [1, 0]) + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:112 in forward, code: y.transpose(1, 2).contiguous().view(B, T, C) + permute_44: "f32[1, 64, 12, 64][49152, 768, 64, 1]cuda:0" = torch.ops.aten.permute.default(getitem_60, [0, 2, 1, 3]) + view_65: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.reshape.default(permute_44, [1, 64, 768]); permute_44 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:116 in forward, code: y = self.resid_dropout(self.c_proj(y)) + view_66: "f32[64, 768][768, 1]cuda:0" = torch.ops.aten.reshape.default(view_65, [64, 768]); view_65 = None + mm_56: "f32[768, 768][768, 1]cuda:0" = torch.ops.aten.mm.default(permute_230, view_66); permute_230 = view_66 = None + permute_231: "f32[768, 768][1, 768]cuda:0" = torch.ops.aten.permute.default(mm_56, [1, 0]); mm_56 = None + sum_83: "f32[1, 768][768, 1]cuda:0" = torch.ops.aten.sum.dim_IntList(view_250, [0], True); view_250 = None + view_251: "f32[768][1]cuda:0" = torch.ops.aten.reshape.default(sum_83, [768]); sum_83 = None + permute_232: "f32[768, 768][768, 1]cuda:0" = torch.ops.aten.permute.default(permute_231, [1, 0]); permute_231 = None + view_252: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.reshape.default(mm_55, [1, 64, 768]); mm_55 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:112 in forward, code: y.transpose(1, 2).contiguous().view(B, T, C) + view_253: "f32[1, 64, 12, 64][49152, 768, 64, 1]cuda:0" = torch.ops.aten.reshape.default(view_252, [1, 64, 12, 64]); view_252 = None + permute_233: "f32[1, 12, 64, 64][49152, 64, 768, 1]cuda:0" = torch.ops.aten.permute.default(view_253, [0, 2, 1, 3]); view_253 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:96 in forward, code: y = torch.nn.functional.scaled_dot_product_attention( + _scaled_dot_product_efficient_attention_backward_6 = torch.ops.aten._scaled_dot_product_efficient_attention_backward.default(permute_233, permute_42, permute_41, permute_43, None, getitem_60, getitem_61, getitem_62, getitem_63, 0.0, [True, True, True, False], True); permute_233 = permute_42 = permute_41 = permute_43 = getitem_60 = getitem_61 = getitem_62 = getitem_63 = None + getitem_158: "f32[1, 12, 64, 64][49152, 64, 768, 1]cuda:0" = _scaled_dot_product_efficient_attention_backward_6[0] + getitem_159: "f32[1, 12, 64, 64][49152, 64, 768, 1]cuda:0" = _scaled_dot_product_efficient_attention_backward_6[1] + getitem_160: "f32[1, 12, 64, 64][49152, 64, 768, 1]cuda:0" = _scaled_dot_product_efficient_attention_backward_6[2]; _scaled_dot_product_efficient_attention_backward_6 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:89 in forward, code: v = v.view(B, T, self.n_head, C // self.n_head).transpose( + permute_234: "f32[1, 64, 12, 64][49152, 768, 64, 1]cuda:0" = torch.ops.aten.permute.default(getitem_160, [0, 2, 
1, 3]); getitem_160 = None + view_254: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.reshape.default(permute_234, [1, 64, 768]); permute_234 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:86 in forward, code: q = q.view(B, T, self.n_head, C // self.n_head).transpose( + permute_235: "f32[1, 64, 12, 64][49152, 768, 64, 1]cuda:0" = torch.ops.aten.permute.default(getitem_158, [0, 2, 1, 3]); getitem_158 = None + view_255: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.reshape.default(permute_235, [1, 64, 768]); permute_235 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:83 in forward, code: k = k.view(B, T, self.n_head, C // self.n_head).transpose( + permute_236: "f32[1, 64, 12, 64][49152, 768, 64, 1]cuda:0" = torch.ops.aten.permute.default(getitem_159, [0, 2, 1, 3]); getitem_159 = None + view_256: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.reshape.default(permute_236, [1, 64, 768]); permute_236 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:82 in forward, code: q, k, v = self.c_attn(x).split(self.n_embd, dim=2) + cat_6: "f32[1, 64, 2304][147456, 2304, 1]cuda:0" = torch.ops.aten.cat.default([view_255, view_256, view_254], 2); view_255 = view_256 = view_254 = None + view_257: "f32[64, 2304][2304, 1]cuda:0" = torch.ops.aten.reshape.default(cat_6, [64, 2304]); cat_6 = None + mm_57: "f32[64, 768][768, 1]cuda:0" = torch.ops.aten.mm.default(view_257, permute_237); permute_237 = None + permute_238: "f32[2304, 64][1, 2304]cuda:0" = torch.ops.aten.permute.default(view_257, [1, 0]) + mm_58: "f32[2304, 768][768, 1]cuda:0" = torch.ops.aten.mm.default(permute_238, view_60); permute_238 = view_60 = None + permute_239: "f32[768, 2304][1, 768]cuda:0" = torch.ops.aten.permute.default(mm_58, [1, 0]); mm_58 = None + sum_84: "f32[1, 2304][2304, 1]cuda:0" = torch.ops.aten.sum.dim_IntList(view_257, [0], True); view_257 = None + view_258: "f32[2304][1]cuda:0" = torch.ops.aten.reshape.default(sum_84, [2304]); sum_84 = None + permute_240: "f32[2304, 768][768, 1]cuda:0" = torch.ops.aten.permute.default(permute_239, [1, 0]); permute_239 = None + view_259: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.reshape.default(mm_57, [1, 64, 768]); mm_57 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:43 in forward, code: return F.layer_norm(input, self.weight.shape, self.weight, self.bias, 1e-5) + mul_260: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.mul.Tensor(view_259, primals_64); primals_64 = None + mul_261: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.mul.Tensor(mul_260, 768) + sum_85: "f32[1, 64, 1][64, 1, 1]cuda:0" = torch.ops.aten.sum.dim_IntList(mul_260, [2], True) + mul_262: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.mul.Tensor(mul_260, mul_40); mul_260 = None + sum_86: "f32[1, 64, 1][64, 1, 1]cuda:0" = torch.ops.aten.sum.dim_IntList(mul_262, [2], True); mul_262 = None + mul_263: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.mul.Tensor(mul_40, sum_86); sum_86 = None + sub_75: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.sub.Tensor(mul_261, sum_85); mul_261 = sum_85 = None + sub_76: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.sub.Tensor(sub_75, mul_263); sub_75 = mul_263 = None + mul_264: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.mul.Tensor(div_14, sub_76); div_14 = sub_76 = None + mul_265: "f32[1, 64, 768][49152, 768, 
1]cuda:0" = torch.ops.aten.mul.Tensor(view_259, mul_40); mul_40 = None + sum_87: "f32[768][1]cuda:0" = torch.ops.aten.sum.dim_IntList(mul_265, [0, 1]); mul_265 = None + sum_88: "f32[768][1]cuda:0" = torch.ops.aten.sum.dim_IntList(view_259, [0, 1]); view_259 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:43 in forward, code: return F.layer_norm(input, self.weight.shape, self.weight, self.bias, 1e-5) + add_126: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.add.Tensor(add_125, mul_264); add_125 = mul_264 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:130 in forward, code: x = self.c_proj(x) + view_260: "f32[64, 768][768, 1]cuda:0" = torch.ops.aten.reshape.default(add_126, [64, 768]) + mm_59: "f32[64, 3072][3072, 1]cuda:0" = torch.ops.aten.mm.default(view_260, permute_241); permute_241 = None + permute_242: "f32[768, 64][1, 768]cuda:0" = torch.ops.aten.permute.default(view_260, [1, 0]) + mm_60: "f32[768, 3072][3072, 1]cuda:0" = torch.ops.aten.mm.default(permute_242, view_58); permute_242 = view_58 = None + permute_243: "f32[3072, 768][1, 3072]cuda:0" = torch.ops.aten.permute.default(mm_60, [1, 0]); mm_60 = None + sum_89: "f32[1, 768][768, 1]cuda:0" = torch.ops.aten.sum.dim_IntList(view_260, [0], True); view_260 = None + view_261: "f32[768][1]cuda:0" = torch.ops.aten.reshape.default(sum_89, [768]); sum_89 = None + permute_244: "f32[768, 3072][3072, 1]cuda:0" = torch.ops.aten.permute.default(permute_243, [1, 0]); permute_243 = None + view_262: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.reshape.default(mm_59, [1, 64, 3072]); mm_59 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:128 in forward, code: x = self.c_fc(x) + view_57: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.reshape.default(addmm_18, [1, 64, 3072]); addmm_18 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:25 in new_gelu, code: 0.5 + mul_36: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.mul.Tensor(view_57, 0.5) + mul_266: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.mul.Tensor(view_262, mul_36); mul_36 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:29 in new_gelu, code: + torch.tanh(math.sqrt(2.0 / math.pi) * (x + 0.044715 * torch.pow(x, 3.0))) + pow_5: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.pow.Tensor_Scalar(view_57, 3.0) + mul_37: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.mul.Tensor(pow_5, 0.044715); pow_5 = None + add_38: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.add.Tensor(view_57, mul_37); mul_37 = None + mul_38: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.mul.Tensor(add_38, 0.7978845608028654); add_38 = None + tanh_4: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.tanh.default(mul_38); mul_38 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:28 in new_gelu, code: 1.0 + add_39: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.add.Tensor(tanh_4, 1.0) + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:25 in new_gelu, code: 0.5 + mul_267: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.mul.Tensor(view_262, add_39); view_262 = add_39 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:29 in new_gelu, code: + torch.tanh(math.sqrt(2.0 / 
math.pi) * (x + 0.044715 * torch.pow(x, 3.0))) + mul_268: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.mul.Tensor(tanh_4, tanh_4); tanh_4 = None + sub_77: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.sub.Tensor(1, mul_268); mul_268 = None + mul_269: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.mul.Tensor(mul_266, sub_77); mul_266 = sub_77 = None + mul_270: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.mul.Tensor(mul_269, 0.7978845608028654); mul_269 = None + mul_271: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.mul.Tensor(mul_270, 0.044715) + pow_20: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.pow.Tensor_Scalar(view_57, 2.0); view_57 = None + mul_272: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.mul.Scalar(pow_20, 3.0); pow_20 = None + mul_273: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.mul.Tensor(mul_271, mul_272); mul_271 = mul_272 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:29 in new_gelu, code: + torch.tanh(math.sqrt(2.0 / math.pi) * (x + 0.044715 * torch.pow(x, 3.0))) + add_127: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.add.Tensor(mul_270, mul_273); mul_270 = mul_273 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:25 in new_gelu, code: 0.5 + mul_274: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.mul.Tensor(mul_267, 0.5); mul_267 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:25 in new_gelu, code: 0.5 + add_128: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.add.Tensor(add_127, mul_274); add_127 = mul_274 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:128 in forward, code: x = self.c_fc(x) + view_263: "f32[64, 3072][3072, 1]cuda:0" = torch.ops.aten.reshape.default(add_128, [64, 3072]); add_128 = None + mm_61: "f32[64, 768][768, 1]cuda:0" = torch.ops.aten.mm.default(view_263, permute_245); permute_245 = None + permute_246: "f32[3072, 64][1, 3072]cuda:0" = torch.ops.aten.permute.default(view_263, [1, 0]) + mm_62: "f32[3072, 768][768, 1]cuda:0" = torch.ops.aten.mm.default(permute_246, view_56); permute_246 = view_56 = None + permute_247: "f32[768, 3072][1, 768]cuda:0" = torch.ops.aten.permute.default(mm_62, [1, 0]); mm_62 = None + sum_90: "f32[1, 3072][3072, 1]cuda:0" = torch.ops.aten.sum.dim_IntList(view_263, [0], True); view_263 = None + view_264: "f32[3072][1]cuda:0" = torch.ops.aten.reshape.default(sum_90, [3072]); sum_90 = None + permute_248: "f32[3072, 768][768, 1]cuda:0" = torch.ops.aten.permute.default(permute_247, [1, 0]); permute_247 = None + view_265: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.reshape.default(mm_61, [1, 64, 768]); mm_61 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:43 in forward, code: return F.layer_norm(input, self.weight.shape, self.weight, self.bias, 1e-5) + mul_276: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.mul.Tensor(view_265, primals_58); primals_58 = None + mul_277: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.mul.Tensor(mul_276, 768) + sum_91: "f32[1, 64, 1][64, 1, 1]cuda:0" = torch.ops.aten.sum.dim_IntList(mul_276, [2], True) + mul_278: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.mul.Tensor(mul_276, mul_34); mul_276 = None + sum_92: "f32[1, 64, 1][64, 1, 1]cuda:0" = torch.ops.aten.sum.dim_IntList(mul_278, [2], True); mul_278 
= None + mul_279: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.mul.Tensor(mul_34, sum_92); sum_92 = None + sub_79: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.sub.Tensor(mul_277, sum_91); mul_277 = sum_91 = None + sub_80: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.sub.Tensor(sub_79, mul_279); sub_79 = mul_279 = None + mul_280: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.mul.Tensor(div_15, sub_80); div_15 = sub_80 = None + mul_281: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.mul.Tensor(view_265, mul_34); mul_34 = None + sum_93: "f32[768][1]cuda:0" = torch.ops.aten.sum.dim_IntList(mul_281, [0, 1]); mul_281 = None + sum_94: "f32[768][1]cuda:0" = torch.ops.aten.sum.dim_IntList(view_265, [0, 1]); view_265 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:43 in forward, code: return F.layer_norm(input, self.weight.shape, self.weight, self.bias, 1e-5) + add_129: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.add.Tensor(add_126, mul_280); add_126 = mul_280 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:116 in forward, code: y = self.resid_dropout(self.c_proj(y)) + view_266: "f32[64, 768][768, 1]cuda:0" = torch.ops.aten.reshape.default(add_129, [64, 768]) + mm_63: "f32[64, 768][768, 1]cuda:0" = torch.ops.aten.mm.default(view_266, permute_249); permute_249 = None + permute_250: "f32[768, 64][1, 768]cuda:0" = torch.ops.aten.permute.default(view_266, [1, 0]) + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:112 in forward, code: y.transpose(1, 2).contiguous().view(B, T, C) + permute_36: "f32[1, 64, 12, 64][49152, 768, 64, 1]cuda:0" = torch.ops.aten.permute.default(getitem_49, [0, 2, 1, 3]) + view_53: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.reshape.default(permute_36, [1, 64, 768]); permute_36 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:116 in forward, code: y = self.resid_dropout(self.c_proj(y)) + view_54: "f32[64, 768][768, 1]cuda:0" = torch.ops.aten.reshape.default(view_53, [64, 768]); view_53 = None + mm_64: "f32[768, 768][768, 1]cuda:0" = torch.ops.aten.mm.default(permute_250, view_54); permute_250 = view_54 = None + permute_251: "f32[768, 768][1, 768]cuda:0" = torch.ops.aten.permute.default(mm_64, [1, 0]); mm_64 = None + sum_95: "f32[1, 768][768, 1]cuda:0" = torch.ops.aten.sum.dim_IntList(view_266, [0], True); view_266 = None + view_267: "f32[768][1]cuda:0" = torch.ops.aten.reshape.default(sum_95, [768]); sum_95 = None + permute_252: "f32[768, 768][768, 1]cuda:0" = torch.ops.aten.permute.default(permute_251, [1, 0]); permute_251 = None + view_268: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.reshape.default(mm_63, [1, 64, 768]); mm_63 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:112 in forward, code: y.transpose(1, 2).contiguous().view(B, T, C) + view_269: "f32[1, 64, 12, 64][49152, 768, 64, 1]cuda:0" = torch.ops.aten.reshape.default(view_268, [1, 64, 12, 64]); view_268 = None + permute_253: "f32[1, 12, 64, 64][49152, 64, 768, 1]cuda:0" = torch.ops.aten.permute.default(view_269, [0, 2, 1, 3]); view_269 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:96 in forward, code: y = torch.nn.functional.scaled_dot_product_attention( + _scaled_dot_product_efficient_attention_backward_7 = 
torch.ops.aten._scaled_dot_product_efficient_attention_backward.default(permute_253, permute_34, permute_33, permute_35, None, getitem_49, getitem_50, getitem_51, getitem_52, 0.0, [True, True, True, False], True); permute_253 = permute_34 = permute_33 = permute_35 = getitem_49 = getitem_50 = getitem_51 = getitem_52 = None + getitem_162: "f32[1, 12, 64, 64][49152, 64, 768, 1]cuda:0" = _scaled_dot_product_efficient_attention_backward_7[0] + getitem_163: "f32[1, 12, 64, 64][49152, 64, 768, 1]cuda:0" = _scaled_dot_product_efficient_attention_backward_7[1] + getitem_164: "f32[1, 12, 64, 64][49152, 64, 768, 1]cuda:0" = _scaled_dot_product_efficient_attention_backward_7[2]; _scaled_dot_product_efficient_attention_backward_7 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:89 in forward, code: v = v.view(B, T, self.n_head, C // self.n_head).transpose( + permute_254: "f32[1, 64, 12, 64][49152, 768, 64, 1]cuda:0" = torch.ops.aten.permute.default(getitem_164, [0, 2, 1, 3]); getitem_164 = None + view_270: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.reshape.default(permute_254, [1, 64, 768]); permute_254 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:86 in forward, code: q = q.view(B, T, self.n_head, C // self.n_head).transpose( + permute_255: "f32[1, 64, 12, 64][49152, 768, 64, 1]cuda:0" = torch.ops.aten.permute.default(getitem_162, [0, 2, 1, 3]); getitem_162 = None + view_271: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.reshape.default(permute_255, [1, 64, 768]); permute_255 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:83 in forward, code: k = k.view(B, T, self.n_head, C // self.n_head).transpose( + permute_256: "f32[1, 64, 12, 64][49152, 768, 64, 1]cuda:0" = torch.ops.aten.permute.default(getitem_163, [0, 2, 1, 3]); getitem_163 = None + view_272: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.reshape.default(permute_256, [1, 64, 768]); permute_256 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:82 in forward, code: q, k, v = self.c_attn(x).split(self.n_embd, dim=2) + cat_7: "f32[1, 64, 2304][147456, 2304, 1]cuda:0" = torch.ops.aten.cat.default([view_271, view_272, view_270], 2); view_271 = view_272 = view_270 = None + view_273: "f32[64, 2304][2304, 1]cuda:0" = torch.ops.aten.reshape.default(cat_7, [64, 2304]); cat_7 = None + mm_65: "f32[64, 768][768, 1]cuda:0" = torch.ops.aten.mm.default(view_273, permute_257); permute_257 = None + permute_258: "f32[2304, 64][1, 2304]cuda:0" = torch.ops.aten.permute.default(view_273, [1, 0]) + mm_66: "f32[2304, 768][768, 1]cuda:0" = torch.ops.aten.mm.default(permute_258, view_48); permute_258 = view_48 = None + permute_259: "f32[768, 2304][1, 768]cuda:0" = torch.ops.aten.permute.default(mm_66, [1, 0]); mm_66 = None + sum_96: "f32[1, 2304][2304, 1]cuda:0" = torch.ops.aten.sum.dim_IntList(view_273, [0], True); view_273 = None + view_274: "f32[2304][1]cuda:0" = torch.ops.aten.reshape.default(sum_96, [2304]); sum_96 = None + permute_260: "f32[2304, 768][768, 1]cuda:0" = torch.ops.aten.permute.default(permute_259, [1, 0]); permute_259 = None + view_275: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.reshape.default(mm_65, [1, 64, 768]); mm_65 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:43 in forward, code: return F.layer_norm(input, self.weight.shape, self.weight, self.bias, 1e-5) + mul_283: "f32[1, 64, 
768][49152, 768, 1]cuda:0" = torch.ops.aten.mul.Tensor(view_275, primals_52); primals_52 = None + mul_284: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.mul.Tensor(mul_283, 768) + sum_97: "f32[1, 64, 1][64, 1, 1]cuda:0" = torch.ops.aten.sum.dim_IntList(mul_283, [2], True) + mul_285: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.mul.Tensor(mul_283, mul_32); mul_283 = None + sum_98: "f32[1, 64, 1][64, 1, 1]cuda:0" = torch.ops.aten.sum.dim_IntList(mul_285, [2], True); mul_285 = None + mul_286: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.mul.Tensor(mul_32, sum_98); sum_98 = None + sub_82: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.sub.Tensor(mul_284, sum_97); mul_284 = sum_97 = None + sub_83: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.sub.Tensor(sub_82, mul_286); sub_82 = mul_286 = None + mul_287: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.mul.Tensor(div_16, sub_83); div_16 = sub_83 = None + mul_288: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.mul.Tensor(view_275, mul_32); mul_32 = None + sum_99: "f32[768][1]cuda:0" = torch.ops.aten.sum.dim_IntList(mul_288, [0, 1]); mul_288 = None + sum_100: "f32[768][1]cuda:0" = torch.ops.aten.sum.dim_IntList(view_275, [0, 1]); view_275 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:43 in forward, code: return F.layer_norm(input, self.weight.shape, self.weight, self.bias, 1e-5) + add_130: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.add.Tensor(add_129, mul_287); add_129 = mul_287 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:130 in forward, code: x = self.c_proj(x) + view_276: "f32[64, 768][768, 1]cuda:0" = torch.ops.aten.reshape.default(add_130, [64, 768]) + mm_67: "f32[64, 3072][3072, 1]cuda:0" = torch.ops.aten.mm.default(view_276, permute_261); permute_261 = None + permute_262: "f32[768, 64][1, 768]cuda:0" = torch.ops.aten.permute.default(view_276, [1, 0]) + mm_68: "f32[768, 3072][3072, 1]cuda:0" = torch.ops.aten.mm.default(permute_262, view_46); permute_262 = view_46 = None + permute_263: "f32[3072, 768][1, 3072]cuda:0" = torch.ops.aten.permute.default(mm_68, [1, 0]); mm_68 = None + sum_101: "f32[1, 768][768, 1]cuda:0" = torch.ops.aten.sum.dim_IntList(view_276, [0], True); view_276 = None + view_277: "f32[768][1]cuda:0" = torch.ops.aten.reshape.default(sum_101, [768]); sum_101 = None + permute_264: "f32[768, 3072][3072, 1]cuda:0" = torch.ops.aten.permute.default(permute_263, [1, 0]); permute_263 = None + view_278: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.reshape.default(mm_67, [1, 64, 3072]); mm_67 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:128 in forward, code: x = self.c_fc(x) + view_45: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.reshape.default(addmm_14, [1, 64, 3072]); addmm_14 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:25 in new_gelu, code: 0.5 + mul_28: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.mul.Tensor(view_45, 0.5) + mul_289: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.mul.Tensor(view_278, mul_28); mul_28 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:29 in new_gelu, code: + torch.tanh(math.sqrt(2.0 / math.pi) * (x + 0.044715 * torch.pow(x, 3.0))) + pow_4: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.pow.Tensor_Scalar(view_45, 3.0) + mul_29: 
"f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.mul.Tensor(pow_4, 0.044715); pow_4 = None + add_30: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.add.Tensor(view_45, mul_29); mul_29 = None + mul_30: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.mul.Tensor(add_30, 0.7978845608028654); add_30 = None + tanh_3: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.tanh.default(mul_30); mul_30 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:28 in new_gelu, code: 1.0 + add_31: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.add.Tensor(tanh_3, 1.0) + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:25 in new_gelu, code: 0.5 + mul_290: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.mul.Tensor(view_278, add_31); view_278 = add_31 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:29 in new_gelu, code: + torch.tanh(math.sqrt(2.0 / math.pi) * (x + 0.044715 * torch.pow(x, 3.0))) + mul_291: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.mul.Tensor(tanh_3, tanh_3); tanh_3 = None + sub_84: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.sub.Tensor(1, mul_291); mul_291 = None + mul_292: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.mul.Tensor(mul_289, sub_84); mul_289 = sub_84 = None + mul_293: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.mul.Tensor(mul_292, 0.7978845608028654); mul_292 = None + mul_294: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.mul.Tensor(mul_293, 0.044715) + pow_21: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.pow.Tensor_Scalar(view_45, 2.0); view_45 = None + mul_295: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.mul.Scalar(pow_21, 3.0); pow_21 = None + mul_296: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.mul.Tensor(mul_294, mul_295); mul_294 = mul_295 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:29 in new_gelu, code: + torch.tanh(math.sqrt(2.0 / math.pi) * (x + 0.044715 * torch.pow(x, 3.0))) + add_131: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.add.Tensor(mul_293, mul_296); mul_293 = mul_296 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:25 in new_gelu, code: 0.5 + mul_297: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.mul.Tensor(mul_290, 0.5); mul_290 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:25 in new_gelu, code: 0.5 + add_132: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.add.Tensor(add_131, mul_297); add_131 = mul_297 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:128 in forward, code: x = self.c_fc(x) + view_279: "f32[64, 3072][3072, 1]cuda:0" = torch.ops.aten.reshape.default(add_132, [64, 3072]); add_132 = None + mm_69: "f32[64, 768][768, 1]cuda:0" = torch.ops.aten.mm.default(view_279, permute_265); permute_265 = None + permute_266: "f32[3072, 64][1, 3072]cuda:0" = torch.ops.aten.permute.default(view_279, [1, 0]) + mm_70: "f32[3072, 768][768, 1]cuda:0" = torch.ops.aten.mm.default(permute_266, view_44); permute_266 = view_44 = None + permute_267: "f32[768, 3072][1, 768]cuda:0" = torch.ops.aten.permute.default(mm_70, [1, 0]); mm_70 = None + sum_102: "f32[1, 3072][3072, 1]cuda:0" = torch.ops.aten.sum.dim_IntList(view_279, [0], True); view_279 = None + view_280: 
"f32[3072][1]cuda:0" = torch.ops.aten.reshape.default(sum_102, [3072]); sum_102 = None + permute_268: "f32[3072, 768][768, 1]cuda:0" = torch.ops.aten.permute.default(permute_267, [1, 0]); permute_267 = None + view_281: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.reshape.default(mm_69, [1, 64, 768]); mm_69 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:43 in forward, code: return F.layer_norm(input, self.weight.shape, self.weight, self.bias, 1e-5) + mul_299: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.mul.Tensor(view_281, primals_46); primals_46 = None + mul_300: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.mul.Tensor(mul_299, 768) + sum_103: "f32[1, 64, 1][64, 1, 1]cuda:0" = torch.ops.aten.sum.dim_IntList(mul_299, [2], True) + mul_301: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.mul.Tensor(mul_299, mul_26); mul_299 = None + sum_104: "f32[1, 64, 1][64, 1, 1]cuda:0" = torch.ops.aten.sum.dim_IntList(mul_301, [2], True); mul_301 = None + mul_302: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.mul.Tensor(mul_26, sum_104); sum_104 = None + sub_86: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.sub.Tensor(mul_300, sum_103); mul_300 = sum_103 = None + sub_87: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.sub.Tensor(sub_86, mul_302); sub_86 = mul_302 = None + mul_303: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.mul.Tensor(div_17, sub_87); div_17 = sub_87 = None + mul_304: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.mul.Tensor(view_281, mul_26); mul_26 = None + sum_105: "f32[768][1]cuda:0" = torch.ops.aten.sum.dim_IntList(mul_304, [0, 1]); mul_304 = None + sum_106: "f32[768][1]cuda:0" = torch.ops.aten.sum.dim_IntList(view_281, [0, 1]); view_281 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:43 in forward, code: return F.layer_norm(input, self.weight.shape, self.weight, self.bias, 1e-5) + add_133: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.add.Tensor(add_130, mul_303); add_130 = mul_303 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:116 in forward, code: y = self.resid_dropout(self.c_proj(y)) + view_282: "f32[64, 768][768, 1]cuda:0" = torch.ops.aten.reshape.default(add_133, [64, 768]) + mm_71: "f32[64, 768][768, 1]cuda:0" = torch.ops.aten.mm.default(view_282, permute_269); permute_269 = None + permute_270: "f32[768, 64][1, 768]cuda:0" = torch.ops.aten.permute.default(view_282, [1, 0]) + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:112 in forward, code: y.transpose(1, 2).contiguous().view(B, T, C) + permute_28: "f32[1, 64, 12, 64][49152, 768, 64, 1]cuda:0" = torch.ops.aten.permute.default(getitem_38, [0, 2, 1, 3]) + view_41: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.reshape.default(permute_28, [1, 64, 768]); permute_28 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:116 in forward, code: y = self.resid_dropout(self.c_proj(y)) + view_42: "f32[64, 768][768, 1]cuda:0" = torch.ops.aten.reshape.default(view_41, [64, 768]); view_41 = None + mm_72: "f32[768, 768][768, 1]cuda:0" = torch.ops.aten.mm.default(permute_270, view_42); permute_270 = view_42 = None + permute_271: "f32[768, 768][1, 768]cuda:0" = torch.ops.aten.permute.default(mm_72, [1, 0]); mm_72 = None + sum_107: "f32[1, 768][768, 1]cuda:0" = torch.ops.aten.sum.dim_IntList(view_282, [0], True); view_282 = None + 
view_283: "f32[768][1]cuda:0" = torch.ops.aten.reshape.default(sum_107, [768]); sum_107 = None + permute_272: "f32[768, 768][768, 1]cuda:0" = torch.ops.aten.permute.default(permute_271, [1, 0]); permute_271 = None + view_284: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.reshape.default(mm_71, [1, 64, 768]); mm_71 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:112 in forward, code: y.transpose(1, 2).contiguous().view(B, T, C) + view_285: "f32[1, 64, 12, 64][49152, 768, 64, 1]cuda:0" = torch.ops.aten.reshape.default(view_284, [1, 64, 12, 64]); view_284 = None + permute_273: "f32[1, 12, 64, 64][49152, 64, 768, 1]cuda:0" = torch.ops.aten.permute.default(view_285, [0, 2, 1, 3]); view_285 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:96 in forward, code: y = torch.nn.functional.scaled_dot_product_attention( + _scaled_dot_product_efficient_attention_backward_8 = torch.ops.aten._scaled_dot_product_efficient_attention_backward.default(permute_273, permute_26, permute_25, permute_27, None, getitem_38, getitem_39, getitem_40, getitem_41, 0.0, [True, True, True, False], True); permute_273 = permute_26 = permute_25 = permute_27 = getitem_38 = getitem_39 = getitem_40 = getitem_41 = None + getitem_166: "f32[1, 12, 64, 64][49152, 64, 768, 1]cuda:0" = _scaled_dot_product_efficient_attention_backward_8[0] + getitem_167: "f32[1, 12, 64, 64][49152, 64, 768, 1]cuda:0" = _scaled_dot_product_efficient_attention_backward_8[1] + getitem_168: "f32[1, 12, 64, 64][49152, 64, 768, 1]cuda:0" = _scaled_dot_product_efficient_attention_backward_8[2]; _scaled_dot_product_efficient_attention_backward_8 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:89 in forward, code: v = v.view(B, T, self.n_head, C // self.n_head).transpose( + permute_274: "f32[1, 64, 12, 64][49152, 768, 64, 1]cuda:0" = torch.ops.aten.permute.default(getitem_168, [0, 2, 1, 3]); getitem_168 = None + view_286: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.reshape.default(permute_274, [1, 64, 768]); permute_274 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:86 in forward, code: q = q.view(B, T, self.n_head, C // self.n_head).transpose( + permute_275: "f32[1, 64, 12, 64][49152, 768, 64, 1]cuda:0" = torch.ops.aten.permute.default(getitem_166, [0, 2, 1, 3]); getitem_166 = None + view_287: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.reshape.default(permute_275, [1, 64, 768]); permute_275 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:83 in forward, code: k = k.view(B, T, self.n_head, C // self.n_head).transpose( + permute_276: "f32[1, 64, 12, 64][49152, 768, 64, 1]cuda:0" = torch.ops.aten.permute.default(getitem_167, [0, 2, 1, 3]); getitem_167 = None + view_288: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.reshape.default(permute_276, [1, 64, 768]); permute_276 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:82 in forward, code: q, k, v = self.c_attn(x).split(self.n_embd, dim=2) + cat_8: "f32[1, 64, 2304][147456, 2304, 1]cuda:0" = torch.ops.aten.cat.default([view_287, view_288, view_286], 2); view_287 = view_288 = view_286 = None + view_289: "f32[64, 2304][2304, 1]cuda:0" = torch.ops.aten.reshape.default(cat_8, [64, 2304]); cat_8 = None + mm_73: "f32[64, 768][768, 1]cuda:0" = torch.ops.aten.mm.default(view_289, permute_277); permute_277 = None + 
permute_278: "f32[2304, 64][1, 2304]cuda:0" = torch.ops.aten.permute.default(view_289, [1, 0]) + mm_74: "f32[2304, 768][768, 1]cuda:0" = torch.ops.aten.mm.default(permute_278, view_36); permute_278 = view_36 = None + permute_279: "f32[768, 2304][1, 768]cuda:0" = torch.ops.aten.permute.default(mm_74, [1, 0]); mm_74 = None + sum_108: "f32[1, 2304][2304, 1]cuda:0" = torch.ops.aten.sum.dim_IntList(view_289, [0], True); view_289 = None + view_290: "f32[2304][1]cuda:0" = torch.ops.aten.reshape.default(sum_108, [2304]); sum_108 = None + permute_280: "f32[2304, 768][768, 1]cuda:0" = torch.ops.aten.permute.default(permute_279, [1, 0]); permute_279 = None + view_291: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.reshape.default(mm_73, [1, 64, 768]); mm_73 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:43 in forward, code: return F.layer_norm(input, self.weight.shape, self.weight, self.bias, 1e-5) + mul_306: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.mul.Tensor(view_291, primals_40); primals_40 = None + mul_307: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.mul.Tensor(mul_306, 768) + sum_109: "f32[1, 64, 1][64, 1, 1]cuda:0" = torch.ops.aten.sum.dim_IntList(mul_306, [2], True) + mul_308: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.mul.Tensor(mul_306, mul_24); mul_306 = None + sum_110: "f32[1, 64, 1][64, 1, 1]cuda:0" = torch.ops.aten.sum.dim_IntList(mul_308, [2], True); mul_308 = None + mul_309: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.mul.Tensor(mul_24, sum_110); sum_110 = None + sub_89: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.sub.Tensor(mul_307, sum_109); mul_307 = sum_109 = None + sub_90: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.sub.Tensor(sub_89, mul_309); sub_89 = mul_309 = None + mul_310: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.mul.Tensor(div_18, sub_90); div_18 = sub_90 = None + mul_311: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.mul.Tensor(view_291, mul_24); mul_24 = None + sum_111: "f32[768][1]cuda:0" = torch.ops.aten.sum.dim_IntList(mul_311, [0, 1]); mul_311 = None + sum_112: "f32[768][1]cuda:0" = torch.ops.aten.sum.dim_IntList(view_291, [0, 1]); view_291 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:43 in forward, code: return F.layer_norm(input, self.weight.shape, self.weight, self.bias, 1e-5) + add_134: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.add.Tensor(add_133, mul_310); add_133 = mul_310 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:130 in forward, code: x = self.c_proj(x) + view_292: "f32[64, 768][768, 1]cuda:0" = torch.ops.aten.reshape.default(add_134, [64, 768]) + mm_75: "f32[64, 3072][3072, 1]cuda:0" = torch.ops.aten.mm.default(view_292, permute_281); permute_281 = None + permute_282: "f32[768, 64][1, 768]cuda:0" = torch.ops.aten.permute.default(view_292, [1, 0]) + mm_76: "f32[768, 3072][3072, 1]cuda:0" = torch.ops.aten.mm.default(permute_282, view_34); permute_282 = view_34 = None + permute_283: "f32[3072, 768][1, 3072]cuda:0" = torch.ops.aten.permute.default(mm_76, [1, 0]); mm_76 = None + sum_113: "f32[1, 768][768, 1]cuda:0" = torch.ops.aten.sum.dim_IntList(view_292, [0], True); view_292 = None + view_293: "f32[768][1]cuda:0" = torch.ops.aten.reshape.default(sum_113, [768]); sum_113 = None + permute_284: "f32[768, 3072][3072, 1]cuda:0" = torch.ops.aten.permute.default(permute_283, [1, 0]); permute_283 = None + 
view_294: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.reshape.default(mm_75, [1, 64, 3072]); mm_75 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:128 in forward, code: x = self.c_fc(x) + view_33: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.reshape.default(addmm_10, [1, 64, 3072]); addmm_10 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:25 in new_gelu, code: 0.5 + mul_20: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.mul.Tensor(view_33, 0.5) + mul_312: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.mul.Tensor(view_294, mul_20); mul_20 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:29 in new_gelu, code: + torch.tanh(math.sqrt(2.0 / math.pi) * (x + 0.044715 * torch.pow(x, 3.0))) + pow_3: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.pow.Tensor_Scalar(view_33, 3.0) + mul_21: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.mul.Tensor(pow_3, 0.044715); pow_3 = None + add_22: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.add.Tensor(view_33, mul_21); mul_21 = None + mul_22: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.mul.Tensor(add_22, 0.7978845608028654); add_22 = None + tanh_2: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.tanh.default(mul_22); mul_22 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:28 in new_gelu, code: 1.0 + add_23: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.add.Tensor(tanh_2, 1.0) + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:25 in new_gelu, code: 0.5 + mul_313: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.mul.Tensor(view_294, add_23); view_294 = add_23 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:29 in new_gelu, code: + torch.tanh(math.sqrt(2.0 / math.pi) * (x + 0.044715 * torch.pow(x, 3.0))) + mul_314: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.mul.Tensor(tanh_2, tanh_2); tanh_2 = None + sub_91: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.sub.Tensor(1, mul_314); mul_314 = None + mul_315: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.mul.Tensor(mul_312, sub_91); mul_312 = sub_91 = None + mul_316: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.mul.Tensor(mul_315, 0.7978845608028654); mul_315 = None + mul_317: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.mul.Tensor(mul_316, 0.044715) + pow_22: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.pow.Tensor_Scalar(view_33, 2.0); view_33 = None + mul_318: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.mul.Scalar(pow_22, 3.0); pow_22 = None + mul_319: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.mul.Tensor(mul_317, mul_318); mul_317 = mul_318 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:29 in new_gelu, code: + torch.tanh(math.sqrt(2.0 / math.pi) * (x + 0.044715 * torch.pow(x, 3.0))) + add_135: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.add.Tensor(mul_316, mul_319); mul_316 = mul_319 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:25 in new_gelu, code: 0.5 + mul_320: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.mul.Tensor(mul_313, 0.5); mul_313 = None + + # File: 
/data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:25 in new_gelu, code: 0.5 + add_136: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.add.Tensor(add_135, mul_320); add_135 = mul_320 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:128 in forward, code: x = self.c_fc(x) + view_295: "f32[64, 3072][3072, 1]cuda:0" = torch.ops.aten.reshape.default(add_136, [64, 3072]); add_136 = None + mm_77: "f32[64, 768][768, 1]cuda:0" = torch.ops.aten.mm.default(view_295, permute_285); permute_285 = None + permute_286: "f32[3072, 64][1, 3072]cuda:0" = torch.ops.aten.permute.default(view_295, [1, 0]) + mm_78: "f32[3072, 768][768, 1]cuda:0" = torch.ops.aten.mm.default(permute_286, view_32); permute_286 = view_32 = None + permute_287: "f32[768, 3072][1, 768]cuda:0" = torch.ops.aten.permute.default(mm_78, [1, 0]); mm_78 = None + sum_114: "f32[1, 3072][3072, 1]cuda:0" = torch.ops.aten.sum.dim_IntList(view_295, [0], True); view_295 = None + view_296: "f32[3072][1]cuda:0" = torch.ops.aten.reshape.default(sum_114, [3072]); sum_114 = None + permute_288: "f32[3072, 768][768, 1]cuda:0" = torch.ops.aten.permute.default(permute_287, [1, 0]); permute_287 = None + view_297: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.reshape.default(mm_77, [1, 64, 768]); mm_77 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:43 in forward, code: return F.layer_norm(input, self.weight.shape, self.weight, self.bias, 1e-5) + mul_322: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.mul.Tensor(view_297, primals_34); primals_34 = None + mul_323: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.mul.Tensor(mul_322, 768) + sum_115: "f32[1, 64, 1][64, 1, 1]cuda:0" = torch.ops.aten.sum.dim_IntList(mul_322, [2], True) + mul_324: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.mul.Tensor(mul_322, mul_18); mul_322 = None + sum_116: "f32[1, 64, 1][64, 1, 1]cuda:0" = torch.ops.aten.sum.dim_IntList(mul_324, [2], True); mul_324 = None + mul_325: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.mul.Tensor(mul_18, sum_116); sum_116 = None + sub_93: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.sub.Tensor(mul_323, sum_115); mul_323 = sum_115 = None + sub_94: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.sub.Tensor(sub_93, mul_325); sub_93 = mul_325 = None + mul_326: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.mul.Tensor(div_19, sub_94); div_19 = sub_94 = None + mul_327: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.mul.Tensor(view_297, mul_18); mul_18 = None + sum_117: "f32[768][1]cuda:0" = torch.ops.aten.sum.dim_IntList(mul_327, [0, 1]); mul_327 = None + sum_118: "f32[768][1]cuda:0" = torch.ops.aten.sum.dim_IntList(view_297, [0, 1]); view_297 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:43 in forward, code: return F.layer_norm(input, self.weight.shape, self.weight, self.bias, 1e-5) + add_137: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.add.Tensor(add_134, mul_326); add_134 = mul_326 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:116 in forward, code: y = self.resid_dropout(self.c_proj(y)) + view_298: "f32[64, 768][768, 1]cuda:0" = torch.ops.aten.reshape.default(add_137, [64, 768]) + mm_79: "f32[64, 768][768, 1]cuda:0" = torch.ops.aten.mm.default(view_298, permute_289); permute_289 = None + permute_290: "f32[768, 64][1, 768]cuda:0" = 
torch.ops.aten.permute.default(view_298, [1, 0]) + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:112 in forward, code: y.transpose(1, 2).contiguous().view(B, T, C) + permute_20: "f32[1, 64, 12, 64][49152, 768, 64, 1]cuda:0" = torch.ops.aten.permute.default(getitem_27, [0, 2, 1, 3]) + view_29: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.reshape.default(permute_20, [1, 64, 768]); permute_20 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:116 in forward, code: y = self.resid_dropout(self.c_proj(y)) + view_30: "f32[64, 768][768, 1]cuda:0" = torch.ops.aten.reshape.default(view_29, [64, 768]); view_29 = None + mm_80: "f32[768, 768][768, 1]cuda:0" = torch.ops.aten.mm.default(permute_290, view_30); permute_290 = view_30 = None + permute_291: "f32[768, 768][1, 768]cuda:0" = torch.ops.aten.permute.default(mm_80, [1, 0]); mm_80 = None + sum_119: "f32[1, 768][768, 1]cuda:0" = torch.ops.aten.sum.dim_IntList(view_298, [0], True); view_298 = None + view_299: "f32[768][1]cuda:0" = torch.ops.aten.reshape.default(sum_119, [768]); sum_119 = None + permute_292: "f32[768, 768][768, 1]cuda:0" = torch.ops.aten.permute.default(permute_291, [1, 0]); permute_291 = None + view_300: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.reshape.default(mm_79, [1, 64, 768]); mm_79 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:112 in forward, code: y.transpose(1, 2).contiguous().view(B, T, C) + view_301: "f32[1, 64, 12, 64][49152, 768, 64, 1]cuda:0" = torch.ops.aten.reshape.default(view_300, [1, 64, 12, 64]); view_300 = None + permute_293: "f32[1, 12, 64, 64][49152, 64, 768, 1]cuda:0" = torch.ops.aten.permute.default(view_301, [0, 2, 1, 3]); view_301 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:96 in forward, code: y = torch.nn.functional.scaled_dot_product_attention( + _scaled_dot_product_efficient_attention_backward_9 = torch.ops.aten._scaled_dot_product_efficient_attention_backward.default(permute_293, permute_18, permute_17, permute_19, None, getitem_27, getitem_28, getitem_29, getitem_30, 0.0, [True, True, True, False], True); permute_293 = permute_18 = permute_17 = permute_19 = getitem_27 = getitem_28 = getitem_29 = getitem_30 = None + getitem_170: "f32[1, 12, 64, 64][49152, 64, 768, 1]cuda:0" = _scaled_dot_product_efficient_attention_backward_9[0] + getitem_171: "f32[1, 12, 64, 64][49152, 64, 768, 1]cuda:0" = _scaled_dot_product_efficient_attention_backward_9[1] + getitem_172: "f32[1, 12, 64, 64][49152, 64, 768, 1]cuda:0" = _scaled_dot_product_efficient_attention_backward_9[2]; _scaled_dot_product_efficient_attention_backward_9 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:89 in forward, code: v = v.view(B, T, self.n_head, C // self.n_head).transpose( + permute_294: "f32[1, 64, 12, 64][49152, 768, 64, 1]cuda:0" = torch.ops.aten.permute.default(getitem_172, [0, 2, 1, 3]); getitem_172 = None + view_302: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.reshape.default(permute_294, [1, 64, 768]); permute_294 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:86 in forward, code: q = q.view(B, T, self.n_head, C // self.n_head).transpose( + permute_295: "f32[1, 64, 12, 64][49152, 768, 64, 1]cuda:0" = torch.ops.aten.permute.default(getitem_170, [0, 2, 1, 3]); getitem_170 = None + view_303: "f32[1, 64, 768][49152, 768, 1]cuda:0" = 
torch.ops.aten.reshape.default(permute_295, [1, 64, 768]); permute_295 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:83 in forward, code: k = k.view(B, T, self.n_head, C // self.n_head).transpose( + permute_296: "f32[1, 64, 12, 64][49152, 768, 64, 1]cuda:0" = torch.ops.aten.permute.default(getitem_171, [0, 2, 1, 3]); getitem_171 = None + view_304: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.reshape.default(permute_296, [1, 64, 768]); permute_296 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:82 in forward, code: q, k, v = self.c_attn(x).split(self.n_embd, dim=2) + cat_9: "f32[1, 64, 2304][147456, 2304, 1]cuda:0" = torch.ops.aten.cat.default([view_303, view_304, view_302], 2); view_303 = view_304 = view_302 = None + view_305: "f32[64, 2304][2304, 1]cuda:0" = torch.ops.aten.reshape.default(cat_9, [64, 2304]); cat_9 = None + mm_81: "f32[64, 768][768, 1]cuda:0" = torch.ops.aten.mm.default(view_305, permute_297); permute_297 = None + permute_298: "f32[2304, 64][1, 2304]cuda:0" = torch.ops.aten.permute.default(view_305, [1, 0]) + mm_82: "f32[2304, 768][768, 1]cuda:0" = torch.ops.aten.mm.default(permute_298, view_24); permute_298 = view_24 = None + permute_299: "f32[768, 2304][1, 768]cuda:0" = torch.ops.aten.permute.default(mm_82, [1, 0]); mm_82 = None + sum_120: "f32[1, 2304][2304, 1]cuda:0" = torch.ops.aten.sum.dim_IntList(view_305, [0], True); view_305 = None + view_306: "f32[2304][1]cuda:0" = torch.ops.aten.reshape.default(sum_120, [2304]); sum_120 = None + permute_300: "f32[2304, 768][768, 1]cuda:0" = torch.ops.aten.permute.default(permute_299, [1, 0]); permute_299 = None + view_307: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.reshape.default(mm_81, [1, 64, 768]); mm_81 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:43 in forward, code: return F.layer_norm(input, self.weight.shape, self.weight, self.bias, 1e-5) + mul_329: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.mul.Tensor(view_307, primals_28); primals_28 = None + mul_330: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.mul.Tensor(mul_329, 768) + sum_121: "f32[1, 64, 1][64, 1, 1]cuda:0" = torch.ops.aten.sum.dim_IntList(mul_329, [2], True) + mul_331: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.mul.Tensor(mul_329, mul_16); mul_329 = None + sum_122: "f32[1, 64, 1][64, 1, 1]cuda:0" = torch.ops.aten.sum.dim_IntList(mul_331, [2], True); mul_331 = None + mul_332: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.mul.Tensor(mul_16, sum_122); sum_122 = None + sub_96: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.sub.Tensor(mul_330, sum_121); mul_330 = sum_121 = None + sub_97: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.sub.Tensor(sub_96, mul_332); sub_96 = mul_332 = None + mul_333: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.mul.Tensor(div_20, sub_97); div_20 = sub_97 = None + mul_334: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.mul.Tensor(view_307, mul_16); mul_16 = None + sum_123: "f32[768][1]cuda:0" = torch.ops.aten.sum.dim_IntList(mul_334, [0, 1]); mul_334 = None + sum_124: "f32[768][1]cuda:0" = torch.ops.aten.sum.dim_IntList(view_307, [0, 1]); view_307 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:43 in forward, code: return F.layer_norm(input, self.weight.shape, self.weight, self.bias, 1e-5) + add_138: "f32[1, 64, 768][49152, 768, 1]cuda:0" = 
torch.ops.aten.add.Tensor(add_137, mul_333); add_137 = mul_333 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:130 in forward, code: x = self.c_proj(x) + view_308: "f32[64, 768][768, 1]cuda:0" = torch.ops.aten.reshape.default(add_138, [64, 768]) + mm_83: "f32[64, 3072][3072, 1]cuda:0" = torch.ops.aten.mm.default(view_308, permute_301); permute_301 = None + permute_302: "f32[768, 64][1, 768]cuda:0" = torch.ops.aten.permute.default(view_308, [1, 0]) + mm_84: "f32[768, 3072][3072, 1]cuda:0" = torch.ops.aten.mm.default(permute_302, view_22); permute_302 = view_22 = None + permute_303: "f32[3072, 768][1, 3072]cuda:0" = torch.ops.aten.permute.default(mm_84, [1, 0]); mm_84 = None + sum_125: "f32[1, 768][768, 1]cuda:0" = torch.ops.aten.sum.dim_IntList(view_308, [0], True); view_308 = None + view_309: "f32[768][1]cuda:0" = torch.ops.aten.reshape.default(sum_125, [768]); sum_125 = None + permute_304: "f32[768, 3072][3072, 1]cuda:0" = torch.ops.aten.permute.default(permute_303, [1, 0]); permute_303 = None + view_310: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.reshape.default(mm_83, [1, 64, 3072]); mm_83 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:128 in forward, code: x = self.c_fc(x) + view_21: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.reshape.default(addmm_6, [1, 64, 3072]); addmm_6 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:25 in new_gelu, code: 0.5 + mul_12: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.mul.Tensor(view_21, 0.5) + mul_335: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.mul.Tensor(view_310, mul_12); mul_12 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:29 in new_gelu, code: + torch.tanh(math.sqrt(2.0 / math.pi) * (x + 0.044715 * torch.pow(x, 3.0))) + pow_2: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.pow.Tensor_Scalar(view_21, 3.0) + mul_13: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.mul.Tensor(pow_2, 0.044715); pow_2 = None + add_14: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.add.Tensor(view_21, mul_13); mul_13 = None + mul_14: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.mul.Tensor(add_14, 0.7978845608028654); add_14 = None + tanh_1: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.tanh.default(mul_14); mul_14 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:28 in new_gelu, code: 1.0 + add_15: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.add.Tensor(tanh_1, 1.0) + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:25 in new_gelu, code: 0.5 + mul_336: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.mul.Tensor(view_310, add_15); view_310 = add_15 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:29 in new_gelu, code: + torch.tanh(math.sqrt(2.0 / math.pi) * (x + 0.044715 * torch.pow(x, 3.0))) + mul_337: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.mul.Tensor(tanh_1, tanh_1); tanh_1 = None + sub_98: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.sub.Tensor(1, mul_337); mul_337 = None + mul_338: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.mul.Tensor(mul_335, sub_98); mul_335 = sub_98 = None + mul_339: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.mul.Tensor(mul_338, 
0.7978845608028654); mul_338 = None + mul_340: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.mul.Tensor(mul_339, 0.044715) + pow_23: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.pow.Tensor_Scalar(view_21, 2.0); view_21 = None + mul_341: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.mul.Scalar(pow_23, 3.0); pow_23 = None + mul_342: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.mul.Tensor(mul_340, mul_341); mul_340 = mul_341 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:29 in new_gelu, code: + torch.tanh(math.sqrt(2.0 / math.pi) * (x + 0.044715 * torch.pow(x, 3.0))) + add_139: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.add.Tensor(mul_339, mul_342); mul_339 = mul_342 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:25 in new_gelu, code: 0.5 + mul_343: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.mul.Tensor(mul_336, 0.5); mul_336 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:25 in new_gelu, code: 0.5 + add_140: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.add.Tensor(add_139, mul_343); add_139 = mul_343 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:128 in forward, code: x = self.c_fc(x) + view_311: "f32[64, 3072][3072, 1]cuda:0" = torch.ops.aten.reshape.default(add_140, [64, 3072]); add_140 = None + mm_85: "f32[64, 768][768, 1]cuda:0" = torch.ops.aten.mm.default(view_311, permute_305); permute_305 = None + permute_306: "f32[3072, 64][1, 3072]cuda:0" = torch.ops.aten.permute.default(view_311, [1, 0]) + mm_86: "f32[3072, 768][768, 1]cuda:0" = torch.ops.aten.mm.default(permute_306, view_20); permute_306 = view_20 = None + permute_307: "f32[768, 3072][1, 768]cuda:0" = torch.ops.aten.permute.default(mm_86, [1, 0]); mm_86 = None + sum_126: "f32[1, 3072][3072, 1]cuda:0" = torch.ops.aten.sum.dim_IntList(view_311, [0], True); view_311 = None + view_312: "f32[3072][1]cuda:0" = torch.ops.aten.reshape.default(sum_126, [3072]); sum_126 = None + permute_308: "f32[3072, 768][768, 1]cuda:0" = torch.ops.aten.permute.default(permute_307, [1, 0]); permute_307 = None + view_313: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.reshape.default(mm_85, [1, 64, 768]); mm_85 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:43 in forward, code: return F.layer_norm(input, self.weight.shape, self.weight, self.bias, 1e-5) + mul_345: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.mul.Tensor(view_313, primals_22); primals_22 = None + mul_346: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.mul.Tensor(mul_345, 768) + sum_127: "f32[1, 64, 1][64, 1, 1]cuda:0" = torch.ops.aten.sum.dim_IntList(mul_345, [2], True) + mul_347: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.mul.Tensor(mul_345, mul_10); mul_345 = None + sum_128: "f32[1, 64, 1][64, 1, 1]cuda:0" = torch.ops.aten.sum.dim_IntList(mul_347, [2], True); mul_347 = None + mul_348: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.mul.Tensor(mul_10, sum_128); sum_128 = None + sub_100: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.sub.Tensor(mul_346, sum_127); mul_346 = sum_127 = None + sub_101: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.sub.Tensor(sub_100, mul_348); sub_100 = mul_348 = None + mul_349: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.mul.Tensor(div_21, sub_101); div_21 = sub_101 
= None + mul_350: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.mul.Tensor(view_313, mul_10); mul_10 = None + sum_129: "f32[768][1]cuda:0" = torch.ops.aten.sum.dim_IntList(mul_350, [0, 1]); mul_350 = None + sum_130: "f32[768][1]cuda:0" = torch.ops.aten.sum.dim_IntList(view_313, [0, 1]); view_313 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:43 in forward, code: return F.layer_norm(input, self.weight.shape, self.weight, self.bias, 1e-5) + add_141: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.add.Tensor(add_138, mul_349); add_138 = mul_349 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:116 in forward, code: y = self.resid_dropout(self.c_proj(y)) + view_314: "f32[64, 768][768, 1]cuda:0" = torch.ops.aten.reshape.default(add_141, [64, 768]) + mm_87: "f32[64, 768][768, 1]cuda:0" = torch.ops.aten.mm.default(view_314, permute_309); permute_309 = None + permute_310: "f32[768, 64][1, 768]cuda:0" = torch.ops.aten.permute.default(view_314, [1, 0]) + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:112 in forward, code: y.transpose(1, 2).contiguous().view(B, T, C) + permute_12: "f32[1, 64, 12, 64][49152, 768, 64, 1]cuda:0" = torch.ops.aten.permute.default(getitem_16, [0, 2, 1, 3]) + view_17: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.reshape.default(permute_12, [1, 64, 768]); permute_12 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:116 in forward, code: y = self.resid_dropout(self.c_proj(y)) + view_18: "f32[64, 768][768, 1]cuda:0" = torch.ops.aten.reshape.default(view_17, [64, 768]); view_17 = None + mm_88: "f32[768, 768][768, 1]cuda:0" = torch.ops.aten.mm.default(permute_310, view_18); permute_310 = view_18 = None + permute_311: "f32[768, 768][1, 768]cuda:0" = torch.ops.aten.permute.default(mm_88, [1, 0]); mm_88 = None + sum_131: "f32[1, 768][768, 1]cuda:0" = torch.ops.aten.sum.dim_IntList(view_314, [0], True); view_314 = None + view_315: "f32[768][1]cuda:0" = torch.ops.aten.reshape.default(sum_131, [768]); sum_131 = None + permute_312: "f32[768, 768][768, 1]cuda:0" = torch.ops.aten.permute.default(permute_311, [1, 0]); permute_311 = None + view_316: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.reshape.default(mm_87, [1, 64, 768]); mm_87 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:112 in forward, code: y.transpose(1, 2).contiguous().view(B, T, C) + view_317: "f32[1, 64, 12, 64][49152, 768, 64, 1]cuda:0" = torch.ops.aten.reshape.default(view_316, [1, 64, 12, 64]); view_316 = None + permute_313: "f32[1, 12, 64, 64][49152, 64, 768, 1]cuda:0" = torch.ops.aten.permute.default(view_317, [0, 2, 1, 3]); view_317 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:96 in forward, code: y = torch.nn.functional.scaled_dot_product_attention( + _scaled_dot_product_efficient_attention_backward_10 = torch.ops.aten._scaled_dot_product_efficient_attention_backward.default(permute_313, permute_10, permute_9, permute_11, None, getitem_16, getitem_17, getitem_18, getitem_19, 0.0, [True, True, True, False], True); permute_313 = permute_10 = permute_9 = permute_11 = getitem_16 = getitem_17 = getitem_18 = getitem_19 = None + getitem_174: "f32[1, 12, 64, 64][49152, 64, 768, 1]cuda:0" = _scaled_dot_product_efficient_attention_backward_10[0] + getitem_175: "f32[1, 12, 64, 64][49152, 64, 768, 1]cuda:0" = 
_scaled_dot_product_efficient_attention_backward_10[1] + getitem_176: "f32[1, 12, 64, 64][49152, 64, 768, 1]cuda:0" = _scaled_dot_product_efficient_attention_backward_10[2]; _scaled_dot_product_efficient_attention_backward_10 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:89 in forward, code: v = v.view(B, T, self.n_head, C // self.n_head).transpose( + permute_314: "f32[1, 64, 12, 64][49152, 768, 64, 1]cuda:0" = torch.ops.aten.permute.default(getitem_176, [0, 2, 1, 3]); getitem_176 = None + view_318: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.reshape.default(permute_314, [1, 64, 768]); permute_314 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:86 in forward, code: q = q.view(B, T, self.n_head, C // self.n_head).transpose( + permute_315: "f32[1, 64, 12, 64][49152, 768, 64, 1]cuda:0" = torch.ops.aten.permute.default(getitem_174, [0, 2, 1, 3]); getitem_174 = None + view_319: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.reshape.default(permute_315, [1, 64, 768]); permute_315 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:83 in forward, code: k = k.view(B, T, self.n_head, C // self.n_head).transpose( + permute_316: "f32[1, 64, 12, 64][49152, 768, 64, 1]cuda:0" = torch.ops.aten.permute.default(getitem_175, [0, 2, 1, 3]); getitem_175 = None + view_320: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.reshape.default(permute_316, [1, 64, 768]); permute_316 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:82 in forward, code: q, k, v = self.c_attn(x).split(self.n_embd, dim=2) + cat_10: "f32[1, 64, 2304][147456, 2304, 1]cuda:0" = torch.ops.aten.cat.default([view_319, view_320, view_318], 2); view_319 = view_320 = view_318 = None + view_321: "f32[64, 2304][2304, 1]cuda:0" = torch.ops.aten.reshape.default(cat_10, [64, 2304]); cat_10 = None + mm_89: "f32[64, 768][768, 1]cuda:0" = torch.ops.aten.mm.default(view_321, permute_317); permute_317 = None + permute_318: "f32[2304, 64][1, 2304]cuda:0" = torch.ops.aten.permute.default(view_321, [1, 0]) + mm_90: "f32[2304, 768][768, 1]cuda:0" = torch.ops.aten.mm.default(permute_318, view_12); permute_318 = view_12 = None + permute_319: "f32[768, 2304][1, 768]cuda:0" = torch.ops.aten.permute.default(mm_90, [1, 0]); mm_90 = None + sum_132: "f32[1, 2304][2304, 1]cuda:0" = torch.ops.aten.sum.dim_IntList(view_321, [0], True); view_321 = None + view_322: "f32[2304][1]cuda:0" = torch.ops.aten.reshape.default(sum_132, [2304]); sum_132 = None + permute_320: "f32[2304, 768][768, 1]cuda:0" = torch.ops.aten.permute.default(permute_319, [1, 0]); permute_319 = None + view_323: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.reshape.default(mm_89, [1, 64, 768]); mm_89 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:43 in forward, code: return F.layer_norm(input, self.weight.shape, self.weight, self.bias, 1e-5) + mul_352: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.mul.Tensor(view_323, primals_16); primals_16 = None + mul_353: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.mul.Tensor(mul_352, 768) + sum_133: "f32[1, 64, 1][64, 1, 1]cuda:0" = torch.ops.aten.sum.dim_IntList(mul_352, [2], True) + mul_354: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.mul.Tensor(mul_352, mul_8); mul_352 = None + sum_134: "f32[1, 64, 1][64, 1, 1]cuda:0" = torch.ops.aten.sum.dim_IntList(mul_354, [2], True); mul_354 
= None + mul_355: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.mul.Tensor(mul_8, sum_134); sum_134 = None + sub_103: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.sub.Tensor(mul_353, sum_133); mul_353 = sum_133 = None + sub_104: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.sub.Tensor(sub_103, mul_355); sub_103 = mul_355 = None + mul_356: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.mul.Tensor(div_22, sub_104); div_22 = sub_104 = None + mul_357: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.mul.Tensor(view_323, mul_8); mul_8 = None + sum_135: "f32[768][1]cuda:0" = torch.ops.aten.sum.dim_IntList(mul_357, [0, 1]); mul_357 = None + sum_136: "f32[768][1]cuda:0" = torch.ops.aten.sum.dim_IntList(view_323, [0, 1]); view_323 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:43 in forward, code: return F.layer_norm(input, self.weight.shape, self.weight, self.bias, 1e-5) + add_142: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.add.Tensor(add_141, mul_356); add_141 = mul_356 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:130 in forward, code: x = self.c_proj(x) + view_324: "f32[64, 768][768, 1]cuda:0" = torch.ops.aten.reshape.default(add_142, [64, 768]) + mm_91: "f32[64, 3072][3072, 1]cuda:0" = torch.ops.aten.mm.default(view_324, permute_321); permute_321 = None + permute_322: "f32[768, 64][1, 768]cuda:0" = torch.ops.aten.permute.default(view_324, [1, 0]) + mm_92: "f32[768, 3072][3072, 1]cuda:0" = torch.ops.aten.mm.default(permute_322, view_10); permute_322 = view_10 = None + permute_323: "f32[3072, 768][1, 3072]cuda:0" = torch.ops.aten.permute.default(mm_92, [1, 0]); mm_92 = None + sum_137: "f32[1, 768][768, 1]cuda:0" = torch.ops.aten.sum.dim_IntList(view_324, [0], True); view_324 = None + view_325: "f32[768][1]cuda:0" = torch.ops.aten.reshape.default(sum_137, [768]); sum_137 = None + permute_324: "f32[768, 3072][3072, 1]cuda:0" = torch.ops.aten.permute.default(permute_323, [1, 0]); permute_323 = None + view_326: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.reshape.default(mm_91, [1, 64, 3072]); mm_91 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:128 in forward, code: x = self.c_fc(x) + view_9: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.reshape.default(addmm_2, [1, 64, 3072]); addmm_2 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:25 in new_gelu, code: 0.5 + mul_4: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.mul.Tensor(view_9, 0.5) + mul_358: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.mul.Tensor(view_326, mul_4); mul_4 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:29 in new_gelu, code: + torch.tanh(math.sqrt(2.0 / math.pi) * (x + 0.044715 * torch.pow(x, 3.0))) + pow_1: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.pow.Tensor_Scalar(view_9, 3.0) + mul_5: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.mul.Tensor(pow_1, 0.044715); pow_1 = None + add_6: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.add.Tensor(view_9, mul_5); mul_5 = None + mul_6: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.mul.Tensor(add_6, 0.7978845608028654); add_6 = None + tanh: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.tanh.default(mul_6); mul_6 = None + + # File: 
/data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:28 in new_gelu, code: 1.0 + add_7: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.add.Tensor(tanh, 1.0) + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:25 in new_gelu, code: 0.5 + mul_359: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.mul.Tensor(view_326, add_7); view_326 = add_7 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:29 in new_gelu, code: + torch.tanh(math.sqrt(2.0 / math.pi) * (x + 0.044715 * torch.pow(x, 3.0))) + mul_360: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.mul.Tensor(tanh, tanh); tanh = None + sub_105: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.sub.Tensor(1, mul_360); mul_360 = None + mul_361: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.mul.Tensor(mul_358, sub_105); mul_358 = sub_105 = None + mul_362: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.mul.Tensor(mul_361, 0.7978845608028654); mul_361 = None + mul_363: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.mul.Tensor(mul_362, 0.044715) + pow_24: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.pow.Tensor_Scalar(view_9, 2.0); view_9 = None + mul_364: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.mul.Scalar(pow_24, 3.0); pow_24 = None + mul_365: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.mul.Tensor(mul_363, mul_364); mul_363 = mul_364 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:29 in new_gelu, code: + torch.tanh(math.sqrt(2.0 / math.pi) * (x + 0.044715 * torch.pow(x, 3.0))) + add_143: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.add.Tensor(mul_362, mul_365); mul_362 = mul_365 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:25 in new_gelu, code: 0.5 + mul_366: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.mul.Tensor(mul_359, 0.5); mul_359 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:25 in new_gelu, code: 0.5 + add_144: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.add.Tensor(add_143, mul_366); add_143 = mul_366 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:128 in forward, code: x = self.c_fc(x) + view_327: "f32[64, 3072][3072, 1]cuda:0" = torch.ops.aten.reshape.default(add_144, [64, 3072]); add_144 = None + mm_93: "f32[64, 768][768, 1]cuda:0" = torch.ops.aten.mm.default(view_327, permute_325); permute_325 = None + permute_326: "f32[3072, 64][1, 3072]cuda:0" = torch.ops.aten.permute.default(view_327, [1, 0]) + mm_94: "f32[3072, 768][768, 1]cuda:0" = torch.ops.aten.mm.default(permute_326, view_8); permute_326 = view_8 = None + permute_327: "f32[768, 3072][1, 768]cuda:0" = torch.ops.aten.permute.default(mm_94, [1, 0]); mm_94 = None + sum_138: "f32[1, 3072][3072, 1]cuda:0" = torch.ops.aten.sum.dim_IntList(view_327, [0], True); view_327 = None + view_328: "f32[3072][1]cuda:0" = torch.ops.aten.reshape.default(sum_138, [3072]); sum_138 = None + permute_328: "f32[3072, 768][768, 1]cuda:0" = torch.ops.aten.permute.default(permute_327, [1, 0]); permute_327 = None + view_329: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.reshape.default(mm_93, [1, 64, 768]); mm_93 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:43 in forward, code: return F.layer_norm(input, 
self.weight.shape, self.weight, self.bias, 1e-5) + mul_368: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.mul.Tensor(view_329, primals_10); primals_10 = None + mul_369: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.mul.Tensor(mul_368, 768) + sum_139: "f32[1, 64, 1][64, 1, 1]cuda:0" = torch.ops.aten.sum.dim_IntList(mul_368, [2], True) + mul_370: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.mul.Tensor(mul_368, mul_2); mul_368 = None + sum_140: "f32[1, 64, 1][64, 1, 1]cuda:0" = torch.ops.aten.sum.dim_IntList(mul_370, [2], True); mul_370 = None + mul_371: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.mul.Tensor(mul_2, sum_140); sum_140 = None + sub_107: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.sub.Tensor(mul_369, sum_139); mul_369 = sum_139 = None + sub_108: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.sub.Tensor(sub_107, mul_371); sub_107 = mul_371 = None + mul_372: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.mul.Tensor(div_23, sub_108); div_23 = sub_108 = None + mul_373: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.mul.Tensor(view_329, mul_2); mul_2 = None + sum_141: "f32[768][1]cuda:0" = torch.ops.aten.sum.dim_IntList(mul_373, [0, 1]); mul_373 = None + sum_142: "f32[768][1]cuda:0" = torch.ops.aten.sum.dim_IntList(view_329, [0, 1]); view_329 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:43 in forward, code: return F.layer_norm(input, self.weight.shape, self.weight, self.bias, 1e-5) + add_145: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.add.Tensor(add_142, mul_372); add_142 = mul_372 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:116 in forward, code: y = self.resid_dropout(self.c_proj(y)) + view_330: "f32[64, 768][768, 1]cuda:0" = torch.ops.aten.reshape.default(add_145, [64, 768]) + mm_95: "f32[64, 768][768, 1]cuda:0" = torch.ops.aten.mm.default(view_330, permute_329); permute_329 = None + permute_330: "f32[768, 64][1, 768]cuda:0" = torch.ops.aten.permute.default(view_330, [1, 0]) + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:112 in forward, code: y.transpose(1, 2).contiguous().view(B, T, C) + permute_4: "f32[1, 64, 12, 64][49152, 768, 64, 1]cuda:0" = torch.ops.aten.permute.default(getitem_5, [0, 2, 1, 3]) + view_5: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.reshape.default(permute_4, [1, 64, 768]); permute_4 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:116 in forward, code: y = self.resid_dropout(self.c_proj(y)) + view_6: "f32[64, 768][768, 1]cuda:0" = torch.ops.aten.reshape.default(view_5, [64, 768]); view_5 = None + mm_96: "f32[768, 768][768, 1]cuda:0" = torch.ops.aten.mm.default(permute_330, view_6); permute_330 = view_6 = None + permute_331: "f32[768, 768][1, 768]cuda:0" = torch.ops.aten.permute.default(mm_96, [1, 0]); mm_96 = None + sum_143: "f32[1, 768][768, 1]cuda:0" = torch.ops.aten.sum.dim_IntList(view_330, [0], True); view_330 = None + view_331: "f32[768][1]cuda:0" = torch.ops.aten.reshape.default(sum_143, [768]); sum_143 = None + permute_332: "f32[768, 768][768, 1]cuda:0" = torch.ops.aten.permute.default(permute_331, [1, 0]); permute_331 = None + view_332: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.reshape.default(mm_95, [1, 64, 768]); mm_95 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:112 in forward, code: y.transpose(1, 
2).contiguous().view(B, T, C) + view_333: "f32[1, 64, 12, 64][49152, 768, 64, 1]cuda:0" = torch.ops.aten.reshape.default(view_332, [1, 64, 12, 64]); view_332 = None + permute_333: "f32[1, 12, 64, 64][49152, 64, 768, 1]cuda:0" = torch.ops.aten.permute.default(view_333, [0, 2, 1, 3]); view_333 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:96 in forward, code: y = torch.nn.functional.scaled_dot_product_attention( + _scaled_dot_product_efficient_attention_backward_11 = torch.ops.aten._scaled_dot_product_efficient_attention_backward.default(permute_333, permute_2, permute_1, permute_3, None, getitem_5, getitem_6, getitem_7, getitem_8, 0.0, [True, True, True, False], True); permute_333 = permute_2 = permute_1 = permute_3 = getitem_5 = getitem_6 = getitem_7 = getitem_8 = None + getitem_178: "f32[1, 12, 64, 64][49152, 64, 768, 1]cuda:0" = _scaled_dot_product_efficient_attention_backward_11[0] + getitem_179: "f32[1, 12, 64, 64][49152, 64, 768, 1]cuda:0" = _scaled_dot_product_efficient_attention_backward_11[1] + getitem_180: "f32[1, 12, 64, 64][49152, 64, 768, 1]cuda:0" = _scaled_dot_product_efficient_attention_backward_11[2]; _scaled_dot_product_efficient_attention_backward_11 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:89 in forward, code: v = v.view(B, T, self.n_head, C // self.n_head).transpose( + permute_334: "f32[1, 64, 12, 64][49152, 768, 64, 1]cuda:0" = torch.ops.aten.permute.default(getitem_180, [0, 2, 1, 3]); getitem_180 = None + view_334: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.reshape.default(permute_334, [1, 64, 768]); permute_334 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:86 in forward, code: q = q.view(B, T, self.n_head, C // self.n_head).transpose( + permute_335: "f32[1, 64, 12, 64][49152, 768, 64, 1]cuda:0" = torch.ops.aten.permute.default(getitem_178, [0, 2, 1, 3]); getitem_178 = None + view_335: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.reshape.default(permute_335, [1, 64, 768]); permute_335 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:83 in forward, code: k = k.view(B, T, self.n_head, C // self.n_head).transpose( + permute_336: "f32[1, 64, 12, 64][49152, 768, 64, 1]cuda:0" = torch.ops.aten.permute.default(getitem_179, [0, 2, 1, 3]); getitem_179 = None + view_336: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.reshape.default(permute_336, [1, 64, 768]); permute_336 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:82 in forward, code: q, k, v = self.c_attn(x).split(self.n_embd, dim=2) + cat_11: "f32[1, 64, 2304][147456, 2304, 1]cuda:0" = torch.ops.aten.cat.default([view_335, view_336, view_334], 2); view_335 = view_336 = view_334 = None + view_337: "f32[64, 2304][2304, 1]cuda:0" = torch.ops.aten.reshape.default(cat_11, [64, 2304]); cat_11 = None + mm_97: "f32[64, 768][768, 1]cuda:0" = torch.ops.aten.mm.default(view_337, permute_337); permute_337 = None + permute_338: "f32[2304, 64][1, 2304]cuda:0" = torch.ops.aten.permute.default(view_337, [1, 0]) + mm_98: "f32[2304, 768][768, 1]cuda:0" = torch.ops.aten.mm.default(permute_338, view); permute_338 = view = None + permute_339: "f32[768, 2304][1, 768]cuda:0" = torch.ops.aten.permute.default(mm_98, [1, 0]); mm_98 = None + sum_144: "f32[1, 2304][2304, 1]cuda:0" = torch.ops.aten.sum.dim_IntList(view_337, [0], True); view_337 = None + view_338: "f32[2304][1]cuda:0" = 
torch.ops.aten.reshape.default(sum_144, [2304]); sum_144 = None + permute_340: "f32[2304, 768][768, 1]cuda:0" = torch.ops.aten.permute.default(permute_339, [1, 0]); permute_339 = None + view_339: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.reshape.default(mm_97, [1, 64, 768]); mm_97 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:43 in forward, code: return F.layer_norm(input, self.weight.shape, self.weight, self.bias, 1e-5) + mul_375: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.mul.Tensor(view_339, primals_4); primals_4 = None + mul_376: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.mul.Tensor(mul_375, 768) + sum_145: "f32[1, 64, 1][64, 1, 1]cuda:0" = torch.ops.aten.sum.dim_IntList(mul_375, [2], True) + mul_377: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.mul.Tensor(mul_375, mul); mul_375 = None + sum_146: "f32[1, 64, 1][64, 1, 1]cuda:0" = torch.ops.aten.sum.dim_IntList(mul_377, [2], True); mul_377 = None + mul_378: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.mul.Tensor(mul, sum_146); sum_146 = None + sub_110: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.sub.Tensor(mul_376, sum_145); mul_376 = sum_145 = None + sub_111: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.sub.Tensor(sub_110, mul_378); sub_110 = mul_378 = None + mul_379: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.mul.Tensor(div_24, sub_111); div_24 = sub_111 = None + mul_380: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.mul.Tensor(view_339, mul); mul = None + sum_147: "f32[768][1]cuda:0" = torch.ops.aten.sum.dim_IntList(mul_380, [0, 1]); mul_380 = None + sum_148: "f32[768][1]cuda:0" = torch.ops.aten.sum.dim_IntList(view_339, [0, 1]); view_339 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:43 in forward, code: return F.layer_norm(input, self.weight.shape, self.weight, self.bias, 1e-5) + add_146: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.add.Tensor(add_145, mul_379); add_145 = mul_379 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:229 in forward, code: pos_emb = self.transformer.wpe( + eq: "b8[1, 64][64, 1]cuda:0" = torch.ops.aten.eq.Scalar(unsqueeze, -1) + unsqueeze_1: "b8[1, 64, 1][64, 1, 1]cuda:0" = torch.ops.aten.unsqueeze.default(eq, -1); eq = None + full_default_4: "f32[][]cuda:0" = torch.ops.aten.full.default([], 0.0, dtype = torch.float32, layout = torch.strided, device = device(type='cuda', index=0), pin_memory = False) + where: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.where.self(unsqueeze_1, full_default_4, add_146); unsqueeze_1 = None + full_default_5: "f32[1024, 768][768, 1]cuda:0" = torch.ops.aten.full.default([1024, 768], 0, dtype = torch.float32, layout = torch.strided, device = device(type='cuda', index=0), pin_memory = False) + _unsafe_index_put: "f32[1024, 768][768, 1]cuda:0" = torch.ops.prims._unsafe_index_put_.default(full_default_5, [unsqueeze], where, True); full_default_5 = unsqueeze = where = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:228 in forward, code: tok_emb = self.transformer.wte(idx) # token embeddings of shape (b, t, n_embd) + eq_1: "b8[1, 64][64, 1]cuda:0" = torch.ops.aten.eq.Scalar(primals_1, -1) + unsqueeze_2: "b8[1, 64, 1][64, 1, 1]cuda:0" = torch.ops.aten.unsqueeze.default(eq_1, -1); eq_1 = None + where_1: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.where.self(unsqueeze_2, 
full_default_4, add_146); unsqueeze_2 = full_default_4 = add_146 = None
+ full_default_7: "f32[50304, 768][768, 1]cuda:0" = torch.ops.aten.full.default([50304, 768], 0, dtype = torch.float32, layout = torch.strided, device = device(type='cuda', index=0), pin_memory = False)
+ _unsafe_index_put_1: "f32[50304, 768][768, 1]cuda:0" = torch.ops.prims._unsafe_index_put_.default(full_default_7, [primals_1], where_1, True); full_default_7 = primals_1 = where_1 = None
+
+ # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:228 in forward, code: tok_emb = self.transformer.wte(idx) # token embeddings of shape (b, t, n_embd)
+ add_147: "f32[50304, 768][768, 1]cuda:0" = torch.ops.aten.add.Tensor(permute_100, _unsafe_index_put_1); permute_100 = _unsafe_index_put_1 = None
+ return (None, add_147, _unsafe_index_put, sum_147, sum_148, permute_340, view_338, permute_332, view_331, sum_141, sum_142, permute_328, view_328, permute_324, view_325, sum_135, sum_136, permute_320, view_322, permute_312, view_315, sum_129, sum_130, permute_308, view_312, permute_304, view_309, sum_123, sum_124, permute_300, view_306, permute_292, view_299, sum_117, sum_118, permute_288, view_296, permute_284, view_293, sum_111, sum_112, permute_280, view_290, permute_272, view_283, sum_105, sum_106, permute_268, view_280, permute_264, view_277, sum_99, sum_100, permute_260, view_274, permute_252, view_267, sum_93, sum_94, permute_248, view_264, permute_244, view_261, sum_87, sum_88, permute_240, view_258, permute_232, view_251, sum_81, sum_82, permute_228, view_248, permute_224, view_245, sum_75, sum_76, permute_220, view_242, permute_212, view_235, sum_69, sum_70, permute_208, view_232, permute_204, view_229, sum_63, sum_64, permute_200, view_226, permute_192, view_219, sum_57, sum_58, permute_188, view_216, permute_184, view_213, sum_51, sum_52, permute_180, view_210, permute_172, view_203, sum_45, sum_46, permute_168, view_200, permute_164, view_197, sum_39, sum_40, permute_160, view_194, permute_152, view_187, sum_33, sum_34, permute_148, view_184, permute_144, view_181, sum_27, sum_28, permute_140, view_178, permute_132, view_171, sum_21, sum_22, permute_128, view_168, permute_124, view_165, sum_15, sum_16, permute_120, view_162, permute_112, view_155, sum_9, sum_10, permute_108, view_152, permute_104, view_149, sum_3, sum_4)
+
+V0806 13:56:02.116000 4107173 torch/_dynamo/utils.py:824] {"chromium_event": {}, "has_payload": "2d283a33d935475de25dab047e665ade"}
+ {
+ "name": "GraphLowering.run",
+ "ts": 1722977762116083.0,
+ "args": null,
+ "ph": "B",
+ "pid": 0
+ }
+V0806 13:56:02.861000 4107173 torch/_dynamo/utils.py:824] {"chromium_event": {}, "has_payload": "a0389aeaab0d4cb8071bd0e8e6e32f0d"}
+ {
+ "name": "GraphLowering.run",
+ "ts": 1722977762860959.0,
+ "args": null,
+ "ph": "E",
+ "pid": 0
+ }
+V0806 13:56:02.862000 4107173 torch/_dynamo/utils.py:824] {"chromium_event": {}, "has_payload": "803085ff029a11e4af9c770d41ce84f2"}
+ {
+ "name": "GraphLowering.compile_to_module",
+ "ts": 1722977762862673.8,
+ "args": null,
+ "ph": "B",
+ "pid": 0
+ }
+V0806 13:56:02.862000 4107173 torch/_dynamo/utils.py:824] {"chromium_event": {}, "has_payload": "7712dce48f8ad8817e3e170b1e961c78"}
+ {
+ "name": "code_gen",
+ "ts": 1722977762862772.8,
+ "args": null,
+ "ph": "B",
+ "pid": 0
+ }
+V0806 13:56:02.875000 4107173 torch/_dynamo/utils.py:824] {"chromium_event": {}, "has_payload": "0dca915629fd3e1219b6c3b78fe58cee"}
+ {
+ "name": "Scheduler.__init__",
+ "ts": 1722977762875354.5,
+ "args": null,
+ "ph": "B",
+ "pid": 0
+ }
+V0806 13:56:03.608000 4107173 torch/_dynamo/utils.py:824] {"chromium_event": {}, "has_payload": "a0700e4f4b7d20071f95c979dc81edc1"}
+ {
+ "name": "Scheduler.__init__",
+ "ts": 1722977763608181.8,
+ "args": null,
+ "ph": "E",
+ "pid": 0
+ }
+V0806 13:56:03.608000 4107173 torch/_dynamo/utils.py:824] {"chromium_event": {}, "has_payload": "a788ecd1af90be6a09828353a73bab87"}
+ {
+ "name": "Scheduler.codegen",
+ "ts": 1722977763608547.2,
+ "args": null,
+ "ph": "B",
+ "pid": 0
+ }
+V0806 13:56:04.390000 4107173 torch/_dynamo/utils.py:824] {"chromium_event": {}, "has_payload": "d9669c0f1d50d3459be407594119c03f"}
+ {
+ "name": "Scheduler.codegen",
+ "ts": 1722977764390154.5,
+ "args": null,
+ "ph": "E",
+ "pid": 0
+ }
+V0806 13:56:04.390000 4107173 torch/_dynamo/utils.py:824] {"chromium_event": {}, "has_payload": "e9416b1bde8810f193b722a890680cd4"}
+ {
+ "name": "WrapperCodeGen.generate",
+ "ts": 1722977764390432.2,
+ "args": null,
+ "ph": "B",
+ "pid": 0
+ }
+V0806 13:56:04.425000 4107173 torch/_dynamo/utils.py:824] {"chromium_event": {}, "has_payload": "c99ea59320891263583aa74dbedac343"}
+ {
+ "name": "WrapperCodeGen.generate",
+ "ts": 1722977764425384.8,
+ "args": null,
+ "ph": "E",
+ "pid": 0
+ }
+V0806 13:56:04.426000 4107173 torch/_inductor/graph.py:1792] {"inductor_output_code": {"filename": "/tmp/tmp2ln889l5/52/c52u5qz657ryymezk4izvpue77cek4zew6xe5neasnfjpwi55xyg.py"}, "frame_id": 2, "frame_compile_id": 0, "attempt": 1, "has_payload": "d80ad5ece7cde8b3547e3f1f8b38e0f2"}
+
+ # AOT ID: ['0_backward']
+ from ctypes import c_void_p, c_long
+ import torch
+ import math
+ import random
+ import os
+ import tempfile
+ from math import inf, nan
+ from torch._inductor.hooks import run_intermediate_hooks
+ from torch._inductor.utils import maybe_profile
+ from torch._inductor.codegen.memory_planning import _align as align
+
+ from torch import device, empty_strided
+ from torch._inductor.async_compile import AsyncCompile
+ from torch._inductor.select_algorithm import extern_kernels
+ from torch._inductor.codegen.multi_kernel import MultiKernelCall
+
+ aten = torch.ops.aten
+ inductor_ops = torch.ops.inductor
+ _quantized = torch.ops._quantized
+ assert_size_stride = torch._C._dynamo.guards.assert_size_stride
+ empty_strided_cpu = torch._C._dynamo.guards._empty_strided_cpu
+ empty_strided_cuda = torch._C._dynamo.guards._empty_strided_cuda
+ reinterpret_tensor = torch._C._dynamo.guards._reinterpret_tensor
+ alloc_from_pool = torch.ops.inductor._alloc_from_pool
+ async_compile = AsyncCompile()
+
+
+ # kernel path: /tmp/tmp2ln889l5/6p/c6paekzbgxqomhhj7h4ugorx23to3z4chkxlso6qupohk2b4t6gi.py
+ # Source Nodes: [], Original ATen: [aten.new_zeros]
+
+ triton_poi_fused_new_zeros_0 = async_compile.triton('triton_', '''
+ import triton
+ import triton.language as tl
+ from triton.compiler.compiler import AttrsDescriptor
+
+ from torch._inductor.runtime import triton_helpers, triton_heuristics
+ from torch._inductor.runtime.triton_helpers import libdevice, math as tl_math
+ from torch._inductor.runtime.hints import AutotuneHint, ReductionHint, TileHint, instance_descriptor, DeviceProperties
+
+ @triton_heuristics.pointwise(
+ size_hints=[65536],
+ filename=__file__,
+ triton_meta={'signature': {0: '*fp32', 1: 'i32'}, 'device': DeviceProperties(type='cuda', index=0, cc=90, major=9, regs_per_multiprocessor=65536, max_threads_per_multi_processor=2048, multi_processor_count=132), 'constants': {}, 'configs': [AttrsDescriptor(divisible_by_16=(0, 1), equal_to_1=())]},
+ inductor_meta={'autotune_hints':
set(), 'kernel_name': 'triton_poi_fused_new_zeros_0', 'mutated_arg_names': [], 'no_x_dim': False, 'num_load': 0, 'num_reduction': 0, 'backend_hash': 'B0714AF1B5CA55D285C0AA74C7669268EFE45FDD9B7A0A70183B8D89AC8FCAC4', 'are_deterministic_algorithms_enabled': False, 'assert_indirect_indexing': True, 'autotune_local_cache': True, 'autotune_pointwise': True, 'autotune_remote_cache': None, 'force_disable_caches': False, 'dynamic_scale_rblock': True, 'max_autotune': False, 'max_autotune_pointwise': False, 'min_split_scan_rblock': 256, 'spill_threshold': 16, 'store_cubin': False}, + min_elem_per_thread=0 + ) + @triton.jit + def triton_(out_ptr0, xnumel, XBLOCK : tl.constexpr): + xnumel = 49152 + xoffset = tl.program_id(0) * XBLOCK + xindex = xoffset + tl.arange(0, XBLOCK)[:] + xmask = tl.full([XBLOCK], True, tl.int1) + x0 = xindex + tmp0 = 0.0 + tl.store(out_ptr0 + (x0), tmp0, None) + ''', device_str='cuda') + + import triton + import triton.language as tl + from torch._inductor.runtime.triton_heuristics import grid, split_scan_grid, grid_combo_kernels, start_graph, end_graph + from torch._C import _cuda_getCurrentRawStream as get_raw_stream + + + # kernel path: /tmp/tmp2ln889l5/oo/coocewyva7nm7367uk2izabwjppr6kuwmmklcabvda7srgo2yjlt.py + # Source Nodes: [], Original ATen: [aten.index_put, aten.new_zeros] + + triton_poi_fused_index_put_new_zeros_1 = async_compile.triton('triton_', ''' + import triton + import triton.language as tl + from triton.compiler.compiler import AttrsDescriptor + + from torch._inductor.runtime import triton_helpers, triton_heuristics + from torch._inductor.runtime.triton_helpers import libdevice, math as tl_math + from torch._inductor.runtime.hints import AutotuneHint, ReductionHint, TileHint, instance_descriptor, DeviceProperties + + @triton_heuristics.pointwise( + size_hints=[1024], + filename=__file__, + triton_meta={'signature': {0: '*i64', 1: '*fp32', 2: '*fp32', 3: 'i32'}, 'device': DeviceProperties(type='cuda', index=0, cc=90, major=9, regs_per_multiprocessor=65536, max_threads_per_multi_processor=2048, multi_processor_count=132), 'constants': {}, 'configs': [AttrsDescriptor(divisible_by_16=(0, 1, 2, 3), equal_to_1=())]}, + inductor_meta={'autotune_hints': set(), 'kernel_name': 'triton_poi_fused_index_put_new_zeros_1', 'mutated_arg_names': ['out_ptr0'], 'no_x_dim': False, 'num_load': 2, 'num_reduction': 0, 'backend_hash': 'B0714AF1B5CA55D285C0AA74C7669268EFE45FDD9B7A0A70183B8D89AC8FCAC4', 'are_deterministic_algorithms_enabled': False, 'assert_indirect_indexing': True, 'autotune_local_cache': True, 'autotune_pointwise': True, 'autotune_remote_cache': None, 'force_disable_caches': False, 'dynamic_scale_rblock': True, 'max_autotune': False, 'max_autotune_pointwise': False, 'min_split_scan_rblock': 256, 'spill_threshold': 16, 'store_cubin': False}, + min_elem_per_thread=0 + ) + @triton.jit + def triton_(in_ptr0, in_ptr1, out_ptr0, xnumel, XBLOCK : tl.constexpr): + xnumel = 768 + xoffset = tl.program_id(0) * XBLOCK + xindex = xoffset + tl.arange(0, XBLOCK)[:] + xmask = xindex < xnumel + x0 = xindex + tmp0 = tl.load(in_ptr0 + (0)) + tmp1 = tl.broadcast_to(tmp0, [XBLOCK]) + tmp7 = tl.load(in_ptr1 + (x0), xmask) + tmp2 = tl.full([XBLOCK], 64, tl.int32) + tmp3 = tmp1 + tmp2 + tmp4 = tmp1 < 0 + tmp5 = tl.where(tmp4, tmp3, tmp1) + tl.device_assert((0 <= tmp5) & (tmp5 < 64), "index out of bounds: 0 <= tmp5 < 64") + tl.atomic_add(out_ptr0 + (x0 + (768*tmp5)), tmp7, xmask, sem='relaxed') + ''', device_str='cuda') + + + # kernel path: 
/tmp/tmp2ln889l5/cz/cczmlfpjyv3sv4qsg47uepi425idtn7lrru6wzh6tqpngryvmley.py + # Source Nodes: [], Original ATen: [aten.native_layer_norm_backward] + + triton_per_fused_native_layer_norm_backward_2 = async_compile.triton('triton_', ''' + import triton + import triton.language as tl + from triton.compiler.compiler import AttrsDescriptor + + from torch._inductor.runtime import triton_helpers, triton_heuristics + from torch._inductor.runtime.triton_helpers import libdevice, math as tl_math + from torch._inductor.runtime.hints import AutotuneHint, ReductionHint, TileHint, instance_descriptor, DeviceProperties + + @triton_heuristics.persistent_reduction( + size_hints=[64, 1024], + reduction_hint=ReductionHint.INNER, + filename=__file__, + triton_meta={'signature': {0: '*fp32', 1: '*fp32', 2: '*fp32', 3: '*fp32', 4: '*fp32', 5: 'i32', 6: 'i32'}, 'device': DeviceProperties(type='cuda', index=0, cc=90, major=9, regs_per_multiprocessor=65536, max_threads_per_multi_processor=2048, multi_processor_count=132), 'constants': {}, 'configs': [AttrsDescriptor(divisible_by_16=(0, 1, 2, 3, 4, 5, 6), equal_to_1=())]}, + inductor_meta={'autotune_hints': set(), 'kernel_name': 'triton_per_fused_native_layer_norm_backward_2', 'mutated_arg_names': [], 'no_x_dim': True, 'num_load': 4, 'num_reduction': 2, 'backend_hash': 'B0714AF1B5CA55D285C0AA74C7669268EFE45FDD9B7A0A70183B8D89AC8FCAC4', 'are_deterministic_algorithms_enabled': False, 'assert_indirect_indexing': True, 'autotune_local_cache': True, 'autotune_pointwise': True, 'autotune_remote_cache': None, 'force_disable_caches': False, 'dynamic_scale_rblock': True, 'max_autotune': False, 'max_autotune_pointwise': False, 'min_split_scan_rblock': 256, 'spill_threshold': 16, 'store_cubin': False} + ) + @triton.jit + def triton_(in_ptr0, in_ptr1, in_ptr2, in_ptr3, out_ptr2, xnumel, rnumel): + xnumel = 64 + XBLOCK: tl.constexpr = 1 + rnumel = 768 + RBLOCK: tl.constexpr = 1024 + xoffset = tl.program_id(0) * XBLOCK + xindex = tl.full([1], xoffset, tl.int32) + xmask = tl.full([RBLOCK], True, tl.int1) + rindex = tl.arange(0, RBLOCK)[:] + roffset = 0 + rmask = rindex < rnumel + r1 = rindex + x0 = xindex + tmp0 = tl.load(in_ptr0 + (r1 + (768*x0)), rmask, other=0.0) + tmp1 = tl.load(in_ptr1 + (r1), rmask, eviction_policy='evict_last', other=0.0) + tmp7 = tl.load(in_ptr2 + (r1 + (768*x0)), rmask, other=0.0) + tmp13 = tl.load(in_ptr3 + (x0), None, eviction_policy='evict_last') + tmp2 = tmp0 * tmp1 + tmp3 = tl.broadcast_to(tmp2, [RBLOCK]) + tmp5 = tl.where(rmask, tmp3, 0) + tmp6 = triton_helpers.promote_to_tensor(tl.sum(tmp5, 0)) + tmp8 = tmp2 * tmp7 + tmp9 = tl.broadcast_to(tmp8, [RBLOCK]) + tmp11 = tl.where(rmask, tmp9, 0) + tmp12 = triton_helpers.promote_to_tensor(tl.sum(tmp11, 0)) + tmp14 = 768.0 + tmp15 = tmp2 * tmp14 + tmp16 = tmp15 - tmp6 + tmp17 = tmp7 * tmp12 + tmp18 = tmp16 - tmp17 + tmp19 = tmp13 * tmp18 + tl.store(out_ptr2 + (r1 + (768*x0)), tmp19, rmask) + ''', device_str='cuda') + + + # kernel path: /tmp/tmp2ln889l5/ha/chal7ghh7ewcpd75gvkwva2pij3u5ofczztskcbubk3ioaefsaz6.py + # Source Nodes: [], Original ATen: [aten.native_layer_norm_backward] + + triton_per_fused_native_layer_norm_backward_3 = async_compile.triton('triton_', ''' + import triton + import triton.language as tl + from triton.compiler.compiler import AttrsDescriptor + + from torch._inductor.runtime import triton_helpers, triton_heuristics + from torch._inductor.runtime.triton_helpers import libdevice, math as tl_math + from torch._inductor.runtime.hints import AutotuneHint, ReductionHint, TileHint, 
instance_descriptor, DeviceProperties + + @triton_heuristics.persistent_reduction( + size_hints=[1024, 64], + reduction_hint=ReductionHint.INNER, + filename=__file__, + triton_meta={'signature': {0: '*fp32', 1: '*fp32', 2: '*fp32', 3: '*fp32', 4: 'i32', 5: 'i32'}, 'device': DeviceProperties(type='cuda', index=0, cc=90, major=9, regs_per_multiprocessor=65536, max_threads_per_multi_processor=2048, multi_processor_count=132), 'constants': {}, 'configs': [AttrsDescriptor(divisible_by_16=(0, 1, 2, 3, 4, 5), equal_to_1=())]}, + inductor_meta={'autotune_hints': set(), 'kernel_name': 'triton_per_fused_native_layer_norm_backward_3', 'mutated_arg_names': [], 'no_x_dim': False, 'num_load': 2, 'num_reduction': 2, 'backend_hash': 'B0714AF1B5CA55D285C0AA74C7669268EFE45FDD9B7A0A70183B8D89AC8FCAC4', 'are_deterministic_algorithms_enabled': False, 'assert_indirect_indexing': True, 'autotune_local_cache': True, 'autotune_pointwise': True, 'autotune_remote_cache': None, 'force_disable_caches': False, 'dynamic_scale_rblock': True, 'max_autotune': False, 'max_autotune_pointwise': False, 'min_split_scan_rblock': 256, 'spill_threshold': 16, 'store_cubin': False} + ) + @triton.jit + def triton_(in_ptr0, in_ptr1, out_ptr0, out_ptr1, xnumel, rnumel, XBLOCK : tl.constexpr): + xnumel = 768 + rnumel = 64 + RBLOCK: tl.constexpr = 64 + xoffset = tl.program_id(0) * XBLOCK + xindex = xoffset + tl.arange(0, XBLOCK)[:, None] + xmask = xindex < xnumel + rindex = tl.arange(0, RBLOCK)[None, :] + roffset = 0 + rmask = tl.full([XBLOCK, RBLOCK], True, tl.int1) + r1 = rindex + x0 = xindex + tmp0 = tl.load(in_ptr0 + (x0 + (768*r1)), xmask, other=0.0) + tmp1 = tl.load(in_ptr1 + (x0 + (768*r1)), xmask, other=0.0) + tmp2 = tmp0 * tmp1 + tmp3 = tl.broadcast_to(tmp2, [XBLOCK, RBLOCK]) + tmp5 = tl.where(xmask, tmp3, 0) + tmp6 = tl.sum(tmp5, 1)[:, None] + tmp7 = tl.broadcast_to(tmp0, [XBLOCK, RBLOCK]) + tmp9 = tl.where(xmask, tmp7, 0) + tmp10 = tl.sum(tmp9, 1)[:, None] + tl.store(out_ptr0 + (x0), tmp6, xmask) + tl.store(out_ptr1 + (x0), tmp10, xmask) + ''', device_str='cuda') + + + # kernel path: /tmp/tmp2ln889l5/oq/coq4a53onebiitbgr5qfnlwe4gtsq6tfdkhu4jc2ah74b22q6frr.py + # Source Nodes: [], Original ATen: [aten.sum] + + triton_per_fused_sum_4 = async_compile.triton('triton_', ''' + import triton + import triton.language as tl + from triton.compiler.compiler import AttrsDescriptor + + from torch._inductor.runtime import triton_helpers, triton_heuristics + from torch._inductor.runtime.triton_helpers import libdevice, math as tl_math + from torch._inductor.runtime.hints import AutotuneHint, ReductionHint, TileHint, instance_descriptor, DeviceProperties + + @triton_heuristics.persistent_reduction( + size_hints=[1024, 64], + reduction_hint=ReductionHint.OUTER, + filename=__file__, + triton_meta={'signature': {0: '*fp32', 1: '*fp32', 2: 'i32', 3: 'i32'}, 'device': DeviceProperties(type='cuda', index=0, cc=90, major=9, regs_per_multiprocessor=65536, max_threads_per_multi_processor=2048, multi_processor_count=132), 'constants': {}, 'configs': [AttrsDescriptor(divisible_by_16=(0, 1, 2, 3), equal_to_1=())]}, + inductor_meta={'autotune_hints': set(), 'kernel_name': 'triton_per_fused_sum_4', 'mutated_arg_names': [], 'no_x_dim': False, 'num_load': 1, 'num_reduction': 1, 'backend_hash': 'B0714AF1B5CA55D285C0AA74C7669268EFE45FDD9B7A0A70183B8D89AC8FCAC4', 'are_deterministic_algorithms_enabled': False, 'assert_indirect_indexing': True, 'autotune_local_cache': True, 'autotune_pointwise': True, 'autotune_remote_cache': None, 'force_disable_caches': False, 
'dynamic_scale_rblock': True, 'max_autotune': False, 'max_autotune_pointwise': False, 'min_split_scan_rblock': 256, 'spill_threshold': 16, 'store_cubin': False} + ) + @triton.jit + def triton_(in_ptr0, out_ptr0, xnumel, rnumel, XBLOCK : tl.constexpr): + xnumel = 768 + rnumel = 64 + RBLOCK: tl.constexpr = 64 + xoffset = tl.program_id(0) * XBLOCK + xindex = xoffset + tl.arange(0, XBLOCK)[:, None] + xmask = xindex < xnumel + rindex = tl.arange(0, RBLOCK)[None, :] + roffset = 0 + rmask = tl.full([XBLOCK, RBLOCK], True, tl.int1) + r1 = rindex + x0 = xindex + tmp0 = tl.load(in_ptr0 + (x0 + (768*r1)), xmask, other=0.0) + tmp1 = tl.broadcast_to(tmp0, [XBLOCK, RBLOCK]) + tmp3 = tl.where(xmask, tmp1, 0) + tmp4 = tl.sum(tmp3, 1)[:, None] + tl.store(out_ptr0 + (x0), tmp4, xmask) + ''', device_str='cuda') + + + # kernel path: /tmp/tmp2ln889l5/6d/c6doghxt344wzvcsvrtcaftqayqnzocngd56v2yhsnz3v5eopk2f.py + # Source Nodes: [add_46, add_47, mul_44, mul_45, mul_46, pow_12, tanh_11], Original ATen: [aten.add, aten.mul, aten.pow, aten.tanh, aten.tanh_backward] + # add_46 => add_94 + # add_47 => add_95 + # mul_44 => mul_92 + # mul_45 => mul_93 + # mul_46 => mul_94 + # pow_12 => pow_12 + # tanh_11 => tanh_11 + triton_poi_fused_add_mul_pow_tanh_tanh_backward_5 = async_compile.triton('triton_', ''' + import triton + import triton.language as tl + from triton.compiler.compiler import AttrsDescriptor + + from torch._inductor.runtime import triton_helpers, triton_heuristics + from torch._inductor.runtime.triton_helpers import libdevice, math as tl_math + from torch._inductor.runtime.hints import AutotuneHint, ReductionHint, TileHint, instance_descriptor, DeviceProperties + + @triton_heuristics.pointwise( + size_hints=[262144], + filename=__file__, + triton_meta={'signature': {0: '*fp32', 1: '*fp32', 2: 'i32'}, 'device': DeviceProperties(type='cuda', index=0, cc=90, major=9, regs_per_multiprocessor=65536, max_threads_per_multi_processor=2048, multi_processor_count=132), 'constants': {}, 'configs': [AttrsDescriptor(divisible_by_16=(0, 1, 2), equal_to_1=())]}, + inductor_meta={'autotune_hints': set(), 'kernel_name': 'triton_poi_fused_add_mul_pow_tanh_tanh_backward_5', 'mutated_arg_names': ['in_out_ptr0'], 'no_x_dim': False, 'num_load': 2, 'num_reduction': 0, 'backend_hash': 'B0714AF1B5CA55D285C0AA74C7669268EFE45FDD9B7A0A70183B8D89AC8FCAC4', 'are_deterministic_algorithms_enabled': False, 'assert_indirect_indexing': True, 'autotune_local_cache': True, 'autotune_pointwise': True, 'autotune_remote_cache': None, 'force_disable_caches': False, 'dynamic_scale_rblock': True, 'max_autotune': False, 'max_autotune_pointwise': False, 'min_split_scan_rblock': 256, 'spill_threshold': 16, 'store_cubin': False}, + min_elem_per_thread=0 + ) + @triton.jit + def triton_(in_out_ptr0, in_ptr0, xnumel, XBLOCK : tl.constexpr): + xnumel = 196608 + xoffset = tl.program_id(0) * XBLOCK + xindex = xoffset + tl.arange(0, XBLOCK)[:] + xmask = tl.full([XBLOCK], True, tl.int1) + x0 = xindex + tmp0 = tl.load(in_out_ptr0 + (x0), None) + tmp1 = tl.load(in_ptr0 + (x0), None) + tmp2 = 0.5 + tmp3 = tmp1 * tmp2 + tmp4 = tmp0 * tmp3 + tmp5 = tmp1 * tmp1 + tmp6 = tmp5 * tmp1 + tmp7 = 0.044715 + tmp8 = tmp6 * tmp7 + tmp9 = tmp1 + tmp8 + tmp10 = 0.7978845608028654 + tmp11 = tmp9 * tmp10 + tmp12 = libdevice.tanh(tmp11) + tmp13 = tmp12 * tmp12 + tmp14 = 1.0 + tmp15 = tmp14 - tmp13 + tmp16 = tmp4 * tmp15 + tmp17 = tmp16 * tmp10 + tmp18 = tmp17 * tmp7 + tmp19 = 3.0 + tmp20 = tmp5 * tmp19 + tmp21 = tmp18 * tmp20 + tmp22 = tmp17 + tmp21 + tmp23 = tmp12 + tmp14 + tmp24 
= tmp0 * tmp23 + tmp25 = tmp24 * tmp2 + tmp26 = tmp22 + tmp25 + tl.store(in_out_ptr0 + (x0), tmp26, None) + ''', device_str='cuda') + + + # kernel path: /tmp/tmp2ln889l5/tp/ctpgg42pkdcfpn2vurm3qnozt4yd6qbtncxrhvxbhiy6psqbvvig.py + # Source Nodes: [], Original ATen: [aten.sum] + + triton_per_fused_sum_6 = async_compile.triton('triton_', ''' + import triton + import triton.language as tl + from triton.compiler.compiler import AttrsDescriptor + + from torch._inductor.runtime import triton_helpers, triton_heuristics + from torch._inductor.runtime.triton_helpers import libdevice, math as tl_math + from torch._inductor.runtime.hints import AutotuneHint, ReductionHint, TileHint, instance_descriptor, DeviceProperties + + @triton_heuristics.persistent_reduction( + size_hints=[4096, 64], + reduction_hint=ReductionHint.OUTER, + filename=__file__, + triton_meta={'signature': {0: '*fp32', 1: '*fp32', 2: 'i32', 3: 'i32'}, 'device': DeviceProperties(type='cuda', index=0, cc=90, major=9, regs_per_multiprocessor=65536, max_threads_per_multi_processor=2048, multi_processor_count=132), 'constants': {}, 'configs': [AttrsDescriptor(divisible_by_16=(0, 1, 2, 3), equal_to_1=())]}, + inductor_meta={'autotune_hints': set(), 'kernel_name': 'triton_per_fused_sum_6', 'mutated_arg_names': [], 'no_x_dim': False, 'num_load': 1, 'num_reduction': 1, 'backend_hash': 'B0714AF1B5CA55D285C0AA74C7669268EFE45FDD9B7A0A70183B8D89AC8FCAC4', 'are_deterministic_algorithms_enabled': False, 'assert_indirect_indexing': True, 'autotune_local_cache': True, 'autotune_pointwise': True, 'autotune_remote_cache': None, 'force_disable_caches': False, 'dynamic_scale_rblock': True, 'max_autotune': False, 'max_autotune_pointwise': False, 'min_split_scan_rblock': 256, 'spill_threshold': 16, 'store_cubin': False} + ) + @triton.jit + def triton_(in_ptr0, out_ptr0, xnumel, rnumel, XBLOCK : tl.constexpr): + xnumel = 3072 + rnumel = 64 + RBLOCK: tl.constexpr = 64 + xoffset = tl.program_id(0) * XBLOCK + xindex = xoffset + tl.arange(0, XBLOCK)[:, None] + xmask = xindex < xnumel + rindex = tl.arange(0, RBLOCK)[None, :] + roffset = 0 + rmask = tl.full([XBLOCK, RBLOCK], True, tl.int1) + r1 = rindex + x0 = xindex + tmp0 = tl.load(in_ptr0 + (x0 + (3072*r1)), xmask, other=0.0) + tmp1 = tl.broadcast_to(tmp0, [XBLOCK, RBLOCK]) + tmp3 = tl.where(xmask, tmp1, 0) + tmp4 = tl.sum(tmp3, 1)[:, None] + tl.store(out_ptr0 + (x0), tmp4, xmask) + ''', device_str='cuda') + + + # kernel path: /tmp/tmp2ln889l5/vc/cvcsfabswaorzljwigmguorguvjnz5ygwefg5jpn2pdnzjiyiunq.py + # Source Nodes: [], Original ATen: [aten.add, aten.native_layer_norm_backward] + + triton_per_fused_add_native_layer_norm_backward_7 = async_compile.triton('triton_', ''' + import triton + import triton.language as tl + from triton.compiler.compiler import AttrsDescriptor + + from torch._inductor.runtime import triton_helpers, triton_heuristics + from torch._inductor.runtime.triton_helpers import libdevice, math as tl_math + from torch._inductor.runtime.hints import AutotuneHint, ReductionHint, TileHint, instance_descriptor, DeviceProperties + + @triton_heuristics.persistent_reduction( + size_hints=[64, 1024], + reduction_hint=ReductionHint.INNER, + filename=__file__, + triton_meta={'signature': {0: '*fp32', 1: '*fp32', 2: '*fp32', 3: '*fp32', 4: '*fp32', 5: 'i32', 6: 'i32'}, 'device': DeviceProperties(type='cuda', index=0, cc=90, major=9, regs_per_multiprocessor=65536, max_threads_per_multi_processor=2048, multi_processor_count=132), 'constants': {}, 'configs': [AttrsDescriptor(divisible_by_16=(0, 1, 2, 3, 4, 
5, 6), equal_to_1=())]}, + inductor_meta={'autotune_hints': set(), 'kernel_name': 'triton_per_fused_add_native_layer_norm_backward_7', 'mutated_arg_names': ['in_out_ptr0'], 'no_x_dim': True, 'num_load': 5, 'num_reduction': 2, 'backend_hash': 'B0714AF1B5CA55D285C0AA74C7669268EFE45FDD9B7A0A70183B8D89AC8FCAC4', 'are_deterministic_algorithms_enabled': False, 'assert_indirect_indexing': True, 'autotune_local_cache': True, 'autotune_pointwise': True, 'autotune_remote_cache': None, 'force_disable_caches': False, 'dynamic_scale_rblock': True, 'max_autotune': False, 'max_autotune_pointwise': False, 'min_split_scan_rblock': 256, 'spill_threshold': 16, 'store_cubin': False} + ) + @triton.jit + def triton_(in_out_ptr0, in_ptr0, in_ptr1, in_ptr2, in_ptr3, xnumel, rnumel): + xnumel = 64 + XBLOCK: tl.constexpr = 1 + rnumel = 768 + RBLOCK: tl.constexpr = 1024 + xoffset = tl.program_id(0) * XBLOCK + xindex = tl.full([1], xoffset, tl.int32) + xmask = tl.full([RBLOCK], True, tl.int1) + rindex = tl.arange(0, RBLOCK)[:] + roffset = 0 + rmask = rindex < rnumel + r1 = rindex + x0 = xindex + tmp0 = tl.load(in_ptr0 + (r1 + (768*x0)), rmask, other=0.0) + tmp1 = tl.load(in_ptr1 + (r1), rmask, eviction_policy='evict_last', other=0.0) + tmp7 = tl.load(in_ptr2 + (r1 + (768*x0)), rmask, other=0.0) + tmp13 = tl.load(in_out_ptr0 + (r1 + (768*x0)), rmask, other=0.0) + tmp14 = tl.load(in_ptr3 + (x0), None, eviction_policy='evict_last') + tmp2 = tmp0 * tmp1 + tmp3 = tl.broadcast_to(tmp2, [RBLOCK]) + tmp5 = tl.where(rmask, tmp3, 0) + tmp6 = triton_helpers.promote_to_tensor(tl.sum(tmp5, 0)) + tmp8 = tmp2 * tmp7 + tmp9 = tl.broadcast_to(tmp8, [RBLOCK]) + tmp11 = tl.where(rmask, tmp9, 0) + tmp12 = triton_helpers.promote_to_tensor(tl.sum(tmp11, 0)) + tmp15 = 768.0 + tmp16 = tmp2 * tmp15 + tmp17 = tmp16 - tmp6 + tmp18 = tmp7 * tmp12 + tmp19 = tmp17 - tmp18 + tmp20 = tmp14 * tmp19 + tmp21 = tmp13 + tmp20 + tl.store(in_out_ptr0 + (r1 + (768*x0)), tmp21, rmask) + ''', device_str='cuda') + + + # kernel path: /tmp/tmp2ln889l5/ar/cardc3vznbmvvhhk6ledk62xbqafohguorygjv35ygndxybgtp4v.py + # Source Nodes: [], Original ATen: [aten.cat] + + triton_poi_fused_cat_8 = async_compile.triton('triton_', ''' + import triton + import triton.language as tl + from triton.compiler.compiler import AttrsDescriptor + + from torch._inductor.runtime import triton_helpers, triton_heuristics + from torch._inductor.runtime.triton_helpers import libdevice, math as tl_math + from torch._inductor.runtime.hints import AutotuneHint, ReductionHint, TileHint, instance_descriptor, DeviceProperties + + @triton_heuristics.pointwise( + size_hints=[262144], + filename=__file__, + triton_meta={'signature': {0: '*fp32', 1: '*fp32', 2: '*fp32', 3: '*fp32', 4: 'i32'}, 'device': DeviceProperties(type='cuda', index=0, cc=90, major=9, regs_per_multiprocessor=65536, max_threads_per_multi_processor=2048, multi_processor_count=132), 'constants': {}, 'configs': [AttrsDescriptor(divisible_by_16=(0, 1, 2, 3, 4), equal_to_1=())]}, + inductor_meta={'autotune_hints': set(), 'kernel_name': 'triton_poi_fused_cat_8', 'mutated_arg_names': [], 'no_x_dim': False, 'num_load': 3, 'num_reduction': 0, 'backend_hash': 'B0714AF1B5CA55D285C0AA74C7669268EFE45FDD9B7A0A70183B8D89AC8FCAC4', 'are_deterministic_algorithms_enabled': False, 'assert_indirect_indexing': True, 'autotune_local_cache': True, 'autotune_pointwise': True, 'autotune_remote_cache': None, 'force_disable_caches': False, 'dynamic_scale_rblock': True, 'max_autotune': False, 'max_autotune_pointwise': False, 'min_split_scan_rblock': 256, 
'spill_threshold': 16, 'store_cubin': False}, + min_elem_per_thread=0 + ) + @triton.jit + def triton_(in_ptr0, in_ptr1, in_ptr2, out_ptr0, xnumel, XBLOCK : tl.constexpr): + xnumel = 147456 + xoffset = tl.program_id(0) * XBLOCK + xindex = xoffset + tl.arange(0, XBLOCK)[:] + xmask = tl.full([XBLOCK], True, tl.int1) + x0 = xindex % 2304 + x1 = (xindex // 2304) + x2 = xindex + tmp0 = x0 + tmp1 = tl.full([1], 0, tl.int64) + tmp2 = tmp0 >= tmp1 + tmp3 = tl.full([1], 768, tl.int64) + tmp4 = tmp0 < tmp3 + tmp5 = tl.load(in_ptr0 + ((768*x1) + x0), tmp4, eviction_policy='evict_last', other=0.0) + tmp6 = tmp0 >= tmp3 + tmp7 = tl.full([1], 1536, tl.int64) + tmp8 = tmp0 < tmp7 + tmp9 = tmp6 & tmp8 + tmp10 = tl.load(in_ptr1 + ((768*x1) + ((-768) + x0)), tmp9, eviction_policy='evict_last', other=0.0) + tmp11 = tmp0 >= tmp7 + tmp12 = tl.full([1], 2304, tl.int64) + tmp13 = tmp0 < tmp12 + tmp14 = tl.load(in_ptr2 + ((768*x1) + ((-1536) + x0)), tmp11, eviction_policy='evict_last', other=0.0) + tmp15 = tl.where(tmp9, tmp10, tmp14) + tmp16 = tl.where(tmp4, tmp5, tmp15) + tl.store(out_ptr0 + (x2), tmp16, None) + ''', device_str='cuda') + + + # kernel path: /tmp/tmp2ln889l5/zz/czzbnoytzj35xlzlzkxyfssawly4isbfgis7wkt5vorq5bjtvkuj.py + # Source Nodes: [], Original ATen: [aten.sum] + + triton_per_fused_sum_9 = async_compile.triton('triton_', ''' + import triton + import triton.language as tl + from triton.compiler.compiler import AttrsDescriptor + + from torch._inductor.runtime import triton_helpers, triton_heuristics + from torch._inductor.runtime.triton_helpers import libdevice, math as tl_math + from torch._inductor.runtime.hints import AutotuneHint, ReductionHint, TileHint, instance_descriptor, DeviceProperties + + @triton_heuristics.persistent_reduction( + size_hints=[4096, 64], + reduction_hint=ReductionHint.OUTER, + filename=__file__, + triton_meta={'signature': {0: '*fp32', 1: '*fp32', 2: 'i32', 3: 'i32'}, 'device': DeviceProperties(type='cuda', index=0, cc=90, major=9, regs_per_multiprocessor=65536, max_threads_per_multi_processor=2048, multi_processor_count=132), 'constants': {}, 'configs': [AttrsDescriptor(divisible_by_16=(0, 1, 2, 3), equal_to_1=())]}, + inductor_meta={'autotune_hints': set(), 'kernel_name': 'triton_per_fused_sum_9', 'mutated_arg_names': [], 'no_x_dim': False, 'num_load': 1, 'num_reduction': 1, 'backend_hash': 'B0714AF1B5CA55D285C0AA74C7669268EFE45FDD9B7A0A70183B8D89AC8FCAC4', 'are_deterministic_algorithms_enabled': False, 'assert_indirect_indexing': True, 'autotune_local_cache': True, 'autotune_pointwise': True, 'autotune_remote_cache': None, 'force_disable_caches': False, 'dynamic_scale_rblock': True, 'max_autotune': False, 'max_autotune_pointwise': False, 'min_split_scan_rblock': 256, 'spill_threshold': 16, 'store_cubin': False} + ) + @triton.jit + def triton_(in_ptr0, out_ptr0, xnumel, rnumel, XBLOCK : tl.constexpr): + xnumel = 2304 + rnumel = 64 + RBLOCK: tl.constexpr = 64 + xoffset = tl.program_id(0) * XBLOCK + xindex = xoffset + tl.arange(0, XBLOCK)[:, None] + xmask = xindex < xnumel + rindex = tl.arange(0, RBLOCK)[None, :] + roffset = 0 + rmask = tl.full([XBLOCK, RBLOCK], True, tl.int1) + r1 = rindex + x0 = xindex + tmp0 = tl.load(in_ptr0 + (x0 + (2304*r1)), xmask, other=0.0) + tmp1 = tl.broadcast_to(tmp0, [XBLOCK, RBLOCK]) + tmp3 = tl.where(xmask, tmp1, 0) + tmp4 = tl.sum(tmp3, 1)[:, None] + tl.store(out_ptr0 + (x0), tmp4, xmask) + ''', device_str='cuda') + + + # kernel path: /tmp/tmp2ln889l5/es/ces5mfu37mqyysd7bqz7jj4nzmf5iy36y733ltbjl32z622kywum.py + # Source Nodes: [], 
Original ATen: [aten.embedding_dense_backward] + + triton_poi_fused_embedding_dense_backward_10 = async_compile.triton('triton_', ''' + import triton + import triton.language as tl + from triton.compiler.compiler import AttrsDescriptor + + from torch._inductor.runtime import triton_helpers, triton_heuristics + from torch._inductor.runtime.triton_helpers import libdevice, math as tl_math + from torch._inductor.runtime.hints import AutotuneHint, ReductionHint, TileHint, instance_descriptor, DeviceProperties + + @triton_heuristics.pointwise( + size_hints=[1048576], + filename=__file__, + triton_meta={'signature': {0: '*fp32', 1: 'i32'}, 'device': DeviceProperties(type='cuda', index=0, cc=90, major=9, regs_per_multiprocessor=65536, max_threads_per_multi_processor=2048, multi_processor_count=132), 'constants': {}, 'configs': [AttrsDescriptor(divisible_by_16=(0, 1), equal_to_1=())]}, + inductor_meta={'autotune_hints': set(), 'kernel_name': 'triton_poi_fused_embedding_dense_backward_10', 'mutated_arg_names': [], 'no_x_dim': False, 'num_load': 0, 'num_reduction': 0, 'backend_hash': 'B0714AF1B5CA55D285C0AA74C7669268EFE45FDD9B7A0A70183B8D89AC8FCAC4', 'are_deterministic_algorithms_enabled': False, 'assert_indirect_indexing': True, 'autotune_local_cache': True, 'autotune_pointwise': True, 'autotune_remote_cache': None, 'force_disable_caches': False, 'dynamic_scale_rblock': True, 'max_autotune': False, 'max_autotune_pointwise': False, 'min_split_scan_rblock': 256, 'spill_threshold': 16, 'store_cubin': False}, + min_elem_per_thread=0 + ) + @triton.jit + def triton_(out_ptr0, xnumel, XBLOCK : tl.constexpr): + xnumel = 786432 + xoffset = tl.program_id(0) * XBLOCK + xindex = xoffset + tl.arange(0, XBLOCK)[:] + xmask = tl.full([XBLOCK], True, tl.int1) + x0 = xindex + tmp0 = 0.0 + tl.store(out_ptr0 + (x0), tmp0, None) + ''', device_str='cuda') + + + # kernel path: /tmp/tmp2ln889l5/fb/cfbt4gzmmsu52tt6jq2ejhclvei7g2bvocxc3tptmo2jt2vjqwrw.py + # Source Nodes: [], Original ATen: [aten.embedding_dense_backward] + + triton_poi_fused_embedding_dense_backward_11 = async_compile.triton('triton_', ''' + import triton + import triton.language as tl + from triton.compiler.compiler import AttrsDescriptor + + from torch._inductor.runtime import triton_helpers, triton_heuristics + from torch._inductor.runtime.triton_helpers import libdevice, math as tl_math + from torch._inductor.runtime.hints import AutotuneHint, ReductionHint, TileHint, instance_descriptor, DeviceProperties + + @triton_heuristics.pointwise( + size_hints=[67108864], + filename=__file__, + triton_meta={'signature': {0: '*fp32', 1: 'i32'}, 'device': DeviceProperties(type='cuda', index=0, cc=90, major=9, regs_per_multiprocessor=65536, max_threads_per_multi_processor=2048, multi_processor_count=132), 'constants': {}, 'configs': [AttrsDescriptor(divisible_by_16=(0, 1), equal_to_1=())]}, + inductor_meta={'autotune_hints': set(), 'kernel_name': 'triton_poi_fused_embedding_dense_backward_11', 'mutated_arg_names': [], 'no_x_dim': False, 'num_load': 0, 'num_reduction': 0, 'backend_hash': 'B0714AF1B5CA55D285C0AA74C7669268EFE45FDD9B7A0A70183B8D89AC8FCAC4', 'are_deterministic_algorithms_enabled': False, 'assert_indirect_indexing': True, 'autotune_local_cache': True, 'autotune_pointwise': True, 'autotune_remote_cache': None, 'force_disable_caches': False, 'dynamic_scale_rblock': True, 'max_autotune': False, 'max_autotune_pointwise': False, 'min_split_scan_rblock': 256, 'spill_threshold': 16, 'store_cubin': False}, + min_elem_per_thread=0 + ) + @triton.jit + def 
triton_(out_ptr0, xnumel, XBLOCK : tl.constexpr): + xnumel = 38633472 + xoffset = tl.program_id(0) * XBLOCK + xindex = xoffset + tl.arange(0, XBLOCK)[:] + xmask = tl.full([XBLOCK], True, tl.int1) + x0 = xindex + tmp0 = 0.0 + tl.store(out_ptr0 + (x0), tmp0, None) + ''', device_str='cuda') + + + # kernel path: /tmp/tmp2ln889l5/ax/cax4blfw6cfz2t7paehqtfiuyq5xduvu52edbcakzqd6prmrjf5o.py + # Source Nodes: [], Original ATen: [aten.add, aten.embedding_dense_backward, aten.native_layer_norm_backward] + + triton_per_fused_add_embedding_dense_backward_native_layer_norm_backward_12 = async_compile.triton('triton_', ''' + import triton + import triton.language as tl + from triton.compiler.compiler import AttrsDescriptor + + from torch._inductor.runtime import triton_helpers, triton_heuristics + from torch._inductor.runtime.triton_helpers import libdevice, math as tl_math + from torch._inductor.runtime.hints import AutotuneHint, ReductionHint, TileHint, instance_descriptor, DeviceProperties + + @triton_heuristics.persistent_reduction( + size_hints=[64, 1024], + reduction_hint=ReductionHint.INNER, + filename=__file__, + triton_meta={'signature': {0: '*fp32', 1: '*fp32', 2: '*fp32', 3: '*fp32', 4: '*fp32', 5: '*i64', 6: '*i64', 7: '*fp32', 8: '*fp32', 9: 'i32', 10: 'i32'}, 'device': DeviceProperties(type='cuda', index=0, cc=90, major=9, regs_per_multiprocessor=65536, max_threads_per_multi_processor=2048, multi_processor_count=132), 'constants': {}, 'configs': [AttrsDescriptor(divisible_by_16=(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10), equal_to_1=())]}, + inductor_meta={'autotune_hints': set(), 'kernel_name': 'triton_per_fused_add_embedding_dense_backward_native_layer_norm_backward_12', 'mutated_arg_names': ['in_out_ptr0', 'out_ptr2', 'out_ptr3'], 'no_x_dim': True, 'num_load': 7, 'num_reduction': 2, 'backend_hash': 'B0714AF1B5CA55D285C0AA74C7669268EFE45FDD9B7A0A70183B8D89AC8FCAC4', 'are_deterministic_algorithms_enabled': False, 'assert_indirect_indexing': True, 'autotune_local_cache': True, 'autotune_pointwise': True, 'autotune_remote_cache': None, 'force_disable_caches': False, 'dynamic_scale_rblock': True, 'max_autotune': False, 'max_autotune_pointwise': False, 'min_split_scan_rblock': 256, 'spill_threshold': 16, 'store_cubin': False} + ) + @triton.jit + def triton_(in_out_ptr0, in_ptr0, in_ptr1, in_ptr2, in_ptr3, in_ptr4, in_ptr5, out_ptr2, out_ptr3, xnumel, rnumel): + xnumel = 64 + XBLOCK: tl.constexpr = 1 + rnumel = 768 + RBLOCK: tl.constexpr = 1024 + xoffset = tl.program_id(0) * XBLOCK + xindex = tl.full([1], xoffset, tl.int32) + xmask = tl.full([RBLOCK], True, tl.int1) + rindex = tl.arange(0, RBLOCK)[:] + roffset = 0 + rmask = rindex < rnumel + r1 = rindex + x0 = xindex + tmp0 = tl.load(in_ptr0 + (r1 + (768*x0)), rmask, other=0.0) + tmp1 = tl.load(in_ptr1 + (r1), rmask, eviction_policy='evict_last', other=0.0) + tmp7 = tl.load(in_ptr2 + (r1 + (768*x0)), rmask, other=0.0) + tmp13 = tl.load(in_out_ptr0 + (r1 + (768*x0)), rmask, other=0.0) + tmp14 = tl.load(in_ptr3 + (x0), None, eviction_policy='evict_last') + tmp22 = tl.load(in_ptr4 + (x0), None, eviction_policy='evict_last') + tmp31 = tl.load(in_ptr5 + (x0), None, eviction_policy='evict_last') + tmp2 = tmp0 * tmp1 + tmp3 = tl.broadcast_to(tmp2, [RBLOCK]) + tmp5 = tl.where(rmask, tmp3, 0) + tmp6 = triton_helpers.promote_to_tensor(tl.sum(tmp5, 0)) + tmp8 = tmp2 * tmp7 + tmp9 = tl.broadcast_to(tmp8, [RBLOCK]) + tmp11 = tl.where(rmask, tmp9, 0) + tmp12 = triton_helpers.promote_to_tensor(tl.sum(tmp11, 0)) + tmp15 = 768.0 + tmp16 = tmp2 * tmp15 + tmp17 = tmp16 - 
tmp6 + tmp18 = tmp7 * tmp12 + tmp19 = tmp17 - tmp18 + tmp20 = tmp14 * tmp19 + tmp21 = tmp13 + tmp20 + tmp23 = tl.full([RBLOCK], 1024, tl.int32) + tmp24 = tmp22 + tmp23 + tmp25 = tmp22 < 0 + tmp26 = tl.where(tmp25, tmp24, tmp22) + tmp27 = tl.full([1], -1, tl.int64) + tmp28 = tmp22 == tmp27 + tmp29 = 0.0 + tmp30 = tl.where(tmp28, tmp29, tmp21) + tmp32 = tl.full([RBLOCK], 50304, tl.int32) + tmp33 = tmp31 + tmp32 + tmp34 = tmp31 < 0 + tmp35 = tl.where(tmp34, tmp33, tmp31) + tmp36 = tmp31 == tmp27 + tmp37 = tl.where(tmp36, tmp29, tmp21) + tl.atomic_add(out_ptr2 + (tl.broadcast_to(r1 + (768*tmp26), [RBLOCK])), tmp30, rmask, sem='relaxed') + tl.atomic_add(out_ptr3 + (tl.broadcast_to(r1 + (768*tmp35), [RBLOCK])), tmp37, rmask, sem='relaxed') + ''', device_str='cuda') + + + # kernel path: /tmp/tmp2ln889l5/os/cosr4qs4msiqxzvk25xnlu3ejk5hkeeghm6yyzr6fh3yejtndtoa.py + # Source Nodes: [], Original ATen: [aten.add] + + triton_poi_fused_add_13 = async_compile.triton('triton_', ''' + import triton + import triton.language as tl + from triton.compiler.compiler import AttrsDescriptor + + from torch._inductor.runtime import triton_helpers, triton_heuristics + from torch._inductor.runtime.triton_helpers import libdevice, math as tl_math + from torch._inductor.runtime.hints import AutotuneHint, ReductionHint, TileHint, instance_descriptor, DeviceProperties + + @triton_heuristics.pointwise( + size_hints=[67108864], + filename=__file__, + triton_meta={'signature': {0: '*fp32', 1: '*fp32', 2: 'i32'}, 'device': DeviceProperties(type='cuda', index=0, cc=90, major=9, regs_per_multiprocessor=65536, max_threads_per_multi_processor=2048, multi_processor_count=132), 'constants': {}, 'configs': [AttrsDescriptor(divisible_by_16=(0, 1, 2), equal_to_1=())]}, + inductor_meta={'autotune_hints': set(), 'kernel_name': 'triton_poi_fused_add_13', 'mutated_arg_names': ['in_out_ptr0'], 'no_x_dim': False, 'num_load': 2, 'num_reduction': 0, 'backend_hash': 'B0714AF1B5CA55D285C0AA74C7669268EFE45FDD9B7A0A70183B8D89AC8FCAC4', 'are_deterministic_algorithms_enabled': False, 'assert_indirect_indexing': True, 'autotune_local_cache': True, 'autotune_pointwise': True, 'autotune_remote_cache': None, 'force_disable_caches': False, 'dynamic_scale_rblock': True, 'max_autotune': False, 'max_autotune_pointwise': False, 'min_split_scan_rblock': 256, 'spill_threshold': 16, 'store_cubin': False}, + min_elem_per_thread=0 + ) + @triton.jit + def triton_(in_out_ptr0, in_ptr0, xnumel, XBLOCK : tl.constexpr): + xnumel = 38633472 + xoffset = tl.program_id(0) * XBLOCK + xindex = xoffset + tl.arange(0, XBLOCK)[:] + xmask = tl.full([XBLOCK], True, tl.int1) + x0 = xindex + tmp0 = tl.load(in_out_ptr0 + (x0), None) + tmp1 = tl.load(in_ptr0 + (x0), None) + tmp2 = tmp0 + tmp1 + tl.store(in_out_ptr0 + (x0), tmp2, None) + ''', device_str='cuda') + + + async_compile.wait(globals()) + del async_compile + + def call(args): + primals_1, primals_4, primals_10, primals_16, primals_22, primals_28, primals_34, primals_40, primals_46, primals_52, primals_58, primals_64, primals_70, primals_76, primals_82, primals_88, primals_94, primals_100, primals_106, primals_112, primals_118, primals_124, primals_130, primals_136, primals_142, primals_148, unsqueeze, mul, view, permute_1, permute_2, permute_3, getitem_5, getitem_6, getitem_7, getitem_8, mul_2, view_8, addmm_2, view_10, mul_8, view_12, permute_9, permute_10, permute_11, getitem_16, getitem_17, getitem_18, getitem_19, mul_10, view_20, addmm_6, view_22, mul_16, view_24, permute_17, permute_18, permute_19, getitem_27, 
getitem_28, getitem_29, getitem_30, mul_18, view_32, addmm_10, view_34, mul_24, view_36, permute_25, permute_26, permute_27, getitem_38, getitem_39, getitem_40, getitem_41, mul_26, view_44, addmm_14, view_46, mul_32, view_48, permute_33, permute_34, permute_35, getitem_49, getitem_50, getitem_51, getitem_52, mul_34, view_56, addmm_18, view_58, mul_40, view_60, permute_41, permute_42, permute_43, getitem_60, getitem_61, getitem_62, getitem_63, mul_42, view_68, addmm_22, view_70, mul_48, view_72, permute_49, permute_50, permute_51, getitem_71, getitem_72, getitem_73, getitem_74, mul_50, view_80, addmm_26, view_82, mul_56, view_84, permute_57, permute_58, permute_59, getitem_82, getitem_83, getitem_84, getitem_85, mul_58, view_92, addmm_30, view_94, mul_64, view_96, permute_65, permute_66, permute_67, getitem_93, getitem_94, getitem_95, getitem_96, mul_66, view_104, addmm_34, view_106, mul_72, view_108, permute_73, permute_74, permute_75, getitem_104, getitem_105, getitem_106, getitem_107, mul_74, view_116, addmm_38, view_118, mul_80, view_120, permute_81, permute_82, permute_83, getitem_115, getitem_116, getitem_117, getitem_118, mul_82, view_128, addmm_42, view_130, mul_88, view_132, permute_89, permute_90, permute_91, getitem_126, getitem_127, getitem_128, getitem_129, mul_90, view_140, addmm_46, view_142, mul_96, full_default, view_144, permute_99, div, permute_101, permute_105, div_1, permute_109, permute_117, div_2, permute_121, permute_125, div_3, permute_129, permute_137, div_4, permute_141, permute_145, div_5, permute_149, permute_157, div_6, permute_161, permute_165, div_7, permute_169, permute_177, div_8, permute_181, permute_185, div_9, permute_189, permute_197, div_10, permute_201, permute_205, div_11, permute_209, permute_217, div_12, permute_221, permute_225, div_13, permute_229, permute_237, div_14, permute_241, permute_245, div_15, permute_249, permute_257, div_16, permute_261, permute_265, div_17, permute_269, permute_277, div_18, permute_281, permute_285, div_19, permute_289, permute_297, div_20, permute_301, permute_305, div_21, permute_309, permute_317, div_22, permute_321, permute_325, div_23, permute_329, permute_337, div_24, tangents_1 = args + args.clear() + assert_size_stride(primals_1, (1, 64), (64, 1)) + assert_size_stride(primals_4, (768, ), (1, )) + assert_size_stride(primals_10, (768, ), (1, )) + assert_size_stride(primals_16, (768, ), (1, )) + assert_size_stride(primals_22, (768, ), (1, )) + assert_size_stride(primals_28, (768, ), (1, )) + assert_size_stride(primals_34, (768, ), (1, )) + assert_size_stride(primals_40, (768, ), (1, )) + assert_size_stride(primals_46, (768, ), (1, )) + assert_size_stride(primals_52, (768, ), (1, )) + assert_size_stride(primals_58, (768, ), (1, )) + assert_size_stride(primals_64, (768, ), (1, )) + assert_size_stride(primals_70, (768, ), (1, )) + assert_size_stride(primals_76, (768, ), (1, )) + assert_size_stride(primals_82, (768, ), (1, )) + assert_size_stride(primals_88, (768, ), (1, )) + assert_size_stride(primals_94, (768, ), (1, )) + assert_size_stride(primals_100, (768, ), (1, )) + assert_size_stride(primals_106, (768, ), (1, )) + assert_size_stride(primals_112, (768, ), (1, )) + assert_size_stride(primals_118, (768, ), (1, )) + assert_size_stride(primals_124, (768, ), (1, )) + assert_size_stride(primals_130, (768, ), (1, )) + assert_size_stride(primals_136, (768, ), (1, )) + assert_size_stride(primals_142, (768, ), (1, )) + assert_size_stride(primals_148, (768, ), (1, )) + assert_size_stride(unsqueeze, (1, 64), (64, 1)) + 
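+ # Every saved forward tensor is re-checked with assert_size_stride before any
+ # kernel runs: the compiled kernels bake in these exact sizes and strides, so a
+ # mismatch raises immediately instead of reading memory with the wrong layout.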
assert_size_stride(mul, (1, 64, 768), (49152, 768, 1)) + assert_size_stride(view, (64, 768), (768, 1)) + assert_size_stride(permute_1, (1, 12, 64, 64), (147456, 64, 2304, 1)) + assert_size_stride(permute_2, (1, 12, 64, 64), (147456, 64, 2304, 1)) + assert_size_stride(permute_3, (1, 12, 64, 64), (147456, 64, 2304, 1)) + assert_size_stride(getitem_5, (1, 12, 64, 64), (49152, 64, 768, 1)) + assert_size_stride(getitem_6, (1, 12, 64), (768, 64, 1)) + assert_size_stride(getitem_7, (), ()) + assert_size_stride(getitem_8, (), ()) + assert_size_stride(mul_2, (1, 64, 768), (49152, 768, 1)) + assert_size_stride(view_8, (64, 768), (768, 1)) + assert_size_stride(addmm_2, (64, 3072), (3072, 1)) + assert_size_stride(view_10, (64, 3072), (3072, 1)) + assert_size_stride(mul_8, (1, 64, 768), (49152, 768, 1)) + assert_size_stride(view_12, (64, 768), (768, 1)) + assert_size_stride(permute_9, (1, 12, 64, 64), (147456, 64, 2304, 1)) + assert_size_stride(permute_10, (1, 12, 64, 64), (147456, 64, 2304, 1)) + assert_size_stride(permute_11, (1, 12, 64, 64), (147456, 64, 2304, 1)) + assert_size_stride(getitem_16, (1, 12, 64, 64), (49152, 64, 768, 1)) + assert_size_stride(getitem_17, (1, 12, 64), (768, 64, 1)) + assert_size_stride(getitem_18, (), ()) + assert_size_stride(getitem_19, (), ()) + assert_size_stride(mul_10, (1, 64, 768), (49152, 768, 1)) + assert_size_stride(view_20, (64, 768), (768, 1)) + assert_size_stride(addmm_6, (64, 3072), (3072, 1)) + assert_size_stride(view_22, (64, 3072), (3072, 1)) + assert_size_stride(mul_16, (1, 64, 768), (49152, 768, 1)) + assert_size_stride(view_24, (64, 768), (768, 1)) + assert_size_stride(permute_17, (1, 12, 64, 64), (147456, 64, 2304, 1)) + assert_size_stride(permute_18, (1, 12, 64, 64), (147456, 64, 2304, 1)) + assert_size_stride(permute_19, (1, 12, 64, 64), (147456, 64, 2304, 1)) + assert_size_stride(getitem_27, (1, 12, 64, 64), (49152, 64, 768, 1)) + assert_size_stride(getitem_28, (1, 12, 64), (768, 64, 1)) + assert_size_stride(getitem_29, (), ()) + assert_size_stride(getitem_30, (), ()) + assert_size_stride(mul_18, (1, 64, 768), (49152, 768, 1)) + assert_size_stride(view_32, (64, 768), (768, 1)) + assert_size_stride(addmm_10, (64, 3072), (3072, 1)) + assert_size_stride(view_34, (64, 3072), (3072, 1)) + assert_size_stride(mul_24, (1, 64, 768), (49152, 768, 1)) + assert_size_stride(view_36, (64, 768), (768, 1)) + assert_size_stride(permute_25, (1, 12, 64, 64), (147456, 64, 2304, 1)) + assert_size_stride(permute_26, (1, 12, 64, 64), (147456, 64, 2304, 1)) + assert_size_stride(permute_27, (1, 12, 64, 64), (147456, 64, 2304, 1)) + assert_size_stride(getitem_38, (1, 12, 64, 64), (49152, 64, 768, 1)) + assert_size_stride(getitem_39, (1, 12, 64), (768, 64, 1)) + assert_size_stride(getitem_40, (), ()) + assert_size_stride(getitem_41, (), ()) + assert_size_stride(mul_26, (1, 64, 768), (49152, 768, 1)) + assert_size_stride(view_44, (64, 768), (768, 1)) + assert_size_stride(addmm_14, (64, 3072), (3072, 1)) + assert_size_stride(view_46, (64, 3072), (3072, 1)) + assert_size_stride(mul_32, (1, 64, 768), (49152, 768, 1)) + assert_size_stride(view_48, (64, 768), (768, 1)) + assert_size_stride(permute_33, (1, 12, 64, 64), (147456, 64, 2304, 1)) + assert_size_stride(permute_34, (1, 12, 64, 64), (147456, 64, 2304, 1)) + assert_size_stride(permute_35, (1, 12, 64, 64), (147456, 64, 2304, 1)) + assert_size_stride(getitem_49, (1, 12, 64, 64), (49152, 64, 768, 1)) + assert_size_stride(getitem_50, (1, 12, 64), (768, 64, 1)) + assert_size_stride(getitem_51, (), ()) + 
assert_size_stride(getitem_52, (), ()) + assert_size_stride(mul_34, (1, 64, 768), (49152, 768, 1)) + assert_size_stride(view_56, (64, 768), (768, 1)) + assert_size_stride(addmm_18, (64, 3072), (3072, 1)) + assert_size_stride(view_58, (64, 3072), (3072, 1)) + assert_size_stride(mul_40, (1, 64, 768), (49152, 768, 1)) + assert_size_stride(view_60, (64, 768), (768, 1)) + assert_size_stride(permute_41, (1, 12, 64, 64), (147456, 64, 2304, 1)) + assert_size_stride(permute_42, (1, 12, 64, 64), (147456, 64, 2304, 1)) + assert_size_stride(permute_43, (1, 12, 64, 64), (147456, 64, 2304, 1)) + assert_size_stride(getitem_60, (1, 12, 64, 64), (49152, 64, 768, 1)) + assert_size_stride(getitem_61, (1, 12, 64), (768, 64, 1)) + assert_size_stride(getitem_62, (), ()) + assert_size_stride(getitem_63, (), ()) + assert_size_stride(mul_42, (1, 64, 768), (49152, 768, 1)) + assert_size_stride(view_68, (64, 768), (768, 1)) + assert_size_stride(addmm_22, (64, 3072), (3072, 1)) + assert_size_stride(view_70, (64, 3072), (3072, 1)) + assert_size_stride(mul_48, (1, 64, 768), (49152, 768, 1)) + assert_size_stride(view_72, (64, 768), (768, 1)) + assert_size_stride(permute_49, (1, 12, 64, 64), (147456, 64, 2304, 1)) + assert_size_stride(permute_50, (1, 12, 64, 64), (147456, 64, 2304, 1)) + assert_size_stride(permute_51, (1, 12, 64, 64), (147456, 64, 2304, 1)) + assert_size_stride(getitem_71, (1, 12, 64, 64), (49152, 64, 768, 1)) + assert_size_stride(getitem_72, (1, 12, 64), (768, 64, 1)) + assert_size_stride(getitem_73, (), ()) + assert_size_stride(getitem_74, (), ()) + assert_size_stride(mul_50, (1, 64, 768), (49152, 768, 1)) + assert_size_stride(view_80, (64, 768), (768, 1)) + assert_size_stride(addmm_26, (64, 3072), (3072, 1)) + assert_size_stride(view_82, (64, 3072), (3072, 1)) + assert_size_stride(mul_56, (1, 64, 768), (49152, 768, 1)) + assert_size_stride(view_84, (64, 768), (768, 1)) + assert_size_stride(permute_57, (1, 12, 64, 64), (147456, 64, 2304, 1)) + assert_size_stride(permute_58, (1, 12, 64, 64), (147456, 64, 2304, 1)) + assert_size_stride(permute_59, (1, 12, 64, 64), (147456, 64, 2304, 1)) + assert_size_stride(getitem_82, (1, 12, 64, 64), (49152, 64, 768, 1)) + assert_size_stride(getitem_83, (1, 12, 64), (768, 64, 1)) + assert_size_stride(getitem_84, (), ()) + assert_size_stride(getitem_85, (), ()) + assert_size_stride(mul_58, (1, 64, 768), (49152, 768, 1)) + assert_size_stride(view_92, (64, 768), (768, 1)) + assert_size_stride(addmm_30, (64, 3072), (3072, 1)) + assert_size_stride(view_94, (64, 3072), (3072, 1)) + assert_size_stride(mul_64, (1, 64, 768), (49152, 768, 1)) + assert_size_stride(view_96, (64, 768), (768, 1)) + assert_size_stride(permute_65, (1, 12, 64, 64), (147456, 64, 2304, 1)) + assert_size_stride(permute_66, (1, 12, 64, 64), (147456, 64, 2304, 1)) + assert_size_stride(permute_67, (1, 12, 64, 64), (147456, 64, 2304, 1)) + assert_size_stride(getitem_93, (1, 12, 64, 64), (49152, 64, 768, 1)) + assert_size_stride(getitem_94, (1, 12, 64), (768, 64, 1)) + assert_size_stride(getitem_95, (), ()) + assert_size_stride(getitem_96, (), ()) + assert_size_stride(mul_66, (1, 64, 768), (49152, 768, 1)) + assert_size_stride(view_104, (64, 768), (768, 1)) + assert_size_stride(addmm_34, (64, 3072), (3072, 1)) + assert_size_stride(view_106, (64, 3072), (3072, 1)) + assert_size_stride(mul_72, (1, 64, 768), (49152, 768, 1)) + assert_size_stride(view_108, (64, 768), (768, 1)) + assert_size_stride(permute_73, (1, 12, 64, 64), (147456, 64, 2304, 1)) + assert_size_stride(permute_74, (1, 12, 64, 64), (147456, 64, 
2304, 1)) + assert_size_stride(permute_75, (1, 12, 64, 64), (147456, 64, 2304, 1)) + assert_size_stride(getitem_104, (1, 12, 64, 64), (49152, 64, 768, 1)) + assert_size_stride(getitem_105, (1, 12, 64), (768, 64, 1)) + assert_size_stride(getitem_106, (), ()) + assert_size_stride(getitem_107, (), ()) + assert_size_stride(mul_74, (1, 64, 768), (49152, 768, 1)) + assert_size_stride(view_116, (64, 768), (768, 1)) + assert_size_stride(addmm_38, (64, 3072), (3072, 1)) + assert_size_stride(view_118, (64, 3072), (3072, 1)) + assert_size_stride(mul_80, (1, 64, 768), (49152, 768, 1)) + assert_size_stride(view_120, (64, 768), (768, 1)) + assert_size_stride(permute_81, (1, 12, 64, 64), (147456, 64, 2304, 1)) + assert_size_stride(permute_82, (1, 12, 64, 64), (147456, 64, 2304, 1)) + assert_size_stride(permute_83, (1, 12, 64, 64), (147456, 64, 2304, 1)) + assert_size_stride(getitem_115, (1, 12, 64, 64), (49152, 64, 768, 1)) + assert_size_stride(getitem_116, (1, 12, 64), (768, 64, 1)) + assert_size_stride(getitem_117, (), ()) + assert_size_stride(getitem_118, (), ()) + assert_size_stride(mul_82, (1, 64, 768), (49152, 768, 1)) + assert_size_stride(view_128, (64, 768), (768, 1)) + assert_size_stride(addmm_42, (64, 3072), (3072, 1)) + assert_size_stride(view_130, (64, 3072), (3072, 1)) + assert_size_stride(mul_88, (1, 64, 768), (49152, 768, 1)) + assert_size_stride(view_132, (64, 768), (768, 1)) + assert_size_stride(permute_89, (1, 12, 64, 64), (147456, 64, 2304, 1)) + assert_size_stride(permute_90, (1, 12, 64, 64), (147456, 64, 2304, 1)) + assert_size_stride(permute_91, (1, 12, 64, 64), (147456, 64, 2304, 1)) + assert_size_stride(getitem_126, (1, 12, 64, 64), (49152, 64, 768, 1)) + assert_size_stride(getitem_127, (1, 12, 64), (768, 64, 1)) + assert_size_stride(getitem_128, (), ()) + assert_size_stride(getitem_129, (), ()) + assert_size_stride(mul_90, (1, 64, 768), (49152, 768, 1)) + assert_size_stride(view_140, (64, 768), (768, 1)) + assert_size_stride(addmm_46, (64, 3072), (3072, 1)) + assert_size_stride(view_142, (64, 3072), (3072, 1)) + assert_size_stride(mul_96, (1, 64, 768), (49152, 768, 1)) + assert_size_stride(full_default, (1, ), (1, )) + assert_size_stride(view_144, (1, 768), (768, 1)) + assert_size_stride(permute_99, (50304, 768), (768, 1)) + assert_size_stride(div, (1, 64, 1), (64, 1, 1)) + assert_size_stride(permute_101, (768, 3072), (3072, 1)) + assert_size_stride(permute_105, (3072, 768), (768, 1)) + assert_size_stride(div_1, (1, 64, 1), (64, 1, 1)) + assert_size_stride(permute_109, (768, 768), (768, 1)) + assert_size_stride(permute_117, (2304, 768), (768, 1)) + assert_size_stride(div_2, (1, 64, 1), (64, 1, 1)) + assert_size_stride(permute_121, (768, 3072), (3072, 1)) + assert_size_stride(permute_125, (3072, 768), (768, 1)) + assert_size_stride(div_3, (1, 64, 1), (64, 1, 1)) + assert_size_stride(permute_129, (768, 768), (768, 1)) + assert_size_stride(permute_137, (2304, 768), (768, 1)) + assert_size_stride(div_4, (1, 64, 1), (64, 1, 1)) + assert_size_stride(permute_141, (768, 3072), (3072, 1)) + assert_size_stride(permute_145, (3072, 768), (768, 1)) + assert_size_stride(div_5, (1, 64, 1), (64, 1, 1)) + assert_size_stride(permute_149, (768, 768), (768, 1)) + assert_size_stride(permute_157, (2304, 768), (768, 1)) + assert_size_stride(div_6, (1, 64, 1), (64, 1, 1)) + assert_size_stride(permute_161, (768, 3072), (3072, 1)) + assert_size_stride(permute_165, (3072, 768), (768, 1)) + assert_size_stride(div_7, (1, 64, 1), (64, 1, 1)) + assert_size_stride(permute_169, (768, 768), (768, 1)) + 
assert_size_stride(permute_177, (2304, 768), (768, 1)) + assert_size_stride(div_8, (1, 64, 1), (64, 1, 1)) + assert_size_stride(permute_181, (768, 3072), (3072, 1)) + assert_size_stride(permute_185, (3072, 768), (768, 1)) + assert_size_stride(div_9, (1, 64, 1), (64, 1, 1)) + assert_size_stride(permute_189, (768, 768), (768, 1)) + assert_size_stride(permute_197, (2304, 768), (768, 1)) + assert_size_stride(div_10, (1, 64, 1), (64, 1, 1)) + assert_size_stride(permute_201, (768, 3072), (3072, 1)) + assert_size_stride(permute_205, (3072, 768), (768, 1)) + assert_size_stride(div_11, (1, 64, 1), (64, 1, 1)) + assert_size_stride(permute_209, (768, 768), (768, 1)) + assert_size_stride(permute_217, (2304, 768), (768, 1)) + assert_size_stride(div_12, (1, 64, 1), (64, 1, 1)) + assert_size_stride(permute_221, (768, 3072), (3072, 1)) + assert_size_stride(permute_225, (3072, 768), (768, 1)) + assert_size_stride(div_13, (1, 64, 1), (64, 1, 1)) + assert_size_stride(permute_229, (768, 768), (768, 1)) + assert_size_stride(permute_237, (2304, 768), (768, 1)) + assert_size_stride(div_14, (1, 64, 1), (64, 1, 1)) + assert_size_stride(permute_241, (768, 3072), (3072, 1)) + assert_size_stride(permute_245, (3072, 768), (768, 1)) + assert_size_stride(div_15, (1, 64, 1), (64, 1, 1)) + assert_size_stride(permute_249, (768, 768), (768, 1)) + assert_size_stride(permute_257, (2304, 768), (768, 1)) + assert_size_stride(div_16, (1, 64, 1), (64, 1, 1)) + assert_size_stride(permute_261, (768, 3072), (3072, 1)) + assert_size_stride(permute_265, (3072, 768), (768, 1)) + assert_size_stride(div_17, (1, 64, 1), (64, 1, 1)) + assert_size_stride(permute_269, (768, 768), (768, 1)) + assert_size_stride(permute_277, (2304, 768), (768, 1)) + assert_size_stride(div_18, (1, 64, 1), (64, 1, 1)) + assert_size_stride(permute_281, (768, 3072), (3072, 1)) + assert_size_stride(permute_285, (3072, 768), (768, 1)) + assert_size_stride(div_19, (1, 64, 1), (64, 1, 1)) + assert_size_stride(permute_289, (768, 768), (768, 1)) + assert_size_stride(permute_297, (2304, 768), (768, 1)) + assert_size_stride(div_20, (1, 64, 1), (64, 1, 1)) + assert_size_stride(permute_301, (768, 3072), (3072, 1)) + assert_size_stride(permute_305, (3072, 768), (768, 1)) + assert_size_stride(div_21, (1, 64, 1), (64, 1, 1)) + assert_size_stride(permute_309, (768, 768), (768, 1)) + assert_size_stride(permute_317, (2304, 768), (768, 1)) + assert_size_stride(div_22, (1, 64, 1), (64, 1, 1)) + assert_size_stride(permute_321, (768, 3072), (3072, 1)) + assert_size_stride(permute_325, (3072, 768), (768, 1)) + assert_size_stride(div_23, (1, 64, 1), (64, 1, 1)) + assert_size_stride(permute_329, (768, 768), (768, 1)) + assert_size_stride(permute_337, (2304, 768), (768, 1)) + assert_size_stride(div_24, (1, 64, 1), (64, 1, 1)) + assert_size_stride(tangents_1, (1, 1, 50304), (50304, 50304, 1)) + with torch.cuda._DeviceGuard(0): + torch.cuda.set_device(0) + buf0 = empty_strided_cuda((50304, 768), (768, 1), torch.float32) + # Source Nodes: [], Original ATen: [aten.mm] + extern_kernels.mm(reinterpret_tensor(tangents_1, (50304, 1), (1, 50304), 0), view_144, out=buf0) + del view_144 + buf1 = empty_strided_cuda((1, 768), (768, 1), torch.float32) + # Source Nodes: [], Original ATen: [aten.mm] + extern_kernels.mm(reinterpret_tensor(tangents_1, (1, 50304), (50304, 1), 0), permute_99, out=buf1) + del permute_99 + del tangents_1 + buf2 = empty_strided_cuda((1, 64, 768), (49152, 768, 1), torch.float32) + # Source Nodes: [], Original ATen: [aten.new_zeros] + stream0 = get_raw_stream(0) + 
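+ # The two extern mm calls above compute the lm_head gradients, and buf2 is about
+ # to be zero-filled so the embedding gradients can be accumulated into it by the
+ # atomic-add scatter kernels. A rough eager-mode sketch of the mm pair (the names
+ # grad_W, grad_x, g, W are illustrative, not from the generated code):
+ #   grad_W = g.reshape(50304, 1) @ view_144   # tangents^T @ activations -> (50304, 768)
+ #   grad_x = g.reshape(1, 50304) @ W          # tangents @ lm_head weight  -> (1, 768)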
triton_poi_fused_new_zeros_0.run(buf2, 49152, grid=grid(49152), stream=stream0) + # Source Nodes: [], Original ATen: [aten.index_put, aten.new_zeros] + triton_poi_fused_index_put_new_zeros_1.run(full_default, buf1, buf2, 768, grid=grid(768), stream=stream0) + del full_default + buf6 = empty_strided_cuda((1, 64, 768), (49152, 768, 1), torch.float32) + # Source Nodes: [], Original ATen: [aten.native_layer_norm_backward] + triton_per_fused_native_layer_norm_backward_2.run(buf2, primals_148, mul_96, div, buf6, 64, 768, grid=grid(64), stream=stream0) + del div + del primals_148 + buf7 = reinterpret_tensor(buf1, (768, ), (1, ), 0); del buf1 # reuse + buf8 = empty_strided_cuda((768, ), (1, ), torch.float32) + # Source Nodes: [], Original ATen: [aten.native_layer_norm_backward] + triton_per_fused_native_layer_norm_backward_3.run(buf2, mul_96, buf7, buf8, 768, 64, grid=grid(768), stream=stream0) + del mul_96 + buf9 = empty_strided_cuda((64, 3072), (3072, 1), torch.float32) + # Source Nodes: [], Original ATen: [aten.mm] + extern_kernels.mm(reinterpret_tensor(buf6, (64, 768), (768, 1), 0), permute_101, out=buf9) + del permute_101 + buf10 = empty_strided_cuda((768, 3072), (3072, 1), torch.float32) + # Source Nodes: [], Original ATen: [aten.mm] + extern_kernels.mm(reinterpret_tensor(buf6, (768, 64), (1, 768), 0), view_142, out=buf10) + del view_142 + buf11 = empty_strided_cuda((1, 768), (768, 1), torch.float32) + # Source Nodes: [], Original ATen: [aten.sum] + triton_per_fused_sum_4.run(buf6, buf11, 768, 64, grid=grid(768), stream=stream0) + buf12 = reinterpret_tensor(buf9, (1, 64, 3072), (196608, 3072, 1), 0); del buf9 # reuse + # Source Nodes: [add_46, add_47, mul_44, mul_45, mul_46, pow_12, tanh_11], Original ATen: [aten.add, aten.mul, aten.pow, aten.tanh, aten.tanh_backward] + triton_poi_fused_add_mul_pow_tanh_tanh_backward_5.run(buf12, addmm_46, 196608, grid=grid(196608), stream=stream0) + del addmm_46 + buf13 = reinterpret_tensor(buf2, (64, 768), (768, 1), 0); del buf2 # reuse + # Source Nodes: [], Original ATen: [aten.mm] + extern_kernels.mm(reinterpret_tensor(buf12, (64, 3072), (3072, 1), 0), permute_105, out=buf13) + del permute_105 + buf14 = empty_strided_cuda((3072, 768), (768, 1), torch.float32) + # Source Nodes: [], Original ATen: [aten.mm] + extern_kernels.mm(reinterpret_tensor(buf12, (3072, 64), (1, 3072), 0), view_140, out=buf14) + del view_140 + buf15 = empty_strided_cuda((1, 3072), (3072, 1), torch.float32) + # Source Nodes: [], Original ATen: [aten.sum] + triton_per_fused_sum_6.run(buf12, buf15, 3072, 64, grid=grid(3072), stream=stream0) + buf20 = buf6; del buf6 # reuse + # Source Nodes: [], Original ATen: [aten.add, aten.native_layer_norm_backward] + triton_per_fused_add_native_layer_norm_backward_7.run(buf20, buf13, primals_142, mul_90, div_1, 64, 768, grid=grid(64), stream=stream0) + del div_1 + del primals_142 + buf18 = empty_strided_cuda((768, ), (1, ), torch.float32) + buf19 = empty_strided_cuda((768, ), (1, ), torch.float32) + # Source Nodes: [], Original ATen: [aten.native_layer_norm_backward] + triton_per_fused_native_layer_norm_backward_3.run(buf13, mul_90, buf18, buf19, 768, 64, grid=grid(768), stream=stream0) + del mul_90 + buf21 = buf13; del buf13 # reuse + # Source Nodes: [], Original ATen: [aten.mm] + extern_kernels.mm(reinterpret_tensor(buf20, (64, 768), (768, 1), 0), permute_109, out=buf21) + del permute_109 + buf22 = empty_strided_cuda((768, 768), (768, 1), torch.float32) + # Source Nodes: [], Original ATen: [aten.mm] + extern_kernels.mm(reinterpret_tensor(buf20, 
(768, 64), (1, 768), 0), reinterpret_tensor(getitem_126, (64, 768), (768, 1), 0), out=buf22) + buf23 = empty_strided_cuda((1, 768), (768, 1), torch.float32) + # Source Nodes: [], Original ATen: [aten.sum] + triton_per_fused_sum_4.run(buf20, buf23, 768, 64, grid=grid(768), stream=stream0) + # Source Nodes: [], Original ATen: [aten._scaled_dot_product_efficient_attention_backward] + buf24 = torch.ops.aten._scaled_dot_product_efficient_attention_backward.default(reinterpret_tensor(buf21, (1, 12, 64, 64), (49152, 64, 768, 1), 0), permute_90, permute_89, permute_91, None, getitem_126, getitem_127, getitem_128, getitem_129, 0.0, [True, True, True, False], True) + del buf21 + del getitem_126 + del getitem_127 + del getitem_128 + del getitem_129 + del permute_89 + del permute_90 + del permute_91 + buf25 = buf24[0] + buf26 = buf24[1] + buf27 = buf24[2] + del buf24 + buf28 = empty_strided_cuda((1, 64, 2304), (147456, 2304, 1), torch.float32) + # Source Nodes: [], Original ATen: [aten.cat] + triton_poi_fused_cat_8.run(buf25, buf26, buf27, buf28, 147456, grid=grid(147456), stream=stream0) + del buf25 + del buf26 + buf29 = reinterpret_tensor(buf27, (64, 768), (768, 1), 0); del buf27 # reuse + # Source Nodes: [], Original ATen: [aten.mm] + extern_kernels.mm(reinterpret_tensor(buf28, (64, 2304), (2304, 1), 0), permute_117, out=buf29) + del permute_117 + buf30 = empty_strided_cuda((2304, 768), (768, 1), torch.float32) + # Source Nodes: [], Original ATen: [aten.mm] + extern_kernels.mm(reinterpret_tensor(buf28, (2304, 64), (1, 2304), 0), view_132, out=buf30) + del view_132 + buf31 = empty_strided_cuda((1, 2304), (2304, 1), torch.float32) + # Source Nodes: [], Original ATen: [aten.sum] + triton_per_fused_sum_9.run(buf28, buf31, 2304, 64, grid=grid(2304), stream=stream0) + buf36 = buf20; del buf20 # reuse + # Source Nodes: [], Original ATen: [aten.add, aten.native_layer_norm_backward] + triton_per_fused_add_native_layer_norm_backward_7.run(buf36, buf29, primals_136, mul_88, div_2, 64, 768, grid=grid(64), stream=stream0) + del div_2 + del primals_136 + buf34 = empty_strided_cuda((768, ), (1, ), torch.float32) + buf35 = empty_strided_cuda((768, ), (1, ), torch.float32) + # Source Nodes: [], Original ATen: [aten.native_layer_norm_backward] + triton_per_fused_native_layer_norm_backward_3.run(buf29, mul_88, buf34, buf35, 768, 64, grid=grid(768), stream=stream0) + del mul_88 + buf37 = reinterpret_tensor(buf12, (64, 3072), (3072, 1), 0); del buf12 # reuse + # Source Nodes: [], Original ATen: [aten.mm] + extern_kernels.mm(reinterpret_tensor(buf36, (64, 768), (768, 1), 0), permute_121, out=buf37) + del permute_121 + buf38 = empty_strided_cuda((768, 3072), (3072, 1), torch.float32) + # Source Nodes: [], Original ATen: [aten.mm] + extern_kernels.mm(reinterpret_tensor(buf36, (768, 64), (1, 768), 0), view_130, out=buf38) + del view_130 + buf39 = empty_strided_cuda((1, 768), (768, 1), torch.float32) + # Source Nodes: [], Original ATen: [aten.sum] + triton_per_fused_sum_4.run(buf36, buf39, 768, 64, grid=grid(768), stream=stream0) + buf40 = reinterpret_tensor(buf37, (1, 64, 3072), (196608, 3072, 1), 0); del buf37 # reuse + # Source Nodes: [add_42, add_43, mul_40, mul_41, mul_42, pow_11, tanh_10], Original ATen: [aten.add, aten.mul, aten.pow, aten.tanh, aten.tanh_backward] + triton_poi_fused_add_mul_pow_tanh_tanh_backward_5.run(buf40, addmm_42, 196608, grid=grid(196608), stream=stream0) + del addmm_42 + buf41 = buf29; del buf29 # reuse + # Source Nodes: [], Original ATen: [aten.mm] + 
extern_kernels.mm(reinterpret_tensor(buf40, (64, 3072), (3072, 1), 0), permute_125, out=buf41) + del permute_125 + buf42 = empty_strided_cuda((3072, 768), (768, 1), torch.float32) + # Source Nodes: [], Original ATen: [aten.mm] + extern_kernels.mm(reinterpret_tensor(buf40, (3072, 64), (1, 3072), 0), view_128, out=buf42) + del view_128 + buf43 = empty_strided_cuda((1, 3072), (3072, 1), torch.float32) + # Source Nodes: [], Original ATen: [aten.sum] + triton_per_fused_sum_6.run(buf40, buf43, 3072, 64, grid=grid(3072), stream=stream0) + buf48 = buf36; del buf36 # reuse + # Source Nodes: [], Original ATen: [aten.add, aten.native_layer_norm_backward] + triton_per_fused_add_native_layer_norm_backward_7.run(buf48, buf41, primals_130, mul_82, div_3, 64, 768, grid=grid(64), stream=stream0) + del div_3 + del primals_130 + buf46 = empty_strided_cuda((768, ), (1, ), torch.float32) + buf47 = empty_strided_cuda((768, ), (1, ), torch.float32) + # Source Nodes: [], Original ATen: [aten.native_layer_norm_backward] + triton_per_fused_native_layer_norm_backward_3.run(buf41, mul_82, buf46, buf47, 768, 64, grid=grid(768), stream=stream0) + del mul_82 + buf49 = buf41; del buf41 # reuse + # Source Nodes: [], Original ATen: [aten.mm] + extern_kernels.mm(reinterpret_tensor(buf48, (64, 768), (768, 1), 0), permute_129, out=buf49) + del permute_129 + buf50 = empty_strided_cuda((768, 768), (768, 1), torch.float32) + # Source Nodes: [], Original ATen: [aten.mm] + extern_kernels.mm(reinterpret_tensor(buf48, (768, 64), (1, 768), 0), reinterpret_tensor(getitem_115, (64, 768), (768, 1), 0), out=buf50) + buf51 = empty_strided_cuda((1, 768), (768, 1), torch.float32) + # Source Nodes: [], Original ATen: [aten.sum] + triton_per_fused_sum_4.run(buf48, buf51, 768, 64, grid=grid(768), stream=stream0) + # Source Nodes: [], Original ATen: [aten._scaled_dot_product_efficient_attention_backward] + buf52 = torch.ops.aten._scaled_dot_product_efficient_attention_backward.default(reinterpret_tensor(buf49, (1, 12, 64, 64), (49152, 64, 768, 1), 0), permute_82, permute_81, permute_83, None, getitem_115, getitem_116, getitem_117, getitem_118, 0.0, [True, True, True, False], True) + del buf49 + del getitem_115 + del getitem_116 + del getitem_117 + del getitem_118 + del permute_81 + del permute_82 + del permute_83 + buf53 = buf52[0] + buf54 = buf52[1] + buf55 = buf52[2] + del buf52 + buf56 = buf28; del buf28 # reuse + # Source Nodes: [], Original ATen: [aten.cat] + triton_poi_fused_cat_8.run(buf53, buf54, buf55, buf56, 147456, grid=grid(147456), stream=stream0) + del buf53 + del buf54 + buf57 = reinterpret_tensor(buf55, (64, 768), (768, 1), 0); del buf55 # reuse + # Source Nodes: [], Original ATen: [aten.mm] + extern_kernels.mm(reinterpret_tensor(buf56, (64, 2304), (2304, 1), 0), permute_137, out=buf57) + del permute_137 + buf58 = empty_strided_cuda((2304, 768), (768, 1), torch.float32) + # Source Nodes: [], Original ATen: [aten.mm] + extern_kernels.mm(reinterpret_tensor(buf56, (2304, 64), (1, 2304), 0), view_120, out=buf58) + del view_120 + buf59 = empty_strided_cuda((1, 2304), (2304, 1), torch.float32) + # Source Nodes: [], Original ATen: [aten.sum] + triton_per_fused_sum_9.run(buf56, buf59, 2304, 64, grid=grid(2304), stream=stream0) + buf64 = buf48; del buf48 # reuse + # Source Nodes: [], Original ATen: [aten.add, aten.native_layer_norm_backward] + triton_per_fused_add_native_layer_norm_backward_7.run(buf64, buf57, primals_124, mul_80, div_4, 64, 768, grid=grid(64), stream=stream0) + del div_4 + del primals_124 + buf62 = 
empty_strided_cuda((768, ), (1, ), torch.float32) + buf63 = empty_strided_cuda((768, ), (1, ), torch.float32) + # Source Nodes: [], Original ATen: [aten.native_layer_norm_backward] + triton_per_fused_native_layer_norm_backward_3.run(buf57, mul_80, buf62, buf63, 768, 64, grid=grid(768), stream=stream0) + del mul_80 + buf65 = reinterpret_tensor(buf40, (64, 3072), (3072, 1), 0); del buf40 # reuse + # Source Nodes: [], Original ATen: [aten.mm] + extern_kernels.mm(reinterpret_tensor(buf64, (64, 768), (768, 1), 0), permute_141, out=buf65) + del permute_141 + buf66 = empty_strided_cuda((768, 3072), (3072, 1), torch.float32) + # Source Nodes: [], Original ATen: [aten.mm] + extern_kernels.mm(reinterpret_tensor(buf64, (768, 64), (1, 768), 0), view_118, out=buf66) + del view_118 + buf67 = empty_strided_cuda((1, 768), (768, 1), torch.float32) + # Source Nodes: [], Original ATen: [aten.sum] + triton_per_fused_sum_4.run(buf64, buf67, 768, 64, grid=grid(768), stream=stream0) + buf68 = reinterpret_tensor(buf65, (1, 64, 3072), (196608, 3072, 1), 0); del buf65 # reuse + # Source Nodes: [add_38, add_39, mul_36, mul_37, mul_38, pow_10, tanh_9], Original ATen: [aten.add, aten.mul, aten.pow, aten.tanh, aten.tanh_backward] + triton_poi_fused_add_mul_pow_tanh_tanh_backward_5.run(buf68, addmm_38, 196608, grid=grid(196608), stream=stream0) + del addmm_38 + buf69 = buf57; del buf57 # reuse + # Source Nodes: [], Original ATen: [aten.mm] + extern_kernels.mm(reinterpret_tensor(buf68, (64, 3072), (3072, 1), 0), permute_145, out=buf69) + del permute_145 + buf70 = empty_strided_cuda((3072, 768), (768, 1), torch.float32) + # Source Nodes: [], Original ATen: [aten.mm] + extern_kernels.mm(reinterpret_tensor(buf68, (3072, 64), (1, 3072), 0), view_116, out=buf70) + del view_116 + buf71 = empty_strided_cuda((1, 3072), (3072, 1), torch.float32) + # Source Nodes: [], Original ATen: [aten.sum] + triton_per_fused_sum_6.run(buf68, buf71, 3072, 64, grid=grid(3072), stream=stream0) + buf76 = buf64; del buf64 # reuse + # Source Nodes: [], Original ATen: [aten.add, aten.native_layer_norm_backward] + triton_per_fused_add_native_layer_norm_backward_7.run(buf76, buf69, primals_118, mul_74, div_5, 64, 768, grid=grid(64), stream=stream0) + del div_5 + del primals_118 + buf74 = empty_strided_cuda((768, ), (1, ), torch.float32) + buf75 = empty_strided_cuda((768, ), (1, ), torch.float32) + # Source Nodes: [], Original ATen: [aten.native_layer_norm_backward] + triton_per_fused_native_layer_norm_backward_3.run(buf69, mul_74, buf74, buf75, 768, 64, grid=grid(768), stream=stream0) + del mul_74 + buf77 = buf69; del buf69 # reuse + # Source Nodes: [], Original ATen: [aten.mm] + extern_kernels.mm(reinterpret_tensor(buf76, (64, 768), (768, 1), 0), permute_149, out=buf77) + del permute_149 + buf78 = empty_strided_cuda((768, 768), (768, 1), torch.float32) + # Source Nodes: [], Original ATen: [aten.mm] + extern_kernels.mm(reinterpret_tensor(buf76, (768, 64), (1, 768), 0), reinterpret_tensor(getitem_104, (64, 768), (768, 1), 0), out=buf78) + buf79 = empty_strided_cuda((1, 768), (768, 1), torch.float32) + # Source Nodes: [], Original ATen: [aten.sum] + triton_per_fused_sum_4.run(buf76, buf79, 768, 64, grid=grid(768), stream=stream0) + # Source Nodes: [], Original ATen: [aten._scaled_dot_product_efficient_attention_backward] + buf80 = torch.ops.aten._scaled_dot_product_efficient_attention_backward.default(reinterpret_tensor(buf77, (1, 12, 64, 64), (49152, 64, 768, 1), 0), permute_74, permute_73, permute_75, None, getitem_104, getitem_105, getitem_106, 
getitem_107, 0.0, [True, True, True, False], True) + del buf77 + del getitem_104 + del getitem_105 + del getitem_106 + del getitem_107 + del permute_73 + del permute_74 + del permute_75 + buf81 = buf80[0] + buf82 = buf80[1] + buf83 = buf80[2] + del buf80 + buf84 = buf56; del buf56 # reuse + # Source Nodes: [], Original ATen: [aten.cat] + triton_poi_fused_cat_8.run(buf81, buf82, buf83, buf84, 147456, grid=grid(147456), stream=stream0) + del buf81 + del buf82 + buf85 = reinterpret_tensor(buf83, (64, 768), (768, 1), 0); del buf83 # reuse + # Source Nodes: [], Original ATen: [aten.mm] + extern_kernels.mm(reinterpret_tensor(buf84, (64, 2304), (2304, 1), 0), permute_157, out=buf85) + del permute_157 + buf86 = empty_strided_cuda((2304, 768), (768, 1), torch.float32) + # Source Nodes: [], Original ATen: [aten.mm] + extern_kernels.mm(reinterpret_tensor(buf84, (2304, 64), (1, 2304), 0), view_108, out=buf86) + del view_108 + buf87 = empty_strided_cuda((1, 2304), (2304, 1), torch.float32) + # Source Nodes: [], Original ATen: [aten.sum] + triton_per_fused_sum_9.run(buf84, buf87, 2304, 64, grid=grid(2304), stream=stream0) + buf92 = buf76; del buf76 # reuse + # Source Nodes: [], Original ATen: [aten.add, aten.native_layer_norm_backward] + triton_per_fused_add_native_layer_norm_backward_7.run(buf92, buf85, primals_112, mul_72, div_6, 64, 768, grid=grid(64), stream=stream0) + del div_6 + del primals_112 + buf90 = empty_strided_cuda((768, ), (1, ), torch.float32) + buf91 = empty_strided_cuda((768, ), (1, ), torch.float32) + # Source Nodes: [], Original ATen: [aten.native_layer_norm_backward] + triton_per_fused_native_layer_norm_backward_3.run(buf85, mul_72, buf90, buf91, 768, 64, grid=grid(768), stream=stream0) + del mul_72 + buf93 = reinterpret_tensor(buf68, (64, 3072), (3072, 1), 0); del buf68 # reuse + # Source Nodes: [], Original ATen: [aten.mm] + extern_kernels.mm(reinterpret_tensor(buf92, (64, 768), (768, 1), 0), permute_161, out=buf93) + del permute_161 + buf94 = empty_strided_cuda((768, 3072), (3072, 1), torch.float32) + # Source Nodes: [], Original ATen: [aten.mm] + extern_kernels.mm(reinterpret_tensor(buf92, (768, 64), (1, 768), 0), view_106, out=buf94) + del view_106 + buf95 = empty_strided_cuda((1, 768), (768, 1), torch.float32) + # Source Nodes: [], Original ATen: [aten.sum] + triton_per_fused_sum_4.run(buf92, buf95, 768, 64, grid=grid(768), stream=stream0) + buf96 = reinterpret_tensor(buf93, (1, 64, 3072), (196608, 3072, 1), 0); del buf93 # reuse + # Source Nodes: [add_34, add_35, mul_32, mul_33, mul_34, pow_9, tanh_8], Original ATen: [aten.add, aten.mul, aten.pow, aten.tanh, aten.tanh_backward] + triton_poi_fused_add_mul_pow_tanh_tanh_backward_5.run(buf96, addmm_34, 196608, grid=grid(196608), stream=stream0) + del addmm_34 + buf97 = buf85; del buf85 # reuse + # Source Nodes: [], Original ATen: [aten.mm] + extern_kernels.mm(reinterpret_tensor(buf96, (64, 3072), (3072, 1), 0), permute_165, out=buf97) + del permute_165 + buf98 = empty_strided_cuda((3072, 768), (768, 1), torch.float32) + # Source Nodes: [], Original ATen: [aten.mm] + extern_kernels.mm(reinterpret_tensor(buf96, (3072, 64), (1, 3072), 0), view_104, out=buf98) + del view_104 + buf99 = empty_strided_cuda((1, 3072), (3072, 1), torch.float32) + # Source Nodes: [], Original ATen: [aten.sum] + triton_per_fused_sum_6.run(buf96, buf99, 3072, 64, grid=grid(3072), stream=stream0) + buf104 = buf92; del buf92 # reuse + # Source Nodes: [], Original ATen: [aten.add, aten.native_layer_norm_backward] + 
triton_per_fused_add_native_layer_norm_backward_7.run(buf104, buf97, primals_106, mul_66, div_7, 64, 768, grid=grid(64), stream=stream0) + del div_7 + del primals_106 + buf102 = empty_strided_cuda((768, ), (1, ), torch.float32) + buf103 = empty_strided_cuda((768, ), (1, ), torch.float32) + # Source Nodes: [], Original ATen: [aten.native_layer_norm_backward] + triton_per_fused_native_layer_norm_backward_3.run(buf97, mul_66, buf102, buf103, 768, 64, grid=grid(768), stream=stream0) + del mul_66 + buf105 = buf97; del buf97 # reuse + # Source Nodes: [], Original ATen: [aten.mm] + extern_kernels.mm(reinterpret_tensor(buf104, (64, 768), (768, 1), 0), permute_169, out=buf105) + del permute_169 + buf106 = empty_strided_cuda((768, 768), (768, 1), torch.float32) + # Source Nodes: [], Original ATen: [aten.mm] + extern_kernels.mm(reinterpret_tensor(buf104, (768, 64), (1, 768), 0), reinterpret_tensor(getitem_93, (64, 768), (768, 1), 0), out=buf106) + buf107 = empty_strided_cuda((1, 768), (768, 1), torch.float32) + # Source Nodes: [], Original ATen: [aten.sum] + triton_per_fused_sum_4.run(buf104, buf107, 768, 64, grid=grid(768), stream=stream0) + # Source Nodes: [], Original ATen: [aten._scaled_dot_product_efficient_attention_backward] + buf108 = torch.ops.aten._scaled_dot_product_efficient_attention_backward.default(reinterpret_tensor(buf105, (1, 12, 64, 64), (49152, 64, 768, 1), 0), permute_66, permute_65, permute_67, None, getitem_93, getitem_94, getitem_95, getitem_96, 0.0, [True, True, True, False], True) + del buf105 + del getitem_93 + del getitem_94 + del getitem_95 + del getitem_96 + del permute_65 + del permute_66 + del permute_67 + buf109 = buf108[0] + buf110 = buf108[1] + buf111 = buf108[2] + del buf108 + buf112 = buf84; del buf84 # reuse + # Source Nodes: [], Original ATen: [aten.cat] + triton_poi_fused_cat_8.run(buf109, buf110, buf111, buf112, 147456, grid=grid(147456), stream=stream0) + del buf109 + del buf110 + buf113 = reinterpret_tensor(buf111, (64, 768), (768, 1), 0); del buf111 # reuse + # Source Nodes: [], Original ATen: [aten.mm] + extern_kernels.mm(reinterpret_tensor(buf112, (64, 2304), (2304, 1), 0), permute_177, out=buf113) + del permute_177 + buf114 = empty_strided_cuda((2304, 768), (768, 1), torch.float32) + # Source Nodes: [], Original ATen: [aten.mm] + extern_kernels.mm(reinterpret_tensor(buf112, (2304, 64), (1, 2304), 0), view_96, out=buf114) + del view_96 + buf115 = empty_strided_cuda((1, 2304), (2304, 1), torch.float32) + # Source Nodes: [], Original ATen: [aten.sum] + triton_per_fused_sum_9.run(buf112, buf115, 2304, 64, grid=grid(2304), stream=stream0) + buf120 = buf104; del buf104 # reuse + # Source Nodes: [], Original ATen: [aten.add, aten.native_layer_norm_backward] + triton_per_fused_add_native_layer_norm_backward_7.run(buf120, buf113, primals_100, mul_64, div_8, 64, 768, grid=grid(64), stream=stream0) + del div_8 + del primals_100 + buf118 = empty_strided_cuda((768, ), (1, ), torch.float32) + buf119 = empty_strided_cuda((768, ), (1, ), torch.float32) + # Source Nodes: [], Original ATen: [aten.native_layer_norm_backward] + triton_per_fused_native_layer_norm_backward_3.run(buf113, mul_64, buf118, buf119, 768, 64, grid=grid(768), stream=stream0) + del mul_64 + buf121 = reinterpret_tensor(buf96, (64, 3072), (3072, 1), 0); del buf96 # reuse + # Source Nodes: [], Original ATen: [aten.mm] + extern_kernels.mm(reinterpret_tensor(buf120, (64, 768), (768, 1), 0), permute_181, out=buf121) + del permute_181 + buf122 = empty_strided_cuda((768, 3072), (3072, 1), torch.float32) + # 
Source Nodes: [], Original ATen: [aten.mm] + extern_kernels.mm(reinterpret_tensor(buf120, (768, 64), (1, 768), 0), view_94, out=buf122) + del view_94 + buf123 = empty_strided_cuda((1, 768), (768, 1), torch.float32) + # Source Nodes: [], Original ATen: [aten.sum] + triton_per_fused_sum_4.run(buf120, buf123, 768, 64, grid=grid(768), stream=stream0) + buf124 = reinterpret_tensor(buf121, (1, 64, 3072), (196608, 3072, 1), 0); del buf121 # reuse + # Source Nodes: [add_30, add_31, mul_28, mul_29, mul_30, pow_8, tanh_7], Original ATen: [aten.add, aten.mul, aten.pow, aten.tanh, aten.tanh_backward] + triton_poi_fused_add_mul_pow_tanh_tanh_backward_5.run(buf124, addmm_30, 196608, grid=grid(196608), stream=stream0) + del addmm_30 + buf125 = buf113; del buf113 # reuse + # Source Nodes: [], Original ATen: [aten.mm] + extern_kernels.mm(reinterpret_tensor(buf124, (64, 3072), (3072, 1), 0), permute_185, out=buf125) + del permute_185 + buf126 = empty_strided_cuda((3072, 768), (768, 1), torch.float32) + # Source Nodes: [], Original ATen: [aten.mm] + extern_kernels.mm(reinterpret_tensor(buf124, (3072, 64), (1, 3072), 0), view_92, out=buf126) + del view_92 + buf127 = empty_strided_cuda((1, 3072), (3072, 1), torch.float32) + # Source Nodes: [], Original ATen: [aten.sum] + triton_per_fused_sum_6.run(buf124, buf127, 3072, 64, grid=grid(3072), stream=stream0) + buf132 = buf120; del buf120 # reuse + # Source Nodes: [], Original ATen: [aten.add, aten.native_layer_norm_backward] + triton_per_fused_add_native_layer_norm_backward_7.run(buf132, buf125, primals_94, mul_58, div_9, 64, 768, grid=grid(64), stream=stream0) + del div_9 + del primals_94 + buf130 = empty_strided_cuda((768, ), (1, ), torch.float32) + buf131 = empty_strided_cuda((768, ), (1, ), torch.float32) + # Source Nodes: [], Original ATen: [aten.native_layer_norm_backward] + triton_per_fused_native_layer_norm_backward_3.run(buf125, mul_58, buf130, buf131, 768, 64, grid=grid(768), stream=stream0) + del mul_58 + buf133 = buf125; del buf125 # reuse + # Source Nodes: [], Original ATen: [aten.mm] + extern_kernels.mm(reinterpret_tensor(buf132, (64, 768), (768, 1), 0), permute_189, out=buf133) + del permute_189 + buf134 = empty_strided_cuda((768, 768), (768, 1), torch.float32) + # Source Nodes: [], Original ATen: [aten.mm] + extern_kernels.mm(reinterpret_tensor(buf132, (768, 64), (1, 768), 0), reinterpret_tensor(getitem_82, (64, 768), (768, 1), 0), out=buf134) + buf135 = empty_strided_cuda((1, 768), (768, 1), torch.float32) + # Source Nodes: [], Original ATen: [aten.sum] + triton_per_fused_sum_4.run(buf132, buf135, 768, 64, grid=grid(768), stream=stream0) + # Source Nodes: [], Original ATen: [aten._scaled_dot_product_efficient_attention_backward] + buf136 = torch.ops.aten._scaled_dot_product_efficient_attention_backward.default(reinterpret_tensor(buf133, (1, 12, 64, 64), (49152, 64, 768, 1), 0), permute_58, permute_57, permute_59, None, getitem_82, getitem_83, getitem_84, getitem_85, 0.0, [True, True, True, False], True) + del buf133 + del getitem_82 + del getitem_83 + del getitem_84 + del getitem_85 + del permute_57 + del permute_58 + del permute_59 + buf137 = buf136[0] + buf138 = buf136[1] + buf139 = buf136[2] + del buf136 + buf140 = buf112; del buf112 # reuse + # Source Nodes: [], Original ATen: [aten.cat] + triton_poi_fused_cat_8.run(buf137, buf138, buf139, buf140, 147456, grid=grid(147456), stream=stream0) + del buf137 + del buf138 + buf141 = reinterpret_tensor(buf139, (64, 768), (768, 1), 0); del buf139 # reuse + # Source Nodes: [], Original ATen: [aten.mm] 
+ extern_kernels.mm(reinterpret_tensor(buf140, (64, 2304), (2304, 1), 0), permute_197, out=buf141) + del permute_197 + buf142 = empty_strided_cuda((2304, 768), (768, 1), torch.float32) + # Source Nodes: [], Original ATen: [aten.mm] + extern_kernels.mm(reinterpret_tensor(buf140, (2304, 64), (1, 2304), 0), view_84, out=buf142) + del view_84 + buf143 = empty_strided_cuda((1, 2304), (2304, 1), torch.float32) + # Source Nodes: [], Original ATen: [aten.sum] + triton_per_fused_sum_9.run(buf140, buf143, 2304, 64, grid=grid(2304), stream=stream0) + buf148 = buf132; del buf132 # reuse + # Source Nodes: [], Original ATen: [aten.add, aten.native_layer_norm_backward] + triton_per_fused_add_native_layer_norm_backward_7.run(buf148, buf141, primals_88, mul_56, div_10, 64, 768, grid=grid(64), stream=stream0) + del div_10 + del primals_88 + buf146 = empty_strided_cuda((768, ), (1, ), torch.float32) + buf147 = empty_strided_cuda((768, ), (1, ), torch.float32) + # Source Nodes: [], Original ATen: [aten.native_layer_norm_backward] + triton_per_fused_native_layer_norm_backward_3.run(buf141, mul_56, buf146, buf147, 768, 64, grid=grid(768), stream=stream0) + del mul_56 + buf149 = reinterpret_tensor(buf124, (64, 3072), (3072, 1), 0); del buf124 # reuse + # Source Nodes: [], Original ATen: [aten.mm] + extern_kernels.mm(reinterpret_tensor(buf148, (64, 768), (768, 1), 0), permute_201, out=buf149) + del permute_201 + buf150 = empty_strided_cuda((768, 3072), (3072, 1), torch.float32) + # Source Nodes: [], Original ATen: [aten.mm] + extern_kernels.mm(reinterpret_tensor(buf148, (768, 64), (1, 768), 0), view_82, out=buf150) + del view_82 + buf151 = empty_strided_cuda((1, 768), (768, 1), torch.float32) + # Source Nodes: [], Original ATen: [aten.sum] + triton_per_fused_sum_4.run(buf148, buf151, 768, 64, grid=grid(768), stream=stream0) + buf152 = reinterpret_tensor(buf149, (1, 64, 3072), (196608, 3072, 1), 0); del buf149 # reuse + # Source Nodes: [add_26, add_27, mul_24, mul_25, mul_26, pow_7, tanh_6], Original ATen: [aten.add, aten.mul, aten.pow, aten.tanh, aten.tanh_backward] + triton_poi_fused_add_mul_pow_tanh_tanh_backward_5.run(buf152, addmm_26, 196608, grid=grid(196608), stream=stream0) + del addmm_26 + buf153 = buf141; del buf141 # reuse + # Source Nodes: [], Original ATen: [aten.mm] + extern_kernels.mm(reinterpret_tensor(buf152, (64, 3072), (3072, 1), 0), permute_205, out=buf153) + del permute_205 + buf154 = empty_strided_cuda((3072, 768), (768, 1), torch.float32) + # Source Nodes: [], Original ATen: [aten.mm] + extern_kernels.mm(reinterpret_tensor(buf152, (3072, 64), (1, 3072), 0), view_80, out=buf154) + del view_80 + buf155 = empty_strided_cuda((1, 3072), (3072, 1), torch.float32) + # Source Nodes: [], Original ATen: [aten.sum] + triton_per_fused_sum_6.run(buf152, buf155, 3072, 64, grid=grid(3072), stream=stream0) + buf160 = buf148; del buf148 # reuse + # Source Nodes: [], Original ATen: [aten.add, aten.native_layer_norm_backward] + triton_per_fused_add_native_layer_norm_backward_7.run(buf160, buf153, primals_82, mul_50, div_11, 64, 768, grid=grid(64), stream=stream0) + del div_11 + del primals_82 + buf158 = empty_strided_cuda((768, ), (1, ), torch.float32) + buf159 = empty_strided_cuda((768, ), (1, ), torch.float32) + # Source Nodes: [], Original ATen: [aten.native_layer_norm_backward] + triton_per_fused_native_layer_norm_backward_3.run(buf153, mul_50, buf158, buf159, 768, 64, grid=grid(768), stream=stream0) + del mul_50 + buf161 = buf153; del buf153 # reuse + # Source Nodes: [], Original ATen: [aten.mm] + 
extern_kernels.mm(reinterpret_tensor(buf160, (64, 768), (768, 1), 0), permute_209, out=buf161) + del permute_209 + buf162 = empty_strided_cuda((768, 768), (768, 1), torch.float32) + # Source Nodes: [], Original ATen: [aten.mm] + extern_kernels.mm(reinterpret_tensor(buf160, (768, 64), (1, 768), 0), reinterpret_tensor(getitem_71, (64, 768), (768, 1), 0), out=buf162) + buf163 = empty_strided_cuda((1, 768), (768, 1), torch.float32) + # Source Nodes: [], Original ATen: [aten.sum] + triton_per_fused_sum_4.run(buf160, buf163, 768, 64, grid=grid(768), stream=stream0) + # Source Nodes: [], Original ATen: [aten._scaled_dot_product_efficient_attention_backward] + buf164 = torch.ops.aten._scaled_dot_product_efficient_attention_backward.default(reinterpret_tensor(buf161, (1, 12, 64, 64), (49152, 64, 768, 1), 0), permute_50, permute_49, permute_51, None, getitem_71, getitem_72, getitem_73, getitem_74, 0.0, [True, True, True, False], True) + del buf161 + del getitem_71 + del getitem_72 + del getitem_73 + del getitem_74 + del permute_49 + del permute_50 + del permute_51 + buf165 = buf164[0] + buf166 = buf164[1] + buf167 = buf164[2] + del buf164 + buf168 = buf140; del buf140 # reuse + # Source Nodes: [], Original ATen: [aten.cat] + triton_poi_fused_cat_8.run(buf165, buf166, buf167, buf168, 147456, grid=grid(147456), stream=stream0) + del buf165 + del buf166 + buf169 = reinterpret_tensor(buf167, (64, 768), (768, 1), 0); del buf167 # reuse + # Source Nodes: [], Original ATen: [aten.mm] + extern_kernels.mm(reinterpret_tensor(buf168, (64, 2304), (2304, 1), 0), permute_217, out=buf169) + del permute_217 + buf170 = empty_strided_cuda((2304, 768), (768, 1), torch.float32) + # Source Nodes: [], Original ATen: [aten.mm] + extern_kernels.mm(reinterpret_tensor(buf168, (2304, 64), (1, 2304), 0), view_72, out=buf170) + del view_72 + buf171 = empty_strided_cuda((1, 2304), (2304, 1), torch.float32) + # Source Nodes: [], Original ATen: [aten.sum] + triton_per_fused_sum_9.run(buf168, buf171, 2304, 64, grid=grid(2304), stream=stream0) + buf176 = buf160; del buf160 # reuse + # Source Nodes: [], Original ATen: [aten.add, aten.native_layer_norm_backward] + triton_per_fused_add_native_layer_norm_backward_7.run(buf176, buf169, primals_76, mul_48, div_12, 64, 768, grid=grid(64), stream=stream0) + del div_12 + del primals_76 + buf174 = empty_strided_cuda((768, ), (1, ), torch.float32) + buf175 = empty_strided_cuda((768, ), (1, ), torch.float32) + # Source Nodes: [], Original ATen: [aten.native_layer_norm_backward] + triton_per_fused_native_layer_norm_backward_3.run(buf169, mul_48, buf174, buf175, 768, 64, grid=grid(768), stream=stream0) + del mul_48 + buf177 = reinterpret_tensor(buf152, (64, 3072), (3072, 1), 0); del buf152 # reuse + # Source Nodes: [], Original ATen: [aten.mm] + extern_kernels.mm(reinterpret_tensor(buf176, (64, 768), (768, 1), 0), permute_221, out=buf177) + del permute_221 + buf178 = empty_strided_cuda((768, 3072), (3072, 1), torch.float32) + # Source Nodes: [], Original ATen: [aten.mm] + extern_kernels.mm(reinterpret_tensor(buf176, (768, 64), (1, 768), 0), view_70, out=buf178) + del view_70 + buf179 = empty_strided_cuda((1, 768), (768, 1), torch.float32) + # Source Nodes: [], Original ATen: [aten.sum] + triton_per_fused_sum_4.run(buf176, buf179, 768, 64, grid=grid(768), stream=stream0) + buf180 = reinterpret_tensor(buf177, (1, 64, 3072), (196608, 3072, 1), 0); del buf177 # reuse + # Source Nodes: [add_22, add_23, mul_20, mul_21, mul_22, pow_6, tanh_5], Original ATen: [aten.add, aten.mul, aten.pow, aten.tanh, 
aten.tanh_backward] + triton_poi_fused_add_mul_pow_tanh_tanh_backward_5.run(buf180, addmm_22, 196608, grid=grid(196608), stream=stream0) + del addmm_22 + buf181 = buf169; del buf169 # reuse + # Source Nodes: [], Original ATen: [aten.mm] + extern_kernels.mm(reinterpret_tensor(buf180, (64, 3072), (3072, 1), 0), permute_225, out=buf181) + del permute_225 + buf182 = empty_strided_cuda((3072, 768), (768, 1), torch.float32) + # Source Nodes: [], Original ATen: [aten.mm] + extern_kernels.mm(reinterpret_tensor(buf180, (3072, 64), (1, 3072), 0), view_68, out=buf182) + del view_68 + buf183 = empty_strided_cuda((1, 3072), (3072, 1), torch.float32) + # Source Nodes: [], Original ATen: [aten.sum] + triton_per_fused_sum_6.run(buf180, buf183, 3072, 64, grid=grid(3072), stream=stream0) + buf188 = buf176; del buf176 # reuse + # Source Nodes: [], Original ATen: [aten.add, aten.native_layer_norm_backward] + triton_per_fused_add_native_layer_norm_backward_7.run(buf188, buf181, primals_70, mul_42, div_13, 64, 768, grid=grid(64), stream=stream0) + del div_13 + del primals_70 + buf186 = empty_strided_cuda((768, ), (1, ), torch.float32) + buf187 = empty_strided_cuda((768, ), (1, ), torch.float32) + # Source Nodes: [], Original ATen: [aten.native_layer_norm_backward] + triton_per_fused_native_layer_norm_backward_3.run(buf181, mul_42, buf186, buf187, 768, 64, grid=grid(768), stream=stream0) + del mul_42 + buf189 = buf181; del buf181 # reuse + # Source Nodes: [], Original ATen: [aten.mm] + extern_kernels.mm(reinterpret_tensor(buf188, (64, 768), (768, 1), 0), permute_229, out=buf189) + del permute_229 + buf190 = empty_strided_cuda((768, 768), (768, 1), torch.float32) + # Source Nodes: [], Original ATen: [aten.mm] + extern_kernels.mm(reinterpret_tensor(buf188, (768, 64), (1, 768), 0), reinterpret_tensor(getitem_60, (64, 768), (768, 1), 0), out=buf190) + buf191 = empty_strided_cuda((1, 768), (768, 1), torch.float32) + # Source Nodes: [], Original ATen: [aten.sum] + triton_per_fused_sum_4.run(buf188, buf191, 768, 64, grid=grid(768), stream=stream0) + # Source Nodes: [], Original ATen: [aten._scaled_dot_product_efficient_attention_backward] + buf192 = torch.ops.aten._scaled_dot_product_efficient_attention_backward.default(reinterpret_tensor(buf189, (1, 12, 64, 64), (49152, 64, 768, 1), 0), permute_42, permute_41, permute_43, None, getitem_60, getitem_61, getitem_62, getitem_63, 0.0, [True, True, True, False], True) + del buf189 + del getitem_60 + del getitem_61 + del getitem_62 + del getitem_63 + del permute_41 + del permute_42 + del permute_43 + buf193 = buf192[0] + buf194 = buf192[1] + buf195 = buf192[2] + del buf192 + buf196 = buf168; del buf168 # reuse + # Source Nodes: [], Original ATen: [aten.cat] + triton_poi_fused_cat_8.run(buf193, buf194, buf195, buf196, 147456, grid=grid(147456), stream=stream0) + del buf193 + del buf194 + buf197 = reinterpret_tensor(buf195, (64, 768), (768, 1), 0); del buf195 # reuse + # Source Nodes: [], Original ATen: [aten.mm] + extern_kernels.mm(reinterpret_tensor(buf196, (64, 2304), (2304, 1), 0), permute_237, out=buf197) + del permute_237 + buf198 = empty_strided_cuda((2304, 768), (768, 1), torch.float32) + # Source Nodes: [], Original ATen: [aten.mm] + extern_kernels.mm(reinterpret_tensor(buf196, (2304, 64), (1, 2304), 0), view_60, out=buf198) + del view_60 + buf199 = empty_strided_cuda((1, 2304), (2304, 1), torch.float32) + # Source Nodes: [], Original ATen: [aten.sum] + triton_per_fused_sum_9.run(buf196, buf199, 2304, 64, grid=grid(2304), stream=stream0) + buf204 = buf188; del buf188 
# reuse + # Source Nodes: [], Original ATen: [aten.add, aten.native_layer_norm_backward] + triton_per_fused_add_native_layer_norm_backward_7.run(buf204, buf197, primals_64, mul_40, div_14, 64, 768, grid=grid(64), stream=stream0) + del div_14 + del primals_64 + buf202 = empty_strided_cuda((768, ), (1, ), torch.float32) + buf203 = empty_strided_cuda((768, ), (1, ), torch.float32) + # Source Nodes: [], Original ATen: [aten.native_layer_norm_backward] + triton_per_fused_native_layer_norm_backward_3.run(buf197, mul_40, buf202, buf203, 768, 64, grid=grid(768), stream=stream0) + del mul_40 + buf205 = reinterpret_tensor(buf180, (64, 3072), (3072, 1), 0); del buf180 # reuse + # Source Nodes: [], Original ATen: [aten.mm] + extern_kernels.mm(reinterpret_tensor(buf204, (64, 768), (768, 1), 0), permute_241, out=buf205) + del permute_241 + buf206 = empty_strided_cuda((768, 3072), (3072, 1), torch.float32) + # Source Nodes: [], Original ATen: [aten.mm] + extern_kernels.mm(reinterpret_tensor(buf204, (768, 64), (1, 768), 0), view_58, out=buf206) + del view_58 + buf207 = empty_strided_cuda((1, 768), (768, 1), torch.float32) + # Source Nodes: [], Original ATen: [aten.sum] + triton_per_fused_sum_4.run(buf204, buf207, 768, 64, grid=grid(768), stream=stream0) + buf208 = reinterpret_tensor(buf205, (1, 64, 3072), (196608, 3072, 1), 0); del buf205 # reuse + # Source Nodes: [add_18, add_19, mul_16, mul_17, mul_18, pow_5, tanh_4], Original ATen: [aten.add, aten.mul, aten.pow, aten.tanh, aten.tanh_backward] + triton_poi_fused_add_mul_pow_tanh_tanh_backward_5.run(buf208, addmm_18, 196608, grid=grid(196608), stream=stream0) + del addmm_18 + buf209 = buf197; del buf197 # reuse + # Source Nodes: [], Original ATen: [aten.mm] + extern_kernels.mm(reinterpret_tensor(buf208, (64, 3072), (3072, 1), 0), permute_245, out=buf209) + del permute_245 + buf210 = empty_strided_cuda((3072, 768), (768, 1), torch.float32) + # Source Nodes: [], Original ATen: [aten.mm] + extern_kernels.mm(reinterpret_tensor(buf208, (3072, 64), (1, 3072), 0), view_56, out=buf210) + del view_56 + buf211 = empty_strided_cuda((1, 3072), (3072, 1), torch.float32) + # Source Nodes: [], Original ATen: [aten.sum] + triton_per_fused_sum_6.run(buf208, buf211, 3072, 64, grid=grid(3072), stream=stream0) + buf216 = buf204; del buf204 # reuse + # Source Nodes: [], Original ATen: [aten.add, aten.native_layer_norm_backward] + triton_per_fused_add_native_layer_norm_backward_7.run(buf216, buf209, primals_58, mul_34, div_15, 64, 768, grid=grid(64), stream=stream0) + del div_15 + del primals_58 + buf214 = empty_strided_cuda((768, ), (1, ), torch.float32) + buf215 = empty_strided_cuda((768, ), (1, ), torch.float32) + # Source Nodes: [], Original ATen: [aten.native_layer_norm_backward] + triton_per_fused_native_layer_norm_backward_3.run(buf209, mul_34, buf214, buf215, 768, 64, grid=grid(768), stream=stream0) + del mul_34 + buf217 = buf209; del buf209 # reuse + # Source Nodes: [], Original ATen: [aten.mm] + extern_kernels.mm(reinterpret_tensor(buf216, (64, 768), (768, 1), 0), permute_249, out=buf217) + del permute_249 + buf218 = empty_strided_cuda((768, 768), (768, 1), torch.float32) + # Source Nodes: [], Original ATen: [aten.mm] + extern_kernels.mm(reinterpret_tensor(buf216, (768, 64), (1, 768), 0), reinterpret_tensor(getitem_49, (64, 768), (768, 1), 0), out=buf218) + buf219 = empty_strided_cuda((1, 768), (768, 1), torch.float32) + # Source Nodes: [], Original ATen: [aten.sum] + triton_per_fused_sum_4.run(buf216, buf219, 768, 64, grid=grid(768), stream=stream0) + # Source 
Nodes: [], Original ATen: [aten._scaled_dot_product_efficient_attention_backward] + buf220 = torch.ops.aten._scaled_dot_product_efficient_attention_backward.default(reinterpret_tensor(buf217, (1, 12, 64, 64), (49152, 64, 768, 1), 0), permute_34, permute_33, permute_35, None, getitem_49, getitem_50, getitem_51, getitem_52, 0.0, [True, True, True, False], True) + del buf217 + del getitem_49 + del getitem_50 + del getitem_51 + del getitem_52 + del permute_33 + del permute_34 + del permute_35 + buf221 = buf220[0] + buf222 = buf220[1] + buf223 = buf220[2] + del buf220 + buf224 = buf196; del buf196 # reuse + # Source Nodes: [], Original ATen: [aten.cat] + triton_poi_fused_cat_8.run(buf221, buf222, buf223, buf224, 147456, grid=grid(147456), stream=stream0) + del buf221 + del buf222 + buf225 = reinterpret_tensor(buf223, (64, 768), (768, 1), 0); del buf223 # reuse + # Source Nodes: [], Original ATen: [aten.mm] + extern_kernels.mm(reinterpret_tensor(buf224, (64, 2304), (2304, 1), 0), permute_257, out=buf225) + del permute_257 + buf226 = empty_strided_cuda((2304, 768), (768, 1), torch.float32) + # Source Nodes: [], Original ATen: [aten.mm] + extern_kernels.mm(reinterpret_tensor(buf224, (2304, 64), (1, 2304), 0), view_48, out=buf226) + del view_48 + buf227 = empty_strided_cuda((1, 2304), (2304, 1), torch.float32) + # Source Nodes: [], Original ATen: [aten.sum] + triton_per_fused_sum_9.run(buf224, buf227, 2304, 64, grid=grid(2304), stream=stream0) + buf232 = buf216; del buf216 # reuse + # Source Nodes: [], Original ATen: [aten.add, aten.native_layer_norm_backward] + triton_per_fused_add_native_layer_norm_backward_7.run(buf232, buf225, primals_52, mul_32, div_16, 64, 768, grid=grid(64), stream=stream0) + del div_16 + del primals_52 + buf230 = empty_strided_cuda((768, ), (1, ), torch.float32) + buf231 = empty_strided_cuda((768, ), (1, ), torch.float32) + # Source Nodes: [], Original ATen: [aten.native_layer_norm_backward] + triton_per_fused_native_layer_norm_backward_3.run(buf225, mul_32, buf230, buf231, 768, 64, grid=grid(768), stream=stream0) + del mul_32 + buf233 = reinterpret_tensor(buf208, (64, 3072), (3072, 1), 0); del buf208 # reuse + # Source Nodes: [], Original ATen: [aten.mm] + extern_kernels.mm(reinterpret_tensor(buf232, (64, 768), (768, 1), 0), permute_261, out=buf233) + del permute_261 + buf234 = empty_strided_cuda((768, 3072), (3072, 1), torch.float32) + # Source Nodes: [], Original ATen: [aten.mm] + extern_kernels.mm(reinterpret_tensor(buf232, (768, 64), (1, 768), 0), view_46, out=buf234) + del view_46 + buf235 = empty_strided_cuda((1, 768), (768, 1), torch.float32) + # Source Nodes: [], Original ATen: [aten.sum] + triton_per_fused_sum_4.run(buf232, buf235, 768, 64, grid=grid(768), stream=stream0) + buf236 = reinterpret_tensor(buf233, (1, 64, 3072), (196608, 3072, 1), 0); del buf233 # reuse + # Source Nodes: [add_14, add_15, mul_12, mul_13, mul_14, pow_4, tanh_3], Original ATen: [aten.add, aten.mul, aten.pow, aten.tanh, aten.tanh_backward] + triton_poi_fused_add_mul_pow_tanh_tanh_backward_5.run(buf236, addmm_14, 196608, grid=grid(196608), stream=stream0) + del addmm_14 + buf237 = buf225; del buf225 # reuse + # Source Nodes: [], Original ATen: [aten.mm] + extern_kernels.mm(reinterpret_tensor(buf236, (64, 3072), (3072, 1), 0), permute_265, out=buf237) + del permute_265 + buf238 = empty_strided_cuda((3072, 768), (768, 1), torch.float32) + # Source Nodes: [], Original ATen: [aten.mm] + extern_kernels.mm(reinterpret_tensor(buf236, (3072, 64), (1, 3072), 0), view_44, out=buf238) + del view_44 + 
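+ # This same schedule repeats for every transformer block of the backward pass:
+ # a persistent-reduction layer-norm backward, mm pairs for the MLP weight grads,
+ # the fused tanh-GELU backward kernel, SDPA backward via
+ # aten._scaled_dot_product_efficient_attention_backward, and a cat kernel that
+ # re-packs the separate dq/dk/dv grads into the fused (1, 64, 2304) qkv layout.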
buf239 = empty_strided_cuda((1, 3072), (3072, 1), torch.float32) + # Source Nodes: [], Original ATen: [aten.sum] + triton_per_fused_sum_6.run(buf236, buf239, 3072, 64, grid=grid(3072), stream=stream0) + buf244 = buf232; del buf232 # reuse + # Source Nodes: [], Original ATen: [aten.add, aten.native_layer_norm_backward] + triton_per_fused_add_native_layer_norm_backward_7.run(buf244, buf237, primals_46, mul_26, div_17, 64, 768, grid=grid(64), stream=stream0) + del div_17 + del primals_46 + buf242 = empty_strided_cuda((768, ), (1, ), torch.float32) + buf243 = empty_strided_cuda((768, ), (1, ), torch.float32) + # Source Nodes: [], Original ATen: [aten.native_layer_norm_backward] + triton_per_fused_native_layer_norm_backward_3.run(buf237, mul_26, buf242, buf243, 768, 64, grid=grid(768), stream=stream0) + del mul_26 + buf245 = buf237; del buf237 # reuse + # Source Nodes: [], Original ATen: [aten.mm] + extern_kernels.mm(reinterpret_tensor(buf244, (64, 768), (768, 1), 0), permute_269, out=buf245) + del permute_269 + buf246 = empty_strided_cuda((768, 768), (768, 1), torch.float32) + # Source Nodes: [], Original ATen: [aten.mm] + extern_kernels.mm(reinterpret_tensor(buf244, (768, 64), (1, 768), 0), reinterpret_tensor(getitem_38, (64, 768), (768, 1), 0), out=buf246) + buf247 = empty_strided_cuda((1, 768), (768, 1), torch.float32) + # Source Nodes: [], Original ATen: [aten.sum] + triton_per_fused_sum_4.run(buf244, buf247, 768, 64, grid=grid(768), stream=stream0) + # Source Nodes: [], Original ATen: [aten._scaled_dot_product_efficient_attention_backward] + buf248 = torch.ops.aten._scaled_dot_product_efficient_attention_backward.default(reinterpret_tensor(buf245, (1, 12, 64, 64), (49152, 64, 768, 1), 0), permute_26, permute_25, permute_27, None, getitem_38, getitem_39, getitem_40, getitem_41, 0.0, [True, True, True, False], True) + del buf245 + del getitem_38 + del getitem_39 + del getitem_40 + del getitem_41 + del permute_25 + del permute_26 + del permute_27 + buf249 = buf248[0] + buf250 = buf248[1] + buf251 = buf248[2] + del buf248 + buf252 = buf224; del buf224 # reuse + # Source Nodes: [], Original ATen: [aten.cat] + triton_poi_fused_cat_8.run(buf249, buf250, buf251, buf252, 147456, grid=grid(147456), stream=stream0) + del buf249 + del buf250 + buf253 = reinterpret_tensor(buf251, (64, 768), (768, 1), 0); del buf251 # reuse + # Source Nodes: [], Original ATen: [aten.mm] + extern_kernels.mm(reinterpret_tensor(buf252, (64, 2304), (2304, 1), 0), permute_277, out=buf253) + del permute_277 + buf254 = empty_strided_cuda((2304, 768), (768, 1), torch.float32) + # Source Nodes: [], Original ATen: [aten.mm] + extern_kernels.mm(reinterpret_tensor(buf252, (2304, 64), (1, 2304), 0), view_36, out=buf254) + del view_36 + buf255 = empty_strided_cuda((1, 2304), (2304, 1), torch.float32) + # Source Nodes: [], Original ATen: [aten.sum] + triton_per_fused_sum_9.run(buf252, buf255, 2304, 64, grid=grid(2304), stream=stream0) + buf260 = buf244; del buf244 # reuse + # Source Nodes: [], Original ATen: [aten.add, aten.native_layer_norm_backward] + triton_per_fused_add_native_layer_norm_backward_7.run(buf260, buf253, primals_40, mul_24, div_18, 64, 768, grid=grid(64), stream=stream0) + del div_18 + del primals_40 + buf258 = empty_strided_cuda((768, ), (1, ), torch.float32) + buf259 = empty_strided_cuda((768, ), (1, ), torch.float32) + # Source Nodes: [], Original ATen: [aten.native_layer_norm_backward] + triton_per_fused_native_layer_norm_backward_3.run(buf253, mul_24, buf258, buf259, 768, 64, grid=grid(768), stream=stream0) + 
del mul_24 + buf261 = reinterpret_tensor(buf236, (64, 3072), (3072, 1), 0); del buf236 # reuse + # Source Nodes: [], Original ATen: [aten.mm] + extern_kernels.mm(reinterpret_tensor(buf260, (64, 768), (768, 1), 0), permute_281, out=buf261) + del permute_281 + buf262 = empty_strided_cuda((768, 3072), (3072, 1), torch.float32) + # Source Nodes: [], Original ATen: [aten.mm] + extern_kernels.mm(reinterpret_tensor(buf260, (768, 64), (1, 768), 0), view_34, out=buf262) + del view_34 + buf263 = empty_strided_cuda((1, 768), (768, 1), torch.float32) + # Source Nodes: [], Original ATen: [aten.sum] + triton_per_fused_sum_4.run(buf260, buf263, 768, 64, grid=grid(768), stream=stream0) + buf264 = reinterpret_tensor(buf261, (1, 64, 3072), (196608, 3072, 1), 0); del buf261 # reuse + # Source Nodes: [add_10, add_11, mul_10, mul_8, mul_9, pow_3, tanh_2], Original ATen: [aten.add, aten.mul, aten.pow, aten.tanh, aten.tanh_backward] + triton_poi_fused_add_mul_pow_tanh_tanh_backward_5.run(buf264, addmm_10, 196608, grid=grid(196608), stream=stream0) + del addmm_10 + buf265 = buf253; del buf253 # reuse + # Source Nodes: [], Original ATen: [aten.mm] + extern_kernels.mm(reinterpret_tensor(buf264, (64, 3072), (3072, 1), 0), permute_285, out=buf265) + del permute_285 + buf266 = empty_strided_cuda((3072, 768), (768, 1), torch.float32) + # Source Nodes: [], Original ATen: [aten.mm] + extern_kernels.mm(reinterpret_tensor(buf264, (3072, 64), (1, 3072), 0), view_32, out=buf266) + del view_32 + buf267 = empty_strided_cuda((1, 3072), (3072, 1), torch.float32) + # Source Nodes: [], Original ATen: [aten.sum] + triton_per_fused_sum_6.run(buf264, buf267, 3072, 64, grid=grid(3072), stream=stream0) + buf272 = buf260; del buf260 # reuse + # Source Nodes: [], Original ATen: [aten.add, aten.native_layer_norm_backward] + triton_per_fused_add_native_layer_norm_backward_7.run(buf272, buf265, primals_34, mul_18, div_19, 64, 768, grid=grid(64), stream=stream0) + del div_19 + del primals_34 + buf270 = empty_strided_cuda((768, ), (1, ), torch.float32) + buf271 = empty_strided_cuda((768, ), (1, ), torch.float32) + # Source Nodes: [], Original ATen: [aten.native_layer_norm_backward] + triton_per_fused_native_layer_norm_backward_3.run(buf265, mul_18, buf270, buf271, 768, 64, grid=grid(768), stream=stream0) + del mul_18 + buf273 = buf265; del buf265 # reuse + # Source Nodes: [], Original ATen: [aten.mm] + extern_kernels.mm(reinterpret_tensor(buf272, (64, 768), (768, 1), 0), permute_289, out=buf273) + del permute_289 + buf274 = empty_strided_cuda((768, 768), (768, 1), torch.float32) + # Source Nodes: [], Original ATen: [aten.mm] + extern_kernels.mm(reinterpret_tensor(buf272, (768, 64), (1, 768), 0), reinterpret_tensor(getitem_27, (64, 768), (768, 1), 0), out=buf274) + buf275 = empty_strided_cuda((1, 768), (768, 1), torch.float32) + # Source Nodes: [], Original ATen: [aten.sum] + triton_per_fused_sum_4.run(buf272, buf275, 768, 64, grid=grid(768), stream=stream0) + # Source Nodes: [], Original ATen: [aten._scaled_dot_product_efficient_attention_backward] + buf276 = torch.ops.aten._scaled_dot_product_efficient_attention_backward.default(reinterpret_tensor(buf273, (1, 12, 64, 64), (49152, 64, 768, 1), 0), permute_18, permute_17, permute_19, None, getitem_27, getitem_28, getitem_29, getitem_30, 0.0, [True, True, True, False], True) + del buf273 + del getitem_27 + del getitem_28 + del getitem_29 + del getitem_30 + del permute_17 + del permute_18 + del permute_19 + buf277 = buf276[0] + buf278 = buf276[1] + buf279 = buf276[2] + del buf276 + buf280 = 
buf252; del buf252 # reuse + # Source Nodes: [], Original ATen: [aten.cat] + triton_poi_fused_cat_8.run(buf277, buf278, buf279, buf280, 147456, grid=grid(147456), stream=stream0) + del buf277 + del buf278 + buf281 = reinterpret_tensor(buf279, (64, 768), (768, 1), 0); del buf279 # reuse + # Source Nodes: [], Original ATen: [aten.mm] + extern_kernels.mm(reinterpret_tensor(buf280, (64, 2304), (2304, 1), 0), permute_297, out=buf281) + del permute_297 + buf282 = empty_strided_cuda((2304, 768), (768, 1), torch.float32) + # Source Nodes: [], Original ATen: [aten.mm] + extern_kernels.mm(reinterpret_tensor(buf280, (2304, 64), (1, 2304), 0), view_24, out=buf282) + del view_24 + buf283 = empty_strided_cuda((1, 2304), (2304, 1), torch.float32) + # Source Nodes: [], Original ATen: [aten.sum] + triton_per_fused_sum_9.run(buf280, buf283, 2304, 64, grid=grid(2304), stream=stream0) + buf288 = buf272; del buf272 # reuse + # Source Nodes: [], Original ATen: [aten.add, aten.native_layer_norm_backward] + triton_per_fused_add_native_layer_norm_backward_7.run(buf288, buf281, primals_28, mul_16, div_20, 64, 768, grid=grid(64), stream=stream0) + del div_20 + del primals_28 + buf286 = empty_strided_cuda((768, ), (1, ), torch.float32) + buf287 = empty_strided_cuda((768, ), (1, ), torch.float32) + # Source Nodes: [], Original ATen: [aten.native_layer_norm_backward] + triton_per_fused_native_layer_norm_backward_3.run(buf281, mul_16, buf286, buf287, 768, 64, grid=grid(768), stream=stream0) + del mul_16 + buf289 = reinterpret_tensor(buf264, (64, 3072), (3072, 1), 0); del buf264 # reuse + # Source Nodes: [], Original ATen: [aten.mm] + extern_kernels.mm(reinterpret_tensor(buf288, (64, 768), (768, 1), 0), permute_301, out=buf289) + del permute_301 + buf290 = empty_strided_cuda((768, 3072), (3072, 1), torch.float32) + # Source Nodes: [], Original ATen: [aten.mm] + extern_kernels.mm(reinterpret_tensor(buf288, (768, 64), (1, 768), 0), view_22, out=buf290) + del view_22 + buf291 = empty_strided_cuda((1, 768), (768, 1), torch.float32) + # Source Nodes: [], Original ATen: [aten.sum] + triton_per_fused_sum_4.run(buf288, buf291, 768, 64, grid=grid(768), stream=stream0) + buf292 = reinterpret_tensor(buf289, (1, 64, 3072), (196608, 3072, 1), 0); del buf289 # reuse + # Source Nodes: [add_6, add_7, mul_4, mul_5, mul_6, pow_2, tanh_1], Original ATen: [aten.add, aten.mul, aten.pow, aten.tanh, aten.tanh_backward] + triton_poi_fused_add_mul_pow_tanh_tanh_backward_5.run(buf292, addmm_6, 196608, grid=grid(196608), stream=stream0) + del addmm_6 + buf293 = buf281; del buf281 # reuse + # Source Nodes: [], Original ATen: [aten.mm] + extern_kernels.mm(reinterpret_tensor(buf292, (64, 3072), (3072, 1), 0), permute_305, out=buf293) + del permute_305 + buf294 = empty_strided_cuda((3072, 768), (768, 1), torch.float32) + # Source Nodes: [], Original ATen: [aten.mm] + extern_kernels.mm(reinterpret_tensor(buf292, (3072, 64), (1, 3072), 0), view_20, out=buf294) + del view_20 + buf295 = empty_strided_cuda((1, 3072), (3072, 1), torch.float32) + # Source Nodes: [], Original ATen: [aten.sum] + triton_per_fused_sum_6.run(buf292, buf295, 3072, 64, grid=grid(3072), stream=stream0) + buf300 = buf288; del buf288 # reuse + # Source Nodes: [], Original ATen: [aten.add, aten.native_layer_norm_backward] + triton_per_fused_add_native_layer_norm_backward_7.run(buf300, buf293, primals_22, mul_10, div_21, 64, 768, grid=grid(64), stream=stream0) + del div_21 + del primals_22 + buf298 = empty_strided_cuda((768, ), (1, ), torch.float32) + buf299 = empty_strided_cuda((768, 
), (1, ), torch.float32) + # Source Nodes: [], Original ATen: [aten.native_layer_norm_backward] + triton_per_fused_native_layer_norm_backward_3.run(buf293, mul_10, buf298, buf299, 768, 64, grid=grid(768), stream=stream0) + del mul_10 + buf301 = buf293; del buf293 # reuse + # Source Nodes: [], Original ATen: [aten.mm] + extern_kernels.mm(reinterpret_tensor(buf300, (64, 768), (768, 1), 0), permute_309, out=buf301) + del permute_309 + buf302 = empty_strided_cuda((768, 768), (768, 1), torch.float32) + # Source Nodes: [], Original ATen: [aten.mm] + extern_kernels.mm(reinterpret_tensor(buf300, (768, 64), (1, 768), 0), reinterpret_tensor(getitem_16, (64, 768), (768, 1), 0), out=buf302) + buf303 = empty_strided_cuda((1, 768), (768, 1), torch.float32) + # Source Nodes: [], Original ATen: [aten.sum] + triton_per_fused_sum_4.run(buf300, buf303, 768, 64, grid=grid(768), stream=stream0) + # Source Nodes: [], Original ATen: [aten._scaled_dot_product_efficient_attention_backward] + buf304 = torch.ops.aten._scaled_dot_product_efficient_attention_backward.default(reinterpret_tensor(buf301, (1, 12, 64, 64), (49152, 64, 768, 1), 0), permute_10, permute_9, permute_11, None, getitem_16, getitem_17, getitem_18, getitem_19, 0.0, [True, True, True, False], True) + del buf301 + del getitem_16 + del getitem_17 + del getitem_18 + del getitem_19 + del permute_10 + del permute_11 + del permute_9 + buf305 = buf304[0] + buf306 = buf304[1] + buf307 = buf304[2] + del buf304 + buf308 = buf280; del buf280 # reuse + # Source Nodes: [], Original ATen: [aten.cat] + triton_poi_fused_cat_8.run(buf305, buf306, buf307, buf308, 147456, grid=grid(147456), stream=stream0) + del buf305 + del buf306 + buf309 = reinterpret_tensor(buf307, (64, 768), (768, 1), 0); del buf307 # reuse + # Source Nodes: [], Original ATen: [aten.mm] + extern_kernels.mm(reinterpret_tensor(buf308, (64, 2304), (2304, 1), 0), permute_317, out=buf309) + del permute_317 + buf310 = empty_strided_cuda((2304, 768), (768, 1), torch.float32) + # Source Nodes: [], Original ATen: [aten.mm] + extern_kernels.mm(reinterpret_tensor(buf308, (2304, 64), (1, 2304), 0), view_12, out=buf310) + del view_12 + buf311 = empty_strided_cuda((1, 2304), (2304, 1), torch.float32) + # Source Nodes: [], Original ATen: [aten.sum] + triton_per_fused_sum_9.run(buf308, buf311, 2304, 64, grid=grid(2304), stream=stream0) + buf316 = buf300; del buf300 # reuse + # Source Nodes: [], Original ATen: [aten.add, aten.native_layer_norm_backward] + triton_per_fused_add_native_layer_norm_backward_7.run(buf316, buf309, primals_16, mul_8, div_22, 64, 768, grid=grid(64), stream=stream0) + del div_22 + del primals_16 + buf314 = empty_strided_cuda((768, ), (1, ), torch.float32) + buf315 = empty_strided_cuda((768, ), (1, ), torch.float32) + # Source Nodes: [], Original ATen: [aten.native_layer_norm_backward] + triton_per_fused_native_layer_norm_backward_3.run(buf309, mul_8, buf314, buf315, 768, 64, grid=grid(768), stream=stream0) + del mul_8 + buf317 = reinterpret_tensor(buf292, (64, 3072), (3072, 1), 0); del buf292 # reuse + # Source Nodes: [], Original ATen: [aten.mm] + extern_kernels.mm(reinterpret_tensor(buf316, (64, 768), (768, 1), 0), permute_321, out=buf317) + del permute_321 + buf318 = empty_strided_cuda((768, 3072), (3072, 1), torch.float32) + # Source Nodes: [], Original ATen: [aten.mm] + extern_kernels.mm(reinterpret_tensor(buf316, (768, 64), (1, 768), 0), view_10, out=buf318) + del view_10 + buf319 = empty_strided_cuda((1, 768), (768, 1), torch.float32) + # Source Nodes: [], Original ATen: [aten.sum] 
+ triton_per_fused_sum_4.run(buf316, buf319, 768, 64, grid=grid(768), stream=stream0) + buf320 = reinterpret_tensor(buf317, (1, 64, 3072), (196608, 3072, 1), 0); del buf317 # reuse + # Source Nodes: [add_2, add_3, mul, mul_1, mul_2, pow_1, tanh], Original ATen: [aten.add, aten.mul, aten.pow, aten.tanh, aten.tanh_backward] + triton_poi_fused_add_mul_pow_tanh_tanh_backward_5.run(buf320, addmm_2, 196608, grid=grid(196608), stream=stream0) + del addmm_2 + buf321 = buf309; del buf309 # reuse + # Source Nodes: [], Original ATen: [aten.mm] + extern_kernels.mm(reinterpret_tensor(buf320, (64, 3072), (3072, 1), 0), permute_325, out=buf321) + del permute_325 + buf322 = empty_strided_cuda((3072, 768), (768, 1), torch.float32) + # Source Nodes: [], Original ATen: [aten.mm] + extern_kernels.mm(reinterpret_tensor(buf320, (3072, 64), (1, 3072), 0), view_8, out=buf322) + del view_8 + buf323 = empty_strided_cuda((1, 3072), (3072, 1), torch.float32) + # Source Nodes: [], Original ATen: [aten.sum] + triton_per_fused_sum_6.run(buf320, buf323, 3072, 64, grid=grid(3072), stream=stream0) + del buf320 + buf328 = buf316; del buf316 # reuse + # Source Nodes: [], Original ATen: [aten.add, aten.native_layer_norm_backward] + triton_per_fused_add_native_layer_norm_backward_7.run(buf328, buf321, primals_10, mul_2, div_23, 64, 768, grid=grid(64), stream=stream0) + del div_23 + del primals_10 + buf326 = empty_strided_cuda((768, ), (1, ), torch.float32) + buf327 = empty_strided_cuda((768, ), (1, ), torch.float32) + # Source Nodes: [], Original ATen: [aten.native_layer_norm_backward] + triton_per_fused_native_layer_norm_backward_3.run(buf321, mul_2, buf326, buf327, 768, 64, grid=grid(768), stream=stream0) + del mul_2 + buf329 = buf321; del buf321 # reuse + # Source Nodes: [], Original ATen: [aten.mm] + extern_kernels.mm(reinterpret_tensor(buf328, (64, 768), (768, 1), 0), permute_329, out=buf329) + del permute_329 + buf330 = empty_strided_cuda((768, 768), (768, 1), torch.float32) + # Source Nodes: [], Original ATen: [aten.mm] + extern_kernels.mm(reinterpret_tensor(buf328, (768, 64), (1, 768), 0), reinterpret_tensor(getitem_5, (64, 768), (768, 1), 0), out=buf330) + buf331 = empty_strided_cuda((1, 768), (768, 1), torch.float32) + # Source Nodes: [], Original ATen: [aten.sum] + triton_per_fused_sum_4.run(buf328, buf331, 768, 64, grid=grid(768), stream=stream0) + # Source Nodes: [], Original ATen: [aten._scaled_dot_product_efficient_attention_backward] + buf332 = torch.ops.aten._scaled_dot_product_efficient_attention_backward.default(reinterpret_tensor(buf329, (1, 12, 64, 64), (49152, 64, 768, 1), 0), permute_2, permute_1, permute_3, None, getitem_5, getitem_6, getitem_7, getitem_8, 0.0, [True, True, True, False], True) + del buf329 + del getitem_5 + del getitem_6 + del getitem_7 + del getitem_8 + del permute_1 + del permute_2 + del permute_3 + buf333 = buf332[0] + buf334 = buf332[1] + buf335 = buf332[2] + del buf332 + buf336 = buf308; del buf308 # reuse + # Source Nodes: [], Original ATen: [aten.cat] + triton_poi_fused_cat_8.run(buf333, buf334, buf335, buf336, 147456, grid=grid(147456), stream=stream0) + del buf333 + del buf334 + buf337 = reinterpret_tensor(buf335, (64, 768), (768, 1), 0); del buf335 # reuse + # Source Nodes: [], Original ATen: [aten.mm] + extern_kernels.mm(reinterpret_tensor(buf336, (64, 2304), (2304, 1), 0), permute_337, out=buf337) + del permute_337 + buf338 = empty_strided_cuda((2304, 768), (768, 1), torch.float32) + # Source Nodes: [], Original ATen: [aten.mm] + extern_kernels.mm(reinterpret_tensor(buf336, 
(2304, 64), (1, 2304), 0), view, out=buf338) + del view + buf339 = empty_strided_cuda((1, 2304), (2304, 1), torch.float32) + # Source Nodes: [], Original ATen: [aten.sum] + triton_per_fused_sum_9.run(buf336, buf339, 2304, 64, grid=grid(2304), stream=stream0) + del buf336 + buf345 = empty_strided_cuda((1024, 768), (768, 1), torch.float32) + # Source Nodes: [], Original ATen: [aten.embedding_dense_backward] + triton_poi_fused_embedding_dense_backward_10.run(buf345, 786432, grid=grid(786432), stream=stream0) + buf347 = empty_strided_cuda((50304, 768), (768, 1), torch.float32) + # Source Nodes: [], Original ATen: [aten.embedding_dense_backward] + triton_poi_fused_embedding_dense_backward_11.run(buf347, 38633472, grid=grid(38633472), stream=stream0) + buf344 = buf328; del buf328 # reuse + # Source Nodes: [], Original ATen: [aten.add, aten.embedding_dense_backward, aten.native_layer_norm_backward] + triton_per_fused_add_embedding_dense_backward_native_layer_norm_backward_12.run(buf344, buf337, primals_4, mul, div_24, unsqueeze, primals_1, buf345, buf347, 64, 768, grid=grid(64), stream=stream0) + del buf344 + del div_24 + del primals_1 + del primals_4 + del unsqueeze + buf342 = empty_strided_cuda((768, ), (1, ), torch.float32) + buf343 = empty_strided_cuda((768, ), (1, ), torch.float32) + # Source Nodes: [], Original ATen: [aten.native_layer_norm_backward] + triton_per_fused_native_layer_norm_backward_3.run(buf337, mul, buf342, buf343, 768, 64, grid=grid(768), stream=stream0) + del buf337 + del mul + buf349 = buf0; del buf0 # reuse + # Source Nodes: [], Original ATen: [aten.add] + triton_poi_fused_add_13.run(buf349, buf347, 38633472, grid=grid(38633472), stream=stream0) + del buf347 + return (None, buf349, buf345, buf342, buf343, buf338, reinterpret_tensor(buf339, (2304, ), (1, ), 0), buf330, reinterpret_tensor(buf331, (768, ), (1, ), 0), buf326, buf327, buf322, reinterpret_tensor(buf323, (3072, ), (1, ), 0), buf318, reinterpret_tensor(buf319, (768, ), (1, ), 0), buf314, buf315, buf310, reinterpret_tensor(buf311, (2304, ), (1, ), 0), buf302, reinterpret_tensor(buf303, (768, ), (1, ), 0), buf298, buf299, buf294, reinterpret_tensor(buf295, (3072, ), (1, ), 0), buf290, reinterpret_tensor(buf291, (768, ), (1, ), 0), buf286, buf287, buf282, reinterpret_tensor(buf283, (2304, ), (1, ), 0), buf274, reinterpret_tensor(buf275, (768, ), (1, ), 0), buf270, buf271, buf266, reinterpret_tensor(buf267, (3072, ), (1, ), 0), buf262, reinterpret_tensor(buf263, (768, ), (1, ), 0), buf258, buf259, buf254, reinterpret_tensor(buf255, (2304, ), (1, ), 0), buf246, reinterpret_tensor(buf247, (768, ), (1, ), 0), buf242, buf243, buf238, reinterpret_tensor(buf239, (3072, ), (1, ), 0), buf234, reinterpret_tensor(buf235, (768, ), (1, ), 0), buf230, buf231, buf226, reinterpret_tensor(buf227, (2304, ), (1, ), 0), buf218, reinterpret_tensor(buf219, (768, ), (1, ), 0), buf214, buf215, buf210, reinterpret_tensor(buf211, (3072, ), (1, ), 0), buf206, reinterpret_tensor(buf207, (768, ), (1, ), 0), buf202, buf203, buf198, reinterpret_tensor(buf199, (2304, ), (1, ), 0), buf190, reinterpret_tensor(buf191, (768, ), (1, ), 0), buf186, buf187, buf182, reinterpret_tensor(buf183, (3072, ), (1, ), 0), buf178, reinterpret_tensor(buf179, (768, ), (1, ), 0), buf174, buf175, buf170, reinterpret_tensor(buf171, (2304, ), (1, ), 0), buf162, reinterpret_tensor(buf163, (768, ), (1, ), 0), buf158, buf159, buf154, reinterpret_tensor(buf155, (3072, ), (1, ), 0), buf150, reinterpret_tensor(buf151, (768, ), (1, ), 0), buf146, buf147, buf142, 
reinterpret_tensor(buf143, (2304, ), (1, ), 0), buf134, reinterpret_tensor(buf135, (768, ), (1, ), 0), buf130, buf131, buf126, reinterpret_tensor(buf127, (3072, ), (1, ), 0), buf122, reinterpret_tensor(buf123, (768, ), (1, ), 0), buf118, buf119, buf114, reinterpret_tensor(buf115, (2304, ), (1, ), 0), buf106, reinterpret_tensor(buf107, (768, ), (1, ), 0), buf102, buf103, buf98, reinterpret_tensor(buf99, (3072, ), (1, ), 0), buf94, reinterpret_tensor(buf95, (768, ), (1, ), 0), buf90, buf91, buf86, reinterpret_tensor(buf87, (2304, ), (1, ), 0), buf78, reinterpret_tensor(buf79, (768, ), (1, ), 0), buf74, buf75, buf70, reinterpret_tensor(buf71, (3072, ), (1, ), 0), buf66, reinterpret_tensor(buf67, (768, ), (1, ), 0), buf62, buf63, buf58, reinterpret_tensor(buf59, (2304, ), (1, ), 0), buf50, reinterpret_tensor(buf51, (768, ), (1, ), 0), buf46, buf47, buf42, reinterpret_tensor(buf43, (3072, ), (1, ), 0), buf38, reinterpret_tensor(buf39, (768, ), (1, ), 0), buf34, buf35, buf30, reinterpret_tensor(buf31, (2304, ), (1, ), 0), buf22, reinterpret_tensor(buf23, (768, ), (1, ), 0), buf18, buf19, buf14, reinterpret_tensor(buf15, (3072, ), (1, ), 0), buf10, reinterpret_tensor(buf11, (768, ), (1, ), 0), buf7, buf8, ) + + + def benchmark_compiled_module(times=10, repeat=10): + from torch._dynamo.testing import rand_strided + from torch._inductor.utils import print_performance + primals_1 = rand_strided((1, 64), (64, 1), device='cuda:0', dtype=torch.int64) + primals_4 = rand_strided((768, ), (1, ), device='cuda:0', dtype=torch.float32) + primals_10 = rand_strided((768, ), (1, ), device='cuda:0', dtype=torch.float32) + primals_16 = rand_strided((768, ), (1, ), device='cuda:0', dtype=torch.float32) + primals_22 = rand_strided((768, ), (1, ), device='cuda:0', dtype=torch.float32) + primals_28 = rand_strided((768, ), (1, ), device='cuda:0', dtype=torch.float32) + primals_34 = rand_strided((768, ), (1, ), device='cuda:0', dtype=torch.float32) + primals_40 = rand_strided((768, ), (1, ), device='cuda:0', dtype=torch.float32) + primals_46 = rand_strided((768, ), (1, ), device='cuda:0', dtype=torch.float32) + primals_52 = rand_strided((768, ), (1, ), device='cuda:0', dtype=torch.float32) + primals_58 = rand_strided((768, ), (1, ), device='cuda:0', dtype=torch.float32) + primals_64 = rand_strided((768, ), (1, ), device='cuda:0', dtype=torch.float32) + primals_70 = rand_strided((768, ), (1, ), device='cuda:0', dtype=torch.float32) + primals_76 = rand_strided((768, ), (1, ), device='cuda:0', dtype=torch.float32) + primals_82 = rand_strided((768, ), (1, ), device='cuda:0', dtype=torch.float32) + primals_88 = rand_strided((768, ), (1, ), device='cuda:0', dtype=torch.float32) + primals_94 = rand_strided((768, ), (1, ), device='cuda:0', dtype=torch.float32) + primals_100 = rand_strided((768, ), (1, ), device='cuda:0', dtype=torch.float32) + primals_106 = rand_strided((768, ), (1, ), device='cuda:0', dtype=torch.float32) + primals_112 = rand_strided((768, ), (1, ), device='cuda:0', dtype=torch.float32) + primals_118 = rand_strided((768, ), (1, ), device='cuda:0', dtype=torch.float32) + primals_124 = rand_strided((768, ), (1, ), device='cuda:0', dtype=torch.float32) + primals_130 = rand_strided((768, ), (1, ), device='cuda:0', dtype=torch.float32) + primals_136 = rand_strided((768, ), (1, ), device='cuda:0', dtype=torch.float32) + primals_142 = rand_strided((768, ), (1, ), device='cuda:0', dtype=torch.float32) + primals_148 = rand_strided((768, ), (1, ), device='cuda:0', dtype=torch.float32) + unsqueeze = rand_strided((1, 
64), (64, 1), device='cuda:0', dtype=torch.int64) + mul = rand_strided((1, 64, 768), (49152, 768, 1), device='cuda:0', dtype=torch.float32) + view = rand_strided((64, 768), (768, 1), device='cuda:0', dtype=torch.float32) + permute_1 = rand_strided((1, 12, 64, 64), (147456, 64, 2304, 1), device='cuda:0', dtype=torch.float32) + permute_2 = rand_strided((1, 12, 64, 64), (147456, 64, 2304, 1), device='cuda:0', dtype=torch.float32) + permute_3 = rand_strided((1, 12, 64, 64), (147456, 64, 2304, 1), device='cuda:0', dtype=torch.float32) + getitem_5 = rand_strided((1, 12, 64, 64), (49152, 64, 768, 1), device='cuda:0', dtype=torch.float32) + getitem_6 = rand_strided((1, 12, 64), (768, 64, 1), device='cuda:0', dtype=torch.float32) + getitem_7 = rand_strided((), (), device='cuda:0', dtype=torch.int64) + getitem_8 = rand_strided((), (), device='cuda:0', dtype=torch.int64) + mul_2 = rand_strided((1, 64, 768), (49152, 768, 1), device='cuda:0', dtype=torch.float32) + view_8 = rand_strided((64, 768), (768, 1), device='cuda:0', dtype=torch.float32) + addmm_2 = rand_strided((64, 3072), (3072, 1), device='cuda:0', dtype=torch.float32) + view_10 = rand_strided((64, 3072), (3072, 1), device='cuda:0', dtype=torch.float32) + mul_8 = rand_strided((1, 64, 768), (49152, 768, 1), device='cuda:0', dtype=torch.float32) + view_12 = rand_strided((64, 768), (768, 1), device='cuda:0', dtype=torch.float32) + permute_9 = rand_strided((1, 12, 64, 64), (147456, 64, 2304, 1), device='cuda:0', dtype=torch.float32) + permute_10 = rand_strided((1, 12, 64, 64), (147456, 64, 2304, 1), device='cuda:0', dtype=torch.float32) + permute_11 = rand_strided((1, 12, 64, 64), (147456, 64, 2304, 1), device='cuda:0', dtype=torch.float32) + getitem_16 = rand_strided((1, 12, 64, 64), (49152, 64, 768, 1), device='cuda:0', dtype=torch.float32) + getitem_17 = rand_strided((1, 12, 64), (768, 64, 1), device='cuda:0', dtype=torch.float32) + getitem_18 = rand_strided((), (), device='cuda:0', dtype=torch.int64) + getitem_19 = rand_strided((), (), device='cuda:0', dtype=torch.int64) + mul_10 = rand_strided((1, 64, 768), (49152, 768, 1), device='cuda:0', dtype=torch.float32) + view_20 = rand_strided((64, 768), (768, 1), device='cuda:0', dtype=torch.float32) + addmm_6 = rand_strided((64, 3072), (3072, 1), device='cuda:0', dtype=torch.float32) + view_22 = rand_strided((64, 3072), (3072, 1), device='cuda:0', dtype=torch.float32) + mul_16 = rand_strided((1, 64, 768), (49152, 768, 1), device='cuda:0', dtype=torch.float32) + view_24 = rand_strided((64, 768), (768, 1), device='cuda:0', dtype=torch.float32) + permute_17 = rand_strided((1, 12, 64, 64), (147456, 64, 2304, 1), device='cuda:0', dtype=torch.float32) + permute_18 = rand_strided((1, 12, 64, 64), (147456, 64, 2304, 1), device='cuda:0', dtype=torch.float32) + permute_19 = rand_strided((1, 12, 64, 64), (147456, 64, 2304, 1), device='cuda:0', dtype=torch.float32) + getitem_27 = rand_strided((1, 12, 64, 64), (49152, 64, 768, 1), device='cuda:0', dtype=torch.float32) + getitem_28 = rand_strided((1, 12, 64), (768, 64, 1), device='cuda:0', dtype=torch.float32) + getitem_29 = rand_strided((), (), device='cuda:0', dtype=torch.int64) + getitem_30 = rand_strided((), (), device='cuda:0', dtype=torch.int64) + mul_18 = rand_strided((1, 64, 768), (49152, 768, 1), device='cuda:0', dtype=torch.float32) + view_32 = rand_strided((64, 768), (768, 1), device='cuda:0', dtype=torch.float32) + addmm_10 = rand_strided((64, 3072), (3072, 1), device='cuda:0', dtype=torch.float32) + view_34 = rand_strided((64, 3072), (3072, 1), 
device='cuda:0', dtype=torch.float32) + mul_24 = rand_strided((1, 64, 768), (49152, 768, 1), device='cuda:0', dtype=torch.float32) + view_36 = rand_strided((64, 768), (768, 1), device='cuda:0', dtype=torch.float32) + permute_25 = rand_strided((1, 12, 64, 64), (147456, 64, 2304, 1), device='cuda:0', dtype=torch.float32) + permute_26 = rand_strided((1, 12, 64, 64), (147456, 64, 2304, 1), device='cuda:0', dtype=torch.float32) + permute_27 = rand_strided((1, 12, 64, 64), (147456, 64, 2304, 1), device='cuda:0', dtype=torch.float32) + getitem_38 = rand_strided((1, 12, 64, 64), (49152, 64, 768, 1), device='cuda:0', dtype=torch.float32) + getitem_39 = rand_strided((1, 12, 64), (768, 64, 1), device='cuda:0', dtype=torch.float32) + getitem_40 = rand_strided((), (), device='cuda:0', dtype=torch.int64) + getitem_41 = rand_strided((), (), device='cuda:0', dtype=torch.int64) + mul_26 = rand_strided((1, 64, 768), (49152, 768, 1), device='cuda:0', dtype=torch.float32) + view_44 = rand_strided((64, 768), (768, 1), device='cuda:0', dtype=torch.float32) + addmm_14 = rand_strided((64, 3072), (3072, 1), device='cuda:0', dtype=torch.float32) + view_46 = rand_strided((64, 3072), (3072, 1), device='cuda:0', dtype=torch.float32) + mul_32 = rand_strided((1, 64, 768), (49152, 768, 1), device='cuda:0', dtype=torch.float32) + view_48 = rand_strided((64, 768), (768, 1), device='cuda:0', dtype=torch.float32) + permute_33 = rand_strided((1, 12, 64, 64), (147456, 64, 2304, 1), device='cuda:0', dtype=torch.float32) + permute_34 = rand_strided((1, 12, 64, 64), (147456, 64, 2304, 1), device='cuda:0', dtype=torch.float32) + permute_35 = rand_strided((1, 12, 64, 64), (147456, 64, 2304, 1), device='cuda:0', dtype=torch.float32) + getitem_49 = rand_strided((1, 12, 64, 64), (49152, 64, 768, 1), device='cuda:0', dtype=torch.float32) + getitem_50 = rand_strided((1, 12, 64), (768, 64, 1), device='cuda:0', dtype=torch.float32) + getitem_51 = rand_strided((), (), device='cuda:0', dtype=torch.int64) + getitem_52 = rand_strided((), (), device='cuda:0', dtype=torch.int64) + mul_34 = rand_strided((1, 64, 768), (49152, 768, 1), device='cuda:0', dtype=torch.float32) + view_56 = rand_strided((64, 768), (768, 1), device='cuda:0', dtype=torch.float32) + addmm_18 = rand_strided((64, 3072), (3072, 1), device='cuda:0', dtype=torch.float32) + view_58 = rand_strided((64, 3072), (3072, 1), device='cuda:0', dtype=torch.float32) + mul_40 = rand_strided((1, 64, 768), (49152, 768, 1), device='cuda:0', dtype=torch.float32) + view_60 = rand_strided((64, 768), (768, 1), device='cuda:0', dtype=torch.float32) + permute_41 = rand_strided((1, 12, 64, 64), (147456, 64, 2304, 1), device='cuda:0', dtype=torch.float32) + permute_42 = rand_strided((1, 12, 64, 64), (147456, 64, 2304, 1), device='cuda:0', dtype=torch.float32) + permute_43 = rand_strided((1, 12, 64, 64), (147456, 64, 2304, 1), device='cuda:0', dtype=torch.float32) + getitem_60 = rand_strided((1, 12, 64, 64), (49152, 64, 768, 1), device='cuda:0', dtype=torch.float32) + getitem_61 = rand_strided((1, 12, 64), (768, 64, 1), device='cuda:0', dtype=torch.float32) + getitem_62 = rand_strided((), (), device='cuda:0', dtype=torch.int64) + getitem_63 = rand_strided((), (), device='cuda:0', dtype=torch.int64) + mul_42 = rand_strided((1, 64, 768), (49152, 768, 1), device='cuda:0', dtype=torch.float32) + view_68 = rand_strided((64, 768), (768, 1), device='cuda:0', dtype=torch.float32) + addmm_22 = rand_strided((64, 3072), (3072, 1), device='cuda:0', dtype=torch.float32) + view_70 = rand_strided((64, 3072), (3072, 
1), device='cuda:0', dtype=torch.float32) + mul_48 = rand_strided((1, 64, 768), (49152, 768, 1), device='cuda:0', dtype=torch.float32) + view_72 = rand_strided((64, 768), (768, 1), device='cuda:0', dtype=torch.float32) + permute_49 = rand_strided((1, 12, 64, 64), (147456, 64, 2304, 1), device='cuda:0', dtype=torch.float32) + permute_50 = rand_strided((1, 12, 64, 64), (147456, 64, 2304, 1), device='cuda:0', dtype=torch.float32) + permute_51 = rand_strided((1, 12, 64, 64), (147456, 64, 2304, 1), device='cuda:0', dtype=torch.float32) + getitem_71 = rand_strided((1, 12, 64, 64), (49152, 64, 768, 1), device='cuda:0', dtype=torch.float32) + getitem_72 = rand_strided((1, 12, 64), (768, 64, 1), device='cuda:0', dtype=torch.float32) + getitem_73 = rand_strided((), (), device='cuda:0', dtype=torch.int64) + getitem_74 = rand_strided((), (), device='cuda:0', dtype=torch.int64) + mul_50 = rand_strided((1, 64, 768), (49152, 768, 1), device='cuda:0', dtype=torch.float32) + view_80 = rand_strided((64, 768), (768, 1), device='cuda:0', dtype=torch.float32) + addmm_26 = rand_strided((64, 3072), (3072, 1), device='cuda:0', dtype=torch.float32) + view_82 = rand_strided((64, 3072), (3072, 1), device='cuda:0', dtype=torch.float32) + mul_56 = rand_strided((1, 64, 768), (49152, 768, 1), device='cuda:0', dtype=torch.float32) + view_84 = rand_strided((64, 768), (768, 1), device='cuda:0', dtype=torch.float32) + permute_57 = rand_strided((1, 12, 64, 64), (147456, 64, 2304, 1), device='cuda:0', dtype=torch.float32) + permute_58 = rand_strided((1, 12, 64, 64), (147456, 64, 2304, 1), device='cuda:0', dtype=torch.float32) + permute_59 = rand_strided((1, 12, 64, 64), (147456, 64, 2304, 1), device='cuda:0', dtype=torch.float32) + getitem_82 = rand_strided((1, 12, 64, 64), (49152, 64, 768, 1), device='cuda:0', dtype=torch.float32) + getitem_83 = rand_strided((1, 12, 64), (768, 64, 1), device='cuda:0', dtype=torch.float32) + getitem_84 = rand_strided((), (), device='cuda:0', dtype=torch.int64) + getitem_85 = rand_strided((), (), device='cuda:0', dtype=torch.int64) + mul_58 = rand_strided((1, 64, 768), (49152, 768, 1), device='cuda:0', dtype=torch.float32) + view_92 = rand_strided((64, 768), (768, 1), device='cuda:0', dtype=torch.float32) + addmm_30 = rand_strided((64, 3072), (3072, 1), device='cuda:0', dtype=torch.float32) + view_94 = rand_strided((64, 3072), (3072, 1), device='cuda:0', dtype=torch.float32) + mul_64 = rand_strided((1, 64, 768), (49152, 768, 1), device='cuda:0', dtype=torch.float32) + view_96 = rand_strided((64, 768), (768, 1), device='cuda:0', dtype=torch.float32) + permute_65 = rand_strided((1, 12, 64, 64), (147456, 64, 2304, 1), device='cuda:0', dtype=torch.float32) + permute_66 = rand_strided((1, 12, 64, 64), (147456, 64, 2304, 1), device='cuda:0', dtype=torch.float32) + permute_67 = rand_strided((1, 12, 64, 64), (147456, 64, 2304, 1), device='cuda:0', dtype=torch.float32) + getitem_93 = rand_strided((1, 12, 64, 64), (49152, 64, 768, 1), device='cuda:0', dtype=torch.float32) + getitem_94 = rand_strided((1, 12, 64), (768, 64, 1), device='cuda:0', dtype=torch.float32) + getitem_95 = rand_strided((), (), device='cuda:0', dtype=torch.int64) + getitem_96 = rand_strided((), (), device='cuda:0', dtype=torch.int64) + mul_66 = rand_strided((1, 64, 768), (49152, 768, 1), device='cuda:0', dtype=torch.float32) + view_104 = rand_strided((64, 768), (768, 1), device='cuda:0', dtype=torch.float32) + addmm_34 = rand_strided((64, 3072), (3072, 1), device='cuda:0', dtype=torch.float32) + view_106 = rand_strided((64, 3072), 
(3072, 1), device='cuda:0', dtype=torch.float32) + mul_72 = rand_strided((1, 64, 768), (49152, 768, 1), device='cuda:0', dtype=torch.float32) + view_108 = rand_strided((64, 768), (768, 1), device='cuda:0', dtype=torch.float32) + permute_73 = rand_strided((1, 12, 64, 64), (147456, 64, 2304, 1), device='cuda:0', dtype=torch.float32) + permute_74 = rand_strided((1, 12, 64, 64), (147456, 64, 2304, 1), device='cuda:0', dtype=torch.float32) + permute_75 = rand_strided((1, 12, 64, 64), (147456, 64, 2304, 1), device='cuda:0', dtype=torch.float32) + getitem_104 = rand_strided((1, 12, 64, 64), (49152, 64, 768, 1), device='cuda:0', dtype=torch.float32) + getitem_105 = rand_strided((1, 12, 64), (768, 64, 1), device='cuda:0', dtype=torch.float32) + getitem_106 = rand_strided((), (), device='cuda:0', dtype=torch.int64) + getitem_107 = rand_strided((), (), device='cuda:0', dtype=torch.int64) + mul_74 = rand_strided((1, 64, 768), (49152, 768, 1), device='cuda:0', dtype=torch.float32) + view_116 = rand_strided((64, 768), (768, 1), device='cuda:0', dtype=torch.float32) + addmm_38 = rand_strided((64, 3072), (3072, 1), device='cuda:0', dtype=torch.float32) + view_118 = rand_strided((64, 3072), (3072, 1), device='cuda:0', dtype=torch.float32) + mul_80 = rand_strided((1, 64, 768), (49152, 768, 1), device='cuda:0', dtype=torch.float32) + view_120 = rand_strided((64, 768), (768, 1), device='cuda:0', dtype=torch.float32) + permute_81 = rand_strided((1, 12, 64, 64), (147456, 64, 2304, 1), device='cuda:0', dtype=torch.float32) + permute_82 = rand_strided((1, 12, 64, 64), (147456, 64, 2304, 1), device='cuda:0', dtype=torch.float32) + permute_83 = rand_strided((1, 12, 64, 64), (147456, 64, 2304, 1), device='cuda:0', dtype=torch.float32) + getitem_115 = rand_strided((1, 12, 64, 64), (49152, 64, 768, 1), device='cuda:0', dtype=torch.float32) + getitem_116 = rand_strided((1, 12, 64), (768, 64, 1), device='cuda:0', dtype=torch.float32) + getitem_117 = rand_strided((), (), device='cuda:0', dtype=torch.int64) + getitem_118 = rand_strided((), (), device='cuda:0', dtype=torch.int64) + mul_82 = rand_strided((1, 64, 768), (49152, 768, 1), device='cuda:0', dtype=torch.float32) + view_128 = rand_strided((64, 768), (768, 1), device='cuda:0', dtype=torch.float32) + addmm_42 = rand_strided((64, 3072), (3072, 1), device='cuda:0', dtype=torch.float32) + view_130 = rand_strided((64, 3072), (3072, 1), device='cuda:0', dtype=torch.float32) + mul_88 = rand_strided((1, 64, 768), (49152, 768, 1), device='cuda:0', dtype=torch.float32) + view_132 = rand_strided((64, 768), (768, 1), device='cuda:0', dtype=torch.float32) + permute_89 = rand_strided((1, 12, 64, 64), (147456, 64, 2304, 1), device='cuda:0', dtype=torch.float32) + permute_90 = rand_strided((1, 12, 64, 64), (147456, 64, 2304, 1), device='cuda:0', dtype=torch.float32) + permute_91 = rand_strided((1, 12, 64, 64), (147456, 64, 2304, 1), device='cuda:0', dtype=torch.float32) + getitem_126 = rand_strided((1, 12, 64, 64), (49152, 64, 768, 1), device='cuda:0', dtype=torch.float32) + getitem_127 = rand_strided((1, 12, 64), (768, 64, 1), device='cuda:0', dtype=torch.float32) + getitem_128 = rand_strided((), (), device='cuda:0', dtype=torch.int64) + getitem_129 = rand_strided((), (), device='cuda:0', dtype=torch.int64) + mul_90 = rand_strided((1, 64, 768), (49152, 768, 1), device='cuda:0', dtype=torch.float32) + view_140 = rand_strided((64, 768), (768, 1), device='cuda:0', dtype=torch.float32) + addmm_46 = rand_strided((64, 3072), (3072, 1), device='cuda:0', dtype=torch.float32) + view_142 = 
rand_strided((64, 3072), (3072, 1), device='cuda:0', dtype=torch.float32) + mul_96 = rand_strided((1, 64, 768), (49152, 768, 1), device='cuda:0', dtype=torch.float32) + full_default = rand_strided((1, ), (1, ), device='cuda:0', dtype=torch.int64) + view_144 = rand_strided((1, 768), (768, 1), device='cuda:0', dtype=torch.float32) + permute_99 = rand_strided((50304, 768), (768, 1), device='cuda:0', dtype=torch.float32) + div = rand_strided((1, 64, 1), (64, 1, 1), device='cuda:0', dtype=torch.float32) + permute_101 = rand_strided((768, 3072), (3072, 1), device='cuda:0', dtype=torch.float32) + permute_105 = rand_strided((3072, 768), (768, 1), device='cuda:0', dtype=torch.float32) + div_1 = rand_strided((1, 64, 1), (64, 1, 1), device='cuda:0', dtype=torch.float32) + permute_109 = rand_strided((768, 768), (768, 1), device='cuda:0', dtype=torch.float32) + permute_117 = rand_strided((2304, 768), (768, 1), device='cuda:0', dtype=torch.float32) + div_2 = rand_strided((1, 64, 1), (64, 1, 1), device='cuda:0', dtype=torch.float32) + permute_121 = rand_strided((768, 3072), (3072, 1), device='cuda:0', dtype=torch.float32) + permute_125 = rand_strided((3072, 768), (768, 1), device='cuda:0', dtype=torch.float32) + div_3 = rand_strided((1, 64, 1), (64, 1, 1), device='cuda:0', dtype=torch.float32) + permute_129 = rand_strided((768, 768), (768, 1), device='cuda:0', dtype=torch.float32) + permute_137 = rand_strided((2304, 768), (768, 1), device='cuda:0', dtype=torch.float32) + div_4 = rand_strided((1, 64, 1), (64, 1, 1), device='cuda:0', dtype=torch.float32) + permute_141 = rand_strided((768, 3072), (3072, 1), device='cuda:0', dtype=torch.float32) + permute_145 = rand_strided((3072, 768), (768, 1), device='cuda:0', dtype=torch.float32) + div_5 = rand_strided((1, 64, 1), (64, 1, 1), device='cuda:0', dtype=torch.float32) + permute_149 = rand_strided((768, 768), (768, 1), device='cuda:0', dtype=torch.float32) + permute_157 = rand_strided((2304, 768), (768, 1), device='cuda:0', dtype=torch.float32) + div_6 = rand_strided((1, 64, 1), (64, 1, 1), device='cuda:0', dtype=torch.float32) + permute_161 = rand_strided((768, 3072), (3072, 1), device='cuda:0', dtype=torch.float32) + permute_165 = rand_strided((3072, 768), (768, 1), device='cuda:0', dtype=torch.float32) + div_7 = rand_strided((1, 64, 1), (64, 1, 1), device='cuda:0', dtype=torch.float32) + permute_169 = rand_strided((768, 768), (768, 1), device='cuda:0', dtype=torch.float32) + permute_177 = rand_strided((2304, 768), (768, 1), device='cuda:0', dtype=torch.float32) + div_8 = rand_strided((1, 64, 1), (64, 1, 1), device='cuda:0', dtype=torch.float32) + permute_181 = rand_strided((768, 3072), (3072, 1), device='cuda:0', dtype=torch.float32) + permute_185 = rand_strided((3072, 768), (768, 1), device='cuda:0', dtype=torch.float32) + div_9 = rand_strided((1, 64, 1), (64, 1, 1), device='cuda:0', dtype=torch.float32) + permute_189 = rand_strided((768, 768), (768, 1), device='cuda:0', dtype=torch.float32) + permute_197 = rand_strided((2304, 768), (768, 1), device='cuda:0', dtype=torch.float32) + div_10 = rand_strided((1, 64, 1), (64, 1, 1), device='cuda:0', dtype=torch.float32) + permute_201 = rand_strided((768, 3072), (3072, 1), device='cuda:0', dtype=torch.float32) + permute_205 = rand_strided((3072, 768), (768, 1), device='cuda:0', dtype=torch.float32) + div_11 = rand_strided((1, 64, 1), (64, 1, 1), device='cuda:0', dtype=torch.float32) + permute_209 = rand_strided((768, 768), (768, 1), device='cuda:0', dtype=torch.float32) + permute_217 = rand_strided((2304, 768), 
(768, 1), device='cuda:0', dtype=torch.float32) + div_12 = rand_strided((1, 64, 1), (64, 1, 1), device='cuda:0', dtype=torch.float32) + permute_221 = rand_strided((768, 3072), (3072, 1), device='cuda:0', dtype=torch.float32) + permute_225 = rand_strided((3072, 768), (768, 1), device='cuda:0', dtype=torch.float32) + div_13 = rand_strided((1, 64, 1), (64, 1, 1), device='cuda:0', dtype=torch.float32) + permute_229 = rand_strided((768, 768), (768, 1), device='cuda:0', dtype=torch.float32) + permute_237 = rand_strided((2304, 768), (768, 1), device='cuda:0', dtype=torch.float32) + div_14 = rand_strided((1, 64, 1), (64, 1, 1), device='cuda:0', dtype=torch.float32) + permute_241 = rand_strided((768, 3072), (3072, 1), device='cuda:0', dtype=torch.float32) + permute_245 = rand_strided((3072, 768), (768, 1), device='cuda:0', dtype=torch.float32) + div_15 = rand_strided((1, 64, 1), (64, 1, 1), device='cuda:0', dtype=torch.float32) + permute_249 = rand_strided((768, 768), (768, 1), device='cuda:0', dtype=torch.float32) + permute_257 = rand_strided((2304, 768), (768, 1), device='cuda:0', dtype=torch.float32) + div_16 = rand_strided((1, 64, 1), (64, 1, 1), device='cuda:0', dtype=torch.float32) + permute_261 = rand_strided((768, 3072), (3072, 1), device='cuda:0', dtype=torch.float32) + permute_265 = rand_strided((3072, 768), (768, 1), device='cuda:0', dtype=torch.float32) + div_17 = rand_strided((1, 64, 1), (64, 1, 1), device='cuda:0', dtype=torch.float32) + permute_269 = rand_strided((768, 768), (768, 1), device='cuda:0', dtype=torch.float32) + permute_277 = rand_strided((2304, 768), (768, 1), device='cuda:0', dtype=torch.float32) + div_18 = rand_strided((1, 64, 1), (64, 1, 1), device='cuda:0', dtype=torch.float32) + permute_281 = rand_strided((768, 3072), (3072, 1), device='cuda:0', dtype=torch.float32) + permute_285 = rand_strided((3072, 768), (768, 1), device='cuda:0', dtype=torch.float32) + div_19 = rand_strided((1, 64, 1), (64, 1, 1), device='cuda:0', dtype=torch.float32) + permute_289 = rand_strided((768, 768), (768, 1), device='cuda:0', dtype=torch.float32) + permute_297 = rand_strided((2304, 768), (768, 1), device='cuda:0', dtype=torch.float32) + div_20 = rand_strided((1, 64, 1), (64, 1, 1), device='cuda:0', dtype=torch.float32) + permute_301 = rand_strided((768, 3072), (3072, 1), device='cuda:0', dtype=torch.float32) + permute_305 = rand_strided((3072, 768), (768, 1), device='cuda:0', dtype=torch.float32) + div_21 = rand_strided((1, 64, 1), (64, 1, 1), device='cuda:0', dtype=torch.float32) + permute_309 = rand_strided((768, 768), (768, 1), device='cuda:0', dtype=torch.float32) + permute_317 = rand_strided((2304, 768), (768, 1), device='cuda:0', dtype=torch.float32) + div_22 = rand_strided((1, 64, 1), (64, 1, 1), device='cuda:0', dtype=torch.float32) + permute_321 = rand_strided((768, 3072), (3072, 1), device='cuda:0', dtype=torch.float32) + permute_325 = rand_strided((3072, 768), (768, 1), device='cuda:0', dtype=torch.float32) + div_23 = rand_strided((1, 64, 1), (64, 1, 1), device='cuda:0', dtype=torch.float32) + permute_329 = rand_strided((768, 768), (768, 1), device='cuda:0', dtype=torch.float32) + permute_337 = rand_strided((2304, 768), (768, 1), device='cuda:0', dtype=torch.float32) + div_24 = rand_strided((1, 64, 1), (64, 1, 1), device='cuda:0', dtype=torch.float32) + tangents_1 = rand_strided((1, 1, 50304), (50304, 50304, 1), device='cuda:0', dtype=torch.float32) + fn = lambda: call([primals_1, primals_4, primals_10, primals_16, primals_22, primals_28, primals_34, primals_40, primals_46, 
primals_52, primals_58, primals_64, primals_70, primals_76, primals_82, primals_88, primals_94, primals_100, primals_106, primals_112, primals_118, primals_124, primals_130, primals_136, primals_142, primals_148, unsqueeze, mul, view, permute_1, permute_2, permute_3, getitem_5, getitem_6, getitem_7, getitem_8, mul_2, view_8, addmm_2, view_10, mul_8, view_12, permute_9, permute_10, permute_11, getitem_16, getitem_17, getitem_18, getitem_19, mul_10, view_20, addmm_6, view_22, mul_16, view_24, permute_17, permute_18, permute_19, getitem_27, getitem_28, getitem_29, getitem_30, mul_18, view_32, addmm_10, view_34, mul_24, view_36, permute_25, permute_26, permute_27, getitem_38, getitem_39, getitem_40, getitem_41, mul_26, view_44, addmm_14, view_46, mul_32, view_48, permute_33, permute_34, permute_35, getitem_49, getitem_50, getitem_51, getitem_52, mul_34, view_56, addmm_18, view_58, mul_40, view_60, permute_41, permute_42, permute_43, getitem_60, getitem_61, getitem_62, getitem_63, mul_42, view_68, addmm_22, view_70, mul_48, view_72, permute_49, permute_50, permute_51, getitem_71, getitem_72, getitem_73, getitem_74, mul_50, view_80, addmm_26, view_82, mul_56, view_84, permute_57, permute_58, permute_59, getitem_82, getitem_83, getitem_84, getitem_85, mul_58, view_92, addmm_30, view_94, mul_64, view_96, permute_65, permute_66, permute_67, getitem_93, getitem_94, getitem_95, getitem_96, mul_66, view_104, addmm_34, view_106, mul_72, view_108, permute_73, permute_74, permute_75, getitem_104, getitem_105, getitem_106, getitem_107, mul_74, view_116, addmm_38, view_118, mul_80, view_120, permute_81, permute_82, permute_83, getitem_115, getitem_116, getitem_117, getitem_118, mul_82, view_128, addmm_42, view_130, mul_88, view_132, permute_89, permute_90, permute_91, getitem_126, getitem_127, getitem_128, getitem_129, mul_90, view_140, addmm_46, view_142, mul_96, full_default, view_144, permute_99, div, permute_101, permute_105, div_1, permute_109, permute_117, div_2, permute_121, permute_125, div_3, permute_129, permute_137, div_4, permute_141, permute_145, div_5, permute_149, permute_157, div_6, permute_161, permute_165, div_7, permute_169, permute_177, div_8, permute_181, permute_185, div_9, permute_189, permute_197, div_10, permute_201, permute_205, div_11, permute_209, permute_217, div_12, permute_221, permute_225, div_13, permute_229, permute_237, div_14, permute_241, permute_245, div_15, permute_249, permute_257, div_16, permute_261, permute_265, div_17, permute_269, permute_277, div_18, permute_281, permute_285, div_19, permute_289, permute_297, div_20, permute_301, permute_305, div_21, permute_309, permute_317, div_22, permute_321, permute_325, div_23, permute_329, permute_337, div_24, tangents_1]) + return print_performance(fn, times=times, repeat=repeat) + + + if __name__ == "__main__": + from torch._inductor.wrapper_benchmark import compiled_module_main + compiled_module_main('nanogpt', benchmark_compiled_module) + +V0806 13:56:05.012000 4107173 torch/_dynamo/utils.py:824] {"chromium_event": {}, "has_payload": "cb330bc8adf17d5898bfa70fc4423e7d"} + { + "name": "code_gen", + "ts": 1722977765012266.5, + "args": null, + "ph": "E", + "pid": 0 + } +V0806 13:56:05.012000 4107173 torch/_dynamo/utils.py:824] {"chromium_event": {}, "has_payload": "9a7b7431fbf8818663bc6d9b02ce0e2f"} + { + "name": "GraphLowering.compile_to_module", + "ts": 1722977765012451.5, + "args": null, + "ph": "E", + "pid": 0 + } +V0806 13:56:05.128000 4107173 torch/_dynamo/utils.py:838] {"chromium_event": {}, "has_payload": 
"13ac313518e4246cb8dc2903f2b97b4b"} + { + "name": "fx_graph_cache_miss", + "ts": 1722977761862518.2, + "args": { + "key": "foijwxq2i7flux6r2ba5gws3rpialjqk5cmhfg54f7i2spz557vl", + "cache_state": "miss", + "components": [ + "[4hgegienmiaqunsqbxyycnrivovz4r63bypl5psmqilwotq5er6] gm: GraphModule()\n\n\n\ndef forward(self, primals_1, primals_4, primals_10, primals_16, primals_22, primals_28, primals_34, primals_40, primals_46, primals_52, primals_58, primals_64, primals_70, primals_76, primals_82, primals_88, primals_94, primals_100, primals_106, primals_112, primals_118, primals_124, primals_130, primals_136, primals_142, primals_148, unsqueeze, mul, view, permute_1, permute_2, permute_3, getitem_5, getitem_6, getitem_7, getitem_8, mul_2, view_8, addmm_2, view_10, mul_8, view_12, permute_9, permute_10, permute_11, getitem_16, getitem_17, getitem_18, getitem_19, mul_10, view_20, addmm_6, view_22, mul_16, view_24, permute_17, permute_18, permute_19, getitem_27, getitem_28, getitem_29, getitem_30, mul_18, view_32, addmm_10, view_34, mul_24, view_36, permute_25, permute_26, permute_27, getitem_38, getitem_39, getitem_40, getitem_41, mul_26, view_44, addmm_14, view_46, mul_32, view_48, permute_33, permute_34, permute_35, getitem_49, getitem_50, getitem_51, getitem_52, mul_34, view_56, addmm_18, view_58, mul_40, view_60, permute_41, permute_42, permute_43, getitem_60, getitem_61, getitem_62, getitem_63, mul_42, view_68, addmm_22, view_70, mul_48, view_72, permute_49, permute_50, permute_51, getitem_71, getitem_72, getitem_73, getitem_74, mul_50, view_80, addmm_26, view_82, mul_56, view_84, permute_57, permute_58, permute_59, getitem_82, getitem_83, getitem_84, getitem_85, mul_58, view_92, addmm_30, view_94, mul_64, view_96, permute_65, permute_66, permute_67, getitem_93, getitem_94, getitem_95, getitem_96, mul_66, view_104, addmm_34, view_106, mul_72, view_108, permute_73, permute_74, permute_75, getitem_104, getitem_105, getitem_106, getitem_107, mul_74, view_116, addmm_38, view_118, mul_80, view_120, permute_81, permute_82, permute_83, getitem_115, getitem_116, getitem_117, getitem_118, mul_82, view_128, addmm_42, view_130, mul_88, view_132, permute_89, permute_90, permute_91, getitem_126, getitem_127, getitem_128, getitem_129, mul_90, view_140, addmm_46, view_142, mul_96, full_default, view_144, permute_99, div, permute_101, permute_105, div_1, permute_109, permute_117, div_2, permute_121, permute_125, div_3, permute_129, permute_137, div_4, permute_141, permute_145, div_5, permute_149, permute_157, div_6, permute_161, permute_165, div_7, permute_169, permute_177, div_8, permute_181, permute_185, div_9, permute_189, permute_197, div_10, permute_201, permute_205, div_11, permute_209, permute_217, div_12, permute_221, permute_225, div_13, permute_229, permute_237, div_14, permute_241, permute_245, div_15, permute_249, permute_257, div_16, permute_261, permute_265, div_17, permute_269, permute_277, div_18, permute_281, permute_285, div_19, permute_289, permute_297, div_20, permute_301, permute_305, div_21, permute_309, permute_317, div_22, permute_321, permute_325, div_23, permute_329, permute_337, div_24, tangents_1):\n view_146 = torch.ops.aten.view.default(tangents_1, [1, 50304]); tangents_1 = None\n permute_97 = torch.ops.aten.permute.default(view_146, [1, 0])\n mm_1 = torch.ops.aten.mm.default(permute_97, view_144); permute_97 = view_144 = None\n permute_98 = torch.ops.aten.permute.default(mm_1, [1, 0]); mm_1 = None\n mm_2 = torch.ops.aten.mm.default(view_146, permute_99); view_146 = 
permute_99 = None\n view_147 = torch.ops.aten.view.default(mm_2, [1, 1, 768]); mm_2 = None\n permute_100 = torch.ops.aten.permute.default(permute_98, [1, 0]); permute_98 = None\n full_default_1 = torch.ops.aten.full.default([1, 64, 768], 0, dtype = torch.float32, layout = torch.strided, device = device(type='cuda', index=0), pin_memory = False)\n index_put = torch.ops.aten.index_put.default(full_default_1, [None, full_default], view_147, True); full_default_1 = full_default = view_147 = None\n mul_99 = torch.ops.aten.mul.Tensor(index_put, primals_148); primals_148 = None\n mul_100 = torch.ops.aten.mul.Tensor(mul_99, 768)\n sum_1 = torch.ops.aten.sum.dim_IntList(mul_99, [2], True)\n mul_101 = torch.ops.aten.mul.Tensor(mul_99, mul_96); mul_99 = None\n sum_2 = torch.ops.aten.sum.dim_IntList(mul_101, [2], True); mul_101 = None\n mul_102 = torch.ops.aten.mul.Tensor(mul_96, sum_2); sum_2 = None\n sub_26 = torch.ops.aten.sub.Tensor(mul_100, sum_1); mul_100 = sum_1 = None\n sub_27 = torch.ops.aten.sub.Tensor(sub_26, mul_102); sub_26 = mul_102 = None\n mul_103 = torch.ops.aten.mul.Tensor(div, sub_27); div = sub_27 = None\n mul_104 = torch.ops.aten.mul.Tensor(index_put, mul_96); mul_96 = None\n sum_3 = torch.ops.aten.sum.dim_IntList(mul_104, [0, 1]); mul_104 = None\n sum_4 = torch.ops.aten.sum.dim_IntList(index_put, [0, 1]); index_put = None\n view_148 = torch.ops.aten.view.default(mul_103, [64, 768])\n mm_3 = torch.ops.aten.mm.default(view_148, permute_101); permute_101 = None\n permute_102 = torch.ops.aten.permute.default(view_148, [1, 0])\n mm_4 = torch.ops.aten.mm.default(permute_102, view_142); permute_102 = view_142 = None\n permute_103 = torch.ops.aten.permute.default(mm_4, [1, 0]); mm_4 = None\n sum_5 = torch.ops.aten.sum.dim_IntList(view_148, [0], True); view_148 = None\n view_149 = torch.ops.aten.view.default(sum_5, [768]); sum_5 = None\n permute_104 = torch.ops.aten.permute.default(permute_103, [1, 0]); permute_103 = None\n view_150 = torch.ops.aten.view.default(mm_3, [1, 64, 3072]); mm_3 = None\n view_141 = torch.ops.aten.view.default(addmm_46, [1, 64, 3072]); addmm_46 = None\n mul_92 = torch.ops.aten.mul.Tensor(view_141, 0.5)\n mul_105 = torch.ops.aten.mul.Tensor(view_150, mul_92); mul_92 = None\n pow_12 = torch.ops.aten.pow.Tensor_Scalar(view_141, 3.0)\n mul_93 = torch.ops.aten.mul.Tensor(pow_12, 0.044715); pow_12 = None\n add_94 = torch.ops.aten.add.Tensor(view_141, mul_93); mul_93 = None\n mul_94 = torch.ops.aten.mul.Tensor(add_94, 0.7978845608028654); add_94 = None\n tanh_11 = torch.ops.aten.tanh.default(mul_94); mul_94 = None\n add_95 = torch.ops.aten.add.Tensor(tanh_11, 1.0)\n mul_106 = torch.ops.aten.mul.Tensor(view_150, add_95); view_150 = add_95 = None\n mul_107 = torch.ops.aten.mul.Tensor(tanh_11, tanh_11); tanh_11 = None\n sub_28 = torch.ops.aten.sub.Tensor(1, mul_107); mul_107 = None\n mul_108 = torch.ops.aten.mul.Tensor(mul_105, sub_28); mul_105 = sub_28 = None\n mul_109 = torch.ops.aten.mul.Tensor(mul_108, 0.7978845608028654); mul_108 = None\n mul_110 = torch.ops.aten.mul.Tensor(mul_109, 0.044715)\n pow_13 = torch.ops.aten.pow.Tensor_Scalar(view_141, 2.0); view_141 = None\n mul_111 = torch.ops.aten.mul.Scalar(pow_13, 3.0); pow_13 = None\n mul_112 = torch.ops.aten.mul.Tensor(mul_110, mul_111); mul_110 = mul_111 = None\n add_99 = torch.ops.aten.add.Tensor(mul_109, mul_112); mul_109 = mul_112 = None\n mul_113 = torch.ops.aten.mul.Tensor(mul_106, 0.5); mul_106 = None\n add_100 = torch.ops.aten.add.Tensor(add_99, mul_113); add_99 = mul_113 = None\n view_151 = 
torch.ops.aten.view.default(add_100, [64, 3072]); add_100 = None\n mm_5 = torch.ops.aten.mm.default(view_151, permute_105); permute_105 = None\n permute_106 = torch.ops.aten.permute.default(view_151, [1, 0])\n mm_6 = torch.ops.aten.mm.default(permute_106, view_140); permute_106 = view_140 = None\n permute_107 = torch.ops.aten.permute.default(mm_6, [1, 0]); mm_6 = None\n sum_6 = torch.ops.aten.sum.dim_IntList(view_151, [0], True); view_151 = None\n view_152 = torch.ops.aten.view.default(sum_6, [3072]); sum_6 = None\n permute_108 = torch.ops.aten.permute.default(permute_107, [1, 0]); permute_107 = None\n view_153 = torch.ops.aten.view.default(mm_5, [1, 64, 768]); mm_5 = None\n mul_115 = torch.ops.aten.mul.Tensor(view_153, primals_142); primals_142 = None\n mul_116 = torch.ops.aten.mul.Tensor(mul_115, 768)\n sum_7 = torch.ops.aten.sum.dim_IntList(mul_115, [2], True)\n mul_117 = torch.ops.aten.mul.Tensor(mul_115, mul_90); mul_115 = None\n sum_8 = torch.ops.aten.sum.dim_IntList(mul_117, [2], True); mul_117 = None\n mul_118 = torch.ops.aten.mul.Tensor(mul_90, sum_8); sum_8 = None\n sub_30 = torch.ops.aten.sub.Tensor(mul_116, sum_7); mul_116 = sum_7 = None\n sub_31 = torch.ops.aten.sub.Tensor(sub_30, mul_118); sub_30 = mul_118 = None\n mul_119 = torch.ops.aten.mul.Tensor(div_1, sub_31); div_1 = sub_31 = None\n mul_120 = torch.ops.aten.mul.Tensor(view_153, mul_90); mul_90 = None\n sum_9 = torch.ops.aten.sum.dim_IntList(mul_120, [0, 1]); mul_120 = None\n sum_10 = torch.ops.aten.sum.dim_IntList(view_153, [0, 1]); view_153 = None\n add_101 = torch.ops.aten.add.Tensor(mul_103, mul_119); mul_103 = mul_119 = None\n view_154 = torch.ops.aten.view.default(add_101, [64, 768])\n mm_7 = torch.ops.aten.mm.default(view_154, permute_109); permute_109 = None\n permute_110 = torch.ops.aten.permute.default(view_154, [1, 0])\n permute_92 = torch.ops.aten.permute.default(getitem_126, [0, 2, 1, 3])\n view_137 = torch.ops.aten.view.default(permute_92, [1, 64, 768]); permute_92 = None\n view_138 = torch.ops.aten.view.default(view_137, [64, 768]); view_137 = None\n mm_8 = torch.ops.aten.mm.default(permute_110, view_138); permute_110 = view_138 = None\n permute_111 = torch.ops.aten.permute.default(mm_8, [1, 0]); mm_8 = None\n sum_11 = torch.ops.aten.sum.dim_IntList(view_154, [0], True); view_154 = None\n view_155 = torch.ops.aten.view.default(sum_11, [768]); sum_11 = None\n permute_112 = torch.ops.aten.permute.default(permute_111, [1, 0]); permute_111 = None\n view_156 = torch.ops.aten.view.default(mm_7, [1, 64, 768]); mm_7 = None\n view_157 = torch.ops.aten.view.default(view_156, [1, 64, 12, 64]); view_156 = None\n permute_113 = torch.ops.aten.permute.default(view_157, [0, 2, 1, 3]); view_157 = None\n _scaled_dot_product_efficient_attention_backward = torch.ops.aten._scaled_dot_product_efficient_attention_backward.default(permute_113, permute_90, permute_89, permute_91, None, getitem_126, getitem_127, getitem_128, getitem_129, 0.0, [True, True, True, False], True); permute_113 = permute_90 = permute_89 = permute_91 = getitem_126 = getitem_127 = getitem_128 = getitem_129 = None\n getitem_134 = _scaled_dot_product_efficient_attention_backward[0]\n getitem_135 = _scaled_dot_product_efficient_attention_backward[1]\n getitem_136 = _scaled_dot_product_efficient_attention_backward[2]; _scaled_dot_product_efficient_attention_backward = None\n permute_114 = torch.ops.aten.permute.default(getitem_136, [0, 2, 1, 3]); getitem_136 = None\n view_158 = torch.ops.aten.view.default(permute_114, [1, 64, 768]); permute_114 = None\n 
permute_115 = torch.ops.aten.permute.default(getitem_134, [0, 2, 1, 3]); getitem_134 = None\n view_159 = torch.ops.aten.view.default(permute_115, [1, 64, 768]); permute_115 = None\n permute_116 = torch.ops.aten.permute.default(getitem_135, [0, 2, 1, 3]); getitem_135 = None\n view_160 = torch.ops.aten.view.default(permute_116, [1, 64, 768]); permute_116 = None\n cat = torch.ops.aten.cat.default([view_159, view_160, view_158], 2); view_159 = view_160 = view_158 = None\n view_161 = torch.ops.aten.view.default(cat, [64, 2304]); cat = None\n mm_9 = torch.ops.aten.mm.default(view_161, permute_117); permute_117 = None\n permute_118 = torch.ops.aten.permute.default(view_161, [1, 0])\n mm_10 = torch.ops.aten.mm.default(permute_118, view_132); permute_118 = view_132 = None\n permute_119 = torch.ops.aten.permute.default(mm_10, [1, 0]); mm_10 = None\n sum_12 = torch.ops.aten.sum.dim_IntList(view_161, [0], True); view_161 = None\n view_162 = torch.ops.aten.view.default(sum_12, [2304]); sum_12 = None\n permute_120 = torch.ops.aten.permute.default(permute_119, [1, 0]); permute_119 = None\n view_163 = torch.ops.aten.view.default(mm_9, [1, 64, 768]); mm_9 = None\n mul_122 = torch.ops.aten.mul.Tensor(view_163, primals_136); primals_136 = None\n mul_123 = torch.ops.aten.mul.Tensor(mul_122, 768)\n sum_13 = torch.ops.aten.sum.dim_IntList(mul_122, [2], True)\n mul_124 = torch.ops.aten.mul.Tensor(mul_122, mul_88); mul_122 = None\n sum_14 = torch.ops.aten.sum.dim_IntList(mul_124, [2], True); mul_124 = None\n mul_125 = torch.ops.aten.mul.Tensor(mul_88, sum_14); sum_14 = None\n sub_33 = torch.ops.aten.sub.Tensor(mul_123, sum_13); mul_123 = sum_13 = None\n sub_34 = torch.ops.aten.sub.Tensor(sub_33, mul_125); sub_33 = mul_125 = None\n mul_126 = torch.ops.aten.mul.Tensor(div_2, sub_34); div_2 = sub_34 = None\n mul_127 = torch.ops.aten.mul.Tensor(view_163, mul_88); mul_88 = None\n sum_15 = torch.ops.aten.sum.dim_IntList(mul_127, [0, 1]); mul_127 = None\n sum_16 = torch.ops.aten.sum.dim_IntList(view_163, [0, 1]); view_163 = None\n add_102 = torch.ops.aten.add.Tensor(add_101, mul_126); add_101 = mul_126 = None\n view_164 = torch.ops.aten.view.default(add_102, [64, 768])\n mm_11 = torch.ops.aten.mm.default(view_164, permute_121); permute_121 = None\n permute_122 = torch.ops.aten.permute.default(view_164, [1, 0])\n mm_12 = torch.ops.aten.mm.default(permute_122, view_130); permute_122 = view_130 = None\n permute_123 = torch.ops.aten.permute.default(mm_12, [1, 0]); mm_12 = None\n sum_17 = torch.ops.aten.sum.dim_IntList(view_164, [0], True); view_164 = None\n view_165 = torch.ops.aten.view.default(sum_17, [768]); sum_17 = None\n permute_124 = torch.ops.aten.permute.default(permute_123, [1, 0]); permute_123 = None\n view_166 = torch.ops.aten.view.default(mm_11, [1, 64, 3072]); mm_11 = None\n view_129 = torch.ops.aten.view.default(addmm_42, [1, 64, 3072]); addmm_42 = None\n mul_84 = torch.ops.aten.mul.Tensor(view_129, 0.5)\n mul_128 = torch.ops.aten.mul.Tensor(view_166, mul_84); mul_84 = None\n pow_11 = torch.ops.aten.pow.Tensor_Scalar(view_129, 3.0)\n mul_85 = torch.ops.aten.mul.Tensor(pow_11, 0.044715); pow_11 = None\n add_86 = torch.ops.aten.add.Tensor(view_129, mul_85); mul_85 = None\n mul_86 = torch.ops.aten.mul.Tensor(add_86, 0.7978845608028654); add_86 = None\n tanh_10 = torch.ops.aten.tanh.default(mul_86); mul_86 = None\n add_87 = torch.ops.aten.add.Tensor(tanh_10, 1.0)\n mul_129 = torch.ops.aten.mul.Tensor(view_166, add_87); view_166 = add_87 = None\n mul_130 = torch.ops.aten.mul.Tensor(tanh_10, tanh_10); tanh_10 = 
None\n sub_35 = torch.ops.aten.sub.Tensor(1, mul_130); mul_130 = None\n mul_131 = torch.ops.aten.mul.Tensor(mul_128, sub_35); mul_128 = sub_35 = None\n mul_132 = torch.ops.aten.mul.Tensor(mul_131, 0.7978845608028654); mul_131 = None\n mul_133 = torch.ops.aten.mul.Tensor(mul_132, 0.044715)\n pow_14 = torch.ops.aten.pow.Tensor_Scalar(view_129, 2.0); view_129 = None\n mul_134 = torch.ops.aten.mul.Scalar(pow_14, 3.0); pow_14 = None\n mul_135 = torch.ops.aten.mul.Tensor(mul_133, mul_134); mul_133 = mul_134 = None\n add_103 = torch.ops.aten.add.Tensor(mul_132, mul_135); mul_132 = mul_135 = None\n mul_136 = torch.ops.aten.mul.Tensor(mul_129, 0.5); mul_129 = None\n add_104 = torch.ops.aten.add.Tensor(add_103, mul_136); add_103 = mul_136 = None\n view_167 = torch.ops.aten.view.default(add_104, [64, 3072]); add_104 = None\n mm_13 = torch.ops.aten.mm.default(view_167, permute_125); permute_125 = None\n permute_126 = torch.ops.aten.permute.default(view_167, [1, 0])\n mm_14 = torch.ops.aten.mm.default(permute_126, view_128); permute_126 = view_128 = None\n permute_127 = torch.ops.aten.permute.default(mm_14, [1, 0]); mm_14 = None\n sum_18 = torch.ops.aten.sum.dim_IntList(view_167, [0], True); view_167 = None\n view_168 = torch.ops.aten.view.default(sum_18, [3072]); sum_18 = None\n permute_128 = torch.ops.aten.permute.default(permute_127, [1, 0]); permute_127 = None\n view_169 = torch.ops.aten.view.default(mm_13, [1, 64, 768]); mm_13 = None\n mul_138 = torch.ops.aten.mul.Tensor(view_169, primals_130); primals_130 = None\n mul_139 = torch.ops.aten.mul.Tensor(mul_138, 768)\n sum_19 = torch.ops.aten.sum.dim_IntList(mul_138, [2], True)\n mul_140 = torch.ops.aten.mul.Tensor(mul_138, mul_82); mul_138 = None\n sum_20 = torch.ops.aten.sum.dim_IntList(mul_140, [2], True); mul_140 = None\n mul_141 = torch.ops.aten.mul.Tensor(mul_82, sum_20); sum_20 = None\n sub_37 = torch.ops.aten.sub.Tensor(mul_139, sum_19); mul_139 = sum_19 = None\n sub_38 = torch.ops.aten.sub.Tensor(sub_37, mul_141); sub_37 = mul_141 = None\n mul_142 = torch.ops.aten.mul.Tensor(div_3, sub_38); div_3 = sub_38 = None\n mul_143 = torch.ops.aten.mul.Tensor(view_169, mul_82); mul_82 = None\n sum_21 = torch.ops.aten.sum.dim_IntList(mul_143, [0, 1]); mul_143 = None\n sum_22 = torch.ops.aten.sum.dim_IntList(view_169, [0, 1]); view_169 = None\n add_105 = torch.ops.aten.add.Tensor(add_102, mul_142); add_102 = mul_142 = None\n view_170 = torch.ops.aten.view.default(add_105, [64, 768])\n mm_15 = torch.ops.aten.mm.default(view_170, permute_129); permute_129 = None\n permute_130 = torch.ops.aten.permute.default(view_170, [1, 0])\n permute_84 = torch.ops.aten.permute.default(getitem_115, [0, 2, 1, 3])\n view_125 = torch.ops.aten.view.default(permute_84, [1, 64, 768]); permute_84 = None\n view_126 = torch.ops.aten.view.default(view_125, [64, 768]); view_125 = None\n mm_16 = torch.ops.aten.mm.default(permute_130, view_126); permute_130 = view_126 = None\n permute_131 = torch.ops.aten.permute.default(mm_16, [1, 0]); mm_16 = None\n sum_23 = torch.ops.aten.sum.dim_IntList(view_170, [0], True); view_170 = None\n view_171 = torch.ops.aten.view.default(sum_23, [768]); sum_23 = None\n permute_132 = torch.ops.aten.permute.default(permute_131, [1, 0]); permute_131 = None\n view_172 = torch.ops.aten.view.default(mm_15, [1, 64, 768]); mm_15 = None\n view_173 = torch.ops.aten.view.default(view_172, [1, 64, 12, 64]); view_172 = None\n permute_133 = torch.ops.aten.permute.default(view_173, [0, 2, 1, 3]); view_173 = None\n _scaled_dot_product_efficient_attention_backward_1 = 
torch.ops.aten._scaled_dot_product_efficient_attention_backward.default(permute_133, permute_82, permute_81, permute_83, None, getitem_115, getitem_116, getitem_117, getitem_118, 0.0, [True, True, True, False], True); permute_133 = permute_82 = permute_81 = permute_83 = getitem_115 = getitem_116 = getitem_117 = getitem_118 = None\n getitem_138 = _scaled_dot_product_efficient_attention_backward_1[0]\n getitem_139 = _scaled_dot_product_efficient_attention_backward_1[1]\n getitem_140 = _scaled_dot_product_efficient_attention_backward_1[2]; _scaled_dot_product_efficient_attention_backward_1 = None\n permute_134 = torch.ops.aten.permute.default(getitem_140, [0, 2, 1, 3]); getitem_140 = None\n view_174 = torch.ops.aten.view.default(permute_134, [1, 64, 768]); permute_134 = None\n permute_135 = torch.ops.aten.permute.default(getitem_138, [0, 2, 1, 3]); getitem_138 = None\n view_175 = torch.ops.aten.view.default(permute_135, [1, 64, 768]); permute_135 = None\n permute_136 = torch.ops.aten.permute.default(getitem_139, [0, 2, 1, 3]); getitem_139 = None\n view_176 = torch.ops.aten.view.default(permute_136, [1, 64, 768]); permute_136 = None\n cat_1 = torch.ops.aten.cat.default([view_175, view_176, view_174], 2); view_175 = view_176 = view_174 = None\n view_177 = torch.ops.aten.view.default(cat_1, [64, 2304]); cat_1 = None\n mm_17 = torch.ops.aten.mm.default(view_177, permute_137); permute_137 = None\n permute_138 = torch.ops.aten.permute.default(view_177, [1, 0])\n mm_18 = torch.ops.aten.mm.default(permute_138, view_120); permute_138 = view_120 = None\n permute_139 = torch.ops.aten.permute.default(mm_18, [1, 0]); mm_18 = None\n sum_24 = torch.ops.aten.sum.dim_IntList(view_177, [0], True); view_177 = None\n view_178 = torch.ops.aten.view.default(sum_24, [2304]); sum_24 = None\n permute_140 = torch.ops.aten.permute.default(permute_139, [1, 0]); permute_139 = None\n view_179 = torch.ops.aten.view.default(mm_17, [1, 64, 768]); mm_17 = None\n mul_145 = torch.ops.aten.mul.Tensor(view_179, primals_124); primals_124 = None\n mul_146 = torch.ops.aten.mul.Tensor(mul_145, 768)\n sum_25 = torch.ops.aten.sum.dim_IntList(mul_145, [2], True)\n mul_147 = torch.ops.aten.mul.Tensor(mul_145, mul_80); mul_145 = None\n sum_26 = torch.ops.aten.sum.dim_IntList(mul_147, [2], True); mul_147 = None\n mul_148 = torch.ops.aten.mul.Tensor(mul_80, sum_26); sum_26 = None\n sub_40 = torch.ops.aten.sub.Tensor(mul_146, sum_25); mul_146 = sum_25 = None\n sub_41 = torch.ops.aten.sub.Tensor(sub_40, mul_148); sub_40 = mul_148 = None\n mul_149 = torch.ops.aten.mul.Tensor(div_4, sub_41); div_4 = sub_41 = None\n mul_150 = torch.ops.aten.mul.Tensor(view_179, mul_80); mul_80 = None\n sum_27 = torch.ops.aten.sum.dim_IntList(mul_150, [0, 1]); mul_150 = None\n sum_28 = torch.ops.aten.sum.dim_IntList(view_179, [0, 1]); view_179 = None\n add_106 = torch.ops.aten.add.Tensor(add_105, mul_149); add_105 = mul_149 = None\n view_180 = torch.ops.aten.view.default(add_106, [64, 768])\n mm_19 = torch.ops.aten.mm.default(view_180, permute_141); permute_141 = None\n permute_142 = torch.ops.aten.permute.default(view_180, [1, 0])\n mm_20 = torch.ops.aten.mm.default(permute_142, view_118); permute_142 = view_118 = None\n permute_143 = torch.ops.aten.permute.default(mm_20, [1, 0]); mm_20 = None\n sum_29 = torch.ops.aten.sum.dim_IntList(view_180, [0], True); view_180 = None\n view_181 = torch.ops.aten.view.default(sum_29, [768]); sum_29 = None\n permute_144 = torch.ops.aten.permute.default(permute_143, [1, 0]); permute_143 = None\n view_182 = 
torch.ops.aten.view.default(mm_19, [1, 64, 3072]); mm_19 = None\n view_117 = torch.ops.aten.view.default(addmm_38, [1, 64, 3072]); addmm_38 = None\n mul_76 = torch.ops.aten.mul.Tensor(view_117, 0.5)\n mul_151 = torch.ops.aten.mul.Tensor(view_182, mul_76); mul_76 = None\n pow_10 = torch.ops.aten.pow.Tensor_Scalar(view_117, 3.0)\n mul_77 = torch.ops.aten.mul.Tensor(pow_10, 0.044715); pow_10 = None\n add_78 = torch.ops.aten.add.Tensor(view_117, mul_77); mul_77 = None\n mul_78 = torch.ops.aten.mul.Tensor(add_78, 0.7978845608028654); add_78 = None\n tanh_9 = torch.ops.aten.tanh.default(mul_78); mul_78 = None\n add_79 = torch.ops.aten.add.Tensor(tanh_9, 1.0)\n mul_152 = torch.ops.aten.mul.Tensor(view_182, add_79); view_182 = add_79 = None\n mul_153 = torch.ops.aten.mul.Tensor(tanh_9, tanh_9); tanh_9 = None\n sub_42 = torch.ops.aten.sub.Tensor(1, mul_153); mul_153 = None\n mul_154 = torch.ops.aten.mul.Tensor(mul_151, sub_42); mul_151 = sub_42 = None\n mul_155 = torch.ops.aten.mul.Tensor(mul_154, 0.7978845608028654); mul_154 = None\n mul_156 = torch.ops.aten.mul.Tensor(mul_155, 0.044715)\n pow_15 = torch.ops.aten.pow.Tensor_Scalar(view_117, 2.0); view_117 = None\n mul_157 = torch.ops.aten.mul.Scalar(pow_15, 3.0); pow_15 = None\n mul_158 = torch.ops.aten.mul.Tensor(mul_156, mul_157); mul_156 = mul_157 = None\n add_107 = torch.ops.aten.add.Tensor(mul_155, mul_158); mul_155 = mul_158 = None\n mul_159 = torch.ops.aten.mul.Tensor(mul_152, 0.5); mul_152 = None\n add_108 = torch.ops.aten.add.Tensor(add_107, mul_159); add_107 = mul_159 = None\n view_183 = torch.ops.aten.view.default(add_108, [64, 3072]); add_108 = None\n mm_21 = torch.ops.aten.mm.default(view_183, permute_145); permute_145 = None\n permute_146 = torch.ops.aten.permute.default(view_183, [1, 0])\n mm_22 = torch.ops.aten.mm.default(permute_146, view_116); permute_146 = view_116 = None\n permute_147 = torch.ops.aten.permute.default(mm_22, [1, 0]); mm_22 = None\n sum_30 = torch.ops.aten.sum.dim_IntList(view_183, [0], True); view_183 = None\n view_184 = torch.ops.aten.view.default(sum_30, [3072]); sum_30 = None\n permute_148 = torch.ops.aten.permute.default(permute_147, [1, 0]); permute_147 = None\n view_185 = torch.ops.aten.view.default(mm_21, [1, 64, 768]); mm_21 = None\n mul_161 = torch.ops.aten.mul.Tensor(view_185, primals_118); primals_118 = None\n mul_162 = torch.ops.aten.mul.Tensor(mul_161, 768)\n sum_31 = torch.ops.aten.sum.dim_IntList(mul_161, [2], True)\n mul_163 = torch.ops.aten.mul.Tensor(mul_161, mul_74); mul_161 = None\n sum_32 = torch.ops.aten.sum.dim_IntList(mul_163, [2], True); mul_163 = None\n mul_164 = torch.ops.aten.mul.Tensor(mul_74, sum_32); sum_32 = None\n sub_44 = torch.ops.aten.sub.Tensor(mul_162, sum_31); mul_162 = sum_31 = None\n sub_45 = torch.ops.aten.sub.Tensor(sub_44, mul_164); sub_44 = mul_164 = None\n mul_165 = torch.ops.aten.mul.Tensor(div_5, sub_45); div_5 = sub_45 = None\n mul_166 = torch.ops.aten.mul.Tensor(view_185, mul_74); mul_74 = None\n sum_33 = torch.ops.aten.sum.dim_IntList(mul_166, [0, 1]); mul_166 = None\n sum_34 = torch.ops.aten.sum.dim_IntList(view_185, [0, 1]); view_185 = None\n add_109 = torch.ops.aten.add.Tensor(add_106, mul_165); add_106 = mul_165 = None\n view_186 = torch.ops.aten.view.default(add_109, [64, 768])\n mm_23 = torch.ops.aten.mm.default(view_186, permute_149); permute_149 = None\n permute_150 = torch.ops.aten.permute.default(view_186, [1, 0])\n permute_76 = torch.ops.aten.permute.default(getitem_104, [0, 2, 1, 3])\n view_113 = torch.ops.aten.view.default(permute_76, [1, 64, 768]); 
permute_76 = None\n view_114 = torch.ops.aten.view.default(view_113, [64, 768]); view_113 = None\n mm_24 = torch.ops.aten.mm.default(permute_150, view_114); permute_150 = view_114 = None\n permute_151 = torch.ops.aten.permute.default(mm_24, [1, 0]); mm_24 = None\n sum_35 = torch.ops.aten.sum.dim_IntList(view_186, [0], True); view_186 = None\n view_187 = torch.ops.aten.view.default(sum_35, [768]); sum_35 = None\n permute_152 = torch.ops.aten.permute.default(permute_151, [1, 0]); permute_151 = None\n view_188 = torch.ops.aten.view.default(mm_23, [1, 64, 768]); mm_23 = None\n view_189 = torch.ops.aten.view.default(view_188, [1, 64, 12, 64]); view_188 = None\n permute_153 = torch.ops.aten.permute.default(view_189, [0, 2, 1, 3]); view_189 = None\n _scaled_dot_product_efficient_attention_backward_2 = torch.ops.aten._scaled_dot_product_efficient_attention_backward.default(permute_153, permute_74, permute_73, permute_75, None, getitem_104, getitem_105, getitem_106, getitem_107, 0.0, [True, True, True, False], True); permute_153 = permute_74 = permute_73 = permute_75 = getitem_104 = getitem_105 = getitem_106 = getitem_107 = None\n getitem_142 = _scaled_dot_product_efficient_attention_backward_2[0]\n getitem_143 = _scaled_dot_product_efficient_attention_backward_2[1]\n getitem_144 = _scaled_dot_product_efficient_attention_backward_2[2]; _scaled_dot_product_efficient_attention_backward_2 = None\n permute_154 = torch.ops.aten.permute.default(getitem_144, [0, 2, 1, 3]); getitem_144 = None\n view_190 = torch.ops.aten.view.default(permute_154, [1, 64, 768]); permute_154 = None\n permute_155 = torch.ops.aten.permute.default(getitem_142, [0, 2, 1, 3]); getitem_142 = None\n view_191 = torch.ops.aten.view.default(permute_155, [1, 64, 768]); permute_155 = None\n permute_156 = torch.ops.aten.permute.default(getitem_143, [0, 2, 1, 3]); getitem_143 = None\n view_192 = torch.ops.aten.view.default(permute_156, [1, 64, 768]); permute_156 = None\n cat_2 = torch.ops.aten.cat.default([view_191, view_192, view_190], 2); view_191 = view_192 = view_190 = None\n view_193 = torch.ops.aten.view.default(cat_2, [64, 2304]); cat_2 = None\n mm_25 = torch.ops.aten.mm.default(view_193, permute_157); permute_157 = None\n permute_158 = torch.ops.aten.permute.default(view_193, [1, 0])\n mm_26 = torch.ops.aten.mm.default(permute_158, view_108); permute_158 = view_108 = None\n permute_159 = torch.ops.aten.permute.default(mm_26, [1, 0]); mm_26 = None\n sum_36 = torch.ops.aten.sum.dim_IntList(view_193, [0], True); view_193 = None\n view_194 = torch.ops.aten.view.default(sum_36, [2304]); sum_36 = None\n permute_160 = torch.ops.aten.permute.default(permute_159, [1, 0]); permute_159 = None\n view_195 = torch.ops.aten.view.default(mm_25, [1, 64, 768]); mm_25 = None\n mul_168 = torch.ops.aten.mul.Tensor(view_195, primals_112); primals_112 = None\n mul_169 = torch.ops.aten.mul.Tensor(mul_168, 768)\n sum_37 = torch.ops.aten.sum.dim_IntList(mul_168, [2], True)\n mul_170 = torch.ops.aten.mul.Tensor(mul_168, mul_72); mul_168 = None\n sum_38 = torch.ops.aten.sum.dim_IntList(mul_170, [2], True); mul_170 = None\n mul_171 = torch.ops.aten.mul.Tensor(mul_72, sum_38); sum_38 = None\n sub_47 = torch.ops.aten.sub.Tensor(mul_169, sum_37); mul_169 = sum_37 = None\n sub_48 = torch.ops.aten.sub.Tensor(sub_47, mul_171); sub_47 = mul_171 = None\n mul_172 = torch.ops.aten.mul.Tensor(div_6, sub_48); div_6 = sub_48 = None\n mul_173 = torch.ops.aten.mul.Tensor(view_195, mul_72); mul_72 = None\n sum_39 = torch.ops.aten.sum.dim_IntList(mul_173, [0, 1]); mul_173 = 
None\n sum_40 = torch.ops.aten.sum.dim_IntList(view_195, [0, 1]); view_195 = None\n add_110 = torch.ops.aten.add.Tensor(add_109, mul_172); add_109 = mul_172 = None\n view_196 = torch.ops.aten.view.default(add_110, [64, 768])\n mm_27 = torch.ops.aten.mm.default(view_196, permute_161); permute_161 = None\n permute_162 = torch.ops.aten.permute.default(view_196, [1, 0])\n mm_28 = torch.ops.aten.mm.default(permute_162, view_106); permute_162 = view_106 = None\n permute_163 = torch.ops.aten.permute.default(mm_28, [1, 0]); mm_28 = None\n sum_41 = torch.ops.aten.sum.dim_IntList(view_196, [0], True); view_196 = None\n view_197 = torch.ops.aten.view.default(sum_41, [768]); sum_41 = None\n permute_164 = torch.ops.aten.permute.default(permute_163, [1, 0]); permute_163 = None\n view_198 = torch.ops.aten.view.default(mm_27, [1, 64, 3072]); mm_27 = None\n view_105 = torch.ops.aten.view.default(addmm_34, [1, 64, 3072]); addmm_34 = None\n mul_68 = torch.ops.aten.mul.Tensor(view_105, 0.5)\n mul_174 = torch.ops.aten.mul.Tensor(view_198, mul_68); mul_68 = None\n pow_9 = torch.ops.aten.pow.Tensor_Scalar(view_105, 3.0)\n mul_69 = torch.ops.aten.mul.Tensor(pow_9, 0.044715); pow_9 = None\n add_70 = torch.ops.aten.add.Tensor(view_105, mul_69); mul_69 = None\n mul_70 = torch.ops.aten.mul.Tensor(add_70, 0.7978845608028654); add_70 = None\n tanh_8 = torch.ops.aten.tanh.default(mul_70); mul_70 = None\n add_71 = torch.ops.aten.add.Tensor(tanh_8, 1.0)\n mul_175 = torch.ops.aten.mul.Tensor(view_198, add_71); view_198 = add_71 = None\n mul_176 = torch.ops.aten.mul.Tensor(tanh_8, tanh_8); tanh_8 = None\n sub_49 = torch.ops.aten.sub.Tensor(1, mul_176); mul_176 = None\n mul_177 = torch.ops.aten.mul.Tensor(mul_174, sub_49); mul_174 = sub_49 = None\n mul_178 = torch.ops.aten.mul.Tensor(mul_177, 0.7978845608028654); mul_177 = None\n mul_179 = torch.ops.aten.mul.Tensor(mul_178, 0.044715)\n pow_16 = torch.ops.aten.pow.Tensor_Scalar(view_105, 2.0); view_105 = None\n mul_180 = torch.ops.aten.mul.Scalar(pow_16, 3.0); pow_16 = None\n mul_181 = torch.ops.aten.mul.Tensor(mul_179, mul_180); mul_179 = mul_180 = None\n add_111 = torch.ops.aten.add.Tensor(mul_178, mul_181); mul_178 = mul_181 = None\n mul_182 = torch.ops.aten.mul.Tensor(mul_175, 0.5); mul_175 = None\n add_112 = torch.ops.aten.add.Tensor(add_111, mul_182); add_111 = mul_182 = None\n view_199 = torch.ops.aten.view.default(add_112, [64, 3072]); add_112 = None\n mm_29 = torch.ops.aten.mm.default(view_199, permute_165); permute_165 = None\n permute_166 = torch.ops.aten.permute.default(view_199, [1, 0])\n mm_30 = torch.ops.aten.mm.default(permute_166, view_104); permute_166 = view_104 = None\n permute_167 = torch.ops.aten.permute.default(mm_30, [1, 0]); mm_30 = None\n sum_42 = torch.ops.aten.sum.dim_IntList(view_199, [0], True); view_199 = None\n view_200 = torch.ops.aten.view.default(sum_42, [3072]); sum_42 = None\n permute_168 = torch.ops.aten.permute.default(permute_167, [1, 0]); permute_167 = None\n view_201 = torch.ops.aten.view.default(mm_29, [1, 64, 768]); mm_29 = None\n mul_184 = torch.ops.aten.mul.Tensor(view_201, primals_106); primals_106 = None\n mul_185 = torch.ops.aten.mul.Tensor(mul_184, 768)\n sum_43 = torch.ops.aten.sum.dim_IntList(mul_184, [2], True)\n mul_186 = torch.ops.aten.mul.Tensor(mul_184, mul_66); mul_184 = None\n sum_44 = torch.ops.aten.sum.dim_IntList(mul_186, [2], True); mul_186 = None\n mul_187 = torch.ops.aten.mul.Tensor(mul_66, sum_44); sum_44 = None\n sub_51 = torch.ops.aten.sub.Tensor(mul_185, sum_43); mul_185 = sum_43 = None\n sub_52 = 
torch.ops.aten.sub.Tensor(sub_51, mul_187); sub_51 = mul_187 = None\n mul_188 = torch.ops.aten.mul.Tensor(div_7, sub_52); div_7 = sub_52 = None\n mul_189 = torch.ops.aten.mul.Tensor(view_201, mul_66); mul_66 = None\n sum_45 = torch.ops.aten.sum.dim_IntList(mul_189, [0, 1]); mul_189 = None\n sum_46 = torch.ops.aten.sum.dim_IntList(view_201, [0, 1]); view_201 = None\n add_113 = torch.ops.aten.add.Tensor(add_110, mul_188); add_110 = mul_188 = None\n view_202 = torch.ops.aten.view.default(add_113, [64, 768])\n mm_31 = torch.ops.aten.mm.default(view_202, permute_169); permute_169 = None\n permute_170 = torch.ops.aten.permute.default(view_202, [1, 0])\n permute_68 = torch.ops.aten.permute.default(getitem_93, [0, 2, 1, 3])\n view_101 = torch.ops.aten.view.default(permute_68, [1, 64, 768]); permute_68 = None\n view_102 = torch.ops.aten.view.default(view_101, [64, 768]); view_101 = None\n mm_32 = torch.ops.aten.mm.default(permute_170, view_102); permute_170 = view_102 = None\n permute_171 = torch.ops.aten.permute.default(mm_32, [1, 0]); mm_32 = None\n sum_47 = torch.ops.aten.sum.dim_IntList(view_202, [0], True); view_202 = None\n view_203 = torch.ops.aten.view.default(sum_47, [768]); sum_47 = None\n permute_172 = torch.ops.aten.permute.default(permute_171, [1, 0]); permute_171 = None\n view_204 = torch.ops.aten.view.default(mm_31, [1, 64, 768]); mm_31 = None\n view_205 = torch.ops.aten.view.default(view_204, [1, 64, 12, 64]); view_204 = None\n permute_173 = torch.ops.aten.permute.default(view_205, [0, 2, 1, 3]); view_205 = None\n _scaled_dot_product_efficient_attention_backward_3 = torch.ops.aten._scaled_dot_product_efficient_attention_backward.default(permute_173, permute_66, permute_65, permute_67, None, getitem_93, getitem_94, getitem_95, getitem_96, 0.0, [True, True, True, False], True); permute_173 = permute_66 = permute_65 = permute_67 = getitem_93 = getitem_94 = getitem_95 = getitem_96 = None\n getitem_146 = _scaled_dot_product_efficient_attention_backward_3[0]\n getitem_147 = _scaled_dot_product_efficient_attention_backward_3[1]\n getitem_148 = _scaled_dot_product_efficient_attention_backward_3[2]; _scaled_dot_product_efficient_attention_backward_3 = None\n permute_174 = torch.ops.aten.permute.default(getitem_148, [0, 2, 1, 3]); getitem_148 = None\n view_206 = torch.ops.aten.view.default(permute_174, [1, 64, 768]); permute_174 = None\n permute_175 = torch.ops.aten.permute.default(getitem_146, [0, 2, 1, 3]); getitem_146 = None\n view_207 = torch.ops.aten.view.default(permute_175, [1, 64, 768]); permute_175 = None\n permute_176 = torch.ops.aten.permute.default(getitem_147, [0, 2, 1, 3]); getitem_147 = None\n view_208 = torch.ops.aten.view.default(permute_176, [1, 64, 768]); permute_176 = None\n cat_3 = torch.ops.aten.cat.default([view_207, view_208, view_206], 2); view_207 = view_208 = view_206 = None\n view_209 = torch.ops.aten.view.default(cat_3, [64, 2304]); cat_3 = None\n mm_33 = torch.ops.aten.mm.default(view_209, permute_177); permute_177 = None\n permute_178 = torch.ops.aten.permute.default(view_209, [1, 0])\n mm_34 = torch.ops.aten.mm.default(permute_178, view_96); permute_178 = view_96 = None\n permute_179 = torch.ops.aten.permute.default(mm_34, [1, 0]); mm_34 = None\n sum_48 = torch.ops.aten.sum.dim_IntList(view_209, [0], True); view_209 = None\n view_210 = torch.ops.aten.view.default(sum_48, [2304]); sum_48 = None\n permute_180 = torch.ops.aten.permute.default(permute_179, [1, 0]); permute_179 = None\n view_211 = torch.ops.aten.view.default(mm_33, [1, 64, 768]); mm_33 = None\n 
mul_191 = torch.ops.aten.mul.Tensor(view_211, primals_100); primals_100 = None\n mul_192 = torch.ops.aten.mul.Tensor(mul_191, 768)\n sum_49 = torch.ops.aten.sum.dim_IntList(mul_191, [2], True)\n mul_193 = torch.ops.aten.mul.Tensor(mul_191, mul_64); mul_191 = None\n sum_50 = torch.ops.aten.sum.dim_IntList(mul_193, [2], True); mul_193 = None\n mul_194 = torch.ops.aten.mul.Tensor(mul_64, sum_50); sum_50 = None\n sub_54 = torch.ops.aten.sub.Tensor(mul_192, sum_49); mul_192 = sum_49 = None\n sub_55 = torch.ops.aten.sub.Tensor(sub_54, mul_194); sub_54 = mul_194 = None\n mul_195 = torch.ops.aten.mul.Tensor(div_8, sub_55); div_8 = sub_55 = None\n mul_196 = torch.ops.aten.mul.Tensor(view_211, mul_64); mul_64 = None\n sum_51 = torch.ops.aten.sum.dim_IntList(mul_196, [0, 1]); mul_196 = None\n sum_52 = torch.ops.aten.sum.dim_IntList(view_211, [0, 1]); view_211 = None\n add_114 = torch.ops.aten.add.Tensor(add_113, mul_195); add_113 = mul_195 = None\n view_212 = torch.ops.aten.view.default(add_114, [64, 768])\n mm_35 = torch.ops.aten.mm.default(view_212, permute_181); permute_181 = None\n permute_182 = torch.ops.aten.permute.default(view_212, [1, 0])\n mm_36 = torch.ops.aten.mm.default(permute_182, view_94); permute_182 = view_94 = None\n permute_183 = torch.ops.aten.permute.default(mm_36, [1, 0]); mm_36 = None\n sum_53 = torch.ops.aten.sum.dim_IntList(view_212, [0], True); view_212 = None\n view_213 = torch.ops.aten.view.default(sum_53, [768]); sum_53 = None\n permute_184 = torch.ops.aten.permute.default(permute_183, [1, 0]); permute_183 = None\n view_214 = torch.ops.aten.view.default(mm_35, [1, 64, 3072]); mm_35 = None\n view_93 = torch.ops.aten.view.default(addmm_30, [1, 64, 3072]); addmm_30 = None\n mul_60 = torch.ops.aten.mul.Tensor(view_93, 0.5)\n mul_197 = torch.ops.aten.mul.Tensor(view_214, mul_60); mul_60 = None\n pow_8 = torch.ops.aten.pow.Tensor_Scalar(view_93, 3.0)\n mul_61 = torch.ops.aten.mul.Tensor(pow_8, 0.044715); pow_8 = None\n add_62 = torch.ops.aten.add.Tensor(view_93, mul_61); mul_61 = None\n mul_62 = torch.ops.aten.mul.Tensor(add_62, 0.7978845608028654); add_62 = None\n tanh_7 = torch.ops.aten.tanh.default(mul_62); mul_62 = None\n add_63 = torch.ops.aten.add.Tensor(tanh_7, 1.0)\n mul_198 = torch.ops.aten.mul.Tensor(view_214, add_63); view_214 = add_63 = None\n mul_199 = torch.ops.aten.mul.Tensor(tanh_7, tanh_7); tanh_7 = None\n sub_56 = torch.ops.aten.sub.Tensor(1, mul_199); mul_199 = None\n mul_200 = torch.ops.aten.mul.Tensor(mul_197, sub_56); mul_197 = sub_56 = None\n mul_201 = torch.ops.aten.mul.Tensor(mul_200, 0.7978845608028654); mul_200 = None\n mul_202 = torch.ops.aten.mul.Tensor(mul_201, 0.044715)\n pow_17 = torch.ops.aten.pow.Tensor_Scalar(view_93, 2.0); view_93 = None\n mul_203 = torch.ops.aten.mul.Scalar(pow_17, 3.0); pow_17 = None\n mul_204 = torch.ops.aten.mul.Tensor(mul_202, mul_203); mul_202 = mul_203 = None\n add_115 = torch.ops.aten.add.Tensor(mul_201, mul_204); mul_201 = mul_204 = None\n mul_205 = torch.ops.aten.mul.Tensor(mul_198, 0.5); mul_198 = None\n add_116 = torch.ops.aten.add.Tensor(add_115, mul_205); add_115 = mul_205 = None\n view_215 = torch.ops.aten.view.default(add_116, [64, 3072]); add_116 = None\n mm_37 = torch.ops.aten.mm.default(view_215, permute_185); permute_185 = None\n permute_186 = torch.ops.aten.permute.default(view_215, [1, 0])\n mm_38 = torch.ops.aten.mm.default(permute_186, view_92); permute_186 = view_92 = None\n permute_187 = torch.ops.aten.permute.default(mm_38, [1, 0]); mm_38 = None\n sum_54 = torch.ops.aten.sum.dim_IntList(view_215, 
[0], True); view_215 = None\n view_216 = torch.ops.aten.view.default(sum_54, [3072]); sum_54 = None\n permute_188 = torch.ops.aten.permute.default(permute_187, [1, 0]); permute_187 = None\n view_217 = torch.ops.aten.view.default(mm_37, [1, 64, 768]); mm_37 = None\n mul_207 = torch.ops.aten.mul.Tensor(view_217, primals_94); primals_94 = None\n mul_208 = torch.ops.aten.mul.Tensor(mul_207, 768)\n sum_55 = torch.ops.aten.sum.dim_IntList(mul_207, [2], True)\n mul_209 = torch.ops.aten.mul.Tensor(mul_207, mul_58); mul_207 = None\n sum_56 = torch.ops.aten.sum.dim_IntList(mul_209, [2], True); mul_209 = None\n mul_210 = torch.ops.aten.mul.Tensor(mul_58, sum_56); sum_56 = None\n sub_58 = torch.ops.aten.sub.Tensor(mul_208, sum_55); mul_208 = sum_55 = None\n sub_59 = torch.ops.aten.sub.Tensor(sub_58, mul_210); sub_58 = mul_210 = None\n mul_211 = torch.ops.aten.mul.Tensor(div_9, sub_59); div_9 = sub_59 = None\n mul_212 = torch.ops.aten.mul.Tensor(view_217, mul_58); mul_58 = None\n sum_57 = torch.ops.aten.sum.dim_IntList(mul_212, [0, 1]); mul_212 = None\n sum_58 = torch.ops.aten.sum.dim_IntList(view_217, [0, 1]); view_217 = None\n add_117 = torch.ops.aten.add.Tensor(add_114, mul_211); add_114 = mul_211 = None\n view_218 = torch.ops.aten.view.default(add_117, [64, 768])\n mm_39 = torch.ops.aten.mm.default(view_218, permute_189); permute_189 = None\n permute_190 = torch.ops.aten.permute.default(view_218, [1, 0])\n permute_60 = torch.ops.aten.permute.default(getitem_82, [0, 2, 1, 3])\n view_89 = torch.ops.aten.view.default(permute_60, [1, 64, 768]); permute_60 = None\n view_90 = torch.ops.aten.view.default(view_89, [64, 768]); view_89 = None\n mm_40 = torch.ops.aten.mm.default(permute_190, view_90); permute_190 = view_90 = None\n permute_191 = torch.ops.aten.permute.default(mm_40, [1, 0]); mm_40 = None\n sum_59 = torch.ops.aten.sum.dim_IntList(view_218, [0], True); view_218 = None\n view_219 = torch.ops.aten.view.default(sum_59, [768]); sum_59 = None\n permute_192 = torch.ops.aten.permute.default(permute_191, [1, 0]); permute_191 = None\n view_220 = torch.ops.aten.view.default(mm_39, [1, 64, 768]); mm_39 = None\n view_221 = torch.ops.aten.view.default(view_220, [1, 64, 12, 64]); view_220 = None\n permute_193 = torch.ops.aten.permute.default(view_221, [0, 2, 1, 3]); view_221 = None\n _scaled_dot_product_efficient_attention_backward_4 = torch.ops.aten._scaled_dot_product_efficient_attention_backward.default(permute_193, permute_58, permute_57, permute_59, None, getitem_82, getitem_83, getitem_84, getitem_85, 0.0, [True, True, True, False], True); permute_193 = permute_58 = permute_57 = permute_59 = getitem_82 = getitem_83 = getitem_84 = getitem_85 = None\n getitem_150 = _scaled_dot_product_efficient_attention_backward_4[0]\n getitem_151 = _scaled_dot_product_efficient_attention_backward_4[1]\n getitem_152 = _scaled_dot_product_efficient_attention_backward_4[2]; _scaled_dot_product_efficient_attention_backward_4 = None\n permute_194 = torch.ops.aten.permute.default(getitem_152, [0, 2, 1, 3]); getitem_152 = None\n view_222 = torch.ops.aten.view.default(permute_194, [1, 64, 768]); permute_194 = None\n permute_195 = torch.ops.aten.permute.default(getitem_150, [0, 2, 1, 3]); getitem_150 = None\n view_223 = torch.ops.aten.view.default(permute_195, [1, 64, 768]); permute_195 = None\n permute_196 = torch.ops.aten.permute.default(getitem_151, [0, 2, 1, 3]); getitem_151 = None\n view_224 = torch.ops.aten.view.default(permute_196, [1, 64, 768]); permute_196 = None\n cat_4 = torch.ops.aten.cat.default([view_223, 
view_224, view_222], 2); view_223 = view_224 = view_222 = None\n view_225 = torch.ops.aten.view.default(cat_4, [64, 2304]); cat_4 = None\n mm_41 = torch.ops.aten.mm.default(view_225, permute_197); permute_197 = None\n permute_198 = torch.ops.aten.permute.default(view_225, [1, 0])\n mm_42 = torch.ops.aten.mm.default(permute_198, view_84); permute_198 = view_84 = None\n permute_199 = torch.ops.aten.permute.default(mm_42, [1, 0]); mm_42 = None\n sum_60 = torch.ops.aten.sum.dim_IntList(view_225, [0], True); view_225 = None\n view_226 = torch.ops.aten.view.default(sum_60, [2304]); sum_60 = None\n permute_200 = torch.ops.aten.permute.default(permute_199, [1, 0]); permute_199 = None\n view_227 = torch.ops.aten.view.default(mm_41, [1, 64, 768]); mm_41 = None\n mul_214 = torch.ops.aten.mul.Tensor(view_227, primals_88); primals_88 = None\n mul_215 = torch.ops.aten.mul.Tensor(mul_214, 768)\n sum_61 = torch.ops.aten.sum.dim_IntList(mul_214, [2], True)\n mul_216 = torch.ops.aten.mul.Tensor(mul_214, mul_56); mul_214 = None\n sum_62 = torch.ops.aten.sum.dim_IntList(mul_216, [2], True); mul_216 = None\n mul_217 = torch.ops.aten.mul.Tensor(mul_56, sum_62); sum_62 = None\n sub_61 = torch.ops.aten.sub.Tensor(mul_215, sum_61); mul_215 = sum_61 = None\n sub_62 = torch.ops.aten.sub.Tensor(sub_61, mul_217); sub_61 = mul_217 = None\n mul_218 = torch.ops.aten.mul.Tensor(div_10, sub_62); div_10 = sub_62 = None\n mul_219 = torch.ops.aten.mul.Tensor(view_227, mul_56); mul_56 = None\n sum_63 = torch.ops.aten.sum.dim_IntList(mul_219, [0, 1]); mul_219 = None\n sum_64 = torch.ops.aten.sum.dim_IntList(view_227, [0, 1]); view_227 = None\n add_118 = torch.ops.aten.add.Tensor(add_117, mul_218); add_117 = mul_218 = None\n view_228 = torch.ops.aten.view.default(add_118, [64, 768])\n mm_43 = torch.ops.aten.mm.default(view_228, permute_201); permute_201 = None\n permute_202 = torch.ops.aten.permute.default(view_228, [1, 0])\n mm_44 = torch.ops.aten.mm.default(permute_202, view_82); permute_202 = view_82 = None\n permute_203 = torch.ops.aten.permute.default(mm_44, [1, 0]); mm_44 = None\n sum_65 = torch.ops.aten.sum.dim_IntList(view_228, [0], True); view_228 = None\n view_229 = torch.ops.aten.view.default(sum_65, [768]); sum_65 = None\n permute_204 = torch.ops.aten.permute.default(permute_203, [1, 0]); permute_203 = None\n view_230 = torch.ops.aten.view.default(mm_43, [1, 64, 3072]); mm_43 = None\n view_81 = torch.ops.aten.view.default(addmm_26, [1, 64, 3072]); addmm_26 = None\n mul_52 = torch.ops.aten.mul.Tensor(view_81, 0.5)\n mul_220 = torch.ops.aten.mul.Tensor(view_230, mul_52); mul_52 = None\n pow_7 = torch.ops.aten.pow.Tensor_Scalar(view_81, 3.0)\n mul_53 = torch.ops.aten.mul.Tensor(pow_7, 0.044715); pow_7 = None\n add_54 = torch.ops.aten.add.Tensor(view_81, mul_53); mul_53 = None\n mul_54 = torch.ops.aten.mul.Tensor(add_54, 0.7978845608028654); add_54 = None\n tanh_6 = torch.ops.aten.tanh.default(mul_54); mul_54 = None\n add_55 = torch.ops.aten.add.Tensor(tanh_6, 1.0)\n mul_221 = torch.ops.aten.mul.Tensor(view_230, add_55); view_230 = add_55 = None\n mul_222 = torch.ops.aten.mul.Tensor(tanh_6, tanh_6); tanh_6 = None\n sub_63 = torch.ops.aten.sub.Tensor(1, mul_222); mul_222 = None\n mul_223 = torch.ops.aten.mul.Tensor(mul_220, sub_63); mul_220 = sub_63 = None\n mul_224 = torch.ops.aten.mul.Tensor(mul_223, 0.7978845608028654); mul_223 = None\n mul_225 = torch.ops.aten.mul.Tensor(mul_224, 0.044715)\n pow_18 = torch.ops.aten.pow.Tensor_Scalar(view_81, 2.0); view_81 = None\n mul_226 = torch.ops.aten.mul.Scalar(pow_18, 3.0); 
pow_18 = None\n mul_227 = torch.ops.aten.mul.Tensor(mul_225, mul_226); mul_225 = mul_226 = None\n add_119 = torch.ops.aten.add.Tensor(mul_224, mul_227); mul_224 = mul_227 = None\n mul_228 = torch.ops.aten.mul.Tensor(mul_221, 0.5); mul_221 = None\n add_120 = torch.ops.aten.add.Tensor(add_119, mul_228); add_119 = mul_228 = None\n view_231 = torch.ops.aten.view.default(add_120, [64, 3072]); add_120 = None\n mm_45 = torch.ops.aten.mm.default(view_231, permute_205); permute_205 = None\n permute_206 = torch.ops.aten.permute.default(view_231, [1, 0])\n mm_46 = torch.ops.aten.mm.default(permute_206, view_80); permute_206 = view_80 = None\n permute_207 = torch.ops.aten.permute.default(mm_46, [1, 0]); mm_46 = None\n sum_66 = torch.ops.aten.sum.dim_IntList(view_231, [0], True); view_231 = None\n view_232 = torch.ops.aten.view.default(sum_66, [3072]); sum_66 = None\n permute_208 = torch.ops.aten.permute.default(permute_207, [1, 0]); permute_207 = None\n view_233 = torch.ops.aten.view.default(mm_45, [1, 64, 768]); mm_45 = None\n mul_230 = torch.ops.aten.mul.Tensor(view_233, primals_82); primals_82 = None\n mul_231 = torch.ops.aten.mul.Tensor(mul_230, 768)\n sum_67 = torch.ops.aten.sum.dim_IntList(mul_230, [2], True)\n mul_232 = torch.ops.aten.mul.Tensor(mul_230, mul_50); mul_230 = None\n sum_68 = torch.ops.aten.sum.dim_IntList(mul_232, [2], True); mul_232 = None\n mul_233 = torch.ops.aten.mul.Tensor(mul_50, sum_68); sum_68 = None\n sub_65 = torch.ops.aten.sub.Tensor(mul_231, sum_67); mul_231 = sum_67 = None\n sub_66 = torch.ops.aten.sub.Tensor(sub_65, mul_233); sub_65 = mul_233 = None\n mul_234 = torch.ops.aten.mul.Tensor(div_11, sub_66); div_11 = sub_66 = None\n mul_235 = torch.ops.aten.mul.Tensor(view_233, mul_50); mul_50 = None\n sum_69 = torch.ops.aten.sum.dim_IntList(mul_235, [0, 1]); mul_235 = None\n sum_70 = torch.ops.aten.sum.dim_IntList(view_233, [0, 1]); view_233 = None\n add_121 = torch.ops.aten.add.Tensor(add_118, mul_234); add_118 = mul_234 = None\n view_234 = torch.ops.aten.view.default(add_121, [64, 768])\n mm_47 = torch.ops.aten.mm.default(view_234, permute_209); permute_209 = None\n permute_210 = torch.ops.aten.permute.default(view_234, [1, 0])\n permute_52 = torch.ops.aten.permute.default(getitem_71, [0, 2, 1, 3])\n view_77 = torch.ops.aten.view.default(permute_52, [1, 64, 768]); permute_52 = None\n view_78 = torch.ops.aten.view.default(view_77, [64, 768]); view_77 = None\n mm_48 = torch.ops.aten.mm.default(permute_210, view_78); permute_210 = view_78 = None\n permute_211 = torch.ops.aten.permute.default(mm_48, [1, 0]); mm_48 = None\n sum_71 = torch.ops.aten.sum.dim_IntList(view_234, [0], True); view_234 = None\n view_235 = torch.ops.aten.view.default(sum_71, [768]); sum_71 = None\n permute_212 = torch.ops.aten.permute.default(permute_211, [1, 0]); permute_211 = None\n view_236 = torch.ops.aten.view.default(mm_47, [1, 64, 768]); mm_47 = None\n view_237 = torch.ops.aten.view.default(view_236, [1, 64, 12, 64]); view_236 = None\n permute_213 = torch.ops.aten.permute.default(view_237, [0, 2, 1, 3]); view_237 = None\n _scaled_dot_product_efficient_attention_backward_5 = torch.ops.aten._scaled_dot_product_efficient_attention_backward.default(permute_213, permute_50, permute_49, permute_51, None, getitem_71, getitem_72, getitem_73, getitem_74, 0.0, [True, True, True, False], True); permute_213 = permute_50 = permute_49 = permute_51 = getitem_71 = getitem_72 = getitem_73 = getitem_74 = None\n getitem_154 = _scaled_dot_product_efficient_attention_backward_5[0]\n getitem_155 = 
_scaled_dot_product_efficient_attention_backward_5[1]\n getitem_156 = _scaled_dot_product_efficient_attention_backward_5[2]; _scaled_dot_product_efficient_attention_backward_5 = None\n permute_214 = torch.ops.aten.permute.default(getitem_156, [0, 2, 1, 3]); getitem_156 = None\n view_238 = torch.ops.aten.view.default(permute_214, [1, 64, 768]); permute_214 = None\n permute_215 = torch.ops.aten.permute.default(getitem_154, [0, 2, 1, 3]); getitem_154 = None\n view_239 = torch.ops.aten.view.default(permute_215, [1, 64, 768]); permute_215 = None\n permute_216 = torch.ops.aten.permute.default(getitem_155, [0, 2, 1, 3]); getitem_155 = None\n view_240 = torch.ops.aten.view.default(permute_216, [1, 64, 768]); permute_216 = None\n cat_5 = torch.ops.aten.cat.default([view_239, view_240, view_238], 2); view_239 = view_240 = view_238 = None\n view_241 = torch.ops.aten.view.default(cat_5, [64, 2304]); cat_5 = None\n mm_49 = torch.ops.aten.mm.default(view_241, permute_217); permute_217 = None\n permute_218 = torch.ops.aten.permute.default(view_241, [1, 0])\n mm_50 = torch.ops.aten.mm.default(permute_218, view_72); permute_218 = view_72 = None\n permute_219 = torch.ops.aten.permute.default(mm_50, [1, 0]); mm_50 = None\n sum_72 = torch.ops.aten.sum.dim_IntList(view_241, [0], True); view_241 = None\n view_242 = torch.ops.aten.view.default(sum_72, [2304]); sum_72 = None\n permute_220 = torch.ops.aten.permute.default(permute_219, [1, 0]); permute_219 = None\n view_243 = torch.ops.aten.view.default(mm_49, [1, 64, 768]); mm_49 = None\n mul_237 = torch.ops.aten.mul.Tensor(view_243, primals_76); primals_76 = None\n mul_238 = torch.ops.aten.mul.Tensor(mul_237, 768)\n sum_73 = torch.ops.aten.sum.dim_IntList(mul_237, [2], True)\n mul_239 = torch.ops.aten.mul.Tensor(mul_237, mul_48); mul_237 = None\n sum_74 = torch.ops.aten.sum.dim_IntList(mul_239, [2], True); mul_239 = None\n mul_240 = torch.ops.aten.mul.Tensor(mul_48, sum_74); sum_74 = None\n sub_68 = torch.ops.aten.sub.Tensor(mul_238, sum_73); mul_238 = sum_73 = None\n sub_69 = torch.ops.aten.sub.Tensor(sub_68, mul_240); sub_68 = mul_240 = None\n mul_241 = torch.ops.aten.mul.Tensor(div_12, sub_69); div_12 = sub_69 = None\n mul_242 = torch.ops.aten.mul.Tensor(view_243, mul_48); mul_48 = None\n sum_75 = torch.ops.aten.sum.dim_IntList(mul_242, [0, 1]); mul_242 = None\n sum_76 = torch.ops.aten.sum.dim_IntList(view_243, [0, 1]); view_243 = None\n add_122 = torch.ops.aten.add.Tensor(add_121, mul_241); add_121 = mul_241 = None\n view_244 = torch.ops.aten.view.default(add_122, [64, 768])\n mm_51 = torch.ops.aten.mm.default(view_244, permute_221); permute_221 = None\n permute_222 = torch.ops.aten.permute.default(view_244, [1, 0])\n mm_52 = torch.ops.aten.mm.default(permute_222, view_70); permute_222 = view_70 = None\n permute_223 = torch.ops.aten.permute.default(mm_52, [1, 0]); mm_52 = None\n sum_77 = torch.ops.aten.sum.dim_IntList(view_244, [0], True); view_244 = None\n view_245 = torch.ops.aten.view.default(sum_77, [768]); sum_77 = None\n permute_224 = torch.ops.aten.permute.default(permute_223, [1, 0]); permute_223 = None\n view_246 = torch.ops.aten.view.default(mm_51, [1, 64, 3072]); mm_51 = None\n view_69 = torch.ops.aten.view.default(addmm_22, [1, 64, 3072]); addmm_22 = None\n mul_44 = torch.ops.aten.mul.Tensor(view_69, 0.5)\n mul_243 = torch.ops.aten.mul.Tensor(view_246, mul_44); mul_44 = None\n pow_6 = torch.ops.aten.pow.Tensor_Scalar(view_69, 3.0)\n mul_45 = torch.ops.aten.mul.Tensor(pow_6, 0.044715); pow_6 = None\n add_46 = torch.ops.aten.add.Tensor(view_69, 
mul_45); mul_45 = None\n mul_46 = torch.ops.aten.mul.Tensor(add_46, 0.7978845608028654); add_46 = None\n tanh_5 = torch.ops.aten.tanh.default(mul_46); mul_46 = None\n add_47 = torch.ops.aten.add.Tensor(tanh_5, 1.0)\n mul_244 = torch.ops.aten.mul.Tensor(view_246, add_47); view_246 = add_47 = None\n mul_245 = torch.ops.aten.mul.Tensor(tanh_5, tanh_5); tanh_5 = None\n sub_70 = torch.ops.aten.sub.Tensor(1, mul_245); mul_245 = None\n mul_246 = torch.ops.aten.mul.Tensor(mul_243, sub_70); mul_243 = sub_70 = None\n mul_247 = torch.ops.aten.mul.Tensor(mul_246, 0.7978845608028654); mul_246 = None\n mul_248 = torch.ops.aten.mul.Tensor(mul_247, 0.044715)\n pow_19 = torch.ops.aten.pow.Tensor_Scalar(view_69, 2.0); view_69 = None\n mul_249 = torch.ops.aten.mul.Scalar(pow_19, 3.0); pow_19 = None\n mul_250 = torch.ops.aten.mul.Tensor(mul_248, mul_249); mul_248 = mul_249 = None\n add_123 = torch.ops.aten.add.Tensor(mul_247, mul_250); mul_247 = mul_250 = None\n mul_251 = torch.ops.aten.mul.Tensor(mul_244, 0.5); mul_244 = None\n add_124 = torch.ops.aten.add.Tensor(add_123, mul_251); add_123 = mul_251 = None\n view_247 = torch.ops.aten.view.default(add_124, [64, 3072]); add_124 = None\n mm_53 = torch.ops.aten.mm.default(view_247, permute_225); permute_225 = None\n permute_226 = torch.ops.aten.permute.default(view_247, [1, 0])\n mm_54 = torch.ops.aten.mm.default(permute_226, view_68); permute_226 = view_68 = None\n permute_227 = torch.ops.aten.permute.default(mm_54, [1, 0]); mm_54 = None\n sum_78 = torch.ops.aten.sum.dim_IntList(view_247, [0], True); view_247 = None\n view_248 = torch.ops.aten.view.default(sum_78, [3072]); sum_78 = None\n permute_228 = torch.ops.aten.permute.default(permute_227, [1, 0]); permute_227 = None\n view_249 = torch.ops.aten.view.default(mm_53, [1, 64, 768]); mm_53 = None\n mul_253 = torch.ops.aten.mul.Tensor(view_249, primals_70); primals_70 = None\n mul_254 = torch.ops.aten.mul.Tensor(mul_253, 768)\n sum_79 = torch.ops.aten.sum.dim_IntList(mul_253, [2], True)\n mul_255 = torch.ops.aten.mul.Tensor(mul_253, mul_42); mul_253 = None\n sum_80 = torch.ops.aten.sum.dim_IntList(mul_255, [2], True); mul_255 = None\n mul_256 = torch.ops.aten.mul.Tensor(mul_42, sum_80); sum_80 = None\n sub_72 = torch.ops.aten.sub.Tensor(mul_254, sum_79); mul_254 = sum_79 = None\n sub_73 = torch.ops.aten.sub.Tensor(sub_72, mul_256); sub_72 = mul_256 = None\n mul_257 = torch.ops.aten.mul.Tensor(div_13, sub_73); div_13 = sub_73 = None\n mul_258 = torch.ops.aten.mul.Tensor(view_249, mul_42); mul_42 = None\n sum_81 = torch.ops.aten.sum.dim_IntList(mul_258, [0, 1]); mul_258 = None\n sum_82 = torch.ops.aten.sum.dim_IntList(view_249, [0, 1]); view_249 = None\n add_125 = torch.ops.aten.add.Tensor(add_122, mul_257); add_122 = mul_257 = None\n view_250 = torch.ops.aten.view.default(add_125, [64, 768])\n mm_55 = torch.ops.aten.mm.default(view_250, permute_229); permute_229 = None\n permute_230 = torch.ops.aten.permute.default(view_250, [1, 0])\n permute_44 = torch.ops.aten.permute.default(getitem_60, [0, 2, 1, 3])\n view_65 = torch.ops.aten.view.default(permute_44, [1, 64, 768]); permute_44 = None\n view_66 = torch.ops.aten.view.default(view_65, [64, 768]); view_65 = None\n mm_56 = torch.ops.aten.mm.default(permute_230, view_66); permute_230 = view_66 = None\n permute_231 = torch.ops.aten.permute.default(mm_56, [1, 0]); mm_56 = None\n sum_83 = torch.ops.aten.sum.dim_IntList(view_250, [0], True); view_250 = None\n view_251 = torch.ops.aten.view.default(sum_83, [768]); sum_83 = None\n permute_232 = 
torch.ops.aten.permute.default(permute_231, [1, 0]); permute_231 = None\n view_252 = torch.ops.aten.view.default(mm_55, [1, 64, 768]); mm_55 = None\n view_253 = torch.ops.aten.view.default(view_252, [1, 64, 12, 64]); view_252 = None\n permute_233 = torch.ops.aten.permute.default(view_253, [0, 2, 1, 3]); view_253 = None\n _scaled_dot_product_efficient_attention_backward_6 = torch.ops.aten._scaled_dot_product_efficient_attention_backward.default(permute_233, permute_42, permute_41, permute_43, None, getitem_60, getitem_61, getitem_62, getitem_63, 0.0, [True, True, True, False], True); permute_233 = permute_42 = permute_41 = permute_43 = getitem_60 = getitem_61 = getitem_62 = getitem_63 = None\n getitem_158 = _scaled_dot_product_efficient_attention_backward_6[0]\n getitem_159 = _scaled_dot_product_efficient_attention_backward_6[1]\n getitem_160 = _scaled_dot_product_efficient_attention_backward_6[2]; _scaled_dot_product_efficient_attention_backward_6 = None\n permute_234 = torch.ops.aten.permute.default(getitem_160, [0, 2, 1, 3]); getitem_160 = None\n view_254 = torch.ops.aten.view.default(permute_234, [1, 64, 768]); permute_234 = None\n permute_235 = torch.ops.aten.permute.default(getitem_158, [0, 2, 1, 3]); getitem_158 = None\n view_255 = torch.ops.aten.view.default(permute_235, [1, 64, 768]); permute_235 = None\n permute_236 = torch.ops.aten.permute.default(getitem_159, [0, 2, 1, 3]); getitem_159 = None\n view_256 = torch.ops.aten.view.default(permute_236, [1, 64, 768]); permute_236 = None\n cat_6 = torch.ops.aten.cat.default([view_255, view_256, view_254], 2); view_255 = view_256 = view_254 = None\n view_257 = torch.ops.aten.view.default(cat_6, [64, 2304]); cat_6 = None\n mm_57 = torch.ops.aten.mm.default(view_257, permute_237); permute_237 = None\n permute_238 = torch.ops.aten.permute.default(view_257, [1, 0])\n mm_58 = torch.ops.aten.mm.default(permute_238, view_60); permute_238 = view_60 = None\n permute_239 = torch.ops.aten.permute.default(mm_58, [1, 0]); mm_58 = None\n sum_84 = torch.ops.aten.sum.dim_IntList(view_257, [0], True); view_257 = None\n view_258 = torch.ops.aten.view.default(sum_84, [2304]); sum_84 = None\n permute_240 = torch.ops.aten.permute.default(permute_239, [1, 0]); permute_239 = None\n view_259 = torch.ops.aten.view.default(mm_57, [1, 64, 768]); mm_57 = None\n mul_260 = torch.ops.aten.mul.Tensor(view_259, primals_64); primals_64 = None\n mul_261 = torch.ops.aten.mul.Tensor(mul_260, 768)\n sum_85 = torch.ops.aten.sum.dim_IntList(mul_260, [2], True)\n mul_262 = torch.ops.aten.mul.Tensor(mul_260, mul_40); mul_260 = None\n sum_86 = torch.ops.aten.sum.dim_IntList(mul_262, [2], True); mul_262 = None\n mul_263 = torch.ops.aten.mul.Tensor(mul_40, sum_86); sum_86 = None\n sub_75 = torch.ops.aten.sub.Tensor(mul_261, sum_85); mul_261 = sum_85 = None\n sub_76 = torch.ops.aten.sub.Tensor(sub_75, mul_263); sub_75 = mul_263 = None\n mul_264 = torch.ops.aten.mul.Tensor(div_14, sub_76); div_14 = sub_76 = None\n mul_265 = torch.ops.aten.mul.Tensor(view_259, mul_40); mul_40 = None\n sum_87 = torch.ops.aten.sum.dim_IntList(mul_265, [0, 1]); mul_265 = None\n sum_88 = torch.ops.aten.sum.dim_IntList(view_259, [0, 1]); view_259 = None\n add_126 = torch.ops.aten.add.Tensor(add_125, mul_264); add_125 = mul_264 = None\n view_260 = torch.ops.aten.view.default(add_126, [64, 768])\n mm_59 = torch.ops.aten.mm.default(view_260, permute_241); permute_241 = None\n permute_242 = torch.ops.aten.permute.default(view_260, [1, 0])\n mm_60 = torch.ops.aten.mm.default(permute_242, view_58); permute_242 = 
view_58 = None\n permute_243 = torch.ops.aten.permute.default(mm_60, [1, 0]); mm_60 = None\n sum_89 = torch.ops.aten.sum.dim_IntList(view_260, [0], True); view_260 = None\n view_261 = torch.ops.aten.view.default(sum_89, [768]); sum_89 = None\n permute_244 = torch.ops.aten.permute.default(permute_243, [1, 0]); permute_243 = None\n view_262 = torch.ops.aten.view.default(mm_59, [1, 64, 3072]); mm_59 = None\n view_57 = torch.ops.aten.view.default(addmm_18, [1, 64, 3072]); addmm_18 = None\n mul_36 = torch.ops.aten.mul.Tensor(view_57, 0.5)\n mul_266 = torch.ops.aten.mul.Tensor(view_262, mul_36); mul_36 = None\n pow_5 = torch.ops.aten.pow.Tensor_Scalar(view_57, 3.0)\n mul_37 = torch.ops.aten.mul.Tensor(pow_5, 0.044715); pow_5 = None\n add_38 = torch.ops.aten.add.Tensor(view_57, mul_37); mul_37 = None\n mul_38 = torch.ops.aten.mul.Tensor(add_38, 0.7978845608028654); add_38 = None\n tanh_4 = torch.ops.aten.tanh.default(mul_38); mul_38 = None\n add_39 = torch.ops.aten.add.Tensor(tanh_4, 1.0)\n mul_267 = torch.ops.aten.mul.Tensor(view_262, add_39); view_262 = add_39 = None\n mul_268 = torch.ops.aten.mul.Tensor(tanh_4, tanh_4); tanh_4 = None\n sub_77 = torch.ops.aten.sub.Tensor(1, mul_268); mul_268 = None\n mul_269 = torch.ops.aten.mul.Tensor(mul_266, sub_77); mul_266 = sub_77 = None\n mul_270 = torch.ops.aten.mul.Tensor(mul_269, 0.7978845608028654); mul_269 = None\n mul_271 = torch.ops.aten.mul.Tensor(mul_270, 0.044715)\n pow_20 = torch.ops.aten.pow.Tensor_Scalar(view_57, 2.0); view_57 = None\n mul_272 = torch.ops.aten.mul.Scalar(pow_20, 3.0); pow_20 = None\n mul_273 = torch.ops.aten.mul.Tensor(mul_271, mul_272); mul_271 = mul_272 = None\n add_127 = torch.ops.aten.add.Tensor(mul_270, mul_273); mul_270 = mul_273 = None\n mul_274 = torch.ops.aten.mul.Tensor(mul_267, 0.5); mul_267 = None\n add_128 = torch.ops.aten.add.Tensor(add_127, mul_274); add_127 = mul_274 = None\n view_263 = torch.ops.aten.view.default(add_128, [64, 3072]); add_128 = None\n mm_61 = torch.ops.aten.mm.default(view_263, permute_245); permute_245 = None\n permute_246 = torch.ops.aten.permute.default(view_263, [1, 0])\n mm_62 = torch.ops.aten.mm.default(permute_246, view_56); permute_246 = view_56 = None\n permute_247 = torch.ops.aten.permute.default(mm_62, [1, 0]); mm_62 = None\n sum_90 = torch.ops.aten.sum.dim_IntList(view_263, [0], True); view_263 = None\n view_264 = torch.ops.aten.view.default(sum_90, [3072]); sum_90 = None\n permute_248 = torch.ops.aten.permute.default(permute_247, [1, 0]); permute_247 = None\n view_265 = torch.ops.aten.view.default(mm_61, [1, 64, 768]); mm_61 = None\n mul_276 = torch.ops.aten.mul.Tensor(view_265, primals_58); primals_58 = None\n mul_277 = torch.ops.aten.mul.Tensor(mul_276, 768)\n sum_91 = torch.ops.aten.sum.dim_IntList(mul_276, [2], True)\n mul_278 = torch.ops.aten.mul.Tensor(mul_276, mul_34); mul_276 = None\n sum_92 = torch.ops.aten.sum.dim_IntList(mul_278, [2], True); mul_278 = None\n mul_279 = torch.ops.aten.mul.Tensor(mul_34, sum_92); sum_92 = None\n sub_79 = torch.ops.aten.sub.Tensor(mul_277, sum_91); mul_277 = sum_91 = None\n sub_80 = torch.ops.aten.sub.Tensor(sub_79, mul_279); sub_79 = mul_279 = None\n mul_280 = torch.ops.aten.mul.Tensor(div_15, sub_80); div_15 = sub_80 = None\n mul_281 = torch.ops.aten.mul.Tensor(view_265, mul_34); mul_34 = None\n sum_93 = torch.ops.aten.sum.dim_IntList(mul_281, [0, 1]); mul_281 = None\n sum_94 = torch.ops.aten.sum.dim_IntList(view_265, [0, 1]); view_265 = None\n add_129 = torch.ops.aten.add.Tensor(add_126, mul_280); add_126 = mul_280 = None\n view_266 = 
torch.ops.aten.view.default(add_129, [64, 768])\n mm_63 = torch.ops.aten.mm.default(view_266, permute_249); permute_249 = None\n permute_250 = torch.ops.aten.permute.default(view_266, [1, 0])\n permute_36 = torch.ops.aten.permute.default(getitem_49, [0, 2, 1, 3])\n view_53 = torch.ops.aten.view.default(permute_36, [1, 64, 768]); permute_36 = None\n view_54 = torch.ops.aten.view.default(view_53, [64, 768]); view_53 = None\n mm_64 = torch.ops.aten.mm.default(permute_250, view_54); permute_250 = view_54 = None\n permute_251 = torch.ops.aten.permute.default(mm_64, [1, 0]); mm_64 = None\n sum_95 = torch.ops.aten.sum.dim_IntList(view_266, [0], True); view_266 = None\n view_267 = torch.ops.aten.view.default(sum_95, [768]); sum_95 = None\n permute_252 = torch.ops.aten.permute.default(permute_251, [1, 0]); permute_251 = None\n view_268 = torch.ops.aten.view.default(mm_63, [1, 64, 768]); mm_63 = None\n view_269 = torch.ops.aten.view.default(view_268, [1, 64, 12, 64]); view_268 = None\n permute_253 = torch.ops.aten.permute.default(view_269, [0, 2, 1, 3]); view_269 = None\n _scaled_dot_product_efficient_attention_backward_7 = torch.ops.aten._scaled_dot_product_efficient_attention_backward.default(permute_253, permute_34, permute_33, permute_35, None, getitem_49, getitem_50, getitem_51, getitem_52, 0.0, [True, True, True, False], True); permute_253 = permute_34 = permute_33 = permute_35 = getitem_49 = getitem_50 = getitem_51 = getitem_52 = None\n getitem_162 = _scaled_dot_product_efficient_attention_backward_7[0]\n getitem_163 = _scaled_dot_product_efficient_attention_backward_7[1]\n getitem_164 = _scaled_dot_product_efficient_attention_backward_7[2]; _scaled_dot_product_efficient_attention_backward_7 = None\n permute_254 = torch.ops.aten.permute.default(getitem_164, [0, 2, 1, 3]); getitem_164 = None\n view_270 = torch.ops.aten.view.default(permute_254, [1, 64, 768]); permute_254 = None\n permute_255 = torch.ops.aten.permute.default(getitem_162, [0, 2, 1, 3]); getitem_162 = None\n view_271 = torch.ops.aten.view.default(permute_255, [1, 64, 768]); permute_255 = None\n permute_256 = torch.ops.aten.permute.default(getitem_163, [0, 2, 1, 3]); getitem_163 = None\n view_272 = torch.ops.aten.view.default(permute_256, [1, 64, 768]); permute_256 = None\n cat_7 = torch.ops.aten.cat.default([view_271, view_272, view_270], 2); view_271 = view_272 = view_270 = None\n view_273 = torch.ops.aten.view.default(cat_7, [64, 2304]); cat_7 = None\n mm_65 = torch.ops.aten.mm.default(view_273, permute_257); permute_257 = None\n permute_258 = torch.ops.aten.permute.default(view_273, [1, 0])\n mm_66 = torch.ops.aten.mm.default(permute_258, view_48); permute_258 = view_48 = None\n permute_259 = torch.ops.aten.permute.default(mm_66, [1, 0]); mm_66 = None\n sum_96 = torch.ops.aten.sum.dim_IntList(view_273, [0], True); view_273 = None\n view_274 = torch.ops.aten.view.default(sum_96, [2304]); sum_96 = None\n permute_260 = torch.ops.aten.permute.default(permute_259, [1, 0]); permute_259 = None\n view_275 = torch.ops.aten.view.default(mm_65, [1, 64, 768]); mm_65 = None\n mul_283 = torch.ops.aten.mul.Tensor(view_275, primals_52); primals_52 = None\n mul_284 = torch.ops.aten.mul.Tensor(mul_283, 768)\n sum_97 = torch.ops.aten.sum.dim_IntList(mul_283, [2], True)\n mul_285 = torch.ops.aten.mul.Tensor(mul_283, mul_32); mul_283 = None\n sum_98 = torch.ops.aten.sum.dim_IntList(mul_285, [2], True); mul_285 = None\n mul_286 = torch.ops.aten.mul.Tensor(mul_32, sum_98); sum_98 = None\n sub_82 = torch.ops.aten.sub.Tensor(mul_284, sum_97); mul_284 
= sum_97 = None\n sub_83 = torch.ops.aten.sub.Tensor(sub_82, mul_286); sub_82 = mul_286 = None\n mul_287 = torch.ops.aten.mul.Tensor(div_16, sub_83); div_16 = sub_83 = None\n mul_288 = torch.ops.aten.mul.Tensor(view_275, mul_32); mul_32 = None\n sum_99 = torch.ops.aten.sum.dim_IntList(mul_288, [0, 1]); mul_288 = None\n sum_100 = torch.ops.aten.sum.dim_IntList(view_275, [0, 1]); view_275 = None\n add_130 = torch.ops.aten.add.Tensor(add_129, mul_287); add_129 = mul_287 = None\n view_276 = torch.ops.aten.view.default(add_130, [64, 768])\n mm_67 = torch.ops.aten.mm.default(view_276, permute_261); permute_261 = None\n permute_262 = torch.ops.aten.permute.default(view_276, [1, 0])\n mm_68 = torch.ops.aten.mm.default(permute_262, view_46); permute_262 = view_46 = None\n permute_263 = torch.ops.aten.permute.default(mm_68, [1, 0]); mm_68 = None\n sum_101 = torch.ops.aten.sum.dim_IntList(view_276, [0], True); view_276 = None\n view_277 = torch.ops.aten.view.default(sum_101, [768]); sum_101 = None\n permute_264 = torch.ops.aten.permute.default(permute_263, [1, 0]); permute_263 = None\n view_278 = torch.ops.aten.view.default(mm_67, [1, 64, 3072]); mm_67 = None\n view_45 = torch.ops.aten.view.default(addmm_14, [1, 64, 3072]); addmm_14 = None\n mul_28 = torch.ops.aten.mul.Tensor(view_45, 0.5)\n mul_289 = torch.ops.aten.mul.Tensor(view_278, mul_28); mul_28 = None\n pow_4 = torch.ops.aten.pow.Tensor_Scalar(view_45, 3.0)\n mul_29 = torch.ops.aten.mul.Tensor(pow_4, 0.044715); pow_4 = None\n add_30 = torch.ops.aten.add.Tensor(view_45, mul_29); mul_29 = None\n mul_30 = torch.ops.aten.mul.Tensor(add_30, 0.7978845608028654); add_30 = None\n tanh_3 = torch.ops.aten.tanh.default(mul_30); mul_30 = None\n add_31 = torch.ops.aten.add.Tensor(tanh_3, 1.0)\n mul_290 = torch.ops.aten.mul.Tensor(view_278, add_31); view_278 = add_31 = None\n mul_291 = torch.ops.aten.mul.Tensor(tanh_3, tanh_3); tanh_3 = None\n sub_84 = torch.ops.aten.sub.Tensor(1, mul_291); mul_291 = None\n mul_292 = torch.ops.aten.mul.Tensor(mul_289, sub_84); mul_289 = sub_84 = None\n mul_293 = torch.ops.aten.mul.Tensor(mul_292, 0.7978845608028654); mul_292 = None\n mul_294 = torch.ops.aten.mul.Tensor(mul_293, 0.044715)\n pow_21 = torch.ops.aten.pow.Tensor_Scalar(view_45, 2.0); view_45 = None\n mul_295 = torch.ops.aten.mul.Scalar(pow_21, 3.0); pow_21 = None\n mul_296 = torch.ops.aten.mul.Tensor(mul_294, mul_295); mul_294 = mul_295 = None\n add_131 = torch.ops.aten.add.Tensor(mul_293, mul_296); mul_293 = mul_296 = None\n mul_297 = torch.ops.aten.mul.Tensor(mul_290, 0.5); mul_290 = None\n add_132 = torch.ops.aten.add.Tensor(add_131, mul_297); add_131 = mul_297 = None\n view_279 = torch.ops.aten.view.default(add_132, [64, 3072]); add_132 = None\n mm_69 = torch.ops.aten.mm.default(view_279, permute_265); permute_265 = None\n permute_266 = torch.ops.aten.permute.default(view_279, [1, 0])\n mm_70 = torch.ops.aten.mm.default(permute_266, view_44); permute_266 = view_44 = None\n permute_267 = torch.ops.aten.permute.default(mm_70, [1, 0]); mm_70 = None\n sum_102 = torch.ops.aten.sum.dim_IntList(view_279, [0], True); view_279 = None\n view_280 = torch.ops.aten.view.default(sum_102, [3072]); sum_102 = None\n permute_268 = torch.ops.aten.permute.default(permute_267, [1, 0]); permute_267 = None\n view_281 = torch.ops.aten.view.default(mm_69, [1, 64, 768]); mm_69 = None\n mul_299 = torch.ops.aten.mul.Tensor(view_281, primals_46); primals_46 = None\n mul_300 = torch.ops.aten.mul.Tensor(mul_299, 768)\n sum_103 = torch.ops.aten.sum.dim_IntList(mul_299, [2], True)\n 
mul_301 = torch.ops.aten.mul.Tensor(mul_299, mul_26); mul_299 = None\n sum_104 = torch.ops.aten.sum.dim_IntList(mul_301, [2], True); mul_301 = None\n mul_302 = torch.ops.aten.mul.Tensor(mul_26, sum_104); sum_104 = None\n sub_86 = torch.ops.aten.sub.Tensor(mul_300, sum_103); mul_300 = sum_103 = None\n sub_87 = torch.ops.aten.sub.Tensor(sub_86, mul_302); sub_86 = mul_302 = None\n mul_303 = torch.ops.aten.mul.Tensor(div_17, sub_87); div_17 = sub_87 = None\n mul_304 = torch.ops.aten.mul.Tensor(view_281, mul_26); mul_26 = None\n sum_105 = torch.ops.aten.sum.dim_IntList(mul_304, [0, 1]); mul_304 = None\n sum_106 = torch.ops.aten.sum.dim_IntList(view_281, [0, 1]); view_281 = None\n add_133 = torch.ops.aten.add.Tensor(add_130, mul_303); add_130 = mul_303 = None\n view_282 = torch.ops.aten.view.default(add_133, [64, 768])\n mm_71 = torch.ops.aten.mm.default(view_282, permute_269); permute_269 = None\n permute_270 = torch.ops.aten.permute.default(view_282, [1, 0])\n permute_28 = torch.ops.aten.permute.default(getitem_38, [0, 2, 1, 3])\n view_41 = torch.ops.aten.view.default(permute_28, [1, 64, 768]); permute_28 = None\n view_42 = torch.ops.aten.view.default(view_41, [64, 768]); view_41 = None\n mm_72 = torch.ops.aten.mm.default(permute_270, view_42); permute_270 = view_42 = None\n permute_271 = torch.ops.aten.permute.default(mm_72, [1, 0]); mm_72 = None\n sum_107 = torch.ops.aten.sum.dim_IntList(view_282, [0], True); view_282 = None\n view_283 = torch.ops.aten.view.default(sum_107, [768]); sum_107 = None\n permute_272 = torch.ops.aten.permute.default(permute_271, [1, 0]); permute_271 = None\n view_284 = torch.ops.aten.view.default(mm_71, [1, 64, 768]); mm_71 = None\n view_285 = torch.ops.aten.view.default(view_284, [1, 64, 12, 64]); view_284 = None\n permute_273 = torch.ops.aten.permute.default(view_285, [0, 2, 1, 3]); view_285 = None\n _scaled_dot_product_efficient_attention_backward_8 = torch.ops.aten._scaled_dot_product_efficient_attention_backward.default(permute_273, permute_26, permute_25, permute_27, None, getitem_38, getitem_39, getitem_40, getitem_41, 0.0, [True, True, True, False], True); permute_273 = permute_26 = permute_25 = permute_27 = getitem_38 = getitem_39 = getitem_40 = getitem_41 = None\n getitem_166 = _scaled_dot_product_efficient_attention_backward_8[0]\n getitem_167 = _scaled_dot_product_efficient_attention_backward_8[1]\n getitem_168 = _scaled_dot_product_efficient_attention_backward_8[2]; _scaled_dot_product_efficient_attention_backward_8 = None\n permute_274 = torch.ops.aten.permute.default(getitem_168, [0, 2, 1, 3]); getitem_168 = None\n view_286 = torch.ops.aten.view.default(permute_274, [1, 64, 768]); permute_274 = None\n permute_275 = torch.ops.aten.permute.default(getitem_166, [0, 2, 1, 3]); getitem_166 = None\n view_287 = torch.ops.aten.view.default(permute_275, [1, 64, 768]); permute_275 = None\n permute_276 = torch.ops.aten.permute.default(getitem_167, [0, 2, 1, 3]); getitem_167 = None\n view_288 = torch.ops.aten.view.default(permute_276, [1, 64, 768]); permute_276 = None\n cat_8 = torch.ops.aten.cat.default([view_287, view_288, view_286], 2); view_287 = view_288 = view_286 = None\n view_289 = torch.ops.aten.view.default(cat_8, [64, 2304]); cat_8 = None\n mm_73 = torch.ops.aten.mm.default(view_289, permute_277); permute_277 = None\n permute_278 = torch.ops.aten.permute.default(view_289, [1, 0])\n mm_74 = torch.ops.aten.mm.default(permute_278, view_36); permute_278 = view_36 = None\n permute_279 = torch.ops.aten.permute.default(mm_74, [1, 0]); mm_74 = None\n sum_108 
= torch.ops.aten.sum.dim_IntList(view_289, [0], True); view_289 = None\n view_290 = torch.ops.aten.view.default(sum_108, [2304]); sum_108 = None\n permute_280 = torch.ops.aten.permute.default(permute_279, [1, 0]); permute_279 = None\n view_291 = torch.ops.aten.view.default(mm_73, [1, 64, 768]); mm_73 = None\n mul_306 = torch.ops.aten.mul.Tensor(view_291, primals_40); primals_40 = None\n mul_307 = torch.ops.aten.mul.Tensor(mul_306, 768)\n sum_109 = torch.ops.aten.sum.dim_IntList(mul_306, [2], True)\n mul_308 = torch.ops.aten.mul.Tensor(mul_306, mul_24); mul_306 = None\n sum_110 = torch.ops.aten.sum.dim_IntList(mul_308, [2], True); mul_308 = None\n mul_309 = torch.ops.aten.mul.Tensor(mul_24, sum_110); sum_110 = None\n sub_89 = torch.ops.aten.sub.Tensor(mul_307, sum_109); mul_307 = sum_109 = None\n sub_90 = torch.ops.aten.sub.Tensor(sub_89, mul_309); sub_89 = mul_309 = None\n mul_310 = torch.ops.aten.mul.Tensor(div_18, sub_90); div_18 = sub_90 = None\n mul_311 = torch.ops.aten.mul.Tensor(view_291, mul_24); mul_24 = None\n sum_111 = torch.ops.aten.sum.dim_IntList(mul_311, [0, 1]); mul_311 = None\n sum_112 = torch.ops.aten.sum.dim_IntList(view_291, [0, 1]); view_291 = None\n add_134 = torch.ops.aten.add.Tensor(add_133, mul_310); add_133 = mul_310 = None\n view_292 = torch.ops.aten.view.default(add_134, [64, 768])\n mm_75 = torch.ops.aten.mm.default(view_292, permute_281); permute_281 = None\n permute_282 = torch.ops.aten.permute.default(view_292, [1, 0])\n mm_76 = torch.ops.aten.mm.default(permute_282, view_34); permute_282 = view_34 = None\n permute_283 = torch.ops.aten.permute.default(mm_76, [1, 0]); mm_76 = None\n sum_113 = torch.ops.aten.sum.dim_IntList(view_292, [0], True); view_292 = None\n view_293 = torch.ops.aten.view.default(sum_113, [768]); sum_113 = None\n permute_284 = torch.ops.aten.permute.default(permute_283, [1, 0]); permute_283 = None\n view_294 = torch.ops.aten.view.default(mm_75, [1, 64, 3072]); mm_75 = None\n view_33 = torch.ops.aten.view.default(addmm_10, [1, 64, 3072]); addmm_10 = None\n mul_20 = torch.ops.aten.mul.Tensor(view_33, 0.5)\n mul_312 = torch.ops.aten.mul.Tensor(view_294, mul_20); mul_20 = None\n pow_3 = torch.ops.aten.pow.Tensor_Scalar(view_33, 3.0)\n mul_21 = torch.ops.aten.mul.Tensor(pow_3, 0.044715); pow_3 = None\n add_22 = torch.ops.aten.add.Tensor(view_33, mul_21); mul_21 = None\n mul_22 = torch.ops.aten.mul.Tensor(add_22, 0.7978845608028654); add_22 = None\n tanh_2 = torch.ops.aten.tanh.default(mul_22); mul_22 = None\n add_23 = torch.ops.aten.add.Tensor(tanh_2, 1.0)\n mul_313 = torch.ops.aten.mul.Tensor(view_294, add_23); view_294 = add_23 = None\n mul_314 = torch.ops.aten.mul.Tensor(tanh_2, tanh_2); tanh_2 = None\n sub_91 = torch.ops.aten.sub.Tensor(1, mul_314); mul_314 = None\n mul_315 = torch.ops.aten.mul.Tensor(mul_312, sub_91); mul_312 = sub_91 = None\n mul_316 = torch.ops.aten.mul.Tensor(mul_315, 0.7978845608028654); mul_315 = None\n mul_317 = torch.ops.aten.mul.Tensor(mul_316, 0.044715)\n pow_22 = torch.ops.aten.pow.Tensor_Scalar(view_33, 2.0); view_33 = None\n mul_318 = torch.ops.aten.mul.Scalar(pow_22, 3.0); pow_22 = None\n mul_319 = torch.ops.aten.mul.Tensor(mul_317, mul_318); mul_317 = mul_318 = None\n add_135 = torch.ops.aten.add.Tensor(mul_316, mul_319); mul_316 = mul_319 = None\n mul_320 = torch.ops.aten.mul.Tensor(mul_313, 0.5); mul_313 = None\n add_136 = torch.ops.aten.add.Tensor(add_135, mul_320); add_135 = mul_320 = None\n view_295 = torch.ops.aten.view.default(add_136, [64, 3072]); add_136 = None\n mm_77 = 
torch.ops.aten.mm.default(view_295, permute_285); permute_285 = None\n permute_286 = torch.ops.aten.permute.default(view_295, [1, 0])\n mm_78 = torch.ops.aten.mm.default(permute_286, view_32); permute_286 = view_32 = None\n permute_287 = torch.ops.aten.permute.default(mm_78, [1, 0]); mm_78 = None\n sum_114 = torch.ops.aten.sum.dim_IntList(view_295, [0], True); view_295 = None\n view_296 = torch.ops.aten.view.default(sum_114, [3072]); sum_114 = None\n permute_288 = torch.ops.aten.permute.default(permute_287, [1, 0]); permute_287 = None\n view_297 = torch.ops.aten.view.default(mm_77, [1, 64, 768]); mm_77 = None\n mul_322 = torch.ops.aten.mul.Tensor(view_297, primals_34); primals_34 = None\n mul_323 = torch.ops.aten.mul.Tensor(mul_322, 768)\n sum_115 = torch.ops.aten.sum.dim_IntList(mul_322, [2], True)\n mul_324 = torch.ops.aten.mul.Tensor(mul_322, mul_18); mul_322 = None\n sum_116 = torch.ops.aten.sum.dim_IntList(mul_324, [2], True); mul_324 = None\n mul_325 = torch.ops.aten.mul.Tensor(mul_18, sum_116); sum_116 = None\n sub_93 = torch.ops.aten.sub.Tensor(mul_323, sum_115); mul_323 = sum_115 = None\n sub_94 = torch.ops.aten.sub.Tensor(sub_93, mul_325); sub_93 = mul_325 = None\n mul_326 = torch.ops.aten.mul.Tensor(div_19, sub_94); div_19 = sub_94 = None\n mul_327 = torch.ops.aten.mul.Tensor(view_297, mul_18); mul_18 = None\n sum_117 = torch.ops.aten.sum.dim_IntList(mul_327, [0, 1]); mul_327 = None\n sum_118 = torch.ops.aten.sum.dim_IntList(view_297, [0, 1]); view_297 = None\n add_137 = torch.ops.aten.add.Tensor(add_134, mul_326); add_134 = mul_326 = None\n view_298 = torch.ops.aten.view.default(add_137, [64, 768])\n mm_79 = torch.ops.aten.mm.default(view_298, permute_289); permute_289 = None\n permute_290 = torch.ops.aten.permute.default(view_298, [1, 0])\n permute_20 = torch.ops.aten.permute.default(getitem_27, [0, 2, 1, 3])\n view_29 = torch.ops.aten.view.default(permute_20, [1, 64, 768]); permute_20 = None\n view_30 = torch.ops.aten.view.default(view_29, [64, 768]); view_29 = None\n mm_80 = torch.ops.aten.mm.default(permute_290, view_30); permute_290 = view_30 = None\n permute_291 = torch.ops.aten.permute.default(mm_80, [1, 0]); mm_80 = None\n sum_119 = torch.ops.aten.sum.dim_IntList(view_298, [0], True); view_298 = None\n view_299 = torch.ops.aten.view.default(sum_119, [768]); sum_119 = None\n permute_292 = torch.ops.aten.permute.default(permute_291, [1, 0]); permute_291 = None\n view_300 = torch.ops.aten.view.default(mm_79, [1, 64, 768]); mm_79 = None\n view_301 = torch.ops.aten.view.default(view_300, [1, 64, 12, 64]); view_300 = None\n permute_293 = torch.ops.aten.permute.default(view_301, [0, 2, 1, 3]); view_301 = None\n _scaled_dot_product_efficient_attention_backward_9 = torch.ops.aten._scaled_dot_product_efficient_attention_backward.default(permute_293, permute_18, permute_17, permute_19, None, getitem_27, getitem_28, getitem_29, getitem_30, 0.0, [True, True, True, False], True); permute_293 = permute_18 = permute_17 = permute_19 = getitem_27 = getitem_28 = getitem_29 = getitem_30 = None\n getitem_170 = _scaled_dot_product_efficient_attention_backward_9[0]\n getitem_171 = _scaled_dot_product_efficient_attention_backward_9[1]\n getitem_172 = _scaled_dot_product_efficient_attention_backward_9[2]; _scaled_dot_product_efficient_attention_backward_9 = None\n permute_294 = torch.ops.aten.permute.default(getitem_172, [0, 2, 1, 3]); getitem_172 = None\n view_302 = torch.ops.aten.view.default(permute_294, [1, 64, 768]); permute_294 = None\n permute_295 = 
torch.ops.aten.permute.default(getitem_170, [0, 2, 1, 3]); getitem_170 = None\n view_303 = torch.ops.aten.view.default(permute_295, [1, 64, 768]); permute_295 = None\n permute_296 = torch.ops.aten.permute.default(getitem_171, [0, 2, 1, 3]); getitem_171 = None\n view_304 = torch.ops.aten.view.default(permute_296, [1, 64, 768]); permute_296 = None\n cat_9 = torch.ops.aten.cat.default([view_303, view_304, view_302], 2); view_303 = view_304 = view_302 = None\n view_305 = torch.ops.aten.view.default(cat_9, [64, 2304]); cat_9 = None\n mm_81 = torch.ops.aten.mm.default(view_305, permute_297); permute_297 = None\n permute_298 = torch.ops.aten.permute.default(view_305, [1, 0])\n mm_82 = torch.ops.aten.mm.default(permute_298, view_24); permute_298 = view_24 = None\n permute_299 = torch.ops.aten.permute.default(mm_82, [1, 0]); mm_82 = None\n sum_120 = torch.ops.aten.sum.dim_IntList(view_305, [0], True); view_305 = None\n view_306 = torch.ops.aten.view.default(sum_120, [2304]); sum_120 = None\n permute_300 = torch.ops.aten.permute.default(permute_299, [1, 0]); permute_299 = None\n view_307 = torch.ops.aten.view.default(mm_81, [1, 64, 768]); mm_81 = None\n mul_329 = torch.ops.aten.mul.Tensor(view_307, primals_28); primals_28 = None\n mul_330 = torch.ops.aten.mul.Tensor(mul_329, 768)\n sum_121 = torch.ops.aten.sum.dim_IntList(mul_329, [2], True)\n mul_331 = torch.ops.aten.mul.Tensor(mul_329, mul_16); mul_329 = None\n sum_122 = torch.ops.aten.sum.dim_IntList(mul_331, [2], True); mul_331 = None\n mul_332 = torch.ops.aten.mul.Tensor(mul_16, sum_122); sum_122 = None\n sub_96 = torch.ops.aten.sub.Tensor(mul_330, sum_121); mul_330 = sum_121 = None\n sub_97 = torch.ops.aten.sub.Tensor(sub_96, mul_332); sub_96 = mul_332 = None\n mul_333 = torch.ops.aten.mul.Tensor(div_20, sub_97); div_20 = sub_97 = None\n mul_334 = torch.ops.aten.mul.Tensor(view_307, mul_16); mul_16 = None\n sum_123 = torch.ops.aten.sum.dim_IntList(mul_334, [0, 1]); mul_334 = None\n sum_124 = torch.ops.aten.sum.dim_IntList(view_307, [0, 1]); view_307 = None\n add_138 = torch.ops.aten.add.Tensor(add_137, mul_333); add_137 = mul_333 = None\n view_308 = torch.ops.aten.view.default(add_138, [64, 768])\n mm_83 = torch.ops.aten.mm.default(view_308, permute_301); permute_301 = None\n permute_302 = torch.ops.aten.permute.default(view_308, [1, 0])\n mm_84 = torch.ops.aten.mm.default(permute_302, view_22); permute_302 = view_22 = None\n permute_303 = torch.ops.aten.permute.default(mm_84, [1, 0]); mm_84 = None\n sum_125 = torch.ops.aten.sum.dim_IntList(view_308, [0], True); view_308 = None\n view_309 = torch.ops.aten.view.default(sum_125, [768]); sum_125 = None\n permute_304 = torch.ops.aten.permute.default(permute_303, [1, 0]); permute_303 = None\n view_310 = torch.ops.aten.view.default(mm_83, [1, 64, 3072]); mm_83 = None\n view_21 = torch.ops.aten.view.default(addmm_6, [1, 64, 3072]); addmm_6 = None\n mul_12 = torch.ops.aten.mul.Tensor(view_21, 0.5)\n mul_335 = torch.ops.aten.mul.Tensor(view_310, mul_12); mul_12 = None\n pow_2 = torch.ops.aten.pow.Tensor_Scalar(view_21, 3.0)\n mul_13 = torch.ops.aten.mul.Tensor(pow_2, 0.044715); pow_2 = None\n add_14 = torch.ops.aten.add.Tensor(view_21, mul_13); mul_13 = None\n mul_14 = torch.ops.aten.mul.Tensor(add_14, 0.7978845608028654); add_14 = None\n tanh_1 = torch.ops.aten.tanh.default(mul_14); mul_14 = None\n add_15 = torch.ops.aten.add.Tensor(tanh_1, 1.0)\n mul_336 = torch.ops.aten.mul.Tensor(view_310, add_15); view_310 = add_15 = None\n mul_337 = torch.ops.aten.mul.Tensor(tanh_1, tanh_1); tanh_1 = None\n 
sub_98 = torch.ops.aten.sub.Tensor(1, mul_337); mul_337 = None\n mul_338 = torch.ops.aten.mul.Tensor(mul_335, sub_98); mul_335 = sub_98 = None\n mul_339 = torch.ops.aten.mul.Tensor(mul_338, 0.7978845608028654); mul_338 = None\n mul_340 = torch.ops.aten.mul.Tensor(mul_339, 0.044715)\n pow_23 = torch.ops.aten.pow.Tensor_Scalar(view_21, 2.0); view_21 = None\n mul_341 = torch.ops.aten.mul.Scalar(pow_23, 3.0); pow_23 = None\n mul_342 = torch.ops.aten.mul.Tensor(mul_340, mul_341); mul_340 = mul_341 = None\n add_139 = torch.ops.aten.add.Tensor(mul_339, mul_342); mul_339 = mul_342 = None\n mul_343 = torch.ops.aten.mul.Tensor(mul_336, 0.5); mul_336 = None\n add_140 = torch.ops.aten.add.Tensor(add_139, mul_343); add_139 = mul_343 = None\n view_311 = torch.ops.aten.view.default(add_140, [64, 3072]); add_140 = None\n mm_85 = torch.ops.aten.mm.default(view_311, permute_305); permute_305 = None\n permute_306 = torch.ops.aten.permute.default(view_311, [1, 0])\n mm_86 = torch.ops.aten.mm.default(permute_306, view_20); permute_306 = view_20 = None\n permute_307 = torch.ops.aten.permute.default(mm_86, [1, 0]); mm_86 = None\n sum_126 = torch.ops.aten.sum.dim_IntList(view_311, [0], True); view_311 = None\n view_312 = torch.ops.aten.view.default(sum_126, [3072]); sum_126 = None\n permute_308 = torch.ops.aten.permute.default(permute_307, [1, 0]); permute_307 = None\n view_313 = torch.ops.aten.view.default(mm_85, [1, 64, 768]); mm_85 = None\n mul_345 = torch.ops.aten.mul.Tensor(view_313, primals_22); primals_22 = None\n mul_346 = torch.ops.aten.mul.Tensor(mul_345, 768)\n sum_127 = torch.ops.aten.sum.dim_IntList(mul_345, [2], True)\n mul_347 = torch.ops.aten.mul.Tensor(mul_345, mul_10); mul_345 = None\n sum_128 = torch.ops.aten.sum.dim_IntList(mul_347, [2], True); mul_347 = None\n mul_348 = torch.ops.aten.mul.Tensor(mul_10, sum_128); sum_128 = None\n sub_100 = torch.ops.aten.sub.Tensor(mul_346, sum_127); mul_346 = sum_127 = None\n sub_101 = torch.ops.aten.sub.Tensor(sub_100, mul_348); sub_100 = mul_348 = None\n mul_349 = torch.ops.aten.mul.Tensor(div_21, sub_101); div_21 = sub_101 = None\n mul_350 = torch.ops.aten.mul.Tensor(view_313, mul_10); mul_10 = None\n sum_129 = torch.ops.aten.sum.dim_IntList(mul_350, [0, 1]); mul_350 = None\n sum_130 = torch.ops.aten.sum.dim_IntList(view_313, [0, 1]); view_313 = None\n add_141 = torch.ops.aten.add.Tensor(add_138, mul_349); add_138 = mul_349 = None\n view_314 = torch.ops.aten.view.default(add_141, [64, 768])\n mm_87 = torch.ops.aten.mm.default(view_314, permute_309); permute_309 = None\n permute_310 = torch.ops.aten.permute.default(view_314, [1, 0])\n permute_12 = torch.ops.aten.permute.default(getitem_16, [0, 2, 1, 3])\n view_17 = torch.ops.aten.view.default(permute_12, [1, 64, 768]); permute_12 = None\n view_18 = torch.ops.aten.view.default(view_17, [64, 768]); view_17 = None\n mm_88 = torch.ops.aten.mm.default(permute_310, view_18); permute_310 = view_18 = None\n permute_311 = torch.ops.aten.permute.default(mm_88, [1, 0]); mm_88 = None\n sum_131 = torch.ops.aten.sum.dim_IntList(view_314, [0], True); view_314 = None\n view_315 = torch.ops.aten.view.default(sum_131, [768]); sum_131 = None\n permute_312 = torch.ops.aten.permute.default(permute_311, [1, 0]); permute_311 = None\n view_316 = torch.ops.aten.view.default(mm_87, [1, 64, 768]); mm_87 = None\n view_317 = torch.ops.aten.view.default(view_316, [1, 64, 12, 64]); view_316 = None\n permute_313 = torch.ops.aten.permute.default(view_317, [0, 2, 1, 3]); view_317 = None\n _scaled_dot_product_efficient_attention_backward_10 
= torch.ops.aten._scaled_dot_product_efficient_attention_backward.default(permute_313, permute_10, permute_9, permute_11, None, getitem_16, getitem_17, getitem_18, getitem_19, 0.0, [True, True, True, False], True); permute_313 = permute_10 = permute_9 = permute_11 = getitem_16 = getitem_17 = getitem_18 = getitem_19 = None\n getitem_174 = _scaled_dot_product_efficient_attention_backward_10[0]\n getitem_175 = _scaled_dot_product_efficient_attention_backward_10[1]\n getitem_176 = _scaled_dot_product_efficient_attention_backward_10[2]; _scaled_dot_product_efficient_attention_backward_10 = None\n permute_314 = torch.ops.aten.permute.default(getitem_176, [0, 2, 1, 3]); getitem_176 = None\n view_318 = torch.ops.aten.view.default(permute_314, [1, 64, 768]); permute_314 = None\n permute_315 = torch.ops.aten.permute.default(getitem_174, [0, 2, 1, 3]); getitem_174 = None\n view_319 = torch.ops.aten.view.default(permute_315, [1, 64, 768]); permute_315 = None\n permute_316 = torch.ops.aten.permute.default(getitem_175, [0, 2, 1, 3]); getitem_175 = None\n view_320 = torch.ops.aten.view.default(permute_316, [1, 64, 768]); permute_316 = None\n cat_10 = torch.ops.aten.cat.default([view_319, view_320, view_318], 2); view_319 = view_320 = view_318 = None\n view_321 = torch.ops.aten.view.default(cat_10, [64, 2304]); cat_10 = None\n mm_89 = torch.ops.aten.mm.default(view_321, permute_317); permute_317 = None\n permute_318 = torch.ops.aten.permute.default(view_321, [1, 0])\n mm_90 = torch.ops.aten.mm.default(permute_318, view_12); permute_318 = view_12 = None\n permute_319 = torch.ops.aten.permute.default(mm_90, [1, 0]); mm_90 = None\n sum_132 = torch.ops.aten.sum.dim_IntList(view_321, [0], True); view_321 = None\n view_322 = torch.ops.aten.view.default(sum_132, [2304]); sum_132 = None\n permute_320 = torch.ops.aten.permute.default(permute_319, [1, 0]); permute_319 = None\n view_323 = torch.ops.aten.view.default(mm_89, [1, 64, 768]); mm_89 = None\n mul_352 = torch.ops.aten.mul.Tensor(view_323, primals_16); primals_16 = None\n mul_353 = torch.ops.aten.mul.Tensor(mul_352, 768)\n sum_133 = torch.ops.aten.sum.dim_IntList(mul_352, [2], True)\n mul_354 = torch.ops.aten.mul.Tensor(mul_352, mul_8); mul_352 = None\n sum_134 = torch.ops.aten.sum.dim_IntList(mul_354, [2], True); mul_354 = None\n mul_355 = torch.ops.aten.mul.Tensor(mul_8, sum_134); sum_134 = None\n sub_103 = torch.ops.aten.sub.Tensor(mul_353, sum_133); mul_353 = sum_133 = None\n sub_104 = torch.ops.aten.sub.Tensor(sub_103, mul_355); sub_103 = mul_355 = None\n mul_356 = torch.ops.aten.mul.Tensor(div_22, sub_104); div_22 = sub_104 = None\n mul_357 = torch.ops.aten.mul.Tensor(view_323, mul_8); mul_8 = None\n sum_135 = torch.ops.aten.sum.dim_IntList(mul_357, [0, 1]); mul_357 = None\n sum_136 = torch.ops.aten.sum.dim_IntList(view_323, [0, 1]); view_323 = None\n add_142 = torch.ops.aten.add.Tensor(add_141, mul_356); add_141 = mul_356 = None\n view_324 = torch.ops.aten.view.default(add_142, [64, 768])\n mm_91 = torch.ops.aten.mm.default(view_324, permute_321); permute_321 = None\n permute_322 = torch.ops.aten.permute.default(view_324, [1, 0])\n mm_92 = torch.ops.aten.mm.default(permute_322, view_10); permute_322 = view_10 = None\n permute_323 = torch.ops.aten.permute.default(mm_92, [1, 0]); mm_92 = None\n sum_137 = torch.ops.aten.sum.dim_IntList(view_324, [0], True); view_324 = None\n view_325 = torch.ops.aten.view.default(sum_137, [768]); sum_137 = None\n permute_324 = torch.ops.aten.permute.default(permute_323, [1, 0]); permute_323 = None\n view_326 = 
torch.ops.aten.view.default(mm_91, [1, 64, 3072]); mm_91 = None\n view_9 = torch.ops.aten.view.default(addmm_2, [1, 64, 3072]); addmm_2 = None\n mul_4 = torch.ops.aten.mul.Tensor(view_9, 0.5)\n mul_358 = torch.ops.aten.mul.Tensor(view_326, mul_4); mul_4 = None\n pow_1 = torch.ops.aten.pow.Tensor_Scalar(view_9, 3.0)\n mul_5 = torch.ops.aten.mul.Tensor(pow_1, 0.044715); pow_1 = None\n add_6 = torch.ops.aten.add.Tensor(view_9, mul_5); mul_5 = None\n mul_6 = torch.ops.aten.mul.Tensor(add_6, 0.7978845608028654); add_6 = None\n tanh = torch.ops.aten.tanh.default(mul_6); mul_6 = None\n add_7 = torch.ops.aten.add.Tensor(tanh, 1.0)\n mul_359 = torch.ops.aten.mul.Tensor(view_326, add_7); view_326 = add_7 = None\n mul_360 = torch.ops.aten.mul.Tensor(tanh, tanh); tanh = None\n sub_105 = torch.ops.aten.sub.Tensor(1, mul_360); mul_360 = None\n mul_361 = torch.ops.aten.mul.Tensor(mul_358, sub_105); mul_358 = sub_105 = None\n mul_362 = torch.ops.aten.mul.Tensor(mul_361, 0.7978845608028654); mul_361 = None\n mul_363 = torch.ops.aten.mul.Tensor(mul_362, 0.044715)\n pow_24 = torch.ops.aten.pow.Tensor_Scalar(view_9, 2.0); view_9 = None\n mul_364 = torch.ops.aten.mul.Scalar(pow_24, 3.0); pow_24 = None\n mul_365 = torch.ops.aten.mul.Tensor(mul_363, mul_364); mul_363 = mul_364 = None\n add_143 = torch.ops.aten.add.Tensor(mul_362, mul_365); mul_362 = mul_365 = None\n mul_366 = torch.ops.aten.mul.Tensor(mul_359, 0.5); mul_359 = None\n add_144 = torch.ops.aten.add.Tensor(add_143, mul_366); add_143 = mul_366 = None\n view_327 = torch.ops.aten.view.default(add_144, [64, 3072]); add_144 = None\n mm_93 = torch.ops.aten.mm.default(view_327, permute_325); permute_325 = None\n permute_326 = torch.ops.aten.permute.default(view_327, [1, 0])\n mm_94 = torch.ops.aten.mm.default(permute_326, view_8); permute_326 = view_8 = None\n permute_327 = torch.ops.aten.permute.default(mm_94, [1, 0]); mm_94 = None\n sum_138 = torch.ops.aten.sum.dim_IntList(view_327, [0], True); view_327 = None\n view_328 = torch.ops.aten.view.default(sum_138, [3072]); sum_138 = None\n permute_328 = torch.ops.aten.permute.default(permute_327, [1, 0]); permute_327 = None\n view_329 = torch.ops.aten.view.default(mm_93, [1, 64, 768]); mm_93 = None\n mul_368 = torch.ops.aten.mul.Tensor(view_329, primals_10); primals_10 = None\n mul_369 = torch.ops.aten.mul.Tensor(mul_368, 768)\n sum_139 = torch.ops.aten.sum.dim_IntList(mul_368, [2], True)\n mul_370 = torch.ops.aten.mul.Tensor(mul_368, mul_2); mul_368 = None\n sum_140 = torch.ops.aten.sum.dim_IntList(mul_370, [2], True); mul_370 = None\n mul_371 = torch.ops.aten.mul.Tensor(mul_2, sum_140); sum_140 = None\n sub_107 = torch.ops.aten.sub.Tensor(mul_369, sum_139); mul_369 = sum_139 = None\n sub_108 = torch.ops.aten.sub.Tensor(sub_107, mul_371); sub_107 = mul_371 = None\n mul_372 = torch.ops.aten.mul.Tensor(div_23, sub_108); div_23 = sub_108 = None\n mul_373 = torch.ops.aten.mul.Tensor(view_329, mul_2); mul_2 = None\n sum_141 = torch.ops.aten.sum.dim_IntList(mul_373, [0, 1]); mul_373 = None\n sum_142 = torch.ops.aten.sum.dim_IntList(view_329, [0, 1]); view_329 = None\n add_145 = torch.ops.aten.add.Tensor(add_142, mul_372); add_142 = mul_372 = None\n view_330 = torch.ops.aten.view.default(add_145, [64, 768])\n mm_95 = torch.ops.aten.mm.default(view_330, permute_329); permute_329 = None\n permute_330 = torch.ops.aten.permute.default(view_330, [1, 0])\n permute_4 = torch.ops.aten.permute.default(getitem_5, [0, 2, 1, 3])\n view_5 = torch.ops.aten.view.default(permute_4, [1, 64, 768]); permute_4 = None\n view_6 = 
torch.ops.aten.view.default(view_5, [64, 768]); view_5 = None\n mm_96 = torch.ops.aten.mm.default(permute_330, view_6); permute_330 = view_6 = None\n permute_331 = torch.ops.aten.permute.default(mm_96, [1, 0]); mm_96 = None\n sum_143 = torch.ops.aten.sum.dim_IntList(view_330, [0], True); view_330 = None\n view_331 = torch.ops.aten.view.default(sum_143, [768]); sum_143 = None\n permute_332 = torch.ops.aten.permute.default(permute_331, [1, 0]); permute_331 = None\n view_332 = torch.ops.aten.view.default(mm_95, [1, 64, 768]); mm_95 = None\n view_333 = torch.ops.aten.view.default(view_332, [1, 64, 12, 64]); view_332 = None\n permute_333 = torch.ops.aten.permute.default(view_333, [0, 2, 1, 3]); view_333 = None\n _scaled_dot_product_efficient_attention_backward_11 = torch.ops.aten._scaled_dot_product_efficient_attention_backward.default(permute_333, permute_2, permute_1, permute_3, None, getitem_5, getitem_6, getitem_7, getitem_8, 0.0, [True, True, True, False], True); permute_333 = permute_2 = permute_1 = permute_3 = getitem_5 = getitem_6 = getitem_7 = getitem_8 = None\n getitem_178 = _scaled_dot_product_efficient_attention_backward_11[0]\n getitem_179 = _scaled_dot_product_efficient_attention_backward_11[1]\n getitem_180 = _scaled_dot_product_efficient_attention_backward_11[2]; _scaled_dot_product_efficient_attention_backward_11 = None\n permute_334 = torch.ops.aten.permute.default(getitem_180, [0, 2, 1, 3]); getitem_180 = None\n view_334 = torch.ops.aten.view.default(permute_334, [1, 64, 768]); permute_334 = None\n permute_335 = torch.ops.aten.permute.default(getitem_178, [0, 2, 1, 3]); getitem_178 = None\n view_335 = torch.ops.aten.view.default(permute_335, [1, 64, 768]); permute_335 = None\n permute_336 = torch.ops.aten.permute.default(getitem_179, [0, 2, 1, 3]); getitem_179 = None\n view_336 = torch.ops.aten.view.default(permute_336, [1, 64, 768]); permute_336 = None\n cat_11 = torch.ops.aten.cat.default([view_335, view_336, view_334], 2); view_335 = view_336 = view_334 = None\n view_337 = torch.ops.aten.view.default(cat_11, [64, 2304]); cat_11 = None\n mm_97 = torch.ops.aten.mm.default(view_337, permute_337); permute_337 = None\n permute_338 = torch.ops.aten.permute.default(view_337, [1, 0])\n mm_98 = torch.ops.aten.mm.default(permute_338, view); permute_338 = view = None\n permute_339 = torch.ops.aten.permute.default(mm_98, [1, 0]); mm_98 = None\n sum_144 = torch.ops.aten.sum.dim_IntList(view_337, [0], True); view_337 = None\n view_338 = torch.ops.aten.view.default(sum_144, [2304]); sum_144 = None\n permute_340 = torch.ops.aten.permute.default(permute_339, [1, 0]); permute_339 = None\n view_339 = torch.ops.aten.view.default(mm_97, [1, 64, 768]); mm_97 = None\n mul_375 = torch.ops.aten.mul.Tensor(view_339, primals_4); primals_4 = None\n mul_376 = torch.ops.aten.mul.Tensor(mul_375, 768)\n sum_145 = torch.ops.aten.sum.dim_IntList(mul_375, [2], True)\n mul_377 = torch.ops.aten.mul.Tensor(mul_375, mul); mul_375 = None\n sum_146 = torch.ops.aten.sum.dim_IntList(mul_377, [2], True); mul_377 = None\n mul_378 = torch.ops.aten.mul.Tensor(mul, sum_146); sum_146 = None\n sub_110 = torch.ops.aten.sub.Tensor(mul_376, sum_145); mul_376 = sum_145 = None\n sub_111 = torch.ops.aten.sub.Tensor(sub_110, mul_378); sub_110 = mul_378 = None\n mul_379 = torch.ops.aten.mul.Tensor(div_24, sub_111); div_24 = sub_111 = None\n mul_380 = torch.ops.aten.mul.Tensor(view_339, mul); mul = None\n sum_147 = torch.ops.aten.sum.dim_IntList(mul_380, [0, 1]); mul_380 = None\n sum_148 = torch.ops.aten.sum.dim_IntList(view_339, 
[0, 1]); view_339 = None\n add_146 = torch.ops.aten.add.Tensor(add_145, mul_379); add_145 = mul_379 = None\n eq = torch.ops.aten.eq.Scalar(unsqueeze, -1)\n unsqueeze_1 = torch.ops.aten.unsqueeze.default(eq, -1); eq = None\n full_default_4 = torch.ops.aten.full.default([], 0.0, dtype = torch.float32, layout = torch.strided, device = device(type='cuda', index=0), pin_memory = False)\n where = torch.ops.aten.where.self(unsqueeze_1, full_default_4, add_146); unsqueeze_1 = None\n full_default_5 = torch.ops.aten.full.default([1024, 768], 0, dtype = torch.float32, layout = torch.strided, device = device(type='cuda', index=0), pin_memory = False)\n _unsafe_index_put = torch.ops.aten._unsafe_index_put.default(full_default_5, [unsqueeze], where, True); full_default_5 = unsqueeze = where = None\n eq_1 = torch.ops.aten.eq.Scalar(primals_1, -1)\n unsqueeze_2 = torch.ops.aten.unsqueeze.default(eq_1, -1); eq_1 = None\n where_1 = torch.ops.aten.where.self(unsqueeze_2, full_default_4, add_146); unsqueeze_2 = full_default_4 = add_146 = None\n full_default_7 = torch.ops.aten.full.default([50304, 768], 0, dtype = torch.float32, layout = torch.strided, device = device(type='cuda', index=0), pin_memory = False)\n _unsafe_index_put_1 = torch.ops.aten._unsafe_index_put.default(full_default_7, [primals_1], where_1, True); full_default_7 = primals_1 = where_1 = None\n add_147 = torch.ops.aten.add.Tensor(permute_100, _unsafe_index_put_1); permute_100 = _unsafe_index_put_1 = None\n return (None, add_147, _unsafe_index_put, sum_147, sum_148, permute_340, view_338, permute_332, view_331, sum_141, sum_142, permute_328, view_328, permute_324, view_325, sum_135, sum_136, permute_320, view_322, permute_312, view_315, sum_129, sum_130, permute_308, view_312, permute_304, view_309, sum_123, sum_124, permute_300, view_306, permute_292, view_299, sum_117, sum_118, permute_288, view_296, permute_284, view_293, sum_111, sum_112, permute_280, view_290, permute_272, view_283, sum_105, sum_106, permute_268, view_280, permute_264, view_277, sum_99, sum_100, permute_260, view_274, permute_252, view_267, sum_93, sum_94, permute_248, view_264, permute_244, view_261, sum_87, sum_88, permute_240, view_258, permute_232, view_251, sum_81, sum_82, permute_228, view_248, permute_224, view_245, sum_75, sum_76, permute_220, view_242, permute_212, view_235, sum_69, sum_70, permute_208, view_232, permute_204, view_229, sum_63, sum_64, permute_200, view_226, permute_192, view_219, sum_57, sum_58, permute_188, view_216, permute_184, view_213, sum_51, sum_52, permute_180, view_210, permute_172, view_203, sum_45, sum_46, permute_168, view_200, permute_164, view_197, sum_39, sum_40, permute_160, view_194, permute_152, view_187, sum_33, sum_34, permute_148, view_184, permute_144, view_181, sum_27, sum_28, permute_140, view_178, permute_132, view_171, sum_21, sum_22, permute_128, view_168, permute_124, view_165, sum_15, sum_16, permute_120, view_162, permute_112, view_155, sum_9, sum_10, permute_108, view_152, permute_104, view_149, sum_3, sum_4)\n \n# To see more debug info, please use `graph_module.print_readable()`", + "[5yvj33oys7gdk532knvznzkakzkoiu5vfpvjdr4iy5pj5yukree] example_inputs[0]: TensorMetadata(dtype=torch.int64, shape=torch.Size([1, 64]), stride=(64, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=512, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, 
sparse_dim=None)", + "[ymueslj5cs5k2l243gcdaohltqzpminou4x5gltmu2cgjgroyyd] example_inputs[1]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=3072, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[ymueslj5cs5k2l243gcdaohltqzpminou4x5gltmu2cgjgroyyd] example_inputs[2]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=3072, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[ymueslj5cs5k2l243gcdaohltqzpminou4x5gltmu2cgjgroyyd] example_inputs[3]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=3072, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[ymueslj5cs5k2l243gcdaohltqzpminou4x5gltmu2cgjgroyyd] example_inputs[4]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=3072, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[ymueslj5cs5k2l243gcdaohltqzpminou4x5gltmu2cgjgroyyd] example_inputs[5]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=3072, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[ymueslj5cs5k2l243gcdaohltqzpminou4x5gltmu2cgjgroyyd] example_inputs[6]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=3072, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[ymueslj5cs5k2l243gcdaohltqzpminou4x5gltmu2cgjgroyyd] example_inputs[7]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=3072, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[ymueslj5cs5k2l243gcdaohltqzpminou4x5gltmu2cgjgroyyd] example_inputs[8]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=3072, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, 
is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[ymueslj5cs5k2l243gcdaohltqzpminou4x5gltmu2cgjgroyyd] example_inputs[9]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=3072, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[ymueslj5cs5k2l243gcdaohltqzpminou4x5gltmu2cgjgroyyd] example_inputs[10]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=3072, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[ymueslj5cs5k2l243gcdaohltqzpminou4x5gltmu2cgjgroyyd] example_inputs[11]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=3072, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[ymueslj5cs5k2l243gcdaohltqzpminou4x5gltmu2cgjgroyyd] example_inputs[12]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=3072, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[ymueslj5cs5k2l243gcdaohltqzpminou4x5gltmu2cgjgroyyd] example_inputs[13]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=3072, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[ymueslj5cs5k2l243gcdaohltqzpminou4x5gltmu2cgjgroyyd] example_inputs[14]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=3072, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[ymueslj5cs5k2l243gcdaohltqzpminou4x5gltmu2cgjgroyyd] example_inputs[15]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=3072, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[ymueslj5cs5k2l243gcdaohltqzpminou4x5gltmu2cgjgroyyd] example_inputs[16]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=3072, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, 
is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[ymueslj5cs5k2l243gcdaohltqzpminou4x5gltmu2cgjgroyyd] example_inputs[17]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=3072, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[ymueslj5cs5k2l243gcdaohltqzpminou4x5gltmu2cgjgroyyd] example_inputs[18]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=3072, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[ymueslj5cs5k2l243gcdaohltqzpminou4x5gltmu2cgjgroyyd] example_inputs[19]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=3072, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[ymueslj5cs5k2l243gcdaohltqzpminou4x5gltmu2cgjgroyyd] example_inputs[20]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=3072, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[ymueslj5cs5k2l243gcdaohltqzpminou4x5gltmu2cgjgroyyd] example_inputs[21]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=3072, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[ymueslj5cs5k2l243gcdaohltqzpminou4x5gltmu2cgjgroyyd] example_inputs[22]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=3072, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[ymueslj5cs5k2l243gcdaohltqzpminou4x5gltmu2cgjgroyyd] example_inputs[23]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=3072, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[ymueslj5cs5k2l243gcdaohltqzpminou4x5gltmu2cgjgroyyd] example_inputs[24]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=3072, requires_grad=False, is_quantized=False, 
is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[ymueslj5cs5k2l243gcdaohltqzpminou4x5gltmu2cgjgroyyd] example_inputs[25]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=3072, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[5yvj33oys7gdk532knvznzkakzkoiu5vfpvjdr4iy5pj5yukree] example_inputs[26]: TensorMetadata(dtype=torch.int64, shape=torch.Size([1, 64]), stride=(64, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=512, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[4do6jylr3mtx4d264ertpe6pziorxtcf2s4hikwtibwmouxw25h] example_inputs[27]: TensorMetadata(dtype=torch.float32, shape=torch.Size([1, 64, 768]), stride=(49152, 768, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=196608, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[rw2lfs4g3wumdacs5ykwrdhclqz5vzsnjfqmmo5ztwqnxn75jxd] example_inputs[28]: TensorMetadata(dtype=torch.float32, shape=torch.Size([64, 768]), stride=(768, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=196608, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[yhigovqtqf2ic6iv63ysozcs3xf23g6ytwxxo3ngkb2rkejwqhy] example_inputs[29]: TensorMetadata(dtype=torch.float32, shape=torch.Size([1, 12, 64, 64]), stride=(147456, 64, 2304, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=None, storage_offset=768, storage_bytes=589824, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[tkhdp6hpzz4wkrsn2gtkob6gjizy3tnh2i335gnvs5dqze2c635] example_inputs[30]: TensorMetadata(dtype=torch.float32, shape=torch.Size([1, 12, 64, 64]), stride=(147456, 64, 2304, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=None, storage_offset=0, storage_bytes=589824, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[tymrisxjw2ay2wkyisoywckuw25f6qkcnbqn6ixlh4bzobrvrpw] example_inputs[31]: TensorMetadata(dtype=torch.float32, shape=torch.Size([1, 12, 64, 64]), stride=(147456, 64, 2304, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=None, storage_offset=1536, storage_bytes=589824, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[vwryj3hywd6syuzjzojtwl2i75fr4o3frmbjuk3jj5iywt4uhml] example_inputs[32]: TensorMetadata(dtype=torch.float32, shape=torch.Size([1, 12, 64, 64]), stride=(49152, 64, 768, 1), device=device(type='cuda', index=0), 
layout=torch.strided, memory_format=None, storage_offset=0, storage_bytes=196608, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[ypapltbv2xbwkwfrnhvu5iqgrtqrjol43wgxrobbgunlbawqjpw] example_inputs[33]: TensorMetadata(dtype=torch.float32, shape=torch.Size([1, 12, 64]), stride=(768, 64, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=3072, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[tupemxj7hujko5zasqsng7u56x3chsaznovjmn2zs4kn5wlvoce] example_inputs[34]: TensorMetadata(dtype=torch.int64, shape=torch.Size([]), stride=(), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=8, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[tupemxj7hujko5zasqsng7u56x3chsaznovjmn2zs4kn5wlvoce] example_inputs[35]: TensorMetadata(dtype=torch.int64, shape=torch.Size([]), stride=(), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=8, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[4do6jylr3mtx4d264ertpe6pziorxtcf2s4hikwtibwmouxw25h] example_inputs[36]: TensorMetadata(dtype=torch.float32, shape=torch.Size([1, 64, 768]), stride=(49152, 768, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=196608, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[rw2lfs4g3wumdacs5ykwrdhclqz5vzsnjfqmmo5ztwqnxn75jxd] example_inputs[37]: TensorMetadata(dtype=torch.float32, shape=torch.Size([64, 768]), stride=(768, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=196608, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[y3abg6r4y5l3gztzm3humpnaec6pvtrvrqjeyknxzxbyuum5hro] example_inputs[38]: TensorMetadata(dtype=torch.float32, shape=torch.Size([64, 3072]), stride=(3072, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=786432, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[y3abg6r4y5l3gztzm3humpnaec6pvtrvrqjeyknxzxbyuum5hro] example_inputs[39]: TensorMetadata(dtype=torch.float32, shape=torch.Size([64, 3072]), stride=(3072, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=786432, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[4do6jylr3mtx4d264ertpe6pziorxtcf2s4hikwtibwmouxw25h] example_inputs[40]: TensorMetadata(dtype=torch.float32, 
shape=torch.Size([1, 64, 768]), stride=(49152, 768, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=196608, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[rw2lfs4g3wumdacs5ykwrdhclqz5vzsnjfqmmo5ztwqnxn75jxd] example_inputs[41]: TensorMetadata(dtype=torch.float32, shape=torch.Size([64, 768]), stride=(768, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=196608, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[yhigovqtqf2ic6iv63ysozcs3xf23g6ytwxxo3ngkb2rkejwqhy] example_inputs[42]: TensorMetadata(dtype=torch.float32, shape=torch.Size([1, 12, 64, 64]), stride=(147456, 64, 2304, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=None, storage_offset=768, storage_bytes=589824, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[tkhdp6hpzz4wkrsn2gtkob6gjizy3tnh2i335gnvs5dqze2c635] example_inputs[43]: TensorMetadata(dtype=torch.float32, shape=torch.Size([1, 12, 64, 64]), stride=(147456, 64, 2304, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=None, storage_offset=0, storage_bytes=589824, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[tymrisxjw2ay2wkyisoywckuw25f6qkcnbqn6ixlh4bzobrvrpw] example_inputs[44]: TensorMetadata(dtype=torch.float32, shape=torch.Size([1, 12, 64, 64]), stride=(147456, 64, 2304, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=None, storage_offset=1536, storage_bytes=589824, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[vwryj3hywd6syuzjzojtwl2i75fr4o3frmbjuk3jj5iywt4uhml] example_inputs[45]: TensorMetadata(dtype=torch.float32, shape=torch.Size([1, 12, 64, 64]), stride=(49152, 64, 768, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=None, storage_offset=0, storage_bytes=196608, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[ypapltbv2xbwkwfrnhvu5iqgrtqrjol43wgxrobbgunlbawqjpw] example_inputs[46]: TensorMetadata(dtype=torch.float32, shape=torch.Size([1, 12, 64]), stride=(768, 64, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=3072, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[tupemxj7hujko5zasqsng7u56x3chsaznovjmn2zs4kn5wlvoce] example_inputs[47]: TensorMetadata(dtype=torch.int64, shape=torch.Size([]), stride=(), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=8, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + 
"[tupemxj7hujko5zasqsng7u56x3chsaznovjmn2zs4kn5wlvoce] example_inputs[48]: TensorMetadata(dtype=torch.int64, shape=torch.Size([]), stride=(), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=8, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[4do6jylr3mtx4d264ertpe6pziorxtcf2s4hikwtibwmouxw25h] example_inputs[49]: TensorMetadata(dtype=torch.float32, shape=torch.Size([1, 64, 768]), stride=(49152, 768, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=196608, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[rw2lfs4g3wumdacs5ykwrdhclqz5vzsnjfqmmo5ztwqnxn75jxd] example_inputs[50]: TensorMetadata(dtype=torch.float32, shape=torch.Size([64, 768]), stride=(768, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=196608, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[y3abg6r4y5l3gztzm3humpnaec6pvtrvrqjeyknxzxbyuum5hro] example_inputs[51]: TensorMetadata(dtype=torch.float32, shape=torch.Size([64, 3072]), stride=(3072, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=786432, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[y3abg6r4y5l3gztzm3humpnaec6pvtrvrqjeyknxzxbyuum5hro] example_inputs[52]: TensorMetadata(dtype=torch.float32, shape=torch.Size([64, 3072]), stride=(3072, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=786432, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[4do6jylr3mtx4d264ertpe6pziorxtcf2s4hikwtibwmouxw25h] example_inputs[53]: TensorMetadata(dtype=torch.float32, shape=torch.Size([1, 64, 768]), stride=(49152, 768, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=196608, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[rw2lfs4g3wumdacs5ykwrdhclqz5vzsnjfqmmo5ztwqnxn75jxd] example_inputs[54]: TensorMetadata(dtype=torch.float32, shape=torch.Size([64, 768]), stride=(768, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=196608, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[yhigovqtqf2ic6iv63ysozcs3xf23g6ytwxxo3ngkb2rkejwqhy] example_inputs[55]: TensorMetadata(dtype=torch.float32, shape=torch.Size([1, 12, 64, 64]), stride=(147456, 64, 2304, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=None, storage_offset=768, storage_bytes=589824, requires_grad=False, is_quantized=False, 
is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[tkhdp6hpzz4wkrsn2gtkob6gjizy3tnh2i335gnvs5dqze2c635] example_inputs[56]: TensorMetadata(dtype=torch.float32, shape=torch.Size([1, 12, 64, 64]), stride=(147456, 64, 2304, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=None, storage_offset=0, storage_bytes=589824, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[tymrisxjw2ay2wkyisoywckuw25f6qkcnbqn6ixlh4bzobrvrpw] example_inputs[57]: TensorMetadata(dtype=torch.float32, shape=torch.Size([1, 12, 64, 64]), stride=(147456, 64, 2304, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=None, storage_offset=1536, storage_bytes=589824, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[vwryj3hywd6syuzjzojtwl2i75fr4o3frmbjuk3jj5iywt4uhml] example_inputs[58]: TensorMetadata(dtype=torch.float32, shape=torch.Size([1, 12, 64, 64]), stride=(49152, 64, 768, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=None, storage_offset=0, storage_bytes=196608, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[ypapltbv2xbwkwfrnhvu5iqgrtqrjol43wgxrobbgunlbawqjpw] example_inputs[59]: TensorMetadata(dtype=torch.float32, shape=torch.Size([1, 12, 64]), stride=(768, 64, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=3072, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[tupemxj7hujko5zasqsng7u56x3chsaznovjmn2zs4kn5wlvoce] example_inputs[60]: TensorMetadata(dtype=torch.int64, shape=torch.Size([]), stride=(), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=8, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[tupemxj7hujko5zasqsng7u56x3chsaznovjmn2zs4kn5wlvoce] example_inputs[61]: TensorMetadata(dtype=torch.int64, shape=torch.Size([]), stride=(), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=8, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[4do6jylr3mtx4d264ertpe6pziorxtcf2s4hikwtibwmouxw25h] example_inputs[62]: TensorMetadata(dtype=torch.float32, shape=torch.Size([1, 64, 768]), stride=(49152, 768, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=196608, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[rw2lfs4g3wumdacs5ykwrdhclqz5vzsnjfqmmo5ztwqnxn75jxd] example_inputs[63]: TensorMetadata(dtype=torch.float32, shape=torch.Size([64, 768]), stride=(768, 1), device=device(type='cuda', index=0), layout=torch.strided, 
memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=196608, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[y3abg6r4y5l3gztzm3humpnaec6pvtrvrqjeyknxzxbyuum5hro] example_inputs[64]: TensorMetadata(dtype=torch.float32, shape=torch.Size([64, 3072]), stride=(3072, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=786432, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[y3abg6r4y5l3gztzm3humpnaec6pvtrvrqjeyknxzxbyuum5hro] example_inputs[65]: TensorMetadata(dtype=torch.float32, shape=torch.Size([64, 3072]), stride=(3072, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=786432, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[4do6jylr3mtx4d264ertpe6pziorxtcf2s4hikwtibwmouxw25h] example_inputs[66]: TensorMetadata(dtype=torch.float32, shape=torch.Size([1, 64, 768]), stride=(49152, 768, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=196608, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[rw2lfs4g3wumdacs5ykwrdhclqz5vzsnjfqmmo5ztwqnxn75jxd] example_inputs[67]: TensorMetadata(dtype=torch.float32, shape=torch.Size([64, 768]), stride=(768, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=196608, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[yhigovqtqf2ic6iv63ysozcs3xf23g6ytwxxo3ngkb2rkejwqhy] example_inputs[68]: TensorMetadata(dtype=torch.float32, shape=torch.Size([1, 12, 64, 64]), stride=(147456, 64, 2304, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=None, storage_offset=768, storage_bytes=589824, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[tkhdp6hpzz4wkrsn2gtkob6gjizy3tnh2i335gnvs5dqze2c635] example_inputs[69]: TensorMetadata(dtype=torch.float32, shape=torch.Size([1, 12, 64, 64]), stride=(147456, 64, 2304, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=None, storage_offset=0, storage_bytes=589824, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[tymrisxjw2ay2wkyisoywckuw25f6qkcnbqn6ixlh4bzobrvrpw] example_inputs[70]: TensorMetadata(dtype=torch.float32, shape=torch.Size([1, 12, 64, 64]), stride=(147456, 64, 2304, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=None, storage_offset=1536, storage_bytes=589824, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[vwryj3hywd6syuzjzojtwl2i75fr4o3frmbjuk3jj5iywt4uhml] example_inputs[71]: 
TensorMetadata(dtype=torch.float32, shape=torch.Size([1, 12, 64, 64]), stride=(49152, 64, 768, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=None, storage_offset=0, storage_bytes=196608, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[ypapltbv2xbwkwfrnhvu5iqgrtqrjol43wgxrobbgunlbawqjpw] example_inputs[72]: TensorMetadata(dtype=torch.float32, shape=torch.Size([1, 12, 64]), stride=(768, 64, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=3072, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[tupemxj7hujko5zasqsng7u56x3chsaznovjmn2zs4kn5wlvoce] example_inputs[73]: TensorMetadata(dtype=torch.int64, shape=torch.Size([]), stride=(), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=8, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[tupemxj7hujko5zasqsng7u56x3chsaznovjmn2zs4kn5wlvoce] example_inputs[74]: TensorMetadata(dtype=torch.int64, shape=torch.Size([]), stride=(), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=8, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[4do6jylr3mtx4d264ertpe6pziorxtcf2s4hikwtibwmouxw25h] example_inputs[75]: TensorMetadata(dtype=torch.float32, shape=torch.Size([1, 64, 768]), stride=(49152, 768, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=196608, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[rw2lfs4g3wumdacs5ykwrdhclqz5vzsnjfqmmo5ztwqnxn75jxd] example_inputs[76]: TensorMetadata(dtype=torch.float32, shape=torch.Size([64, 768]), stride=(768, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=196608, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[y3abg6r4y5l3gztzm3humpnaec6pvtrvrqjeyknxzxbyuum5hro] example_inputs[77]: TensorMetadata(dtype=torch.float32, shape=torch.Size([64, 3072]), stride=(3072, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=786432, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[y3abg6r4y5l3gztzm3humpnaec6pvtrvrqjeyknxzxbyuum5hro] example_inputs[78]: TensorMetadata(dtype=torch.float32, shape=torch.Size([64, 3072]), stride=(3072, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=786432, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, 
sparse_dim=None)", + "[4do6jylr3mtx4d264ertpe6pziorxtcf2s4hikwtibwmouxw25h] example_inputs[79]: TensorMetadata(dtype=torch.float32, shape=torch.Size([1, 64, 768]), stride=(49152, 768, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=196608, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[rw2lfs4g3wumdacs5ykwrdhclqz5vzsnjfqmmo5ztwqnxn75jxd] example_inputs[80]: TensorMetadata(dtype=torch.float32, shape=torch.Size([64, 768]), stride=(768, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=196608, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[yhigovqtqf2ic6iv63ysozcs3xf23g6ytwxxo3ngkb2rkejwqhy] example_inputs[81]: TensorMetadata(dtype=torch.float32, shape=torch.Size([1, 12, 64, 64]), stride=(147456, 64, 2304, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=None, storage_offset=768, storage_bytes=589824, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[tkhdp6hpzz4wkrsn2gtkob6gjizy3tnh2i335gnvs5dqze2c635] example_inputs[82]: TensorMetadata(dtype=torch.float32, shape=torch.Size([1, 12, 64, 64]), stride=(147456, 64, 2304, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=None, storage_offset=0, storage_bytes=589824, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[tymrisxjw2ay2wkyisoywckuw25f6qkcnbqn6ixlh4bzobrvrpw] example_inputs[83]: TensorMetadata(dtype=torch.float32, shape=torch.Size([1, 12, 64, 64]), stride=(147456, 64, 2304, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=None, storage_offset=1536, storage_bytes=589824, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[vwryj3hywd6syuzjzojtwl2i75fr4o3frmbjuk3jj5iywt4uhml] example_inputs[84]: TensorMetadata(dtype=torch.float32, shape=torch.Size([1, 12, 64, 64]), stride=(49152, 64, 768, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=None, storage_offset=0, storage_bytes=196608, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[ypapltbv2xbwkwfrnhvu5iqgrtqrjol43wgxrobbgunlbawqjpw] example_inputs[85]: TensorMetadata(dtype=torch.float32, shape=torch.Size([1, 12, 64]), stride=(768, 64, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=3072, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[tupemxj7hujko5zasqsng7u56x3chsaznovjmn2zs4kn5wlvoce] example_inputs[86]: TensorMetadata(dtype=torch.int64, shape=torch.Size([]), stride=(), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=8, requires_grad=False, 
is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[tupemxj7hujko5zasqsng7u56x3chsaznovjmn2zs4kn5wlvoce] example_inputs[87]: TensorMetadata(dtype=torch.int64, shape=torch.Size([]), stride=(), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=8, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[4do6jylr3mtx4d264ertpe6pziorxtcf2s4hikwtibwmouxw25h] example_inputs[88]: TensorMetadata(dtype=torch.float32, shape=torch.Size([1, 64, 768]), stride=(49152, 768, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=196608, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[rw2lfs4g3wumdacs5ykwrdhclqz5vzsnjfqmmo5ztwqnxn75jxd] example_inputs[89]: TensorMetadata(dtype=torch.float32, shape=torch.Size([64, 768]), stride=(768, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=196608, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[y3abg6r4y5l3gztzm3humpnaec6pvtrvrqjeyknxzxbyuum5hro] example_inputs[90]: TensorMetadata(dtype=torch.float32, shape=torch.Size([64, 3072]), stride=(3072, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=786432, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[y3abg6r4y5l3gztzm3humpnaec6pvtrvrqjeyknxzxbyuum5hro] example_inputs[91]: TensorMetadata(dtype=torch.float32, shape=torch.Size([64, 3072]), stride=(3072, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=786432, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[4do6jylr3mtx4d264ertpe6pziorxtcf2s4hikwtibwmouxw25h] example_inputs[92]: TensorMetadata(dtype=torch.float32, shape=torch.Size([1, 64, 768]), stride=(49152, 768, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=196608, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[rw2lfs4g3wumdacs5ykwrdhclqz5vzsnjfqmmo5ztwqnxn75jxd] example_inputs[93]: TensorMetadata(dtype=torch.float32, shape=torch.Size([64, 768]), stride=(768, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=196608, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[yhigovqtqf2ic6iv63ysozcs3xf23g6ytwxxo3ngkb2rkejwqhy] example_inputs[94]: TensorMetadata(dtype=torch.float32, shape=torch.Size([1, 12, 64, 64]), stride=(147456, 64, 2304, 1), device=device(type='cuda', 
index=0), layout=torch.strided, memory_format=None, storage_offset=768, storage_bytes=589824, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[tkhdp6hpzz4wkrsn2gtkob6gjizy3tnh2i335gnvs5dqze2c635] example_inputs[95]: TensorMetadata(dtype=torch.float32, shape=torch.Size([1, 12, 64, 64]), stride=(147456, 64, 2304, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=None, storage_offset=0, storage_bytes=589824, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[tymrisxjw2ay2wkyisoywckuw25f6qkcnbqn6ixlh4bzobrvrpw] example_inputs[96]: TensorMetadata(dtype=torch.float32, shape=torch.Size([1, 12, 64, 64]), stride=(147456, 64, 2304, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=None, storage_offset=1536, storage_bytes=589824, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[vwryj3hywd6syuzjzojtwl2i75fr4o3frmbjuk3jj5iywt4uhml] example_inputs[97]: TensorMetadata(dtype=torch.float32, shape=torch.Size([1, 12, 64, 64]), stride=(49152, 64, 768, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=None, storage_offset=0, storage_bytes=196608, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[ypapltbv2xbwkwfrnhvu5iqgrtqrjol43wgxrobbgunlbawqjpw] example_inputs[98]: TensorMetadata(dtype=torch.float32, shape=torch.Size([1, 12, 64]), stride=(768, 64, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=3072, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[tupemxj7hujko5zasqsng7u56x3chsaznovjmn2zs4kn5wlvoce] example_inputs[99]: TensorMetadata(dtype=torch.int64, shape=torch.Size([]), stride=(), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=8, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[tupemxj7hujko5zasqsng7u56x3chsaznovjmn2zs4kn5wlvoce] example_inputs[100]: TensorMetadata(dtype=torch.int64, shape=torch.Size([]), stride=(), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=8, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[4do6jylr3mtx4d264ertpe6pziorxtcf2s4hikwtibwmouxw25h] example_inputs[101]: TensorMetadata(dtype=torch.float32, shape=torch.Size([1, 64, 768]), stride=(49152, 768, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=196608, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[rw2lfs4g3wumdacs5ykwrdhclqz5vzsnjfqmmo5ztwqnxn75jxd] example_inputs[102]: TensorMetadata(dtype=torch.float32, 
shape=torch.Size([64, 768]), stride=(768, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=196608, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[y3abg6r4y5l3gztzm3humpnaec6pvtrvrqjeyknxzxbyuum5hro] example_inputs[103]: TensorMetadata(dtype=torch.float32, shape=torch.Size([64, 3072]), stride=(3072, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=786432, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[y3abg6r4y5l3gztzm3humpnaec6pvtrvrqjeyknxzxbyuum5hro] example_inputs[104]: TensorMetadata(dtype=torch.float32, shape=torch.Size([64, 3072]), stride=(3072, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=786432, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[4do6jylr3mtx4d264ertpe6pziorxtcf2s4hikwtibwmouxw25h] example_inputs[105]: TensorMetadata(dtype=torch.float32, shape=torch.Size([1, 64, 768]), stride=(49152, 768, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=196608, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[rw2lfs4g3wumdacs5ykwrdhclqz5vzsnjfqmmo5ztwqnxn75jxd] example_inputs[106]: TensorMetadata(dtype=torch.float32, shape=torch.Size([64, 768]), stride=(768, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=196608, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[yhigovqtqf2ic6iv63ysozcs3xf23g6ytwxxo3ngkb2rkejwqhy] example_inputs[107]: TensorMetadata(dtype=torch.float32, shape=torch.Size([1, 12, 64, 64]), stride=(147456, 64, 2304, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=None, storage_offset=768, storage_bytes=589824, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[tkhdp6hpzz4wkrsn2gtkob6gjizy3tnh2i335gnvs5dqze2c635] example_inputs[108]: TensorMetadata(dtype=torch.float32, shape=torch.Size([1, 12, 64, 64]), stride=(147456, 64, 2304, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=None, storage_offset=0, storage_bytes=589824, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[tymrisxjw2ay2wkyisoywckuw25f6qkcnbqn6ixlh4bzobrvrpw] example_inputs[109]: TensorMetadata(dtype=torch.float32, shape=torch.Size([1, 12, 64, 64]), stride=(147456, 64, 2304, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=None, storage_offset=1536, storage_bytes=589824, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, 
dense_dim=None, sparse_dim=None)", + "[vwryj3hywd6syuzjzojtwl2i75fr4o3frmbjuk3jj5iywt4uhml] example_inputs[110]: TensorMetadata(dtype=torch.float32, shape=torch.Size([1, 12, 64, 64]), stride=(49152, 64, 768, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=None, storage_offset=0, storage_bytes=196608, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[ypapltbv2xbwkwfrnhvu5iqgrtqrjol43wgxrobbgunlbawqjpw] example_inputs[111]: TensorMetadata(dtype=torch.float32, shape=torch.Size([1, 12, 64]), stride=(768, 64, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=3072, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[tupemxj7hujko5zasqsng7u56x3chsaznovjmn2zs4kn5wlvoce] example_inputs[112]: TensorMetadata(dtype=torch.int64, shape=torch.Size([]), stride=(), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=8, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[tupemxj7hujko5zasqsng7u56x3chsaznovjmn2zs4kn5wlvoce] example_inputs[113]: TensorMetadata(dtype=torch.int64, shape=torch.Size([]), stride=(), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=8, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[4do6jylr3mtx4d264ertpe6pziorxtcf2s4hikwtibwmouxw25h] example_inputs[114]: TensorMetadata(dtype=torch.float32, shape=torch.Size([1, 64, 768]), stride=(49152, 768, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=196608, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[rw2lfs4g3wumdacs5ykwrdhclqz5vzsnjfqmmo5ztwqnxn75jxd] example_inputs[115]: TensorMetadata(dtype=torch.float32, shape=torch.Size([64, 768]), stride=(768, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=196608, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[y3abg6r4y5l3gztzm3humpnaec6pvtrvrqjeyknxzxbyuum5hro] example_inputs[116]: TensorMetadata(dtype=torch.float32, shape=torch.Size([64, 3072]), stride=(3072, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=786432, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[y3abg6r4y5l3gztzm3humpnaec6pvtrvrqjeyknxzxbyuum5hro] example_inputs[117]: TensorMetadata(dtype=torch.float32, shape=torch.Size([64, 3072]), stride=(3072, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=786432, requires_grad=False, 
is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[4do6jylr3mtx4d264ertpe6pziorxtcf2s4hikwtibwmouxw25h] example_inputs[118]: TensorMetadata(dtype=torch.float32, shape=torch.Size([1, 64, 768]), stride=(49152, 768, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=196608, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[rw2lfs4g3wumdacs5ykwrdhclqz5vzsnjfqmmo5ztwqnxn75jxd] example_inputs[119]: TensorMetadata(dtype=torch.float32, shape=torch.Size([64, 768]), stride=(768, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=196608, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[yhigovqtqf2ic6iv63ysozcs3xf23g6ytwxxo3ngkb2rkejwqhy] example_inputs[120]: TensorMetadata(dtype=torch.float32, shape=torch.Size([1, 12, 64, 64]), stride=(147456, 64, 2304, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=None, storage_offset=768, storage_bytes=589824, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[tkhdp6hpzz4wkrsn2gtkob6gjizy3tnh2i335gnvs5dqze2c635] example_inputs[121]: TensorMetadata(dtype=torch.float32, shape=torch.Size([1, 12, 64, 64]), stride=(147456, 64, 2304, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=None, storage_offset=0, storage_bytes=589824, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[tymrisxjw2ay2wkyisoywckuw25f6qkcnbqn6ixlh4bzobrvrpw] example_inputs[122]: TensorMetadata(dtype=torch.float32, shape=torch.Size([1, 12, 64, 64]), stride=(147456, 64, 2304, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=None, storage_offset=1536, storage_bytes=589824, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[vwryj3hywd6syuzjzojtwl2i75fr4o3frmbjuk3jj5iywt4uhml] example_inputs[123]: TensorMetadata(dtype=torch.float32, shape=torch.Size([1, 12, 64, 64]), stride=(49152, 64, 768, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=None, storage_offset=0, storage_bytes=196608, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[ypapltbv2xbwkwfrnhvu5iqgrtqrjol43wgxrobbgunlbawqjpw] example_inputs[124]: TensorMetadata(dtype=torch.float32, shape=torch.Size([1, 12, 64]), stride=(768, 64, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=3072, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[tupemxj7hujko5zasqsng7u56x3chsaznovjmn2zs4kn5wlvoce] example_inputs[125]: TensorMetadata(dtype=torch.int64, shape=torch.Size([]), stride=(), device=device(type='cuda', index=0), 
layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=8, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[tupemxj7hujko5zasqsng7u56x3chsaznovjmn2zs4kn5wlvoce] example_inputs[126]: TensorMetadata(dtype=torch.int64, shape=torch.Size([]), stride=(), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=8, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[4do6jylr3mtx4d264ertpe6pziorxtcf2s4hikwtibwmouxw25h] example_inputs[127]: TensorMetadata(dtype=torch.float32, shape=torch.Size([1, 64, 768]), stride=(49152, 768, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=196608, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[rw2lfs4g3wumdacs5ykwrdhclqz5vzsnjfqmmo5ztwqnxn75jxd] example_inputs[128]: TensorMetadata(dtype=torch.float32, shape=torch.Size([64, 768]), stride=(768, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=196608, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[y3abg6r4y5l3gztzm3humpnaec6pvtrvrqjeyknxzxbyuum5hro] example_inputs[129]: TensorMetadata(dtype=torch.float32, shape=torch.Size([64, 3072]), stride=(3072, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=786432, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[y3abg6r4y5l3gztzm3humpnaec6pvtrvrqjeyknxzxbyuum5hro] example_inputs[130]: TensorMetadata(dtype=torch.float32, shape=torch.Size([64, 3072]), stride=(3072, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=786432, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[4do6jylr3mtx4d264ertpe6pziorxtcf2s4hikwtibwmouxw25h] example_inputs[131]: TensorMetadata(dtype=torch.float32, shape=torch.Size([1, 64, 768]), stride=(49152, 768, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=196608, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[rw2lfs4g3wumdacs5ykwrdhclqz5vzsnjfqmmo5ztwqnxn75jxd] example_inputs[132]: TensorMetadata(dtype=torch.float32, shape=torch.Size([64, 768]), stride=(768, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=196608, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[yhigovqtqf2ic6iv63ysozcs3xf23g6ytwxxo3ngkb2rkejwqhy] example_inputs[133]: 
TensorMetadata(dtype=torch.float32, shape=torch.Size([1, 12, 64, 64]), stride=(147456, 64, 2304, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=None, storage_offset=768, storage_bytes=589824, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[tkhdp6hpzz4wkrsn2gtkob6gjizy3tnh2i335gnvs5dqze2c635] example_inputs[134]: TensorMetadata(dtype=torch.float32, shape=torch.Size([1, 12, 64, 64]), stride=(147456, 64, 2304, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=None, storage_offset=0, storage_bytes=589824, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[tymrisxjw2ay2wkyisoywckuw25f6qkcnbqn6ixlh4bzobrvrpw] example_inputs[135]: TensorMetadata(dtype=torch.float32, shape=torch.Size([1, 12, 64, 64]), stride=(147456, 64, 2304, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=None, storage_offset=1536, storage_bytes=589824, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[vwryj3hywd6syuzjzojtwl2i75fr4o3frmbjuk3jj5iywt4uhml] example_inputs[136]: TensorMetadata(dtype=torch.float32, shape=torch.Size([1, 12, 64, 64]), stride=(49152, 64, 768, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=None, storage_offset=0, storage_bytes=196608, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[ypapltbv2xbwkwfrnhvu5iqgrtqrjol43wgxrobbgunlbawqjpw] example_inputs[137]: TensorMetadata(dtype=torch.float32, shape=torch.Size([1, 12, 64]), stride=(768, 64, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=3072, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[tupemxj7hujko5zasqsng7u56x3chsaznovjmn2zs4kn5wlvoce] example_inputs[138]: TensorMetadata(dtype=torch.int64, shape=torch.Size([]), stride=(), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=8, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[tupemxj7hujko5zasqsng7u56x3chsaznovjmn2zs4kn5wlvoce] example_inputs[139]: TensorMetadata(dtype=torch.int64, shape=torch.Size([]), stride=(), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=8, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[4do6jylr3mtx4d264ertpe6pziorxtcf2s4hikwtibwmouxw25h] example_inputs[140]: TensorMetadata(dtype=torch.float32, shape=torch.Size([1, 64, 768]), stride=(49152, 768, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=196608, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, 
sparse_dim=None)", + "[rw2lfs4g3wumdacs5ykwrdhclqz5vzsnjfqmmo5ztwqnxn75jxd] example_inputs[141]: TensorMetadata(dtype=torch.float32, shape=torch.Size([64, 768]), stride=(768, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=196608, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[y3abg6r4y5l3gztzm3humpnaec6pvtrvrqjeyknxzxbyuum5hro] example_inputs[142]: TensorMetadata(dtype=torch.float32, shape=torch.Size([64, 3072]), stride=(3072, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=786432, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[y3abg6r4y5l3gztzm3humpnaec6pvtrvrqjeyknxzxbyuum5hro] example_inputs[143]: TensorMetadata(dtype=torch.float32, shape=torch.Size([64, 3072]), stride=(3072, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=786432, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[4do6jylr3mtx4d264ertpe6pziorxtcf2s4hikwtibwmouxw25h] example_inputs[144]: TensorMetadata(dtype=torch.float32, shape=torch.Size([1, 64, 768]), stride=(49152, 768, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=196608, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[rw2lfs4g3wumdacs5ykwrdhclqz5vzsnjfqmmo5ztwqnxn75jxd] example_inputs[145]: TensorMetadata(dtype=torch.float32, shape=torch.Size([64, 768]), stride=(768, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=196608, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[yhigovqtqf2ic6iv63ysozcs3xf23g6ytwxxo3ngkb2rkejwqhy] example_inputs[146]: TensorMetadata(dtype=torch.float32, shape=torch.Size([1, 12, 64, 64]), stride=(147456, 64, 2304, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=None, storage_offset=768, storage_bytes=589824, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[tkhdp6hpzz4wkrsn2gtkob6gjizy3tnh2i335gnvs5dqze2c635] example_inputs[147]: TensorMetadata(dtype=torch.float32, shape=torch.Size([1, 12, 64, 64]), stride=(147456, 64, 2304, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=None, storage_offset=0, storage_bytes=589824, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[tymrisxjw2ay2wkyisoywckuw25f6qkcnbqn6ixlh4bzobrvrpw] example_inputs[148]: TensorMetadata(dtype=torch.float32, shape=torch.Size([1, 12, 64, 64]), stride=(147456, 64, 2304, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=None, storage_offset=1536, storage_bytes=589824, 
requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[vwryj3hywd6syuzjzojtwl2i75fr4o3frmbjuk3jj5iywt4uhml] example_inputs[149]: TensorMetadata(dtype=torch.float32, shape=torch.Size([1, 12, 64, 64]), stride=(49152, 64, 768, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=None, storage_offset=0, storage_bytes=196608, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[ypapltbv2xbwkwfrnhvu5iqgrtqrjol43wgxrobbgunlbawqjpw] example_inputs[150]: TensorMetadata(dtype=torch.float32, shape=torch.Size([1, 12, 64]), stride=(768, 64, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=3072, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[tupemxj7hujko5zasqsng7u56x3chsaznovjmn2zs4kn5wlvoce] example_inputs[151]: TensorMetadata(dtype=torch.int64, shape=torch.Size([]), stride=(), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=8, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[tupemxj7hujko5zasqsng7u56x3chsaznovjmn2zs4kn5wlvoce] example_inputs[152]: TensorMetadata(dtype=torch.int64, shape=torch.Size([]), stride=(), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=8, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[4do6jylr3mtx4d264ertpe6pziorxtcf2s4hikwtibwmouxw25h] example_inputs[153]: TensorMetadata(dtype=torch.float32, shape=torch.Size([1, 64, 768]), stride=(49152, 768, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=196608, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[rw2lfs4g3wumdacs5ykwrdhclqz5vzsnjfqmmo5ztwqnxn75jxd] example_inputs[154]: TensorMetadata(dtype=torch.float32, shape=torch.Size([64, 768]), stride=(768, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=196608, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[y3abg6r4y5l3gztzm3humpnaec6pvtrvrqjeyknxzxbyuum5hro] example_inputs[155]: TensorMetadata(dtype=torch.float32, shape=torch.Size([64, 3072]), stride=(3072, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=786432, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[y3abg6r4y5l3gztzm3humpnaec6pvtrvrqjeyknxzxbyuum5hro] example_inputs[156]: TensorMetadata(dtype=torch.float32, shape=torch.Size([64, 3072]), stride=(3072, 1), device=device(type='cuda', index=0), 
layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=786432, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[4do6jylr3mtx4d264ertpe6pziorxtcf2s4hikwtibwmouxw25h] example_inputs[157]: TensorMetadata(dtype=torch.float32, shape=torch.Size([1, 64, 768]), stride=(49152, 768, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=196608, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[rw2lfs4g3wumdacs5ykwrdhclqz5vzsnjfqmmo5ztwqnxn75jxd] example_inputs[158]: TensorMetadata(dtype=torch.float32, shape=torch.Size([64, 768]), stride=(768, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=196608, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[yhigovqtqf2ic6iv63ysozcs3xf23g6ytwxxo3ngkb2rkejwqhy] example_inputs[159]: TensorMetadata(dtype=torch.float32, shape=torch.Size([1, 12, 64, 64]), stride=(147456, 64, 2304, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=None, storage_offset=768, storage_bytes=589824, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[tkhdp6hpzz4wkrsn2gtkob6gjizy3tnh2i335gnvs5dqze2c635] example_inputs[160]: TensorMetadata(dtype=torch.float32, shape=torch.Size([1, 12, 64, 64]), stride=(147456, 64, 2304, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=None, storage_offset=0, storage_bytes=589824, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[tymrisxjw2ay2wkyisoywckuw25f6qkcnbqn6ixlh4bzobrvrpw] example_inputs[161]: TensorMetadata(dtype=torch.float32, shape=torch.Size([1, 12, 64, 64]), stride=(147456, 64, 2304, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=None, storage_offset=1536, storage_bytes=589824, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[vwryj3hywd6syuzjzojtwl2i75fr4o3frmbjuk3jj5iywt4uhml] example_inputs[162]: TensorMetadata(dtype=torch.float32, shape=torch.Size([1, 12, 64, 64]), stride=(49152, 64, 768, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=None, storage_offset=0, storage_bytes=196608, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[ypapltbv2xbwkwfrnhvu5iqgrtqrjol43wgxrobbgunlbawqjpw] example_inputs[163]: TensorMetadata(dtype=torch.float32, shape=torch.Size([1, 12, 64]), stride=(768, 64, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=3072, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[tupemxj7hujko5zasqsng7u56x3chsaznovjmn2zs4kn5wlvoce] 
example_inputs[164]: TensorMetadata(dtype=torch.int64, shape=torch.Size([]), stride=(), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=8, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[tupemxj7hujko5zasqsng7u56x3chsaznovjmn2zs4kn5wlvoce] example_inputs[165]: TensorMetadata(dtype=torch.int64, shape=torch.Size([]), stride=(), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=8, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[4do6jylr3mtx4d264ertpe6pziorxtcf2s4hikwtibwmouxw25h] example_inputs[166]: TensorMetadata(dtype=torch.float32, shape=torch.Size([1, 64, 768]), stride=(49152, 768, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=196608, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[rw2lfs4g3wumdacs5ykwrdhclqz5vzsnjfqmmo5ztwqnxn75jxd] example_inputs[167]: TensorMetadata(dtype=torch.float32, shape=torch.Size([64, 768]), stride=(768, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=196608, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[y3abg6r4y5l3gztzm3humpnaec6pvtrvrqjeyknxzxbyuum5hro] example_inputs[168]: TensorMetadata(dtype=torch.float32, shape=torch.Size([64, 3072]), stride=(3072, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=786432, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[y3abg6r4y5l3gztzm3humpnaec6pvtrvrqjeyknxzxbyuum5hro] example_inputs[169]: TensorMetadata(dtype=torch.float32, shape=torch.Size([64, 3072]), stride=(3072, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=786432, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[4do6jylr3mtx4d264ertpe6pziorxtcf2s4hikwtibwmouxw25h] example_inputs[170]: TensorMetadata(dtype=torch.float32, shape=torch.Size([1, 64, 768]), stride=(49152, 768, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=196608, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[rw2lfs4g3wumdacs5ykwrdhclqz5vzsnjfqmmo5ztwqnxn75jxd] example_inputs[171]: TensorMetadata(dtype=torch.float32, shape=torch.Size([64, 768]), stride=(768, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=196608, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, 
is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[yhigovqtqf2ic6iv63ysozcs3xf23g6ytwxxo3ngkb2rkejwqhy] example_inputs[172]: TensorMetadata(dtype=torch.float32, shape=torch.Size([1, 12, 64, 64]), stride=(147456, 64, 2304, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=None, storage_offset=768, storage_bytes=589824, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[tkhdp6hpzz4wkrsn2gtkob6gjizy3tnh2i335gnvs5dqze2c635] example_inputs[173]: TensorMetadata(dtype=torch.float32, shape=torch.Size([1, 12, 64, 64]), stride=(147456, 64, 2304, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=None, storage_offset=0, storage_bytes=589824, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[tymrisxjw2ay2wkyisoywckuw25f6qkcnbqn6ixlh4bzobrvrpw] example_inputs[174]: TensorMetadata(dtype=torch.float32, shape=torch.Size([1, 12, 64, 64]), stride=(147456, 64, 2304, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=None, storage_offset=1536, storage_bytes=589824, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[vwryj3hywd6syuzjzojtwl2i75fr4o3frmbjuk3jj5iywt4uhml] example_inputs[175]: TensorMetadata(dtype=torch.float32, shape=torch.Size([1, 12, 64, 64]), stride=(49152, 64, 768, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=None, storage_offset=0, storage_bytes=196608, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[ypapltbv2xbwkwfrnhvu5iqgrtqrjol43wgxrobbgunlbawqjpw] example_inputs[176]: TensorMetadata(dtype=torch.float32, shape=torch.Size([1, 12, 64]), stride=(768, 64, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=3072, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[tupemxj7hujko5zasqsng7u56x3chsaznovjmn2zs4kn5wlvoce] example_inputs[177]: TensorMetadata(dtype=torch.int64, shape=torch.Size([]), stride=(), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=8, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[tupemxj7hujko5zasqsng7u56x3chsaznovjmn2zs4kn5wlvoce] example_inputs[178]: TensorMetadata(dtype=torch.int64, shape=torch.Size([]), stride=(), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=8, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[4do6jylr3mtx4d264ertpe6pziorxtcf2s4hikwtibwmouxw25h] example_inputs[179]: TensorMetadata(dtype=torch.float32, shape=torch.Size([1, 64, 768]), stride=(49152, 768, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=196608, 
requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[rw2lfs4g3wumdacs5ykwrdhclqz5vzsnjfqmmo5ztwqnxn75jxd] example_inputs[180]: TensorMetadata(dtype=torch.float32, shape=torch.Size([64, 768]), stride=(768, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=196608, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[y3abg6r4y5l3gztzm3humpnaec6pvtrvrqjeyknxzxbyuum5hro] example_inputs[181]: TensorMetadata(dtype=torch.float32, shape=torch.Size([64, 3072]), stride=(3072, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=786432, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[y3abg6r4y5l3gztzm3humpnaec6pvtrvrqjeyknxzxbyuum5hro] example_inputs[182]: TensorMetadata(dtype=torch.float32, shape=torch.Size([64, 3072]), stride=(3072, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=786432, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[4do6jylr3mtx4d264ertpe6pziorxtcf2s4hikwtibwmouxw25h] example_inputs[183]: TensorMetadata(dtype=torch.float32, shape=torch.Size([1, 64, 768]), stride=(49152, 768, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=196608, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[aba44qxan7tyih7ljdxyqka53vkn25cmdzgth5cyl2s7qorx7vi] example_inputs[184]: TensorMetadata(dtype=torch.int64, shape=torch.Size([1]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=8, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[qitbyzr7emyctium3gjpb6gcr75vrxwd24qiyojnre7qqd7zo4f] example_inputs[185]: TensorMetadata(dtype=torch.float32, shape=torch.Size([1, 768]), stride=(768, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=3072, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[l36txgwatro2oumaemq3fn7fzd6c7rqt7jkya4whofwgykwhyac] example_inputs[186]: TensorMetadata(dtype=torch.float32, shape=torch.Size([50304, 768]), stride=(768, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=154533888, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[mxqz7gd4dbgzufqocz5p7oivwzmpjmkhr6kbxm4dewvedblut3z] example_inputs[187]: TensorMetadata(dtype=torch.float32, shape=torch.Size([1, 64, 1]), stride=(64, 1, 1), 
device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=256, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[6v4azk5hvrkyq22k6mfa2l7ovaddatmim7mcvcpdku2ieirjd64] example_inputs[188]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768, 3072]), stride=(3072, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=9437184, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[aafg74anlk7yqmlg6ucup4tmjwhskblh75z227dcfmwpfzw7dzm] example_inputs[189]: TensorMetadata(dtype=torch.float32, shape=torch.Size([3072, 768]), stride=(768, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=9437184, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[mxqz7gd4dbgzufqocz5p7oivwzmpjmkhr6kbxm4dewvedblut3z] example_inputs[190]: TensorMetadata(dtype=torch.float32, shape=torch.Size([1, 64, 1]), stride=(64, 1, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=256, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[a64xnvbb6q6gew5c3aovaeqivj3hstrmxkbra3eidivx42znnuv] example_inputs[191]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768, 768]), stride=(768, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=2359296, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[ku6bnu4u7osvly4jsmf2w2orf22tghfytwzq6ycj3otmnq6f3wn] example_inputs[192]: TensorMetadata(dtype=torch.float32, shape=torch.Size([2304, 768]), stride=(768, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=7077888, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[mxqz7gd4dbgzufqocz5p7oivwzmpjmkhr6kbxm4dewvedblut3z] example_inputs[193]: TensorMetadata(dtype=torch.float32, shape=torch.Size([1, 64, 1]), stride=(64, 1, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=256, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[6v4azk5hvrkyq22k6mfa2l7ovaddatmim7mcvcpdku2ieirjd64] example_inputs[194]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768, 3072]), stride=(3072, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=9437184, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + 
"[aafg74anlk7yqmlg6ucup4tmjwhskblh75z227dcfmwpfzw7dzm] example_inputs[195]: TensorMetadata(dtype=torch.float32, shape=torch.Size([3072, 768]), stride=(768, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=9437184, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[mxqz7gd4dbgzufqocz5p7oivwzmpjmkhr6kbxm4dewvedblut3z] example_inputs[196]: TensorMetadata(dtype=torch.float32, shape=torch.Size([1, 64, 1]), stride=(64, 1, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=256, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[a64xnvbb6q6gew5c3aovaeqivj3hstrmxkbra3eidivx42znnuv] example_inputs[197]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768, 768]), stride=(768, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=2359296, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[ku6bnu4u7osvly4jsmf2w2orf22tghfytwzq6ycj3otmnq6f3wn] example_inputs[198]: TensorMetadata(dtype=torch.float32, shape=torch.Size([2304, 768]), stride=(768, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=7077888, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[mxqz7gd4dbgzufqocz5p7oivwzmpjmkhr6kbxm4dewvedblut3z] example_inputs[199]: TensorMetadata(dtype=torch.float32, shape=torch.Size([1, 64, 1]), stride=(64, 1, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=256, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[6v4azk5hvrkyq22k6mfa2l7ovaddatmim7mcvcpdku2ieirjd64] example_inputs[200]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768, 3072]), stride=(3072, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=9437184, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[aafg74anlk7yqmlg6ucup4tmjwhskblh75z227dcfmwpfzw7dzm] example_inputs[201]: TensorMetadata(dtype=torch.float32, shape=torch.Size([3072, 768]), stride=(768, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=9437184, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[mxqz7gd4dbgzufqocz5p7oivwzmpjmkhr6kbxm4dewvedblut3z] example_inputs[202]: TensorMetadata(dtype=torch.float32, shape=torch.Size([1, 64, 1]), stride=(64, 1, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=256, requires_grad=False, 
is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[a64xnvbb6q6gew5c3aovaeqivj3hstrmxkbra3eidivx42znnuv] example_inputs[203]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768, 768]), stride=(768, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=2359296, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[ku6bnu4u7osvly4jsmf2w2orf22tghfytwzq6ycj3otmnq6f3wn] example_inputs[204]: TensorMetadata(dtype=torch.float32, shape=torch.Size([2304, 768]), stride=(768, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=7077888, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[mxqz7gd4dbgzufqocz5p7oivwzmpjmkhr6kbxm4dewvedblut3z] example_inputs[205]: TensorMetadata(dtype=torch.float32, shape=torch.Size([1, 64, 1]), stride=(64, 1, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=256, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[6v4azk5hvrkyq22k6mfa2l7ovaddatmim7mcvcpdku2ieirjd64] example_inputs[206]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768, 3072]), stride=(3072, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=9437184, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[aafg74anlk7yqmlg6ucup4tmjwhskblh75z227dcfmwpfzw7dzm] example_inputs[207]: TensorMetadata(dtype=torch.float32, shape=torch.Size([3072, 768]), stride=(768, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=9437184, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[mxqz7gd4dbgzufqocz5p7oivwzmpjmkhr6kbxm4dewvedblut3z] example_inputs[208]: TensorMetadata(dtype=torch.float32, shape=torch.Size([1, 64, 1]), stride=(64, 1, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=256, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[a64xnvbb6q6gew5c3aovaeqivj3hstrmxkbra3eidivx42znnuv] example_inputs[209]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768, 768]), stride=(768, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=2359296, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[ku6bnu4u7osvly4jsmf2w2orf22tghfytwzq6ycj3otmnq6f3wn] example_inputs[210]: TensorMetadata(dtype=torch.float32, shape=torch.Size([2304, 768]), stride=(768, 1), 
device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=7077888, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[mxqz7gd4dbgzufqocz5p7oivwzmpjmkhr6kbxm4dewvedblut3z] example_inputs[211]: TensorMetadata(dtype=torch.float32, shape=torch.Size([1, 64, 1]), stride=(64, 1, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=256, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[6v4azk5hvrkyq22k6mfa2l7ovaddatmim7mcvcpdku2ieirjd64] example_inputs[212]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768, 3072]), stride=(3072, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=9437184, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[aafg74anlk7yqmlg6ucup4tmjwhskblh75z227dcfmwpfzw7dzm] example_inputs[213]: TensorMetadata(dtype=torch.float32, shape=torch.Size([3072, 768]), stride=(768, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=9437184, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[mxqz7gd4dbgzufqocz5p7oivwzmpjmkhr6kbxm4dewvedblut3z] example_inputs[214]: TensorMetadata(dtype=torch.float32, shape=torch.Size([1, 64, 1]), stride=(64, 1, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=256, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[a64xnvbb6q6gew5c3aovaeqivj3hstrmxkbra3eidivx42znnuv] example_inputs[215]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768, 768]), stride=(768, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=2359296, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[ku6bnu4u7osvly4jsmf2w2orf22tghfytwzq6ycj3otmnq6f3wn] example_inputs[216]: TensorMetadata(dtype=torch.float32, shape=torch.Size([2304, 768]), stride=(768, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=7077888, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[mxqz7gd4dbgzufqocz5p7oivwzmpjmkhr6kbxm4dewvedblut3z] example_inputs[217]: TensorMetadata(dtype=torch.float32, shape=torch.Size([1, 64, 1]), stride=(64, 1, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=256, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + 
"[6v4azk5hvrkyq22k6mfa2l7ovaddatmim7mcvcpdku2ieirjd64] example_inputs[218]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768, 3072]), stride=(3072, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=9437184, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[aafg74anlk7yqmlg6ucup4tmjwhskblh75z227dcfmwpfzw7dzm] example_inputs[219]: TensorMetadata(dtype=torch.float32, shape=torch.Size([3072, 768]), stride=(768, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=9437184, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[mxqz7gd4dbgzufqocz5p7oivwzmpjmkhr6kbxm4dewvedblut3z] example_inputs[220]: TensorMetadata(dtype=torch.float32, shape=torch.Size([1, 64, 1]), stride=(64, 1, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=256, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[a64xnvbb6q6gew5c3aovaeqivj3hstrmxkbra3eidivx42znnuv] example_inputs[221]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768, 768]), stride=(768, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=2359296, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[ku6bnu4u7osvly4jsmf2w2orf22tghfytwzq6ycj3otmnq6f3wn] example_inputs[222]: TensorMetadata(dtype=torch.float32, shape=torch.Size([2304, 768]), stride=(768, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=7077888, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[mxqz7gd4dbgzufqocz5p7oivwzmpjmkhr6kbxm4dewvedblut3z] example_inputs[223]: TensorMetadata(dtype=torch.float32, shape=torch.Size([1, 64, 1]), stride=(64, 1, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=256, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[6v4azk5hvrkyq22k6mfa2l7ovaddatmim7mcvcpdku2ieirjd64] example_inputs[224]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768, 3072]), stride=(3072, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=9437184, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[aafg74anlk7yqmlg6ucup4tmjwhskblh75z227dcfmwpfzw7dzm] example_inputs[225]: TensorMetadata(dtype=torch.float32, shape=torch.Size([3072, 768]), stride=(768, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=9437184, requires_grad=False, 
is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[mxqz7gd4dbgzufqocz5p7oivwzmpjmkhr6kbxm4dewvedblut3z] example_inputs[226]: TensorMetadata(dtype=torch.float32, shape=torch.Size([1, 64, 1]), stride=(64, 1, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=256, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[a64xnvbb6q6gew5c3aovaeqivj3hstrmxkbra3eidivx42znnuv] example_inputs[227]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768, 768]), stride=(768, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=2359296, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[ku6bnu4u7osvly4jsmf2w2orf22tghfytwzq6ycj3otmnq6f3wn] example_inputs[228]: TensorMetadata(dtype=torch.float32, shape=torch.Size([2304, 768]), stride=(768, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=7077888, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[mxqz7gd4dbgzufqocz5p7oivwzmpjmkhr6kbxm4dewvedblut3z] example_inputs[229]: TensorMetadata(dtype=torch.float32, shape=torch.Size([1, 64, 1]), stride=(64, 1, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=256, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[6v4azk5hvrkyq22k6mfa2l7ovaddatmim7mcvcpdku2ieirjd64] example_inputs[230]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768, 3072]), stride=(3072, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=9437184, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[aafg74anlk7yqmlg6ucup4tmjwhskblh75z227dcfmwpfzw7dzm] example_inputs[231]: TensorMetadata(dtype=torch.float32, shape=torch.Size([3072, 768]), stride=(768, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=9437184, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[mxqz7gd4dbgzufqocz5p7oivwzmpjmkhr6kbxm4dewvedblut3z] example_inputs[232]: TensorMetadata(dtype=torch.float32, shape=torch.Size([1, 64, 1]), stride=(64, 1, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=256, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[a64xnvbb6q6gew5c3aovaeqivj3hstrmxkbra3eidivx42znnuv] example_inputs[233]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768, 768]), stride=(768, 1), device=device(type='cuda', 
index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=2359296, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[ku6bnu4u7osvly4jsmf2w2orf22tghfytwzq6ycj3otmnq6f3wn] example_inputs[234]: TensorMetadata(dtype=torch.float32, shape=torch.Size([2304, 768]), stride=(768, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=7077888, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[mxqz7gd4dbgzufqocz5p7oivwzmpjmkhr6kbxm4dewvedblut3z] example_inputs[235]: TensorMetadata(dtype=torch.float32, shape=torch.Size([1, 64, 1]), stride=(64, 1, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=256, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[6v4azk5hvrkyq22k6mfa2l7ovaddatmim7mcvcpdku2ieirjd64] example_inputs[236]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768, 3072]), stride=(3072, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=9437184, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[aafg74anlk7yqmlg6ucup4tmjwhskblh75z227dcfmwpfzw7dzm] example_inputs[237]: TensorMetadata(dtype=torch.float32, shape=torch.Size([3072, 768]), stride=(768, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=9437184, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[mxqz7gd4dbgzufqocz5p7oivwzmpjmkhr6kbxm4dewvedblut3z] example_inputs[238]: TensorMetadata(dtype=torch.float32, shape=torch.Size([1, 64, 1]), stride=(64, 1, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=256, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[a64xnvbb6q6gew5c3aovaeqivj3hstrmxkbra3eidivx42znnuv] example_inputs[239]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768, 768]), stride=(768, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=2359296, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[ku6bnu4u7osvly4jsmf2w2orf22tghfytwzq6ycj3otmnq6f3wn] example_inputs[240]: TensorMetadata(dtype=torch.float32, shape=torch.Size([2304, 768]), stride=(768, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=7077888, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + 
"[mxqz7gd4dbgzufqocz5p7oivwzmpjmkhr6kbxm4dewvedblut3z] example_inputs[241]: TensorMetadata(dtype=torch.float32, shape=torch.Size([1, 64, 1]), stride=(64, 1, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=256, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[6v4azk5hvrkyq22k6mfa2l7ovaddatmim7mcvcpdku2ieirjd64] example_inputs[242]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768, 3072]), stride=(3072, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=9437184, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[aafg74anlk7yqmlg6ucup4tmjwhskblh75z227dcfmwpfzw7dzm] example_inputs[243]: TensorMetadata(dtype=torch.float32, shape=torch.Size([3072, 768]), stride=(768, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=9437184, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[mxqz7gd4dbgzufqocz5p7oivwzmpjmkhr6kbxm4dewvedblut3z] example_inputs[244]: TensorMetadata(dtype=torch.float32, shape=torch.Size([1, 64, 1]), stride=(64, 1, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=256, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[a64xnvbb6q6gew5c3aovaeqivj3hstrmxkbra3eidivx42znnuv] example_inputs[245]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768, 768]), stride=(768, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=2359296, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[ku6bnu4u7osvly4jsmf2w2orf22tghfytwzq6ycj3otmnq6f3wn] example_inputs[246]: TensorMetadata(dtype=torch.float32, shape=torch.Size([2304, 768]), stride=(768, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=7077888, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[mxqz7gd4dbgzufqocz5p7oivwzmpjmkhr6kbxm4dewvedblut3z] example_inputs[247]: TensorMetadata(dtype=torch.float32, shape=torch.Size([1, 64, 1]), stride=(64, 1, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=256, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[6v4azk5hvrkyq22k6mfa2l7ovaddatmim7mcvcpdku2ieirjd64] example_inputs[248]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768, 3072]), stride=(3072, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=9437184, requires_grad=False, 
is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[aafg74anlk7yqmlg6ucup4tmjwhskblh75z227dcfmwpfzw7dzm] example_inputs[249]: TensorMetadata(dtype=torch.float32, shape=torch.Size([3072, 768]), stride=(768, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=9437184, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[mxqz7gd4dbgzufqocz5p7oivwzmpjmkhr6kbxm4dewvedblut3z] example_inputs[250]: TensorMetadata(dtype=torch.float32, shape=torch.Size([1, 64, 1]), stride=(64, 1, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=256, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[a64xnvbb6q6gew5c3aovaeqivj3hstrmxkbra3eidivx42znnuv] example_inputs[251]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768, 768]), stride=(768, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=2359296, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[ku6bnu4u7osvly4jsmf2w2orf22tghfytwzq6ycj3otmnq6f3wn] example_inputs[252]: TensorMetadata(dtype=torch.float32, shape=torch.Size([2304, 768]), stride=(768, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=7077888, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[mxqz7gd4dbgzufqocz5p7oivwzmpjmkhr6kbxm4dewvedblut3z] example_inputs[253]: TensorMetadata(dtype=torch.float32, shape=torch.Size([1, 64, 1]), stride=(64, 1, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=256, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[6v4azk5hvrkyq22k6mfa2l7ovaddatmim7mcvcpdku2ieirjd64] example_inputs[254]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768, 3072]), stride=(3072, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=9437184, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[aafg74anlk7yqmlg6ucup4tmjwhskblh75z227dcfmwpfzw7dzm] example_inputs[255]: TensorMetadata(dtype=torch.float32, shape=torch.Size([3072, 768]), stride=(768, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=9437184, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[mxqz7gd4dbgzufqocz5p7oivwzmpjmkhr6kbxm4dewvedblut3z] example_inputs[256]: TensorMetadata(dtype=torch.float32, shape=torch.Size([1, 64, 1]), stride=(64, 1, 1), 
device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=256, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[a64xnvbb6q6gew5c3aovaeqivj3hstrmxkbra3eidivx42znnuv] example_inputs[257]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768, 768]), stride=(768, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=2359296, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[ku6bnu4u7osvly4jsmf2w2orf22tghfytwzq6ycj3otmnq6f3wn] example_inputs[258]: TensorMetadata(dtype=torch.float32, shape=torch.Size([2304, 768]), stride=(768, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=7077888, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[mxqz7gd4dbgzufqocz5p7oivwzmpjmkhr6kbxm4dewvedblut3z] example_inputs[259]: TensorMetadata(dtype=torch.float32, shape=torch.Size([1, 64, 1]), stride=(64, 1, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=256, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[u55blbbc73afkevwx6ofprgxxytl7dbrkgoal4z3b6od3qdlugs] example_inputs[260]: TensorMetadata(dtype=torch.float32, shape=torch.Size([1, 1, 50304]), stride=(50304, 50304, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=None, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] fx_kwargs[aot_mode]: False", + "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] fx_kwargs[cpp_wrapper]: False", + "[moqyx73pd52qgfefpqfwjkxjzf4hj25dyzowq4prptxxsmfw2vg] fx_kwargs[cudagraphs]: BoxedBool(value=True)", + "[tquy2we2efmowuj4wuqzcfcfdcrkzkzmwdae6hprj7fa64jpusq] fx_kwargs[extern_node_serializer]: None", + "[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] fx_kwargs[is_backward]: True", + "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] fx_kwargs[is_inference]: False", + "[tquy2we2efmowuj4wuqzcfcfdcrkzkzmwdae6hprj7fa64jpusq] fx_kwargs[layout_opt]: None", + "[y3e3yuxtssnww62nt5exdblxjs4qqfe6m45lbogy57sjgkkgd7s] fx_kwargs[static_input_idxs]: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76, 77, 78, 79, 80, 81, 82, 83, 84, 85, 86, 87, 88, 89, 90, 91, 92, 93, 94, 95, 96, 97, 98, 99, 100, 101, 102, 103, 104, 105, 106, 107, 108, 109, 110, 111, 112, 113, 114, 115, 116, 117, 118, 119, 120, 121, 122, 123, 124, 125, 126, 127, 128, 129, 130, 131, 132, 133, 134, 135, 136, 137, 138, 139, 140, 141, 142, 143, 144, 145, 146, 147, 148, 149, 150, 151, 152, 153, 154, 155, 156, 
157, 158, 159, 160, 161, 162, 163, 164, 165, 166, 167, 168, 169, 170, 171, 172, 173, 174, 175, 176, 177, 178, 179, 180, 181, 182, 183, 184, 185, 186, 187, 188, 189, 190, 191, 192, 193, 194, 195, 196, 197, 198, 199, 200, 201, 202, 203, 204, 205, 206, 207, 208, 209, 210, 211, 212, 213, 214, 215, 216, 217, 218, 219, 220, 221, 222, 223, 224, 225, 226, 227, 228, 229, 230, 231, 232, 233, 234, 235, 236, 237, 238, 239, 240, 241, 242, 243, 244, 245, 246, 247, 248, 249, 250, 251, 252, 253, 254, 255, 256, 257, 258, 259]", + "[gqceoov337f2fxydib545wytq5n6f565gacorvs7dzatfibcnjq] fx_kwargs[user_visible_outputs]: {'add_147': None, '_unsafe_index_put': None, 'sum_147': None, 'sum_148': None, 'permute_340': None, 'view_338': None, 'permute_332': None, 'view_331': None, 'sum_141': None, 'sum_142': None, 'permute_328': None, 'view_328': None, 'permute_324': None, 'view_325': None, 'sum_135': None, 'sum_136': None, 'permute_320': None, 'view_322': None, 'permute_312': None, 'view_315': None, 'sum_129': None, 'sum_130': None, 'permute_308': None, 'view_312': None, 'permute_304': None, 'view_309': None, 'sum_123': None, 'sum_124': None, 'permute_300': None, 'view_306': None, 'permute_292': None, 'view_299': None, 'sum_117': None, 'sum_118': None, 'permute_288': None, 'view_296': None, 'permute_284': None, 'view_293': None, 'sum_111': None, 'sum_112': None, 'permute_280': None, 'view_290': None, 'permute_272': None, 'view_283': None, 'sum_105': None, 'sum_106': None, 'permute_268': None, 'view_280': None, 'permute_264': None, 'view_277': None, 'sum_99': None, 'sum_100': None, 'permute_260': None, 'view_274': None, 'permute_252': None, 'view_267': None, 'sum_93': None, 'sum_94': None, 'permute_248': None, 'view_264': None, 'permute_244': None, 'view_261': None, 'sum_87': None, 'sum_88': None, 'permute_240': None, 'view_258': None, 'permute_232': None, 'view_251': None, 'sum_81': None, 'sum_82': None, 'permute_228': None, 'view_248': None, 'permute_224': None, 'view_245': None, 'sum_75': None, 'sum_76': None, 'permute_220': None, 'view_242': None, 'permute_212': None, 'view_235': None, 'sum_69': None, 'sum_70': None, 'permute_208': None, 'view_232': None, 'permute_204': None, 'view_229': None, 'sum_63': None, 'sum_64': None, 'permute_200': None, 'view_226': None, 'permute_192': None, 'view_219': None, 'sum_57': None, 'sum_58': None, 'permute_188': None, 'view_216': None, 'permute_184': None, 'view_213': None, 'sum_51': None, 'sum_52': None, 'permute_180': None, 'view_210': None, 'permute_172': None, 'view_203': None, 'sum_45': None, 'sum_46': None, 'permute_168': None, 'view_200': None, 'permute_164': None, 'view_197': None, 'sum_39': None, 'sum_40': None, 'permute_160': None, 'view_194': None, 'permute_152': None, 'view_187': None, 'sum_33': None, 'sum_34': None, 'permute_148': None, 'view_184': None, 'permute_144': None, 'view_181': None, 'sum_27': None, 'sum_28': None, 'permute_140': None, 'view_178': None, 'permute_132': None, 'view_171': None, 'sum_21': None, 'sum_22': None, 'permute_128': None, 'view_168': None, 'permute_124': None, 'view_165': None, 'sum_15': None, 'sum_16': None, 'permute_120': None, 'view_162': None, 'permute_112': None, 'view_155': None, 'sum_9': None, 'sum_10': None, 'permute_108': None, 'view_152': None, 'permute_104': None, 'view_149': None, 'sum_3': None, 'sum_4': None}", + "[4tsez6gljkkxdflgsfn46kmxnkkmjfm3foz2peoj2utbxjdodv2] inputs_to_check[0]: 260", + "[du4vyrfyozrfxcf6kk6ma7oqwatapifazeelfsawmsiu6gjdtxp] deterministic_algorithms_settings: (False, False, True)", + 
"[7as26aeta7rzhgm2mxh4el36kupf55fr27327kzc2fsdiy3nexy] cuda_matmul_settings: (True, True, True)", + "[7sgsc55kfjzwis4uo5qss3whnvhbsher24yq6kx6s5kpsrkxea6] torch_version: ", + "[poglqjwowp4gnkmehjby2lvdjrwuo5tbxa2gayd6smgasl2hgsd] system_info[device]: {'name': 'NVIDIA H100'}", + "[sp37civic5nfnonkkbzih3xfntpo3gcmphcbajuw565ur3hduzs] system_info[version]: {'triton': '3.0.0+45fff310c8d412cc506d0c1c7cf83e442e39f90ca1a9dbd4346ecd342591748e57a184b0a3-f7c9bcb1b9106f4f2459ef1b70df75315076b76babe2a4ba9e0b3b5a40697757-d412cc506d0c1c7cf83e442e39f90ca1a9dbd4346ecd342591748e57a184b0a3-23d635e690d670bf61798e1259674b78c0ed5ba222ab6a455f329f27a758fc2d-e3b0c44298fc1c149afbf4c8996fb92427ae41e4649b934ca495991b7852b855-c9cccccc1377e5b8c152e8449fa73a2b3660a899bebac0581c99e4e368a61cde-e82522398a0a43b3c7db9c7fa26ab086260c0981e59942e1df54ddc669d7b78c-cf28658fa328f7f283ec4e6ccc6c48d7c2a8ddbdf5134d3eb35c9b38ce4ace44-b9d80690b3109c2aaf5ece450d62e93b37eb6ab38552089794b3bb36e36a22b3-36130a37af1b19a0dec569aa08d30b00c74c8f02b6b632999d86dea169146792-572e6cbc0b9ae0089330aa5d115d10cfc16bd45a69e3ca362c902b9c04b5280b-412a0e8de51f28c2de98b3343804b2949ede2cb593052a0eb0ba3e6a9a1812d0', 'cuda': '12.0'}", + "[zapn5tv6636hi2hdgg2j7wqiwxjbrqbqa4vrgrnuli5v34salwn] system_info[hash]: ee3e5029961cf964c22a83f0ca3586fcab4a36649a1478e6293b5382433b23d3", + "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[TYPE_CHECKING]: False", + "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[abi_compatible]: False", + "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[aggressive_fusion]: False", + "[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[allow_buffer_reuse]: True", + "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[allow_stack_allocation]: False", + "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[always_keep_tensor_constants]: False", + "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[aot_inductor.debug_compile]: False", + "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[aot_inductor.debug_dump_consts_bin]: False", + "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[aot_inductor.force_mmap_weights]: False", + "[v3hzzlv4tjgvp3pyhmzagjd25orl6n7nynoa7svlhhwk73b7u3c] inductor_config[aot_inductor.output_path]: ", + "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[aot_inductor.package]: False", + "[v3hzzlv4tjgvp3pyhmzagjd25orl6n7nynoa7svlhhwk73b7u3c] inductor_config[aot_inductor.serialized_in_spec]: ", + "[v3hzzlv4tjgvp3pyhmzagjd25orl6n7nynoa7svlhhwk73b7u3c] inductor_config[aot_inductor.serialized_out_spec]: ", + "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[aot_inductor.use_runtime_constant_folding]: False", + "[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[assert_indirect_indexing]: True", + "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[assume_aligned_inputs]: False", + "[v3hzzlv4tjgvp3pyhmzagjd25orl6n7nynoa7svlhhwk73b7u3c] inductor_config[autoheuristic_collect]: ", + "[jvchmi66fvqzlemhr5fcqorz5trfdtdalzfagtj2aolmimwqhdq] inductor_config[autoheuristic_log_path]: DEFAULT", + "[jwbrgxes7vjqumngs5hyj6gn5nytv2whnppnzngvaagfmawhkkd] inductor_config[autoheuristic_use]: mixed_mm", + "[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[autotune_fallback_to_aten]: True", + "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] 
inductor_config[autotune_in_subproc]: False", + "[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[autotune_local_cache]: True", + "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[autotune_multi_device]: False", + "[tquy2we2efmowuj4wuqzcfcfdcrkzkzmwdae6hprj7fa64jpusq] inductor_config[autotune_remote_cache]: None", + "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[b2b_gemm_pass]: False", + "[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[batch_fusion]: True", + "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[benchmark_combo_kernel]: False", + "[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[benchmark_epilogue_fusion]: True", + "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[benchmark_fusion]: False", + "[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[benchmark_harness]: True", + "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[benchmark_kernel]: False", + "[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[bw_outputs_user_visible]: True", + "[b4ha3ravs3qv237q65hpfqegbnoww7tf2ahcbu2i7xo6te5spqs] inductor_config[c_shim_version]: 2", + "[aghvyrrgwvxijco2pk5wzc3cgmmthrbmgxitiibxuuscxdwrjd3] inductor_config[combo_kernel_allow_mixed_sizes]: 1", + "[aghvyrrgwvxijco2pk5wzc3cgmmthrbmgxitiibxuuscxdwrjd3] inductor_config[combo_kernels_autotune]: 1", + "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[comment_origin]: False", + "[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[comprehensive_padding]: True", + "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[compute_all_bounds]: False", + "[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[constant_and_index_propagation]: True", + "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[conv_1x1_as_mm]: False", + "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[coordinate_descent_check_all_directions]: False", + "[aghvyrrgwvxijco2pk5wzc3cgmmthrbmgxitiibxuuscxdwrjd3] inductor_config[coordinate_descent_search_radius]: 1", + "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[coordinate_descent_tuning]: False", + "[c7zj4qytmety6keurs3hsh5wn7foxp3dqx4kym2ucszzcb2ngrf] inductor_config[cpp.cxx]: (None, 'g++')", + "[yrty22bseefglnysuoec4ji7j2rnaggdj3g33zzj7avogwfmgdw] inductor_config[cpp.descriptive_names]: original_aten", + "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[cpp.dynamic_threads]: False", + "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[cpp.enable_floating_point_contract_flag]: False", + "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[cpp.enable_kernel_profile]: False", + "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[cpp.enable_unsafe_math_opt_flag]: False", + "[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[cpp.fallback_scatter_reduce_sum]: True", + "[aghvyrrgwvxijco2pk5wzc3cgmmthrbmgxitiibxuuscxdwrjd3] inductor_config[cpp.gemm_max_k_slices]: 1", + "[tquy2we2efmowuj4wuqzcfcfdcrkzkzmwdae6hprj7fa64jpusq] inductor_config[cpp.inject_log1p_bug_TESTING_ONLY]: None", + "[tquy2we2efmowuj4wuqzcfcfdcrkzkzmwdae6hprj7fa64jpusq] inductor_config[cpp.inject_relu_bug_TESTING_ONLY]: None", + "[ebt2ncs4f5y7dn7btzi76mnouepvzad474tmp5iju4wiuumjl4s] inductor_config[cpp.max_horizontal_fusion_size]: 16", 
+ "[g7rrnbg5yonzux3cfj5ovre5lob3ayda7qcfpxjvtwmiz4uicii] inductor_config[cpp.min_chunk_size]: 4096", + "[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[cpp.no_redundant_loops]: True", + "[tquy2we2efmowuj4wuqzcfcfdcrkzkzmwdae6hprj7fa64jpusq] inductor_config[cpp.simdlen]: None", + "[sz3im5ogc6asp7g4uqocnovype63tkdexzfrniv6hn2oank3biu] inductor_config[cpp.threads]: -1", + "[tquy2we2efmowuj4wuqzcfcfdcrkzkzmwdae6hprj7fa64jpusq] inductor_config[cpp.vec_isa_ok]: None", + "[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[cpp.weight_prepack]: True", + "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[cpp_wrapper]: False", + "[bsvfcwwoczx2rlkdz2eta6doujsymyihmi46hhwk6clrrvwcb6m] inductor_config[cpu_backend]: cpp", + "[tquy2we2efmowuj4wuqzcfcfdcrkzkzmwdae6hprj7fa64jpusq] inductor_config[cuda.arch]: None", + "[tvyftmtdmezlejo2xllu7awzv4pzc4vm4fub4b3gpl5jptjkosi] inductor_config[cuda.compile_opt_level]: -O1", + "[tquy2we2efmowuj4wuqzcfcfdcrkzkzmwdae6hprj7fa64jpusq] inductor_config[cuda.cuda_cxx]: None", + "[aghvyrrgwvxijco2pk5wzc3cgmmthrbmgxitiibxuuscxdwrjd3] inductor_config[cuda.cutlass_backend_min_gemm_size]: 1", + "[tquy2we2efmowuj4wuqzcfcfdcrkzkzmwdae6hprj7fa64jpusq] inductor_config[cuda.cutlass_max_profiling_configs]: None", + "[tquy2we2efmowuj4wuqzcfcfdcrkzkzmwdae6hprj7fa64jpusq] inductor_config[cuda.cutlass_op_allowlist_regex]: None", + "[lwkz5chtpji756gurqw4foijfi7zfgljtnn5nmnvdi2skpt4mgh] inductor_config[cuda.cutlass_op_denylist_regex]: pingpong", + "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[cuda.enable_cuda_lto]: False", + "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[cuda.enable_debug_info]: False", + "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[cuda.enable_ptxas_info]: False", + "[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[cuda.generate_test_runner]: True", + "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[cuda.use_fast_math]: False", + "[tquy2we2efmowuj4wuqzcfcfdcrkzkzmwdae6hprj7fa64jpusq] inductor_config[cuda.version]: None", + "[caw4ly2z672k6kjfahoxwpajp5idhhtrpgf3ma2clylcp7c7aid] inductor_config[cuda_backend]: triton", + "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[dce]: False", + "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[debug]: False", + "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[debug_fusion]: False", + "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[debug_index_asserts]: False", + "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[debug_ir_traceback]: False", + "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[decompose_mem_bound_mm]: False", + "[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[developer_warnings]: True", + "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[disable_cpp_codegen]: False", + "[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[disable_progress]: True", + "[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[dynamic_scale_rblock]: True", + "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[efficient_conv_bn_eval_fx_passes]: False", + "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[emulate_precision_casts]: False", + "[v3hzzlv4tjgvp3pyhmzagjd25orl6n7nynoa7svlhhwk73b7u3c] 
inductor_config[enabled_metric_tables]: ", + "[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[epilogue_fusion]: True", + "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[epilogue_fusion_first]: False", + "[lxxtoqhcoepwfokeiibd575gnxo3uzwiv4hmpomlwkpzqz3qzsh] inductor_config[estimate_op_runtime]: default", + "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[fallback_random]: False", + "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[force_disable_caches]: False", + "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[force_fuse_int_mm_with_mul]: False", + "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[force_layout_optimization]: False", + "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[force_same_precision]: False", + "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[force_shape_pad]: False", + "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[freezing]: False", + "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[freezing_discard_parameters]: False", + "[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[fx_graph_cache]: True", + "[tquy2we2efmowuj4wuqzcfcfdcrkzkzmwdae6hprj7fa64jpusq] inductor_config[fx_graph_remote_cache]: None", + "[zwmmbkdkarexuhbigurz5lfnhx64tht7fznecjkrvznh6rzivbv] inductor_config[fx_passes_numeric_check]: {'pre_grad': False, 'precision': 0.0001, 'num_iterations': 1, 'requires_optimizer': True}", + "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[generate_intermediate_hooks]: False", + "[tquy2we2efmowuj4wuqzcfcfdcrkzkzmwdae6hprj7fa64jpusq] inductor_config[global_cache_dir]: None", + "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[group_fusion]: False", + "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[halide.asserts]: False", + "[ljhgflgihidopsfsdcbqynv27nceykby3nutyd5jlcpq7n6e7l4] inductor_config[halide.cpu_target]: host", + "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[halide.debug]: False", + "[wx7vmsmrdpk5ue2txlywp3lj3faqmdjphs5fgg2ehzsyno7uovg] inductor_config[halide.gpu_target]: host-cuda", + "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[halide.scan_kernels]: False", + "[k5ogk6345jvklsnu7g2njqstiz2g6pm5wmqpgg3kasrmuqwjvl6] inductor_config[halide.scheduler_cpu]: Adams2019", + "[svgytlua5wcyeia7wq7e6zgh5tsueikrnzchmdmouvmkpfsc2zq] inductor_config[halide.scheduler_cuda]: Anderson2021", + "[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[implicit_fallbacks]: True", + "[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[inplace_buffers]: True", + "[5fxczt3ciyxitdhizb7sfsgn7fhpczcqsngttnt5ot2wyctk7co] inductor_config[inter_node_bw]: 25", + "[yezuzjtg4h3jjur4jwtwiehbyixa7eonq4tqsqmwqve2lvvmrem] inductor_config[intra_node_bw]: 300", + "[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[is_nightly_or_source]: True", + "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[is_predispatch]: False", + "[tquy2we2efmowuj4wuqzcfcfdcrkzkzmwdae6hprj7fa64jpusq] inductor_config[joint_custom_post_pass]: None", + "[tquy2we2efmowuj4wuqzcfcfdcrkzkzmwdae6hprj7fa64jpusq] inductor_config[joint_custom_pre_pass]: None", + "[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[joint_graph_constant_folding]: True", + 
"[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[keep_output_stride]: True", + "[j6c55jha5r2sdys2rwq7uqhtleea5dgjcye7nicfgft36v7xfvp] inductor_config[kernel_name_max_ops]: 10", + "[4p2fdjlvxrcw7c7fvzm5huhtqxnro4kvkx56f7p5zyrxqkwooov] inductor_config[layout_opt_default]: 1", + "[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[layout_optimization]: True", + "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[max_autotune]: False", + "[uqlsbif4zxd75vt522p52txyuguieipi2lwz5g5awt56lccqk7s] inductor_config[max_autotune_conv_backends]: ATEN,TRITON", + "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[max_autotune_gemm]: False", + "[2y7luesktjrque3nr7qtxnum2mkbeegzdrsvkm3rvdlhqboajhx] inductor_config[max_autotune_gemm_backends]: ATEN,TRITON,CPP", + "[jvchmi66fvqzlemhr5fcqorz5trfdtdalzfagtj2aolmimwqhdq] inductor_config[max_autotune_gemm_search_space]: DEFAULT", + "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[max_autotune_pointwise]: False", + "[bh33ranllcgilhgmgr3qvygzxjm6isq5iexnfm3zx6fnr2zwlp2] inductor_config[max_autotune_subproc_graceful_timeout_seconds]: 1.0", + "[iglov24t7x5ruci344aer2tm6nqshi4veuw4wxlssxtu46cx76m] inductor_config[max_autotune_subproc_result_timeout_seconds]: 60.0", + "[pwoh5aypf4fxbntdvwt67rppxorqos6xr3w7qzeun6kblbfg2ga] inductor_config[max_autotune_subproc_terminate_timeout_seconds]: 2.0", + "[aghvyrrgwvxijco2pk5wzc3cgmmthrbmgxitiibxuuscxdwrjd3] inductor_config[max_epilogue_benchmarked_choices]: 1", + "[jykiys6ynafs3zdylwa5ggq6j655mxeh42d6mtdi22gffkrmiac] inductor_config[max_fusion_size]: 64", + "[yttmfmxblgcbsvbokguzowcorrcxz5uunxtcvsbe6nijgcx45he] inductor_config[max_pointwise_cat_inputs]: 8", + "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[memory_planning]: False", + "[x75won4jmsgeb63pcvwr2y4eteyzzdhmf5rv6xhjppie4hx2yu5] inductor_config[memory_pool]: intermediates", + "[v2td5s4lnsvyxvaevy4chx6kc5h3mm2axazbgwimqule5zrzao7] inductor_config[mixed_mm_choice]: heuristic", + "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[nan_asserts]: False", + "[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[optimize_scatter_upon_const_tensor]: True", + "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[pad_channels_last]: False", + "[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[pattern_matcher]: True", + "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[permute_fusion]: False", + "[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[pick_loop_orders]: True", + "[tquy2we2efmowuj4wuqzcfcfdcrkzkzmwdae6hprj7fa64jpusq] inductor_config[post_grad_custom_post_pass]: None", + "[tquy2we2efmowuj4wuqzcfcfdcrkzkzmwdae6hprj7fa64jpusq] inductor_config[post_grad_custom_pre_pass]: None", + "[4bryyl4ahh5whyg3zwqebpwmjnx6w77nqgqbdjlowju6lkqtn7w] inductor_config[post_grad_fusion_options]: {}", + "[tquy2we2efmowuj4wuqzcfcfdcrkzkzmwdae6hprj7fa64jpusq] inductor_config[pre_grad_custom_pass]: None", + "[gtkv35cxmtt6tr556buxi277a67g25mjojnv32dc4bjvc7bwscw] inductor_config[pre_grad_fusion_options]: {'batch_linear': {}, 'batch_linear_lhs': {}, 'batch_layernorm': {}, 'batch_tanh': {}, 'batch_relu': {}, 'batch_sigmoid': {}}", + "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[profile_bandwidth]: False", + "[tquy2we2efmowuj4wuqzcfcfdcrkzkzmwdae6hprj7fa64jpusq] inductor_config[profile_bandwidth_output]: None", + 
"[v3hzzlv4tjgvp3pyhmzagjd25orl6n7nynoa7svlhhwk73b7u3c] inductor_config[profile_bandwidth_regex]: ", + "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[profiler_mark_wrapper_call]: False", + "[yttmfmxblgcbsvbokguzowcorrcxz5uunxtcvsbe6nijgcx45he] inductor_config[realize_acc_reads_threshold]: 8", + "[rr5m5hsocoyodldz7vcvaizdwvm2rt34evmqdxvng7wz3tufvo6] inductor_config[realize_opcount_threshold]: 30", + "[lkkae3meylaixfif4thncru4hjqeaislawjoghffrbwuscaagei] inductor_config[realize_reads_threshold]: 4", + "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[reorder_for_compute_comm_overlap]: False", + "[ssupi7bu3rrhdpg2jyegzncu3kg3nnhklyliqvutaxgs7y7k3dx] inductor_config[reorder_for_compute_comm_overlap_passes]: ['reorder_compute_for_overlap', 'sink_waits', 'raise_comms']", + "[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[reorder_for_locality]: True", + "[h25wqx6vliw4j5rtzzbv6latydxyei3deyg6v7wzvnzryfktuki] inductor_config[rocm.arch]: []", + "[tquy2we2efmowuj4wuqzcfcfdcrkzkzmwdae6hprj7fa64jpusq] inductor_config[rocm.ck_dir]: None", + "[oartxnko2l7d67tzwwm2otcumaut3n4wwcfgz3o377hmcveu5ft] inductor_config[rocm.ck_supported_arch]: ['gfx90a', 'gfx940', 'gfx941', 'gfx942']", + "[klfqjprnpfhcdurgvuikvc4rpd5ynkpk77toousr5h3u5roty6p] inductor_config[rocm.compile_opt_level]: -O2", + "[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[rocm.flush_denormals]: True", + "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[rocm.is_debug]: False", + "[tquy2we2efmowuj4wuqzcfcfdcrkzkzmwdae6hprj7fa64jpusq] inductor_config[rocm.n_max_profiling_configs]: None", + "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[rocm.print_kernel_resource_usage]: False", + "[tquy2we2efmowuj4wuqzcfcfdcrkzkzmwdae6hprj7fa64jpusq] inductor_config[rocm.rocm_home]: None", + "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[rocm.save_temps]: False", + "[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[rocm.use_fast_math]: True", + "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[rocm.use_preselected_instances]: False", + "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[save_args]: False", + "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[search_autotune_cache]: False", + "[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[shape_padding]: True", + "[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[size_asserts]: True", + "[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[split_cat_fx_passes]: True", + "[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[split_reductions]: True", + "[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[static_weight_shapes]: True", + "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[triton.autotune_at_compile_time]: False", + "[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[triton.autotune_cublasLt]: True", + "[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[triton.autotune_pointwise]: True", + "[tuax46wac7rfv2trf5gcps6vleo3cq44lbnrdxtprvo3ljjaddj] inductor_config[triton.cudagraph_dynamic_shape_warn_limit]: 50", + "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[triton.cudagraph_skip_dynamic_graphs]: False", + "[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] 
inductor_config[triton.cudagraph_support_input_mutation]: True", + "[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[triton.cudagraph_trees]: True", + "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[triton.cudagraph_trees_history_recording]: False", + "[ljdqgtysl3vdf7j6attlz5gmjg2ncihnveojfyubosplmkrjgra] inductor_config[triton.cudagraph_unexpected_rerecord_limit]: 128", + "[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[triton.cudagraphs]: True", + "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[triton.debug_sync_graph]: False", + "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[triton.debug_sync_kernel]: False", + "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[triton.dense_indexing]: False", + "[yrty22bseefglnysuoec4ji7j2rnaggdj3g33zzj7avogwfmgdw] inductor_config[triton.descriptive_names]: original_aten", + "[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[triton.divisible_by_16]: True", + "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[triton.fast_path_cudagraph_asserts]: False", + "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[triton.force_cudagraph_sync]: False", + "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[triton.force_cudagraphs_warmup]: False", + "[tquy2we2efmowuj4wuqzcfcfdcrkzkzmwdae6hprj7fa64jpusq] inductor_config[triton.inject_relu_bug_TESTING_ONLY]: None", + "[pr5nr4a7dthirgd2ljo3d2xakc63ywxugusu6mkmr6gmpeliyib] inductor_config[triton.max_tiles]: 2", + "[fv6slhtedtydps5s5u2etitscliblzcidyitqf7krsv4e23fzk6] inductor_config[triton.min_split_scan_rblock]: 256", + "[vrl5ktomgtzox5xucd3np6vug3vyj6hwwzahqijuwpmamlv7ohi] inductor_config[triton.multi_kernel]: 0", + "[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[triton.persistent_reductions]: True", + "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[triton.skip_cudagraph_warmup]: False", + "[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[triton.slow_path_cudagraph_asserts]: True", + "[ebt2ncs4f5y7dn7btzi76mnouepvzad474tmp5iju4wiuumjl4s] inductor_config[triton.spill_threshold]: 16", + "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[triton.store_cubin]: False", + "[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[triton.tiling_prevents_pointwise_fusion]: True", + "[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[triton.tiling_prevents_reduction_fusion]: True", + "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[triton.unique_kernel_names]: False", + "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[triton.use_block_ptr]: False", + "[wft6ljqsfr3x4m7fa5zuyb7cwknky4irrxz4bjr6uzr2yiopxqj] inductor_config[unbacked_symint_fallback]: 8192", + "[yttmfmxblgcbsvbokguzowcorrcxz5uunxtcvsbe6nijgcx45he] inductor_config[unroll_reductions_threshold]: 8", + "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[unsafe_ignore_unsupported_triton_autotune_args]: False", + "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[use_minimal_arrayref_interface]: False", + "[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[use_mixed_mm]: True", + "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[verbose_progress]: False", + "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] 
inductor_config[warn_mix_layout]: False", + "[hofygoznqmna6yvgsc6itdddi4hxftssgegh6wquixg2yng3a3z] inductor_config[worker_start_method]: subprocess" + ] + }, + "ph": "i", + "pid": 0, + "s": "p" + } +V0806 13:56:05.129000 4107173 torch/_inductor/codecache.py:1326] {"artifact": {"name": "fx_graph_cache_hash", "encoding": "json"}, "frame_id": 2, "frame_compile_id": 0, "attempt": 1, "has_payload": "44fe4dd764a6fe29aa0b4a5ce2ca423f"} + {"key": "foijwxq2i7flux6r2ba5gws3rpialjqk5cmhfg54f7i2spz557vl", "cache_state": "miss", "components": ["[4hgegienmiaqunsqbxyycnrivovz4r63bypl5psmqilwotq5er6] gm: GraphModule()\n\n\n\ndef forward(self, primals_1, primals_4, primals_10, primals_16, primals_22, primals_28, primals_34, primals_40, primals_46, primals_52, primals_58, primals_64, primals_70, primals_76, primals_82, primals_88, primals_94, primals_100, primals_106, primals_112, primals_118, primals_124, primals_130, primals_136, primals_142, primals_148, unsqueeze, mul, view, permute_1, permute_2, permute_3, getitem_5, getitem_6, getitem_7, getitem_8, mul_2, view_8, addmm_2, view_10, mul_8, view_12, permute_9, permute_10, permute_11, getitem_16, getitem_17, getitem_18, getitem_19, mul_10, view_20, addmm_6, view_22, mul_16, view_24, permute_17, permute_18, permute_19, getitem_27, getitem_28, getitem_29, getitem_30, mul_18, view_32, addmm_10, view_34, mul_24, view_36, permute_25, permute_26, permute_27, getitem_38, getitem_39, getitem_40, getitem_41, mul_26, view_44, addmm_14, view_46, mul_32, view_48, permute_33, permute_34, permute_35, getitem_49, getitem_50, getitem_51, getitem_52, mul_34, view_56, addmm_18, view_58, mul_40, view_60, permute_41, permute_42, permute_43, getitem_60, getitem_61, getitem_62, getitem_63, mul_42, view_68, addmm_22, view_70, mul_48, view_72, permute_49, permute_50, permute_51, getitem_71, getitem_72, getitem_73, getitem_74, mul_50, view_80, addmm_26, view_82, mul_56, view_84, permute_57, permute_58, permute_59, getitem_82, getitem_83, getitem_84, getitem_85, mul_58, view_92, addmm_30, view_94, mul_64, view_96, permute_65, permute_66, permute_67, getitem_93, getitem_94, getitem_95, getitem_96, mul_66, view_104, addmm_34, view_106, mul_72, view_108, permute_73, permute_74, permute_75, getitem_104, getitem_105, getitem_106, getitem_107, mul_74, view_116, addmm_38, view_118, mul_80, view_120, permute_81, permute_82, permute_83, getitem_115, getitem_116, getitem_117, getitem_118, mul_82, view_128, addmm_42, view_130, mul_88, view_132, permute_89, permute_90, permute_91, getitem_126, getitem_127, getitem_128, getitem_129, mul_90, view_140, addmm_46, view_142, mul_96, full_default, view_144, permute_99, div, permute_101, permute_105, div_1, permute_109, permute_117, div_2, permute_121, permute_125, div_3, permute_129, permute_137, div_4, permute_141, permute_145, div_5, permute_149, permute_157, div_6, permute_161, permute_165, div_7, permute_169, permute_177, div_8, permute_181, permute_185, div_9, permute_189, permute_197, div_10, permute_201, permute_205, div_11, permute_209, permute_217, div_12, permute_221, permute_225, div_13, permute_229, permute_237, div_14, permute_241, permute_245, div_15, permute_249, permute_257, div_16, permute_261, permute_265, div_17, permute_269, permute_277, div_18, permute_281, permute_285, div_19, permute_289, permute_297, div_20, permute_301, permute_305, div_21, permute_309, permute_317, div_22, permute_321, permute_325, div_23, permute_329, permute_337, div_24, tangents_1):\n view_146 = torch.ops.aten.view.default(tangents_1, [1, 50304]); 
tangents_1 = None\n permute_97 = torch.ops.aten.permute.default(view_146, [1, 0])\n mm_1 = torch.ops.aten.mm.default(permute_97, view_144); permute_97 = view_144 = None\n permute_98 = torch.ops.aten.permute.default(mm_1, [1, 0]); mm_1 = None\n mm_2 = torch.ops.aten.mm.default(view_146, permute_99); view_146 = permute_99 = None\n view_147 = torch.ops.aten.view.default(mm_2, [1, 1, 768]); mm_2 = None\n permute_100 = torch.ops.aten.permute.default(permute_98, [1, 0]); permute_98 = None\n full_default_1 = torch.ops.aten.full.default([1, 64, 768], 0, dtype = torch.float32, layout = torch.strided, device = device(type='cuda', index=0), pin_memory = False)\n index_put = torch.ops.aten.index_put.default(full_default_1, [None, full_default], view_147, True); full_default_1 = full_default = view_147 = None\n mul_99 = torch.ops.aten.mul.Tensor(index_put, primals_148); primals_148 = None\n mul_100 = torch.ops.aten.mul.Tensor(mul_99, 768)\n sum_1 = torch.ops.aten.sum.dim_IntList(mul_99, [2], True)\n mul_101 = torch.ops.aten.mul.Tensor(mul_99, mul_96); mul_99 = None\n sum_2 = torch.ops.aten.sum.dim_IntList(mul_101, [2], True); mul_101 = None\n mul_102 = torch.ops.aten.mul.Tensor(mul_96, sum_2); sum_2 = None\n sub_26 = torch.ops.aten.sub.Tensor(mul_100, sum_1); mul_100 = sum_1 = None\n sub_27 = torch.ops.aten.sub.Tensor(sub_26, mul_102); sub_26 = mul_102 = None\n mul_103 = torch.ops.aten.mul.Tensor(div, sub_27); div = sub_27 = None\n mul_104 = torch.ops.aten.mul.Tensor(index_put, mul_96); mul_96 = None\n sum_3 = torch.ops.aten.sum.dim_IntList(mul_104, [0, 1]); mul_104 = None\n sum_4 = torch.ops.aten.sum.dim_IntList(index_put, [0, 1]); index_put = None\n view_148 = torch.ops.aten.view.default(mul_103, [64, 768])\n mm_3 = torch.ops.aten.mm.default(view_148, permute_101); permute_101 = None\n permute_102 = torch.ops.aten.permute.default(view_148, [1, 0])\n mm_4 = torch.ops.aten.mm.default(permute_102, view_142); permute_102 = view_142 = None\n permute_103 = torch.ops.aten.permute.default(mm_4, [1, 0]); mm_4 = None\n sum_5 = torch.ops.aten.sum.dim_IntList(view_148, [0], True); view_148 = None\n view_149 = torch.ops.aten.view.default(sum_5, [768]); sum_5 = None\n permute_104 = torch.ops.aten.permute.default(permute_103, [1, 0]); permute_103 = None\n view_150 = torch.ops.aten.view.default(mm_3, [1, 64, 3072]); mm_3 = None\n view_141 = torch.ops.aten.view.default(addmm_46, [1, 64, 3072]); addmm_46 = None\n mul_92 = torch.ops.aten.mul.Tensor(view_141, 0.5)\n mul_105 = torch.ops.aten.mul.Tensor(view_150, mul_92); mul_92 = None\n pow_12 = torch.ops.aten.pow.Tensor_Scalar(view_141, 3.0)\n mul_93 = torch.ops.aten.mul.Tensor(pow_12, 0.044715); pow_12 = None\n add_94 = torch.ops.aten.add.Tensor(view_141, mul_93); mul_93 = None\n mul_94 = torch.ops.aten.mul.Tensor(add_94, 0.7978845608028654); add_94 = None\n tanh_11 = torch.ops.aten.tanh.default(mul_94); mul_94 = None\n add_95 = torch.ops.aten.add.Tensor(tanh_11, 1.0)\n mul_106 = torch.ops.aten.mul.Tensor(view_150, add_95); view_150 = add_95 = None\n mul_107 = torch.ops.aten.mul.Tensor(tanh_11, tanh_11); tanh_11 = None\n sub_28 = torch.ops.aten.sub.Tensor(1, mul_107); mul_107 = None\n mul_108 = torch.ops.aten.mul.Tensor(mul_105, sub_28); mul_105 = sub_28 = None\n mul_109 = torch.ops.aten.mul.Tensor(mul_108, 0.7978845608028654); mul_108 = None\n mul_110 = torch.ops.aten.mul.Tensor(mul_109, 0.044715)\n pow_13 = torch.ops.aten.pow.Tensor_Scalar(view_141, 2.0); view_141 = None\n mul_111 = torch.ops.aten.mul.Scalar(pow_13, 3.0); pow_13 = None\n mul_112 = 
torch.ops.aten.mul.Tensor(mul_110, mul_111); mul_110 = mul_111 = None\n add_99 = torch.ops.aten.add.Tensor(mul_109, mul_112); mul_109 = mul_112 = None\n mul_113 = torch.ops.aten.mul.Tensor(mul_106, 0.5); mul_106 = None\n add_100 = torch.ops.aten.add.Tensor(add_99, mul_113); add_99 = mul_113 = None\n view_151 = torch.ops.aten.view.default(add_100, [64, 3072]); add_100 = None\n mm_5 = torch.ops.aten.mm.default(view_151, permute_105); permute_105 = None\n permute_106 = torch.ops.aten.permute.default(view_151, [1, 0])\n mm_6 = torch.ops.aten.mm.default(permute_106, view_140); permute_106 = view_140 = None\n permute_107 = torch.ops.aten.permute.default(mm_6, [1, 0]); mm_6 = None\n sum_6 = torch.ops.aten.sum.dim_IntList(view_151, [0], True); view_151 = None\n view_152 = torch.ops.aten.view.default(sum_6, [3072]); sum_6 = None\n permute_108 = torch.ops.aten.permute.default(permute_107, [1, 0]); permute_107 = None\n view_153 = torch.ops.aten.view.default(mm_5, [1, 64, 768]); mm_5 = None\n mul_115 = torch.ops.aten.mul.Tensor(view_153, primals_142); primals_142 = None\n mul_116 = torch.ops.aten.mul.Tensor(mul_115, 768)\n sum_7 = torch.ops.aten.sum.dim_IntList(mul_115, [2], True)\n mul_117 = torch.ops.aten.mul.Tensor(mul_115, mul_90); mul_115 = None\n sum_8 = torch.ops.aten.sum.dim_IntList(mul_117, [2], True); mul_117 = None\n mul_118 = torch.ops.aten.mul.Tensor(mul_90, sum_8); sum_8 = None\n sub_30 = torch.ops.aten.sub.Tensor(mul_116, sum_7); mul_116 = sum_7 = None\n sub_31 = torch.ops.aten.sub.Tensor(sub_30, mul_118); sub_30 = mul_118 = None\n mul_119 = torch.ops.aten.mul.Tensor(div_1, sub_31); div_1 = sub_31 = None\n mul_120 = torch.ops.aten.mul.Tensor(view_153, mul_90); mul_90 = None\n sum_9 = torch.ops.aten.sum.dim_IntList(mul_120, [0, 1]); mul_120 = None\n sum_10 = torch.ops.aten.sum.dim_IntList(view_153, [0, 1]); view_153 = None\n add_101 = torch.ops.aten.add.Tensor(mul_103, mul_119); mul_103 = mul_119 = None\n view_154 = torch.ops.aten.view.default(add_101, [64, 768])\n mm_7 = torch.ops.aten.mm.default(view_154, permute_109); permute_109 = None\n permute_110 = torch.ops.aten.permute.default(view_154, [1, 0])\n permute_92 = torch.ops.aten.permute.default(getitem_126, [0, 2, 1, 3])\n view_137 = torch.ops.aten.view.default(permute_92, [1, 64, 768]); permute_92 = None\n view_138 = torch.ops.aten.view.default(view_137, [64, 768]); view_137 = None\n mm_8 = torch.ops.aten.mm.default(permute_110, view_138); permute_110 = view_138 = None\n permute_111 = torch.ops.aten.permute.default(mm_8, [1, 0]); mm_8 = None\n sum_11 = torch.ops.aten.sum.dim_IntList(view_154, [0], True); view_154 = None\n view_155 = torch.ops.aten.view.default(sum_11, [768]); sum_11 = None\n permute_112 = torch.ops.aten.permute.default(permute_111, [1, 0]); permute_111 = None\n view_156 = torch.ops.aten.view.default(mm_7, [1, 64, 768]); mm_7 = None\n view_157 = torch.ops.aten.view.default(view_156, [1, 64, 12, 64]); view_156 = None\n permute_113 = torch.ops.aten.permute.default(view_157, [0, 2, 1, 3]); view_157 = None\n _scaled_dot_product_efficient_attention_backward = torch.ops.aten._scaled_dot_product_efficient_attention_backward.default(permute_113, permute_90, permute_89, permute_91, None, getitem_126, getitem_127, getitem_128, getitem_129, 0.0, [True, True, True, False], True); permute_113 = permute_90 = permute_89 = permute_91 = getitem_126 = getitem_127 = getitem_128 = getitem_129 = None\n getitem_134 = _scaled_dot_product_efficient_attention_backward[0]\n getitem_135 = _scaled_dot_product_efficient_attention_backward[1]\n 
getitem_136 = _scaled_dot_product_efficient_attention_backward[2]; _scaled_dot_product_efficient_attention_backward = None\n permute_114 = torch.ops.aten.permute.default(getitem_136, [0, 2, 1, 3]); getitem_136 = None\n view_158 = torch.ops.aten.view.default(permute_114, [1, 64, 768]); permute_114 = None\n permute_115 = torch.ops.aten.permute.default(getitem_134, [0, 2, 1, 3]); getitem_134 = None\n view_159 = torch.ops.aten.view.default(permute_115, [1, 64, 768]); permute_115 = None\n permute_116 = torch.ops.aten.permute.default(getitem_135, [0, 2, 1, 3]); getitem_135 = None\n view_160 = torch.ops.aten.view.default(permute_116, [1, 64, 768]); permute_116 = None\n cat = torch.ops.aten.cat.default([view_159, view_160, view_158], 2); view_159 = view_160 = view_158 = None\n view_161 = torch.ops.aten.view.default(cat, [64, 2304]); cat = None\n mm_9 = torch.ops.aten.mm.default(view_161, permute_117); permute_117 = None\n permute_118 = torch.ops.aten.permute.default(view_161, [1, 0])\n mm_10 = torch.ops.aten.mm.default(permute_118, view_132); permute_118 = view_132 = None\n permute_119 = torch.ops.aten.permute.default(mm_10, [1, 0]); mm_10 = None\n sum_12 = torch.ops.aten.sum.dim_IntList(view_161, [0], True); view_161 = None\n view_162 = torch.ops.aten.view.default(sum_12, [2304]); sum_12 = None\n permute_120 = torch.ops.aten.permute.default(permute_119, [1, 0]); permute_119 = None\n view_163 = torch.ops.aten.view.default(mm_9, [1, 64, 768]); mm_9 = None\n mul_122 = torch.ops.aten.mul.Tensor(view_163, primals_136); primals_136 = None\n mul_123 = torch.ops.aten.mul.Tensor(mul_122, 768)\n sum_13 = torch.ops.aten.sum.dim_IntList(mul_122, [2], True)\n mul_124 = torch.ops.aten.mul.Tensor(mul_122, mul_88); mul_122 = None\n sum_14 = torch.ops.aten.sum.dim_IntList(mul_124, [2], True); mul_124 = None\n mul_125 = torch.ops.aten.mul.Tensor(mul_88, sum_14); sum_14 = None\n sub_33 = torch.ops.aten.sub.Tensor(mul_123, sum_13); mul_123 = sum_13 = None\n sub_34 = torch.ops.aten.sub.Tensor(sub_33, mul_125); sub_33 = mul_125 = None\n mul_126 = torch.ops.aten.mul.Tensor(div_2, sub_34); div_2 = sub_34 = None\n mul_127 = torch.ops.aten.mul.Tensor(view_163, mul_88); mul_88 = None\n sum_15 = torch.ops.aten.sum.dim_IntList(mul_127, [0, 1]); mul_127 = None\n sum_16 = torch.ops.aten.sum.dim_IntList(view_163, [0, 1]); view_163 = None\n add_102 = torch.ops.aten.add.Tensor(add_101, mul_126); add_101 = mul_126 = None\n view_164 = torch.ops.aten.view.default(add_102, [64, 768])\n mm_11 = torch.ops.aten.mm.default(view_164, permute_121); permute_121 = None\n permute_122 = torch.ops.aten.permute.default(view_164, [1, 0])\n mm_12 = torch.ops.aten.mm.default(permute_122, view_130); permute_122 = view_130 = None\n permute_123 = torch.ops.aten.permute.default(mm_12, [1, 0]); mm_12 = None\n sum_17 = torch.ops.aten.sum.dim_IntList(view_164, [0], True); view_164 = None\n view_165 = torch.ops.aten.view.default(sum_17, [768]); sum_17 = None\n permute_124 = torch.ops.aten.permute.default(permute_123, [1, 0]); permute_123 = None\n view_166 = torch.ops.aten.view.default(mm_11, [1, 64, 3072]); mm_11 = None\n view_129 = torch.ops.aten.view.default(addmm_42, [1, 64, 3072]); addmm_42 = None\n mul_84 = torch.ops.aten.mul.Tensor(view_129, 0.5)\n mul_128 = torch.ops.aten.mul.Tensor(view_166, mul_84); mul_84 = None\n pow_11 = torch.ops.aten.pow.Tensor_Scalar(view_129, 3.0)\n mul_85 = torch.ops.aten.mul.Tensor(pow_11, 0.044715); pow_11 = None\n add_86 = torch.ops.aten.add.Tensor(view_129, mul_85); mul_85 = None\n mul_86 = 
torch.ops.aten.mul.Tensor(add_86, 0.7978845608028654); add_86 = None\n tanh_10 = torch.ops.aten.tanh.default(mul_86); mul_86 = None\n add_87 = torch.ops.aten.add.Tensor(tanh_10, 1.0)\n mul_129 = torch.ops.aten.mul.Tensor(view_166, add_87); view_166 = add_87 = None\n mul_130 = torch.ops.aten.mul.Tensor(tanh_10, tanh_10); tanh_10 = None\n sub_35 = torch.ops.aten.sub.Tensor(1, mul_130); mul_130 = None\n mul_131 = torch.ops.aten.mul.Tensor(mul_128, sub_35); mul_128 = sub_35 = None\n mul_132 = torch.ops.aten.mul.Tensor(mul_131, 0.7978845608028654); mul_131 = None\n mul_133 = torch.ops.aten.mul.Tensor(mul_132, 0.044715)\n pow_14 = torch.ops.aten.pow.Tensor_Scalar(view_129, 2.0); view_129 = None\n mul_134 = torch.ops.aten.mul.Scalar(pow_14, 3.0); pow_14 = None\n mul_135 = torch.ops.aten.mul.Tensor(mul_133, mul_134); mul_133 = mul_134 = None\n add_103 = torch.ops.aten.add.Tensor(mul_132, mul_135); mul_132 = mul_135 = None\n mul_136 = torch.ops.aten.mul.Tensor(mul_129, 0.5); mul_129 = None\n add_104 = torch.ops.aten.add.Tensor(add_103, mul_136); add_103 = mul_136 = None\n view_167 = torch.ops.aten.view.default(add_104, [64, 3072]); add_104 = None\n mm_13 = torch.ops.aten.mm.default(view_167, permute_125); permute_125 = None\n permute_126 = torch.ops.aten.permute.default(view_167, [1, 0])\n mm_14 = torch.ops.aten.mm.default(permute_126, view_128); permute_126 = view_128 = None\n permute_127 = torch.ops.aten.permute.default(mm_14, [1, 0]); mm_14 = None\n sum_18 = torch.ops.aten.sum.dim_IntList(view_167, [0], True); view_167 = None\n view_168 = torch.ops.aten.view.default(sum_18, [3072]); sum_18 = None\n permute_128 = torch.ops.aten.permute.default(permute_127, [1, 0]); permute_127 = None\n view_169 = torch.ops.aten.view.default(mm_13, [1, 64, 768]); mm_13 = None\n mul_138 = torch.ops.aten.mul.Tensor(view_169, primals_130); primals_130 = None\n mul_139 = torch.ops.aten.mul.Tensor(mul_138, 768)\n sum_19 = torch.ops.aten.sum.dim_IntList(mul_138, [2], True)\n mul_140 = torch.ops.aten.mul.Tensor(mul_138, mul_82); mul_138 = None\n sum_20 = torch.ops.aten.sum.dim_IntList(mul_140, [2], True); mul_140 = None\n mul_141 = torch.ops.aten.mul.Tensor(mul_82, sum_20); sum_20 = None\n sub_37 = torch.ops.aten.sub.Tensor(mul_139, sum_19); mul_139 = sum_19 = None\n sub_38 = torch.ops.aten.sub.Tensor(sub_37, mul_141); sub_37 = mul_141 = None\n mul_142 = torch.ops.aten.mul.Tensor(div_3, sub_38); div_3 = sub_38 = None\n mul_143 = torch.ops.aten.mul.Tensor(view_169, mul_82); mul_82 = None\n sum_21 = torch.ops.aten.sum.dim_IntList(mul_143, [0, 1]); mul_143 = None\n sum_22 = torch.ops.aten.sum.dim_IntList(view_169, [0, 1]); view_169 = None\n add_105 = torch.ops.aten.add.Tensor(add_102, mul_142); add_102 = mul_142 = None\n view_170 = torch.ops.aten.view.default(add_105, [64, 768])\n mm_15 = torch.ops.aten.mm.default(view_170, permute_129); permute_129 = None\n permute_130 = torch.ops.aten.permute.default(view_170, [1, 0])\n permute_84 = torch.ops.aten.permute.default(getitem_115, [0, 2, 1, 3])\n view_125 = torch.ops.aten.view.default(permute_84, [1, 64, 768]); permute_84 = None\n view_126 = torch.ops.aten.view.default(view_125, [64, 768]); view_125 = None\n mm_16 = torch.ops.aten.mm.default(permute_130, view_126); permute_130 = view_126 = None\n permute_131 = torch.ops.aten.permute.default(mm_16, [1, 0]); mm_16 = None\n sum_23 = torch.ops.aten.sum.dim_IntList(view_170, [0], True); view_170 = None\n view_171 = torch.ops.aten.view.default(sum_23, [768]); sum_23 = None\n permute_132 = torch.ops.aten.permute.default(permute_131, 
[1, 0]); permute_131 = None\n view_172 = torch.ops.aten.view.default(mm_15, [1, 64, 768]); mm_15 = None\n view_173 = torch.ops.aten.view.default(view_172, [1, 64, 12, 64]); view_172 = None\n permute_133 = torch.ops.aten.permute.default(view_173, [0, 2, 1, 3]); view_173 = None\n _scaled_dot_product_efficient_attention_backward_1 = torch.ops.aten._scaled_dot_product_efficient_attention_backward.default(permute_133, permute_82, permute_81, permute_83, None, getitem_115, getitem_116, getitem_117, getitem_118, 0.0, [True, True, True, False], True); permute_133 = permute_82 = permute_81 = permute_83 = getitem_115 = getitem_116 = getitem_117 = getitem_118 = None\n getitem_138 = _scaled_dot_product_efficient_attention_backward_1[0]\n getitem_139 = _scaled_dot_product_efficient_attention_backward_1[1]\n getitem_140 = _scaled_dot_product_efficient_attention_backward_1[2]; _scaled_dot_product_efficient_attention_backward_1 = None\n permute_134 = torch.ops.aten.permute.default(getitem_140, [0, 2, 1, 3]); getitem_140 = None\n view_174 = torch.ops.aten.view.default(permute_134, [1, 64, 768]); permute_134 = None\n permute_135 = torch.ops.aten.permute.default(getitem_138, [0, 2, 1, 3]); getitem_138 = None\n view_175 = torch.ops.aten.view.default(permute_135, [1, 64, 768]); permute_135 = None\n permute_136 = torch.ops.aten.permute.default(getitem_139, [0, 2, 1, 3]); getitem_139 = None\n view_176 = torch.ops.aten.view.default(permute_136, [1, 64, 768]); permute_136 = None\n cat_1 = torch.ops.aten.cat.default([view_175, view_176, view_174], 2); view_175 = view_176 = view_174 = None\n view_177 = torch.ops.aten.view.default(cat_1, [64, 2304]); cat_1 = None\n mm_17 = torch.ops.aten.mm.default(view_177, permute_137); permute_137 = None\n permute_138 = torch.ops.aten.permute.default(view_177, [1, 0])\n mm_18 = torch.ops.aten.mm.default(permute_138, view_120); permute_138 = view_120 = None\n permute_139 = torch.ops.aten.permute.default(mm_18, [1, 0]); mm_18 = None\n sum_24 = torch.ops.aten.sum.dim_IntList(view_177, [0], True); view_177 = None\n view_178 = torch.ops.aten.view.default(sum_24, [2304]); sum_24 = None\n permute_140 = torch.ops.aten.permute.default(permute_139, [1, 0]); permute_139 = None\n view_179 = torch.ops.aten.view.default(mm_17, [1, 64, 768]); mm_17 = None\n mul_145 = torch.ops.aten.mul.Tensor(view_179, primals_124); primals_124 = None\n mul_146 = torch.ops.aten.mul.Tensor(mul_145, 768)\n sum_25 = torch.ops.aten.sum.dim_IntList(mul_145, [2], True)\n mul_147 = torch.ops.aten.mul.Tensor(mul_145, mul_80); mul_145 = None\n sum_26 = torch.ops.aten.sum.dim_IntList(mul_147, [2], True); mul_147 = None\n mul_148 = torch.ops.aten.mul.Tensor(mul_80, sum_26); sum_26 = None\n sub_40 = torch.ops.aten.sub.Tensor(mul_146, sum_25); mul_146 = sum_25 = None\n sub_41 = torch.ops.aten.sub.Tensor(sub_40, mul_148); sub_40 = mul_148 = None\n mul_149 = torch.ops.aten.mul.Tensor(div_4, sub_41); div_4 = sub_41 = None\n mul_150 = torch.ops.aten.mul.Tensor(view_179, mul_80); mul_80 = None\n sum_27 = torch.ops.aten.sum.dim_IntList(mul_150, [0, 1]); mul_150 = None\n sum_28 = torch.ops.aten.sum.dim_IntList(view_179, [0, 1]); view_179 = None\n add_106 = torch.ops.aten.add.Tensor(add_105, mul_149); add_105 = mul_149 = None\n view_180 = torch.ops.aten.view.default(add_106, [64, 768])\n mm_19 = torch.ops.aten.mm.default(view_180, permute_141); permute_141 = None\n permute_142 = torch.ops.aten.permute.default(view_180, [1, 0])\n mm_20 = torch.ops.aten.mm.default(permute_142, view_118); permute_142 = view_118 = None\n permute_143 = 
torch.ops.aten.permute.default(mm_20, [1, 0]); mm_20 = None\n sum_29 = torch.ops.aten.sum.dim_IntList(view_180, [0], True); view_180 = None\n view_181 = torch.ops.aten.view.default(sum_29, [768]); sum_29 = None\n permute_144 = torch.ops.aten.permute.default(permute_143, [1, 0]); permute_143 = None\n view_182 = torch.ops.aten.view.default(mm_19, [1, 64, 3072]); mm_19 = None\n view_117 = torch.ops.aten.view.default(addmm_38, [1, 64, 3072]); addmm_38 = None\n mul_76 = torch.ops.aten.mul.Tensor(view_117, 0.5)\n mul_151 = torch.ops.aten.mul.Tensor(view_182, mul_76); mul_76 = None\n pow_10 = torch.ops.aten.pow.Tensor_Scalar(view_117, 3.0)\n mul_77 = torch.ops.aten.mul.Tensor(pow_10, 0.044715); pow_10 = None\n add_78 = torch.ops.aten.add.Tensor(view_117, mul_77); mul_77 = None\n mul_78 = torch.ops.aten.mul.Tensor(add_78, 0.7978845608028654); add_78 = None\n tanh_9 = torch.ops.aten.tanh.default(mul_78); mul_78 = None\n add_79 = torch.ops.aten.add.Tensor(tanh_9, 1.0)\n mul_152 = torch.ops.aten.mul.Tensor(view_182, add_79); view_182 = add_79 = None\n mul_153 = torch.ops.aten.mul.Tensor(tanh_9, tanh_9); tanh_9 = None\n sub_42 = torch.ops.aten.sub.Tensor(1, mul_153); mul_153 = None\n mul_154 = torch.ops.aten.mul.Tensor(mul_151, sub_42); mul_151 = sub_42 = None\n mul_155 = torch.ops.aten.mul.Tensor(mul_154, 0.7978845608028654); mul_154 = None\n mul_156 = torch.ops.aten.mul.Tensor(mul_155, 0.044715)\n pow_15 = torch.ops.aten.pow.Tensor_Scalar(view_117, 2.0); view_117 = None\n mul_157 = torch.ops.aten.mul.Scalar(pow_15, 3.0); pow_15 = None\n mul_158 = torch.ops.aten.mul.Tensor(mul_156, mul_157); mul_156 = mul_157 = None\n add_107 = torch.ops.aten.add.Tensor(mul_155, mul_158); mul_155 = mul_158 = None\n mul_159 = torch.ops.aten.mul.Tensor(mul_152, 0.5); mul_152 = None\n add_108 = torch.ops.aten.add.Tensor(add_107, mul_159); add_107 = mul_159 = None\n view_183 = torch.ops.aten.view.default(add_108, [64, 3072]); add_108 = None\n mm_21 = torch.ops.aten.mm.default(view_183, permute_145); permute_145 = None\n permute_146 = torch.ops.aten.permute.default(view_183, [1, 0])\n mm_22 = torch.ops.aten.mm.default(permute_146, view_116); permute_146 = view_116 = None\n permute_147 = torch.ops.aten.permute.default(mm_22, [1, 0]); mm_22 = None\n sum_30 = torch.ops.aten.sum.dim_IntList(view_183, [0], True); view_183 = None\n view_184 = torch.ops.aten.view.default(sum_30, [3072]); sum_30 = None\n permute_148 = torch.ops.aten.permute.default(permute_147, [1, 0]); permute_147 = None\n view_185 = torch.ops.aten.view.default(mm_21, [1, 64, 768]); mm_21 = None\n mul_161 = torch.ops.aten.mul.Tensor(view_185, primals_118); primals_118 = None\n mul_162 = torch.ops.aten.mul.Tensor(mul_161, 768)\n sum_31 = torch.ops.aten.sum.dim_IntList(mul_161, [2], True)\n mul_163 = torch.ops.aten.mul.Tensor(mul_161, mul_74); mul_161 = None\n sum_32 = torch.ops.aten.sum.dim_IntList(mul_163, [2], True); mul_163 = None\n mul_164 = torch.ops.aten.mul.Tensor(mul_74, sum_32); sum_32 = None\n sub_44 = torch.ops.aten.sub.Tensor(mul_162, sum_31); mul_162 = sum_31 = None\n sub_45 = torch.ops.aten.sub.Tensor(sub_44, mul_164); sub_44 = mul_164 = None\n mul_165 = torch.ops.aten.mul.Tensor(div_5, sub_45); div_5 = sub_45 = None\n mul_166 = torch.ops.aten.mul.Tensor(view_185, mul_74); mul_74 = None\n sum_33 = torch.ops.aten.sum.dim_IntList(mul_166, [0, 1]); mul_166 = None\n sum_34 = torch.ops.aten.sum.dim_IntList(view_185, [0, 1]); view_185 = None\n add_109 = torch.ops.aten.add.Tensor(add_106, mul_165); add_106 = mul_165 = None\n view_186 = 
torch.ops.aten.view.default(add_109, [64, 768])\n mm_23 = torch.ops.aten.mm.default(view_186, permute_149); permute_149 = None\n permute_150 = torch.ops.aten.permute.default(view_186, [1, 0])\n permute_76 = torch.ops.aten.permute.default(getitem_104, [0, 2, 1, 3])\n view_113 = torch.ops.aten.view.default(permute_76, [1, 64, 768]); permute_76 = None\n view_114 = torch.ops.aten.view.default(view_113, [64, 768]); view_113 = None\n mm_24 = torch.ops.aten.mm.default(permute_150, view_114); permute_150 = view_114 = None\n permute_151 = torch.ops.aten.permute.default(mm_24, [1, 0]); mm_24 = None\n sum_35 = torch.ops.aten.sum.dim_IntList(view_186, [0], True); view_186 = None\n view_187 = torch.ops.aten.view.default(sum_35, [768]); sum_35 = None\n permute_152 = torch.ops.aten.permute.default(permute_151, [1, 0]); permute_151 = None\n view_188 = torch.ops.aten.view.default(mm_23, [1, 64, 768]); mm_23 = None\n view_189 = torch.ops.aten.view.default(view_188, [1, 64, 12, 64]); view_188 = None\n permute_153 = torch.ops.aten.permute.default(view_189, [0, 2, 1, 3]); view_189 = None\n _scaled_dot_product_efficient_attention_backward_2 = torch.ops.aten._scaled_dot_product_efficient_attention_backward.default(permute_153, permute_74, permute_73, permute_75, None, getitem_104, getitem_105, getitem_106, getitem_107, 0.0, [True, True, True, False], True); permute_153 = permute_74 = permute_73 = permute_75 = getitem_104 = getitem_105 = getitem_106 = getitem_107 = None\n getitem_142 = _scaled_dot_product_efficient_attention_backward_2[0]\n getitem_143 = _scaled_dot_product_efficient_attention_backward_2[1]\n getitem_144 = _scaled_dot_product_efficient_attention_backward_2[2]; _scaled_dot_product_efficient_attention_backward_2 = None\n permute_154 = torch.ops.aten.permute.default(getitem_144, [0, 2, 1, 3]); getitem_144 = None\n view_190 = torch.ops.aten.view.default(permute_154, [1, 64, 768]); permute_154 = None\n permute_155 = torch.ops.aten.permute.default(getitem_142, [0, 2, 1, 3]); getitem_142 = None\n view_191 = torch.ops.aten.view.default(permute_155, [1, 64, 768]); permute_155 = None\n permute_156 = torch.ops.aten.permute.default(getitem_143, [0, 2, 1, 3]); getitem_143 = None\n view_192 = torch.ops.aten.view.default(permute_156, [1, 64, 768]); permute_156 = None\n cat_2 = torch.ops.aten.cat.default([view_191, view_192, view_190], 2); view_191 = view_192 = view_190 = None\n view_193 = torch.ops.aten.view.default(cat_2, [64, 2304]); cat_2 = None\n mm_25 = torch.ops.aten.mm.default(view_193, permute_157); permute_157 = None\n permute_158 = torch.ops.aten.permute.default(view_193, [1, 0])\n mm_26 = torch.ops.aten.mm.default(permute_158, view_108); permute_158 = view_108 = None\n permute_159 = torch.ops.aten.permute.default(mm_26, [1, 0]); mm_26 = None\n sum_36 = torch.ops.aten.sum.dim_IntList(view_193, [0], True); view_193 = None\n view_194 = torch.ops.aten.view.default(sum_36, [2304]); sum_36 = None\n permute_160 = torch.ops.aten.permute.default(permute_159, [1, 0]); permute_159 = None\n view_195 = torch.ops.aten.view.default(mm_25, [1, 64, 768]); mm_25 = None\n mul_168 = torch.ops.aten.mul.Tensor(view_195, primals_112); primals_112 = None\n mul_169 = torch.ops.aten.mul.Tensor(mul_168, 768)\n sum_37 = torch.ops.aten.sum.dim_IntList(mul_168, [2], True)\n mul_170 = torch.ops.aten.mul.Tensor(mul_168, mul_72); mul_168 = None\n sum_38 = torch.ops.aten.sum.dim_IntList(mul_170, [2], True); mul_170 = None\n mul_171 = torch.ops.aten.mul.Tensor(mul_72, sum_38); sum_38 = None\n sub_47 = 
torch.ops.aten.sub.Tensor(mul_169, sum_37); mul_169 = sum_37 = None\n sub_48 = torch.ops.aten.sub.Tensor(sub_47, mul_171); sub_47 = mul_171 = None\n mul_172 = torch.ops.aten.mul.Tensor(div_6, sub_48); div_6 = sub_48 = None\n mul_173 = torch.ops.aten.mul.Tensor(view_195, mul_72); mul_72 = None\n sum_39 = torch.ops.aten.sum.dim_IntList(mul_173, [0, 1]); mul_173 = None\n sum_40 = torch.ops.aten.sum.dim_IntList(view_195, [0, 1]); view_195 = None\n add_110 = torch.ops.aten.add.Tensor(add_109, mul_172); add_109 = mul_172 = None\n view_196 = torch.ops.aten.view.default(add_110, [64, 768])\n mm_27 = torch.ops.aten.mm.default(view_196, permute_161); permute_161 = None\n permute_162 = torch.ops.aten.permute.default(view_196, [1, 0])\n mm_28 = torch.ops.aten.mm.default(permute_162, view_106); permute_162 = view_106 = None\n permute_163 = torch.ops.aten.permute.default(mm_28, [1, 0]); mm_28 = None\n sum_41 = torch.ops.aten.sum.dim_IntList(view_196, [0], True); view_196 = None\n view_197 = torch.ops.aten.view.default(sum_41, [768]); sum_41 = None\n permute_164 = torch.ops.aten.permute.default(permute_163, [1, 0]); permute_163 = None\n view_198 = torch.ops.aten.view.default(mm_27, [1, 64, 3072]); mm_27 = None\n view_105 = torch.ops.aten.view.default(addmm_34, [1, 64, 3072]); addmm_34 = None\n mul_68 = torch.ops.aten.mul.Tensor(view_105, 0.5)\n mul_174 = torch.ops.aten.mul.Tensor(view_198, mul_68); mul_68 = None\n pow_9 = torch.ops.aten.pow.Tensor_Scalar(view_105, 3.0)\n mul_69 = torch.ops.aten.mul.Tensor(pow_9, 0.044715); pow_9 = None\n add_70 = torch.ops.aten.add.Tensor(view_105, mul_69); mul_69 = None\n mul_70 = torch.ops.aten.mul.Tensor(add_70, 0.7978845608028654); add_70 = None\n tanh_8 = torch.ops.aten.tanh.default(mul_70); mul_70 = None\n add_71 = torch.ops.aten.add.Tensor(tanh_8, 1.0)\n mul_175 = torch.ops.aten.mul.Tensor(view_198, add_71); view_198 = add_71 = None\n mul_176 = torch.ops.aten.mul.Tensor(tanh_8, tanh_8); tanh_8 = None\n sub_49 = torch.ops.aten.sub.Tensor(1, mul_176); mul_176 = None\n mul_177 = torch.ops.aten.mul.Tensor(mul_174, sub_49); mul_174 = sub_49 = None\n mul_178 = torch.ops.aten.mul.Tensor(mul_177, 0.7978845608028654); mul_177 = None\n mul_179 = torch.ops.aten.mul.Tensor(mul_178, 0.044715)\n pow_16 = torch.ops.aten.pow.Tensor_Scalar(view_105, 2.0); view_105 = None\n mul_180 = torch.ops.aten.mul.Scalar(pow_16, 3.0); pow_16 = None\n mul_181 = torch.ops.aten.mul.Tensor(mul_179, mul_180); mul_179 = mul_180 = None\n add_111 = torch.ops.aten.add.Tensor(mul_178, mul_181); mul_178 = mul_181 = None\n mul_182 = torch.ops.aten.mul.Tensor(mul_175, 0.5); mul_175 = None\n add_112 = torch.ops.aten.add.Tensor(add_111, mul_182); add_111 = mul_182 = None\n view_199 = torch.ops.aten.view.default(add_112, [64, 3072]); add_112 = None\n mm_29 = torch.ops.aten.mm.default(view_199, permute_165); permute_165 = None\n permute_166 = torch.ops.aten.permute.default(view_199, [1, 0])\n mm_30 = torch.ops.aten.mm.default(permute_166, view_104); permute_166 = view_104 = None\n permute_167 = torch.ops.aten.permute.default(mm_30, [1, 0]); mm_30 = None\n sum_42 = torch.ops.aten.sum.dim_IntList(view_199, [0], True); view_199 = None\n view_200 = torch.ops.aten.view.default(sum_42, [3072]); sum_42 = None\n permute_168 = torch.ops.aten.permute.default(permute_167, [1, 0]); permute_167 = None\n view_201 = torch.ops.aten.view.default(mm_29, [1, 64, 768]); mm_29 = None\n mul_184 = torch.ops.aten.mul.Tensor(view_201, primals_106); primals_106 = None\n mul_185 = torch.ops.aten.mul.Tensor(mul_184, 768)\n sum_43 = 
torch.ops.aten.sum.dim_IntList(mul_184, [2], True)\n mul_186 = torch.ops.aten.mul.Tensor(mul_184, mul_66); mul_184 = None\n sum_44 = torch.ops.aten.sum.dim_IntList(mul_186, [2], True); mul_186 = None\n mul_187 = torch.ops.aten.mul.Tensor(mul_66, sum_44); sum_44 = None\n sub_51 = torch.ops.aten.sub.Tensor(mul_185, sum_43); mul_185 = sum_43 = None\n sub_52 = torch.ops.aten.sub.Tensor(sub_51, mul_187); sub_51 = mul_187 = None\n mul_188 = torch.ops.aten.mul.Tensor(div_7, sub_52); div_7 = sub_52 = None\n mul_189 = torch.ops.aten.mul.Tensor(view_201, mul_66); mul_66 = None\n sum_45 = torch.ops.aten.sum.dim_IntList(mul_189, [0, 1]); mul_189 = None\n sum_46 = torch.ops.aten.sum.dim_IntList(view_201, [0, 1]); view_201 = None\n add_113 = torch.ops.aten.add.Tensor(add_110, mul_188); add_110 = mul_188 = None\n view_202 = torch.ops.aten.view.default(add_113, [64, 768])\n mm_31 = torch.ops.aten.mm.default(view_202, permute_169); permute_169 = None\n permute_170 = torch.ops.aten.permute.default(view_202, [1, 0])\n permute_68 = torch.ops.aten.permute.default(getitem_93, [0, 2, 1, 3])\n view_101 = torch.ops.aten.view.default(permute_68, [1, 64, 768]); permute_68 = None\n view_102 = torch.ops.aten.view.default(view_101, [64, 768]); view_101 = None\n mm_32 = torch.ops.aten.mm.default(permute_170, view_102); permute_170 = view_102 = None\n permute_171 = torch.ops.aten.permute.default(mm_32, [1, 0]); mm_32 = None\n sum_47 = torch.ops.aten.sum.dim_IntList(view_202, [0], True); view_202 = None\n view_203 = torch.ops.aten.view.default(sum_47, [768]); sum_47 = None\n permute_172 = torch.ops.aten.permute.default(permute_171, [1, 0]); permute_171 = None\n view_204 = torch.ops.aten.view.default(mm_31, [1, 64, 768]); mm_31 = None\n view_205 = torch.ops.aten.view.default(view_204, [1, 64, 12, 64]); view_204 = None\n permute_173 = torch.ops.aten.permute.default(view_205, [0, 2, 1, 3]); view_205 = None\n _scaled_dot_product_efficient_attention_backward_3 = torch.ops.aten._scaled_dot_product_efficient_attention_backward.default(permute_173, permute_66, permute_65, permute_67, None, getitem_93, getitem_94, getitem_95, getitem_96, 0.0, [True, True, True, False], True); permute_173 = permute_66 = permute_65 = permute_67 = getitem_93 = getitem_94 = getitem_95 = getitem_96 = None\n getitem_146 = _scaled_dot_product_efficient_attention_backward_3[0]\n getitem_147 = _scaled_dot_product_efficient_attention_backward_3[1]\n getitem_148 = _scaled_dot_product_efficient_attention_backward_3[2]; _scaled_dot_product_efficient_attention_backward_3 = None\n permute_174 = torch.ops.aten.permute.default(getitem_148, [0, 2, 1, 3]); getitem_148 = None\n view_206 = torch.ops.aten.view.default(permute_174, [1, 64, 768]); permute_174 = None\n permute_175 = torch.ops.aten.permute.default(getitem_146, [0, 2, 1, 3]); getitem_146 = None\n view_207 = torch.ops.aten.view.default(permute_175, [1, 64, 768]); permute_175 = None\n permute_176 = torch.ops.aten.permute.default(getitem_147, [0, 2, 1, 3]); getitem_147 = None\n view_208 = torch.ops.aten.view.default(permute_176, [1, 64, 768]); permute_176 = None\n cat_3 = torch.ops.aten.cat.default([view_207, view_208, view_206], 2); view_207 = view_208 = view_206 = None\n view_209 = torch.ops.aten.view.default(cat_3, [64, 2304]); cat_3 = None\n mm_33 = torch.ops.aten.mm.default(view_209, permute_177); permute_177 = None\n permute_178 = torch.ops.aten.permute.default(view_209, [1, 0])\n mm_34 = torch.ops.aten.mm.default(permute_178, view_96); permute_178 = view_96 = None\n permute_179 = 
torch.ops.aten.permute.default(mm_34, [1, 0]); mm_34 = None\n sum_48 = torch.ops.aten.sum.dim_IntList(view_209, [0], True); view_209 = None\n view_210 = torch.ops.aten.view.default(sum_48, [2304]); sum_48 = None\n permute_180 = torch.ops.aten.permute.default(permute_179, [1, 0]); permute_179 = None\n view_211 = torch.ops.aten.view.default(mm_33, [1, 64, 768]); mm_33 = None\n mul_191 = torch.ops.aten.mul.Tensor(view_211, primals_100); primals_100 = None\n mul_192 = torch.ops.aten.mul.Tensor(mul_191, 768)\n sum_49 = torch.ops.aten.sum.dim_IntList(mul_191, [2], True)\n mul_193 = torch.ops.aten.mul.Tensor(mul_191, mul_64); mul_191 = None\n sum_50 = torch.ops.aten.sum.dim_IntList(mul_193, [2], True); mul_193 = None\n mul_194 = torch.ops.aten.mul.Tensor(mul_64, sum_50); sum_50 = None\n sub_54 = torch.ops.aten.sub.Tensor(mul_192, sum_49); mul_192 = sum_49 = None\n sub_55 = torch.ops.aten.sub.Tensor(sub_54, mul_194); sub_54 = mul_194 = None\n mul_195 = torch.ops.aten.mul.Tensor(div_8, sub_55); div_8 = sub_55 = None\n mul_196 = torch.ops.aten.mul.Tensor(view_211, mul_64); mul_64 = None\n sum_51 = torch.ops.aten.sum.dim_IntList(mul_196, [0, 1]); mul_196 = None\n sum_52 = torch.ops.aten.sum.dim_IntList(view_211, [0, 1]); view_211 = None\n add_114 = torch.ops.aten.add.Tensor(add_113, mul_195); add_113 = mul_195 = None\n view_212 = torch.ops.aten.view.default(add_114, [64, 768])\n mm_35 = torch.ops.aten.mm.default(view_212, permute_181); permute_181 = None\n permute_182 = torch.ops.aten.permute.default(view_212, [1, 0])\n mm_36 = torch.ops.aten.mm.default(permute_182, view_94); permute_182 = view_94 = None\n permute_183 = torch.ops.aten.permute.default(mm_36, [1, 0]); mm_36 = None\n sum_53 = torch.ops.aten.sum.dim_IntList(view_212, [0], True); view_212 = None\n view_213 = torch.ops.aten.view.default(sum_53, [768]); sum_53 = None\n permute_184 = torch.ops.aten.permute.default(permute_183, [1, 0]); permute_183 = None\n view_214 = torch.ops.aten.view.default(mm_35, [1, 64, 3072]); mm_35 = None\n view_93 = torch.ops.aten.view.default(addmm_30, [1, 64, 3072]); addmm_30 = None\n mul_60 = torch.ops.aten.mul.Tensor(view_93, 0.5)\n mul_197 = torch.ops.aten.mul.Tensor(view_214, mul_60); mul_60 = None\n pow_8 = torch.ops.aten.pow.Tensor_Scalar(view_93, 3.0)\n mul_61 = torch.ops.aten.mul.Tensor(pow_8, 0.044715); pow_8 = None\n add_62 = torch.ops.aten.add.Tensor(view_93, mul_61); mul_61 = None\n mul_62 = torch.ops.aten.mul.Tensor(add_62, 0.7978845608028654); add_62 = None\n tanh_7 = torch.ops.aten.tanh.default(mul_62); mul_62 = None\n add_63 = torch.ops.aten.add.Tensor(tanh_7, 1.0)\n mul_198 = torch.ops.aten.mul.Tensor(view_214, add_63); view_214 = add_63 = None\n mul_199 = torch.ops.aten.mul.Tensor(tanh_7, tanh_7); tanh_7 = None\n sub_56 = torch.ops.aten.sub.Tensor(1, mul_199); mul_199 = None\n mul_200 = torch.ops.aten.mul.Tensor(mul_197, sub_56); mul_197 = sub_56 = None\n mul_201 = torch.ops.aten.mul.Tensor(mul_200, 0.7978845608028654); mul_200 = None\n mul_202 = torch.ops.aten.mul.Tensor(mul_201, 0.044715)\n pow_17 = torch.ops.aten.pow.Tensor_Scalar(view_93, 2.0); view_93 = None\n mul_203 = torch.ops.aten.mul.Scalar(pow_17, 3.0); pow_17 = None\n mul_204 = torch.ops.aten.mul.Tensor(mul_202, mul_203); mul_202 = mul_203 = None\n add_115 = torch.ops.aten.add.Tensor(mul_201, mul_204); mul_201 = mul_204 = None\n mul_205 = torch.ops.aten.mul.Tensor(mul_198, 0.5); mul_198 = None\n add_116 = torch.ops.aten.add.Tensor(add_115, mul_205); add_115 = mul_205 = None\n view_215 = torch.ops.aten.view.default(add_116, [64, 
3072]); add_116 = None\n mm_37 = torch.ops.aten.mm.default(view_215, permute_185); permute_185 = None\n permute_186 = torch.ops.aten.permute.default(view_215, [1, 0])\n mm_38 = torch.ops.aten.mm.default(permute_186, view_92); permute_186 = view_92 = None\n permute_187 = torch.ops.aten.permute.default(mm_38, [1, 0]); mm_38 = None\n sum_54 = torch.ops.aten.sum.dim_IntList(view_215, [0], True); view_215 = None\n view_216 = torch.ops.aten.view.default(sum_54, [3072]); sum_54 = None\n permute_188 = torch.ops.aten.permute.default(permute_187, [1, 0]); permute_187 = None\n view_217 = torch.ops.aten.view.default(mm_37, [1, 64, 768]); mm_37 = None\n mul_207 = torch.ops.aten.mul.Tensor(view_217, primals_94); primals_94 = None\n mul_208 = torch.ops.aten.mul.Tensor(mul_207, 768)\n sum_55 = torch.ops.aten.sum.dim_IntList(mul_207, [2], True)\n mul_209 = torch.ops.aten.mul.Tensor(mul_207, mul_58); mul_207 = None\n sum_56 = torch.ops.aten.sum.dim_IntList(mul_209, [2], True); mul_209 = None\n mul_210 = torch.ops.aten.mul.Tensor(mul_58, sum_56); sum_56 = None\n sub_58 = torch.ops.aten.sub.Tensor(mul_208, sum_55); mul_208 = sum_55 = None\n sub_59 = torch.ops.aten.sub.Tensor(sub_58, mul_210); sub_58 = mul_210 = None\n mul_211 = torch.ops.aten.mul.Tensor(div_9, sub_59); div_9 = sub_59 = None\n mul_212 = torch.ops.aten.mul.Tensor(view_217, mul_58); mul_58 = None\n sum_57 = torch.ops.aten.sum.dim_IntList(mul_212, [0, 1]); mul_212 = None\n sum_58 = torch.ops.aten.sum.dim_IntList(view_217, [0, 1]); view_217 = None\n add_117 = torch.ops.aten.add.Tensor(add_114, mul_211); add_114 = mul_211 = None\n view_218 = torch.ops.aten.view.default(add_117, [64, 768])\n mm_39 = torch.ops.aten.mm.default(view_218, permute_189); permute_189 = None\n permute_190 = torch.ops.aten.permute.default(view_218, [1, 0])\n permute_60 = torch.ops.aten.permute.default(getitem_82, [0, 2, 1, 3])\n view_89 = torch.ops.aten.view.default(permute_60, [1, 64, 768]); permute_60 = None\n view_90 = torch.ops.aten.view.default(view_89, [64, 768]); view_89 = None\n mm_40 = torch.ops.aten.mm.default(permute_190, view_90); permute_190 = view_90 = None\n permute_191 = torch.ops.aten.permute.default(mm_40, [1, 0]); mm_40 = None\n sum_59 = torch.ops.aten.sum.dim_IntList(view_218, [0], True); view_218 = None\n view_219 = torch.ops.aten.view.default(sum_59, [768]); sum_59 = None\n permute_192 = torch.ops.aten.permute.default(permute_191, [1, 0]); permute_191 = None\n view_220 = torch.ops.aten.view.default(mm_39, [1, 64, 768]); mm_39 = None\n view_221 = torch.ops.aten.view.default(view_220, [1, 64, 12, 64]); view_220 = None\n permute_193 = torch.ops.aten.permute.default(view_221, [0, 2, 1, 3]); view_221 = None\n _scaled_dot_product_efficient_attention_backward_4 = torch.ops.aten._scaled_dot_product_efficient_attention_backward.default(permute_193, permute_58, permute_57, permute_59, None, getitem_82, getitem_83, getitem_84, getitem_85, 0.0, [True, True, True, False], True); permute_193 = permute_58 = permute_57 = permute_59 = getitem_82 = getitem_83 = getitem_84 = getitem_85 = None\n getitem_150 = _scaled_dot_product_efficient_attention_backward_4[0]\n getitem_151 = _scaled_dot_product_efficient_attention_backward_4[1]\n getitem_152 = _scaled_dot_product_efficient_attention_backward_4[2]; _scaled_dot_product_efficient_attention_backward_4 = None\n permute_194 = torch.ops.aten.permute.default(getitem_152, [0, 2, 1, 3]); getitem_152 = None\n view_222 = torch.ops.aten.view.default(permute_194, [1, 64, 768]); permute_194 = None\n permute_195 = 
torch.ops.aten.permute.default(getitem_150, [0, 2, 1, 3]); getitem_150 = None\n view_223 = torch.ops.aten.view.default(permute_195, [1, 64, 768]); permute_195 = None\n permute_196 = torch.ops.aten.permute.default(getitem_151, [0, 2, 1, 3]); getitem_151 = None\n view_224 = torch.ops.aten.view.default(permute_196, [1, 64, 768]); permute_196 = None\n cat_4 = torch.ops.aten.cat.default([view_223, view_224, view_222], 2); view_223 = view_224 = view_222 = None\n view_225 = torch.ops.aten.view.default(cat_4, [64, 2304]); cat_4 = None\n mm_41 = torch.ops.aten.mm.default(view_225, permute_197); permute_197 = None\n permute_198 = torch.ops.aten.permute.default(view_225, [1, 0])\n mm_42 = torch.ops.aten.mm.default(permute_198, view_84); permute_198 = view_84 = None\n permute_199 = torch.ops.aten.permute.default(mm_42, [1, 0]); mm_42 = None\n sum_60 = torch.ops.aten.sum.dim_IntList(view_225, [0], True); view_225 = None\n view_226 = torch.ops.aten.view.default(sum_60, [2304]); sum_60 = None\n permute_200 = torch.ops.aten.permute.default(permute_199, [1, 0]); permute_199 = None\n view_227 = torch.ops.aten.view.default(mm_41, [1, 64, 768]); mm_41 = None\n mul_214 = torch.ops.aten.mul.Tensor(view_227, primals_88); primals_88 = None\n mul_215 = torch.ops.aten.mul.Tensor(mul_214, 768)\n sum_61 = torch.ops.aten.sum.dim_IntList(mul_214, [2], True)\n mul_216 = torch.ops.aten.mul.Tensor(mul_214, mul_56); mul_214 = None\n sum_62 = torch.ops.aten.sum.dim_IntList(mul_216, [2], True); mul_216 = None\n mul_217 = torch.ops.aten.mul.Tensor(mul_56, sum_62); sum_62 = None\n sub_61 = torch.ops.aten.sub.Tensor(mul_215, sum_61); mul_215 = sum_61 = None\n sub_62 = torch.ops.aten.sub.Tensor(sub_61, mul_217); sub_61 = mul_217 = None\n mul_218 = torch.ops.aten.mul.Tensor(div_10, sub_62); div_10 = sub_62 = None\n mul_219 = torch.ops.aten.mul.Tensor(view_227, mul_56); mul_56 = None\n sum_63 = torch.ops.aten.sum.dim_IntList(mul_219, [0, 1]); mul_219 = None\n sum_64 = torch.ops.aten.sum.dim_IntList(view_227, [0, 1]); view_227 = None\n add_118 = torch.ops.aten.add.Tensor(add_117, mul_218); add_117 = mul_218 = None\n view_228 = torch.ops.aten.view.default(add_118, [64, 768])\n mm_43 = torch.ops.aten.mm.default(view_228, permute_201); permute_201 = None\n permute_202 = torch.ops.aten.permute.default(view_228, [1, 0])\n mm_44 = torch.ops.aten.mm.default(permute_202, view_82); permute_202 = view_82 = None\n permute_203 = torch.ops.aten.permute.default(mm_44, [1, 0]); mm_44 = None\n sum_65 = torch.ops.aten.sum.dim_IntList(view_228, [0], True); view_228 = None\n view_229 = torch.ops.aten.view.default(sum_65, [768]); sum_65 = None\n permute_204 = torch.ops.aten.permute.default(permute_203, [1, 0]); permute_203 = None\n view_230 = torch.ops.aten.view.default(mm_43, [1, 64, 3072]); mm_43 = None\n view_81 = torch.ops.aten.view.default(addmm_26, [1, 64, 3072]); addmm_26 = None\n mul_52 = torch.ops.aten.mul.Tensor(view_81, 0.5)\n mul_220 = torch.ops.aten.mul.Tensor(view_230, mul_52); mul_52 = None\n pow_7 = torch.ops.aten.pow.Tensor_Scalar(view_81, 3.0)\n mul_53 = torch.ops.aten.mul.Tensor(pow_7, 0.044715); pow_7 = None\n add_54 = torch.ops.aten.add.Tensor(view_81, mul_53); mul_53 = None\n mul_54 = torch.ops.aten.mul.Tensor(add_54, 0.7978845608028654); add_54 = None\n tanh_6 = torch.ops.aten.tanh.default(mul_54); mul_54 = None\n add_55 = torch.ops.aten.add.Tensor(tanh_6, 1.0)\n mul_221 = torch.ops.aten.mul.Tensor(view_230, add_55); view_230 = add_55 = None\n mul_222 = torch.ops.aten.mul.Tensor(tanh_6, tanh_6); tanh_6 = None\n sub_63 = 
torch.ops.aten.sub.Tensor(1, mul_222); mul_222 = None\n mul_223 = torch.ops.aten.mul.Tensor(mul_220, sub_63); mul_220 = sub_63 = None\n mul_224 = torch.ops.aten.mul.Tensor(mul_223, 0.7978845608028654); mul_223 = None\n mul_225 = torch.ops.aten.mul.Tensor(mul_224, 0.044715)\n pow_18 = torch.ops.aten.pow.Tensor_Scalar(view_81, 2.0); view_81 = None\n mul_226 = torch.ops.aten.mul.Scalar(pow_18, 3.0); pow_18 = None\n mul_227 = torch.ops.aten.mul.Tensor(mul_225, mul_226); mul_225 = mul_226 = None\n add_119 = torch.ops.aten.add.Tensor(mul_224, mul_227); mul_224 = mul_227 = None\n mul_228 = torch.ops.aten.mul.Tensor(mul_221, 0.5); mul_221 = None\n add_120 = torch.ops.aten.add.Tensor(add_119, mul_228); add_119 = mul_228 = None\n view_231 = torch.ops.aten.view.default(add_120, [64, 3072]); add_120 = None\n mm_45 = torch.ops.aten.mm.default(view_231, permute_205); permute_205 = None\n permute_206 = torch.ops.aten.permute.default(view_231, [1, 0])\n mm_46 = torch.ops.aten.mm.default(permute_206, view_80); permute_206 = view_80 = None\n permute_207 = torch.ops.aten.permute.default(mm_46, [1, 0]); mm_46 = None\n sum_66 = torch.ops.aten.sum.dim_IntList(view_231, [0], True); view_231 = None\n view_232 = torch.ops.aten.view.default(sum_66, [3072]); sum_66 = None\n permute_208 = torch.ops.aten.permute.default(permute_207, [1, 0]); permute_207 = None\n view_233 = torch.ops.aten.view.default(mm_45, [1, 64, 768]); mm_45 = None\n mul_230 = torch.ops.aten.mul.Tensor(view_233, primals_82); primals_82 = None\n mul_231 = torch.ops.aten.mul.Tensor(mul_230, 768)\n sum_67 = torch.ops.aten.sum.dim_IntList(mul_230, [2], True)\n mul_232 = torch.ops.aten.mul.Tensor(mul_230, mul_50); mul_230 = None\n sum_68 = torch.ops.aten.sum.dim_IntList(mul_232, [2], True); mul_232 = None\n mul_233 = torch.ops.aten.mul.Tensor(mul_50, sum_68); sum_68 = None\n sub_65 = torch.ops.aten.sub.Tensor(mul_231, sum_67); mul_231 = sum_67 = None\n sub_66 = torch.ops.aten.sub.Tensor(sub_65, mul_233); sub_65 = mul_233 = None\n mul_234 = torch.ops.aten.mul.Tensor(div_11, sub_66); div_11 = sub_66 = None\n mul_235 = torch.ops.aten.mul.Tensor(view_233, mul_50); mul_50 = None\n sum_69 = torch.ops.aten.sum.dim_IntList(mul_235, [0, 1]); mul_235 = None\n sum_70 = torch.ops.aten.sum.dim_IntList(view_233, [0, 1]); view_233 = None\n add_121 = torch.ops.aten.add.Tensor(add_118, mul_234); add_118 = mul_234 = None\n view_234 = torch.ops.aten.view.default(add_121, [64, 768])\n mm_47 = torch.ops.aten.mm.default(view_234, permute_209); permute_209 = None\n permute_210 = torch.ops.aten.permute.default(view_234, [1, 0])\n permute_52 = torch.ops.aten.permute.default(getitem_71, [0, 2, 1, 3])\n view_77 = torch.ops.aten.view.default(permute_52, [1, 64, 768]); permute_52 = None\n view_78 = torch.ops.aten.view.default(view_77, [64, 768]); view_77 = None\n mm_48 = torch.ops.aten.mm.default(permute_210, view_78); permute_210 = view_78 = None\n permute_211 = torch.ops.aten.permute.default(mm_48, [1, 0]); mm_48 = None\n sum_71 = torch.ops.aten.sum.dim_IntList(view_234, [0], True); view_234 = None\n view_235 = torch.ops.aten.view.default(sum_71, [768]); sum_71 = None\n permute_212 = torch.ops.aten.permute.default(permute_211, [1, 0]); permute_211 = None\n view_236 = torch.ops.aten.view.default(mm_47, [1, 64, 768]); mm_47 = None\n view_237 = torch.ops.aten.view.default(view_236, [1, 64, 12, 64]); view_236 = None\n permute_213 = torch.ops.aten.permute.default(view_237, [0, 2, 1, 3]); view_237 = None\n _scaled_dot_product_efficient_attention_backward_5 = 
torch.ops.aten._scaled_dot_product_efficient_attention_backward.default(permute_213, permute_50, permute_49, permute_51, None, getitem_71, getitem_72, getitem_73, getitem_74, 0.0, [True, True, True, False], True); permute_213 = permute_50 = permute_49 = permute_51 = getitem_71 = getitem_72 = getitem_73 = getitem_74 = None\n getitem_154 = _scaled_dot_product_efficient_attention_backward_5[0]\n getitem_155 = _scaled_dot_product_efficient_attention_backward_5[1]\n getitem_156 = _scaled_dot_product_efficient_attention_backward_5[2]; _scaled_dot_product_efficient_attention_backward_5 = None\n permute_214 = torch.ops.aten.permute.default(getitem_156, [0, 2, 1, 3]); getitem_156 = None\n view_238 = torch.ops.aten.view.default(permute_214, [1, 64, 768]); permute_214 = None\n permute_215 = torch.ops.aten.permute.default(getitem_154, [0, 2, 1, 3]); getitem_154 = None\n view_239 = torch.ops.aten.view.default(permute_215, [1, 64, 768]); permute_215 = None\n permute_216 = torch.ops.aten.permute.default(getitem_155, [0, 2, 1, 3]); getitem_155 = None\n view_240 = torch.ops.aten.view.default(permute_216, [1, 64, 768]); permute_216 = None\n cat_5 = torch.ops.aten.cat.default([view_239, view_240, view_238], 2); view_239 = view_240 = view_238 = None\n view_241 = torch.ops.aten.view.default(cat_5, [64, 2304]); cat_5 = None\n mm_49 = torch.ops.aten.mm.default(view_241, permute_217); permute_217 = None\n permute_218 = torch.ops.aten.permute.default(view_241, [1, 0])\n mm_50 = torch.ops.aten.mm.default(permute_218, view_72); permute_218 = view_72 = None\n permute_219 = torch.ops.aten.permute.default(mm_50, [1, 0]); mm_50 = None\n sum_72 = torch.ops.aten.sum.dim_IntList(view_241, [0], True); view_241 = None\n view_242 = torch.ops.aten.view.default(sum_72, [2304]); sum_72 = None\n permute_220 = torch.ops.aten.permute.default(permute_219, [1, 0]); permute_219 = None\n view_243 = torch.ops.aten.view.default(mm_49, [1, 64, 768]); mm_49 = None\n mul_237 = torch.ops.aten.mul.Tensor(view_243, primals_76); primals_76 = None\n mul_238 = torch.ops.aten.mul.Tensor(mul_237, 768)\n sum_73 = torch.ops.aten.sum.dim_IntList(mul_237, [2], True)\n mul_239 = torch.ops.aten.mul.Tensor(mul_237, mul_48); mul_237 = None\n sum_74 = torch.ops.aten.sum.dim_IntList(mul_239, [2], True); mul_239 = None\n mul_240 = torch.ops.aten.mul.Tensor(mul_48, sum_74); sum_74 = None\n sub_68 = torch.ops.aten.sub.Tensor(mul_238, sum_73); mul_238 = sum_73 = None\n sub_69 = torch.ops.aten.sub.Tensor(sub_68, mul_240); sub_68 = mul_240 = None\n mul_241 = torch.ops.aten.mul.Tensor(div_12, sub_69); div_12 = sub_69 = None\n mul_242 = torch.ops.aten.mul.Tensor(view_243, mul_48); mul_48 = None\n sum_75 = torch.ops.aten.sum.dim_IntList(mul_242, [0, 1]); mul_242 = None\n sum_76 = torch.ops.aten.sum.dim_IntList(view_243, [0, 1]); view_243 = None\n add_122 = torch.ops.aten.add.Tensor(add_121, mul_241); add_121 = mul_241 = None\n view_244 = torch.ops.aten.view.default(add_122, [64, 768])\n mm_51 = torch.ops.aten.mm.default(view_244, permute_221); permute_221 = None\n permute_222 = torch.ops.aten.permute.default(view_244, [1, 0])\n mm_52 = torch.ops.aten.mm.default(permute_222, view_70); permute_222 = view_70 = None\n permute_223 = torch.ops.aten.permute.default(mm_52, [1, 0]); mm_52 = None\n sum_77 = torch.ops.aten.sum.dim_IntList(view_244, [0], True); view_244 = None\n view_245 = torch.ops.aten.view.default(sum_77, [768]); sum_77 = None\n permute_224 = torch.ops.aten.permute.default(permute_223, [1, 0]); permute_223 = None\n view_246 = 
torch.ops.aten.view.default(mm_51, [1, 64, 3072]); mm_51 = None\n view_69 = torch.ops.aten.view.default(addmm_22, [1, 64, 3072]); addmm_22 = None\n mul_44 = torch.ops.aten.mul.Tensor(view_69, 0.5)\n mul_243 = torch.ops.aten.mul.Tensor(view_246, mul_44); mul_44 = None\n pow_6 = torch.ops.aten.pow.Tensor_Scalar(view_69, 3.0)\n mul_45 = torch.ops.aten.mul.Tensor(pow_6, 0.044715); pow_6 = None\n add_46 = torch.ops.aten.add.Tensor(view_69, mul_45); mul_45 = None\n mul_46 = torch.ops.aten.mul.Tensor(add_46, 0.7978845608028654); add_46 = None\n tanh_5 = torch.ops.aten.tanh.default(mul_46); mul_46 = None\n add_47 = torch.ops.aten.add.Tensor(tanh_5, 1.0)\n mul_244 = torch.ops.aten.mul.Tensor(view_246, add_47); view_246 = add_47 = None\n mul_245 = torch.ops.aten.mul.Tensor(tanh_5, tanh_5); tanh_5 = None\n sub_70 = torch.ops.aten.sub.Tensor(1, mul_245); mul_245 = None\n mul_246 = torch.ops.aten.mul.Tensor(mul_243, sub_70); mul_243 = sub_70 = None\n mul_247 = torch.ops.aten.mul.Tensor(mul_246, 0.7978845608028654); mul_246 = None\n mul_248 = torch.ops.aten.mul.Tensor(mul_247, 0.044715)\n pow_19 = torch.ops.aten.pow.Tensor_Scalar(view_69, 2.0); view_69 = None\n mul_249 = torch.ops.aten.mul.Scalar(pow_19, 3.0); pow_19 = None\n mul_250 = torch.ops.aten.mul.Tensor(mul_248, mul_249); mul_248 = mul_249 = None\n add_123 = torch.ops.aten.add.Tensor(mul_247, mul_250); mul_247 = mul_250 = None\n mul_251 = torch.ops.aten.mul.Tensor(mul_244, 0.5); mul_244 = None\n add_124 = torch.ops.aten.add.Tensor(add_123, mul_251); add_123 = mul_251 = None\n view_247 = torch.ops.aten.view.default(add_124, [64, 3072]); add_124 = None\n mm_53 = torch.ops.aten.mm.default(view_247, permute_225); permute_225 = None\n permute_226 = torch.ops.aten.permute.default(view_247, [1, 0])\n mm_54 = torch.ops.aten.mm.default(permute_226, view_68); permute_226 = view_68 = None\n permute_227 = torch.ops.aten.permute.default(mm_54, [1, 0]); mm_54 = None\n sum_78 = torch.ops.aten.sum.dim_IntList(view_247, [0], True); view_247 = None\n view_248 = torch.ops.aten.view.default(sum_78, [3072]); sum_78 = None\n permute_228 = torch.ops.aten.permute.default(permute_227, [1, 0]); permute_227 = None\n view_249 = torch.ops.aten.view.default(mm_53, [1, 64, 768]); mm_53 = None\n mul_253 = torch.ops.aten.mul.Tensor(view_249, primals_70); primals_70 = None\n mul_254 = torch.ops.aten.mul.Tensor(mul_253, 768)\n sum_79 = torch.ops.aten.sum.dim_IntList(mul_253, [2], True)\n mul_255 = torch.ops.aten.mul.Tensor(mul_253, mul_42); mul_253 = None\n sum_80 = torch.ops.aten.sum.dim_IntList(mul_255, [2], True); mul_255 = None\n mul_256 = torch.ops.aten.mul.Tensor(mul_42, sum_80); sum_80 = None\n sub_72 = torch.ops.aten.sub.Tensor(mul_254, sum_79); mul_254 = sum_79 = None\n sub_73 = torch.ops.aten.sub.Tensor(sub_72, mul_256); sub_72 = mul_256 = None\n mul_257 = torch.ops.aten.mul.Tensor(div_13, sub_73); div_13 = sub_73 = None\n mul_258 = torch.ops.aten.mul.Tensor(view_249, mul_42); mul_42 = None\n sum_81 = torch.ops.aten.sum.dim_IntList(mul_258, [0, 1]); mul_258 = None\n sum_82 = torch.ops.aten.sum.dim_IntList(view_249, [0, 1]); view_249 = None\n add_125 = torch.ops.aten.add.Tensor(add_122, mul_257); add_122 = mul_257 = None\n view_250 = torch.ops.aten.view.default(add_125, [64, 768])\n mm_55 = torch.ops.aten.mm.default(view_250, permute_229); permute_229 = None\n permute_230 = torch.ops.aten.permute.default(view_250, [1, 0])\n permute_44 = torch.ops.aten.permute.default(getitem_60, [0, 2, 1, 3])\n view_65 = torch.ops.aten.view.default(permute_44, [1, 64, 768]); permute_44 = 
None\n view_66 = torch.ops.aten.view.default(view_65, [64, 768]); view_65 = None\n mm_56 = torch.ops.aten.mm.default(permute_230, view_66); permute_230 = view_66 = None\n permute_231 = torch.ops.aten.permute.default(mm_56, [1, 0]); mm_56 = None\n sum_83 = torch.ops.aten.sum.dim_IntList(view_250, [0], True); view_250 = None\n view_251 = torch.ops.aten.view.default(sum_83, [768]); sum_83 = None\n permute_232 = torch.ops.aten.permute.default(permute_231, [1, 0]); permute_231 = None\n view_252 = torch.ops.aten.view.default(mm_55, [1, 64, 768]); mm_55 = None\n view_253 = torch.ops.aten.view.default(view_252, [1, 64, 12, 64]); view_252 = None\n permute_233 = torch.ops.aten.permute.default(view_253, [0, 2, 1, 3]); view_253 = None\n _scaled_dot_product_efficient_attention_backward_6 = torch.ops.aten._scaled_dot_product_efficient_attention_backward.default(permute_233, permute_42, permute_41, permute_43, None, getitem_60, getitem_61, getitem_62, getitem_63, 0.0, [True, True, True, False], True); permute_233 = permute_42 = permute_41 = permute_43 = getitem_60 = getitem_61 = getitem_62 = getitem_63 = None\n getitem_158 = _scaled_dot_product_efficient_attention_backward_6[0]\n getitem_159 = _scaled_dot_product_efficient_attention_backward_6[1]\n getitem_160 = _scaled_dot_product_efficient_attention_backward_6[2]; _scaled_dot_product_efficient_attention_backward_6 = None\n permute_234 = torch.ops.aten.permute.default(getitem_160, [0, 2, 1, 3]); getitem_160 = None\n view_254 = torch.ops.aten.view.default(permute_234, [1, 64, 768]); permute_234 = None\n permute_235 = torch.ops.aten.permute.default(getitem_158, [0, 2, 1, 3]); getitem_158 = None\n view_255 = torch.ops.aten.view.default(permute_235, [1, 64, 768]); permute_235 = None\n permute_236 = torch.ops.aten.permute.default(getitem_159, [0, 2, 1, 3]); getitem_159 = None\n view_256 = torch.ops.aten.view.default(permute_236, [1, 64, 768]); permute_236 = None\n cat_6 = torch.ops.aten.cat.default([view_255, view_256, view_254], 2); view_255 = view_256 = view_254 = None\n view_257 = torch.ops.aten.view.default(cat_6, [64, 2304]); cat_6 = None\n mm_57 = torch.ops.aten.mm.default(view_257, permute_237); permute_237 = None\n permute_238 = torch.ops.aten.permute.default(view_257, [1, 0])\n mm_58 = torch.ops.aten.mm.default(permute_238, view_60); permute_238 = view_60 = None\n permute_239 = torch.ops.aten.permute.default(mm_58, [1, 0]); mm_58 = None\n sum_84 = torch.ops.aten.sum.dim_IntList(view_257, [0], True); view_257 = None\n view_258 = torch.ops.aten.view.default(sum_84, [2304]); sum_84 = None\n permute_240 = torch.ops.aten.permute.default(permute_239, [1, 0]); permute_239 = None\n view_259 = torch.ops.aten.view.default(mm_57, [1, 64, 768]); mm_57 = None\n mul_260 = torch.ops.aten.mul.Tensor(view_259, primals_64); primals_64 = None\n mul_261 = torch.ops.aten.mul.Tensor(mul_260, 768)\n sum_85 = torch.ops.aten.sum.dim_IntList(mul_260, [2], True)\n mul_262 = torch.ops.aten.mul.Tensor(mul_260, mul_40); mul_260 = None\n sum_86 = torch.ops.aten.sum.dim_IntList(mul_262, [2], True); mul_262 = None\n mul_263 = torch.ops.aten.mul.Tensor(mul_40, sum_86); sum_86 = None\n sub_75 = torch.ops.aten.sub.Tensor(mul_261, sum_85); mul_261 = sum_85 = None\n sub_76 = torch.ops.aten.sub.Tensor(sub_75, mul_263); sub_75 = mul_263 = None\n mul_264 = torch.ops.aten.mul.Tensor(div_14, sub_76); div_14 = sub_76 = None\n mul_265 = torch.ops.aten.mul.Tensor(view_259, mul_40); mul_40 = None\n sum_87 = torch.ops.aten.sum.dim_IntList(mul_265, [0, 1]); mul_265 = None\n sum_88 = 
torch.ops.aten.sum.dim_IntList(view_259, [0, 1]); view_259 = None\n add_126 = torch.ops.aten.add.Tensor(add_125, mul_264); add_125 = mul_264 = None\n view_260 = torch.ops.aten.view.default(add_126, [64, 768])\n mm_59 = torch.ops.aten.mm.default(view_260, permute_241); permute_241 = None\n permute_242 = torch.ops.aten.permute.default(view_260, [1, 0])\n mm_60 = torch.ops.aten.mm.default(permute_242, view_58); permute_242 = view_58 = None\n permute_243 = torch.ops.aten.permute.default(mm_60, [1, 0]); mm_60 = None\n sum_89 = torch.ops.aten.sum.dim_IntList(view_260, [0], True); view_260 = None\n view_261 = torch.ops.aten.view.default(sum_89, [768]); sum_89 = None\n permute_244 = torch.ops.aten.permute.default(permute_243, [1, 0]); permute_243 = None\n view_262 = torch.ops.aten.view.default(mm_59, [1, 64, 3072]); mm_59 = None\n view_57 = torch.ops.aten.view.default(addmm_18, [1, 64, 3072]); addmm_18 = None\n mul_36 = torch.ops.aten.mul.Tensor(view_57, 0.5)\n mul_266 = torch.ops.aten.mul.Tensor(view_262, mul_36); mul_36 = None\n pow_5 = torch.ops.aten.pow.Tensor_Scalar(view_57, 3.0)\n mul_37 = torch.ops.aten.mul.Tensor(pow_5, 0.044715); pow_5 = None\n add_38 = torch.ops.aten.add.Tensor(view_57, mul_37); mul_37 = None\n mul_38 = torch.ops.aten.mul.Tensor(add_38, 0.7978845608028654); add_38 = None\n tanh_4 = torch.ops.aten.tanh.default(mul_38); mul_38 = None\n add_39 = torch.ops.aten.add.Tensor(tanh_4, 1.0)\n mul_267 = torch.ops.aten.mul.Tensor(view_262, add_39); view_262 = add_39 = None\n mul_268 = torch.ops.aten.mul.Tensor(tanh_4, tanh_4); tanh_4 = None\n sub_77 = torch.ops.aten.sub.Tensor(1, mul_268); mul_268 = None\n mul_269 = torch.ops.aten.mul.Tensor(mul_266, sub_77); mul_266 = sub_77 = None\n mul_270 = torch.ops.aten.mul.Tensor(mul_269, 0.7978845608028654); mul_269 = None\n mul_271 = torch.ops.aten.mul.Tensor(mul_270, 0.044715)\n pow_20 = torch.ops.aten.pow.Tensor_Scalar(view_57, 2.0); view_57 = None\n mul_272 = torch.ops.aten.mul.Scalar(pow_20, 3.0); pow_20 = None\n mul_273 = torch.ops.aten.mul.Tensor(mul_271, mul_272); mul_271 = mul_272 = None\n add_127 = torch.ops.aten.add.Tensor(mul_270, mul_273); mul_270 = mul_273 = None\n mul_274 = torch.ops.aten.mul.Tensor(mul_267, 0.5); mul_267 = None\n add_128 = torch.ops.aten.add.Tensor(add_127, mul_274); add_127 = mul_274 = None\n view_263 = torch.ops.aten.view.default(add_128, [64, 3072]); add_128 = None\n mm_61 = torch.ops.aten.mm.default(view_263, permute_245); permute_245 = None\n permute_246 = torch.ops.aten.permute.default(view_263, [1, 0])\n mm_62 = torch.ops.aten.mm.default(permute_246, view_56); permute_246 = view_56 = None\n permute_247 = torch.ops.aten.permute.default(mm_62, [1, 0]); mm_62 = None\n sum_90 = torch.ops.aten.sum.dim_IntList(view_263, [0], True); view_263 = None\n view_264 = torch.ops.aten.view.default(sum_90, [3072]); sum_90 = None\n permute_248 = torch.ops.aten.permute.default(permute_247, [1, 0]); permute_247 = None\n view_265 = torch.ops.aten.view.default(mm_61, [1, 64, 768]); mm_61 = None\n mul_276 = torch.ops.aten.mul.Tensor(view_265, primals_58); primals_58 = None\n mul_277 = torch.ops.aten.mul.Tensor(mul_276, 768)\n sum_91 = torch.ops.aten.sum.dim_IntList(mul_276, [2], True)\n mul_278 = torch.ops.aten.mul.Tensor(mul_276, mul_34); mul_276 = None\n sum_92 = torch.ops.aten.sum.dim_IntList(mul_278, [2], True); mul_278 = None\n mul_279 = torch.ops.aten.mul.Tensor(mul_34, sum_92); sum_92 = None\n sub_79 = torch.ops.aten.sub.Tensor(mul_277, sum_91); mul_277 = sum_91 = None\n sub_80 = torch.ops.aten.sub.Tensor(sub_79, 
mul_279); sub_79 = mul_279 = None\n mul_280 = torch.ops.aten.mul.Tensor(div_15, sub_80); div_15 = sub_80 = None\n mul_281 = torch.ops.aten.mul.Tensor(view_265, mul_34); mul_34 = None\n sum_93 = torch.ops.aten.sum.dim_IntList(mul_281, [0, 1]); mul_281 = None\n sum_94 = torch.ops.aten.sum.dim_IntList(view_265, [0, 1]); view_265 = None\n add_129 = torch.ops.aten.add.Tensor(add_126, mul_280); add_126 = mul_280 = None\n view_266 = torch.ops.aten.view.default(add_129, [64, 768])\n mm_63 = torch.ops.aten.mm.default(view_266, permute_249); permute_249 = None\n permute_250 = torch.ops.aten.permute.default(view_266, [1, 0])\n permute_36 = torch.ops.aten.permute.default(getitem_49, [0, 2, 1, 3])\n view_53 = torch.ops.aten.view.default(permute_36, [1, 64, 768]); permute_36 = None\n view_54 = torch.ops.aten.view.default(view_53, [64, 768]); view_53 = None\n mm_64 = torch.ops.aten.mm.default(permute_250, view_54); permute_250 = view_54 = None\n permute_251 = torch.ops.aten.permute.default(mm_64, [1, 0]); mm_64 = None\n sum_95 = torch.ops.aten.sum.dim_IntList(view_266, [0], True); view_266 = None\n view_267 = torch.ops.aten.view.default(sum_95, [768]); sum_95 = None\n permute_252 = torch.ops.aten.permute.default(permute_251, [1, 0]); permute_251 = None\n view_268 = torch.ops.aten.view.default(mm_63, [1, 64, 768]); mm_63 = None\n view_269 = torch.ops.aten.view.default(view_268, [1, 64, 12, 64]); view_268 = None\n permute_253 = torch.ops.aten.permute.default(view_269, [0, 2, 1, 3]); view_269 = None\n _scaled_dot_product_efficient_attention_backward_7 = torch.ops.aten._scaled_dot_product_efficient_attention_backward.default(permute_253, permute_34, permute_33, permute_35, None, getitem_49, getitem_50, getitem_51, getitem_52, 0.0, [True, True, True, False], True); permute_253 = permute_34 = permute_33 = permute_35 = getitem_49 = getitem_50 = getitem_51 = getitem_52 = None\n getitem_162 = _scaled_dot_product_efficient_attention_backward_7[0]\n getitem_163 = _scaled_dot_product_efficient_attention_backward_7[1]\n getitem_164 = _scaled_dot_product_efficient_attention_backward_7[2]; _scaled_dot_product_efficient_attention_backward_7 = None\n permute_254 = torch.ops.aten.permute.default(getitem_164, [0, 2, 1, 3]); getitem_164 = None\n view_270 = torch.ops.aten.view.default(permute_254, [1, 64, 768]); permute_254 = None\n permute_255 = torch.ops.aten.permute.default(getitem_162, [0, 2, 1, 3]); getitem_162 = None\n view_271 = torch.ops.aten.view.default(permute_255, [1, 64, 768]); permute_255 = None\n permute_256 = torch.ops.aten.permute.default(getitem_163, [0, 2, 1, 3]); getitem_163 = None\n view_272 = torch.ops.aten.view.default(permute_256, [1, 64, 768]); permute_256 = None\n cat_7 = torch.ops.aten.cat.default([view_271, view_272, view_270], 2); view_271 = view_272 = view_270 = None\n view_273 = torch.ops.aten.view.default(cat_7, [64, 2304]); cat_7 = None\n mm_65 = torch.ops.aten.mm.default(view_273, permute_257); permute_257 = None\n permute_258 = torch.ops.aten.permute.default(view_273, [1, 0])\n mm_66 = torch.ops.aten.mm.default(permute_258, view_48); permute_258 = view_48 = None\n permute_259 = torch.ops.aten.permute.default(mm_66, [1, 0]); mm_66 = None\n sum_96 = torch.ops.aten.sum.dim_IntList(view_273, [0], True); view_273 = None\n view_274 = torch.ops.aten.view.default(sum_96, [2304]); sum_96 = None\n permute_260 = torch.ops.aten.permute.default(permute_259, [1, 0]); permute_259 = None\n view_275 = torch.ops.aten.view.default(mm_65, [1, 64, 768]); mm_65 = None\n mul_283 = 
torch.ops.aten.mul.Tensor(view_275, primals_52); primals_52 = None\n mul_284 = torch.ops.aten.mul.Tensor(mul_283, 768)\n sum_97 = torch.ops.aten.sum.dim_IntList(mul_283, [2], True)\n mul_285 = torch.ops.aten.mul.Tensor(mul_283, mul_32); mul_283 = None\n sum_98 = torch.ops.aten.sum.dim_IntList(mul_285, [2], True); mul_285 = None\n mul_286 = torch.ops.aten.mul.Tensor(mul_32, sum_98); sum_98 = None\n sub_82 = torch.ops.aten.sub.Tensor(mul_284, sum_97); mul_284 = sum_97 = None\n sub_83 = torch.ops.aten.sub.Tensor(sub_82, mul_286); sub_82 = mul_286 = None\n mul_287 = torch.ops.aten.mul.Tensor(div_16, sub_83); div_16 = sub_83 = None\n mul_288 = torch.ops.aten.mul.Tensor(view_275, mul_32); mul_32 = None\n sum_99 = torch.ops.aten.sum.dim_IntList(mul_288, [0, 1]); mul_288 = None\n sum_100 = torch.ops.aten.sum.dim_IntList(view_275, [0, 1]); view_275 = None\n add_130 = torch.ops.aten.add.Tensor(add_129, mul_287); add_129 = mul_287 = None\n view_276 = torch.ops.aten.view.default(add_130, [64, 768])\n mm_67 = torch.ops.aten.mm.default(view_276, permute_261); permute_261 = None\n permute_262 = torch.ops.aten.permute.default(view_276, [1, 0])\n mm_68 = torch.ops.aten.mm.default(permute_262, view_46); permute_262 = view_46 = None\n permute_263 = torch.ops.aten.permute.default(mm_68, [1, 0]); mm_68 = None\n sum_101 = torch.ops.aten.sum.dim_IntList(view_276, [0], True); view_276 = None\n view_277 = torch.ops.aten.view.default(sum_101, [768]); sum_101 = None\n permute_264 = torch.ops.aten.permute.default(permute_263, [1, 0]); permute_263 = None\n view_278 = torch.ops.aten.view.default(mm_67, [1, 64, 3072]); mm_67 = None\n view_45 = torch.ops.aten.view.default(addmm_14, [1, 64, 3072]); addmm_14 = None\n mul_28 = torch.ops.aten.mul.Tensor(view_45, 0.5)\n mul_289 = torch.ops.aten.mul.Tensor(view_278, mul_28); mul_28 = None\n pow_4 = torch.ops.aten.pow.Tensor_Scalar(view_45, 3.0)\n mul_29 = torch.ops.aten.mul.Tensor(pow_4, 0.044715); pow_4 = None\n add_30 = torch.ops.aten.add.Tensor(view_45, mul_29); mul_29 = None\n mul_30 = torch.ops.aten.mul.Tensor(add_30, 0.7978845608028654); add_30 = None\n tanh_3 = torch.ops.aten.tanh.default(mul_30); mul_30 = None\n add_31 = torch.ops.aten.add.Tensor(tanh_3, 1.0)\n mul_290 = torch.ops.aten.mul.Tensor(view_278, add_31); view_278 = add_31 = None\n mul_291 = torch.ops.aten.mul.Tensor(tanh_3, tanh_3); tanh_3 = None\n sub_84 = torch.ops.aten.sub.Tensor(1, mul_291); mul_291 = None\n mul_292 = torch.ops.aten.mul.Tensor(mul_289, sub_84); mul_289 = sub_84 = None\n mul_293 = torch.ops.aten.mul.Tensor(mul_292, 0.7978845608028654); mul_292 = None\n mul_294 = torch.ops.aten.mul.Tensor(mul_293, 0.044715)\n pow_21 = torch.ops.aten.pow.Tensor_Scalar(view_45, 2.0); view_45 = None\n mul_295 = torch.ops.aten.mul.Scalar(pow_21, 3.0); pow_21 = None\n mul_296 = torch.ops.aten.mul.Tensor(mul_294, mul_295); mul_294 = mul_295 = None\n add_131 = torch.ops.aten.add.Tensor(mul_293, mul_296); mul_293 = mul_296 = None\n mul_297 = torch.ops.aten.mul.Tensor(mul_290, 0.5); mul_290 = None\n add_132 = torch.ops.aten.add.Tensor(add_131, mul_297); add_131 = mul_297 = None\n view_279 = torch.ops.aten.view.default(add_132, [64, 3072]); add_132 = None\n mm_69 = torch.ops.aten.mm.default(view_279, permute_265); permute_265 = None\n permute_266 = torch.ops.aten.permute.default(view_279, [1, 0])\n mm_70 = torch.ops.aten.mm.default(permute_266, view_44); permute_266 = view_44 = None\n permute_267 = torch.ops.aten.permute.default(mm_70, [1, 0]); mm_70 = None\n sum_102 = torch.ops.aten.sum.dim_IntList(view_279, [0], 
True); view_279 = None\n view_280 = torch.ops.aten.view.default(sum_102, [3072]); sum_102 = None\n permute_268 = torch.ops.aten.permute.default(permute_267, [1, 0]); permute_267 = None\n view_281 = torch.ops.aten.view.default(mm_69, [1, 64, 768]); mm_69 = None\n mul_299 = torch.ops.aten.mul.Tensor(view_281, primals_46); primals_46 = None\n mul_300 = torch.ops.aten.mul.Tensor(mul_299, 768)\n sum_103 = torch.ops.aten.sum.dim_IntList(mul_299, [2], True)\n mul_301 = torch.ops.aten.mul.Tensor(mul_299, mul_26); mul_299 = None\n sum_104 = torch.ops.aten.sum.dim_IntList(mul_301, [2], True); mul_301 = None\n mul_302 = torch.ops.aten.mul.Tensor(mul_26, sum_104); sum_104 = None\n sub_86 = torch.ops.aten.sub.Tensor(mul_300, sum_103); mul_300 = sum_103 = None\n sub_87 = torch.ops.aten.sub.Tensor(sub_86, mul_302); sub_86 = mul_302 = None\n mul_303 = torch.ops.aten.mul.Tensor(div_17, sub_87); div_17 = sub_87 = None\n mul_304 = torch.ops.aten.mul.Tensor(view_281, mul_26); mul_26 = None\n sum_105 = torch.ops.aten.sum.dim_IntList(mul_304, [0, 1]); mul_304 = None\n sum_106 = torch.ops.aten.sum.dim_IntList(view_281, [0, 1]); view_281 = None\n add_133 = torch.ops.aten.add.Tensor(add_130, mul_303); add_130 = mul_303 = None\n view_282 = torch.ops.aten.view.default(add_133, [64, 768])\n mm_71 = torch.ops.aten.mm.default(view_282, permute_269); permute_269 = None\n permute_270 = torch.ops.aten.permute.default(view_282, [1, 0])\n permute_28 = torch.ops.aten.permute.default(getitem_38, [0, 2, 1, 3])\n view_41 = torch.ops.aten.view.default(permute_28, [1, 64, 768]); permute_28 = None\n view_42 = torch.ops.aten.view.default(view_41, [64, 768]); view_41 = None\n mm_72 = torch.ops.aten.mm.default(permute_270, view_42); permute_270 = view_42 = None\n permute_271 = torch.ops.aten.permute.default(mm_72, [1, 0]); mm_72 = None\n sum_107 = torch.ops.aten.sum.dim_IntList(view_282, [0], True); view_282 = None\n view_283 = torch.ops.aten.view.default(sum_107, [768]); sum_107 = None\n permute_272 = torch.ops.aten.permute.default(permute_271, [1, 0]); permute_271 = None\n view_284 = torch.ops.aten.view.default(mm_71, [1, 64, 768]); mm_71 = None\n view_285 = torch.ops.aten.view.default(view_284, [1, 64, 12, 64]); view_284 = None\n permute_273 = torch.ops.aten.permute.default(view_285, [0, 2, 1, 3]); view_285 = None\n _scaled_dot_product_efficient_attention_backward_8 = torch.ops.aten._scaled_dot_product_efficient_attention_backward.default(permute_273, permute_26, permute_25, permute_27, None, getitem_38, getitem_39, getitem_40, getitem_41, 0.0, [True, True, True, False], True); permute_273 = permute_26 = permute_25 = permute_27 = getitem_38 = getitem_39 = getitem_40 = getitem_41 = None\n getitem_166 = _scaled_dot_product_efficient_attention_backward_8[0]\n getitem_167 = _scaled_dot_product_efficient_attention_backward_8[1]\n getitem_168 = _scaled_dot_product_efficient_attention_backward_8[2]; _scaled_dot_product_efficient_attention_backward_8 = None\n permute_274 = torch.ops.aten.permute.default(getitem_168, [0, 2, 1, 3]); getitem_168 = None\n view_286 = torch.ops.aten.view.default(permute_274, [1, 64, 768]); permute_274 = None\n permute_275 = torch.ops.aten.permute.default(getitem_166, [0, 2, 1, 3]); getitem_166 = None\n view_287 = torch.ops.aten.view.default(permute_275, [1, 64, 768]); permute_275 = None\n permute_276 = torch.ops.aten.permute.default(getitem_167, [0, 2, 1, 3]); getitem_167 = None\n view_288 = torch.ops.aten.view.default(permute_276, [1, 64, 768]); permute_276 = None\n cat_8 = 
torch.ops.aten.cat.default([view_287, view_288, view_286], 2); view_287 = view_288 = view_286 = None\n view_289 = torch.ops.aten.view.default(cat_8, [64, 2304]); cat_8 = None\n mm_73 = torch.ops.aten.mm.default(view_289, permute_277); permute_277 = None\n permute_278 = torch.ops.aten.permute.default(view_289, [1, 0])\n mm_74 = torch.ops.aten.mm.default(permute_278, view_36); permute_278 = view_36 = None\n permute_279 = torch.ops.aten.permute.default(mm_74, [1, 0]); mm_74 = None\n sum_108 = torch.ops.aten.sum.dim_IntList(view_289, [0], True); view_289 = None\n view_290 = torch.ops.aten.view.default(sum_108, [2304]); sum_108 = None\n permute_280 = torch.ops.aten.permute.default(permute_279, [1, 0]); permute_279 = None\n view_291 = torch.ops.aten.view.default(mm_73, [1, 64, 768]); mm_73 = None\n mul_306 = torch.ops.aten.mul.Tensor(view_291, primals_40); primals_40 = None\n mul_307 = torch.ops.aten.mul.Tensor(mul_306, 768)\n sum_109 = torch.ops.aten.sum.dim_IntList(mul_306, [2], True)\n mul_308 = torch.ops.aten.mul.Tensor(mul_306, mul_24); mul_306 = None\n sum_110 = torch.ops.aten.sum.dim_IntList(mul_308, [2], True); mul_308 = None\n mul_309 = torch.ops.aten.mul.Tensor(mul_24, sum_110); sum_110 = None\n sub_89 = torch.ops.aten.sub.Tensor(mul_307, sum_109); mul_307 = sum_109 = None\n sub_90 = torch.ops.aten.sub.Tensor(sub_89, mul_309); sub_89 = mul_309 = None\n mul_310 = torch.ops.aten.mul.Tensor(div_18, sub_90); div_18 = sub_90 = None\n mul_311 = torch.ops.aten.mul.Tensor(view_291, mul_24); mul_24 = None\n sum_111 = torch.ops.aten.sum.dim_IntList(mul_311, [0, 1]); mul_311 = None\n sum_112 = torch.ops.aten.sum.dim_IntList(view_291, [0, 1]); view_291 = None\n add_134 = torch.ops.aten.add.Tensor(add_133, mul_310); add_133 = mul_310 = None\n view_292 = torch.ops.aten.view.default(add_134, [64, 768])\n mm_75 = torch.ops.aten.mm.default(view_292, permute_281); permute_281 = None\n permute_282 = torch.ops.aten.permute.default(view_292, [1, 0])\n mm_76 = torch.ops.aten.mm.default(permute_282, view_34); permute_282 = view_34 = None\n permute_283 = torch.ops.aten.permute.default(mm_76, [1, 0]); mm_76 = None\n sum_113 = torch.ops.aten.sum.dim_IntList(view_292, [0], True); view_292 = None\n view_293 = torch.ops.aten.view.default(sum_113, [768]); sum_113 = None\n permute_284 = torch.ops.aten.permute.default(permute_283, [1, 0]); permute_283 = None\n view_294 = torch.ops.aten.view.default(mm_75, [1, 64, 3072]); mm_75 = None\n view_33 = torch.ops.aten.view.default(addmm_10, [1, 64, 3072]); addmm_10 = None\n mul_20 = torch.ops.aten.mul.Tensor(view_33, 0.5)\n mul_312 = torch.ops.aten.mul.Tensor(view_294, mul_20); mul_20 = None\n pow_3 = torch.ops.aten.pow.Tensor_Scalar(view_33, 3.0)\n mul_21 = torch.ops.aten.mul.Tensor(pow_3, 0.044715); pow_3 = None\n add_22 = torch.ops.aten.add.Tensor(view_33, mul_21); mul_21 = None\n mul_22 = torch.ops.aten.mul.Tensor(add_22, 0.7978845608028654); add_22 = None\n tanh_2 = torch.ops.aten.tanh.default(mul_22); mul_22 = None\n add_23 = torch.ops.aten.add.Tensor(tanh_2, 1.0)\n mul_313 = torch.ops.aten.mul.Tensor(view_294, add_23); view_294 = add_23 = None\n mul_314 = torch.ops.aten.mul.Tensor(tanh_2, tanh_2); tanh_2 = None\n sub_91 = torch.ops.aten.sub.Tensor(1, mul_314); mul_314 = None\n mul_315 = torch.ops.aten.mul.Tensor(mul_312, sub_91); mul_312 = sub_91 = None\n mul_316 = torch.ops.aten.mul.Tensor(mul_315, 0.7978845608028654); mul_315 = None\n mul_317 = torch.ops.aten.mul.Tensor(mul_316, 0.044715)\n pow_22 = torch.ops.aten.pow.Tensor_Scalar(view_33, 2.0); view_33 = None\n 
mul_318 = torch.ops.aten.mul.Scalar(pow_22, 3.0); pow_22 = None\n mul_319 = torch.ops.aten.mul.Tensor(mul_317, mul_318); mul_317 = mul_318 = None\n add_135 = torch.ops.aten.add.Tensor(mul_316, mul_319); mul_316 = mul_319 = None\n mul_320 = torch.ops.aten.mul.Tensor(mul_313, 0.5); mul_313 = None\n add_136 = torch.ops.aten.add.Tensor(add_135, mul_320); add_135 = mul_320 = None\n view_295 = torch.ops.aten.view.default(add_136, [64, 3072]); add_136 = None\n mm_77 = torch.ops.aten.mm.default(view_295, permute_285); permute_285 = None\n permute_286 = torch.ops.aten.permute.default(view_295, [1, 0])\n mm_78 = torch.ops.aten.mm.default(permute_286, view_32); permute_286 = view_32 = None\n permute_287 = torch.ops.aten.permute.default(mm_78, [1, 0]); mm_78 = None\n sum_114 = torch.ops.aten.sum.dim_IntList(view_295, [0], True); view_295 = None\n view_296 = torch.ops.aten.view.default(sum_114, [3072]); sum_114 = None\n permute_288 = torch.ops.aten.permute.default(permute_287, [1, 0]); permute_287 = None\n view_297 = torch.ops.aten.view.default(mm_77, [1, 64, 768]); mm_77 = None\n mul_322 = torch.ops.aten.mul.Tensor(view_297, primals_34); primals_34 = None\n mul_323 = torch.ops.aten.mul.Tensor(mul_322, 768)\n sum_115 = torch.ops.aten.sum.dim_IntList(mul_322, [2], True)\n mul_324 = torch.ops.aten.mul.Tensor(mul_322, mul_18); mul_322 = None\n sum_116 = torch.ops.aten.sum.dim_IntList(mul_324, [2], True); mul_324 = None\n mul_325 = torch.ops.aten.mul.Tensor(mul_18, sum_116); sum_116 = None\n sub_93 = torch.ops.aten.sub.Tensor(mul_323, sum_115); mul_323 = sum_115 = None\n sub_94 = torch.ops.aten.sub.Tensor(sub_93, mul_325); sub_93 = mul_325 = None\n mul_326 = torch.ops.aten.mul.Tensor(div_19, sub_94); div_19 = sub_94 = None\n mul_327 = torch.ops.aten.mul.Tensor(view_297, mul_18); mul_18 = None\n sum_117 = torch.ops.aten.sum.dim_IntList(mul_327, [0, 1]); mul_327 = None\n sum_118 = torch.ops.aten.sum.dim_IntList(view_297, [0, 1]); view_297 = None\n add_137 = torch.ops.aten.add.Tensor(add_134, mul_326); add_134 = mul_326 = None\n view_298 = torch.ops.aten.view.default(add_137, [64, 768])\n mm_79 = torch.ops.aten.mm.default(view_298, permute_289); permute_289 = None\n permute_290 = torch.ops.aten.permute.default(view_298, [1, 0])\n permute_20 = torch.ops.aten.permute.default(getitem_27, [0, 2, 1, 3])\n view_29 = torch.ops.aten.view.default(permute_20, [1, 64, 768]); permute_20 = None\n view_30 = torch.ops.aten.view.default(view_29, [64, 768]); view_29 = None\n mm_80 = torch.ops.aten.mm.default(permute_290, view_30); permute_290 = view_30 = None\n permute_291 = torch.ops.aten.permute.default(mm_80, [1, 0]); mm_80 = None\n sum_119 = torch.ops.aten.sum.dim_IntList(view_298, [0], True); view_298 = None\n view_299 = torch.ops.aten.view.default(sum_119, [768]); sum_119 = None\n permute_292 = torch.ops.aten.permute.default(permute_291, [1, 0]); permute_291 = None\n view_300 = torch.ops.aten.view.default(mm_79, [1, 64, 768]); mm_79 = None\n view_301 = torch.ops.aten.view.default(view_300, [1, 64, 12, 64]); view_300 = None\n permute_293 = torch.ops.aten.permute.default(view_301, [0, 2, 1, 3]); view_301 = None\n _scaled_dot_product_efficient_attention_backward_9 = torch.ops.aten._scaled_dot_product_efficient_attention_backward.default(permute_293, permute_18, permute_17, permute_19, None, getitem_27, getitem_28, getitem_29, getitem_30, 0.0, [True, True, True, False], True); permute_293 = permute_18 = permute_17 = permute_19 = getitem_27 = getitem_28 = getitem_29 = getitem_30 = None\n getitem_170 = 
_scaled_dot_product_efficient_attention_backward_9[0]\n getitem_171 = _scaled_dot_product_efficient_attention_backward_9[1]\n getitem_172 = _scaled_dot_product_efficient_attention_backward_9[2]; _scaled_dot_product_efficient_attention_backward_9 = None\n permute_294 = torch.ops.aten.permute.default(getitem_172, [0, 2, 1, 3]); getitem_172 = None\n view_302 = torch.ops.aten.view.default(permute_294, [1, 64, 768]); permute_294 = None\n permute_295 = torch.ops.aten.permute.default(getitem_170, [0, 2, 1, 3]); getitem_170 = None\n view_303 = torch.ops.aten.view.default(permute_295, [1, 64, 768]); permute_295 = None\n permute_296 = torch.ops.aten.permute.default(getitem_171, [0, 2, 1, 3]); getitem_171 = None\n view_304 = torch.ops.aten.view.default(permute_296, [1, 64, 768]); permute_296 = None\n cat_9 = torch.ops.aten.cat.default([view_303, view_304, view_302], 2); view_303 = view_304 = view_302 = None\n view_305 = torch.ops.aten.view.default(cat_9, [64, 2304]); cat_9 = None\n mm_81 = torch.ops.aten.mm.default(view_305, permute_297); permute_297 = None\n permute_298 = torch.ops.aten.permute.default(view_305, [1, 0])\n mm_82 = torch.ops.aten.mm.default(permute_298, view_24); permute_298 = view_24 = None\n permute_299 = torch.ops.aten.permute.default(mm_82, [1, 0]); mm_82 = None\n sum_120 = torch.ops.aten.sum.dim_IntList(view_305, [0], True); view_305 = None\n view_306 = torch.ops.aten.view.default(sum_120, [2304]); sum_120 = None\n permute_300 = torch.ops.aten.permute.default(permute_299, [1, 0]); permute_299 = None\n view_307 = torch.ops.aten.view.default(mm_81, [1, 64, 768]); mm_81 = None\n mul_329 = torch.ops.aten.mul.Tensor(view_307, primals_28); primals_28 = None\n mul_330 = torch.ops.aten.mul.Tensor(mul_329, 768)\n sum_121 = torch.ops.aten.sum.dim_IntList(mul_329, [2], True)\n mul_331 = torch.ops.aten.mul.Tensor(mul_329, mul_16); mul_329 = None\n sum_122 = torch.ops.aten.sum.dim_IntList(mul_331, [2], True); mul_331 = None\n mul_332 = torch.ops.aten.mul.Tensor(mul_16, sum_122); sum_122 = None\n sub_96 = torch.ops.aten.sub.Tensor(mul_330, sum_121); mul_330 = sum_121 = None\n sub_97 = torch.ops.aten.sub.Tensor(sub_96, mul_332); sub_96 = mul_332 = None\n mul_333 = torch.ops.aten.mul.Tensor(div_20, sub_97); div_20 = sub_97 = None\n mul_334 = torch.ops.aten.mul.Tensor(view_307, mul_16); mul_16 = None\n sum_123 = torch.ops.aten.sum.dim_IntList(mul_334, [0, 1]); mul_334 = None\n sum_124 = torch.ops.aten.sum.dim_IntList(view_307, [0, 1]); view_307 = None\n add_138 = torch.ops.aten.add.Tensor(add_137, mul_333); add_137 = mul_333 = None\n view_308 = torch.ops.aten.view.default(add_138, [64, 768])\n mm_83 = torch.ops.aten.mm.default(view_308, permute_301); permute_301 = None\n permute_302 = torch.ops.aten.permute.default(view_308, [1, 0])\n mm_84 = torch.ops.aten.mm.default(permute_302, view_22); permute_302 = view_22 = None\n permute_303 = torch.ops.aten.permute.default(mm_84, [1, 0]); mm_84 = None\n sum_125 = torch.ops.aten.sum.dim_IntList(view_308, [0], True); view_308 = None\n view_309 = torch.ops.aten.view.default(sum_125, [768]); sum_125 = None\n permute_304 = torch.ops.aten.permute.default(permute_303, [1, 0]); permute_303 = None\n view_310 = torch.ops.aten.view.default(mm_83, [1, 64, 3072]); mm_83 = None\n view_21 = torch.ops.aten.view.default(addmm_6, [1, 64, 3072]); addmm_6 = None\n mul_12 = torch.ops.aten.mul.Tensor(view_21, 0.5)\n mul_335 = torch.ops.aten.mul.Tensor(view_310, mul_12); mul_12 = None\n pow_2 = torch.ops.aten.pow.Tensor_Scalar(view_21, 3.0)\n mul_13 = 
torch.ops.aten.mul.Tensor(pow_2, 0.044715); pow_2 = None\n add_14 = torch.ops.aten.add.Tensor(view_21, mul_13); mul_13 = None\n mul_14 = torch.ops.aten.mul.Tensor(add_14, 0.7978845608028654); add_14 = None\n tanh_1 = torch.ops.aten.tanh.default(mul_14); mul_14 = None\n add_15 = torch.ops.aten.add.Tensor(tanh_1, 1.0)\n mul_336 = torch.ops.aten.mul.Tensor(view_310, add_15); view_310 = add_15 = None\n mul_337 = torch.ops.aten.mul.Tensor(tanh_1, tanh_1); tanh_1 = None\n sub_98 = torch.ops.aten.sub.Tensor(1, mul_337); mul_337 = None\n mul_338 = torch.ops.aten.mul.Tensor(mul_335, sub_98); mul_335 = sub_98 = None\n mul_339 = torch.ops.aten.mul.Tensor(mul_338, 0.7978845608028654); mul_338 = None\n mul_340 = torch.ops.aten.mul.Tensor(mul_339, 0.044715)\n pow_23 = torch.ops.aten.pow.Tensor_Scalar(view_21, 2.0); view_21 = None\n mul_341 = torch.ops.aten.mul.Scalar(pow_23, 3.0); pow_23 = None\n mul_342 = torch.ops.aten.mul.Tensor(mul_340, mul_341); mul_340 = mul_341 = None\n add_139 = torch.ops.aten.add.Tensor(mul_339, mul_342); mul_339 = mul_342 = None\n mul_343 = torch.ops.aten.mul.Tensor(mul_336, 0.5); mul_336 = None\n add_140 = torch.ops.aten.add.Tensor(add_139, mul_343); add_139 = mul_343 = None\n view_311 = torch.ops.aten.view.default(add_140, [64, 3072]); add_140 = None\n mm_85 = torch.ops.aten.mm.default(view_311, permute_305); permute_305 = None\n permute_306 = torch.ops.aten.permute.default(view_311, [1, 0])\n mm_86 = torch.ops.aten.mm.default(permute_306, view_20); permute_306 = view_20 = None\n permute_307 = torch.ops.aten.permute.default(mm_86, [1, 0]); mm_86 = None\n sum_126 = torch.ops.aten.sum.dim_IntList(view_311, [0], True); view_311 = None\n view_312 = torch.ops.aten.view.default(sum_126, [3072]); sum_126 = None\n permute_308 = torch.ops.aten.permute.default(permute_307, [1, 0]); permute_307 = None\n view_313 = torch.ops.aten.view.default(mm_85, [1, 64, 768]); mm_85 = None\n mul_345 = torch.ops.aten.mul.Tensor(view_313, primals_22); primals_22 = None\n mul_346 = torch.ops.aten.mul.Tensor(mul_345, 768)\n sum_127 = torch.ops.aten.sum.dim_IntList(mul_345, [2], True)\n mul_347 = torch.ops.aten.mul.Tensor(mul_345, mul_10); mul_345 = None\n sum_128 = torch.ops.aten.sum.dim_IntList(mul_347, [2], True); mul_347 = None\n mul_348 = torch.ops.aten.mul.Tensor(mul_10, sum_128); sum_128 = None\n sub_100 = torch.ops.aten.sub.Tensor(mul_346, sum_127); mul_346 = sum_127 = None\n sub_101 = torch.ops.aten.sub.Tensor(sub_100, mul_348); sub_100 = mul_348 = None\n mul_349 = torch.ops.aten.mul.Tensor(div_21, sub_101); div_21 = sub_101 = None\n mul_350 = torch.ops.aten.mul.Tensor(view_313, mul_10); mul_10 = None\n sum_129 = torch.ops.aten.sum.dim_IntList(mul_350, [0, 1]); mul_350 = None\n sum_130 = torch.ops.aten.sum.dim_IntList(view_313, [0, 1]); view_313 = None\n add_141 = torch.ops.aten.add.Tensor(add_138, mul_349); add_138 = mul_349 = None\n view_314 = torch.ops.aten.view.default(add_141, [64, 768])\n mm_87 = torch.ops.aten.mm.default(view_314, permute_309); permute_309 = None\n permute_310 = torch.ops.aten.permute.default(view_314, [1, 0])\n permute_12 = torch.ops.aten.permute.default(getitem_16, [0, 2, 1, 3])\n view_17 = torch.ops.aten.view.default(permute_12, [1, 64, 768]); permute_12 = None\n view_18 = torch.ops.aten.view.default(view_17, [64, 768]); view_17 = None\n mm_88 = torch.ops.aten.mm.default(permute_310, view_18); permute_310 = view_18 = None\n permute_311 = torch.ops.aten.permute.default(mm_88, [1, 0]); mm_88 = None\n sum_131 = torch.ops.aten.sum.dim_IntList(view_314, [0], True); view_314 
= None\n view_315 = torch.ops.aten.view.default(sum_131, [768]); sum_131 = None\n permute_312 = torch.ops.aten.permute.default(permute_311, [1, 0]); permute_311 = None\n view_316 = torch.ops.aten.view.default(mm_87, [1, 64, 768]); mm_87 = None\n view_317 = torch.ops.aten.view.default(view_316, [1, 64, 12, 64]); view_316 = None\n permute_313 = torch.ops.aten.permute.default(view_317, [0, 2, 1, 3]); view_317 = None\n _scaled_dot_product_efficient_attention_backward_10 = torch.ops.aten._scaled_dot_product_efficient_attention_backward.default(permute_313, permute_10, permute_9, permute_11, None, getitem_16, getitem_17, getitem_18, getitem_19, 0.0, [True, True, True, False], True); permute_313 = permute_10 = permute_9 = permute_11 = getitem_16 = getitem_17 = getitem_18 = getitem_19 = None\n getitem_174 = _scaled_dot_product_efficient_attention_backward_10[0]\n getitem_175 = _scaled_dot_product_efficient_attention_backward_10[1]\n getitem_176 = _scaled_dot_product_efficient_attention_backward_10[2]; _scaled_dot_product_efficient_attention_backward_10 = None\n permute_314 = torch.ops.aten.permute.default(getitem_176, [0, 2, 1, 3]); getitem_176 = None\n view_318 = torch.ops.aten.view.default(permute_314, [1, 64, 768]); permute_314 = None\n permute_315 = torch.ops.aten.permute.default(getitem_174, [0, 2, 1, 3]); getitem_174 = None\n view_319 = torch.ops.aten.view.default(permute_315, [1, 64, 768]); permute_315 = None\n permute_316 = torch.ops.aten.permute.default(getitem_175, [0, 2, 1, 3]); getitem_175 = None\n view_320 = torch.ops.aten.view.default(permute_316, [1, 64, 768]); permute_316 = None\n cat_10 = torch.ops.aten.cat.default([view_319, view_320, view_318], 2); view_319 = view_320 = view_318 = None\n view_321 = torch.ops.aten.view.default(cat_10, [64, 2304]); cat_10 = None\n mm_89 = torch.ops.aten.mm.default(view_321, permute_317); permute_317 = None\n permute_318 = torch.ops.aten.permute.default(view_321, [1, 0])\n mm_90 = torch.ops.aten.mm.default(permute_318, view_12); permute_318 = view_12 = None\n permute_319 = torch.ops.aten.permute.default(mm_90, [1, 0]); mm_90 = None\n sum_132 = torch.ops.aten.sum.dim_IntList(view_321, [0], True); view_321 = None\n view_322 = torch.ops.aten.view.default(sum_132, [2304]); sum_132 = None\n permute_320 = torch.ops.aten.permute.default(permute_319, [1, 0]); permute_319 = None\n view_323 = torch.ops.aten.view.default(mm_89, [1, 64, 768]); mm_89 = None\n mul_352 = torch.ops.aten.mul.Tensor(view_323, primals_16); primals_16 = None\n mul_353 = torch.ops.aten.mul.Tensor(mul_352, 768)\n sum_133 = torch.ops.aten.sum.dim_IntList(mul_352, [2], True)\n mul_354 = torch.ops.aten.mul.Tensor(mul_352, mul_8); mul_352 = None\n sum_134 = torch.ops.aten.sum.dim_IntList(mul_354, [2], True); mul_354 = None\n mul_355 = torch.ops.aten.mul.Tensor(mul_8, sum_134); sum_134 = None\n sub_103 = torch.ops.aten.sub.Tensor(mul_353, sum_133); mul_353 = sum_133 = None\n sub_104 = torch.ops.aten.sub.Tensor(sub_103, mul_355); sub_103 = mul_355 = None\n mul_356 = torch.ops.aten.mul.Tensor(div_22, sub_104); div_22 = sub_104 = None\n mul_357 = torch.ops.aten.mul.Tensor(view_323, mul_8); mul_8 = None\n sum_135 = torch.ops.aten.sum.dim_IntList(mul_357, [0, 1]); mul_357 = None\n sum_136 = torch.ops.aten.sum.dim_IntList(view_323, [0, 1]); view_323 = None\n add_142 = torch.ops.aten.add.Tensor(add_141, mul_356); add_141 = mul_356 = None\n view_324 = torch.ops.aten.view.default(add_142, [64, 768])\n mm_91 = torch.ops.aten.mm.default(view_324, permute_321); permute_321 = None\n permute_322 = 
torch.ops.aten.permute.default(view_324, [1, 0])\n mm_92 = torch.ops.aten.mm.default(permute_322, view_10); permute_322 = view_10 = None\n permute_323 = torch.ops.aten.permute.default(mm_92, [1, 0]); mm_92 = None\n sum_137 = torch.ops.aten.sum.dim_IntList(view_324, [0], True); view_324 = None\n view_325 = torch.ops.aten.view.default(sum_137, [768]); sum_137 = None\n permute_324 = torch.ops.aten.permute.default(permute_323, [1, 0]); permute_323 = None\n view_326 = torch.ops.aten.view.default(mm_91, [1, 64, 3072]); mm_91 = None\n view_9 = torch.ops.aten.view.default(addmm_2, [1, 64, 3072]); addmm_2 = None\n mul_4 = torch.ops.aten.mul.Tensor(view_9, 0.5)\n mul_358 = torch.ops.aten.mul.Tensor(view_326, mul_4); mul_4 = None\n pow_1 = torch.ops.aten.pow.Tensor_Scalar(view_9, 3.0)\n mul_5 = torch.ops.aten.mul.Tensor(pow_1, 0.044715); pow_1 = None\n add_6 = torch.ops.aten.add.Tensor(view_9, mul_5); mul_5 = None\n mul_6 = torch.ops.aten.mul.Tensor(add_6, 0.7978845608028654); add_6 = None\n tanh = torch.ops.aten.tanh.default(mul_6); mul_6 = None\n add_7 = torch.ops.aten.add.Tensor(tanh, 1.0)\n mul_359 = torch.ops.aten.mul.Tensor(view_326, add_7); view_326 = add_7 = None\n mul_360 = torch.ops.aten.mul.Tensor(tanh, tanh); tanh = None\n sub_105 = torch.ops.aten.sub.Tensor(1, mul_360); mul_360 = None\n mul_361 = torch.ops.aten.mul.Tensor(mul_358, sub_105); mul_358 = sub_105 = None\n mul_362 = torch.ops.aten.mul.Tensor(mul_361, 0.7978845608028654); mul_361 = None\n mul_363 = torch.ops.aten.mul.Tensor(mul_362, 0.044715)\n pow_24 = torch.ops.aten.pow.Tensor_Scalar(view_9, 2.0); view_9 = None\n mul_364 = torch.ops.aten.mul.Scalar(pow_24, 3.0); pow_24 = None\n mul_365 = torch.ops.aten.mul.Tensor(mul_363, mul_364); mul_363 = mul_364 = None\n add_143 = torch.ops.aten.add.Tensor(mul_362, mul_365); mul_362 = mul_365 = None\n mul_366 = torch.ops.aten.mul.Tensor(mul_359, 0.5); mul_359 = None\n add_144 = torch.ops.aten.add.Tensor(add_143, mul_366); add_143 = mul_366 = None\n view_327 = torch.ops.aten.view.default(add_144, [64, 3072]); add_144 = None\n mm_93 = torch.ops.aten.mm.default(view_327, permute_325); permute_325 = None\n permute_326 = torch.ops.aten.permute.default(view_327, [1, 0])\n mm_94 = torch.ops.aten.mm.default(permute_326, view_8); permute_326 = view_8 = None\n permute_327 = torch.ops.aten.permute.default(mm_94, [1, 0]); mm_94 = None\n sum_138 = torch.ops.aten.sum.dim_IntList(view_327, [0], True); view_327 = None\n view_328 = torch.ops.aten.view.default(sum_138, [3072]); sum_138 = None\n permute_328 = torch.ops.aten.permute.default(permute_327, [1, 0]); permute_327 = None\n view_329 = torch.ops.aten.view.default(mm_93, [1, 64, 768]); mm_93 = None\n mul_368 = torch.ops.aten.mul.Tensor(view_329, primals_10); primals_10 = None\n mul_369 = torch.ops.aten.mul.Tensor(mul_368, 768)\n sum_139 = torch.ops.aten.sum.dim_IntList(mul_368, [2], True)\n mul_370 = torch.ops.aten.mul.Tensor(mul_368, mul_2); mul_368 = None\n sum_140 = torch.ops.aten.sum.dim_IntList(mul_370, [2], True); mul_370 = None\n mul_371 = torch.ops.aten.mul.Tensor(mul_2, sum_140); sum_140 = None\n sub_107 = torch.ops.aten.sub.Tensor(mul_369, sum_139); mul_369 = sum_139 = None\n sub_108 = torch.ops.aten.sub.Tensor(sub_107, mul_371); sub_107 = mul_371 = None\n mul_372 = torch.ops.aten.mul.Tensor(div_23, sub_108); div_23 = sub_108 = None\n mul_373 = torch.ops.aten.mul.Tensor(view_329, mul_2); mul_2 = None\n sum_141 = torch.ops.aten.sum.dim_IntList(mul_373, [0, 1]); mul_373 = None\n sum_142 = torch.ops.aten.sum.dim_IntList(view_329, [0, 1]); 
view_329 = None\n add_145 = torch.ops.aten.add.Tensor(add_142, mul_372); add_142 = mul_372 = None\n view_330 = torch.ops.aten.view.default(add_145, [64, 768])\n mm_95 = torch.ops.aten.mm.default(view_330, permute_329); permute_329 = None\n permute_330 = torch.ops.aten.permute.default(view_330, [1, 0])\n permute_4 = torch.ops.aten.permute.default(getitem_5, [0, 2, 1, 3])\n view_5 = torch.ops.aten.view.default(permute_4, [1, 64, 768]); permute_4 = None\n view_6 = torch.ops.aten.view.default(view_5, [64, 768]); view_5 = None\n mm_96 = torch.ops.aten.mm.default(permute_330, view_6); permute_330 = view_6 = None\n permute_331 = torch.ops.aten.permute.default(mm_96, [1, 0]); mm_96 = None\n sum_143 = torch.ops.aten.sum.dim_IntList(view_330, [0], True); view_330 = None\n view_331 = torch.ops.aten.view.default(sum_143, [768]); sum_143 = None\n permute_332 = torch.ops.aten.permute.default(permute_331, [1, 0]); permute_331 = None\n view_332 = torch.ops.aten.view.default(mm_95, [1, 64, 768]); mm_95 = None\n view_333 = torch.ops.aten.view.default(view_332, [1, 64, 12, 64]); view_332 = None\n permute_333 = torch.ops.aten.permute.default(view_333, [0, 2, 1, 3]); view_333 = None\n _scaled_dot_product_efficient_attention_backward_11 = torch.ops.aten._scaled_dot_product_efficient_attention_backward.default(permute_333, permute_2, permute_1, permute_3, None, getitem_5, getitem_6, getitem_7, getitem_8, 0.0, [True, True, True, False], True); permute_333 = permute_2 = permute_1 = permute_3 = getitem_5 = getitem_6 = getitem_7 = getitem_8 = None\n getitem_178 = _scaled_dot_product_efficient_attention_backward_11[0]\n getitem_179 = _scaled_dot_product_efficient_attention_backward_11[1]\n getitem_180 = _scaled_dot_product_efficient_attention_backward_11[2]; _scaled_dot_product_efficient_attention_backward_11 = None\n permute_334 = torch.ops.aten.permute.default(getitem_180, [0, 2, 1, 3]); getitem_180 = None\n view_334 = torch.ops.aten.view.default(permute_334, [1, 64, 768]); permute_334 = None\n permute_335 = torch.ops.aten.permute.default(getitem_178, [0, 2, 1, 3]); getitem_178 = None\n view_335 = torch.ops.aten.view.default(permute_335, [1, 64, 768]); permute_335 = None\n permute_336 = torch.ops.aten.permute.default(getitem_179, [0, 2, 1, 3]); getitem_179 = None\n view_336 = torch.ops.aten.view.default(permute_336, [1, 64, 768]); permute_336 = None\n cat_11 = torch.ops.aten.cat.default([view_335, view_336, view_334], 2); view_335 = view_336 = view_334 = None\n view_337 = torch.ops.aten.view.default(cat_11, [64, 2304]); cat_11 = None\n mm_97 = torch.ops.aten.mm.default(view_337, permute_337); permute_337 = None\n permute_338 = torch.ops.aten.permute.default(view_337, [1, 0])\n mm_98 = torch.ops.aten.mm.default(permute_338, view); permute_338 = view = None\n permute_339 = torch.ops.aten.permute.default(mm_98, [1, 0]); mm_98 = None\n sum_144 = torch.ops.aten.sum.dim_IntList(view_337, [0], True); view_337 = None\n view_338 = torch.ops.aten.view.default(sum_144, [2304]); sum_144 = None\n permute_340 = torch.ops.aten.permute.default(permute_339, [1, 0]); permute_339 = None\n view_339 = torch.ops.aten.view.default(mm_97, [1, 64, 768]); mm_97 = None\n mul_375 = torch.ops.aten.mul.Tensor(view_339, primals_4); primals_4 = None\n mul_376 = torch.ops.aten.mul.Tensor(mul_375, 768)\n sum_145 = torch.ops.aten.sum.dim_IntList(mul_375, [2], True)\n mul_377 = torch.ops.aten.mul.Tensor(mul_375, mul); mul_375 = None\n sum_146 = torch.ops.aten.sum.dim_IntList(mul_377, [2], True); mul_377 = None\n mul_378 = 
torch.ops.aten.mul.Tensor(mul, sum_146); sum_146 = None\n sub_110 = torch.ops.aten.sub.Tensor(mul_376, sum_145); mul_376 = sum_145 = None\n sub_111 = torch.ops.aten.sub.Tensor(sub_110, mul_378); sub_110 = mul_378 = None\n mul_379 = torch.ops.aten.mul.Tensor(div_24, sub_111); div_24 = sub_111 = None\n mul_380 = torch.ops.aten.mul.Tensor(view_339, mul); mul = None\n sum_147 = torch.ops.aten.sum.dim_IntList(mul_380, [0, 1]); mul_380 = None\n sum_148 = torch.ops.aten.sum.dim_IntList(view_339, [0, 1]); view_339 = None\n add_146 = torch.ops.aten.add.Tensor(add_145, mul_379); add_145 = mul_379 = None\n eq = torch.ops.aten.eq.Scalar(unsqueeze, -1)\n unsqueeze_1 = torch.ops.aten.unsqueeze.default(eq, -1); eq = None\n full_default_4 = torch.ops.aten.full.default([], 0.0, dtype = torch.float32, layout = torch.strided, device = device(type='cuda', index=0), pin_memory = False)\n where = torch.ops.aten.where.self(unsqueeze_1, full_default_4, add_146); unsqueeze_1 = None\n full_default_5 = torch.ops.aten.full.default([1024, 768], 0, dtype = torch.float32, layout = torch.strided, device = device(type='cuda', index=0), pin_memory = False)\n _unsafe_index_put = torch.ops.aten._unsafe_index_put.default(full_default_5, [unsqueeze], where, True); full_default_5 = unsqueeze = where = None\n eq_1 = torch.ops.aten.eq.Scalar(primals_1, -1)\n unsqueeze_2 = torch.ops.aten.unsqueeze.default(eq_1, -1); eq_1 = None\n where_1 = torch.ops.aten.where.self(unsqueeze_2, full_default_4, add_146); unsqueeze_2 = full_default_4 = add_146 = None\n full_default_7 = torch.ops.aten.full.default([50304, 768], 0, dtype = torch.float32, layout = torch.strided, device = device(type='cuda', index=0), pin_memory = False)\n _unsafe_index_put_1 = torch.ops.aten._unsafe_index_put.default(full_default_7, [primals_1], where_1, True); full_default_7 = primals_1 = where_1 = None\n add_147 = torch.ops.aten.add.Tensor(permute_100, _unsafe_index_put_1); permute_100 = _unsafe_index_put_1 = None\n return (None, add_147, _unsafe_index_put, sum_147, sum_148, permute_340, view_338, permute_332, view_331, sum_141, sum_142, permute_328, view_328, permute_324, view_325, sum_135, sum_136, permute_320, view_322, permute_312, view_315, sum_129, sum_130, permute_308, view_312, permute_304, view_309, sum_123, sum_124, permute_300, view_306, permute_292, view_299, sum_117, sum_118, permute_288, view_296, permute_284, view_293, sum_111, sum_112, permute_280, view_290, permute_272, view_283, sum_105, sum_106, permute_268, view_280, permute_264, view_277, sum_99, sum_100, permute_260, view_274, permute_252, view_267, sum_93, sum_94, permute_248, view_264, permute_244, view_261, sum_87, sum_88, permute_240, view_258, permute_232, view_251, sum_81, sum_82, permute_228, view_248, permute_224, view_245, sum_75, sum_76, permute_220, view_242, permute_212, view_235, sum_69, sum_70, permute_208, view_232, permute_204, view_229, sum_63, sum_64, permute_200, view_226, permute_192, view_219, sum_57, sum_58, permute_188, view_216, permute_184, view_213, sum_51, sum_52, permute_180, view_210, permute_172, view_203, sum_45, sum_46, permute_168, view_200, permute_164, view_197, sum_39, sum_40, permute_160, view_194, permute_152, view_187, sum_33, sum_34, permute_148, view_184, permute_144, view_181, sum_27, sum_28, permute_140, view_178, permute_132, view_171, sum_21, sum_22, permute_128, view_168, permute_124, view_165, sum_15, sum_16, permute_120, view_162, permute_112, view_155, sum_9, sum_10, permute_108, view_152, permute_104, view_149, sum_3, sum_4)\n \n# To see more 
debug info, please use `graph_module.print_readable()`", "[5yvj33oys7gdk532knvznzkakzkoiu5vfpvjdr4iy5pj5yukree] example_inputs[0]: TensorMetadata(dtype=torch.int64, shape=torch.Size([1, 64]), stride=(64, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=512, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[ymueslj5cs5k2l243gcdaohltqzpminou4x5gltmu2cgjgroyyd] example_inputs[1]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=3072, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[ymueslj5cs5k2l243gcdaohltqzpminou4x5gltmu2cgjgroyyd] example_inputs[2]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=3072, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[ymueslj5cs5k2l243gcdaohltqzpminou4x5gltmu2cgjgroyyd] example_inputs[3]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=3072, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[ymueslj5cs5k2l243gcdaohltqzpminou4x5gltmu2cgjgroyyd] example_inputs[4]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=3072, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[ymueslj5cs5k2l243gcdaohltqzpminou4x5gltmu2cgjgroyyd] example_inputs[5]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=3072, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[ymueslj5cs5k2l243gcdaohltqzpminou4x5gltmu2cgjgroyyd] example_inputs[6]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=3072, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[ymueslj5cs5k2l243gcdaohltqzpminou4x5gltmu2cgjgroyyd] example_inputs[7]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=3072, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, 
is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[ymueslj5cs5k2l243gcdaohltqzpminou4x5gltmu2cgjgroyyd] example_inputs[8]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=3072, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[ymueslj5cs5k2l243gcdaohltqzpminou4x5gltmu2cgjgroyyd] example_inputs[9]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=3072, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[ymueslj5cs5k2l243gcdaohltqzpminou4x5gltmu2cgjgroyyd] example_inputs[10]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=3072, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[ymueslj5cs5k2l243gcdaohltqzpminou4x5gltmu2cgjgroyyd] example_inputs[11]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=3072, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[ymueslj5cs5k2l243gcdaohltqzpminou4x5gltmu2cgjgroyyd] example_inputs[12]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=3072, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[ymueslj5cs5k2l243gcdaohltqzpminou4x5gltmu2cgjgroyyd] example_inputs[13]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=3072, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[ymueslj5cs5k2l243gcdaohltqzpminou4x5gltmu2cgjgroyyd] example_inputs[14]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=3072, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[ymueslj5cs5k2l243gcdaohltqzpminou4x5gltmu2cgjgroyyd] example_inputs[15]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=3072, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, 
is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[ymueslj5cs5k2l243gcdaohltqzpminou4x5gltmu2cgjgroyyd] example_inputs[16]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=3072, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[ymueslj5cs5k2l243gcdaohltqzpminou4x5gltmu2cgjgroyyd] example_inputs[17]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=3072, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[ymueslj5cs5k2l243gcdaohltqzpminou4x5gltmu2cgjgroyyd] example_inputs[18]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=3072, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[ymueslj5cs5k2l243gcdaohltqzpminou4x5gltmu2cgjgroyyd] example_inputs[19]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=3072, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[ymueslj5cs5k2l243gcdaohltqzpminou4x5gltmu2cgjgroyyd] example_inputs[20]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=3072, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[ymueslj5cs5k2l243gcdaohltqzpminou4x5gltmu2cgjgroyyd] example_inputs[21]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=3072, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[ymueslj5cs5k2l243gcdaohltqzpminou4x5gltmu2cgjgroyyd] example_inputs[22]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=3072, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[ymueslj5cs5k2l243gcdaohltqzpminou4x5gltmu2cgjgroyyd] example_inputs[23]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=3072, requires_grad=False, is_quantized=False, is_conj=False, 
is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[ymueslj5cs5k2l243gcdaohltqzpminou4x5gltmu2cgjgroyyd] example_inputs[24]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=3072, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[ymueslj5cs5k2l243gcdaohltqzpminou4x5gltmu2cgjgroyyd] example_inputs[25]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=3072, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[5yvj33oys7gdk532knvznzkakzkoiu5vfpvjdr4iy5pj5yukree] example_inputs[26]: TensorMetadata(dtype=torch.int64, shape=torch.Size([1, 64]), stride=(64, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=512, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[4do6jylr3mtx4d264ertpe6pziorxtcf2s4hikwtibwmouxw25h] example_inputs[27]: TensorMetadata(dtype=torch.float32, shape=torch.Size([1, 64, 768]), stride=(49152, 768, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=196608, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[rw2lfs4g3wumdacs5ykwrdhclqz5vzsnjfqmmo5ztwqnxn75jxd] example_inputs[28]: TensorMetadata(dtype=torch.float32, shape=torch.Size([64, 768]), stride=(768, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=196608, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[yhigovqtqf2ic6iv63ysozcs3xf23g6ytwxxo3ngkb2rkejwqhy] example_inputs[29]: TensorMetadata(dtype=torch.float32, shape=torch.Size([1, 12, 64, 64]), stride=(147456, 64, 2304, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=None, storage_offset=768, storage_bytes=589824, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[tkhdp6hpzz4wkrsn2gtkob6gjizy3tnh2i335gnvs5dqze2c635] example_inputs[30]: TensorMetadata(dtype=torch.float32, shape=torch.Size([1, 12, 64, 64]), stride=(147456, 64, 2304, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=None, storage_offset=0, storage_bytes=589824, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[tymrisxjw2ay2wkyisoywckuw25f6qkcnbqn6ixlh4bzobrvrpw] example_inputs[31]: TensorMetadata(dtype=torch.float32, shape=torch.Size([1, 12, 64, 64]), stride=(147456, 64, 2304, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=None, 
storage_offset=1536, storage_bytes=589824, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[vwryj3hywd6syuzjzojtwl2i75fr4o3frmbjuk3jj5iywt4uhml] example_inputs[32]: TensorMetadata(dtype=torch.float32, shape=torch.Size([1, 12, 64, 64]), stride=(49152, 64, 768, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=None, storage_offset=0, storage_bytes=196608, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[ypapltbv2xbwkwfrnhvu5iqgrtqrjol43wgxrobbgunlbawqjpw] example_inputs[33]: TensorMetadata(dtype=torch.float32, shape=torch.Size([1, 12, 64]), stride=(768, 64, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=3072, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[tupemxj7hujko5zasqsng7u56x3chsaznovjmn2zs4kn5wlvoce] example_inputs[34]: TensorMetadata(dtype=torch.int64, shape=torch.Size([]), stride=(), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=8, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[tupemxj7hujko5zasqsng7u56x3chsaznovjmn2zs4kn5wlvoce] example_inputs[35]: TensorMetadata(dtype=torch.int64, shape=torch.Size([]), stride=(), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=8, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[4do6jylr3mtx4d264ertpe6pziorxtcf2s4hikwtibwmouxw25h] example_inputs[36]: TensorMetadata(dtype=torch.float32, shape=torch.Size([1, 64, 768]), stride=(49152, 768, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=196608, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[rw2lfs4g3wumdacs5ykwrdhclqz5vzsnjfqmmo5ztwqnxn75jxd] example_inputs[37]: TensorMetadata(dtype=torch.float32, shape=torch.Size([64, 768]), stride=(768, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=196608, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[y3abg6r4y5l3gztzm3humpnaec6pvtrvrqjeyknxzxbyuum5hro] example_inputs[38]: TensorMetadata(dtype=torch.float32, shape=torch.Size([64, 3072]), stride=(3072, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=786432, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[y3abg6r4y5l3gztzm3humpnaec6pvtrvrqjeyknxzxbyuum5hro] example_inputs[39]: TensorMetadata(dtype=torch.float32, shape=torch.Size([64, 3072]), stride=(3072, 1), 
device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=786432, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[4do6jylr3mtx4d264ertpe6pziorxtcf2s4hikwtibwmouxw25h] example_inputs[40]: TensorMetadata(dtype=torch.float32, shape=torch.Size([1, 64, 768]), stride=(49152, 768, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=196608, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[rw2lfs4g3wumdacs5ykwrdhclqz5vzsnjfqmmo5ztwqnxn75jxd] example_inputs[41]: TensorMetadata(dtype=torch.float32, shape=torch.Size([64, 768]), stride=(768, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=196608, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[yhigovqtqf2ic6iv63ysozcs3xf23g6ytwxxo3ngkb2rkejwqhy] example_inputs[42]: TensorMetadata(dtype=torch.float32, shape=torch.Size([1, 12, 64, 64]), stride=(147456, 64, 2304, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=None, storage_offset=768, storage_bytes=589824, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[tkhdp6hpzz4wkrsn2gtkob6gjizy3tnh2i335gnvs5dqze2c635] example_inputs[43]: TensorMetadata(dtype=torch.float32, shape=torch.Size([1, 12, 64, 64]), stride=(147456, 64, 2304, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=None, storage_offset=0, storage_bytes=589824, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[tymrisxjw2ay2wkyisoywckuw25f6qkcnbqn6ixlh4bzobrvrpw] example_inputs[44]: TensorMetadata(dtype=torch.float32, shape=torch.Size([1, 12, 64, 64]), stride=(147456, 64, 2304, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=None, storage_offset=1536, storage_bytes=589824, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[vwryj3hywd6syuzjzojtwl2i75fr4o3frmbjuk3jj5iywt4uhml] example_inputs[45]: TensorMetadata(dtype=torch.float32, shape=torch.Size([1, 12, 64, 64]), stride=(49152, 64, 768, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=None, storage_offset=0, storage_bytes=196608, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[ypapltbv2xbwkwfrnhvu5iqgrtqrjol43wgxrobbgunlbawqjpw] example_inputs[46]: TensorMetadata(dtype=torch.float32, shape=torch.Size([1, 12, 64]), stride=(768, 64, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=3072, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", 
"[tupemxj7hujko5zasqsng7u56x3chsaznovjmn2zs4kn5wlvoce] example_inputs[47]: TensorMetadata(dtype=torch.int64, shape=torch.Size([]), stride=(), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=8, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[tupemxj7hujko5zasqsng7u56x3chsaznovjmn2zs4kn5wlvoce] example_inputs[48]: TensorMetadata(dtype=torch.int64, shape=torch.Size([]), stride=(), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=8, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[4do6jylr3mtx4d264ertpe6pziorxtcf2s4hikwtibwmouxw25h] example_inputs[49]: TensorMetadata(dtype=torch.float32, shape=torch.Size([1, 64, 768]), stride=(49152, 768, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=196608, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[rw2lfs4g3wumdacs5ykwrdhclqz5vzsnjfqmmo5ztwqnxn75jxd] example_inputs[50]: TensorMetadata(dtype=torch.float32, shape=torch.Size([64, 768]), stride=(768, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=196608, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[y3abg6r4y5l3gztzm3humpnaec6pvtrvrqjeyknxzxbyuum5hro] example_inputs[51]: TensorMetadata(dtype=torch.float32, shape=torch.Size([64, 3072]), stride=(3072, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=786432, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[y3abg6r4y5l3gztzm3humpnaec6pvtrvrqjeyknxzxbyuum5hro] example_inputs[52]: TensorMetadata(dtype=torch.float32, shape=torch.Size([64, 3072]), stride=(3072, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=786432, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[4do6jylr3mtx4d264ertpe6pziorxtcf2s4hikwtibwmouxw25h] example_inputs[53]: TensorMetadata(dtype=torch.float32, shape=torch.Size([1, 64, 768]), stride=(49152, 768, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=196608, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[rw2lfs4g3wumdacs5ykwrdhclqz5vzsnjfqmmo5ztwqnxn75jxd] example_inputs[54]: TensorMetadata(dtype=torch.float32, shape=torch.Size([64, 768]), stride=(768, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=196608, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, 
is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[yhigovqtqf2ic6iv63ysozcs3xf23g6ytwxxo3ngkb2rkejwqhy] example_inputs[55]: TensorMetadata(dtype=torch.float32, shape=torch.Size([1, 12, 64, 64]), stride=(147456, 64, 2304, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=None, storage_offset=768, storage_bytes=589824, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[tkhdp6hpzz4wkrsn2gtkob6gjizy3tnh2i335gnvs5dqze2c635] example_inputs[56]: TensorMetadata(dtype=torch.float32, shape=torch.Size([1, 12, 64, 64]), stride=(147456, 64, 2304, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=None, storage_offset=0, storage_bytes=589824, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[tymrisxjw2ay2wkyisoywckuw25f6qkcnbqn6ixlh4bzobrvrpw] example_inputs[57]: TensorMetadata(dtype=torch.float32, shape=torch.Size([1, 12, 64, 64]), stride=(147456, 64, 2304, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=None, storage_offset=1536, storage_bytes=589824, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[vwryj3hywd6syuzjzojtwl2i75fr4o3frmbjuk3jj5iywt4uhml] example_inputs[58]: TensorMetadata(dtype=torch.float32, shape=torch.Size([1, 12, 64, 64]), stride=(49152, 64, 768, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=None, storage_offset=0, storage_bytes=196608, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[ypapltbv2xbwkwfrnhvu5iqgrtqrjol43wgxrobbgunlbawqjpw] example_inputs[59]: TensorMetadata(dtype=torch.float32, shape=torch.Size([1, 12, 64]), stride=(768, 64, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=3072, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[tupemxj7hujko5zasqsng7u56x3chsaznovjmn2zs4kn5wlvoce] example_inputs[60]: TensorMetadata(dtype=torch.int64, shape=torch.Size([]), stride=(), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=8, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[tupemxj7hujko5zasqsng7u56x3chsaznovjmn2zs4kn5wlvoce] example_inputs[61]: TensorMetadata(dtype=torch.int64, shape=torch.Size([]), stride=(), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=8, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[4do6jylr3mtx4d264ertpe6pziorxtcf2s4hikwtibwmouxw25h] example_inputs[62]: TensorMetadata(dtype=torch.float32, shape=torch.Size([1, 64, 768]), stride=(49152, 768, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=196608, 
requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[rw2lfs4g3wumdacs5ykwrdhclqz5vzsnjfqmmo5ztwqnxn75jxd] example_inputs[63]: TensorMetadata(dtype=torch.float32, shape=torch.Size([64, 768]), stride=(768, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=196608, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[y3abg6r4y5l3gztzm3humpnaec6pvtrvrqjeyknxzxbyuum5hro] example_inputs[64]: TensorMetadata(dtype=torch.float32, shape=torch.Size([64, 3072]), stride=(3072, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=786432, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[y3abg6r4y5l3gztzm3humpnaec6pvtrvrqjeyknxzxbyuum5hro] example_inputs[65]: TensorMetadata(dtype=torch.float32, shape=torch.Size([64, 3072]), stride=(3072, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=786432, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[4do6jylr3mtx4d264ertpe6pziorxtcf2s4hikwtibwmouxw25h] example_inputs[66]: TensorMetadata(dtype=torch.float32, shape=torch.Size([1, 64, 768]), stride=(49152, 768, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=196608, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[rw2lfs4g3wumdacs5ykwrdhclqz5vzsnjfqmmo5ztwqnxn75jxd] example_inputs[67]: TensorMetadata(dtype=torch.float32, shape=torch.Size([64, 768]), stride=(768, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=196608, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[yhigovqtqf2ic6iv63ysozcs3xf23g6ytwxxo3ngkb2rkejwqhy] example_inputs[68]: TensorMetadata(dtype=torch.float32, shape=torch.Size([1, 12, 64, 64]), stride=(147456, 64, 2304, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=None, storage_offset=768, storage_bytes=589824, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[tkhdp6hpzz4wkrsn2gtkob6gjizy3tnh2i335gnvs5dqze2c635] example_inputs[69]: TensorMetadata(dtype=torch.float32, shape=torch.Size([1, 12, 64, 64]), stride=(147456, 64, 2304, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=None, storage_offset=0, storage_bytes=589824, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[tymrisxjw2ay2wkyisoywckuw25f6qkcnbqn6ixlh4bzobrvrpw] example_inputs[70]: TensorMetadata(dtype=torch.float32, shape=torch.Size([1, 12, 64, 64]), stride=(147456, 64, 2304, 1), 
device=device(type='cuda', index=0), layout=torch.strided, memory_format=None, storage_offset=1536, storage_bytes=589824, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[vwryj3hywd6syuzjzojtwl2i75fr4o3frmbjuk3jj5iywt4uhml] example_inputs[71]: TensorMetadata(dtype=torch.float32, shape=torch.Size([1, 12, 64, 64]), stride=(49152, 64, 768, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=None, storage_offset=0, storage_bytes=196608, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[ypapltbv2xbwkwfrnhvu5iqgrtqrjol43wgxrobbgunlbawqjpw] example_inputs[72]: TensorMetadata(dtype=torch.float32, shape=torch.Size([1, 12, 64]), stride=(768, 64, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=3072, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[tupemxj7hujko5zasqsng7u56x3chsaznovjmn2zs4kn5wlvoce] example_inputs[73]: TensorMetadata(dtype=torch.int64, shape=torch.Size([]), stride=(), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=8, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[tupemxj7hujko5zasqsng7u56x3chsaznovjmn2zs4kn5wlvoce] example_inputs[74]: TensorMetadata(dtype=torch.int64, shape=torch.Size([]), stride=(), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=8, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[4do6jylr3mtx4d264ertpe6pziorxtcf2s4hikwtibwmouxw25h] example_inputs[75]: TensorMetadata(dtype=torch.float32, shape=torch.Size([1, 64, 768]), stride=(49152, 768, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=196608, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[rw2lfs4g3wumdacs5ykwrdhclqz5vzsnjfqmmo5ztwqnxn75jxd] example_inputs[76]: TensorMetadata(dtype=torch.float32, shape=torch.Size([64, 768]), stride=(768, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=196608, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[y3abg6r4y5l3gztzm3humpnaec6pvtrvrqjeyknxzxbyuum5hro] example_inputs[77]: TensorMetadata(dtype=torch.float32, shape=torch.Size([64, 3072]), stride=(3072, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=786432, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[y3abg6r4y5l3gztzm3humpnaec6pvtrvrqjeyknxzxbyuum5hro] example_inputs[78]: 
TensorMetadata(dtype=torch.float32, shape=torch.Size([64, 3072]), stride=(3072, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=786432, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[4do6jylr3mtx4d264ertpe6pziorxtcf2s4hikwtibwmouxw25h] example_inputs[79]: TensorMetadata(dtype=torch.float32, shape=torch.Size([1, 64, 768]), stride=(49152, 768, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=196608, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[rw2lfs4g3wumdacs5ykwrdhclqz5vzsnjfqmmo5ztwqnxn75jxd] example_inputs[80]: TensorMetadata(dtype=torch.float32, shape=torch.Size([64, 768]), stride=(768, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=196608, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[yhigovqtqf2ic6iv63ysozcs3xf23g6ytwxxo3ngkb2rkejwqhy] example_inputs[81]: TensorMetadata(dtype=torch.float32, shape=torch.Size([1, 12, 64, 64]), stride=(147456, 64, 2304, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=None, storage_offset=768, storage_bytes=589824, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[tkhdp6hpzz4wkrsn2gtkob6gjizy3tnh2i335gnvs5dqze2c635] example_inputs[82]: TensorMetadata(dtype=torch.float32, shape=torch.Size([1, 12, 64, 64]), stride=(147456, 64, 2304, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=None, storage_offset=0, storage_bytes=589824, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[tymrisxjw2ay2wkyisoywckuw25f6qkcnbqn6ixlh4bzobrvrpw] example_inputs[83]: TensorMetadata(dtype=torch.float32, shape=torch.Size([1, 12, 64, 64]), stride=(147456, 64, 2304, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=None, storage_offset=1536, storage_bytes=589824, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[vwryj3hywd6syuzjzojtwl2i75fr4o3frmbjuk3jj5iywt4uhml] example_inputs[84]: TensorMetadata(dtype=torch.float32, shape=torch.Size([1, 12, 64, 64]), stride=(49152, 64, 768, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=None, storage_offset=0, storage_bytes=196608, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[ypapltbv2xbwkwfrnhvu5iqgrtqrjol43wgxrobbgunlbawqjpw] example_inputs[85]: TensorMetadata(dtype=torch.float32, shape=torch.Size([1, 12, 64]), stride=(768, 64, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=3072, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, 
is_coalesced=None, dense_dim=None, sparse_dim=None)", "[tupemxj7hujko5zasqsng7u56x3chsaznovjmn2zs4kn5wlvoce] example_inputs[86]: TensorMetadata(dtype=torch.int64, shape=torch.Size([]), stride=(), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=8, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[tupemxj7hujko5zasqsng7u56x3chsaznovjmn2zs4kn5wlvoce] example_inputs[87]: TensorMetadata(dtype=torch.int64, shape=torch.Size([]), stride=(), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=8, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[4do6jylr3mtx4d264ertpe6pziorxtcf2s4hikwtibwmouxw25h] example_inputs[88]: TensorMetadata(dtype=torch.float32, shape=torch.Size([1, 64, 768]), stride=(49152, 768, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=196608, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[rw2lfs4g3wumdacs5ykwrdhclqz5vzsnjfqmmo5ztwqnxn75jxd] example_inputs[89]: TensorMetadata(dtype=torch.float32, shape=torch.Size([64, 768]), stride=(768, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=196608, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[y3abg6r4y5l3gztzm3humpnaec6pvtrvrqjeyknxzxbyuum5hro] example_inputs[90]: TensorMetadata(dtype=torch.float32, shape=torch.Size([64, 3072]), stride=(3072, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=786432, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[y3abg6r4y5l3gztzm3humpnaec6pvtrvrqjeyknxzxbyuum5hro] example_inputs[91]: TensorMetadata(dtype=torch.float32, shape=torch.Size([64, 3072]), stride=(3072, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=786432, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[4do6jylr3mtx4d264ertpe6pziorxtcf2s4hikwtibwmouxw25h] example_inputs[92]: TensorMetadata(dtype=torch.float32, shape=torch.Size([1, 64, 768]), stride=(49152, 768, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=196608, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[rw2lfs4g3wumdacs5ykwrdhclqz5vzsnjfqmmo5ztwqnxn75jxd] example_inputs[93]: TensorMetadata(dtype=torch.float32, shape=torch.Size([64, 768]), stride=(768, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=196608, requires_grad=False, 
is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[yhigovqtqf2ic6iv63ysozcs3xf23g6ytwxxo3ngkb2rkejwqhy] example_inputs[94]: TensorMetadata(dtype=torch.float32, shape=torch.Size([1, 12, 64, 64]), stride=(147456, 64, 2304, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=None, storage_offset=768, storage_bytes=589824, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[tkhdp6hpzz4wkrsn2gtkob6gjizy3tnh2i335gnvs5dqze2c635] example_inputs[95]: TensorMetadata(dtype=torch.float32, shape=torch.Size([1, 12, 64, 64]), stride=(147456, 64, 2304, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=None, storage_offset=0, storage_bytes=589824, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[tymrisxjw2ay2wkyisoywckuw25f6qkcnbqn6ixlh4bzobrvrpw] example_inputs[96]: TensorMetadata(dtype=torch.float32, shape=torch.Size([1, 12, 64, 64]), stride=(147456, 64, 2304, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=None, storage_offset=1536, storage_bytes=589824, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[vwryj3hywd6syuzjzojtwl2i75fr4o3frmbjuk3jj5iywt4uhml] example_inputs[97]: TensorMetadata(dtype=torch.float32, shape=torch.Size([1, 12, 64, 64]), stride=(49152, 64, 768, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=None, storage_offset=0, storage_bytes=196608, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[ypapltbv2xbwkwfrnhvu5iqgrtqrjol43wgxrobbgunlbawqjpw] example_inputs[98]: TensorMetadata(dtype=torch.float32, shape=torch.Size([1, 12, 64]), stride=(768, 64, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=3072, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[tupemxj7hujko5zasqsng7u56x3chsaznovjmn2zs4kn5wlvoce] example_inputs[99]: TensorMetadata(dtype=torch.int64, shape=torch.Size([]), stride=(), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=8, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[tupemxj7hujko5zasqsng7u56x3chsaznovjmn2zs4kn5wlvoce] example_inputs[100]: TensorMetadata(dtype=torch.int64, shape=torch.Size([]), stride=(), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=8, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[4do6jylr3mtx4d264ertpe6pziorxtcf2s4hikwtibwmouxw25h] example_inputs[101]: TensorMetadata(dtype=torch.float32, shape=torch.Size([1, 64, 768]), stride=(49152, 768, 1), device=device(type='cuda', index=0), layout=torch.strided, 
memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=196608, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[rw2lfs4g3wumdacs5ykwrdhclqz5vzsnjfqmmo5ztwqnxn75jxd] example_inputs[102]: TensorMetadata(dtype=torch.float32, shape=torch.Size([64, 768]), stride=(768, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=196608, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[y3abg6r4y5l3gztzm3humpnaec6pvtrvrqjeyknxzxbyuum5hro] example_inputs[103]: TensorMetadata(dtype=torch.float32, shape=torch.Size([64, 3072]), stride=(3072, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=786432, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[y3abg6r4y5l3gztzm3humpnaec6pvtrvrqjeyknxzxbyuum5hro] example_inputs[104]: TensorMetadata(dtype=torch.float32, shape=torch.Size([64, 3072]), stride=(3072, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=786432, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[4do6jylr3mtx4d264ertpe6pziorxtcf2s4hikwtibwmouxw25h] example_inputs[105]: TensorMetadata(dtype=torch.float32, shape=torch.Size([1, 64, 768]), stride=(49152, 768, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=196608, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[rw2lfs4g3wumdacs5ykwrdhclqz5vzsnjfqmmo5ztwqnxn75jxd] example_inputs[106]: TensorMetadata(dtype=torch.float32, shape=torch.Size([64, 768]), stride=(768, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=196608, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[yhigovqtqf2ic6iv63ysozcs3xf23g6ytwxxo3ngkb2rkejwqhy] example_inputs[107]: TensorMetadata(dtype=torch.float32, shape=torch.Size([1, 12, 64, 64]), stride=(147456, 64, 2304, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=None, storage_offset=768, storage_bytes=589824, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[tkhdp6hpzz4wkrsn2gtkob6gjizy3tnh2i335gnvs5dqze2c635] example_inputs[108]: TensorMetadata(dtype=torch.float32, shape=torch.Size([1, 12, 64, 64]), stride=(147456, 64, 2304, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=None, storage_offset=0, storage_bytes=589824, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[tymrisxjw2ay2wkyisoywckuw25f6qkcnbqn6ixlh4bzobrvrpw] example_inputs[109]: 
TensorMetadata(dtype=torch.float32, shape=torch.Size([1, 12, 64, 64]), stride=(147456, 64, 2304, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=None, storage_offset=1536, storage_bytes=589824, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[vwryj3hywd6syuzjzojtwl2i75fr4o3frmbjuk3jj5iywt4uhml] example_inputs[110]: TensorMetadata(dtype=torch.float32, shape=torch.Size([1, 12, 64, 64]), stride=(49152, 64, 768, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=None, storage_offset=0, storage_bytes=196608, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[ypapltbv2xbwkwfrnhvu5iqgrtqrjol43wgxrobbgunlbawqjpw] example_inputs[111]: TensorMetadata(dtype=torch.float32, shape=torch.Size([1, 12, 64]), stride=(768, 64, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=3072, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[tupemxj7hujko5zasqsng7u56x3chsaznovjmn2zs4kn5wlvoce] example_inputs[112]: TensorMetadata(dtype=torch.int64, shape=torch.Size([]), stride=(), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=8, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[tupemxj7hujko5zasqsng7u56x3chsaznovjmn2zs4kn5wlvoce] example_inputs[113]: TensorMetadata(dtype=torch.int64, shape=torch.Size([]), stride=(), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=8, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[4do6jylr3mtx4d264ertpe6pziorxtcf2s4hikwtibwmouxw25h] example_inputs[114]: TensorMetadata(dtype=torch.float32, shape=torch.Size([1, 64, 768]), stride=(49152, 768, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=196608, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[rw2lfs4g3wumdacs5ykwrdhclqz5vzsnjfqmmo5ztwqnxn75jxd] example_inputs[115]: TensorMetadata(dtype=torch.float32, shape=torch.Size([64, 768]), stride=(768, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=196608, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[y3abg6r4y5l3gztzm3humpnaec6pvtrvrqjeyknxzxbyuum5hro] example_inputs[116]: TensorMetadata(dtype=torch.float32, shape=torch.Size([64, 3072]), stride=(3072, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=786432, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, 
sparse_dim=None)", "[y3abg6r4y5l3gztzm3humpnaec6pvtrvrqjeyknxzxbyuum5hro] example_inputs[117]: TensorMetadata(dtype=torch.float32, shape=torch.Size([64, 3072]), stride=(3072, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=786432, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[4do6jylr3mtx4d264ertpe6pziorxtcf2s4hikwtibwmouxw25h] example_inputs[118]: TensorMetadata(dtype=torch.float32, shape=torch.Size([1, 64, 768]), stride=(49152, 768, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=196608, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[rw2lfs4g3wumdacs5ykwrdhclqz5vzsnjfqmmo5ztwqnxn75jxd] example_inputs[119]: TensorMetadata(dtype=torch.float32, shape=torch.Size([64, 768]), stride=(768, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=196608, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[yhigovqtqf2ic6iv63ysozcs3xf23g6ytwxxo3ngkb2rkejwqhy] example_inputs[120]: TensorMetadata(dtype=torch.float32, shape=torch.Size([1, 12, 64, 64]), stride=(147456, 64, 2304, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=None, storage_offset=768, storage_bytes=589824, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[tkhdp6hpzz4wkrsn2gtkob6gjizy3tnh2i335gnvs5dqze2c635] example_inputs[121]: TensorMetadata(dtype=torch.float32, shape=torch.Size([1, 12, 64, 64]), stride=(147456, 64, 2304, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=None, storage_offset=0, storage_bytes=589824, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[tymrisxjw2ay2wkyisoywckuw25f6qkcnbqn6ixlh4bzobrvrpw] example_inputs[122]: TensorMetadata(dtype=torch.float32, shape=torch.Size([1, 12, 64, 64]), stride=(147456, 64, 2304, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=None, storage_offset=1536, storage_bytes=589824, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[vwryj3hywd6syuzjzojtwl2i75fr4o3frmbjuk3jj5iywt4uhml] example_inputs[123]: TensorMetadata(dtype=torch.float32, shape=torch.Size([1, 12, 64, 64]), stride=(49152, 64, 768, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=None, storage_offset=0, storage_bytes=196608, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[ypapltbv2xbwkwfrnhvu5iqgrtqrjol43wgxrobbgunlbawqjpw] example_inputs[124]: TensorMetadata(dtype=torch.float32, shape=torch.Size([1, 12, 64]), stride=(768, 64, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=3072, requires_grad=False, 
is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[tupemxj7hujko5zasqsng7u56x3chsaznovjmn2zs4kn5wlvoce] example_inputs[125]: TensorMetadata(dtype=torch.int64, shape=torch.Size([]), stride=(), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=8, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[tupemxj7hujko5zasqsng7u56x3chsaznovjmn2zs4kn5wlvoce] example_inputs[126]: TensorMetadata(dtype=torch.int64, shape=torch.Size([]), stride=(), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=8, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[4do6jylr3mtx4d264ertpe6pziorxtcf2s4hikwtibwmouxw25h] example_inputs[127]: TensorMetadata(dtype=torch.float32, shape=torch.Size([1, 64, 768]), stride=(49152, 768, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=196608, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[rw2lfs4g3wumdacs5ykwrdhclqz5vzsnjfqmmo5ztwqnxn75jxd] example_inputs[128]: TensorMetadata(dtype=torch.float32, shape=torch.Size([64, 768]), stride=(768, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=196608, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[y3abg6r4y5l3gztzm3humpnaec6pvtrvrqjeyknxzxbyuum5hro] example_inputs[129]: TensorMetadata(dtype=torch.float32, shape=torch.Size([64, 3072]), stride=(3072, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=786432, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[y3abg6r4y5l3gztzm3humpnaec6pvtrvrqjeyknxzxbyuum5hro] example_inputs[130]: TensorMetadata(dtype=torch.float32, shape=torch.Size([64, 3072]), stride=(3072, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=786432, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[4do6jylr3mtx4d264ertpe6pziorxtcf2s4hikwtibwmouxw25h] example_inputs[131]: TensorMetadata(dtype=torch.float32, shape=torch.Size([1, 64, 768]), stride=(49152, 768, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=196608, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[rw2lfs4g3wumdacs5ykwrdhclqz5vzsnjfqmmo5ztwqnxn75jxd] example_inputs[132]: TensorMetadata(dtype=torch.float32, shape=torch.Size([64, 768]), stride=(768, 1), device=device(type='cuda', index=0), layout=torch.strided, 
memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=196608, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[yhigovqtqf2ic6iv63ysozcs3xf23g6ytwxxo3ngkb2rkejwqhy] example_inputs[133]: TensorMetadata(dtype=torch.float32, shape=torch.Size([1, 12, 64, 64]), stride=(147456, 64, 2304, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=None, storage_offset=768, storage_bytes=589824, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[tkhdp6hpzz4wkrsn2gtkob6gjizy3tnh2i335gnvs5dqze2c635] example_inputs[134]: TensorMetadata(dtype=torch.float32, shape=torch.Size([1, 12, 64, 64]), stride=(147456, 64, 2304, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=None, storage_offset=0, storage_bytes=589824, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[tymrisxjw2ay2wkyisoywckuw25f6qkcnbqn6ixlh4bzobrvrpw] example_inputs[135]: TensorMetadata(dtype=torch.float32, shape=torch.Size([1, 12, 64, 64]), stride=(147456, 64, 2304, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=None, storage_offset=1536, storage_bytes=589824, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[vwryj3hywd6syuzjzojtwl2i75fr4o3frmbjuk3jj5iywt4uhml] example_inputs[136]: TensorMetadata(dtype=torch.float32, shape=torch.Size([1, 12, 64, 64]), stride=(49152, 64, 768, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=None, storage_offset=0, storage_bytes=196608, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[ypapltbv2xbwkwfrnhvu5iqgrtqrjol43wgxrobbgunlbawqjpw] example_inputs[137]: TensorMetadata(dtype=torch.float32, shape=torch.Size([1, 12, 64]), stride=(768, 64, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=3072, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[tupemxj7hujko5zasqsng7u56x3chsaznovjmn2zs4kn5wlvoce] example_inputs[138]: TensorMetadata(dtype=torch.int64, shape=torch.Size([]), stride=(), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=8, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[tupemxj7hujko5zasqsng7u56x3chsaznovjmn2zs4kn5wlvoce] example_inputs[139]: TensorMetadata(dtype=torch.int64, shape=torch.Size([]), stride=(), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=8, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[4do6jylr3mtx4d264ertpe6pziorxtcf2s4hikwtibwmouxw25h] example_inputs[140]: TensorMetadata(dtype=torch.float32, shape=torch.Size([1, 64, 768]), 
stride=(49152, 768, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=196608, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[rw2lfs4g3wumdacs5ykwrdhclqz5vzsnjfqmmo5ztwqnxn75jxd] example_inputs[141]: TensorMetadata(dtype=torch.float32, shape=torch.Size([64, 768]), stride=(768, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=196608, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[y3abg6r4y5l3gztzm3humpnaec6pvtrvrqjeyknxzxbyuum5hro] example_inputs[142]: TensorMetadata(dtype=torch.float32, shape=torch.Size([64, 3072]), stride=(3072, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=786432, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[y3abg6r4y5l3gztzm3humpnaec6pvtrvrqjeyknxzxbyuum5hro] example_inputs[143]: TensorMetadata(dtype=torch.float32, shape=torch.Size([64, 3072]), stride=(3072, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=786432, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[4do6jylr3mtx4d264ertpe6pziorxtcf2s4hikwtibwmouxw25h] example_inputs[144]: TensorMetadata(dtype=torch.float32, shape=torch.Size([1, 64, 768]), stride=(49152, 768, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=196608, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[rw2lfs4g3wumdacs5ykwrdhclqz5vzsnjfqmmo5ztwqnxn75jxd] example_inputs[145]: TensorMetadata(dtype=torch.float32, shape=torch.Size([64, 768]), stride=(768, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=196608, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[yhigovqtqf2ic6iv63ysozcs3xf23g6ytwxxo3ngkb2rkejwqhy] example_inputs[146]: TensorMetadata(dtype=torch.float32, shape=torch.Size([1, 12, 64, 64]), stride=(147456, 64, 2304, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=None, storage_offset=768, storage_bytes=589824, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[tkhdp6hpzz4wkrsn2gtkob6gjizy3tnh2i335gnvs5dqze2c635] example_inputs[147]: TensorMetadata(dtype=torch.float32, shape=torch.Size([1, 12, 64, 64]), stride=(147456, 64, 2304, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=None, storage_offset=0, storage_bytes=589824, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", 
"[tymrisxjw2ay2wkyisoywckuw25f6qkcnbqn6ixlh4bzobrvrpw] example_inputs[148]: TensorMetadata(dtype=torch.float32, shape=torch.Size([1, 12, 64, 64]), stride=(147456, 64, 2304, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=None, storage_offset=1536, storage_bytes=589824, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[vwryj3hywd6syuzjzojtwl2i75fr4o3frmbjuk3jj5iywt4uhml] example_inputs[149]: TensorMetadata(dtype=torch.float32, shape=torch.Size([1, 12, 64, 64]), stride=(49152, 64, 768, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=None, storage_offset=0, storage_bytes=196608, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[ypapltbv2xbwkwfrnhvu5iqgrtqrjol43wgxrobbgunlbawqjpw] example_inputs[150]: TensorMetadata(dtype=torch.float32, shape=torch.Size([1, 12, 64]), stride=(768, 64, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=3072, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[tupemxj7hujko5zasqsng7u56x3chsaznovjmn2zs4kn5wlvoce] example_inputs[151]: TensorMetadata(dtype=torch.int64, shape=torch.Size([]), stride=(), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=8, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[tupemxj7hujko5zasqsng7u56x3chsaznovjmn2zs4kn5wlvoce] example_inputs[152]: TensorMetadata(dtype=torch.int64, shape=torch.Size([]), stride=(), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=8, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[4do6jylr3mtx4d264ertpe6pziorxtcf2s4hikwtibwmouxw25h] example_inputs[153]: TensorMetadata(dtype=torch.float32, shape=torch.Size([1, 64, 768]), stride=(49152, 768, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=196608, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[rw2lfs4g3wumdacs5ykwrdhclqz5vzsnjfqmmo5ztwqnxn75jxd] example_inputs[154]: TensorMetadata(dtype=torch.float32, shape=torch.Size([64, 768]), stride=(768, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=196608, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[y3abg6r4y5l3gztzm3humpnaec6pvtrvrqjeyknxzxbyuum5hro] example_inputs[155]: TensorMetadata(dtype=torch.float32, shape=torch.Size([64, 3072]), stride=(3072, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=786432, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, 
is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[y3abg6r4y5l3gztzm3humpnaec6pvtrvrqjeyknxzxbyuum5hro] example_inputs[156]: TensorMetadata(dtype=torch.float32, shape=torch.Size([64, 3072]), stride=(3072, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=786432, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[4do6jylr3mtx4d264ertpe6pziorxtcf2s4hikwtibwmouxw25h] example_inputs[157]: TensorMetadata(dtype=torch.float32, shape=torch.Size([1, 64, 768]), stride=(49152, 768, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=196608, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[rw2lfs4g3wumdacs5ykwrdhclqz5vzsnjfqmmo5ztwqnxn75jxd] example_inputs[158]: TensorMetadata(dtype=torch.float32, shape=torch.Size([64, 768]), stride=(768, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=196608, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[yhigovqtqf2ic6iv63ysozcs3xf23g6ytwxxo3ngkb2rkejwqhy] example_inputs[159]: TensorMetadata(dtype=torch.float32, shape=torch.Size([1, 12, 64, 64]), stride=(147456, 64, 2304, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=None, storage_offset=768, storage_bytes=589824, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[tkhdp6hpzz4wkrsn2gtkob6gjizy3tnh2i335gnvs5dqze2c635] example_inputs[160]: TensorMetadata(dtype=torch.float32, shape=torch.Size([1, 12, 64, 64]), stride=(147456, 64, 2304, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=None, storage_offset=0, storage_bytes=589824, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[tymrisxjw2ay2wkyisoywckuw25f6qkcnbqn6ixlh4bzobrvrpw] example_inputs[161]: TensorMetadata(dtype=torch.float32, shape=torch.Size([1, 12, 64, 64]), stride=(147456, 64, 2304, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=None, storage_offset=1536, storage_bytes=589824, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[vwryj3hywd6syuzjzojtwl2i75fr4o3frmbjuk3jj5iywt4uhml] example_inputs[162]: TensorMetadata(dtype=torch.float32, shape=torch.Size([1, 12, 64, 64]), stride=(49152, 64, 768, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=None, storage_offset=0, storage_bytes=196608, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[ypapltbv2xbwkwfrnhvu5iqgrtqrjol43wgxrobbgunlbawqjpw] example_inputs[163]: TensorMetadata(dtype=torch.float32, shape=torch.Size([1, 12, 64]), stride=(768, 64, 1), device=device(type='cuda', index=0), layout=torch.strided, 
memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=3072, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[tupemxj7hujko5zasqsng7u56x3chsaznovjmn2zs4kn5wlvoce] example_inputs[164]: TensorMetadata(dtype=torch.int64, shape=torch.Size([]), stride=(), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=8, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[tupemxj7hujko5zasqsng7u56x3chsaznovjmn2zs4kn5wlvoce] example_inputs[165]: TensorMetadata(dtype=torch.int64, shape=torch.Size([]), stride=(), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=8, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[4do6jylr3mtx4d264ertpe6pziorxtcf2s4hikwtibwmouxw25h] example_inputs[166]: TensorMetadata(dtype=torch.float32, shape=torch.Size([1, 64, 768]), stride=(49152, 768, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=196608, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[rw2lfs4g3wumdacs5ykwrdhclqz5vzsnjfqmmo5ztwqnxn75jxd] example_inputs[167]: TensorMetadata(dtype=torch.float32, shape=torch.Size([64, 768]), stride=(768, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=196608, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[y3abg6r4y5l3gztzm3humpnaec6pvtrvrqjeyknxzxbyuum5hro] example_inputs[168]: TensorMetadata(dtype=torch.float32, shape=torch.Size([64, 3072]), stride=(3072, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=786432, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[y3abg6r4y5l3gztzm3humpnaec6pvtrvrqjeyknxzxbyuum5hro] example_inputs[169]: TensorMetadata(dtype=torch.float32, shape=torch.Size([64, 3072]), stride=(3072, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=786432, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[4do6jylr3mtx4d264ertpe6pziorxtcf2s4hikwtibwmouxw25h] example_inputs[170]: TensorMetadata(dtype=torch.float32, shape=torch.Size([1, 64, 768]), stride=(49152, 768, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=196608, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[rw2lfs4g3wumdacs5ykwrdhclqz5vzsnjfqmmo5ztwqnxn75jxd] example_inputs[171]: TensorMetadata(dtype=torch.float32, shape=torch.Size([64, 
768]), stride=(768, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=196608, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[yhigovqtqf2ic6iv63ysozcs3xf23g6ytwxxo3ngkb2rkejwqhy] example_inputs[172]: TensorMetadata(dtype=torch.float32, shape=torch.Size([1, 12, 64, 64]), stride=(147456, 64, 2304, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=None, storage_offset=768, storage_bytes=589824, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[tkhdp6hpzz4wkrsn2gtkob6gjizy3tnh2i335gnvs5dqze2c635] example_inputs[173]: TensorMetadata(dtype=torch.float32, shape=torch.Size([1, 12, 64, 64]), stride=(147456, 64, 2304, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=None, storage_offset=0, storage_bytes=589824, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[tymrisxjw2ay2wkyisoywckuw25f6qkcnbqn6ixlh4bzobrvrpw] example_inputs[174]: TensorMetadata(dtype=torch.float32, shape=torch.Size([1, 12, 64, 64]), stride=(147456, 64, 2304, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=None, storage_offset=1536, storage_bytes=589824, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[vwryj3hywd6syuzjzojtwl2i75fr4o3frmbjuk3jj5iywt4uhml] example_inputs[175]: TensorMetadata(dtype=torch.float32, shape=torch.Size([1, 12, 64, 64]), stride=(49152, 64, 768, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=None, storage_offset=0, storage_bytes=196608, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[ypapltbv2xbwkwfrnhvu5iqgrtqrjol43wgxrobbgunlbawqjpw] example_inputs[176]: TensorMetadata(dtype=torch.float32, shape=torch.Size([1, 12, 64]), stride=(768, 64, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=3072, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[tupemxj7hujko5zasqsng7u56x3chsaznovjmn2zs4kn5wlvoce] example_inputs[177]: TensorMetadata(dtype=torch.int64, shape=torch.Size([]), stride=(), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=8, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[tupemxj7hujko5zasqsng7u56x3chsaznovjmn2zs4kn5wlvoce] example_inputs[178]: TensorMetadata(dtype=torch.int64, shape=torch.Size([]), stride=(), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=8, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[4do6jylr3mtx4d264ertpe6pziorxtcf2s4hikwtibwmouxw25h] 
example_inputs[179]: TensorMetadata(dtype=torch.float32, shape=torch.Size([1, 64, 768]), stride=(49152, 768, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=196608, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[rw2lfs4g3wumdacs5ykwrdhclqz5vzsnjfqmmo5ztwqnxn75jxd] example_inputs[180]: TensorMetadata(dtype=torch.float32, shape=torch.Size([64, 768]), stride=(768, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=196608, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[y3abg6r4y5l3gztzm3humpnaec6pvtrvrqjeyknxzxbyuum5hro] example_inputs[181]: TensorMetadata(dtype=torch.float32, shape=torch.Size([64, 3072]), stride=(3072, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=786432, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[y3abg6r4y5l3gztzm3humpnaec6pvtrvrqjeyknxzxbyuum5hro] example_inputs[182]: TensorMetadata(dtype=torch.float32, shape=torch.Size([64, 3072]), stride=(3072, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=786432, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[4do6jylr3mtx4d264ertpe6pziorxtcf2s4hikwtibwmouxw25h] example_inputs[183]: TensorMetadata(dtype=torch.float32, shape=torch.Size([1, 64, 768]), stride=(49152, 768, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=196608, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[aba44qxan7tyih7ljdxyqka53vkn25cmdzgth5cyl2s7qorx7vi] example_inputs[184]: TensorMetadata(dtype=torch.int64, shape=torch.Size([1]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=8, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[qitbyzr7emyctium3gjpb6gcr75vrxwd24qiyojnre7qqd7zo4f] example_inputs[185]: TensorMetadata(dtype=torch.float32, shape=torch.Size([1, 768]), stride=(768, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=3072, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[l36txgwatro2oumaemq3fn7fzd6c7rqt7jkya4whofwgykwhyac] example_inputs[186]: TensorMetadata(dtype=torch.float32, shape=torch.Size([50304, 768]), stride=(768, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=154533888, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, 
is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[mxqz7gd4dbgzufqocz5p7oivwzmpjmkhr6kbxm4dewvedblut3z] example_inputs[187]: TensorMetadata(dtype=torch.float32, shape=torch.Size([1, 64, 1]), stride=(64, 1, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=256, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[6v4azk5hvrkyq22k6mfa2l7ovaddatmim7mcvcpdku2ieirjd64] example_inputs[188]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768, 3072]), stride=(3072, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=9437184, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[aafg74anlk7yqmlg6ucup4tmjwhskblh75z227dcfmwpfzw7dzm] example_inputs[189]: TensorMetadata(dtype=torch.float32, shape=torch.Size([3072, 768]), stride=(768, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=9437184, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[mxqz7gd4dbgzufqocz5p7oivwzmpjmkhr6kbxm4dewvedblut3z] example_inputs[190]: TensorMetadata(dtype=torch.float32, shape=torch.Size([1, 64, 1]), stride=(64, 1, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=256, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[a64xnvbb6q6gew5c3aovaeqivj3hstrmxkbra3eidivx42znnuv] example_inputs[191]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768, 768]), stride=(768, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=2359296, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[ku6bnu4u7osvly4jsmf2w2orf22tghfytwzq6ycj3otmnq6f3wn] example_inputs[192]: TensorMetadata(dtype=torch.float32, shape=torch.Size([2304, 768]), stride=(768, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=7077888, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[mxqz7gd4dbgzufqocz5p7oivwzmpjmkhr6kbxm4dewvedblut3z] example_inputs[193]: TensorMetadata(dtype=torch.float32, shape=torch.Size([1, 64, 1]), stride=(64, 1, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=256, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[6v4azk5hvrkyq22k6mfa2l7ovaddatmim7mcvcpdku2ieirjd64] example_inputs[194]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768, 3072]), stride=(3072, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, 
storage_offset=0, storage_bytes=9437184, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[aafg74anlk7yqmlg6ucup4tmjwhskblh75z227dcfmwpfzw7dzm] example_inputs[195]: TensorMetadata(dtype=torch.float32, shape=torch.Size([3072, 768]), stride=(768, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=9437184, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[mxqz7gd4dbgzufqocz5p7oivwzmpjmkhr6kbxm4dewvedblut3z] example_inputs[196]: TensorMetadata(dtype=torch.float32, shape=torch.Size([1, 64, 1]), stride=(64, 1, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=256, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[a64xnvbb6q6gew5c3aovaeqivj3hstrmxkbra3eidivx42znnuv] example_inputs[197]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768, 768]), stride=(768, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=2359296, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[ku6bnu4u7osvly4jsmf2w2orf22tghfytwzq6ycj3otmnq6f3wn] example_inputs[198]: TensorMetadata(dtype=torch.float32, shape=torch.Size([2304, 768]), stride=(768, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=7077888, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[mxqz7gd4dbgzufqocz5p7oivwzmpjmkhr6kbxm4dewvedblut3z] example_inputs[199]: TensorMetadata(dtype=torch.float32, shape=torch.Size([1, 64, 1]), stride=(64, 1, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=256, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[6v4azk5hvrkyq22k6mfa2l7ovaddatmim7mcvcpdku2ieirjd64] example_inputs[200]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768, 3072]), stride=(3072, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=9437184, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[aafg74anlk7yqmlg6ucup4tmjwhskblh75z227dcfmwpfzw7dzm] example_inputs[201]: TensorMetadata(dtype=torch.float32, shape=torch.Size([3072, 768]), stride=(768, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=9437184, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[mxqz7gd4dbgzufqocz5p7oivwzmpjmkhr6kbxm4dewvedblut3z] example_inputs[202]: TensorMetadata(dtype=torch.float32, shape=torch.Size([1, 64, 
1]), stride=(64, 1, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=256, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[a64xnvbb6q6gew5c3aovaeqivj3hstrmxkbra3eidivx42znnuv] example_inputs[203]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768, 768]), stride=(768, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=2359296, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[ku6bnu4u7osvly4jsmf2w2orf22tghfytwzq6ycj3otmnq6f3wn] example_inputs[204]: TensorMetadata(dtype=torch.float32, shape=torch.Size([2304, 768]), stride=(768, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=7077888, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[mxqz7gd4dbgzufqocz5p7oivwzmpjmkhr6kbxm4dewvedblut3z] example_inputs[205]: TensorMetadata(dtype=torch.float32, shape=torch.Size([1, 64, 1]), stride=(64, 1, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=256, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[6v4azk5hvrkyq22k6mfa2l7ovaddatmim7mcvcpdku2ieirjd64] example_inputs[206]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768, 3072]), stride=(3072, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=9437184, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[aafg74anlk7yqmlg6ucup4tmjwhskblh75z227dcfmwpfzw7dzm] example_inputs[207]: TensorMetadata(dtype=torch.float32, shape=torch.Size([3072, 768]), stride=(768, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=9437184, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[mxqz7gd4dbgzufqocz5p7oivwzmpjmkhr6kbxm4dewvedblut3z] example_inputs[208]: TensorMetadata(dtype=torch.float32, shape=torch.Size([1, 64, 1]), stride=(64, 1, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=256, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[a64xnvbb6q6gew5c3aovaeqivj3hstrmxkbra3eidivx42znnuv] example_inputs[209]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768, 768]), stride=(768, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=2359296, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", 
"[ku6bnu4u7osvly4jsmf2w2orf22tghfytwzq6ycj3otmnq6f3wn] example_inputs[210]: TensorMetadata(dtype=torch.float32, shape=torch.Size([2304, 768]), stride=(768, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=7077888, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[mxqz7gd4dbgzufqocz5p7oivwzmpjmkhr6kbxm4dewvedblut3z] example_inputs[211]: TensorMetadata(dtype=torch.float32, shape=torch.Size([1, 64, 1]), stride=(64, 1, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=256, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[6v4azk5hvrkyq22k6mfa2l7ovaddatmim7mcvcpdku2ieirjd64] example_inputs[212]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768, 3072]), stride=(3072, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=9437184, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[aafg74anlk7yqmlg6ucup4tmjwhskblh75z227dcfmwpfzw7dzm] example_inputs[213]: TensorMetadata(dtype=torch.float32, shape=torch.Size([3072, 768]), stride=(768, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=9437184, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[mxqz7gd4dbgzufqocz5p7oivwzmpjmkhr6kbxm4dewvedblut3z] example_inputs[214]: TensorMetadata(dtype=torch.float32, shape=torch.Size([1, 64, 1]), stride=(64, 1, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=256, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[a64xnvbb6q6gew5c3aovaeqivj3hstrmxkbra3eidivx42znnuv] example_inputs[215]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768, 768]), stride=(768, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=2359296, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[ku6bnu4u7osvly4jsmf2w2orf22tghfytwzq6ycj3otmnq6f3wn] example_inputs[216]: TensorMetadata(dtype=torch.float32, shape=torch.Size([2304, 768]), stride=(768, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=7077888, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[mxqz7gd4dbgzufqocz5p7oivwzmpjmkhr6kbxm4dewvedblut3z] example_inputs[217]: TensorMetadata(dtype=torch.float32, shape=torch.Size([1, 64, 1]), stride=(64, 1, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=256, requires_grad=False, is_quantized=False, 
is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[6v4azk5hvrkyq22k6mfa2l7ovaddatmim7mcvcpdku2ieirjd64] example_inputs[218]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768, 3072]), stride=(3072, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=9437184, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[aafg74anlk7yqmlg6ucup4tmjwhskblh75z227dcfmwpfzw7dzm] example_inputs[219]: TensorMetadata(dtype=torch.float32, shape=torch.Size([3072, 768]), stride=(768, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=9437184, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[mxqz7gd4dbgzufqocz5p7oivwzmpjmkhr6kbxm4dewvedblut3z] example_inputs[220]: TensorMetadata(dtype=torch.float32, shape=torch.Size([1, 64, 1]), stride=(64, 1, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=256, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[a64xnvbb6q6gew5c3aovaeqivj3hstrmxkbra3eidivx42znnuv] example_inputs[221]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768, 768]), stride=(768, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=2359296, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[ku6bnu4u7osvly4jsmf2w2orf22tghfytwzq6ycj3otmnq6f3wn] example_inputs[222]: TensorMetadata(dtype=torch.float32, shape=torch.Size([2304, 768]), stride=(768, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=7077888, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[mxqz7gd4dbgzufqocz5p7oivwzmpjmkhr6kbxm4dewvedblut3z] example_inputs[223]: TensorMetadata(dtype=torch.float32, shape=torch.Size([1, 64, 1]), stride=(64, 1, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=256, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[6v4azk5hvrkyq22k6mfa2l7ovaddatmim7mcvcpdku2ieirjd64] example_inputs[224]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768, 3072]), stride=(3072, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=9437184, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[aafg74anlk7yqmlg6ucup4tmjwhskblh75z227dcfmwpfzw7dzm] example_inputs[225]: TensorMetadata(dtype=torch.float32, shape=torch.Size([3072, 768]), stride=(768, 1), device=device(type='cuda', index=0), layout=torch.strided, 
memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=9437184, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[mxqz7gd4dbgzufqocz5p7oivwzmpjmkhr6kbxm4dewvedblut3z] example_inputs[226]: TensorMetadata(dtype=torch.float32, shape=torch.Size([1, 64, 1]), stride=(64, 1, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=256, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[a64xnvbb6q6gew5c3aovaeqivj3hstrmxkbra3eidivx42znnuv] example_inputs[227]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768, 768]), stride=(768, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=2359296, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[ku6bnu4u7osvly4jsmf2w2orf22tghfytwzq6ycj3otmnq6f3wn] example_inputs[228]: TensorMetadata(dtype=torch.float32, shape=torch.Size([2304, 768]), stride=(768, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=7077888, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[mxqz7gd4dbgzufqocz5p7oivwzmpjmkhr6kbxm4dewvedblut3z] example_inputs[229]: TensorMetadata(dtype=torch.float32, shape=torch.Size([1, 64, 1]), stride=(64, 1, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=256, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[6v4azk5hvrkyq22k6mfa2l7ovaddatmim7mcvcpdku2ieirjd64] example_inputs[230]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768, 3072]), stride=(3072, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=9437184, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[aafg74anlk7yqmlg6ucup4tmjwhskblh75z227dcfmwpfzw7dzm] example_inputs[231]: TensorMetadata(dtype=torch.float32, shape=torch.Size([3072, 768]), stride=(768, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=9437184, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[mxqz7gd4dbgzufqocz5p7oivwzmpjmkhr6kbxm4dewvedblut3z] example_inputs[232]: TensorMetadata(dtype=torch.float32, shape=torch.Size([1, 64, 1]), stride=(64, 1, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=256, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[a64xnvbb6q6gew5c3aovaeqivj3hstrmxkbra3eidivx42znnuv] example_inputs[233]: 
TensorMetadata(dtype=torch.float32, shape=torch.Size([768, 768]), stride=(768, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=2359296, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[ku6bnu4u7osvly4jsmf2w2orf22tghfytwzq6ycj3otmnq6f3wn] example_inputs[234]: TensorMetadata(dtype=torch.float32, shape=torch.Size([2304, 768]), stride=(768, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=7077888, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[mxqz7gd4dbgzufqocz5p7oivwzmpjmkhr6kbxm4dewvedblut3z] example_inputs[235]: TensorMetadata(dtype=torch.float32, shape=torch.Size([1, 64, 1]), stride=(64, 1, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=256, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[6v4azk5hvrkyq22k6mfa2l7ovaddatmim7mcvcpdku2ieirjd64] example_inputs[236]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768, 3072]), stride=(3072, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=9437184, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[aafg74anlk7yqmlg6ucup4tmjwhskblh75z227dcfmwpfzw7dzm] example_inputs[237]: TensorMetadata(dtype=torch.float32, shape=torch.Size([3072, 768]), stride=(768, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=9437184, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[mxqz7gd4dbgzufqocz5p7oivwzmpjmkhr6kbxm4dewvedblut3z] example_inputs[238]: TensorMetadata(dtype=torch.float32, shape=torch.Size([1, 64, 1]), stride=(64, 1, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=256, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[a64xnvbb6q6gew5c3aovaeqivj3hstrmxkbra3eidivx42znnuv] example_inputs[239]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768, 768]), stride=(768, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=2359296, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[ku6bnu4u7osvly4jsmf2w2orf22tghfytwzq6ycj3otmnq6f3wn] example_inputs[240]: TensorMetadata(dtype=torch.float32, shape=torch.Size([2304, 768]), stride=(768, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=7077888, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, 
is_coalesced=None, dense_dim=None, sparse_dim=None)", "[mxqz7gd4dbgzufqocz5p7oivwzmpjmkhr6kbxm4dewvedblut3z] example_inputs[241]: TensorMetadata(dtype=torch.float32, shape=torch.Size([1, 64, 1]), stride=(64, 1, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=256, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[6v4azk5hvrkyq22k6mfa2l7ovaddatmim7mcvcpdku2ieirjd64] example_inputs[242]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768, 3072]), stride=(3072, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=9437184, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[aafg74anlk7yqmlg6ucup4tmjwhskblh75z227dcfmwpfzw7dzm] example_inputs[243]: TensorMetadata(dtype=torch.float32, shape=torch.Size([3072, 768]), stride=(768, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=9437184, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[mxqz7gd4dbgzufqocz5p7oivwzmpjmkhr6kbxm4dewvedblut3z] example_inputs[244]: TensorMetadata(dtype=torch.float32, shape=torch.Size([1, 64, 1]), stride=(64, 1, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=256, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[a64xnvbb6q6gew5c3aovaeqivj3hstrmxkbra3eidivx42znnuv] example_inputs[245]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768, 768]), stride=(768, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=2359296, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[ku6bnu4u7osvly4jsmf2w2orf22tghfytwzq6ycj3otmnq6f3wn] example_inputs[246]: TensorMetadata(dtype=torch.float32, shape=torch.Size([2304, 768]), stride=(768, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=7077888, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[mxqz7gd4dbgzufqocz5p7oivwzmpjmkhr6kbxm4dewvedblut3z] example_inputs[247]: TensorMetadata(dtype=torch.float32, shape=torch.Size([1, 64, 1]), stride=(64, 1, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=256, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[6v4azk5hvrkyq22k6mfa2l7ovaddatmim7mcvcpdku2ieirjd64] example_inputs[248]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768, 3072]), stride=(3072, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, 
storage_bytes=9437184, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[aafg74anlk7yqmlg6ucup4tmjwhskblh75z227dcfmwpfzw7dzm] example_inputs[249]: TensorMetadata(dtype=torch.float32, shape=torch.Size([3072, 768]), stride=(768, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=9437184, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[mxqz7gd4dbgzufqocz5p7oivwzmpjmkhr6kbxm4dewvedblut3z] example_inputs[250]: TensorMetadata(dtype=torch.float32, shape=torch.Size([1, 64, 1]), stride=(64, 1, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=256, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[a64xnvbb6q6gew5c3aovaeqivj3hstrmxkbra3eidivx42znnuv] example_inputs[251]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768, 768]), stride=(768, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=2359296, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[ku6bnu4u7osvly4jsmf2w2orf22tghfytwzq6ycj3otmnq6f3wn] example_inputs[252]: TensorMetadata(dtype=torch.float32, shape=torch.Size([2304, 768]), stride=(768, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=7077888, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[mxqz7gd4dbgzufqocz5p7oivwzmpjmkhr6kbxm4dewvedblut3z] example_inputs[253]: TensorMetadata(dtype=torch.float32, shape=torch.Size([1, 64, 1]), stride=(64, 1, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=256, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[6v4azk5hvrkyq22k6mfa2l7ovaddatmim7mcvcpdku2ieirjd64] example_inputs[254]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768, 3072]), stride=(3072, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=9437184, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[aafg74anlk7yqmlg6ucup4tmjwhskblh75z227dcfmwpfzw7dzm] example_inputs[255]: TensorMetadata(dtype=torch.float32, shape=torch.Size([3072, 768]), stride=(768, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=9437184, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[mxqz7gd4dbgzufqocz5p7oivwzmpjmkhr6kbxm4dewvedblut3z] example_inputs[256]: TensorMetadata(dtype=torch.float32, shape=torch.Size([1, 64, 1]), stride=(64, 1, 
1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=256, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[a64xnvbb6q6gew5c3aovaeqivj3hstrmxkbra3eidivx42znnuv] example_inputs[257]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768, 768]), stride=(768, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=2359296, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[ku6bnu4u7osvly4jsmf2w2orf22tghfytwzq6ycj3otmnq6f3wn] example_inputs[258]: TensorMetadata(dtype=torch.float32, shape=torch.Size([2304, 768]), stride=(768, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=7077888, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[mxqz7gd4dbgzufqocz5p7oivwzmpjmkhr6kbxm4dewvedblut3z] example_inputs[259]: TensorMetadata(dtype=torch.float32, shape=torch.Size([1, 64, 1]), stride=(64, 1, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=256, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[u55blbbc73afkevwx6ofprgxxytl7dbrkgoal4z3b6od3qdlugs] example_inputs[260]: TensorMetadata(dtype=torch.float32, shape=torch.Size([1, 1, 50304]), stride=(50304, 50304, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=None, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] fx_kwargs[aot_mode]: False", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] fx_kwargs[cpp_wrapper]: False", "[moqyx73pd52qgfefpqfwjkxjzf4hj25dyzowq4prptxxsmfw2vg] fx_kwargs[cudagraphs]: BoxedBool(value=True)", "[tquy2we2efmowuj4wuqzcfcfdcrkzkzmwdae6hprj7fa64jpusq] fx_kwargs[extern_node_serializer]: None", "[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] fx_kwargs[is_backward]: True", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] fx_kwargs[is_inference]: False", "[tquy2we2efmowuj4wuqzcfcfdcrkzkzmwdae6hprj7fa64jpusq] fx_kwargs[layout_opt]: None", "[y3e3yuxtssnww62nt5exdblxjs4qqfe6m45lbogy57sjgkkgd7s] fx_kwargs[static_input_idxs]: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76, 77, 78, 79, 80, 81, 82, 83, 84, 85, 86, 87, 88, 89, 90, 91, 92, 93, 94, 95, 96, 97, 98, 99, 100, 101, 102, 103, 104, 105, 106, 107, 108, 109, 110, 111, 112, 113, 114, 115, 116, 117, 118, 119, 120, 121, 122, 123, 124, 125, 126, 127, 128, 129, 130, 131, 132, 133, 134, 135, 136, 137, 138, 139, 140, 141, 142, 143, 144, 145, 146, 147, 148, 149, 150, 151, 152, 153, 154, 155, 156, 157, 158, 159, 160, 
161, 162, 163, 164, 165, 166, 167, 168, 169, 170, 171, 172, 173, 174, 175, 176, 177, 178, 179, 180, 181, 182, 183, 184, 185, 186, 187, 188, 189, 190, 191, 192, 193, 194, 195, 196, 197, 198, 199, 200, 201, 202, 203, 204, 205, 206, 207, 208, 209, 210, 211, 212, 213, 214, 215, 216, 217, 218, 219, 220, 221, 222, 223, 224, 225, 226, 227, 228, 229, 230, 231, 232, 233, 234, 235, 236, 237, 238, 239, 240, 241, 242, 243, 244, 245, 246, 247, 248, 249, 250, 251, 252, 253, 254, 255, 256, 257, 258, 259]", "[gqceoov337f2fxydib545wytq5n6f565gacorvs7dzatfibcnjq] fx_kwargs[user_visible_outputs]: {'add_147': None, '_unsafe_index_put': None, 'sum_147': None, 'sum_148': None, 'permute_340': None, 'view_338': None, 'permute_332': None, 'view_331': None, 'sum_141': None, 'sum_142': None, 'permute_328': None, 'view_328': None, 'permute_324': None, 'view_325': None, 'sum_135': None, 'sum_136': None, 'permute_320': None, 'view_322': None, 'permute_312': None, 'view_315': None, 'sum_129': None, 'sum_130': None, 'permute_308': None, 'view_312': None, 'permute_304': None, 'view_309': None, 'sum_123': None, 'sum_124': None, 'permute_300': None, 'view_306': None, 'permute_292': None, 'view_299': None, 'sum_117': None, 'sum_118': None, 'permute_288': None, 'view_296': None, 'permute_284': None, 'view_293': None, 'sum_111': None, 'sum_112': None, 'permute_280': None, 'view_290': None, 'permute_272': None, 'view_283': None, 'sum_105': None, 'sum_106': None, 'permute_268': None, 'view_280': None, 'permute_264': None, 'view_277': None, 'sum_99': None, 'sum_100': None, 'permute_260': None, 'view_274': None, 'permute_252': None, 'view_267': None, 'sum_93': None, 'sum_94': None, 'permute_248': None, 'view_264': None, 'permute_244': None, 'view_261': None, 'sum_87': None, 'sum_88': None, 'permute_240': None, 'view_258': None, 'permute_232': None, 'view_251': None, 'sum_81': None, 'sum_82': None, 'permute_228': None, 'view_248': None, 'permute_224': None, 'view_245': None, 'sum_75': None, 'sum_76': None, 'permute_220': None, 'view_242': None, 'permute_212': None, 'view_235': None, 'sum_69': None, 'sum_70': None, 'permute_208': None, 'view_232': None, 'permute_204': None, 'view_229': None, 'sum_63': None, 'sum_64': None, 'permute_200': None, 'view_226': None, 'permute_192': None, 'view_219': None, 'sum_57': None, 'sum_58': None, 'permute_188': None, 'view_216': None, 'permute_184': None, 'view_213': None, 'sum_51': None, 'sum_52': None, 'permute_180': None, 'view_210': None, 'permute_172': None, 'view_203': None, 'sum_45': None, 'sum_46': None, 'permute_168': None, 'view_200': None, 'permute_164': None, 'view_197': None, 'sum_39': None, 'sum_40': None, 'permute_160': None, 'view_194': None, 'permute_152': None, 'view_187': None, 'sum_33': None, 'sum_34': None, 'permute_148': None, 'view_184': None, 'permute_144': None, 'view_181': None, 'sum_27': None, 'sum_28': None, 'permute_140': None, 'view_178': None, 'permute_132': None, 'view_171': None, 'sum_21': None, 'sum_22': None, 'permute_128': None, 'view_168': None, 'permute_124': None, 'view_165': None, 'sum_15': None, 'sum_16': None, 'permute_120': None, 'view_162': None, 'permute_112': None, 'view_155': None, 'sum_9': None, 'sum_10': None, 'permute_108': None, 'view_152': None, 'permute_104': None, 'view_149': None, 'sum_3': None, 'sum_4': None}", "[4tsez6gljkkxdflgsfn46kmxnkkmjfm3foz2peoj2utbxjdodv2] inputs_to_check[0]: 260", "[du4vyrfyozrfxcf6kk6ma7oqwatapifazeelfsawmsiu6gjdtxp] deterministic_algorithms_settings: (False, False, True)", 
"[7as26aeta7rzhgm2mxh4el36kupf55fr27327kzc2fsdiy3nexy] cuda_matmul_settings: (True, True, True)", "[7sgsc55kfjzwis4uo5qss3whnvhbsher24yq6kx6s5kpsrkxea6] torch_version: ", "[poglqjwowp4gnkmehjby2lvdjrwuo5tbxa2gayd6smgasl2hgsd] system_info[device]: {'name': 'NVIDIA H100'}", "[sp37civic5nfnonkkbzih3xfntpo3gcmphcbajuw565ur3hduzs] system_info[version]: {'triton': '3.0.0+45fff310c8d412cc506d0c1c7cf83e442e39f90ca1a9dbd4346ecd342591748e57a184b0a3-f7c9bcb1b9106f4f2459ef1b70df75315076b76babe2a4ba9e0b3b5a40697757-d412cc506d0c1c7cf83e442e39f90ca1a9dbd4346ecd342591748e57a184b0a3-23d635e690d670bf61798e1259674b78c0ed5ba222ab6a455f329f27a758fc2d-e3b0c44298fc1c149afbf4c8996fb92427ae41e4649b934ca495991b7852b855-c9cccccc1377e5b8c152e8449fa73a2b3660a899bebac0581c99e4e368a61cde-e82522398a0a43b3c7db9c7fa26ab086260c0981e59942e1df54ddc669d7b78c-cf28658fa328f7f283ec4e6ccc6c48d7c2a8ddbdf5134d3eb35c9b38ce4ace44-b9d80690b3109c2aaf5ece450d62e93b37eb6ab38552089794b3bb36e36a22b3-36130a37af1b19a0dec569aa08d30b00c74c8f02b6b632999d86dea169146792-572e6cbc0b9ae0089330aa5d115d10cfc16bd45a69e3ca362c902b9c04b5280b-412a0e8de51f28c2de98b3343804b2949ede2cb593052a0eb0ba3e6a9a1812d0', 'cuda': '12.0'}", "[zapn5tv6636hi2hdgg2j7wqiwxjbrqbqa4vrgrnuli5v34salwn] system_info[hash]: ee3e5029961cf964c22a83f0ca3586fcab4a36649a1478e6293b5382433b23d3", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[TYPE_CHECKING]: False", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[abi_compatible]: False", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[aggressive_fusion]: False", "[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[allow_buffer_reuse]: True", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[allow_stack_allocation]: False", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[always_keep_tensor_constants]: False", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[aot_inductor.debug_compile]: False", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[aot_inductor.debug_dump_consts_bin]: False", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[aot_inductor.force_mmap_weights]: False", "[v3hzzlv4tjgvp3pyhmzagjd25orl6n7nynoa7svlhhwk73b7u3c] inductor_config[aot_inductor.output_path]: ", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[aot_inductor.package]: False", "[v3hzzlv4tjgvp3pyhmzagjd25orl6n7nynoa7svlhhwk73b7u3c] inductor_config[aot_inductor.serialized_in_spec]: ", "[v3hzzlv4tjgvp3pyhmzagjd25orl6n7nynoa7svlhhwk73b7u3c] inductor_config[aot_inductor.serialized_out_spec]: ", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[aot_inductor.use_runtime_constant_folding]: False", "[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[assert_indirect_indexing]: True", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[assume_aligned_inputs]: False", "[v3hzzlv4tjgvp3pyhmzagjd25orl6n7nynoa7svlhhwk73b7u3c] inductor_config[autoheuristic_collect]: ", "[jvchmi66fvqzlemhr5fcqorz5trfdtdalzfagtj2aolmimwqhdq] inductor_config[autoheuristic_log_path]: DEFAULT", "[jwbrgxes7vjqumngs5hyj6gn5nytv2whnppnzngvaagfmawhkkd] inductor_config[autoheuristic_use]: mixed_mm", "[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[autotune_fallback_to_aten]: True", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[autotune_in_subproc]: False", 
"[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[autotune_local_cache]: True", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[autotune_multi_device]: False", "[tquy2we2efmowuj4wuqzcfcfdcrkzkzmwdae6hprj7fa64jpusq] inductor_config[autotune_remote_cache]: None", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[b2b_gemm_pass]: False", "[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[batch_fusion]: True", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[benchmark_combo_kernel]: False", "[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[benchmark_epilogue_fusion]: True", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[benchmark_fusion]: False", "[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[benchmark_harness]: True", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[benchmark_kernel]: False", "[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[bw_outputs_user_visible]: True", "[b4ha3ravs3qv237q65hpfqegbnoww7tf2ahcbu2i7xo6te5spqs] inductor_config[c_shim_version]: 2", "[aghvyrrgwvxijco2pk5wzc3cgmmthrbmgxitiibxuuscxdwrjd3] inductor_config[combo_kernel_allow_mixed_sizes]: 1", "[aghvyrrgwvxijco2pk5wzc3cgmmthrbmgxitiibxuuscxdwrjd3] inductor_config[combo_kernels_autotune]: 1", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[comment_origin]: False", "[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[comprehensive_padding]: True", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[compute_all_bounds]: False", "[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[constant_and_index_propagation]: True", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[conv_1x1_as_mm]: False", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[coordinate_descent_check_all_directions]: False", "[aghvyrrgwvxijco2pk5wzc3cgmmthrbmgxitiibxuuscxdwrjd3] inductor_config[coordinate_descent_search_radius]: 1", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[coordinate_descent_tuning]: False", "[c7zj4qytmety6keurs3hsh5wn7foxp3dqx4kym2ucszzcb2ngrf] inductor_config[cpp.cxx]: (None, 'g++')", "[yrty22bseefglnysuoec4ji7j2rnaggdj3g33zzj7avogwfmgdw] inductor_config[cpp.descriptive_names]: original_aten", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[cpp.dynamic_threads]: False", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[cpp.enable_floating_point_contract_flag]: False", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[cpp.enable_kernel_profile]: False", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[cpp.enable_unsafe_math_opt_flag]: False", "[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[cpp.fallback_scatter_reduce_sum]: True", "[aghvyrrgwvxijco2pk5wzc3cgmmthrbmgxitiibxuuscxdwrjd3] inductor_config[cpp.gemm_max_k_slices]: 1", "[tquy2we2efmowuj4wuqzcfcfdcrkzkzmwdae6hprj7fa64jpusq] inductor_config[cpp.inject_log1p_bug_TESTING_ONLY]: None", "[tquy2we2efmowuj4wuqzcfcfdcrkzkzmwdae6hprj7fa64jpusq] inductor_config[cpp.inject_relu_bug_TESTING_ONLY]: None", "[ebt2ncs4f5y7dn7btzi76mnouepvzad474tmp5iju4wiuumjl4s] inductor_config[cpp.max_horizontal_fusion_size]: 16", "[g7rrnbg5yonzux3cfj5ovre5lob3ayda7qcfpxjvtwmiz4uicii] inductor_config[cpp.min_chunk_size]: 4096", 
"[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[cpp.no_redundant_loops]: True", "[tquy2we2efmowuj4wuqzcfcfdcrkzkzmwdae6hprj7fa64jpusq] inductor_config[cpp.simdlen]: None", "[sz3im5ogc6asp7g4uqocnovype63tkdexzfrniv6hn2oank3biu] inductor_config[cpp.threads]: -1", "[tquy2we2efmowuj4wuqzcfcfdcrkzkzmwdae6hprj7fa64jpusq] inductor_config[cpp.vec_isa_ok]: None", "[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[cpp.weight_prepack]: True", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[cpp_wrapper]: False", "[bsvfcwwoczx2rlkdz2eta6doujsymyihmi46hhwk6clrrvwcb6m] inductor_config[cpu_backend]: cpp", "[tquy2we2efmowuj4wuqzcfcfdcrkzkzmwdae6hprj7fa64jpusq] inductor_config[cuda.arch]: None", "[tvyftmtdmezlejo2xllu7awzv4pzc4vm4fub4b3gpl5jptjkosi] inductor_config[cuda.compile_opt_level]: -O1", "[tquy2we2efmowuj4wuqzcfcfdcrkzkzmwdae6hprj7fa64jpusq] inductor_config[cuda.cuda_cxx]: None", "[aghvyrrgwvxijco2pk5wzc3cgmmthrbmgxitiibxuuscxdwrjd3] inductor_config[cuda.cutlass_backend_min_gemm_size]: 1", "[tquy2we2efmowuj4wuqzcfcfdcrkzkzmwdae6hprj7fa64jpusq] inductor_config[cuda.cutlass_max_profiling_configs]: None", "[tquy2we2efmowuj4wuqzcfcfdcrkzkzmwdae6hprj7fa64jpusq] inductor_config[cuda.cutlass_op_allowlist_regex]: None", "[lwkz5chtpji756gurqw4foijfi7zfgljtnn5nmnvdi2skpt4mgh] inductor_config[cuda.cutlass_op_denylist_regex]: pingpong", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[cuda.enable_cuda_lto]: False", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[cuda.enable_debug_info]: False", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[cuda.enable_ptxas_info]: False", "[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[cuda.generate_test_runner]: True", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[cuda.use_fast_math]: False", "[tquy2we2efmowuj4wuqzcfcfdcrkzkzmwdae6hprj7fa64jpusq] inductor_config[cuda.version]: None", "[caw4ly2z672k6kjfahoxwpajp5idhhtrpgf3ma2clylcp7c7aid] inductor_config[cuda_backend]: triton", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[dce]: False", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[debug]: False", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[debug_fusion]: False", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[debug_index_asserts]: False", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[debug_ir_traceback]: False", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[decompose_mem_bound_mm]: False", "[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[developer_warnings]: True", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[disable_cpp_codegen]: False", "[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[disable_progress]: True", "[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[dynamic_scale_rblock]: True", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[efficient_conv_bn_eval_fx_passes]: False", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[emulate_precision_casts]: False", "[v3hzzlv4tjgvp3pyhmzagjd25orl6n7nynoa7svlhhwk73b7u3c] inductor_config[enabled_metric_tables]: ", "[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[epilogue_fusion]: True", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] 
inductor_config[epilogue_fusion_first]: False", "[lxxtoqhcoepwfokeiibd575gnxo3uzwiv4hmpomlwkpzqz3qzsh] inductor_config[estimate_op_runtime]: default", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[fallback_random]: False", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[force_disable_caches]: False", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[force_fuse_int_mm_with_mul]: False", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[force_layout_optimization]: False", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[force_same_precision]: False", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[force_shape_pad]: False", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[freezing]: False", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[freezing_discard_parameters]: False", "[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[fx_graph_cache]: True", "[tquy2we2efmowuj4wuqzcfcfdcrkzkzmwdae6hprj7fa64jpusq] inductor_config[fx_graph_remote_cache]: None", "[zwmmbkdkarexuhbigurz5lfnhx64tht7fznecjkrvznh6rzivbv] inductor_config[fx_passes_numeric_check]: {'pre_grad': False, 'precision': 0.0001, 'num_iterations': 1, 'requires_optimizer': True}", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[generate_intermediate_hooks]: False", "[tquy2we2efmowuj4wuqzcfcfdcrkzkzmwdae6hprj7fa64jpusq] inductor_config[global_cache_dir]: None", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[group_fusion]: False", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[halide.asserts]: False", "[ljhgflgihidopsfsdcbqynv27nceykby3nutyd5jlcpq7n6e7l4] inductor_config[halide.cpu_target]: host", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[halide.debug]: False", "[wx7vmsmrdpk5ue2txlywp3lj3faqmdjphs5fgg2ehzsyno7uovg] inductor_config[halide.gpu_target]: host-cuda", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[halide.scan_kernels]: False", "[k5ogk6345jvklsnu7g2njqstiz2g6pm5wmqpgg3kasrmuqwjvl6] inductor_config[halide.scheduler_cpu]: Adams2019", "[svgytlua5wcyeia7wq7e6zgh5tsueikrnzchmdmouvmkpfsc2zq] inductor_config[halide.scheduler_cuda]: Anderson2021", "[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[implicit_fallbacks]: True", "[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[inplace_buffers]: True", "[5fxczt3ciyxitdhizb7sfsgn7fhpczcqsngttnt5ot2wyctk7co] inductor_config[inter_node_bw]: 25", "[yezuzjtg4h3jjur4jwtwiehbyixa7eonq4tqsqmwqve2lvvmrem] inductor_config[intra_node_bw]: 300", "[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[is_nightly_or_source]: True", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[is_predispatch]: False", "[tquy2we2efmowuj4wuqzcfcfdcrkzkzmwdae6hprj7fa64jpusq] inductor_config[joint_custom_post_pass]: None", "[tquy2we2efmowuj4wuqzcfcfdcrkzkzmwdae6hprj7fa64jpusq] inductor_config[joint_custom_pre_pass]: None", "[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[joint_graph_constant_folding]: True", "[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[keep_output_stride]: True", "[j6c55jha5r2sdys2rwq7uqhtleea5dgjcye7nicfgft36v7xfvp] inductor_config[kernel_name_max_ops]: 10", "[4p2fdjlvxrcw7c7fvzm5huhtqxnro4kvkx56f7p5zyrxqkwooov] inductor_config[layout_opt_default]: 1", 
"[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[layout_optimization]: True", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[max_autotune]: False", "[uqlsbif4zxd75vt522p52txyuguieipi2lwz5g5awt56lccqk7s] inductor_config[max_autotune_conv_backends]: ATEN,TRITON", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[max_autotune_gemm]: False", "[2y7luesktjrque3nr7qtxnum2mkbeegzdrsvkm3rvdlhqboajhx] inductor_config[max_autotune_gemm_backends]: ATEN,TRITON,CPP", "[jvchmi66fvqzlemhr5fcqorz5trfdtdalzfagtj2aolmimwqhdq] inductor_config[max_autotune_gemm_search_space]: DEFAULT", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[max_autotune_pointwise]: False", "[bh33ranllcgilhgmgr3qvygzxjm6isq5iexnfm3zx6fnr2zwlp2] inductor_config[max_autotune_subproc_graceful_timeout_seconds]: 1.0", "[iglov24t7x5ruci344aer2tm6nqshi4veuw4wxlssxtu46cx76m] inductor_config[max_autotune_subproc_result_timeout_seconds]: 60.0", "[pwoh5aypf4fxbntdvwt67rppxorqos6xr3w7qzeun6kblbfg2ga] inductor_config[max_autotune_subproc_terminate_timeout_seconds]: 2.0", "[aghvyrrgwvxijco2pk5wzc3cgmmthrbmgxitiibxuuscxdwrjd3] inductor_config[max_epilogue_benchmarked_choices]: 1", "[jykiys6ynafs3zdylwa5ggq6j655mxeh42d6mtdi22gffkrmiac] inductor_config[max_fusion_size]: 64", "[yttmfmxblgcbsvbokguzowcorrcxz5uunxtcvsbe6nijgcx45he] inductor_config[max_pointwise_cat_inputs]: 8", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[memory_planning]: False", "[x75won4jmsgeb63pcvwr2y4eteyzzdhmf5rv6xhjppie4hx2yu5] inductor_config[memory_pool]: intermediates", "[v2td5s4lnsvyxvaevy4chx6kc5h3mm2axazbgwimqule5zrzao7] inductor_config[mixed_mm_choice]: heuristic", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[nan_asserts]: False", "[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[optimize_scatter_upon_const_tensor]: True", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[pad_channels_last]: False", "[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[pattern_matcher]: True", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[permute_fusion]: False", "[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[pick_loop_orders]: True", "[tquy2we2efmowuj4wuqzcfcfdcrkzkzmwdae6hprj7fa64jpusq] inductor_config[post_grad_custom_post_pass]: None", "[tquy2we2efmowuj4wuqzcfcfdcrkzkzmwdae6hprj7fa64jpusq] inductor_config[post_grad_custom_pre_pass]: None", "[4bryyl4ahh5whyg3zwqebpwmjnx6w77nqgqbdjlowju6lkqtn7w] inductor_config[post_grad_fusion_options]: {}", "[tquy2we2efmowuj4wuqzcfcfdcrkzkzmwdae6hprj7fa64jpusq] inductor_config[pre_grad_custom_pass]: None", "[gtkv35cxmtt6tr556buxi277a67g25mjojnv32dc4bjvc7bwscw] inductor_config[pre_grad_fusion_options]: {'batch_linear': {}, 'batch_linear_lhs': {}, 'batch_layernorm': {}, 'batch_tanh': {}, 'batch_relu': {}, 'batch_sigmoid': {}}", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[profile_bandwidth]: False", "[tquy2we2efmowuj4wuqzcfcfdcrkzkzmwdae6hprj7fa64jpusq] inductor_config[profile_bandwidth_output]: None", "[v3hzzlv4tjgvp3pyhmzagjd25orl6n7nynoa7svlhhwk73b7u3c] inductor_config[profile_bandwidth_regex]: ", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[profiler_mark_wrapper_call]: False", "[yttmfmxblgcbsvbokguzowcorrcxz5uunxtcvsbe6nijgcx45he] inductor_config[realize_acc_reads_threshold]: 8", "[rr5m5hsocoyodldz7vcvaizdwvm2rt34evmqdxvng7wz3tufvo6] 
inductor_config[realize_opcount_threshold]: 30", "[lkkae3meylaixfif4thncru4hjqeaislawjoghffrbwuscaagei] inductor_config[realize_reads_threshold]: 4", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[reorder_for_compute_comm_overlap]: False", "[ssupi7bu3rrhdpg2jyegzncu3kg3nnhklyliqvutaxgs7y7k3dx] inductor_config[reorder_for_compute_comm_overlap_passes]: ['reorder_compute_for_overlap', 'sink_waits', 'raise_comms']", "[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[reorder_for_locality]: True", "[h25wqx6vliw4j5rtzzbv6latydxyei3deyg6v7wzvnzryfktuki] inductor_config[rocm.arch]: []", "[tquy2we2efmowuj4wuqzcfcfdcrkzkzmwdae6hprj7fa64jpusq] inductor_config[rocm.ck_dir]: None", "[oartxnko2l7d67tzwwm2otcumaut3n4wwcfgz3o377hmcveu5ft] inductor_config[rocm.ck_supported_arch]: ['gfx90a', 'gfx940', 'gfx941', 'gfx942']", "[klfqjprnpfhcdurgvuikvc4rpd5ynkpk77toousr5h3u5roty6p] inductor_config[rocm.compile_opt_level]: -O2", "[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[rocm.flush_denormals]: True", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[rocm.is_debug]: False", "[tquy2we2efmowuj4wuqzcfcfdcrkzkzmwdae6hprj7fa64jpusq] inductor_config[rocm.n_max_profiling_configs]: None", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[rocm.print_kernel_resource_usage]: False", "[tquy2we2efmowuj4wuqzcfcfdcrkzkzmwdae6hprj7fa64jpusq] inductor_config[rocm.rocm_home]: None", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[rocm.save_temps]: False", "[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[rocm.use_fast_math]: True", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[rocm.use_preselected_instances]: False", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[save_args]: False", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[search_autotune_cache]: False", "[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[shape_padding]: True", "[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[size_asserts]: True", "[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[split_cat_fx_passes]: True", "[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[split_reductions]: True", "[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[static_weight_shapes]: True", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[triton.autotune_at_compile_time]: False", "[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[triton.autotune_cublasLt]: True", "[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[triton.autotune_pointwise]: True", "[tuax46wac7rfv2trf5gcps6vleo3cq44lbnrdxtprvo3ljjaddj] inductor_config[triton.cudagraph_dynamic_shape_warn_limit]: 50", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[triton.cudagraph_skip_dynamic_graphs]: False", "[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[triton.cudagraph_support_input_mutation]: True", "[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[triton.cudagraph_trees]: True", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[triton.cudagraph_trees_history_recording]: False", "[ljdqgtysl3vdf7j6attlz5gmjg2ncihnveojfyubosplmkrjgra] inductor_config[triton.cudagraph_unexpected_rerecord_limit]: 128", "[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] 
inductor_config[triton.cudagraphs]: True", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[triton.debug_sync_graph]: False", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[triton.debug_sync_kernel]: False", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[triton.dense_indexing]: False", "[yrty22bseefglnysuoec4ji7j2rnaggdj3g33zzj7avogwfmgdw] inductor_config[triton.descriptive_names]: original_aten", "[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[triton.divisible_by_16]: True", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[triton.fast_path_cudagraph_asserts]: False", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[triton.force_cudagraph_sync]: False", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[triton.force_cudagraphs_warmup]: False", "[tquy2we2efmowuj4wuqzcfcfdcrkzkzmwdae6hprj7fa64jpusq] inductor_config[triton.inject_relu_bug_TESTING_ONLY]: None", "[pr5nr4a7dthirgd2ljo3d2xakc63ywxugusu6mkmr6gmpeliyib] inductor_config[triton.max_tiles]: 2", "[fv6slhtedtydps5s5u2etitscliblzcidyitqf7krsv4e23fzk6] inductor_config[triton.min_split_scan_rblock]: 256", "[vrl5ktomgtzox5xucd3np6vug3vyj6hwwzahqijuwpmamlv7ohi] inductor_config[triton.multi_kernel]: 0", "[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[triton.persistent_reductions]: True", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[triton.skip_cudagraph_warmup]: False", "[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[triton.slow_path_cudagraph_asserts]: True", "[ebt2ncs4f5y7dn7btzi76mnouepvzad474tmp5iju4wiuumjl4s] inductor_config[triton.spill_threshold]: 16", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[triton.store_cubin]: False", "[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[triton.tiling_prevents_pointwise_fusion]: True", "[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[triton.tiling_prevents_reduction_fusion]: True", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[triton.unique_kernel_names]: False", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[triton.use_block_ptr]: False", "[wft6ljqsfr3x4m7fa5zuyb7cwknky4irrxz4bjr6uzr2yiopxqj] inductor_config[unbacked_symint_fallback]: 8192", "[yttmfmxblgcbsvbokguzowcorrcxz5uunxtcvsbe6nijgcx45he] inductor_config[unroll_reductions_threshold]: 8", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[unsafe_ignore_unsupported_triton_autotune_args]: False", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[use_minimal_arrayref_interface]: False", "[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[use_mixed_mm]: True", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[verbose_progress]: False", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[warn_mix_layout]: False", "[hofygoznqmna6yvgsc6itdddi4hxftssgegh6wquixg2yng3a3z] inductor_config[worker_start_method]: subprocess"]}
+V0806 13:56:05.129000 4107173 torch/_dynamo/utils.py:824] {"chromium_event": {}, "has_payload": "686420174d58db1ad3214083421e88f9"}
+ {
+ "name": "inductor_compile",
+ "ts": 1722977765129674.5,
+ "args": null,
+ "ph": "E",
+ "pid": 0
+ }
+V0806 13:56:05.129000 4107173 torch/_dynamo/utils.py:824] {"chromium_event": {}, "has_payload": "850f6f968f76596e70f57ae3686f27cf"}
+ {
+ "name": "compile_fx_inner",
+ "ts": 1722977765129750.2,
+ "args": null,
+ "ph": "E",
+ "pid": 0
+ }
+V0806 13:56:05.129000 4107173 torch/_dynamo/utils.py:779] {"bwd_compilation_metrics": {"compile_id": "2/0", "inductor_compile_time_s": 3.4067680835723877, "code_gen_time_s": 2.149840831756592, "fail_type": null, "fail_reason": null}, "frame_id": 2, "frame_compile_id": 0, "attempt": 1}
+V0806 13:56:05.129000 4107173 torch/_dynamo/utils.py:824] {"chromium_event": {}, "has_payload": "69414400ce6973315a6724bd1dd0d6bf"}
+ {
+ "name": "compile_fx.<locals>.bw_compiler",
+ "ts": 1722977765129957.8,
+ "args": null,
+ "ph": "E",
+ "pid": 0
+ }
+V0806 13:56:05.130000 4107173 torch/_dynamo/utils.py:824] {"chromium_event": {}, "has_payload": "847a3ca2b47dfe66846b3cdd55fea2ea"}
+ {
+ "name": "cudagraphify",
+ "ts": 1722977765130112.0,
+ "args": null,
+ "ph": "B",
+ "pid": 0
+ }
+V0806 13:56:05.130000 4107173 torch/_dynamo/utils.py:824] {"chromium_event": {}, "has_payload": "e344de4b4e97bc4240303d1f7dcda48b"}
+ {
+ "name": "cudagraphify",
+ "ts": 1722977765130344.2,
+ "args": null,
+ "ph": "E",
+ "pid": 0
+ }
+V0806 13:56:05.132000 4107173 torch/_dynamo/utils.py:824] {"chromium_event": {}, "has_payload": "20f2823ad218c39ac107385bbcfa08cf"}
+ {
+ "name": "CachingAutotuner.benchmark_all_configs",
+ "ts": 1722977765132271.8,
+ "args": null,
+ "ph": "B",
+ "pid": 0
+ }
+V0806 13:56:05.223000 4107173 torch/_dynamo/utils.py:824] {"chromium_event": {}, "has_payload": "d0d90302be22d31408953ec1b96010a8"}
+ {
+ "name": "CachingAutotuner.benchmark_all_configs",
+ "ts": 1722977765223053.8,
+ "args": null,
+ "ph": "E",
+ "pid": 0
+ }
+V0806 13:56:05.223000 4107173 torch/_dynamo/utils.py:824] {"chromium_event": {}, "has_payload": "043f9463cae9b57649917ba2b04a7e1e"}
+ {
+ "name": "CachingAutotuner.benchmark_all_configs",
+ "ts": 1722977765223638.2,
+ "args": null,
+ "ph": "B",
+ "pid": 0
+ }
+V0806 13:56:05.323000 4107173 torch/_dynamo/utils.py:824] {"chromium_event": {}, "has_payload": "aa7c41d96ff04f6514db96103ec2287b"}
+ {
+ "name": "CachingAutotuner.benchmark_all_configs",
+ "ts": 1722977765323165.0,
+ "args": null,
+ "ph": "E",
+ "pid": 0
+ }
+V0806 13:56:05.323000 4107173 torch/_dynamo/utils.py:824] {"chromium_event": {}, "has_payload": "384acc1950f5815398f79747e3f5e001"}
+ {
+ "name": "CachingAutotuner.benchmark_all_configs",
+ "ts": 1722977765323857.5,
+ "args": null,
+ "ph": "B",
+ "pid": 0
+ }
+V0806 13:56:05.462000 4107173 torch/_dynamo/utils.py:824] {"chromium_event": {}, "has_payload": "51e9926bccb19798777f8000fe5635cb"}
+ {
+ "name": "CachingAutotuner.benchmark_all_configs",
+ "ts": 1722977765462544.2,
+ "args": null,
+ "ph": "E",
+ "pid": 0
+ }
+V0806 13:56:05.463000 4107173 torch/_dynamo/utils.py:824] {"chromium_event": {}, "has_payload": "a81cd6152b3cf0c659966fd5b2b9c485"}
+ {
+ "name": "CachingAutotuner.benchmark_all_configs",
+ "ts": 1722977765463647.5,
+ "args": null,
+ "ph": "B",
+ "pid": 0
+ }
+V0806 13:56:05.561000 4107173 torch/_dynamo/utils.py:824] {"chromium_event": {}, "has_payload": "f5ac2eb196831b6937a7e75656a3ad23"}
+ {
+ "name": "CachingAutotuner.benchmark_all_configs",
+ "ts": 1722977765561239.0,
+ "args": null,
+ "ph": "E",
+ "pid": 0
+ }
+V0806 13:56:05.562000 4107173 torch/_dynamo/utils.py:824] {"chromium_event": {}, "has_payload": "36e548f418cc6c70c9ef91f3523765e7"}
+ {
+ "name": "CachingAutotuner.benchmark_all_configs",
+ "ts": 1722977765562200.2,
+ "args": null,
+ "ph": "B",
+ "pid": 0
+ }
+V0806 13:56:05.654000 4107173 torch/_dynamo/utils.py:824] {"chromium_event": {}, "has_payload": "adfd278d27ad807d526a30932e2bc11d"}
+ {
+ "name": "CachingAutotuner.benchmark_all_configs",
+ "ts": 1722977765654414.5,
+ "args": null,
+ "ph": "E",
+ "pid": 0
+ }
+V0806 13:56:05.659000 4107173 torch/_dynamo/utils.py:824] {"chromium_event": {}, "has_payload": "7479b2b73e4e225784e8b098e9479691"}
+ {
+ "name": "CachingAutotuner.benchmark_all_configs",
+ "ts": 1722977765659076.2,
+ "args": null,
+ "ph": "B",
+ "pid": 0
+ }
+V0806 13:56:05.749000 4107173 torch/_dynamo/utils.py:824] {"chromium_event": {}, "has_payload": "36758ab08d320e75bb2c38aa80d3ab33"}
+ {
+ "name": "CachingAutotuner.benchmark_all_configs",
+ "ts": 1722977765749824.5,
+ "args": null,
+ "ph": "E",
+ "pid": 0
+ }
+V0806 13:56:05.750000 4107173 torch/_dynamo/utils.py:824] {"chromium_event": {}, "has_payload": "ba5bcd94c34aa67b5fbeea5198740fbe"}
+ {
+ "name": "CachingAutotuner.benchmark_all_configs",
+ "ts": 1722977765750428.5,
+ "args": null,
+ "ph": "B",
+ "pid": 0
+ }
+V0806 13:56:05.855000 4107173 torch/_dynamo/utils.py:824] {"chromium_event": {}, "has_payload": "376b0d8bcb7c3e527a0c1824b0cddd6d"}
+ {
+ "name": "CachingAutotuner.benchmark_all_configs",
+ "ts": 1722977765855913.0,
+ "args": null,
+ "ph": "E",
+ "pid": 0
+ }
+V0806 13:56:05.856000 4107173 torch/_dynamo/utils.py:824] {"chromium_event": {}, "has_payload": "90afeea3d97dee27dc597124faf2bbab"}
+ {
+ "name": "CachingAutotuner.benchmark_all_configs",
+ "ts": 1722977765856592.0,
+ "args": null,
+ "ph": "B",
+ "pid": 0
+ }
+V0806 13:56:05.974000 4107173 torch/_dynamo/utils.py:824] {"chromium_event": {}, "has_payload": "fe5fe351f15c2f002287da26abdc3f46"}
+ {
+ "name": "CachingAutotuner.benchmark_all_configs",
+ "ts": 1722977765974828.8,
+ "args": null,
+ "ph": "E",
+ "pid": 0
+ }
+V0806 13:56:05.977000 4107173 torch/_dynamo/convert_frame.py:869] {"dynamo_start": {"stack": [{"line": 460, "name": "<module>", "filename": 1}, {"line": 456, "name": "torchbench_main", "filename": 1}, {"line": 4086, "name": "main", "filename": 2}, {"line": 4018, "name": "process_entry", "filename": 2}, {"line": 4680, "name": "run", "filename": 2}, {"line": 3378, "name": "run_one_model", "filename": 2}, {"line": 3209, "name": "run_performance_test", "filename": 2}, {"line": 3141, "name": "warmup", "filename": 2}, {"line": 464, "name": "_fn", "filename": 3}, {"line": 437, "name": "forward_and_backward_pass", "filename": 1}, {"line": 438, "name": "torch_dynamo_resume_in_forward_and_backward_pass_at_437", "filename": 1}, {"line": 444, "name": "torch_dynamo_resume_in_forward_and_backward_pass_at_438", "filename": 1}, {"line": 445, "name": "torch_dynamo_resume_in_forward_and_backward_pass_at_444", "filename": 1}, {"line": 445, "name": "torch_dynamo_resume_in_forward_and_backward_pass_at_445", "filename": 1}]}, "frame_id": 5, "frame_compile_id": 0, "attempt": 0}
+V0806 13:56:05.978000 4107173 torch/_dynamo/utils.py:824] {"chromium_event": {}, "has_payload": "4a166952c3eac616029712f421ffc6f6"}
+ {
+ "name": "_compile.compile_inner",
+ "ts": 1722977765977996.8,
+ "args": null,
+ "ph": "B",
+ "pid": 0
+ }
+V0806 13:56:05.978000 4107173 torch/_dynamo/utils.py:824] {"chromium_event": {}, "has_payload": "f367db4c90368f8adca1c5928e8f1ecd"}
+ {
+ "name": "entire_frame_compile",
+ "ts": 1722977765978102.2,
+ "args": null,
+ "ph": "B",
+ "pid": 0
+ }
+V0806 13:56:05.987000 4107173 torch/_dynamo/guards.py:2195] {"dynamo_cpp_guards_str": {}, "frame_id": 5, "frame_compile_id": 0, "attempt": 1, "has_payload": "355c5f9c92c9f13aaa98a227db5202aa"}
+
+ TREE_GUARD_MANAGER:
+ +- RootGuardManager
+ | +- DEFAULT_DEVICE: utils_device.CURRENT_DEVICE == None # _dynamo/output_graph.py:462 in init_ambient_guards
+ | +- GLOBAL_STATE: ___check_global_state()
+ | +- GuardManager: source=L['self'], accessed_by=DictGetItemGuardAccessor(self)
+ | | +- TYPE_MATCH: ___check_type_id(L['self'], 94206130500624)
+
+V0806 13:56:05.987000 4107173 torch/_dynamo/utils.py:824] {"chromium_event": {}, "has_payload": "82bed6465739fc263981b76f23f4da3f"}
+ {
+ "name": "entire_frame_compile",
+ "ts": 1722977765987554.8,
+ "args": null,
+ "ph": "E",
+ "pid": 0
+ }
+V0806 13:56:05.987000 4107173 torch/_dynamo/utils.py:824] {"chromium_event": {}, "has_payload": "2a804cb02fc3bf2d3e4e8933c6436465"}
+ {
+ "name": "_compile.compile_inner",
+ "ts": 1722977765987622.0,
+ "args": null,
+ "ph": "E",
+ "pid": 0
+ }
+V0806 13:56:05.987000 4107173 torch/_dynamo/utils.py:779] {"compilation_metrics": {"compile_id": "5/0", "frame_key": "6", "co_name": "torch_dynamo_resume_in_forward_and_backward_pass_at_445", "co_filename": "/data/users/jjwu/a/pytorch/benchmarks/dynamo/torchbench.py", "co_firstlineno": 445, "cache_size": 0, "accumulated_cache_size": 0, "guard_count": 6, "shape_env_guard_count": 0, "graph_op_count": 0, "graph_node_count": 0, "graph_input_count": 0, "start_time": 1722977765.977939, "entire_frame_compile_time_s": 0.009676456451416016, "backend_compile_time_s": null, "inductor_compile_time_s": null, "code_gen_time_s": null, "fail_type": null, "fail_reason": null, "fail_user_frame_filename": null, "fail_user_frame_lineno": null, "non_compliant_ops": [], "compliant_custom_ops": [], "restart_reasons": ["'skip function graph_break in file /data/users/jjwu/a/pytorch/torch/_dynamo/decorators.py'"], "dynamo_time_before_restart_s": 0.005461215972900391, "has_guarded_code": true}, "frame_id": 5, "frame_compile_id": 0, "attempt": 1}
+V0806 13:56:05.988000 4107173 torch/_logging/structured.py:22] {"str": ["/data/users/jjwu/a/pytorch/torch/optim/optimizer.py", 4]}
+V0806 13:56:05.988000 4107173 torch/_dynamo/convert_frame.py:869] {"dynamo_start": {"stack": [{"line": 460, "name": "<module>", "filename": 1}, {"line": 456, "name": "torchbench_main", "filename": 1}, {"line": 4086, "name": "main", "filename": 2}, {"line": 4018, "name": "process_entry", "filename": 2}, {"line": 4680, "name": "run", "filename": 2}, {"line": 3378, "name": "run_one_model", "filename": 2}, {"line": 3209, "name": "run_performance_test", "filename": 2}, {"line": 3141, "name": "warmup", "filename": 2}, {"line": 464, "name": "_fn", "filename": 3}, {"line": 437, "name": "forward_and_backward_pass", "filename": 1}, {"line": 438, "name": "torch_dynamo_resume_in_forward_and_backward_pass_at_437", "filename": 1}, {"line": 444, "name": "torch_dynamo_resume_in_forward_and_backward_pass_at_438", "filename": 1}, {"line": 445, "name": "torch_dynamo_resume_in_forward_and_backward_pass_at_444", "filename": 1}, {"line": 446, "name": "torch_dynamo_resume_in_forward_and_backward_pass_at_445", "filename": 1}, {"line": 2556, "name": "optimizer_step", "filename": 2}, {"line": 458, "name": "wrapper", "filename": 4}]}, "frame_id": 6, "frame_compile_id": 0, "attempt": 0}
+V0806 13:56:05.988000 4107173 torch/_dynamo/utils.py:824] {"chromium_event": {}, "has_payload": "07efc42e14dae367a419acb8428c4b5a"}
+ {
+ "name": "_compile.compile_inner",
+ "ts": 1722977765988338.0,
+ "args": null,
+ "ph": "B",
+ "pid": 0
+ }
+V0806 13:56:05.988000 4107173 torch/_dynamo/utils.py:824] {"chromium_event": {}, "has_payload": "8b6fb54af61bcb682883a1f983d168eb"}
+ {
+ "name": "entire_frame_compile",
+ "ts": 1722977765988403.0,
+ "args": null,
+ "ph": "B",
+ "pid": 0
+ }
+V0806 13:56:05.998000 4107173 torch/_dynamo/guards.py:2195] {"dynamo_cpp_guards_str": {}, "frame_id": 6, "frame_compile_id": 0, "attempt": 1, "has_payload": "82dd580f72d0e44676dd3f7c6f142b65"}
+
+ TREE_GUARD_MANAGER:
+ +- RootGuardManager
+ | +- DEFAULT_DEVICE: utils_device.CURRENT_DEVICE == None # _dynamo/output_graph.py:462 in init_ambient_guards
+ | +- GLOBAL_STATE: ___check_global_state()
+ | +- GuardManager: source=L['args'], accessed_by=DictGetItemGuardAccessor(args)
+ | | +- TYPE_MATCH: ___check_type_id(L['args'], 94206128741824)
+ | | +- LENGTH_CHECK: len(L['args']) == 1
+ | | +- GuardManager: source=L['args'][0], accessed_by=TupleGetItemGuardAccessor(0)
+ | | | +- ID_MATCH: ___check_obj_id(L['args'][0], 140561654732528)
+ | | | +- DictSubclassGuardManager: source=L['args'][0]._optimizer_step_pre_hooks, accessed_by=GetAttrGuardAccessor(_optimizer_step_pre_hooks)
+ | +- GuardManager: source=L['func'], accessed_by=DictGetItemGuardAccessor(func)
+ | | +- GuardManager: source=L['func'].__code__, accessed_by=GetAttrGuardAccessor(__code__)
+ | | | +- ID_MATCH: ___check_obj_id(L['func'].__code__, 140563175560928)
+ | +- GuardManager: source=L['kwargs'], accessed_by=DictGetItemGuardAccessor(kwargs)
+ | | +- DICT_LENGTH: not L['kwargs']
+ | +- GuardManager: source=G, accessed_by=GlobalsGuardAccessor
+ | | +- GuardManager: source=G['cast'], accessed_by=DictGetItemGuardAccessor(cast)
+ | | | +- ID_MATCH: ___check_obj_id(G['cast'], 140565182496976)
+ | | +- GuardManager: source=G['chain'], accessed_by=DictGetItemGuardAccessor(chain)
+ | | | +- ID_MATCH: ___check_obj_id(G['chain'], 94206128678976)
+ | | +- GuardManager: source=G['torch'], accessed_by=DictGetItemGuardAccessor(torch)
+ | | | +- ID_MATCH: ___check_obj_id(G['torch'], 140565184683664)
+ | | | +- GuardManager: source=G['torch'].autograd, accessed_by=GetAttrGuardAccessor(autograd)
+ | | | | +- ID_MATCH: ___check_obj_id(G['torch'].autograd, 140563304246272)
+ | | | | +- GuardManager: source=G['torch'].autograd.profiler, accessed_by=GetAttrGuardAccessor(profiler)
+ | | | | | +- ID_MATCH: ___check_obj_id(G['torch'].autograd.profiler, 140563303232784)
+ | | | | | +- GuardManager: source=G['torch'].autograd.profiler.record_function, accessed_by=GetAttrGuardAccessor(record_function)
+ | | | | | | +- ID_MATCH: ___check_obj_id(G['torch'].autograd.profiler.record_function, 94206190338752)
+ | | +- GuardManager: source=G['Optimizer'], accessed_by=DictGetItemGuardAccessor(Optimizer)
+ | | | +- ID_MATCH: ___check_obj_id(G['Optimizer'], 94206202190960)
+ | | +- DictSubclassGuardManager: source=G['_global_optimizer_pre_hooks'], accessed_by=DictGetItemGuardAccessor(_global_optimizer_pre_hooks)
+
+V0806 13:56:05.998000 4107173 torch/_dynamo/utils.py:824] {"chromium_event": {}, "has_payload": "0f43d692b85e14a4ad120ac1f7fc6fa0"}
+ {
+ "name": "entire_frame_compile",
+ "ts": 1722977765998224.8,
+ "args": null,
+ "ph": "E",
+ "pid": 0
+ }
+V0806 13:56:05.998000 4107173 torch/_dynamo/utils.py:824] {"chromium_event": {}, "has_payload": "aa969914199ac5895a47db38f95b4122"}
+ {
+ "name": "_compile.compile_inner",
+ "ts": 1722977765998296.2,
+ "args": null,
+ "ph": "E",
+ "pid": 0
+ }
+V0806 13:56:05.998000 4107173 torch/_dynamo/utils.py:779] {"compilation_metrics": {"compile_id": "6/0", "frame_key": "8", "co_name": "wrapper", "co_filename": "/data/users/jjwu/a/pytorch/torch/optim/optimizer.py", "co_firstlineno": 458, "cache_size": 0, "accumulated_cache_size": 0, "guard_count": 21, "shape_env_guard_count": 0, "graph_op_count": 0, "graph_node_count": 0, "graph_input_count": 0, "start_time": 1722977765.9883175, "entire_frame_compile_time_s": 0.010008811950683594, "backend_compile_time_s": null, "inductor_compile_time_s": null, "code_gen_time_s": null, "fail_type": null, "fail_reason": null, "fail_user_frame_filename": null, "fail_user_frame_lineno": null, "non_compliant_ops": [], "compliant_custom_ops": [], "restart_reasons": ["'skip function graph_break in file /data/users/jjwu/a/pytorch/torch/_dynamo/decorators.py'"], "dynamo_time_before_restart_s": 0.0030748844146728516, "has_guarded_code": true}, "frame_id": 6, "frame_compile_id": 0, "attempt": 1}
+V0806 13:56:05.999000 4107173 torch/_logging/structured.py:22] {"str": ["/data/users/jjwu/a/pytorch/torch/optim/adam.py", 5]}
+V0806 13:56:05.999000 4107173 torch/_dynamo/convert_frame.py:869] {"dynamo_start": {"stack": [{"line": 460, "name": "<module>", "filename": 1}, {"line": 456, "name": "torchbench_main", "filename": 1}, {"line": 4086, "name": "main", "filename": 2}, {"line": 4018, "name": "process_entry", "filename": 2}, {"line": 4680, "name": "run", "filename": 2}, {"line": 3378, "name": "run_one_model", "filename": 2}, {"line": 3209, "name": "run_performance_test", "filename": 2}, {"line": 3141, "name": "warmup", "filename": 2}, {"line": 464, "name": "_fn", "filename": 3}, {"line": 437, "name": "forward_and_backward_pass", "filename": 1}, {"line": 438, "name": "torch_dynamo_resume_in_forward_and_backward_pass_at_437", "filename": 1}, {"line": 444, "name": "torch_dynamo_resume_in_forward_and_backward_pass_at_438", "filename": 1}, {"line": 445, "name": "torch_dynamo_resume_in_forward_and_backward_pass_at_444", "filename": 1}, {"line": 446, "name": "torch_dynamo_resume_in_forward_and_backward_pass_at_445", "filename": 1}, {"line": 2556, "name": "optimizer_step", "filename": 2}, {"line": 478, "name": "wrapper", "filename": 4}, {"line": 90, "name": "_use_grad", "filename": 4}, {"line": 197, "name": "step", "filename": 5}]}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0}
+V0806 13:56:05.999000 4107173 torch/_dynamo/utils.py:824] {"chromium_event": {}, "has_payload": "710e3b9f076a27c725bfcbb09fe8757e"}
+ {
+ "name": "_compile.compile_inner",
+ "ts": 1722977765999192.8,
+ "args": null,
+ "ph": "B",
+ "pid": 0
+ }
+V0806 13:56:05.999000 4107173 torch/_dynamo/utils.py:824] {"chromium_event": {}, "has_payload": "36c624efa1d29acebea4d1cbc6eefad3"}
+ {
+ "name": "entire_frame_compile",
+ "ts": 1722977765999253.8,
+ "args": null,
+ "ph": "B",
+ "pid": 0
+ }
+V0806 13:56:06.002000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 0, "describer_id": 312, "size": 154533888}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0}
+V0806 13:56:06.002000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 1, "describer_id": 312, "size": 154533888}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0}
+V0806 13:56:06.002000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 1, "ndim": 2, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [50304, 768], "is_leaf": true, "stride": [768, 1], "storage": 1, "view_func": "", "describer_id": 312}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0}
+V0806 13:56:06.002000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 0, "ndim": 2, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [50304, 768], "is_leaf": true, "requires_grad": true, "is_parameter": true, "stride": [768, 1], "storage": 0,
"grad": 1, "view_func": "", "describer_id": 312}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.002000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 312, "id": 0, "source": "L['self'].param_groups[0]['params'][0]"}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.003000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 2, "describer_id": 312, "size": 3145728}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.003000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 3, "describer_id": 312, "size": 3145728}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.003000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 3, "ndim": 2, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [1024, 768], "is_leaf": true, "stride": [768, 1], "storage": 3, "view_func": "", "describer_id": 312}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.003000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 2, "ndim": 2, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [1024, 768], "is_leaf": true, "requires_grad": true, "is_parameter": true, "stride": [768, 1], "storage": 2, "grad": 3, "view_func": "", "describer_id": 312}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.003000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 312, "id": 2, "source": "L['self'].param_groups[0]['params'][1]"}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.004000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 4, "describer_id": 312, "size": 3072}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.004000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 5, "describer_id": 312, "size": 3072}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.004000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 5, "ndim": 1, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [768], "is_leaf": true, "stride": [1], "storage": 5, "view_func": "", "describer_id": 312}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.004000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 4, "ndim": 1, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [768], "is_leaf": true, "requires_grad": true, "is_parameter": true, "stride": [1], "storage": 4, "grad": 5, "view_func": "", "describer_id": 312}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.004000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 312, "id": 4, "source": "L['self'].param_groups[0]['params'][2]"}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.005000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 6, "describer_id": 312, "size": 3072}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.005000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 7, "describer_id": 312, "size": 3072}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.005000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 7, "ndim": 1, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [768], 
"is_leaf": true, "stride": [1], "storage": 7, "view_func": "", "describer_id": 312}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.005000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 6, "ndim": 1, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [768], "is_leaf": true, "requires_grad": true, "is_parameter": true, "stride": [1], "storage": 6, "grad": 7, "view_func": "", "describer_id": 312}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.005000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 312, "id": 6, "source": "L['self'].param_groups[0]['params'][3]"}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.006000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 8, "describer_id": 312, "size": 7077888}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.006000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 9, "describer_id": 312, "size": 7077888}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.006000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 9, "ndim": 2, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [2304, 768], "is_leaf": true, "stride": [768, 1], "storage": 9, "view_func": "", "describer_id": 312}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.006000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 8, "ndim": 2, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [2304, 768], "is_leaf": true, "requires_grad": true, "is_parameter": true, "stride": [768, 1], "storage": 8, "grad": 9, "view_func": "", "describer_id": 312}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.006000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 312, "id": 8, "source": "L['self'].param_groups[0]['params'][4]"}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.007000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 10, "describer_id": 312, "size": 9216}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.007000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 11, "describer_id": 312, "size": 9216}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.007000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 11, "ndim": 1, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [2304], "is_leaf": true, "stride": [1], "storage": 11, "view_func": "", "describer_id": 312}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.007000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 10, "ndim": 1, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [2304], "is_leaf": true, "requires_grad": true, "is_parameter": true, "stride": [1], "storage": 10, "grad": 11, "view_func": "", "describer_id": 312}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.007000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 312, "id": 10, "source": "L['self'].param_groups[0]['params'][5]"}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.007000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 12, "describer_id": 312, "size": 2359296}, 
"frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.007000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 13, "describer_id": 312, "size": 2359296}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.008000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 13, "ndim": 2, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [768, 768], "is_leaf": true, "stride": [768, 1], "storage": 13, "view_func": "", "describer_id": 312}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.008000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 12, "ndim": 2, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [768, 768], "is_leaf": true, "requires_grad": true, "is_parameter": true, "stride": [768, 1], "storage": 12, "grad": 13, "view_func": "", "describer_id": 312}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.008000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 312, "id": 12, "source": "L['self'].param_groups[0]['params'][6]"}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.008000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 14, "describer_id": 312, "size": 3072}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.008000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 15, "describer_id": 312, "size": 3072}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.008000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 15, "ndim": 1, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [768], "is_leaf": true, "stride": [1], "storage": 15, "view_func": "", "describer_id": 312}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.009000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 14, "ndim": 1, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [768], "is_leaf": true, "requires_grad": true, "is_parameter": true, "stride": [1], "storage": 14, "grad": 15, "view_func": "", "describer_id": 312}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.009000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 312, "id": 14, "source": "L['self'].param_groups[0]['params'][7]"}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.009000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 16, "describer_id": 312, "size": 3072}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.009000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 17, "describer_id": 312, "size": 3072}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.009000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 17, "ndim": 1, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [768], "is_leaf": true, "stride": [1], "storage": 17, "view_func": "", "describer_id": 312}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.009000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 16, "ndim": 1, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [768], "is_leaf": true, "requires_grad": true, "is_parameter": true, "stride": [1], "storage": 16, "grad": 17, "view_func": "", 
"describer_id": 312}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.009000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 312, "id": 16, "source": "L['self'].param_groups[0]['params'][8]"}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.010000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 18, "describer_id": 312, "size": 3072}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.010000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 19, "describer_id": 312, "size": 3072}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.010000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 19, "ndim": 1, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [768], "is_leaf": true, "stride": [1], "storage": 19, "view_func": "", "describer_id": 312}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.010000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 18, "ndim": 1, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [768], "is_leaf": true, "requires_grad": true, "is_parameter": true, "stride": [1], "storage": 18, "grad": 19, "view_func": "", "describer_id": 312}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.010000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 312, "id": 18, "source": "L['self'].param_groups[0]['params'][9]"}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.011000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 20, "describer_id": 312, "size": 9437184}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.011000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 21, "describer_id": 312, "size": 9437184}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.011000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 21, "ndim": 2, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [3072, 768], "is_leaf": true, "stride": [768, 1], "storage": 21, "view_func": "", "describer_id": 312}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.011000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 20, "ndim": 2, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [3072, 768], "is_leaf": true, "requires_grad": true, "is_parameter": true, "stride": [768, 1], "storage": 20, "grad": 21, "view_func": "", "describer_id": 312}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.011000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 312, "id": 20, "source": "L['self'].param_groups[0]['params'][10]"}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.012000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 22, "describer_id": 312, "size": 12288}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.012000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 23, "describer_id": 312, "size": 12288}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.012000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 23, "ndim": 1, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [3072], "is_leaf": 
true, "stride": [1], "storage": 23, "view_func": "", "describer_id": 312}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.012000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 22, "ndim": 1, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [3072], "is_leaf": true, "requires_grad": true, "is_parameter": true, "stride": [1], "storage": 22, "grad": 23, "view_func": "", "describer_id": 312}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.012000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 312, "id": 22, "source": "L['self'].param_groups[0]['params'][11]"}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.013000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 24, "describer_id": 312, "size": 9437184}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.013000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 25, "describer_id": 312, "size": 9437184}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.013000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 25, "ndim": 2, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [768, 3072], "is_leaf": true, "stride": [3072, 1], "storage": 25, "view_func": "", "describer_id": 312}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.013000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 24, "ndim": 2, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [768, 3072], "is_leaf": true, "requires_grad": true, "is_parameter": true, "stride": [3072, 1], "storage": 24, "grad": 25, "view_func": "", "describer_id": 312}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.013000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 312, "id": 24, "source": "L['self'].param_groups[0]['params'][12]"}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.013000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 26, "describer_id": 312, "size": 3072}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.014000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 27, "describer_id": 312, "size": 3072}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.014000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 27, "ndim": 1, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [768], "is_leaf": true, "stride": [1], "storage": 27, "view_func": "", "describer_id": 312}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.014000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 26, "ndim": 1, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [768], "is_leaf": true, "requires_grad": true, "is_parameter": true, "stride": [1], "storage": 26, "grad": 27, "view_func": "", "describer_id": 312}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.014000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 312, "id": 26, "source": "L['self'].param_groups[0]['params'][13]"}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.014000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 28, "describer_id": 312, "size": 3072}, 
"frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.014000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 29, "describer_id": 312, "size": 3072}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.014000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 29, "ndim": 1, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [768], "is_leaf": true, "stride": [1], "storage": 29, "view_func": "", "describer_id": 312}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.014000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 28, "ndim": 1, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [768], "is_leaf": true, "requires_grad": true, "is_parameter": true, "stride": [1], "storage": 28, "grad": 29, "view_func": "", "describer_id": 312}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.015000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 312, "id": 28, "source": "L['self'].param_groups[0]['params'][14]"}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.015000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 30, "describer_id": 312, "size": 3072}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.015000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 31, "describer_id": 312, "size": 3072}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.015000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 31, "ndim": 1, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [768], "is_leaf": true, "stride": [1], "storage": 31, "view_func": "", "describer_id": 312}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.015000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 30, "ndim": 1, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [768], "is_leaf": true, "requires_grad": true, "is_parameter": true, "stride": [1], "storage": 30, "grad": 31, "view_func": "", "describer_id": 312}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.015000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 312, "id": 30, "source": "L['self'].param_groups[0]['params'][15]"}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.016000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 32, "describer_id": 312, "size": 7077888}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.016000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 33, "describer_id": 312, "size": 7077888}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.016000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 33, "ndim": 2, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [2304, 768], "is_leaf": true, "stride": [768, 1], "storage": 33, "view_func": "", "describer_id": 312}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.016000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 32, "ndim": 2, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [2304, 768], "is_leaf": true, "requires_grad": true, "is_parameter": true, "stride": [768, 1], "storage": 32, "grad": 33, "view_func": 
"", "describer_id": 312}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.016000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 312, "id": 32, "source": "L['self'].param_groups[0]['params'][16]"}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.017000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 34, "describer_id": 312, "size": 9216}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.017000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 35, "describer_id": 312, "size": 9216}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.017000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 35, "ndim": 1, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [2304], "is_leaf": true, "stride": [1], "storage": 35, "view_func": "", "describer_id": 312}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.017000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 34, "ndim": 1, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [2304], "is_leaf": true, "requires_grad": true, "is_parameter": true, "stride": [1], "storage": 34, "grad": 35, "view_func": "", "describer_id": 312}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.017000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 312, "id": 34, "source": "L['self'].param_groups[0]['params'][17]"}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.018000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 36, "describer_id": 312, "size": 2359296}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.018000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 37, "describer_id": 312, "size": 2359296}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.018000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 37, "ndim": 2, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [768, 768], "is_leaf": true, "stride": [768, 1], "storage": 37, "view_func": "", "describer_id": 312}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.018000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 36, "ndim": 2, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [768, 768], "is_leaf": true, "requires_grad": true, "is_parameter": true, "stride": [768, 1], "storage": 36, "grad": 37, "view_func": "", "describer_id": 312}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.018000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 312, "id": 36, "source": "L['self'].param_groups[0]['params'][18]"}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.019000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 38, "describer_id": 312, "size": 3072}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.019000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 39, "describer_id": 312, "size": 3072}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.019000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 39, "ndim": 1, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [768], 
"is_leaf": true, "stride": [1], "storage": 39, "view_func": "", "describer_id": 312}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.019000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 38, "ndim": 1, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [768], "is_leaf": true, "requires_grad": true, "is_parameter": true, "stride": [1], "storage": 38, "grad": 39, "view_func": "", "describer_id": 312}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.019000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 312, "id": 38, "source": "L['self'].param_groups[0]['params'][19]"}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.019000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 40, "describer_id": 312, "size": 3072}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.019000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 41, "describer_id": 312, "size": 3072}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.020000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 41, "ndim": 1, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [768], "is_leaf": true, "stride": [1], "storage": 41, "view_func": "", "describer_id": 312}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.020000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 40, "ndim": 1, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [768], "is_leaf": true, "requires_grad": true, "is_parameter": true, "stride": [1], "storage": 40, "grad": 41, "view_func": "", "describer_id": 312}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.020000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 312, "id": 40, "source": "L['self'].param_groups[0]['params'][20]"}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.020000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 42, "describer_id": 312, "size": 3072}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.020000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 43, "describer_id": 312, "size": 3072}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.020000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 43, "ndim": 1, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [768], "is_leaf": true, "stride": [1], "storage": 43, "view_func": "", "describer_id": 312}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.021000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 42, "ndim": 1, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [768], "is_leaf": true, "requires_grad": true, "is_parameter": true, "stride": [1], "storage": 42, "grad": 43, "view_func": "", "describer_id": 312}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.021000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 312, "id": 42, "source": "L['self'].param_groups[0]['params'][21]"}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.021000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 44, "describer_id": 312, "size": 9437184}, "frame_id": 7, 
"frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.021000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 45, "describer_id": 312, "size": 9437184}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.021000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 45, "ndim": 2, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [3072, 768], "is_leaf": true, "stride": [768, 1], "storage": 45, "view_func": "", "describer_id": 312}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.021000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 44, "ndim": 2, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [3072, 768], "is_leaf": true, "requires_grad": true, "is_parameter": true, "stride": [768, 1], "storage": 44, "grad": 45, "view_func": "", "describer_id": 312}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.021000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 312, "id": 44, "source": "L['self'].param_groups[0]['params'][22]"}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.022000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 46, "describer_id": 312, "size": 12288}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.022000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 47, "describer_id": 312, "size": 12288}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.022000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 47, "ndim": 1, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [3072], "is_leaf": true, "stride": [1], "storage": 47, "view_func": "", "describer_id": 312}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.022000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 46, "ndim": 1, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [3072], "is_leaf": true, "requires_grad": true, "is_parameter": true, "stride": [1], "storage": 46, "grad": 47, "view_func": "", "describer_id": 312}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.022000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 312, "id": 46, "source": "L['self'].param_groups[0]['params'][23]"}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.023000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 48, "describer_id": 312, "size": 9437184}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.023000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 49, "describer_id": 312, "size": 9437184}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.023000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 49, "ndim": 2, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [768, 3072], "is_leaf": true, "stride": [3072, 1], "storage": 49, "view_func": "", "describer_id": 312}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.023000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 48, "ndim": 2, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [768, 3072], "is_leaf": true, "requires_grad": true, "is_parameter": true, "stride": [3072, 1], "storage": 48, "grad": 
49, "view_func": "", "describer_id": 312}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.023000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 312, "id": 48, "source": "L['self'].param_groups[0]['params'][24]"}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.024000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 50, "describer_id": 312, "size": 3072}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.024000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 51, "describer_id": 312, "size": 3072}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.024000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 51, "ndim": 1, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [768], "is_leaf": true, "stride": [1], "storage": 51, "view_func": "", "describer_id": 312}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.024000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 50, "ndim": 1, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [768], "is_leaf": true, "requires_grad": true, "is_parameter": true, "stride": [1], "storage": 50, "grad": 51, "view_func": "", "describer_id": 312}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.024000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 312, "id": 50, "source": "L['self'].param_groups[0]['params'][25]"}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.024000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 52, "describer_id": 312, "size": 3072}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.025000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 53, "describer_id": 312, "size": 3072}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.025000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 53, "ndim": 1, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [768], "is_leaf": true, "stride": [1], "storage": 53, "view_func": "", "describer_id": 312}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.025000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 52, "ndim": 1, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [768], "is_leaf": true, "requires_grad": true, "is_parameter": true, "stride": [1], "storage": 52, "grad": 53, "view_func": "", "describer_id": 312}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.025000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 312, "id": 52, "source": "L['self'].param_groups[0]['params'][26]"}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.025000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 54, "describer_id": 312, "size": 3072}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.025000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 55, "describer_id": 312, "size": 3072}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.025000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 55, "ndim": 1, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [768], "is_leaf": true, 
"stride": [1], "storage": 55, "view_func": "", "describer_id": 312}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.026000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 54, "ndim": 1, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [768], "is_leaf": true, "requires_grad": true, "is_parameter": true, "stride": [1], "storage": 54, "grad": 55, "view_func": "", "describer_id": 312}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.026000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 312, "id": 54, "source": "L['self'].param_groups[0]['params'][27]"}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.026000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 56, "describer_id": 312, "size": 7077888}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.026000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 57, "describer_id": 312, "size": 7077888}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.026000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 57, "ndim": 2, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [2304, 768], "is_leaf": true, "stride": [768, 1], "storage": 57, "view_func": "", "describer_id": 312}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.026000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 56, "ndim": 2, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [2304, 768], "is_leaf": true, "requires_grad": true, "is_parameter": true, "stride": [768, 1], "storage": 56, "grad": 57, "view_func": "", "describer_id": 312}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.026000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 312, "id": 56, "source": "L['self'].param_groups[0]['params'][28]"}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.027000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 58, "describer_id": 312, "size": 9216}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.027000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 59, "describer_id": 312, "size": 9216}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.027000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 59, "ndim": 1, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [2304], "is_leaf": true, "stride": [1], "storage": 59, "view_func": "", "describer_id": 312}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.027000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 58, "ndim": 1, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [2304], "is_leaf": true, "requires_grad": true, "is_parameter": true, "stride": [1], "storage": 58, "grad": 59, "view_func": "", "describer_id": 312}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.027000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 312, "id": 58, "source": "L['self'].param_groups[0]['params'][29]"}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.028000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 60, "describer_id": 312, "size": 2359296}, 
"frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.028000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 61, "describer_id": 312, "size": 2359296}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.028000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 61, "ndim": 2, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [768, 768], "is_leaf": true, "stride": [768, 1], "storage": 61, "view_func": "", "describer_id": 312}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.028000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 60, "ndim": 2, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [768, 768], "is_leaf": true, "requires_grad": true, "is_parameter": true, "stride": [768, 1], "storage": 60, "grad": 61, "view_func": "", "describer_id": 312}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.028000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 312, "id": 60, "source": "L['self'].param_groups[0]['params'][30]"}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.029000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 62, "describer_id": 312, "size": 3072}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.029000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 63, "describer_id": 312, "size": 3072}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.029000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 63, "ndim": 1, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [768], "is_leaf": true, "stride": [1], "storage": 63, "view_func": "", "describer_id": 312}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.029000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 62, "ndim": 1, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [768], "is_leaf": true, "requires_grad": true, "is_parameter": true, "stride": [1], "storage": 62, "grad": 63, "view_func": "", "describer_id": 312}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.029000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 312, "id": 62, "source": "L['self'].param_groups[0]['params'][31]"}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.030000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 64, "describer_id": 312, "size": 3072}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.030000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 65, "describer_id": 312, "size": 3072}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.030000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 65, "ndim": 1, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [768], "is_leaf": true, "stride": [1], "storage": 65, "view_func": "", "describer_id": 312}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.030000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 64, "ndim": 1, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [768], "is_leaf": true, "requires_grad": true, "is_parameter": true, "stride": [1], "storage": 64, "grad": 65, "view_func": "", 
"describer_id": 312}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.030000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 312, "id": 64, "source": "L['self'].param_groups[0]['params'][32]"}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.030000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 66, "describer_id": 312, "size": 3072}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.030000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 67, "describer_id": 312, "size": 3072}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.031000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 67, "ndim": 1, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [768], "is_leaf": true, "stride": [1], "storage": 67, "view_func": "", "describer_id": 312}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.031000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 66, "ndim": 1, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [768], "is_leaf": true, "requires_grad": true, "is_parameter": true, "stride": [1], "storage": 66, "grad": 67, "view_func": "", "describer_id": 312}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.031000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 312, "id": 66, "source": "L['self'].param_groups[0]['params'][33]"}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.031000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 68, "describer_id": 312, "size": 9437184}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.031000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 69, "describer_id": 312, "size": 9437184}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.031000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 69, "ndim": 2, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [3072, 768], "is_leaf": true, "stride": [768, 1], "storage": 69, "view_func": "", "describer_id": 312}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.031000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 68, "ndim": 2, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [3072, 768], "is_leaf": true, "requires_grad": true, "is_parameter": true, "stride": [768, 1], "storage": 68, "grad": 69, "view_func": "", "describer_id": 312}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.032000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 312, "id": 68, "source": "L['self'].param_groups[0]['params'][34]"}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.032000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 70, "describer_id": 312, "size": 12288}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.032000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 71, "describer_id": 312, "size": 12288}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.032000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 71, "ndim": 1, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [3072], 
"is_leaf": true, "stride": [1], "storage": 71, "view_func": "", "describer_id": 312}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.032000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 70, "ndim": 1, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [3072], "is_leaf": true, "requires_grad": true, "is_parameter": true, "stride": [1], "storage": 70, "grad": 71, "view_func": "", "describer_id": 312}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.032000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 312, "id": 70, "source": "L['self'].param_groups[0]['params'][35]"}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.033000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 72, "describer_id": 312, "size": 9437184}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.033000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 73, "describer_id": 312, "size": 9437184}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.033000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 73, "ndim": 2, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [768, 3072], "is_leaf": true, "stride": [3072, 1], "storage": 73, "view_func": "", "describer_id": 312}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.033000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 72, "ndim": 2, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [768, 3072], "is_leaf": true, "requires_grad": true, "is_parameter": true, "stride": [3072, 1], "storage": 72, "grad": 73, "view_func": "", "describer_id": 312}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.033000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 312, "id": 72, "source": "L['self'].param_groups[0]['params'][36]"}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.034000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 74, "describer_id": 312, "size": 3072}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.034000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 75, "describer_id": 312, "size": 3072}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.034000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 75, "ndim": 1, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [768], "is_leaf": true, "stride": [1], "storage": 75, "view_func": "", "describer_id": 312}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.034000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 74, "ndim": 1, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [768], "is_leaf": true, "requires_grad": true, "is_parameter": true, "stride": [1], "storage": 74, "grad": 75, "view_func": "", "describer_id": 312}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.034000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 312, "id": 74, "source": "L['self'].param_groups[0]['params'][37]"}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.035000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 76, "describer_id": 312, 
"size": 3072}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.035000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 77, "describer_id": 312, "size": 3072}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.035000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 77, "ndim": 1, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [768], "is_leaf": true, "stride": [1], "storage": 77, "view_func": "", "describer_id": 312}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.035000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 76, "ndim": 1, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [768], "is_leaf": true, "requires_grad": true, "is_parameter": true, "stride": [1], "storage": 76, "grad": 77, "view_func": "", "describer_id": 312}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.035000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 312, "id": 76, "source": "L['self'].param_groups[0]['params'][38]"}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.035000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 78, "describer_id": 312, "size": 3072}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.036000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 79, "describer_id": 312, "size": 3072}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.036000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 79, "ndim": 1, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [768], "is_leaf": true, "stride": [1], "storage": 79, "view_func": "", "describer_id": 312}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.036000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 78, "ndim": 1, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [768], "is_leaf": true, "requires_grad": true, "is_parameter": true, "stride": [1], "storage": 78, "grad": 79, "view_func": "", "describer_id": 312}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.036000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 312, "id": 78, "source": "L['self'].param_groups[0]['params'][39]"}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.036000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 80, "describer_id": 312, "size": 7077888}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.036000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 81, "describer_id": 312, "size": 7077888}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.036000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 81, "ndim": 2, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [2304, 768], "is_leaf": true, "stride": [768, 1], "storage": 81, "view_func": "", "describer_id": 312}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.037000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 80, "ndim": 2, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [2304, 768], "is_leaf": true, "requires_grad": true, "is_parameter": true, "stride": [768, 1], "storage": 80, "grad": 81, 
"view_func": "", "describer_id": 312}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.037000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 312, "id": 80, "source": "L['self'].param_groups[0]['params'][40]"}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.037000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 82, "describer_id": 312, "size": 9216}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.037000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 83, "describer_id": 312, "size": 9216}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.037000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 83, "ndim": 1, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [2304], "is_leaf": true, "stride": [1], "storage": 83, "view_func": "", "describer_id": 312}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.037000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 82, "ndim": 1, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [2304], "is_leaf": true, "requires_grad": true, "is_parameter": true, "stride": [1], "storage": 82, "grad": 83, "view_func": "", "describer_id": 312}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.037000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 312, "id": 82, "source": "L['self'].param_groups[0]['params'][41]"}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.038000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 84, "describer_id": 312, "size": 2359296}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.038000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 85, "describer_id": 312, "size": 2359296}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.038000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 85, "ndim": 2, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [768, 768], "is_leaf": true, "stride": [768, 1], "storage": 85, "view_func": "", "describer_id": 312}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.038000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 84, "ndim": 2, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [768, 768], "is_leaf": true, "requires_grad": true, "is_parameter": true, "stride": [768, 1], "storage": 84, "grad": 85, "view_func": "", "describer_id": 312}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.038000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 312, "id": 84, "source": "L['self'].param_groups[0]['params'][42]"}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.039000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 86, "describer_id": 312, "size": 3072}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.039000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 87, "describer_id": 312, "size": 3072}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.039000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 87, "ndim": 1, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": 
[768], "is_leaf": true, "stride": [1], "storage": 87, "view_func": "", "describer_id": 312}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.039000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 86, "ndim": 1, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [768], "is_leaf": true, "requires_grad": true, "is_parameter": true, "stride": [1], "storage": 86, "grad": 87, "view_func": "", "describer_id": 312}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.039000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 312, "id": 86, "source": "L['self'].param_groups[0]['params'][43]"}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.040000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 88, "describer_id": 312, "size": 3072}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.040000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 89, "describer_id": 312, "size": 3072}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.040000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 89, "ndim": 1, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [768], "is_leaf": true, "stride": [1], "storage": 89, "view_func": "", "describer_id": 312}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.040000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 88, "ndim": 1, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [768], "is_leaf": true, "requires_grad": true, "is_parameter": true, "stride": [1], "storage": 88, "grad": 89, "view_func": "", "describer_id": 312}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.040000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 312, "id": 88, "source": "L['self'].param_groups[0]['params'][44]"}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.041000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 90, "describer_id": 312, "size": 3072}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.041000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 91, "describer_id": 312, "size": 3072}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.041000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 91, "ndim": 1, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [768], "is_leaf": true, "stride": [1], "storage": 91, "view_func": "", "describer_id": 312}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.041000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 90, "ndim": 1, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [768], "is_leaf": true, "requires_grad": true, "is_parameter": true, "stride": [1], "storage": 90, "grad": 91, "view_func": "", "describer_id": 312}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.041000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 312, "id": 90, "source": "L['self'].param_groups[0]['params'][45]"}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.041000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 92, "describer_id": 312, "size": 9437184}, "frame_id": 
7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.041000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 93, "describer_id": 312, "size": 9437184}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.042000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 93, "ndim": 2, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [3072, 768], "is_leaf": true, "stride": [768, 1], "storage": 93, "view_func": "", "describer_id": 312}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.042000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 92, "ndim": 2, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [3072, 768], "is_leaf": true, "requires_grad": true, "is_parameter": true, "stride": [768, 1], "storage": 92, "grad": 93, "view_func": "", "describer_id": 312}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.042000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 312, "id": 92, "source": "L['self'].param_groups[0]['params'][46]"}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.042000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 94, "describer_id": 312, "size": 12288}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.042000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 95, "describer_id": 312, "size": 12288}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.042000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 95, "ndim": 1, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [3072], "is_leaf": true, "stride": [1], "storage": 95, "view_func": "", "describer_id": 312}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.042000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 94, "ndim": 1, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [3072], "is_leaf": true, "requires_grad": true, "is_parameter": true, "stride": [1], "storage": 94, "grad": 95, "view_func": "", "describer_id": 312}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.043000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 312, "id": 94, "source": "L['self'].param_groups[0]['params'][47]"}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.043000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 96, "describer_id": 312, "size": 9437184}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.043000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 97, "describer_id": 312, "size": 9437184}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.043000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 97, "ndim": 2, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [768, 3072], "is_leaf": true, "stride": [3072, 1], "storage": 97, "view_func": "", "describer_id": 312}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.043000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 96, "ndim": 2, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [768, 3072], "is_leaf": true, "requires_grad": true, "is_parameter": true, "stride": [3072, 1], "storage": 96, "grad": 
97, "view_func": "", "describer_id": 312}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.043000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 312, "id": 96, "source": "L['self'].param_groups[0]['params'][48]"}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.044000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 98, "describer_id": 312, "size": 3072}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.044000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 99, "describer_id": 312, "size": 3072}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.044000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 99, "ndim": 1, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [768], "is_leaf": true, "stride": [1], "storage": 99, "view_func": "", "describer_id": 312}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.044000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 98, "ndim": 1, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [768], "is_leaf": true, "requires_grad": true, "is_parameter": true, "stride": [1], "storage": 98, "grad": 99, "view_func": "", "describer_id": 312}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.044000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 312, "id": 98, "source": "L['self'].param_groups[0]['params'][49]"}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.045000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 100, "describer_id": 312, "size": 3072}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.045000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 101, "describer_id": 312, "size": 3072}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.045000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 101, "ndim": 1, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [768], "is_leaf": true, "stride": [1], "storage": 101, "view_func": "", "describer_id": 312}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.045000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 100, "ndim": 1, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [768], "is_leaf": true, "requires_grad": true, "is_parameter": true, "stride": [1], "storage": 100, "grad": 101, "view_func": "", "describer_id": 312}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.045000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 312, "id": 100, "source": "L['self'].param_groups[0]['params'][50]"}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.046000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 102, "describer_id": 312, "size": 3072}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.046000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 103, "describer_id": 312, "size": 3072}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.046000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 103, "ndim": 1, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [768], 
"is_leaf": true, "stride": [1], "storage": 103, "view_func": "", "describer_id": 312}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.046000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 102, "ndim": 1, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [768], "is_leaf": true, "requires_grad": true, "is_parameter": true, "stride": [1], "storage": 102, "grad": 103, "view_func": "", "describer_id": 312}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.046000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 312, "id": 102, "source": "L['self'].param_groups[0]['params'][51]"}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.046000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 104, "describer_id": 312, "size": 7077888}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.047000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 105, "describer_id": 312, "size": 7077888}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.047000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 105, "ndim": 2, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [2304, 768], "is_leaf": true, "stride": [768, 1], "storage": 105, "view_func": "", "describer_id": 312}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.047000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 104, "ndim": 2, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [2304, 768], "is_leaf": true, "requires_grad": true, "is_parameter": true, "stride": [768, 1], "storage": 104, "grad": 105, "view_func": "", "describer_id": 312}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.047000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 312, "id": 104, "source": "L['self'].param_groups[0]['params'][52]"}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.047000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 106, "describer_id": 312, "size": 9216}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.047000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 107, "describer_id": 312, "size": 9216}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.048000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 107, "ndim": 1, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [2304], "is_leaf": true, "stride": [1], "storage": 107, "view_func": "", "describer_id": 312}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.048000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 106, "ndim": 1, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [2304], "is_leaf": true, "requires_grad": true, "is_parameter": true, "stride": [1], "storage": 106, "grad": 107, "view_func": "", "describer_id": 312}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.048000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 312, "id": 106, "source": "L['self'].param_groups[0]['params'][53]"}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.048000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 108, 
"describer_id": 312, "size": 2359296}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.048000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 109, "describer_id": 312, "size": 2359296}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.048000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 109, "ndim": 2, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [768, 768], "is_leaf": true, "stride": [768, 1], "storage": 109, "view_func": "", "describer_id": 312}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.048000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 108, "ndim": 2, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [768, 768], "is_leaf": true, "requires_grad": true, "is_parameter": true, "stride": [768, 1], "storage": 108, "grad": 109, "view_func": "", "describer_id": 312}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.048000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 312, "id": 108, "source": "L['self'].param_groups[0]['params'][54]"}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.049000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 110, "describer_id": 312, "size": 3072}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.049000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 111, "describer_id": 312, "size": 3072}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.049000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 111, "ndim": 1, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [768], "is_leaf": true, "stride": [1], "storage": 111, "view_func": "", "describer_id": 312}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.049000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 110, "ndim": 1, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [768], "is_leaf": true, "requires_grad": true, "is_parameter": true, "stride": [1], "storage": 110, "grad": 111, "view_func": "", "describer_id": 312}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.049000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 312, "id": 110, "source": "L['self'].param_groups[0]['params'][55]"}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.050000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 112, "describer_id": 312, "size": 3072}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.050000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 113, "describer_id": 312, "size": 3072}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.050000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 113, "ndim": 1, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [768], "is_leaf": true, "stride": [1], "storage": 113, "view_func": "", "describer_id": 312}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.050000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 112, "ndim": 1, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [768], "is_leaf": true, "requires_grad": true, "is_parameter": true, 
"stride": [1], "storage": 112, "grad": 113, "view_func": "", "describer_id": 312}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.050000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 312, "id": 112, "source": "L['self'].param_groups[0]['params'][56]"}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.051000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 114, "describer_id": 312, "size": 3072}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.051000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 115, "describer_id": 312, "size": 3072}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.051000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 115, "ndim": 1, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [768], "is_leaf": true, "stride": [1], "storage": 115, "view_func": "", "describer_id": 312}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.051000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 114, "ndim": 1, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [768], "is_leaf": true, "requires_grad": true, "is_parameter": true, "stride": [1], "storage": 114, "grad": 115, "view_func": "", "describer_id": 312}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.051000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 312, "id": 114, "source": "L['self'].param_groups[0]['params'][57]"}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.051000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 116, "describer_id": 312, "size": 9437184}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.052000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 117, "describer_id": 312, "size": 9437184}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.052000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 117, "ndim": 2, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [3072, 768], "is_leaf": true, "stride": [768, 1], "storage": 117, "view_func": "", "describer_id": 312}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.052000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 116, "ndim": 2, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [3072, 768], "is_leaf": true, "requires_grad": true, "is_parameter": true, "stride": [768, 1], "storage": 116, "grad": 117, "view_func": "", "describer_id": 312}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.052000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 312, "id": 116, "source": "L['self'].param_groups[0]['params'][58]"}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.052000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 118, "describer_id": 312, "size": 12288}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.052000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 119, "describer_id": 312, "size": 12288}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.053000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 119, "ndim": 1, "dtype": 
"torch.float32", "device": "device(type='cuda', index=0)", "size": [3072], "is_leaf": true, "stride": [1], "storage": 119, "view_func": "", "describer_id": 312}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.053000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 118, "ndim": 1, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [3072], "is_leaf": true, "requires_grad": true, "is_parameter": true, "stride": [1], "storage": 118, "grad": 119, "view_func": "", "describer_id": 312}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.053000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 312, "id": 118, "source": "L['self'].param_groups[0]['params'][59]"}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.053000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 120, "describer_id": 312, "size": 9437184}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.053000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 121, "describer_id": 312, "size": 9437184}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.053000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 121, "ndim": 2, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [768, 3072], "is_leaf": true, "stride": [3072, 1], "storage": 121, "view_func": "", "describer_id": 312}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.054000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 120, "ndim": 2, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [768, 3072], "is_leaf": true, "requires_grad": true, "is_parameter": true, "stride": [3072, 1], "storage": 120, "grad": 121, "view_func": "", "describer_id": 312}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.054000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 312, "id": 120, "source": "L['self'].param_groups[0]['params'][60]"}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.054000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 122, "describer_id": 312, "size": 3072}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.054000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 123, "describer_id": 312, "size": 3072}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.054000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 123, "ndim": 1, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [768], "is_leaf": true, "stride": [1], "storage": 123, "view_func": "", "describer_id": 312}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.054000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 122, "ndim": 1, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [768], "is_leaf": true, "requires_grad": true, "is_parameter": true, "stride": [1], "storage": 122, "grad": 123, "view_func": "", "describer_id": 312}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.054000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 312, "id": 122, "source": "L['self'].param_groups[0]['params'][61]"}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.055000 4107173 
torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 124, "describer_id": 312, "size": 3072}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.055000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 125, "describer_id": 312, "size": 3072}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.055000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 125, "ndim": 1, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [768], "is_leaf": true, "stride": [1], "storage": 125, "view_func": "", "describer_id": 312}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.055000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 124, "ndim": 1, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [768], "is_leaf": true, "requires_grad": true, "is_parameter": true, "stride": [1], "storage": 124, "grad": 125, "view_func": "", "describer_id": 312}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.055000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 312, "id": 124, "source": "L['self'].param_groups[0]['params'][62]"}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.056000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 126, "describer_id": 312, "size": 3072}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.056000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 127, "describer_id": 312, "size": 3072}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.056000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 127, "ndim": 1, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [768], "is_leaf": true, "stride": [1], "storage": 127, "view_func": "", "describer_id": 312}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.056000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 126, "ndim": 1, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [768], "is_leaf": true, "requires_grad": true, "is_parameter": true, "stride": [1], "storage": 126, "grad": 127, "view_func": "", "describer_id": 312}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.056000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 312, "id": 126, "source": "L['self'].param_groups[0]['params'][63]"}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.057000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 128, "describer_id": 312, "size": 7077888}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.057000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 129, "describer_id": 312, "size": 7077888}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.057000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 129, "ndim": 2, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [2304, 768], "is_leaf": true, "stride": [768, 1], "storage": 129, "view_func": "", "describer_id": 312}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.057000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 128, "ndim": 2, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [2304, 
768], "is_leaf": true, "requires_grad": true, "is_parameter": true, "stride": [768, 1], "storage": 128, "grad": 129, "view_func": "", "describer_id": 312}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.057000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 312, "id": 128, "source": "L['self'].param_groups[0]['params'][64]"}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.057000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 130, "describer_id": 312, "size": 9216}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.058000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 131, "describer_id": 312, "size": 9216}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.058000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 131, "ndim": 1, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [2304], "is_leaf": true, "stride": [1], "storage": 131, "view_func": "", "describer_id": 312}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.058000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 130, "ndim": 1, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [2304], "is_leaf": true, "requires_grad": true, "is_parameter": true, "stride": [1], "storage": 130, "grad": 131, "view_func": "", "describer_id": 312}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.058000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 312, "id": 130, "source": "L['self'].param_groups[0]['params'][65]"}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.058000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 132, "describer_id": 312, "size": 2359296}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.058000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 133, "describer_id": 312, "size": 2359296}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.059000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 133, "ndim": 2, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [768, 768], "is_leaf": true, "stride": [768, 1], "storage": 133, "view_func": "", "describer_id": 312}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.059000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 132, "ndim": 2, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [768, 768], "is_leaf": true, "requires_grad": true, "is_parameter": true, "stride": [768, 1], "storage": 132, "grad": 133, "view_func": "", "describer_id": 312}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.059000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 312, "id": 132, "source": "L['self'].param_groups[0]['params'][66]"}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.059000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 134, "describer_id": 312, "size": 3072}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.059000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 135, "describer_id": 312, "size": 3072}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.059000 4107173 
torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 135, "ndim": 1, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [768], "is_leaf": true, "stride": [1], "storage": 135, "view_func": "", "describer_id": 312}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.059000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 134, "ndim": 1, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [768], "is_leaf": true, "requires_grad": true, "is_parameter": true, "stride": [1], "storage": 134, "grad": 135, "view_func": "", "describer_id": 312}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.059000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 312, "id": 134, "source": "L['self'].param_groups[0]['params'][67]"}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.060000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 136, "describer_id": 312, "size": 3072}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.060000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 137, "describer_id": 312, "size": 3072}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.060000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 137, "ndim": 1, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [768], "is_leaf": true, "stride": [1], "storage": 137, "view_func": "", "describer_id": 312}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.060000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 136, "ndim": 1, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [768], "is_leaf": true, "requires_grad": true, "is_parameter": true, "stride": [1], "storage": 136, "grad": 137, "view_func": "", "describer_id": 312}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.060000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 312, "id": 136, "source": "L['self'].param_groups[0]['params'][68]"}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.061000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 138, "describer_id": 312, "size": 3072}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.061000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 139, "describer_id": 312, "size": 3072}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.061000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 139, "ndim": 1, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [768], "is_leaf": true, "stride": [1], "storage": 139, "view_func": "", "describer_id": 312}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.061000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 138, "ndim": 1, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [768], "is_leaf": true, "requires_grad": true, "is_parameter": true, "stride": [1], "storage": 138, "grad": 139, "view_func": "", "describer_id": 312}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.061000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 312, "id": 138, "source": "L['self'].param_groups[0]['params'][69]"}, "frame_id": 7, 
"frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.062000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 140, "describer_id": 312, "size": 9437184}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.062000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 141, "describer_id": 312, "size": 9437184}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.062000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 141, "ndim": 2, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [3072, 768], "is_leaf": true, "stride": [768, 1], "storage": 141, "view_func": "", "describer_id": 312}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.062000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 140, "ndim": 2, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [3072, 768], "is_leaf": true, "requires_grad": true, "is_parameter": true, "stride": [768, 1], "storage": 140, "grad": 141, "view_func": "", "describer_id": 312}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.062000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 312, "id": 140, "source": "L['self'].param_groups[0]['params'][70]"}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.063000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 142, "describer_id": 312, "size": 12288}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.063000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 143, "describer_id": 312, "size": 12288}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.063000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 143, "ndim": 1, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [3072], "is_leaf": true, "stride": [1], "storage": 143, "view_func": "", "describer_id": 312}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.063000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 142, "ndim": 1, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [3072], "is_leaf": true, "requires_grad": true, "is_parameter": true, "stride": [1], "storage": 142, "grad": 143, "view_func": "", "describer_id": 312}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.063000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 312, "id": 142, "source": "L['self'].param_groups[0]['params'][71]"}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.063000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 144, "describer_id": 312, "size": 9437184}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.063000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 145, "describer_id": 312, "size": 9437184}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.064000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 145, "ndim": 2, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [768, 3072], "is_leaf": true, "stride": [3072, 1], "storage": 145, "view_func": "", "describer_id": 312}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.064000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 
144, "ndim": 2, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [768, 3072], "is_leaf": true, "requires_grad": true, "is_parameter": true, "stride": [3072, 1], "storage": 144, "grad": 145, "view_func": "", "describer_id": 312}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.064000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 312, "id": 144, "source": "L['self'].param_groups[0]['params'][72]"}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.064000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 146, "describer_id": 312, "size": 3072}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.064000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 147, "describer_id": 312, "size": 3072}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.064000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 147, "ndim": 1, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [768], "is_leaf": true, "stride": [1], "storage": 147, "view_func": "", "describer_id": 312}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.065000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 146, "ndim": 1, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [768], "is_leaf": true, "requires_grad": true, "is_parameter": true, "stride": [1], "storage": 146, "grad": 147, "view_func": "", "describer_id": 312}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.065000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 312, "id": 146, "source": "L['self'].param_groups[0]['params'][73]"}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.065000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 148, "describer_id": 312, "size": 3072}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.065000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 149, "describer_id": 312, "size": 3072}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.065000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 149, "ndim": 1, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [768], "is_leaf": true, "stride": [1], "storage": 149, "view_func": "", "describer_id": 312}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.065000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 148, "ndim": 1, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [768], "is_leaf": true, "requires_grad": true, "is_parameter": true, "stride": [1], "storage": 148, "grad": 149, "view_func": "", "describer_id": 312}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.065000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 312, "id": 148, "source": "L['self'].param_groups[0]['params'][74]"}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.066000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 150, "describer_id": 312, "size": 3072}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.066000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 151, "describer_id": 312, "size": 3072}, "frame_id": 7, 
"frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.066000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 151, "ndim": 1, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [768], "is_leaf": true, "stride": [1], "storage": 151, "view_func": "", "describer_id": 312}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.066000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 150, "ndim": 1, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [768], "is_leaf": true, "requires_grad": true, "is_parameter": true, "stride": [1], "storage": 150, "grad": 151, "view_func": "", "describer_id": 312}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.066000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 312, "id": 150, "source": "L['self'].param_groups[0]['params'][75]"}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.067000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 152, "describer_id": 312, "size": 7077888}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.067000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 153, "describer_id": 312, "size": 7077888}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.067000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 153, "ndim": 2, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [2304, 768], "is_leaf": true, "stride": [768, 1], "storage": 153, "view_func": "", "describer_id": 312}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.067000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 152, "ndim": 2, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [2304, 768], "is_leaf": true, "requires_grad": true, "is_parameter": true, "stride": [768, 1], "storage": 152, "grad": 153, "view_func": "", "describer_id": 312}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.067000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 312, "id": 152, "source": "L['self'].param_groups[0]['params'][76]"}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.068000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 154, "describer_id": 312, "size": 9216}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.068000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 155, "describer_id": 312, "size": 9216}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.068000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 155, "ndim": 1, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [2304], "is_leaf": true, "stride": [1], "storage": 155, "view_func": "", "describer_id": 312}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.068000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 154, "ndim": 1, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [2304], "is_leaf": true, "requires_grad": true, "is_parameter": true, "stride": [1], "storage": 154, "grad": 155, "view_func": "", "describer_id": 312}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.068000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": 
{"describer_id": 312, "id": 154, "source": "L['self'].param_groups[0]['params'][77]"}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.068000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 156, "describer_id": 312, "size": 2359296}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.069000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 157, "describer_id": 312, "size": 2359296}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.069000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 157, "ndim": 2, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [768, 768], "is_leaf": true, "stride": [768, 1], "storage": 157, "view_func": "", "describer_id": 312}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.069000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 156, "ndim": 2, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [768, 768], "is_leaf": true, "requires_grad": true, "is_parameter": true, "stride": [768, 1], "storage": 156, "grad": 157, "view_func": "", "describer_id": 312}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.069000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 312, "id": 156, "source": "L['self'].param_groups[0]['params'][78]"}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.069000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 158, "describer_id": 312, "size": 3072}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.069000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 159, "describer_id": 312, "size": 3072}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.070000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 159, "ndim": 1, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [768], "is_leaf": true, "stride": [1], "storage": 159, "view_func": "", "describer_id": 312}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.070000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 158, "ndim": 1, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [768], "is_leaf": true, "requires_grad": true, "is_parameter": true, "stride": [1], "storage": 158, "grad": 159, "view_func": "", "describer_id": 312}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.070000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 312, "id": 158, "source": "L['self'].param_groups[0]['params'][79]"}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.070000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 160, "describer_id": 312, "size": 3072}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.070000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 161, "describer_id": 312, "size": 3072}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.070000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 161, "ndim": 1, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [768], "is_leaf": true, "stride": [1], "storage": 161, "view_func": "", "describer_id": 312}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 
13:56:06.070000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 160, "ndim": 1, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [768], "is_leaf": true, "requires_grad": true, "is_parameter": true, "stride": [1], "storage": 160, "grad": 161, "view_func": "", "describer_id": 312}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.070000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 312, "id": 160, "source": "L['self'].param_groups[0]['params'][80]"}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.071000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 162, "describer_id": 312, "size": 3072}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.071000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 163, "describer_id": 312, "size": 3072}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.071000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 163, "ndim": 1, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [768], "is_leaf": true, "stride": [1], "storage": 163, "view_func": "", "describer_id": 312}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.071000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 162, "ndim": 1, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [768], "is_leaf": true, "requires_grad": true, "is_parameter": true, "stride": [1], "storage": 162, "grad": 163, "view_func": "", "describer_id": 312}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.071000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 312, "id": 162, "source": "L['self'].param_groups[0]['params'][81]"}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.072000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 164, "describer_id": 312, "size": 9437184}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.072000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 165, "describer_id": 312, "size": 9437184}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.072000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 165, "ndim": 2, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [3072, 768], "is_leaf": true, "stride": [768, 1], "storage": 165, "view_func": "", "describer_id": 312}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.072000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 164, "ndim": 2, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [3072, 768], "is_leaf": true, "requires_grad": true, "is_parameter": true, "stride": [768, 1], "storage": 164, "grad": 165, "view_func": "", "describer_id": 312}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.072000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 312, "id": 164, "source": "L['self'].param_groups[0]['params'][82]"}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.073000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 166, "describer_id": 312, "size": 12288}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.073000 4107173 
torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 167, "describer_id": 312, "size": 12288}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.073000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 167, "ndim": 1, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [3072], "is_leaf": true, "stride": [1], "storage": 167, "view_func": "", "describer_id": 312}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.073000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 166, "ndim": 1, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [3072], "is_leaf": true, "requires_grad": true, "is_parameter": true, "stride": [1], "storage": 166, "grad": 167, "view_func": "", "describer_id": 312}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.073000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 312, "id": 166, "source": "L['self'].param_groups[0]['params'][83]"}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.074000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 168, "describer_id": 312, "size": 9437184}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.074000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 169, "describer_id": 312, "size": 9437184}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.074000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 169, "ndim": 2, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [768, 3072], "is_leaf": true, "stride": [3072, 1], "storage": 169, "view_func": "", "describer_id": 312}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.074000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 168, "ndim": 2, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [768, 3072], "is_leaf": true, "requires_grad": true, "is_parameter": true, "stride": [3072, 1], "storage": 168, "grad": 169, "view_func": "", "describer_id": 312}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.074000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 312, "id": 168, "source": "L['self'].param_groups[0]['params'][84]"}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.075000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 170, "describer_id": 312, "size": 3072}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.075000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 171, "describer_id": 312, "size": 3072}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.075000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 171, "ndim": 1, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [768], "is_leaf": true, "stride": [1], "storage": 171, "view_func": "", "describer_id": 312}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.075000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 170, "ndim": 1, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [768], "is_leaf": true, "requires_grad": true, "is_parameter": true, "stride": [1], "storage": 170, "grad": 171, "view_func": "", "describer_id": 312}, "frame_id": 7, 
"frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.075000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 312, "id": 170, "source": "L['self'].param_groups[0]['params'][85]"}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.076000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 172, "describer_id": 312, "size": 3072}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.076000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 173, "describer_id": 312, "size": 3072}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.076000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 173, "ndim": 1, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [768], "is_leaf": true, "stride": [1], "storage": 173, "view_func": "", "describer_id": 312}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.076000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 172, "ndim": 1, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [768], "is_leaf": true, "requires_grad": true, "is_parameter": true, "stride": [1], "storage": 172, "grad": 173, "view_func": "", "describer_id": 312}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.076000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 312, "id": 172, "source": "L['self'].param_groups[0]['params'][86]"}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.077000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 174, "describer_id": 312, "size": 3072}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.077000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 175, "describer_id": 312, "size": 3072}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.077000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 175, "ndim": 1, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [768], "is_leaf": true, "stride": [1], "storage": 175, "view_func": "", "describer_id": 312}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.077000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 174, "ndim": 1, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [768], "is_leaf": true, "requires_grad": true, "is_parameter": true, "stride": [1], "storage": 174, "grad": 175, "view_func": "", "describer_id": 312}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.077000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 312, "id": 174, "source": "L['self'].param_groups[0]['params'][87]"}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.077000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 176, "describer_id": 312, "size": 7077888}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.078000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 177, "describer_id": 312, "size": 7077888}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.078000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 177, "ndim": 2, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [2304, 768], "is_leaf": true, "stride": [768, 1], 
"storage": 177, "view_func": "", "describer_id": 312}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.078000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 176, "ndim": 2, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [2304, 768], "is_leaf": true, "requires_grad": true, "is_parameter": true, "stride": [768, 1], "storage": 176, "grad": 177, "view_func": "", "describer_id": 312}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.078000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 312, "id": 176, "source": "L['self'].param_groups[0]['params'][88]"}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.078000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 178, "describer_id": 312, "size": 9216}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.078000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 179, "describer_id": 312, "size": 9216}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.079000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 179, "ndim": 1, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [2304], "is_leaf": true, "stride": [1], "storage": 179, "view_func": "", "describer_id": 312}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.079000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 178, "ndim": 1, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [2304], "is_leaf": true, "requires_grad": true, "is_parameter": true, "stride": [1], "storage": 178, "grad": 179, "view_func": "", "describer_id": 312}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.079000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 312, "id": 178, "source": "L['self'].param_groups[0]['params'][89]"}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.079000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 180, "describer_id": 312, "size": 2359296}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.079000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 181, "describer_id": 312, "size": 2359296}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.079000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 181, "ndim": 2, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [768, 768], "is_leaf": true, "stride": [768, 1], "storage": 181, "view_func": "", "describer_id": 312}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.079000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 180, "ndim": 2, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [768, 768], "is_leaf": true, "requires_grad": true, "is_parameter": true, "stride": [768, 1], "storage": 180, "grad": 181, "view_func": "", "describer_id": 312}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.079000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 312, "id": 180, "source": "L['self'].param_groups[0]['params'][90]"}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.080000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 182, "describer_id": 312, "size": 
3072}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.080000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 183, "describer_id": 312, "size": 3072}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.080000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 183, "ndim": 1, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [768], "is_leaf": true, "stride": [1], "storage": 183, "view_func": "", "describer_id": 312}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.080000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 182, "ndim": 1, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [768], "is_leaf": true, "requires_grad": true, "is_parameter": true, "stride": [1], "storage": 182, "grad": 183, "view_func": "", "describer_id": 312}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.080000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 312, "id": 182, "source": "L['self'].param_groups[0]['params'][91]"}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.385000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 184, "describer_id": 312, "size": 3072}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.385000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 185, "describer_id": 312, "size": 3072}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.385000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 185, "ndim": 1, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [768], "is_leaf": true, "stride": [1], "storage": 185, "view_func": "", "describer_id": 312}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.385000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 184, "ndim": 1, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [768], "is_leaf": true, "requires_grad": true, "is_parameter": true, "stride": [1], "storage": 184, "grad": 185, "view_func": "", "describer_id": 312}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.386000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 312, "id": 184, "source": "L['self'].param_groups[0]['params'][92]"}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.386000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 186, "describer_id": 312, "size": 3072}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.386000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 187, "describer_id": 312, "size": 3072}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.386000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 187, "ndim": 1, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [768], "is_leaf": true, "stride": [1], "storage": 187, "view_func": "", "describer_id": 312}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.387000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 186, "ndim": 1, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [768], "is_leaf": true, "requires_grad": true, "is_parameter": true, "stride": [1], "storage": 186, "grad": 187, "view_func": 
"", "describer_id": 312}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.387000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 312, "id": 186, "source": "L['self'].param_groups[0]['params'][93]"}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.387000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 188, "describer_id": 312, "size": 9437184}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.387000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 189, "describer_id": 312, "size": 9437184}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.387000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 189, "ndim": 2, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [3072, 768], "is_leaf": true, "stride": [768, 1], "storage": 189, "view_func": "", "describer_id": 312}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.387000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 188, "ndim": 2, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [3072, 768], "is_leaf": true, "requires_grad": true, "is_parameter": true, "stride": [768, 1], "storage": 188, "grad": 189, "view_func": "", "describer_id": 312}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.387000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 312, "id": 188, "source": "L['self'].param_groups[0]['params'][94]"}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.388000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 190, "describer_id": 312, "size": 12288}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.388000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 191, "describer_id": 312, "size": 12288}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.388000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 191, "ndim": 1, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [3072], "is_leaf": true, "stride": [1], "storage": 191, "view_func": "", "describer_id": 312}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.388000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 190, "ndim": 1, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [3072], "is_leaf": true, "requires_grad": true, "is_parameter": true, "stride": [1], "storage": 190, "grad": 191, "view_func": "", "describer_id": 312}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.388000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 312, "id": 190, "source": "L['self'].param_groups[0]['params'][95]"}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.389000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 192, "describer_id": 312, "size": 9437184}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.389000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 193, "describer_id": 312, "size": 9437184}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.389000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 193, "ndim": 2, "dtype": "torch.float32", "device": "device(type='cuda', 
index=0)", "size": [768, 3072], "is_leaf": true, "stride": [3072, 1], "storage": 193, "view_func": "", "describer_id": 312}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.389000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 192, "ndim": 2, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [768, 3072], "is_leaf": true, "requires_grad": true, "is_parameter": true, "stride": [3072, 1], "storage": 192, "grad": 193, "view_func": "", "describer_id": 312}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.389000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 312, "id": 192, "source": "L['self'].param_groups[0]['params'][96]"}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.390000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 194, "describer_id": 312, "size": 3072}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.390000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 195, "describer_id": 312, "size": 3072}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.390000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 195, "ndim": 1, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [768], "is_leaf": true, "stride": [1], "storage": 195, "view_func": "", "describer_id": 312}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.390000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 194, "ndim": 1, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [768], "is_leaf": true, "requires_grad": true, "is_parameter": true, "stride": [1], "storage": 194, "grad": 195, "view_func": "", "describer_id": 312}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.390000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 312, "id": 194, "source": "L['self'].param_groups[0]['params'][97]"}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.391000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 196, "describer_id": 312, "size": 3072}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.391000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 197, "describer_id": 312, "size": 3072}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.391000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 197, "ndim": 1, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [768], "is_leaf": true, "stride": [1], "storage": 197, "view_func": "", "describer_id": 312}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.391000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 196, "ndim": 1, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [768], "is_leaf": true, "requires_grad": true, "is_parameter": true, "stride": [1], "storage": 196, "grad": 197, "view_func": "", "describer_id": 312}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.391000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 312, "id": 196, "source": "L['self'].param_groups[0]['params'][98]"}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.392000 4107173 torch/_subclasses/meta_utils.py:204] 
{"describe_storage": {"id": 198, "describer_id": 312, "size": 3072}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.392000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 199, "describer_id": 312, "size": 3072}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.392000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 199, "ndim": 1, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [768], "is_leaf": true, "stride": [1], "storage": 199, "view_func": "", "describer_id": 312}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.392000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 198, "ndim": 1, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [768], "is_leaf": true, "requires_grad": true, "is_parameter": true, "stride": [1], "storage": 198, "grad": 199, "view_func": "", "describer_id": 312}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.392000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 312, "id": 198, "source": "L['self'].param_groups[0]['params'][99]"}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.393000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 200, "describer_id": 312, "size": 7077888}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.393000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 201, "describer_id": 312, "size": 7077888}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.393000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 201, "ndim": 2, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [2304, 768], "is_leaf": true, "stride": [768, 1], "storage": 201, "view_func": "", "describer_id": 312}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.393000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 200, "ndim": 2, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [2304, 768], "is_leaf": true, "requires_grad": true, "is_parameter": true, "stride": [768, 1], "storage": 200, "grad": 201, "view_func": "", "describer_id": 312}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.393000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 312, "id": 200, "source": "L['self'].param_groups[0]['params'][100]"}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.393000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 202, "describer_id": 312, "size": 9216}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.394000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 203, "describer_id": 312, "size": 9216}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.394000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 203, "ndim": 1, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [2304], "is_leaf": true, "stride": [1], "storage": 203, "view_func": "", "describer_id": 312}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.394000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 202, "ndim": 1, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [2304], "is_leaf": true, 
"requires_grad": true, "is_parameter": true, "stride": [1], "storage": 202, "grad": 203, "view_func": "", "describer_id": 312}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.394000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 312, "id": 202, "source": "L['self'].param_groups[0]['params'][101]"}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.394000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 204, "describer_id": 312, "size": 2359296}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.394000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 205, "describer_id": 312, "size": 2359296}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.394000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 205, "ndim": 2, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [768, 768], "is_leaf": true, "stride": [768, 1], "storage": 205, "view_func": "", "describer_id": 312}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.395000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 204, "ndim": 2, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [768, 768], "is_leaf": true, "requires_grad": true, "is_parameter": true, "stride": [768, 1], "storage": 204, "grad": 205, "view_func": "", "describer_id": 312}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.395000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 312, "id": 204, "source": "L['self'].param_groups[0]['params'][102]"}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.395000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 206, "describer_id": 312, "size": 3072}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.395000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 207, "describer_id": 312, "size": 3072}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.395000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 207, "ndim": 1, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [768], "is_leaf": true, "stride": [1], "storage": 207, "view_func": "", "describer_id": 312}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.395000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 206, "ndim": 1, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [768], "is_leaf": true, "requires_grad": true, "is_parameter": true, "stride": [1], "storage": 206, "grad": 207, "view_func": "", "describer_id": 312}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.395000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 312, "id": 206, "source": "L['self'].param_groups[0]['params'][103]"}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.396000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 208, "describer_id": 312, "size": 3072}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.396000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 209, "describer_id": 312, "size": 3072}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.396000 4107173 torch/_subclasses/meta_utils.py:410] 
{"describe_tensor": {"id": 209, "ndim": 1, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [768], "is_leaf": true, "stride": [1], "storage": 209, "view_func": "", "describer_id": 312}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.396000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 208, "ndim": 1, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [768], "is_leaf": true, "requires_grad": true, "is_parameter": true, "stride": [1], "storage": 208, "grad": 209, "view_func": "", "describer_id": 312}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.396000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 312, "id": 208, "source": "L['self'].param_groups[0]['params'][104]"}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.397000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 210, "describer_id": 312, "size": 3072}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.397000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 211, "describer_id": 312, "size": 3072}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.397000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 211, "ndim": 1, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [768], "is_leaf": true, "stride": [1], "storage": 211, "view_func": "", "describer_id": 312}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.397000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 210, "ndim": 1, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [768], "is_leaf": true, "requires_grad": true, "is_parameter": true, "stride": [1], "storage": 210, "grad": 211, "view_func": "", "describer_id": 312}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.397000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 312, "id": 210, "source": "L['self'].param_groups[0]['params'][105]"}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.398000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 212, "describer_id": 312, "size": 9437184}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.398000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 213, "describer_id": 312, "size": 9437184}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.398000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 213, "ndim": 2, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [3072, 768], "is_leaf": true, "stride": [768, 1], "storage": 213, "view_func": "", "describer_id": 312}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.398000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 212, "ndim": 2, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [3072, 768], "is_leaf": true, "requires_grad": true, "is_parameter": true, "stride": [768, 1], "storage": 212, "grad": 213, "view_func": "", "describer_id": 312}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.398000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 312, "id": 212, "source": "L['self'].param_groups[0]['params'][106]"}, "frame_id": 7, 
"frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.399000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 214, "describer_id": 312, "size": 12288}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.399000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 215, "describer_id": 312, "size": 12288}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.399000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 215, "ndim": 1, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [3072], "is_leaf": true, "stride": [1], "storage": 215, "view_func": "", "describer_id": 312}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.399000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 214, "ndim": 1, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [3072], "is_leaf": true, "requires_grad": true, "is_parameter": true, "stride": [1], "storage": 214, "grad": 215, "view_func": "", "describer_id": 312}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.399000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 312, "id": 214, "source": "L['self'].param_groups[0]['params'][107]"}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.399000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 216, "describer_id": 312, "size": 9437184}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.400000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 217, "describer_id": 312, "size": 9437184}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.400000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 217, "ndim": 2, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [768, 3072], "is_leaf": true, "stride": [3072, 1], "storage": 217, "view_func": "", "describer_id": 312}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.400000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 216, "ndim": 2, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [768, 3072], "is_leaf": true, "requires_grad": true, "is_parameter": true, "stride": [3072, 1], "storage": 216, "grad": 217, "view_func": "", "describer_id": 312}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.400000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 312, "id": 216, "source": "L['self'].param_groups[0]['params'][108]"}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.400000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 218, "describer_id": 312, "size": 3072}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.400000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 219, "describer_id": 312, "size": 3072}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.401000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 219, "ndim": 1, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [768], "is_leaf": true, "stride": [1], "storage": 219, "view_func": "", "describer_id": 312}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.401000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 218, "ndim": 1, 
"dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [768], "is_leaf": true, "requires_grad": true, "is_parameter": true, "stride": [1], "storage": 218, "grad": 219, "view_func": "", "describer_id": 312}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.401000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 312, "id": 218, "source": "L['self'].param_groups[0]['params'][109]"}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.401000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 220, "describer_id": 312, "size": 3072}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.401000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 221, "describer_id": 312, "size": 3072}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.401000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 221, "ndim": 1, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [768], "is_leaf": true, "stride": [1], "storage": 221, "view_func": "", "describer_id": 312}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.401000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 220, "ndim": 1, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [768], "is_leaf": true, "requires_grad": true, "is_parameter": true, "stride": [1], "storage": 220, "grad": 221, "view_func": "", "describer_id": 312}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.402000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 312, "id": 220, "source": "L['self'].param_groups[0]['params'][110]"}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.402000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 222, "describer_id": 312, "size": 3072}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.402000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 223, "describer_id": 312, "size": 3072}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.402000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 223, "ndim": 1, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [768], "is_leaf": true, "stride": [1], "storage": 223, "view_func": "", "describer_id": 312}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.402000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 222, "ndim": 1, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [768], "is_leaf": true, "requires_grad": true, "is_parameter": true, "stride": [1], "storage": 222, "grad": 223, "view_func": "", "describer_id": 312}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.402000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 312, "id": 222, "source": "L['self'].param_groups[0]['params'][111]"}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.403000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 224, "describer_id": 312, "size": 7077888}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.403000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 225, "describer_id": 312, "size": 7077888}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} 
+V0806 13:56:06.403000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 225, "ndim": 2, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [2304, 768], "is_leaf": true, "stride": [768, 1], "storage": 225, "view_func": "", "describer_id": 312}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.403000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 224, "ndim": 2, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [2304, 768], "is_leaf": true, "requires_grad": true, "is_parameter": true, "stride": [768, 1], "storage": 224, "grad": 225, "view_func": "", "describer_id": 312}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.403000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 312, "id": 224, "source": "L['self'].param_groups[0]['params'][112]"}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.404000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 226, "describer_id": 312, "size": 9216}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.404000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 227, "describer_id": 312, "size": 9216}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.404000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 227, "ndim": 1, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [2304], "is_leaf": true, "stride": [1], "storage": 227, "view_func": "", "describer_id": 312}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.404000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 226, "ndim": 1, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [2304], "is_leaf": true, "requires_grad": true, "is_parameter": true, "stride": [1], "storage": 226, "grad": 227, "view_func": "", "describer_id": 312}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.404000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 312, "id": 226, "source": "L['self'].param_groups[0]['params'][113]"}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.405000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 228, "describer_id": 312, "size": 2359296}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.405000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 229, "describer_id": 312, "size": 2359296}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.405000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 229, "ndim": 2, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [768, 768], "is_leaf": true, "stride": [768, 1], "storage": 229, "view_func": "", "describer_id": 312}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.405000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 228, "ndim": 2, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [768, 768], "is_leaf": true, "requires_grad": true, "is_parameter": true, "stride": [768, 1], "storage": 228, "grad": 229, "view_func": "", "describer_id": 312}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.405000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 312, "id": 
228, "source": "L['self'].param_groups[0]['params'][114]"}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.406000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 230, "describer_id": 312, "size": 3072}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.406000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 231, "describer_id": 312, "size": 3072}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.406000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 231, "ndim": 1, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [768], "is_leaf": true, "stride": [1], "storage": 231, "view_func": "", "describer_id": 312}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.406000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 230, "ndim": 1, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [768], "is_leaf": true, "requires_grad": true, "is_parameter": true, "stride": [1], "storage": 230, "grad": 231, "view_func": "", "describer_id": 312}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.406000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 312, "id": 230, "source": "L['self'].param_groups[0]['params'][115]"}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.406000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 232, "describer_id": 312, "size": 3072}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.406000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 233, "describer_id": 312, "size": 3072}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.407000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 233, "ndim": 1, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [768], "is_leaf": true, "stride": [1], "storage": 233, "view_func": "", "describer_id": 312}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.407000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 232, "ndim": 1, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [768], "is_leaf": true, "requires_grad": true, "is_parameter": true, "stride": [1], "storage": 232, "grad": 233, "view_func": "", "describer_id": 312}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.407000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 312, "id": 232, "source": "L['self'].param_groups[0]['params'][116]"}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.407000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 234, "describer_id": 312, "size": 3072}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.407000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 235, "describer_id": 312, "size": 3072}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.407000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 235, "ndim": 1, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [768], "is_leaf": true, "stride": [1], "storage": 235, "view_func": "", "describer_id": 312}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.407000 4107173 torch/_subclasses/meta_utils.py:410] 
{"describe_tensor": {"id": 234, "ndim": 1, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [768], "is_leaf": true, "requires_grad": true, "is_parameter": true, "stride": [1], "storage": 234, "grad": 235, "view_func": "", "describer_id": 312}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.408000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 312, "id": 234, "source": "L['self'].param_groups[0]['params'][117]"}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.408000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 236, "describer_id": 312, "size": 9437184}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.408000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 237, "describer_id": 312, "size": 9437184}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.408000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 237, "ndim": 2, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [3072, 768], "is_leaf": true, "stride": [768, 1], "storage": 237, "view_func": "", "describer_id": 312}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.408000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 236, "ndim": 2, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [3072, 768], "is_leaf": true, "requires_grad": true, "is_parameter": true, "stride": [768, 1], "storage": 236, "grad": 237, "view_func": "", "describer_id": 312}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.408000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 312, "id": 236, "source": "L['self'].param_groups[0]['params'][118]"}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.409000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 238, "describer_id": 312, "size": 12288}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.409000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 239, "describer_id": 312, "size": 12288}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.409000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 239, "ndim": 1, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [3072], "is_leaf": true, "stride": [1], "storage": 239, "view_func": "", "describer_id": 312}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.409000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 238, "ndim": 1, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [3072], "is_leaf": true, "requires_grad": true, "is_parameter": true, "stride": [1], "storage": 238, "grad": 239, "view_func": "", "describer_id": 312}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.409000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 312, "id": 238, "source": "L['self'].param_groups[0]['params'][119]"}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.410000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 240, "describer_id": 312, "size": 9437184}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.410000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 241, "describer_id": 
312, "size": 9437184}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.410000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 241, "ndim": 2, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [768, 3072], "is_leaf": true, "stride": [3072, 1], "storage": 241, "view_func": "", "describer_id": 312}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.410000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 240, "ndim": 2, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [768, 3072], "is_leaf": true, "requires_grad": true, "is_parameter": true, "stride": [3072, 1], "storage": 240, "grad": 241, "view_func": "", "describer_id": 312}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.410000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 312, "id": 240, "source": "L['self'].param_groups[0]['params'][120]"}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.411000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 242, "describer_id": 312, "size": 3072}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.411000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 243, "describer_id": 312, "size": 3072}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.411000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 243, "ndim": 1, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [768], "is_leaf": true, "stride": [1], "storage": 243, "view_func": "", "describer_id": 312}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.411000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 242, "ndim": 1, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [768], "is_leaf": true, "requires_grad": true, "is_parameter": true, "stride": [1], "storage": 242, "grad": 243, "view_func": "", "describer_id": 312}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.411000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 312, "id": 242, "source": "L['self'].param_groups[0]['params'][121]"}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.412000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 244, "describer_id": 312, "size": 3072}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.412000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 245, "describer_id": 312, "size": 3072}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.412000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 245, "ndim": 1, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [768], "is_leaf": true, "stride": [1], "storage": 245, "view_func": "", "describer_id": 312}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.412000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 244, "ndim": 1, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [768], "is_leaf": true, "requires_grad": true, "is_parameter": true, "stride": [1], "storage": 244, "grad": 245, "view_func": "", "describer_id": 312}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.412000 4107173 torch/_subclasses/meta_utils.py:1633] 
{"describe_source": {"describer_id": 312, "id": 244, "source": "L['self'].param_groups[0]['params'][122]"}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.412000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 246, "describer_id": 312, "size": 3072}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.413000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 247, "describer_id": 312, "size": 3072}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.413000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 247, "ndim": 1, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [768], "is_leaf": true, "stride": [1], "storage": 247, "view_func": "", "describer_id": 312}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.413000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 246, "ndim": 1, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [768], "is_leaf": true, "requires_grad": true, "is_parameter": true, "stride": [1], "storage": 246, "grad": 247, "view_func": "", "describer_id": 312}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.413000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 312, "id": 246, "source": "L['self'].param_groups[0]['params'][123]"}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.413000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 248, "describer_id": 312, "size": 7077888}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.413000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 249, "describer_id": 312, "size": 7077888}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.413000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 249, "ndim": 2, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [2304, 768], "is_leaf": true, "stride": [768, 1], "storage": 249, "view_func": "", "describer_id": 312}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.413000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 248, "ndim": 2, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [2304, 768], "is_leaf": true, "requires_grad": true, "is_parameter": true, "stride": [768, 1], "storage": 248, "grad": 249, "view_func": "", "describer_id": 312}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.414000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 312, "id": 248, "source": "L['self'].param_groups[0]['params'][124]"}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.414000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 250, "describer_id": 312, "size": 9216}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.414000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 251, "describer_id": 312, "size": 9216}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.414000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 251, "ndim": 1, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [2304], "is_leaf": true, "stride": [1], "storage": 251, "view_func": "", "describer_id": 312}, "frame_id": 7, "frame_compile_id": 0, 
"attempt": 0} +V0806 13:56:06.414000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 250, "ndim": 1, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [2304], "is_leaf": true, "requires_grad": true, "is_parameter": true, "stride": [1], "storage": 250, "grad": 251, "view_func": "", "describer_id": 312}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.414000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 312, "id": 250, "source": "L['self'].param_groups[0]['params'][125]"}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.415000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 252, "describer_id": 312, "size": 2359296}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.415000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 253, "describer_id": 312, "size": 2359296}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.415000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 253, "ndim": 2, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [768, 768], "is_leaf": true, "stride": [768, 1], "storage": 253, "view_func": "", "describer_id": 312}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.415000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 252, "ndim": 2, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [768, 768], "is_leaf": true, "requires_grad": true, "is_parameter": true, "stride": [768, 1], "storage": 252, "grad": 253, "view_func": "", "describer_id": 312}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.415000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 312, "id": 252, "source": "L['self'].param_groups[0]['params'][126]"}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.416000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 254, "describer_id": 312, "size": 3072}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.416000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 255, "describer_id": 312, "size": 3072}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.416000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 255, "ndim": 1, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [768], "is_leaf": true, "stride": [1], "storage": 255, "view_func": "", "describer_id": 312}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.416000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 254, "ndim": 1, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [768], "is_leaf": true, "requires_grad": true, "is_parameter": true, "stride": [1], "storage": 254, "grad": 255, "view_func": "", "describer_id": 312}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.416000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 312, "id": 254, "source": "L['self'].param_groups[0]['params'][127]"}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.417000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 256, "describer_id": 312, "size": 3072}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.417000 4107173 
torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 257, "describer_id": 312, "size": 3072}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.417000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 257, "ndim": 1, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [768], "is_leaf": true, "stride": [1], "storage": 257, "view_func": "", "describer_id": 312}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.417000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 256, "ndim": 1, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [768], "is_leaf": true, "requires_grad": true, "is_parameter": true, "stride": [1], "storage": 256, "grad": 257, "view_func": "", "describer_id": 312}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.417000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 312, "id": 256, "source": "L['self'].param_groups[0]['params'][128]"}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.418000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 258, "describer_id": 312, "size": 3072}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.418000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 259, "describer_id": 312, "size": 3072}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.418000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 259, "ndim": 1, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [768], "is_leaf": true, "stride": [1], "storage": 259, "view_func": "", "describer_id": 312}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.418000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 258, "ndim": 1, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [768], "is_leaf": true, "requires_grad": true, "is_parameter": true, "stride": [1], "storage": 258, "grad": 259, "view_func": "", "describer_id": 312}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.418000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 312, "id": 258, "source": "L['self'].param_groups[0]['params'][129]"}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.418000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 260, "describer_id": 312, "size": 9437184}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.418000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 261, "describer_id": 312, "size": 9437184}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.419000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 261, "ndim": 2, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [3072, 768], "is_leaf": true, "stride": [768, 1], "storage": 261, "view_func": "", "describer_id": 312}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.419000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 260, "ndim": 2, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [3072, 768], "is_leaf": true, "requires_grad": true, "is_parameter": true, "stride": [768, 1], "storage": 260, "grad": 261, "view_func": "", "describer_id": 312}, "frame_id": 7, "frame_compile_id": 
0, "attempt": 0} +V0806 13:56:06.419000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 312, "id": 260, "source": "L['self'].param_groups[0]['params'][130]"}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.419000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 262, "describer_id": 312, "size": 12288}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.419000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 263, "describer_id": 312, "size": 12288}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.419000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 263, "ndim": 1, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [3072], "is_leaf": true, "stride": [1], "storage": 263, "view_func": "", "describer_id": 312}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.420000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 262, "ndim": 1, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [3072], "is_leaf": true, "requires_grad": true, "is_parameter": true, "stride": [1], "storage": 262, "grad": 263, "view_func": "", "describer_id": 312}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.420000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 312, "id": 262, "source": "L['self'].param_groups[0]['params'][131]"}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.420000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 264, "describer_id": 312, "size": 9437184}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.420000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 265, "describer_id": 312, "size": 9437184}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.420000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 265, "ndim": 2, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [768, 3072], "is_leaf": true, "stride": [3072, 1], "storage": 265, "view_func": "", "describer_id": 312}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.420000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 264, "ndim": 2, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [768, 3072], "is_leaf": true, "requires_grad": true, "is_parameter": true, "stride": [3072, 1], "storage": 264, "grad": 265, "view_func": "", "describer_id": 312}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.420000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 312, "id": 264, "source": "L['self'].param_groups[0]['params'][132]"}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.421000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 266, "describer_id": 312, "size": 3072}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.421000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 267, "describer_id": 312, "size": 3072}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.421000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 267, "ndim": 1, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [768], "is_leaf": true, "stride": [1], 
"storage": 267, "view_func": "", "describer_id": 312}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.421000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 266, "ndim": 1, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [768], "is_leaf": true, "requires_grad": true, "is_parameter": true, "stride": [1], "storage": 266, "grad": 267, "view_func": "", "describer_id": 312}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.421000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 312, "id": 266, "source": "L['self'].param_groups[0]['params'][133]"}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.422000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 268, "describer_id": 312, "size": 3072}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.422000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 269, "describer_id": 312, "size": 3072}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.422000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 269, "ndim": 1, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [768], "is_leaf": true, "stride": [1], "storage": 269, "view_func": "", "describer_id": 312}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.422000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 268, "ndim": 1, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [768], "is_leaf": true, "requires_grad": true, "is_parameter": true, "stride": [1], "storage": 268, "grad": 269, "view_func": "", "describer_id": 312}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.422000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 312, "id": 268, "source": "L['self'].param_groups[0]['params'][134]"}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.423000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 270, "describer_id": 312, "size": 3072}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.423000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 271, "describer_id": 312, "size": 3072}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.423000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 271, "ndim": 1, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [768], "is_leaf": true, "stride": [1], "storage": 271, "view_func": "", "describer_id": 312}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.423000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 270, "ndim": 1, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [768], "is_leaf": true, "requires_grad": true, "is_parameter": true, "stride": [1], "storage": 270, "grad": 271, "view_func": "", "describer_id": 312}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.423000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 312, "id": 270, "source": "L['self'].param_groups[0]['params'][135]"}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.424000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 272, "describer_id": 312, "size": 7077888}, "frame_id": 7, 
"frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.424000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 273, "describer_id": 312, "size": 7077888}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.424000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 273, "ndim": 2, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [2304, 768], "is_leaf": true, "stride": [768, 1], "storage": 273, "view_func": "", "describer_id": 312}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.424000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 272, "ndim": 2, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [2304, 768], "is_leaf": true, "requires_grad": true, "is_parameter": true, "stride": [768, 1], "storage": 272, "grad": 273, "view_func": "", "describer_id": 312}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.424000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 312, "id": 272, "source": "L['self'].param_groups[0]['params'][136]"}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.424000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 274, "describer_id": 312, "size": 9216}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.425000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 275, "describer_id": 312, "size": 9216}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.425000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 275, "ndim": 1, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [2304], "is_leaf": true, "stride": [1], "storage": 275, "view_func": "", "describer_id": 312}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.425000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 274, "ndim": 1, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [2304], "is_leaf": true, "requires_grad": true, "is_parameter": true, "stride": [1], "storage": 274, "grad": 275, "view_func": "", "describer_id": 312}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.425000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 312, "id": 274, "source": "L['self'].param_groups[0]['params'][137]"}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.425000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 276, "describer_id": 312, "size": 2359296}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.425000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 277, "describer_id": 312, "size": 2359296}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.425000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 277, "ndim": 2, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [768, 768], "is_leaf": true, "stride": [768, 1], "storage": 277, "view_func": "", "describer_id": 312}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.426000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 276, "ndim": 2, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [768, 768], "is_leaf": true, "requires_grad": true, "is_parameter": true, "stride": [768, 1], 
"storage": 276, "grad": 277, "view_func": "", "describer_id": 312}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.426000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 312, "id": 276, "source": "L['self'].param_groups[0]['params'][138]"}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.426000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 278, "describer_id": 312, "size": 3072}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.426000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 279, "describer_id": 312, "size": 3072}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.426000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 279, "ndim": 1, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [768], "is_leaf": true, "stride": [1], "storage": 279, "view_func": "", "describer_id": 312}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.426000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 278, "ndim": 1, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [768], "is_leaf": true, "requires_grad": true, "is_parameter": true, "stride": [1], "storage": 278, "grad": 279, "view_func": "", "describer_id": 312}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.427000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 312, "id": 278, "source": "L['self'].param_groups[0]['params'][139]"}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.427000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 280, "describer_id": 312, "size": 3072}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.427000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 281, "describer_id": 312, "size": 3072}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.427000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 281, "ndim": 1, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [768], "is_leaf": true, "stride": [1], "storage": 281, "view_func": "", "describer_id": 312}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.427000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 280, "ndim": 1, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [768], "is_leaf": true, "requires_grad": true, "is_parameter": true, "stride": [1], "storage": 280, "grad": 281, "view_func": "", "describer_id": 312}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.427000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 312, "id": 280, "source": "L['self'].param_groups[0]['params'][140]"}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.428000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 282, "describer_id": 312, "size": 3072}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.428000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 283, "describer_id": 312, "size": 3072}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.428000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 283, "ndim": 1, "dtype": "torch.float32", "device": 
"device(type='cuda', index=0)", "size": [768], "is_leaf": true, "stride": [1], "storage": 283, "view_func": "", "describer_id": 312}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.428000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 282, "ndim": 1, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [768], "is_leaf": true, "requires_grad": true, "is_parameter": true, "stride": [1], "storage": 282, "grad": 283, "view_func": "", "describer_id": 312}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.428000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 312, "id": 282, "source": "L['self'].param_groups[0]['params'][141]"}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.429000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 284, "describer_id": 312, "size": 9437184}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.429000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 285, "describer_id": 312, "size": 9437184}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.429000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 285, "ndim": 2, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [3072, 768], "is_leaf": true, "stride": [768, 1], "storage": 285, "view_func": "", "describer_id": 312}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.429000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 284, "ndim": 2, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [3072, 768], "is_leaf": true, "requires_grad": true, "is_parameter": true, "stride": [768, 1], "storage": 284, "grad": 285, "view_func": "", "describer_id": 312}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.429000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 312, "id": 284, "source": "L['self'].param_groups[0]['params'][142]"}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.430000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 286, "describer_id": 312, "size": 12288}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.430000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 287, "describer_id": 312, "size": 12288}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.430000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 287, "ndim": 1, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [3072], "is_leaf": true, "stride": [1], "storage": 287, "view_func": "", "describer_id": 312}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.430000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 286, "ndim": 1, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [3072], "is_leaf": true, "requires_grad": true, "is_parameter": true, "stride": [1], "storage": 286, "grad": 287, "view_func": "", "describer_id": 312}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.430000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 312, "id": 286, "source": "L['self'].param_groups[0]['params'][143]"}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.430000 4107173 
torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 288, "describer_id": 312, "size": 9437184}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.431000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 289, "describer_id": 312, "size": 9437184}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.431000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 289, "ndim": 2, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [768, 3072], "is_leaf": true, "stride": [3072, 1], "storage": 289, "view_func": "", "describer_id": 312}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.431000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 288, "ndim": 2, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [768, 3072], "is_leaf": true, "requires_grad": true, "is_parameter": true, "stride": [3072, 1], "storage": 288, "grad": 289, "view_func": "", "describer_id": 312}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.431000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 312, "id": 288, "source": "L['self'].param_groups[0]['params'][144]"}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.431000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 290, "describer_id": 312, "size": 3072}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.431000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 291, "describer_id": 312, "size": 3072}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.432000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 291, "ndim": 1, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [768], "is_leaf": true, "stride": [1], "storage": 291, "view_func": "", "describer_id": 312}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.432000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 290, "ndim": 1, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [768], "is_leaf": true, "requires_grad": true, "is_parameter": true, "stride": [1], "storage": 290, "grad": 291, "view_func": "", "describer_id": 312}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.432000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 312, "id": 290, "source": "L['self'].param_groups[0]['params'][145]"}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.432000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 292, "describer_id": 312, "size": 3072}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.432000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 293, "describer_id": 312, "size": 3072}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.432000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 293, "ndim": 1, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [768], "is_leaf": true, "stride": [1], "storage": 293, "view_func": "", "describer_id": 312}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.432000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 292, "ndim": 1, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", 
"size": [768], "is_leaf": true, "requires_grad": true, "is_parameter": true, "stride": [1], "storage": 292, "grad": 293, "view_func": "", "describer_id": 312}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.432000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 312, "id": 292, "source": "L['self'].param_groups[0]['params'][146]"}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.433000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 294, "describer_id": 312, "size": 3072}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.433000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 295, "describer_id": 312, "size": 3072}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.433000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 295, "ndim": 1, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [768], "is_leaf": true, "stride": [1], "storage": 295, "view_func": "", "describer_id": 312}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.433000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 294, "ndim": 1, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [768], "is_leaf": true, "requires_grad": true, "is_parameter": true, "stride": [1], "storage": 294, "grad": 295, "view_func": "", "describer_id": 312}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.433000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 312, "id": 294, "source": "L['self'].param_groups[0]['params'][147]"}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.440000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 296, "describer_id": 312, "size": 4}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.440000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 296, "ndim": 0, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [], "is_leaf": true, "stride": [], "storage": 296, "view_func": "", "describer_id": 312}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.440000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 312, "id": 296, "source": "L['self'].state[list(L['self'].state.keys())[1]]['step']"}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.441000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 297, "describer_id": 312, "size": 3145728}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.441000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 297, "ndim": 2, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [1024, 768], "is_leaf": true, "stride": [768, 1], "storage": 297, "view_func": "", "describer_id": 312}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.442000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 312, "id": 297, "source": "L['self'].state[list(L['self'].state.keys())[1]]['exp_avg']"}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.442000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 298, "describer_id": 312, "size": 3145728}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.442000 4107173 
torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 298, "ndim": 2, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [1024, 768], "is_leaf": true, "stride": [768, 1], "storage": 298, "view_func": "", "describer_id": 312}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.442000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 312, "id": 298, "source": "L['self'].state[list(L['self'].state.keys())[1]]['exp_avg_sq']"}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.473000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 299, "describer_id": 312, "size": 154533888}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.473000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 299, "ndim": 2, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [50304, 768], "is_leaf": true, "stride": [768, 1], "storage": 299, "view_func": "", "describer_id": 312}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.473000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 312, "id": 299, "source": "L['self'].state[list(L['self'].state.keys())[0]]['exp_avg']"}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.474000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 300, "describer_id": 312, "size": 3072}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.474000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 300, "ndim": 1, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [768], "is_leaf": true, "stride": [1], "storage": 300, "view_func": "", "describer_id": 312}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.474000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 312, "id": 300, "source": "L['self'].state[list(L['self'].state.keys())[2]]['exp_avg']"}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.475000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 301, "describer_id": 312, "size": 3072}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.475000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 301, "ndim": 1, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [768], "is_leaf": true, "stride": [1], "storage": 301, "view_func": "", "describer_id": 312}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.475000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 312, "id": 301, "source": "L['self'].state[list(L['self'].state.keys())[3]]['exp_avg']"}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.475000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 302, "describer_id": 312, "size": 7077888}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.475000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 302, "ndim": 2, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [2304, 768], "is_leaf": true, "stride": [768, 1], "storage": 302, "view_func": "", "describer_id": 312}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.475000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 312, "id": 302, 
"source": "L['self'].state[list(L['self'].state.keys())[4]]['exp_avg']"}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.476000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 303, "describer_id": 312, "size": 9216}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.476000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 303, "ndim": 1, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [2304], "is_leaf": true, "stride": [1], "storage": 303, "view_func": "", "describer_id": 312}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.476000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 312, "id": 303, "source": "L['self'].state[list(L['self'].state.keys())[5]]['exp_avg']"}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.476000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 304, "describer_id": 312, "size": 2359296}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.477000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 304, "ndim": 2, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [768, 768], "is_leaf": true, "stride": [768, 1], "storage": 304, "view_func": "", "describer_id": 312}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.477000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 312, "id": 304, "source": "L['self'].state[list(L['self'].state.keys())[6]]['exp_avg']"}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.477000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 305, "describer_id": 312, "size": 3072}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.477000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 305, "ndim": 1, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [768], "is_leaf": true, "stride": [1], "storage": 305, "view_func": "", "describer_id": 312}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.477000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 312, "id": 305, "source": "L['self'].state[list(L['self'].state.keys())[7]]['exp_avg']"}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.478000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 306, "describer_id": 312, "size": 3072}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.478000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 306, "ndim": 1, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [768], "is_leaf": true, "stride": [1], "storage": 306, "view_func": "", "describer_id": 312}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.478000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 312, "id": 306, "source": "L['self'].state[list(L['self'].state.keys())[8]]['exp_avg']"}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.478000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 307, "describer_id": 312, "size": 3072}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.478000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 307, "ndim": 1, "dtype": "torch.float32", 
"device": "device(type='cuda', index=0)", "size": [768], "is_leaf": true, "stride": [1], "storage": 307, "view_func": "", "describer_id": 312}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.479000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 312, "id": 307, "source": "L['self'].state[list(L['self'].state.keys())[9]]['exp_avg']"}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.479000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 308, "describer_id": 312, "size": 9437184}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.479000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 308, "ndim": 2, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [3072, 768], "is_leaf": true, "stride": [768, 1], "storage": 308, "view_func": "", "describer_id": 312}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.479000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 312, "id": 308, "source": "L['self'].state[list(L['self'].state.keys())[10]]['exp_avg']"}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.480000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 309, "describer_id": 312, "size": 12288}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.480000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 309, "ndim": 1, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [3072], "is_leaf": true, "stride": [1], "storage": 309, "view_func": "", "describer_id": 312}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.480000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 312, "id": 309, "source": "L['self'].state[list(L['self'].state.keys())[11]]['exp_avg']"}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.480000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 310, "describer_id": 312, "size": 9437184}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.480000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 310, "ndim": 2, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [768, 3072], "is_leaf": true, "stride": [3072, 1], "storage": 310, "view_func": "", "describer_id": 312}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.480000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 312, "id": 310, "source": "L['self'].state[list(L['self'].state.keys())[12]]['exp_avg']"}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.481000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 311, "describer_id": 312, "size": 3072}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.481000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 311, "ndim": 1, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [768], "is_leaf": true, "stride": [1], "storage": 311, "view_func": "", "describer_id": 312}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.481000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 312, "id": 311, "source": "L['self'].state[list(L['self'].state.keys())[13]]['exp_avg']"}, "frame_id": 7, "frame_compile_id": 0, 
"attempt": 0} +V0806 13:56:06.481000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 312, "describer_id": 312, "size": 3072}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.481000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 312, "ndim": 1, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [768], "is_leaf": true, "stride": [1], "storage": 312, "view_func": "", "describer_id": 312}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.482000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 312, "id": 312, "source": "L['self'].state[list(L['self'].state.keys())[14]]['exp_avg']"}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.482000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 313, "describer_id": 312, "size": 3072}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.482000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 313, "ndim": 1, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [768], "is_leaf": true, "stride": [1], "storage": 313, "view_func": "", "describer_id": 312}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.482000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 312, "id": 313, "source": "L['self'].state[list(L['self'].state.keys())[15]]['exp_avg']"}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.483000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 314, "describer_id": 312, "size": 7077888}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.483000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 314, "ndim": 2, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [2304, 768], "is_leaf": true, "stride": [768, 1], "storage": 314, "view_func": "", "describer_id": 312}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.483000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 312, "id": 314, "source": "L['self'].state[list(L['self'].state.keys())[16]]['exp_avg']"}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.483000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 315, "describer_id": 312, "size": 9216}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.483000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 315, "ndim": 1, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [2304], "is_leaf": true, "stride": [1], "storage": 315, "view_func": "", "describer_id": 312}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.483000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 312, "id": 315, "source": "L['self'].state[list(L['self'].state.keys())[17]]['exp_avg']"}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.484000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 316, "describer_id": 312, "size": 2359296}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.484000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 316, "ndim": 2, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [768, 768], "is_leaf": true, "stride": [768, 1], "storage": 
316, "view_func": "", "describer_id": 312}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.484000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 312, "id": 316, "source": "L['self'].state[list(L['self'].state.keys())[18]]['exp_avg']"}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.484000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 317, "describer_id": 312, "size": 3072}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.485000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 317, "ndim": 1, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [768], "is_leaf": true, "stride": [1], "storage": 317, "view_func": "", "describer_id": 312}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.485000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 312, "id": 317, "source": "L['self'].state[list(L['self'].state.keys())[19]]['exp_avg']"}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.485000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 318, "describer_id": 312, "size": 3072}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.485000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 318, "ndim": 1, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [768], "is_leaf": true, "stride": [1], "storage": 318, "view_func": "", "describer_id": 312}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.485000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 312, "id": 318, "source": "L['self'].state[list(L['self'].state.keys())[20]]['exp_avg']"}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.486000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 319, "describer_id": 312, "size": 3072}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.486000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 319, "ndim": 1, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [768], "is_leaf": true, "stride": [1], "storage": 319, "view_func": "", "describer_id": 312}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.486000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 312, "id": 319, "source": "L['self'].state[list(L['self'].state.keys())[21]]['exp_avg']"}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.486000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 320, "describer_id": 312, "size": 9437184}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.486000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 320, "ndim": 2, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [3072, 768], "is_leaf": true, "stride": [768, 1], "storage": 320, "view_func": "", "describer_id": 312}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.486000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 312, "id": 320, "source": "L['self'].state[list(L['self'].state.keys())[22]]['exp_avg']"}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.487000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 321, 
"describer_id": 312, "size": 12288}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.487000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 321, "ndim": 1, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [3072], "is_leaf": true, "stride": [1], "storage": 321, "view_func": "", "describer_id": 312}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.487000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 312, "id": 321, "source": "L['self'].state[list(L['self'].state.keys())[23]]['exp_avg']"}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.487000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 322, "describer_id": 312, "size": 9437184}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.487000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 322, "ndim": 2, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [768, 3072], "is_leaf": true, "stride": [3072, 1], "storage": 322, "view_func": "", "describer_id": 312}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.488000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 312, "id": 322, "source": "L['self'].state[list(L['self'].state.keys())[24]]['exp_avg']"}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.488000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 323, "describer_id": 312, "size": 3072}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.488000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 323, "ndim": 1, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [768], "is_leaf": true, "stride": [1], "storage": 323, "view_func": "", "describer_id": 312}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.488000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 312, "id": 323, "source": "L['self'].state[list(L['self'].state.keys())[25]]['exp_avg']"}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.489000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 324, "describer_id": 312, "size": 3072}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.489000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 324, "ndim": 1, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [768], "is_leaf": true, "stride": [1], "storage": 324, "view_func": "", "describer_id": 312}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.489000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 312, "id": 324, "source": "L['self'].state[list(L['self'].state.keys())[26]]['exp_avg']"}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.489000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 325, "describer_id": 312, "size": 3072}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.489000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 325, "ndim": 1, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [768], "is_leaf": true, "stride": [1], "storage": 325, "view_func": "", "describer_id": 312}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.489000 4107173 
torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 312, "id": 325, "source": "L['self'].state[list(L['self'].state.keys())[27]]['exp_avg']"}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.490000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 326, "describer_id": 312, "size": 7077888}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.490000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 326, "ndim": 2, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [2304, 768], "is_leaf": true, "stride": [768, 1], "storage": 326, "view_func": "", "describer_id": 312}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.490000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 312, "id": 326, "source": "L['self'].state[list(L['self'].state.keys())[28]]['exp_avg']"}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.490000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 327, "describer_id": 312, "size": 9216}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.491000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 327, "ndim": 1, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [2304], "is_leaf": true, "stride": [1], "storage": 327, "view_func": "", "describer_id": 312}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.491000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 312, "id": 327, "source": "L['self'].state[list(L['self'].state.keys())[29]]['exp_avg']"}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.491000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 328, "describer_id": 312, "size": 2359296}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.491000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 328, "ndim": 2, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [768, 768], "is_leaf": true, "stride": [768, 1], "storage": 328, "view_func": "", "describer_id": 312}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.491000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 312, "id": 328, "source": "L['self'].state[list(L['self'].state.keys())[30]]['exp_avg']"}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.492000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 329, "describer_id": 312, "size": 3072}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.492000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 329, "ndim": 1, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [768], "is_leaf": true, "stride": [1], "storage": 329, "view_func": "", "describer_id": 312}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.492000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 312, "id": 329, "source": "L['self'].state[list(L['self'].state.keys())[31]]['exp_avg']"}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.492000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 330, "describer_id": 312, "size": 3072}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.492000 4107173 
torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 330, "ndim": 1, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [768], "is_leaf": true, "stride": [1], "storage": 330, "view_func": "", "describer_id": 312}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.492000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 312, "id": 330, "source": "L['self'].state[list(L['self'].state.keys())[32]]['exp_avg']"}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.493000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 331, "describer_id": 312, "size": 3072}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.493000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 331, "ndim": 1, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [768], "is_leaf": true, "stride": [1], "storage": 331, "view_func": "", "describer_id": 312}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.493000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 312, "id": 331, "source": "L['self'].state[list(L['self'].state.keys())[33]]['exp_avg']"}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.493000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 332, "describer_id": 312, "size": 9437184}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.493000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 332, "ndim": 2, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [3072, 768], "is_leaf": true, "stride": [768, 1], "storage": 332, "view_func": "", "describer_id": 312}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.493000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 312, "id": 332, "source": "L['self'].state[list(L['self'].state.keys())[34]]['exp_avg']"}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.494000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 333, "describer_id": 312, "size": 12288}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.494000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 333, "ndim": 1, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [3072], "is_leaf": true, "stride": [1], "storage": 333, "view_func": "", "describer_id": 312}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.494000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 312, "id": 333, "source": "L['self'].state[list(L['self'].state.keys())[35]]['exp_avg']"}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.495000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 334, "describer_id": 312, "size": 9437184}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.495000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 334, "ndim": 2, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [768, 3072], "is_leaf": true, "stride": [3072, 1], "storage": 334, "view_func": "", "describer_id": 312}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.495000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 312, "id": 334, "source": 
"L['self'].state[list(L['self'].state.keys())[36]]['exp_avg']"}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.495000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 335, "describer_id": 312, "size": 3072}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.495000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 335, "ndim": 1, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [768], "is_leaf": true, "stride": [1], "storage": 335, "view_func": "", "describer_id": 312}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.495000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 312, "id": 335, "source": "L['self'].state[list(L['self'].state.keys())[37]]['exp_avg']"}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.496000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 336, "describer_id": 312, "size": 3072}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.496000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 336, "ndim": 1, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [768], "is_leaf": true, "stride": [1], "storage": 336, "view_func": "", "describer_id": 312}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.496000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 312, "id": 336, "source": "L['self'].state[list(L['self'].state.keys())[38]]['exp_avg']"}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.496000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 337, "describer_id": 312, "size": 3072}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.496000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 337, "ndim": 1, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [768], "is_leaf": true, "stride": [1], "storage": 337, "view_func": "", "describer_id": 312}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.497000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 312, "id": 337, "source": "L['self'].state[list(L['self'].state.keys())[39]]['exp_avg']"}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.497000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 338, "describer_id": 312, "size": 7077888}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.497000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 338, "ndim": 2, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [2304, 768], "is_leaf": true, "stride": [768, 1], "storage": 338, "view_func": "", "describer_id": 312}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.497000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 312, "id": 338, "source": "L['self'].state[list(L['self'].state.keys())[40]]['exp_avg']"}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.498000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 339, "describer_id": 312, "size": 9216}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.498000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 339, "ndim": 1, "dtype": "torch.float32", "device": 
"device(type='cuda', index=0)", "size": [2304], "is_leaf": true, "stride": [1], "storage": 339, "view_func": "", "describer_id": 312}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.498000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 312, "id": 339, "source": "L['self'].state[list(L['self'].state.keys())[41]]['exp_avg']"}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.498000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 340, "describer_id": 312, "size": 2359296}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.498000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 340, "ndim": 2, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [768, 768], "is_leaf": true, "stride": [768, 1], "storage": 340, "view_func": "", "describer_id": 312}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.498000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 312, "id": 340, "source": "L['self'].state[list(L['self'].state.keys())[42]]['exp_avg']"}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.499000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 341, "describer_id": 312, "size": 3072}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.499000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 341, "ndim": 1, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [768], "is_leaf": true, "stride": [1], "storage": 341, "view_func": "", "describer_id": 312}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.499000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 312, "id": 341, "source": "L['self'].state[list(L['self'].state.keys())[43]]['exp_avg']"}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.499000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 342, "describer_id": 312, "size": 3072}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.499000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 342, "ndim": 1, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [768], "is_leaf": true, "stride": [1], "storage": 342, "view_func": "", "describer_id": 312}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.500000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 312, "id": 342, "source": "L['self'].state[list(L['self'].state.keys())[44]]['exp_avg']"}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.500000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 343, "describer_id": 312, "size": 3072}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.500000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 343, "ndim": 1, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [768], "is_leaf": true, "stride": [1], "storage": 343, "view_func": "", "describer_id": 312}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.500000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 312, "id": 343, "source": "L['self'].state[list(L['self'].state.keys())[45]]['exp_avg']"}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 
13:56:06.501000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 344, "describer_id": 312, "size": 9437184}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.501000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 344, "ndim": 2, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [3072, 768], "is_leaf": true, "stride": [768, 1], "storage": 344, "view_func": "", "describer_id": 312}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.501000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 312, "id": 344, "source": "L['self'].state[list(L['self'].state.keys())[46]]['exp_avg']"}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.501000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 345, "describer_id": 312, "size": 12288}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.501000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 345, "ndim": 1, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [3072], "is_leaf": true, "stride": [1], "storage": 345, "view_func": "", "describer_id": 312}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.501000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 312, "id": 345, "source": "L['self'].state[list(L['self'].state.keys())[47]]['exp_avg']"}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.502000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 346, "describer_id": 312, "size": 9437184}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.502000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 346, "ndim": 2, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [768, 3072], "is_leaf": true, "stride": [3072, 1], "storage": 346, "view_func": "", "describer_id": 312}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.502000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 312, "id": 346, "source": "L['self'].state[list(L['self'].state.keys())[48]]['exp_avg']"}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.502000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 347, "describer_id": 312, "size": 3072}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.503000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 347, "ndim": 1, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [768], "is_leaf": true, "stride": [1], "storage": 347, "view_func": "", "describer_id": 312}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.503000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 312, "id": 347, "source": "L['self'].state[list(L['self'].state.keys())[49]]['exp_avg']"}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.503000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 348, "describer_id": 312, "size": 3072}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.503000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 348, "ndim": 1, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [768], "is_leaf": true, "stride": [1], "storage": 348, "view_func": 
"", "describer_id": 312}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.503000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 312, "id": 348, "source": "L['self'].state[list(L['self'].state.keys())[50]]['exp_avg']"}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.504000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 349, "describer_id": 312, "size": 3072}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.504000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 349, "ndim": 1, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [768], "is_leaf": true, "stride": [1], "storage": 349, "view_func": "", "describer_id": 312}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.504000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 312, "id": 349, "source": "L['self'].state[list(L['self'].state.keys())[51]]['exp_avg']"}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.504000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 350, "describer_id": 312, "size": 7077888}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.504000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 350, "ndim": 2, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [2304, 768], "is_leaf": true, "stride": [768, 1], "storage": 350, "view_func": "", "describer_id": 312}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.504000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 312, "id": 350, "source": "L['self'].state[list(L['self'].state.keys())[52]]['exp_avg']"}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.505000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 351, "describer_id": 312, "size": 9216}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.505000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 351, "ndim": 1, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [2304], "is_leaf": true, "stride": [1], "storage": 351, "view_func": "", "describer_id": 312}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.505000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 312, "id": 351, "source": "L['self'].state[list(L['self'].state.keys())[53]]['exp_avg']"}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.505000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 352, "describer_id": 312, "size": 2359296}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.505000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 352, "ndim": 2, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [768, 768], "is_leaf": true, "stride": [768, 1], "storage": 352, "view_func": "", "describer_id": 312}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.506000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 312, "id": 352, "source": "L['self'].state[list(L['self'].state.keys())[54]]['exp_avg']"}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.506000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 353, 
"describer_id": 312, "size": 3072}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.506000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 353, "ndim": 1, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [768], "is_leaf": true, "stride": [1], "storage": 353, "view_func": "", "describer_id": 312}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.506000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 312, "id": 353, "source": "L['self'].state[list(L['self'].state.keys())[55]]['exp_avg']"}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.507000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 354, "describer_id": 312, "size": 3072}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.507000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 354, "ndim": 1, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [768], "is_leaf": true, "stride": [1], "storage": 354, "view_func": "", "describer_id": 312}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.507000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 312, "id": 354, "source": "L['self'].state[list(L['self'].state.keys())[56]]['exp_avg']"}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.507000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 355, "describer_id": 312, "size": 3072}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.507000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 355, "ndim": 1, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [768], "is_leaf": true, "stride": [1], "storage": 355, "view_func": "", "describer_id": 312}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.507000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 312, "id": 355, "source": "L['self'].state[list(L['self'].state.keys())[57]]['exp_avg']"}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.508000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 356, "describer_id": 312, "size": 9437184}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.508000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 356, "ndim": 2, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [3072, 768], "is_leaf": true, "stride": [768, 1], "storage": 356, "view_func": "", "describer_id": 312}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.508000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 312, "id": 356, "source": "L['self'].state[list(L['self'].state.keys())[58]]['exp_avg']"}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.508000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 357, "describer_id": 312, "size": 12288}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.508000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 357, "ndim": 1, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [3072], "is_leaf": true, "stride": [1], "storage": 357, "view_func": "", "describer_id": 312}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.508000 4107173 
torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 312, "id": 357, "source": "L['self'].state[list(L['self'].state.keys())[59]]['exp_avg']"}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.509000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 358, "describer_id": 312, "size": 9437184}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.509000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 358, "ndim": 2, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [768, 3072], "is_leaf": true, "stride": [3072, 1], "storage": 358, "view_func": "", "describer_id": 312}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.509000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 312, "id": 358, "source": "L['self'].state[list(L['self'].state.keys())[60]]['exp_avg']"}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.509000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 359, "describer_id": 312, "size": 3072}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.510000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 359, "ndim": 1, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [768], "is_leaf": true, "stride": [1], "storage": 359, "view_func": "", "describer_id": 312}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.510000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 312, "id": 359, "source": "L['self'].state[list(L['self'].state.keys())[61]]['exp_avg']"}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.510000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 360, "describer_id": 312, "size": 3072}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.510000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 360, "ndim": 1, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [768], "is_leaf": true, "stride": [1], "storage": 360, "view_func": "", "describer_id": 312}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.510000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 312, "id": 360, "source": "L['self'].state[list(L['self'].state.keys())[62]]['exp_avg']"}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.511000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 361, "describer_id": 312, "size": 3072}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.511000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 361, "ndim": 1, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [768], "is_leaf": true, "stride": [1], "storage": 361, "view_func": "", "describer_id": 312}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.511000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 312, "id": 361, "source": "L['self'].state[list(L['self'].state.keys())[63]]['exp_avg']"}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.511000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 362, "describer_id": 312, "size": 7077888}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.511000 4107173 
torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 362, "ndim": 2, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [2304, 768], "is_leaf": true, "stride": [768, 1], "storage": 362, "view_func": "", "describer_id": 312}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.511000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 312, "id": 362, "source": "L['self'].state[list(L['self'].state.keys())[64]]['exp_avg']"}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.512000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 363, "describer_id": 312, "size": 9216}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.512000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 363, "ndim": 1, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [2304], "is_leaf": true, "stride": [1], "storage": 363, "view_func": "", "describer_id": 312}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.512000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 312, "id": 363, "source": "L['self'].state[list(L['self'].state.keys())[65]]['exp_avg']"}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.512000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 364, "describer_id": 312, "size": 2359296}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.513000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 364, "ndim": 2, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [768, 768], "is_leaf": true, "stride": [768, 1], "storage": 364, "view_func": "", "describer_id": 312}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.513000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 312, "id": 364, "source": "L['self'].state[list(L['self'].state.keys())[66]]['exp_avg']"}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.513000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 365, "describer_id": 312, "size": 3072}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.513000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 365, "ndim": 1, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [768], "is_leaf": true, "stride": [1], "storage": 365, "view_func": "", "describer_id": 312}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.513000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 312, "id": 365, "source": "L['self'].state[list(L['self'].state.keys())[67]]['exp_avg']"}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.514000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 366, "describer_id": 312, "size": 3072}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.514000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 366, "ndim": 1, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [768], "is_leaf": true, "stride": [1], "storage": 366, "view_func": "", "describer_id": 312}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.514000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 312, "id": 366, "source": 
"L['self'].state[list(L['self'].state.keys())[68]]['exp_avg']"}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.514000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 367, "describer_id": 312, "size": 3072}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.514000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 367, "ndim": 1, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [768], "is_leaf": true, "stride": [1], "storage": 367, "view_func": "", "describer_id": 312}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.514000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 312, "id": 367, "source": "L['self'].state[list(L['self'].state.keys())[69]]['exp_avg']"}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.515000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 368, "describer_id": 312, "size": 9437184}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.515000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 368, "ndim": 2, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [3072, 768], "is_leaf": true, "stride": [768, 1], "storage": 368, "view_func": "", "describer_id": 312}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.515000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 312, "id": 368, "source": "L['self'].state[list(L['self'].state.keys())[70]]['exp_avg']"}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.515000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 369, "describer_id": 312, "size": 12288}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.516000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 369, "ndim": 1, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [3072], "is_leaf": true, "stride": [1], "storage": 369, "view_func": "", "describer_id": 312}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.516000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 312, "id": 369, "source": "L['self'].state[list(L['self'].state.keys())[71]]['exp_avg']"}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.516000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 370, "describer_id": 312, "size": 9437184}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.516000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 370, "ndim": 2, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [768, 3072], "is_leaf": true, "stride": [3072, 1], "storage": 370, "view_func": "", "describer_id": 312}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.516000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 312, "id": 370, "source": "L['self'].state[list(L['self'].state.keys())[72]]['exp_avg']"}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.517000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 371, "describer_id": 312, "size": 3072}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.517000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 371, "ndim": 1, "dtype": 
"torch.float32", "device": "device(type='cuda', index=0)", "size": [768], "is_leaf": true, "stride": [1], "storage": 371, "view_func": "", "describer_id": 312}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.517000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 312, "id": 371, "source": "L['self'].state[list(L['self'].state.keys())[73]]['exp_avg']"}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.517000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 372, "describer_id": 312, "size": 3072}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.517000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 372, "ndim": 1, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [768], "is_leaf": true, "stride": [1], "storage": 372, "view_func": "", "describer_id": 312}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.517000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 312, "id": 372, "source": "L['self'].state[list(L['self'].state.keys())[74]]['exp_avg']"}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.518000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 373, "describer_id": 312, "size": 3072}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.518000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 373, "ndim": 1, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [768], "is_leaf": true, "stride": [1], "storage": 373, "view_func": "", "describer_id": 312}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.518000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 312, "id": 373, "source": "L['self'].state[list(L['self'].state.keys())[75]]['exp_avg']"}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.518000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 374, "describer_id": 312, "size": 7077888}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.519000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 374, "ndim": 2, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [2304, 768], "is_leaf": true, "stride": [768, 1], "storage": 374, "view_func": "", "describer_id": 312}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.519000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 312, "id": 374, "source": "L['self'].state[list(L['self'].state.keys())[76]]['exp_avg']"}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.519000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 375, "describer_id": 312, "size": 9216}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.519000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 375, "ndim": 1, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [2304], "is_leaf": true, "stride": [1], "storage": 375, "view_func": "", "describer_id": 312}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.519000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 312, "id": 375, "source": "L['self'].state[list(L['self'].state.keys())[77]]['exp_avg']"}, "frame_id": 7, "frame_compile_id": 0, 
"attempt": 0} +V0806 13:56:06.520000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 376, "describer_id": 312, "size": 2359296}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.520000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 376, "ndim": 2, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [768, 768], "is_leaf": true, "stride": [768, 1], "storage": 376, "view_func": "", "describer_id": 312}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.520000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 312, "id": 376, "source": "L['self'].state[list(L['self'].state.keys())[78]]['exp_avg']"}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.520000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 377, "describer_id": 312, "size": 3072}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.520000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 377, "ndim": 1, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [768], "is_leaf": true, "stride": [1], "storage": 377, "view_func": "", "describer_id": 312}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.520000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 312, "id": 377, "source": "L['self'].state[list(L['self'].state.keys())[79]]['exp_avg']"}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.521000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 378, "describer_id": 312, "size": 3072}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.521000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 378, "ndim": 1, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [768], "is_leaf": true, "stride": [1], "storage": 378, "view_func": "", "describer_id": 312}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.521000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 312, "id": 378, "source": "L['self'].state[list(L['self'].state.keys())[80]]['exp_avg']"}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.521000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 379, "describer_id": 312, "size": 3072}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.522000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 379, "ndim": 1, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [768], "is_leaf": true, "stride": [1], "storage": 379, "view_func": "", "describer_id": 312}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.522000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 312, "id": 379, "source": "L['self'].state[list(L['self'].state.keys())[81]]['exp_avg']"}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.522000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 380, "describer_id": 312, "size": 9437184}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.522000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 380, "ndim": 2, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [3072, 768], "is_leaf": true, "stride": [768, 1], "storage": 
380, "view_func": "", "describer_id": 312}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.522000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 312, "id": 380, "source": "L['self'].state[list(L['self'].state.keys())[82]]['exp_avg']"}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.523000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 381, "describer_id": 312, "size": 12288}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.523000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 381, "ndim": 1, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [3072], "is_leaf": true, "stride": [1], "storage": 381, "view_func": "", "describer_id": 312}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.523000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 312, "id": 381, "source": "L['self'].state[list(L['self'].state.keys())[83]]['exp_avg']"}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.523000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 382, "describer_id": 312, "size": 9437184}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.523000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 382, "ndim": 2, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [768, 3072], "is_leaf": true, "stride": [3072, 1], "storage": 382, "view_func": "", "describer_id": 312}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.523000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 312, "id": 382, "source": "L['self'].state[list(L['self'].state.keys())[84]]['exp_avg']"}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.524000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 383, "describer_id": 312, "size": 3072}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.524000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 383, "ndim": 1, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [768], "is_leaf": true, "stride": [1], "storage": 383, "view_func": "", "describer_id": 312}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.524000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 312, "id": 383, "source": "L['self'].state[list(L['self'].state.keys())[85]]['exp_avg']"}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.524000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 384, "describer_id": 312, "size": 3072}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.524000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 384, "ndim": 1, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [768], "is_leaf": true, "stride": [1], "storage": 384, "view_func": "", "describer_id": 312}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.525000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 312, "id": 384, "source": "L['self'].state[list(L['self'].state.keys())[86]]['exp_avg']"}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.525000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 385, 
"describer_id": 312, "size": 3072}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.525000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 385, "ndim": 1, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [768], "is_leaf": true, "stride": [1], "storage": 385, "view_func": "", "describer_id": 312}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.525000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 312, "id": 385, "source": "L['self'].state[list(L['self'].state.keys())[87]]['exp_avg']"}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.526000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 386, "describer_id": 312, "size": 7077888}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.526000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 386, "ndim": 2, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [2304, 768], "is_leaf": true, "stride": [768, 1], "storage": 386, "view_func": "", "describer_id": 312}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.526000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 312, "id": 386, "source": "L['self'].state[list(L['self'].state.keys())[88]]['exp_avg']"}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.526000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 387, "describer_id": 312, "size": 9216}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.526000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 387, "ndim": 1, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [2304], "is_leaf": true, "stride": [1], "storage": 387, "view_func": "", "describer_id": 312}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.526000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 312, "id": 387, "source": "L['self'].state[list(L['self'].state.keys())[89]]['exp_avg']"}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.527000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 388, "describer_id": 312, "size": 2359296}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.527000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 388, "ndim": 2, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [768, 768], "is_leaf": true, "stride": [768, 1], "storage": 388, "view_func": "", "describer_id": 312}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.528000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 312, "id": 388, "source": "L['self'].state[list(L['self'].state.keys())[90]]['exp_avg']"}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.528000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 389, "describer_id": 312, "size": 3072}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.528000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 389, "ndim": 1, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [768], "is_leaf": true, "stride": [1], "storage": 389, "view_func": "", "describer_id": 312}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 
13:56:06.528000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 312, "id": 389, "source": "L['self'].state[list(L['self'].state.keys())[91]]['exp_avg']"}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.529000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 390, "describer_id": 312, "size": 3072}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.529000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 390, "ndim": 1, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [768], "is_leaf": true, "stride": [1], "storage": 390, "view_func": "", "describer_id": 312}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.529000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 312, "id": 390, "source": "L['self'].state[list(L['self'].state.keys())[92]]['exp_avg']"}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.529000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 391, "describer_id": 312, "size": 3072}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.529000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 391, "ndim": 1, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [768], "is_leaf": true, "stride": [1], "storage": 391, "view_func": "", "describer_id": 312}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.529000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 312, "id": 391, "source": "L['self'].state[list(L['self'].state.keys())[93]]['exp_avg']"}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.530000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 392, "describer_id": 312, "size": 9437184}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.530000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 392, "ndim": 2, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [3072, 768], "is_leaf": true, "stride": [768, 1], "storage": 392, "view_func": "", "describer_id": 312}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.530000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 312, "id": 392, "source": "L['self'].state[list(L['self'].state.keys())[94]]['exp_avg']"}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.530000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 393, "describer_id": 312, "size": 12288}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.530000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 393, "ndim": 1, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [3072], "is_leaf": true, "stride": [1], "storage": 393, "view_func": "", "describer_id": 312}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.530000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 312, "id": 393, "source": "L['self'].state[list(L['self'].state.keys())[95]]['exp_avg']"}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.531000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 394, "describer_id": 312, "size": 9437184}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 
13:56:06.531000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 394, "ndim": 2, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [768, 3072], "is_leaf": true, "stride": [3072, 1], "storage": 394, "view_func": "", "describer_id": 312}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.531000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 312, "id": 394, "source": "L['self'].state[list(L['self'].state.keys())[96]]['exp_avg']"}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.531000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 395, "describer_id": 312, "size": 3072}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.532000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 395, "ndim": 1, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [768], "is_leaf": true, "stride": [1], "storage": 395, "view_func": "", "describer_id": 312}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.532000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 312, "id": 395, "source": "L['self'].state[list(L['self'].state.keys())[97]]['exp_avg']"}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.532000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 396, "describer_id": 312, "size": 3072}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.532000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 396, "ndim": 1, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [768], "is_leaf": true, "stride": [1], "storage": 396, "view_func": "", "describer_id": 312}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.532000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 312, "id": 396, "source": "L['self'].state[list(L['self'].state.keys())[98]]['exp_avg']"}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.533000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 397, "describer_id": 312, "size": 3072}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.533000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 397, "ndim": 1, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [768], "is_leaf": true, "stride": [1], "storage": 397, "view_func": "", "describer_id": 312}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.533000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 312, "id": 397, "source": "L['self'].state[list(L['self'].state.keys())[99]]['exp_avg']"}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.533000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 398, "describer_id": 312, "size": 7077888}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.533000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 398, "ndim": 2, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [2304, 768], "is_leaf": true, "stride": [768, 1], "storage": 398, "view_func": "", "describer_id": 312}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.533000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 312, "id": 
398, "source": "L['self'].state[list(L['self'].state.keys())[100]]['exp_avg']"}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.534000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 399, "describer_id": 312, "size": 9216}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.534000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 399, "ndim": 1, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [2304], "is_leaf": true, "stride": [1], "storage": 399, "view_func": "", "describer_id": 312}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.534000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 312, "id": 399, "source": "L['self'].state[list(L['self'].state.keys())[101]]['exp_avg']"}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.534000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 400, "describer_id": 312, "size": 2359296}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.535000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 400, "ndim": 2, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [768, 768], "is_leaf": true, "stride": [768, 1], "storage": 400, "view_func": "", "describer_id": 312}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.535000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 312, "id": 400, "source": "L['self'].state[list(L['self'].state.keys())[102]]['exp_avg']"}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.535000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 401, "describer_id": 312, "size": 3072}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.535000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 401, "ndim": 1, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [768], "is_leaf": true, "stride": [1], "storage": 401, "view_func": "", "describer_id": 312}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.535000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 312, "id": 401, "source": "L['self'].state[list(L['self'].state.keys())[103]]['exp_avg']"}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.536000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 402, "describer_id": 312, "size": 3072}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.536000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 402, "ndim": 1, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [768], "is_leaf": true, "stride": [1], "storage": 402, "view_func": "", "describer_id": 312}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.536000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 312, "id": 402, "source": "L['self'].state[list(L['self'].state.keys())[104]]['exp_avg']"}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.536000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 403, "describer_id": 312, "size": 3072}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.536000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 403, "ndim": 1, "dtype": 
"torch.float32", "device": "device(type='cuda', index=0)", "size": [768], "is_leaf": true, "stride": [1], "storage": 403, "view_func": "", "describer_id": 312}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.536000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 312, "id": 403, "source": "L['self'].state[list(L['self'].state.keys())[105]]['exp_avg']"}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.537000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 404, "describer_id": 312, "size": 9437184}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.537000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 404, "ndim": 2, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [3072, 768], "is_leaf": true, "stride": [768, 1], "storage": 404, "view_func": "", "describer_id": 312}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.537000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 312, "id": 404, "source": "L['self'].state[list(L['self'].state.keys())[106]]['exp_avg']"}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.537000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 405, "describer_id": 312, "size": 12288}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.537000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 405, "ndim": 1, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [3072], "is_leaf": true, "stride": [1], "storage": 405, "view_func": "", "describer_id": 312}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.538000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 312, "id": 405, "source": "L['self'].state[list(L['self'].state.keys())[107]]['exp_avg']"}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.538000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 406, "describer_id": 312, "size": 9437184}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.538000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 406, "ndim": 2, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [768, 3072], "is_leaf": true, "stride": [3072, 1], "storage": 406, "view_func": "", "describer_id": 312}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.538000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 312, "id": 406, "source": "L['self'].state[list(L['self'].state.keys())[108]]['exp_avg']"}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.539000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 407, "describer_id": 312, "size": 3072}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.539000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 407, "ndim": 1, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [768], "is_leaf": true, "stride": [1], "storage": 407, "view_func": "", "describer_id": 312}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.539000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 312, "id": 407, "source": "L['self'].state[list(L['self'].state.keys())[109]]['exp_avg']"}, "frame_id": 7, 
"frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.539000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 408, "describer_id": 312, "size": 3072}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.539000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 408, "ndim": 1, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [768], "is_leaf": true, "stride": [1], "storage": 408, "view_func": "", "describer_id": 312}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.539000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 312, "id": 408, "source": "L['self'].state[list(L['self'].state.keys())[110]]['exp_avg']"}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.540000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 409, "describer_id": 312, "size": 3072}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.540000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 409, "ndim": 1, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [768], "is_leaf": true, "stride": [1], "storage": 409, "view_func": "", "describer_id": 312}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.540000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 312, "id": 409, "source": "L['self'].state[list(L['self'].state.keys())[111]]['exp_avg']"}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.540000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 410, "describer_id": 312, "size": 7077888}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.540000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 410, "ndim": 2, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [2304, 768], "is_leaf": true, "stride": [768, 1], "storage": 410, "view_func": "", "describer_id": 312}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.540000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 312, "id": 410, "source": "L['self'].state[list(L['self'].state.keys())[112]]['exp_avg']"}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.541000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 411, "describer_id": 312, "size": 9216}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.541000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 411, "ndim": 1, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [2304], "is_leaf": true, "stride": [1], "storage": 411, "view_func": "", "describer_id": 312}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.541000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 312, "id": 411, "source": "L['self'].state[list(L['self'].state.keys())[113]]['exp_avg']"}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.542000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 412, "describer_id": 312, "size": 2359296}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.542000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 412, "ndim": 2, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [768, 768], "is_leaf": true, 
"stride": [768, 1], "storage": 412, "view_func": "", "describer_id": 312}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.542000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 312, "id": 412, "source": "L['self'].state[list(L['self'].state.keys())[114]]['exp_avg']"}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.542000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 413, "describer_id": 312, "size": 3072}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.542000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 413, "ndim": 1, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [768], "is_leaf": true, "stride": [1], "storage": 413, "view_func": "", "describer_id": 312}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.542000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 312, "id": 413, "source": "L['self'].state[list(L['self'].state.keys())[115]]['exp_avg']"}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.543000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 414, "describer_id": 312, "size": 3072}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.543000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 414, "ndim": 1, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [768], "is_leaf": true, "stride": [1], "storage": 414, "view_func": "", "describer_id": 312}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.543000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 312, "id": 414, "source": "L['self'].state[list(L['self'].state.keys())[116]]['exp_avg']"}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.543000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 415, "describer_id": 312, "size": 3072}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.543000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 415, "ndim": 1, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [768], "is_leaf": true, "stride": [1], "storage": 415, "view_func": "", "describer_id": 312}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.543000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 312, "id": 415, "source": "L['self'].state[list(L['self'].state.keys())[117]]['exp_avg']"}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.544000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 416, "describer_id": 312, "size": 9437184}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.544000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 416, "ndim": 2, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [3072, 768], "is_leaf": true, "stride": [768, 1], "storage": 416, "view_func": "", "describer_id": 312}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.544000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 312, "id": 416, "source": "L['self'].state[list(L['self'].state.keys())[118]]['exp_avg']"}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.544000 4107173 torch/_subclasses/meta_utils.py:204] 
{"describe_storage": {"id": 417, "describer_id": 312, "size": 12288}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.545000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 417, "ndim": 1, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [3072], "is_leaf": true, "stride": [1], "storage": 417, "view_func": "", "describer_id": 312}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.545000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 312, "id": 417, "source": "L['self'].state[list(L['self'].state.keys())[119]]['exp_avg']"}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.545000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 418, "describer_id": 312, "size": 9437184}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.545000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 418, "ndim": 2, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [768, 3072], "is_leaf": true, "stride": [3072, 1], "storage": 418, "view_func": "", "describer_id": 312}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.545000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 312, "id": 418, "source": "L['self'].state[list(L['self'].state.keys())[120]]['exp_avg']"}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.546000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 419, "describer_id": 312, "size": 3072}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.546000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 419, "ndim": 1, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [768], "is_leaf": true, "stride": [1], "storage": 419, "view_func": "", "describer_id": 312}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.546000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 312, "id": 419, "source": "L['self'].state[list(L['self'].state.keys())[121]]['exp_avg']"}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.546000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 420, "describer_id": 312, "size": 3072}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.546000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 420, "ndim": 1, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [768], "is_leaf": true, "stride": [1], "storage": 420, "view_func": "", "describer_id": 312}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.546000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 312, "id": 420, "source": "L['self'].state[list(L['self'].state.keys())[122]]['exp_avg']"}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.547000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 421, "describer_id": 312, "size": 3072}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.547000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 421, "ndim": 1, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [768], "is_leaf": true, "stride": [1], "storage": 421, "view_func": "", "describer_id": 312}, "frame_id": 7, "frame_compile_id": 0, 
"attempt": 0} +V0806 13:56:06.547000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 312, "id": 421, "source": "L['self'].state[list(L['self'].state.keys())[123]]['exp_avg']"}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.547000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 422, "describer_id": 312, "size": 7077888}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.547000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 422, "ndim": 2, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [2304, 768], "is_leaf": true, "stride": [768, 1], "storage": 422, "view_func": "", "describer_id": 312}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.548000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 312, "id": 422, "source": "L['self'].state[list(L['self'].state.keys())[124]]['exp_avg']"}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.548000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 423, "describer_id": 312, "size": 9216}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.548000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 423, "ndim": 1, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [2304], "is_leaf": true, "stride": [1], "storage": 423, "view_func": "", "describer_id": 312}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.548000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 312, "id": 423, "source": "L['self'].state[list(L['self'].state.keys())[125]]['exp_avg']"}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.549000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 424, "describer_id": 312, "size": 2359296}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.549000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 424, "ndim": 2, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [768, 768], "is_leaf": true, "stride": [768, 1], "storage": 424, "view_func": "", "describer_id": 312}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.549000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 312, "id": 424, "source": "L['self'].state[list(L['self'].state.keys())[126]]['exp_avg']"}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.549000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 425, "describer_id": 312, "size": 3072}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.549000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 425, "ndim": 1, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [768], "is_leaf": true, "stride": [1], "storage": 425, "view_func": "", "describer_id": 312}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.549000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 312, "id": 425, "source": "L['self'].state[list(L['self'].state.keys())[127]]['exp_avg']"}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.550000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 426, "describer_id": 312, "size": 3072}, "frame_id": 7, "frame_compile_id": 0, 
"attempt": 0} +V0806 13:56:06.550000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 426, "ndim": 1, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [768], "is_leaf": true, "stride": [1], "storage": 426, "view_func": "", "describer_id": 312}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.550000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 312, "id": 426, "source": "L['self'].state[list(L['self'].state.keys())[128]]['exp_avg']"}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.550000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 427, "describer_id": 312, "size": 3072}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.550000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 427, "ndim": 1, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [768], "is_leaf": true, "stride": [1], "storage": 427, "view_func": "", "describer_id": 312}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.551000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 312, "id": 427, "source": "L['self'].state[list(L['self'].state.keys())[129]]['exp_avg']"}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.551000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 428, "describer_id": 312, "size": 9437184}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.551000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 428, "ndim": 2, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [3072, 768], "is_leaf": true, "stride": [768, 1], "storage": 428, "view_func": "", "describer_id": 312}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.551000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 312, "id": 428, "source": "L['self'].state[list(L['self'].state.keys())[130]]['exp_avg']"}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.552000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 429, "describer_id": 312, "size": 12288}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.552000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 429, "ndim": 1, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [3072], "is_leaf": true, "stride": [1], "storage": 429, "view_func": "", "describer_id": 312}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.552000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 312, "id": 429, "source": "L['self'].state[list(L['self'].state.keys())[131]]['exp_avg']"}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.552000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 430, "describer_id": 312, "size": 9437184}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.552000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 430, "ndim": 2, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [768, 3072], "is_leaf": true, "stride": [3072, 1], "storage": 430, "view_func": "", "describer_id": 312}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.552000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": 
{"describer_id": 312, "id": 430, "source": "L['self'].state[list(L['self'].state.keys())[132]]['exp_avg']"}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.553000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 431, "describer_id": 312, "size": 3072}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.553000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 431, "ndim": 1, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [768], "is_leaf": true, "stride": [1], "storage": 431, "view_func": "", "describer_id": 312}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.553000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 312, "id": 431, "source": "L['self'].state[list(L['self'].state.keys())[133]]['exp_avg']"}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.553000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 432, "describer_id": 312, "size": 3072}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.553000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 432, "ndim": 1, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [768], "is_leaf": true, "stride": [1], "storage": 432, "view_func": "", "describer_id": 312}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.554000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 312, "id": 432, "source": "L['self'].state[list(L['self'].state.keys())[134]]['exp_avg']"}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.554000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 433, "describer_id": 312, "size": 3072}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.554000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 433, "ndim": 1, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [768], "is_leaf": true, "stride": [1], "storage": 433, "view_func": "", "describer_id": 312}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.554000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 312, "id": 433, "source": "L['self'].state[list(L['self'].state.keys())[135]]['exp_avg']"}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.555000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 434, "describer_id": 312, "size": 7077888}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.555000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 434, "ndim": 2, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [2304, 768], "is_leaf": true, "stride": [768, 1], "storage": 434, "view_func": "", "describer_id": 312}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.555000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 312, "id": 434, "source": "L['self'].state[list(L['self'].state.keys())[136]]['exp_avg']"}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.555000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 435, "describer_id": 312, "size": 9216}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.555000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 435, 
"ndim": 1, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [2304], "is_leaf": true, "stride": [1], "storage": 435, "view_func": "", "describer_id": 312}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.555000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 312, "id": 435, "source": "L['self'].state[list(L['self'].state.keys())[137]]['exp_avg']"}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.556000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 436, "describer_id": 312, "size": 2359296}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.556000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 436, "ndim": 2, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [768, 768], "is_leaf": true, "stride": [768, 1], "storage": 436, "view_func": "", "describer_id": 312}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.556000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 312, "id": 436, "source": "L['self'].state[list(L['self'].state.keys())[138]]['exp_avg']"}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.556000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 437, "describer_id": 312, "size": 3072}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.556000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 437, "ndim": 1, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [768], "is_leaf": true, "stride": [1], "storage": 437, "view_func": "", "describer_id": 312}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.557000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 312, "id": 437, "source": "L['self'].state[list(L['self'].state.keys())[139]]['exp_avg']"}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.557000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 438, "describer_id": 312, "size": 3072}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.557000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 438, "ndim": 1, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [768], "is_leaf": true, "stride": [1], "storage": 438, "view_func": "", "describer_id": 312}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.557000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 312, "id": 438, "source": "L['self'].state[list(L['self'].state.keys())[140]]['exp_avg']"}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.557000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 439, "describer_id": 312, "size": 3072}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.558000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 439, "ndim": 1, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [768], "is_leaf": true, "stride": [1], "storage": 439, "view_func": "", "describer_id": 312}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.558000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 312, "id": 439, "source": "L['self'].state[list(L['self'].state.keys())[141]]['exp_avg']"}, "frame_id": 7, 
"frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.558000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 440, "describer_id": 312, "size": 9437184}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.558000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 440, "ndim": 2, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [3072, 768], "is_leaf": true, "stride": [768, 1], "storage": 440, "view_func": "", "describer_id": 312}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.558000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 312, "id": 440, "source": "L['self'].state[list(L['self'].state.keys())[142]]['exp_avg']"}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.559000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 441, "describer_id": 312, "size": 12288}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.559000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 441, "ndim": 1, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [3072], "is_leaf": true, "stride": [1], "storage": 441, "view_func": "", "describer_id": 312}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.559000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 312, "id": 441, "source": "L['self'].state[list(L['self'].state.keys())[143]]['exp_avg']"}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.559000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 442, "describer_id": 312, "size": 9437184}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.559000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 442, "ndim": 2, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [768, 3072], "is_leaf": true, "stride": [3072, 1], "storage": 442, "view_func": "", "describer_id": 312}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.559000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 312, "id": 442, "source": "L['self'].state[list(L['self'].state.keys())[144]]['exp_avg']"}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.560000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 443, "describer_id": 312, "size": 3072}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.560000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 443, "ndim": 1, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [768], "is_leaf": true, "stride": [1], "storage": 443, "view_func": "", "describer_id": 312}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.560000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 312, "id": 443, "source": "L['self'].state[list(L['self'].state.keys())[145]]['exp_avg']"}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.560000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 444, "describer_id": 312, "size": 3072}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.561000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 444, "ndim": 1, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [768], "is_leaf": 
true, "stride": [1], "storage": 444, "view_func": "", "describer_id": 312}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.561000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 312, "id": 444, "source": "L['self'].state[list(L['self'].state.keys())[146]]['exp_avg']"}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.561000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 445, "describer_id": 312, "size": 3072}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.561000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 445, "ndim": 1, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [768], "is_leaf": true, "stride": [1], "storage": 445, "view_func": "", "describer_id": 312}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.561000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 312, "id": 445, "source": "L['self'].state[list(L['self'].state.keys())[147]]['exp_avg']"}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.562000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 446, "describer_id": 312, "size": 154533888}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.562000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 446, "ndim": 2, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [50304, 768], "is_leaf": true, "stride": [768, 1], "storage": 446, "view_func": "", "describer_id": 312}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.562000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 312, "id": 446, "source": "L['self'].state[list(L['self'].state.keys())[0]]['exp_avg_sq']"}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.562000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 447, "describer_id": 312, "size": 3072}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.562000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 447, "ndim": 1, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [768], "is_leaf": true, "stride": [1], "storage": 447, "view_func": "", "describer_id": 312}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.562000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 312, "id": 447, "source": "L['self'].state[list(L['self'].state.keys())[2]]['exp_avg_sq']"}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.563000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 448, "describer_id": 312, "size": 3072}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.563000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 448, "ndim": 1, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [768], "is_leaf": true, "stride": [1], "storage": 448, "view_func": "", "describer_id": 312}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.563000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 312, "id": 448, "source": "L['self'].state[list(L['self'].state.keys())[3]]['exp_avg_sq']"}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.563000 4107173 torch/_subclasses/meta_utils.py:204] 
{"describe_storage": {"id": 449, "describer_id": 312, "size": 7077888}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.564000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 449, "ndim": 2, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [2304, 768], "is_leaf": true, "stride": [768, 1], "storage": 449, "view_func": "", "describer_id": 312}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.564000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 312, "id": 449, "source": "L['self'].state[list(L['self'].state.keys())[4]]['exp_avg_sq']"}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.564000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 450, "describer_id": 312, "size": 9216}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.564000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 450, "ndim": 1, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [2304], "is_leaf": true, "stride": [1], "storage": 450, "view_func": "", "describer_id": 312}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.564000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 312, "id": 450, "source": "L['self'].state[list(L['self'].state.keys())[5]]['exp_avg_sq']"}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.565000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 451, "describer_id": 312, "size": 2359296}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.565000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 451, "ndim": 2, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [768, 768], "is_leaf": true, "stride": [768, 1], "storage": 451, "view_func": "", "describer_id": 312}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.565000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 312, "id": 451, "source": "L['self'].state[list(L['self'].state.keys())[6]]['exp_avg_sq']"}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.565000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 452, "describer_id": 312, "size": 3072}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.565000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 452, "ndim": 1, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [768], "is_leaf": true, "stride": [1], "storage": 452, "view_func": "", "describer_id": 312}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.565000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 312, "id": 452, "source": "L['self'].state[list(L['self'].state.keys())[7]]['exp_avg_sq']"}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.566000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 453, "describer_id": 312, "size": 3072}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.566000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 453, "ndim": 1, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [768], "is_leaf": true, "stride": [1], "storage": 453, "view_func": "", "describer_id": 312}, "frame_id": 7, 
"frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.566000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 312, "id": 453, "source": "L['self'].state[list(L['self'].state.keys())[8]]['exp_avg_sq']"}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.566000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 454, "describer_id": 312, "size": 3072}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.567000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 454, "ndim": 1, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [768], "is_leaf": true, "stride": [1], "storage": 454, "view_func": "", "describer_id": 312}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.567000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 312, "id": 454, "source": "L['self'].state[list(L['self'].state.keys())[9]]['exp_avg_sq']"}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.567000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 455, "describer_id": 312, "size": 9437184}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.567000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 455, "ndim": 2, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [3072, 768], "is_leaf": true, "stride": [768, 1], "storage": 455, "view_func": "", "describer_id": 312}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.567000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 312, "id": 455, "source": "L['self'].state[list(L['self'].state.keys())[10]]['exp_avg_sq']"}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.568000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 456, "describer_id": 312, "size": 12288}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.568000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 456, "ndim": 1, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [3072], "is_leaf": true, "stride": [1], "storage": 456, "view_func": "", "describer_id": 312}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.568000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 312, "id": 456, "source": "L['self'].state[list(L['self'].state.keys())[11]]['exp_avg_sq']"}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.568000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 457, "describer_id": 312, "size": 9437184}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.568000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 457, "ndim": 2, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [768, 3072], "is_leaf": true, "stride": [3072, 1], "storage": 457, "view_func": "", "describer_id": 312}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.568000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 312, "id": 457, "source": "L['self'].state[list(L['self'].state.keys())[12]]['exp_avg_sq']"}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.569000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 458, "describer_id": 312, "size": 3072}, 
"frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.569000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 458, "ndim": 1, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [768], "is_leaf": true, "stride": [1], "storage": 458, "view_func": "", "describer_id": 312}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.569000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 312, "id": 458, "source": "L['self'].state[list(L['self'].state.keys())[13]]['exp_avg_sq']"}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.569000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 459, "describer_id": 312, "size": 3072}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.570000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 459, "ndim": 1, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [768], "is_leaf": true, "stride": [1], "storage": 459, "view_func": "", "describer_id": 312}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.570000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 312, "id": 459, "source": "L['self'].state[list(L['self'].state.keys())[14]]['exp_avg_sq']"}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.570000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 460, "describer_id": 312, "size": 3072}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.570000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 460, "ndim": 1, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [768], "is_leaf": true, "stride": [1], "storage": 460, "view_func": "", "describer_id": 312}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.570000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 312, "id": 460, "source": "L['self'].state[list(L['self'].state.keys())[15]]['exp_avg_sq']"}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.571000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 461, "describer_id": 312, "size": 7077888}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.571000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 461, "ndim": 2, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [2304, 768], "is_leaf": true, "stride": [768, 1], "storage": 461, "view_func": "", "describer_id": 312}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.571000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 312, "id": 461, "source": "L['self'].state[list(L['self'].state.keys())[16]]['exp_avg_sq']"}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.571000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 462, "describer_id": 312, "size": 9216}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.571000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 462, "ndim": 1, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [2304], "is_leaf": true, "stride": [1], "storage": 462, "view_func": "", "describer_id": 312}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.571000 4107173 
torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 312, "id": 462, "source": "L['self'].state[list(L['self'].state.keys())[17]]['exp_avg_sq']"}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.572000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 463, "describer_id": 312, "size": 2359296}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.572000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 463, "ndim": 2, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [768, 768], "is_leaf": true, "stride": [768, 1], "storage": 463, "view_func": "", "describer_id": 312}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.572000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 312, "id": 463, "source": "L['self'].state[list(L['self'].state.keys())[18]]['exp_avg_sq']"}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.572000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 464, "describer_id": 312, "size": 3072}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.572000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 464, "ndim": 1, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [768], "is_leaf": true, "stride": [1], "storage": 464, "view_func": "", "describer_id": 312}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.573000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 312, "id": 464, "source": "L['self'].state[list(L['self'].state.keys())[19]]['exp_avg_sq']"}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.573000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 465, "describer_id": 312, "size": 3072}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.573000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 465, "ndim": 1, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [768], "is_leaf": true, "stride": [1], "storage": 465, "view_func": "", "describer_id": 312}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.573000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 312, "id": 465, "source": "L['self'].state[list(L['self'].state.keys())[20]]['exp_avg_sq']"}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.574000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 466, "describer_id": 312, "size": 3072}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.574000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 466, "ndim": 1, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [768], "is_leaf": true, "stride": [1], "storage": 466, "view_func": "", "describer_id": 312}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.574000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 312, "id": 466, "source": "L['self'].state[list(L['self'].state.keys())[21]]['exp_avg_sq']"}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.574000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 467, "describer_id": 312, "size": 9437184}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.574000 4107173 
torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 467, "ndim": 2, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [3072, 768], "is_leaf": true, "stride": [768, 1], "storage": 467, "view_func": "", "describer_id": 312}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.574000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 312, "id": 467, "source": "L['self'].state[list(L['self'].state.keys())[22]]['exp_avg_sq']"}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.575000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 468, "describer_id": 312, "size": 12288}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.575000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 468, "ndim": 1, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [3072], "is_leaf": true, "stride": [1], "storage": 468, "view_func": "", "describer_id": 312}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.575000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 312, "id": 468, "source": "L['self'].state[list(L['self'].state.keys())[23]]['exp_avg_sq']"}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.575000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 469, "describer_id": 312, "size": 9437184}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.575000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 469, "ndim": 2, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [768, 3072], "is_leaf": true, "stride": [3072, 1], "storage": 469, "view_func": "", "describer_id": 312}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.575000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 312, "id": 469, "source": "L['self'].state[list(L['self'].state.keys())[24]]['exp_avg_sq']"}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.576000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 470, "describer_id": 312, "size": 3072}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.576000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 470, "ndim": 1, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [768], "is_leaf": true, "stride": [1], "storage": 470, "view_func": "", "describer_id": 312}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.576000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 312, "id": 470, "source": "L['self'].state[list(L['self'].state.keys())[25]]['exp_avg_sq']"}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.576000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 471, "describer_id": 312, "size": 3072}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.577000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 471, "ndim": 1, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [768], "is_leaf": true, "stride": [1], "storage": 471, "view_func": "", "describer_id": 312}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.577000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 312, "id": 471, 
"source": "L['self'].state[list(L['self'].state.keys())[26]]['exp_avg_sq']"}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.577000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 472, "describer_id": 312, "size": 3072}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.577000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 472, "ndim": 1, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [768], "is_leaf": true, "stride": [1], "storage": 472, "view_func": "", "describer_id": 312}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.577000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 312, "id": 472, "source": "L['self'].state[list(L['self'].state.keys())[27]]['exp_avg_sq']"}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.578000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 473, "describer_id": 312, "size": 7077888}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.578000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 473, "ndim": 2, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [2304, 768], "is_leaf": true, "stride": [768, 1], "storage": 473, "view_func": "", "describer_id": 312}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.578000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 312, "id": 473, "source": "L['self'].state[list(L['self'].state.keys())[28]]['exp_avg_sq']"}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.578000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 474, "describer_id": 312, "size": 9216}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.578000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 474, "ndim": 1, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [2304], "is_leaf": true, "stride": [1], "storage": 474, "view_func": "", "describer_id": 312}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.578000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 312, "id": 474, "source": "L['self'].state[list(L['self'].state.keys())[29]]['exp_avg_sq']"}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.579000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 475, "describer_id": 312, "size": 2359296}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.579000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 475, "ndim": 2, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [768, 768], "is_leaf": true, "stride": [768, 1], "storage": 475, "view_func": "", "describer_id": 312}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.579000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 312, "id": 475, "source": "L['self'].state[list(L['self'].state.keys())[30]]['exp_avg_sq']"}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.579000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 476, "describer_id": 312, "size": 3072}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.580000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 476, "ndim": 1, 
"dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [768], "is_leaf": true, "stride": [1], "storage": 476, "view_func": "", "describer_id": 312}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.580000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 312, "id": 476, "source": "L['self'].state[list(L['self'].state.keys())[31]]['exp_avg_sq']"}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.580000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 477, "describer_id": 312, "size": 3072}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.580000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 477, "ndim": 1, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [768], "is_leaf": true, "stride": [1], "storage": 477, "view_func": "", "describer_id": 312}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.580000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 312, "id": 477, "source": "L['self'].state[list(L['self'].state.keys())[32]]['exp_avg_sq']"}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.581000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 478, "describer_id": 312, "size": 3072}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.581000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 478, "ndim": 1, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [768], "is_leaf": true, "stride": [1], "storage": 478, "view_func": "", "describer_id": 312}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.581000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 312, "id": 478, "source": "L['self'].state[list(L['self'].state.keys())[33]]['exp_avg_sq']"}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.581000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 479, "describer_id": 312, "size": 9437184}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.581000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 479, "ndim": 2, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [3072, 768], "is_leaf": true, "stride": [768, 1], "storage": 479, "view_func": "", "describer_id": 312}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.581000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 312, "id": 479, "source": "L['self'].state[list(L['self'].state.keys())[34]]['exp_avg_sq']"}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.582000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 480, "describer_id": 312, "size": 12288}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.582000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 480, "ndim": 1, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [3072], "is_leaf": true, "stride": [1], "storage": 480, "view_func": "", "describer_id": 312}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.582000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 312, "id": 480, "source": "L['self'].state[list(L['self'].state.keys())[35]]['exp_avg_sq']"}, "frame_id": 7, 
"frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.582000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 481, "describer_id": 312, "size": 9437184}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.583000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 481, "ndim": 2, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [768, 3072], "is_leaf": true, "stride": [3072, 1], "storage": 481, "view_func": "", "describer_id": 312}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.583000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 312, "id": 481, "source": "L['self'].state[list(L['self'].state.keys())[36]]['exp_avg_sq']"}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.583000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 482, "describer_id": 312, "size": 3072}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.583000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 482, "ndim": 1, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [768], "is_leaf": true, "stride": [1], "storage": 482, "view_func": "", "describer_id": 312}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.583000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 312, "id": 482, "source": "L['self'].state[list(L['self'].state.keys())[37]]['exp_avg_sq']"}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.584000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 483, "describer_id": 312, "size": 3072}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.584000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 483, "ndim": 1, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [768], "is_leaf": true, "stride": [1], "storage": 483, "view_func": "", "describer_id": 312}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.584000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 312, "id": 483, "source": "L['self'].state[list(L['self'].state.keys())[38]]['exp_avg_sq']"}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.584000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 484, "describer_id": 312, "size": 3072}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.584000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 484, "ndim": 1, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [768], "is_leaf": true, "stride": [1], "storage": 484, "view_func": "", "describer_id": 312}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.584000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 312, "id": 484, "source": "L['self'].state[list(L['self'].state.keys())[39]]['exp_avg_sq']"}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.585000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 485, "describer_id": 312, "size": 7077888}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.585000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 485, "ndim": 2, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [2304, 768], "is_leaf": 
true, "stride": [768, 1], "storage": 485, "view_func": "", "describer_id": 312}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.585000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 312, "id": 485, "source": "L['self'].state[list(L['self'].state.keys())[40]]['exp_avg_sq']"}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.585000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 486, "describer_id": 312, "size": 9216}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.585000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 486, "ndim": 1, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [2304], "is_leaf": true, "stride": [1], "storage": 486, "view_func": "", "describer_id": 312}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.586000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 312, "id": 486, "source": "L['self'].state[list(L['self'].state.keys())[41]]['exp_avg_sq']"}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.586000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 487, "describer_id": 312, "size": 2359296}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.586000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 487, "ndim": 2, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [768, 768], "is_leaf": true, "stride": [768, 1], "storage": 487, "view_func": "", "describer_id": 312}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.586000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 312, "id": 487, "source": "L['self'].state[list(L['self'].state.keys())[42]]['exp_avg_sq']"}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.587000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 488, "describer_id": 312, "size": 3072}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.587000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 488, "ndim": 1, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [768], "is_leaf": true, "stride": [1], "storage": 488, "view_func": "", "describer_id": 312}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.587000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 312, "id": 488, "source": "L['self'].state[list(L['self'].state.keys())[43]]['exp_avg_sq']"}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.587000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 489, "describer_id": 312, "size": 3072}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.587000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 489, "ndim": 1, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [768], "is_leaf": true, "stride": [1], "storage": 489, "view_func": "", "describer_id": 312}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.587000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 312, "id": 489, "source": "L['self'].state[list(L['self'].state.keys())[44]]['exp_avg_sq']"}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.588000 4107173 
torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 490, "describer_id": 312, "size": 3072}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.588000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 490, "ndim": 1, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [768], "is_leaf": true, "stride": [1], "storage": 490, "view_func": "", "describer_id": 312}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.588000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 312, "id": 490, "source": "L['self'].state[list(L['self'].state.keys())[45]]['exp_avg_sq']"}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.588000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 491, "describer_id": 312, "size": 9437184}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.588000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 491, "ndim": 2, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [3072, 768], "is_leaf": true, "stride": [768, 1], "storage": 491, "view_func": "", "describer_id": 312}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.588000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 312, "id": 491, "source": "L['self'].state[list(L['self'].state.keys())[46]]['exp_avg_sq']"}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.589000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 492, "describer_id": 312, "size": 12288}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.589000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 492, "ndim": 1, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [3072], "is_leaf": true, "stride": [1], "storage": 492, "view_func": "", "describer_id": 312}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.589000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 312, "id": 492, "source": "L['self'].state[list(L['self'].state.keys())[47]]['exp_avg_sq']"}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.589000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 493, "describer_id": 312, "size": 9437184}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.590000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 493, "ndim": 2, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [768, 3072], "is_leaf": true, "stride": [3072, 1], "storage": 493, "view_func": "", "describer_id": 312}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.590000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 312, "id": 493, "source": "L['self'].state[list(L['self'].state.keys())[48]]['exp_avg_sq']"}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.590000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 494, "describer_id": 312, "size": 3072}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.590000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 494, "ndim": 1, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [768], "is_leaf": true, "stride": [1], "storage": 494, "view_func": "", 
"describer_id": 312}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.590000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 312, "id": 494, "source": "L['self'].state[list(L['self'].state.keys())[49]]['exp_avg_sq']"}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.591000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 495, "describer_id": 312, "size": 3072}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.591000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 495, "ndim": 1, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [768], "is_leaf": true, "stride": [1], "storage": 495, "view_func": "", "describer_id": 312}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.591000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 312, "id": 495, "source": "L['self'].state[list(L['self'].state.keys())[50]]['exp_avg_sq']"}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.592000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 496, "describer_id": 312, "size": 3072}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.592000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 496, "ndim": 1, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [768], "is_leaf": true, "stride": [1], "storage": 496, "view_func": "", "describer_id": 312}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.592000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 312, "id": 496, "source": "L['self'].state[list(L['self'].state.keys())[51]]['exp_avg_sq']"}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.592000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 497, "describer_id": 312, "size": 7077888}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.593000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 497, "ndim": 2, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [2304, 768], "is_leaf": true, "stride": [768, 1], "storage": 497, "view_func": "", "describer_id": 312}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.593000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 312, "id": 497, "source": "L['self'].state[list(L['self'].state.keys())[52]]['exp_avg_sq']"}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.593000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 498, "describer_id": 312, "size": 9216}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.593000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 498, "ndim": 1, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [2304], "is_leaf": true, "stride": [1], "storage": 498, "view_func": "", "describer_id": 312}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.593000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 312, "id": 498, "source": "L['self'].state[list(L['self'].state.keys())[53]]['exp_avg_sq']"}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.594000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 499, "describer_id": 
312, "size": 2359296}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.594000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 499, "ndim": 2, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [768, 768], "is_leaf": true, "stride": [768, 1], "storage": 499, "view_func": "", "describer_id": 312}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.594000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 312, "id": 499, "source": "L['self'].state[list(L['self'].state.keys())[54]]['exp_avg_sq']"}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.594000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 500, "describer_id": 312, "size": 3072}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.594000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 500, "ndim": 1, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [768], "is_leaf": true, "stride": [1], "storage": 500, "view_func": "", "describer_id": 312}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.594000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 312, "id": 500, "source": "L['self'].state[list(L['self'].state.keys())[55]]['exp_avg_sq']"}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.595000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 501, "describer_id": 312, "size": 3072}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.595000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 501, "ndim": 1, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [768], "is_leaf": true, "stride": [1], "storage": 501, "view_func": "", "describer_id": 312}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.595000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 312, "id": 501, "source": "L['self'].state[list(L['self'].state.keys())[56]]['exp_avg_sq']"}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.595000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 502, "describer_id": 312, "size": 3072}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.596000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 502, "ndim": 1, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [768], "is_leaf": true, "stride": [1], "storage": 502, "view_func": "", "describer_id": 312}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.596000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 312, "id": 502, "source": "L['self'].state[list(L['self'].state.keys())[57]]['exp_avg_sq']"}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.596000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 503, "describer_id": 312, "size": 9437184}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.596000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 503, "ndim": 2, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [3072, 768], "is_leaf": true, "stride": [768, 1], "storage": 503, "view_func": "", "describer_id": 312}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.596000 
4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 312, "id": 503, "source": "L['self'].state[list(L['self'].state.keys())[58]]['exp_avg_sq']"}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.597000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 504, "describer_id": 312, "size": 12288}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.597000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 504, "ndim": 1, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [3072], "is_leaf": true, "stride": [1], "storage": 504, "view_func": "", "describer_id": 312}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.597000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 312, "id": 504, "source": "L['self'].state[list(L['self'].state.keys())[59]]['exp_avg_sq']"}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.597000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 505, "describer_id": 312, "size": 9437184}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.597000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 505, "ndim": 2, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [768, 3072], "is_leaf": true, "stride": [3072, 1], "storage": 505, "view_func": "", "describer_id": 312}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.597000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 312, "id": 505, "source": "L['self'].state[list(L['self'].state.keys())[60]]['exp_avg_sq']"}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.598000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 506, "describer_id": 312, "size": 3072}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.598000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 506, "ndim": 1, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [768], "is_leaf": true, "stride": [1], "storage": 506, "view_func": "", "describer_id": 312}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.598000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 312, "id": 506, "source": "L['self'].state[list(L['self'].state.keys())[61]]['exp_avg_sq']"}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.599000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 507, "describer_id": 312, "size": 3072}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.599000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 507, "ndim": 1, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [768], "is_leaf": true, "stride": [1], "storage": 507, "view_func": "", "describer_id": 312}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.599000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 312, "id": 507, "source": "L['self'].state[list(L['self'].state.keys())[62]]['exp_avg_sq']"}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.599000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 508, "describer_id": 312, "size": 3072}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.599000 
4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 508, "ndim": 1, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [768], "is_leaf": true, "stride": [1], "storage": 508, "view_func": "", "describer_id": 312}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.599000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 312, "id": 508, "source": "L['self'].state[list(L['self'].state.keys())[63]]['exp_avg_sq']"}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.600000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 509, "describer_id": 312, "size": 7077888}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.600000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 509, "ndim": 2, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [2304, 768], "is_leaf": true, "stride": [768, 1], "storage": 509, "view_func": "", "describer_id": 312}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.600000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 312, "id": 509, "source": "L['self'].state[list(L['self'].state.keys())[64]]['exp_avg_sq']"}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.600000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 510, "describer_id": 312, "size": 9216}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.600000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 510, "ndim": 1, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [2304], "is_leaf": true, "stride": [1], "storage": 510, "view_func": "", "describer_id": 312}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.600000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 312, "id": 510, "source": "L['self'].state[list(L['self'].state.keys())[65]]['exp_avg_sq']"}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.601000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 511, "describer_id": 312, "size": 2359296}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.601000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 511, "ndim": 2, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [768, 768], "is_leaf": true, "stride": [768, 1], "storage": 511, "view_func": "", "describer_id": 312}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.601000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 312, "id": 511, "source": "L['self'].state[list(L['self'].state.keys())[66]]['exp_avg_sq']"}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.601000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 512, "describer_id": 312, "size": 3072}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.602000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 512, "ndim": 1, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [768], "is_leaf": true, "stride": [1], "storage": 512, "view_func": "", "describer_id": 312}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.602000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 312, "id": 
512, "source": "L['self'].state[list(L['self'].state.keys())[67]]['exp_avg_sq']"}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.602000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 513, "describer_id": 312, "size": 3072}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.602000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 513, "ndim": 1, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [768], "is_leaf": true, "stride": [1], "storage": 513, "view_func": "", "describer_id": 312}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.602000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 312, "id": 513, "source": "L['self'].state[list(L['self'].state.keys())[68]]['exp_avg_sq']"}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.603000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 514, "describer_id": 312, "size": 3072}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.603000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 514, "ndim": 1, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [768], "is_leaf": true, "stride": [1], "storage": 514, "view_func": "", "describer_id": 312}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.603000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 312, "id": 514, "source": "L['self'].state[list(L['self'].state.keys())[69]]['exp_avg_sq']"}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.603000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 515, "describer_id": 312, "size": 9437184}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.603000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 515, "ndim": 2, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [3072, 768], "is_leaf": true, "stride": [768, 1], "storage": 515, "view_func": "", "describer_id": 312}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.603000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 312, "id": 515, "source": "L['self'].state[list(L['self'].state.keys())[70]]['exp_avg_sq']"}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.604000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 516, "describer_id": 312, "size": 12288}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.604000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 516, "ndim": 1, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [3072], "is_leaf": true, "stride": [1], "storage": 516, "view_func": "", "describer_id": 312}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.604000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 312, "id": 516, "source": "L['self'].state[list(L['self'].state.keys())[71]]['exp_avg_sq']"}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.604000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 517, "describer_id": 312, "size": 9437184}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.605000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 517, "ndim": 2, 
"dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [768, 3072], "is_leaf": true, "stride": [3072, 1], "storage": 517, "view_func": "", "describer_id": 312}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.605000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 312, "id": 517, "source": "L['self'].state[list(L['self'].state.keys())[72]]['exp_avg_sq']"}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.605000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 518, "describer_id": 312, "size": 3072}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.605000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 518, "ndim": 1, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [768], "is_leaf": true, "stride": [1], "storage": 518, "view_func": "", "describer_id": 312}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.605000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 312, "id": 518, "source": "L['self'].state[list(L['self'].state.keys())[73]]['exp_avg_sq']"}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.606000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 519, "describer_id": 312, "size": 3072}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.606000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 519, "ndim": 1, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [768], "is_leaf": true, "stride": [1], "storage": 519, "view_func": "", "describer_id": 312}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.606000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 312, "id": 519, "source": "L['self'].state[list(L['self'].state.keys())[74]]['exp_avg_sq']"}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.606000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 520, "describer_id": 312, "size": 3072}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.606000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 520, "ndim": 1, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [768], "is_leaf": true, "stride": [1], "storage": 520, "view_func": "", "describer_id": 312}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.606000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 312, "id": 520, "source": "L['self'].state[list(L['self'].state.keys())[75]]['exp_avg_sq']"}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.607000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 521, "describer_id": 312, "size": 7077888}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.607000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 521, "ndim": 2, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [2304, 768], "is_leaf": true, "stride": [768, 1], "storage": 521, "view_func": "", "describer_id": 312}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.607000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 312, "id": 521, "source": "L['self'].state[list(L['self'].state.keys())[76]]['exp_avg_sq']"}, 
"frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.607000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 522, "describer_id": 312, "size": 9216}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.607000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 522, "ndim": 1, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [2304], "is_leaf": true, "stride": [1], "storage": 522, "view_func": "", "describer_id": 312}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.608000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 312, "id": 522, "source": "L['self'].state[list(L['self'].state.keys())[77]]['exp_avg_sq']"}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.608000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 523, "describer_id": 312, "size": 2359296}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.608000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 523, "ndim": 2, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [768, 768], "is_leaf": true, "stride": [768, 1], "storage": 523, "view_func": "", "describer_id": 312}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.608000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 312, "id": 523, "source": "L['self'].state[list(L['self'].state.keys())[78]]['exp_avg_sq']"}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.609000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 524, "describer_id": 312, "size": 3072}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.609000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 524, "ndim": 1, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [768], "is_leaf": true, "stride": [1], "storage": 524, "view_func": "", "describer_id": 312}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.609000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 312, "id": 524, "source": "L['self'].state[list(L['self'].state.keys())[79]]['exp_avg_sq']"}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.609000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 525, "describer_id": 312, "size": 3072}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.609000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 525, "ndim": 1, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [768], "is_leaf": true, "stride": [1], "storage": 525, "view_func": "", "describer_id": 312}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.609000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 312, "id": 525, "source": "L['self'].state[list(L['self'].state.keys())[80]]['exp_avg_sq']"}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.610000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 526, "describer_id": 312, "size": 3072}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.610000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 526, "ndim": 1, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [768], 
"is_leaf": true, "stride": [1], "storage": 526, "view_func": "", "describer_id": 312}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.610000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 312, "id": 526, "source": "L['self'].state[list(L['self'].state.keys())[81]]['exp_avg_sq']"}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.610000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 527, "describer_id": 312, "size": 9437184}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.610000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 527, "ndim": 2, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [3072, 768], "is_leaf": true, "stride": [768, 1], "storage": 527, "view_func": "", "describer_id": 312}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.611000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 312, "id": 527, "source": "L['self'].state[list(L['self'].state.keys())[82]]['exp_avg_sq']"}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.611000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 528, "describer_id": 312, "size": 12288}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.611000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 528, "ndim": 1, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [3072], "is_leaf": true, "stride": [1], "storage": 528, "view_func": "", "describer_id": 312}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.611000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 312, "id": 528, "source": "L['self'].state[list(L['self'].state.keys())[83]]['exp_avg_sq']"}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.612000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 529, "describer_id": 312, "size": 9437184}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.612000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 529, "ndim": 2, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [768, 3072], "is_leaf": true, "stride": [3072, 1], "storage": 529, "view_func": "", "describer_id": 312}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.612000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 312, "id": 529, "source": "L['self'].state[list(L['self'].state.keys())[84]]['exp_avg_sq']"}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.612000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 530, "describer_id": 312, "size": 3072}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.612000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 530, "ndim": 1, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [768], "is_leaf": true, "stride": [1], "storage": 530, "view_func": "", "describer_id": 312}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.612000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 312, "id": 530, "source": "L['self'].state[list(L['self'].state.keys())[85]]['exp_avg_sq']"}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.613000 4107173 
torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 531, "describer_id": 312, "size": 3072}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.613000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 531, "ndim": 1, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [768], "is_leaf": true, "stride": [1], "storage": 531, "view_func": "", "describer_id": 312}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.613000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 312, "id": 531, "source": "L['self'].state[list(L['self'].state.keys())[86]]['exp_avg_sq']"}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.613000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 532, "describer_id": 312, "size": 3072}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.613000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 532, "ndim": 1, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [768], "is_leaf": true, "stride": [1], "storage": 532, "view_func": "", "describer_id": 312}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.613000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 312, "id": 532, "source": "L['self'].state[list(L['self'].state.keys())[87]]['exp_avg_sq']"}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.614000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 533, "describer_id": 312, "size": 7077888}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.614000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 533, "ndim": 2, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [2304, 768], "is_leaf": true, "stride": [768, 1], "storage": 533, "view_func": "", "describer_id": 312}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.614000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 312, "id": 533, "source": "L['self'].state[list(L['self'].state.keys())[88]]['exp_avg_sq']"}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.615000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 534, "describer_id": 312, "size": 9216}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.615000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 534, "ndim": 1, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [2304], "is_leaf": true, "stride": [1], "storage": 534, "view_func": "", "describer_id": 312}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.615000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 312, "id": 534, "source": "L['self'].state[list(L['self'].state.keys())[89]]['exp_avg_sq']"}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.615000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 535, "describer_id": 312, "size": 2359296}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.615000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 535, "ndim": 2, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [768, 768], "is_leaf": true, "stride": [768, 1], "storage": 535, "view_func": "", 
"describer_id": 312}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.615000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 312, "id": 535, "source": "L['self'].state[list(L['self'].state.keys())[90]]['exp_avg_sq']"}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.616000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 536, "describer_id": 312, "size": 3072}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.616000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 536, "ndim": 1, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [768], "is_leaf": true, "stride": [1], "storage": 536, "view_func": "", "describer_id": 312}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.616000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 312, "id": 536, "source": "L['self'].state[list(L['self'].state.keys())[91]]['exp_avg_sq']"}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.616000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 537, "describer_id": 312, "size": 3072}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.616000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 537, "ndim": 1, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [768], "is_leaf": true, "stride": [1], "storage": 537, "view_func": "", "describer_id": 312}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.616000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 312, "id": 537, "source": "L['self'].state[list(L['self'].state.keys())[92]]['exp_avg_sq']"}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.617000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 538, "describer_id": 312, "size": 3072}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.617000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 538, "ndim": 1, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [768], "is_leaf": true, "stride": [1], "storage": 538, "view_func": "", "describer_id": 312}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.617000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 312, "id": 538, "source": "L['self'].state[list(L['self'].state.keys())[93]]['exp_avg_sq']"}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.617000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 539, "describer_id": 312, "size": 9437184}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.618000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 539, "ndim": 2, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [3072, 768], "is_leaf": true, "stride": [768, 1], "storage": 539, "view_func": "", "describer_id": 312}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.618000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 312, "id": 539, "source": "L['self'].state[list(L['self'].state.keys())[94]]['exp_avg_sq']"}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.618000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 540, "describer_id": 
312, "size": 12288}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.618000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 540, "ndim": 1, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [3072], "is_leaf": true, "stride": [1], "storage": 540, "view_func": "", "describer_id": 312}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.618000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 312, "id": 540, "source": "L['self'].state[list(L['self'].state.keys())[95]]['exp_avg_sq']"}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.619000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 541, "describer_id": 312, "size": 9437184}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.619000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 541, "ndim": 2, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [768, 3072], "is_leaf": true, "stride": [3072, 1], "storage": 541, "view_func": "", "describer_id": 312}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.619000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 312, "id": 541, "source": "L['self'].state[list(L['self'].state.keys())[96]]['exp_avg_sq']"}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.619000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 542, "describer_id": 312, "size": 3072}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.619000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 542, "ndim": 1, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [768], "is_leaf": true, "stride": [1], "storage": 542, "view_func": "", "describer_id": 312}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.619000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 312, "id": 542, "source": "L['self'].state[list(L['self'].state.keys())[97]]['exp_avg_sq']"}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.620000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 543, "describer_id": 312, "size": 3072}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.620000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 543, "ndim": 1, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [768], "is_leaf": true, "stride": [1], "storage": 543, "view_func": "", "describer_id": 312}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.620000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 312, "id": 543, "source": "L['self'].state[list(L['self'].state.keys())[98]]['exp_avg_sq']"}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.620000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 544, "describer_id": 312, "size": 3072}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.621000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 544, "ndim": 1, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [768], "is_leaf": true, "stride": [1], "storage": 544, "view_func": "", "describer_id": 312}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.621000 4107173 
torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 312, "id": 544, "source": "L['self'].state[list(L['self'].state.keys())[99]]['exp_avg_sq']"}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.621000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 545, "describer_id": 312, "size": 7077888}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.621000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 545, "ndim": 2, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [2304, 768], "is_leaf": true, "stride": [768, 1], "storage": 545, "view_func": "", "describer_id": 312}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.621000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 312, "id": 545, "source": "L['self'].state[list(L['self'].state.keys())[100]]['exp_avg_sq']"}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.622000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 546, "describer_id": 312, "size": 9216}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.622000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 546, "ndim": 1, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [2304], "is_leaf": true, "stride": [1], "storage": 546, "view_func": "", "describer_id": 312}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.622000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 312, "id": 546, "source": "L['self'].state[list(L['self'].state.keys())[101]]['exp_avg_sq']"}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.622000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 547, "describer_id": 312, "size": 2359296}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.622000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 547, "ndim": 2, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [768, 768], "is_leaf": true, "stride": [768, 1], "storage": 547, "view_func": "", "describer_id": 312}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.622000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 312, "id": 547, "source": "L['self'].state[list(L['self'].state.keys())[102]]['exp_avg_sq']"}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.623000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 548, "describer_id": 312, "size": 3072}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.623000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 548, "ndim": 1, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [768], "is_leaf": true, "stride": [1], "storage": 548, "view_func": "", "describer_id": 312}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.623000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 312, "id": 548, "source": "L['self'].state[list(L['self'].state.keys())[103]]['exp_avg_sq']"}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.623000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 549, "describer_id": 312, "size": 3072}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 
13:56:06.623000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 549, "ndim": 1, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [768], "is_leaf": true, "stride": [1], "storage": 549, "view_func": "", "describer_id": 312}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.624000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 312, "id": 549, "source": "L['self'].state[list(L['self'].state.keys())[104]]['exp_avg_sq']"}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.624000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 550, "describer_id": 312, "size": 3072}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.624000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 550, "ndim": 1, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [768], "is_leaf": true, "stride": [1], "storage": 550, "view_func": "", "describer_id": 312}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.624000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 312, "id": 550, "source": "L['self'].state[list(L['self'].state.keys())[105]]['exp_avg_sq']"}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.625000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 551, "describer_id": 312, "size": 9437184}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.625000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 551, "ndim": 2, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [3072, 768], "is_leaf": true, "stride": [768, 1], "storage": 551, "view_func": "", "describer_id": 312}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.625000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 312, "id": 551, "source": "L['self'].state[list(L['self'].state.keys())[106]]['exp_avg_sq']"}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.625000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 552, "describer_id": 312, "size": 12288}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.625000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 552, "ndim": 1, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [3072], "is_leaf": true, "stride": [1], "storage": 552, "view_func": "", "describer_id": 312}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.625000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 312, "id": 552, "source": "L['self'].state[list(L['self'].state.keys())[107]]['exp_avg_sq']"}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.626000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 553, "describer_id": 312, "size": 9437184}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.626000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 553, "ndim": 2, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [768, 3072], "is_leaf": true, "stride": [3072, 1], "storage": 553, "view_func": "", "describer_id": 312}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.626000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": 
{"describer_id": 312, "id": 553, "source": "L['self'].state[list(L['self'].state.keys())[108]]['exp_avg_sq']"}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.626000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 554, "describer_id": 312, "size": 3072}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.627000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 554, "ndim": 1, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [768], "is_leaf": true, "stride": [1], "storage": 554, "view_func": "", "describer_id": 312}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.627000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 312, "id": 554, "source": "L['self'].state[list(L['self'].state.keys())[109]]['exp_avg_sq']"}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.627000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 555, "describer_id": 312, "size": 3072}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.627000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 555, "ndim": 1, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [768], "is_leaf": true, "stride": [1], "storage": 555, "view_func": "", "describer_id": 312}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.627000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 312, "id": 555, "source": "L['self'].state[list(L['self'].state.keys())[110]]['exp_avg_sq']"}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.628000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 556, "describer_id": 312, "size": 3072}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.628000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 556, "ndim": 1, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [768], "is_leaf": true, "stride": [1], "storage": 556, "view_func": "", "describer_id": 312}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.628000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 312, "id": 556, "source": "L['self'].state[list(L['self'].state.keys())[111]]['exp_avg_sq']"}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.628000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 557, "describer_id": 312, "size": 7077888}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.628000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 557, "ndim": 2, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [2304, 768], "is_leaf": true, "stride": [768, 1], "storage": 557, "view_func": "", "describer_id": 312}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.628000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 312, "id": 557, "source": "L['self'].state[list(L['self'].state.keys())[112]]['exp_avg_sq']"}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.629000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 558, "describer_id": 312, "size": 9216}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.629000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": 
{"id": 558, "ndim": 1, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [2304], "is_leaf": true, "stride": [1], "storage": 558, "view_func": "", "describer_id": 312}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.629000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 312, "id": 558, "source": "L['self'].state[list(L['self'].state.keys())[113]]['exp_avg_sq']"}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.629000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 559, "describer_id": 312, "size": 2359296}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.630000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 559, "ndim": 2, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [768, 768], "is_leaf": true, "stride": [768, 1], "storage": 559, "view_func": "", "describer_id": 312}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.630000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 312, "id": 559, "source": "L['self'].state[list(L['self'].state.keys())[114]]['exp_avg_sq']"}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.630000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 560, "describer_id": 312, "size": 3072}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.630000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 560, "ndim": 1, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [768], "is_leaf": true, "stride": [1], "storage": 560, "view_func": "", "describer_id": 312}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.630000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 312, "id": 560, "source": "L['self'].state[list(L['self'].state.keys())[115]]['exp_avg_sq']"}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.631000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 561, "describer_id": 312, "size": 3072}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.631000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 561, "ndim": 1, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [768], "is_leaf": true, "stride": [1], "storage": 561, "view_func": "", "describer_id": 312}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.631000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 312, "id": 561, "source": "L['self'].state[list(L['self'].state.keys())[116]]['exp_avg_sq']"}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.631000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 562, "describer_id": 312, "size": 3072}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.631000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 562, "ndim": 1, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [768], "is_leaf": true, "stride": [1], "storage": 562, "view_func": "", "describer_id": 312}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.631000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 312, "id": 562, "source": 
"L['self'].state[list(L['self'].state.keys())[117]]['exp_avg_sq']"}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.632000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 563, "describer_id": 312, "size": 9437184}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.632000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 563, "ndim": 2, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [3072, 768], "is_leaf": true, "stride": [768, 1], "storage": 563, "view_func": "", "describer_id": 312}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.632000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 312, "id": 563, "source": "L['self'].state[list(L['self'].state.keys())[118]]['exp_avg_sq']"}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.632000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 564, "describer_id": 312, "size": 12288}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.633000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 564, "ndim": 1, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [3072], "is_leaf": true, "stride": [1], "storage": 564, "view_func": "", "describer_id": 312}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.633000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 312, "id": 564, "source": "L['self'].state[list(L['self'].state.keys())[119]]['exp_avg_sq']"}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.633000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 565, "describer_id": 312, "size": 9437184}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.633000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 565, "ndim": 2, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [768, 3072], "is_leaf": true, "stride": [3072, 1], "storage": 565, "view_func": "", "describer_id": 312}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.633000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 312, "id": 565, "source": "L['self'].state[list(L['self'].state.keys())[120]]['exp_avg_sq']"}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.634000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 566, "describer_id": 312, "size": 3072}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.634000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 566, "ndim": 1, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [768], "is_leaf": true, "stride": [1], "storage": 566, "view_func": "", "describer_id": 312}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.634000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 312, "id": 566, "source": "L['self'].state[list(L['self'].state.keys())[121]]['exp_avg_sq']"}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.634000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 567, "describer_id": 312, "size": 3072}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.634000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 567, "ndim": 1, 
"dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [768], "is_leaf": true, "stride": [1], "storage": 567, "view_func": "", "describer_id": 312}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.634000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 312, "id": 567, "source": "L['self'].state[list(L['self'].state.keys())[122]]['exp_avg_sq']"}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.635000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 568, "describer_id": 312, "size": 3072}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.635000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 568, "ndim": 1, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [768], "is_leaf": true, "stride": [1], "storage": 568, "view_func": "", "describer_id": 312}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.635000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 312, "id": 568, "source": "L['self'].state[list(L['self'].state.keys())[123]]['exp_avg_sq']"}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.635000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 569, "describer_id": 312, "size": 7077888}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.636000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 569, "ndim": 2, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [2304, 768], "is_leaf": true, "stride": [768, 1], "storage": 569, "view_func": "", "describer_id": 312}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.636000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 312, "id": 569, "source": "L['self'].state[list(L['self'].state.keys())[124]]['exp_avg_sq']"}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.636000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 570, "describer_id": 312, "size": 9216}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.636000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 570, "ndim": 1, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [2304], "is_leaf": true, "stride": [1], "storage": 570, "view_func": "", "describer_id": 312}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.636000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 312, "id": 570, "source": "L['self'].state[list(L['self'].state.keys())[125]]['exp_avg_sq']"}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.637000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 571, "describer_id": 312, "size": 2359296}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.637000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 571, "ndim": 2, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [768, 768], "is_leaf": true, "stride": [768, 1], "storage": 571, "view_func": "", "describer_id": 312}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.637000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 312, "id": 571, "source": "L['self'].state[list(L['self'].state.keys())[126]]['exp_avg_sq']"}, 
"frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.637000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 572, "describer_id": 312, "size": 3072}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.637000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 572, "ndim": 1, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [768], "is_leaf": true, "stride": [1], "storage": 572, "view_func": "", "describer_id": 312}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.637000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 312, "id": 572, "source": "L['self'].state[list(L['self'].state.keys())[127]]['exp_avg_sq']"}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.638000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 573, "describer_id": 312, "size": 3072}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.638000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 573, "ndim": 1, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [768], "is_leaf": true, "stride": [1], "storage": 573, "view_func": "", "describer_id": 312}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.638000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 312, "id": 573, "source": "L['self'].state[list(L['self'].state.keys())[128]]['exp_avg_sq']"}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.638000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 574, "describer_id": 312, "size": 3072}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.638000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 574, "ndim": 1, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [768], "is_leaf": true, "stride": [1], "storage": 574, "view_func": "", "describer_id": 312}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.639000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 312, "id": 574, "source": "L['self'].state[list(L['self'].state.keys())[129]]['exp_avg_sq']"}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.639000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 575, "describer_id": 312, "size": 9437184}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.639000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 575, "ndim": 2, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [3072, 768], "is_leaf": true, "stride": [768, 1], "storage": 575, "view_func": "", "describer_id": 312}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.639000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 312, "id": 575, "source": "L['self'].state[list(L['self'].state.keys())[130]]['exp_avg_sq']"}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.640000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 576, "describer_id": 312, "size": 12288}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.640000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 576, "ndim": 1, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [3072], 
"is_leaf": true, "stride": [1], "storage": 576, "view_func": "", "describer_id": 312}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.640000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 312, "id": 576, "source": "L['self'].state[list(L['self'].state.keys())[131]]['exp_avg_sq']"}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.640000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 577, "describer_id": 312, "size": 9437184}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.640000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 577, "ndim": 2, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [768, 3072], "is_leaf": true, "stride": [3072, 1], "storage": 577, "view_func": "", "describer_id": 312}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.640000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 312, "id": 577, "source": "L['self'].state[list(L['self'].state.keys())[132]]['exp_avg_sq']"}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.641000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 578, "describer_id": 312, "size": 3072}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.641000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 578, "ndim": 1, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [768], "is_leaf": true, "stride": [1], "storage": 578, "view_func": "", "describer_id": 312}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.641000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 312, "id": 578, "source": "L['self'].state[list(L['self'].state.keys())[133]]['exp_avg_sq']"}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.641000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 579, "describer_id": 312, "size": 3072}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.642000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 579, "ndim": 1, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [768], "is_leaf": true, "stride": [1], "storage": 579, "view_func": "", "describer_id": 312}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.642000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 312, "id": 579, "source": "L['self'].state[list(L['self'].state.keys())[134]]['exp_avg_sq']"}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.642000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 580, "describer_id": 312, "size": 3072}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.642000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 580, "ndim": 1, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [768], "is_leaf": true, "stride": [1], "storage": 580, "view_func": "", "describer_id": 312}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.642000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 312, "id": 580, "source": "L['self'].state[list(L['self'].state.keys())[135]]['exp_avg_sq']"}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.643000 4107173 
torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 581, "describer_id": 312, "size": 7077888}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.643000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 581, "ndim": 2, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [2304, 768], "is_leaf": true, "stride": [768, 1], "storage": 581, "view_func": "", "describer_id": 312}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.643000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 312, "id": 581, "source": "L['self'].state[list(L['self'].state.keys())[136]]['exp_avg_sq']"}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.643000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 582, "describer_id": 312, "size": 9216}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.643000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 582, "ndim": 1, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [2304], "is_leaf": true, "stride": [1], "storage": 582, "view_func": "", "describer_id": 312}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.643000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 312, "id": 582, "source": "L['self'].state[list(L['self'].state.keys())[137]]['exp_avg_sq']"}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.644000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 583, "describer_id": 312, "size": 2359296}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.644000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 583, "ndim": 2, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [768, 768], "is_leaf": true, "stride": [768, 1], "storage": 583, "view_func": "", "describer_id": 312}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.644000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 312, "id": 583, "source": "L['self'].state[list(L['self'].state.keys())[138]]['exp_avg_sq']"}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.644000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 584, "describer_id": 312, "size": 3072}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.644000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 584, "ndim": 1, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [768], "is_leaf": true, "stride": [1], "storage": 584, "view_func": "", "describer_id": 312}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.644000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 312, "id": 584, "source": "L['self'].state[list(L['self'].state.keys())[139]]['exp_avg_sq']"}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.645000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 585, "describer_id": 312, "size": 3072}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.645000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 585, "ndim": 1, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [768], "is_leaf": true, "stride": [1], "storage": 585, "view_func": "", 
"describer_id": 312}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.645000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 312, "id": 585, "source": "L['self'].state[list(L['self'].state.keys())[140]]['exp_avg_sq']"}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.646000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 586, "describer_id": 312, "size": 3072}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.646000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 586, "ndim": 1, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [768], "is_leaf": true, "stride": [1], "storage": 586, "view_func": "", "describer_id": 312}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.646000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 312, "id": 586, "source": "L['self'].state[list(L['self'].state.keys())[141]]['exp_avg_sq']"}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.646000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 587, "describer_id": 312, "size": 9437184}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.646000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 587, "ndim": 2, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [3072, 768], "is_leaf": true, "stride": [768, 1], "storage": 587, "view_func": "", "describer_id": 312}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.646000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 312, "id": 587, "source": "L['self'].state[list(L['self'].state.keys())[142]]['exp_avg_sq']"}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.647000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 588, "describer_id": 312, "size": 12288}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.647000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 588, "ndim": 1, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [3072], "is_leaf": true, "stride": [1], "storage": 588, "view_func": "", "describer_id": 312}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.647000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 312, "id": 588, "source": "L['self'].state[list(L['self'].state.keys())[143]]['exp_avg_sq']"}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.647000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 589, "describer_id": 312, "size": 9437184}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.647000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 589, "ndim": 2, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [768, 3072], "is_leaf": true, "stride": [3072, 1], "storage": 589, "view_func": "", "describer_id": 312}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.647000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 312, "id": 589, "source": "L['self'].state[list(L['self'].state.keys())[144]]['exp_avg_sq']"}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.648000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 
590, "describer_id": 312, "size": 3072}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.648000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 590, "ndim": 1, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [768], "is_leaf": true, "stride": [1], "storage": 590, "view_func": "", "describer_id": 312}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.648000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 312, "id": 590, "source": "L['self'].state[list(L['self'].state.keys())[145]]['exp_avg_sq']"}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.648000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 591, "describer_id": 312, "size": 3072}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.649000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 591, "ndim": 1, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [768], "is_leaf": true, "stride": [1], "storage": 591, "view_func": "", "describer_id": 312}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.649000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 312, "id": 591, "source": "L['self'].state[list(L['self'].state.keys())[146]]['exp_avg_sq']"}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.649000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 592, "describer_id": 312, "size": 3072}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.649000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 592, "ndim": 1, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [768], "is_leaf": true, "stride": [1], "storage": 592, "view_func": "", "describer_id": 312}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.649000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 312, "id": 592, "source": "L['self'].state[list(L['self'].state.keys())[147]]['exp_avg_sq']"}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.650000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 593, "describer_id": 312, "size": 4}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.650000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 593, "ndim": 0, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [], "is_leaf": true, "stride": [], "storage": 593, "view_func": "", "describer_id": 312}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.650000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 312, "id": 593, "source": "L['self'].state[list(L['self'].state.keys())[0]]['step']"}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.650000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 594, "describer_id": 312, "size": 4}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.650000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 594, "ndim": 0, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [], "is_leaf": true, "stride": [], "storage": 594, "view_func": "", "describer_id": 312}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.650000 4107173 
torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 312, "id": 594, "source": "L['self'].state[list(L['self'].state.keys())[2]]['step']"}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.651000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 595, "describer_id": 312, "size": 4}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.651000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 595, "ndim": 0, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [], "is_leaf": true, "stride": [], "storage": 595, "view_func": "", "describer_id": 312}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.651000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 312, "id": 595, "source": "L['self'].state[list(L['self'].state.keys())[3]]['step']"}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.651000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 596, "describer_id": 312, "size": 4}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.651000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 596, "ndim": 0, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [], "is_leaf": true, "stride": [], "storage": 596, "view_func": "", "describer_id": 312}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.651000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 312, "id": 596, "source": "L['self'].state[list(L['self'].state.keys())[4]]['step']"}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.652000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 597, "describer_id": 312, "size": 4}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.652000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 597, "ndim": 0, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [], "is_leaf": true, "stride": [], "storage": 597, "view_func": "", "describer_id": 312}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.652000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 312, "id": 597, "source": "L['self'].state[list(L['self'].state.keys())[5]]['step']"}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.652000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 598, "describer_id": 312, "size": 4}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.652000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 598, "ndim": 0, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [], "is_leaf": true, "stride": [], "storage": 598, "view_func": "", "describer_id": 312}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.653000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 312, "id": 598, "source": "L['self'].state[list(L['self'].state.keys())[6]]['step']"}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.653000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 599, "describer_id": 312, "size": 4}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.653000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 599, "ndim": 0, 
"dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [], "is_leaf": true, "stride": [], "storage": 599, "view_func": "", "describer_id": 312}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.653000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 312, "id": 599, "source": "L['self'].state[list(L['self'].state.keys())[7]]['step']"}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.653000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 600, "describer_id": 312, "size": 4}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.654000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 600, "ndim": 0, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [], "is_leaf": true, "stride": [], "storage": 600, "view_func": "", "describer_id": 312}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.654000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 312, "id": 600, "source": "L['self'].state[list(L['self'].state.keys())[8]]['step']"}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.654000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 601, "describer_id": 312, "size": 4}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.654000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 601, "ndim": 0, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [], "is_leaf": true, "stride": [], "storage": 601, "view_func": "", "describer_id": 312}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.654000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 312, "id": 601, "source": "L['self'].state[list(L['self'].state.keys())[9]]['step']"}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.654000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 602, "describer_id": 312, "size": 4}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.655000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 602, "ndim": 0, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [], "is_leaf": true, "stride": [], "storage": 602, "view_func": "", "describer_id": 312}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.655000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 312, "id": 602, "source": "L['self'].state[list(L['self'].state.keys())[10]]['step']"}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.655000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 603, "describer_id": 312, "size": 4}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.655000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 603, "ndim": 0, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [], "is_leaf": true, "stride": [], "storage": 603, "view_func": "", "describer_id": 312}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.655000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 312, "id": 603, "source": "L['self'].state[list(L['self'].state.keys())[11]]['step']"}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.656000 4107173 
torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 604, "describer_id": 312, "size": 4}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.656000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 604, "ndim": 0, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [], "is_leaf": true, "stride": [], "storage": 604, "view_func": "", "describer_id": 312}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.656000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 312, "id": 604, "source": "L['self'].state[list(L['self'].state.keys())[12]]['step']"}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.657000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 605, "describer_id": 312, "size": 4}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.657000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 605, "ndim": 0, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [], "is_leaf": true, "stride": [], "storage": 605, "view_func": "", "describer_id": 312}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.657000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 312, "id": 605, "source": "L['self'].state[list(L['self'].state.keys())[13]]['step']"}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.657000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 606, "describer_id": 312, "size": 4}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.657000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 606, "ndim": 0, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [], "is_leaf": true, "stride": [], "storage": 606, "view_func": "", "describer_id": 312}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.657000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 312, "id": 606, "source": "L['self'].state[list(L['self'].state.keys())[14]]['step']"}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.658000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 607, "describer_id": 312, "size": 4}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.658000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 607, "ndim": 0, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [], "is_leaf": true, "stride": [], "storage": 607, "view_func": "", "describer_id": 312}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.658000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 312, "id": 607, "source": "L['self'].state[list(L['self'].state.keys())[15]]['step']"}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.658000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 608, "describer_id": 312, "size": 4}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.659000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 608, "ndim": 0, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [], "is_leaf": true, "stride": [], "storage": 608, "view_func": "", "describer_id": 312}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.659000 
4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 312, "id": 608, "source": "L['self'].state[list(L['self'].state.keys())[16]]['step']"}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.659000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 609, "describer_id": 312, "size": 4}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.659000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 609, "ndim": 0, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [], "is_leaf": true, "stride": [], "storage": 609, "view_func": "", "describer_id": 312}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.659000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 312, "id": 609, "source": "L['self'].state[list(L['self'].state.keys())[17]]['step']"}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.659000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 610, "describer_id": 312, "size": 4}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.660000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 610, "ndim": 0, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [], "is_leaf": true, "stride": [], "storage": 610, "view_func": "", "describer_id": 312}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.660000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 312, "id": 610, "source": "L['self'].state[list(L['self'].state.keys())[18]]['step']"}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.660000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 611, "describer_id": 312, "size": 4}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.660000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 611, "ndim": 0, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [], "is_leaf": true, "stride": [], "storage": 611, "view_func": "", "describer_id": 312}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.660000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 312, "id": 611, "source": "L['self'].state[list(L['self'].state.keys())[19]]['step']"}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.661000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 612, "describer_id": 312, "size": 4}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.661000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 612, "ndim": 0, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [], "is_leaf": true, "stride": [], "storage": 612, "view_func": "", "describer_id": 312}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.661000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 312, "id": 612, "source": "L['self'].state[list(L['self'].state.keys())[20]]['step']"}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.661000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 613, "describer_id": 312, "size": 4}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.661000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 613, 
"ndim": 0, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [], "is_leaf": true, "stride": [], "storage": 613, "view_func": "", "describer_id": 312}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.661000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 312, "id": 613, "source": "L['self'].state[list(L['self'].state.keys())[21]]['step']"}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.662000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 614, "describer_id": 312, "size": 4}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.662000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 614, "ndim": 0, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [], "is_leaf": true, "stride": [], "storage": 614, "view_func": "", "describer_id": 312}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.662000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 312, "id": 614, "source": "L['self'].state[list(L['self'].state.keys())[22]]['step']"}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.662000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 615, "describer_id": 312, "size": 4}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.662000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 615, "ndim": 0, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [], "is_leaf": true, "stride": [], "storage": 615, "view_func": "", "describer_id": 312}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.662000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 312, "id": 615, "source": "L['self'].state[list(L['self'].state.keys())[23]]['step']"}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.663000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 616, "describer_id": 312, "size": 4}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.663000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 616, "ndim": 0, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [], "is_leaf": true, "stride": [], "storage": 616, "view_func": "", "describer_id": 312}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.663000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 312, "id": 616, "source": "L['self'].state[list(L['self'].state.keys())[24]]['step']"}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.663000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 617, "describer_id": 312, "size": 4}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.663000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 617, "ndim": 0, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [], "is_leaf": true, "stride": [], "storage": 617, "view_func": "", "describer_id": 312}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.663000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 312, "id": 617, "source": "L['self'].state[list(L['self'].state.keys())[25]]['step']"}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.664000 4107173 
torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 618, "describer_id": 312, "size": 4}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0}
+V0806 13:56:06.664000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 618, "ndim": 0, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [], "is_leaf": true, "stride": [], "storage": 618, "view_func": "", "describer_id": 312}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0}
+V0806 13:56:06.664000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 312, "id": 618, "source": "L['self'].state[list(L['self'].state.keys())[26]]['step']"}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0}
+V0806 13:56:06.664000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 619, "describer_id": 312, "size": 4}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0}
+V0806 13:56:06.664000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 619, "ndim": 0, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [], "is_leaf": true, "stride": [], "storage": 619, "view_func": "", "describer_id": 312}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0}
+V0806 13:56:06.664000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 312, "id": 619, "source": "L['self'].state[list(L['self'].state.keys())[27]]['step']"}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0}
+V0806 13:56:06.665000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 620, "describer_id": 312, "size": 4}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0}
+V0806 13:56:06.665000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 620, "ndim": 0, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [], "is_leaf": true, "stride": [], "storage": 620, "view_func": "", "describer_id": 312}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0}
+V0806 13:56:06.665000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 312, "id": 620, "source": "L['self'].state[list(L['self'].state.keys())[28]]['step']"}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0}
+V0806 13:56:06.665000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 621, "describer_id": 312, "size": 4}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0}
+V0806 13:56:06.665000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 621, "ndim": 0, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [], "is_leaf": true, "stride": [], "storage": 621, "view_func": "", "describer_id": 312}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0}
+V0806 13:56:06.665000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 312, "id": 621, "source": "L['self'].state[list(L['self'].state.keys())[29]]['step']"}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0}
+V0806 13:56:06.666000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 622, "describer_id": 312, "size": 4}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0}
+V0806 13:56:06.666000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 622, "ndim": 0, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [], "is_leaf": true, "stride": [], "storage": 622, "view_func": "", "describer_id": 312}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0}
+V0806 13:56:06.666000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 312, "id": 622, "source": "L['self'].state[list(L['self'].state.keys())[30]]['step']"}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0}
+V0806 13:56:06.666000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 623, "describer_id": 312, "size": 4}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0}
+V0806 13:56:06.667000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 623, "ndim": 0, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [], "is_leaf": true, "stride": [], "storage": 623, "view_func": "", "describer_id": 312}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0}
+V0806 13:56:06.667000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 312, "id": 623, "source": "L['self'].state[list(L['self'].state.keys())[31]]['step']"}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0}
+V0806 13:56:06.667000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 624, "describer_id": 312, "size": 4}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0}
+V0806 13:56:06.667000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 624, "ndim": 0, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [], "is_leaf": true, "stride": [], "storage": 624, "view_func": "", "describer_id": 312}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0}
+V0806 13:56:06.667000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 312, "id": 624, "source": "L['self'].state[list(L['self'].state.keys())[32]]['step']"}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0}
+V0806 13:56:06.667000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 625, "describer_id": 312, "size": 4}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0}
+V0806 13:56:06.668000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 625, "ndim": 0, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [], "is_leaf": true, "stride": [], "storage": 625, "view_func": "", "describer_id": 312}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0}
+V0806 13:56:06.668000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 312, "id": 625, "source": "L['self'].state[list(L['self'].state.keys())[33]]['step']"}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0}
+V0806 13:56:06.668000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 626, "describer_id": 312, "size": 4}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0}
+V0806 13:56:06.668000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 626, "ndim": 0, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [], "is_leaf": true, "stride": [], "storage": 626, "view_func": "", "describer_id": 312}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0}
+V0806 13:56:06.668000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 312, "id": 626, "source": "L['self'].state[list(L['self'].state.keys())[34]]['step']"}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0}
+V0806 13:56:06.669000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 627, "describer_id": 312, "size": 4}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0}
+V0806 13:56:06.669000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 627, "ndim": 0, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [], "is_leaf": true, "stride": [], "storage": 627, "view_func": "", "describer_id": 312}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0}
+V0806 13:56:06.669000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 312, "id": 627, "source": "L['self'].state[list(L['self'].state.keys())[35]]['step']"}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0}
+V0806 13:56:06.669000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 628, "describer_id": 312, "size": 4}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0}
+V0806 13:56:06.669000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 628, "ndim": 0, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [], "is_leaf": true, "stride": [], "storage": 628, "view_func": "", "describer_id": 312}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0}
+V0806 13:56:06.669000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 312, "id": 628, "source": "L['self'].state[list(L['self'].state.keys())[36]]['step']"}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0}
+V0806 13:56:06.670000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 629, "describer_id": 312, "size": 4}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0}
+V0806 13:56:06.670000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 629, "ndim": 0, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [], "is_leaf": true, "stride": [], "storage": 629, "view_func": "", "describer_id": 312}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0}
+V0806 13:56:06.670000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 312, "id": 629, "source": "L['self'].state[list(L['self'].state.keys())[37]]['step']"}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0}
+V0806 13:56:06.670000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 630, "describer_id": 312, "size": 4}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0}
+V0806 13:56:06.670000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 630, "ndim": 0, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [], "is_leaf": true, "stride": [], "storage": 630, "view_func": "", "describer_id": 312}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0}
+V0806 13:56:06.670000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 312, "id": 630, "source": "L['self'].state[list(L['self'].state.keys())[38]]['step']"}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0}
+V0806 13:56:06.671000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 631, "describer_id": 312, "size": 4}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0}
+V0806 13:56:06.671000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 631, "ndim": 0, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [], "is_leaf": true, "stride": [], "storage": 631, "view_func": "", "describer_id": 312}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0}
+V0806 13:56:06.671000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 312, "id": 631, "source": "L['self'].state[list(L['self'].state.keys())[39]]['step']"}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0}
+V0806 13:56:06.671000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 632, "describer_id": 312, "size": 4}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0}
+V0806 13:56:06.671000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 632, "ndim": 0, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [], "is_leaf": true, "stride": [], "storage": 632, "view_func": "", "describer_id": 312}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0}
+V0806 13:56:06.671000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 312, "id": 632, "source": "L['self'].state[list(L['self'].state.keys())[40]]['step']"}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0}
+V0806 13:56:06.672000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 633, "describer_id": 312, "size": 4}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0}
+V0806 13:56:06.672000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 633, "ndim": 0, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [], "is_leaf": true, "stride": [], "storage": 633, "view_func": "", "describer_id": 312}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0}
+V0806 13:56:06.672000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 312, "id": 633, "source": "L['self'].state[list(L['self'].state.keys())[41]]['step']"}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0}
+V0806 13:56:06.672000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 634, "describer_id": 312, "size": 4}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0}
+V0806 13:56:06.672000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 634, "ndim": 0, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [], "is_leaf": true, "stride": [], "storage": 634, "view_func": "", "describer_id": 312}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0}
+V0806 13:56:06.672000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 312, "id": 634, "source": "L['self'].state[list(L['self'].state.keys())[42]]['step']"}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0}
+V0806 13:56:06.673000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 635, "describer_id": 312, "size": 4}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0}
+V0806 13:56:06.673000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 635, "ndim": 0, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [], "is_leaf": true, "stride": [], "storage": 635, "view_func": "", "describer_id": 312}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0}
+V0806 13:56:06.673000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 312, "id": 635, "source": "L['self'].state[list(L['self'].state.keys())[43]]['step']"}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0}
+V0806 13:56:06.673000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 636, "describer_id": 312, "size": 4}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0}
+V0806 13:56:06.673000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 636, "ndim": 0, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [], "is_leaf": true, "stride": [], "storage": 636, "view_func": "", "describer_id": 312}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0}
+V0806 13:56:06.673000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 312, "id": 636, "source": "L['self'].state[list(L['self'].state.keys())[44]]['step']"}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0}
+V0806 13:56:06.674000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 637, "describer_id": 312, "size": 4}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0}
+V0806 13:56:06.674000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 637, "ndim": 0, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [], "is_leaf": true, "stride": [], "storage": 637, "view_func": "", "describer_id": 312}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0}
+V0806 13:56:06.674000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 312, "id": 637, "source": "L['self'].state[list(L['self'].state.keys())[45]]['step']"}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0}
+V0806 13:56:06.674000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 638, "describer_id": 312, "size": 4}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0}
+V0806 13:56:06.674000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 638, "ndim": 0, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [], "is_leaf": true, "stride": [], "storage": 638, "view_func": "", "describer_id": 312}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0}
+V0806 13:56:06.675000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 312, "id": 638, "source": "L['self'].state[list(L['self'].state.keys())[46]]['step']"}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0}
+V0806 13:56:06.675000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 639, "describer_id": 312, "size": 4}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0}
+V0806 13:56:06.675000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 639, "ndim": 0, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [], "is_leaf": true, "stride": [], "storage": 639, "view_func": "", "describer_id": 312}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0}
+V0806 13:56:06.675000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 312, "id": 639, "source": "L['self'].state[list(L['self'].state.keys())[47]]['step']"}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0}
+V0806 13:56:06.675000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 640, "describer_id": 312, "size": 4}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0}
+V0806 13:56:06.676000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 640, "ndim": 0, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [], "is_leaf": true, "stride": [], "storage": 640, "view_func": "", "describer_id": 312}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0}
+V0806 13:56:06.676000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 312, "id": 640, "source": "L['self'].state[list(L['self'].state.keys())[48]]['step']"}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0}
+V0806 13:56:06.676000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 641, "describer_id": 312, "size": 4}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0}
+V0806 13:56:06.676000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 641, "ndim": 0, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [], "is_leaf": true, "stride": [], "storage": 641, "view_func": "", "describer_id": 312}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0}
+V0806 13:56:06.676000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 312, "id": 641, "source": "L['self'].state[list(L['self'].state.keys())[49]]['step']"}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0}
+V0806 13:56:06.677000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 642, "describer_id": 312, "size": 4}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0}
+V0806 13:56:06.677000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 642, "ndim": 0, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [], "is_leaf": true, "stride": [], "storage": 642, "view_func": "", "describer_id": 312}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0}
+V0806 13:56:06.677000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 312, "id": 642, "source": "L['self'].state[list(L['self'].state.keys())[50]]['step']"}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0}
+V0806 13:56:06.677000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 643, "describer_id": 312, "size": 4}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0}
+V0806 13:56:06.677000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 643, "ndim": 0, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [], "is_leaf": true, "stride": [], "storage": 643, "view_func": "", "describer_id": 312}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0}
+V0806 13:56:06.677000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 312, "id": 643, "source": "L['self'].state[list(L['self'].state.keys())[51]]['step']"}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0}
+V0806 13:56:06.678000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 644, "describer_id": 312, "size": 4}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0}
+V0806 13:56:06.678000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 644, "ndim": 0, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [], "is_leaf": true, "stride": [], "storage": 644, "view_func": "", "describer_id": 312}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0}
+V0806 13:56:06.678000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 312, "id": 644, "source": "L['self'].state[list(L['self'].state.keys())[52]]['step']"}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0}
+V0806 13:56:06.678000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 645, "describer_id": 312, "size": 4}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0}
+V0806 13:56:06.678000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 645, "ndim": 0, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [], "is_leaf": true, "stride": [], "storage": 645, "view_func": "", "describer_id": 312}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0}
+V0806 13:56:06.678000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 312, "id": 645, "source": "L['self'].state[list(L['self'].state.keys())[53]]['step']"}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0}
+V0806 13:56:06.679000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 646, "describer_id": 312, "size": 4}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0}
+V0806 13:56:06.679000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 646, "ndim": 0, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [], "is_leaf": true, "stride": [], "storage": 646, "view_func": "", "describer_id": 312}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0}
+V0806 13:56:06.679000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 312, "id": 646, "source": "L['self'].state[list(L['self'].state.keys())[54]]['step']"}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0}
+V0806 13:56:06.679000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 647, "describer_id": 312, "size": 4}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0}
+V0806 13:56:06.679000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 647, "ndim": 0, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [], "is_leaf": true, "stride": [], "storage": 647, "view_func": "", "describer_id": 312}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0}
+V0806 13:56:06.679000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 312, "id": 647, "source": "L['self'].state[list(L['self'].state.keys())[55]]['step']"}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0}
+V0806 13:56:06.680000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 648, "describer_id": 312, "size": 4}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0}
+V0806 13:56:06.680000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 648, "ndim": 0, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [], "is_leaf": true, "stride": [], "storage": 648, "view_func": "", "describer_id": 312}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0}
+V0806 13:56:06.680000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 312, "id": 648, "source": "L['self'].state[list(L['self'].state.keys())[56]]['step']"}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0}
+V0806 13:56:06.680000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 649, "describer_id": 312, "size": 4}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0}
+V0806 13:56:06.680000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 649, "ndim": 0, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [], "is_leaf": true, "stride": [], "storage": 649, "view_func": "", "describer_id": 312}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0}
+V0806 13:56:06.680000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 312, "id": 649, "source": "L['self'].state[list(L['self'].state.keys())[57]]['step']"}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0}
+V0806 13:56:06.681000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 650, "describer_id": 312, "size": 4}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0}
+V0806 13:56:06.681000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 650, "ndim": 0, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [], "is_leaf": true, "stride": [], "storage": 650, "view_func": "", "describer_id": 312}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0}
+V0806 13:56:06.681000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 312, "id": 650, "source": "L['self'].state[list(L['self'].state.keys())[58]]['step']"}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0}
+V0806 13:56:06.681000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 651, "describer_id": 312, "size": 4}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0}
+V0806 13:56:06.681000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 651, "ndim": 0, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [], "is_leaf": true, "stride": [], "storage": 651, "view_func": "", "describer_id": 312}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0}
+V0806 13:56:06.681000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 312, "id": 651, "source": "L['self'].state[list(L['self'].state.keys())[59]]['step']"}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0}
+V0806 13:56:06.682000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 652, "describer_id": 312, "size": 4}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0}
+V0806 13:56:06.682000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 652, "ndim": 0, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [], "is_leaf": true, "stride": [], "storage": 652, "view_func": "", "describer_id": 312}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0}
+V0806 13:56:06.682000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 312, "id": 652, "source": "L['self'].state[list(L['self'].state.keys())[60]]['step']"}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0}
+V0806 13:56:06.682000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 653, "describer_id": 312, "size": 4}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0}
+V0806 13:56:06.682000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 653, "ndim": 0, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [], "is_leaf": true, "stride": [], "storage": 653, "view_func": "", "describer_id": 312}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0}
+V0806 13:56:06.682000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 312, "id": 653, "source": "L['self'].state[list(L['self'].state.keys())[61]]['step']"}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0}
+V0806 13:56:06.683000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 654, "describer_id": 312, "size": 4}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0}
+V0806 13:56:06.683000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 654, "ndim": 0, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [], "is_leaf": true, "stride": [], "storage": 654, "view_func": "", "describer_id": 312}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0}
+V0806 13:56:06.683000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 312, "id": 654, "source": "L['self'].state[list(L['self'].state.keys())[62]]['step']"}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0}
+V0806 13:56:06.683000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 655, "describer_id": 312, "size": 4}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0}
+V0806 13:56:06.683000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 655, "ndim": 0, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [], "is_leaf": true, "stride": [], "storage": 655, "view_func": "", "describer_id": 312}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0}
+V0806 13:56:06.684000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 312, "id": 655, "source": "L['self'].state[list(L['self'].state.keys())[63]]['step']"}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0}
+V0806 13:56:06.684000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 656, "describer_id": 312, "size": 4}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0}
+V0806 13:56:06.684000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 656, "ndim": 0, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [], "is_leaf": true, "stride": [], "storage": 656, "view_func": "", "describer_id": 312}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0}
+V0806 13:56:06.684000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 312, "id": 656, "source": "L['self'].state[list(L['self'].state.keys())[64]]['step']"}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0}
+V0806 13:56:06.684000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 657, "describer_id": 312, "size": 4}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0}
+V0806 13:56:06.685000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 657, "ndim": 0, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [], "is_leaf": true, "stride": [], "storage": 657, "view_func": "", "describer_id": 312}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0}
+V0806 13:56:06.685000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 312, "id": 657, "source": "L['self'].state[list(L['self'].state.keys())[65]]['step']"}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0}
+V0806 13:56:06.685000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 658, "describer_id": 312, "size": 4}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0}
+V0806 13:56:06.685000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 658, "ndim": 0, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [], "is_leaf": true, "stride": [], "storage": 658, "view_func": "", "describer_id": 312}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0}
+V0806 13:56:06.685000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 312, "id": 658, "source": "L['self'].state[list(L['self'].state.keys())[66]]['step']"}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0}
+V0806 13:56:06.686000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 659, "describer_id": 312, "size": 4}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0}
+V0806 13:56:06.686000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 659, "ndim": 0, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [], "is_leaf": true, "stride": [], "storage": 659, "view_func": "", "describer_id": 312}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0}
+V0806 13:56:06.686000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 312, "id": 659, "source": "L['self'].state[list(L['self'].state.keys())[67]]['step']"}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0}
+V0806 13:56:06.686000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 660, "describer_id": 312, "size": 4}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0}
+V0806 13:56:06.686000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 660, "ndim": 0, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [], "is_leaf": true, "stride": [], "storage": 660, "view_func": "", "describer_id": 312}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0}
+V0806 13:56:06.686000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 312, "id": 660, "source": "L['self'].state[list(L['self'].state.keys())[68]]['step']"}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0}
+V0806 13:56:06.687000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 661, "describer_id": 312, "size": 4}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0}
+V0806 13:56:06.687000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 661, "ndim": 0, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [], "is_leaf": true, "stride": [], "storage": 661, "view_func": "", "describer_id": 312}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0}
+V0806 13:56:06.687000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 312, "id": 661, "source": "L['self'].state[list(L['self'].state.keys())[69]]['step']"}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0}
+V0806 13:56:06.687000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 662, "describer_id": 312, "size": 4}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0}
+V0806 13:56:06.687000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 662, "ndim": 0, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [], "is_leaf": true, "stride": [], "storage": 662, "view_func": "", "describer_id": 312}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0}
+V0806 13:56:06.687000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 312, "id": 662, "source": "L['self'].state[list(L['self'].state.keys())[70]]['step']"}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0}
+V0806 13:56:06.688000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 663, "describer_id": 312, "size": 4}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0}
+V0806 13:56:06.688000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 663, "ndim": 0, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [], "is_leaf": true, "stride": [], "storage": 663, "view_func": "", "describer_id": 312}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0}
+V0806 13:56:06.688000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 312, "id": 663, "source": "L['self'].state[list(L['self'].state.keys())[71]]['step']"}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0}
+V0806 13:56:06.688000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 664, "describer_id": 312, "size": 4}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0}
+V0806 13:56:06.688000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 664, "ndim": 0, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [], "is_leaf": true, "stride": [], "storage": 664, "view_func": "", "describer_id": 312}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0}
+V0806 13:56:06.688000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 312, "id": 664, "source": "L['self'].state[list(L['self'].state.keys())[72]]['step']"}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0}
+V0806 13:56:06.689000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 665, "describer_id": 312, "size": 4}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0}
+V0806 13:56:06.689000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 665, "ndim": 0, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [], "is_leaf": true, "stride": [], "storage": 665, "view_func": "", "describer_id": 312}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0}
+V0806 13:56:06.689000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 312, "id": 665, "source": "L['self'].state[list(L['self'].state.keys())[73]]['step']"}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0}
+V0806 13:56:06.689000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 666, "describer_id": 312, "size": 4}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0}
+V0806 13:56:06.689000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 666, "ndim": 0, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [], "is_leaf": true, "stride": [], "storage": 666, "view_func": "", "describer_id": 312}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0}
+V0806 13:56:06.689000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 312, "id": 666, "source": "L['self'].state[list(L['self'].state.keys())[74]]['step']"}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0}
+V0806 13:56:06.690000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 667, "describer_id": 312, "size": 4}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0}
+V0806 13:56:06.690000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 667, "ndim": 0, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [], "is_leaf": true, "stride": [], "storage": 667, "view_func": "", "describer_id": 312}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0}
+V0806 13:56:06.690000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 312, "id": 667, "source": "L['self'].state[list(L['self'].state.keys())[75]]['step']"}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0}
+V0806 13:56:06.690000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 668, "describer_id": 312, "size": 4}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0}
+V0806 13:56:06.690000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 668, "ndim": 0, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [], "is_leaf": true, "stride": [], "storage": 668, "view_func": "", "describer_id": 312}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0}
+V0806 13:56:06.690000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 312, "id": 668, "source": "L['self'].state[list(L['self'].state.keys())[76]]['step']"}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0}
+V0806 13:56:06.691000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 669, "describer_id": 312, "size": 4}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0}
+V0806 13:56:06.691000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 669, "ndim": 0, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [], "is_leaf": true, "stride": [], "storage": 669, "view_func": "", "describer_id": 312}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0}
+V0806 13:56:06.691000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 312, "id": 669, "source": "L['self'].state[list(L['self'].state.keys())[77]]['step']"}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0}
+V0806 13:56:06.691000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 670, "describer_id": 312, "size": 4}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0}
+V0806 13:56:06.691000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 670, "ndim": 0, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [], "is_leaf": true, "stride": [], "storage": 670, "view_func": "", "describer_id": 312}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0}
+V0806 13:56:06.692000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 312, "id": 670, "source": "L['self'].state[list(L['self'].state.keys())[78]]['step']"}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0}
+V0806 13:56:06.692000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 671, "describer_id": 312, "size": 4}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0}
+V0806 13:56:06.692000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 671, "ndim": 0, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [], "is_leaf": true, "stride": [], "storage": 671, "view_func": "", "describer_id": 312}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0}
+V0806 13:56:06.692000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 312, "id": 671, "source": "L['self'].state[list(L['self'].state.keys())[79]]['step']"}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0}
+V0806 13:56:06.692000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 672, "describer_id": 312, "size": 4}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0}
+V0806 13:56:06.693000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 672, "ndim": 0, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [], "is_leaf": true, "stride": [], "storage": 672, "view_func": "", "describer_id": 312}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0}
+V0806 13:56:06.693000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 312, "id": 672, "source": "L['self'].state[list(L['self'].state.keys())[80]]['step']"}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0}
+V0806 13:56:06.693000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 673, "describer_id": 312, "size": 4}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0}
+V0806 13:56:06.693000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 673, "ndim": 0, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [], "is_leaf": true, "stride": [], "storage": 673, "view_func": "", "describer_id": 312}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0}
+V0806 13:56:06.693000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 312, "id": 673, "source": "L['self'].state[list(L['self'].state.keys())[81]]['step']"}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0}
+V0806 13:56:06.693000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 674, "describer_id": 312, "size": 4}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0}
+V0806 13:56:06.694000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 674, "ndim": 0, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [], "is_leaf": true, "stride": [], "storage": 674, "view_func": "", "describer_id": 312}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0}
+V0806 13:56:06.694000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 312, "id": 674, "source": "L['self'].state[list(L['self'].state.keys())[82]]['step']"}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0}
+V0806 13:56:06.694000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 675, "describer_id": 312, "size": 4}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0}
+V0806 13:56:06.694000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 675, "ndim": 0, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [], "is_leaf": true, "stride": [], "storage": 675, "view_func": "", "describer_id": 312}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0}
+V0806 13:56:06.694000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 312, "id": 675, "source": "L['self'].state[list(L['self'].state.keys())[83]]['step']"}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0}
+V0806 13:56:06.695000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 676, "describer_id": 312, "size": 4}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0}
+V0806 13:56:06.695000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 676, "ndim": 0, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [], "is_leaf": true, "stride": [], "storage": 676, "view_func": "", "describer_id": 312}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0}
+V0806 13:56:06.695000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 312, "id": 676, "source": "L['self'].state[list(L['self'].state.keys())[84]]['step']"}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0}
+V0806 13:56:06.695000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 677, "describer_id": 312, "size": 4}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0}
+V0806 13:56:06.695000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 677, "ndim": 0, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [], "is_leaf": true, "stride": [], "storage": 677, "view_func": "", "describer_id": 312}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0}
+V0806 13:56:06.695000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 312, "id": 677, "source": "L['self'].state[list(L['self'].state.keys())[85]]['step']"}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0}
+V0806 13:56:06.696000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 678, "describer_id": 312, "size": 4}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0}
+V0806 13:56:06.696000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 678, "ndim": 0, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [], "is_leaf": true, "stride": [], "storage": 678, "view_func": "", "describer_id": 312}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0}
+V0806 13:56:06.696000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 312, "id": 678, "source": "L['self'].state[list(L['self'].state.keys())[86]]['step']"}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0}
+V0806 13:56:06.696000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 679, "describer_id": 312, "size": 4}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0}
+V0806 13:56:06.696000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 679, "ndim": 0, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [], "is_leaf": true, "stride": [], "storage": 679, "view_func": "", "describer_id": 312}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0}
+V0806 13:56:06.696000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 312, "id": 679, "source": "L['self'].state[list(L['self'].state.keys())[87]]['step']"}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0}
+V0806 13:56:06.697000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 680, "describer_id": 312, "size": 4}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0}
+V0806 13:56:06.697000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 680, "ndim": 0, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [], "is_leaf": true, "stride": [], "storage": 680, "view_func": "", "describer_id": 312}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0}
+V0806 13:56:06.697000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 312, "id": 680, "source": "L['self'].state[list(L['self'].state.keys())[88]]['step']"}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0}
+V0806 13:56:06.697000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 681, "describer_id": 312, "size": 4}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0}
+V0806 13:56:06.697000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 681, "ndim": 0, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [], "is_leaf": true, "stride": [], "storage": 681, "view_func": "", "describer_id": 312}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0}
+V0806 13:56:06.697000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 312, "id": 681, "source": "L['self'].state[list(L['self'].state.keys())[89]]['step']"}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0}
+V0806 13:56:06.698000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 682, "describer_id": 312, "size": 4}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0}
+V0806 13:56:06.698000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 682, "ndim": 0, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [], "is_leaf": true, "stride": [], "storage": 682, "view_func": "", "describer_id": 312}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0}
+V0806 13:56:06.698000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 312, "id": 682, "source": "L['self'].state[list(L['self'].state.keys())[90]]['step']"}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0}
+V0806 13:56:06.698000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 683, "describer_id": 312, "size": 4}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0}
+V0806 13:56:06.698000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 683, "ndim": 0, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [], "is_leaf": true, "stride": [], "storage": 683, "view_func": "", "describer_id": 312}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0}
+V0806 13:56:06.698000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 312, "id": 683, "source": "L['self'].state[list(L['self'].state.keys())[91]]['step']"}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0}
+V0806 13:56:06.699000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 684, "describer_id": 312, "size": 4}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0}
+V0806 13:56:06.699000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 684, "ndim": 0, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [], "is_leaf": true, "stride": [], "storage": 684, "view_func": "", "describer_id": 312}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0}
+V0806 13:56:06.699000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 312, "id": 684, "source": "L['self'].state[list(L['self'].state.keys())[92]]['step']"}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0}
+V0806 13:56:06.699000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 685, "describer_id": 312, "size": 4}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0}
+V0806 13:56:06.699000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 685, "ndim": 0, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [], "is_leaf": true, "stride": [], "storage": 685, "view_func": "", "describer_id": 312}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0}
+V0806 13:56:06.700000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 312, "id": 685, "source": "L['self'].state[list(L['self'].state.keys())[93]]['step']"}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0}
+V0806 13:56:06.700000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 686, "describer_id": 312, "size": 4}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0}
+V0806 13:56:06.700000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 686, "ndim": 0, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [], "is_leaf": true, "stride": [], "storage": 686, "view_func": "", "describer_id": 312}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0}
+V0806 13:56:06.700000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 312, "id": 686, "source": "L['self'].state[list(L['self'].state.keys())[94]]['step']"}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0}
+V0806 13:56:06.700000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 687, "describer_id": 312, "size": 4}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0}
+V0806 13:56:06.701000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 687, "ndim": 0, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [], "is_leaf": true, "stride": [], "storage": 687, "view_func": "", "describer_id": 312}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0}
+V0806 13:56:06.701000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 312, "id": 687, "source": "L['self'].state[list(L['self'].state.keys())[95]]['step']"}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0}
+V0806 13:56:06.701000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 688, "describer_id": 312, "size": 4}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0}
+V0806 13:56:06.701000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 688, "ndim": 0, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [], "is_leaf": true, "stride": [], "storage": 688, "view_func": "", "describer_id": 312}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0}
+V0806 13:56:06.701000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 312, "id": 688, "source": "L['self'].state[list(L['self'].state.keys())[96]]['step']"}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0}
+V0806 13:56:06.701000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 689, "describer_id": 312, "size": 4}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0}
+V0806 13:56:06.702000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 689, "ndim": 0, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [], "is_leaf": true, "stride": [], "storage": 689, "view_func": "", "describer_id": 312}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0}
+V0806 13:56:06.702000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 312, "id": 689, "source": "L['self'].state[list(L['self'].state.keys())[97]]['step']"}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0}
+V0806 13:56:06.702000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 690, "describer_id": 312, "size": 4}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0}
+V0806 13:56:06.702000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 690, "ndim": 0, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [], "is_leaf": true, "stride": [], "storage": 690, "view_func": "", "describer_id": 312}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0}
+V0806 13:56:06.702000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 312, "id": 690, "source": "L['self'].state[list(L['self'].state.keys())[98]]['step']"}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0}
+V0806 13:56:06.703000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 691, "describer_id": 312, "size": 4}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0}
+V0806 13:56:06.703000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 691, "ndim": 0, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [], "is_leaf": true, "stride": [], "storage": 691, "view_func": "", "describer_id": 312}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0}
+V0806 13:56:06.703000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 312, "id": 691, "source": "L['self'].state[list(L['self'].state.keys())[99]]['step']"}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0}
+V0806 13:56:06.703000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 692, "describer_id": 312, "size": 4}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0}
+V0806 13:56:06.703000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 692, "ndim": 0, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [], "is_leaf": true, "stride": [], "storage": 692, "view_func": "", "describer_id": 312}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0}
+V0806 13:56:06.703000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 312, "id": 692, "source": "L['self'].state[list(L['self'].state.keys())[100]]['step']"}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0}
+V0806 13:56:06.704000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 693, "describer_id": 312, "size": 4}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0}
+V0806 13:56:06.704000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 693, "ndim": 0, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [], "is_leaf": true, "stride": [], "storage": 693, "view_func": "", "describer_id": 312}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0}
+V0806 13:56:06.704000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 312, "id": 693, "source": "L['self'].state[list(L['self'].state.keys())[101]]['step']"}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0}
+V0806 13:56:06.704000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 694, "describer_id": 312, "size": 4}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0}
+V0806 13:56:06.704000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 694, "ndim": 0, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [], "is_leaf": true, "stride": [], "storage": 694, "view_func": "", "describer_id": 312}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0}
+V0806 13:56:06.704000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 312, "id": 694, "source": "L['self'].state[list(L['self'].state.keys())[102]]['step']"}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0}
+V0806 13:56:06.705000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 695, "describer_id": 312, "size": 4}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0}
+V0806 13:56:06.705000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 695, "ndim": 0, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [], "is_leaf": true, "stride": [], "storage": 695, "view_func": "", "describer_id": 312}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0}
+V0806 13:56:06.705000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 312, "id": 695, "source": "L['self'].state[list(L['self'].state.keys())[103]]['step']"}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0}
+V0806 13:56:06.705000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 696, "describer_id": 312, "size": 4}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0}
+V0806 13:56:06.705000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 696, "ndim": 0, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [], "is_leaf": true, "stride": [], "storage": 696, "view_func": "", "describer_id": 312}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0}
+V0806 13:56:06.705000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 312, "id": 696, "source": "L['self'].state[list(L['self'].state.keys())[104]]['step']"}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0}
+V0806 13:56:06.706000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 697, "describer_id": 312, "size": 4}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0}
+V0806 13:56:06.706000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 697, "ndim": 0, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [], "is_leaf": true, "stride": [], "storage": 697, "view_func": "", "describer_id": 312}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0}
+V0806 13:56:06.706000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 312, "id": 697, "source": "L['self'].state[list(L['self'].state.keys())[105]]['step']"}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0}
+V0806 13:56:06.706000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 698, "describer_id": 312, "size": 4}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0}
+V0806 13:56:06.706000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 698, "ndim": 0, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [], "is_leaf": true, "stride": [], "storage": 698, "view_func": "", "describer_id": 312}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0}
+V0806 13:56:06.706000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 312, "id": 698, "source": "L['self'].state[list(L['self'].state.keys())[106]]['step']"}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0}
+V0806 13:56:06.707000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 699, "describer_id": 312, "size": 4}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0}
+V0806 13:56:06.707000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 699, "ndim": 0, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [], "is_leaf": true, "stride": [], "storage": 699, "view_func": "", "describer_id": 312}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0}
+V0806 13:56:06.707000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 312, "id": 699, "source": "L['self'].state[list(L['self'].state.keys())[107]]['step']"}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0}
+V0806 13:56:06.707000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 700, "describer_id": 312, "size": 4}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0}
+V0806 13:56:06.707000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 700, "ndim": 0, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [], "is_leaf": true, "stride": [], "storage": 700, "view_func": "", "describer_id": 312}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0}
+V0806 13:56:06.707000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 312, "id": 700, "source": "L['self'].state[list(L['self'].state.keys())[108]]['step']"}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0}
+V0806 13:56:06.708000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 701, "describer_id": 312, "size": 4}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0}
+V0806 13:56:06.708000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 701, "ndim": 0, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [], "is_leaf": true, "stride": [], "storage": 701, "view_func": "", "describer_id": 312}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0}
+V0806 13:56:06.708000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 312, "id": 701, "source": "L['self'].state[list(L['self'].state.keys())[109]]['step']"}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0}
+V0806 13:56:06.708000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 702, "describer_id": 312, "size": 4}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0}
+V0806 13:56:06.708000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 702, "ndim": 0, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [], "is_leaf": true, "stride": [], "storage": 702, "view_func": "", "describer_id": 312}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0}
+V0806 13:56:06.708000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 312, "id": 702, "source": "L['self'].state[list(L['self'].state.keys())[110]]['step']"}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0}
+V0806 13:56:06.709000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 703, "describer_id": 312, "size": 4}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0}
+V0806 13:56:06.709000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 703, "ndim": 0, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [], "is_leaf": true, "stride": [], "storage": 703, "view_func": "", "describer_id": 312}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0}
+V0806 13:56:06.709000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 312, "id": 703, "source": "L['self'].state[list(L['self'].state.keys())[111]]['step']"}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0}
+V0806 13:56:06.709000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 704, "describer_id": 312, "size": 4}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0}
+V0806 13:56:06.710000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 704, "ndim": 0, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [], "is_leaf": true, "stride": [], "storage": 704, "view_func": "", "describer_id": 312}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0}
+V0806 13:56:06.710000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 312, "id": 704, "source": "L['self'].state[list(L['self'].state.keys())[112]]['step']"}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0}
+V0806 13:56:06.710000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 705, "describer_id": 312, "size": 4}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0}
+V0806 13:56:06.710000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 705, "ndim": 0, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [], "is_leaf": true, "stride": [], "storage": 705, "view_func": "", "describer_id": 312}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0}
+V0806 13:56:06.710000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 312, "id": 705, "source": "L['self'].state[list(L['self'].state.keys())[113]]['step']"}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0}
+V0806 13:56:06.710000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 706, "describer_id": 312, "size": 4}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0}
+V0806 13:56:06.711000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 706, "ndim": 0, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [], "is_leaf": true, "stride": [], "storage": 706, "view_func": "", "describer_id": 312}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0}
+V0806 13:56:06.711000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 312, "id": 706, "source": "L['self'].state[list(L['self'].state.keys())[114]]['step']"}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0}
+V0806 13:56:06.711000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 707, "describer_id": 312, "size": 4}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0}
+V0806 13:56:06.711000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 707, "ndim": 0, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [], "is_leaf": true, "stride": [], "storage": 707, "view_func": "", "describer_id": 312}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0}
+V0806 13:56:06.711000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 312, "id": 707, "source": "L['self'].state[list(L['self'].state.keys())[115]]['step']"}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0}
+V0806 13:56:06.712000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 708, "describer_id": 312, "size": 4}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0}
+V0806 13:56:06.712000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 708, "ndim": 0, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [], "is_leaf": true, "stride": [], "storage": 708, "view_func": "", "describer_id": 312}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0}
+V0806 13:56:06.712000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 312, "id": 708, "source": "L['self'].state[list(L['self'].state.keys())[116]]['step']"}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0}
+V0806 13:56:06.712000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 709, "describer_id": 312, "size": 4}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0}
+V0806 13:56:06.712000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 709, "ndim": 0, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [], "is_leaf": true, "stride": [], "storage": 709, "view_func": "", "describer_id": 312}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0}
+V0806 13:56:06.712000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 312, "id": 709, "source": "L['self'].state[list(L['self'].state.keys())[117]]['step']"}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0}
+V0806 13:56:06.713000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 710, "describer_id": 312, "size": 4}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0}
+V0806 13:56:06.713000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 710, "ndim": 0, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [], "is_leaf": true, "stride": [], "storage": 710, "view_func": "", "describer_id": 312}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0}
+V0806 13:56:06.713000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 312, "id": 710, "source": "L['self'].state[list(L['self'].state.keys())[118]]['step']"}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0}
+V0806 13:56:06.713000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 711, "describer_id": 312, "size": 4}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0}
+V0806 13:56:06.713000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 711, "ndim": 0, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [], "is_leaf": true, "stride": [], "storage": 711, "view_func": "", "describer_id": 312}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0}
+V0806 13:56:06.713000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 312, "id": 711, "source": "L['self'].state[list(L['self'].state.keys())[119]]['step']"}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0}
+V0806 13:56:06.714000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 712, "describer_id": 312, "size": 4}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0}
+V0806 13:56:06.714000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 712, "ndim": 0, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [], "is_leaf": true, "stride": [], "storage": 712, "view_func": "", "describer_id": 312}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0}
+V0806 13:56:06.714000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 312, "id": 712, "source": "L['self'].state[list(L['self'].state.keys())[120]]['step']"}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0}
+V0806 13:56:06.715000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 713, "describer_id": 312, "size": 4}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0}
+V0806 13:56:06.715000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 713, "ndim": 0, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [], "is_leaf": true, "stride": [], "storage": 713, "view_func": "", "describer_id": 312}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0}
+V0806 13:56:06.715000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 312, "id": 713, "source": "L['self'].state[list(L['self'].state.keys())[121]]['step']"}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0}
+V0806 13:56:06.715000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 714, "describer_id": 312, "size": 4}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0}
+V0806 13:56:06.715000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 714, "ndim": 0, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [], "is_leaf": true, "stride": [], "storage": 714, "view_func": "", "describer_id": 312}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0}
+V0806 13:56:06.715000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 312, "id": 714, "source": "L['self'].state[list(L['self'].state.keys())[122]]['step']"}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0}
+V0806 13:56:06.716000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 715, "describer_id": 312, "size": 4}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0}
+V0806 13:56:06.716000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 715, "ndim": 0, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [], "is_leaf": true, "stride": [], "storage": 715, "view_func": "", "describer_id": 312}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0}
+V0806 13:56:06.716000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 312, "id": 715, "source": "L['self'].state[list(L['self'].state.keys())[123]]['step']"}, "frame_id": 7, "frame_compile_id": 0, "attempt": 
0} +V0806 13:56:06.716000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 716, "describer_id": 312, "size": 4}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.716000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 716, "ndim": 0, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [], "is_leaf": true, "stride": [], "storage": 716, "view_func": "", "describer_id": 312}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.716000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 312, "id": 716, "source": "L['self'].state[list(L['self'].state.keys())[124]]['step']"}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.717000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 717, "describer_id": 312, "size": 4}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.717000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 717, "ndim": 0, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [], "is_leaf": true, "stride": [], "storage": 717, "view_func": "", "describer_id": 312}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.717000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 312, "id": 717, "source": "L['self'].state[list(L['self'].state.keys())[125]]['step']"}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.717000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 718, "describer_id": 312, "size": 4}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.717000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 718, "ndim": 0, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [], "is_leaf": true, "stride": [], "storage": 718, "view_func": "", "describer_id": 312}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.717000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 312, "id": 718, "source": "L['self'].state[list(L['self'].state.keys())[126]]['step']"}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.718000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 719, "describer_id": 312, "size": 4}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.718000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 719, "ndim": 0, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [], "is_leaf": true, "stride": [], "storage": 719, "view_func": "", "describer_id": 312}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.718000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 312, "id": 719, "source": "L['self'].state[list(L['self'].state.keys())[127]]['step']"}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.718000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 720, "describer_id": 312, "size": 4}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.718000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 720, "ndim": 0, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [], "is_leaf": true, "stride": [], "storage": 720, "view_func": "", "describer_id": 312}, "frame_id": 7, "frame_compile_id": 0, 
"attempt": 0} +V0806 13:56:06.718000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 312, "id": 720, "source": "L['self'].state[list(L['self'].state.keys())[128]]['step']"}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.719000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 721, "describer_id": 312, "size": 4}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.719000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 721, "ndim": 0, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [], "is_leaf": true, "stride": [], "storage": 721, "view_func": "", "describer_id": 312}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.719000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 312, "id": 721, "source": "L['self'].state[list(L['self'].state.keys())[129]]['step']"}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.719000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 722, "describer_id": 312, "size": 4}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.720000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 722, "ndim": 0, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [], "is_leaf": true, "stride": [], "storage": 722, "view_func": "", "describer_id": 312}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.720000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 312, "id": 722, "source": "L['self'].state[list(L['self'].state.keys())[130]]['step']"}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.720000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 723, "describer_id": 312, "size": 4}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.720000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 723, "ndim": 0, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [], "is_leaf": true, "stride": [], "storage": 723, "view_func": "", "describer_id": 312}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.720000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 312, "id": 723, "source": "L['self'].state[list(L['self'].state.keys())[131]]['step']"}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.720000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 724, "describer_id": 312, "size": 4}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.721000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 724, "ndim": 0, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [], "is_leaf": true, "stride": [], "storage": 724, "view_func": "", "describer_id": 312}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.721000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 312, "id": 724, "source": "L['self'].state[list(L['self'].state.keys())[132]]['step']"}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.721000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 725, "describer_id": 312, "size": 4}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.721000 4107173 
torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 725, "ndim": 0, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [], "is_leaf": true, "stride": [], "storage": 725, "view_func": "", "describer_id": 312}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.721000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 312, "id": 725, "source": "L['self'].state[list(L['self'].state.keys())[133]]['step']"}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.722000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 726, "describer_id": 312, "size": 4}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.722000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 726, "ndim": 0, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [], "is_leaf": true, "stride": [], "storage": 726, "view_func": "", "describer_id": 312}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.722000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 312, "id": 726, "source": "L['self'].state[list(L['self'].state.keys())[134]]['step']"}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.722000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 727, "describer_id": 312, "size": 4}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.722000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 727, "ndim": 0, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [], "is_leaf": true, "stride": [], "storage": 727, "view_func": "", "describer_id": 312}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.722000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 312, "id": 727, "source": "L['self'].state[list(L['self'].state.keys())[135]]['step']"}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.723000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 728, "describer_id": 312, "size": 4}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.723000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 728, "ndim": 0, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [], "is_leaf": true, "stride": [], "storage": 728, "view_func": "", "describer_id": 312}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.723000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 312, "id": 728, "source": "L['self'].state[list(L['self'].state.keys())[136]]['step']"}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.723000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 729, "describer_id": 312, "size": 4}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.723000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 729, "ndim": 0, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [], "is_leaf": true, "stride": [], "storage": 729, "view_func": "", "describer_id": 312}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.723000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 312, "id": 729, "source": "L['self'].state[list(L['self'].state.keys())[137]]['step']"}, "frame_id": 
7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.724000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 730, "describer_id": 312, "size": 4}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.724000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 730, "ndim": 0, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [], "is_leaf": true, "stride": [], "storage": 730, "view_func": "", "describer_id": 312}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.724000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 312, "id": 730, "source": "L['self'].state[list(L['self'].state.keys())[138]]['step']"}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.724000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 731, "describer_id": 312, "size": 4}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.724000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 731, "ndim": 0, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [], "is_leaf": true, "stride": [], "storage": 731, "view_func": "", "describer_id": 312}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.724000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 312, "id": 731, "source": "L['self'].state[list(L['self'].state.keys())[139]]['step']"}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.725000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 732, "describer_id": 312, "size": 4}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.725000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 732, "ndim": 0, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [], "is_leaf": true, "stride": [], "storage": 732, "view_func": "", "describer_id": 312}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.725000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 312, "id": 732, "source": "L['self'].state[list(L['self'].state.keys())[140]]['step']"}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.725000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 733, "describer_id": 312, "size": 4}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.725000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 733, "ndim": 0, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [], "is_leaf": true, "stride": [], "storage": 733, "view_func": "", "describer_id": 312}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.725000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 312, "id": 733, "source": "L['self'].state[list(L['self'].state.keys())[141]]['step']"}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.726000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 734, "describer_id": 312, "size": 4}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.726000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 734, "ndim": 0, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [], "is_leaf": true, "stride": [], "storage": 734, "view_func": "", "describer_id": 312}, 
"frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.726000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 312, "id": 734, "source": "L['self'].state[list(L['self'].state.keys())[142]]['step']"}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.726000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 735, "describer_id": 312, "size": 4}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.726000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 735, "ndim": 0, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [], "is_leaf": true, "stride": [], "storage": 735, "view_func": "", "describer_id": 312}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.726000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 312, "id": 735, "source": "L['self'].state[list(L['self'].state.keys())[143]]['step']"}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.727000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 736, "describer_id": 312, "size": 4}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.727000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 736, "ndim": 0, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [], "is_leaf": true, "stride": [], "storage": 736, "view_func": "", "describer_id": 312}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.727000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 312, "id": 736, "source": "L['self'].state[list(L['self'].state.keys())[144]]['step']"}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.727000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 737, "describer_id": 312, "size": 4}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.727000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 737, "ndim": 0, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [], "is_leaf": true, "stride": [], "storage": 737, "view_func": "", "describer_id": 312}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.728000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 312, "id": 737, "source": "L['self'].state[list(L['self'].state.keys())[145]]['step']"}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.728000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 738, "describer_id": 312, "size": 4}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.728000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 738, "ndim": 0, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [], "is_leaf": true, "stride": [], "storage": 738, "view_func": "", "describer_id": 312}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.728000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 312, "id": 738, "source": "L['self'].state[list(L['self'].state.keys())[146]]['step']"}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.728000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 739, "describer_id": 312, "size": 4}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 
13:56:06.728000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 739, "ndim": 0, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [], "is_leaf": true, "stride": [], "storage": 739, "view_func": "", "describer_id": 312}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.729000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 312, "id": 739, "source": "L['self'].state[list(L['self'].state.keys())[147]]['step']"}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:07.381000 4107173 torch/_dynamo/output_graph.py:1337] {"dynamo_output_graph": {"sizes": {"l_self_param_groups_0_params_0_": [50304, 768], "l_self_param_groups_0_params_1_": [1024, 768], "l_self_param_groups_0_params_2_": [768], "l_self_param_groups_0_params_3_": [768], "l_self_param_groups_0_params_4_": [2304, 768], "l_self_param_groups_0_params_5_": [2304], "l_self_param_groups_0_params_6_": [768, 768], "l_self_param_groups_0_params_7_": [768], "l_self_param_groups_0_params_8_": [768], "l_self_param_groups_0_params_9_": [768], "l_self_param_groups_0_params_10_": [3072, 768], "l_self_param_groups_0_params_11_": [3072], "l_self_param_groups_0_params_12_": [768, 3072], "l_self_param_groups_0_params_13_": [768], "l_self_param_groups_0_params_14_": [768], "l_self_param_groups_0_params_15_": [768], "l_self_param_groups_0_params_16_": [2304, 768], "l_self_param_groups_0_params_17_": [2304], "l_self_param_groups_0_params_18_": [768, 768], "l_self_param_groups_0_params_19_": [768], "l_self_param_groups_0_params_20_": [768], "l_self_param_groups_0_params_21_": [768], "l_self_param_groups_0_params_22_": [3072, 768], "l_self_param_groups_0_params_23_": [3072], "l_self_param_groups_0_params_24_": [768, 3072], "l_self_param_groups_0_params_25_": [768], "l_self_param_groups_0_params_26_": [768], "l_self_param_groups_0_params_27_": [768], "l_self_param_groups_0_params_28_": [2304, 768], "l_self_param_groups_0_params_29_": [2304], "l_self_param_groups_0_params_30_": [768, 768], "l_self_param_groups_0_params_31_": [768], "l_self_param_groups_0_params_32_": [768], "l_self_param_groups_0_params_33_": [768], "l_self_param_groups_0_params_34_": [3072, 768], "l_self_param_groups_0_params_35_": [3072], "l_self_param_groups_0_params_36_": [768, 3072], "l_self_param_groups_0_params_37_": [768], "l_self_param_groups_0_params_38_": [768], "l_self_param_groups_0_params_39_": [768], "l_self_param_groups_0_params_40_": [2304, 768], "l_self_param_groups_0_params_41_": [2304], "l_self_param_groups_0_params_42_": [768, 768], "l_self_param_groups_0_params_43_": [768], "l_self_param_groups_0_params_44_": [768], "l_self_param_groups_0_params_45_": [768], "l_self_param_groups_0_params_46_": [3072, 768], "l_self_param_groups_0_params_47_": [3072], "l_self_param_groups_0_params_48_": [768, 3072], "l_self_param_groups_0_params_49_": [768], "l_self_param_groups_0_params_50_": [768], "l_self_param_groups_0_params_51_": [768], "l_self_param_groups_0_params_52_": [2304, 768], "l_self_param_groups_0_params_53_": [2304], "l_self_param_groups_0_params_54_": [768, 768], "l_self_param_groups_0_params_55_": [768], "l_self_param_groups_0_params_56_": [768], "l_self_param_groups_0_params_57_": [768], "l_self_param_groups_0_params_58_": [3072, 768], "l_self_param_groups_0_params_59_": [3072], "l_self_param_groups_0_params_60_": [768, 3072], "l_self_param_groups_0_params_61_": [768], "l_self_param_groups_0_params_62_": [768], "l_self_param_groups_0_params_63_": [768], 
"l_self_param_groups_0_params_64_": [2304, 768], "l_self_param_groups_0_params_65_": [2304], "l_self_param_groups_0_params_66_": [768, 768], "l_self_param_groups_0_params_67_": [768], "l_self_param_groups_0_params_68_": [768], "l_self_param_groups_0_params_69_": [768], "l_self_param_groups_0_params_70_": [3072, 768], "l_self_param_groups_0_params_71_": [3072], "l_self_param_groups_0_params_72_": [768, 3072], "l_self_param_groups_0_params_73_": [768], "l_self_param_groups_0_params_74_": [768], "l_self_param_groups_0_params_75_": [768], "l_self_param_groups_0_params_76_": [2304, 768], "l_self_param_groups_0_params_77_": [2304], "l_self_param_groups_0_params_78_": [768, 768], "l_self_param_groups_0_params_79_": [768], "l_self_param_groups_0_params_80_": [768], "l_self_param_groups_0_params_81_": [768], "l_self_param_groups_0_params_82_": [3072, 768], "l_self_param_groups_0_params_83_": [3072], "l_self_param_groups_0_params_84_": [768, 3072], "l_self_param_groups_0_params_85_": [768], "l_self_param_groups_0_params_86_": [768], "l_self_param_groups_0_params_87_": [768], "l_self_param_groups_0_params_88_": [2304, 768], "l_self_param_groups_0_params_89_": [2304], "l_self_param_groups_0_params_90_": [768, 768], "l_self_param_groups_0_params_91_": [768], "l_self_param_groups_0_params_92_": [768], "l_self_param_groups_0_params_93_": [768], "l_self_param_groups_0_params_94_": [3072, 768], "l_self_param_groups_0_params_95_": [3072], "l_self_param_groups_0_params_96_": [768, 3072], "l_self_param_groups_0_params_97_": [768], "l_self_param_groups_0_params_98_": [768], "l_self_param_groups_0_params_99_": [768], "l_self_param_groups_0_params_100_": [2304, 768], "l_self_param_groups_0_params_101_": [2304], "l_self_param_groups_0_params_102_": [768, 768], "l_self_param_groups_0_params_103_": [768], "l_self_param_groups_0_params_104_": [768], "l_self_param_groups_0_params_105_": [768], "l_self_param_groups_0_params_106_": [3072, 768], "l_self_param_groups_0_params_107_": [3072], "l_self_param_groups_0_params_108_": [768, 3072], "l_self_param_groups_0_params_109_": [768], "l_self_param_groups_0_params_110_": [768], "l_self_param_groups_0_params_111_": [768], "l_self_param_groups_0_params_112_": [2304, 768], "l_self_param_groups_0_params_113_": [2304], "l_self_param_groups_0_params_114_": [768, 768], "l_self_param_groups_0_params_115_": [768], "l_self_param_groups_0_params_116_": [768], "l_self_param_groups_0_params_117_": [768], "l_self_param_groups_0_params_118_": [3072, 768], "l_self_param_groups_0_params_119_": [3072], "l_self_param_groups_0_params_120_": [768, 3072], "l_self_param_groups_0_params_121_": [768], "l_self_param_groups_0_params_122_": [768], "l_self_param_groups_0_params_123_": [768], "l_self_param_groups_0_params_124_": [2304, 768], "l_self_param_groups_0_params_125_": [2304], "l_self_param_groups_0_params_126_": [768, 768], "l_self_param_groups_0_params_127_": [768], "l_self_param_groups_0_params_128_": [768], "l_self_param_groups_0_params_129_": [768], "l_self_param_groups_0_params_130_": [3072, 768], "l_self_param_groups_0_params_131_": [3072], "l_self_param_groups_0_params_132_": [768, 3072], "l_self_param_groups_0_params_133_": [768], "l_self_param_groups_0_params_134_": [768], "l_self_param_groups_0_params_135_": [768], "l_self_param_groups_0_params_136_": [2304, 768], "l_self_param_groups_0_params_137_": [2304], "l_self_param_groups_0_params_138_": [768, 768], "l_self_param_groups_0_params_139_": [768], "l_self_param_groups_0_params_140_": [768], "l_self_param_groups_0_params_141_": 
[768], "l_self_param_groups_0_params_142_": [3072, 768], "l_self_param_groups_0_params_143_": [3072], "l_self_param_groups_0_params_144_": [768, 3072], "l_self_param_groups_0_params_145_": [768], "l_self_param_groups_0_params_146_": [768], "l_self_param_groups_0_params_147_": [768], "l_self_state_list_l_self_state_keys_1_step_": [], "l_self_state_list_l_self_state_keys_1_exp_avg_": [1024, 768], "l_self_state_list_l_self_state_keys_1_exp_avg_sq_": [1024, 768], "l_self_param_groups_0_params_0_grad": [50304, 768], "l_self_param_groups_0_params_1_grad": [1024, 768], "l_self_param_groups_0_params_2_grad": [768], "l_self_param_groups_0_params_3_grad": [768], "l_self_param_groups_0_params_4_grad": [2304, 768], "l_self_param_groups_0_params_5_grad": [2304], "l_self_param_groups_0_params_6_grad": [768, 768], "l_self_param_groups_0_params_7_grad": [768], "l_self_param_groups_0_params_8_grad": [768], "l_self_param_groups_0_params_9_grad": [768], "l_self_param_groups_0_params_10_grad": [3072, 768], "l_self_param_groups_0_params_11_grad": [3072], "l_self_param_groups_0_params_12_grad": [768, 3072], "l_self_param_groups_0_params_13_grad": [768], "l_self_param_groups_0_params_14_grad": [768], "l_self_param_groups_0_params_15_grad": [768], "l_self_param_groups_0_params_16_grad": [2304, 768], "l_self_param_groups_0_params_17_grad": [2304], "l_self_param_groups_0_params_18_grad": [768, 768], "l_self_param_groups_0_params_19_grad": [768], "l_self_param_groups_0_params_20_grad": [768], "l_self_param_groups_0_params_21_grad": [768], "l_self_param_groups_0_params_22_grad": [3072, 768], "l_self_param_groups_0_params_23_grad": [3072], "l_self_param_groups_0_params_24_grad": [768, 3072], "l_self_param_groups_0_params_25_grad": [768], "l_self_param_groups_0_params_26_grad": [768], "l_self_param_groups_0_params_27_grad": [768], "l_self_param_groups_0_params_28_grad": [2304, 768], "l_self_param_groups_0_params_29_grad": [2304], "l_self_param_groups_0_params_30_grad": [768, 768], "l_self_param_groups_0_params_31_grad": [768], "l_self_param_groups_0_params_32_grad": [768], "l_self_param_groups_0_params_33_grad": [768], "l_self_param_groups_0_params_34_grad": [3072, 768], "l_self_param_groups_0_params_35_grad": [3072], "l_self_param_groups_0_params_36_grad": [768, 3072], "l_self_param_groups_0_params_37_grad": [768], "l_self_param_groups_0_params_38_grad": [768], "l_self_param_groups_0_params_39_grad": [768], "l_self_param_groups_0_params_40_grad": [2304, 768], "l_self_param_groups_0_params_41_grad": [2304], "l_self_param_groups_0_params_42_grad": [768, 768], "l_self_param_groups_0_params_43_grad": [768], "l_self_param_groups_0_params_44_grad": [768], "l_self_param_groups_0_params_45_grad": [768], "l_self_param_groups_0_params_46_grad": [3072, 768], "l_self_param_groups_0_params_47_grad": [3072], "l_self_param_groups_0_params_48_grad": [768, 3072], "l_self_param_groups_0_params_49_grad": [768], "l_self_param_groups_0_params_50_grad": [768], "l_self_param_groups_0_params_51_grad": [768], "l_self_param_groups_0_params_52_grad": [2304, 768], "l_self_param_groups_0_params_53_grad": [2304], "l_self_param_groups_0_params_54_grad": [768, 768], "l_self_param_groups_0_params_55_grad": [768], "l_self_param_groups_0_params_56_grad": [768], "l_self_param_groups_0_params_57_grad": [768], "l_self_param_groups_0_params_58_grad": [3072, 768], "l_self_param_groups_0_params_59_grad": [3072], "l_self_param_groups_0_params_60_grad": [768, 3072], "l_self_param_groups_0_params_61_grad": [768], "l_self_param_groups_0_params_62_grad": [768], 
"l_self_param_groups_0_params_63_grad": [768], "l_self_param_groups_0_params_64_grad": [2304, 768], "l_self_param_groups_0_params_65_grad": [2304], "l_self_param_groups_0_params_66_grad": [768, 768], "l_self_param_groups_0_params_67_grad": [768], "l_self_param_groups_0_params_68_grad": [768], "l_self_param_groups_0_params_69_grad": [768], "l_self_param_groups_0_params_70_grad": [3072, 768], "l_self_param_groups_0_params_71_grad": [3072], "l_self_param_groups_0_params_72_grad": [768, 3072], "l_self_param_groups_0_params_73_grad": [768], "l_self_param_groups_0_params_74_grad": [768], "l_self_param_groups_0_params_75_grad": [768], "l_self_param_groups_0_params_76_grad": [2304, 768], "l_self_param_groups_0_params_77_grad": [2304], "l_self_param_groups_0_params_78_grad": [768, 768], "l_self_param_groups_0_params_79_grad": [768], "l_self_param_groups_0_params_80_grad": [768], "l_self_param_groups_0_params_81_grad": [768], "l_self_param_groups_0_params_82_grad": [3072, 768], "l_self_param_groups_0_params_83_grad": [3072], "l_self_param_groups_0_params_84_grad": [768, 3072], "l_self_param_groups_0_params_85_grad": [768], "l_self_param_groups_0_params_86_grad": [768], "l_self_param_groups_0_params_87_grad": [768], "l_self_param_groups_0_params_88_grad": [2304, 768], "l_self_param_groups_0_params_89_grad": [2304], "l_self_param_groups_0_params_90_grad": [768, 768], "l_self_param_groups_0_params_91_grad": [768], "l_self_param_groups_0_params_92_grad": [768], "l_self_param_groups_0_params_93_grad": [768], "l_self_param_groups_0_params_94_grad": [3072, 768], "l_self_param_groups_0_params_95_grad": [3072], "l_self_param_groups_0_params_96_grad": [768, 3072], "l_self_param_groups_0_params_97_grad": [768], "l_self_param_groups_0_params_98_grad": [768], "l_self_param_groups_0_params_99_grad": [768], "l_self_param_groups_0_params_100_grad": [2304, 768], "l_self_param_groups_0_params_101_grad": [2304], "l_self_param_groups_0_params_102_grad": [768, 768], "l_self_param_groups_0_params_103_grad": [768], "l_self_param_groups_0_params_104_grad": [768], "l_self_param_groups_0_params_105_grad": [768], "l_self_param_groups_0_params_106_grad": [3072, 768], "l_self_param_groups_0_params_107_grad": [3072], "l_self_param_groups_0_params_108_grad": [768, 3072], "l_self_param_groups_0_params_109_grad": [768], "l_self_param_groups_0_params_110_grad": [768], "l_self_param_groups_0_params_111_grad": [768], "l_self_param_groups_0_params_112_grad": [2304, 768], "l_self_param_groups_0_params_113_grad": [2304], "l_self_param_groups_0_params_114_grad": [768, 768], "l_self_param_groups_0_params_115_grad": [768], "l_self_param_groups_0_params_116_grad": [768], "l_self_param_groups_0_params_117_grad": [768], "l_self_param_groups_0_params_118_grad": [3072, 768], "l_self_param_groups_0_params_119_grad": [3072], "l_self_param_groups_0_params_120_grad": [768, 3072], "l_self_param_groups_0_params_121_grad": [768], "l_self_param_groups_0_params_122_grad": [768], "l_self_param_groups_0_params_123_grad": [768], "l_self_param_groups_0_params_124_grad": [2304, 768], "l_self_param_groups_0_params_125_grad": [2304], "l_self_param_groups_0_params_126_grad": [768, 768], "l_self_param_groups_0_params_127_grad": [768], "l_self_param_groups_0_params_128_grad": [768], "l_self_param_groups_0_params_129_grad": [768], "l_self_param_groups_0_params_130_grad": [3072, 768], "l_self_param_groups_0_params_131_grad": [3072], "l_self_param_groups_0_params_132_grad": [768, 3072], "l_self_param_groups_0_params_133_grad": [768], 
"l_self_param_groups_0_params_134_grad": [768], "l_self_param_groups_0_params_135_grad": [768], "l_self_param_groups_0_params_136_grad": [2304, 768], "l_self_param_groups_0_params_137_grad": [2304], "l_self_param_groups_0_params_138_grad": [768, 768], "l_self_param_groups_0_params_139_grad": [768], "l_self_param_groups_0_params_140_grad": [768], "l_self_param_groups_0_params_141_grad": [768], "l_self_param_groups_0_params_142_grad": [3072, 768], "l_self_param_groups_0_params_143_grad": [3072], "l_self_param_groups_0_params_144_grad": [768, 3072], "l_self_param_groups_0_params_145_grad": [768], "l_self_param_groups_0_params_146_grad": [768], "l_self_param_groups_0_params_147_grad": [768], "l_self_state_list_l_self_state_keys_0_exp_avg_": [50304, 768], "l_self_state_list_l_self_state_keys_2_exp_avg_": [768], "l_self_state_list_l_self_state_keys_3_exp_avg_": [768], "l_self_state_list_l_self_state_keys_4_exp_avg_": [2304, 768], "l_self_state_list_l_self_state_keys_5_exp_avg_": [2304], "l_self_state_list_l_self_state_keys_6_exp_avg_": [768, 768], "l_self_state_list_l_self_state_keys_7_exp_avg_": [768], "l_self_state_list_l_self_state_keys_8_exp_avg_": [768], "l_self_state_list_l_self_state_keys_9_exp_avg_": [768], "l_self_state_list_l_self_state_keys_10_exp_avg_": [3072, 768], "l_self_state_list_l_self_state_keys_11_exp_avg_": [3072], "l_self_state_list_l_self_state_keys_12_exp_avg_": [768, 3072], "l_self_state_list_l_self_state_keys_13_exp_avg_": [768], "l_self_state_list_l_self_state_keys_14_exp_avg_": [768], "l_self_state_list_l_self_state_keys_15_exp_avg_": [768], "l_self_state_list_l_self_state_keys_16_exp_avg_": [2304, 768], "l_self_state_list_l_self_state_keys_17_exp_avg_": [2304], "l_self_state_list_l_self_state_keys_18_exp_avg_": [768, 768], "l_self_state_list_l_self_state_keys_19_exp_avg_": [768], "l_self_state_list_l_self_state_keys_20_exp_avg_": [768], "l_self_state_list_l_self_state_keys_21_exp_avg_": [768], "l_self_state_list_l_self_state_keys_22_exp_avg_": [3072, 768], "l_self_state_list_l_self_state_keys_23_exp_avg_": [3072], "l_self_state_list_l_self_state_keys_24_exp_avg_": [768, 3072], "l_self_state_list_l_self_state_keys_25_exp_avg_": [768], "l_self_state_list_l_self_state_keys_26_exp_avg_": [768], "l_self_state_list_l_self_state_keys_27_exp_avg_": [768], "l_self_state_list_l_self_state_keys_28_exp_avg_": [2304, 768], "l_self_state_list_l_self_state_keys_29_exp_avg_": [2304], "l_self_state_list_l_self_state_keys_30_exp_avg_": [768, 768], "l_self_state_list_l_self_state_keys_31_exp_avg_": [768], "l_self_state_list_l_self_state_keys_32_exp_avg_": [768], "l_self_state_list_l_self_state_keys_33_exp_avg_": [768], "l_self_state_list_l_self_state_keys_34_exp_avg_": [3072, 768], "l_self_state_list_l_self_state_keys_35_exp_avg_": [3072], "l_self_state_list_l_self_state_keys_36_exp_avg_": [768, 3072], "l_self_state_list_l_self_state_keys_37_exp_avg_": [768], "l_self_state_list_l_self_state_keys_38_exp_avg_": [768], "l_self_state_list_l_self_state_keys_39_exp_avg_": [768], "l_self_state_list_l_self_state_keys_40_exp_avg_": [2304, 768], "l_self_state_list_l_self_state_keys_41_exp_avg_": [2304], "l_self_state_list_l_self_state_keys_42_exp_avg_": [768, 768], "l_self_state_list_l_self_state_keys_43_exp_avg_": [768], "l_self_state_list_l_self_state_keys_44_exp_avg_": [768], "l_self_state_list_l_self_state_keys_45_exp_avg_": [768], "l_self_state_list_l_self_state_keys_46_exp_avg_": [3072, 768], "l_self_state_list_l_self_state_keys_47_exp_avg_": [3072], 
"l_self_state_list_l_self_state_keys_48_exp_avg_": [768, 3072], "l_self_state_list_l_self_state_keys_49_exp_avg_": [768], "l_self_state_list_l_self_state_keys_50_exp_avg_": [768], "l_self_state_list_l_self_state_keys_51_exp_avg_": [768], "l_self_state_list_l_self_state_keys_52_exp_avg_": [2304, 768], "l_self_state_list_l_self_state_keys_53_exp_avg_": [2304], "l_self_state_list_l_self_state_keys_54_exp_avg_": [768, 768], "l_self_state_list_l_self_state_keys_55_exp_avg_": [768], "l_self_state_list_l_self_state_keys_56_exp_avg_": [768], "l_self_state_list_l_self_state_keys_57_exp_avg_": [768], "l_self_state_list_l_self_state_keys_58_exp_avg_": [3072, 768], "l_self_state_list_l_self_state_keys_59_exp_avg_": [3072], "l_self_state_list_l_self_state_keys_60_exp_avg_": [768, 3072], "l_self_state_list_l_self_state_keys_61_exp_avg_": [768], "l_self_state_list_l_self_state_keys_62_exp_avg_": [768], "l_self_state_list_l_self_state_keys_63_exp_avg_": [768], "l_self_state_list_l_self_state_keys_64_exp_avg_": [2304, 768], "l_self_state_list_l_self_state_keys_65_exp_avg_": [2304], "l_self_state_list_l_self_state_keys_66_exp_avg_": [768, 768], "l_self_state_list_l_self_state_keys_67_exp_avg_": [768], "l_self_state_list_l_self_state_keys_68_exp_avg_": [768], "l_self_state_list_l_self_state_keys_69_exp_avg_": [768], "l_self_state_list_l_self_state_keys_70_exp_avg_": [3072, 768], "l_self_state_list_l_self_state_keys_71_exp_avg_": [3072], "l_self_state_list_l_self_state_keys_72_exp_avg_": [768, 3072], "l_self_state_list_l_self_state_keys_73_exp_avg_": [768], "l_self_state_list_l_self_state_keys_74_exp_avg_": [768], "l_self_state_list_l_self_state_keys_75_exp_avg_": [768], "l_self_state_list_l_self_state_keys_76_exp_avg_": [2304, 768], "l_self_state_list_l_self_state_keys_77_exp_avg_": [2304], "l_self_state_list_l_self_state_keys_78_exp_avg_": [768, 768], "l_self_state_list_l_self_state_keys_79_exp_avg_": [768], "l_self_state_list_l_self_state_keys_80_exp_avg_": [768], "l_self_state_list_l_self_state_keys_81_exp_avg_": [768], "l_self_state_list_l_self_state_keys_82_exp_avg_": [3072, 768], "l_self_state_list_l_self_state_keys_83_exp_avg_": [3072], "l_self_state_list_l_self_state_keys_84_exp_avg_": [768, 3072], "l_self_state_list_l_self_state_keys_85_exp_avg_": [768], "l_self_state_list_l_self_state_keys_86_exp_avg_": [768], "l_self_state_list_l_self_state_keys_87_exp_avg_": [768], "l_self_state_list_l_self_state_keys_88_exp_avg_": [2304, 768], "l_self_state_list_l_self_state_keys_89_exp_avg_": [2304], "l_self_state_list_l_self_state_keys_90_exp_avg_": [768, 768], "l_self_state_list_l_self_state_keys_91_exp_avg_": [768], "l_self_state_list_l_self_state_keys_92_exp_avg_": [768], "l_self_state_list_l_self_state_keys_93_exp_avg_": [768], "l_self_state_list_l_self_state_keys_94_exp_avg_": [3072, 768], "l_self_state_list_l_self_state_keys_95_exp_avg_": [3072], "l_self_state_list_l_self_state_keys_96_exp_avg_": [768, 3072], "l_self_state_list_l_self_state_keys_97_exp_avg_": [768], "l_self_state_list_l_self_state_keys_98_exp_avg_": [768], "l_self_state_list_l_self_state_keys_99_exp_avg_": [768], "l_self_state_list_l_self_state_keys_100_exp_avg_": [2304, 768], "l_self_state_list_l_self_state_keys_101_exp_avg_": [2304], "l_self_state_list_l_self_state_keys_102_exp_avg_": [768, 768], "l_self_state_list_l_self_state_keys_103_exp_avg_": [768], "l_self_state_list_l_self_state_keys_104_exp_avg_": [768], "l_self_state_list_l_self_state_keys_105_exp_avg_": [768], "l_self_state_list_l_self_state_keys_106_exp_avg_": [3072, 768], 
"l_self_state_list_l_self_state_keys_107_exp_avg_": [3072], "l_self_state_list_l_self_state_keys_108_exp_avg_": [768, 3072], "l_self_state_list_l_self_state_keys_109_exp_avg_": [768], "l_self_state_list_l_self_state_keys_110_exp_avg_": [768], "l_self_state_list_l_self_state_keys_111_exp_avg_": [768], "l_self_state_list_l_self_state_keys_112_exp_avg_": [2304, 768], "l_self_state_list_l_self_state_keys_113_exp_avg_": [2304], "l_self_state_list_l_self_state_keys_114_exp_avg_": [768, 768], "l_self_state_list_l_self_state_keys_115_exp_avg_": [768], "l_self_state_list_l_self_state_keys_116_exp_avg_": [768], "l_self_state_list_l_self_state_keys_117_exp_avg_": [768], "l_self_state_list_l_self_state_keys_118_exp_avg_": [3072, 768], "l_self_state_list_l_self_state_keys_119_exp_avg_": [3072], "l_self_state_list_l_self_state_keys_120_exp_avg_": [768, 3072], "l_self_state_list_l_self_state_keys_121_exp_avg_": [768], "l_self_state_list_l_self_state_keys_122_exp_avg_": [768], "l_self_state_list_l_self_state_keys_123_exp_avg_": [768], "l_self_state_list_l_self_state_keys_124_exp_avg_": [2304, 768], "l_self_state_list_l_self_state_keys_125_exp_avg_": [2304], "l_self_state_list_l_self_state_keys_126_exp_avg_": [768, 768], "l_self_state_list_l_self_state_keys_127_exp_avg_": [768], "l_self_state_list_l_self_state_keys_128_exp_avg_": [768], "l_self_state_list_l_self_state_keys_129_exp_avg_": [768], "l_self_state_list_l_self_state_keys_130_exp_avg_": [3072, 768], "l_self_state_list_l_self_state_keys_131_exp_avg_": [3072], "l_self_state_list_l_self_state_keys_132_exp_avg_": [768, 3072], "l_self_state_list_l_self_state_keys_133_exp_avg_": [768], "l_self_state_list_l_self_state_keys_134_exp_avg_": [768], "l_self_state_list_l_self_state_keys_135_exp_avg_": [768], "l_self_state_list_l_self_state_keys_136_exp_avg_": [2304, 768], "l_self_state_list_l_self_state_keys_137_exp_avg_": [2304], "l_self_state_list_l_self_state_keys_138_exp_avg_": [768, 768], "l_self_state_list_l_self_state_keys_139_exp_avg_": [768], "l_self_state_list_l_self_state_keys_140_exp_avg_": [768], "l_self_state_list_l_self_state_keys_141_exp_avg_": [768], "l_self_state_list_l_self_state_keys_142_exp_avg_": [3072, 768], "l_self_state_list_l_self_state_keys_143_exp_avg_": [3072], "l_self_state_list_l_self_state_keys_144_exp_avg_": [768, 3072], "l_self_state_list_l_self_state_keys_145_exp_avg_": [768], "l_self_state_list_l_self_state_keys_146_exp_avg_": [768], "l_self_state_list_l_self_state_keys_147_exp_avg_": [768], "l_self_state_list_l_self_state_keys_0_exp_avg_sq_": [50304, 768], "l_self_state_list_l_self_state_keys_2_exp_avg_sq_": [768], "l_self_state_list_l_self_state_keys_3_exp_avg_sq_": [768], "l_self_state_list_l_self_state_keys_4_exp_avg_sq_": [2304, 768], "l_self_state_list_l_self_state_keys_5_exp_avg_sq_": [2304], "l_self_state_list_l_self_state_keys_6_exp_avg_sq_": [768, 768], "l_self_state_list_l_self_state_keys_7_exp_avg_sq_": [768], "l_self_state_list_l_self_state_keys_8_exp_avg_sq_": [768], "l_self_state_list_l_self_state_keys_9_exp_avg_sq_": [768], "l_self_state_list_l_self_state_keys_10_exp_avg_sq_": [3072, 768], "l_self_state_list_l_self_state_keys_11_exp_avg_sq_": [3072], "l_self_state_list_l_self_state_keys_12_exp_avg_sq_": [768, 3072], "l_self_state_list_l_self_state_keys_13_exp_avg_sq_": [768], "l_self_state_list_l_self_state_keys_14_exp_avg_sq_": [768], "l_self_state_list_l_self_state_keys_15_exp_avg_sq_": [768], "l_self_state_list_l_self_state_keys_16_exp_avg_sq_": [2304, 768], 
"l_self_state_list_l_self_state_keys_17_exp_avg_sq_": [2304], "l_self_state_list_l_self_state_keys_18_exp_avg_sq_": [768, 768], "l_self_state_list_l_self_state_keys_19_exp_avg_sq_": [768], "l_self_state_list_l_self_state_keys_20_exp_avg_sq_": [768], "l_self_state_list_l_self_state_keys_21_exp_avg_sq_": [768], "l_self_state_list_l_self_state_keys_22_exp_avg_sq_": [3072, 768], "l_self_state_list_l_self_state_keys_23_exp_avg_sq_": [3072], "l_self_state_list_l_self_state_keys_24_exp_avg_sq_": [768, 3072], "l_self_state_list_l_self_state_keys_25_exp_avg_sq_": [768], "l_self_state_list_l_self_state_keys_26_exp_avg_sq_": [768], "l_self_state_list_l_self_state_keys_27_exp_avg_sq_": [768], "l_self_state_list_l_self_state_keys_28_exp_avg_sq_": [2304, 768], "l_self_state_list_l_self_state_keys_29_exp_avg_sq_": [2304], "l_self_state_list_l_self_state_keys_30_exp_avg_sq_": [768, 768], "l_self_state_list_l_self_state_keys_31_exp_avg_sq_": [768], "l_self_state_list_l_self_state_keys_32_exp_avg_sq_": [768], "l_self_state_list_l_self_state_keys_33_exp_avg_sq_": [768], "l_self_state_list_l_self_state_keys_34_exp_avg_sq_": [3072, 768], "l_self_state_list_l_self_state_keys_35_exp_avg_sq_": [3072], "l_self_state_list_l_self_state_keys_36_exp_avg_sq_": [768, 3072], "l_self_state_list_l_self_state_keys_37_exp_avg_sq_": [768], "l_self_state_list_l_self_state_keys_38_exp_avg_sq_": [768], "l_self_state_list_l_self_state_keys_39_exp_avg_sq_": [768], "l_self_state_list_l_self_state_keys_40_exp_avg_sq_": [2304, 768], "l_self_state_list_l_self_state_keys_41_exp_avg_sq_": [2304], "l_self_state_list_l_self_state_keys_42_exp_avg_sq_": [768, 768], "l_self_state_list_l_self_state_keys_43_exp_avg_sq_": [768], "l_self_state_list_l_self_state_keys_44_exp_avg_sq_": [768], "l_self_state_list_l_self_state_keys_45_exp_avg_sq_": [768], "l_self_state_list_l_self_state_keys_46_exp_avg_sq_": [3072, 768], "l_self_state_list_l_self_state_keys_47_exp_avg_sq_": [3072], "l_self_state_list_l_self_state_keys_48_exp_avg_sq_": [768, 3072], "l_self_state_list_l_self_state_keys_49_exp_avg_sq_": [768], "l_self_state_list_l_self_state_keys_50_exp_avg_sq_": [768], "l_self_state_list_l_self_state_keys_51_exp_avg_sq_": [768], "l_self_state_list_l_self_state_keys_52_exp_avg_sq_": [2304, 768], "l_self_state_list_l_self_state_keys_53_exp_avg_sq_": [2304], "l_self_state_list_l_self_state_keys_54_exp_avg_sq_": [768, 768], "l_self_state_list_l_self_state_keys_55_exp_avg_sq_": [768], "l_self_state_list_l_self_state_keys_56_exp_avg_sq_": [768], "l_self_state_list_l_self_state_keys_57_exp_avg_sq_": [768], "l_self_state_list_l_self_state_keys_58_exp_avg_sq_": [3072, 768], "l_self_state_list_l_self_state_keys_59_exp_avg_sq_": [3072], "l_self_state_list_l_self_state_keys_60_exp_avg_sq_": [768, 3072], "l_self_state_list_l_self_state_keys_61_exp_avg_sq_": [768], "l_self_state_list_l_self_state_keys_62_exp_avg_sq_": [768], "l_self_state_list_l_self_state_keys_63_exp_avg_sq_": [768], "l_self_state_list_l_self_state_keys_64_exp_avg_sq_": [2304, 768], "l_self_state_list_l_self_state_keys_65_exp_avg_sq_": [2304], "l_self_state_list_l_self_state_keys_66_exp_avg_sq_": [768, 768], "l_self_state_list_l_self_state_keys_67_exp_avg_sq_": [768], "l_self_state_list_l_self_state_keys_68_exp_avg_sq_": [768], "l_self_state_list_l_self_state_keys_69_exp_avg_sq_": [768], "l_self_state_list_l_self_state_keys_70_exp_avg_sq_": [3072, 768], "l_self_state_list_l_self_state_keys_71_exp_avg_sq_": [3072], "l_self_state_list_l_self_state_keys_72_exp_avg_sq_": [768, 3072], 
"l_self_state_list_l_self_state_keys_73_exp_avg_sq_": [768], "l_self_state_list_l_self_state_keys_74_exp_avg_sq_": [768], "l_self_state_list_l_self_state_keys_75_exp_avg_sq_": [768], "l_self_state_list_l_self_state_keys_76_exp_avg_sq_": [2304, 768], "l_self_state_list_l_self_state_keys_77_exp_avg_sq_": [2304], "l_self_state_list_l_self_state_keys_78_exp_avg_sq_": [768, 768], "l_self_state_list_l_self_state_keys_79_exp_avg_sq_": [768], "l_self_state_list_l_self_state_keys_80_exp_avg_sq_": [768], "l_self_state_list_l_self_state_keys_81_exp_avg_sq_": [768], "l_self_state_list_l_self_state_keys_82_exp_avg_sq_": [3072, 768], "l_self_state_list_l_self_state_keys_83_exp_avg_sq_": [3072], "l_self_state_list_l_self_state_keys_84_exp_avg_sq_": [768, 3072], "l_self_state_list_l_self_state_keys_85_exp_avg_sq_": [768], "l_self_state_list_l_self_state_keys_86_exp_avg_sq_": [768], "l_self_state_list_l_self_state_keys_87_exp_avg_sq_": [768], "l_self_state_list_l_self_state_keys_88_exp_avg_sq_": [2304, 768], "l_self_state_list_l_self_state_keys_89_exp_avg_sq_": [2304], "l_self_state_list_l_self_state_keys_90_exp_avg_sq_": [768, 768], "l_self_state_list_l_self_state_keys_91_exp_avg_sq_": [768], "l_self_state_list_l_self_state_keys_92_exp_avg_sq_": [768], "l_self_state_list_l_self_state_keys_93_exp_avg_sq_": [768], "l_self_state_list_l_self_state_keys_94_exp_avg_sq_": [3072, 768], "l_self_state_list_l_self_state_keys_95_exp_avg_sq_": [3072], "l_self_state_list_l_self_state_keys_96_exp_avg_sq_": [768, 3072], "l_self_state_list_l_self_state_keys_97_exp_avg_sq_": [768], "l_self_state_list_l_self_state_keys_98_exp_avg_sq_": [768], "l_self_state_list_l_self_state_keys_99_exp_avg_sq_": [768], "l_self_state_list_l_self_state_keys_100_exp_avg_sq_": [2304, 768], "l_self_state_list_l_self_state_keys_101_exp_avg_sq_": [2304], "l_self_state_list_l_self_state_keys_102_exp_avg_sq_": [768, 768], "l_self_state_list_l_self_state_keys_103_exp_avg_sq_": [768], "l_self_state_list_l_self_state_keys_104_exp_avg_sq_": [768], "l_self_state_list_l_self_state_keys_105_exp_avg_sq_": [768], "l_self_state_list_l_self_state_keys_106_exp_avg_sq_": [3072, 768], "l_self_state_list_l_self_state_keys_107_exp_avg_sq_": [3072], "l_self_state_list_l_self_state_keys_108_exp_avg_sq_": [768, 3072], "l_self_state_list_l_self_state_keys_109_exp_avg_sq_": [768], "l_self_state_list_l_self_state_keys_110_exp_avg_sq_": [768], "l_self_state_list_l_self_state_keys_111_exp_avg_sq_": [768], "l_self_state_list_l_self_state_keys_112_exp_avg_sq_": [2304, 768], "l_self_state_list_l_self_state_keys_113_exp_avg_sq_": [2304], "l_self_state_list_l_self_state_keys_114_exp_avg_sq_": [768, 768], "l_self_state_list_l_self_state_keys_115_exp_avg_sq_": [768], "l_self_state_list_l_self_state_keys_116_exp_avg_sq_": [768], "l_self_state_list_l_self_state_keys_117_exp_avg_sq_": [768], "l_self_state_list_l_self_state_keys_118_exp_avg_sq_": [3072, 768], "l_self_state_list_l_self_state_keys_119_exp_avg_sq_": [3072], "l_self_state_list_l_self_state_keys_120_exp_avg_sq_": [768, 3072], "l_self_state_list_l_self_state_keys_121_exp_avg_sq_": [768], "l_self_state_list_l_self_state_keys_122_exp_avg_sq_": [768], "l_self_state_list_l_self_state_keys_123_exp_avg_sq_": [768], "l_self_state_list_l_self_state_keys_124_exp_avg_sq_": [2304, 768], "l_self_state_list_l_self_state_keys_125_exp_avg_sq_": [2304], "l_self_state_list_l_self_state_keys_126_exp_avg_sq_": [768, 768], "l_self_state_list_l_self_state_keys_127_exp_avg_sq_": [768], "l_self_state_list_l_self_state_keys_128_exp_avg_sq_": 
[768], "l_self_state_list_l_self_state_keys_129_exp_avg_sq_": [768], "l_self_state_list_l_self_state_keys_130_exp_avg_sq_": [3072, 768], "l_self_state_list_l_self_state_keys_131_exp_avg_sq_": [3072], "l_self_state_list_l_self_state_keys_132_exp_avg_sq_": [768, 3072], "l_self_state_list_l_self_state_keys_133_exp_avg_sq_": [768], "l_self_state_list_l_self_state_keys_134_exp_avg_sq_": [768], "l_self_state_list_l_self_state_keys_135_exp_avg_sq_": [768], "l_self_state_list_l_self_state_keys_136_exp_avg_sq_": [2304, 768], "l_self_state_list_l_self_state_keys_137_exp_avg_sq_": [2304], "l_self_state_list_l_self_state_keys_138_exp_avg_sq_": [768, 768], "l_self_state_list_l_self_state_keys_139_exp_avg_sq_": [768], "l_self_state_list_l_self_state_keys_140_exp_avg_sq_": [768], "l_self_state_list_l_self_state_keys_141_exp_avg_sq_": [768], "l_self_state_list_l_self_state_keys_142_exp_avg_sq_": [3072, 768], "l_self_state_list_l_self_state_keys_143_exp_avg_sq_": [3072], "l_self_state_list_l_self_state_keys_144_exp_avg_sq_": [768, 3072], "l_self_state_list_l_self_state_keys_145_exp_avg_sq_": [768], "l_self_state_list_l_self_state_keys_146_exp_avg_sq_": [768], "l_self_state_list_l_self_state_keys_147_exp_avg_sq_": [768], "l_self_state_list_l_self_state_keys_0_step_": [], "l_self_state_list_l_self_state_keys_2_step_": [], "l_self_state_list_l_self_state_keys_3_step_": [], "l_self_state_list_l_self_state_keys_4_step_": [], "l_self_state_list_l_self_state_keys_5_step_": [], "l_self_state_list_l_self_state_keys_6_step_": [], "l_self_state_list_l_self_state_keys_7_step_": [], "l_self_state_list_l_self_state_keys_8_step_": [], "l_self_state_list_l_self_state_keys_9_step_": [], "l_self_state_list_l_self_state_keys_10_step_": [], "l_self_state_list_l_self_state_keys_11_step_": [], "l_self_state_list_l_self_state_keys_12_step_": [], "l_self_state_list_l_self_state_keys_13_step_": [], "l_self_state_list_l_self_state_keys_14_step_": [], "l_self_state_list_l_self_state_keys_15_step_": [], "l_self_state_list_l_self_state_keys_16_step_": [], "l_self_state_list_l_self_state_keys_17_step_": [], "l_self_state_list_l_self_state_keys_18_step_": [], "l_self_state_list_l_self_state_keys_19_step_": [], "l_self_state_list_l_self_state_keys_20_step_": [], "l_self_state_list_l_self_state_keys_21_step_": [], "l_self_state_list_l_self_state_keys_22_step_": [], "l_self_state_list_l_self_state_keys_23_step_": [], "l_self_state_list_l_self_state_keys_24_step_": [], "l_self_state_list_l_self_state_keys_25_step_": [], "l_self_state_list_l_self_state_keys_26_step_": [], "l_self_state_list_l_self_state_keys_27_step_": [], "l_self_state_list_l_self_state_keys_28_step_": [], "l_self_state_list_l_self_state_keys_29_step_": [], "l_self_state_list_l_self_state_keys_30_step_": [], "l_self_state_list_l_self_state_keys_31_step_": [], "l_self_state_list_l_self_state_keys_32_step_": [], "l_self_state_list_l_self_state_keys_33_step_": [], "l_self_state_list_l_self_state_keys_34_step_": [], "l_self_state_list_l_self_state_keys_35_step_": [], "l_self_state_list_l_self_state_keys_36_step_": [], "l_self_state_list_l_self_state_keys_37_step_": [], "l_self_state_list_l_self_state_keys_38_step_": [], "l_self_state_list_l_self_state_keys_39_step_": [], "l_self_state_list_l_self_state_keys_40_step_": [], "l_self_state_list_l_self_state_keys_41_step_": [], "l_self_state_list_l_self_state_keys_42_step_": [], "l_self_state_list_l_self_state_keys_43_step_": [], "l_self_state_list_l_self_state_keys_44_step_": [], "l_self_state_list_l_self_state_keys_45_step_": [], 
"l_self_state_list_l_self_state_keys_46_step_": [], "l_self_state_list_l_self_state_keys_47_step_": [], "l_self_state_list_l_self_state_keys_48_step_": [], "l_self_state_list_l_self_state_keys_49_step_": [], "l_self_state_list_l_self_state_keys_50_step_": [], "l_self_state_list_l_self_state_keys_51_step_": [], "l_self_state_list_l_self_state_keys_52_step_": [], "l_self_state_list_l_self_state_keys_53_step_": [], "l_self_state_list_l_self_state_keys_54_step_": [], "l_self_state_list_l_self_state_keys_55_step_": [], "l_self_state_list_l_self_state_keys_56_step_": [], "l_self_state_list_l_self_state_keys_57_step_": [], "l_self_state_list_l_self_state_keys_58_step_": [], "l_self_state_list_l_self_state_keys_59_step_": [], "l_self_state_list_l_self_state_keys_60_step_": [], "l_self_state_list_l_self_state_keys_61_step_": [], "l_self_state_list_l_self_state_keys_62_step_": [], "l_self_state_list_l_self_state_keys_63_step_": [], "l_self_state_list_l_self_state_keys_64_step_": [], "l_self_state_list_l_self_state_keys_65_step_": [], "l_self_state_list_l_self_state_keys_66_step_": [], "l_self_state_list_l_self_state_keys_67_step_": [], "l_self_state_list_l_self_state_keys_68_step_": [], "l_self_state_list_l_self_state_keys_69_step_": [], "l_self_state_list_l_self_state_keys_70_step_": [], "l_self_state_list_l_self_state_keys_71_step_": [], "l_self_state_list_l_self_state_keys_72_step_": [], "l_self_state_list_l_self_state_keys_73_step_": [], "l_self_state_list_l_self_state_keys_74_step_": [], "l_self_state_list_l_self_state_keys_75_step_": [], "l_self_state_list_l_self_state_keys_76_step_": [], "l_self_state_list_l_self_state_keys_77_step_": [], "l_self_state_list_l_self_state_keys_78_step_": [], "l_self_state_list_l_self_state_keys_79_step_": [], "l_self_state_list_l_self_state_keys_80_step_": [], "l_self_state_list_l_self_state_keys_81_step_": [], "l_self_state_list_l_self_state_keys_82_step_": [], "l_self_state_list_l_self_state_keys_83_step_": [], "l_self_state_list_l_self_state_keys_84_step_": [], "l_self_state_list_l_self_state_keys_85_step_": [], "l_self_state_list_l_self_state_keys_86_step_": [], "l_self_state_list_l_self_state_keys_87_step_": [], "l_self_state_list_l_self_state_keys_88_step_": [], "l_self_state_list_l_self_state_keys_89_step_": [], "l_self_state_list_l_self_state_keys_90_step_": [], "l_self_state_list_l_self_state_keys_91_step_": [], "l_self_state_list_l_self_state_keys_92_step_": [], "l_self_state_list_l_self_state_keys_93_step_": [], "l_self_state_list_l_self_state_keys_94_step_": [], "l_self_state_list_l_self_state_keys_95_step_": [], "l_self_state_list_l_self_state_keys_96_step_": [], "l_self_state_list_l_self_state_keys_97_step_": [], "l_self_state_list_l_self_state_keys_98_step_": [], "l_self_state_list_l_self_state_keys_99_step_": [], "l_self_state_list_l_self_state_keys_100_step_": [], "l_self_state_list_l_self_state_keys_101_step_": [], "l_self_state_list_l_self_state_keys_102_step_": [], "l_self_state_list_l_self_state_keys_103_step_": [], "l_self_state_list_l_self_state_keys_104_step_": [], "l_self_state_list_l_self_state_keys_105_step_": [], "l_self_state_list_l_self_state_keys_106_step_": [], "l_self_state_list_l_self_state_keys_107_step_": [], "l_self_state_list_l_self_state_keys_108_step_": [], "l_self_state_list_l_self_state_keys_109_step_": [], "l_self_state_list_l_self_state_keys_110_step_": [], "l_self_state_list_l_self_state_keys_111_step_": [], "l_self_state_list_l_self_state_keys_112_step_": [], "l_self_state_list_l_self_state_keys_113_step_": [], 
"l_self_state_list_l_self_state_keys_114_step_": [], "l_self_state_list_l_self_state_keys_115_step_": [], "l_self_state_list_l_self_state_keys_116_step_": [], "l_self_state_list_l_self_state_keys_117_step_": [], "l_self_state_list_l_self_state_keys_118_step_": [], "l_self_state_list_l_self_state_keys_119_step_": [], "l_self_state_list_l_self_state_keys_120_step_": [], "l_self_state_list_l_self_state_keys_121_step_": [], "l_self_state_list_l_self_state_keys_122_step_": [], "l_self_state_list_l_self_state_keys_123_step_": [], "l_self_state_list_l_self_state_keys_124_step_": [], "l_self_state_list_l_self_state_keys_125_step_": [], "l_self_state_list_l_self_state_keys_126_step_": [], "l_self_state_list_l_self_state_keys_127_step_": [], "l_self_state_list_l_self_state_keys_128_step_": [], "l_self_state_list_l_self_state_keys_129_step_": [], "l_self_state_list_l_self_state_keys_130_step_": [], "l_self_state_list_l_self_state_keys_131_step_": [], "l_self_state_list_l_self_state_keys_132_step_": [], "l_self_state_list_l_self_state_keys_133_step_": [], "l_self_state_list_l_self_state_keys_134_step_": [], "l_self_state_list_l_self_state_keys_135_step_": [], "l_self_state_list_l_self_state_keys_136_step_": [], "l_self_state_list_l_self_state_keys_137_step_": [], "l_self_state_list_l_self_state_keys_138_step_": [], "l_self_state_list_l_self_state_keys_139_step_": [], "l_self_state_list_l_self_state_keys_140_step_": [], "l_self_state_list_l_self_state_keys_141_step_": [], "l_self_state_list_l_self_state_keys_142_step_": [], "l_self_state_list_l_self_state_keys_143_step_": [], "l_self_state_list_l_self_state_keys_144_step_": [], "l_self_state_list_l_self_state_keys_145_step_": [], "l_self_state_list_l_self_state_keys_146_step_": [], "l_self_state_list_l_self_state_keys_147_step_": [], "getitem_592": [], "getitem_593": [], "getitem_594": [], "getitem_595": [], "getitem_596": [], "getitem_597": [], "getitem_598": [], "getitem_599": [], "getitem_600": [], "getitem_601": [], "getitem_602": [], "getitem_603": [], "getitem_604": [], "getitem_605": [], "getitem_606": [], "getitem_607": [], "getitem_608": [], "getitem_609": [], "getitem_610": [], "getitem_611": [], "getitem_612": [], "getitem_613": [], "getitem_614": [], "getitem_615": [], "getitem_616": [], "getitem_617": [], "getitem_618": [], "getitem_619": [], "getitem_620": [], "getitem_621": [], "getitem_622": [], "getitem_623": [], "getitem_624": [], "getitem_625": [], "getitem_626": [], "getitem_627": [], "getitem_628": [], "getitem_629": [], "getitem_630": [], "getitem_631": [], "getitem_632": [], "getitem_633": [], "getitem_634": [], "getitem_635": [], "getitem_636": [], "getitem_637": [], "getitem_638": [], "getitem_639": [], "getitem_640": [], "getitem_641": [], "getitem_642": [], "getitem_643": [], "getitem_644": [], "getitem_645": [], "getitem_646": [], "getitem_647": [], "getitem_648": [], "getitem_649": [], "getitem_650": [], "getitem_651": [], "getitem_652": [], "getitem_653": [], "getitem_654": [], "getitem_655": [], "getitem_656": [], "getitem_657": [], "getitem_658": [], "getitem_659": [], "getitem_660": [], "getitem_661": [], "getitem_662": [], "getitem_663": [], "getitem_664": [], "getitem_665": [], "getitem_666": [], "getitem_667": [], "getitem_668": [], "getitem_669": [], "getitem_670": [], "getitem_671": [], "getitem_672": [], "getitem_673": [], "getitem_674": [], "getitem_675": [], "getitem_676": [], "getitem_677": [], "getitem_678": [], "getitem_679": [], "getitem_680": [], "getitem_681": [], "getitem_682": [], "getitem_683": [], 
"getitem_684": [], "getitem_685": [], "getitem_686": [], "getitem_687": [], "getitem_688": [], "getitem_689": [], "getitem_690": [], "getitem_691": [], "getitem_692": [], "getitem_693": [], "getitem_694": [], "getitem_695": [], "getitem_696": [], "getitem_697": [], "getitem_698": [], "getitem_699": [], "getitem_700": [], "getitem_701": [], "getitem_702": [], "getitem_703": [], "getitem_704": [], "getitem_705": [], "getitem_706": [], "getitem_707": [], "getitem_708": [], "getitem_709": [], "getitem_710": [], "getitem_711": [], "getitem_712": [], "getitem_713": [], "getitem_714": [], "getitem_715": [], "getitem_716": [], "getitem_717": [], "getitem_718": [], "getitem_719": [], "getitem_720": [], "getitem_721": [], "getitem_722": [], "getitem_723": [], "getitem_724": [], "getitem_725": [], "getitem_726": [], "getitem_727": [], "getitem_728": [], "getitem_729": [], "getitem_730": [], "getitem_731": [], "getitem_732": [], "getitem_733": [], "getitem_734": [], "getitem_735": [], "getitem_736": [], "getitem_737": [], "getitem_738": [], "getitem_739": [], "getitem_740": [], "getitem_741": [], "getitem_742": [], "getitem_743": [], "getitem_744": [], "getitem_745": [], "getitem_746": [], "getitem_747": [], "getitem_748": [], "getitem_749": [], "getitem_750": [], "getitem_751": [], "getitem_752": [], "getitem_753": [], "getitem_754": [], "getitem_755": [], "getitem_756": [], "getitem_757": [], "getitem_758": [], "getitem_759": [], "getitem_760": [], "getitem_761": [], "getitem_762": [], "getitem_763": [], "getitem_764": [], "getitem_765": [], "getitem_766": [], "getitem_767": [], "getitem_768": [], "getitem_769": [], "getitem_770": [], "getitem_771": [], "getitem_772": [], "getitem_773": [], "getitem_774": [], "getitem_775": [], "getitem_776": [], "getitem_777": [], "getitem_778": [], "getitem_779": [], "getitem_780": [], "getitem_781": [], "getitem_782": [], "getitem_783": [], "getitem_784": [], "getitem_785": [], "getitem_786": [], "getitem_787": [], "getitem_788": [], "getitem_789": [], "getitem_790": [], "getitem_791": [], "getitem_792": [], "getitem_793": [], "getitem_794": [], "getitem_795": [], "getitem_796": [], "getitem_797": [], "getitem_798": [], "getitem_799": [], "getitem_800": [], "getitem_801": [], "getitem_802": [], "getitem_803": [], "getitem_804": [], "getitem_805": [], "getitem_806": [], "getitem_807": [], "getitem_808": [], "getitem_809": [], "getitem_810": [], "getitem_811": [], "getitem_812": [], "getitem_813": [], "getitem_814": [], "getitem_815": [], "getitem_816": [], "getitem_817": [], "getitem_818": [], "getitem_819": [], "getitem_820": [], "getitem_821": [], "getitem_822": [], "getitem_823": [], "getitem_824": [], "getitem_825": [], "getitem_826": [], "getitem_827": [], "getitem_828": [], "getitem_829": [], "getitem_830": [], "getitem_831": [], "getitem_832": [], "getitem_833": [], "getitem_834": [], "getitem_835": [], "getitem_836": [], "getitem_837": [], "getitem_838": [], "getitem_839": [], "getitem_840": [], "getitem_841": [], "getitem_842": [], "getitem_843": [], "getitem_844": [], "getitem_845": [], "getitem_846": [], "getitem_847": [], "getitem_848": [], "getitem_849": [], "getitem_850": [], "getitem_851": [], "getitem_852": [], "getitem_853": [], "getitem_854": [], "getitem_855": [], "getitem_856": [], "getitem_857": [], "getitem_858": [], "getitem_859": [], "getitem_860": [], "getitem_861": [], "getitem_862": [], "getitem_863": [], "getitem_864": [], "getitem_865": [], "getitem_866": [], "getitem_867": [], "getitem_868": [], "getitem_869": [], "getitem_870": [], 
"getitem_871": [], "getitem_872": [], "getitem_873": [], "getitem_874": [], "getitem_875": [], "getitem_876": [], "getitem_877": [], "getitem_878": [], "getitem_879": [], "getitem_880": [], "getitem_881": [], "getitem_882": [], "getitem_883": [], "getitem_884": [], "getitem_885": [], "getitem_886": [], "getitem_887": [], "getitem_1776": [50304, 768], "getitem_1777": [1024, 768], "getitem_1778": [768], "getitem_1779": [768], "getitem_1780": [2304, 768], "getitem_1781": [2304], "getitem_1782": [768, 768], "getitem_1783": [768], "getitem_1784": [768], "getitem_1785": [768], "getitem_1786": [3072, 768], "getitem_1787": [3072], "getitem_1788": [768, 3072], "getitem_1789": [768], "getitem_1790": [768], "getitem_1791": [768], "getitem_1792": [2304, 768], "getitem_1793": [2304], "getitem_1794": [768, 768], "getitem_1795": [768], "getitem_1796": [768], "getitem_1797": [768], "getitem_1798": [3072, 768], "getitem_1799": [3072], "getitem_1800": [768, 3072], "getitem_1801": [768], "getitem_1802": [768], "getitem_1803": [768], "getitem_1804": [2304, 768], "getitem_1805": [2304], "getitem_1806": [768, 768], "getitem_1807": [768], "getitem_1808": [768], "getitem_1809": [768], "getitem_1810": [3072, 768], "getitem_1811": [3072], "getitem_1812": [768, 3072], "getitem_1813": [768], "getitem_1814": [768], "getitem_1815": [768], "getitem_1816": [2304, 768], "getitem_1817": [2304], "getitem_1818": [768, 768], "getitem_1819": [768], "getitem_1820": [768], "getitem_1821": [768], "getitem_1822": [3072, 768], "getitem_1823": [3072], "getitem_1824": [768, 3072], "getitem_1825": [768], "getitem_1826": [768], "getitem_1827": [768], "getitem_1828": [2304, 768], "getitem_1829": [2304], "getitem_1830": [768, 768], "getitem_1831": [768], "getitem_1832": [768], "getitem_1833": [768], "getitem_1834": [3072, 768], "getitem_1835": [3072], "getitem_1836": [768, 3072], "getitem_1837": [768], "getitem_1838": [768], "getitem_1839": [768], "getitem_1840": [2304, 768], "getitem_1841": [2304], "getitem_1842": [768, 768], "getitem_1843": [768], "getitem_1844": [768], "getitem_1845": [768], "getitem_1846": [3072, 768], "getitem_1847": [3072], "getitem_1848": [768, 3072], "getitem_1849": [768], "getitem_1850": [768], "getitem_1851": [768], "getitem_1852": [2304, 768], "getitem_1853": [2304], "getitem_1854": [768, 768], "getitem_1855": [768], "getitem_1856": [768], "getitem_1857": [768], "getitem_1858": [3072, 768], "getitem_1859": [3072], "getitem_1860": [768, 3072], "getitem_1861": [768], "getitem_1862": [768], "getitem_1863": [768], "getitem_1864": [2304, 768], "getitem_1865": [2304], "getitem_1866": [768, 768], "getitem_1867": [768], "getitem_1868": [768], "getitem_1869": [768], "getitem_1870": [3072, 768], "getitem_1871": [3072], "getitem_1872": [768, 3072], "getitem_1873": [768], "getitem_1874": [768], "getitem_1875": [768], "getitem_1876": [2304, 768], "getitem_1877": [2304], "getitem_1878": [768, 768], "getitem_1879": [768], "getitem_1880": [768], "getitem_1881": [768], "getitem_1882": [3072, 768], "getitem_1883": [3072], "getitem_1884": [768, 3072], "getitem_1885": [768], "getitem_1886": [768], "getitem_1887": [768], "getitem_1888": [2304, 768], "getitem_1889": [2304], "getitem_1890": [768, 768], "getitem_1891": [768], "getitem_1892": [768], "getitem_1893": [768], "getitem_1894": [3072, 768], "getitem_1895": [3072], "getitem_1896": [768, 3072], "getitem_1897": [768], "getitem_1898": [768], "getitem_1899": [768], "getitem_1900": [2304, 768], "getitem_1901": [2304], "getitem_1902": [768, 768], "getitem_1903": [768], 
"getitem_1904": [768], "getitem_1905": [768], "getitem_1906": [3072, 768], "getitem_1907": [3072], "getitem_1908": [768, 3072], "getitem_1909": [768], "getitem_1910": [768], "getitem_1911": [768], "getitem_1912": [2304, 768], "getitem_1913": [2304], "getitem_1914": [768, 768], "getitem_1915": [768], "getitem_1916": [768], "getitem_1917": [768], "getitem_1918": [3072, 768], "getitem_1919": [3072], "getitem_1920": [768, 3072], "getitem_1921": [768], "getitem_1922": [768], "getitem_1923": [768]}}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0, "has_payload": "81513b26edcd911a5160c972fdbca10f"} + class GraphModule(torch.nn.Module): + def forward(self, L_self_param_groups_0_params_0_: "f32[50304, 768][768, 1]cuda:0", L_self_param_groups_0_params_1_: "f32[1024, 768][768, 1]cuda:0", L_self_param_groups_0_params_2_: "f32[768][1]cuda:0", L_self_param_groups_0_params_3_: "f32[768][1]cuda:0", L_self_param_groups_0_params_4_: "f32[2304, 768][768, 1]cuda:0", L_self_param_groups_0_params_5_: "f32[2304][1]cuda:0", L_self_param_groups_0_params_6_: "f32[768, 768][768, 1]cuda:0", L_self_param_groups_0_params_7_: "f32[768][1]cuda:0", L_self_param_groups_0_params_8_: "f32[768][1]cuda:0", L_self_param_groups_0_params_9_: "f32[768][1]cuda:0", L_self_param_groups_0_params_10_: "f32[3072, 768][768, 1]cuda:0", L_self_param_groups_0_params_11_: "f32[3072][1]cuda:0", L_self_param_groups_0_params_12_: "f32[768, 3072][3072, 1]cuda:0", L_self_param_groups_0_params_13_: "f32[768][1]cuda:0", L_self_param_groups_0_params_14_: "f32[768][1]cuda:0", L_self_param_groups_0_params_15_: "f32[768][1]cuda:0", L_self_param_groups_0_params_16_: "f32[2304, 768][768, 1]cuda:0", L_self_param_groups_0_params_17_: "f32[2304][1]cuda:0", L_self_param_groups_0_params_18_: "f32[768, 768][768, 1]cuda:0", L_self_param_groups_0_params_19_: "f32[768][1]cuda:0", L_self_param_groups_0_params_20_: "f32[768][1]cuda:0", L_self_param_groups_0_params_21_: "f32[768][1]cuda:0", L_self_param_groups_0_params_22_: "f32[3072, 768][768, 1]cuda:0", L_self_param_groups_0_params_23_: "f32[3072][1]cuda:0", L_self_param_groups_0_params_24_: "f32[768, 3072][3072, 1]cuda:0", L_self_param_groups_0_params_25_: "f32[768][1]cuda:0", L_self_param_groups_0_params_26_: "f32[768][1]cuda:0", L_self_param_groups_0_params_27_: "f32[768][1]cuda:0", L_self_param_groups_0_params_28_: "f32[2304, 768][768, 1]cuda:0", L_self_param_groups_0_params_29_: "f32[2304][1]cuda:0", L_self_param_groups_0_params_30_: "f32[768, 768][768, 1]cuda:0", L_self_param_groups_0_params_31_: "f32[768][1]cuda:0", L_self_param_groups_0_params_32_: "f32[768][1]cuda:0", L_self_param_groups_0_params_33_: "f32[768][1]cuda:0", L_self_param_groups_0_params_34_: "f32[3072, 768][768, 1]cuda:0", L_self_param_groups_0_params_35_: "f32[3072][1]cuda:0", L_self_param_groups_0_params_36_: "f32[768, 3072][3072, 1]cuda:0", L_self_param_groups_0_params_37_: "f32[768][1]cuda:0", L_self_param_groups_0_params_38_: "f32[768][1]cuda:0", L_self_param_groups_0_params_39_: "f32[768][1]cuda:0", L_self_param_groups_0_params_40_: "f32[2304, 768][768, 1]cuda:0", L_self_param_groups_0_params_41_: "f32[2304][1]cuda:0", L_self_param_groups_0_params_42_: "f32[768, 768][768, 1]cuda:0", L_self_param_groups_0_params_43_: "f32[768][1]cuda:0", L_self_param_groups_0_params_44_: "f32[768][1]cuda:0", L_self_param_groups_0_params_45_: "f32[768][1]cuda:0", L_self_param_groups_0_params_46_: "f32[3072, 768][768, 1]cuda:0", L_self_param_groups_0_params_47_: "f32[3072][1]cuda:0", L_self_param_groups_0_params_48_: "f32[768, 3072][3072, 1]cuda:0", 
L_self_param_groups_0_params_49_: "f32[768][1]cuda:0", L_self_param_groups_0_params_50_: "f32[768][1]cuda:0", L_self_param_groups_0_params_51_: "f32[768][1]cuda:0", L_self_param_groups_0_params_52_: "f32[2304, 768][768, 1]cuda:0", L_self_param_groups_0_params_53_: "f32[2304][1]cuda:0", L_self_param_groups_0_params_54_: "f32[768, 768][768, 1]cuda:0", L_self_param_groups_0_params_55_: "f32[768][1]cuda:0", L_self_param_groups_0_params_56_: "f32[768][1]cuda:0", L_self_param_groups_0_params_57_: "f32[768][1]cuda:0", L_self_param_groups_0_params_58_: "f32[3072, 768][768, 1]cuda:0", L_self_param_groups_0_params_59_: "f32[3072][1]cuda:0", L_self_param_groups_0_params_60_: "f32[768, 3072][3072, 1]cuda:0", L_self_param_groups_0_params_61_: "f32[768][1]cuda:0", L_self_param_groups_0_params_62_: "f32[768][1]cuda:0", L_self_param_groups_0_params_63_: "f32[768][1]cuda:0", L_self_param_groups_0_params_64_: "f32[2304, 768][768, 1]cuda:0", L_self_param_groups_0_params_65_: "f32[2304][1]cuda:0", L_self_param_groups_0_params_66_: "f32[768, 768][768, 1]cuda:0", L_self_param_groups_0_params_67_: "f32[768][1]cuda:0", L_self_param_groups_0_params_68_: "f32[768][1]cuda:0", L_self_param_groups_0_params_69_: "f32[768][1]cuda:0", L_self_param_groups_0_params_70_: "f32[3072, 768][768, 1]cuda:0", L_self_param_groups_0_params_71_: "f32[3072][1]cuda:0", L_self_param_groups_0_params_72_: "f32[768, 3072][3072, 1]cuda:0", L_self_param_groups_0_params_73_: "f32[768][1]cuda:0", L_self_param_groups_0_params_74_: "f32[768][1]cuda:0", L_self_param_groups_0_params_75_: "f32[768][1]cuda:0", L_self_param_groups_0_params_76_: "f32[2304, 768][768, 1]cuda:0", L_self_param_groups_0_params_77_: "f32[2304][1]cuda:0", L_self_param_groups_0_params_78_: "f32[768, 768][768, 1]cuda:0", L_self_param_groups_0_params_79_: "f32[768][1]cuda:0", L_self_param_groups_0_params_80_: "f32[768][1]cuda:0", L_self_param_groups_0_params_81_: "f32[768][1]cuda:0", L_self_param_groups_0_params_82_: "f32[3072, 768][768, 1]cuda:0", L_self_param_groups_0_params_83_: "f32[3072][1]cuda:0", L_self_param_groups_0_params_84_: "f32[768, 3072][3072, 1]cuda:0", L_self_param_groups_0_params_85_: "f32[768][1]cuda:0", L_self_param_groups_0_params_86_: "f32[768][1]cuda:0", L_self_param_groups_0_params_87_: "f32[768][1]cuda:0", L_self_param_groups_0_params_88_: "f32[2304, 768][768, 1]cuda:0", L_self_param_groups_0_params_89_: "f32[2304][1]cuda:0", L_self_param_groups_0_params_90_: "f32[768, 768][768, 1]cuda:0", L_self_param_groups_0_params_91_: "f32[768][1]cuda:0", L_self_param_groups_0_params_92_: "f32[768][1]cuda:0", L_self_param_groups_0_params_93_: "f32[768][1]cuda:0", L_self_param_groups_0_params_94_: "f32[3072, 768][768, 1]cuda:0", L_self_param_groups_0_params_95_: "f32[3072][1]cuda:0", L_self_param_groups_0_params_96_: "f32[768, 3072][3072, 1]cuda:0", L_self_param_groups_0_params_97_: "f32[768][1]cuda:0", L_self_param_groups_0_params_98_: "f32[768][1]cuda:0", L_self_param_groups_0_params_99_: "f32[768][1]cuda:0", L_self_param_groups_0_params_100_: "f32[2304, 768][768, 1]cuda:0", L_self_param_groups_0_params_101_: "f32[2304][1]cuda:0", L_self_param_groups_0_params_102_: "f32[768, 768][768, 1]cuda:0", L_self_param_groups_0_params_103_: "f32[768][1]cuda:0", L_self_param_groups_0_params_104_: "f32[768][1]cuda:0", L_self_param_groups_0_params_105_: "f32[768][1]cuda:0", L_self_param_groups_0_params_106_: "f32[3072, 768][768, 1]cuda:0", L_self_param_groups_0_params_107_: "f32[3072][1]cuda:0", L_self_param_groups_0_params_108_: "f32[768, 3072][3072, 1]cuda:0", 
L_self_param_groups_0_params_109_: "f32[768][1]cuda:0", L_self_param_groups_0_params_110_: "f32[768][1]cuda:0", L_self_param_groups_0_params_111_: "f32[768][1]cuda:0", L_self_param_groups_0_params_112_: "f32[2304, 768][768, 1]cuda:0", L_self_param_groups_0_params_113_: "f32[2304][1]cuda:0", L_self_param_groups_0_params_114_: "f32[768, 768][768, 1]cuda:0", L_self_param_groups_0_params_115_: "f32[768][1]cuda:0", L_self_param_groups_0_params_116_: "f32[768][1]cuda:0", L_self_param_groups_0_params_117_: "f32[768][1]cuda:0", L_self_param_groups_0_params_118_: "f32[3072, 768][768, 1]cuda:0", L_self_param_groups_0_params_119_: "f32[3072][1]cuda:0", L_self_param_groups_0_params_120_: "f32[768, 3072][3072, 1]cuda:0", L_self_param_groups_0_params_121_: "f32[768][1]cuda:0", L_self_param_groups_0_params_122_: "f32[768][1]cuda:0", L_self_param_groups_0_params_123_: "f32[768][1]cuda:0", L_self_param_groups_0_params_124_: "f32[2304, 768][768, 1]cuda:0", L_self_param_groups_0_params_125_: "f32[2304][1]cuda:0", L_self_param_groups_0_params_126_: "f32[768, 768][768, 1]cuda:0", L_self_param_groups_0_params_127_: "f32[768][1]cuda:0", L_self_param_groups_0_params_128_: "f32[768][1]cuda:0", L_self_param_groups_0_params_129_: "f32[768][1]cuda:0", L_self_param_groups_0_params_130_: "f32[3072, 768][768, 1]cuda:0", L_self_param_groups_0_params_131_: "f32[3072][1]cuda:0", L_self_param_groups_0_params_132_: "f32[768, 3072][3072, 1]cuda:0", L_self_param_groups_0_params_133_: "f32[768][1]cuda:0", L_self_param_groups_0_params_134_: "f32[768][1]cuda:0", L_self_param_groups_0_params_135_: "f32[768][1]cuda:0", L_self_param_groups_0_params_136_: "f32[2304, 768][768, 1]cuda:0", L_self_param_groups_0_params_137_: "f32[2304][1]cuda:0", L_self_param_groups_0_params_138_: "f32[768, 768][768, 1]cuda:0", L_self_param_groups_0_params_139_: "f32[768][1]cuda:0", L_self_param_groups_0_params_140_: "f32[768][1]cuda:0", L_self_param_groups_0_params_141_: "f32[768][1]cuda:0", L_self_param_groups_0_params_142_: "f32[3072, 768][768, 1]cuda:0", L_self_param_groups_0_params_143_: "f32[3072][1]cuda:0", L_self_param_groups_0_params_144_: "f32[768, 3072][3072, 1]cuda:0", L_self_param_groups_0_params_145_: "f32[768][1]cuda:0", L_self_param_groups_0_params_146_: "f32[768][1]cuda:0", L_self_param_groups_0_params_147_: "f32[768][1]cuda:0", L_self_state_list_L_self_state_keys_1_step_: "f32[][]cuda:0", L_self_state_list_L_self_state_keys_1_exp_avg_: "f32[1024, 768][768, 1]cuda:0", L_self_state_list_L_self_state_keys_1_exp_avg_sq_: "f32[1024, 768][768, 1]cuda:0", L_self_param_groups_0_params_0_grad: "f32[50304, 768][768, 1]cuda:0", L_self_param_groups_0_params_1_grad: "f32[1024, 768][768, 1]cuda:0", L_self_param_groups_0_params_2_grad: "f32[768][1]cuda:0", L_self_param_groups_0_params_3_grad: "f32[768][1]cuda:0", L_self_param_groups_0_params_4_grad: "f32[2304, 768][768, 1]cuda:0", L_self_param_groups_0_params_5_grad: "f32[2304][1]cuda:0", L_self_param_groups_0_params_6_grad: "f32[768, 768][768, 1]cuda:0", L_self_param_groups_0_params_7_grad: "f32[768][1]cuda:0", L_self_param_groups_0_params_8_grad: "f32[768][1]cuda:0", L_self_param_groups_0_params_9_grad: "f32[768][1]cuda:0", L_self_param_groups_0_params_10_grad: "f32[3072, 768][768, 1]cuda:0", L_self_param_groups_0_params_11_grad: "f32[3072][1]cuda:0", L_self_param_groups_0_params_12_grad: "f32[768, 3072][3072, 1]cuda:0", L_self_param_groups_0_params_13_grad: "f32[768][1]cuda:0", L_self_param_groups_0_params_14_grad: "f32[768][1]cuda:0", L_self_param_groups_0_params_15_grad: "f32[768][1]cuda:0", 
L_self_param_groups_0_params_16_grad: "f32[2304, 768][768, 1]cuda:0", L_self_param_groups_0_params_17_grad: "f32[2304][1]cuda:0", L_self_param_groups_0_params_18_grad: "f32[768, 768][768, 1]cuda:0", L_self_param_groups_0_params_19_grad: "f32[768][1]cuda:0", L_self_param_groups_0_params_20_grad: "f32[768][1]cuda:0", L_self_param_groups_0_params_21_grad: "f32[768][1]cuda:0", L_self_param_groups_0_params_22_grad: "f32[3072, 768][768, 1]cuda:0", L_self_param_groups_0_params_23_grad: "f32[3072][1]cuda:0", L_self_param_groups_0_params_24_grad: "f32[768, 3072][3072, 1]cuda:0", L_self_param_groups_0_params_25_grad: "f32[768][1]cuda:0", L_self_param_groups_0_params_26_grad: "f32[768][1]cuda:0", L_self_param_groups_0_params_27_grad: "f32[768][1]cuda:0", L_self_param_groups_0_params_28_grad: "f32[2304, 768][768, 1]cuda:0", L_self_param_groups_0_params_29_grad: "f32[2304][1]cuda:0", L_self_param_groups_0_params_30_grad: "f32[768, 768][768, 1]cuda:0", L_self_param_groups_0_params_31_grad: "f32[768][1]cuda:0", L_self_param_groups_0_params_32_grad: "f32[768][1]cuda:0", L_self_param_groups_0_params_33_grad: "f32[768][1]cuda:0", L_self_param_groups_0_params_34_grad: "f32[3072, 768][768, 1]cuda:0", L_self_param_groups_0_params_35_grad: "f32[3072][1]cuda:0", L_self_param_groups_0_params_36_grad: "f32[768, 3072][3072, 1]cuda:0", L_self_param_groups_0_params_37_grad: "f32[768][1]cuda:0", L_self_param_groups_0_params_38_grad: "f32[768][1]cuda:0", L_self_param_groups_0_params_39_grad: "f32[768][1]cuda:0", L_self_param_groups_0_params_40_grad: "f32[2304, 768][768, 1]cuda:0", L_self_param_groups_0_params_41_grad: "f32[2304][1]cuda:0", L_self_param_groups_0_params_42_grad: "f32[768, 768][768, 1]cuda:0", L_self_param_groups_0_params_43_grad: "f32[768][1]cuda:0", L_self_param_groups_0_params_44_grad: "f32[768][1]cuda:0", L_self_param_groups_0_params_45_grad: "f32[768][1]cuda:0", L_self_param_groups_0_params_46_grad: "f32[3072, 768][768, 1]cuda:0", L_self_param_groups_0_params_47_grad: "f32[3072][1]cuda:0", L_self_param_groups_0_params_48_grad: "f32[768, 3072][3072, 1]cuda:0", L_self_param_groups_0_params_49_grad: "f32[768][1]cuda:0", L_self_param_groups_0_params_50_grad: "f32[768][1]cuda:0", L_self_param_groups_0_params_51_grad: "f32[768][1]cuda:0", L_self_param_groups_0_params_52_grad: "f32[2304, 768][768, 1]cuda:0", L_self_param_groups_0_params_53_grad: "f32[2304][1]cuda:0", L_self_param_groups_0_params_54_grad: "f32[768, 768][768, 1]cuda:0", L_self_param_groups_0_params_55_grad: "f32[768][1]cuda:0", L_self_param_groups_0_params_56_grad: "f32[768][1]cuda:0", L_self_param_groups_0_params_57_grad: "f32[768][1]cuda:0", L_self_param_groups_0_params_58_grad: "f32[3072, 768][768, 1]cuda:0", L_self_param_groups_0_params_59_grad: "f32[3072][1]cuda:0", L_self_param_groups_0_params_60_grad: "f32[768, 3072][3072, 1]cuda:0", L_self_param_groups_0_params_61_grad: "f32[768][1]cuda:0", L_self_param_groups_0_params_62_grad: "f32[768][1]cuda:0", L_self_param_groups_0_params_63_grad: "f32[768][1]cuda:0", L_self_param_groups_0_params_64_grad: "f32[2304, 768][768, 1]cuda:0", L_self_param_groups_0_params_65_grad: "f32[2304][1]cuda:0", L_self_param_groups_0_params_66_grad: "f32[768, 768][768, 1]cuda:0", L_self_param_groups_0_params_67_grad: "f32[768][1]cuda:0", L_self_param_groups_0_params_68_grad: "f32[768][1]cuda:0", L_self_param_groups_0_params_69_grad: "f32[768][1]cuda:0", L_self_param_groups_0_params_70_grad: "f32[3072, 768][768, 1]cuda:0", L_self_param_groups_0_params_71_grad: "f32[3072][1]cuda:0", 
L_self_param_groups_0_params_72_grad: "f32[768, 3072][3072, 1]cuda:0", L_self_param_groups_0_params_73_grad: "f32[768][1]cuda:0", L_self_param_groups_0_params_74_grad: "f32[768][1]cuda:0", L_self_param_groups_0_params_75_grad: "f32[768][1]cuda:0", L_self_param_groups_0_params_76_grad: "f32[2304, 768][768, 1]cuda:0", L_self_param_groups_0_params_77_grad: "f32[2304][1]cuda:0", L_self_param_groups_0_params_78_grad: "f32[768, 768][768, 1]cuda:0", L_self_param_groups_0_params_79_grad: "f32[768][1]cuda:0", L_self_param_groups_0_params_80_grad: "f32[768][1]cuda:0", L_self_param_groups_0_params_81_grad: "f32[768][1]cuda:0", L_self_param_groups_0_params_82_grad: "f32[3072, 768][768, 1]cuda:0", L_self_param_groups_0_params_83_grad: "f32[3072][1]cuda:0", L_self_param_groups_0_params_84_grad: "f32[768, 3072][3072, 1]cuda:0", L_self_param_groups_0_params_85_grad: "f32[768][1]cuda:0", L_self_param_groups_0_params_86_grad: "f32[768][1]cuda:0", L_self_param_groups_0_params_87_grad: "f32[768][1]cuda:0", L_self_param_groups_0_params_88_grad: "f32[2304, 768][768, 1]cuda:0", L_self_param_groups_0_params_89_grad: "f32[2304][1]cuda:0", L_self_param_groups_0_params_90_grad: "f32[768, 768][768, 1]cuda:0", L_self_param_groups_0_params_91_grad: "f32[768][1]cuda:0", L_self_param_groups_0_params_92_grad: "f32[768][1]cuda:0", L_self_param_groups_0_params_93_grad: "f32[768][1]cuda:0", L_self_param_groups_0_params_94_grad: "f32[3072, 768][768, 1]cuda:0", L_self_param_groups_0_params_95_grad: "f32[3072][1]cuda:0", L_self_param_groups_0_params_96_grad: "f32[768, 3072][3072, 1]cuda:0", L_self_param_groups_0_params_97_grad: "f32[768][1]cuda:0", L_self_param_groups_0_params_98_grad: "f32[768][1]cuda:0", L_self_param_groups_0_params_99_grad: "f32[768][1]cuda:0", L_self_param_groups_0_params_100_grad: "f32[2304, 768][768, 1]cuda:0", L_self_param_groups_0_params_101_grad: "f32[2304][1]cuda:0", L_self_param_groups_0_params_102_grad: "f32[768, 768][768, 1]cuda:0", L_self_param_groups_0_params_103_grad: "f32[768][1]cuda:0", L_self_param_groups_0_params_104_grad: "f32[768][1]cuda:0", L_self_param_groups_0_params_105_grad: "f32[768][1]cuda:0", L_self_param_groups_0_params_106_grad: "f32[3072, 768][768, 1]cuda:0", L_self_param_groups_0_params_107_grad: "f32[3072][1]cuda:0", L_self_param_groups_0_params_108_grad: "f32[768, 3072][3072, 1]cuda:0", L_self_param_groups_0_params_109_grad: "f32[768][1]cuda:0", L_self_param_groups_0_params_110_grad: "f32[768][1]cuda:0", L_self_param_groups_0_params_111_grad: "f32[768][1]cuda:0", L_self_param_groups_0_params_112_grad: "f32[2304, 768][768, 1]cuda:0", L_self_param_groups_0_params_113_grad: "f32[2304][1]cuda:0", L_self_param_groups_0_params_114_grad: "f32[768, 768][768, 1]cuda:0", L_self_param_groups_0_params_115_grad: "f32[768][1]cuda:0", L_self_param_groups_0_params_116_grad: "f32[768][1]cuda:0", L_self_param_groups_0_params_117_grad: "f32[768][1]cuda:0", L_self_param_groups_0_params_118_grad: "f32[3072, 768][768, 1]cuda:0", L_self_param_groups_0_params_119_grad: "f32[3072][1]cuda:0", L_self_param_groups_0_params_120_grad: "f32[768, 3072][3072, 1]cuda:0", L_self_param_groups_0_params_121_grad: "f32[768][1]cuda:0", L_self_param_groups_0_params_122_grad: "f32[768][1]cuda:0", L_self_param_groups_0_params_123_grad: "f32[768][1]cuda:0", L_self_param_groups_0_params_124_grad: "f32[2304, 768][768, 1]cuda:0", L_self_param_groups_0_params_125_grad: "f32[2304][1]cuda:0", L_self_param_groups_0_params_126_grad: "f32[768, 768][768, 1]cuda:0", L_self_param_groups_0_params_127_grad: "f32[768][1]cuda:0", 
L_self_param_groups_0_params_128_grad: "f32[768][1]cuda:0", L_self_param_groups_0_params_129_grad: "f32[768][1]cuda:0", L_self_param_groups_0_params_130_grad: "f32[3072, 768][768, 1]cuda:0", L_self_param_groups_0_params_131_grad: "f32[3072][1]cuda:0", L_self_param_groups_0_params_132_grad: "f32[768, 3072][3072, 1]cuda:0", L_self_param_groups_0_params_133_grad: "f32[768][1]cuda:0", L_self_param_groups_0_params_134_grad: "f32[768][1]cuda:0", L_self_param_groups_0_params_135_grad: "f32[768][1]cuda:0", L_self_param_groups_0_params_136_grad: "f32[2304, 768][768, 1]cuda:0", L_self_param_groups_0_params_137_grad: "f32[2304][1]cuda:0", L_self_param_groups_0_params_138_grad: "f32[768, 768][768, 1]cuda:0", L_self_param_groups_0_params_139_grad: "f32[768][1]cuda:0", L_self_param_groups_0_params_140_grad: "f32[768][1]cuda:0", L_self_param_groups_0_params_141_grad: "f32[768][1]cuda:0", L_self_param_groups_0_params_142_grad: "f32[3072, 768][768, 1]cuda:0", L_self_param_groups_0_params_143_grad: "f32[3072][1]cuda:0", L_self_param_groups_0_params_144_grad: "f32[768, 3072][3072, 1]cuda:0", L_self_param_groups_0_params_145_grad: "f32[768][1]cuda:0", L_self_param_groups_0_params_146_grad: "f32[768][1]cuda:0", L_self_param_groups_0_params_147_grad: "f32[768][1]cuda:0", L_self_state_list_L_self_state_keys_0_exp_avg_: "f32[50304, 768][768, 1]cuda:0", L_self_state_list_L_self_state_keys_2_exp_avg_: "f32[768][1]cuda:0", L_self_state_list_L_self_state_keys_3_exp_avg_: "f32[768][1]cuda:0", L_self_state_list_L_self_state_keys_4_exp_avg_: "f32[2304, 768][768, 1]cuda:0", L_self_state_list_L_self_state_keys_5_exp_avg_: "f32[2304][1]cuda:0", L_self_state_list_L_self_state_keys_6_exp_avg_: "f32[768, 768][768, 1]cuda:0", L_self_state_list_L_self_state_keys_7_exp_avg_: "f32[768][1]cuda:0", L_self_state_list_L_self_state_keys_8_exp_avg_: "f32[768][1]cuda:0", L_self_state_list_L_self_state_keys_9_exp_avg_: "f32[768][1]cuda:0", L_self_state_list_L_self_state_keys_10_exp_avg_: "f32[3072, 768][768, 1]cuda:0", L_self_state_list_L_self_state_keys_11_exp_avg_: "f32[3072][1]cuda:0", L_self_state_list_L_self_state_keys_12_exp_avg_: "f32[768, 3072][3072, 1]cuda:0", L_self_state_list_L_self_state_keys_13_exp_avg_: "f32[768][1]cuda:0", L_self_state_list_L_self_state_keys_14_exp_avg_: "f32[768][1]cuda:0", L_self_state_list_L_self_state_keys_15_exp_avg_: "f32[768][1]cuda:0", L_self_state_list_L_self_state_keys_16_exp_avg_: "f32[2304, 768][768, 1]cuda:0", L_self_state_list_L_self_state_keys_17_exp_avg_: "f32[2304][1]cuda:0", L_self_state_list_L_self_state_keys_18_exp_avg_: "f32[768, 768][768, 1]cuda:0", L_self_state_list_L_self_state_keys_19_exp_avg_: "f32[768][1]cuda:0", L_self_state_list_L_self_state_keys_20_exp_avg_: "f32[768][1]cuda:0", L_self_state_list_L_self_state_keys_21_exp_avg_: "f32[768][1]cuda:0", L_self_state_list_L_self_state_keys_22_exp_avg_: "f32[3072, 768][768, 1]cuda:0", L_self_state_list_L_self_state_keys_23_exp_avg_: "f32[3072][1]cuda:0", L_self_state_list_L_self_state_keys_24_exp_avg_: "f32[768, 3072][3072, 1]cuda:0", L_self_state_list_L_self_state_keys_25_exp_avg_: "f32[768][1]cuda:0", L_self_state_list_L_self_state_keys_26_exp_avg_: "f32[768][1]cuda:0", L_self_state_list_L_self_state_keys_27_exp_avg_: "f32[768][1]cuda:0", L_self_state_list_L_self_state_keys_28_exp_avg_: "f32[2304, 768][768, 1]cuda:0", L_self_state_list_L_self_state_keys_29_exp_avg_: "f32[2304][1]cuda:0", L_self_state_list_L_self_state_keys_30_exp_avg_: "f32[768, 768][768, 1]cuda:0", L_self_state_list_L_self_state_keys_31_exp_avg_: 
"f32[768][1]cuda:0", L_self_state_list_L_self_state_keys_32_exp_avg_: "f32[768][1]cuda:0", L_self_state_list_L_self_state_keys_33_exp_avg_: "f32[768][1]cuda:0", L_self_state_list_L_self_state_keys_34_exp_avg_: "f32[3072, 768][768, 1]cuda:0", L_self_state_list_L_self_state_keys_35_exp_avg_: "f32[3072][1]cuda:0", L_self_state_list_L_self_state_keys_36_exp_avg_: "f32[768, 3072][3072, 1]cuda:0", L_self_state_list_L_self_state_keys_37_exp_avg_: "f32[768][1]cuda:0", L_self_state_list_L_self_state_keys_38_exp_avg_: "f32[768][1]cuda:0", L_self_state_list_L_self_state_keys_39_exp_avg_: "f32[768][1]cuda:0", L_self_state_list_L_self_state_keys_40_exp_avg_: "f32[2304, 768][768, 1]cuda:0", L_self_state_list_L_self_state_keys_41_exp_avg_: "f32[2304][1]cuda:0", L_self_state_list_L_self_state_keys_42_exp_avg_: "f32[768, 768][768, 1]cuda:0", L_self_state_list_L_self_state_keys_43_exp_avg_: "f32[768][1]cuda:0", L_self_state_list_L_self_state_keys_44_exp_avg_: "f32[768][1]cuda:0", L_self_state_list_L_self_state_keys_45_exp_avg_: "f32[768][1]cuda:0", L_self_state_list_L_self_state_keys_46_exp_avg_: "f32[3072, 768][768, 1]cuda:0", L_self_state_list_L_self_state_keys_47_exp_avg_: "f32[3072][1]cuda:0", L_self_state_list_L_self_state_keys_48_exp_avg_: "f32[768, 3072][3072, 1]cuda:0", L_self_state_list_L_self_state_keys_49_exp_avg_: "f32[768][1]cuda:0", L_self_state_list_L_self_state_keys_50_exp_avg_: "f32[768][1]cuda:0", L_self_state_list_L_self_state_keys_51_exp_avg_: "f32[768][1]cuda:0", L_self_state_list_L_self_state_keys_52_exp_avg_: "f32[2304, 768][768, 1]cuda:0", L_self_state_list_L_self_state_keys_53_exp_avg_: "f32[2304][1]cuda:0", L_self_state_list_L_self_state_keys_54_exp_avg_: "f32[768, 768][768, 1]cuda:0", L_self_state_list_L_self_state_keys_55_exp_avg_: "f32[768][1]cuda:0", L_self_state_list_L_self_state_keys_56_exp_avg_: "f32[768][1]cuda:0", L_self_state_list_L_self_state_keys_57_exp_avg_: "f32[768][1]cuda:0", L_self_state_list_L_self_state_keys_58_exp_avg_: "f32[3072, 768][768, 1]cuda:0", L_self_state_list_L_self_state_keys_59_exp_avg_: "f32[3072][1]cuda:0", L_self_state_list_L_self_state_keys_60_exp_avg_: "f32[768, 3072][3072, 1]cuda:0", L_self_state_list_L_self_state_keys_61_exp_avg_: "f32[768][1]cuda:0", L_self_state_list_L_self_state_keys_62_exp_avg_: "f32[768][1]cuda:0", L_self_state_list_L_self_state_keys_63_exp_avg_: "f32[768][1]cuda:0", L_self_state_list_L_self_state_keys_64_exp_avg_: "f32[2304, 768][768, 1]cuda:0", L_self_state_list_L_self_state_keys_65_exp_avg_: "f32[2304][1]cuda:0", L_self_state_list_L_self_state_keys_66_exp_avg_: "f32[768, 768][768, 1]cuda:0", L_self_state_list_L_self_state_keys_67_exp_avg_: "f32[768][1]cuda:0", L_self_state_list_L_self_state_keys_68_exp_avg_: "f32[768][1]cuda:0", L_self_state_list_L_self_state_keys_69_exp_avg_: "f32[768][1]cuda:0", L_self_state_list_L_self_state_keys_70_exp_avg_: "f32[3072, 768][768, 1]cuda:0", L_self_state_list_L_self_state_keys_71_exp_avg_: "f32[3072][1]cuda:0", L_self_state_list_L_self_state_keys_72_exp_avg_: "f32[768, 3072][3072, 1]cuda:0", L_self_state_list_L_self_state_keys_73_exp_avg_: "f32[768][1]cuda:0", L_self_state_list_L_self_state_keys_74_exp_avg_: "f32[768][1]cuda:0", L_self_state_list_L_self_state_keys_75_exp_avg_: "f32[768][1]cuda:0", L_self_state_list_L_self_state_keys_76_exp_avg_: "f32[2304, 768][768, 1]cuda:0", L_self_state_list_L_self_state_keys_77_exp_avg_: "f32[2304][1]cuda:0", L_self_state_list_L_self_state_keys_78_exp_avg_: "f32[768, 768][768, 1]cuda:0", L_self_state_list_L_self_state_keys_79_exp_avg_: 
"f32[768][1]cuda:0", L_self_state_list_L_self_state_keys_80_exp_avg_: "f32[768][1]cuda:0", L_self_state_list_L_self_state_keys_81_exp_avg_: "f32[768][1]cuda:0", L_self_state_list_L_self_state_keys_82_exp_avg_: "f32[3072, 768][768, 1]cuda:0", L_self_state_list_L_self_state_keys_83_exp_avg_: "f32[3072][1]cuda:0", L_self_state_list_L_self_state_keys_84_exp_avg_: "f32[768, 3072][3072, 1]cuda:0", L_self_state_list_L_self_state_keys_85_exp_avg_: "f32[768][1]cuda:0", L_self_state_list_L_self_state_keys_86_exp_avg_: "f32[768][1]cuda:0", L_self_state_list_L_self_state_keys_87_exp_avg_: "f32[768][1]cuda:0", L_self_state_list_L_self_state_keys_88_exp_avg_: "f32[2304, 768][768, 1]cuda:0", L_self_state_list_L_self_state_keys_89_exp_avg_: "f32[2304][1]cuda:0", L_self_state_list_L_self_state_keys_90_exp_avg_: "f32[768, 768][768, 1]cuda:0", L_self_state_list_L_self_state_keys_91_exp_avg_: "f32[768][1]cuda:0", L_self_state_list_L_self_state_keys_92_exp_avg_: "f32[768][1]cuda:0", L_self_state_list_L_self_state_keys_93_exp_avg_: "f32[768][1]cuda:0", L_self_state_list_L_self_state_keys_94_exp_avg_: "f32[3072, 768][768, 1]cuda:0", L_self_state_list_L_self_state_keys_95_exp_avg_: "f32[3072][1]cuda:0", L_self_state_list_L_self_state_keys_96_exp_avg_: "f32[768, 3072][3072, 1]cuda:0", L_self_state_list_L_self_state_keys_97_exp_avg_: "f32[768][1]cuda:0", L_self_state_list_L_self_state_keys_98_exp_avg_: "f32[768][1]cuda:0", L_self_state_list_L_self_state_keys_99_exp_avg_: "f32[768][1]cuda:0", L_self_state_list_L_self_state_keys_100_exp_avg_: "f32[2304, 768][768, 1]cuda:0", L_self_state_list_L_self_state_keys_101_exp_avg_: "f32[2304][1]cuda:0", L_self_state_list_L_self_state_keys_102_exp_avg_: "f32[768, 768][768, 1]cuda:0", L_self_state_list_L_self_state_keys_103_exp_avg_: "f32[768][1]cuda:0", L_self_state_list_L_self_state_keys_104_exp_avg_: "f32[768][1]cuda:0", L_self_state_list_L_self_state_keys_105_exp_avg_: "f32[768][1]cuda:0", L_self_state_list_L_self_state_keys_106_exp_avg_: "f32[3072, 768][768, 1]cuda:0", L_self_state_list_L_self_state_keys_107_exp_avg_: "f32[3072][1]cuda:0", L_self_state_list_L_self_state_keys_108_exp_avg_: "f32[768, 3072][3072, 1]cuda:0", L_self_state_list_L_self_state_keys_109_exp_avg_: "f32[768][1]cuda:0", L_self_state_list_L_self_state_keys_110_exp_avg_: "f32[768][1]cuda:0", L_self_state_list_L_self_state_keys_111_exp_avg_: "f32[768][1]cuda:0", L_self_state_list_L_self_state_keys_112_exp_avg_: "f32[2304, 768][768, 1]cuda:0", L_self_state_list_L_self_state_keys_113_exp_avg_: "f32[2304][1]cuda:0", L_self_state_list_L_self_state_keys_114_exp_avg_: "f32[768, 768][768, 1]cuda:0", L_self_state_list_L_self_state_keys_115_exp_avg_: "f32[768][1]cuda:0", L_self_state_list_L_self_state_keys_116_exp_avg_: "f32[768][1]cuda:0", L_self_state_list_L_self_state_keys_117_exp_avg_: "f32[768][1]cuda:0", L_self_state_list_L_self_state_keys_118_exp_avg_: "f32[3072, 768][768, 1]cuda:0", L_self_state_list_L_self_state_keys_119_exp_avg_: "f32[3072][1]cuda:0", L_self_state_list_L_self_state_keys_120_exp_avg_: "f32[768, 3072][3072, 1]cuda:0", L_self_state_list_L_self_state_keys_121_exp_avg_: "f32[768][1]cuda:0", L_self_state_list_L_self_state_keys_122_exp_avg_: "f32[768][1]cuda:0", L_self_state_list_L_self_state_keys_123_exp_avg_: "f32[768][1]cuda:0", L_self_state_list_L_self_state_keys_124_exp_avg_: "f32[2304, 768][768, 1]cuda:0", L_self_state_list_L_self_state_keys_125_exp_avg_: "f32[2304][1]cuda:0", L_self_state_list_L_self_state_keys_126_exp_avg_: "f32[768, 768][768, 1]cuda:0", 
L_self_state_list_L_self_state_keys_127_exp_avg_: "f32[768][1]cuda:0", L_self_state_list_L_self_state_keys_128_exp_avg_: "f32[768][1]cuda:0", L_self_state_list_L_self_state_keys_129_exp_avg_: "f32[768][1]cuda:0", L_self_state_list_L_self_state_keys_130_exp_avg_: "f32[3072, 768][768, 1]cuda:0", L_self_state_list_L_self_state_keys_131_exp_avg_: "f32[3072][1]cuda:0", L_self_state_list_L_self_state_keys_132_exp_avg_: "f32[768, 3072][3072, 1]cuda:0", L_self_state_list_L_self_state_keys_133_exp_avg_: "f32[768][1]cuda:0", L_self_state_list_L_self_state_keys_134_exp_avg_: "f32[768][1]cuda:0", L_self_state_list_L_self_state_keys_135_exp_avg_: "f32[768][1]cuda:0", L_self_state_list_L_self_state_keys_136_exp_avg_: "f32[2304, 768][768, 1]cuda:0", L_self_state_list_L_self_state_keys_137_exp_avg_: "f32[2304][1]cuda:0", L_self_state_list_L_self_state_keys_138_exp_avg_: "f32[768, 768][768, 1]cuda:0", L_self_state_list_L_self_state_keys_139_exp_avg_: "f32[768][1]cuda:0", L_self_state_list_L_self_state_keys_140_exp_avg_: "f32[768][1]cuda:0", L_self_state_list_L_self_state_keys_141_exp_avg_: "f32[768][1]cuda:0", L_self_state_list_L_self_state_keys_142_exp_avg_: "f32[3072, 768][768, 1]cuda:0", L_self_state_list_L_self_state_keys_143_exp_avg_: "f32[3072][1]cuda:0", L_self_state_list_L_self_state_keys_144_exp_avg_: "f32[768, 3072][3072, 1]cuda:0", L_self_state_list_L_self_state_keys_145_exp_avg_: "f32[768][1]cuda:0", L_self_state_list_L_self_state_keys_146_exp_avg_: "f32[768][1]cuda:0", L_self_state_list_L_self_state_keys_147_exp_avg_: "f32[768][1]cuda:0", L_self_state_list_L_self_state_keys_0_exp_avg_sq_: "f32[50304, 768][768, 1]cuda:0", L_self_state_list_L_self_state_keys_2_exp_avg_sq_: "f32[768][1]cuda:0", L_self_state_list_L_self_state_keys_3_exp_avg_sq_: "f32[768][1]cuda:0", L_self_state_list_L_self_state_keys_4_exp_avg_sq_: "f32[2304, 768][768, 1]cuda:0", L_self_state_list_L_self_state_keys_5_exp_avg_sq_: "f32[2304][1]cuda:0", L_self_state_list_L_self_state_keys_6_exp_avg_sq_: "f32[768, 768][768, 1]cuda:0", L_self_state_list_L_self_state_keys_7_exp_avg_sq_: "f32[768][1]cuda:0", L_self_state_list_L_self_state_keys_8_exp_avg_sq_: "f32[768][1]cuda:0", L_self_state_list_L_self_state_keys_9_exp_avg_sq_: "f32[768][1]cuda:0", L_self_state_list_L_self_state_keys_10_exp_avg_sq_: "f32[3072, 768][768, 1]cuda:0", L_self_state_list_L_self_state_keys_11_exp_avg_sq_: "f32[3072][1]cuda:0", L_self_state_list_L_self_state_keys_12_exp_avg_sq_: "f32[768, 3072][3072, 1]cuda:0", L_self_state_list_L_self_state_keys_13_exp_avg_sq_: "f32[768][1]cuda:0", L_self_state_list_L_self_state_keys_14_exp_avg_sq_: "f32[768][1]cuda:0", L_self_state_list_L_self_state_keys_15_exp_avg_sq_: "f32[768][1]cuda:0", L_self_state_list_L_self_state_keys_16_exp_avg_sq_: "f32[2304, 768][768, 1]cuda:0", L_self_state_list_L_self_state_keys_17_exp_avg_sq_: "f32[2304][1]cuda:0", L_self_state_list_L_self_state_keys_18_exp_avg_sq_: "f32[768, 768][768, 1]cuda:0", L_self_state_list_L_self_state_keys_19_exp_avg_sq_: "f32[768][1]cuda:0", L_self_state_list_L_self_state_keys_20_exp_avg_sq_: "f32[768][1]cuda:0", L_self_state_list_L_self_state_keys_21_exp_avg_sq_: "f32[768][1]cuda:0", L_self_state_list_L_self_state_keys_22_exp_avg_sq_: "f32[3072, 768][768, 1]cuda:0", L_self_state_list_L_self_state_keys_23_exp_avg_sq_: "f32[3072][1]cuda:0", L_self_state_list_L_self_state_keys_24_exp_avg_sq_: "f32[768, 3072][3072, 1]cuda:0", L_self_state_list_L_self_state_keys_25_exp_avg_sq_: "f32[768][1]cuda:0", L_self_state_list_L_self_state_keys_26_exp_avg_sq_: "f32[768][1]cuda:0", 
L_self_state_list_L_self_state_keys_27_exp_avg_sq_: "f32[768][1]cuda:0", L_self_state_list_L_self_state_keys_28_exp_avg_sq_: "f32[2304, 768][768, 1]cuda:0", L_self_state_list_L_self_state_keys_29_exp_avg_sq_: "f32[2304][1]cuda:0", L_self_state_list_L_self_state_keys_30_exp_avg_sq_: "f32[768, 768][768, 1]cuda:0", L_self_state_list_L_self_state_keys_31_exp_avg_sq_: "f32[768][1]cuda:0", L_self_state_list_L_self_state_keys_32_exp_avg_sq_: "f32[768][1]cuda:0", L_self_state_list_L_self_state_keys_33_exp_avg_sq_: "f32[768][1]cuda:0", L_self_state_list_L_self_state_keys_34_exp_avg_sq_: "f32[3072, 768][768, 1]cuda:0", L_self_state_list_L_self_state_keys_35_exp_avg_sq_: "f32[3072][1]cuda:0", L_self_state_list_L_self_state_keys_36_exp_avg_sq_: "f32[768, 3072][3072, 1]cuda:0", L_self_state_list_L_self_state_keys_37_exp_avg_sq_: "f32[768][1]cuda:0", L_self_state_list_L_self_state_keys_38_exp_avg_sq_: "f32[768][1]cuda:0", L_self_state_list_L_self_state_keys_39_exp_avg_sq_: "f32[768][1]cuda:0", L_self_state_list_L_self_state_keys_40_exp_avg_sq_: "f32[2304, 768][768, 1]cuda:0", L_self_state_list_L_self_state_keys_41_exp_avg_sq_: "f32[2304][1]cuda:0", L_self_state_list_L_self_state_keys_42_exp_avg_sq_: "f32[768, 768][768, 1]cuda:0", L_self_state_list_L_self_state_keys_43_exp_avg_sq_: "f32[768][1]cuda:0", L_self_state_list_L_self_state_keys_44_exp_avg_sq_: "f32[768][1]cuda:0", L_self_state_list_L_self_state_keys_45_exp_avg_sq_: "f32[768][1]cuda:0", L_self_state_list_L_self_state_keys_46_exp_avg_sq_: "f32[3072, 768][768, 1]cuda:0", L_self_state_list_L_self_state_keys_47_exp_avg_sq_: "f32[3072][1]cuda:0", L_self_state_list_L_self_state_keys_48_exp_avg_sq_: "f32[768, 3072][3072, 1]cuda:0", L_self_state_list_L_self_state_keys_49_exp_avg_sq_: "f32[768][1]cuda:0", L_self_state_list_L_self_state_keys_50_exp_avg_sq_: "f32[768][1]cuda:0", L_self_state_list_L_self_state_keys_51_exp_avg_sq_: "f32[768][1]cuda:0", L_self_state_list_L_self_state_keys_52_exp_avg_sq_: "f32[2304, 768][768, 1]cuda:0", L_self_state_list_L_self_state_keys_53_exp_avg_sq_: "f32[2304][1]cuda:0", L_self_state_list_L_self_state_keys_54_exp_avg_sq_: "f32[768, 768][768, 1]cuda:0", L_self_state_list_L_self_state_keys_55_exp_avg_sq_: "f32[768][1]cuda:0", L_self_state_list_L_self_state_keys_56_exp_avg_sq_: "f32[768][1]cuda:0", L_self_state_list_L_self_state_keys_57_exp_avg_sq_: "f32[768][1]cuda:0", L_self_state_list_L_self_state_keys_58_exp_avg_sq_: "f32[3072, 768][768, 1]cuda:0", L_self_state_list_L_self_state_keys_59_exp_avg_sq_: "f32[3072][1]cuda:0", L_self_state_list_L_self_state_keys_60_exp_avg_sq_: "f32[768, 3072][3072, 1]cuda:0", L_self_state_list_L_self_state_keys_61_exp_avg_sq_: "f32[768][1]cuda:0", L_self_state_list_L_self_state_keys_62_exp_avg_sq_: "f32[768][1]cuda:0", L_self_state_list_L_self_state_keys_63_exp_avg_sq_: "f32[768][1]cuda:0", L_self_state_list_L_self_state_keys_64_exp_avg_sq_: "f32[2304, 768][768, 1]cuda:0", L_self_state_list_L_self_state_keys_65_exp_avg_sq_: "f32[2304][1]cuda:0", L_self_state_list_L_self_state_keys_66_exp_avg_sq_: "f32[768, 768][768, 1]cuda:0", L_self_state_list_L_self_state_keys_67_exp_avg_sq_: "f32[768][1]cuda:0", L_self_state_list_L_self_state_keys_68_exp_avg_sq_: "f32[768][1]cuda:0", L_self_state_list_L_self_state_keys_69_exp_avg_sq_: "f32[768][1]cuda:0", L_self_state_list_L_self_state_keys_70_exp_avg_sq_: "f32[3072, 768][768, 1]cuda:0", L_self_state_list_L_self_state_keys_71_exp_avg_sq_: "f32[3072][1]cuda:0", L_self_state_list_L_self_state_keys_72_exp_avg_sq_: "f32[768, 3072][3072, 1]cuda:0", 
L_self_state_list_L_self_state_keys_73_exp_avg_sq_: "f32[768][1]cuda:0", L_self_state_list_L_self_state_keys_74_exp_avg_sq_: "f32[768][1]cuda:0", L_self_state_list_L_self_state_keys_75_exp_avg_sq_: "f32[768][1]cuda:0", L_self_state_list_L_self_state_keys_76_exp_avg_sq_: "f32[2304, 768][768, 1]cuda:0", L_self_state_list_L_self_state_keys_77_exp_avg_sq_: "f32[2304][1]cuda:0", L_self_state_list_L_self_state_keys_78_exp_avg_sq_: "f32[768, 768][768, 1]cuda:0", L_self_state_list_L_self_state_keys_79_exp_avg_sq_: "f32[768][1]cuda:0", L_self_state_list_L_self_state_keys_80_exp_avg_sq_: "f32[768][1]cuda:0", L_self_state_list_L_self_state_keys_81_exp_avg_sq_: "f32[768][1]cuda:0", L_self_state_list_L_self_state_keys_82_exp_avg_sq_: "f32[3072, 768][768, 1]cuda:0", L_self_state_list_L_self_state_keys_83_exp_avg_sq_: "f32[3072][1]cuda:0", L_self_state_list_L_self_state_keys_84_exp_avg_sq_: "f32[768, 3072][3072, 1]cuda:0", L_self_state_list_L_self_state_keys_85_exp_avg_sq_: "f32[768][1]cuda:0", L_self_state_list_L_self_state_keys_86_exp_avg_sq_: "f32[768][1]cuda:0", L_self_state_list_L_self_state_keys_87_exp_avg_sq_: "f32[768][1]cuda:0", L_self_state_list_L_self_state_keys_88_exp_avg_sq_: "f32[2304, 768][768, 1]cuda:0", L_self_state_list_L_self_state_keys_89_exp_avg_sq_: "f32[2304][1]cuda:0", L_self_state_list_L_self_state_keys_90_exp_avg_sq_: "f32[768, 768][768, 1]cuda:0", L_self_state_list_L_self_state_keys_91_exp_avg_sq_: "f32[768][1]cuda:0", L_self_state_list_L_self_state_keys_92_exp_avg_sq_: "f32[768][1]cuda:0", L_self_state_list_L_self_state_keys_93_exp_avg_sq_: "f32[768][1]cuda:0", L_self_state_list_L_self_state_keys_94_exp_avg_sq_: "f32[3072, 768][768, 1]cuda:0", L_self_state_list_L_self_state_keys_95_exp_avg_sq_: "f32[3072][1]cuda:0", L_self_state_list_L_self_state_keys_96_exp_avg_sq_: "f32[768, 3072][3072, 1]cuda:0", L_self_state_list_L_self_state_keys_97_exp_avg_sq_: "f32[768][1]cuda:0", L_self_state_list_L_self_state_keys_98_exp_avg_sq_: "f32[768][1]cuda:0", L_self_state_list_L_self_state_keys_99_exp_avg_sq_: "f32[768][1]cuda:0", L_self_state_list_L_self_state_keys_100_exp_avg_sq_: "f32[2304, 768][768, 1]cuda:0", L_self_state_list_L_self_state_keys_101_exp_avg_sq_: "f32[2304][1]cuda:0", L_self_state_list_L_self_state_keys_102_exp_avg_sq_: "f32[768, 768][768, 1]cuda:0", L_self_state_list_L_self_state_keys_103_exp_avg_sq_: "f32[768][1]cuda:0", L_self_state_list_L_self_state_keys_104_exp_avg_sq_: "f32[768][1]cuda:0", L_self_state_list_L_self_state_keys_105_exp_avg_sq_: "f32[768][1]cuda:0", L_self_state_list_L_self_state_keys_106_exp_avg_sq_: "f32[3072, 768][768, 1]cuda:0", L_self_state_list_L_self_state_keys_107_exp_avg_sq_: "f32[3072][1]cuda:0", L_self_state_list_L_self_state_keys_108_exp_avg_sq_: "f32[768, 3072][3072, 1]cuda:0", L_self_state_list_L_self_state_keys_109_exp_avg_sq_: "f32[768][1]cuda:0", L_self_state_list_L_self_state_keys_110_exp_avg_sq_: "f32[768][1]cuda:0", L_self_state_list_L_self_state_keys_111_exp_avg_sq_: "f32[768][1]cuda:0", L_self_state_list_L_self_state_keys_112_exp_avg_sq_: "f32[2304, 768][768, 1]cuda:0", L_self_state_list_L_self_state_keys_113_exp_avg_sq_: "f32[2304][1]cuda:0", L_self_state_list_L_self_state_keys_114_exp_avg_sq_: "f32[768, 768][768, 1]cuda:0", L_self_state_list_L_self_state_keys_115_exp_avg_sq_: "f32[768][1]cuda:0", L_self_state_list_L_self_state_keys_116_exp_avg_sq_: "f32[768][1]cuda:0", L_self_state_list_L_self_state_keys_117_exp_avg_sq_: "f32[768][1]cuda:0", L_self_state_list_L_self_state_keys_118_exp_avg_sq_: "f32[3072, 768][768, 1]cuda:0", 
L_self_state_list_L_self_state_keys_119_exp_avg_sq_: "f32[3072][1]cuda:0", L_self_state_list_L_self_state_keys_120_exp_avg_sq_: "f32[768, 3072][3072, 1]cuda:0", L_self_state_list_L_self_state_keys_121_exp_avg_sq_: "f32[768][1]cuda:0", L_self_state_list_L_self_state_keys_122_exp_avg_sq_: "f32[768][1]cuda:0", L_self_state_list_L_self_state_keys_123_exp_avg_sq_: "f32[768][1]cuda:0", L_self_state_list_L_self_state_keys_124_exp_avg_sq_: "f32[2304, 768][768, 1]cuda:0", L_self_state_list_L_self_state_keys_125_exp_avg_sq_: "f32[2304][1]cuda:0", L_self_state_list_L_self_state_keys_126_exp_avg_sq_: "f32[768, 768][768, 1]cuda:0", L_self_state_list_L_self_state_keys_127_exp_avg_sq_: "f32[768][1]cuda:0", L_self_state_list_L_self_state_keys_128_exp_avg_sq_: "f32[768][1]cuda:0", L_self_state_list_L_self_state_keys_129_exp_avg_sq_: "f32[768][1]cuda:0", L_self_state_list_L_self_state_keys_130_exp_avg_sq_: "f32[3072, 768][768, 1]cuda:0", L_self_state_list_L_self_state_keys_131_exp_avg_sq_: "f32[3072][1]cuda:0", L_self_state_list_L_self_state_keys_132_exp_avg_sq_: "f32[768, 3072][3072, 1]cuda:0", L_self_state_list_L_self_state_keys_133_exp_avg_sq_: "f32[768][1]cuda:0", L_self_state_list_L_self_state_keys_134_exp_avg_sq_: "f32[768][1]cuda:0", L_self_state_list_L_self_state_keys_135_exp_avg_sq_: "f32[768][1]cuda:0", L_self_state_list_L_self_state_keys_136_exp_avg_sq_: "f32[2304, 768][768, 1]cuda:0", L_self_state_list_L_self_state_keys_137_exp_avg_sq_: "f32[2304][1]cuda:0", L_self_state_list_L_self_state_keys_138_exp_avg_sq_: "f32[768, 768][768, 1]cuda:0", L_self_state_list_L_self_state_keys_139_exp_avg_sq_: "f32[768][1]cuda:0", L_self_state_list_L_self_state_keys_140_exp_avg_sq_: "f32[768][1]cuda:0", L_self_state_list_L_self_state_keys_141_exp_avg_sq_: "f32[768][1]cuda:0", L_self_state_list_L_self_state_keys_142_exp_avg_sq_: "f32[3072, 768][768, 1]cuda:0", L_self_state_list_L_self_state_keys_143_exp_avg_sq_: "f32[3072][1]cuda:0", L_self_state_list_L_self_state_keys_144_exp_avg_sq_: "f32[768, 3072][3072, 1]cuda:0", L_self_state_list_L_self_state_keys_145_exp_avg_sq_: "f32[768][1]cuda:0", L_self_state_list_L_self_state_keys_146_exp_avg_sq_: "f32[768][1]cuda:0", L_self_state_list_L_self_state_keys_147_exp_avg_sq_: "f32[768][1]cuda:0", L_self_state_list_L_self_state_keys_0_step_: "f32[][]cuda:0", L_self_state_list_L_self_state_keys_2_step_: "f32[][]cuda:0", L_self_state_list_L_self_state_keys_3_step_: "f32[][]cuda:0", L_self_state_list_L_self_state_keys_4_step_: "f32[][]cuda:0", L_self_state_list_L_self_state_keys_5_step_: "f32[][]cuda:0", L_self_state_list_L_self_state_keys_6_step_: "f32[][]cuda:0", L_self_state_list_L_self_state_keys_7_step_: "f32[][]cuda:0", L_self_state_list_L_self_state_keys_8_step_: "f32[][]cuda:0", L_self_state_list_L_self_state_keys_9_step_: "f32[][]cuda:0", L_self_state_list_L_self_state_keys_10_step_: "f32[][]cuda:0", L_self_state_list_L_self_state_keys_11_step_: "f32[][]cuda:0", L_self_state_list_L_self_state_keys_12_step_: "f32[][]cuda:0", L_self_state_list_L_self_state_keys_13_step_: "f32[][]cuda:0", L_self_state_list_L_self_state_keys_14_step_: "f32[][]cuda:0", L_self_state_list_L_self_state_keys_15_step_: "f32[][]cuda:0", L_self_state_list_L_self_state_keys_16_step_: "f32[][]cuda:0", L_self_state_list_L_self_state_keys_17_step_: "f32[][]cuda:0", L_self_state_list_L_self_state_keys_18_step_: "f32[][]cuda:0", L_self_state_list_L_self_state_keys_19_step_: "f32[][]cuda:0", L_self_state_list_L_self_state_keys_20_step_: "f32[][]cuda:0", L_self_state_list_L_self_state_keys_21_step_: 
"f32[][]cuda:0", L_self_state_list_L_self_state_keys_22_step_: "f32[][]cuda:0", L_self_state_list_L_self_state_keys_23_step_: "f32[][]cuda:0", L_self_state_list_L_self_state_keys_24_step_: "f32[][]cuda:0", L_self_state_list_L_self_state_keys_25_step_: "f32[][]cuda:0", L_self_state_list_L_self_state_keys_26_step_: "f32[][]cuda:0", L_self_state_list_L_self_state_keys_27_step_: "f32[][]cuda:0", L_self_state_list_L_self_state_keys_28_step_: "f32[][]cuda:0", L_self_state_list_L_self_state_keys_29_step_: "f32[][]cuda:0", L_self_state_list_L_self_state_keys_30_step_: "f32[][]cuda:0", L_self_state_list_L_self_state_keys_31_step_: "f32[][]cuda:0", L_self_state_list_L_self_state_keys_32_step_: "f32[][]cuda:0", L_self_state_list_L_self_state_keys_33_step_: "f32[][]cuda:0", L_self_state_list_L_self_state_keys_34_step_: "f32[][]cuda:0", L_self_state_list_L_self_state_keys_35_step_: "f32[][]cuda:0", L_self_state_list_L_self_state_keys_36_step_: "f32[][]cuda:0", L_self_state_list_L_self_state_keys_37_step_: "f32[][]cuda:0", L_self_state_list_L_self_state_keys_38_step_: "f32[][]cuda:0", L_self_state_list_L_self_state_keys_39_step_: "f32[][]cuda:0", L_self_state_list_L_self_state_keys_40_step_: "f32[][]cuda:0", L_self_state_list_L_self_state_keys_41_step_: "f32[][]cuda:0", L_self_state_list_L_self_state_keys_42_step_: "f32[][]cuda:0", L_self_state_list_L_self_state_keys_43_step_: "f32[][]cuda:0", L_self_state_list_L_self_state_keys_44_step_: "f32[][]cuda:0", L_self_state_list_L_self_state_keys_45_step_: "f32[][]cuda:0", L_self_state_list_L_self_state_keys_46_step_: "f32[][]cuda:0", L_self_state_list_L_self_state_keys_47_step_: "f32[][]cuda:0", L_self_state_list_L_self_state_keys_48_step_: "f32[][]cuda:0", L_self_state_list_L_self_state_keys_49_step_: "f32[][]cuda:0", L_self_state_list_L_self_state_keys_50_step_: "f32[][]cuda:0", L_self_state_list_L_self_state_keys_51_step_: "f32[][]cuda:0", L_self_state_list_L_self_state_keys_52_step_: "f32[][]cuda:0", L_self_state_list_L_self_state_keys_53_step_: "f32[][]cuda:0", L_self_state_list_L_self_state_keys_54_step_: "f32[][]cuda:0", L_self_state_list_L_self_state_keys_55_step_: "f32[][]cuda:0", L_self_state_list_L_self_state_keys_56_step_: "f32[][]cuda:0", L_self_state_list_L_self_state_keys_57_step_: "f32[][]cuda:0", L_self_state_list_L_self_state_keys_58_step_: "f32[][]cuda:0", L_self_state_list_L_self_state_keys_59_step_: "f32[][]cuda:0", L_self_state_list_L_self_state_keys_60_step_: "f32[][]cuda:0", L_self_state_list_L_self_state_keys_61_step_: "f32[][]cuda:0", L_self_state_list_L_self_state_keys_62_step_: "f32[][]cuda:0", L_self_state_list_L_self_state_keys_63_step_: "f32[][]cuda:0", L_self_state_list_L_self_state_keys_64_step_: "f32[][]cuda:0", L_self_state_list_L_self_state_keys_65_step_: "f32[][]cuda:0", L_self_state_list_L_self_state_keys_66_step_: "f32[][]cuda:0", L_self_state_list_L_self_state_keys_67_step_: "f32[][]cuda:0", L_self_state_list_L_self_state_keys_68_step_: "f32[][]cuda:0", L_self_state_list_L_self_state_keys_69_step_: "f32[][]cuda:0", L_self_state_list_L_self_state_keys_70_step_: "f32[][]cuda:0", L_self_state_list_L_self_state_keys_71_step_: "f32[][]cuda:0", L_self_state_list_L_self_state_keys_72_step_: "f32[][]cuda:0", L_self_state_list_L_self_state_keys_73_step_: "f32[][]cuda:0", L_self_state_list_L_self_state_keys_74_step_: "f32[][]cuda:0", L_self_state_list_L_self_state_keys_75_step_: "f32[][]cuda:0", L_self_state_list_L_self_state_keys_76_step_: "f32[][]cuda:0", L_self_state_list_L_self_state_keys_77_step_: "f32[][]cuda:0", 
L_self_state_list_L_self_state_keys_78_step_: "f32[][]cuda:0", L_self_state_list_L_self_state_keys_79_step_: "f32[][]cuda:0", L_self_state_list_L_self_state_keys_80_step_: "f32[][]cuda:0", L_self_state_list_L_self_state_keys_81_step_: "f32[][]cuda:0", L_self_state_list_L_self_state_keys_82_step_: "f32[][]cuda:0", L_self_state_list_L_self_state_keys_83_step_: "f32[][]cuda:0", L_self_state_list_L_self_state_keys_84_step_: "f32[][]cuda:0", L_self_state_list_L_self_state_keys_85_step_: "f32[][]cuda:0", L_self_state_list_L_self_state_keys_86_step_: "f32[][]cuda:0", L_self_state_list_L_self_state_keys_87_step_: "f32[][]cuda:0", L_self_state_list_L_self_state_keys_88_step_: "f32[][]cuda:0", L_self_state_list_L_self_state_keys_89_step_: "f32[][]cuda:0", L_self_state_list_L_self_state_keys_90_step_: "f32[][]cuda:0", L_self_state_list_L_self_state_keys_91_step_: "f32[][]cuda:0", L_self_state_list_L_self_state_keys_92_step_: "f32[][]cuda:0", L_self_state_list_L_self_state_keys_93_step_: "f32[][]cuda:0", L_self_state_list_L_self_state_keys_94_step_: "f32[][]cuda:0", L_self_state_list_L_self_state_keys_95_step_: "f32[][]cuda:0", L_self_state_list_L_self_state_keys_96_step_: "f32[][]cuda:0", L_self_state_list_L_self_state_keys_97_step_: "f32[][]cuda:0", L_self_state_list_L_self_state_keys_98_step_: "f32[][]cuda:0", L_self_state_list_L_self_state_keys_99_step_: "f32[][]cuda:0", L_self_state_list_L_self_state_keys_100_step_: "f32[][]cuda:0", L_self_state_list_L_self_state_keys_101_step_: "f32[][]cuda:0", L_self_state_list_L_self_state_keys_102_step_: "f32[][]cuda:0", L_self_state_list_L_self_state_keys_103_step_: "f32[][]cuda:0", L_self_state_list_L_self_state_keys_104_step_: "f32[][]cuda:0", L_self_state_list_L_self_state_keys_105_step_: "f32[][]cuda:0", L_self_state_list_L_self_state_keys_106_step_: "f32[][]cuda:0", L_self_state_list_L_self_state_keys_107_step_: "f32[][]cuda:0", L_self_state_list_L_self_state_keys_108_step_: "f32[][]cuda:0", L_self_state_list_L_self_state_keys_109_step_: "f32[][]cuda:0", L_self_state_list_L_self_state_keys_110_step_: "f32[][]cuda:0", L_self_state_list_L_self_state_keys_111_step_: "f32[][]cuda:0", L_self_state_list_L_self_state_keys_112_step_: "f32[][]cuda:0", L_self_state_list_L_self_state_keys_113_step_: "f32[][]cuda:0", L_self_state_list_L_self_state_keys_114_step_: "f32[][]cuda:0", L_self_state_list_L_self_state_keys_115_step_: "f32[][]cuda:0", L_self_state_list_L_self_state_keys_116_step_: "f32[][]cuda:0", L_self_state_list_L_self_state_keys_117_step_: "f32[][]cuda:0", L_self_state_list_L_self_state_keys_118_step_: "f32[][]cuda:0", L_self_state_list_L_self_state_keys_119_step_: "f32[][]cuda:0", L_self_state_list_L_self_state_keys_120_step_: "f32[][]cuda:0", L_self_state_list_L_self_state_keys_121_step_: "f32[][]cuda:0", L_self_state_list_L_self_state_keys_122_step_: "f32[][]cuda:0", L_self_state_list_L_self_state_keys_123_step_: "f32[][]cuda:0", L_self_state_list_L_self_state_keys_124_step_: "f32[][]cuda:0", L_self_state_list_L_self_state_keys_125_step_: "f32[][]cuda:0", L_self_state_list_L_self_state_keys_126_step_: "f32[][]cuda:0", L_self_state_list_L_self_state_keys_127_step_: "f32[][]cuda:0", L_self_state_list_L_self_state_keys_128_step_: "f32[][]cuda:0", L_self_state_list_L_self_state_keys_129_step_: "f32[][]cuda:0", L_self_state_list_L_self_state_keys_130_step_: "f32[][]cuda:0", L_self_state_list_L_self_state_keys_131_step_: "f32[][]cuda:0", L_self_state_list_L_self_state_keys_132_step_: "f32[][]cuda:0", L_self_state_list_L_self_state_keys_133_step_: 
"f32[][]cuda:0", L_self_state_list_L_self_state_keys_134_step_: "f32[][]cuda:0", L_self_state_list_L_self_state_keys_135_step_: "f32[][]cuda:0", L_self_state_list_L_self_state_keys_136_step_: "f32[][]cuda:0", L_self_state_list_L_self_state_keys_137_step_: "f32[][]cuda:0", L_self_state_list_L_self_state_keys_138_step_: "f32[][]cuda:0", L_self_state_list_L_self_state_keys_139_step_: "f32[][]cuda:0", L_self_state_list_L_self_state_keys_140_step_: "f32[][]cuda:0", L_self_state_list_L_self_state_keys_141_step_: "f32[][]cuda:0", L_self_state_list_L_self_state_keys_142_step_: "f32[][]cuda:0", L_self_state_list_L_self_state_keys_143_step_: "f32[][]cuda:0", L_self_state_list_L_self_state_keys_144_step_: "f32[][]cuda:0", L_self_state_list_L_self_state_keys_145_step_: "f32[][]cuda:0", L_self_state_list_L_self_state_keys_146_step_: "f32[][]cuda:0", L_self_state_list_L_self_state_keys_147_step_: "f32[][]cuda:0"): + l_self_param_groups_0_params_0_ = L_self_param_groups_0_params_0_ + l_self_param_groups_0_params_1_ = L_self_param_groups_0_params_1_ + l_self_param_groups_0_params_2_ = L_self_param_groups_0_params_2_ + l_self_param_groups_0_params_3_ = L_self_param_groups_0_params_3_ + l_self_param_groups_0_params_4_ = L_self_param_groups_0_params_4_ + l_self_param_groups_0_params_5_ = L_self_param_groups_0_params_5_ + l_self_param_groups_0_params_6_ = L_self_param_groups_0_params_6_ + l_self_param_groups_0_params_7_ = L_self_param_groups_0_params_7_ + l_self_param_groups_0_params_8_ = L_self_param_groups_0_params_8_ + l_self_param_groups_0_params_9_ = L_self_param_groups_0_params_9_ + l_self_param_groups_0_params_10_ = L_self_param_groups_0_params_10_ + l_self_param_groups_0_params_11_ = L_self_param_groups_0_params_11_ + l_self_param_groups_0_params_12_ = L_self_param_groups_0_params_12_ + l_self_param_groups_0_params_13_ = L_self_param_groups_0_params_13_ + l_self_param_groups_0_params_14_ = L_self_param_groups_0_params_14_ + l_self_param_groups_0_params_15_ = L_self_param_groups_0_params_15_ + l_self_param_groups_0_params_16_ = L_self_param_groups_0_params_16_ + l_self_param_groups_0_params_17_ = L_self_param_groups_0_params_17_ + l_self_param_groups_0_params_18_ = L_self_param_groups_0_params_18_ + l_self_param_groups_0_params_19_ = L_self_param_groups_0_params_19_ + l_self_param_groups_0_params_20_ = L_self_param_groups_0_params_20_ + l_self_param_groups_0_params_21_ = L_self_param_groups_0_params_21_ + l_self_param_groups_0_params_22_ = L_self_param_groups_0_params_22_ + l_self_param_groups_0_params_23_ = L_self_param_groups_0_params_23_ + l_self_param_groups_0_params_24_ = L_self_param_groups_0_params_24_ + l_self_param_groups_0_params_25_ = L_self_param_groups_0_params_25_ + l_self_param_groups_0_params_26_ = L_self_param_groups_0_params_26_ + l_self_param_groups_0_params_27_ = L_self_param_groups_0_params_27_ + l_self_param_groups_0_params_28_ = L_self_param_groups_0_params_28_ + l_self_param_groups_0_params_29_ = L_self_param_groups_0_params_29_ + l_self_param_groups_0_params_30_ = L_self_param_groups_0_params_30_ + l_self_param_groups_0_params_31_ = L_self_param_groups_0_params_31_ + l_self_param_groups_0_params_32_ = L_self_param_groups_0_params_32_ + l_self_param_groups_0_params_33_ = L_self_param_groups_0_params_33_ + l_self_param_groups_0_params_34_ = L_self_param_groups_0_params_34_ + l_self_param_groups_0_params_35_ = L_self_param_groups_0_params_35_ + l_self_param_groups_0_params_36_ = L_self_param_groups_0_params_36_ + l_self_param_groups_0_params_37_ = L_self_param_groups_0_params_37_ 
+ l_self_param_groups_0_params_38_ = L_self_param_groups_0_params_38_ + l_self_param_groups_0_params_39_ = L_self_param_groups_0_params_39_ + l_self_param_groups_0_params_40_ = L_self_param_groups_0_params_40_ + l_self_param_groups_0_params_41_ = L_self_param_groups_0_params_41_ + l_self_param_groups_0_params_42_ = L_self_param_groups_0_params_42_ + l_self_param_groups_0_params_43_ = L_self_param_groups_0_params_43_ + l_self_param_groups_0_params_44_ = L_self_param_groups_0_params_44_ + l_self_param_groups_0_params_45_ = L_self_param_groups_0_params_45_ + l_self_param_groups_0_params_46_ = L_self_param_groups_0_params_46_ + l_self_param_groups_0_params_47_ = L_self_param_groups_0_params_47_ + l_self_param_groups_0_params_48_ = L_self_param_groups_0_params_48_ + l_self_param_groups_0_params_49_ = L_self_param_groups_0_params_49_ + l_self_param_groups_0_params_50_ = L_self_param_groups_0_params_50_ + l_self_param_groups_0_params_51_ = L_self_param_groups_0_params_51_ + l_self_param_groups_0_params_52_ = L_self_param_groups_0_params_52_ + l_self_param_groups_0_params_53_ = L_self_param_groups_0_params_53_ + l_self_param_groups_0_params_54_ = L_self_param_groups_0_params_54_ + l_self_param_groups_0_params_55_ = L_self_param_groups_0_params_55_ + l_self_param_groups_0_params_56_ = L_self_param_groups_0_params_56_ + l_self_param_groups_0_params_57_ = L_self_param_groups_0_params_57_ + l_self_param_groups_0_params_58_ = L_self_param_groups_0_params_58_ + l_self_param_groups_0_params_59_ = L_self_param_groups_0_params_59_ + l_self_param_groups_0_params_60_ = L_self_param_groups_0_params_60_ + l_self_param_groups_0_params_61_ = L_self_param_groups_0_params_61_ + l_self_param_groups_0_params_62_ = L_self_param_groups_0_params_62_ + l_self_param_groups_0_params_63_ = L_self_param_groups_0_params_63_ + l_self_param_groups_0_params_64_ = L_self_param_groups_0_params_64_ + l_self_param_groups_0_params_65_ = L_self_param_groups_0_params_65_ + l_self_param_groups_0_params_66_ = L_self_param_groups_0_params_66_ + l_self_param_groups_0_params_67_ = L_self_param_groups_0_params_67_ + l_self_param_groups_0_params_68_ = L_self_param_groups_0_params_68_ + l_self_param_groups_0_params_69_ = L_self_param_groups_0_params_69_ + l_self_param_groups_0_params_70_ = L_self_param_groups_0_params_70_ + l_self_param_groups_0_params_71_ = L_self_param_groups_0_params_71_ + l_self_param_groups_0_params_72_ = L_self_param_groups_0_params_72_ + l_self_param_groups_0_params_73_ = L_self_param_groups_0_params_73_ + l_self_param_groups_0_params_74_ = L_self_param_groups_0_params_74_ + l_self_param_groups_0_params_75_ = L_self_param_groups_0_params_75_ + l_self_param_groups_0_params_76_ = L_self_param_groups_0_params_76_ + l_self_param_groups_0_params_77_ = L_self_param_groups_0_params_77_ + l_self_param_groups_0_params_78_ = L_self_param_groups_0_params_78_ + l_self_param_groups_0_params_79_ = L_self_param_groups_0_params_79_ + l_self_param_groups_0_params_80_ = L_self_param_groups_0_params_80_ + l_self_param_groups_0_params_81_ = L_self_param_groups_0_params_81_ + l_self_param_groups_0_params_82_ = L_self_param_groups_0_params_82_ + l_self_param_groups_0_params_83_ = L_self_param_groups_0_params_83_ + l_self_param_groups_0_params_84_ = L_self_param_groups_0_params_84_ + l_self_param_groups_0_params_85_ = L_self_param_groups_0_params_85_ + l_self_param_groups_0_params_86_ = L_self_param_groups_0_params_86_ + l_self_param_groups_0_params_87_ = L_self_param_groups_0_params_87_ + l_self_param_groups_0_params_88_ = 
L_self_param_groups_0_params_88_ + l_self_param_groups_0_params_89_ = L_self_param_groups_0_params_89_ + l_self_param_groups_0_params_90_ = L_self_param_groups_0_params_90_ + l_self_param_groups_0_params_91_ = L_self_param_groups_0_params_91_ + l_self_param_groups_0_params_92_ = L_self_param_groups_0_params_92_ + l_self_param_groups_0_params_93_ = L_self_param_groups_0_params_93_ + l_self_param_groups_0_params_94_ = L_self_param_groups_0_params_94_ + l_self_param_groups_0_params_95_ = L_self_param_groups_0_params_95_ + l_self_param_groups_0_params_96_ = L_self_param_groups_0_params_96_ + l_self_param_groups_0_params_97_ = L_self_param_groups_0_params_97_ + l_self_param_groups_0_params_98_ = L_self_param_groups_0_params_98_ + l_self_param_groups_0_params_99_ = L_self_param_groups_0_params_99_ + l_self_param_groups_0_params_100_ = L_self_param_groups_0_params_100_ + l_self_param_groups_0_params_101_ = L_self_param_groups_0_params_101_ + l_self_param_groups_0_params_102_ = L_self_param_groups_0_params_102_ + l_self_param_groups_0_params_103_ = L_self_param_groups_0_params_103_ + l_self_param_groups_0_params_104_ = L_self_param_groups_0_params_104_ + l_self_param_groups_0_params_105_ = L_self_param_groups_0_params_105_ + l_self_param_groups_0_params_106_ = L_self_param_groups_0_params_106_ + l_self_param_groups_0_params_107_ = L_self_param_groups_0_params_107_ + l_self_param_groups_0_params_108_ = L_self_param_groups_0_params_108_ + l_self_param_groups_0_params_109_ = L_self_param_groups_0_params_109_ + l_self_param_groups_0_params_110_ = L_self_param_groups_0_params_110_ + l_self_param_groups_0_params_111_ = L_self_param_groups_0_params_111_ + l_self_param_groups_0_params_112_ = L_self_param_groups_0_params_112_ + l_self_param_groups_0_params_113_ = L_self_param_groups_0_params_113_ + l_self_param_groups_0_params_114_ = L_self_param_groups_0_params_114_ + l_self_param_groups_0_params_115_ = L_self_param_groups_0_params_115_ + l_self_param_groups_0_params_116_ = L_self_param_groups_0_params_116_ + l_self_param_groups_0_params_117_ = L_self_param_groups_0_params_117_ + l_self_param_groups_0_params_118_ = L_self_param_groups_0_params_118_ + l_self_param_groups_0_params_119_ = L_self_param_groups_0_params_119_ + l_self_param_groups_0_params_120_ = L_self_param_groups_0_params_120_ + l_self_param_groups_0_params_121_ = L_self_param_groups_0_params_121_ + l_self_param_groups_0_params_122_ = L_self_param_groups_0_params_122_ + l_self_param_groups_0_params_123_ = L_self_param_groups_0_params_123_ + l_self_param_groups_0_params_124_ = L_self_param_groups_0_params_124_ + l_self_param_groups_0_params_125_ = L_self_param_groups_0_params_125_ + l_self_param_groups_0_params_126_ = L_self_param_groups_0_params_126_ + l_self_param_groups_0_params_127_ = L_self_param_groups_0_params_127_ + l_self_param_groups_0_params_128_ = L_self_param_groups_0_params_128_ + l_self_param_groups_0_params_129_ = L_self_param_groups_0_params_129_ + l_self_param_groups_0_params_130_ = L_self_param_groups_0_params_130_ + l_self_param_groups_0_params_131_ = L_self_param_groups_0_params_131_ + l_self_param_groups_0_params_132_ = L_self_param_groups_0_params_132_ + l_self_param_groups_0_params_133_ = L_self_param_groups_0_params_133_ + l_self_param_groups_0_params_134_ = L_self_param_groups_0_params_134_ + l_self_param_groups_0_params_135_ = L_self_param_groups_0_params_135_ + l_self_param_groups_0_params_136_ = L_self_param_groups_0_params_136_ + l_self_param_groups_0_params_137_ = L_self_param_groups_0_params_137_ + 
l_self_param_groups_0_params_138_ = L_self_param_groups_0_params_138_ + l_self_param_groups_0_params_139_ = L_self_param_groups_0_params_139_ + l_self_param_groups_0_params_140_ = L_self_param_groups_0_params_140_ + l_self_param_groups_0_params_141_ = L_self_param_groups_0_params_141_ + l_self_param_groups_0_params_142_ = L_self_param_groups_0_params_142_ + l_self_param_groups_0_params_143_ = L_self_param_groups_0_params_143_ + l_self_param_groups_0_params_144_ = L_self_param_groups_0_params_144_ + l_self_param_groups_0_params_145_ = L_self_param_groups_0_params_145_ + l_self_param_groups_0_params_146_ = L_self_param_groups_0_params_146_ + l_self_param_groups_0_params_147_ = L_self_param_groups_0_params_147_ + l_self_state_list_l_self_state_keys_1_step_ = L_self_state_list_L_self_state_keys_1_step_ + l_self_state_list_l_self_state_keys_1_exp_avg_ = L_self_state_list_L_self_state_keys_1_exp_avg_ + l_self_state_list_l_self_state_keys_1_exp_avg_sq_ = L_self_state_list_L_self_state_keys_1_exp_avg_sq_ + l_self_param_groups_0_params_0_grad = L_self_param_groups_0_params_0_grad + l_self_param_groups_0_params_1_grad = L_self_param_groups_0_params_1_grad + l_self_param_groups_0_params_2_grad = L_self_param_groups_0_params_2_grad + l_self_param_groups_0_params_3_grad = L_self_param_groups_0_params_3_grad + l_self_param_groups_0_params_4_grad = L_self_param_groups_0_params_4_grad + l_self_param_groups_0_params_5_grad = L_self_param_groups_0_params_5_grad + l_self_param_groups_0_params_6_grad = L_self_param_groups_0_params_6_grad + l_self_param_groups_0_params_7_grad = L_self_param_groups_0_params_7_grad + l_self_param_groups_0_params_8_grad = L_self_param_groups_0_params_8_grad + l_self_param_groups_0_params_9_grad = L_self_param_groups_0_params_9_grad + l_self_param_groups_0_params_10_grad = L_self_param_groups_0_params_10_grad + l_self_param_groups_0_params_11_grad = L_self_param_groups_0_params_11_grad + l_self_param_groups_0_params_12_grad = L_self_param_groups_0_params_12_grad + l_self_param_groups_0_params_13_grad = L_self_param_groups_0_params_13_grad + l_self_param_groups_0_params_14_grad = L_self_param_groups_0_params_14_grad + l_self_param_groups_0_params_15_grad = L_self_param_groups_0_params_15_grad + l_self_param_groups_0_params_16_grad = L_self_param_groups_0_params_16_grad + l_self_param_groups_0_params_17_grad = L_self_param_groups_0_params_17_grad + l_self_param_groups_0_params_18_grad = L_self_param_groups_0_params_18_grad + l_self_param_groups_0_params_19_grad = L_self_param_groups_0_params_19_grad + l_self_param_groups_0_params_20_grad = L_self_param_groups_0_params_20_grad + l_self_param_groups_0_params_21_grad = L_self_param_groups_0_params_21_grad + l_self_param_groups_0_params_22_grad = L_self_param_groups_0_params_22_grad + l_self_param_groups_0_params_23_grad = L_self_param_groups_0_params_23_grad + l_self_param_groups_0_params_24_grad = L_self_param_groups_0_params_24_grad + l_self_param_groups_0_params_25_grad = L_self_param_groups_0_params_25_grad + l_self_param_groups_0_params_26_grad = L_self_param_groups_0_params_26_grad + l_self_param_groups_0_params_27_grad = L_self_param_groups_0_params_27_grad + l_self_param_groups_0_params_28_grad = L_self_param_groups_0_params_28_grad + l_self_param_groups_0_params_29_grad = L_self_param_groups_0_params_29_grad + l_self_param_groups_0_params_30_grad = L_self_param_groups_0_params_30_grad + l_self_param_groups_0_params_31_grad = L_self_param_groups_0_params_31_grad + l_self_param_groups_0_params_32_grad = 
L_self_param_groups_0_params_32_grad + l_self_param_groups_0_params_33_grad = L_self_param_groups_0_params_33_grad + l_self_param_groups_0_params_34_grad = L_self_param_groups_0_params_34_grad + l_self_param_groups_0_params_35_grad = L_self_param_groups_0_params_35_grad + l_self_param_groups_0_params_36_grad = L_self_param_groups_0_params_36_grad + l_self_param_groups_0_params_37_grad = L_self_param_groups_0_params_37_grad + l_self_param_groups_0_params_38_grad = L_self_param_groups_0_params_38_grad + l_self_param_groups_0_params_39_grad = L_self_param_groups_0_params_39_grad + l_self_param_groups_0_params_40_grad = L_self_param_groups_0_params_40_grad + l_self_param_groups_0_params_41_grad = L_self_param_groups_0_params_41_grad + l_self_param_groups_0_params_42_grad = L_self_param_groups_0_params_42_grad + l_self_param_groups_0_params_43_grad = L_self_param_groups_0_params_43_grad + l_self_param_groups_0_params_44_grad = L_self_param_groups_0_params_44_grad + l_self_param_groups_0_params_45_grad = L_self_param_groups_0_params_45_grad + l_self_param_groups_0_params_46_grad = L_self_param_groups_0_params_46_grad + l_self_param_groups_0_params_47_grad = L_self_param_groups_0_params_47_grad + l_self_param_groups_0_params_48_grad = L_self_param_groups_0_params_48_grad + l_self_param_groups_0_params_49_grad = L_self_param_groups_0_params_49_grad + l_self_param_groups_0_params_50_grad = L_self_param_groups_0_params_50_grad + l_self_param_groups_0_params_51_grad = L_self_param_groups_0_params_51_grad + l_self_param_groups_0_params_52_grad = L_self_param_groups_0_params_52_grad + l_self_param_groups_0_params_53_grad = L_self_param_groups_0_params_53_grad + l_self_param_groups_0_params_54_grad = L_self_param_groups_0_params_54_grad + l_self_param_groups_0_params_55_grad = L_self_param_groups_0_params_55_grad + l_self_param_groups_0_params_56_grad = L_self_param_groups_0_params_56_grad + l_self_param_groups_0_params_57_grad = L_self_param_groups_0_params_57_grad + l_self_param_groups_0_params_58_grad = L_self_param_groups_0_params_58_grad + l_self_param_groups_0_params_59_grad = L_self_param_groups_0_params_59_grad + l_self_param_groups_0_params_60_grad = L_self_param_groups_0_params_60_grad + l_self_param_groups_0_params_61_grad = L_self_param_groups_0_params_61_grad + l_self_param_groups_0_params_62_grad = L_self_param_groups_0_params_62_grad + l_self_param_groups_0_params_63_grad = L_self_param_groups_0_params_63_grad + l_self_param_groups_0_params_64_grad = L_self_param_groups_0_params_64_grad + l_self_param_groups_0_params_65_grad = L_self_param_groups_0_params_65_grad + l_self_param_groups_0_params_66_grad = L_self_param_groups_0_params_66_grad + l_self_param_groups_0_params_67_grad = L_self_param_groups_0_params_67_grad + l_self_param_groups_0_params_68_grad = L_self_param_groups_0_params_68_grad + l_self_param_groups_0_params_69_grad = L_self_param_groups_0_params_69_grad + l_self_param_groups_0_params_70_grad = L_self_param_groups_0_params_70_grad + l_self_param_groups_0_params_71_grad = L_self_param_groups_0_params_71_grad + l_self_param_groups_0_params_72_grad = L_self_param_groups_0_params_72_grad + l_self_param_groups_0_params_73_grad = L_self_param_groups_0_params_73_grad + l_self_param_groups_0_params_74_grad = L_self_param_groups_0_params_74_grad + l_self_param_groups_0_params_75_grad = L_self_param_groups_0_params_75_grad + l_self_param_groups_0_params_76_grad = L_self_param_groups_0_params_76_grad + l_self_param_groups_0_params_77_grad = L_self_param_groups_0_params_77_grad + 
l_self_param_groups_0_params_78_grad = L_self_param_groups_0_params_78_grad + l_self_param_groups_0_params_79_grad = L_self_param_groups_0_params_79_grad + l_self_param_groups_0_params_80_grad = L_self_param_groups_0_params_80_grad + l_self_param_groups_0_params_81_grad = L_self_param_groups_0_params_81_grad + l_self_param_groups_0_params_82_grad = L_self_param_groups_0_params_82_grad + l_self_param_groups_0_params_83_grad = L_self_param_groups_0_params_83_grad + l_self_param_groups_0_params_84_grad = L_self_param_groups_0_params_84_grad + l_self_param_groups_0_params_85_grad = L_self_param_groups_0_params_85_grad + l_self_param_groups_0_params_86_grad = L_self_param_groups_0_params_86_grad + l_self_param_groups_0_params_87_grad = L_self_param_groups_0_params_87_grad + l_self_param_groups_0_params_88_grad = L_self_param_groups_0_params_88_grad + l_self_param_groups_0_params_89_grad = L_self_param_groups_0_params_89_grad + l_self_param_groups_0_params_90_grad = L_self_param_groups_0_params_90_grad + l_self_param_groups_0_params_91_grad = L_self_param_groups_0_params_91_grad + l_self_param_groups_0_params_92_grad = L_self_param_groups_0_params_92_grad + l_self_param_groups_0_params_93_grad = L_self_param_groups_0_params_93_grad + l_self_param_groups_0_params_94_grad = L_self_param_groups_0_params_94_grad + l_self_param_groups_0_params_95_grad = L_self_param_groups_0_params_95_grad + l_self_param_groups_0_params_96_grad = L_self_param_groups_0_params_96_grad + l_self_param_groups_0_params_97_grad = L_self_param_groups_0_params_97_grad + l_self_param_groups_0_params_98_grad = L_self_param_groups_0_params_98_grad + l_self_param_groups_0_params_99_grad = L_self_param_groups_0_params_99_grad + l_self_param_groups_0_params_100_grad = L_self_param_groups_0_params_100_grad + l_self_param_groups_0_params_101_grad = L_self_param_groups_0_params_101_grad + l_self_param_groups_0_params_102_grad = L_self_param_groups_0_params_102_grad + l_self_param_groups_0_params_103_grad = L_self_param_groups_0_params_103_grad + l_self_param_groups_0_params_104_grad = L_self_param_groups_0_params_104_grad + l_self_param_groups_0_params_105_grad = L_self_param_groups_0_params_105_grad + l_self_param_groups_0_params_106_grad = L_self_param_groups_0_params_106_grad + l_self_param_groups_0_params_107_grad = L_self_param_groups_0_params_107_grad + l_self_param_groups_0_params_108_grad = L_self_param_groups_0_params_108_grad + l_self_param_groups_0_params_109_grad = L_self_param_groups_0_params_109_grad + l_self_param_groups_0_params_110_grad = L_self_param_groups_0_params_110_grad + l_self_param_groups_0_params_111_grad = L_self_param_groups_0_params_111_grad + l_self_param_groups_0_params_112_grad = L_self_param_groups_0_params_112_grad + l_self_param_groups_0_params_113_grad = L_self_param_groups_0_params_113_grad + l_self_param_groups_0_params_114_grad = L_self_param_groups_0_params_114_grad + l_self_param_groups_0_params_115_grad = L_self_param_groups_0_params_115_grad + l_self_param_groups_0_params_116_grad = L_self_param_groups_0_params_116_grad + l_self_param_groups_0_params_117_grad = L_self_param_groups_0_params_117_grad + l_self_param_groups_0_params_118_grad = L_self_param_groups_0_params_118_grad + l_self_param_groups_0_params_119_grad = L_self_param_groups_0_params_119_grad + l_self_param_groups_0_params_120_grad = L_self_param_groups_0_params_120_grad + l_self_param_groups_0_params_121_grad = L_self_param_groups_0_params_121_grad + l_self_param_groups_0_params_122_grad = L_self_param_groups_0_params_122_grad 
+ l_self_param_groups_0_params_123_grad = L_self_param_groups_0_params_123_grad + l_self_param_groups_0_params_124_grad = L_self_param_groups_0_params_124_grad + l_self_param_groups_0_params_125_grad = L_self_param_groups_0_params_125_grad + l_self_param_groups_0_params_126_grad = L_self_param_groups_0_params_126_grad + l_self_param_groups_0_params_127_grad = L_self_param_groups_0_params_127_grad + l_self_param_groups_0_params_128_grad = L_self_param_groups_0_params_128_grad + l_self_param_groups_0_params_129_grad = L_self_param_groups_0_params_129_grad + l_self_param_groups_0_params_130_grad = L_self_param_groups_0_params_130_grad + l_self_param_groups_0_params_131_grad = L_self_param_groups_0_params_131_grad + l_self_param_groups_0_params_132_grad = L_self_param_groups_0_params_132_grad + l_self_param_groups_0_params_133_grad = L_self_param_groups_0_params_133_grad + l_self_param_groups_0_params_134_grad = L_self_param_groups_0_params_134_grad + l_self_param_groups_0_params_135_grad = L_self_param_groups_0_params_135_grad + l_self_param_groups_0_params_136_grad = L_self_param_groups_0_params_136_grad + l_self_param_groups_0_params_137_grad = L_self_param_groups_0_params_137_grad + l_self_param_groups_0_params_138_grad = L_self_param_groups_0_params_138_grad + l_self_param_groups_0_params_139_grad = L_self_param_groups_0_params_139_grad + l_self_param_groups_0_params_140_grad = L_self_param_groups_0_params_140_grad + l_self_param_groups_0_params_141_grad = L_self_param_groups_0_params_141_grad + l_self_param_groups_0_params_142_grad = L_self_param_groups_0_params_142_grad + l_self_param_groups_0_params_143_grad = L_self_param_groups_0_params_143_grad + l_self_param_groups_0_params_144_grad = L_self_param_groups_0_params_144_grad + l_self_param_groups_0_params_145_grad = L_self_param_groups_0_params_145_grad + l_self_param_groups_0_params_146_grad = L_self_param_groups_0_params_146_grad + l_self_param_groups_0_params_147_grad = L_self_param_groups_0_params_147_grad + l_self_state_list_l_self_state_keys_0_exp_avg_ = L_self_state_list_L_self_state_keys_0_exp_avg_ + l_self_state_list_l_self_state_keys_2_exp_avg_ = L_self_state_list_L_self_state_keys_2_exp_avg_ + l_self_state_list_l_self_state_keys_3_exp_avg_ = L_self_state_list_L_self_state_keys_3_exp_avg_ + l_self_state_list_l_self_state_keys_4_exp_avg_ = L_self_state_list_L_self_state_keys_4_exp_avg_ + l_self_state_list_l_self_state_keys_5_exp_avg_ = L_self_state_list_L_self_state_keys_5_exp_avg_ + l_self_state_list_l_self_state_keys_6_exp_avg_ = L_self_state_list_L_self_state_keys_6_exp_avg_ + l_self_state_list_l_self_state_keys_7_exp_avg_ = L_self_state_list_L_self_state_keys_7_exp_avg_ + l_self_state_list_l_self_state_keys_8_exp_avg_ = L_self_state_list_L_self_state_keys_8_exp_avg_ + l_self_state_list_l_self_state_keys_9_exp_avg_ = L_self_state_list_L_self_state_keys_9_exp_avg_ + l_self_state_list_l_self_state_keys_10_exp_avg_ = L_self_state_list_L_self_state_keys_10_exp_avg_ + l_self_state_list_l_self_state_keys_11_exp_avg_ = L_self_state_list_L_self_state_keys_11_exp_avg_ + l_self_state_list_l_self_state_keys_12_exp_avg_ = L_self_state_list_L_self_state_keys_12_exp_avg_ + l_self_state_list_l_self_state_keys_13_exp_avg_ = L_self_state_list_L_self_state_keys_13_exp_avg_ + l_self_state_list_l_self_state_keys_14_exp_avg_ = L_self_state_list_L_self_state_keys_14_exp_avg_ + l_self_state_list_l_self_state_keys_15_exp_avg_ = L_self_state_list_L_self_state_keys_15_exp_avg_ + l_self_state_list_l_self_state_keys_16_exp_avg_ = 
L_self_state_list_L_self_state_keys_16_exp_avg_ + l_self_state_list_l_self_state_keys_17_exp_avg_ = L_self_state_list_L_self_state_keys_17_exp_avg_ + l_self_state_list_l_self_state_keys_18_exp_avg_ = L_self_state_list_L_self_state_keys_18_exp_avg_ + l_self_state_list_l_self_state_keys_19_exp_avg_ = L_self_state_list_L_self_state_keys_19_exp_avg_ + l_self_state_list_l_self_state_keys_20_exp_avg_ = L_self_state_list_L_self_state_keys_20_exp_avg_ + l_self_state_list_l_self_state_keys_21_exp_avg_ = L_self_state_list_L_self_state_keys_21_exp_avg_ + l_self_state_list_l_self_state_keys_22_exp_avg_ = L_self_state_list_L_self_state_keys_22_exp_avg_ + l_self_state_list_l_self_state_keys_23_exp_avg_ = L_self_state_list_L_self_state_keys_23_exp_avg_ + l_self_state_list_l_self_state_keys_24_exp_avg_ = L_self_state_list_L_self_state_keys_24_exp_avg_ + l_self_state_list_l_self_state_keys_25_exp_avg_ = L_self_state_list_L_self_state_keys_25_exp_avg_ + l_self_state_list_l_self_state_keys_26_exp_avg_ = L_self_state_list_L_self_state_keys_26_exp_avg_ + l_self_state_list_l_self_state_keys_27_exp_avg_ = L_self_state_list_L_self_state_keys_27_exp_avg_ + l_self_state_list_l_self_state_keys_28_exp_avg_ = L_self_state_list_L_self_state_keys_28_exp_avg_ + l_self_state_list_l_self_state_keys_29_exp_avg_ = L_self_state_list_L_self_state_keys_29_exp_avg_ + l_self_state_list_l_self_state_keys_30_exp_avg_ = L_self_state_list_L_self_state_keys_30_exp_avg_ + l_self_state_list_l_self_state_keys_31_exp_avg_ = L_self_state_list_L_self_state_keys_31_exp_avg_ + l_self_state_list_l_self_state_keys_32_exp_avg_ = L_self_state_list_L_self_state_keys_32_exp_avg_ + l_self_state_list_l_self_state_keys_33_exp_avg_ = L_self_state_list_L_self_state_keys_33_exp_avg_ + l_self_state_list_l_self_state_keys_34_exp_avg_ = L_self_state_list_L_self_state_keys_34_exp_avg_ + l_self_state_list_l_self_state_keys_35_exp_avg_ = L_self_state_list_L_self_state_keys_35_exp_avg_ + l_self_state_list_l_self_state_keys_36_exp_avg_ = L_self_state_list_L_self_state_keys_36_exp_avg_ + l_self_state_list_l_self_state_keys_37_exp_avg_ = L_self_state_list_L_self_state_keys_37_exp_avg_ + l_self_state_list_l_self_state_keys_38_exp_avg_ = L_self_state_list_L_self_state_keys_38_exp_avg_ + l_self_state_list_l_self_state_keys_39_exp_avg_ = L_self_state_list_L_self_state_keys_39_exp_avg_ + l_self_state_list_l_self_state_keys_40_exp_avg_ = L_self_state_list_L_self_state_keys_40_exp_avg_ + l_self_state_list_l_self_state_keys_41_exp_avg_ = L_self_state_list_L_self_state_keys_41_exp_avg_ + l_self_state_list_l_self_state_keys_42_exp_avg_ = L_self_state_list_L_self_state_keys_42_exp_avg_ + l_self_state_list_l_self_state_keys_43_exp_avg_ = L_self_state_list_L_self_state_keys_43_exp_avg_ + l_self_state_list_l_self_state_keys_44_exp_avg_ = L_self_state_list_L_self_state_keys_44_exp_avg_ + l_self_state_list_l_self_state_keys_45_exp_avg_ = L_self_state_list_L_self_state_keys_45_exp_avg_ + l_self_state_list_l_self_state_keys_46_exp_avg_ = L_self_state_list_L_self_state_keys_46_exp_avg_ + l_self_state_list_l_self_state_keys_47_exp_avg_ = L_self_state_list_L_self_state_keys_47_exp_avg_ + l_self_state_list_l_self_state_keys_48_exp_avg_ = L_self_state_list_L_self_state_keys_48_exp_avg_ + l_self_state_list_l_self_state_keys_49_exp_avg_ = L_self_state_list_L_self_state_keys_49_exp_avg_ + l_self_state_list_l_self_state_keys_50_exp_avg_ = L_self_state_list_L_self_state_keys_50_exp_avg_ + l_self_state_list_l_self_state_keys_51_exp_avg_ = L_self_state_list_L_self_state_keys_51_exp_avg_ + 
l_self_state_list_l_self_state_keys_52_exp_avg_ = L_self_state_list_L_self_state_keys_52_exp_avg_ + l_self_state_list_l_self_state_keys_53_exp_avg_ = L_self_state_list_L_self_state_keys_53_exp_avg_ + l_self_state_list_l_self_state_keys_54_exp_avg_ = L_self_state_list_L_self_state_keys_54_exp_avg_ + l_self_state_list_l_self_state_keys_55_exp_avg_ = L_self_state_list_L_self_state_keys_55_exp_avg_ + l_self_state_list_l_self_state_keys_56_exp_avg_ = L_self_state_list_L_self_state_keys_56_exp_avg_ + l_self_state_list_l_self_state_keys_57_exp_avg_ = L_self_state_list_L_self_state_keys_57_exp_avg_ + l_self_state_list_l_self_state_keys_58_exp_avg_ = L_self_state_list_L_self_state_keys_58_exp_avg_ + l_self_state_list_l_self_state_keys_59_exp_avg_ = L_self_state_list_L_self_state_keys_59_exp_avg_ + l_self_state_list_l_self_state_keys_60_exp_avg_ = L_self_state_list_L_self_state_keys_60_exp_avg_ + l_self_state_list_l_self_state_keys_61_exp_avg_ = L_self_state_list_L_self_state_keys_61_exp_avg_ + l_self_state_list_l_self_state_keys_62_exp_avg_ = L_self_state_list_L_self_state_keys_62_exp_avg_ + l_self_state_list_l_self_state_keys_63_exp_avg_ = L_self_state_list_L_self_state_keys_63_exp_avg_ + l_self_state_list_l_self_state_keys_64_exp_avg_ = L_self_state_list_L_self_state_keys_64_exp_avg_ + l_self_state_list_l_self_state_keys_65_exp_avg_ = L_self_state_list_L_self_state_keys_65_exp_avg_ + l_self_state_list_l_self_state_keys_66_exp_avg_ = L_self_state_list_L_self_state_keys_66_exp_avg_ + l_self_state_list_l_self_state_keys_67_exp_avg_ = L_self_state_list_L_self_state_keys_67_exp_avg_ + l_self_state_list_l_self_state_keys_68_exp_avg_ = L_self_state_list_L_self_state_keys_68_exp_avg_ + l_self_state_list_l_self_state_keys_69_exp_avg_ = L_self_state_list_L_self_state_keys_69_exp_avg_ + l_self_state_list_l_self_state_keys_70_exp_avg_ = L_self_state_list_L_self_state_keys_70_exp_avg_ + l_self_state_list_l_self_state_keys_71_exp_avg_ = L_self_state_list_L_self_state_keys_71_exp_avg_ + l_self_state_list_l_self_state_keys_72_exp_avg_ = L_self_state_list_L_self_state_keys_72_exp_avg_ + l_self_state_list_l_self_state_keys_73_exp_avg_ = L_self_state_list_L_self_state_keys_73_exp_avg_ + l_self_state_list_l_self_state_keys_74_exp_avg_ = L_self_state_list_L_self_state_keys_74_exp_avg_ + l_self_state_list_l_self_state_keys_75_exp_avg_ = L_self_state_list_L_self_state_keys_75_exp_avg_ + l_self_state_list_l_self_state_keys_76_exp_avg_ = L_self_state_list_L_self_state_keys_76_exp_avg_ + l_self_state_list_l_self_state_keys_77_exp_avg_ = L_self_state_list_L_self_state_keys_77_exp_avg_ + l_self_state_list_l_self_state_keys_78_exp_avg_ = L_self_state_list_L_self_state_keys_78_exp_avg_ + l_self_state_list_l_self_state_keys_79_exp_avg_ = L_self_state_list_L_self_state_keys_79_exp_avg_ + l_self_state_list_l_self_state_keys_80_exp_avg_ = L_self_state_list_L_self_state_keys_80_exp_avg_ + l_self_state_list_l_self_state_keys_81_exp_avg_ = L_self_state_list_L_self_state_keys_81_exp_avg_ + l_self_state_list_l_self_state_keys_82_exp_avg_ = L_self_state_list_L_self_state_keys_82_exp_avg_ + l_self_state_list_l_self_state_keys_83_exp_avg_ = L_self_state_list_L_self_state_keys_83_exp_avg_ + l_self_state_list_l_self_state_keys_84_exp_avg_ = L_self_state_list_L_self_state_keys_84_exp_avg_ + l_self_state_list_l_self_state_keys_85_exp_avg_ = L_self_state_list_L_self_state_keys_85_exp_avg_ + l_self_state_list_l_self_state_keys_86_exp_avg_ = L_self_state_list_L_self_state_keys_86_exp_avg_ + l_self_state_list_l_self_state_keys_87_exp_avg_ = 
L_self_state_list_L_self_state_keys_87_exp_avg_ + l_self_state_list_l_self_state_keys_88_exp_avg_ = L_self_state_list_L_self_state_keys_88_exp_avg_ + l_self_state_list_l_self_state_keys_89_exp_avg_ = L_self_state_list_L_self_state_keys_89_exp_avg_ + l_self_state_list_l_self_state_keys_90_exp_avg_ = L_self_state_list_L_self_state_keys_90_exp_avg_ + l_self_state_list_l_self_state_keys_91_exp_avg_ = L_self_state_list_L_self_state_keys_91_exp_avg_ + l_self_state_list_l_self_state_keys_92_exp_avg_ = L_self_state_list_L_self_state_keys_92_exp_avg_ + l_self_state_list_l_self_state_keys_93_exp_avg_ = L_self_state_list_L_self_state_keys_93_exp_avg_ + l_self_state_list_l_self_state_keys_94_exp_avg_ = L_self_state_list_L_self_state_keys_94_exp_avg_ + l_self_state_list_l_self_state_keys_95_exp_avg_ = L_self_state_list_L_self_state_keys_95_exp_avg_ + l_self_state_list_l_self_state_keys_96_exp_avg_ = L_self_state_list_L_self_state_keys_96_exp_avg_ + l_self_state_list_l_self_state_keys_97_exp_avg_ = L_self_state_list_L_self_state_keys_97_exp_avg_ + l_self_state_list_l_self_state_keys_98_exp_avg_ = L_self_state_list_L_self_state_keys_98_exp_avg_ + l_self_state_list_l_self_state_keys_99_exp_avg_ = L_self_state_list_L_self_state_keys_99_exp_avg_ + l_self_state_list_l_self_state_keys_100_exp_avg_ = L_self_state_list_L_self_state_keys_100_exp_avg_ + l_self_state_list_l_self_state_keys_101_exp_avg_ = L_self_state_list_L_self_state_keys_101_exp_avg_ + l_self_state_list_l_self_state_keys_102_exp_avg_ = L_self_state_list_L_self_state_keys_102_exp_avg_ + l_self_state_list_l_self_state_keys_103_exp_avg_ = L_self_state_list_L_self_state_keys_103_exp_avg_ + l_self_state_list_l_self_state_keys_104_exp_avg_ = L_self_state_list_L_self_state_keys_104_exp_avg_ + l_self_state_list_l_self_state_keys_105_exp_avg_ = L_self_state_list_L_self_state_keys_105_exp_avg_ + l_self_state_list_l_self_state_keys_106_exp_avg_ = L_self_state_list_L_self_state_keys_106_exp_avg_ + l_self_state_list_l_self_state_keys_107_exp_avg_ = L_self_state_list_L_self_state_keys_107_exp_avg_ + l_self_state_list_l_self_state_keys_108_exp_avg_ = L_self_state_list_L_self_state_keys_108_exp_avg_ + l_self_state_list_l_self_state_keys_109_exp_avg_ = L_self_state_list_L_self_state_keys_109_exp_avg_ + l_self_state_list_l_self_state_keys_110_exp_avg_ = L_self_state_list_L_self_state_keys_110_exp_avg_ + l_self_state_list_l_self_state_keys_111_exp_avg_ = L_self_state_list_L_self_state_keys_111_exp_avg_ + l_self_state_list_l_self_state_keys_112_exp_avg_ = L_self_state_list_L_self_state_keys_112_exp_avg_ + l_self_state_list_l_self_state_keys_113_exp_avg_ = L_self_state_list_L_self_state_keys_113_exp_avg_ + l_self_state_list_l_self_state_keys_114_exp_avg_ = L_self_state_list_L_self_state_keys_114_exp_avg_ + l_self_state_list_l_self_state_keys_115_exp_avg_ = L_self_state_list_L_self_state_keys_115_exp_avg_ + l_self_state_list_l_self_state_keys_116_exp_avg_ = L_self_state_list_L_self_state_keys_116_exp_avg_ + l_self_state_list_l_self_state_keys_117_exp_avg_ = L_self_state_list_L_self_state_keys_117_exp_avg_ + l_self_state_list_l_self_state_keys_118_exp_avg_ = L_self_state_list_L_self_state_keys_118_exp_avg_ + l_self_state_list_l_self_state_keys_119_exp_avg_ = L_self_state_list_L_self_state_keys_119_exp_avg_ + l_self_state_list_l_self_state_keys_120_exp_avg_ = L_self_state_list_L_self_state_keys_120_exp_avg_ + l_self_state_list_l_self_state_keys_121_exp_avg_ = L_self_state_list_L_self_state_keys_121_exp_avg_ + l_self_state_list_l_self_state_keys_122_exp_avg_ = 
L_self_state_list_L_self_state_keys_122_exp_avg_ + l_self_state_list_l_self_state_keys_123_exp_avg_ = L_self_state_list_L_self_state_keys_123_exp_avg_ + l_self_state_list_l_self_state_keys_124_exp_avg_ = L_self_state_list_L_self_state_keys_124_exp_avg_ + l_self_state_list_l_self_state_keys_125_exp_avg_ = L_self_state_list_L_self_state_keys_125_exp_avg_ + l_self_state_list_l_self_state_keys_126_exp_avg_ = L_self_state_list_L_self_state_keys_126_exp_avg_ + l_self_state_list_l_self_state_keys_127_exp_avg_ = L_self_state_list_L_self_state_keys_127_exp_avg_ + l_self_state_list_l_self_state_keys_128_exp_avg_ = L_self_state_list_L_self_state_keys_128_exp_avg_ + l_self_state_list_l_self_state_keys_129_exp_avg_ = L_self_state_list_L_self_state_keys_129_exp_avg_ + l_self_state_list_l_self_state_keys_130_exp_avg_ = L_self_state_list_L_self_state_keys_130_exp_avg_ + l_self_state_list_l_self_state_keys_131_exp_avg_ = L_self_state_list_L_self_state_keys_131_exp_avg_ + l_self_state_list_l_self_state_keys_132_exp_avg_ = L_self_state_list_L_self_state_keys_132_exp_avg_ + l_self_state_list_l_self_state_keys_133_exp_avg_ = L_self_state_list_L_self_state_keys_133_exp_avg_ + l_self_state_list_l_self_state_keys_134_exp_avg_ = L_self_state_list_L_self_state_keys_134_exp_avg_ + l_self_state_list_l_self_state_keys_135_exp_avg_ = L_self_state_list_L_self_state_keys_135_exp_avg_ + l_self_state_list_l_self_state_keys_136_exp_avg_ = L_self_state_list_L_self_state_keys_136_exp_avg_ + l_self_state_list_l_self_state_keys_137_exp_avg_ = L_self_state_list_L_self_state_keys_137_exp_avg_ + l_self_state_list_l_self_state_keys_138_exp_avg_ = L_self_state_list_L_self_state_keys_138_exp_avg_ + l_self_state_list_l_self_state_keys_139_exp_avg_ = L_self_state_list_L_self_state_keys_139_exp_avg_ + l_self_state_list_l_self_state_keys_140_exp_avg_ = L_self_state_list_L_self_state_keys_140_exp_avg_ + l_self_state_list_l_self_state_keys_141_exp_avg_ = L_self_state_list_L_self_state_keys_141_exp_avg_ + l_self_state_list_l_self_state_keys_142_exp_avg_ = L_self_state_list_L_self_state_keys_142_exp_avg_ + l_self_state_list_l_self_state_keys_143_exp_avg_ = L_self_state_list_L_self_state_keys_143_exp_avg_ + l_self_state_list_l_self_state_keys_144_exp_avg_ = L_self_state_list_L_self_state_keys_144_exp_avg_ + l_self_state_list_l_self_state_keys_145_exp_avg_ = L_self_state_list_L_self_state_keys_145_exp_avg_ + l_self_state_list_l_self_state_keys_146_exp_avg_ = L_self_state_list_L_self_state_keys_146_exp_avg_ + l_self_state_list_l_self_state_keys_147_exp_avg_ = L_self_state_list_L_self_state_keys_147_exp_avg_ + l_self_state_list_l_self_state_keys_0_exp_avg_sq_ = L_self_state_list_L_self_state_keys_0_exp_avg_sq_ + l_self_state_list_l_self_state_keys_2_exp_avg_sq_ = L_self_state_list_L_self_state_keys_2_exp_avg_sq_ + l_self_state_list_l_self_state_keys_3_exp_avg_sq_ = L_self_state_list_L_self_state_keys_3_exp_avg_sq_ + l_self_state_list_l_self_state_keys_4_exp_avg_sq_ = L_self_state_list_L_self_state_keys_4_exp_avg_sq_ + l_self_state_list_l_self_state_keys_5_exp_avg_sq_ = L_self_state_list_L_self_state_keys_5_exp_avg_sq_ + l_self_state_list_l_self_state_keys_6_exp_avg_sq_ = L_self_state_list_L_self_state_keys_6_exp_avg_sq_ + l_self_state_list_l_self_state_keys_7_exp_avg_sq_ = L_self_state_list_L_self_state_keys_7_exp_avg_sq_ + l_self_state_list_l_self_state_keys_8_exp_avg_sq_ = L_self_state_list_L_self_state_keys_8_exp_avg_sq_ + l_self_state_list_l_self_state_keys_9_exp_avg_sq_ = L_self_state_list_L_self_state_keys_9_exp_avg_sq_ + 
l_self_state_list_l_self_state_keys_10_exp_avg_sq_ = L_self_state_list_L_self_state_keys_10_exp_avg_sq_ + l_self_state_list_l_self_state_keys_11_exp_avg_sq_ = L_self_state_list_L_self_state_keys_11_exp_avg_sq_ + l_self_state_list_l_self_state_keys_12_exp_avg_sq_ = L_self_state_list_L_self_state_keys_12_exp_avg_sq_ + l_self_state_list_l_self_state_keys_13_exp_avg_sq_ = L_self_state_list_L_self_state_keys_13_exp_avg_sq_ + l_self_state_list_l_self_state_keys_14_exp_avg_sq_ = L_self_state_list_L_self_state_keys_14_exp_avg_sq_ + l_self_state_list_l_self_state_keys_15_exp_avg_sq_ = L_self_state_list_L_self_state_keys_15_exp_avg_sq_ + l_self_state_list_l_self_state_keys_16_exp_avg_sq_ = L_self_state_list_L_self_state_keys_16_exp_avg_sq_ + l_self_state_list_l_self_state_keys_17_exp_avg_sq_ = L_self_state_list_L_self_state_keys_17_exp_avg_sq_ + l_self_state_list_l_self_state_keys_18_exp_avg_sq_ = L_self_state_list_L_self_state_keys_18_exp_avg_sq_ + l_self_state_list_l_self_state_keys_19_exp_avg_sq_ = L_self_state_list_L_self_state_keys_19_exp_avg_sq_ + l_self_state_list_l_self_state_keys_20_exp_avg_sq_ = L_self_state_list_L_self_state_keys_20_exp_avg_sq_ + l_self_state_list_l_self_state_keys_21_exp_avg_sq_ = L_self_state_list_L_self_state_keys_21_exp_avg_sq_ + l_self_state_list_l_self_state_keys_22_exp_avg_sq_ = L_self_state_list_L_self_state_keys_22_exp_avg_sq_ + l_self_state_list_l_self_state_keys_23_exp_avg_sq_ = L_self_state_list_L_self_state_keys_23_exp_avg_sq_ + l_self_state_list_l_self_state_keys_24_exp_avg_sq_ = L_self_state_list_L_self_state_keys_24_exp_avg_sq_ + l_self_state_list_l_self_state_keys_25_exp_avg_sq_ = L_self_state_list_L_self_state_keys_25_exp_avg_sq_ + l_self_state_list_l_self_state_keys_26_exp_avg_sq_ = L_self_state_list_L_self_state_keys_26_exp_avg_sq_ + l_self_state_list_l_self_state_keys_27_exp_avg_sq_ = L_self_state_list_L_self_state_keys_27_exp_avg_sq_ + l_self_state_list_l_self_state_keys_28_exp_avg_sq_ = L_self_state_list_L_self_state_keys_28_exp_avg_sq_ + l_self_state_list_l_self_state_keys_29_exp_avg_sq_ = L_self_state_list_L_self_state_keys_29_exp_avg_sq_ + l_self_state_list_l_self_state_keys_30_exp_avg_sq_ = L_self_state_list_L_self_state_keys_30_exp_avg_sq_ + l_self_state_list_l_self_state_keys_31_exp_avg_sq_ = L_self_state_list_L_self_state_keys_31_exp_avg_sq_ + l_self_state_list_l_self_state_keys_32_exp_avg_sq_ = L_self_state_list_L_self_state_keys_32_exp_avg_sq_ + l_self_state_list_l_self_state_keys_33_exp_avg_sq_ = L_self_state_list_L_self_state_keys_33_exp_avg_sq_ + l_self_state_list_l_self_state_keys_34_exp_avg_sq_ = L_self_state_list_L_self_state_keys_34_exp_avg_sq_ + l_self_state_list_l_self_state_keys_35_exp_avg_sq_ = L_self_state_list_L_self_state_keys_35_exp_avg_sq_ + l_self_state_list_l_self_state_keys_36_exp_avg_sq_ = L_self_state_list_L_self_state_keys_36_exp_avg_sq_ + l_self_state_list_l_self_state_keys_37_exp_avg_sq_ = L_self_state_list_L_self_state_keys_37_exp_avg_sq_ + l_self_state_list_l_self_state_keys_38_exp_avg_sq_ = L_self_state_list_L_self_state_keys_38_exp_avg_sq_ + l_self_state_list_l_self_state_keys_39_exp_avg_sq_ = L_self_state_list_L_self_state_keys_39_exp_avg_sq_ + l_self_state_list_l_self_state_keys_40_exp_avg_sq_ = L_self_state_list_L_self_state_keys_40_exp_avg_sq_ + l_self_state_list_l_self_state_keys_41_exp_avg_sq_ = L_self_state_list_L_self_state_keys_41_exp_avg_sq_ + l_self_state_list_l_self_state_keys_42_exp_avg_sq_ = L_self_state_list_L_self_state_keys_42_exp_avg_sq_ + l_self_state_list_l_self_state_keys_43_exp_avg_sq_ = 
L_self_state_list_L_self_state_keys_43_exp_avg_sq_ + l_self_state_list_l_self_state_keys_44_exp_avg_sq_ = L_self_state_list_L_self_state_keys_44_exp_avg_sq_ + l_self_state_list_l_self_state_keys_45_exp_avg_sq_ = L_self_state_list_L_self_state_keys_45_exp_avg_sq_ + l_self_state_list_l_self_state_keys_46_exp_avg_sq_ = L_self_state_list_L_self_state_keys_46_exp_avg_sq_ + l_self_state_list_l_self_state_keys_47_exp_avg_sq_ = L_self_state_list_L_self_state_keys_47_exp_avg_sq_ + l_self_state_list_l_self_state_keys_48_exp_avg_sq_ = L_self_state_list_L_self_state_keys_48_exp_avg_sq_ + l_self_state_list_l_self_state_keys_49_exp_avg_sq_ = L_self_state_list_L_self_state_keys_49_exp_avg_sq_ + l_self_state_list_l_self_state_keys_50_exp_avg_sq_ = L_self_state_list_L_self_state_keys_50_exp_avg_sq_ + l_self_state_list_l_self_state_keys_51_exp_avg_sq_ = L_self_state_list_L_self_state_keys_51_exp_avg_sq_ + l_self_state_list_l_self_state_keys_52_exp_avg_sq_ = L_self_state_list_L_self_state_keys_52_exp_avg_sq_ + l_self_state_list_l_self_state_keys_53_exp_avg_sq_ = L_self_state_list_L_self_state_keys_53_exp_avg_sq_ + l_self_state_list_l_self_state_keys_54_exp_avg_sq_ = L_self_state_list_L_self_state_keys_54_exp_avg_sq_ + l_self_state_list_l_self_state_keys_55_exp_avg_sq_ = L_self_state_list_L_self_state_keys_55_exp_avg_sq_ + l_self_state_list_l_self_state_keys_56_exp_avg_sq_ = L_self_state_list_L_self_state_keys_56_exp_avg_sq_ + l_self_state_list_l_self_state_keys_57_exp_avg_sq_ = L_self_state_list_L_self_state_keys_57_exp_avg_sq_ + l_self_state_list_l_self_state_keys_58_exp_avg_sq_ = L_self_state_list_L_self_state_keys_58_exp_avg_sq_ + l_self_state_list_l_self_state_keys_59_exp_avg_sq_ = L_self_state_list_L_self_state_keys_59_exp_avg_sq_ + l_self_state_list_l_self_state_keys_60_exp_avg_sq_ = L_self_state_list_L_self_state_keys_60_exp_avg_sq_ + l_self_state_list_l_self_state_keys_61_exp_avg_sq_ = L_self_state_list_L_self_state_keys_61_exp_avg_sq_ + l_self_state_list_l_self_state_keys_62_exp_avg_sq_ = L_self_state_list_L_self_state_keys_62_exp_avg_sq_ + l_self_state_list_l_self_state_keys_63_exp_avg_sq_ = L_self_state_list_L_self_state_keys_63_exp_avg_sq_ + l_self_state_list_l_self_state_keys_64_exp_avg_sq_ = L_self_state_list_L_self_state_keys_64_exp_avg_sq_ + l_self_state_list_l_self_state_keys_65_exp_avg_sq_ = L_self_state_list_L_self_state_keys_65_exp_avg_sq_ + l_self_state_list_l_self_state_keys_66_exp_avg_sq_ = L_self_state_list_L_self_state_keys_66_exp_avg_sq_ + l_self_state_list_l_self_state_keys_67_exp_avg_sq_ = L_self_state_list_L_self_state_keys_67_exp_avg_sq_ + l_self_state_list_l_self_state_keys_68_exp_avg_sq_ = L_self_state_list_L_self_state_keys_68_exp_avg_sq_ + l_self_state_list_l_self_state_keys_69_exp_avg_sq_ = L_self_state_list_L_self_state_keys_69_exp_avg_sq_ + l_self_state_list_l_self_state_keys_70_exp_avg_sq_ = L_self_state_list_L_self_state_keys_70_exp_avg_sq_ + l_self_state_list_l_self_state_keys_71_exp_avg_sq_ = L_self_state_list_L_self_state_keys_71_exp_avg_sq_ + l_self_state_list_l_self_state_keys_72_exp_avg_sq_ = L_self_state_list_L_self_state_keys_72_exp_avg_sq_ + l_self_state_list_l_self_state_keys_73_exp_avg_sq_ = L_self_state_list_L_self_state_keys_73_exp_avg_sq_ + l_self_state_list_l_self_state_keys_74_exp_avg_sq_ = L_self_state_list_L_self_state_keys_74_exp_avg_sq_ + l_self_state_list_l_self_state_keys_75_exp_avg_sq_ = L_self_state_list_L_self_state_keys_75_exp_avg_sq_ + l_self_state_list_l_self_state_keys_76_exp_avg_sq_ = L_self_state_list_L_self_state_keys_76_exp_avg_sq_ + 
l_self_state_list_l_self_state_keys_77_exp_avg_sq_ = L_self_state_list_L_self_state_keys_77_exp_avg_sq_ + l_self_state_list_l_self_state_keys_78_exp_avg_sq_ = L_self_state_list_L_self_state_keys_78_exp_avg_sq_ + l_self_state_list_l_self_state_keys_79_exp_avg_sq_ = L_self_state_list_L_self_state_keys_79_exp_avg_sq_ + l_self_state_list_l_self_state_keys_80_exp_avg_sq_ = L_self_state_list_L_self_state_keys_80_exp_avg_sq_ + l_self_state_list_l_self_state_keys_81_exp_avg_sq_ = L_self_state_list_L_self_state_keys_81_exp_avg_sq_ + l_self_state_list_l_self_state_keys_82_exp_avg_sq_ = L_self_state_list_L_self_state_keys_82_exp_avg_sq_ + l_self_state_list_l_self_state_keys_83_exp_avg_sq_ = L_self_state_list_L_self_state_keys_83_exp_avg_sq_ + l_self_state_list_l_self_state_keys_84_exp_avg_sq_ = L_self_state_list_L_self_state_keys_84_exp_avg_sq_ + l_self_state_list_l_self_state_keys_85_exp_avg_sq_ = L_self_state_list_L_self_state_keys_85_exp_avg_sq_ + l_self_state_list_l_self_state_keys_86_exp_avg_sq_ = L_self_state_list_L_self_state_keys_86_exp_avg_sq_ + l_self_state_list_l_self_state_keys_87_exp_avg_sq_ = L_self_state_list_L_self_state_keys_87_exp_avg_sq_ + l_self_state_list_l_self_state_keys_88_exp_avg_sq_ = L_self_state_list_L_self_state_keys_88_exp_avg_sq_ + l_self_state_list_l_self_state_keys_89_exp_avg_sq_ = L_self_state_list_L_self_state_keys_89_exp_avg_sq_ + l_self_state_list_l_self_state_keys_90_exp_avg_sq_ = L_self_state_list_L_self_state_keys_90_exp_avg_sq_ + l_self_state_list_l_self_state_keys_91_exp_avg_sq_ = L_self_state_list_L_self_state_keys_91_exp_avg_sq_ + l_self_state_list_l_self_state_keys_92_exp_avg_sq_ = L_self_state_list_L_self_state_keys_92_exp_avg_sq_ + l_self_state_list_l_self_state_keys_93_exp_avg_sq_ = L_self_state_list_L_self_state_keys_93_exp_avg_sq_ + l_self_state_list_l_self_state_keys_94_exp_avg_sq_ = L_self_state_list_L_self_state_keys_94_exp_avg_sq_ + l_self_state_list_l_self_state_keys_95_exp_avg_sq_ = L_self_state_list_L_self_state_keys_95_exp_avg_sq_ + l_self_state_list_l_self_state_keys_96_exp_avg_sq_ = L_self_state_list_L_self_state_keys_96_exp_avg_sq_ + l_self_state_list_l_self_state_keys_97_exp_avg_sq_ = L_self_state_list_L_self_state_keys_97_exp_avg_sq_ + l_self_state_list_l_self_state_keys_98_exp_avg_sq_ = L_self_state_list_L_self_state_keys_98_exp_avg_sq_ + l_self_state_list_l_self_state_keys_99_exp_avg_sq_ = L_self_state_list_L_self_state_keys_99_exp_avg_sq_ + l_self_state_list_l_self_state_keys_100_exp_avg_sq_ = L_self_state_list_L_self_state_keys_100_exp_avg_sq_ + l_self_state_list_l_self_state_keys_101_exp_avg_sq_ = L_self_state_list_L_self_state_keys_101_exp_avg_sq_ + l_self_state_list_l_self_state_keys_102_exp_avg_sq_ = L_self_state_list_L_self_state_keys_102_exp_avg_sq_ + l_self_state_list_l_self_state_keys_103_exp_avg_sq_ = L_self_state_list_L_self_state_keys_103_exp_avg_sq_ + l_self_state_list_l_self_state_keys_104_exp_avg_sq_ = L_self_state_list_L_self_state_keys_104_exp_avg_sq_ + l_self_state_list_l_self_state_keys_105_exp_avg_sq_ = L_self_state_list_L_self_state_keys_105_exp_avg_sq_ + l_self_state_list_l_self_state_keys_106_exp_avg_sq_ = L_self_state_list_L_self_state_keys_106_exp_avg_sq_ + l_self_state_list_l_self_state_keys_107_exp_avg_sq_ = L_self_state_list_L_self_state_keys_107_exp_avg_sq_ + l_self_state_list_l_self_state_keys_108_exp_avg_sq_ = L_self_state_list_L_self_state_keys_108_exp_avg_sq_ + l_self_state_list_l_self_state_keys_109_exp_avg_sq_ = L_self_state_list_L_self_state_keys_109_exp_avg_sq_ + 
l_self_state_list_l_self_state_keys_110_exp_avg_sq_ = L_self_state_list_L_self_state_keys_110_exp_avg_sq_
+ l_self_state_list_l_self_state_keys_111_exp_avg_sq_ = L_self_state_list_L_self_state_keys_111_exp_avg_sq_
+ l_self_state_list_l_self_state_keys_112_exp_avg_sq_ = L_self_state_list_L_self_state_keys_112_exp_avg_sq_
+ l_self_state_list_l_self_state_keys_113_exp_avg_sq_ = L_self_state_list_L_self_state_keys_113_exp_avg_sq_
+ l_self_state_list_l_self_state_keys_114_exp_avg_sq_ = L_self_state_list_L_self_state_keys_114_exp_avg_sq_
+ l_self_state_list_l_self_state_keys_115_exp_avg_sq_ = L_self_state_list_L_self_state_keys_115_exp_avg_sq_
+ l_self_state_list_l_self_state_keys_116_exp_avg_sq_ = L_self_state_list_L_self_state_keys_116_exp_avg_sq_
+ l_self_state_list_l_self_state_keys_117_exp_avg_sq_ = L_self_state_list_L_self_state_keys_117_exp_avg_sq_
+ l_self_state_list_l_self_state_keys_118_exp_avg_sq_ = L_self_state_list_L_self_state_keys_118_exp_avg_sq_
+ l_self_state_list_l_self_state_keys_119_exp_avg_sq_ = L_self_state_list_L_self_state_keys_119_exp_avg_sq_
+ l_self_state_list_l_self_state_keys_120_exp_avg_sq_ = L_self_state_list_L_self_state_keys_120_exp_avg_sq_
+ l_self_state_list_l_self_state_keys_121_exp_avg_sq_ = L_self_state_list_L_self_state_keys_121_exp_avg_sq_
+ l_self_state_list_l_self_state_keys_122_exp_avg_sq_ = L_self_state_list_L_self_state_keys_122_exp_avg_sq_
+ l_self_state_list_l_self_state_keys_123_exp_avg_sq_ = L_self_state_list_L_self_state_keys_123_exp_avg_sq_
+ l_self_state_list_l_self_state_keys_124_exp_avg_sq_ = L_self_state_list_L_self_state_keys_124_exp_avg_sq_
+ l_self_state_list_l_self_state_keys_125_exp_avg_sq_ = L_self_state_list_L_self_state_keys_125_exp_avg_sq_
+ l_self_state_list_l_self_state_keys_126_exp_avg_sq_ = L_self_state_list_L_self_state_keys_126_exp_avg_sq_
+ l_self_state_list_l_self_state_keys_127_exp_avg_sq_ = L_self_state_list_L_self_state_keys_127_exp_avg_sq_
+ l_self_state_list_l_self_state_keys_128_exp_avg_sq_ = L_self_state_list_L_self_state_keys_128_exp_avg_sq_
+ l_self_state_list_l_self_state_keys_129_exp_avg_sq_ = L_self_state_list_L_self_state_keys_129_exp_avg_sq_
+ l_self_state_list_l_self_state_keys_130_exp_avg_sq_ = L_self_state_list_L_self_state_keys_130_exp_avg_sq_
+ l_self_state_list_l_self_state_keys_131_exp_avg_sq_ = L_self_state_list_L_self_state_keys_131_exp_avg_sq_
+ l_self_state_list_l_self_state_keys_132_exp_avg_sq_ = L_self_state_list_L_self_state_keys_132_exp_avg_sq_
+ l_self_state_list_l_self_state_keys_133_exp_avg_sq_ = L_self_state_list_L_self_state_keys_133_exp_avg_sq_
+ l_self_state_list_l_self_state_keys_134_exp_avg_sq_ = L_self_state_list_L_self_state_keys_134_exp_avg_sq_
+ l_self_state_list_l_self_state_keys_135_exp_avg_sq_ = L_self_state_list_L_self_state_keys_135_exp_avg_sq_
+ l_self_state_list_l_self_state_keys_136_exp_avg_sq_ = L_self_state_list_L_self_state_keys_136_exp_avg_sq_
+ l_self_state_list_l_self_state_keys_137_exp_avg_sq_ = L_self_state_list_L_self_state_keys_137_exp_avg_sq_
+ l_self_state_list_l_self_state_keys_138_exp_avg_sq_ = L_self_state_list_L_self_state_keys_138_exp_avg_sq_
+ l_self_state_list_l_self_state_keys_139_exp_avg_sq_ = L_self_state_list_L_self_state_keys_139_exp_avg_sq_
+ l_self_state_list_l_self_state_keys_140_exp_avg_sq_ = L_self_state_list_L_self_state_keys_140_exp_avg_sq_
+ l_self_state_list_l_self_state_keys_141_exp_avg_sq_ = L_self_state_list_L_self_state_keys_141_exp_avg_sq_
+ l_self_state_list_l_self_state_keys_142_exp_avg_sq_ = L_self_state_list_L_self_state_keys_142_exp_avg_sq_
+ l_self_state_list_l_self_state_keys_143_exp_avg_sq_ = L_self_state_list_L_self_state_keys_143_exp_avg_sq_
+ l_self_state_list_l_self_state_keys_144_exp_avg_sq_ = L_self_state_list_L_self_state_keys_144_exp_avg_sq_
+ l_self_state_list_l_self_state_keys_145_exp_avg_sq_ = L_self_state_list_L_self_state_keys_145_exp_avg_sq_
+ l_self_state_list_l_self_state_keys_146_exp_avg_sq_ = L_self_state_list_L_self_state_keys_146_exp_avg_sq_
+ l_self_state_list_l_self_state_keys_147_exp_avg_sq_ = L_self_state_list_L_self_state_keys_147_exp_avg_sq_
+ l_self_state_list_l_self_state_keys_0_step_ = L_self_state_list_L_self_state_keys_0_step_
+ l_self_state_list_l_self_state_keys_2_step_ = L_self_state_list_L_self_state_keys_2_step_
+ l_self_state_list_l_self_state_keys_3_step_ = L_self_state_list_L_self_state_keys_3_step_
+ l_self_state_list_l_self_state_keys_4_step_ = L_self_state_list_L_self_state_keys_4_step_
+ l_self_state_list_l_self_state_keys_5_step_ = L_self_state_list_L_self_state_keys_5_step_
+ l_self_state_list_l_self_state_keys_6_step_ = L_self_state_list_L_self_state_keys_6_step_
+ l_self_state_list_l_self_state_keys_7_step_ = L_self_state_list_L_self_state_keys_7_step_
+ l_self_state_list_l_self_state_keys_8_step_ = L_self_state_list_L_self_state_keys_8_step_
+ l_self_state_list_l_self_state_keys_9_step_ = L_self_state_list_L_self_state_keys_9_step_
+ l_self_state_list_l_self_state_keys_10_step_ = L_self_state_list_L_self_state_keys_10_step_
+ l_self_state_list_l_self_state_keys_11_step_ = L_self_state_list_L_self_state_keys_11_step_
+ l_self_state_list_l_self_state_keys_12_step_ = L_self_state_list_L_self_state_keys_12_step_
+ l_self_state_list_l_self_state_keys_13_step_ = L_self_state_list_L_self_state_keys_13_step_
+ l_self_state_list_l_self_state_keys_14_step_ = L_self_state_list_L_self_state_keys_14_step_
+ l_self_state_list_l_self_state_keys_15_step_ = L_self_state_list_L_self_state_keys_15_step_
+ l_self_state_list_l_self_state_keys_16_step_ = L_self_state_list_L_self_state_keys_16_step_
+ l_self_state_list_l_self_state_keys_17_step_ = L_self_state_list_L_self_state_keys_17_step_
+ l_self_state_list_l_self_state_keys_18_step_ = L_self_state_list_L_self_state_keys_18_step_
+ l_self_state_list_l_self_state_keys_19_step_ = L_self_state_list_L_self_state_keys_19_step_
+ l_self_state_list_l_self_state_keys_20_step_ = L_self_state_list_L_self_state_keys_20_step_
+ l_self_state_list_l_self_state_keys_21_step_ = L_self_state_list_L_self_state_keys_21_step_
+ l_self_state_list_l_self_state_keys_22_step_ = L_self_state_list_L_self_state_keys_22_step_
+ l_self_state_list_l_self_state_keys_23_step_ = L_self_state_list_L_self_state_keys_23_step_
+ l_self_state_list_l_self_state_keys_24_step_ = L_self_state_list_L_self_state_keys_24_step_
+ l_self_state_list_l_self_state_keys_25_step_ = L_self_state_list_L_self_state_keys_25_step_
+ l_self_state_list_l_self_state_keys_26_step_ = L_self_state_list_L_self_state_keys_26_step_
+ l_self_state_list_l_self_state_keys_27_step_ = L_self_state_list_L_self_state_keys_27_step_
+ l_self_state_list_l_self_state_keys_28_step_ = L_self_state_list_L_self_state_keys_28_step_
+ l_self_state_list_l_self_state_keys_29_step_ = L_self_state_list_L_self_state_keys_29_step_
+ l_self_state_list_l_self_state_keys_30_step_ = L_self_state_list_L_self_state_keys_30_step_
+ l_self_state_list_l_self_state_keys_31_step_ = L_self_state_list_L_self_state_keys_31_step_
+ l_self_state_list_l_self_state_keys_32_step_ = L_self_state_list_L_self_state_keys_32_step_
+ l_self_state_list_l_self_state_keys_33_step_ = L_self_state_list_L_self_state_keys_33_step_
+ l_self_state_list_l_self_state_keys_34_step_ = L_self_state_list_L_self_state_keys_34_step_
+ l_self_state_list_l_self_state_keys_35_step_ = L_self_state_list_L_self_state_keys_35_step_
+ l_self_state_list_l_self_state_keys_36_step_ = L_self_state_list_L_self_state_keys_36_step_
+ l_self_state_list_l_self_state_keys_37_step_ = L_self_state_list_L_self_state_keys_37_step_
+ l_self_state_list_l_self_state_keys_38_step_ = L_self_state_list_L_self_state_keys_38_step_
+ l_self_state_list_l_self_state_keys_39_step_ = L_self_state_list_L_self_state_keys_39_step_
+ l_self_state_list_l_self_state_keys_40_step_ = L_self_state_list_L_self_state_keys_40_step_
+ l_self_state_list_l_self_state_keys_41_step_ = L_self_state_list_L_self_state_keys_41_step_
+ l_self_state_list_l_self_state_keys_42_step_ = L_self_state_list_L_self_state_keys_42_step_
+ l_self_state_list_l_self_state_keys_43_step_ = L_self_state_list_L_self_state_keys_43_step_
+ l_self_state_list_l_self_state_keys_44_step_ = L_self_state_list_L_self_state_keys_44_step_
+ l_self_state_list_l_self_state_keys_45_step_ = L_self_state_list_L_self_state_keys_45_step_
+ l_self_state_list_l_self_state_keys_46_step_ = L_self_state_list_L_self_state_keys_46_step_
+ l_self_state_list_l_self_state_keys_47_step_ = L_self_state_list_L_self_state_keys_47_step_
+ l_self_state_list_l_self_state_keys_48_step_ = L_self_state_list_L_self_state_keys_48_step_
+ l_self_state_list_l_self_state_keys_49_step_ = L_self_state_list_L_self_state_keys_49_step_
+ l_self_state_list_l_self_state_keys_50_step_ = L_self_state_list_L_self_state_keys_50_step_
+ l_self_state_list_l_self_state_keys_51_step_ = L_self_state_list_L_self_state_keys_51_step_
+ l_self_state_list_l_self_state_keys_52_step_ = L_self_state_list_L_self_state_keys_52_step_
+ l_self_state_list_l_self_state_keys_53_step_ = L_self_state_list_L_self_state_keys_53_step_
+ l_self_state_list_l_self_state_keys_54_step_ = L_self_state_list_L_self_state_keys_54_step_
+ l_self_state_list_l_self_state_keys_55_step_ = L_self_state_list_L_self_state_keys_55_step_
+ l_self_state_list_l_self_state_keys_56_step_ = L_self_state_list_L_self_state_keys_56_step_
+ l_self_state_list_l_self_state_keys_57_step_ = L_self_state_list_L_self_state_keys_57_step_
+ l_self_state_list_l_self_state_keys_58_step_ = L_self_state_list_L_self_state_keys_58_step_
+ l_self_state_list_l_self_state_keys_59_step_ = L_self_state_list_L_self_state_keys_59_step_
+ l_self_state_list_l_self_state_keys_60_step_ = L_self_state_list_L_self_state_keys_60_step_
+ l_self_state_list_l_self_state_keys_61_step_ = L_self_state_list_L_self_state_keys_61_step_
+ l_self_state_list_l_self_state_keys_62_step_ = L_self_state_list_L_self_state_keys_62_step_
+ l_self_state_list_l_self_state_keys_63_step_ = L_self_state_list_L_self_state_keys_63_step_
+ l_self_state_list_l_self_state_keys_64_step_ = L_self_state_list_L_self_state_keys_64_step_
+ l_self_state_list_l_self_state_keys_65_step_ = L_self_state_list_L_self_state_keys_65_step_
+ l_self_state_list_l_self_state_keys_66_step_ = L_self_state_list_L_self_state_keys_66_step_
+ l_self_state_list_l_self_state_keys_67_step_ = L_self_state_list_L_self_state_keys_67_step_
+ l_self_state_list_l_self_state_keys_68_step_ = L_self_state_list_L_self_state_keys_68_step_
+ l_self_state_list_l_self_state_keys_69_step_ = L_self_state_list_L_self_state_keys_69_step_
+ l_self_state_list_l_self_state_keys_70_step_ = L_self_state_list_L_self_state_keys_70_step_
+ l_self_state_list_l_self_state_keys_71_step_ = L_self_state_list_L_self_state_keys_71_step_
+ l_self_state_list_l_self_state_keys_72_step_ = L_self_state_list_L_self_state_keys_72_step_
+ l_self_state_list_l_self_state_keys_73_step_ = L_self_state_list_L_self_state_keys_73_step_
+ l_self_state_list_l_self_state_keys_74_step_ = L_self_state_list_L_self_state_keys_74_step_
+ l_self_state_list_l_self_state_keys_75_step_ = L_self_state_list_L_self_state_keys_75_step_
+ l_self_state_list_l_self_state_keys_76_step_ = L_self_state_list_L_self_state_keys_76_step_
+ l_self_state_list_l_self_state_keys_77_step_ = L_self_state_list_L_self_state_keys_77_step_
+ l_self_state_list_l_self_state_keys_78_step_ = L_self_state_list_L_self_state_keys_78_step_
+ l_self_state_list_l_self_state_keys_79_step_ = L_self_state_list_L_self_state_keys_79_step_
+ l_self_state_list_l_self_state_keys_80_step_ = L_self_state_list_L_self_state_keys_80_step_
+ l_self_state_list_l_self_state_keys_81_step_ = L_self_state_list_L_self_state_keys_81_step_
+ l_self_state_list_l_self_state_keys_82_step_ = L_self_state_list_L_self_state_keys_82_step_
+ l_self_state_list_l_self_state_keys_83_step_ = L_self_state_list_L_self_state_keys_83_step_
+ l_self_state_list_l_self_state_keys_84_step_ = L_self_state_list_L_self_state_keys_84_step_
+ l_self_state_list_l_self_state_keys_85_step_ = L_self_state_list_L_self_state_keys_85_step_
+ l_self_state_list_l_self_state_keys_86_step_ = L_self_state_list_L_self_state_keys_86_step_
+ l_self_state_list_l_self_state_keys_87_step_ = L_self_state_list_L_self_state_keys_87_step_
+ l_self_state_list_l_self_state_keys_88_step_ = L_self_state_list_L_self_state_keys_88_step_
+ l_self_state_list_l_self_state_keys_89_step_ = L_self_state_list_L_self_state_keys_89_step_
+ l_self_state_list_l_self_state_keys_90_step_ = L_self_state_list_L_self_state_keys_90_step_
+ l_self_state_list_l_self_state_keys_91_step_ = L_self_state_list_L_self_state_keys_91_step_
+ l_self_state_list_l_self_state_keys_92_step_ = L_self_state_list_L_self_state_keys_92_step_
+ l_self_state_list_l_self_state_keys_93_step_ = L_self_state_list_L_self_state_keys_93_step_
+ l_self_state_list_l_self_state_keys_94_step_ = L_self_state_list_L_self_state_keys_94_step_
+ l_self_state_list_l_self_state_keys_95_step_ = L_self_state_list_L_self_state_keys_95_step_
+ l_self_state_list_l_self_state_keys_96_step_ = L_self_state_list_L_self_state_keys_96_step_
+ l_self_state_list_l_self_state_keys_97_step_ = L_self_state_list_L_self_state_keys_97_step_
+ l_self_state_list_l_self_state_keys_98_step_ = L_self_state_list_L_self_state_keys_98_step_
+ l_self_state_list_l_self_state_keys_99_step_ = L_self_state_list_L_self_state_keys_99_step_
+ l_self_state_list_l_self_state_keys_100_step_ = L_self_state_list_L_self_state_keys_100_step_
+ l_self_state_list_l_self_state_keys_101_step_ = L_self_state_list_L_self_state_keys_101_step_
+ l_self_state_list_l_self_state_keys_102_step_ = L_self_state_list_L_self_state_keys_102_step_
+ l_self_state_list_l_self_state_keys_103_step_ = L_self_state_list_L_self_state_keys_103_step_
+ l_self_state_list_l_self_state_keys_104_step_ = L_self_state_list_L_self_state_keys_104_step_
+ l_self_state_list_l_self_state_keys_105_step_ = L_self_state_list_L_self_state_keys_105_step_
+ l_self_state_list_l_self_state_keys_106_step_ = L_self_state_list_L_self_state_keys_106_step_
+ l_self_state_list_l_self_state_keys_107_step_ = L_self_state_list_L_self_state_keys_107_step_
+ l_self_state_list_l_self_state_keys_108_step_ = L_self_state_list_L_self_state_keys_108_step_
+ l_self_state_list_l_self_state_keys_109_step_ = L_self_state_list_L_self_state_keys_109_step_
+ l_self_state_list_l_self_state_keys_110_step_ = L_self_state_list_L_self_state_keys_110_step_
+ l_self_state_list_l_self_state_keys_111_step_ = L_self_state_list_L_self_state_keys_111_step_
+ l_self_state_list_l_self_state_keys_112_step_ = L_self_state_list_L_self_state_keys_112_step_
+ l_self_state_list_l_self_state_keys_113_step_ = L_self_state_list_L_self_state_keys_113_step_
+ l_self_state_list_l_self_state_keys_114_step_ = L_self_state_list_L_self_state_keys_114_step_
+ l_self_state_list_l_self_state_keys_115_step_ = L_self_state_list_L_self_state_keys_115_step_
+ l_self_state_list_l_self_state_keys_116_step_ = L_self_state_list_L_self_state_keys_116_step_
+ l_self_state_list_l_self_state_keys_117_step_ = L_self_state_list_L_self_state_keys_117_step_
+ l_self_state_list_l_self_state_keys_118_step_ = L_self_state_list_L_self_state_keys_118_step_
+ l_self_state_list_l_self_state_keys_119_step_ = L_self_state_list_L_self_state_keys_119_step_
+ l_self_state_list_l_self_state_keys_120_step_ = L_self_state_list_L_self_state_keys_120_step_
+ l_self_state_list_l_self_state_keys_121_step_ = L_self_state_list_L_self_state_keys_121_step_
+ l_self_state_list_l_self_state_keys_122_step_ = L_self_state_list_L_self_state_keys_122_step_
+ l_self_state_list_l_self_state_keys_123_step_ = L_self_state_list_L_self_state_keys_123_step_
+ l_self_state_list_l_self_state_keys_124_step_ = L_self_state_list_L_self_state_keys_124_step_
+ l_self_state_list_l_self_state_keys_125_step_ = L_self_state_list_L_self_state_keys_125_step_
+ l_self_state_list_l_self_state_keys_126_step_ = L_self_state_list_L_self_state_keys_126_step_
+ l_self_state_list_l_self_state_keys_127_step_ = L_self_state_list_L_self_state_keys_127_step_
+ l_self_state_list_l_self_state_keys_128_step_ = L_self_state_list_L_self_state_keys_128_step_
+ l_self_state_list_l_self_state_keys_129_step_ = L_self_state_list_L_self_state_keys_129_step_
+ l_self_state_list_l_self_state_keys_130_step_ = L_self_state_list_L_self_state_keys_130_step_
+ l_self_state_list_l_self_state_keys_131_step_ = L_self_state_list_L_self_state_keys_131_step_
+ l_self_state_list_l_self_state_keys_132_step_ = L_self_state_list_L_self_state_keys_132_step_
+ l_self_state_list_l_self_state_keys_133_step_ = L_self_state_list_L_self_state_keys_133_step_
+ l_self_state_list_l_self_state_keys_134_step_ = L_self_state_list_L_self_state_keys_134_step_
+ l_self_state_list_l_self_state_keys_135_step_ = L_self_state_list_L_self_state_keys_135_step_
+ l_self_state_list_l_self_state_keys_136_step_ = L_self_state_list_L_self_state_keys_136_step_
+ l_self_state_list_l_self_state_keys_137_step_ = L_self_state_list_L_self_state_keys_137_step_
+ l_self_state_list_l_self_state_keys_138_step_ = L_self_state_list_L_self_state_keys_138_step_
+ l_self_state_list_l_self_state_keys_139_step_ = L_self_state_list_L_self_state_keys_139_step_
+ l_self_state_list_l_self_state_keys_140_step_ = L_self_state_list_L_self_state_keys_140_step_
+ l_self_state_list_l_self_state_keys_141_step_ = L_self_state_list_L_self_state_keys_141_step_
+ l_self_state_list_l_self_state_keys_142_step_ = L_self_state_list_L_self_state_keys_142_step_
+ l_self_state_list_l_self_state_keys_143_step_ = L_self_state_list_L_self_state_keys_143_step_
+ l_self_state_list_l_self_state_keys_144_step_ = L_self_state_list_L_self_state_keys_144_step_
+ l_self_state_list_l_self_state_keys_145_step_ = L_self_state_list_L_self_state_keys_145_step_
+ l_self_state_list_l_self_state_keys_146_step_ = L_self_state_list_L_self_state_keys_146_step_
+ l_self_state_list_l_self_state_keys_147_step_ = L_self_state_list_L_self_state_keys_147_step_
+
+ # File: /data/users/jjwu/a/pytorch/torch/optim/adam.py:529 in _multi_tensor_adam, code: torch._foreach_add_(device_state_steps, 1)
+ _foreach_add_ = torch._foreach_add_([l_self_state_list_l_self_state_keys_0_step_, l_self_state_list_l_self_state_keys_1_step_, l_self_state_list_l_self_state_keys_2_step_, l_self_state_list_l_self_state_keys_3_step_, l_self_state_list_l_self_state_keys_4_step_, l_self_state_list_l_self_state_keys_5_step_, l_self_state_list_l_self_state_keys_6_step_, l_self_state_list_l_self_state_keys_7_step_, l_self_state_list_l_self_state_keys_8_step_, l_self_state_list_l_self_state_keys_9_step_, l_self_state_list_l_self_state_keys_10_step_, l_self_state_list_l_self_state_keys_11_step_, l_self_state_list_l_self_state_keys_12_step_, l_self_state_list_l_self_state_keys_13_step_, l_self_state_list_l_self_state_keys_14_step_, l_self_state_list_l_self_state_keys_15_step_, l_self_state_list_l_self_state_keys_16_step_, l_self_state_list_l_self_state_keys_17_step_, l_self_state_list_l_self_state_keys_18_step_, l_self_state_list_l_self_state_keys_19_step_, l_self_state_list_l_self_state_keys_20_step_, l_self_state_list_l_self_state_keys_21_step_, l_self_state_list_l_self_state_keys_22_step_, l_self_state_list_l_self_state_keys_23_step_, l_self_state_list_l_self_state_keys_24_step_, l_self_state_list_l_self_state_keys_25_step_, l_self_state_list_l_self_state_keys_26_step_, l_self_state_list_l_self_state_keys_27_step_, l_self_state_list_l_self_state_keys_28_step_, l_self_state_list_l_self_state_keys_29_step_, l_self_state_list_l_self_state_keys_30_step_, l_self_state_list_l_self_state_keys_31_step_, l_self_state_list_l_self_state_keys_32_step_, l_self_state_list_l_self_state_keys_33_step_, l_self_state_list_l_self_state_keys_34_step_, l_self_state_list_l_self_state_keys_35_step_, l_self_state_list_l_self_state_keys_36_step_, l_self_state_list_l_self_state_keys_37_step_, l_self_state_list_l_self_state_keys_38_step_, l_self_state_list_l_self_state_keys_39_step_, l_self_state_list_l_self_state_keys_40_step_, l_self_state_list_l_self_state_keys_41_step_, l_self_state_list_l_self_state_keys_42_step_, l_self_state_list_l_self_state_keys_43_step_, l_self_state_list_l_self_state_keys_44_step_, l_self_state_list_l_self_state_keys_45_step_, l_self_state_list_l_self_state_keys_46_step_, l_self_state_list_l_self_state_keys_47_step_, l_self_state_list_l_self_state_keys_48_step_, l_self_state_list_l_self_state_keys_49_step_, l_self_state_list_l_self_state_keys_50_step_, l_self_state_list_l_self_state_keys_51_step_, l_self_state_list_l_self_state_keys_52_step_, l_self_state_list_l_self_state_keys_53_step_, l_self_state_list_l_self_state_keys_54_step_, l_self_state_list_l_self_state_keys_55_step_, l_self_state_list_l_self_state_keys_56_step_, l_self_state_list_l_self_state_keys_57_step_, l_self_state_list_l_self_state_keys_58_step_, l_self_state_list_l_self_state_keys_59_step_, l_self_state_list_l_self_state_keys_60_step_, l_self_state_list_l_self_state_keys_61_step_, l_self_state_list_l_self_state_keys_62_step_, l_self_state_list_l_self_state_keys_63_step_, l_self_state_list_l_self_state_keys_64_step_, l_self_state_list_l_self_state_keys_65_step_,
l_self_state_list_l_self_state_keys_66_step_, l_self_state_list_l_self_state_keys_67_step_, l_self_state_list_l_self_state_keys_68_step_, l_self_state_list_l_self_state_keys_69_step_, l_self_state_list_l_self_state_keys_70_step_, l_self_state_list_l_self_state_keys_71_step_, l_self_state_list_l_self_state_keys_72_step_, l_self_state_list_l_self_state_keys_73_step_, l_self_state_list_l_self_state_keys_74_step_, l_self_state_list_l_self_state_keys_75_step_, l_self_state_list_l_self_state_keys_76_step_, l_self_state_list_l_self_state_keys_77_step_, l_self_state_list_l_self_state_keys_78_step_, l_self_state_list_l_self_state_keys_79_step_, l_self_state_list_l_self_state_keys_80_step_, l_self_state_list_l_self_state_keys_81_step_, l_self_state_list_l_self_state_keys_82_step_, l_self_state_list_l_self_state_keys_83_step_, l_self_state_list_l_self_state_keys_84_step_, l_self_state_list_l_self_state_keys_85_step_, l_self_state_list_l_self_state_keys_86_step_, l_self_state_list_l_self_state_keys_87_step_, l_self_state_list_l_self_state_keys_88_step_, l_self_state_list_l_self_state_keys_89_step_, l_self_state_list_l_self_state_keys_90_step_, l_self_state_list_l_self_state_keys_91_step_, l_self_state_list_l_self_state_keys_92_step_, l_self_state_list_l_self_state_keys_93_step_, l_self_state_list_l_self_state_keys_94_step_, l_self_state_list_l_self_state_keys_95_step_, l_self_state_list_l_self_state_keys_96_step_, l_self_state_list_l_self_state_keys_97_step_, l_self_state_list_l_self_state_keys_98_step_, l_self_state_list_l_self_state_keys_99_step_, l_self_state_list_l_self_state_keys_100_step_, l_self_state_list_l_self_state_keys_101_step_, l_self_state_list_l_self_state_keys_102_step_, l_self_state_list_l_self_state_keys_103_step_, l_self_state_list_l_self_state_keys_104_step_, l_self_state_list_l_self_state_keys_105_step_, l_self_state_list_l_self_state_keys_106_step_, l_self_state_list_l_self_state_keys_107_step_, l_self_state_list_l_self_state_keys_108_step_, l_self_state_list_l_self_state_keys_109_step_, l_self_state_list_l_self_state_keys_110_step_, l_self_state_list_l_self_state_keys_111_step_, l_self_state_list_l_self_state_keys_112_step_, l_self_state_list_l_self_state_keys_113_step_, l_self_state_list_l_self_state_keys_114_step_, l_self_state_list_l_self_state_keys_115_step_, l_self_state_list_l_self_state_keys_116_step_, l_self_state_list_l_self_state_keys_117_step_, l_self_state_list_l_self_state_keys_118_step_, l_self_state_list_l_self_state_keys_119_step_, l_self_state_list_l_self_state_keys_120_step_, l_self_state_list_l_self_state_keys_121_step_, l_self_state_list_l_self_state_keys_122_step_, l_self_state_list_l_self_state_keys_123_step_, l_self_state_list_l_self_state_keys_124_step_, l_self_state_list_l_self_state_keys_125_step_, l_self_state_list_l_self_state_keys_126_step_, l_self_state_list_l_self_state_keys_127_step_, l_self_state_list_l_self_state_keys_128_step_, l_self_state_list_l_self_state_keys_129_step_, l_self_state_list_l_self_state_keys_130_step_, l_self_state_list_l_self_state_keys_131_step_, l_self_state_list_l_self_state_keys_132_step_, l_self_state_list_l_self_state_keys_133_step_, l_self_state_list_l_self_state_keys_134_step_, l_self_state_list_l_self_state_keys_135_step_, l_self_state_list_l_self_state_keys_136_step_, l_self_state_list_l_self_state_keys_137_step_, l_self_state_list_l_self_state_keys_138_step_, l_self_state_list_l_self_state_keys_139_step_, l_self_state_list_l_self_state_keys_140_step_, l_self_state_list_l_self_state_keys_141_step_, 
l_self_state_list_l_self_state_keys_142_step_, l_self_state_list_l_self_state_keys_143_step_, l_self_state_list_l_self_state_keys_144_step_, l_self_state_list_l_self_state_keys_145_step_, l_self_state_list_l_self_state_keys_146_step_, l_self_state_list_l_self_state_keys_147_step_], 1); _foreach_add_ = None
+
+ # File: /data/users/jjwu/a/pytorch/torch/optim/adam.py:541 in _multi_tensor_adam, code: torch._foreach_lerp_(device_exp_avgs, device_grads, 1 - beta1)
+ _foreach_lerp_ = torch._foreach_lerp_([l_self_state_list_l_self_state_keys_0_exp_avg_, l_self_state_list_l_self_state_keys_1_exp_avg_, l_self_state_list_l_self_state_keys_2_exp_avg_, l_self_state_list_l_self_state_keys_3_exp_avg_, l_self_state_list_l_self_state_keys_4_exp_avg_, l_self_state_list_l_self_state_keys_5_exp_avg_, l_self_state_list_l_self_state_keys_6_exp_avg_, l_self_state_list_l_self_state_keys_7_exp_avg_, l_self_state_list_l_self_state_keys_8_exp_avg_, l_self_state_list_l_self_state_keys_9_exp_avg_, l_self_state_list_l_self_state_keys_10_exp_avg_, l_self_state_list_l_self_state_keys_11_exp_avg_, l_self_state_list_l_self_state_keys_12_exp_avg_, l_self_state_list_l_self_state_keys_13_exp_avg_, l_self_state_list_l_self_state_keys_14_exp_avg_, l_self_state_list_l_self_state_keys_15_exp_avg_, l_self_state_list_l_self_state_keys_16_exp_avg_, l_self_state_list_l_self_state_keys_17_exp_avg_, l_self_state_list_l_self_state_keys_18_exp_avg_, l_self_state_list_l_self_state_keys_19_exp_avg_, l_self_state_list_l_self_state_keys_20_exp_avg_, l_self_state_list_l_self_state_keys_21_exp_avg_, l_self_state_list_l_self_state_keys_22_exp_avg_, l_self_state_list_l_self_state_keys_23_exp_avg_, l_self_state_list_l_self_state_keys_24_exp_avg_, l_self_state_list_l_self_state_keys_25_exp_avg_, l_self_state_list_l_self_state_keys_26_exp_avg_, l_self_state_list_l_self_state_keys_27_exp_avg_, l_self_state_list_l_self_state_keys_28_exp_avg_, l_self_state_list_l_self_state_keys_29_exp_avg_, l_self_state_list_l_self_state_keys_30_exp_avg_, l_self_state_list_l_self_state_keys_31_exp_avg_, l_self_state_list_l_self_state_keys_32_exp_avg_, l_self_state_list_l_self_state_keys_33_exp_avg_, l_self_state_list_l_self_state_keys_34_exp_avg_, l_self_state_list_l_self_state_keys_35_exp_avg_, l_self_state_list_l_self_state_keys_36_exp_avg_, l_self_state_list_l_self_state_keys_37_exp_avg_, l_self_state_list_l_self_state_keys_38_exp_avg_, l_self_state_list_l_self_state_keys_39_exp_avg_, l_self_state_list_l_self_state_keys_40_exp_avg_, l_self_state_list_l_self_state_keys_41_exp_avg_, l_self_state_list_l_self_state_keys_42_exp_avg_, l_self_state_list_l_self_state_keys_43_exp_avg_, l_self_state_list_l_self_state_keys_44_exp_avg_, l_self_state_list_l_self_state_keys_45_exp_avg_, l_self_state_list_l_self_state_keys_46_exp_avg_, l_self_state_list_l_self_state_keys_47_exp_avg_, l_self_state_list_l_self_state_keys_48_exp_avg_, l_self_state_list_l_self_state_keys_49_exp_avg_, l_self_state_list_l_self_state_keys_50_exp_avg_, l_self_state_list_l_self_state_keys_51_exp_avg_, l_self_state_list_l_self_state_keys_52_exp_avg_, l_self_state_list_l_self_state_keys_53_exp_avg_, l_self_state_list_l_self_state_keys_54_exp_avg_, l_self_state_list_l_self_state_keys_55_exp_avg_, l_self_state_list_l_self_state_keys_56_exp_avg_, l_self_state_list_l_self_state_keys_57_exp_avg_, l_self_state_list_l_self_state_keys_58_exp_avg_, l_self_state_list_l_self_state_keys_59_exp_avg_, l_self_state_list_l_self_state_keys_60_exp_avg_, l_self_state_list_l_self_state_keys_61_exp_avg_,
l_self_state_list_l_self_state_keys_62_exp_avg_, l_self_state_list_l_self_state_keys_63_exp_avg_, l_self_state_list_l_self_state_keys_64_exp_avg_, l_self_state_list_l_self_state_keys_65_exp_avg_, l_self_state_list_l_self_state_keys_66_exp_avg_, l_self_state_list_l_self_state_keys_67_exp_avg_, l_self_state_list_l_self_state_keys_68_exp_avg_, l_self_state_list_l_self_state_keys_69_exp_avg_, l_self_state_list_l_self_state_keys_70_exp_avg_, l_self_state_list_l_self_state_keys_71_exp_avg_, l_self_state_list_l_self_state_keys_72_exp_avg_, l_self_state_list_l_self_state_keys_73_exp_avg_, l_self_state_list_l_self_state_keys_74_exp_avg_, l_self_state_list_l_self_state_keys_75_exp_avg_, l_self_state_list_l_self_state_keys_76_exp_avg_, l_self_state_list_l_self_state_keys_77_exp_avg_, l_self_state_list_l_self_state_keys_78_exp_avg_, l_self_state_list_l_self_state_keys_79_exp_avg_, l_self_state_list_l_self_state_keys_80_exp_avg_, l_self_state_list_l_self_state_keys_81_exp_avg_, l_self_state_list_l_self_state_keys_82_exp_avg_, l_self_state_list_l_self_state_keys_83_exp_avg_, l_self_state_list_l_self_state_keys_84_exp_avg_, l_self_state_list_l_self_state_keys_85_exp_avg_, l_self_state_list_l_self_state_keys_86_exp_avg_, l_self_state_list_l_self_state_keys_87_exp_avg_, l_self_state_list_l_self_state_keys_88_exp_avg_, l_self_state_list_l_self_state_keys_89_exp_avg_, l_self_state_list_l_self_state_keys_90_exp_avg_, l_self_state_list_l_self_state_keys_91_exp_avg_, l_self_state_list_l_self_state_keys_92_exp_avg_, l_self_state_list_l_self_state_keys_93_exp_avg_, l_self_state_list_l_self_state_keys_94_exp_avg_, l_self_state_list_l_self_state_keys_95_exp_avg_, l_self_state_list_l_self_state_keys_96_exp_avg_, l_self_state_list_l_self_state_keys_97_exp_avg_, l_self_state_list_l_self_state_keys_98_exp_avg_, l_self_state_list_l_self_state_keys_99_exp_avg_, l_self_state_list_l_self_state_keys_100_exp_avg_, l_self_state_list_l_self_state_keys_101_exp_avg_, l_self_state_list_l_self_state_keys_102_exp_avg_, l_self_state_list_l_self_state_keys_103_exp_avg_, l_self_state_list_l_self_state_keys_104_exp_avg_, l_self_state_list_l_self_state_keys_105_exp_avg_, l_self_state_list_l_self_state_keys_106_exp_avg_, l_self_state_list_l_self_state_keys_107_exp_avg_, l_self_state_list_l_self_state_keys_108_exp_avg_, l_self_state_list_l_self_state_keys_109_exp_avg_, l_self_state_list_l_self_state_keys_110_exp_avg_, l_self_state_list_l_self_state_keys_111_exp_avg_, l_self_state_list_l_self_state_keys_112_exp_avg_, l_self_state_list_l_self_state_keys_113_exp_avg_, l_self_state_list_l_self_state_keys_114_exp_avg_, l_self_state_list_l_self_state_keys_115_exp_avg_, l_self_state_list_l_self_state_keys_116_exp_avg_, l_self_state_list_l_self_state_keys_117_exp_avg_, l_self_state_list_l_self_state_keys_118_exp_avg_, l_self_state_list_l_self_state_keys_119_exp_avg_, l_self_state_list_l_self_state_keys_120_exp_avg_, l_self_state_list_l_self_state_keys_121_exp_avg_, l_self_state_list_l_self_state_keys_122_exp_avg_, l_self_state_list_l_self_state_keys_123_exp_avg_, l_self_state_list_l_self_state_keys_124_exp_avg_, l_self_state_list_l_self_state_keys_125_exp_avg_, l_self_state_list_l_self_state_keys_126_exp_avg_, l_self_state_list_l_self_state_keys_127_exp_avg_, l_self_state_list_l_self_state_keys_128_exp_avg_, l_self_state_list_l_self_state_keys_129_exp_avg_, l_self_state_list_l_self_state_keys_130_exp_avg_, l_self_state_list_l_self_state_keys_131_exp_avg_, l_self_state_list_l_self_state_keys_132_exp_avg_, 
l_self_state_list_l_self_state_keys_133_exp_avg_, l_self_state_list_l_self_state_keys_134_exp_avg_, l_self_state_list_l_self_state_keys_135_exp_avg_, l_self_state_list_l_self_state_keys_136_exp_avg_, l_self_state_list_l_self_state_keys_137_exp_avg_, l_self_state_list_l_self_state_keys_138_exp_avg_, l_self_state_list_l_self_state_keys_139_exp_avg_, l_self_state_list_l_self_state_keys_140_exp_avg_, l_self_state_list_l_self_state_keys_141_exp_avg_, l_self_state_list_l_self_state_keys_142_exp_avg_, l_self_state_list_l_self_state_keys_143_exp_avg_, l_self_state_list_l_self_state_keys_144_exp_avg_, l_self_state_list_l_self_state_keys_145_exp_avg_, l_self_state_list_l_self_state_keys_146_exp_avg_, l_self_state_list_l_self_state_keys_147_exp_avg_], [l_self_param_groups_0_params_0_grad, l_self_param_groups_0_params_1_grad, l_self_param_groups_0_params_2_grad, l_self_param_groups_0_params_3_grad, l_self_param_groups_0_params_4_grad, l_self_param_groups_0_params_5_grad, l_self_param_groups_0_params_6_grad, l_self_param_groups_0_params_7_grad, l_self_param_groups_0_params_8_grad, l_self_param_groups_0_params_9_grad, l_self_param_groups_0_params_10_grad, l_self_param_groups_0_params_11_grad, l_self_param_groups_0_params_12_grad, l_self_param_groups_0_params_13_grad, l_self_param_groups_0_params_14_grad, l_self_param_groups_0_params_15_grad, l_self_param_groups_0_params_16_grad, l_self_param_groups_0_params_17_grad, l_self_param_groups_0_params_18_grad, l_self_param_groups_0_params_19_grad, l_self_param_groups_0_params_20_grad, l_self_param_groups_0_params_21_grad, l_self_param_groups_0_params_22_grad, l_self_param_groups_0_params_23_grad, l_self_param_groups_0_params_24_grad, l_self_param_groups_0_params_25_grad, l_self_param_groups_0_params_26_grad, l_self_param_groups_0_params_27_grad, l_self_param_groups_0_params_28_grad, l_self_param_groups_0_params_29_grad, l_self_param_groups_0_params_30_grad, l_self_param_groups_0_params_31_grad, l_self_param_groups_0_params_32_grad, l_self_param_groups_0_params_33_grad, l_self_param_groups_0_params_34_grad, l_self_param_groups_0_params_35_grad, l_self_param_groups_0_params_36_grad, l_self_param_groups_0_params_37_grad, l_self_param_groups_0_params_38_grad, l_self_param_groups_0_params_39_grad, l_self_param_groups_0_params_40_grad, l_self_param_groups_0_params_41_grad, l_self_param_groups_0_params_42_grad, l_self_param_groups_0_params_43_grad, l_self_param_groups_0_params_44_grad, l_self_param_groups_0_params_45_grad, l_self_param_groups_0_params_46_grad, l_self_param_groups_0_params_47_grad, l_self_param_groups_0_params_48_grad, l_self_param_groups_0_params_49_grad, l_self_param_groups_0_params_50_grad, l_self_param_groups_0_params_51_grad, l_self_param_groups_0_params_52_grad, l_self_param_groups_0_params_53_grad, l_self_param_groups_0_params_54_grad, l_self_param_groups_0_params_55_grad, l_self_param_groups_0_params_56_grad, l_self_param_groups_0_params_57_grad, l_self_param_groups_0_params_58_grad, l_self_param_groups_0_params_59_grad, l_self_param_groups_0_params_60_grad, l_self_param_groups_0_params_61_grad, l_self_param_groups_0_params_62_grad, l_self_param_groups_0_params_63_grad, l_self_param_groups_0_params_64_grad, l_self_param_groups_0_params_65_grad, l_self_param_groups_0_params_66_grad, l_self_param_groups_0_params_67_grad, l_self_param_groups_0_params_68_grad, l_self_param_groups_0_params_69_grad, l_self_param_groups_0_params_70_grad, l_self_param_groups_0_params_71_grad, l_self_param_groups_0_params_72_grad, l_self_param_groups_0_params_73_grad, 
l_self_param_groups_0_params_74_grad, l_self_param_groups_0_params_75_grad, l_self_param_groups_0_params_76_grad, l_self_param_groups_0_params_77_grad, l_self_param_groups_0_params_78_grad, l_self_param_groups_0_params_79_grad, l_self_param_groups_0_params_80_grad, l_self_param_groups_0_params_81_grad, l_self_param_groups_0_params_82_grad, l_self_param_groups_0_params_83_grad, l_self_param_groups_0_params_84_grad, l_self_param_groups_0_params_85_grad, l_self_param_groups_0_params_86_grad, l_self_param_groups_0_params_87_grad, l_self_param_groups_0_params_88_grad, l_self_param_groups_0_params_89_grad, l_self_param_groups_0_params_90_grad, l_self_param_groups_0_params_91_grad, l_self_param_groups_0_params_92_grad, l_self_param_groups_0_params_93_grad, l_self_param_groups_0_params_94_grad, l_self_param_groups_0_params_95_grad, l_self_param_groups_0_params_96_grad, l_self_param_groups_0_params_97_grad, l_self_param_groups_0_params_98_grad, l_self_param_groups_0_params_99_grad, l_self_param_groups_0_params_100_grad, l_self_param_groups_0_params_101_grad, l_self_param_groups_0_params_102_grad, l_self_param_groups_0_params_103_grad, l_self_param_groups_0_params_104_grad, l_self_param_groups_0_params_105_grad, l_self_param_groups_0_params_106_grad, l_self_param_groups_0_params_107_grad, l_self_param_groups_0_params_108_grad, l_self_param_groups_0_params_109_grad, l_self_param_groups_0_params_110_grad, l_self_param_groups_0_params_111_grad, l_self_param_groups_0_params_112_grad, l_self_param_groups_0_params_113_grad, l_self_param_groups_0_params_114_grad, l_self_param_groups_0_params_115_grad, l_self_param_groups_0_params_116_grad, l_self_param_groups_0_params_117_grad, l_self_param_groups_0_params_118_grad, l_self_param_groups_0_params_119_grad, l_self_param_groups_0_params_120_grad, l_self_param_groups_0_params_121_grad, l_self_param_groups_0_params_122_grad, l_self_param_groups_0_params_123_grad, l_self_param_groups_0_params_124_grad, l_self_param_groups_0_params_125_grad, l_self_param_groups_0_params_126_grad, l_self_param_groups_0_params_127_grad, l_self_param_groups_0_params_128_grad, l_self_param_groups_0_params_129_grad, l_self_param_groups_0_params_130_grad, l_self_param_groups_0_params_131_grad, l_self_param_groups_0_params_132_grad, l_self_param_groups_0_params_133_grad, l_self_param_groups_0_params_134_grad, l_self_param_groups_0_params_135_grad, l_self_param_groups_0_params_136_grad, l_self_param_groups_0_params_137_grad, l_self_param_groups_0_params_138_grad, l_self_param_groups_0_params_139_grad, l_self_param_groups_0_params_140_grad, l_self_param_groups_0_params_141_grad, l_self_param_groups_0_params_142_grad, l_self_param_groups_0_params_143_grad, l_self_param_groups_0_params_144_grad, l_self_param_groups_0_params_145_grad, l_self_param_groups_0_params_146_grad, l_self_param_groups_0_params_147_grad], 0.09999999999999998); _foreach_lerp_ = None
+
+ # File: /data/users/jjwu/a/pytorch/torch/optim/adam.py:543 in _multi_tensor_adam, code: torch._foreach_mul_(device_exp_avg_sqs, beta2)
+ _foreach_mul_ = torch._foreach_mul_([l_self_state_list_l_self_state_keys_0_exp_avg_sq_, l_self_state_list_l_self_state_keys_1_exp_avg_sq_, l_self_state_list_l_self_state_keys_2_exp_avg_sq_, l_self_state_list_l_self_state_keys_3_exp_avg_sq_, l_self_state_list_l_self_state_keys_4_exp_avg_sq_, l_self_state_list_l_self_state_keys_5_exp_avg_sq_, l_self_state_list_l_self_state_keys_6_exp_avg_sq_, l_self_state_list_l_self_state_keys_7_exp_avg_sq_, l_self_state_list_l_self_state_keys_8_exp_avg_sq_,
l_self_state_list_l_self_state_keys_9_exp_avg_sq_, l_self_state_list_l_self_state_keys_10_exp_avg_sq_, l_self_state_list_l_self_state_keys_11_exp_avg_sq_, l_self_state_list_l_self_state_keys_12_exp_avg_sq_, l_self_state_list_l_self_state_keys_13_exp_avg_sq_, l_self_state_list_l_self_state_keys_14_exp_avg_sq_, l_self_state_list_l_self_state_keys_15_exp_avg_sq_, l_self_state_list_l_self_state_keys_16_exp_avg_sq_, l_self_state_list_l_self_state_keys_17_exp_avg_sq_, l_self_state_list_l_self_state_keys_18_exp_avg_sq_, l_self_state_list_l_self_state_keys_19_exp_avg_sq_, l_self_state_list_l_self_state_keys_20_exp_avg_sq_, l_self_state_list_l_self_state_keys_21_exp_avg_sq_, l_self_state_list_l_self_state_keys_22_exp_avg_sq_, l_self_state_list_l_self_state_keys_23_exp_avg_sq_, l_self_state_list_l_self_state_keys_24_exp_avg_sq_, l_self_state_list_l_self_state_keys_25_exp_avg_sq_, l_self_state_list_l_self_state_keys_26_exp_avg_sq_, l_self_state_list_l_self_state_keys_27_exp_avg_sq_, l_self_state_list_l_self_state_keys_28_exp_avg_sq_, l_self_state_list_l_self_state_keys_29_exp_avg_sq_, l_self_state_list_l_self_state_keys_30_exp_avg_sq_, l_self_state_list_l_self_state_keys_31_exp_avg_sq_, l_self_state_list_l_self_state_keys_32_exp_avg_sq_, l_self_state_list_l_self_state_keys_33_exp_avg_sq_, l_self_state_list_l_self_state_keys_34_exp_avg_sq_, l_self_state_list_l_self_state_keys_35_exp_avg_sq_, l_self_state_list_l_self_state_keys_36_exp_avg_sq_, l_self_state_list_l_self_state_keys_37_exp_avg_sq_, l_self_state_list_l_self_state_keys_38_exp_avg_sq_, l_self_state_list_l_self_state_keys_39_exp_avg_sq_, l_self_state_list_l_self_state_keys_40_exp_avg_sq_, l_self_state_list_l_self_state_keys_41_exp_avg_sq_, l_self_state_list_l_self_state_keys_42_exp_avg_sq_, l_self_state_list_l_self_state_keys_43_exp_avg_sq_, l_self_state_list_l_self_state_keys_44_exp_avg_sq_, l_self_state_list_l_self_state_keys_45_exp_avg_sq_, l_self_state_list_l_self_state_keys_46_exp_avg_sq_, l_self_state_list_l_self_state_keys_47_exp_avg_sq_, l_self_state_list_l_self_state_keys_48_exp_avg_sq_, l_self_state_list_l_self_state_keys_49_exp_avg_sq_, l_self_state_list_l_self_state_keys_50_exp_avg_sq_, l_self_state_list_l_self_state_keys_51_exp_avg_sq_, l_self_state_list_l_self_state_keys_52_exp_avg_sq_, l_self_state_list_l_self_state_keys_53_exp_avg_sq_, l_self_state_list_l_self_state_keys_54_exp_avg_sq_, l_self_state_list_l_self_state_keys_55_exp_avg_sq_, l_self_state_list_l_self_state_keys_56_exp_avg_sq_, l_self_state_list_l_self_state_keys_57_exp_avg_sq_, l_self_state_list_l_self_state_keys_58_exp_avg_sq_, l_self_state_list_l_self_state_keys_59_exp_avg_sq_, l_self_state_list_l_self_state_keys_60_exp_avg_sq_, l_self_state_list_l_self_state_keys_61_exp_avg_sq_, l_self_state_list_l_self_state_keys_62_exp_avg_sq_, l_self_state_list_l_self_state_keys_63_exp_avg_sq_, l_self_state_list_l_self_state_keys_64_exp_avg_sq_, l_self_state_list_l_self_state_keys_65_exp_avg_sq_, l_self_state_list_l_self_state_keys_66_exp_avg_sq_, l_self_state_list_l_self_state_keys_67_exp_avg_sq_, l_self_state_list_l_self_state_keys_68_exp_avg_sq_, l_self_state_list_l_self_state_keys_69_exp_avg_sq_, l_self_state_list_l_self_state_keys_70_exp_avg_sq_, l_self_state_list_l_self_state_keys_71_exp_avg_sq_, l_self_state_list_l_self_state_keys_72_exp_avg_sq_, l_self_state_list_l_self_state_keys_73_exp_avg_sq_, l_self_state_list_l_self_state_keys_74_exp_avg_sq_, l_self_state_list_l_self_state_keys_75_exp_avg_sq_, l_self_state_list_l_self_state_keys_76_exp_avg_sq_, 
l_self_state_list_l_self_state_keys_77_exp_avg_sq_, l_self_state_list_l_self_state_keys_78_exp_avg_sq_, l_self_state_list_l_self_state_keys_79_exp_avg_sq_, l_self_state_list_l_self_state_keys_80_exp_avg_sq_, l_self_state_list_l_self_state_keys_81_exp_avg_sq_, l_self_state_list_l_self_state_keys_82_exp_avg_sq_, l_self_state_list_l_self_state_keys_83_exp_avg_sq_, l_self_state_list_l_self_state_keys_84_exp_avg_sq_, l_self_state_list_l_self_state_keys_85_exp_avg_sq_, l_self_state_list_l_self_state_keys_86_exp_avg_sq_, l_self_state_list_l_self_state_keys_87_exp_avg_sq_, l_self_state_list_l_self_state_keys_88_exp_avg_sq_, l_self_state_list_l_self_state_keys_89_exp_avg_sq_, l_self_state_list_l_self_state_keys_90_exp_avg_sq_, l_self_state_list_l_self_state_keys_91_exp_avg_sq_, l_self_state_list_l_self_state_keys_92_exp_avg_sq_, l_self_state_list_l_self_state_keys_93_exp_avg_sq_, l_self_state_list_l_self_state_keys_94_exp_avg_sq_, l_self_state_list_l_self_state_keys_95_exp_avg_sq_, l_self_state_list_l_self_state_keys_96_exp_avg_sq_, l_self_state_list_l_self_state_keys_97_exp_avg_sq_, l_self_state_list_l_self_state_keys_98_exp_avg_sq_, l_self_state_list_l_self_state_keys_99_exp_avg_sq_, l_self_state_list_l_self_state_keys_100_exp_avg_sq_, l_self_state_list_l_self_state_keys_101_exp_avg_sq_, l_self_state_list_l_self_state_keys_102_exp_avg_sq_, l_self_state_list_l_self_state_keys_103_exp_avg_sq_, l_self_state_list_l_self_state_keys_104_exp_avg_sq_, l_self_state_list_l_self_state_keys_105_exp_avg_sq_, l_self_state_list_l_self_state_keys_106_exp_avg_sq_, l_self_state_list_l_self_state_keys_107_exp_avg_sq_, l_self_state_list_l_self_state_keys_108_exp_avg_sq_, l_self_state_list_l_self_state_keys_109_exp_avg_sq_, l_self_state_list_l_self_state_keys_110_exp_avg_sq_, l_self_state_list_l_self_state_keys_111_exp_avg_sq_, l_self_state_list_l_self_state_keys_112_exp_avg_sq_, l_self_state_list_l_self_state_keys_113_exp_avg_sq_, l_self_state_list_l_self_state_keys_114_exp_avg_sq_, l_self_state_list_l_self_state_keys_115_exp_avg_sq_, l_self_state_list_l_self_state_keys_116_exp_avg_sq_, l_self_state_list_l_self_state_keys_117_exp_avg_sq_, l_self_state_list_l_self_state_keys_118_exp_avg_sq_, l_self_state_list_l_self_state_keys_119_exp_avg_sq_, l_self_state_list_l_self_state_keys_120_exp_avg_sq_, l_self_state_list_l_self_state_keys_121_exp_avg_sq_, l_self_state_list_l_self_state_keys_122_exp_avg_sq_, l_self_state_list_l_self_state_keys_123_exp_avg_sq_, l_self_state_list_l_self_state_keys_124_exp_avg_sq_, l_self_state_list_l_self_state_keys_125_exp_avg_sq_, l_self_state_list_l_self_state_keys_126_exp_avg_sq_, l_self_state_list_l_self_state_keys_127_exp_avg_sq_, l_self_state_list_l_self_state_keys_128_exp_avg_sq_, l_self_state_list_l_self_state_keys_129_exp_avg_sq_, l_self_state_list_l_self_state_keys_130_exp_avg_sq_, l_self_state_list_l_self_state_keys_131_exp_avg_sq_, l_self_state_list_l_self_state_keys_132_exp_avg_sq_, l_self_state_list_l_self_state_keys_133_exp_avg_sq_, l_self_state_list_l_self_state_keys_134_exp_avg_sq_, l_self_state_list_l_self_state_keys_135_exp_avg_sq_, l_self_state_list_l_self_state_keys_136_exp_avg_sq_, l_self_state_list_l_self_state_keys_137_exp_avg_sq_, l_self_state_list_l_self_state_keys_138_exp_avg_sq_, l_self_state_list_l_self_state_keys_139_exp_avg_sq_, l_self_state_list_l_self_state_keys_140_exp_avg_sq_, l_self_state_list_l_self_state_keys_141_exp_avg_sq_, l_self_state_list_l_self_state_keys_142_exp_avg_sq_, l_self_state_list_l_self_state_keys_143_exp_avg_sq_, 
l_self_state_list_l_self_state_keys_144_exp_avg_sq_, l_self_state_list_l_self_state_keys_145_exp_avg_sq_, l_self_state_list_l_self_state_keys_146_exp_avg_sq_, l_self_state_list_l_self_state_keys_147_exp_avg_sq_], 0.999); _foreach_mul_ = None
+
+ # File: /data/users/jjwu/a/pytorch/torch/optim/adam.py:544 in _multi_tensor_adam, code: torch._foreach_addcmul_(
+ _foreach_addcmul_ = torch._foreach_addcmul_([l_self_state_list_l_self_state_keys_0_exp_avg_sq_, l_self_state_list_l_self_state_keys_1_exp_avg_sq_, l_self_state_list_l_self_state_keys_2_exp_avg_sq_, l_self_state_list_l_self_state_keys_3_exp_avg_sq_, l_self_state_list_l_self_state_keys_4_exp_avg_sq_, l_self_state_list_l_self_state_keys_5_exp_avg_sq_, l_self_state_list_l_self_state_keys_6_exp_avg_sq_, l_self_state_list_l_self_state_keys_7_exp_avg_sq_, l_self_state_list_l_self_state_keys_8_exp_avg_sq_, l_self_state_list_l_self_state_keys_9_exp_avg_sq_, l_self_state_list_l_self_state_keys_10_exp_avg_sq_, l_self_state_list_l_self_state_keys_11_exp_avg_sq_, l_self_state_list_l_self_state_keys_12_exp_avg_sq_, l_self_state_list_l_self_state_keys_13_exp_avg_sq_, l_self_state_list_l_self_state_keys_14_exp_avg_sq_, l_self_state_list_l_self_state_keys_15_exp_avg_sq_, l_self_state_list_l_self_state_keys_16_exp_avg_sq_, l_self_state_list_l_self_state_keys_17_exp_avg_sq_, l_self_state_list_l_self_state_keys_18_exp_avg_sq_, l_self_state_list_l_self_state_keys_19_exp_avg_sq_, l_self_state_list_l_self_state_keys_20_exp_avg_sq_, l_self_state_list_l_self_state_keys_21_exp_avg_sq_, l_self_state_list_l_self_state_keys_22_exp_avg_sq_, l_self_state_list_l_self_state_keys_23_exp_avg_sq_, l_self_state_list_l_self_state_keys_24_exp_avg_sq_, l_self_state_list_l_self_state_keys_25_exp_avg_sq_, l_self_state_list_l_self_state_keys_26_exp_avg_sq_, l_self_state_list_l_self_state_keys_27_exp_avg_sq_, l_self_state_list_l_self_state_keys_28_exp_avg_sq_, l_self_state_list_l_self_state_keys_29_exp_avg_sq_, l_self_state_list_l_self_state_keys_30_exp_avg_sq_, l_self_state_list_l_self_state_keys_31_exp_avg_sq_, l_self_state_list_l_self_state_keys_32_exp_avg_sq_, l_self_state_list_l_self_state_keys_33_exp_avg_sq_, l_self_state_list_l_self_state_keys_34_exp_avg_sq_, l_self_state_list_l_self_state_keys_35_exp_avg_sq_, l_self_state_list_l_self_state_keys_36_exp_avg_sq_, l_self_state_list_l_self_state_keys_37_exp_avg_sq_, l_self_state_list_l_self_state_keys_38_exp_avg_sq_, l_self_state_list_l_self_state_keys_39_exp_avg_sq_, l_self_state_list_l_self_state_keys_40_exp_avg_sq_, l_self_state_list_l_self_state_keys_41_exp_avg_sq_, l_self_state_list_l_self_state_keys_42_exp_avg_sq_, l_self_state_list_l_self_state_keys_43_exp_avg_sq_, l_self_state_list_l_self_state_keys_44_exp_avg_sq_, l_self_state_list_l_self_state_keys_45_exp_avg_sq_, l_self_state_list_l_self_state_keys_46_exp_avg_sq_, l_self_state_list_l_self_state_keys_47_exp_avg_sq_, l_self_state_list_l_self_state_keys_48_exp_avg_sq_, l_self_state_list_l_self_state_keys_49_exp_avg_sq_, l_self_state_list_l_self_state_keys_50_exp_avg_sq_, l_self_state_list_l_self_state_keys_51_exp_avg_sq_, l_self_state_list_l_self_state_keys_52_exp_avg_sq_, l_self_state_list_l_self_state_keys_53_exp_avg_sq_, l_self_state_list_l_self_state_keys_54_exp_avg_sq_, l_self_state_list_l_self_state_keys_55_exp_avg_sq_, l_self_state_list_l_self_state_keys_56_exp_avg_sq_, l_self_state_list_l_self_state_keys_57_exp_avg_sq_, l_self_state_list_l_self_state_keys_58_exp_avg_sq_, l_self_state_list_l_self_state_keys_59_exp_avg_sq_,
l_self_state_list_l_self_state_keys_60_exp_avg_sq_, l_self_state_list_l_self_state_keys_61_exp_avg_sq_, l_self_state_list_l_self_state_keys_62_exp_avg_sq_, l_self_state_list_l_self_state_keys_63_exp_avg_sq_, l_self_state_list_l_self_state_keys_64_exp_avg_sq_, l_self_state_list_l_self_state_keys_65_exp_avg_sq_, l_self_state_list_l_self_state_keys_66_exp_avg_sq_, l_self_state_list_l_self_state_keys_67_exp_avg_sq_, l_self_state_list_l_self_state_keys_68_exp_avg_sq_, l_self_state_list_l_self_state_keys_69_exp_avg_sq_, l_self_state_list_l_self_state_keys_70_exp_avg_sq_, l_self_state_list_l_self_state_keys_71_exp_avg_sq_, l_self_state_list_l_self_state_keys_72_exp_avg_sq_, l_self_state_list_l_self_state_keys_73_exp_avg_sq_, l_self_state_list_l_self_state_keys_74_exp_avg_sq_, l_self_state_list_l_self_state_keys_75_exp_avg_sq_, l_self_state_list_l_self_state_keys_76_exp_avg_sq_, l_self_state_list_l_self_state_keys_77_exp_avg_sq_, l_self_state_list_l_self_state_keys_78_exp_avg_sq_, l_self_state_list_l_self_state_keys_79_exp_avg_sq_, l_self_state_list_l_self_state_keys_80_exp_avg_sq_, l_self_state_list_l_self_state_keys_81_exp_avg_sq_, l_self_state_list_l_self_state_keys_82_exp_avg_sq_, l_self_state_list_l_self_state_keys_83_exp_avg_sq_, l_self_state_list_l_self_state_keys_84_exp_avg_sq_, l_self_state_list_l_self_state_keys_85_exp_avg_sq_, l_self_state_list_l_self_state_keys_86_exp_avg_sq_, l_self_state_list_l_self_state_keys_87_exp_avg_sq_, l_self_state_list_l_self_state_keys_88_exp_avg_sq_, l_self_state_list_l_self_state_keys_89_exp_avg_sq_, l_self_state_list_l_self_state_keys_90_exp_avg_sq_, l_self_state_list_l_self_state_keys_91_exp_avg_sq_, l_self_state_list_l_self_state_keys_92_exp_avg_sq_, l_self_state_list_l_self_state_keys_93_exp_avg_sq_, l_self_state_list_l_self_state_keys_94_exp_avg_sq_, l_self_state_list_l_self_state_keys_95_exp_avg_sq_, l_self_state_list_l_self_state_keys_96_exp_avg_sq_, l_self_state_list_l_self_state_keys_97_exp_avg_sq_, l_self_state_list_l_self_state_keys_98_exp_avg_sq_, l_self_state_list_l_self_state_keys_99_exp_avg_sq_, l_self_state_list_l_self_state_keys_100_exp_avg_sq_, l_self_state_list_l_self_state_keys_101_exp_avg_sq_, l_self_state_list_l_self_state_keys_102_exp_avg_sq_, l_self_state_list_l_self_state_keys_103_exp_avg_sq_, l_self_state_list_l_self_state_keys_104_exp_avg_sq_, l_self_state_list_l_self_state_keys_105_exp_avg_sq_, l_self_state_list_l_self_state_keys_106_exp_avg_sq_, l_self_state_list_l_self_state_keys_107_exp_avg_sq_, l_self_state_list_l_self_state_keys_108_exp_avg_sq_, l_self_state_list_l_self_state_keys_109_exp_avg_sq_, l_self_state_list_l_self_state_keys_110_exp_avg_sq_, l_self_state_list_l_self_state_keys_111_exp_avg_sq_, l_self_state_list_l_self_state_keys_112_exp_avg_sq_, l_self_state_list_l_self_state_keys_113_exp_avg_sq_, l_self_state_list_l_self_state_keys_114_exp_avg_sq_, l_self_state_list_l_self_state_keys_115_exp_avg_sq_, l_self_state_list_l_self_state_keys_116_exp_avg_sq_, l_self_state_list_l_self_state_keys_117_exp_avg_sq_, l_self_state_list_l_self_state_keys_118_exp_avg_sq_, l_self_state_list_l_self_state_keys_119_exp_avg_sq_, l_self_state_list_l_self_state_keys_120_exp_avg_sq_, l_self_state_list_l_self_state_keys_121_exp_avg_sq_, l_self_state_list_l_self_state_keys_122_exp_avg_sq_, l_self_state_list_l_self_state_keys_123_exp_avg_sq_, l_self_state_list_l_self_state_keys_124_exp_avg_sq_, l_self_state_list_l_self_state_keys_125_exp_avg_sq_, l_self_state_list_l_self_state_keys_126_exp_avg_sq_, 
l_self_state_list_l_self_state_keys_127_exp_avg_sq_, l_self_state_list_l_self_state_keys_128_exp_avg_sq_, l_self_state_list_l_self_state_keys_129_exp_avg_sq_, l_self_state_list_l_self_state_keys_130_exp_avg_sq_, l_self_state_list_l_self_state_keys_131_exp_avg_sq_, l_self_state_list_l_self_state_keys_132_exp_avg_sq_, l_self_state_list_l_self_state_keys_133_exp_avg_sq_, l_self_state_list_l_self_state_keys_134_exp_avg_sq_, l_self_state_list_l_self_state_keys_135_exp_avg_sq_, l_self_state_list_l_self_state_keys_136_exp_avg_sq_, l_self_state_list_l_self_state_keys_137_exp_avg_sq_, l_self_state_list_l_self_state_keys_138_exp_avg_sq_, l_self_state_list_l_self_state_keys_139_exp_avg_sq_, l_self_state_list_l_self_state_keys_140_exp_avg_sq_, l_self_state_list_l_self_state_keys_141_exp_avg_sq_, l_self_state_list_l_self_state_keys_142_exp_avg_sq_, l_self_state_list_l_self_state_keys_143_exp_avg_sq_, l_self_state_list_l_self_state_keys_144_exp_avg_sq_, l_self_state_list_l_self_state_keys_145_exp_avg_sq_, l_self_state_list_l_self_state_keys_146_exp_avg_sq_, l_self_state_list_l_self_state_keys_147_exp_avg_sq_], [l_self_param_groups_0_params_0_grad, l_self_param_groups_0_params_1_grad, l_self_param_groups_0_params_2_grad, l_self_param_groups_0_params_3_grad, l_self_param_groups_0_params_4_grad, l_self_param_groups_0_params_5_grad, l_self_param_groups_0_params_6_grad, l_self_param_groups_0_params_7_grad, l_self_param_groups_0_params_8_grad, l_self_param_groups_0_params_9_grad, l_self_param_groups_0_params_10_grad, l_self_param_groups_0_params_11_grad, l_self_param_groups_0_params_12_grad, l_self_param_groups_0_params_13_grad, l_self_param_groups_0_params_14_grad, l_self_param_groups_0_params_15_grad, l_self_param_groups_0_params_16_grad, l_self_param_groups_0_params_17_grad, l_self_param_groups_0_params_18_grad, l_self_param_groups_0_params_19_grad, l_self_param_groups_0_params_20_grad, l_self_param_groups_0_params_21_grad, l_self_param_groups_0_params_22_grad, l_self_param_groups_0_params_23_grad, l_self_param_groups_0_params_24_grad, l_self_param_groups_0_params_25_grad, l_self_param_groups_0_params_26_grad, l_self_param_groups_0_params_27_grad, l_self_param_groups_0_params_28_grad, l_self_param_groups_0_params_29_grad, l_self_param_groups_0_params_30_grad, l_self_param_groups_0_params_31_grad, l_self_param_groups_0_params_32_grad, l_self_param_groups_0_params_33_grad, l_self_param_groups_0_params_34_grad, l_self_param_groups_0_params_35_grad, l_self_param_groups_0_params_36_grad, l_self_param_groups_0_params_37_grad, l_self_param_groups_0_params_38_grad, l_self_param_groups_0_params_39_grad, l_self_param_groups_0_params_40_grad, l_self_param_groups_0_params_41_grad, l_self_param_groups_0_params_42_grad, l_self_param_groups_0_params_43_grad, l_self_param_groups_0_params_44_grad, l_self_param_groups_0_params_45_grad, l_self_param_groups_0_params_46_grad, l_self_param_groups_0_params_47_grad, l_self_param_groups_0_params_48_grad, l_self_param_groups_0_params_49_grad, l_self_param_groups_0_params_50_grad, l_self_param_groups_0_params_51_grad, l_self_param_groups_0_params_52_grad, l_self_param_groups_0_params_53_grad, l_self_param_groups_0_params_54_grad, l_self_param_groups_0_params_55_grad, l_self_param_groups_0_params_56_grad, l_self_param_groups_0_params_57_grad, l_self_param_groups_0_params_58_grad, l_self_param_groups_0_params_59_grad, l_self_param_groups_0_params_60_grad, l_self_param_groups_0_params_61_grad, l_self_param_groups_0_params_62_grad, l_self_param_groups_0_params_63_grad, 
l_self_param_groups_0_params_64_grad, l_self_param_groups_0_params_65_grad, l_self_param_groups_0_params_66_grad, l_self_param_groups_0_params_67_grad, l_self_param_groups_0_params_68_grad, l_self_param_groups_0_params_69_grad, l_self_param_groups_0_params_70_grad, l_self_param_groups_0_params_71_grad, l_self_param_groups_0_params_72_grad, l_self_param_groups_0_params_73_grad, l_self_param_groups_0_params_74_grad, l_self_param_groups_0_params_75_grad, l_self_param_groups_0_params_76_grad, l_self_param_groups_0_params_77_grad, l_self_param_groups_0_params_78_grad, l_self_param_groups_0_params_79_grad, l_self_param_groups_0_params_80_grad, l_self_param_groups_0_params_81_grad, l_self_param_groups_0_params_82_grad, l_self_param_groups_0_params_83_grad, l_self_param_groups_0_params_84_grad, l_self_param_groups_0_params_85_grad, l_self_param_groups_0_params_86_grad, l_self_param_groups_0_params_87_grad, l_self_param_groups_0_params_88_grad, l_self_param_groups_0_params_89_grad, l_self_param_groups_0_params_90_grad, l_self_param_groups_0_params_91_grad, l_self_param_groups_0_params_92_grad, l_self_param_groups_0_params_93_grad, l_self_param_groups_0_params_94_grad, l_self_param_groups_0_params_95_grad, l_self_param_groups_0_params_96_grad, l_self_param_groups_0_params_97_grad, l_self_param_groups_0_params_98_grad, l_self_param_groups_0_params_99_grad, l_self_param_groups_0_params_100_grad, l_self_param_groups_0_params_101_grad, l_self_param_groups_0_params_102_grad, l_self_param_groups_0_params_103_grad, l_self_param_groups_0_params_104_grad, l_self_param_groups_0_params_105_grad, l_self_param_groups_0_params_106_grad, l_self_param_groups_0_params_107_grad, l_self_param_groups_0_params_108_grad, l_self_param_groups_0_params_109_grad, l_self_param_groups_0_params_110_grad, l_self_param_groups_0_params_111_grad, l_self_param_groups_0_params_112_grad, l_self_param_groups_0_params_113_grad, l_self_param_groups_0_params_114_grad, l_self_param_groups_0_params_115_grad, l_self_param_groups_0_params_116_grad, l_self_param_groups_0_params_117_grad, l_self_param_groups_0_params_118_grad, l_self_param_groups_0_params_119_grad, l_self_param_groups_0_params_120_grad, l_self_param_groups_0_params_121_grad, l_self_param_groups_0_params_122_grad, l_self_param_groups_0_params_123_grad, l_self_param_groups_0_params_124_grad, l_self_param_groups_0_params_125_grad, l_self_param_groups_0_params_126_grad, l_self_param_groups_0_params_127_grad, l_self_param_groups_0_params_128_grad, l_self_param_groups_0_params_129_grad, l_self_param_groups_0_params_130_grad, l_self_param_groups_0_params_131_grad, l_self_param_groups_0_params_132_grad, l_self_param_groups_0_params_133_grad, l_self_param_groups_0_params_134_grad, l_self_param_groups_0_params_135_grad, l_self_param_groups_0_params_136_grad, l_self_param_groups_0_params_137_grad, l_self_param_groups_0_params_138_grad, l_self_param_groups_0_params_139_grad, l_self_param_groups_0_params_140_grad, l_self_param_groups_0_params_141_grad, l_self_param_groups_0_params_142_grad, l_self_param_groups_0_params_143_grad, l_self_param_groups_0_params_144_grad, l_self_param_groups_0_params_145_grad, l_self_param_groups_0_params_146_grad, l_self_param_groups_0_params_147_grad], [l_self_param_groups_0_params_0_grad, l_self_param_groups_0_params_1_grad, l_self_param_groups_0_params_2_grad, l_self_param_groups_0_params_3_grad, l_self_param_groups_0_params_4_grad, l_self_param_groups_0_params_5_grad, l_self_param_groups_0_params_6_grad, l_self_param_groups_0_params_7_grad, 
l_self_param_groups_0_params_8_grad, l_self_param_groups_0_params_9_grad, l_self_param_groups_0_params_10_grad, l_self_param_groups_0_params_11_grad, l_self_param_groups_0_params_12_grad, l_self_param_groups_0_params_13_grad, l_self_param_groups_0_params_14_grad, l_self_param_groups_0_params_15_grad, l_self_param_groups_0_params_16_grad, l_self_param_groups_0_params_17_grad, l_self_param_groups_0_params_18_grad, l_self_param_groups_0_params_19_grad, l_self_param_groups_0_params_20_grad, l_self_param_groups_0_params_21_grad, l_self_param_groups_0_params_22_grad, l_self_param_groups_0_params_23_grad, l_self_param_groups_0_params_24_grad, l_self_param_groups_0_params_25_grad, l_self_param_groups_0_params_26_grad, l_self_param_groups_0_params_27_grad, l_self_param_groups_0_params_28_grad, l_self_param_groups_0_params_29_grad, l_self_param_groups_0_params_30_grad, l_self_param_groups_0_params_31_grad, l_self_param_groups_0_params_32_grad, l_self_param_groups_0_params_33_grad, l_self_param_groups_0_params_34_grad, l_self_param_groups_0_params_35_grad, l_self_param_groups_0_params_36_grad, l_self_param_groups_0_params_37_grad, l_self_param_groups_0_params_38_grad, l_self_param_groups_0_params_39_grad, l_self_param_groups_0_params_40_grad, l_self_param_groups_0_params_41_grad, l_self_param_groups_0_params_42_grad, l_self_param_groups_0_params_43_grad, l_self_param_groups_0_params_44_grad, l_self_param_groups_0_params_45_grad, l_self_param_groups_0_params_46_grad, l_self_param_groups_0_params_47_grad, l_self_param_groups_0_params_48_grad, l_self_param_groups_0_params_49_grad, l_self_param_groups_0_params_50_grad, l_self_param_groups_0_params_51_grad, l_self_param_groups_0_params_52_grad, l_self_param_groups_0_params_53_grad, l_self_param_groups_0_params_54_grad, l_self_param_groups_0_params_55_grad, l_self_param_groups_0_params_56_grad, l_self_param_groups_0_params_57_grad, l_self_param_groups_0_params_58_grad, l_self_param_groups_0_params_59_grad, l_self_param_groups_0_params_60_grad, l_self_param_groups_0_params_61_grad, l_self_param_groups_0_params_62_grad, l_self_param_groups_0_params_63_grad, l_self_param_groups_0_params_64_grad, l_self_param_groups_0_params_65_grad, l_self_param_groups_0_params_66_grad, l_self_param_groups_0_params_67_grad, l_self_param_groups_0_params_68_grad, l_self_param_groups_0_params_69_grad, l_self_param_groups_0_params_70_grad, l_self_param_groups_0_params_71_grad, l_self_param_groups_0_params_72_grad, l_self_param_groups_0_params_73_grad, l_self_param_groups_0_params_74_grad, l_self_param_groups_0_params_75_grad, l_self_param_groups_0_params_76_grad, l_self_param_groups_0_params_77_grad, l_self_param_groups_0_params_78_grad, l_self_param_groups_0_params_79_grad, l_self_param_groups_0_params_80_grad, l_self_param_groups_0_params_81_grad, l_self_param_groups_0_params_82_grad, l_self_param_groups_0_params_83_grad, l_self_param_groups_0_params_84_grad, l_self_param_groups_0_params_85_grad, l_self_param_groups_0_params_86_grad, l_self_param_groups_0_params_87_grad, l_self_param_groups_0_params_88_grad, l_self_param_groups_0_params_89_grad, l_self_param_groups_0_params_90_grad, l_self_param_groups_0_params_91_grad, l_self_param_groups_0_params_92_grad, l_self_param_groups_0_params_93_grad, l_self_param_groups_0_params_94_grad, l_self_param_groups_0_params_95_grad, l_self_param_groups_0_params_96_grad, l_self_param_groups_0_params_97_grad, l_self_param_groups_0_params_98_grad, l_self_param_groups_0_params_99_grad, l_self_param_groups_0_params_100_grad, 
l_self_param_groups_0_params_101_grad, l_self_param_groups_0_params_102_grad, l_self_param_groups_0_params_103_grad, l_self_param_groups_0_params_104_grad, l_self_param_groups_0_params_105_grad, l_self_param_groups_0_params_106_grad, l_self_param_groups_0_params_107_grad, l_self_param_groups_0_params_108_grad, l_self_param_groups_0_params_109_grad, l_self_param_groups_0_params_110_grad, l_self_param_groups_0_params_111_grad, l_self_param_groups_0_params_112_grad, l_self_param_groups_0_params_113_grad, l_self_param_groups_0_params_114_grad, l_self_param_groups_0_params_115_grad, l_self_param_groups_0_params_116_grad, l_self_param_groups_0_params_117_grad, l_self_param_groups_0_params_118_grad, l_self_param_groups_0_params_119_grad, l_self_param_groups_0_params_120_grad, l_self_param_groups_0_params_121_grad, l_self_param_groups_0_params_122_grad, l_self_param_groups_0_params_123_grad, l_self_param_groups_0_params_124_grad, l_self_param_groups_0_params_125_grad, l_self_param_groups_0_params_126_grad, l_self_param_groups_0_params_127_grad, l_self_param_groups_0_params_128_grad, l_self_param_groups_0_params_129_grad, l_self_param_groups_0_params_130_grad, l_self_param_groups_0_params_131_grad, l_self_param_groups_0_params_132_grad, l_self_param_groups_0_params_133_grad, l_self_param_groups_0_params_134_grad, l_self_param_groups_0_params_135_grad, l_self_param_groups_0_params_136_grad, l_self_param_groups_0_params_137_grad, l_self_param_groups_0_params_138_grad, l_self_param_groups_0_params_139_grad, l_self_param_groups_0_params_140_grad, l_self_param_groups_0_params_141_grad, l_self_param_groups_0_params_142_grad, l_self_param_groups_0_params_143_grad, l_self_param_groups_0_params_144_grad, l_self_param_groups_0_params_145_grad, l_self_param_groups_0_params_146_grad, l_self_param_groups_0_params_147_grad], 0.0010000000000000009); l_self_param_groups_0_params_0_grad = l_self_param_groups_0_params_1_grad = l_self_param_groups_0_params_2_grad = l_self_param_groups_0_params_3_grad = l_self_param_groups_0_params_4_grad = l_self_param_groups_0_params_5_grad = l_self_param_groups_0_params_6_grad = l_self_param_groups_0_params_7_grad = l_self_param_groups_0_params_8_grad = l_self_param_groups_0_params_9_grad = l_self_param_groups_0_params_10_grad = l_self_param_groups_0_params_11_grad = l_self_param_groups_0_params_12_grad = l_self_param_groups_0_params_13_grad = l_self_param_groups_0_params_14_grad = l_self_param_groups_0_params_15_grad = l_self_param_groups_0_params_16_grad = l_self_param_groups_0_params_17_grad = l_self_param_groups_0_params_18_grad = l_self_param_groups_0_params_19_grad = l_self_param_groups_0_params_20_grad = l_self_param_groups_0_params_21_grad = l_self_param_groups_0_params_22_grad = l_self_param_groups_0_params_23_grad = l_self_param_groups_0_params_24_grad = l_self_param_groups_0_params_25_grad = l_self_param_groups_0_params_26_grad = l_self_param_groups_0_params_27_grad = l_self_param_groups_0_params_28_grad = l_self_param_groups_0_params_29_grad = l_self_param_groups_0_params_30_grad = l_self_param_groups_0_params_31_grad = l_self_param_groups_0_params_32_grad = l_self_param_groups_0_params_33_grad = l_self_param_groups_0_params_34_grad = l_self_param_groups_0_params_35_grad = l_self_param_groups_0_params_36_grad = l_self_param_groups_0_params_37_grad = l_self_param_groups_0_params_38_grad = l_self_param_groups_0_params_39_grad = l_self_param_groups_0_params_40_grad = l_self_param_groups_0_params_41_grad = l_self_param_groups_0_params_42_grad = 
l_self_param_groups_0_params_43_grad = l_self_param_groups_0_params_44_grad = l_self_param_groups_0_params_45_grad = l_self_param_groups_0_params_46_grad = l_self_param_groups_0_params_47_grad = l_self_param_groups_0_params_48_grad = l_self_param_groups_0_params_49_grad = l_self_param_groups_0_params_50_grad = l_self_param_groups_0_params_51_grad = l_self_param_groups_0_params_52_grad = l_self_param_groups_0_params_53_grad = l_self_param_groups_0_params_54_grad = l_self_param_groups_0_params_55_grad = l_self_param_groups_0_params_56_grad = l_self_param_groups_0_params_57_grad = l_self_param_groups_0_params_58_grad = l_self_param_groups_0_params_59_grad = l_self_param_groups_0_params_60_grad = l_self_param_groups_0_params_61_grad = l_self_param_groups_0_params_62_grad = l_self_param_groups_0_params_63_grad = l_self_param_groups_0_params_64_grad = l_self_param_groups_0_params_65_grad = l_self_param_groups_0_params_66_grad = l_self_param_groups_0_params_67_grad = l_self_param_groups_0_params_68_grad = l_self_param_groups_0_params_69_grad = l_self_param_groups_0_params_70_grad = l_self_param_groups_0_params_71_grad = l_self_param_groups_0_params_72_grad = l_self_param_groups_0_params_73_grad = l_self_param_groups_0_params_74_grad = l_self_param_groups_0_params_75_grad = l_self_param_groups_0_params_76_grad = l_self_param_groups_0_params_77_grad = l_self_param_groups_0_params_78_grad = l_self_param_groups_0_params_79_grad = l_self_param_groups_0_params_80_grad = l_self_param_groups_0_params_81_grad = l_self_param_groups_0_params_82_grad = l_self_param_groups_0_params_83_grad = l_self_param_groups_0_params_84_grad = l_self_param_groups_0_params_85_grad = l_self_param_groups_0_params_86_grad = l_self_param_groups_0_params_87_grad = l_self_param_groups_0_params_88_grad = l_self_param_groups_0_params_89_grad = l_self_param_groups_0_params_90_grad = l_self_param_groups_0_params_91_grad = l_self_param_groups_0_params_92_grad = l_self_param_groups_0_params_93_grad = l_self_param_groups_0_params_94_grad = l_self_param_groups_0_params_95_grad = l_self_param_groups_0_params_96_grad = l_self_param_groups_0_params_97_grad = l_self_param_groups_0_params_98_grad = l_self_param_groups_0_params_99_grad = l_self_param_groups_0_params_100_grad = l_self_param_groups_0_params_101_grad = l_self_param_groups_0_params_102_grad = l_self_param_groups_0_params_103_grad = l_self_param_groups_0_params_104_grad = l_self_param_groups_0_params_105_grad = l_self_param_groups_0_params_106_grad = l_self_param_groups_0_params_107_grad = l_self_param_groups_0_params_108_grad = l_self_param_groups_0_params_109_grad = l_self_param_groups_0_params_110_grad = l_self_param_groups_0_params_111_grad = l_self_param_groups_0_params_112_grad = l_self_param_groups_0_params_113_grad = l_self_param_groups_0_params_114_grad = l_self_param_groups_0_params_115_grad = l_self_param_groups_0_params_116_grad = l_self_param_groups_0_params_117_grad = l_self_param_groups_0_params_118_grad = l_self_param_groups_0_params_119_grad = l_self_param_groups_0_params_120_grad = l_self_param_groups_0_params_121_grad = l_self_param_groups_0_params_122_grad = l_self_param_groups_0_params_123_grad = l_self_param_groups_0_params_124_grad = l_self_param_groups_0_params_125_grad = l_self_param_groups_0_params_126_grad = l_self_param_groups_0_params_127_grad = l_self_param_groups_0_params_128_grad = l_self_param_groups_0_params_129_grad = l_self_param_groups_0_params_130_grad = l_self_param_groups_0_params_131_grad = l_self_param_groups_0_params_132_grad = 
l_self_param_groups_0_params_133_grad = l_self_param_groups_0_params_134_grad = l_self_param_groups_0_params_135_grad = l_self_param_groups_0_params_136_grad = l_self_param_groups_0_params_137_grad = l_self_param_groups_0_params_138_grad = l_self_param_groups_0_params_139_grad = l_self_param_groups_0_params_140_grad = l_self_param_groups_0_params_141_grad = l_self_param_groups_0_params_142_grad = l_self_param_groups_0_params_143_grad = l_self_param_groups_0_params_144_grad = l_self_param_groups_0_params_145_grad = l_self_param_groups_0_params_146_grad = l_self_param_groups_0_params_147_grad = _foreach_addcmul_ = None + + # File: /data/users/jjwu/a/pytorch/torch/optim/adam.py:555 in _multi_tensor_adam, code: bias_correction1 = torch._foreach_pow(beta1, device_state_steps) + _foreach_pow = torch._foreach_pow(0.9, [l_self_state_list_l_self_state_keys_0_step_, l_self_state_list_l_self_state_keys_1_step_, l_self_state_list_l_self_state_keys_2_step_, l_self_state_list_l_self_state_keys_3_step_, l_self_state_list_l_self_state_keys_4_step_, l_self_state_list_l_self_state_keys_5_step_, l_self_state_list_l_self_state_keys_6_step_, l_self_state_list_l_self_state_keys_7_step_, l_self_state_list_l_self_state_keys_8_step_, l_self_state_list_l_self_state_keys_9_step_, l_self_state_list_l_self_state_keys_10_step_, l_self_state_list_l_self_state_keys_11_step_, l_self_state_list_l_self_state_keys_12_step_, l_self_state_list_l_self_state_keys_13_step_, l_self_state_list_l_self_state_keys_14_step_, l_self_state_list_l_self_state_keys_15_step_, l_self_state_list_l_self_state_keys_16_step_, l_self_state_list_l_self_state_keys_17_step_, l_self_state_list_l_self_state_keys_18_step_, l_self_state_list_l_self_state_keys_19_step_, l_self_state_list_l_self_state_keys_20_step_, l_self_state_list_l_self_state_keys_21_step_, l_self_state_list_l_self_state_keys_22_step_, l_self_state_list_l_self_state_keys_23_step_, l_self_state_list_l_self_state_keys_24_step_, l_self_state_list_l_self_state_keys_25_step_, l_self_state_list_l_self_state_keys_26_step_, l_self_state_list_l_self_state_keys_27_step_, l_self_state_list_l_self_state_keys_28_step_, l_self_state_list_l_self_state_keys_29_step_, l_self_state_list_l_self_state_keys_30_step_, l_self_state_list_l_self_state_keys_31_step_, l_self_state_list_l_self_state_keys_32_step_, l_self_state_list_l_self_state_keys_33_step_, l_self_state_list_l_self_state_keys_34_step_, l_self_state_list_l_self_state_keys_35_step_, l_self_state_list_l_self_state_keys_36_step_, l_self_state_list_l_self_state_keys_37_step_, l_self_state_list_l_self_state_keys_38_step_, l_self_state_list_l_self_state_keys_39_step_, l_self_state_list_l_self_state_keys_40_step_, l_self_state_list_l_self_state_keys_41_step_, l_self_state_list_l_self_state_keys_42_step_, l_self_state_list_l_self_state_keys_43_step_, l_self_state_list_l_self_state_keys_44_step_, l_self_state_list_l_self_state_keys_45_step_, l_self_state_list_l_self_state_keys_46_step_, l_self_state_list_l_self_state_keys_47_step_, l_self_state_list_l_self_state_keys_48_step_, l_self_state_list_l_self_state_keys_49_step_, l_self_state_list_l_self_state_keys_50_step_, l_self_state_list_l_self_state_keys_51_step_, l_self_state_list_l_self_state_keys_52_step_, l_self_state_list_l_self_state_keys_53_step_, l_self_state_list_l_self_state_keys_54_step_, l_self_state_list_l_self_state_keys_55_step_, l_self_state_list_l_self_state_keys_56_step_, l_self_state_list_l_self_state_keys_57_step_, l_self_state_list_l_self_state_keys_58_step_, 
l_self_state_list_l_self_state_keys_59_step_, l_self_state_list_l_self_state_keys_60_step_, l_self_state_list_l_self_state_keys_61_step_, l_self_state_list_l_self_state_keys_62_step_, l_self_state_list_l_self_state_keys_63_step_, l_self_state_list_l_self_state_keys_64_step_, l_self_state_list_l_self_state_keys_65_step_, l_self_state_list_l_self_state_keys_66_step_, l_self_state_list_l_self_state_keys_67_step_, l_self_state_list_l_self_state_keys_68_step_, l_self_state_list_l_self_state_keys_69_step_, l_self_state_list_l_self_state_keys_70_step_, l_self_state_list_l_self_state_keys_71_step_, l_self_state_list_l_self_state_keys_72_step_, l_self_state_list_l_self_state_keys_73_step_, l_self_state_list_l_self_state_keys_74_step_, l_self_state_list_l_self_state_keys_75_step_, l_self_state_list_l_self_state_keys_76_step_, l_self_state_list_l_self_state_keys_77_step_, l_self_state_list_l_self_state_keys_78_step_, l_self_state_list_l_self_state_keys_79_step_, l_self_state_list_l_self_state_keys_80_step_, l_self_state_list_l_self_state_keys_81_step_, l_self_state_list_l_self_state_keys_82_step_, l_self_state_list_l_self_state_keys_83_step_, l_self_state_list_l_self_state_keys_84_step_, l_self_state_list_l_self_state_keys_85_step_, l_self_state_list_l_self_state_keys_86_step_, l_self_state_list_l_self_state_keys_87_step_, l_self_state_list_l_self_state_keys_88_step_, l_self_state_list_l_self_state_keys_89_step_, l_self_state_list_l_self_state_keys_90_step_, l_self_state_list_l_self_state_keys_91_step_, l_self_state_list_l_self_state_keys_92_step_, l_self_state_list_l_self_state_keys_93_step_, l_self_state_list_l_self_state_keys_94_step_, l_self_state_list_l_self_state_keys_95_step_, l_self_state_list_l_self_state_keys_96_step_, l_self_state_list_l_self_state_keys_97_step_, l_self_state_list_l_self_state_keys_98_step_, l_self_state_list_l_self_state_keys_99_step_, l_self_state_list_l_self_state_keys_100_step_, l_self_state_list_l_self_state_keys_101_step_, l_self_state_list_l_self_state_keys_102_step_, l_self_state_list_l_self_state_keys_103_step_, l_self_state_list_l_self_state_keys_104_step_, l_self_state_list_l_self_state_keys_105_step_, l_self_state_list_l_self_state_keys_106_step_, l_self_state_list_l_self_state_keys_107_step_, l_self_state_list_l_self_state_keys_108_step_, l_self_state_list_l_self_state_keys_109_step_, l_self_state_list_l_self_state_keys_110_step_, l_self_state_list_l_self_state_keys_111_step_, l_self_state_list_l_self_state_keys_112_step_, l_self_state_list_l_self_state_keys_113_step_, l_self_state_list_l_self_state_keys_114_step_, l_self_state_list_l_self_state_keys_115_step_, l_self_state_list_l_self_state_keys_116_step_, l_self_state_list_l_self_state_keys_117_step_, l_self_state_list_l_self_state_keys_118_step_, l_self_state_list_l_self_state_keys_119_step_, l_self_state_list_l_self_state_keys_120_step_, l_self_state_list_l_self_state_keys_121_step_, l_self_state_list_l_self_state_keys_122_step_, l_self_state_list_l_self_state_keys_123_step_, l_self_state_list_l_self_state_keys_124_step_, l_self_state_list_l_self_state_keys_125_step_, l_self_state_list_l_self_state_keys_126_step_, l_self_state_list_l_self_state_keys_127_step_, l_self_state_list_l_self_state_keys_128_step_, l_self_state_list_l_self_state_keys_129_step_, l_self_state_list_l_self_state_keys_130_step_, l_self_state_list_l_self_state_keys_131_step_, l_self_state_list_l_self_state_keys_132_step_, l_self_state_list_l_self_state_keys_133_step_, l_self_state_list_l_self_state_keys_134_step_, 
l_self_state_list_l_self_state_keys_135_step_, l_self_state_list_l_self_state_keys_136_step_, l_self_state_list_l_self_state_keys_137_step_, l_self_state_list_l_self_state_keys_138_step_, l_self_state_list_l_self_state_keys_139_step_, l_self_state_list_l_self_state_keys_140_step_, l_self_state_list_l_self_state_keys_141_step_, l_self_state_list_l_self_state_keys_142_step_, l_self_state_list_l_self_state_keys_143_step_, l_self_state_list_l_self_state_keys_144_step_, l_self_state_list_l_self_state_keys_145_step_, l_self_state_list_l_self_state_keys_146_step_, l_self_state_list_l_self_state_keys_147_step_]) + getitem_592: "f32[][]cuda:0" = _foreach_pow[0] + getitem_593: "f32[][]cuda:0" = _foreach_pow[1] + getitem_594: "f32[][]cuda:0" = _foreach_pow[2] + getitem_595: "f32[][]cuda:0" = _foreach_pow[3] + getitem_596: "f32[][]cuda:0" = _foreach_pow[4] + getitem_597: "f32[][]cuda:0" = _foreach_pow[5] + getitem_598: "f32[][]cuda:0" = _foreach_pow[6] + getitem_599: "f32[][]cuda:0" = _foreach_pow[7] + getitem_600: "f32[][]cuda:0" = _foreach_pow[8] + getitem_601: "f32[][]cuda:0" = _foreach_pow[9] + getitem_602: "f32[][]cuda:0" = _foreach_pow[10] + getitem_603: "f32[][]cuda:0" = _foreach_pow[11] + getitem_604: "f32[][]cuda:0" = _foreach_pow[12] + getitem_605: "f32[][]cuda:0" = _foreach_pow[13] + getitem_606: "f32[][]cuda:0" = _foreach_pow[14] + getitem_607: "f32[][]cuda:0" = _foreach_pow[15] + getitem_608: "f32[][]cuda:0" = _foreach_pow[16] + getitem_609: "f32[][]cuda:0" = _foreach_pow[17] + getitem_610: "f32[][]cuda:0" = _foreach_pow[18] + getitem_611: "f32[][]cuda:0" = _foreach_pow[19] + getitem_612: "f32[][]cuda:0" = _foreach_pow[20] + getitem_613: "f32[][]cuda:0" = _foreach_pow[21] + getitem_614: "f32[][]cuda:0" = _foreach_pow[22] + getitem_615: "f32[][]cuda:0" = _foreach_pow[23] + getitem_616: "f32[][]cuda:0" = _foreach_pow[24] + getitem_617: "f32[][]cuda:0" = _foreach_pow[25] + getitem_618: "f32[][]cuda:0" = _foreach_pow[26] + getitem_619: "f32[][]cuda:0" = _foreach_pow[27] + getitem_620: "f32[][]cuda:0" = _foreach_pow[28] + getitem_621: "f32[][]cuda:0" = _foreach_pow[29] + getitem_622: "f32[][]cuda:0" = _foreach_pow[30] + getitem_623: "f32[][]cuda:0" = _foreach_pow[31] + getitem_624: "f32[][]cuda:0" = _foreach_pow[32] + getitem_625: "f32[][]cuda:0" = _foreach_pow[33] + getitem_626: "f32[][]cuda:0" = _foreach_pow[34] + getitem_627: "f32[][]cuda:0" = _foreach_pow[35] + getitem_628: "f32[][]cuda:0" = _foreach_pow[36] + getitem_629: "f32[][]cuda:0" = _foreach_pow[37] + getitem_630: "f32[][]cuda:0" = _foreach_pow[38] + getitem_631: "f32[][]cuda:0" = _foreach_pow[39] + getitem_632: "f32[][]cuda:0" = _foreach_pow[40] + getitem_633: "f32[][]cuda:0" = _foreach_pow[41] + getitem_634: "f32[][]cuda:0" = _foreach_pow[42] + getitem_635: "f32[][]cuda:0" = _foreach_pow[43] + getitem_636: "f32[][]cuda:0" = _foreach_pow[44] + getitem_637: "f32[][]cuda:0" = _foreach_pow[45] + getitem_638: "f32[][]cuda:0" = _foreach_pow[46] + getitem_639: "f32[][]cuda:0" = _foreach_pow[47] + getitem_640: "f32[][]cuda:0" = _foreach_pow[48] + getitem_641: "f32[][]cuda:0" = _foreach_pow[49] + getitem_642: "f32[][]cuda:0" = _foreach_pow[50] + getitem_643: "f32[][]cuda:0" = _foreach_pow[51] + getitem_644: "f32[][]cuda:0" = _foreach_pow[52] + getitem_645: "f32[][]cuda:0" = _foreach_pow[53] + getitem_646: "f32[][]cuda:0" = _foreach_pow[54] + getitem_647: "f32[][]cuda:0" = _foreach_pow[55] + getitem_648: "f32[][]cuda:0" = _foreach_pow[56] + getitem_649: "f32[][]cuda:0" = _foreach_pow[57] + getitem_650: "f32[][]cuda:0" = _foreach_pow[58] + 
getitem_651: "f32[][]cuda:0" = _foreach_pow[59] + getitem_652: "f32[][]cuda:0" = _foreach_pow[60] + getitem_653: "f32[][]cuda:0" = _foreach_pow[61] + getitem_654: "f32[][]cuda:0" = _foreach_pow[62] + getitem_655: "f32[][]cuda:0" = _foreach_pow[63] + getitem_656: "f32[][]cuda:0" = _foreach_pow[64] + getitem_657: "f32[][]cuda:0" = _foreach_pow[65] + getitem_658: "f32[][]cuda:0" = _foreach_pow[66] + getitem_659: "f32[][]cuda:0" = _foreach_pow[67] + getitem_660: "f32[][]cuda:0" = _foreach_pow[68] + getitem_661: "f32[][]cuda:0" = _foreach_pow[69] + getitem_662: "f32[][]cuda:0" = _foreach_pow[70] + getitem_663: "f32[][]cuda:0" = _foreach_pow[71] + getitem_664: "f32[][]cuda:0" = _foreach_pow[72] + getitem_665: "f32[][]cuda:0" = _foreach_pow[73] + getitem_666: "f32[][]cuda:0" = _foreach_pow[74] + getitem_667: "f32[][]cuda:0" = _foreach_pow[75] + getitem_668: "f32[][]cuda:0" = _foreach_pow[76] + getitem_669: "f32[][]cuda:0" = _foreach_pow[77] + getitem_670: "f32[][]cuda:0" = _foreach_pow[78] + getitem_671: "f32[][]cuda:0" = _foreach_pow[79] + getitem_672: "f32[][]cuda:0" = _foreach_pow[80] + getitem_673: "f32[][]cuda:0" = _foreach_pow[81] + getitem_674: "f32[][]cuda:0" = _foreach_pow[82] + getitem_675: "f32[][]cuda:0" = _foreach_pow[83] + getitem_676: "f32[][]cuda:0" = _foreach_pow[84] + getitem_677: "f32[][]cuda:0" = _foreach_pow[85] + getitem_678: "f32[][]cuda:0" = _foreach_pow[86] + getitem_679: "f32[][]cuda:0" = _foreach_pow[87] + getitem_680: "f32[][]cuda:0" = _foreach_pow[88] + getitem_681: "f32[][]cuda:0" = _foreach_pow[89] + getitem_682: "f32[][]cuda:0" = _foreach_pow[90] + getitem_683: "f32[][]cuda:0" = _foreach_pow[91] + getitem_684: "f32[][]cuda:0" = _foreach_pow[92] + getitem_685: "f32[][]cuda:0" = _foreach_pow[93] + getitem_686: "f32[][]cuda:0" = _foreach_pow[94] + getitem_687: "f32[][]cuda:0" = _foreach_pow[95] + getitem_688: "f32[][]cuda:0" = _foreach_pow[96] + getitem_689: "f32[][]cuda:0" = _foreach_pow[97] + getitem_690: "f32[][]cuda:0" = _foreach_pow[98] + getitem_691: "f32[][]cuda:0" = _foreach_pow[99] + getitem_692: "f32[][]cuda:0" = _foreach_pow[100] + getitem_693: "f32[][]cuda:0" = _foreach_pow[101] + getitem_694: "f32[][]cuda:0" = _foreach_pow[102] + getitem_695: "f32[][]cuda:0" = _foreach_pow[103] + getitem_696: "f32[][]cuda:0" = _foreach_pow[104] + getitem_697: "f32[][]cuda:0" = _foreach_pow[105] + getitem_698: "f32[][]cuda:0" = _foreach_pow[106] + getitem_699: "f32[][]cuda:0" = _foreach_pow[107] + getitem_700: "f32[][]cuda:0" = _foreach_pow[108] + getitem_701: "f32[][]cuda:0" = _foreach_pow[109] + getitem_702: "f32[][]cuda:0" = _foreach_pow[110] + getitem_703: "f32[][]cuda:0" = _foreach_pow[111] + getitem_704: "f32[][]cuda:0" = _foreach_pow[112] + getitem_705: "f32[][]cuda:0" = _foreach_pow[113] + getitem_706: "f32[][]cuda:0" = _foreach_pow[114] + getitem_707: "f32[][]cuda:0" = _foreach_pow[115] + getitem_708: "f32[][]cuda:0" = _foreach_pow[116] + getitem_709: "f32[][]cuda:0" = _foreach_pow[117] + getitem_710: "f32[][]cuda:0" = _foreach_pow[118] + getitem_711: "f32[][]cuda:0" = _foreach_pow[119] + getitem_712: "f32[][]cuda:0" = _foreach_pow[120] + getitem_713: "f32[][]cuda:0" = _foreach_pow[121] + getitem_714: "f32[][]cuda:0" = _foreach_pow[122] + getitem_715: "f32[][]cuda:0" = _foreach_pow[123] + getitem_716: "f32[][]cuda:0" = _foreach_pow[124] + getitem_717: "f32[][]cuda:0" = _foreach_pow[125] + getitem_718: "f32[][]cuda:0" = _foreach_pow[126] + getitem_719: "f32[][]cuda:0" = _foreach_pow[127] + getitem_720: "f32[][]cuda:0" = _foreach_pow[128] + getitem_721: 
"f32[][]cuda:0" = _foreach_pow[129] + getitem_722: "f32[][]cuda:0" = _foreach_pow[130] + getitem_723: "f32[][]cuda:0" = _foreach_pow[131] + getitem_724: "f32[][]cuda:0" = _foreach_pow[132] + getitem_725: "f32[][]cuda:0" = _foreach_pow[133] + getitem_726: "f32[][]cuda:0" = _foreach_pow[134] + getitem_727: "f32[][]cuda:0" = _foreach_pow[135] + getitem_728: "f32[][]cuda:0" = _foreach_pow[136] + getitem_729: "f32[][]cuda:0" = _foreach_pow[137] + getitem_730: "f32[][]cuda:0" = _foreach_pow[138] + getitem_731: "f32[][]cuda:0" = _foreach_pow[139] + getitem_732: "f32[][]cuda:0" = _foreach_pow[140] + getitem_733: "f32[][]cuda:0" = _foreach_pow[141] + getitem_734: "f32[][]cuda:0" = _foreach_pow[142] + getitem_735: "f32[][]cuda:0" = _foreach_pow[143] + getitem_736: "f32[][]cuda:0" = _foreach_pow[144] + getitem_737: "f32[][]cuda:0" = _foreach_pow[145] + getitem_738: "f32[][]cuda:0" = _foreach_pow[146] + getitem_739: "f32[][]cuda:0" = _foreach_pow[147]; _foreach_pow = None + + # File: /data/users/jjwu/a/pytorch/torch/optim/adam.py:556 in _multi_tensor_adam, code: bias_correction2 = torch._foreach_pow(beta2, device_state_steps) + _foreach_pow_1 = torch._foreach_pow(0.999, [l_self_state_list_l_self_state_keys_0_step_, l_self_state_list_l_self_state_keys_1_step_, l_self_state_list_l_self_state_keys_2_step_, l_self_state_list_l_self_state_keys_3_step_, l_self_state_list_l_self_state_keys_4_step_, l_self_state_list_l_self_state_keys_5_step_, l_self_state_list_l_self_state_keys_6_step_, l_self_state_list_l_self_state_keys_7_step_, l_self_state_list_l_self_state_keys_8_step_, l_self_state_list_l_self_state_keys_9_step_, l_self_state_list_l_self_state_keys_10_step_, l_self_state_list_l_self_state_keys_11_step_, l_self_state_list_l_self_state_keys_12_step_, l_self_state_list_l_self_state_keys_13_step_, l_self_state_list_l_self_state_keys_14_step_, l_self_state_list_l_self_state_keys_15_step_, l_self_state_list_l_self_state_keys_16_step_, l_self_state_list_l_self_state_keys_17_step_, l_self_state_list_l_self_state_keys_18_step_, l_self_state_list_l_self_state_keys_19_step_, l_self_state_list_l_self_state_keys_20_step_, l_self_state_list_l_self_state_keys_21_step_, l_self_state_list_l_self_state_keys_22_step_, l_self_state_list_l_self_state_keys_23_step_, l_self_state_list_l_self_state_keys_24_step_, l_self_state_list_l_self_state_keys_25_step_, l_self_state_list_l_self_state_keys_26_step_, l_self_state_list_l_self_state_keys_27_step_, l_self_state_list_l_self_state_keys_28_step_, l_self_state_list_l_self_state_keys_29_step_, l_self_state_list_l_self_state_keys_30_step_, l_self_state_list_l_self_state_keys_31_step_, l_self_state_list_l_self_state_keys_32_step_, l_self_state_list_l_self_state_keys_33_step_, l_self_state_list_l_self_state_keys_34_step_, l_self_state_list_l_self_state_keys_35_step_, l_self_state_list_l_self_state_keys_36_step_, l_self_state_list_l_self_state_keys_37_step_, l_self_state_list_l_self_state_keys_38_step_, l_self_state_list_l_self_state_keys_39_step_, l_self_state_list_l_self_state_keys_40_step_, l_self_state_list_l_self_state_keys_41_step_, l_self_state_list_l_self_state_keys_42_step_, l_self_state_list_l_self_state_keys_43_step_, l_self_state_list_l_self_state_keys_44_step_, l_self_state_list_l_self_state_keys_45_step_, l_self_state_list_l_self_state_keys_46_step_, l_self_state_list_l_self_state_keys_47_step_, l_self_state_list_l_self_state_keys_48_step_, l_self_state_list_l_self_state_keys_49_step_, l_self_state_list_l_self_state_keys_50_step_, 
l_self_state_list_l_self_state_keys_51_step_, l_self_state_list_l_self_state_keys_52_step_, l_self_state_list_l_self_state_keys_53_step_, l_self_state_list_l_self_state_keys_54_step_, l_self_state_list_l_self_state_keys_55_step_, l_self_state_list_l_self_state_keys_56_step_, l_self_state_list_l_self_state_keys_57_step_, l_self_state_list_l_self_state_keys_58_step_, l_self_state_list_l_self_state_keys_59_step_, l_self_state_list_l_self_state_keys_60_step_, l_self_state_list_l_self_state_keys_61_step_, l_self_state_list_l_self_state_keys_62_step_, l_self_state_list_l_self_state_keys_63_step_, l_self_state_list_l_self_state_keys_64_step_, l_self_state_list_l_self_state_keys_65_step_, l_self_state_list_l_self_state_keys_66_step_, l_self_state_list_l_self_state_keys_67_step_, l_self_state_list_l_self_state_keys_68_step_, l_self_state_list_l_self_state_keys_69_step_, l_self_state_list_l_self_state_keys_70_step_, l_self_state_list_l_self_state_keys_71_step_, l_self_state_list_l_self_state_keys_72_step_, l_self_state_list_l_self_state_keys_73_step_, l_self_state_list_l_self_state_keys_74_step_, l_self_state_list_l_self_state_keys_75_step_, l_self_state_list_l_self_state_keys_76_step_, l_self_state_list_l_self_state_keys_77_step_, l_self_state_list_l_self_state_keys_78_step_, l_self_state_list_l_self_state_keys_79_step_, l_self_state_list_l_self_state_keys_80_step_, l_self_state_list_l_self_state_keys_81_step_, l_self_state_list_l_self_state_keys_82_step_, l_self_state_list_l_self_state_keys_83_step_, l_self_state_list_l_self_state_keys_84_step_, l_self_state_list_l_self_state_keys_85_step_, l_self_state_list_l_self_state_keys_86_step_, l_self_state_list_l_self_state_keys_87_step_, l_self_state_list_l_self_state_keys_88_step_, l_self_state_list_l_self_state_keys_89_step_, l_self_state_list_l_self_state_keys_90_step_, l_self_state_list_l_self_state_keys_91_step_, l_self_state_list_l_self_state_keys_92_step_, l_self_state_list_l_self_state_keys_93_step_, l_self_state_list_l_self_state_keys_94_step_, l_self_state_list_l_self_state_keys_95_step_, l_self_state_list_l_self_state_keys_96_step_, l_self_state_list_l_self_state_keys_97_step_, l_self_state_list_l_self_state_keys_98_step_, l_self_state_list_l_self_state_keys_99_step_, l_self_state_list_l_self_state_keys_100_step_, l_self_state_list_l_self_state_keys_101_step_, l_self_state_list_l_self_state_keys_102_step_, l_self_state_list_l_self_state_keys_103_step_, l_self_state_list_l_self_state_keys_104_step_, l_self_state_list_l_self_state_keys_105_step_, l_self_state_list_l_self_state_keys_106_step_, l_self_state_list_l_self_state_keys_107_step_, l_self_state_list_l_self_state_keys_108_step_, l_self_state_list_l_self_state_keys_109_step_, l_self_state_list_l_self_state_keys_110_step_, l_self_state_list_l_self_state_keys_111_step_, l_self_state_list_l_self_state_keys_112_step_, l_self_state_list_l_self_state_keys_113_step_, l_self_state_list_l_self_state_keys_114_step_, l_self_state_list_l_self_state_keys_115_step_, l_self_state_list_l_self_state_keys_116_step_, l_self_state_list_l_self_state_keys_117_step_, l_self_state_list_l_self_state_keys_118_step_, l_self_state_list_l_self_state_keys_119_step_, l_self_state_list_l_self_state_keys_120_step_, l_self_state_list_l_self_state_keys_121_step_, l_self_state_list_l_self_state_keys_122_step_, l_self_state_list_l_self_state_keys_123_step_, l_self_state_list_l_self_state_keys_124_step_, l_self_state_list_l_self_state_keys_125_step_, l_self_state_list_l_self_state_keys_126_step_, 
l_self_state_list_l_self_state_keys_127_step_, l_self_state_list_l_self_state_keys_128_step_, l_self_state_list_l_self_state_keys_129_step_, l_self_state_list_l_self_state_keys_130_step_, l_self_state_list_l_self_state_keys_131_step_, l_self_state_list_l_self_state_keys_132_step_, l_self_state_list_l_self_state_keys_133_step_, l_self_state_list_l_self_state_keys_134_step_, l_self_state_list_l_self_state_keys_135_step_, l_self_state_list_l_self_state_keys_136_step_, l_self_state_list_l_self_state_keys_137_step_, l_self_state_list_l_self_state_keys_138_step_, l_self_state_list_l_self_state_keys_139_step_, l_self_state_list_l_self_state_keys_140_step_, l_self_state_list_l_self_state_keys_141_step_, l_self_state_list_l_self_state_keys_142_step_, l_self_state_list_l_self_state_keys_143_step_, l_self_state_list_l_self_state_keys_144_step_, l_self_state_list_l_self_state_keys_145_step_, l_self_state_list_l_self_state_keys_146_step_, l_self_state_list_l_self_state_keys_147_step_]); l_self_state_list_l_self_state_keys_0_step_ = l_self_state_list_l_self_state_keys_1_step_ = l_self_state_list_l_self_state_keys_2_step_ = l_self_state_list_l_self_state_keys_3_step_ = l_self_state_list_l_self_state_keys_4_step_ = l_self_state_list_l_self_state_keys_5_step_ = l_self_state_list_l_self_state_keys_6_step_ = l_self_state_list_l_self_state_keys_7_step_ = l_self_state_list_l_self_state_keys_8_step_ = l_self_state_list_l_self_state_keys_9_step_ = l_self_state_list_l_self_state_keys_10_step_ = l_self_state_list_l_self_state_keys_11_step_ = l_self_state_list_l_self_state_keys_12_step_ = l_self_state_list_l_self_state_keys_13_step_ = l_self_state_list_l_self_state_keys_14_step_ = l_self_state_list_l_self_state_keys_15_step_ = l_self_state_list_l_self_state_keys_16_step_ = l_self_state_list_l_self_state_keys_17_step_ = l_self_state_list_l_self_state_keys_18_step_ = l_self_state_list_l_self_state_keys_19_step_ = l_self_state_list_l_self_state_keys_20_step_ = l_self_state_list_l_self_state_keys_21_step_ = l_self_state_list_l_self_state_keys_22_step_ = l_self_state_list_l_self_state_keys_23_step_ = l_self_state_list_l_self_state_keys_24_step_ = l_self_state_list_l_self_state_keys_25_step_ = l_self_state_list_l_self_state_keys_26_step_ = l_self_state_list_l_self_state_keys_27_step_ = l_self_state_list_l_self_state_keys_28_step_ = l_self_state_list_l_self_state_keys_29_step_ = l_self_state_list_l_self_state_keys_30_step_ = l_self_state_list_l_self_state_keys_31_step_ = l_self_state_list_l_self_state_keys_32_step_ = l_self_state_list_l_self_state_keys_33_step_ = l_self_state_list_l_self_state_keys_34_step_ = l_self_state_list_l_self_state_keys_35_step_ = l_self_state_list_l_self_state_keys_36_step_ = l_self_state_list_l_self_state_keys_37_step_ = l_self_state_list_l_self_state_keys_38_step_ = l_self_state_list_l_self_state_keys_39_step_ = l_self_state_list_l_self_state_keys_40_step_ = l_self_state_list_l_self_state_keys_41_step_ = l_self_state_list_l_self_state_keys_42_step_ = l_self_state_list_l_self_state_keys_43_step_ = l_self_state_list_l_self_state_keys_44_step_ = l_self_state_list_l_self_state_keys_45_step_ = l_self_state_list_l_self_state_keys_46_step_ = l_self_state_list_l_self_state_keys_47_step_ = l_self_state_list_l_self_state_keys_48_step_ = l_self_state_list_l_self_state_keys_49_step_ = l_self_state_list_l_self_state_keys_50_step_ = l_self_state_list_l_self_state_keys_51_step_ = l_self_state_list_l_self_state_keys_52_step_ = l_self_state_list_l_self_state_keys_53_step_ = 
l_self_state_list_l_self_state_keys_54_step_ = l_self_state_list_l_self_state_keys_55_step_ = l_self_state_list_l_self_state_keys_56_step_ = l_self_state_list_l_self_state_keys_57_step_ = l_self_state_list_l_self_state_keys_58_step_ = l_self_state_list_l_self_state_keys_59_step_ = l_self_state_list_l_self_state_keys_60_step_ = l_self_state_list_l_self_state_keys_61_step_ = l_self_state_list_l_self_state_keys_62_step_ = l_self_state_list_l_self_state_keys_63_step_ = l_self_state_list_l_self_state_keys_64_step_ = l_self_state_list_l_self_state_keys_65_step_ = l_self_state_list_l_self_state_keys_66_step_ = l_self_state_list_l_self_state_keys_67_step_ = l_self_state_list_l_self_state_keys_68_step_ = l_self_state_list_l_self_state_keys_69_step_ = l_self_state_list_l_self_state_keys_70_step_ = l_self_state_list_l_self_state_keys_71_step_ = l_self_state_list_l_self_state_keys_72_step_ = l_self_state_list_l_self_state_keys_73_step_ = l_self_state_list_l_self_state_keys_74_step_ = l_self_state_list_l_self_state_keys_75_step_ = l_self_state_list_l_self_state_keys_76_step_ = l_self_state_list_l_self_state_keys_77_step_ = l_self_state_list_l_self_state_keys_78_step_ = l_self_state_list_l_self_state_keys_79_step_ = l_self_state_list_l_self_state_keys_80_step_ = l_self_state_list_l_self_state_keys_81_step_ = l_self_state_list_l_self_state_keys_82_step_ = l_self_state_list_l_self_state_keys_83_step_ = l_self_state_list_l_self_state_keys_84_step_ = l_self_state_list_l_self_state_keys_85_step_ = l_self_state_list_l_self_state_keys_86_step_ = l_self_state_list_l_self_state_keys_87_step_ = l_self_state_list_l_self_state_keys_88_step_ = l_self_state_list_l_self_state_keys_89_step_ = l_self_state_list_l_self_state_keys_90_step_ = l_self_state_list_l_self_state_keys_91_step_ = l_self_state_list_l_self_state_keys_92_step_ = l_self_state_list_l_self_state_keys_93_step_ = l_self_state_list_l_self_state_keys_94_step_ = l_self_state_list_l_self_state_keys_95_step_ = l_self_state_list_l_self_state_keys_96_step_ = l_self_state_list_l_self_state_keys_97_step_ = l_self_state_list_l_self_state_keys_98_step_ = l_self_state_list_l_self_state_keys_99_step_ = l_self_state_list_l_self_state_keys_100_step_ = l_self_state_list_l_self_state_keys_101_step_ = l_self_state_list_l_self_state_keys_102_step_ = l_self_state_list_l_self_state_keys_103_step_ = l_self_state_list_l_self_state_keys_104_step_ = l_self_state_list_l_self_state_keys_105_step_ = l_self_state_list_l_self_state_keys_106_step_ = l_self_state_list_l_self_state_keys_107_step_ = l_self_state_list_l_self_state_keys_108_step_ = l_self_state_list_l_self_state_keys_109_step_ = l_self_state_list_l_self_state_keys_110_step_ = l_self_state_list_l_self_state_keys_111_step_ = l_self_state_list_l_self_state_keys_112_step_ = l_self_state_list_l_self_state_keys_113_step_ = l_self_state_list_l_self_state_keys_114_step_ = l_self_state_list_l_self_state_keys_115_step_ = l_self_state_list_l_self_state_keys_116_step_ = l_self_state_list_l_self_state_keys_117_step_ = l_self_state_list_l_self_state_keys_118_step_ = l_self_state_list_l_self_state_keys_119_step_ = l_self_state_list_l_self_state_keys_120_step_ = l_self_state_list_l_self_state_keys_121_step_ = l_self_state_list_l_self_state_keys_122_step_ = l_self_state_list_l_self_state_keys_123_step_ = l_self_state_list_l_self_state_keys_124_step_ = l_self_state_list_l_self_state_keys_125_step_ = l_self_state_list_l_self_state_keys_126_step_ = l_self_state_list_l_self_state_keys_127_step_ = l_self_state_list_l_self_state_keys_128_step_ = 
l_self_state_list_l_self_state_keys_129_step_ = l_self_state_list_l_self_state_keys_130_step_ = l_self_state_list_l_self_state_keys_131_step_ = l_self_state_list_l_self_state_keys_132_step_ = l_self_state_list_l_self_state_keys_133_step_ = l_self_state_list_l_self_state_keys_134_step_ = l_self_state_list_l_self_state_keys_135_step_ = l_self_state_list_l_self_state_keys_136_step_ = l_self_state_list_l_self_state_keys_137_step_ = l_self_state_list_l_self_state_keys_138_step_ = l_self_state_list_l_self_state_keys_139_step_ = l_self_state_list_l_self_state_keys_140_step_ = l_self_state_list_l_self_state_keys_141_step_ = l_self_state_list_l_self_state_keys_142_step_ = l_self_state_list_l_self_state_keys_143_step_ = l_self_state_list_l_self_state_keys_144_step_ = l_self_state_list_l_self_state_keys_145_step_ = l_self_state_list_l_self_state_keys_146_step_ = l_self_state_list_l_self_state_keys_147_step_ = None + getitem_740: "f32[][]cuda:0" = _foreach_pow_1[0] + getitem_741: "f32[][]cuda:0" = _foreach_pow_1[1] + getitem_742: "f32[][]cuda:0" = _foreach_pow_1[2] + getitem_743: "f32[][]cuda:0" = _foreach_pow_1[3] + getitem_744: "f32[][]cuda:0" = _foreach_pow_1[4] + getitem_745: "f32[][]cuda:0" = _foreach_pow_1[5] + getitem_746: "f32[][]cuda:0" = _foreach_pow_1[6] + getitem_747: "f32[][]cuda:0" = _foreach_pow_1[7] + getitem_748: "f32[][]cuda:0" = _foreach_pow_1[8] + getitem_749: "f32[][]cuda:0" = _foreach_pow_1[9] + getitem_750: "f32[][]cuda:0" = _foreach_pow_1[10] + getitem_751: "f32[][]cuda:0" = _foreach_pow_1[11] + getitem_752: "f32[][]cuda:0" = _foreach_pow_1[12] + getitem_753: "f32[][]cuda:0" = _foreach_pow_1[13] + getitem_754: "f32[][]cuda:0" = _foreach_pow_1[14] + getitem_755: "f32[][]cuda:0" = _foreach_pow_1[15] + getitem_756: "f32[][]cuda:0" = _foreach_pow_1[16] + getitem_757: "f32[][]cuda:0" = _foreach_pow_1[17] + getitem_758: "f32[][]cuda:0" = _foreach_pow_1[18] + getitem_759: "f32[][]cuda:0" = _foreach_pow_1[19] + getitem_760: "f32[][]cuda:0" = _foreach_pow_1[20] + getitem_761: "f32[][]cuda:0" = _foreach_pow_1[21] + getitem_762: "f32[][]cuda:0" = _foreach_pow_1[22] + getitem_763: "f32[][]cuda:0" = _foreach_pow_1[23] + getitem_764: "f32[][]cuda:0" = _foreach_pow_1[24] + getitem_765: "f32[][]cuda:0" = _foreach_pow_1[25] + getitem_766: "f32[][]cuda:0" = _foreach_pow_1[26] + getitem_767: "f32[][]cuda:0" = _foreach_pow_1[27] + getitem_768: "f32[][]cuda:0" = _foreach_pow_1[28] + getitem_769: "f32[][]cuda:0" = _foreach_pow_1[29] + getitem_770: "f32[][]cuda:0" = _foreach_pow_1[30] + getitem_771: "f32[][]cuda:0" = _foreach_pow_1[31] + getitem_772: "f32[][]cuda:0" = _foreach_pow_1[32] + getitem_773: "f32[][]cuda:0" = _foreach_pow_1[33] + getitem_774: "f32[][]cuda:0" = _foreach_pow_1[34] + getitem_775: "f32[][]cuda:0" = _foreach_pow_1[35] + getitem_776: "f32[][]cuda:0" = _foreach_pow_1[36] + getitem_777: "f32[][]cuda:0" = _foreach_pow_1[37] + getitem_778: "f32[][]cuda:0" = _foreach_pow_1[38] + getitem_779: "f32[][]cuda:0" = _foreach_pow_1[39] + getitem_780: "f32[][]cuda:0" = _foreach_pow_1[40] + getitem_781: "f32[][]cuda:0" = _foreach_pow_1[41] + getitem_782: "f32[][]cuda:0" = _foreach_pow_1[42] + getitem_783: "f32[][]cuda:0" = _foreach_pow_1[43] + getitem_784: "f32[][]cuda:0" = _foreach_pow_1[44] + getitem_785: "f32[][]cuda:0" = _foreach_pow_1[45] + getitem_786: "f32[][]cuda:0" = _foreach_pow_1[46] + getitem_787: "f32[][]cuda:0" = _foreach_pow_1[47] + getitem_788: "f32[][]cuda:0" = _foreach_pow_1[48] + getitem_789: "f32[][]cuda:0" = _foreach_pow_1[49] + getitem_790: "f32[][]cuda:0" = 
_foreach_pow_1[50] + getitem_791: "f32[][]cuda:0" = _foreach_pow_1[51] + getitem_792: "f32[][]cuda:0" = _foreach_pow_1[52] + getitem_793: "f32[][]cuda:0" = _foreach_pow_1[53] + getitem_794: "f32[][]cuda:0" = _foreach_pow_1[54] + getitem_795: "f32[][]cuda:0" = _foreach_pow_1[55] + getitem_796: "f32[][]cuda:0" = _foreach_pow_1[56] + getitem_797: "f32[][]cuda:0" = _foreach_pow_1[57] + getitem_798: "f32[][]cuda:0" = _foreach_pow_1[58] + getitem_799: "f32[][]cuda:0" = _foreach_pow_1[59] + getitem_800: "f32[][]cuda:0" = _foreach_pow_1[60] + getitem_801: "f32[][]cuda:0" = _foreach_pow_1[61] + getitem_802: "f32[][]cuda:0" = _foreach_pow_1[62] + getitem_803: "f32[][]cuda:0" = _foreach_pow_1[63] + getitem_804: "f32[][]cuda:0" = _foreach_pow_1[64] + getitem_805: "f32[][]cuda:0" = _foreach_pow_1[65] + getitem_806: "f32[][]cuda:0" = _foreach_pow_1[66] + getitem_807: "f32[][]cuda:0" = _foreach_pow_1[67] + getitem_808: "f32[][]cuda:0" = _foreach_pow_1[68] + getitem_809: "f32[][]cuda:0" = _foreach_pow_1[69] + getitem_810: "f32[][]cuda:0" = _foreach_pow_1[70] + getitem_811: "f32[][]cuda:0" = _foreach_pow_1[71] + getitem_812: "f32[][]cuda:0" = _foreach_pow_1[72] + getitem_813: "f32[][]cuda:0" = _foreach_pow_1[73] + getitem_814: "f32[][]cuda:0" = _foreach_pow_1[74] + getitem_815: "f32[][]cuda:0" = _foreach_pow_1[75] + getitem_816: "f32[][]cuda:0" = _foreach_pow_1[76] + getitem_817: "f32[][]cuda:0" = _foreach_pow_1[77] + getitem_818: "f32[][]cuda:0" = _foreach_pow_1[78] + getitem_819: "f32[][]cuda:0" = _foreach_pow_1[79] + getitem_820: "f32[][]cuda:0" = _foreach_pow_1[80] + getitem_821: "f32[][]cuda:0" = _foreach_pow_1[81] + getitem_822: "f32[][]cuda:0" = _foreach_pow_1[82] + getitem_823: "f32[][]cuda:0" = _foreach_pow_1[83] + getitem_824: "f32[][]cuda:0" = _foreach_pow_1[84] + getitem_825: "f32[][]cuda:0" = _foreach_pow_1[85] + getitem_826: "f32[][]cuda:0" = _foreach_pow_1[86] + getitem_827: "f32[][]cuda:0" = _foreach_pow_1[87] + getitem_828: "f32[][]cuda:0" = _foreach_pow_1[88] + getitem_829: "f32[][]cuda:0" = _foreach_pow_1[89] + getitem_830: "f32[][]cuda:0" = _foreach_pow_1[90] + getitem_831: "f32[][]cuda:0" = _foreach_pow_1[91] + getitem_832: "f32[][]cuda:0" = _foreach_pow_1[92] + getitem_833: "f32[][]cuda:0" = _foreach_pow_1[93] + getitem_834: "f32[][]cuda:0" = _foreach_pow_1[94] + getitem_835: "f32[][]cuda:0" = _foreach_pow_1[95] + getitem_836: "f32[][]cuda:0" = _foreach_pow_1[96] + getitem_837: "f32[][]cuda:0" = _foreach_pow_1[97] + getitem_838: "f32[][]cuda:0" = _foreach_pow_1[98] + getitem_839: "f32[][]cuda:0" = _foreach_pow_1[99] + getitem_840: "f32[][]cuda:0" = _foreach_pow_1[100] + getitem_841: "f32[][]cuda:0" = _foreach_pow_1[101] + getitem_842: "f32[][]cuda:0" = _foreach_pow_1[102] + getitem_843: "f32[][]cuda:0" = _foreach_pow_1[103] + getitem_844: "f32[][]cuda:0" = _foreach_pow_1[104] + getitem_845: "f32[][]cuda:0" = _foreach_pow_1[105] + getitem_846: "f32[][]cuda:0" = _foreach_pow_1[106] + getitem_847: "f32[][]cuda:0" = _foreach_pow_1[107] + getitem_848: "f32[][]cuda:0" = _foreach_pow_1[108] + getitem_849: "f32[][]cuda:0" = _foreach_pow_1[109] + getitem_850: "f32[][]cuda:0" = _foreach_pow_1[110] + getitem_851: "f32[][]cuda:0" = _foreach_pow_1[111] + getitem_852: "f32[][]cuda:0" = _foreach_pow_1[112] + getitem_853: "f32[][]cuda:0" = _foreach_pow_1[113] + getitem_854: "f32[][]cuda:0" = _foreach_pow_1[114] + getitem_855: "f32[][]cuda:0" = _foreach_pow_1[115] + getitem_856: "f32[][]cuda:0" = _foreach_pow_1[116] + getitem_857: "f32[][]cuda:0" = _foreach_pow_1[117] + getitem_858: "f32[][]cuda:0" = 
_foreach_pow_1[118] + getitem_859: "f32[][]cuda:0" = _foreach_pow_1[119] + getitem_860: "f32[][]cuda:0" = _foreach_pow_1[120] + getitem_861: "f32[][]cuda:0" = _foreach_pow_1[121] + getitem_862: "f32[][]cuda:0" = _foreach_pow_1[122] + getitem_863: "f32[][]cuda:0" = _foreach_pow_1[123] + getitem_864: "f32[][]cuda:0" = _foreach_pow_1[124] + getitem_865: "f32[][]cuda:0" = _foreach_pow_1[125] + getitem_866: "f32[][]cuda:0" = _foreach_pow_1[126] + getitem_867: "f32[][]cuda:0" = _foreach_pow_1[127] + getitem_868: "f32[][]cuda:0" = _foreach_pow_1[128] + getitem_869: "f32[][]cuda:0" = _foreach_pow_1[129] + getitem_870: "f32[][]cuda:0" = _foreach_pow_1[130] + getitem_871: "f32[][]cuda:0" = _foreach_pow_1[131] + getitem_872: "f32[][]cuda:0" = _foreach_pow_1[132] + getitem_873: "f32[][]cuda:0" = _foreach_pow_1[133] + getitem_874: "f32[][]cuda:0" = _foreach_pow_1[134] + getitem_875: "f32[][]cuda:0" = _foreach_pow_1[135] + getitem_876: "f32[][]cuda:0" = _foreach_pow_1[136] + getitem_877: "f32[][]cuda:0" = _foreach_pow_1[137] + getitem_878: "f32[][]cuda:0" = _foreach_pow_1[138] + getitem_879: "f32[][]cuda:0" = _foreach_pow_1[139] + getitem_880: "f32[][]cuda:0" = _foreach_pow_1[140] + getitem_881: "f32[][]cuda:0" = _foreach_pow_1[141] + getitem_882: "f32[][]cuda:0" = _foreach_pow_1[142] + getitem_883: "f32[][]cuda:0" = _foreach_pow_1[143] + getitem_884: "f32[][]cuda:0" = _foreach_pow_1[144] + getitem_885: "f32[][]cuda:0" = _foreach_pow_1[145] + getitem_886: "f32[][]cuda:0" = _foreach_pow_1[146] + getitem_887: "f32[][]cuda:0" = _foreach_pow_1[147]; _foreach_pow_1 = None + + # File: /data/users/jjwu/a/pytorch/torch/optim/adam.py:558 in _multi_tensor_adam, code: torch._foreach_sub_(bias_correction1, 1) + _foreach_sub_ = torch._foreach_sub_((getitem_592, getitem_593, getitem_594, getitem_595, getitem_596, getitem_597, getitem_598, getitem_599, getitem_600, getitem_601, getitem_602, getitem_603, getitem_604, getitem_605, getitem_606, getitem_607, getitem_608, getitem_609, getitem_610, getitem_611, getitem_612, getitem_613, getitem_614, getitem_615, getitem_616, getitem_617, getitem_618, getitem_619, getitem_620, getitem_621, getitem_622, getitem_623, getitem_624, getitem_625, getitem_626, getitem_627, getitem_628, getitem_629, getitem_630, getitem_631, getitem_632, getitem_633, getitem_634, getitem_635, getitem_636, getitem_637, getitem_638, getitem_639, getitem_640, getitem_641, getitem_642, getitem_643, getitem_644, getitem_645, getitem_646, getitem_647, getitem_648, getitem_649, getitem_650, getitem_651, getitem_652, getitem_653, getitem_654, getitem_655, getitem_656, getitem_657, getitem_658, getitem_659, getitem_660, getitem_661, getitem_662, getitem_663, getitem_664, getitem_665, getitem_666, getitem_667, getitem_668, getitem_669, getitem_670, getitem_671, getitem_672, getitem_673, getitem_674, getitem_675, getitem_676, getitem_677, getitem_678, getitem_679, getitem_680, getitem_681, getitem_682, getitem_683, getitem_684, getitem_685, getitem_686, getitem_687, getitem_688, getitem_689, getitem_690, getitem_691, getitem_692, getitem_693, getitem_694, getitem_695, getitem_696, getitem_697, getitem_698, getitem_699, getitem_700, getitem_701, getitem_702, getitem_703, getitem_704, getitem_705, getitem_706, getitem_707, getitem_708, getitem_709, getitem_710, getitem_711, getitem_712, getitem_713, getitem_714, getitem_715, getitem_716, getitem_717, getitem_718, getitem_719, getitem_720, getitem_721, getitem_722, getitem_723, getitem_724, getitem_725, getitem_726, getitem_727, getitem_728, getitem_729, 
getitem_730, getitem_731, getitem_732, getitem_733, getitem_734, getitem_735, getitem_736, getitem_737, getitem_738, getitem_739), 1); _foreach_sub_ = None + + # File: /data/users/jjwu/a/pytorch/torch/optim/adam.py:559 in _multi_tensor_adam, code: torch._foreach_sub_(bias_correction2, 1) + _foreach_sub__1 = torch._foreach_sub_((getitem_740, getitem_741, getitem_742, getitem_743, getitem_744, getitem_745, getitem_746, getitem_747, getitem_748, getitem_749, getitem_750, getitem_751, getitem_752, getitem_753, getitem_754, getitem_755, getitem_756, getitem_757, getitem_758, getitem_759, getitem_760, getitem_761, getitem_762, getitem_763, getitem_764, getitem_765, getitem_766, getitem_767, getitem_768, getitem_769, getitem_770, getitem_771, getitem_772, getitem_773, getitem_774, getitem_775, getitem_776, getitem_777, getitem_778, getitem_779, getitem_780, getitem_781, getitem_782, getitem_783, getitem_784, getitem_785, getitem_786, getitem_787, getitem_788, getitem_789, getitem_790, getitem_791, getitem_792, getitem_793, getitem_794, getitem_795, getitem_796, getitem_797, getitem_798, getitem_799, getitem_800, getitem_801, getitem_802, getitem_803, getitem_804, getitem_805, getitem_806, getitem_807, getitem_808, getitem_809, getitem_810, getitem_811, getitem_812, getitem_813, getitem_814, getitem_815, getitem_816, getitem_817, getitem_818, getitem_819, getitem_820, getitem_821, getitem_822, getitem_823, getitem_824, getitem_825, getitem_826, getitem_827, getitem_828, getitem_829, getitem_830, getitem_831, getitem_832, getitem_833, getitem_834, getitem_835, getitem_836, getitem_837, getitem_838, getitem_839, getitem_840, getitem_841, getitem_842, getitem_843, getitem_844, getitem_845, getitem_846, getitem_847, getitem_848, getitem_849, getitem_850, getitem_851, getitem_852, getitem_853, getitem_854, getitem_855, getitem_856, getitem_857, getitem_858, getitem_859, getitem_860, getitem_861, getitem_862, getitem_863, getitem_864, getitem_865, getitem_866, getitem_867, getitem_868, getitem_869, getitem_870, getitem_871, getitem_872, getitem_873, getitem_874, getitem_875, getitem_876, getitem_877, getitem_878, getitem_879, getitem_880, getitem_881, getitem_882, getitem_883, getitem_884, getitem_885, getitem_886, getitem_887), 1); _foreach_sub__1 = None + + # File: /data/users/jjwu/a/pytorch/torch/optim/adam.py:561 in _multi_tensor_adam, code: torch._foreach_neg_(bias_correction2) + _foreach_neg_ = torch._foreach_neg_((getitem_740, getitem_741, getitem_742, getitem_743, getitem_744, getitem_745, getitem_746, getitem_747, getitem_748, getitem_749, getitem_750, getitem_751, getitem_752, getitem_753, getitem_754, getitem_755, getitem_756, getitem_757, getitem_758, getitem_759, getitem_760, getitem_761, getitem_762, getitem_763, getitem_764, getitem_765, getitem_766, getitem_767, getitem_768, getitem_769, getitem_770, getitem_771, getitem_772, getitem_773, getitem_774, getitem_775, getitem_776, getitem_777, getitem_778, getitem_779, getitem_780, getitem_781, getitem_782, getitem_783, getitem_784, getitem_785, getitem_786, getitem_787, getitem_788, getitem_789, getitem_790, getitem_791, getitem_792, getitem_793, getitem_794, getitem_795, getitem_796, getitem_797, getitem_798, getitem_799, getitem_800, getitem_801, getitem_802, getitem_803, getitem_804, getitem_805, getitem_806, getitem_807, getitem_808, getitem_809, getitem_810, getitem_811, getitem_812, getitem_813, getitem_814, getitem_815, getitem_816, getitem_817, getitem_818, getitem_819, getitem_820, getitem_821, getitem_822, getitem_823, 
getitem_824, getitem_825, getitem_826, getitem_827, getitem_828, getitem_829, getitem_830, getitem_831, getitem_832, getitem_833, getitem_834, getitem_835, getitem_836, getitem_837, getitem_838, getitem_839, getitem_840, getitem_841, getitem_842, getitem_843, getitem_844, getitem_845, getitem_846, getitem_847, getitem_848, getitem_849, getitem_850, getitem_851, getitem_852, getitem_853, getitem_854, getitem_855, getitem_856, getitem_857, getitem_858, getitem_859, getitem_860, getitem_861, getitem_862, getitem_863, getitem_864, getitem_865, getitem_866, getitem_867, getitem_868, getitem_869, getitem_870, getitem_871, getitem_872, getitem_873, getitem_874, getitem_875, getitem_876, getitem_877, getitem_878, getitem_879, getitem_880, getitem_881, getitem_882, getitem_883, getitem_884, getitem_885, getitem_886, getitem_887)); _foreach_neg_ = None + + # File: /data/users/jjwu/a/pytorch/torch/optim/adam.py:564 in _multi_tensor_adam, code: torch._foreach_div_(bias_correction1, lr) + _foreach_div_ = torch._foreach_div_((getitem_592, getitem_593, getitem_594, getitem_595, getitem_596, getitem_597, getitem_598, getitem_599, getitem_600, getitem_601, getitem_602, getitem_603, getitem_604, getitem_605, getitem_606, getitem_607, getitem_608, getitem_609, getitem_610, getitem_611, getitem_612, getitem_613, getitem_614, getitem_615, getitem_616, getitem_617, getitem_618, getitem_619, getitem_620, getitem_621, getitem_622, getitem_623, getitem_624, getitem_625, getitem_626, getitem_627, getitem_628, getitem_629, getitem_630, getitem_631, getitem_632, getitem_633, getitem_634, getitem_635, getitem_636, getitem_637, getitem_638, getitem_639, getitem_640, getitem_641, getitem_642, getitem_643, getitem_644, getitem_645, getitem_646, getitem_647, getitem_648, getitem_649, getitem_650, getitem_651, getitem_652, getitem_653, getitem_654, getitem_655, getitem_656, getitem_657, getitem_658, getitem_659, getitem_660, getitem_661, getitem_662, getitem_663, getitem_664, getitem_665, getitem_666, getitem_667, getitem_668, getitem_669, getitem_670, getitem_671, getitem_672, getitem_673, getitem_674, getitem_675, getitem_676, getitem_677, getitem_678, getitem_679, getitem_680, getitem_681, getitem_682, getitem_683, getitem_684, getitem_685, getitem_686, getitem_687, getitem_688, getitem_689, getitem_690, getitem_691, getitem_692, getitem_693, getitem_694, getitem_695, getitem_696, getitem_697, getitem_698, getitem_699, getitem_700, getitem_701, getitem_702, getitem_703, getitem_704, getitem_705, getitem_706, getitem_707, getitem_708, getitem_709, getitem_710, getitem_711, getitem_712, getitem_713, getitem_714, getitem_715, getitem_716, getitem_717, getitem_718, getitem_719, getitem_720, getitem_721, getitem_722, getitem_723, getitem_724, getitem_725, getitem_726, getitem_727, getitem_728, getitem_729, getitem_730, getitem_731, getitem_732, getitem_733, getitem_734, getitem_735, getitem_736, getitem_737, getitem_738, getitem_739), 0.01); _foreach_div_ = None + + # File: /data/users/jjwu/a/pytorch/torch/optim/adam.py:565 in _multi_tensor_adam, code: torch._foreach_reciprocal_(bias_correction1) + _foreach_reciprocal_ = torch._foreach_reciprocal_((getitem_592, getitem_593, getitem_594, getitem_595, getitem_596, getitem_597, getitem_598, getitem_599, getitem_600, getitem_601, getitem_602, getitem_603, getitem_604, getitem_605, getitem_606, getitem_607, getitem_608, getitem_609, getitem_610, getitem_611, getitem_612, getitem_613, getitem_614, getitem_615, getitem_616, getitem_617, getitem_618, getitem_619, getitem_620, 
getitem_621, getitem_622, getitem_623, getitem_624, getitem_625, getitem_626, getitem_627, getitem_628, getitem_629, getitem_630, getitem_631, getitem_632, getitem_633, getitem_634, getitem_635, getitem_636, getitem_637, getitem_638, getitem_639, getitem_640, getitem_641, getitem_642, getitem_643, getitem_644, getitem_645, getitem_646, getitem_647, getitem_648, getitem_649, getitem_650, getitem_651, getitem_652, getitem_653, getitem_654, getitem_655, getitem_656, getitem_657, getitem_658, getitem_659, getitem_660, getitem_661, getitem_662, getitem_663, getitem_664, getitem_665, getitem_666, getitem_667, getitem_668, getitem_669, getitem_670, getitem_671, getitem_672, getitem_673, getitem_674, getitem_675, getitem_676, getitem_677, getitem_678, getitem_679, getitem_680, getitem_681, getitem_682, getitem_683, getitem_684, getitem_685, getitem_686, getitem_687, getitem_688, getitem_689, getitem_690, getitem_691, getitem_692, getitem_693, getitem_694, getitem_695, getitem_696, getitem_697, getitem_698, getitem_699, getitem_700, getitem_701, getitem_702, getitem_703, getitem_704, getitem_705, getitem_706, getitem_707, getitem_708, getitem_709, getitem_710, getitem_711, getitem_712, getitem_713, getitem_714, getitem_715, getitem_716, getitem_717, getitem_718, getitem_719, getitem_720, getitem_721, getitem_722, getitem_723, getitem_724, getitem_725, getitem_726, getitem_727, getitem_728, getitem_729, getitem_730, getitem_731, getitem_732, getitem_733, getitem_734, getitem_735, getitem_736, getitem_737, getitem_738, getitem_739)); _foreach_reciprocal_ = None + + # File: /data/users/jjwu/a/pytorch/torch/optim/adam.py:567 in _multi_tensor_adam, code: torch._foreach_sqrt_(bias_correction2) + _foreach_sqrt_ = torch._foreach_sqrt_((getitem_740, getitem_741, getitem_742, getitem_743, getitem_744, getitem_745, getitem_746, getitem_747, getitem_748, getitem_749, getitem_750, getitem_751, getitem_752, getitem_753, getitem_754, getitem_755, getitem_756, getitem_757, getitem_758, getitem_759, getitem_760, getitem_761, getitem_762, getitem_763, getitem_764, getitem_765, getitem_766, getitem_767, getitem_768, getitem_769, getitem_770, getitem_771, getitem_772, getitem_773, getitem_774, getitem_775, getitem_776, getitem_777, getitem_778, getitem_779, getitem_780, getitem_781, getitem_782, getitem_783, getitem_784, getitem_785, getitem_786, getitem_787, getitem_788, getitem_789, getitem_790, getitem_791, getitem_792, getitem_793, getitem_794, getitem_795, getitem_796, getitem_797, getitem_798, getitem_799, getitem_800, getitem_801, getitem_802, getitem_803, getitem_804, getitem_805, getitem_806, getitem_807, getitem_808, getitem_809, getitem_810, getitem_811, getitem_812, getitem_813, getitem_814, getitem_815, getitem_816, getitem_817, getitem_818, getitem_819, getitem_820, getitem_821, getitem_822, getitem_823, getitem_824, getitem_825, getitem_826, getitem_827, getitem_828, getitem_829, getitem_830, getitem_831, getitem_832, getitem_833, getitem_834, getitem_835, getitem_836, getitem_837, getitem_838, getitem_839, getitem_840, getitem_841, getitem_842, getitem_843, getitem_844, getitem_845, getitem_846, getitem_847, getitem_848, getitem_849, getitem_850, getitem_851, getitem_852, getitem_853, getitem_854, getitem_855, getitem_856, getitem_857, getitem_858, getitem_859, getitem_860, getitem_861, getitem_862, getitem_863, getitem_864, getitem_865, getitem_866, getitem_867, getitem_868, getitem_869, getitem_870, getitem_871, getitem_872, getitem_873, getitem_874, getitem_875, getitem_876, getitem_877, 
getitem_878, getitem_879, getitem_880, getitem_881, getitem_882, getitem_883, getitem_884, getitem_885, getitem_886, getitem_887)); _foreach_sqrt_ = None + + # File: /data/users/jjwu/a/pytorch/torch/optim/adam.py:582 in _multi_tensor_adam, code: exp_avg_sq_sqrt = torch._foreach_sqrt(device_exp_avg_sqs) + _foreach_sqrt = torch._foreach_sqrt([l_self_state_list_l_self_state_keys_0_exp_avg_sq_, l_self_state_list_l_self_state_keys_1_exp_avg_sq_, l_self_state_list_l_self_state_keys_2_exp_avg_sq_, l_self_state_list_l_self_state_keys_3_exp_avg_sq_, l_self_state_list_l_self_state_keys_4_exp_avg_sq_, l_self_state_list_l_self_state_keys_5_exp_avg_sq_, l_self_state_list_l_self_state_keys_6_exp_avg_sq_, l_self_state_list_l_self_state_keys_7_exp_avg_sq_, l_self_state_list_l_self_state_keys_8_exp_avg_sq_, l_self_state_list_l_self_state_keys_9_exp_avg_sq_, l_self_state_list_l_self_state_keys_10_exp_avg_sq_, l_self_state_list_l_self_state_keys_11_exp_avg_sq_, l_self_state_list_l_self_state_keys_12_exp_avg_sq_, l_self_state_list_l_self_state_keys_13_exp_avg_sq_, l_self_state_list_l_self_state_keys_14_exp_avg_sq_, l_self_state_list_l_self_state_keys_15_exp_avg_sq_, l_self_state_list_l_self_state_keys_16_exp_avg_sq_, l_self_state_list_l_self_state_keys_17_exp_avg_sq_, l_self_state_list_l_self_state_keys_18_exp_avg_sq_, l_self_state_list_l_self_state_keys_19_exp_avg_sq_, l_self_state_list_l_self_state_keys_20_exp_avg_sq_, l_self_state_list_l_self_state_keys_21_exp_avg_sq_, l_self_state_list_l_self_state_keys_22_exp_avg_sq_, l_self_state_list_l_self_state_keys_23_exp_avg_sq_, l_self_state_list_l_self_state_keys_24_exp_avg_sq_, l_self_state_list_l_self_state_keys_25_exp_avg_sq_, l_self_state_list_l_self_state_keys_26_exp_avg_sq_, l_self_state_list_l_self_state_keys_27_exp_avg_sq_, l_self_state_list_l_self_state_keys_28_exp_avg_sq_, l_self_state_list_l_self_state_keys_29_exp_avg_sq_, l_self_state_list_l_self_state_keys_30_exp_avg_sq_, l_self_state_list_l_self_state_keys_31_exp_avg_sq_, l_self_state_list_l_self_state_keys_32_exp_avg_sq_, l_self_state_list_l_self_state_keys_33_exp_avg_sq_, l_self_state_list_l_self_state_keys_34_exp_avg_sq_, l_self_state_list_l_self_state_keys_35_exp_avg_sq_, l_self_state_list_l_self_state_keys_36_exp_avg_sq_, l_self_state_list_l_self_state_keys_37_exp_avg_sq_, l_self_state_list_l_self_state_keys_38_exp_avg_sq_, l_self_state_list_l_self_state_keys_39_exp_avg_sq_, l_self_state_list_l_self_state_keys_40_exp_avg_sq_, l_self_state_list_l_self_state_keys_41_exp_avg_sq_, l_self_state_list_l_self_state_keys_42_exp_avg_sq_, l_self_state_list_l_self_state_keys_43_exp_avg_sq_, l_self_state_list_l_self_state_keys_44_exp_avg_sq_, l_self_state_list_l_self_state_keys_45_exp_avg_sq_, l_self_state_list_l_self_state_keys_46_exp_avg_sq_, l_self_state_list_l_self_state_keys_47_exp_avg_sq_, l_self_state_list_l_self_state_keys_48_exp_avg_sq_, l_self_state_list_l_self_state_keys_49_exp_avg_sq_, l_self_state_list_l_self_state_keys_50_exp_avg_sq_, l_self_state_list_l_self_state_keys_51_exp_avg_sq_, l_self_state_list_l_self_state_keys_52_exp_avg_sq_, l_self_state_list_l_self_state_keys_53_exp_avg_sq_, l_self_state_list_l_self_state_keys_54_exp_avg_sq_, l_self_state_list_l_self_state_keys_55_exp_avg_sq_, l_self_state_list_l_self_state_keys_56_exp_avg_sq_, l_self_state_list_l_self_state_keys_57_exp_avg_sq_, l_self_state_list_l_self_state_keys_58_exp_avg_sq_, l_self_state_list_l_self_state_keys_59_exp_avg_sq_, l_self_state_list_l_self_state_keys_60_exp_avg_sq_, 
l_self_state_list_l_self_state_keys_61_exp_avg_sq_, l_self_state_list_l_self_state_keys_62_exp_avg_sq_, l_self_state_list_l_self_state_keys_63_exp_avg_sq_, l_self_state_list_l_self_state_keys_64_exp_avg_sq_, l_self_state_list_l_self_state_keys_65_exp_avg_sq_, l_self_state_list_l_self_state_keys_66_exp_avg_sq_, l_self_state_list_l_self_state_keys_67_exp_avg_sq_, l_self_state_list_l_self_state_keys_68_exp_avg_sq_, l_self_state_list_l_self_state_keys_69_exp_avg_sq_, l_self_state_list_l_self_state_keys_70_exp_avg_sq_, l_self_state_list_l_self_state_keys_71_exp_avg_sq_, l_self_state_list_l_self_state_keys_72_exp_avg_sq_, l_self_state_list_l_self_state_keys_73_exp_avg_sq_, l_self_state_list_l_self_state_keys_74_exp_avg_sq_, l_self_state_list_l_self_state_keys_75_exp_avg_sq_, l_self_state_list_l_self_state_keys_76_exp_avg_sq_, l_self_state_list_l_self_state_keys_77_exp_avg_sq_, l_self_state_list_l_self_state_keys_78_exp_avg_sq_, l_self_state_list_l_self_state_keys_79_exp_avg_sq_, l_self_state_list_l_self_state_keys_80_exp_avg_sq_, l_self_state_list_l_self_state_keys_81_exp_avg_sq_, l_self_state_list_l_self_state_keys_82_exp_avg_sq_, l_self_state_list_l_self_state_keys_83_exp_avg_sq_, l_self_state_list_l_self_state_keys_84_exp_avg_sq_, l_self_state_list_l_self_state_keys_85_exp_avg_sq_, l_self_state_list_l_self_state_keys_86_exp_avg_sq_, l_self_state_list_l_self_state_keys_87_exp_avg_sq_, l_self_state_list_l_self_state_keys_88_exp_avg_sq_, l_self_state_list_l_self_state_keys_89_exp_avg_sq_, l_self_state_list_l_self_state_keys_90_exp_avg_sq_, l_self_state_list_l_self_state_keys_91_exp_avg_sq_, l_self_state_list_l_self_state_keys_92_exp_avg_sq_, l_self_state_list_l_self_state_keys_93_exp_avg_sq_, l_self_state_list_l_self_state_keys_94_exp_avg_sq_, l_self_state_list_l_self_state_keys_95_exp_avg_sq_, l_self_state_list_l_self_state_keys_96_exp_avg_sq_, l_self_state_list_l_self_state_keys_97_exp_avg_sq_, l_self_state_list_l_self_state_keys_98_exp_avg_sq_, l_self_state_list_l_self_state_keys_99_exp_avg_sq_, l_self_state_list_l_self_state_keys_100_exp_avg_sq_, l_self_state_list_l_self_state_keys_101_exp_avg_sq_, l_self_state_list_l_self_state_keys_102_exp_avg_sq_, l_self_state_list_l_self_state_keys_103_exp_avg_sq_, l_self_state_list_l_self_state_keys_104_exp_avg_sq_, l_self_state_list_l_self_state_keys_105_exp_avg_sq_, l_self_state_list_l_self_state_keys_106_exp_avg_sq_, l_self_state_list_l_self_state_keys_107_exp_avg_sq_, l_self_state_list_l_self_state_keys_108_exp_avg_sq_, l_self_state_list_l_self_state_keys_109_exp_avg_sq_, l_self_state_list_l_self_state_keys_110_exp_avg_sq_, l_self_state_list_l_self_state_keys_111_exp_avg_sq_, l_self_state_list_l_self_state_keys_112_exp_avg_sq_, l_self_state_list_l_self_state_keys_113_exp_avg_sq_, l_self_state_list_l_self_state_keys_114_exp_avg_sq_, l_self_state_list_l_self_state_keys_115_exp_avg_sq_, l_self_state_list_l_self_state_keys_116_exp_avg_sq_, l_self_state_list_l_self_state_keys_117_exp_avg_sq_, l_self_state_list_l_self_state_keys_118_exp_avg_sq_, l_self_state_list_l_self_state_keys_119_exp_avg_sq_, l_self_state_list_l_self_state_keys_120_exp_avg_sq_, l_self_state_list_l_self_state_keys_121_exp_avg_sq_, l_self_state_list_l_self_state_keys_122_exp_avg_sq_, l_self_state_list_l_self_state_keys_123_exp_avg_sq_, l_self_state_list_l_self_state_keys_124_exp_avg_sq_, l_self_state_list_l_self_state_keys_125_exp_avg_sq_, l_self_state_list_l_self_state_keys_126_exp_avg_sq_, l_self_state_list_l_self_state_keys_127_exp_avg_sq_, 
l_self_state_list_l_self_state_keys_128_exp_avg_sq_, l_self_state_list_l_self_state_keys_129_exp_avg_sq_, l_self_state_list_l_self_state_keys_130_exp_avg_sq_, l_self_state_list_l_self_state_keys_131_exp_avg_sq_, l_self_state_list_l_self_state_keys_132_exp_avg_sq_, l_self_state_list_l_self_state_keys_133_exp_avg_sq_, l_self_state_list_l_self_state_keys_134_exp_avg_sq_, l_self_state_list_l_self_state_keys_135_exp_avg_sq_, l_self_state_list_l_self_state_keys_136_exp_avg_sq_, l_self_state_list_l_self_state_keys_137_exp_avg_sq_, l_self_state_list_l_self_state_keys_138_exp_avg_sq_, l_self_state_list_l_self_state_keys_139_exp_avg_sq_, l_self_state_list_l_self_state_keys_140_exp_avg_sq_, l_self_state_list_l_self_state_keys_141_exp_avg_sq_, l_self_state_list_l_self_state_keys_142_exp_avg_sq_, l_self_state_list_l_self_state_keys_143_exp_avg_sq_, l_self_state_list_l_self_state_keys_144_exp_avg_sq_, l_self_state_list_l_self_state_keys_145_exp_avg_sq_, l_self_state_list_l_self_state_keys_146_exp_avg_sq_, l_self_state_list_l_self_state_keys_147_exp_avg_sq_]); l_self_state_list_l_self_state_keys_0_exp_avg_sq_ = l_self_state_list_l_self_state_keys_1_exp_avg_sq_ = l_self_state_list_l_self_state_keys_2_exp_avg_sq_ = l_self_state_list_l_self_state_keys_3_exp_avg_sq_ = l_self_state_list_l_self_state_keys_4_exp_avg_sq_ = l_self_state_list_l_self_state_keys_5_exp_avg_sq_ = l_self_state_list_l_self_state_keys_6_exp_avg_sq_ = l_self_state_list_l_self_state_keys_7_exp_avg_sq_ = l_self_state_list_l_self_state_keys_8_exp_avg_sq_ = l_self_state_list_l_self_state_keys_9_exp_avg_sq_ = l_self_state_list_l_self_state_keys_10_exp_avg_sq_ = l_self_state_list_l_self_state_keys_11_exp_avg_sq_ = l_self_state_list_l_self_state_keys_12_exp_avg_sq_ = l_self_state_list_l_self_state_keys_13_exp_avg_sq_ = l_self_state_list_l_self_state_keys_14_exp_avg_sq_ = l_self_state_list_l_self_state_keys_15_exp_avg_sq_ = l_self_state_list_l_self_state_keys_16_exp_avg_sq_ = l_self_state_list_l_self_state_keys_17_exp_avg_sq_ = l_self_state_list_l_self_state_keys_18_exp_avg_sq_ = l_self_state_list_l_self_state_keys_19_exp_avg_sq_ = l_self_state_list_l_self_state_keys_20_exp_avg_sq_ = l_self_state_list_l_self_state_keys_21_exp_avg_sq_ = l_self_state_list_l_self_state_keys_22_exp_avg_sq_ = l_self_state_list_l_self_state_keys_23_exp_avg_sq_ = l_self_state_list_l_self_state_keys_24_exp_avg_sq_ = l_self_state_list_l_self_state_keys_25_exp_avg_sq_ = l_self_state_list_l_self_state_keys_26_exp_avg_sq_ = l_self_state_list_l_self_state_keys_27_exp_avg_sq_ = l_self_state_list_l_self_state_keys_28_exp_avg_sq_ = l_self_state_list_l_self_state_keys_29_exp_avg_sq_ = l_self_state_list_l_self_state_keys_30_exp_avg_sq_ = l_self_state_list_l_self_state_keys_31_exp_avg_sq_ = l_self_state_list_l_self_state_keys_32_exp_avg_sq_ = l_self_state_list_l_self_state_keys_33_exp_avg_sq_ = l_self_state_list_l_self_state_keys_34_exp_avg_sq_ = l_self_state_list_l_self_state_keys_35_exp_avg_sq_ = l_self_state_list_l_self_state_keys_36_exp_avg_sq_ = l_self_state_list_l_self_state_keys_37_exp_avg_sq_ = l_self_state_list_l_self_state_keys_38_exp_avg_sq_ = l_self_state_list_l_self_state_keys_39_exp_avg_sq_ = l_self_state_list_l_self_state_keys_40_exp_avg_sq_ = l_self_state_list_l_self_state_keys_41_exp_avg_sq_ = l_self_state_list_l_self_state_keys_42_exp_avg_sq_ = l_self_state_list_l_self_state_keys_43_exp_avg_sq_ = l_self_state_list_l_self_state_keys_44_exp_avg_sq_ = l_self_state_list_l_self_state_keys_45_exp_avg_sq_ = l_self_state_list_l_self_state_keys_46_exp_avg_sq_ = 
l_self_state_list_l_self_state_keys_47_exp_avg_sq_ = l_self_state_list_l_self_state_keys_48_exp_avg_sq_ = l_self_state_list_l_self_state_keys_49_exp_avg_sq_ = l_self_state_list_l_self_state_keys_50_exp_avg_sq_ = l_self_state_list_l_self_state_keys_51_exp_avg_sq_ = l_self_state_list_l_self_state_keys_52_exp_avg_sq_ = l_self_state_list_l_self_state_keys_53_exp_avg_sq_ = l_self_state_list_l_self_state_keys_54_exp_avg_sq_ = l_self_state_list_l_self_state_keys_55_exp_avg_sq_ = l_self_state_list_l_self_state_keys_56_exp_avg_sq_ = l_self_state_list_l_self_state_keys_57_exp_avg_sq_ = l_self_state_list_l_self_state_keys_58_exp_avg_sq_ = l_self_state_list_l_self_state_keys_59_exp_avg_sq_ = l_self_state_list_l_self_state_keys_60_exp_avg_sq_ = l_self_state_list_l_self_state_keys_61_exp_avg_sq_ = l_self_state_list_l_self_state_keys_62_exp_avg_sq_ = l_self_state_list_l_self_state_keys_63_exp_avg_sq_ = l_self_state_list_l_self_state_keys_64_exp_avg_sq_ = l_self_state_list_l_self_state_keys_65_exp_avg_sq_ = l_self_state_list_l_self_state_keys_66_exp_avg_sq_ = l_self_state_list_l_self_state_keys_67_exp_avg_sq_ = l_self_state_list_l_self_state_keys_68_exp_avg_sq_ = l_self_state_list_l_self_state_keys_69_exp_avg_sq_ = l_self_state_list_l_self_state_keys_70_exp_avg_sq_ = l_self_state_list_l_self_state_keys_71_exp_avg_sq_ = l_self_state_list_l_self_state_keys_72_exp_avg_sq_ = l_self_state_list_l_self_state_keys_73_exp_avg_sq_ = l_self_state_list_l_self_state_keys_74_exp_avg_sq_ = l_self_state_list_l_self_state_keys_75_exp_avg_sq_ = l_self_state_list_l_self_state_keys_76_exp_avg_sq_ = l_self_state_list_l_self_state_keys_77_exp_avg_sq_ = l_self_state_list_l_self_state_keys_78_exp_avg_sq_ = l_self_state_list_l_self_state_keys_79_exp_avg_sq_ = l_self_state_list_l_self_state_keys_80_exp_avg_sq_ = l_self_state_list_l_self_state_keys_81_exp_avg_sq_ = l_self_state_list_l_self_state_keys_82_exp_avg_sq_ = l_self_state_list_l_self_state_keys_83_exp_avg_sq_ = l_self_state_list_l_self_state_keys_84_exp_avg_sq_ = l_self_state_list_l_self_state_keys_85_exp_avg_sq_ = l_self_state_list_l_self_state_keys_86_exp_avg_sq_ = l_self_state_list_l_self_state_keys_87_exp_avg_sq_ = l_self_state_list_l_self_state_keys_88_exp_avg_sq_ = l_self_state_list_l_self_state_keys_89_exp_avg_sq_ = l_self_state_list_l_self_state_keys_90_exp_avg_sq_ = l_self_state_list_l_self_state_keys_91_exp_avg_sq_ = l_self_state_list_l_self_state_keys_92_exp_avg_sq_ = l_self_state_list_l_self_state_keys_93_exp_avg_sq_ = l_self_state_list_l_self_state_keys_94_exp_avg_sq_ = l_self_state_list_l_self_state_keys_95_exp_avg_sq_ = l_self_state_list_l_self_state_keys_96_exp_avg_sq_ = l_self_state_list_l_self_state_keys_97_exp_avg_sq_ = l_self_state_list_l_self_state_keys_98_exp_avg_sq_ = l_self_state_list_l_self_state_keys_99_exp_avg_sq_ = l_self_state_list_l_self_state_keys_100_exp_avg_sq_ = l_self_state_list_l_self_state_keys_101_exp_avg_sq_ = l_self_state_list_l_self_state_keys_102_exp_avg_sq_ = l_self_state_list_l_self_state_keys_103_exp_avg_sq_ = l_self_state_list_l_self_state_keys_104_exp_avg_sq_ = l_self_state_list_l_self_state_keys_105_exp_avg_sq_ = l_self_state_list_l_self_state_keys_106_exp_avg_sq_ = l_self_state_list_l_self_state_keys_107_exp_avg_sq_ = l_self_state_list_l_self_state_keys_108_exp_avg_sq_ = l_self_state_list_l_self_state_keys_109_exp_avg_sq_ = l_self_state_list_l_self_state_keys_110_exp_avg_sq_ = l_self_state_list_l_self_state_keys_111_exp_avg_sq_ = l_self_state_list_l_self_state_keys_112_exp_avg_sq_ = 
l_self_state_list_l_self_state_keys_113_exp_avg_sq_ = l_self_state_list_l_self_state_keys_114_exp_avg_sq_ = l_self_state_list_l_self_state_keys_115_exp_avg_sq_ = l_self_state_list_l_self_state_keys_116_exp_avg_sq_ = l_self_state_list_l_self_state_keys_117_exp_avg_sq_ = l_self_state_list_l_self_state_keys_118_exp_avg_sq_ = l_self_state_list_l_self_state_keys_119_exp_avg_sq_ = l_self_state_list_l_self_state_keys_120_exp_avg_sq_ = l_self_state_list_l_self_state_keys_121_exp_avg_sq_ = l_self_state_list_l_self_state_keys_122_exp_avg_sq_ = l_self_state_list_l_self_state_keys_123_exp_avg_sq_ = l_self_state_list_l_self_state_keys_124_exp_avg_sq_ = l_self_state_list_l_self_state_keys_125_exp_avg_sq_ = l_self_state_list_l_self_state_keys_126_exp_avg_sq_ = l_self_state_list_l_self_state_keys_127_exp_avg_sq_ = l_self_state_list_l_self_state_keys_128_exp_avg_sq_ = l_self_state_list_l_self_state_keys_129_exp_avg_sq_ = l_self_state_list_l_self_state_keys_130_exp_avg_sq_ = l_self_state_list_l_self_state_keys_131_exp_avg_sq_ = l_self_state_list_l_self_state_keys_132_exp_avg_sq_ = l_self_state_list_l_self_state_keys_133_exp_avg_sq_ = l_self_state_list_l_self_state_keys_134_exp_avg_sq_ = l_self_state_list_l_self_state_keys_135_exp_avg_sq_ = l_self_state_list_l_self_state_keys_136_exp_avg_sq_ = l_self_state_list_l_self_state_keys_137_exp_avg_sq_ = l_self_state_list_l_self_state_keys_138_exp_avg_sq_ = l_self_state_list_l_self_state_keys_139_exp_avg_sq_ = l_self_state_list_l_self_state_keys_140_exp_avg_sq_ = l_self_state_list_l_self_state_keys_141_exp_avg_sq_ = l_self_state_list_l_self_state_keys_142_exp_avg_sq_ = l_self_state_list_l_self_state_keys_143_exp_avg_sq_ = l_self_state_list_l_self_state_keys_144_exp_avg_sq_ = l_self_state_list_l_self_state_keys_145_exp_avg_sq_ = l_self_state_list_l_self_state_keys_146_exp_avg_sq_ = l_self_state_list_l_self_state_keys_147_exp_avg_sq_ = None
 getitem_1776: "f32[50304, 768][768, 1]cuda:0" = _foreach_sqrt[0]
 getitem_1777: "f32[1024, 768][768, 1]cuda:0" = _foreach_sqrt[1]
 getitem_1778: "f32[768][1]cuda:0" = _foreach_sqrt[2]
 getitem_1779: "f32[768][1]cuda:0" = _foreach_sqrt[3]
 getitem_1780: "f32[2304, 768][768, 1]cuda:0" = _foreach_sqrt[4]
 getitem_1781: "f32[2304][1]cuda:0" = _foreach_sqrt[5]
 getitem_1782: "f32[768, 768][768, 1]cuda:0" = _foreach_sqrt[6]
 getitem_1783: "f32[768][1]cuda:0" = _foreach_sqrt[7]
 getitem_1784: "f32[768][1]cuda:0" = _foreach_sqrt[8]
 getitem_1785: "f32[768][1]cuda:0" = _foreach_sqrt[9]
 getitem_1786: "f32[3072, 768][768, 1]cuda:0" = _foreach_sqrt[10]
 getitem_1787: "f32[3072][1]cuda:0" = _foreach_sqrt[11]
 getitem_1788: "f32[768, 3072][3072, 1]cuda:0" = _foreach_sqrt[12]
 getitem_1789: "f32[768][1]cuda:0" = _foreach_sqrt[13]
 getitem_1790: "f32[768][1]cuda:0" = _foreach_sqrt[14]
 getitem_1791: "f32[768][1]cuda:0" = _foreach_sqrt[15]
 getitem_1792: "f32[2304, 768][768, 1]cuda:0" = _foreach_sqrt[16]
 getitem_1793: "f32[2304][1]cuda:0" = _foreach_sqrt[17]
 getitem_1794: "f32[768, 768][768, 1]cuda:0" = _foreach_sqrt[18]
 getitem_1795: "f32[768][1]cuda:0" = _foreach_sqrt[19]
 getitem_1796: "f32[768][1]cuda:0" = _foreach_sqrt[20]
 getitem_1797: "f32[768][1]cuda:0" = _foreach_sqrt[21]
 getitem_1798: "f32[3072, 768][768, 1]cuda:0" = _foreach_sqrt[22]
 getitem_1799: "f32[3072][1]cuda:0" = _foreach_sqrt[23]
 getitem_1800: "f32[768, 3072][3072, 1]cuda:0" = _foreach_sqrt[24]
 getitem_1801: "f32[768][1]cuda:0" = _foreach_sqrt[25]
 getitem_1802: "f32[768][1]cuda:0" = _foreach_sqrt[26]
 getitem_1803: "f32[768][1]cuda:0" = _foreach_sqrt[27]
 getitem_1804: "f32[2304, 768][768, 1]cuda:0" = _foreach_sqrt[28]
 getitem_1805: "f32[2304][1]cuda:0" = _foreach_sqrt[29]
 getitem_1806: "f32[768, 768][768, 1]cuda:0" = _foreach_sqrt[30]
 getitem_1807: "f32[768][1]cuda:0" = _foreach_sqrt[31]
 getitem_1808: "f32[768][1]cuda:0" = _foreach_sqrt[32]
 getitem_1809: "f32[768][1]cuda:0" = _foreach_sqrt[33]
 getitem_1810: "f32[3072, 768][768, 1]cuda:0" = _foreach_sqrt[34]
 getitem_1811: "f32[3072][1]cuda:0" = _foreach_sqrt[35]
 getitem_1812: "f32[768, 3072][3072, 1]cuda:0" = _foreach_sqrt[36]
 getitem_1813: "f32[768][1]cuda:0" = _foreach_sqrt[37]
 getitem_1814: "f32[768][1]cuda:0" = _foreach_sqrt[38]
 getitem_1815: "f32[768][1]cuda:0" = _foreach_sqrt[39]
 getitem_1816: "f32[2304, 768][768, 1]cuda:0" = _foreach_sqrt[40]
 getitem_1817: "f32[2304][1]cuda:0" = _foreach_sqrt[41]
 getitem_1818: "f32[768, 768][768, 1]cuda:0" = _foreach_sqrt[42]
 getitem_1819: "f32[768][1]cuda:0" = _foreach_sqrt[43]
 getitem_1820: "f32[768][1]cuda:0" = _foreach_sqrt[44]
 getitem_1821: "f32[768][1]cuda:0" = _foreach_sqrt[45]
 getitem_1822: "f32[3072, 768][768, 1]cuda:0" = _foreach_sqrt[46]
 getitem_1823: "f32[3072][1]cuda:0" = _foreach_sqrt[47]
 getitem_1824: "f32[768, 3072][3072, 1]cuda:0" = _foreach_sqrt[48]
 getitem_1825: "f32[768][1]cuda:0" = _foreach_sqrt[49]
 getitem_1826: "f32[768][1]cuda:0" = _foreach_sqrt[50]
 getitem_1827: "f32[768][1]cuda:0" = _foreach_sqrt[51]
 getitem_1828: "f32[2304, 768][768, 1]cuda:0" = _foreach_sqrt[52]
 getitem_1829: "f32[2304][1]cuda:0" = _foreach_sqrt[53]
 getitem_1830: "f32[768, 768][768, 1]cuda:0" = _foreach_sqrt[54]
 getitem_1831: "f32[768][1]cuda:0" = _foreach_sqrt[55]
 getitem_1832: "f32[768][1]cuda:0" = _foreach_sqrt[56]
 getitem_1833: "f32[768][1]cuda:0" = _foreach_sqrt[57]
 getitem_1834: "f32[3072, 768][768, 1]cuda:0" = _foreach_sqrt[58]
 getitem_1835: "f32[3072][1]cuda:0" = _foreach_sqrt[59]
 getitem_1836: "f32[768, 3072][3072, 1]cuda:0" = _foreach_sqrt[60]
 getitem_1837: "f32[768][1]cuda:0" = _foreach_sqrt[61]
 getitem_1838: "f32[768][1]cuda:0" = _foreach_sqrt[62]
 getitem_1839: "f32[768][1]cuda:0" = _foreach_sqrt[63]
 getitem_1840: "f32[2304, 768][768, 1]cuda:0" = _foreach_sqrt[64]
 getitem_1841: "f32[2304][1]cuda:0" = _foreach_sqrt[65]
 getitem_1842: "f32[768, 768][768, 1]cuda:0" = _foreach_sqrt[66]
 getitem_1843: "f32[768][1]cuda:0" = _foreach_sqrt[67]
 getitem_1844: "f32[768][1]cuda:0" = _foreach_sqrt[68]
 getitem_1845: "f32[768][1]cuda:0" = _foreach_sqrt[69]
 getitem_1846: "f32[3072, 768][768, 1]cuda:0" = _foreach_sqrt[70]
 getitem_1847: "f32[3072][1]cuda:0" = _foreach_sqrt[71]
 getitem_1848: "f32[768, 3072][3072, 1]cuda:0" = _foreach_sqrt[72]
 getitem_1849: "f32[768][1]cuda:0" = _foreach_sqrt[73]
 getitem_1850: "f32[768][1]cuda:0" = _foreach_sqrt[74]
 getitem_1851: "f32[768][1]cuda:0" = _foreach_sqrt[75]
 getitem_1852: "f32[2304, 768][768, 1]cuda:0" = _foreach_sqrt[76]
 getitem_1853: "f32[2304][1]cuda:0" = _foreach_sqrt[77]
 getitem_1854: "f32[768, 768][768, 1]cuda:0" = _foreach_sqrt[78]
 getitem_1855: "f32[768][1]cuda:0" = _foreach_sqrt[79]
 getitem_1856: "f32[768][1]cuda:0" = _foreach_sqrt[80]
 getitem_1857: "f32[768][1]cuda:0" = _foreach_sqrt[81]
 getitem_1858: "f32[3072, 768][768, 1]cuda:0" = _foreach_sqrt[82]
 getitem_1859: "f32[3072][1]cuda:0" = _foreach_sqrt[83]
 getitem_1860: "f32[768, 3072][3072, 1]cuda:0" = _foreach_sqrt[84]
 getitem_1861: "f32[768][1]cuda:0" = _foreach_sqrt[85]
 getitem_1862: "f32[768][1]cuda:0" = _foreach_sqrt[86]
 getitem_1863: "f32[768][1]cuda:0" = _foreach_sqrt[87]
 getitem_1864: "f32[2304, 768][768, 1]cuda:0" = _foreach_sqrt[88]
 getitem_1865: "f32[2304][1]cuda:0" = _foreach_sqrt[89]
 getitem_1866: "f32[768, 768][768, 1]cuda:0" = _foreach_sqrt[90]
 getitem_1867: "f32[768][1]cuda:0" = _foreach_sqrt[91]
 getitem_1868: "f32[768][1]cuda:0" = _foreach_sqrt[92]
 getitem_1869: "f32[768][1]cuda:0" = _foreach_sqrt[93]
 getitem_1870: "f32[3072, 768][768, 1]cuda:0" = _foreach_sqrt[94]
 getitem_1871: "f32[3072][1]cuda:0" = _foreach_sqrt[95]
 getitem_1872: "f32[768, 3072][3072, 1]cuda:0" = _foreach_sqrt[96]
 getitem_1873: "f32[768][1]cuda:0" = _foreach_sqrt[97]
 getitem_1874: "f32[768][1]cuda:0" = _foreach_sqrt[98]
 getitem_1875: "f32[768][1]cuda:0" = _foreach_sqrt[99]
 getitem_1876: "f32[2304, 768][768, 1]cuda:0" = _foreach_sqrt[100]
 getitem_1877: "f32[2304][1]cuda:0" = _foreach_sqrt[101]
 getitem_1878: "f32[768, 768][768, 1]cuda:0" = _foreach_sqrt[102]
 getitem_1879: "f32[768][1]cuda:0" = _foreach_sqrt[103]
 getitem_1880: "f32[768][1]cuda:0" = _foreach_sqrt[104]
 getitem_1881: "f32[768][1]cuda:0" = _foreach_sqrt[105]
 getitem_1882: "f32[3072, 768][768, 1]cuda:0" = _foreach_sqrt[106]
 getitem_1883: "f32[3072][1]cuda:0" = _foreach_sqrt[107]
 getitem_1884: "f32[768, 3072][3072, 1]cuda:0" = _foreach_sqrt[108]
 getitem_1885: "f32[768][1]cuda:0" = _foreach_sqrt[109]
 getitem_1886: "f32[768][1]cuda:0" = _foreach_sqrt[110]
 getitem_1887: "f32[768][1]cuda:0" = _foreach_sqrt[111]
 getitem_1888: "f32[2304, 768][768, 1]cuda:0" = _foreach_sqrt[112]
 getitem_1889: "f32[2304][1]cuda:0" = _foreach_sqrt[113]
 getitem_1890: "f32[768, 768][768, 1]cuda:0" = _foreach_sqrt[114]
 getitem_1891: "f32[768][1]cuda:0" = _foreach_sqrt[115]
 getitem_1892: "f32[768][1]cuda:0" = _foreach_sqrt[116]
 getitem_1893: "f32[768][1]cuda:0" = _foreach_sqrt[117]
 getitem_1894: "f32[3072, 768][768, 1]cuda:0" = _foreach_sqrt[118]
 getitem_1895: "f32[3072][1]cuda:0" = _foreach_sqrt[119]
 getitem_1896: "f32[768, 3072][3072, 1]cuda:0" = _foreach_sqrt[120]
 getitem_1897: "f32[768][1]cuda:0" = _foreach_sqrt[121]
 getitem_1898: "f32[768][1]cuda:0" = _foreach_sqrt[122]
 getitem_1899: "f32[768][1]cuda:0" = _foreach_sqrt[123]
 getitem_1900: "f32[2304, 768][768, 1]cuda:0" = _foreach_sqrt[124]
 getitem_1901: "f32[2304][1]cuda:0" = _foreach_sqrt[125]
 getitem_1902: "f32[768, 768][768, 1]cuda:0" = _foreach_sqrt[126]
 getitem_1903: "f32[768][1]cuda:0" = _foreach_sqrt[127]
 getitem_1904: "f32[768][1]cuda:0" = _foreach_sqrt[128]
 getitem_1905: "f32[768][1]cuda:0" = _foreach_sqrt[129]
 getitem_1906: "f32[3072, 768][768, 1]cuda:0" = _foreach_sqrt[130]
 getitem_1907: "f32[3072][1]cuda:0" = _foreach_sqrt[131]
 getitem_1908: "f32[768, 3072][3072, 1]cuda:0" = _foreach_sqrt[132]
 getitem_1909: "f32[768][1]cuda:0" = _foreach_sqrt[133]
 getitem_1910: "f32[768][1]cuda:0" = _foreach_sqrt[134]
 getitem_1911: "f32[768][1]cuda:0" = _foreach_sqrt[135]
 getitem_1912: "f32[2304, 768][768, 1]cuda:0" = _foreach_sqrt[136]
 getitem_1913: "f32[2304][1]cuda:0" = _foreach_sqrt[137]
 getitem_1914: "f32[768, 768][768, 1]cuda:0" = _foreach_sqrt[138]
 getitem_1915: "f32[768][1]cuda:0" = _foreach_sqrt[139]
 getitem_1916: "f32[768][1]cuda:0" = _foreach_sqrt[140]
 getitem_1917: "f32[768][1]cuda:0" = _foreach_sqrt[141]
 getitem_1918: "f32[3072, 768][768, 1]cuda:0" = _foreach_sqrt[142]
 getitem_1919: "f32[3072][1]cuda:0" = _foreach_sqrt[143]
 getitem_1920: "f32[768, 3072][3072, 1]cuda:0" = _foreach_sqrt[144]
 getitem_1921: "f32[768][1]cuda:0" = _foreach_sqrt[145]
 getitem_1922: "f32[768][1]cuda:0" = _foreach_sqrt[146]
 getitem_1923: "f32[768][1]cuda:0" = _foreach_sqrt[147]; _foreach_sqrt = None

 # File: /data/users/jjwu/a/pytorch/torch/optim/adam.py:584 in _multi_tensor_adam, code: torch._foreach_div_(exp_avg_sq_sqrt, bias_correction2_sqrt)
 _foreach_div__1 = torch._foreach_div_((getitem_1776, getitem_1777, getitem_1778, getitem_1779, getitem_1780, getitem_1781, getitem_1782, getitem_1783, getitem_1784, getitem_1785, getitem_1786, getitem_1787, getitem_1788, getitem_1789, getitem_1790, getitem_1791, getitem_1792, getitem_1793, getitem_1794, getitem_1795, getitem_1796, getitem_1797, getitem_1798, getitem_1799, getitem_1800, getitem_1801, getitem_1802, getitem_1803, getitem_1804, getitem_1805, getitem_1806, getitem_1807, getitem_1808, getitem_1809, getitem_1810, getitem_1811, getitem_1812, getitem_1813, getitem_1814, getitem_1815, getitem_1816, getitem_1817, getitem_1818, getitem_1819, getitem_1820, getitem_1821, getitem_1822, getitem_1823, getitem_1824, getitem_1825, getitem_1826, getitem_1827, getitem_1828, getitem_1829, getitem_1830, getitem_1831, getitem_1832, getitem_1833, getitem_1834, getitem_1835, getitem_1836, getitem_1837, getitem_1838, getitem_1839, getitem_1840, getitem_1841, getitem_1842, getitem_1843, getitem_1844, getitem_1845, getitem_1846, getitem_1847, getitem_1848, getitem_1849, getitem_1850, getitem_1851, getitem_1852, getitem_1853, getitem_1854, getitem_1855, getitem_1856, getitem_1857, getitem_1858, getitem_1859, getitem_1860, getitem_1861, getitem_1862, getitem_1863, getitem_1864, getitem_1865, getitem_1866, getitem_1867, getitem_1868, getitem_1869, getitem_1870, getitem_1871, getitem_1872, getitem_1873, getitem_1874, getitem_1875, getitem_1876, getitem_1877, getitem_1878, getitem_1879, getitem_1880, getitem_1881, getitem_1882, getitem_1883, getitem_1884, getitem_1885, getitem_1886, getitem_1887, getitem_1888, getitem_1889, getitem_1890, getitem_1891, getitem_1892, getitem_1893, getitem_1894, getitem_1895, getitem_1896, getitem_1897, getitem_1898, getitem_1899, getitem_1900, getitem_1901, getitem_1902, getitem_1903, getitem_1904, getitem_1905, getitem_1906, getitem_1907, getitem_1908, getitem_1909, getitem_1910, getitem_1911, getitem_1912, getitem_1913, getitem_1914, getitem_1915, getitem_1916, getitem_1917, getitem_1918, getitem_1919, getitem_1920, getitem_1921, getitem_1922, getitem_1923), (getitem_740, getitem_741, getitem_742, getitem_743, getitem_744, getitem_745, getitem_746, getitem_747, getitem_748, getitem_749, getitem_750, getitem_751, getitem_752, getitem_753, getitem_754, getitem_755, getitem_756, getitem_757, getitem_758, getitem_759, getitem_760, getitem_761, getitem_762, getitem_763, getitem_764, getitem_765, getitem_766, getitem_767, getitem_768, getitem_769, getitem_770, getitem_771, getitem_772, getitem_773, getitem_774, getitem_775, getitem_776, getitem_777, getitem_778, getitem_779, getitem_780, getitem_781, getitem_782, getitem_783, getitem_784, getitem_785, getitem_786, getitem_787, getitem_788, getitem_789, getitem_790, getitem_791, getitem_792, getitem_793, getitem_794, getitem_795, getitem_796, getitem_797, getitem_798, getitem_799, getitem_800, getitem_801, getitem_802, getitem_803, getitem_804, getitem_805, getitem_806, getitem_807, getitem_808, getitem_809, getitem_810, getitem_811, getitem_812, getitem_813, getitem_814, getitem_815, getitem_816, getitem_817, getitem_818, getitem_819, getitem_820, getitem_821, getitem_822, getitem_823, getitem_824, 
getitem_825, getitem_826, getitem_827, getitem_828, getitem_829, getitem_830, getitem_831, getitem_832, getitem_833, getitem_834, getitem_835, getitem_836, getitem_837, getitem_838, getitem_839, getitem_840, getitem_841, getitem_842, getitem_843, getitem_844, getitem_845, getitem_846, getitem_847, getitem_848, getitem_849, getitem_850, getitem_851, getitem_852, getitem_853, getitem_854, getitem_855, getitem_856, getitem_857, getitem_858, getitem_859, getitem_860, getitem_861, getitem_862, getitem_863, getitem_864, getitem_865, getitem_866, getitem_867, getitem_868, getitem_869, getitem_870, getitem_871, getitem_872, getitem_873, getitem_874, getitem_875, getitem_876, getitem_877, getitem_878, getitem_879, getitem_880, getitem_881, getitem_882, getitem_883, getitem_884, getitem_885, getitem_886, getitem_887)); getitem_740 = getitem_741 = getitem_742 = getitem_743 = getitem_744 = getitem_745 = getitem_746 = getitem_747 = getitem_748 = getitem_749 = getitem_750 = getitem_751 = getitem_752 = getitem_753 = getitem_754 = getitem_755 = getitem_756 = getitem_757 = getitem_758 = getitem_759 = getitem_760 = getitem_761 = getitem_762 = getitem_763 = getitem_764 = getitem_765 = getitem_766 = getitem_767 = getitem_768 = getitem_769 = getitem_770 = getitem_771 = getitem_772 = getitem_773 = getitem_774 = getitem_775 = getitem_776 = getitem_777 = getitem_778 = getitem_779 = getitem_780 = getitem_781 = getitem_782 = getitem_783 = getitem_784 = getitem_785 = getitem_786 = getitem_787 = getitem_788 = getitem_789 = getitem_790 = getitem_791 = getitem_792 = getitem_793 = getitem_794 = getitem_795 = getitem_796 = getitem_797 = getitem_798 = getitem_799 = getitem_800 = getitem_801 = getitem_802 = getitem_803 = getitem_804 = getitem_805 = getitem_806 = getitem_807 = getitem_808 = getitem_809 = getitem_810 = getitem_811 = getitem_812 = getitem_813 = getitem_814 = getitem_815 = getitem_816 = getitem_817 = getitem_818 = getitem_819 = getitem_820 = getitem_821 = getitem_822 = getitem_823 = getitem_824 = getitem_825 = getitem_826 = getitem_827 = getitem_828 = getitem_829 = getitem_830 = getitem_831 = getitem_832 = getitem_833 = getitem_834 = getitem_835 = getitem_836 = getitem_837 = getitem_838 = getitem_839 = getitem_840 = getitem_841 = getitem_842 = getitem_843 = getitem_844 = getitem_845 = getitem_846 = getitem_847 = getitem_848 = getitem_849 = getitem_850 = getitem_851 = getitem_852 = getitem_853 = getitem_854 = getitem_855 = getitem_856 = getitem_857 = getitem_858 = getitem_859 = getitem_860 = getitem_861 = getitem_862 = getitem_863 = getitem_864 = getitem_865 = getitem_866 = getitem_867 = getitem_868 = getitem_869 = getitem_870 = getitem_871 = getitem_872 = getitem_873 = getitem_874 = getitem_875 = getitem_876 = getitem_877 = getitem_878 = getitem_879 = getitem_880 = getitem_881 = getitem_882 = getitem_883 = getitem_884 = getitem_885 = getitem_886 = getitem_887 = _foreach_div__1 = None

 # File: /data/users/jjwu/a/pytorch/torch/optim/adam.py:585 in _multi_tensor_adam, code: torch._foreach_add_(exp_avg_sq_sqrt, eps)
 _foreach_add__1 = torch._foreach_add_((getitem_1776, getitem_1777, getitem_1778, getitem_1779, getitem_1780, getitem_1781, getitem_1782, getitem_1783, getitem_1784, getitem_1785, getitem_1786, getitem_1787, getitem_1788, getitem_1789, getitem_1790, getitem_1791, getitem_1792, getitem_1793, getitem_1794, getitem_1795, getitem_1796, getitem_1797, getitem_1798, getitem_1799, getitem_1800, getitem_1801, getitem_1802, getitem_1803, getitem_1804, getitem_1805, getitem_1806, getitem_1807, getitem_1808, 
getitem_1809, getitem_1810, getitem_1811, getitem_1812, getitem_1813, getitem_1814, getitem_1815, getitem_1816, getitem_1817, getitem_1818, getitem_1819, getitem_1820, getitem_1821, getitem_1822, getitem_1823, getitem_1824, getitem_1825, getitem_1826, getitem_1827, getitem_1828, getitem_1829, getitem_1830, getitem_1831, getitem_1832, getitem_1833, getitem_1834, getitem_1835, getitem_1836, getitem_1837, getitem_1838, getitem_1839, getitem_1840, getitem_1841, getitem_1842, getitem_1843, getitem_1844, getitem_1845, getitem_1846, getitem_1847, getitem_1848, getitem_1849, getitem_1850, getitem_1851, getitem_1852, getitem_1853, getitem_1854, getitem_1855, getitem_1856, getitem_1857, getitem_1858, getitem_1859, getitem_1860, getitem_1861, getitem_1862, getitem_1863, getitem_1864, getitem_1865, getitem_1866, getitem_1867, getitem_1868, getitem_1869, getitem_1870, getitem_1871, getitem_1872, getitem_1873, getitem_1874, getitem_1875, getitem_1876, getitem_1877, getitem_1878, getitem_1879, getitem_1880, getitem_1881, getitem_1882, getitem_1883, getitem_1884, getitem_1885, getitem_1886, getitem_1887, getitem_1888, getitem_1889, getitem_1890, getitem_1891, getitem_1892, getitem_1893, getitem_1894, getitem_1895, getitem_1896, getitem_1897, getitem_1898, getitem_1899, getitem_1900, getitem_1901, getitem_1902, getitem_1903, getitem_1904, getitem_1905, getitem_1906, getitem_1907, getitem_1908, getitem_1909, getitem_1910, getitem_1911, getitem_1912, getitem_1913, getitem_1914, getitem_1915, getitem_1916, getitem_1917, getitem_1918, getitem_1919, getitem_1920, getitem_1921, getitem_1922, getitem_1923), 1e-08); _foreach_add__1 = None

 # File: /data/users/jjwu/a/pytorch/torch/optim/adam.py:586 in _multi_tensor_adam, code: torch._foreach_div_(exp_avg_sq_sqrt, step_size)
 _foreach_div__2 = torch._foreach_div_((getitem_1776, getitem_1777, getitem_1778, getitem_1779, getitem_1780, getitem_1781, getitem_1782, getitem_1783, getitem_1784, getitem_1785, getitem_1786, getitem_1787, getitem_1788, getitem_1789, getitem_1790, getitem_1791, getitem_1792, getitem_1793, getitem_1794, getitem_1795, getitem_1796, getitem_1797, getitem_1798, getitem_1799, getitem_1800, getitem_1801, getitem_1802, getitem_1803, getitem_1804, getitem_1805, getitem_1806, getitem_1807, getitem_1808, getitem_1809, getitem_1810, getitem_1811, getitem_1812, getitem_1813, getitem_1814, getitem_1815, getitem_1816, getitem_1817, getitem_1818, getitem_1819, getitem_1820, getitem_1821, getitem_1822, getitem_1823, getitem_1824, getitem_1825, getitem_1826, getitem_1827, getitem_1828, getitem_1829, getitem_1830, getitem_1831, getitem_1832, getitem_1833, getitem_1834, getitem_1835, getitem_1836, getitem_1837, getitem_1838, getitem_1839, getitem_1840, getitem_1841, getitem_1842, getitem_1843, getitem_1844, getitem_1845, getitem_1846, getitem_1847, getitem_1848, getitem_1849, getitem_1850, getitem_1851, getitem_1852, getitem_1853, getitem_1854, getitem_1855, getitem_1856, getitem_1857, getitem_1858, getitem_1859, getitem_1860, getitem_1861, getitem_1862, getitem_1863, getitem_1864, getitem_1865, getitem_1866, getitem_1867, getitem_1868, getitem_1869, getitem_1870, getitem_1871, getitem_1872, getitem_1873, getitem_1874, getitem_1875, getitem_1876, getitem_1877, getitem_1878, getitem_1879, getitem_1880, getitem_1881, getitem_1882, getitem_1883, getitem_1884, getitem_1885, getitem_1886, getitem_1887, getitem_1888, getitem_1889, getitem_1890, getitem_1891, getitem_1892, getitem_1893, getitem_1894, getitem_1895, getitem_1896, getitem_1897, getitem_1898, 
getitem_1899, getitem_1900, getitem_1901, getitem_1902, getitem_1903, getitem_1904, getitem_1905, getitem_1906, getitem_1907, getitem_1908, getitem_1909, getitem_1910, getitem_1911, getitem_1912, getitem_1913, getitem_1914, getitem_1915, getitem_1916, getitem_1917, getitem_1918, getitem_1919, getitem_1920, getitem_1921, getitem_1922, getitem_1923), (getitem_592, getitem_593, getitem_594, getitem_595, getitem_596, getitem_597, getitem_598, getitem_599, getitem_600, getitem_601, getitem_602, getitem_603, getitem_604, getitem_605, getitem_606, getitem_607, getitem_608, getitem_609, getitem_610, getitem_611, getitem_612, getitem_613, getitem_614, getitem_615, getitem_616, getitem_617, getitem_618, getitem_619, getitem_620, getitem_621, getitem_622, getitem_623, getitem_624, getitem_625, getitem_626, getitem_627, getitem_628, getitem_629, getitem_630, getitem_631, getitem_632, getitem_633, getitem_634, getitem_635, getitem_636, getitem_637, getitem_638, getitem_639, getitem_640, getitem_641, getitem_642, getitem_643, getitem_644, getitem_645, getitem_646, getitem_647, getitem_648, getitem_649, getitem_650, getitem_651, getitem_652, getitem_653, getitem_654, getitem_655, getitem_656, getitem_657, getitem_658, getitem_659, getitem_660, getitem_661, getitem_662, getitem_663, getitem_664, getitem_665, getitem_666, getitem_667, getitem_668, getitem_669, getitem_670, getitem_671, getitem_672, getitem_673, getitem_674, getitem_675, getitem_676, getitem_677, getitem_678, getitem_679, getitem_680, getitem_681, getitem_682, getitem_683, getitem_684, getitem_685, getitem_686, getitem_687, getitem_688, getitem_689, getitem_690, getitem_691, getitem_692, getitem_693, getitem_694, getitem_695, getitem_696, getitem_697, getitem_698, getitem_699, getitem_700, getitem_701, getitem_702, getitem_703, getitem_704, getitem_705, getitem_706, getitem_707, getitem_708, getitem_709, getitem_710, getitem_711, getitem_712, getitem_713, getitem_714, getitem_715, getitem_716, getitem_717, getitem_718, getitem_719, getitem_720, getitem_721, getitem_722, getitem_723, getitem_724, getitem_725, getitem_726, getitem_727, getitem_728, getitem_729, getitem_730, getitem_731, getitem_732, getitem_733, getitem_734, getitem_735, getitem_736, getitem_737, getitem_738, getitem_739)); getitem_592 = getitem_593 = getitem_594 = getitem_595 = getitem_596 = getitem_597 = getitem_598 = getitem_599 = getitem_600 = getitem_601 = getitem_602 = getitem_603 = getitem_604 = getitem_605 = getitem_606 = getitem_607 = getitem_608 = getitem_609 = getitem_610 = getitem_611 = getitem_612 = getitem_613 = getitem_614 = getitem_615 = getitem_616 = getitem_617 = getitem_618 = getitem_619 = getitem_620 = getitem_621 = getitem_622 = getitem_623 = getitem_624 = getitem_625 = getitem_626 = getitem_627 = getitem_628 = getitem_629 = getitem_630 = getitem_631 = getitem_632 = getitem_633 = getitem_634 = getitem_635 = getitem_636 = getitem_637 = getitem_638 = getitem_639 = getitem_640 = getitem_641 = getitem_642 = getitem_643 = getitem_644 = getitem_645 = getitem_646 = getitem_647 = getitem_648 = getitem_649 = getitem_650 = getitem_651 = getitem_652 = getitem_653 = getitem_654 = getitem_655 = getitem_656 = getitem_657 = getitem_658 = getitem_659 = getitem_660 = getitem_661 = getitem_662 = getitem_663 = getitem_664 = getitem_665 = getitem_666 = getitem_667 = getitem_668 = getitem_669 = getitem_670 = getitem_671 = getitem_672 = getitem_673 = getitem_674 = getitem_675 = getitem_676 = getitem_677 = getitem_678 = getitem_679 = getitem_680 = getitem_681 = getitem_682 = 
getitem_683 = getitem_684 = getitem_685 = getitem_686 = getitem_687 = getitem_688 = getitem_689 = getitem_690 = getitem_691 = getitem_692 = getitem_693 = getitem_694 = getitem_695 = getitem_696 = getitem_697 = getitem_698 = getitem_699 = getitem_700 = getitem_701 = getitem_702 = getitem_703 = getitem_704 = getitem_705 = getitem_706 = getitem_707 = getitem_708 = getitem_709 = getitem_710 = getitem_711 = getitem_712 = getitem_713 = getitem_714 = getitem_715 = getitem_716 = getitem_717 = getitem_718 = getitem_719 = getitem_720 = getitem_721 = getitem_722 = getitem_723 = getitem_724 = getitem_725 = getitem_726 = getitem_727 = getitem_728 = getitem_729 = getitem_730 = getitem_731 = getitem_732 = getitem_733 = getitem_734 = getitem_735 = getitem_736 = getitem_737 = getitem_738 = getitem_739 = _foreach_div__2 = None

 # File: /data/users/jjwu/a/pytorch/torch/optim/adam.py:589 in _multi_tensor_adam, code: torch._foreach_addcdiv_(device_params, device_exp_avgs, exp_avg_sq_sqrt)
 _foreach_addcdiv_ = torch._foreach_addcdiv_([l_self_param_groups_0_params_0_, l_self_param_groups_0_params_1_, l_self_param_groups_0_params_2_, l_self_param_groups_0_params_3_, l_self_param_groups_0_params_4_, l_self_param_groups_0_params_5_, l_self_param_groups_0_params_6_, l_self_param_groups_0_params_7_, l_self_param_groups_0_params_8_, l_self_param_groups_0_params_9_, l_self_param_groups_0_params_10_, l_self_param_groups_0_params_11_, l_self_param_groups_0_params_12_, l_self_param_groups_0_params_13_, l_self_param_groups_0_params_14_, l_self_param_groups_0_params_15_, l_self_param_groups_0_params_16_, l_self_param_groups_0_params_17_, l_self_param_groups_0_params_18_, l_self_param_groups_0_params_19_, l_self_param_groups_0_params_20_, l_self_param_groups_0_params_21_, l_self_param_groups_0_params_22_, l_self_param_groups_0_params_23_, l_self_param_groups_0_params_24_, l_self_param_groups_0_params_25_, l_self_param_groups_0_params_26_, l_self_param_groups_0_params_27_, l_self_param_groups_0_params_28_, l_self_param_groups_0_params_29_, l_self_param_groups_0_params_30_, l_self_param_groups_0_params_31_, l_self_param_groups_0_params_32_, l_self_param_groups_0_params_33_, l_self_param_groups_0_params_34_, l_self_param_groups_0_params_35_, l_self_param_groups_0_params_36_, l_self_param_groups_0_params_37_, l_self_param_groups_0_params_38_, l_self_param_groups_0_params_39_, l_self_param_groups_0_params_40_, l_self_param_groups_0_params_41_, l_self_param_groups_0_params_42_, l_self_param_groups_0_params_43_, l_self_param_groups_0_params_44_, l_self_param_groups_0_params_45_, l_self_param_groups_0_params_46_, l_self_param_groups_0_params_47_, l_self_param_groups_0_params_48_, l_self_param_groups_0_params_49_, l_self_param_groups_0_params_50_, l_self_param_groups_0_params_51_, l_self_param_groups_0_params_52_, l_self_param_groups_0_params_53_, l_self_param_groups_0_params_54_, l_self_param_groups_0_params_55_, l_self_param_groups_0_params_56_, l_self_param_groups_0_params_57_, l_self_param_groups_0_params_58_, l_self_param_groups_0_params_59_, l_self_param_groups_0_params_60_, l_self_param_groups_0_params_61_, l_self_param_groups_0_params_62_, l_self_param_groups_0_params_63_, l_self_param_groups_0_params_64_, l_self_param_groups_0_params_65_, l_self_param_groups_0_params_66_, l_self_param_groups_0_params_67_, l_self_param_groups_0_params_68_, l_self_param_groups_0_params_69_, l_self_param_groups_0_params_70_, l_self_param_groups_0_params_71_, l_self_param_groups_0_params_72_, l_self_param_groups_0_params_73_, 
l_self_param_groups_0_params_74_, l_self_param_groups_0_params_75_, l_self_param_groups_0_params_76_, l_self_param_groups_0_params_77_, l_self_param_groups_0_params_78_, l_self_param_groups_0_params_79_, l_self_param_groups_0_params_80_, l_self_param_groups_0_params_81_, l_self_param_groups_0_params_82_, l_self_param_groups_0_params_83_, l_self_param_groups_0_params_84_, l_self_param_groups_0_params_85_, l_self_param_groups_0_params_86_, l_self_param_groups_0_params_87_, l_self_param_groups_0_params_88_, l_self_param_groups_0_params_89_, l_self_param_groups_0_params_90_, l_self_param_groups_0_params_91_, l_self_param_groups_0_params_92_, l_self_param_groups_0_params_93_, l_self_param_groups_0_params_94_, l_self_param_groups_0_params_95_, l_self_param_groups_0_params_96_, l_self_param_groups_0_params_97_, l_self_param_groups_0_params_98_, l_self_param_groups_0_params_99_, l_self_param_groups_0_params_100_, l_self_param_groups_0_params_101_, l_self_param_groups_0_params_102_, l_self_param_groups_0_params_103_, l_self_param_groups_0_params_104_, l_self_param_groups_0_params_105_, l_self_param_groups_0_params_106_, l_self_param_groups_0_params_107_, l_self_param_groups_0_params_108_, l_self_param_groups_0_params_109_, l_self_param_groups_0_params_110_, l_self_param_groups_0_params_111_, l_self_param_groups_0_params_112_, l_self_param_groups_0_params_113_, l_self_param_groups_0_params_114_, l_self_param_groups_0_params_115_, l_self_param_groups_0_params_116_, l_self_param_groups_0_params_117_, l_self_param_groups_0_params_118_, l_self_param_groups_0_params_119_, l_self_param_groups_0_params_120_, l_self_param_groups_0_params_121_, l_self_param_groups_0_params_122_, l_self_param_groups_0_params_123_, l_self_param_groups_0_params_124_, l_self_param_groups_0_params_125_, l_self_param_groups_0_params_126_, l_self_param_groups_0_params_127_, l_self_param_groups_0_params_128_, l_self_param_groups_0_params_129_, l_self_param_groups_0_params_130_, l_self_param_groups_0_params_131_, l_self_param_groups_0_params_132_, l_self_param_groups_0_params_133_, l_self_param_groups_0_params_134_, l_self_param_groups_0_params_135_, l_self_param_groups_0_params_136_, l_self_param_groups_0_params_137_, l_self_param_groups_0_params_138_, l_self_param_groups_0_params_139_, l_self_param_groups_0_params_140_, l_self_param_groups_0_params_141_, l_self_param_groups_0_params_142_, l_self_param_groups_0_params_143_, l_self_param_groups_0_params_144_, l_self_param_groups_0_params_145_, l_self_param_groups_0_params_146_, l_self_param_groups_0_params_147_], [l_self_state_list_l_self_state_keys_0_exp_avg_, l_self_state_list_l_self_state_keys_1_exp_avg_, l_self_state_list_l_self_state_keys_2_exp_avg_, l_self_state_list_l_self_state_keys_3_exp_avg_, l_self_state_list_l_self_state_keys_4_exp_avg_, l_self_state_list_l_self_state_keys_5_exp_avg_, l_self_state_list_l_self_state_keys_6_exp_avg_, l_self_state_list_l_self_state_keys_7_exp_avg_, l_self_state_list_l_self_state_keys_8_exp_avg_, l_self_state_list_l_self_state_keys_9_exp_avg_, l_self_state_list_l_self_state_keys_10_exp_avg_, l_self_state_list_l_self_state_keys_11_exp_avg_, l_self_state_list_l_self_state_keys_12_exp_avg_, l_self_state_list_l_self_state_keys_13_exp_avg_, l_self_state_list_l_self_state_keys_14_exp_avg_, l_self_state_list_l_self_state_keys_15_exp_avg_, l_self_state_list_l_self_state_keys_16_exp_avg_, l_self_state_list_l_self_state_keys_17_exp_avg_, l_self_state_list_l_self_state_keys_18_exp_avg_, l_self_state_list_l_self_state_keys_19_exp_avg_, 
l_self_state_list_l_self_state_keys_20_exp_avg_, l_self_state_list_l_self_state_keys_21_exp_avg_, l_self_state_list_l_self_state_keys_22_exp_avg_, l_self_state_list_l_self_state_keys_23_exp_avg_, l_self_state_list_l_self_state_keys_24_exp_avg_, l_self_state_list_l_self_state_keys_25_exp_avg_, l_self_state_list_l_self_state_keys_26_exp_avg_, l_self_state_list_l_self_state_keys_27_exp_avg_, l_self_state_list_l_self_state_keys_28_exp_avg_, l_self_state_list_l_self_state_keys_29_exp_avg_, l_self_state_list_l_self_state_keys_30_exp_avg_, l_self_state_list_l_self_state_keys_31_exp_avg_, l_self_state_list_l_self_state_keys_32_exp_avg_, l_self_state_list_l_self_state_keys_33_exp_avg_, l_self_state_list_l_self_state_keys_34_exp_avg_, l_self_state_list_l_self_state_keys_35_exp_avg_, l_self_state_list_l_self_state_keys_36_exp_avg_, l_self_state_list_l_self_state_keys_37_exp_avg_, l_self_state_list_l_self_state_keys_38_exp_avg_, l_self_state_list_l_self_state_keys_39_exp_avg_, l_self_state_list_l_self_state_keys_40_exp_avg_, l_self_state_list_l_self_state_keys_41_exp_avg_, l_self_state_list_l_self_state_keys_42_exp_avg_, l_self_state_list_l_self_state_keys_43_exp_avg_, l_self_state_list_l_self_state_keys_44_exp_avg_, l_self_state_list_l_self_state_keys_45_exp_avg_, l_self_state_list_l_self_state_keys_46_exp_avg_, l_self_state_list_l_self_state_keys_47_exp_avg_, l_self_state_list_l_self_state_keys_48_exp_avg_, l_self_state_list_l_self_state_keys_49_exp_avg_, l_self_state_list_l_self_state_keys_50_exp_avg_, l_self_state_list_l_self_state_keys_51_exp_avg_, l_self_state_list_l_self_state_keys_52_exp_avg_, l_self_state_list_l_self_state_keys_53_exp_avg_, l_self_state_list_l_self_state_keys_54_exp_avg_, l_self_state_list_l_self_state_keys_55_exp_avg_, l_self_state_list_l_self_state_keys_56_exp_avg_, l_self_state_list_l_self_state_keys_57_exp_avg_, l_self_state_list_l_self_state_keys_58_exp_avg_, l_self_state_list_l_self_state_keys_59_exp_avg_, l_self_state_list_l_self_state_keys_60_exp_avg_, l_self_state_list_l_self_state_keys_61_exp_avg_, l_self_state_list_l_self_state_keys_62_exp_avg_, l_self_state_list_l_self_state_keys_63_exp_avg_, l_self_state_list_l_self_state_keys_64_exp_avg_, l_self_state_list_l_self_state_keys_65_exp_avg_, l_self_state_list_l_self_state_keys_66_exp_avg_, l_self_state_list_l_self_state_keys_67_exp_avg_, l_self_state_list_l_self_state_keys_68_exp_avg_, l_self_state_list_l_self_state_keys_69_exp_avg_, l_self_state_list_l_self_state_keys_70_exp_avg_, l_self_state_list_l_self_state_keys_71_exp_avg_, l_self_state_list_l_self_state_keys_72_exp_avg_, l_self_state_list_l_self_state_keys_73_exp_avg_, l_self_state_list_l_self_state_keys_74_exp_avg_, l_self_state_list_l_self_state_keys_75_exp_avg_, l_self_state_list_l_self_state_keys_76_exp_avg_, l_self_state_list_l_self_state_keys_77_exp_avg_, l_self_state_list_l_self_state_keys_78_exp_avg_, l_self_state_list_l_self_state_keys_79_exp_avg_, l_self_state_list_l_self_state_keys_80_exp_avg_, l_self_state_list_l_self_state_keys_81_exp_avg_, l_self_state_list_l_self_state_keys_82_exp_avg_, l_self_state_list_l_self_state_keys_83_exp_avg_, l_self_state_list_l_self_state_keys_84_exp_avg_, l_self_state_list_l_self_state_keys_85_exp_avg_, l_self_state_list_l_self_state_keys_86_exp_avg_, l_self_state_list_l_self_state_keys_87_exp_avg_, l_self_state_list_l_self_state_keys_88_exp_avg_, l_self_state_list_l_self_state_keys_89_exp_avg_, l_self_state_list_l_self_state_keys_90_exp_avg_, l_self_state_list_l_self_state_keys_91_exp_avg_, 
l_self_state_list_l_self_state_keys_92_exp_avg_, l_self_state_list_l_self_state_keys_93_exp_avg_, l_self_state_list_l_self_state_keys_94_exp_avg_, l_self_state_list_l_self_state_keys_95_exp_avg_, l_self_state_list_l_self_state_keys_96_exp_avg_, l_self_state_list_l_self_state_keys_97_exp_avg_, l_self_state_list_l_self_state_keys_98_exp_avg_, l_self_state_list_l_self_state_keys_99_exp_avg_, l_self_state_list_l_self_state_keys_100_exp_avg_, l_self_state_list_l_self_state_keys_101_exp_avg_, l_self_state_list_l_self_state_keys_102_exp_avg_, l_self_state_list_l_self_state_keys_103_exp_avg_, l_self_state_list_l_self_state_keys_104_exp_avg_, l_self_state_list_l_self_state_keys_105_exp_avg_, l_self_state_list_l_self_state_keys_106_exp_avg_, l_self_state_list_l_self_state_keys_107_exp_avg_, l_self_state_list_l_self_state_keys_108_exp_avg_, l_self_state_list_l_self_state_keys_109_exp_avg_, l_self_state_list_l_self_state_keys_110_exp_avg_, l_self_state_list_l_self_state_keys_111_exp_avg_, l_self_state_list_l_self_state_keys_112_exp_avg_, l_self_state_list_l_self_state_keys_113_exp_avg_, l_self_state_list_l_self_state_keys_114_exp_avg_, l_self_state_list_l_self_state_keys_115_exp_avg_, l_self_state_list_l_self_state_keys_116_exp_avg_, l_self_state_list_l_self_state_keys_117_exp_avg_, l_self_state_list_l_self_state_keys_118_exp_avg_, l_self_state_list_l_self_state_keys_119_exp_avg_, l_self_state_list_l_self_state_keys_120_exp_avg_, l_self_state_list_l_self_state_keys_121_exp_avg_, l_self_state_list_l_self_state_keys_122_exp_avg_, l_self_state_list_l_self_state_keys_123_exp_avg_, l_self_state_list_l_self_state_keys_124_exp_avg_, l_self_state_list_l_self_state_keys_125_exp_avg_, l_self_state_list_l_self_state_keys_126_exp_avg_, l_self_state_list_l_self_state_keys_127_exp_avg_, l_self_state_list_l_self_state_keys_128_exp_avg_, l_self_state_list_l_self_state_keys_129_exp_avg_, l_self_state_list_l_self_state_keys_130_exp_avg_, l_self_state_list_l_self_state_keys_131_exp_avg_, l_self_state_list_l_self_state_keys_132_exp_avg_, l_self_state_list_l_self_state_keys_133_exp_avg_, l_self_state_list_l_self_state_keys_134_exp_avg_, l_self_state_list_l_self_state_keys_135_exp_avg_, l_self_state_list_l_self_state_keys_136_exp_avg_, l_self_state_list_l_self_state_keys_137_exp_avg_, l_self_state_list_l_self_state_keys_138_exp_avg_, l_self_state_list_l_self_state_keys_139_exp_avg_, l_self_state_list_l_self_state_keys_140_exp_avg_, l_self_state_list_l_self_state_keys_141_exp_avg_, l_self_state_list_l_self_state_keys_142_exp_avg_, l_self_state_list_l_self_state_keys_143_exp_avg_, l_self_state_list_l_self_state_keys_144_exp_avg_, l_self_state_list_l_self_state_keys_145_exp_avg_, l_self_state_list_l_self_state_keys_146_exp_avg_, l_self_state_list_l_self_state_keys_147_exp_avg_], (getitem_1776, getitem_1777, getitem_1778, getitem_1779, getitem_1780, getitem_1781, getitem_1782, getitem_1783, getitem_1784, getitem_1785, getitem_1786, getitem_1787, getitem_1788, getitem_1789, getitem_1790, getitem_1791, getitem_1792, getitem_1793, getitem_1794, getitem_1795, getitem_1796, getitem_1797, getitem_1798, getitem_1799, getitem_1800, getitem_1801, getitem_1802, getitem_1803, getitem_1804, getitem_1805, getitem_1806, getitem_1807, getitem_1808, getitem_1809, getitem_1810, getitem_1811, getitem_1812, getitem_1813, getitem_1814, getitem_1815, getitem_1816, getitem_1817, getitem_1818, getitem_1819, getitem_1820, getitem_1821, getitem_1822, getitem_1823, getitem_1824, getitem_1825, getitem_1826, getitem_1827, getitem_1828, getitem_1829, 
getitem_1830, getitem_1831, getitem_1832, getitem_1833, getitem_1834, getitem_1835, getitem_1836, getitem_1837, getitem_1838, getitem_1839, getitem_1840, getitem_1841, getitem_1842, getitem_1843, getitem_1844, getitem_1845, getitem_1846, getitem_1847, getitem_1848, getitem_1849, getitem_1850, getitem_1851, getitem_1852, getitem_1853, getitem_1854, getitem_1855, getitem_1856, getitem_1857, getitem_1858, getitem_1859, getitem_1860, getitem_1861, getitem_1862, getitem_1863, getitem_1864, getitem_1865, getitem_1866, getitem_1867, getitem_1868, getitem_1869, getitem_1870, getitem_1871, getitem_1872, getitem_1873, getitem_1874, getitem_1875, getitem_1876, getitem_1877, getitem_1878, getitem_1879, getitem_1880, getitem_1881, getitem_1882, getitem_1883, getitem_1884, getitem_1885, getitem_1886, getitem_1887, getitem_1888, getitem_1889, getitem_1890, getitem_1891, getitem_1892, getitem_1893, getitem_1894, getitem_1895, getitem_1896, getitem_1897, getitem_1898, getitem_1899, getitem_1900, getitem_1901, getitem_1902, getitem_1903, getitem_1904, getitem_1905, getitem_1906, getitem_1907, getitem_1908, getitem_1909, getitem_1910, getitem_1911, getitem_1912, getitem_1913, getitem_1914, getitem_1915, getitem_1916, getitem_1917, getitem_1918, getitem_1919, getitem_1920, getitem_1921, getitem_1922, getitem_1923)); l_self_param_groups_0_params_0_ = l_self_param_groups_0_params_1_ = l_self_param_groups_0_params_2_ = l_self_param_groups_0_params_3_ = l_self_param_groups_0_params_4_ = l_self_param_groups_0_params_5_ = l_self_param_groups_0_params_6_ = l_self_param_groups_0_params_7_ = l_self_param_groups_0_params_8_ = l_self_param_groups_0_params_9_ = l_self_param_groups_0_params_10_ = l_self_param_groups_0_params_11_ = l_self_param_groups_0_params_12_ = l_self_param_groups_0_params_13_ = l_self_param_groups_0_params_14_ = l_self_param_groups_0_params_15_ = l_self_param_groups_0_params_16_ = l_self_param_groups_0_params_17_ = l_self_param_groups_0_params_18_ = l_self_param_groups_0_params_19_ = l_self_param_groups_0_params_20_ = l_self_param_groups_0_params_21_ = l_self_param_groups_0_params_22_ = l_self_param_groups_0_params_23_ = l_self_param_groups_0_params_24_ = l_self_param_groups_0_params_25_ = l_self_param_groups_0_params_26_ = l_self_param_groups_0_params_27_ = l_self_param_groups_0_params_28_ = l_self_param_groups_0_params_29_ = l_self_param_groups_0_params_30_ = l_self_param_groups_0_params_31_ = l_self_param_groups_0_params_32_ = l_self_param_groups_0_params_33_ = l_self_param_groups_0_params_34_ = l_self_param_groups_0_params_35_ = l_self_param_groups_0_params_36_ = l_self_param_groups_0_params_37_ = l_self_param_groups_0_params_38_ = l_self_param_groups_0_params_39_ = l_self_param_groups_0_params_40_ = l_self_param_groups_0_params_41_ = l_self_param_groups_0_params_42_ = l_self_param_groups_0_params_43_ = l_self_param_groups_0_params_44_ = l_self_param_groups_0_params_45_ = l_self_param_groups_0_params_46_ = l_self_param_groups_0_params_47_ = l_self_param_groups_0_params_48_ = l_self_param_groups_0_params_49_ = l_self_param_groups_0_params_50_ = l_self_param_groups_0_params_51_ = l_self_param_groups_0_params_52_ = l_self_param_groups_0_params_53_ = l_self_param_groups_0_params_54_ = l_self_param_groups_0_params_55_ = l_self_param_groups_0_params_56_ = l_self_param_groups_0_params_57_ = l_self_param_groups_0_params_58_ = l_self_param_groups_0_params_59_ = l_self_param_groups_0_params_60_ = l_self_param_groups_0_params_61_ = l_self_param_groups_0_params_62_ = l_self_param_groups_0_params_63_ = 
l_self_param_groups_0_params_64_ = l_self_param_groups_0_params_65_ = l_self_param_groups_0_params_66_ = l_self_param_groups_0_params_67_ = l_self_param_groups_0_params_68_ = l_self_param_groups_0_params_69_ = l_self_param_groups_0_params_70_ = l_self_param_groups_0_params_71_ = l_self_param_groups_0_params_72_ = l_self_param_groups_0_params_73_ = l_self_param_groups_0_params_74_ = l_self_param_groups_0_params_75_ = l_self_param_groups_0_params_76_ = l_self_param_groups_0_params_77_ = l_self_param_groups_0_params_78_ = l_self_param_groups_0_params_79_ = l_self_param_groups_0_params_80_ = l_self_param_groups_0_params_81_ = l_self_param_groups_0_params_82_ = l_self_param_groups_0_params_83_ = l_self_param_groups_0_params_84_ = l_self_param_groups_0_params_85_ = l_self_param_groups_0_params_86_ = l_self_param_groups_0_params_87_ = l_self_param_groups_0_params_88_ = l_self_param_groups_0_params_89_ = l_self_param_groups_0_params_90_ = l_self_param_groups_0_params_91_ = l_self_param_groups_0_params_92_ = l_self_param_groups_0_params_93_ = l_self_param_groups_0_params_94_ = l_self_param_groups_0_params_95_ = l_self_param_groups_0_params_96_ = l_self_param_groups_0_params_97_ = l_self_param_groups_0_params_98_ = l_self_param_groups_0_params_99_ = l_self_param_groups_0_params_100_ = l_self_param_groups_0_params_101_ = l_self_param_groups_0_params_102_ = l_self_param_groups_0_params_103_ = l_self_param_groups_0_params_104_ = l_self_param_groups_0_params_105_ = l_self_param_groups_0_params_106_ = l_self_param_groups_0_params_107_ = l_self_param_groups_0_params_108_ = l_self_param_groups_0_params_109_ = l_self_param_groups_0_params_110_ = l_self_param_groups_0_params_111_ = l_self_param_groups_0_params_112_ = l_self_param_groups_0_params_113_ = l_self_param_groups_0_params_114_ = l_self_param_groups_0_params_115_ = l_self_param_groups_0_params_116_ = l_self_param_groups_0_params_117_ = l_self_param_groups_0_params_118_ = l_self_param_groups_0_params_119_ = l_self_param_groups_0_params_120_ = l_self_param_groups_0_params_121_ = l_self_param_groups_0_params_122_ = l_self_param_groups_0_params_123_ = l_self_param_groups_0_params_124_ = l_self_param_groups_0_params_125_ = l_self_param_groups_0_params_126_ = l_self_param_groups_0_params_127_ = l_self_param_groups_0_params_128_ = l_self_param_groups_0_params_129_ = l_self_param_groups_0_params_130_ = l_self_param_groups_0_params_131_ = l_self_param_groups_0_params_132_ = l_self_param_groups_0_params_133_ = l_self_param_groups_0_params_134_ = l_self_param_groups_0_params_135_ = l_self_param_groups_0_params_136_ = l_self_param_groups_0_params_137_ = l_self_param_groups_0_params_138_ = l_self_param_groups_0_params_139_ = l_self_param_groups_0_params_140_ = l_self_param_groups_0_params_141_ = l_self_param_groups_0_params_142_ = l_self_param_groups_0_params_143_ = l_self_param_groups_0_params_144_ = l_self_param_groups_0_params_145_ = l_self_param_groups_0_params_146_ = l_self_param_groups_0_params_147_ = l_self_state_list_l_self_state_keys_0_exp_avg_ = l_self_state_list_l_self_state_keys_1_exp_avg_ = l_self_state_list_l_self_state_keys_2_exp_avg_ = l_self_state_list_l_self_state_keys_3_exp_avg_ = l_self_state_list_l_self_state_keys_4_exp_avg_ = l_self_state_list_l_self_state_keys_5_exp_avg_ = l_self_state_list_l_self_state_keys_6_exp_avg_ = l_self_state_list_l_self_state_keys_7_exp_avg_ = l_self_state_list_l_self_state_keys_8_exp_avg_ = l_self_state_list_l_self_state_keys_9_exp_avg_ = l_self_state_list_l_self_state_keys_10_exp_avg_ = 
l_self_state_list_l_self_state_keys_11_exp_avg_ = l_self_state_list_l_self_state_keys_12_exp_avg_ = l_self_state_list_l_self_state_keys_13_exp_avg_ = l_self_state_list_l_self_state_keys_14_exp_avg_ = l_self_state_list_l_self_state_keys_15_exp_avg_ = l_self_state_list_l_self_state_keys_16_exp_avg_ = l_self_state_list_l_self_state_keys_17_exp_avg_ = l_self_state_list_l_self_state_keys_18_exp_avg_ = l_self_state_list_l_self_state_keys_19_exp_avg_ = l_self_state_list_l_self_state_keys_20_exp_avg_ = l_self_state_list_l_self_state_keys_21_exp_avg_ = l_self_state_list_l_self_state_keys_22_exp_avg_ = l_self_state_list_l_self_state_keys_23_exp_avg_ = l_self_state_list_l_self_state_keys_24_exp_avg_ = l_self_state_list_l_self_state_keys_25_exp_avg_ = l_self_state_list_l_self_state_keys_26_exp_avg_ = l_self_state_list_l_self_state_keys_27_exp_avg_ = l_self_state_list_l_self_state_keys_28_exp_avg_ = l_self_state_list_l_self_state_keys_29_exp_avg_ = l_self_state_list_l_self_state_keys_30_exp_avg_ = l_self_state_list_l_self_state_keys_31_exp_avg_ = l_self_state_list_l_self_state_keys_32_exp_avg_ = l_self_state_list_l_self_state_keys_33_exp_avg_ = l_self_state_list_l_self_state_keys_34_exp_avg_ = l_self_state_list_l_self_state_keys_35_exp_avg_ = l_self_state_list_l_self_state_keys_36_exp_avg_ = l_self_state_list_l_self_state_keys_37_exp_avg_ = l_self_state_list_l_self_state_keys_38_exp_avg_ = l_self_state_list_l_self_state_keys_39_exp_avg_ = l_self_state_list_l_self_state_keys_40_exp_avg_ = l_self_state_list_l_self_state_keys_41_exp_avg_ = l_self_state_list_l_self_state_keys_42_exp_avg_ = l_self_state_list_l_self_state_keys_43_exp_avg_ = l_self_state_list_l_self_state_keys_44_exp_avg_ = l_self_state_list_l_self_state_keys_45_exp_avg_ = l_self_state_list_l_self_state_keys_46_exp_avg_ = l_self_state_list_l_self_state_keys_47_exp_avg_ = l_self_state_list_l_self_state_keys_48_exp_avg_ = l_self_state_list_l_self_state_keys_49_exp_avg_ = l_self_state_list_l_self_state_keys_50_exp_avg_ = l_self_state_list_l_self_state_keys_51_exp_avg_ = l_self_state_list_l_self_state_keys_52_exp_avg_ = l_self_state_list_l_self_state_keys_53_exp_avg_ = l_self_state_list_l_self_state_keys_54_exp_avg_ = l_self_state_list_l_self_state_keys_55_exp_avg_ = l_self_state_list_l_self_state_keys_56_exp_avg_ = l_self_state_list_l_self_state_keys_57_exp_avg_ = l_self_state_list_l_self_state_keys_58_exp_avg_ = l_self_state_list_l_self_state_keys_59_exp_avg_ = l_self_state_list_l_self_state_keys_60_exp_avg_ = l_self_state_list_l_self_state_keys_61_exp_avg_ = l_self_state_list_l_self_state_keys_62_exp_avg_ = l_self_state_list_l_self_state_keys_63_exp_avg_ = l_self_state_list_l_self_state_keys_64_exp_avg_ = l_self_state_list_l_self_state_keys_65_exp_avg_ = l_self_state_list_l_self_state_keys_66_exp_avg_ = l_self_state_list_l_self_state_keys_67_exp_avg_ = l_self_state_list_l_self_state_keys_68_exp_avg_ = l_self_state_list_l_self_state_keys_69_exp_avg_ = l_self_state_list_l_self_state_keys_70_exp_avg_ = l_self_state_list_l_self_state_keys_71_exp_avg_ = l_self_state_list_l_self_state_keys_72_exp_avg_ = l_self_state_list_l_self_state_keys_73_exp_avg_ = l_self_state_list_l_self_state_keys_74_exp_avg_ = l_self_state_list_l_self_state_keys_75_exp_avg_ = l_self_state_list_l_self_state_keys_76_exp_avg_ = l_self_state_list_l_self_state_keys_77_exp_avg_ = l_self_state_list_l_self_state_keys_78_exp_avg_ = l_self_state_list_l_self_state_keys_79_exp_avg_ = l_self_state_list_l_self_state_keys_80_exp_avg_ = l_self_state_list_l_self_state_keys_81_exp_avg_ = 
l_self_state_list_l_self_state_keys_82_exp_avg_ = l_self_state_list_l_self_state_keys_83_exp_avg_ = l_self_state_list_l_self_state_keys_84_exp_avg_ = l_self_state_list_l_self_state_keys_85_exp_avg_ = l_self_state_list_l_self_state_keys_86_exp_avg_ = l_self_state_list_l_self_state_keys_87_exp_avg_ = l_self_state_list_l_self_state_keys_88_exp_avg_ = l_self_state_list_l_self_state_keys_89_exp_avg_ = l_self_state_list_l_self_state_keys_90_exp_avg_ = l_self_state_list_l_self_state_keys_91_exp_avg_ = l_self_state_list_l_self_state_keys_92_exp_avg_ = l_self_state_list_l_self_state_keys_93_exp_avg_ = l_self_state_list_l_self_state_keys_94_exp_avg_ = l_self_state_list_l_self_state_keys_95_exp_avg_ = l_self_state_list_l_self_state_keys_96_exp_avg_ = l_self_state_list_l_self_state_keys_97_exp_avg_ = l_self_state_list_l_self_state_keys_98_exp_avg_ = l_self_state_list_l_self_state_keys_99_exp_avg_ = l_self_state_list_l_self_state_keys_100_exp_avg_ = l_self_state_list_l_self_state_keys_101_exp_avg_ = l_self_state_list_l_self_state_keys_102_exp_avg_ = l_self_state_list_l_self_state_keys_103_exp_avg_ = l_self_state_list_l_self_state_keys_104_exp_avg_ = l_self_state_list_l_self_state_keys_105_exp_avg_ = l_self_state_list_l_self_state_keys_106_exp_avg_ = l_self_state_list_l_self_state_keys_107_exp_avg_ = l_self_state_list_l_self_state_keys_108_exp_avg_ = l_self_state_list_l_self_state_keys_109_exp_avg_ = l_self_state_list_l_self_state_keys_110_exp_avg_ = l_self_state_list_l_self_state_keys_111_exp_avg_ = l_self_state_list_l_self_state_keys_112_exp_avg_ = l_self_state_list_l_self_state_keys_113_exp_avg_ = l_self_state_list_l_self_state_keys_114_exp_avg_ = l_self_state_list_l_self_state_keys_115_exp_avg_ = l_self_state_list_l_self_state_keys_116_exp_avg_ = l_self_state_list_l_self_state_keys_117_exp_avg_ = l_self_state_list_l_self_state_keys_118_exp_avg_ = l_self_state_list_l_self_state_keys_119_exp_avg_ = l_self_state_list_l_self_state_keys_120_exp_avg_ = l_self_state_list_l_self_state_keys_121_exp_avg_ = l_self_state_list_l_self_state_keys_122_exp_avg_ = l_self_state_list_l_self_state_keys_123_exp_avg_ = l_self_state_list_l_self_state_keys_124_exp_avg_ = l_self_state_list_l_self_state_keys_125_exp_avg_ = l_self_state_list_l_self_state_keys_126_exp_avg_ = l_self_state_list_l_self_state_keys_127_exp_avg_ = l_self_state_list_l_self_state_keys_128_exp_avg_ = l_self_state_list_l_self_state_keys_129_exp_avg_ = l_self_state_list_l_self_state_keys_130_exp_avg_ = l_self_state_list_l_self_state_keys_131_exp_avg_ = l_self_state_list_l_self_state_keys_132_exp_avg_ = l_self_state_list_l_self_state_keys_133_exp_avg_ = l_self_state_list_l_self_state_keys_134_exp_avg_ = l_self_state_list_l_self_state_keys_135_exp_avg_ = l_self_state_list_l_self_state_keys_136_exp_avg_ = l_self_state_list_l_self_state_keys_137_exp_avg_ = l_self_state_list_l_self_state_keys_138_exp_avg_ = l_self_state_list_l_self_state_keys_139_exp_avg_ = l_self_state_list_l_self_state_keys_140_exp_avg_ = l_self_state_list_l_self_state_keys_141_exp_avg_ = l_self_state_list_l_self_state_keys_142_exp_avg_ = l_self_state_list_l_self_state_keys_143_exp_avg_ = l_self_state_list_l_self_state_keys_144_exp_avg_ = l_self_state_list_l_self_state_keys_145_exp_avg_ = l_self_state_list_l_self_state_keys_146_exp_avg_ = l_self_state_list_l_self_state_keys_147_exp_avg_ = getitem_1776 = getitem_1777 = getitem_1778 = getitem_1779 = getitem_1780 = getitem_1781 = getitem_1782 = getitem_1783 = getitem_1784 = getitem_1785 = getitem_1786 = getitem_1787 = getitem_1788 = 
getitem_1789 = getitem_1790 = getitem_1791 = getitem_1792 = getitem_1793 = getitem_1794 = getitem_1795 = getitem_1796 = getitem_1797 = getitem_1798 = getitem_1799 = getitem_1800 = getitem_1801 = getitem_1802 = getitem_1803 = getitem_1804 = getitem_1805 = getitem_1806 = getitem_1807 = getitem_1808 = getitem_1809 = getitem_1810 = getitem_1811 = getitem_1812 = getitem_1813 = getitem_1814 = getitem_1815 = getitem_1816 = getitem_1817 = getitem_1818 = getitem_1819 = getitem_1820 = getitem_1821 = getitem_1822 = getitem_1823 = getitem_1824 = getitem_1825 = getitem_1826 = getitem_1827 = getitem_1828 = getitem_1829 = getitem_1830 = getitem_1831 = getitem_1832 = getitem_1833 = getitem_1834 = getitem_1835 = getitem_1836 = getitem_1837 = getitem_1838 = getitem_1839 = getitem_1840 = getitem_1841 = getitem_1842 = getitem_1843 = getitem_1844 = getitem_1845 = getitem_1846 = getitem_1847 = getitem_1848 = getitem_1849 = getitem_1850 = getitem_1851 = getitem_1852 = getitem_1853 = getitem_1854 = getitem_1855 = getitem_1856 = getitem_1857 = getitem_1858 = getitem_1859 = getitem_1860 = getitem_1861 = getitem_1862 = getitem_1863 = getitem_1864 = getitem_1865 = getitem_1866 = getitem_1867 = getitem_1868 = getitem_1869 = getitem_1870 = getitem_1871 = getitem_1872 = getitem_1873 = getitem_1874 = getitem_1875 = getitem_1876 = getitem_1877 = getitem_1878 = getitem_1879 = getitem_1880 = getitem_1881 = getitem_1882 = getitem_1883 = getitem_1884 = getitem_1885 = getitem_1886 = getitem_1887 = getitem_1888 = getitem_1889 = getitem_1890 = getitem_1891 = getitem_1892 = getitem_1893 = getitem_1894 = getitem_1895 = getitem_1896 = getitem_1897 = getitem_1898 = getitem_1899 = getitem_1900 = getitem_1901 = getitem_1902 = getitem_1903 = getitem_1904 = getitem_1905 = getitem_1906 = getitem_1907 = getitem_1908 = getitem_1909 = getitem_1910 = getitem_1911 = getitem_1912 = getitem_1913 = getitem_1914 = getitem_1915 = getitem_1916 = getitem_1917 = getitem_1918 = getitem_1919 = getitem_1920 = getitem_1921 = getitem_1922 = getitem_1923 = _foreach_addcdiv_ = None
+ return ()
+
+V0806 13:56:07.383000 4107173 torch/_dynamo/utils.py:824] {"chromium_event": {}, "has_payload": "9e669f5de4c8b0f7cdd129f8fcc83f7c"}
+ {
+ "name": "OutputGraph.call_user_compiler",
+ "ts": 1722977767383256.5,
+ "args": null,
+ "ph": "B",
+ "pid": 0
+ }
+V0806 13:56:07.383000 4107173 torch/_dynamo/utils.py:824] {"chromium_event": {}, "has_payload": "2262e283850af4a8bd9532184526e3a4"}
+ {
+ "name": "backend_compile",
+ "ts": 1722977767383359.2,
+ "args": null,
+ "ph": "B",
+ "pid": 0
+ }
+V0806 13:56:07.517000 4107173 torch/_dynamo/utils.py:824] {"chromium_event": {}, "has_payload": "b9173e7748073b756e5d593bc739b193"}
+ {
+ "name": "create_aot_dispatcher_function",
+ "ts": 1722977767516968.2,
+ "args": null,
+ "ph": "B",
+ "pid": 0
+ }
+V0806 13:56:10.004000 4107173 torch/_functorch/_aot_autograd/dispatch_and_compile_graph.py:200] {"aot_forward_graph": {}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0, "has_payload": "60e4dea494f30c4eb3ba47a5e0bbfd0d"}
+ class <lambda>(torch.nn.Module):
+ def forward(self, arg0_1: "f32[50304, 768][768, 1]cuda:0", arg1_1: "f32[1024, 768][768, 1]cuda:0", arg2_1: "f32[768][1]cuda:0", arg3_1: "f32[768][1]cuda:0", arg4_1: "f32[2304, 768][768, 1]cuda:0", arg5_1: "f32[2304][1]cuda:0", arg6_1: "f32[768, 768][768, 1]cuda:0", arg7_1: "f32[768][1]cuda:0", arg8_1: "f32[768][1]cuda:0", arg9_1: "f32[768][1]cuda:0", arg10_1: "f32[3072, 768][768, 1]cuda:0", arg11_1: "f32[3072][1]cuda:0", arg12_1: "f32[768, 3072][3072, 1]cuda:0", arg13_1:
"f32[768][1]cuda:0", arg14_1: "f32[768][1]cuda:0", arg15_1: "f32[768][1]cuda:0", arg16_1: "f32[2304, 768][768, 1]cuda:0", arg17_1: "f32[2304][1]cuda:0", arg18_1: "f32[768, 768][768, 1]cuda:0", arg19_1: "f32[768][1]cuda:0", arg20_1: "f32[768][1]cuda:0", arg21_1: "f32[768][1]cuda:0", arg22_1: "f32[3072, 768][768, 1]cuda:0", arg23_1: "f32[3072][1]cuda:0", arg24_1: "f32[768, 3072][3072, 1]cuda:0", arg25_1: "f32[768][1]cuda:0", arg26_1: "f32[768][1]cuda:0", arg27_1: "f32[768][1]cuda:0", arg28_1: "f32[2304, 768][768, 1]cuda:0", arg29_1: "f32[2304][1]cuda:0", arg30_1: "f32[768, 768][768, 1]cuda:0", arg31_1: "f32[768][1]cuda:0", arg32_1: "f32[768][1]cuda:0", arg33_1: "f32[768][1]cuda:0", arg34_1: "f32[3072, 768][768, 1]cuda:0", arg35_1: "f32[3072][1]cuda:0", arg36_1: "f32[768, 3072][3072, 1]cuda:0", arg37_1: "f32[768][1]cuda:0", arg38_1: "f32[768][1]cuda:0", arg39_1: "f32[768][1]cuda:0", arg40_1: "f32[2304, 768][768, 1]cuda:0", arg41_1: "f32[2304][1]cuda:0", arg42_1: "f32[768, 768][768, 1]cuda:0", arg43_1: "f32[768][1]cuda:0", arg44_1: "f32[768][1]cuda:0", arg45_1: "f32[768][1]cuda:0", arg46_1: "f32[3072, 768][768, 1]cuda:0", arg47_1: "f32[3072][1]cuda:0", arg48_1: "f32[768, 3072][3072, 1]cuda:0", arg49_1: "f32[768][1]cuda:0", arg50_1: "f32[768][1]cuda:0", arg51_1: "f32[768][1]cuda:0", arg52_1: "f32[2304, 768][768, 1]cuda:0", arg53_1: "f32[2304][1]cuda:0", arg54_1: "f32[768, 768][768, 1]cuda:0", arg55_1: "f32[768][1]cuda:0", arg56_1: "f32[768][1]cuda:0", arg57_1: "f32[768][1]cuda:0", arg58_1: "f32[3072, 768][768, 1]cuda:0", arg59_1: "f32[3072][1]cuda:0", arg60_1: "f32[768, 3072][3072, 1]cuda:0", arg61_1: "f32[768][1]cuda:0", arg62_1: "f32[768][1]cuda:0", arg63_1: "f32[768][1]cuda:0", arg64_1: "f32[2304, 768][768, 1]cuda:0", arg65_1: "f32[2304][1]cuda:0", arg66_1: "f32[768, 768][768, 1]cuda:0", arg67_1: "f32[768][1]cuda:0", arg68_1: "f32[768][1]cuda:0", arg69_1: "f32[768][1]cuda:0", arg70_1: "f32[3072, 768][768, 1]cuda:0", arg71_1: "f32[3072][1]cuda:0", arg72_1: "f32[768, 3072][3072, 1]cuda:0", arg73_1: "f32[768][1]cuda:0", arg74_1: "f32[768][1]cuda:0", arg75_1: "f32[768][1]cuda:0", arg76_1: "f32[2304, 768][768, 1]cuda:0", arg77_1: "f32[2304][1]cuda:0", arg78_1: "f32[768, 768][768, 1]cuda:0", arg79_1: "f32[768][1]cuda:0", arg80_1: "f32[768][1]cuda:0", arg81_1: "f32[768][1]cuda:0", arg82_1: "f32[3072, 768][768, 1]cuda:0", arg83_1: "f32[3072][1]cuda:0", arg84_1: "f32[768, 3072][3072, 1]cuda:0", arg85_1: "f32[768][1]cuda:0", arg86_1: "f32[768][1]cuda:0", arg87_1: "f32[768][1]cuda:0", arg88_1: "f32[2304, 768][768, 1]cuda:0", arg89_1: "f32[2304][1]cuda:0", arg90_1: "f32[768, 768][768, 1]cuda:0", arg91_1: "f32[768][1]cuda:0", arg92_1: "f32[768][1]cuda:0", arg93_1: "f32[768][1]cuda:0", arg94_1: "f32[3072, 768][768, 1]cuda:0", arg95_1: "f32[3072][1]cuda:0", arg96_1: "f32[768, 3072][3072, 1]cuda:0", arg97_1: "f32[768][1]cuda:0", arg98_1: "f32[768][1]cuda:0", arg99_1: "f32[768][1]cuda:0", arg100_1: "f32[2304, 768][768, 1]cuda:0", arg101_1: "f32[2304][1]cuda:0", arg102_1: "f32[768, 768][768, 1]cuda:0", arg103_1: "f32[768][1]cuda:0", arg104_1: "f32[768][1]cuda:0", arg105_1: "f32[768][1]cuda:0", arg106_1: "f32[3072, 768][768, 1]cuda:0", arg107_1: "f32[3072][1]cuda:0", arg108_1: "f32[768, 3072][3072, 1]cuda:0", arg109_1: "f32[768][1]cuda:0", arg110_1: "f32[768][1]cuda:0", arg111_1: "f32[768][1]cuda:0", arg112_1: "f32[2304, 768][768, 1]cuda:0", arg113_1: "f32[2304][1]cuda:0", arg114_1: "f32[768, 768][768, 1]cuda:0", arg115_1: "f32[768][1]cuda:0", arg116_1: "f32[768][1]cuda:0", arg117_1: "f32[768][1]cuda:0", 
arg118_1: "f32[3072, 768][768, 1]cuda:0", arg119_1: "f32[3072][1]cuda:0", arg120_1: "f32[768, 3072][3072, 1]cuda:0", arg121_1: "f32[768][1]cuda:0", arg122_1: "f32[768][1]cuda:0", arg123_1: "f32[768][1]cuda:0", arg124_1: "f32[2304, 768][768, 1]cuda:0", arg125_1: "f32[2304][1]cuda:0", arg126_1: "f32[768, 768][768, 1]cuda:0", arg127_1: "f32[768][1]cuda:0", arg128_1: "f32[768][1]cuda:0", arg129_1: "f32[768][1]cuda:0", arg130_1: "f32[3072, 768][768, 1]cuda:0", arg131_1: "f32[3072][1]cuda:0", arg132_1: "f32[768, 3072][3072, 1]cuda:0", arg133_1: "f32[768][1]cuda:0", arg134_1: "f32[768][1]cuda:0", arg135_1: "f32[768][1]cuda:0", arg136_1: "f32[2304, 768][768, 1]cuda:0", arg137_1: "f32[2304][1]cuda:0", arg138_1: "f32[768, 768][768, 1]cuda:0", arg139_1: "f32[768][1]cuda:0", arg140_1: "f32[768][1]cuda:0", arg141_1: "f32[768][1]cuda:0", arg142_1: "f32[3072, 768][768, 1]cuda:0", arg143_1: "f32[3072][1]cuda:0", arg144_1: "f32[768, 3072][3072, 1]cuda:0", arg145_1: "f32[768][1]cuda:0", arg146_1: "f32[768][1]cuda:0", arg147_1: "f32[768][1]cuda:0", arg148_1: "f32[][]cuda:0", arg149_1: "f32[1024, 768][768, 1]cuda:0", arg150_1: "f32[1024, 768][768, 1]cuda:0", arg151_1: "f32[50304, 768][768, 1]cuda:0", arg152_1: "f32[1024, 768][768, 1]cuda:0", arg153_1: "f32[768][1]cuda:0", arg154_1: "f32[768][1]cuda:0", arg155_1: "f32[2304, 768][768, 1]cuda:0", arg156_1: "f32[2304][1]cuda:0", arg157_1: "f32[768, 768][768, 1]cuda:0", arg158_1: "f32[768][1]cuda:0", arg159_1: "f32[768][1]cuda:0", arg160_1: "f32[768][1]cuda:0", arg161_1: "f32[3072, 768][768, 1]cuda:0", arg162_1: "f32[3072][1]cuda:0", arg163_1: "f32[768, 3072][3072, 1]cuda:0", arg164_1: "f32[768][1]cuda:0", arg165_1: "f32[768][1]cuda:0", arg166_1: "f32[768][1]cuda:0", arg167_1: "f32[2304, 768][768, 1]cuda:0", arg168_1: "f32[2304][1]cuda:0", arg169_1: "f32[768, 768][768, 1]cuda:0", arg170_1: "f32[768][1]cuda:0", arg171_1: "f32[768][1]cuda:0", arg172_1: "f32[768][1]cuda:0", arg173_1: "f32[3072, 768][768, 1]cuda:0", arg174_1: "f32[3072][1]cuda:0", arg175_1: "f32[768, 3072][3072, 1]cuda:0", arg176_1: "f32[768][1]cuda:0", arg177_1: "f32[768][1]cuda:0", arg178_1: "f32[768][1]cuda:0", arg179_1: "f32[2304, 768][768, 1]cuda:0", arg180_1: "f32[2304][1]cuda:0", arg181_1: "f32[768, 768][768, 1]cuda:0", arg182_1: "f32[768][1]cuda:0", arg183_1: "f32[768][1]cuda:0", arg184_1: "f32[768][1]cuda:0", arg185_1: "f32[3072, 768][768, 1]cuda:0", arg186_1: "f32[3072][1]cuda:0", arg187_1: "f32[768, 3072][3072, 1]cuda:0", arg188_1: "f32[768][1]cuda:0", arg189_1: "f32[768][1]cuda:0", arg190_1: "f32[768][1]cuda:0", arg191_1: "f32[2304, 768][768, 1]cuda:0", arg192_1: "f32[2304][1]cuda:0", arg193_1: "f32[768, 768][768, 1]cuda:0", arg194_1: "f32[768][1]cuda:0", arg195_1: "f32[768][1]cuda:0", arg196_1: "f32[768][1]cuda:0", arg197_1: "f32[3072, 768][768, 1]cuda:0", arg198_1: "f32[3072][1]cuda:0", arg199_1: "f32[768, 3072][3072, 1]cuda:0", arg200_1: "f32[768][1]cuda:0", arg201_1: "f32[768][1]cuda:0", arg202_1: "f32[768][1]cuda:0", arg203_1: "f32[2304, 768][768, 1]cuda:0", arg204_1: "f32[2304][1]cuda:0", arg205_1: "f32[768, 768][768, 1]cuda:0", arg206_1: "f32[768][1]cuda:0", arg207_1: "f32[768][1]cuda:0", arg208_1: "f32[768][1]cuda:0", arg209_1: "f32[3072, 768][768, 1]cuda:0", arg210_1: "f32[3072][1]cuda:0", arg211_1: "f32[768, 3072][3072, 1]cuda:0", arg212_1: "f32[768][1]cuda:0", arg213_1: "f32[768][1]cuda:0", arg214_1: "f32[768][1]cuda:0", arg215_1: "f32[2304, 768][768, 1]cuda:0", arg216_1: "f32[2304][1]cuda:0", arg217_1: "f32[768, 768][768, 1]cuda:0", arg218_1: "f32[768][1]cuda:0", arg219_1: 
"f32[768][1]cuda:0", arg220_1: "f32[768][1]cuda:0", arg221_1: "f32[3072, 768][768, 1]cuda:0", arg222_1: "f32[3072][1]cuda:0", arg223_1: "f32[768, 3072][3072, 1]cuda:0", arg224_1: "f32[768][1]cuda:0", arg225_1: "f32[768][1]cuda:0", arg226_1: "f32[768][1]cuda:0", arg227_1: "f32[2304, 768][768, 1]cuda:0", arg228_1: "f32[2304][1]cuda:0", arg229_1: "f32[768, 768][768, 1]cuda:0", arg230_1: "f32[768][1]cuda:0", arg231_1: "f32[768][1]cuda:0", arg232_1: "f32[768][1]cuda:0", arg233_1: "f32[3072, 768][768, 1]cuda:0", arg234_1: "f32[3072][1]cuda:0", arg235_1: "f32[768, 3072][3072, 1]cuda:0", arg236_1: "f32[768][1]cuda:0", arg237_1: "f32[768][1]cuda:0", arg238_1: "f32[768][1]cuda:0", arg239_1: "f32[2304, 768][768, 1]cuda:0", arg240_1: "f32[2304][1]cuda:0", arg241_1: "f32[768, 768][768, 1]cuda:0", arg242_1: "f32[768][1]cuda:0", arg243_1: "f32[768][1]cuda:0", arg244_1: "f32[768][1]cuda:0", arg245_1: "f32[3072, 768][768, 1]cuda:0", arg246_1: "f32[3072][1]cuda:0", arg247_1: "f32[768, 3072][3072, 1]cuda:0", arg248_1: "f32[768][1]cuda:0", arg249_1: "f32[768][1]cuda:0", arg250_1: "f32[768][1]cuda:0", arg251_1: "f32[2304, 768][768, 1]cuda:0", arg252_1: "f32[2304][1]cuda:0", arg253_1: "f32[768, 768][768, 1]cuda:0", arg254_1: "f32[768][1]cuda:0", arg255_1: "f32[768][1]cuda:0", arg256_1: "f32[768][1]cuda:0", arg257_1: "f32[3072, 768][768, 1]cuda:0", arg258_1: "f32[3072][1]cuda:0", arg259_1: "f32[768, 3072][3072, 1]cuda:0", arg260_1: "f32[768][1]cuda:0", arg261_1: "f32[768][1]cuda:0", arg262_1: "f32[768][1]cuda:0", arg263_1: "f32[2304, 768][768, 1]cuda:0", arg264_1: "f32[2304][1]cuda:0", arg265_1: "f32[768, 768][768, 1]cuda:0", arg266_1: "f32[768][1]cuda:0", arg267_1: "f32[768][1]cuda:0", arg268_1: "f32[768][1]cuda:0", arg269_1: "f32[3072, 768][768, 1]cuda:0", arg270_1: "f32[3072][1]cuda:0", arg271_1: "f32[768, 3072][3072, 1]cuda:0", arg272_1: "f32[768][1]cuda:0", arg273_1: "f32[768][1]cuda:0", arg274_1: "f32[768][1]cuda:0", arg275_1: "f32[2304, 768][768, 1]cuda:0", arg276_1: "f32[2304][1]cuda:0", arg277_1: "f32[768, 768][768, 1]cuda:0", arg278_1: "f32[768][1]cuda:0", arg279_1: "f32[768][1]cuda:0", arg280_1: "f32[768][1]cuda:0", arg281_1: "f32[3072, 768][768, 1]cuda:0", arg282_1: "f32[3072][1]cuda:0", arg283_1: "f32[768, 3072][3072, 1]cuda:0", arg284_1: "f32[768][1]cuda:0", arg285_1: "f32[768][1]cuda:0", arg286_1: "f32[768][1]cuda:0", arg287_1: "f32[2304, 768][768, 1]cuda:0", arg288_1: "f32[2304][1]cuda:0", arg289_1: "f32[768, 768][768, 1]cuda:0", arg290_1: "f32[768][1]cuda:0", arg291_1: "f32[768][1]cuda:0", arg292_1: "f32[768][1]cuda:0", arg293_1: "f32[3072, 768][768, 1]cuda:0", arg294_1: "f32[3072][1]cuda:0", arg295_1: "f32[768, 3072][3072, 1]cuda:0", arg296_1: "f32[768][1]cuda:0", arg297_1: "f32[768][1]cuda:0", arg298_1: "f32[768][1]cuda:0", arg299_1: "f32[50304, 768][768, 1]cuda:0", arg300_1: "f32[768][1]cuda:0", arg301_1: "f32[768][1]cuda:0", arg302_1: "f32[2304, 768][768, 1]cuda:0", arg303_1: "f32[2304][1]cuda:0", arg304_1: "f32[768, 768][768, 1]cuda:0", arg305_1: "f32[768][1]cuda:0", arg306_1: "f32[768][1]cuda:0", arg307_1: "f32[768][1]cuda:0", arg308_1: "f32[3072, 768][768, 1]cuda:0", arg309_1: "f32[3072][1]cuda:0", arg310_1: "f32[768, 3072][3072, 1]cuda:0", arg311_1: "f32[768][1]cuda:0", arg312_1: "f32[768][1]cuda:0", arg313_1: "f32[768][1]cuda:0", arg314_1: "f32[2304, 768][768, 1]cuda:0", arg315_1: "f32[2304][1]cuda:0", arg316_1: "f32[768, 768][768, 1]cuda:0", arg317_1: "f32[768][1]cuda:0", arg318_1: "f32[768][1]cuda:0", arg319_1: "f32[768][1]cuda:0", arg320_1: "f32[3072, 768][768, 1]cuda:0", arg321_1: 
"f32[3072][1]cuda:0", arg322_1: "f32[768, 3072][3072, 1]cuda:0", arg323_1: "f32[768][1]cuda:0", arg324_1: "f32[768][1]cuda:0", arg325_1: "f32[768][1]cuda:0", arg326_1: "f32[2304, 768][768, 1]cuda:0", arg327_1: "f32[2304][1]cuda:0", arg328_1: "f32[768, 768][768, 1]cuda:0", arg329_1: "f32[768][1]cuda:0", arg330_1: "f32[768][1]cuda:0", arg331_1: "f32[768][1]cuda:0", arg332_1: "f32[3072, 768][768, 1]cuda:0", arg333_1: "f32[3072][1]cuda:0", arg334_1: "f32[768, 3072][3072, 1]cuda:0", arg335_1: "f32[768][1]cuda:0", arg336_1: "f32[768][1]cuda:0", arg337_1: "f32[768][1]cuda:0", arg338_1: "f32[2304, 768][768, 1]cuda:0", arg339_1: "f32[2304][1]cuda:0", arg340_1: "f32[768, 768][768, 1]cuda:0", arg341_1: "f32[768][1]cuda:0", arg342_1: "f32[768][1]cuda:0", arg343_1: "f32[768][1]cuda:0", arg344_1: "f32[3072, 768][768, 1]cuda:0", arg345_1: "f32[3072][1]cuda:0", arg346_1: "f32[768, 3072][3072, 1]cuda:0", arg347_1: "f32[768][1]cuda:0", arg348_1: "f32[768][1]cuda:0", arg349_1: "f32[768][1]cuda:0", arg350_1: "f32[2304, 768][768, 1]cuda:0", arg351_1: "f32[2304][1]cuda:0", arg352_1: "f32[768, 768][768, 1]cuda:0", arg353_1: "f32[768][1]cuda:0", arg354_1: "f32[768][1]cuda:0", arg355_1: "f32[768][1]cuda:0", arg356_1: "f32[3072, 768][768, 1]cuda:0", arg357_1: "f32[3072][1]cuda:0", arg358_1: "f32[768, 3072][3072, 1]cuda:0", arg359_1: "f32[768][1]cuda:0", arg360_1: "f32[768][1]cuda:0", arg361_1: "f32[768][1]cuda:0", arg362_1: "f32[2304, 768][768, 1]cuda:0", arg363_1: "f32[2304][1]cuda:0", arg364_1: "f32[768, 768][768, 1]cuda:0", arg365_1: "f32[768][1]cuda:0", arg366_1: "f32[768][1]cuda:0", arg367_1: "f32[768][1]cuda:0", arg368_1: "f32[3072, 768][768, 1]cuda:0", arg369_1: "f32[3072][1]cuda:0", arg370_1: "f32[768, 3072][3072, 1]cuda:0", arg371_1: "f32[768][1]cuda:0", arg372_1: "f32[768][1]cuda:0", arg373_1: "f32[768][1]cuda:0", arg374_1: "f32[2304, 768][768, 1]cuda:0", arg375_1: "f32[2304][1]cuda:0", arg376_1: "f32[768, 768][768, 1]cuda:0", arg377_1: "f32[768][1]cuda:0", arg378_1: "f32[768][1]cuda:0", arg379_1: "f32[768][1]cuda:0", arg380_1: "f32[3072, 768][768, 1]cuda:0", arg381_1: "f32[3072][1]cuda:0", arg382_1: "f32[768, 3072][3072, 1]cuda:0", arg383_1: "f32[768][1]cuda:0", arg384_1: "f32[768][1]cuda:0", arg385_1: "f32[768][1]cuda:0", arg386_1: "f32[2304, 768][768, 1]cuda:0", arg387_1: "f32[2304][1]cuda:0", arg388_1: "f32[768, 768][768, 1]cuda:0", arg389_1: "f32[768][1]cuda:0", arg390_1: "f32[768][1]cuda:0", arg391_1: "f32[768][1]cuda:0", arg392_1: "f32[3072, 768][768, 1]cuda:0", arg393_1: "f32[3072][1]cuda:0", arg394_1: "f32[768, 3072][3072, 1]cuda:0", arg395_1: "f32[768][1]cuda:0", arg396_1: "f32[768][1]cuda:0", arg397_1: "f32[768][1]cuda:0", arg398_1: "f32[2304, 768][768, 1]cuda:0", arg399_1: "f32[2304][1]cuda:0", arg400_1: "f32[768, 768][768, 1]cuda:0", arg401_1: "f32[768][1]cuda:0", arg402_1: "f32[768][1]cuda:0", arg403_1: "f32[768][1]cuda:0", arg404_1: "f32[3072, 768][768, 1]cuda:0", arg405_1: "f32[3072][1]cuda:0", arg406_1: "f32[768, 3072][3072, 1]cuda:0", arg407_1: "f32[768][1]cuda:0", arg408_1: "f32[768][1]cuda:0", arg409_1: "f32[768][1]cuda:0", arg410_1: "f32[2304, 768][768, 1]cuda:0", arg411_1: "f32[2304][1]cuda:0", arg412_1: "f32[768, 768][768, 1]cuda:0", arg413_1: "f32[768][1]cuda:0", arg414_1: "f32[768][1]cuda:0", arg415_1: "f32[768][1]cuda:0", arg416_1: "f32[3072, 768][768, 1]cuda:0", arg417_1: "f32[3072][1]cuda:0", arg418_1: "f32[768, 3072][3072, 1]cuda:0", arg419_1: "f32[768][1]cuda:0", arg420_1: "f32[768][1]cuda:0", arg421_1: "f32[768][1]cuda:0", arg422_1: "f32[2304, 768][768, 1]cuda:0", arg423_1: 
"f32[2304][1]cuda:0", arg424_1: "f32[768, 768][768, 1]cuda:0", arg425_1: "f32[768][1]cuda:0", arg426_1: "f32[768][1]cuda:0", arg427_1: "f32[768][1]cuda:0", arg428_1: "f32[3072, 768][768, 1]cuda:0", arg429_1: "f32[3072][1]cuda:0", arg430_1: "f32[768, 3072][3072, 1]cuda:0", arg431_1: "f32[768][1]cuda:0", arg432_1: "f32[768][1]cuda:0", arg433_1: "f32[768][1]cuda:0", arg434_1: "f32[2304, 768][768, 1]cuda:0", arg435_1: "f32[2304][1]cuda:0", arg436_1: "f32[768, 768][768, 1]cuda:0", arg437_1: "f32[768][1]cuda:0", arg438_1: "f32[768][1]cuda:0", arg439_1: "f32[768][1]cuda:0", arg440_1: "f32[3072, 768][768, 1]cuda:0", arg441_1: "f32[3072][1]cuda:0", arg442_1: "f32[768, 3072][3072, 1]cuda:0", arg443_1: "f32[768][1]cuda:0", arg444_1: "f32[768][1]cuda:0", arg445_1: "f32[768][1]cuda:0", arg446_1: "f32[50304, 768][768, 1]cuda:0", arg447_1: "f32[768][1]cuda:0", arg448_1: "f32[768][1]cuda:0", arg449_1: "f32[2304, 768][768, 1]cuda:0", arg450_1: "f32[2304][1]cuda:0", arg451_1: "f32[768, 768][768, 1]cuda:0", arg452_1: "f32[768][1]cuda:0", arg453_1: "f32[768][1]cuda:0", arg454_1: "f32[768][1]cuda:0", arg455_1: "f32[3072, 768][768, 1]cuda:0", arg456_1: "f32[3072][1]cuda:0", arg457_1: "f32[768, 3072][3072, 1]cuda:0", arg458_1: "f32[768][1]cuda:0", arg459_1: "f32[768][1]cuda:0", arg460_1: "f32[768][1]cuda:0", arg461_1: "f32[2304, 768][768, 1]cuda:0", arg462_1: "f32[2304][1]cuda:0", arg463_1: "f32[768, 768][768, 1]cuda:0", arg464_1: "f32[768][1]cuda:0", arg465_1: "f32[768][1]cuda:0", arg466_1: "f32[768][1]cuda:0", arg467_1: "f32[3072, 768][768, 1]cuda:0", arg468_1: "f32[3072][1]cuda:0", arg469_1: "f32[768, 3072][3072, 1]cuda:0", arg470_1: "f32[768][1]cuda:0", arg471_1: "f32[768][1]cuda:0", arg472_1: "f32[768][1]cuda:0", arg473_1: "f32[2304, 768][768, 1]cuda:0", arg474_1: "f32[2304][1]cuda:0", arg475_1: "f32[768, 768][768, 1]cuda:0", arg476_1: "f32[768][1]cuda:0", arg477_1: "f32[768][1]cuda:0", arg478_1: "f32[768][1]cuda:0", arg479_1: "f32[3072, 768][768, 1]cuda:0", arg480_1: "f32[3072][1]cuda:0", arg481_1: "f32[768, 3072][3072, 1]cuda:0", arg482_1: "f32[768][1]cuda:0", arg483_1: "f32[768][1]cuda:0", arg484_1: "f32[768][1]cuda:0", arg485_1: "f32[2304, 768][768, 1]cuda:0", arg486_1: "f32[2304][1]cuda:0", arg487_1: "f32[768, 768][768, 1]cuda:0", arg488_1: "f32[768][1]cuda:0", arg489_1: "f32[768][1]cuda:0", arg490_1: "f32[768][1]cuda:0", arg491_1: "f32[3072, 768][768, 1]cuda:0", arg492_1: "f32[3072][1]cuda:0", arg493_1: "f32[768, 3072][3072, 1]cuda:0", arg494_1: "f32[768][1]cuda:0", arg495_1: "f32[768][1]cuda:0", arg496_1: "f32[768][1]cuda:0", arg497_1: "f32[2304, 768][768, 1]cuda:0", arg498_1: "f32[2304][1]cuda:0", arg499_1: "f32[768, 768][768, 1]cuda:0", arg500_1: "f32[768][1]cuda:0", arg501_1: "f32[768][1]cuda:0", arg502_1: "f32[768][1]cuda:0", arg503_1: "f32[3072, 768][768, 1]cuda:0", arg504_1: "f32[3072][1]cuda:0", arg505_1: "f32[768, 3072][3072, 1]cuda:0", arg506_1: "f32[768][1]cuda:0", arg507_1: "f32[768][1]cuda:0", arg508_1: "f32[768][1]cuda:0", arg509_1: "f32[2304, 768][768, 1]cuda:0", arg510_1: "f32[2304][1]cuda:0", arg511_1: "f32[768, 768][768, 1]cuda:0", arg512_1: "f32[768][1]cuda:0", arg513_1: "f32[768][1]cuda:0", arg514_1: "f32[768][1]cuda:0", arg515_1: "f32[3072, 768][768, 1]cuda:0", arg516_1: "f32[3072][1]cuda:0", arg517_1: "f32[768, 3072][3072, 1]cuda:0", arg518_1: "f32[768][1]cuda:0", arg519_1: "f32[768][1]cuda:0", arg520_1: "f32[768][1]cuda:0", arg521_1: "f32[2304, 768][768, 1]cuda:0", arg522_1: "f32[2304][1]cuda:0", arg523_1: "f32[768, 768][768, 1]cuda:0", arg524_1: "f32[768][1]cuda:0", arg525_1: 
"f32[768][1]cuda:0", arg526_1: "f32[768][1]cuda:0", arg527_1: "f32[3072, 768][768, 1]cuda:0", arg528_1: "f32[3072][1]cuda:0", arg529_1: "f32[768, 3072][3072, 1]cuda:0", arg530_1: "f32[768][1]cuda:0", arg531_1: "f32[768][1]cuda:0", arg532_1: "f32[768][1]cuda:0", arg533_1: "f32[2304, 768][768, 1]cuda:0", arg534_1: "f32[2304][1]cuda:0", arg535_1: "f32[768, 768][768, 1]cuda:0", arg536_1: "f32[768][1]cuda:0", arg537_1: "f32[768][1]cuda:0", arg538_1: "f32[768][1]cuda:0", arg539_1: "f32[3072, 768][768, 1]cuda:0", arg540_1: "f32[3072][1]cuda:0", arg541_1: "f32[768, 3072][3072, 1]cuda:0", arg542_1: "f32[768][1]cuda:0", arg543_1: "f32[768][1]cuda:0", arg544_1: "f32[768][1]cuda:0", arg545_1: "f32[2304, 768][768, 1]cuda:0", arg546_1: "f32[2304][1]cuda:0", arg547_1: "f32[768, 768][768, 1]cuda:0", arg548_1: "f32[768][1]cuda:0", arg549_1: "f32[768][1]cuda:0", arg550_1: "f32[768][1]cuda:0", arg551_1: "f32[3072, 768][768, 1]cuda:0", arg552_1: "f32[3072][1]cuda:0", arg553_1: "f32[768, 3072][3072, 1]cuda:0", arg554_1: "f32[768][1]cuda:0", arg555_1: "f32[768][1]cuda:0", arg556_1: "f32[768][1]cuda:0", arg557_1: "f32[2304, 768][768, 1]cuda:0", arg558_1: "f32[2304][1]cuda:0", arg559_1: "f32[768, 768][768, 1]cuda:0", arg560_1: "f32[768][1]cuda:0", arg561_1: "f32[768][1]cuda:0", arg562_1: "f32[768][1]cuda:0", arg563_1: "f32[3072, 768][768, 1]cuda:0", arg564_1: "f32[3072][1]cuda:0", arg565_1: "f32[768, 3072][3072, 1]cuda:0", arg566_1: "f32[768][1]cuda:0", arg567_1: "f32[768][1]cuda:0", arg568_1: "f32[768][1]cuda:0", arg569_1: "f32[2304, 768][768, 1]cuda:0", arg570_1: "f32[2304][1]cuda:0", arg571_1: "f32[768, 768][768, 1]cuda:0", arg572_1: "f32[768][1]cuda:0", arg573_1: "f32[768][1]cuda:0", arg574_1: "f32[768][1]cuda:0", arg575_1: "f32[3072, 768][768, 1]cuda:0", arg576_1: "f32[3072][1]cuda:0", arg577_1: "f32[768, 3072][3072, 1]cuda:0", arg578_1: "f32[768][1]cuda:0", arg579_1: "f32[768][1]cuda:0", arg580_1: "f32[768][1]cuda:0", arg581_1: "f32[2304, 768][768, 1]cuda:0", arg582_1: "f32[2304][1]cuda:0", arg583_1: "f32[768, 768][768, 1]cuda:0", arg584_1: "f32[768][1]cuda:0", arg585_1: "f32[768][1]cuda:0", arg586_1: "f32[768][1]cuda:0", arg587_1: "f32[3072, 768][768, 1]cuda:0", arg588_1: "f32[3072][1]cuda:0", arg589_1: "f32[768, 3072][3072, 1]cuda:0", arg590_1: "f32[768][1]cuda:0", arg591_1: "f32[768][1]cuda:0", arg592_1: "f32[768][1]cuda:0", arg593_1: "f32[][]cuda:0", arg594_1: "f32[][]cuda:0", arg595_1: "f32[][]cuda:0", arg596_1: "f32[][]cuda:0", arg597_1: "f32[][]cuda:0", arg598_1: "f32[][]cuda:0", arg599_1: "f32[][]cuda:0", arg600_1: "f32[][]cuda:0", arg601_1: "f32[][]cuda:0", arg602_1: "f32[][]cuda:0", arg603_1: "f32[][]cuda:0", arg604_1: "f32[][]cuda:0", arg605_1: "f32[][]cuda:0", arg606_1: "f32[][]cuda:0", arg607_1: "f32[][]cuda:0", arg608_1: "f32[][]cuda:0", arg609_1: "f32[][]cuda:0", arg610_1: "f32[][]cuda:0", arg611_1: "f32[][]cuda:0", arg612_1: "f32[][]cuda:0", arg613_1: "f32[][]cuda:0", arg614_1: "f32[][]cuda:0", arg615_1: "f32[][]cuda:0", arg616_1: "f32[][]cuda:0", arg617_1: "f32[][]cuda:0", arg618_1: "f32[][]cuda:0", arg619_1: "f32[][]cuda:0", arg620_1: "f32[][]cuda:0", arg621_1: "f32[][]cuda:0", arg622_1: "f32[][]cuda:0", arg623_1: "f32[][]cuda:0", arg624_1: "f32[][]cuda:0", arg625_1: "f32[][]cuda:0", arg626_1: "f32[][]cuda:0", arg627_1: "f32[][]cuda:0", arg628_1: "f32[][]cuda:0", arg629_1: "f32[][]cuda:0", arg630_1: "f32[][]cuda:0", arg631_1: "f32[][]cuda:0", arg632_1: "f32[][]cuda:0", arg633_1: "f32[][]cuda:0", arg634_1: "f32[][]cuda:0", arg635_1: "f32[][]cuda:0", arg636_1: "f32[][]cuda:0", arg637_1: 
"f32[][]cuda:0", arg638_1: "f32[][]cuda:0", arg639_1: "f32[][]cuda:0", arg640_1: "f32[][]cuda:0", arg641_1: "f32[][]cuda:0", arg642_1: "f32[][]cuda:0", arg643_1: "f32[][]cuda:0", arg644_1: "f32[][]cuda:0", arg645_1: "f32[][]cuda:0", arg646_1: "f32[][]cuda:0", arg647_1: "f32[][]cuda:0", arg648_1: "f32[][]cuda:0", arg649_1: "f32[][]cuda:0", arg650_1: "f32[][]cuda:0", arg651_1: "f32[][]cuda:0", arg652_1: "f32[][]cuda:0", arg653_1: "f32[][]cuda:0", arg654_1: "f32[][]cuda:0", arg655_1: "f32[][]cuda:0", arg656_1: "f32[][]cuda:0", arg657_1: "f32[][]cuda:0", arg658_1: "f32[][]cuda:0", arg659_1: "f32[][]cuda:0", arg660_1: "f32[][]cuda:0", arg661_1: "f32[][]cuda:0", arg662_1: "f32[][]cuda:0", arg663_1: "f32[][]cuda:0", arg664_1: "f32[][]cuda:0", arg665_1: "f32[][]cuda:0", arg666_1: "f32[][]cuda:0", arg667_1: "f32[][]cuda:0", arg668_1: "f32[][]cuda:0", arg669_1: "f32[][]cuda:0", arg670_1: "f32[][]cuda:0", arg671_1: "f32[][]cuda:0", arg672_1: "f32[][]cuda:0", arg673_1: "f32[][]cuda:0", arg674_1: "f32[][]cuda:0", arg675_1: "f32[][]cuda:0", arg676_1: "f32[][]cuda:0", arg677_1: "f32[][]cuda:0", arg678_1: "f32[][]cuda:0", arg679_1: "f32[][]cuda:0", arg680_1: "f32[][]cuda:0", arg681_1: "f32[][]cuda:0", arg682_1: "f32[][]cuda:0", arg683_1: "f32[][]cuda:0", arg684_1: "f32[][]cuda:0", arg685_1: "f32[][]cuda:0", arg686_1: "f32[][]cuda:0", arg687_1: "f32[][]cuda:0", arg688_1: "f32[][]cuda:0", arg689_1: "f32[][]cuda:0", arg690_1: "f32[][]cuda:0", arg691_1: "f32[][]cuda:0", arg692_1: "f32[][]cuda:0", arg693_1: "f32[][]cuda:0", arg694_1: "f32[][]cuda:0", arg695_1: "f32[][]cuda:0", arg696_1: "f32[][]cuda:0", arg697_1: "f32[][]cuda:0", arg698_1: "f32[][]cuda:0", arg699_1: "f32[][]cuda:0", arg700_1: "f32[][]cuda:0", arg701_1: "f32[][]cuda:0", arg702_1: "f32[][]cuda:0", arg703_1: "f32[][]cuda:0", arg704_1: "f32[][]cuda:0", arg705_1: "f32[][]cuda:0", arg706_1: "f32[][]cuda:0", arg707_1: "f32[][]cuda:0", arg708_1: "f32[][]cuda:0", arg709_1: "f32[][]cuda:0", arg710_1: "f32[][]cuda:0", arg711_1: "f32[][]cuda:0", arg712_1: "f32[][]cuda:0", arg713_1: "f32[][]cuda:0", arg714_1: "f32[][]cuda:0", arg715_1: "f32[][]cuda:0", arg716_1: "f32[][]cuda:0", arg717_1: "f32[][]cuda:0", arg718_1: "f32[][]cuda:0", arg719_1: "f32[][]cuda:0", arg720_1: "f32[][]cuda:0", arg721_1: "f32[][]cuda:0", arg722_1: "f32[][]cuda:0", arg723_1: "f32[][]cuda:0", arg724_1: "f32[][]cuda:0", arg725_1: "f32[][]cuda:0", arg726_1: "f32[][]cuda:0", arg727_1: "f32[][]cuda:0", arg728_1: "f32[][]cuda:0", arg729_1: "f32[][]cuda:0", arg730_1: "f32[][]cuda:0", arg731_1: "f32[][]cuda:0", arg732_1: "f32[][]cuda:0", arg733_1: "f32[][]cuda:0", arg734_1: "f32[][]cuda:0", arg735_1: "f32[][]cuda:0", arg736_1: "f32[][]cuda:0", arg737_1: "f32[][]cuda:0", arg738_1: "f32[][]cuda:0", arg739_1: "f32[][]cuda:0"): + # File: /data/users/jjwu/a/pytorch/torch/optim/adam.py:529 in _multi_tensor_adam, code: torch._foreach_add_(device_state_steps, 1) + _foreach_add = torch.ops.aten._foreach_add.Scalar([arg593_1, arg148_1, arg594_1, arg595_1, arg596_1, arg597_1, arg598_1, arg599_1, arg600_1, arg601_1, arg602_1, arg603_1, arg604_1, arg605_1, arg606_1, arg607_1, arg608_1, arg609_1, arg610_1, arg611_1, arg612_1, arg613_1, arg614_1, arg615_1, arg616_1, arg617_1, arg618_1, arg619_1, arg620_1, arg621_1, arg622_1, arg623_1, arg624_1, arg625_1, arg626_1, arg627_1, arg628_1, arg629_1, arg630_1, arg631_1, arg632_1, arg633_1, arg634_1, arg635_1, arg636_1, arg637_1, arg638_1, arg639_1, arg640_1, arg641_1, arg642_1, arg643_1, arg644_1, arg645_1, arg646_1, arg647_1, arg648_1, arg649_1, arg650_1, 
arg651_1, arg652_1, arg653_1, arg654_1, arg655_1, arg656_1, arg657_1, arg658_1, arg659_1, arg660_1, arg661_1, arg662_1, arg663_1, arg664_1, arg665_1, arg666_1, arg667_1, arg668_1, arg669_1, arg670_1, arg671_1, arg672_1, arg673_1, arg674_1, arg675_1, arg676_1, arg677_1, arg678_1, arg679_1, arg680_1, arg681_1, arg682_1, arg683_1, arg684_1, arg685_1, arg686_1, arg687_1, arg688_1, arg689_1, arg690_1, arg691_1, arg692_1, arg693_1, arg694_1, arg695_1, arg696_1, arg697_1, arg698_1, arg699_1, arg700_1, arg701_1, arg702_1, arg703_1, arg704_1, arg705_1, arg706_1, arg707_1, arg708_1, arg709_1, arg710_1, arg711_1, arg712_1, arg713_1, arg714_1, arg715_1, arg716_1, arg717_1, arg718_1, arg719_1, arg720_1, arg721_1, arg722_1, arg723_1, arg724_1, arg725_1, arg726_1, arg727_1, arg728_1, arg729_1, arg730_1, arg731_1, arg732_1, arg733_1, arg734_1, arg735_1, arg736_1, arg737_1, arg738_1, arg739_1], 1) + getitem: "f32[][]cuda:0" = _foreach_add[0] + getitem_1: "f32[][]cuda:0" = _foreach_add[1] + getitem_2: "f32[][]cuda:0" = _foreach_add[2] + getitem_3: "f32[][]cuda:0" = _foreach_add[3] + getitem_4: "f32[][]cuda:0" = _foreach_add[4] + getitem_5: "f32[][]cuda:0" = _foreach_add[5] + getitem_6: "f32[][]cuda:0" = _foreach_add[6] + getitem_7: "f32[][]cuda:0" = _foreach_add[7] + getitem_8: "f32[][]cuda:0" = _foreach_add[8] + getitem_9: "f32[][]cuda:0" = _foreach_add[9] + getitem_10: "f32[][]cuda:0" = _foreach_add[10] + getitem_11: "f32[][]cuda:0" = _foreach_add[11] + getitem_12: "f32[][]cuda:0" = _foreach_add[12] + getitem_13: "f32[][]cuda:0" = _foreach_add[13] + getitem_14: "f32[][]cuda:0" = _foreach_add[14] + getitem_15: "f32[][]cuda:0" = _foreach_add[15] + getitem_16: "f32[][]cuda:0" = _foreach_add[16] + getitem_17: "f32[][]cuda:0" = _foreach_add[17] + getitem_18: "f32[][]cuda:0" = _foreach_add[18] + getitem_19: "f32[][]cuda:0" = _foreach_add[19] + getitem_20: "f32[][]cuda:0" = _foreach_add[20] + getitem_21: "f32[][]cuda:0" = _foreach_add[21] + getitem_22: "f32[][]cuda:0" = _foreach_add[22] + getitem_23: "f32[][]cuda:0" = _foreach_add[23] + getitem_24: "f32[][]cuda:0" = _foreach_add[24] + getitem_25: "f32[][]cuda:0" = _foreach_add[25] + getitem_26: "f32[][]cuda:0" = _foreach_add[26] + getitem_27: "f32[][]cuda:0" = _foreach_add[27] + getitem_28: "f32[][]cuda:0" = _foreach_add[28] + getitem_29: "f32[][]cuda:0" = _foreach_add[29] + getitem_30: "f32[][]cuda:0" = _foreach_add[30] + getitem_31: "f32[][]cuda:0" = _foreach_add[31] + getitem_32: "f32[][]cuda:0" = _foreach_add[32] + getitem_33: "f32[][]cuda:0" = _foreach_add[33] + getitem_34: "f32[][]cuda:0" = _foreach_add[34] + getitem_35: "f32[][]cuda:0" = _foreach_add[35] + getitem_36: "f32[][]cuda:0" = _foreach_add[36] + getitem_37: "f32[][]cuda:0" = _foreach_add[37] + getitem_38: "f32[][]cuda:0" = _foreach_add[38] + getitem_39: "f32[][]cuda:0" = _foreach_add[39] + getitem_40: "f32[][]cuda:0" = _foreach_add[40] + getitem_41: "f32[][]cuda:0" = _foreach_add[41] + getitem_42: "f32[][]cuda:0" = _foreach_add[42] + getitem_43: "f32[][]cuda:0" = _foreach_add[43] + getitem_44: "f32[][]cuda:0" = _foreach_add[44] + getitem_45: "f32[][]cuda:0" = _foreach_add[45] + getitem_46: "f32[][]cuda:0" = _foreach_add[46] + getitem_47: "f32[][]cuda:0" = _foreach_add[47] + getitem_48: "f32[][]cuda:0" = _foreach_add[48] + getitem_49: "f32[][]cuda:0" = _foreach_add[49] + getitem_50: "f32[][]cuda:0" = _foreach_add[50] + getitem_51: "f32[][]cuda:0" = _foreach_add[51] + getitem_52: "f32[][]cuda:0" = _foreach_add[52] + getitem_53: "f32[][]cuda:0" = _foreach_add[53] + getitem_54: "f32[][]cuda:0" = 
_foreach_add[54] + getitem_55: "f32[][]cuda:0" = _foreach_add[55] + getitem_56: "f32[][]cuda:0" = _foreach_add[56] + getitem_57: "f32[][]cuda:0" = _foreach_add[57] + getitem_58: "f32[][]cuda:0" = _foreach_add[58] + getitem_59: "f32[][]cuda:0" = _foreach_add[59] + getitem_60: "f32[][]cuda:0" = _foreach_add[60] + getitem_61: "f32[][]cuda:0" = _foreach_add[61] + getitem_62: "f32[][]cuda:0" = _foreach_add[62] + getitem_63: "f32[][]cuda:0" = _foreach_add[63] + getitem_64: "f32[][]cuda:0" = _foreach_add[64] + getitem_65: "f32[][]cuda:0" = _foreach_add[65] + getitem_66: "f32[][]cuda:0" = _foreach_add[66] + getitem_67: "f32[][]cuda:0" = _foreach_add[67] + getitem_68: "f32[][]cuda:0" = _foreach_add[68] + getitem_69: "f32[][]cuda:0" = _foreach_add[69] + getitem_70: "f32[][]cuda:0" = _foreach_add[70] + getitem_71: "f32[][]cuda:0" = _foreach_add[71] + getitem_72: "f32[][]cuda:0" = _foreach_add[72] + getitem_73: "f32[][]cuda:0" = _foreach_add[73] + getitem_74: "f32[][]cuda:0" = _foreach_add[74] + getitem_75: "f32[][]cuda:0" = _foreach_add[75] + getitem_76: "f32[][]cuda:0" = _foreach_add[76] + getitem_77: "f32[][]cuda:0" = _foreach_add[77] + getitem_78: "f32[][]cuda:0" = _foreach_add[78] + getitem_79: "f32[][]cuda:0" = _foreach_add[79] + getitem_80: "f32[][]cuda:0" = _foreach_add[80] + getitem_81: "f32[][]cuda:0" = _foreach_add[81] + getitem_82: "f32[][]cuda:0" = _foreach_add[82] + getitem_83: "f32[][]cuda:0" = _foreach_add[83] + getitem_84: "f32[][]cuda:0" = _foreach_add[84] + getitem_85: "f32[][]cuda:0" = _foreach_add[85] + getitem_86: "f32[][]cuda:0" = _foreach_add[86] + getitem_87: "f32[][]cuda:0" = _foreach_add[87] + getitem_88: "f32[][]cuda:0" = _foreach_add[88] + getitem_89: "f32[][]cuda:0" = _foreach_add[89] + getitem_90: "f32[][]cuda:0" = _foreach_add[90] + getitem_91: "f32[][]cuda:0" = _foreach_add[91] + getitem_92: "f32[][]cuda:0" = _foreach_add[92] + getitem_93: "f32[][]cuda:0" = _foreach_add[93] + getitem_94: "f32[][]cuda:0" = _foreach_add[94] + getitem_95: "f32[][]cuda:0" = _foreach_add[95] + getitem_96: "f32[][]cuda:0" = _foreach_add[96] + getitem_97: "f32[][]cuda:0" = _foreach_add[97] + getitem_98: "f32[][]cuda:0" = _foreach_add[98] + getitem_99: "f32[][]cuda:0" = _foreach_add[99] + getitem_100: "f32[][]cuda:0" = _foreach_add[100] + getitem_101: "f32[][]cuda:0" = _foreach_add[101] + getitem_102: "f32[][]cuda:0" = _foreach_add[102] + getitem_103: "f32[][]cuda:0" = _foreach_add[103] + getitem_104: "f32[][]cuda:0" = _foreach_add[104] + getitem_105: "f32[][]cuda:0" = _foreach_add[105] + getitem_106: "f32[][]cuda:0" = _foreach_add[106] + getitem_107: "f32[][]cuda:0" = _foreach_add[107] + getitem_108: "f32[][]cuda:0" = _foreach_add[108] + getitem_109: "f32[][]cuda:0" = _foreach_add[109] + getitem_110: "f32[][]cuda:0" = _foreach_add[110] + getitem_111: "f32[][]cuda:0" = _foreach_add[111] + getitem_112: "f32[][]cuda:0" = _foreach_add[112] + getitem_113: "f32[][]cuda:0" = _foreach_add[113] + getitem_114: "f32[][]cuda:0" = _foreach_add[114] + getitem_115: "f32[][]cuda:0" = _foreach_add[115] + getitem_116: "f32[][]cuda:0" = _foreach_add[116] + getitem_117: "f32[][]cuda:0" = _foreach_add[117] + getitem_118: "f32[][]cuda:0" = _foreach_add[118] + getitem_119: "f32[][]cuda:0" = _foreach_add[119] + getitem_120: "f32[][]cuda:0" = _foreach_add[120] + getitem_121: "f32[][]cuda:0" = _foreach_add[121] + getitem_122: "f32[][]cuda:0" = _foreach_add[122] + getitem_123: "f32[][]cuda:0" = _foreach_add[123] + getitem_124: "f32[][]cuda:0" = _foreach_add[124] + getitem_125: "f32[][]cuda:0" = _foreach_add[125] + 
getitem_126: "f32[][]cuda:0" = _foreach_add[126] + getitem_127: "f32[][]cuda:0" = _foreach_add[127] + getitem_128: "f32[][]cuda:0" = _foreach_add[128] + getitem_129: "f32[][]cuda:0" = _foreach_add[129] + getitem_130: "f32[][]cuda:0" = _foreach_add[130] + getitem_131: "f32[][]cuda:0" = _foreach_add[131] + getitem_132: "f32[][]cuda:0" = _foreach_add[132] + getitem_133: "f32[][]cuda:0" = _foreach_add[133] + getitem_134: "f32[][]cuda:0" = _foreach_add[134] + getitem_135: "f32[][]cuda:0" = _foreach_add[135] + getitem_136: "f32[][]cuda:0" = _foreach_add[136] + getitem_137: "f32[][]cuda:0" = _foreach_add[137] + getitem_138: "f32[][]cuda:0" = _foreach_add[138] + getitem_139: "f32[][]cuda:0" = _foreach_add[139] + getitem_140: "f32[][]cuda:0" = _foreach_add[140] + getitem_141: "f32[][]cuda:0" = _foreach_add[141] + getitem_142: "f32[][]cuda:0" = _foreach_add[142] + getitem_143: "f32[][]cuda:0" = _foreach_add[143] + getitem_144: "f32[][]cuda:0" = _foreach_add[144] + getitem_145: "f32[][]cuda:0" = _foreach_add[145] + getitem_146: "f32[][]cuda:0" = _foreach_add[146] + getitem_147: "f32[][]cuda:0" = _foreach_add[147]; _foreach_add = None + + # File: /data/users/jjwu/a/pytorch/torch/optim/adam.py:541 in _multi_tensor_adam, code: torch._foreach_lerp_(device_exp_avgs, device_grads, 1 - beta1) + _foreach_sub = torch.ops.aten._foreach_sub.List([arg151_1, arg152_1, arg153_1, arg154_1, arg155_1, arg156_1, arg157_1, arg158_1, arg159_1, arg160_1, arg161_1, arg162_1, arg163_1, arg164_1, arg165_1, arg166_1, arg167_1, arg168_1, arg169_1, arg170_1, arg171_1, arg172_1, arg173_1, arg174_1, arg175_1, arg176_1, arg177_1, arg178_1, arg179_1, arg180_1, arg181_1, arg182_1, arg183_1, arg184_1, arg185_1, arg186_1, arg187_1, arg188_1, arg189_1, arg190_1, arg191_1, arg192_1, arg193_1, arg194_1, arg195_1, arg196_1, arg197_1, arg198_1, arg199_1, arg200_1, arg201_1, arg202_1, arg203_1, arg204_1, arg205_1, arg206_1, arg207_1, arg208_1, arg209_1, arg210_1, arg211_1, arg212_1, arg213_1, arg214_1, arg215_1, arg216_1, arg217_1, arg218_1, arg219_1, arg220_1, arg221_1, arg222_1, arg223_1, arg224_1, arg225_1, arg226_1, arg227_1, arg228_1, arg229_1, arg230_1, arg231_1, arg232_1, arg233_1, arg234_1, arg235_1, arg236_1, arg237_1, arg238_1, arg239_1, arg240_1, arg241_1, arg242_1, arg243_1, arg244_1, arg245_1, arg246_1, arg247_1, arg248_1, arg249_1, arg250_1, arg251_1, arg252_1, arg253_1, arg254_1, arg255_1, arg256_1, arg257_1, arg258_1, arg259_1, arg260_1, arg261_1, arg262_1, arg263_1, arg264_1, arg265_1, arg266_1, arg267_1, arg268_1, arg269_1, arg270_1, arg271_1, arg272_1, arg273_1, arg274_1, arg275_1, arg276_1, arg277_1, arg278_1, arg279_1, arg280_1, arg281_1, arg282_1, arg283_1, arg284_1, arg285_1, arg286_1, arg287_1, arg288_1, arg289_1, arg290_1, arg291_1, arg292_1, arg293_1, arg294_1, arg295_1, arg296_1, arg297_1, arg298_1], [arg299_1, arg149_1, arg300_1, arg301_1, arg302_1, arg303_1, arg304_1, arg305_1, arg306_1, arg307_1, arg308_1, arg309_1, arg310_1, arg311_1, arg312_1, arg313_1, arg314_1, arg315_1, arg316_1, arg317_1, arg318_1, arg319_1, arg320_1, arg321_1, arg322_1, arg323_1, arg324_1, arg325_1, arg326_1, arg327_1, arg328_1, arg329_1, arg330_1, arg331_1, arg332_1, arg333_1, arg334_1, arg335_1, arg336_1, arg337_1, arg338_1, arg339_1, arg340_1, arg341_1, arg342_1, arg343_1, arg344_1, arg345_1, arg346_1, arg347_1, arg348_1, arg349_1, arg350_1, arg351_1, arg352_1, arg353_1, arg354_1, arg355_1, arg356_1, arg357_1, arg358_1, arg359_1, arg360_1, arg361_1, arg362_1, arg363_1, arg364_1, arg365_1, arg366_1, arg367_1, arg368_1, arg369_1, 
arg370_1, arg371_1, arg372_1, arg373_1, arg374_1, arg375_1, arg376_1, arg377_1, arg378_1, arg379_1, arg380_1, arg381_1, arg382_1, arg383_1, arg384_1, arg385_1, arg386_1, arg387_1, arg388_1, arg389_1, arg390_1, arg391_1, arg392_1, arg393_1, arg394_1, arg395_1, arg396_1, arg397_1, arg398_1, arg399_1, arg400_1, arg401_1, arg402_1, arg403_1, arg404_1, arg405_1, arg406_1, arg407_1, arg408_1, arg409_1, arg410_1, arg411_1, arg412_1, arg413_1, arg414_1, arg415_1, arg416_1, arg417_1, arg418_1, arg419_1, arg420_1, arg421_1, arg422_1, arg423_1, arg424_1, arg425_1, arg426_1, arg427_1, arg428_1, arg429_1, arg430_1, arg431_1, arg432_1, arg433_1, arg434_1, arg435_1, arg436_1, arg437_1, arg438_1, arg439_1, arg440_1, arg441_1, arg442_1, arg443_1, arg444_1, arg445_1]) + getitem_148: "f32[50304, 768][768, 1]cuda:0" = _foreach_sub[0] + getitem_149: "f32[1024, 768][768, 1]cuda:0" = _foreach_sub[1] + getitem_150: "f32[768][1]cuda:0" = _foreach_sub[2] + getitem_151: "f32[768][1]cuda:0" = _foreach_sub[3] + getitem_152: "f32[2304, 768][768, 1]cuda:0" = _foreach_sub[4] + getitem_153: "f32[2304][1]cuda:0" = _foreach_sub[5] + getitem_154: "f32[768, 768][768, 1]cuda:0" = _foreach_sub[6] + getitem_155: "f32[768][1]cuda:0" = _foreach_sub[7] + getitem_156: "f32[768][1]cuda:0" = _foreach_sub[8] + getitem_157: "f32[768][1]cuda:0" = _foreach_sub[9] + getitem_158: "f32[3072, 768][768, 1]cuda:0" = _foreach_sub[10] + getitem_159: "f32[3072][1]cuda:0" = _foreach_sub[11] + getitem_160: "f32[768, 3072][3072, 1]cuda:0" = _foreach_sub[12] + getitem_161: "f32[768][1]cuda:0" = _foreach_sub[13] + getitem_162: "f32[768][1]cuda:0" = _foreach_sub[14] + getitem_163: "f32[768][1]cuda:0" = _foreach_sub[15] + getitem_164: "f32[2304, 768][768, 1]cuda:0" = _foreach_sub[16] + getitem_165: "f32[2304][1]cuda:0" = _foreach_sub[17] + getitem_166: "f32[768, 768][768, 1]cuda:0" = _foreach_sub[18] + getitem_167: "f32[768][1]cuda:0" = _foreach_sub[19] + getitem_168: "f32[768][1]cuda:0" = _foreach_sub[20] + getitem_169: "f32[768][1]cuda:0" = _foreach_sub[21] + getitem_170: "f32[3072, 768][768, 1]cuda:0" = _foreach_sub[22] + getitem_171: "f32[3072][1]cuda:0" = _foreach_sub[23] + getitem_172: "f32[768, 3072][3072, 1]cuda:0" = _foreach_sub[24] + getitem_173: "f32[768][1]cuda:0" = _foreach_sub[25] + getitem_174: "f32[768][1]cuda:0" = _foreach_sub[26] + getitem_175: "f32[768][1]cuda:0" = _foreach_sub[27] + getitem_176: "f32[2304, 768][768, 1]cuda:0" = _foreach_sub[28] + getitem_177: "f32[2304][1]cuda:0" = _foreach_sub[29] + getitem_178: "f32[768, 768][768, 1]cuda:0" = _foreach_sub[30] + getitem_179: "f32[768][1]cuda:0" = _foreach_sub[31] + getitem_180: "f32[768][1]cuda:0" = _foreach_sub[32] + getitem_181: "f32[768][1]cuda:0" = _foreach_sub[33] + getitem_182: "f32[3072, 768][768, 1]cuda:0" = _foreach_sub[34] + getitem_183: "f32[3072][1]cuda:0" = _foreach_sub[35] + getitem_184: "f32[768, 3072][3072, 1]cuda:0" = _foreach_sub[36] + getitem_185: "f32[768][1]cuda:0" = _foreach_sub[37] + getitem_186: "f32[768][1]cuda:0" = _foreach_sub[38] + getitem_187: "f32[768][1]cuda:0" = _foreach_sub[39] + getitem_188: "f32[2304, 768][768, 1]cuda:0" = _foreach_sub[40] + getitem_189: "f32[2304][1]cuda:0" = _foreach_sub[41] + getitem_190: "f32[768, 768][768, 1]cuda:0" = _foreach_sub[42] + getitem_191: "f32[768][1]cuda:0" = _foreach_sub[43] + getitem_192: "f32[768][1]cuda:0" = _foreach_sub[44] + getitem_193: "f32[768][1]cuda:0" = _foreach_sub[45] + getitem_194: "f32[3072, 768][768, 1]cuda:0" = _foreach_sub[46] + getitem_195: "f32[3072][1]cuda:0" = _foreach_sub[47] + getitem_196: 
"f32[768, 3072][3072, 1]cuda:0" = _foreach_sub[48] + getitem_197: "f32[768][1]cuda:0" = _foreach_sub[49] + getitem_198: "f32[768][1]cuda:0" = _foreach_sub[50] + getitem_199: "f32[768][1]cuda:0" = _foreach_sub[51] + getitem_200: "f32[2304, 768][768, 1]cuda:0" = _foreach_sub[52] + getitem_201: "f32[2304][1]cuda:0" = _foreach_sub[53] + getitem_202: "f32[768, 768][768, 1]cuda:0" = _foreach_sub[54] + getitem_203: "f32[768][1]cuda:0" = _foreach_sub[55] + getitem_204: "f32[768][1]cuda:0" = _foreach_sub[56] + getitem_205: "f32[768][1]cuda:0" = _foreach_sub[57] + getitem_206: "f32[3072, 768][768, 1]cuda:0" = _foreach_sub[58] + getitem_207: "f32[3072][1]cuda:0" = _foreach_sub[59] + getitem_208: "f32[768, 3072][3072, 1]cuda:0" = _foreach_sub[60] + getitem_209: "f32[768][1]cuda:0" = _foreach_sub[61] + getitem_210: "f32[768][1]cuda:0" = _foreach_sub[62] + getitem_211: "f32[768][1]cuda:0" = _foreach_sub[63] + getitem_212: "f32[2304, 768][768, 1]cuda:0" = _foreach_sub[64] + getitem_213: "f32[2304][1]cuda:0" = _foreach_sub[65] + getitem_214: "f32[768, 768][768, 1]cuda:0" = _foreach_sub[66] + getitem_215: "f32[768][1]cuda:0" = _foreach_sub[67] + getitem_216: "f32[768][1]cuda:0" = _foreach_sub[68] + getitem_217: "f32[768][1]cuda:0" = _foreach_sub[69] + getitem_218: "f32[3072, 768][768, 1]cuda:0" = _foreach_sub[70] + getitem_219: "f32[3072][1]cuda:0" = _foreach_sub[71] + getitem_220: "f32[768, 3072][3072, 1]cuda:0" = _foreach_sub[72] + getitem_221: "f32[768][1]cuda:0" = _foreach_sub[73] + getitem_222: "f32[768][1]cuda:0" = _foreach_sub[74] + getitem_223: "f32[768][1]cuda:0" = _foreach_sub[75] + getitem_224: "f32[2304, 768][768, 1]cuda:0" = _foreach_sub[76] + getitem_225: "f32[2304][1]cuda:0" = _foreach_sub[77] + getitem_226: "f32[768, 768][768, 1]cuda:0" = _foreach_sub[78] + getitem_227: "f32[768][1]cuda:0" = _foreach_sub[79] + getitem_228: "f32[768][1]cuda:0" = _foreach_sub[80] + getitem_229: "f32[768][1]cuda:0" = _foreach_sub[81] + getitem_230: "f32[3072, 768][768, 1]cuda:0" = _foreach_sub[82] + getitem_231: "f32[3072][1]cuda:0" = _foreach_sub[83] + getitem_232: "f32[768, 3072][3072, 1]cuda:0" = _foreach_sub[84] + getitem_233: "f32[768][1]cuda:0" = _foreach_sub[85] + getitem_234: "f32[768][1]cuda:0" = _foreach_sub[86] + getitem_235: "f32[768][1]cuda:0" = _foreach_sub[87] + getitem_236: "f32[2304, 768][768, 1]cuda:0" = _foreach_sub[88] + getitem_237: "f32[2304][1]cuda:0" = _foreach_sub[89] + getitem_238: "f32[768, 768][768, 1]cuda:0" = _foreach_sub[90] + getitem_239: "f32[768][1]cuda:0" = _foreach_sub[91] + getitem_240: "f32[768][1]cuda:0" = _foreach_sub[92] + getitem_241: "f32[768][1]cuda:0" = _foreach_sub[93] + getitem_242: "f32[3072, 768][768, 1]cuda:0" = _foreach_sub[94] + getitem_243: "f32[3072][1]cuda:0" = _foreach_sub[95] + getitem_244: "f32[768, 3072][3072, 1]cuda:0" = _foreach_sub[96] + getitem_245: "f32[768][1]cuda:0" = _foreach_sub[97] + getitem_246: "f32[768][1]cuda:0" = _foreach_sub[98] + getitem_247: "f32[768][1]cuda:0" = _foreach_sub[99] + getitem_248: "f32[2304, 768][768, 1]cuda:0" = _foreach_sub[100] + getitem_249: "f32[2304][1]cuda:0" = _foreach_sub[101] + getitem_250: "f32[768, 768][768, 1]cuda:0" = _foreach_sub[102] + getitem_251: "f32[768][1]cuda:0" = _foreach_sub[103] + getitem_252: "f32[768][1]cuda:0" = _foreach_sub[104] + getitem_253: "f32[768][1]cuda:0" = _foreach_sub[105] + getitem_254: "f32[3072, 768][768, 1]cuda:0" = _foreach_sub[106] + getitem_255: "f32[3072][1]cuda:0" = _foreach_sub[107] + getitem_256: "f32[768, 3072][3072, 1]cuda:0" = _foreach_sub[108] + getitem_257: 
"f32[768][1]cuda:0" = _foreach_sub[109] + getitem_258: "f32[768][1]cuda:0" = _foreach_sub[110] + getitem_259: "f32[768][1]cuda:0" = _foreach_sub[111] + getitem_260: "f32[2304, 768][768, 1]cuda:0" = _foreach_sub[112] + getitem_261: "f32[2304][1]cuda:0" = _foreach_sub[113] + getitem_262: "f32[768, 768][768, 1]cuda:0" = _foreach_sub[114] + getitem_263: "f32[768][1]cuda:0" = _foreach_sub[115] + getitem_264: "f32[768][1]cuda:0" = _foreach_sub[116] + getitem_265: "f32[768][1]cuda:0" = _foreach_sub[117] + getitem_266: "f32[3072, 768][768, 1]cuda:0" = _foreach_sub[118] + getitem_267: "f32[3072][1]cuda:0" = _foreach_sub[119] + getitem_268: "f32[768, 3072][3072, 1]cuda:0" = _foreach_sub[120] + getitem_269: "f32[768][1]cuda:0" = _foreach_sub[121] + getitem_270: "f32[768][1]cuda:0" = _foreach_sub[122] + getitem_271: "f32[768][1]cuda:0" = _foreach_sub[123] + getitem_272: "f32[2304, 768][768, 1]cuda:0" = _foreach_sub[124] + getitem_273: "f32[2304][1]cuda:0" = _foreach_sub[125] + getitem_274: "f32[768, 768][768, 1]cuda:0" = _foreach_sub[126] + getitem_275: "f32[768][1]cuda:0" = _foreach_sub[127] + getitem_276: "f32[768][1]cuda:0" = _foreach_sub[128] + getitem_277: "f32[768][1]cuda:0" = _foreach_sub[129] + getitem_278: "f32[3072, 768][768, 1]cuda:0" = _foreach_sub[130] + getitem_279: "f32[3072][1]cuda:0" = _foreach_sub[131] + getitem_280: "f32[768, 3072][3072, 1]cuda:0" = _foreach_sub[132] + getitem_281: "f32[768][1]cuda:0" = _foreach_sub[133] + getitem_282: "f32[768][1]cuda:0" = _foreach_sub[134] + getitem_283: "f32[768][1]cuda:0" = _foreach_sub[135] + getitem_284: "f32[2304, 768][768, 1]cuda:0" = _foreach_sub[136] + getitem_285: "f32[2304][1]cuda:0" = _foreach_sub[137] + getitem_286: "f32[768, 768][768, 1]cuda:0" = _foreach_sub[138] + getitem_287: "f32[768][1]cuda:0" = _foreach_sub[139] + getitem_288: "f32[768][1]cuda:0" = _foreach_sub[140] + getitem_289: "f32[768][1]cuda:0" = _foreach_sub[141] + getitem_290: "f32[3072, 768][768, 1]cuda:0" = _foreach_sub[142] + getitem_291: "f32[3072][1]cuda:0" = _foreach_sub[143] + getitem_292: "f32[768, 3072][3072, 1]cuda:0" = _foreach_sub[144] + getitem_293: "f32[768][1]cuda:0" = _foreach_sub[145] + getitem_294: "f32[768][1]cuda:0" = _foreach_sub[146] + getitem_295: "f32[768][1]cuda:0" = _foreach_sub[147]; _foreach_sub = None + _foreach_mul = torch.ops.aten._foreach_mul.Scalar([getitem_148, getitem_149, getitem_150, getitem_151, getitem_152, getitem_153, getitem_154, getitem_155, getitem_156, getitem_157, getitem_158, getitem_159, getitem_160, getitem_161, getitem_162, getitem_163, getitem_164, getitem_165, getitem_166, getitem_167, getitem_168, getitem_169, getitem_170, getitem_171, getitem_172, getitem_173, getitem_174, getitem_175, getitem_176, getitem_177, getitem_178, getitem_179, getitem_180, getitem_181, getitem_182, getitem_183, getitem_184, getitem_185, getitem_186, getitem_187, getitem_188, getitem_189, getitem_190, getitem_191, getitem_192, getitem_193, getitem_194, getitem_195, getitem_196, getitem_197, getitem_198, getitem_199, getitem_200, getitem_201, getitem_202, getitem_203, getitem_204, getitem_205, getitem_206, getitem_207, getitem_208, getitem_209, getitem_210, getitem_211, getitem_212, getitem_213, getitem_214, getitem_215, getitem_216, getitem_217, getitem_218, getitem_219, getitem_220, getitem_221, getitem_222, getitem_223, getitem_224, getitem_225, getitem_226, getitem_227, getitem_228, getitem_229, getitem_230, getitem_231, getitem_232, getitem_233, getitem_234, getitem_235, getitem_236, getitem_237, getitem_238, getitem_239, getitem_240, 
getitem_241, getitem_242, getitem_243, getitem_244, getitem_245, getitem_246, getitem_247, getitem_248, getitem_249, getitem_250, getitem_251, getitem_252, getitem_253, getitem_254, getitem_255, getitem_256, getitem_257, getitem_258, getitem_259, getitem_260, getitem_261, getitem_262, getitem_263, getitem_264, getitem_265, getitem_266, getitem_267, getitem_268, getitem_269, getitem_270, getitem_271, getitem_272, getitem_273, getitem_274, getitem_275, getitem_276, getitem_277, getitem_278, getitem_279, getitem_280, getitem_281, getitem_282, getitem_283, getitem_284, getitem_285, getitem_286, getitem_287, getitem_288, getitem_289, getitem_290, getitem_291, getitem_292, getitem_293, getitem_294, getitem_295], 0.09999999999999998); getitem_148 = getitem_149 = getitem_150 = getitem_151 = getitem_152 = getitem_153 = getitem_154 = getitem_155 = getitem_156 = getitem_157 = getitem_158 = getitem_159 = getitem_160 = getitem_161 = getitem_162 = getitem_163 = getitem_164 = getitem_165 = getitem_166 = getitem_167 = getitem_168 = getitem_169 = getitem_170 = getitem_171 = getitem_172 = getitem_173 = getitem_174 = getitem_175 = getitem_176 = getitem_177 = getitem_178 = getitem_179 = getitem_180 = getitem_181 = getitem_182 = getitem_183 = getitem_184 = getitem_185 = getitem_186 = getitem_187 = getitem_188 = getitem_189 = getitem_190 = getitem_191 = getitem_192 = getitem_193 = getitem_194 = getitem_195 = getitem_196 = getitem_197 = getitem_198 = getitem_199 = getitem_200 = getitem_201 = getitem_202 = getitem_203 = getitem_204 = getitem_205 = getitem_206 = getitem_207 = getitem_208 = getitem_209 = getitem_210 = getitem_211 = getitem_212 = getitem_213 = getitem_214 = getitem_215 = getitem_216 = getitem_217 = getitem_218 = getitem_219 = getitem_220 = getitem_221 = getitem_222 = getitem_223 = getitem_224 = getitem_225 = getitem_226 = getitem_227 = getitem_228 = getitem_229 = getitem_230 = getitem_231 = getitem_232 = getitem_233 = getitem_234 = getitem_235 = getitem_236 = getitem_237 = getitem_238 = getitem_239 = getitem_240 = getitem_241 = getitem_242 = getitem_243 = getitem_244 = getitem_245 = getitem_246 = getitem_247 = getitem_248 = getitem_249 = getitem_250 = getitem_251 = getitem_252 = getitem_253 = getitem_254 = getitem_255 = getitem_256 = getitem_257 = getitem_258 = getitem_259 = getitem_260 = getitem_261 = getitem_262 = getitem_263 = getitem_264 = getitem_265 = getitem_266 = getitem_267 = getitem_268 = getitem_269 = getitem_270 = getitem_271 = getitem_272 = getitem_273 = getitem_274 = getitem_275 = getitem_276 = getitem_277 = getitem_278 = getitem_279 = getitem_280 = getitem_281 = getitem_282 = getitem_283 = getitem_284 = getitem_285 = getitem_286 = getitem_287 = getitem_288 = getitem_289 = getitem_290 = getitem_291 = getitem_292 = getitem_293 = getitem_294 = getitem_295 = None + getitem_296: "f32[50304, 768][768, 1]cuda:0" = _foreach_mul[0] + getitem_297: "f32[1024, 768][768, 1]cuda:0" = _foreach_mul[1] + getitem_298: "f32[768][1]cuda:0" = _foreach_mul[2] + getitem_299: "f32[768][1]cuda:0" = _foreach_mul[3] + getitem_300: "f32[2304, 768][768, 1]cuda:0" = _foreach_mul[4] + getitem_301: "f32[2304][1]cuda:0" = _foreach_mul[5] + getitem_302: "f32[768, 768][768, 1]cuda:0" = _foreach_mul[6] + getitem_303: "f32[768][1]cuda:0" = _foreach_mul[7] + getitem_304: "f32[768][1]cuda:0" = _foreach_mul[8] + getitem_305: "f32[768][1]cuda:0" = _foreach_mul[9] + getitem_306: "f32[3072, 768][768, 1]cuda:0" = _foreach_mul[10] + getitem_307: "f32[3072][1]cuda:0" = _foreach_mul[11] + getitem_308: "f32[768, 3072][3072, 
1]cuda:0" = _foreach_mul[12] + getitem_309: "f32[768][1]cuda:0" = _foreach_mul[13] + getitem_310: "f32[768][1]cuda:0" = _foreach_mul[14] + getitem_311: "f32[768][1]cuda:0" = _foreach_mul[15] + getitem_312: "f32[2304, 768][768, 1]cuda:0" = _foreach_mul[16] + getitem_313: "f32[2304][1]cuda:0" = _foreach_mul[17] + getitem_314: "f32[768, 768][768, 1]cuda:0" = _foreach_mul[18] + getitem_315: "f32[768][1]cuda:0" = _foreach_mul[19] + getitem_316: "f32[768][1]cuda:0" = _foreach_mul[20] + getitem_317: "f32[768][1]cuda:0" = _foreach_mul[21] + getitem_318: "f32[3072, 768][768, 1]cuda:0" = _foreach_mul[22] + getitem_319: "f32[3072][1]cuda:0" = _foreach_mul[23] + getitem_320: "f32[768, 3072][3072, 1]cuda:0" = _foreach_mul[24] + getitem_321: "f32[768][1]cuda:0" = _foreach_mul[25] + getitem_322: "f32[768][1]cuda:0" = _foreach_mul[26] + getitem_323: "f32[768][1]cuda:0" = _foreach_mul[27] + getitem_324: "f32[2304, 768][768, 1]cuda:0" = _foreach_mul[28] + getitem_325: "f32[2304][1]cuda:0" = _foreach_mul[29] + getitem_326: "f32[768, 768][768, 1]cuda:0" = _foreach_mul[30] + getitem_327: "f32[768][1]cuda:0" = _foreach_mul[31] + getitem_328: "f32[768][1]cuda:0" = _foreach_mul[32] + getitem_329: "f32[768][1]cuda:0" = _foreach_mul[33] + getitem_330: "f32[3072, 768][768, 1]cuda:0" = _foreach_mul[34] + getitem_331: "f32[3072][1]cuda:0" = _foreach_mul[35] + getitem_332: "f32[768, 3072][3072, 1]cuda:0" = _foreach_mul[36] + getitem_333: "f32[768][1]cuda:0" = _foreach_mul[37] + getitem_334: "f32[768][1]cuda:0" = _foreach_mul[38] + getitem_335: "f32[768][1]cuda:0" = _foreach_mul[39] + getitem_336: "f32[2304, 768][768, 1]cuda:0" = _foreach_mul[40] + getitem_337: "f32[2304][1]cuda:0" = _foreach_mul[41] + getitem_338: "f32[768, 768][768, 1]cuda:0" = _foreach_mul[42] + getitem_339: "f32[768][1]cuda:0" = _foreach_mul[43] + getitem_340: "f32[768][1]cuda:0" = _foreach_mul[44] + getitem_341: "f32[768][1]cuda:0" = _foreach_mul[45] + getitem_342: "f32[3072, 768][768, 1]cuda:0" = _foreach_mul[46] + getitem_343: "f32[3072][1]cuda:0" = _foreach_mul[47] + getitem_344: "f32[768, 3072][3072, 1]cuda:0" = _foreach_mul[48] + getitem_345: "f32[768][1]cuda:0" = _foreach_mul[49] + getitem_346: "f32[768][1]cuda:0" = _foreach_mul[50] + getitem_347: "f32[768][1]cuda:0" = _foreach_mul[51] + getitem_348: "f32[2304, 768][768, 1]cuda:0" = _foreach_mul[52] + getitem_349: "f32[2304][1]cuda:0" = _foreach_mul[53] + getitem_350: "f32[768, 768][768, 1]cuda:0" = _foreach_mul[54] + getitem_351: "f32[768][1]cuda:0" = _foreach_mul[55] + getitem_352: "f32[768][1]cuda:0" = _foreach_mul[56] + getitem_353: "f32[768][1]cuda:0" = _foreach_mul[57] + getitem_354: "f32[3072, 768][768, 1]cuda:0" = _foreach_mul[58] + getitem_355: "f32[3072][1]cuda:0" = _foreach_mul[59] + getitem_356: "f32[768, 3072][3072, 1]cuda:0" = _foreach_mul[60] + getitem_357: "f32[768][1]cuda:0" = _foreach_mul[61] + getitem_358: "f32[768][1]cuda:0" = _foreach_mul[62] + getitem_359: "f32[768][1]cuda:0" = _foreach_mul[63] + getitem_360: "f32[2304, 768][768, 1]cuda:0" = _foreach_mul[64] + getitem_361: "f32[2304][1]cuda:0" = _foreach_mul[65] + getitem_362: "f32[768, 768][768, 1]cuda:0" = _foreach_mul[66] + getitem_363: "f32[768][1]cuda:0" = _foreach_mul[67] + getitem_364: "f32[768][1]cuda:0" = _foreach_mul[68] + getitem_365: "f32[768][1]cuda:0" = _foreach_mul[69] + getitem_366: "f32[3072, 768][768, 1]cuda:0" = _foreach_mul[70] + getitem_367: "f32[3072][1]cuda:0" = _foreach_mul[71] + getitem_368: "f32[768, 3072][3072, 1]cuda:0" = _foreach_mul[72] + getitem_369: "f32[768][1]cuda:0" = _foreach_mul[73] + 
getitem_370: "f32[768][1]cuda:0" = _foreach_mul[74] + getitem_371: "f32[768][1]cuda:0" = _foreach_mul[75] + getitem_372: "f32[2304, 768][768, 1]cuda:0" = _foreach_mul[76] + getitem_373: "f32[2304][1]cuda:0" = _foreach_mul[77] + getitem_374: "f32[768, 768][768, 1]cuda:0" = _foreach_mul[78] + getitem_375: "f32[768][1]cuda:0" = _foreach_mul[79] + getitem_376: "f32[768][1]cuda:0" = _foreach_mul[80] + getitem_377: "f32[768][1]cuda:0" = _foreach_mul[81] + getitem_378: "f32[3072, 768][768, 1]cuda:0" = _foreach_mul[82] + getitem_379: "f32[3072][1]cuda:0" = _foreach_mul[83] + getitem_380: "f32[768, 3072][3072, 1]cuda:0" = _foreach_mul[84] + getitem_381: "f32[768][1]cuda:0" = _foreach_mul[85] + getitem_382: "f32[768][1]cuda:0" = _foreach_mul[86] + getitem_383: "f32[768][1]cuda:0" = _foreach_mul[87] + getitem_384: "f32[2304, 768][768, 1]cuda:0" = _foreach_mul[88] + getitem_385: "f32[2304][1]cuda:0" = _foreach_mul[89] + getitem_386: "f32[768, 768][768, 1]cuda:0" = _foreach_mul[90] + getitem_387: "f32[768][1]cuda:0" = _foreach_mul[91] + getitem_388: "f32[768][1]cuda:0" = _foreach_mul[92] + getitem_389: "f32[768][1]cuda:0" = _foreach_mul[93] + getitem_390: "f32[3072, 768][768, 1]cuda:0" = _foreach_mul[94] + getitem_391: "f32[3072][1]cuda:0" = _foreach_mul[95] + getitem_392: "f32[768, 3072][3072, 1]cuda:0" = _foreach_mul[96] + getitem_393: "f32[768][1]cuda:0" = _foreach_mul[97] + getitem_394: "f32[768][1]cuda:0" = _foreach_mul[98] + getitem_395: "f32[768][1]cuda:0" = _foreach_mul[99] + getitem_396: "f32[2304, 768][768, 1]cuda:0" = _foreach_mul[100] + getitem_397: "f32[2304][1]cuda:0" = _foreach_mul[101] + getitem_398: "f32[768, 768][768, 1]cuda:0" = _foreach_mul[102] + getitem_399: "f32[768][1]cuda:0" = _foreach_mul[103] + getitem_400: "f32[768][1]cuda:0" = _foreach_mul[104] + getitem_401: "f32[768][1]cuda:0" = _foreach_mul[105] + getitem_402: "f32[3072, 768][768, 1]cuda:0" = _foreach_mul[106] + getitem_403: "f32[3072][1]cuda:0" = _foreach_mul[107] + getitem_404: "f32[768, 3072][3072, 1]cuda:0" = _foreach_mul[108] + getitem_405: "f32[768][1]cuda:0" = _foreach_mul[109] + getitem_406: "f32[768][1]cuda:0" = _foreach_mul[110] + getitem_407: "f32[768][1]cuda:0" = _foreach_mul[111] + getitem_408: "f32[2304, 768][768, 1]cuda:0" = _foreach_mul[112] + getitem_409: "f32[2304][1]cuda:0" = _foreach_mul[113] + getitem_410: "f32[768, 768][768, 1]cuda:0" = _foreach_mul[114] + getitem_411: "f32[768][1]cuda:0" = _foreach_mul[115] + getitem_412: "f32[768][1]cuda:0" = _foreach_mul[116] + getitem_413: "f32[768][1]cuda:0" = _foreach_mul[117] + getitem_414: "f32[3072, 768][768, 1]cuda:0" = _foreach_mul[118] + getitem_415: "f32[3072][1]cuda:0" = _foreach_mul[119] + getitem_416: "f32[768, 3072][3072, 1]cuda:0" = _foreach_mul[120] + getitem_417: "f32[768][1]cuda:0" = _foreach_mul[121] + getitem_418: "f32[768][1]cuda:0" = _foreach_mul[122] + getitem_419: "f32[768][1]cuda:0" = _foreach_mul[123] + getitem_420: "f32[2304, 768][768, 1]cuda:0" = _foreach_mul[124] + getitem_421: "f32[2304][1]cuda:0" = _foreach_mul[125] + getitem_422: "f32[768, 768][768, 1]cuda:0" = _foreach_mul[126] + getitem_423: "f32[768][1]cuda:0" = _foreach_mul[127] + getitem_424: "f32[768][1]cuda:0" = _foreach_mul[128] + getitem_425: "f32[768][1]cuda:0" = _foreach_mul[129] + getitem_426: "f32[3072, 768][768, 1]cuda:0" = _foreach_mul[130] + getitem_427: "f32[3072][1]cuda:0" = _foreach_mul[131] + getitem_428: "f32[768, 3072][3072, 1]cuda:0" = _foreach_mul[132] + getitem_429: "f32[768][1]cuda:0" = _foreach_mul[133] + getitem_430: "f32[768][1]cuda:0" = 
_foreach_mul[134] + getitem_431: "f32[768][1]cuda:0" = _foreach_mul[135] + getitem_432: "f32[2304, 768][768, 1]cuda:0" = _foreach_mul[136] + getitem_433: "f32[2304][1]cuda:0" = _foreach_mul[137] + getitem_434: "f32[768, 768][768, 1]cuda:0" = _foreach_mul[138] + getitem_435: "f32[768][1]cuda:0" = _foreach_mul[139] + getitem_436: "f32[768][1]cuda:0" = _foreach_mul[140] + getitem_437: "f32[768][1]cuda:0" = _foreach_mul[141] + getitem_438: "f32[3072, 768][768, 1]cuda:0" = _foreach_mul[142] + getitem_439: "f32[3072][1]cuda:0" = _foreach_mul[143] + getitem_440: "f32[768, 3072][3072, 1]cuda:0" = _foreach_mul[144] + getitem_441: "f32[768][1]cuda:0" = _foreach_mul[145] + getitem_442: "f32[768][1]cuda:0" = _foreach_mul[146] + getitem_443: "f32[768][1]cuda:0" = _foreach_mul[147]; _foreach_mul = None + _foreach_add_1 = torch.ops.aten._foreach_add.List([arg299_1, arg149_1, arg300_1, arg301_1, arg302_1, arg303_1, arg304_1, arg305_1, arg306_1, arg307_1, arg308_1, arg309_1, arg310_1, arg311_1, arg312_1, arg313_1, arg314_1, arg315_1, arg316_1, arg317_1, arg318_1, arg319_1, arg320_1, arg321_1, arg322_1, arg323_1, arg324_1, arg325_1, arg326_1, arg327_1, arg328_1, arg329_1, arg330_1, arg331_1, arg332_1, arg333_1, arg334_1, arg335_1, arg336_1, arg337_1, arg338_1, arg339_1, arg340_1, arg341_1, arg342_1, arg343_1, arg344_1, arg345_1, arg346_1, arg347_1, arg348_1, arg349_1, arg350_1, arg351_1, arg352_1, arg353_1, arg354_1, arg355_1, arg356_1, arg357_1, arg358_1, arg359_1, arg360_1, arg361_1, arg362_1, arg363_1, arg364_1, arg365_1, arg366_1, arg367_1, arg368_1, arg369_1, arg370_1, arg371_1, arg372_1, arg373_1, arg374_1, arg375_1, arg376_1, arg377_1, arg378_1, arg379_1, arg380_1, arg381_1, arg382_1, arg383_1, arg384_1, arg385_1, arg386_1, arg387_1, arg388_1, arg389_1, arg390_1, arg391_1, arg392_1, arg393_1, arg394_1, arg395_1, arg396_1, arg397_1, arg398_1, arg399_1, arg400_1, arg401_1, arg402_1, arg403_1, arg404_1, arg405_1, arg406_1, arg407_1, arg408_1, arg409_1, arg410_1, arg411_1, arg412_1, arg413_1, arg414_1, arg415_1, arg416_1, arg417_1, arg418_1, arg419_1, arg420_1, arg421_1, arg422_1, arg423_1, arg424_1, arg425_1, arg426_1, arg427_1, arg428_1, arg429_1, arg430_1, arg431_1, arg432_1, arg433_1, arg434_1, arg435_1, arg436_1, arg437_1, arg438_1, arg439_1, arg440_1, arg441_1, arg442_1, arg443_1, arg444_1, arg445_1], [getitem_296, getitem_297, getitem_298, getitem_299, getitem_300, getitem_301, getitem_302, getitem_303, getitem_304, getitem_305, getitem_306, getitem_307, getitem_308, getitem_309, getitem_310, getitem_311, getitem_312, getitem_313, getitem_314, getitem_315, getitem_316, getitem_317, getitem_318, getitem_319, getitem_320, getitem_321, getitem_322, getitem_323, getitem_324, getitem_325, getitem_326, getitem_327, getitem_328, getitem_329, getitem_330, getitem_331, getitem_332, getitem_333, getitem_334, getitem_335, getitem_336, getitem_337, getitem_338, getitem_339, getitem_340, getitem_341, getitem_342, getitem_343, getitem_344, getitem_345, getitem_346, getitem_347, getitem_348, getitem_349, getitem_350, getitem_351, getitem_352, getitem_353, getitem_354, getitem_355, getitem_356, getitem_357, getitem_358, getitem_359, getitem_360, getitem_361, getitem_362, getitem_363, getitem_364, getitem_365, getitem_366, getitem_367, getitem_368, getitem_369, getitem_370, getitem_371, getitem_372, getitem_373, getitem_374, getitem_375, getitem_376, getitem_377, getitem_378, getitem_379, getitem_380, getitem_381, getitem_382, getitem_383, getitem_384, getitem_385, getitem_386, getitem_387, getitem_388, 
getitem_389, getitem_390, getitem_391, getitem_392, getitem_393, getitem_394, getitem_395, getitem_396, getitem_397, getitem_398, getitem_399, getitem_400, getitem_401, getitem_402, getitem_403, getitem_404, getitem_405, getitem_406, getitem_407, getitem_408, getitem_409, getitem_410, getitem_411, getitem_412, getitem_413, getitem_414, getitem_415, getitem_416, getitem_417, getitem_418, getitem_419, getitem_420, getitem_421, getitem_422, getitem_423, getitem_424, getitem_425, getitem_426, getitem_427, getitem_428, getitem_429, getitem_430, getitem_431, getitem_432, getitem_433, getitem_434, getitem_435, getitem_436, getitem_437, getitem_438, getitem_439, getitem_440, getitem_441, getitem_442, getitem_443]); getitem_296 = getitem_297 = getitem_298 = getitem_299 = getitem_300 = getitem_301 = getitem_302 = getitem_303 = getitem_304 = getitem_305 = getitem_306 = getitem_307 = getitem_308 = getitem_309 = getitem_310 = getitem_311 = getitem_312 = getitem_313 = getitem_314 = getitem_315 = getitem_316 = getitem_317 = getitem_318 = getitem_319 = getitem_320 = getitem_321 = getitem_322 = getitem_323 = getitem_324 = getitem_325 = getitem_326 = getitem_327 = getitem_328 = getitem_329 = getitem_330 = getitem_331 = getitem_332 = getitem_333 = getitem_334 = getitem_335 = getitem_336 = getitem_337 = getitem_338 = getitem_339 = getitem_340 = getitem_341 = getitem_342 = getitem_343 = getitem_344 = getitem_345 = getitem_346 = getitem_347 = getitem_348 = getitem_349 = getitem_350 = getitem_351 = getitem_352 = getitem_353 = getitem_354 = getitem_355 = getitem_356 = getitem_357 = getitem_358 = getitem_359 = getitem_360 = getitem_361 = getitem_362 = getitem_363 = getitem_364 = getitem_365 = getitem_366 = getitem_367 = getitem_368 = getitem_369 = getitem_370 = getitem_371 = getitem_372 = getitem_373 = getitem_374 = getitem_375 = getitem_376 = getitem_377 = getitem_378 = getitem_379 = getitem_380 = getitem_381 = getitem_382 = getitem_383 = getitem_384 = getitem_385 = getitem_386 = getitem_387 = getitem_388 = getitem_389 = getitem_390 = getitem_391 = getitem_392 = getitem_393 = getitem_394 = getitem_395 = getitem_396 = getitem_397 = getitem_398 = getitem_399 = getitem_400 = getitem_401 = getitem_402 = getitem_403 = getitem_404 = getitem_405 = getitem_406 = getitem_407 = getitem_408 = getitem_409 = getitem_410 = getitem_411 = getitem_412 = getitem_413 = getitem_414 = getitem_415 = getitem_416 = getitem_417 = getitem_418 = getitem_419 = getitem_420 = getitem_421 = getitem_422 = getitem_423 = getitem_424 = getitem_425 = getitem_426 = getitem_427 = getitem_428 = getitem_429 = getitem_430 = getitem_431 = getitem_432 = getitem_433 = getitem_434 = getitem_435 = getitem_436 = getitem_437 = getitem_438 = getitem_439 = getitem_440 = getitem_441 = getitem_442 = getitem_443 = None + getitem_444: "f32[50304, 768][768, 1]cuda:0" = _foreach_add_1[0] + getitem_445: "f32[1024, 768][768, 1]cuda:0" = _foreach_add_1[1] + getitem_446: "f32[768][1]cuda:0" = _foreach_add_1[2] + getitem_447: "f32[768][1]cuda:0" = _foreach_add_1[3] + getitem_448: "f32[2304, 768][768, 1]cuda:0" = _foreach_add_1[4] + getitem_449: "f32[2304][1]cuda:0" = _foreach_add_1[5] + getitem_450: "f32[768, 768][768, 1]cuda:0" = _foreach_add_1[6] + getitem_451: "f32[768][1]cuda:0" = _foreach_add_1[7] + getitem_452: "f32[768][1]cuda:0" = _foreach_add_1[8] + getitem_453: "f32[768][1]cuda:0" = _foreach_add_1[9] + getitem_454: "f32[3072, 768][768, 1]cuda:0" = _foreach_add_1[10] + getitem_455: "f32[3072][1]cuda:0" = _foreach_add_1[11] + getitem_456: "f32[768, 3072][3072, 
1]cuda:0" = _foreach_add_1[12] + getitem_457: "f32[768][1]cuda:0" = _foreach_add_1[13] + getitem_458: "f32[768][1]cuda:0" = _foreach_add_1[14] + getitem_459: "f32[768][1]cuda:0" = _foreach_add_1[15] + getitem_460: "f32[2304, 768][768, 1]cuda:0" = _foreach_add_1[16] + getitem_461: "f32[2304][1]cuda:0" = _foreach_add_1[17] + getitem_462: "f32[768, 768][768, 1]cuda:0" = _foreach_add_1[18] + getitem_463: "f32[768][1]cuda:0" = _foreach_add_1[19] + getitem_464: "f32[768][1]cuda:0" = _foreach_add_1[20] + getitem_465: "f32[768][1]cuda:0" = _foreach_add_1[21] + getitem_466: "f32[3072, 768][768, 1]cuda:0" = _foreach_add_1[22] + getitem_467: "f32[3072][1]cuda:0" = _foreach_add_1[23] + getitem_468: "f32[768, 3072][3072, 1]cuda:0" = _foreach_add_1[24] + getitem_469: "f32[768][1]cuda:0" = _foreach_add_1[25] + getitem_470: "f32[768][1]cuda:0" = _foreach_add_1[26] + getitem_471: "f32[768][1]cuda:0" = _foreach_add_1[27] + getitem_472: "f32[2304, 768][768, 1]cuda:0" = _foreach_add_1[28] + getitem_473: "f32[2304][1]cuda:0" = _foreach_add_1[29] + getitem_474: "f32[768, 768][768, 1]cuda:0" = _foreach_add_1[30] + getitem_475: "f32[768][1]cuda:0" = _foreach_add_1[31] + getitem_476: "f32[768][1]cuda:0" = _foreach_add_1[32] + getitem_477: "f32[768][1]cuda:0" = _foreach_add_1[33] + getitem_478: "f32[3072, 768][768, 1]cuda:0" = _foreach_add_1[34] + getitem_479: "f32[3072][1]cuda:0" = _foreach_add_1[35] + getitem_480: "f32[768, 3072][3072, 1]cuda:0" = _foreach_add_1[36] + getitem_481: "f32[768][1]cuda:0" = _foreach_add_1[37] + getitem_482: "f32[768][1]cuda:0" = _foreach_add_1[38] + getitem_483: "f32[768][1]cuda:0" = _foreach_add_1[39] + getitem_484: "f32[2304, 768][768, 1]cuda:0" = _foreach_add_1[40] + getitem_485: "f32[2304][1]cuda:0" = _foreach_add_1[41] + getitem_486: "f32[768, 768][768, 1]cuda:0" = _foreach_add_1[42] + getitem_487: "f32[768][1]cuda:0" = _foreach_add_1[43] + getitem_488: "f32[768][1]cuda:0" = _foreach_add_1[44] + getitem_489: "f32[768][1]cuda:0" = _foreach_add_1[45] + getitem_490: "f32[3072, 768][768, 1]cuda:0" = _foreach_add_1[46] + getitem_491: "f32[3072][1]cuda:0" = _foreach_add_1[47] + getitem_492: "f32[768, 3072][3072, 1]cuda:0" = _foreach_add_1[48] + getitem_493: "f32[768][1]cuda:0" = _foreach_add_1[49] + getitem_494: "f32[768][1]cuda:0" = _foreach_add_1[50] + getitem_495: "f32[768][1]cuda:0" = _foreach_add_1[51] + getitem_496: "f32[2304, 768][768, 1]cuda:0" = _foreach_add_1[52] + getitem_497: "f32[2304][1]cuda:0" = _foreach_add_1[53] + getitem_498: "f32[768, 768][768, 1]cuda:0" = _foreach_add_1[54] + getitem_499: "f32[768][1]cuda:0" = _foreach_add_1[55] + getitem_500: "f32[768][1]cuda:0" = _foreach_add_1[56] + getitem_501: "f32[768][1]cuda:0" = _foreach_add_1[57] + getitem_502: "f32[3072, 768][768, 1]cuda:0" = _foreach_add_1[58] + getitem_503: "f32[3072][1]cuda:0" = _foreach_add_1[59] + getitem_504: "f32[768, 3072][3072, 1]cuda:0" = _foreach_add_1[60] + getitem_505: "f32[768][1]cuda:0" = _foreach_add_1[61] + getitem_506: "f32[768][1]cuda:0" = _foreach_add_1[62] + getitem_507: "f32[768][1]cuda:0" = _foreach_add_1[63] + getitem_508: "f32[2304, 768][768, 1]cuda:0" = _foreach_add_1[64] + getitem_509: "f32[2304][1]cuda:0" = _foreach_add_1[65] + getitem_510: "f32[768, 768][768, 1]cuda:0" = _foreach_add_1[66] + getitem_511: "f32[768][1]cuda:0" = _foreach_add_1[67] + getitem_512: "f32[768][1]cuda:0" = _foreach_add_1[68] + getitem_513: "f32[768][1]cuda:0" = _foreach_add_1[69] + getitem_514: "f32[3072, 768][768, 1]cuda:0" = _foreach_add_1[70] + getitem_515: "f32[3072][1]cuda:0" = _foreach_add_1[71] + 
getitem_516: "f32[768, 3072][3072, 1]cuda:0" = _foreach_add_1[72] + getitem_517: "f32[768][1]cuda:0" = _foreach_add_1[73] + getitem_518: "f32[768][1]cuda:0" = _foreach_add_1[74] + getitem_519: "f32[768][1]cuda:0" = _foreach_add_1[75] + getitem_520: "f32[2304, 768][768, 1]cuda:0" = _foreach_add_1[76] + getitem_521: "f32[2304][1]cuda:0" = _foreach_add_1[77] + getitem_522: "f32[768, 768][768, 1]cuda:0" = _foreach_add_1[78] + getitem_523: "f32[768][1]cuda:0" = _foreach_add_1[79] + getitem_524: "f32[768][1]cuda:0" = _foreach_add_1[80] + getitem_525: "f32[768][1]cuda:0" = _foreach_add_1[81] + getitem_526: "f32[3072, 768][768, 1]cuda:0" = _foreach_add_1[82] + getitem_527: "f32[3072][1]cuda:0" = _foreach_add_1[83] + getitem_528: "f32[768, 3072][3072, 1]cuda:0" = _foreach_add_1[84] + getitem_529: "f32[768][1]cuda:0" = _foreach_add_1[85] + getitem_530: "f32[768][1]cuda:0" = _foreach_add_1[86] + getitem_531: "f32[768][1]cuda:0" = _foreach_add_1[87] + getitem_532: "f32[2304, 768][768, 1]cuda:0" = _foreach_add_1[88] + getitem_533: "f32[2304][1]cuda:0" = _foreach_add_1[89] + getitem_534: "f32[768, 768][768, 1]cuda:0" = _foreach_add_1[90] + getitem_535: "f32[768][1]cuda:0" = _foreach_add_1[91] + getitem_536: "f32[768][1]cuda:0" = _foreach_add_1[92] + getitem_537: "f32[768][1]cuda:0" = _foreach_add_1[93] + getitem_538: "f32[3072, 768][768, 1]cuda:0" = _foreach_add_1[94] + getitem_539: "f32[3072][1]cuda:0" = _foreach_add_1[95] + getitem_540: "f32[768, 3072][3072, 1]cuda:0" = _foreach_add_1[96] + getitem_541: "f32[768][1]cuda:0" = _foreach_add_1[97] + getitem_542: "f32[768][1]cuda:0" = _foreach_add_1[98] + getitem_543: "f32[768][1]cuda:0" = _foreach_add_1[99] + getitem_544: "f32[2304, 768][768, 1]cuda:0" = _foreach_add_1[100] + getitem_545: "f32[2304][1]cuda:0" = _foreach_add_1[101] + getitem_546: "f32[768, 768][768, 1]cuda:0" = _foreach_add_1[102] + getitem_547: "f32[768][1]cuda:0" = _foreach_add_1[103] + getitem_548: "f32[768][1]cuda:0" = _foreach_add_1[104] + getitem_549: "f32[768][1]cuda:0" = _foreach_add_1[105] + getitem_550: "f32[3072, 768][768, 1]cuda:0" = _foreach_add_1[106] + getitem_551: "f32[3072][1]cuda:0" = _foreach_add_1[107] + getitem_552: "f32[768, 3072][3072, 1]cuda:0" = _foreach_add_1[108] + getitem_553: "f32[768][1]cuda:0" = _foreach_add_1[109] + getitem_554: "f32[768][1]cuda:0" = _foreach_add_1[110] + getitem_555: "f32[768][1]cuda:0" = _foreach_add_1[111] + getitem_556: "f32[2304, 768][768, 1]cuda:0" = _foreach_add_1[112] + getitem_557: "f32[2304][1]cuda:0" = _foreach_add_1[113] + getitem_558: "f32[768, 768][768, 1]cuda:0" = _foreach_add_1[114] + getitem_559: "f32[768][1]cuda:0" = _foreach_add_1[115] + getitem_560: "f32[768][1]cuda:0" = _foreach_add_1[116] + getitem_561: "f32[768][1]cuda:0" = _foreach_add_1[117] + getitem_562: "f32[3072, 768][768, 1]cuda:0" = _foreach_add_1[118] + getitem_563: "f32[3072][1]cuda:0" = _foreach_add_1[119] + getitem_564: "f32[768, 3072][3072, 1]cuda:0" = _foreach_add_1[120] + getitem_565: "f32[768][1]cuda:0" = _foreach_add_1[121] + getitem_566: "f32[768][1]cuda:0" = _foreach_add_1[122] + getitem_567: "f32[768][1]cuda:0" = _foreach_add_1[123] + getitem_568: "f32[2304, 768][768, 1]cuda:0" = _foreach_add_1[124] + getitem_569: "f32[2304][1]cuda:0" = _foreach_add_1[125] + getitem_570: "f32[768, 768][768, 1]cuda:0" = _foreach_add_1[126] + getitem_571: "f32[768][1]cuda:0" = _foreach_add_1[127] + getitem_572: "f32[768][1]cuda:0" = _foreach_add_1[128] + getitem_573: "f32[768][1]cuda:0" = _foreach_add_1[129] + getitem_574: "f32[3072, 768][768, 1]cuda:0" = 
_foreach_add_1[130] + getitem_575: "f32[3072][1]cuda:0" = _foreach_add_1[131] + getitem_576: "f32[768, 3072][3072, 1]cuda:0" = _foreach_add_1[132] + getitem_577: "f32[768][1]cuda:0" = _foreach_add_1[133] + getitem_578: "f32[768][1]cuda:0" = _foreach_add_1[134] + getitem_579: "f32[768][1]cuda:0" = _foreach_add_1[135] + getitem_580: "f32[2304, 768][768, 1]cuda:0" = _foreach_add_1[136] + getitem_581: "f32[2304][1]cuda:0" = _foreach_add_1[137] + getitem_582: "f32[768, 768][768, 1]cuda:0" = _foreach_add_1[138] + getitem_583: "f32[768][1]cuda:0" = _foreach_add_1[139] + getitem_584: "f32[768][1]cuda:0" = _foreach_add_1[140] + getitem_585: "f32[768][1]cuda:0" = _foreach_add_1[141] + getitem_586: "f32[3072, 768][768, 1]cuda:0" = _foreach_add_1[142] + getitem_587: "f32[3072][1]cuda:0" = _foreach_add_1[143] + getitem_588: "f32[768, 3072][3072, 1]cuda:0" = _foreach_add_1[144] + getitem_589: "f32[768][1]cuda:0" = _foreach_add_1[145] + getitem_590: "f32[768][1]cuda:0" = _foreach_add_1[146] + getitem_591: "f32[768][1]cuda:0" = _foreach_add_1[147]; _foreach_add_1 = None + + # File: /data/users/jjwu/a/pytorch/torch/optim/adam.py:543 in _multi_tensor_adam, code: torch._foreach_mul_(device_exp_avg_sqs, beta2) + _foreach_mul_1 = torch.ops.aten._foreach_mul.Scalar([arg446_1, arg150_1, arg447_1, arg448_1, arg449_1, arg450_1, arg451_1, arg452_1, arg453_1, arg454_1, arg455_1, arg456_1, arg457_1, arg458_1, arg459_1, arg460_1, arg461_1, arg462_1, arg463_1, arg464_1, arg465_1, arg466_1, arg467_1, arg468_1, arg469_1, arg470_1, arg471_1, arg472_1, arg473_1, arg474_1, arg475_1, arg476_1, arg477_1, arg478_1, arg479_1, arg480_1, arg481_1, arg482_1, arg483_1, arg484_1, arg485_1, arg486_1, arg487_1, arg488_1, arg489_1, arg490_1, arg491_1, arg492_1, arg493_1, arg494_1, arg495_1, arg496_1, arg497_1, arg498_1, arg499_1, arg500_1, arg501_1, arg502_1, arg503_1, arg504_1, arg505_1, arg506_1, arg507_1, arg508_1, arg509_1, arg510_1, arg511_1, arg512_1, arg513_1, arg514_1, arg515_1, arg516_1, arg517_1, arg518_1, arg519_1, arg520_1, arg521_1, arg522_1, arg523_1, arg524_1, arg525_1, arg526_1, arg527_1, arg528_1, arg529_1, arg530_1, arg531_1, arg532_1, arg533_1, arg534_1, arg535_1, arg536_1, arg537_1, arg538_1, arg539_1, arg540_1, arg541_1, arg542_1, arg543_1, arg544_1, arg545_1, arg546_1, arg547_1, arg548_1, arg549_1, arg550_1, arg551_1, arg552_1, arg553_1, arg554_1, arg555_1, arg556_1, arg557_1, arg558_1, arg559_1, arg560_1, arg561_1, arg562_1, arg563_1, arg564_1, arg565_1, arg566_1, arg567_1, arg568_1, arg569_1, arg570_1, arg571_1, arg572_1, arg573_1, arg574_1, arg575_1, arg576_1, arg577_1, arg578_1, arg579_1, arg580_1, arg581_1, arg582_1, arg583_1, arg584_1, arg585_1, arg586_1, arg587_1, arg588_1, arg589_1, arg590_1, arg591_1, arg592_1], 0.999) + getitem_592: "f32[50304, 768][768, 1]cuda:0" = _foreach_mul_1[0] + getitem_593: "f32[1024, 768][768, 1]cuda:0" = _foreach_mul_1[1] + getitem_594: "f32[768][1]cuda:0" = _foreach_mul_1[2] + getitem_595: "f32[768][1]cuda:0" = _foreach_mul_1[3] + getitem_596: "f32[2304, 768][768, 1]cuda:0" = _foreach_mul_1[4] + getitem_597: "f32[2304][1]cuda:0" = _foreach_mul_1[5] + getitem_598: "f32[768, 768][768, 1]cuda:0" = _foreach_mul_1[6] + getitem_599: "f32[768][1]cuda:0" = _foreach_mul_1[7] + getitem_600: "f32[768][1]cuda:0" = _foreach_mul_1[8] + getitem_601: "f32[768][1]cuda:0" = _foreach_mul_1[9] + getitem_602: "f32[3072, 768][768, 1]cuda:0" = _foreach_mul_1[10] + getitem_603: "f32[3072][1]cuda:0" = _foreach_mul_1[11] + getitem_604: "f32[768, 3072][3072, 1]cuda:0" = _foreach_mul_1[12] + getitem_605: 
"f32[768][1]cuda:0" = _foreach_mul_1[13] + getitem_606: "f32[768][1]cuda:0" = _foreach_mul_1[14] + getitem_607: "f32[768][1]cuda:0" = _foreach_mul_1[15] + getitem_608: "f32[2304, 768][768, 1]cuda:0" = _foreach_mul_1[16] + getitem_609: "f32[2304][1]cuda:0" = _foreach_mul_1[17] + getitem_610: "f32[768, 768][768, 1]cuda:0" = _foreach_mul_1[18] + getitem_611: "f32[768][1]cuda:0" = _foreach_mul_1[19] + getitem_612: "f32[768][1]cuda:0" = _foreach_mul_1[20] + getitem_613: "f32[768][1]cuda:0" = _foreach_mul_1[21] + getitem_614: "f32[3072, 768][768, 1]cuda:0" = _foreach_mul_1[22] + getitem_615: "f32[3072][1]cuda:0" = _foreach_mul_1[23] + getitem_616: "f32[768, 3072][3072, 1]cuda:0" = _foreach_mul_1[24] + getitem_617: "f32[768][1]cuda:0" = _foreach_mul_1[25] + getitem_618: "f32[768][1]cuda:0" = _foreach_mul_1[26] + getitem_619: "f32[768][1]cuda:0" = _foreach_mul_1[27] + getitem_620: "f32[2304, 768][768, 1]cuda:0" = _foreach_mul_1[28] + getitem_621: "f32[2304][1]cuda:0" = _foreach_mul_1[29] + getitem_622: "f32[768, 768][768, 1]cuda:0" = _foreach_mul_1[30] + getitem_623: "f32[768][1]cuda:0" = _foreach_mul_1[31] + getitem_624: "f32[768][1]cuda:0" = _foreach_mul_1[32] + getitem_625: "f32[768][1]cuda:0" = _foreach_mul_1[33] + getitem_626: "f32[3072, 768][768, 1]cuda:0" = _foreach_mul_1[34] + getitem_627: "f32[3072][1]cuda:0" = _foreach_mul_1[35] + getitem_628: "f32[768, 3072][3072, 1]cuda:0" = _foreach_mul_1[36] + getitem_629: "f32[768][1]cuda:0" = _foreach_mul_1[37] + getitem_630: "f32[768][1]cuda:0" = _foreach_mul_1[38] + getitem_631: "f32[768][1]cuda:0" = _foreach_mul_1[39] + getitem_632: "f32[2304, 768][768, 1]cuda:0" = _foreach_mul_1[40] + getitem_633: "f32[2304][1]cuda:0" = _foreach_mul_1[41] + getitem_634: "f32[768, 768][768, 1]cuda:0" = _foreach_mul_1[42] + getitem_635: "f32[768][1]cuda:0" = _foreach_mul_1[43] + getitem_636: "f32[768][1]cuda:0" = _foreach_mul_1[44] + getitem_637: "f32[768][1]cuda:0" = _foreach_mul_1[45] + getitem_638: "f32[3072, 768][768, 1]cuda:0" = _foreach_mul_1[46] + getitem_639: "f32[3072][1]cuda:0" = _foreach_mul_1[47] + getitem_640: "f32[768, 3072][3072, 1]cuda:0" = _foreach_mul_1[48] + getitem_641: "f32[768][1]cuda:0" = _foreach_mul_1[49] + getitem_642: "f32[768][1]cuda:0" = _foreach_mul_1[50] + getitem_643: "f32[768][1]cuda:0" = _foreach_mul_1[51] + getitem_644: "f32[2304, 768][768, 1]cuda:0" = _foreach_mul_1[52] + getitem_645: "f32[2304][1]cuda:0" = _foreach_mul_1[53] + getitem_646: "f32[768, 768][768, 1]cuda:0" = _foreach_mul_1[54] + getitem_647: "f32[768][1]cuda:0" = _foreach_mul_1[55] + getitem_648: "f32[768][1]cuda:0" = _foreach_mul_1[56] + getitem_649: "f32[768][1]cuda:0" = _foreach_mul_1[57] + getitem_650: "f32[3072, 768][768, 1]cuda:0" = _foreach_mul_1[58] + getitem_651: "f32[3072][1]cuda:0" = _foreach_mul_1[59] + getitem_652: "f32[768, 3072][3072, 1]cuda:0" = _foreach_mul_1[60] + getitem_653: "f32[768][1]cuda:0" = _foreach_mul_1[61] + getitem_654: "f32[768][1]cuda:0" = _foreach_mul_1[62] + getitem_655: "f32[768][1]cuda:0" = _foreach_mul_1[63] + getitem_656: "f32[2304, 768][768, 1]cuda:0" = _foreach_mul_1[64] + getitem_657: "f32[2304][1]cuda:0" = _foreach_mul_1[65] + getitem_658: "f32[768, 768][768, 1]cuda:0" = _foreach_mul_1[66] + getitem_659: "f32[768][1]cuda:0" = _foreach_mul_1[67] + getitem_660: "f32[768][1]cuda:0" = _foreach_mul_1[68] + getitem_661: "f32[768][1]cuda:0" = _foreach_mul_1[69] + getitem_662: "f32[3072, 768][768, 1]cuda:0" = _foreach_mul_1[70] + getitem_663: "f32[3072][1]cuda:0" = _foreach_mul_1[71] + getitem_664: "f32[768, 3072][3072, 1]cuda:0" 
= _foreach_mul_1[72] + getitem_665: "f32[768][1]cuda:0" = _foreach_mul_1[73] + getitem_666: "f32[768][1]cuda:0" = _foreach_mul_1[74] + getitem_667: "f32[768][1]cuda:0" = _foreach_mul_1[75] + getitem_668: "f32[2304, 768][768, 1]cuda:0" = _foreach_mul_1[76] + getitem_669: "f32[2304][1]cuda:0" = _foreach_mul_1[77] + getitem_670: "f32[768, 768][768, 1]cuda:0" = _foreach_mul_1[78] + getitem_671: "f32[768][1]cuda:0" = _foreach_mul_1[79] + getitem_672: "f32[768][1]cuda:0" = _foreach_mul_1[80] + getitem_673: "f32[768][1]cuda:0" = _foreach_mul_1[81] + getitem_674: "f32[3072, 768][768, 1]cuda:0" = _foreach_mul_1[82] + getitem_675: "f32[3072][1]cuda:0" = _foreach_mul_1[83] + getitem_676: "f32[768, 3072][3072, 1]cuda:0" = _foreach_mul_1[84] + getitem_677: "f32[768][1]cuda:0" = _foreach_mul_1[85] + getitem_678: "f32[768][1]cuda:0" = _foreach_mul_1[86] + getitem_679: "f32[768][1]cuda:0" = _foreach_mul_1[87] + getitem_680: "f32[2304, 768][768, 1]cuda:0" = _foreach_mul_1[88] + getitem_681: "f32[2304][1]cuda:0" = _foreach_mul_1[89] + getitem_682: "f32[768, 768][768, 1]cuda:0" = _foreach_mul_1[90] + getitem_683: "f32[768][1]cuda:0" = _foreach_mul_1[91] + getitem_684: "f32[768][1]cuda:0" = _foreach_mul_1[92] + getitem_685: "f32[768][1]cuda:0" = _foreach_mul_1[93] + getitem_686: "f32[3072, 768][768, 1]cuda:0" = _foreach_mul_1[94] + getitem_687: "f32[3072][1]cuda:0" = _foreach_mul_1[95] + getitem_688: "f32[768, 3072][3072, 1]cuda:0" = _foreach_mul_1[96] + getitem_689: "f32[768][1]cuda:0" = _foreach_mul_1[97] + getitem_690: "f32[768][1]cuda:0" = _foreach_mul_1[98] + getitem_691: "f32[768][1]cuda:0" = _foreach_mul_1[99] + getitem_692: "f32[2304, 768][768, 1]cuda:0" = _foreach_mul_1[100] + getitem_693: "f32[2304][1]cuda:0" = _foreach_mul_1[101] + getitem_694: "f32[768, 768][768, 1]cuda:0" = _foreach_mul_1[102] + getitem_695: "f32[768][1]cuda:0" = _foreach_mul_1[103] + getitem_696: "f32[768][1]cuda:0" = _foreach_mul_1[104] + getitem_697: "f32[768][1]cuda:0" = _foreach_mul_1[105] + getitem_698: "f32[3072, 768][768, 1]cuda:0" = _foreach_mul_1[106] + getitem_699: "f32[3072][1]cuda:0" = _foreach_mul_1[107] + getitem_700: "f32[768, 3072][3072, 1]cuda:0" = _foreach_mul_1[108] + getitem_701: "f32[768][1]cuda:0" = _foreach_mul_1[109] + getitem_702: "f32[768][1]cuda:0" = _foreach_mul_1[110] + getitem_703: "f32[768][1]cuda:0" = _foreach_mul_1[111] + getitem_704: "f32[2304, 768][768, 1]cuda:0" = _foreach_mul_1[112] + getitem_705: "f32[2304][1]cuda:0" = _foreach_mul_1[113] + getitem_706: "f32[768, 768][768, 1]cuda:0" = _foreach_mul_1[114] + getitem_707: "f32[768][1]cuda:0" = _foreach_mul_1[115] + getitem_708: "f32[768][1]cuda:0" = _foreach_mul_1[116] + getitem_709: "f32[768][1]cuda:0" = _foreach_mul_1[117] + getitem_710: "f32[3072, 768][768, 1]cuda:0" = _foreach_mul_1[118] + getitem_711: "f32[3072][1]cuda:0" = _foreach_mul_1[119] + getitem_712: "f32[768, 3072][3072, 1]cuda:0" = _foreach_mul_1[120] + getitem_713: "f32[768][1]cuda:0" = _foreach_mul_1[121] + getitem_714: "f32[768][1]cuda:0" = _foreach_mul_1[122] + getitem_715: "f32[768][1]cuda:0" = _foreach_mul_1[123] + getitem_716: "f32[2304, 768][768, 1]cuda:0" = _foreach_mul_1[124] + getitem_717: "f32[2304][1]cuda:0" = _foreach_mul_1[125] + getitem_718: "f32[768, 768][768, 1]cuda:0" = _foreach_mul_1[126] + getitem_719: "f32[768][1]cuda:0" = _foreach_mul_1[127] + getitem_720: "f32[768][1]cuda:0" = _foreach_mul_1[128] + getitem_721: "f32[768][1]cuda:0" = _foreach_mul_1[129] + getitem_722: "f32[3072, 768][768, 1]cuda:0" = _foreach_mul_1[130] + getitem_723: "f32[3072][1]cuda:0" = 
_foreach_mul_1[131] + getitem_724: "f32[768, 3072][3072, 1]cuda:0" = _foreach_mul_1[132] + getitem_725: "f32[768][1]cuda:0" = _foreach_mul_1[133] + getitem_726: "f32[768][1]cuda:0" = _foreach_mul_1[134] + getitem_727: "f32[768][1]cuda:0" = _foreach_mul_1[135] + getitem_728: "f32[2304, 768][768, 1]cuda:0" = _foreach_mul_1[136] + getitem_729: "f32[2304][1]cuda:0" = _foreach_mul_1[137] + getitem_730: "f32[768, 768][768, 1]cuda:0" = _foreach_mul_1[138] + getitem_731: "f32[768][1]cuda:0" = _foreach_mul_1[139] + getitem_732: "f32[768][1]cuda:0" = _foreach_mul_1[140] + getitem_733: "f32[768][1]cuda:0" = _foreach_mul_1[141] + getitem_734: "f32[3072, 768][768, 1]cuda:0" = _foreach_mul_1[142] + getitem_735: "f32[3072][1]cuda:0" = _foreach_mul_1[143] + getitem_736: "f32[768, 3072][3072, 1]cuda:0" = _foreach_mul_1[144] + getitem_737: "f32[768][1]cuda:0" = _foreach_mul_1[145] + getitem_738: "f32[768][1]cuda:0" = _foreach_mul_1[146] + getitem_739: "f32[768][1]cuda:0" = _foreach_mul_1[147]; _foreach_mul_1 = None + + # File: /data/users/jjwu/a/pytorch/torch/optim/adam.py:544 in _multi_tensor_adam, code: torch._foreach_addcmul_( + _foreach_mul_2 = torch.ops.aten._foreach_mul.List([arg151_1, arg152_1, arg153_1, arg154_1, arg155_1, arg156_1, arg157_1, arg158_1, arg159_1, arg160_1, arg161_1, arg162_1, arg163_1, arg164_1, arg165_1, arg166_1, arg167_1, arg168_1, arg169_1, arg170_1, arg171_1, arg172_1, arg173_1, arg174_1, arg175_1, arg176_1, arg177_1, arg178_1, arg179_1, arg180_1, arg181_1, arg182_1, arg183_1, arg184_1, arg185_1, arg186_1, arg187_1, arg188_1, arg189_1, arg190_1, arg191_1, arg192_1, arg193_1, arg194_1, arg195_1, arg196_1, arg197_1, arg198_1, arg199_1, arg200_1, arg201_1, arg202_1, arg203_1, arg204_1, arg205_1, arg206_1, arg207_1, arg208_1, arg209_1, arg210_1, arg211_1, arg212_1, arg213_1, arg214_1, arg215_1, arg216_1, arg217_1, arg218_1, arg219_1, arg220_1, arg221_1, arg222_1, arg223_1, arg224_1, arg225_1, arg226_1, arg227_1, arg228_1, arg229_1, arg230_1, arg231_1, arg232_1, arg233_1, arg234_1, arg235_1, arg236_1, arg237_1, arg238_1, arg239_1, arg240_1, arg241_1, arg242_1, arg243_1, arg244_1, arg245_1, arg246_1, arg247_1, arg248_1, arg249_1, arg250_1, arg251_1, arg252_1, arg253_1, arg254_1, arg255_1, arg256_1, arg257_1, arg258_1, arg259_1, arg260_1, arg261_1, arg262_1, arg263_1, arg264_1, arg265_1, arg266_1, arg267_1, arg268_1, arg269_1, arg270_1, arg271_1, arg272_1, arg273_1, arg274_1, arg275_1, arg276_1, arg277_1, arg278_1, arg279_1, arg280_1, arg281_1, arg282_1, arg283_1, arg284_1, arg285_1, arg286_1, arg287_1, arg288_1, arg289_1, arg290_1, arg291_1, arg292_1, arg293_1, arg294_1, arg295_1, arg296_1, arg297_1, arg298_1], [arg151_1, arg152_1, arg153_1, arg154_1, arg155_1, arg156_1, arg157_1, arg158_1, arg159_1, arg160_1, arg161_1, arg162_1, arg163_1, arg164_1, arg165_1, arg166_1, arg167_1, arg168_1, arg169_1, arg170_1, arg171_1, arg172_1, arg173_1, arg174_1, arg175_1, arg176_1, arg177_1, arg178_1, arg179_1, arg180_1, arg181_1, arg182_1, arg183_1, arg184_1, arg185_1, arg186_1, arg187_1, arg188_1, arg189_1, arg190_1, arg191_1, arg192_1, arg193_1, arg194_1, arg195_1, arg196_1, arg197_1, arg198_1, arg199_1, arg200_1, arg201_1, arg202_1, arg203_1, arg204_1, arg205_1, arg206_1, arg207_1, arg208_1, arg209_1, arg210_1, arg211_1, arg212_1, arg213_1, arg214_1, arg215_1, arg216_1, arg217_1, arg218_1, arg219_1, arg220_1, arg221_1, arg222_1, arg223_1, arg224_1, arg225_1, arg226_1, arg227_1, arg228_1, arg229_1, arg230_1, arg231_1, arg232_1, arg233_1, arg234_1, arg235_1, arg236_1, arg237_1, arg238_1, arg239_1, 
arg240_1, arg241_1, arg242_1, arg243_1, arg244_1, arg245_1, arg246_1, arg247_1, arg248_1, arg249_1, arg250_1, arg251_1, arg252_1, arg253_1, arg254_1, arg255_1, arg256_1, arg257_1, arg258_1, arg259_1, arg260_1, arg261_1, arg262_1, arg263_1, arg264_1, arg265_1, arg266_1, arg267_1, arg268_1, arg269_1, arg270_1, arg271_1, arg272_1, arg273_1, arg274_1, arg275_1, arg276_1, arg277_1, arg278_1, arg279_1, arg280_1, arg281_1, arg282_1, arg283_1, arg284_1, arg285_1, arg286_1, arg287_1, arg288_1, arg289_1, arg290_1, arg291_1, arg292_1, arg293_1, arg294_1, arg295_1, arg296_1, arg297_1, arg298_1]); arg151_1 = arg152_1 = arg153_1 = arg154_1 = arg155_1 = arg156_1 = arg157_1 = arg158_1 = arg159_1 = arg160_1 = arg161_1 = arg162_1 = arg163_1 = arg164_1 = arg165_1 = arg166_1 = arg167_1 = arg168_1 = arg169_1 = arg170_1 = arg171_1 = arg172_1 = arg173_1 = arg174_1 = arg175_1 = arg176_1 = arg177_1 = arg178_1 = arg179_1 = arg180_1 = arg181_1 = arg182_1 = arg183_1 = arg184_1 = arg185_1 = arg186_1 = arg187_1 = arg188_1 = arg189_1 = arg190_1 = arg191_1 = arg192_1 = arg193_1 = arg194_1 = arg195_1 = arg196_1 = arg197_1 = arg198_1 = arg199_1 = arg200_1 = arg201_1 = arg202_1 = arg203_1 = arg204_1 = arg205_1 = arg206_1 = arg207_1 = arg208_1 = arg209_1 = arg210_1 = arg211_1 = arg212_1 = arg213_1 = arg214_1 = arg215_1 = arg216_1 = arg217_1 = arg218_1 = arg219_1 = arg220_1 = arg221_1 = arg222_1 = arg223_1 = arg224_1 = arg225_1 = arg226_1 = arg227_1 = arg228_1 = arg229_1 = arg230_1 = arg231_1 = arg232_1 = arg233_1 = arg234_1 = arg235_1 = arg236_1 = arg237_1 = arg238_1 = arg239_1 = arg240_1 = arg241_1 = arg242_1 = arg243_1 = arg244_1 = arg245_1 = arg246_1 = arg247_1 = arg248_1 = arg249_1 = arg250_1 = arg251_1 = arg252_1 = arg253_1 = arg254_1 = arg255_1 = arg256_1 = arg257_1 = arg258_1 = arg259_1 = arg260_1 = arg261_1 = arg262_1 = arg263_1 = arg264_1 = arg265_1 = arg266_1 = arg267_1 = arg268_1 = arg269_1 = arg270_1 = arg271_1 = arg272_1 = arg273_1 = arg274_1 = arg275_1 = arg276_1 = arg277_1 = arg278_1 = arg279_1 = arg280_1 = arg281_1 = arg282_1 = arg283_1 = arg284_1 = arg285_1 = arg286_1 = arg287_1 = arg288_1 = arg289_1 = arg290_1 = arg291_1 = arg292_1 = arg293_1 = arg294_1 = arg295_1 = arg296_1 = arg297_1 = arg298_1 = None + getitem_740: "f32[50304, 768][768, 1]cuda:0" = _foreach_mul_2[0] + getitem_741: "f32[1024, 768][768, 1]cuda:0" = _foreach_mul_2[1] + getitem_742: "f32[768][1]cuda:0" = _foreach_mul_2[2] + getitem_743: "f32[768][1]cuda:0" = _foreach_mul_2[3] + getitem_744: "f32[2304, 768][768, 1]cuda:0" = _foreach_mul_2[4] + getitem_745: "f32[2304][1]cuda:0" = _foreach_mul_2[5] + getitem_746: "f32[768, 768][768, 1]cuda:0" = _foreach_mul_2[6] + getitem_747: "f32[768][1]cuda:0" = _foreach_mul_2[7] + getitem_748: "f32[768][1]cuda:0" = _foreach_mul_2[8] + getitem_749: "f32[768][1]cuda:0" = _foreach_mul_2[9] + getitem_750: "f32[3072, 768][768, 1]cuda:0" = _foreach_mul_2[10] + getitem_751: "f32[3072][1]cuda:0" = _foreach_mul_2[11] + getitem_752: "f32[768, 3072][3072, 1]cuda:0" = _foreach_mul_2[12] + getitem_753: "f32[768][1]cuda:0" = _foreach_mul_2[13] + getitem_754: "f32[768][1]cuda:0" = _foreach_mul_2[14] + getitem_755: "f32[768][1]cuda:0" = _foreach_mul_2[15] + getitem_756: "f32[2304, 768][768, 1]cuda:0" = _foreach_mul_2[16] + getitem_757: "f32[2304][1]cuda:0" = _foreach_mul_2[17] + getitem_758: "f32[768, 768][768, 1]cuda:0" = _foreach_mul_2[18] + getitem_759: "f32[768][1]cuda:0" = _foreach_mul_2[19] + getitem_760: "f32[768][1]cuda:0" = _foreach_mul_2[20] + getitem_761: "f32[768][1]cuda:0" = _foreach_mul_2[21] + getitem_762: 
"f32[3072, 768][768, 1]cuda:0" = _foreach_mul_2[22] + getitem_763: "f32[3072][1]cuda:0" = _foreach_mul_2[23] + getitem_764: "f32[768, 3072][3072, 1]cuda:0" = _foreach_mul_2[24] + getitem_765: "f32[768][1]cuda:0" = _foreach_mul_2[25] + getitem_766: "f32[768][1]cuda:0" = _foreach_mul_2[26] + getitem_767: "f32[768][1]cuda:0" = _foreach_mul_2[27] + getitem_768: "f32[2304, 768][768, 1]cuda:0" = _foreach_mul_2[28] + getitem_769: "f32[2304][1]cuda:0" = _foreach_mul_2[29] + getitem_770: "f32[768, 768][768, 1]cuda:0" = _foreach_mul_2[30] + getitem_771: "f32[768][1]cuda:0" = _foreach_mul_2[31] + getitem_772: "f32[768][1]cuda:0" = _foreach_mul_2[32] + getitem_773: "f32[768][1]cuda:0" = _foreach_mul_2[33] + getitem_774: "f32[3072, 768][768, 1]cuda:0" = _foreach_mul_2[34] + getitem_775: "f32[3072][1]cuda:0" = _foreach_mul_2[35] + getitem_776: "f32[768, 3072][3072, 1]cuda:0" = _foreach_mul_2[36] + getitem_777: "f32[768][1]cuda:0" = _foreach_mul_2[37] + getitem_778: "f32[768][1]cuda:0" = _foreach_mul_2[38] + getitem_779: "f32[768][1]cuda:0" = _foreach_mul_2[39] + getitem_780: "f32[2304, 768][768, 1]cuda:0" = _foreach_mul_2[40] + getitem_781: "f32[2304][1]cuda:0" = _foreach_mul_2[41] + getitem_782: "f32[768, 768][768, 1]cuda:0" = _foreach_mul_2[42] + getitem_783: "f32[768][1]cuda:0" = _foreach_mul_2[43] + getitem_784: "f32[768][1]cuda:0" = _foreach_mul_2[44] + getitem_785: "f32[768][1]cuda:0" = _foreach_mul_2[45] + getitem_786: "f32[3072, 768][768, 1]cuda:0" = _foreach_mul_2[46] + getitem_787: "f32[3072][1]cuda:0" = _foreach_mul_2[47] + getitem_788: "f32[768, 3072][3072, 1]cuda:0" = _foreach_mul_2[48] + getitem_789: "f32[768][1]cuda:0" = _foreach_mul_2[49] + getitem_790: "f32[768][1]cuda:0" = _foreach_mul_2[50] + getitem_791: "f32[768][1]cuda:0" = _foreach_mul_2[51] + getitem_792: "f32[2304, 768][768, 1]cuda:0" = _foreach_mul_2[52] + getitem_793: "f32[2304][1]cuda:0" = _foreach_mul_2[53] + getitem_794: "f32[768, 768][768, 1]cuda:0" = _foreach_mul_2[54] + getitem_795: "f32[768][1]cuda:0" = _foreach_mul_2[55] + getitem_796: "f32[768][1]cuda:0" = _foreach_mul_2[56] + getitem_797: "f32[768][1]cuda:0" = _foreach_mul_2[57] + getitem_798: "f32[3072, 768][768, 1]cuda:0" = _foreach_mul_2[58] + getitem_799: "f32[3072][1]cuda:0" = _foreach_mul_2[59] + getitem_800: "f32[768, 3072][3072, 1]cuda:0" = _foreach_mul_2[60] + getitem_801: "f32[768][1]cuda:0" = _foreach_mul_2[61] + getitem_802: "f32[768][1]cuda:0" = _foreach_mul_2[62] + getitem_803: "f32[768][1]cuda:0" = _foreach_mul_2[63] + getitem_804: "f32[2304, 768][768, 1]cuda:0" = _foreach_mul_2[64] + getitem_805: "f32[2304][1]cuda:0" = _foreach_mul_2[65] + getitem_806: "f32[768, 768][768, 1]cuda:0" = _foreach_mul_2[66] + getitem_807: "f32[768][1]cuda:0" = _foreach_mul_2[67] + getitem_808: "f32[768][1]cuda:0" = _foreach_mul_2[68] + getitem_809: "f32[768][1]cuda:0" = _foreach_mul_2[69] + getitem_810: "f32[3072, 768][768, 1]cuda:0" = _foreach_mul_2[70] + getitem_811: "f32[3072][1]cuda:0" = _foreach_mul_2[71] + getitem_812: "f32[768, 3072][3072, 1]cuda:0" = _foreach_mul_2[72] + getitem_813: "f32[768][1]cuda:0" = _foreach_mul_2[73] + getitem_814: "f32[768][1]cuda:0" = _foreach_mul_2[74] + getitem_815: "f32[768][1]cuda:0" = _foreach_mul_2[75] + getitem_816: "f32[2304, 768][768, 1]cuda:0" = _foreach_mul_2[76] + getitem_817: "f32[2304][1]cuda:0" = _foreach_mul_2[77] + getitem_818: "f32[768, 768][768, 1]cuda:0" = _foreach_mul_2[78] + getitem_819: "f32[768][1]cuda:0" = _foreach_mul_2[79] + getitem_820: "f32[768][1]cuda:0" = _foreach_mul_2[80] + getitem_821: "f32[768][1]cuda:0" 
= _foreach_mul_2[81] + getitem_822: "f32[3072, 768][768, 1]cuda:0" = _foreach_mul_2[82] + getitem_823: "f32[3072][1]cuda:0" = _foreach_mul_2[83] + getitem_824: "f32[768, 3072][3072, 1]cuda:0" = _foreach_mul_2[84] + getitem_825: "f32[768][1]cuda:0" = _foreach_mul_2[85] + getitem_826: "f32[768][1]cuda:0" = _foreach_mul_2[86] + getitem_827: "f32[768][1]cuda:0" = _foreach_mul_2[87] + getitem_828: "f32[2304, 768][768, 1]cuda:0" = _foreach_mul_2[88] + getitem_829: "f32[2304][1]cuda:0" = _foreach_mul_2[89] + getitem_830: "f32[768, 768][768, 1]cuda:0" = _foreach_mul_2[90] + getitem_831: "f32[768][1]cuda:0" = _foreach_mul_2[91] + getitem_832: "f32[768][1]cuda:0" = _foreach_mul_2[92] + getitem_833: "f32[768][1]cuda:0" = _foreach_mul_2[93] + getitem_834: "f32[3072, 768][768, 1]cuda:0" = _foreach_mul_2[94] + getitem_835: "f32[3072][1]cuda:0" = _foreach_mul_2[95] + getitem_836: "f32[768, 3072][3072, 1]cuda:0" = _foreach_mul_2[96] + getitem_837: "f32[768][1]cuda:0" = _foreach_mul_2[97] + getitem_838: "f32[768][1]cuda:0" = _foreach_mul_2[98] + getitem_839: "f32[768][1]cuda:0" = _foreach_mul_2[99] + getitem_840: "f32[2304, 768][768, 1]cuda:0" = _foreach_mul_2[100] + getitem_841: "f32[2304][1]cuda:0" = _foreach_mul_2[101] + getitem_842: "f32[768, 768][768, 1]cuda:0" = _foreach_mul_2[102] + getitem_843: "f32[768][1]cuda:0" = _foreach_mul_2[103] + getitem_844: "f32[768][1]cuda:0" = _foreach_mul_2[104] + getitem_845: "f32[768][1]cuda:0" = _foreach_mul_2[105] + getitem_846: "f32[3072, 768][768, 1]cuda:0" = _foreach_mul_2[106] + getitem_847: "f32[3072][1]cuda:0" = _foreach_mul_2[107] + getitem_848: "f32[768, 3072][3072, 1]cuda:0" = _foreach_mul_2[108] + getitem_849: "f32[768][1]cuda:0" = _foreach_mul_2[109] + getitem_850: "f32[768][1]cuda:0" = _foreach_mul_2[110] + getitem_851: "f32[768][1]cuda:0" = _foreach_mul_2[111] + getitem_852: "f32[2304, 768][768, 1]cuda:0" = _foreach_mul_2[112] + getitem_853: "f32[2304][1]cuda:0" = _foreach_mul_2[113] + getitem_854: "f32[768, 768][768, 1]cuda:0" = _foreach_mul_2[114] + getitem_855: "f32[768][1]cuda:0" = _foreach_mul_2[115] + getitem_856: "f32[768][1]cuda:0" = _foreach_mul_2[116] + getitem_857: "f32[768][1]cuda:0" = _foreach_mul_2[117] + getitem_858: "f32[3072, 768][768, 1]cuda:0" = _foreach_mul_2[118] + getitem_859: "f32[3072][1]cuda:0" = _foreach_mul_2[119] + getitem_860: "f32[768, 3072][3072, 1]cuda:0" = _foreach_mul_2[120] + getitem_861: "f32[768][1]cuda:0" = _foreach_mul_2[121] + getitem_862: "f32[768][1]cuda:0" = _foreach_mul_2[122] + getitem_863: "f32[768][1]cuda:0" = _foreach_mul_2[123] + getitem_864: "f32[2304, 768][768, 1]cuda:0" = _foreach_mul_2[124] + getitem_865: "f32[2304][1]cuda:0" = _foreach_mul_2[125] + getitem_866: "f32[768, 768][768, 1]cuda:0" = _foreach_mul_2[126] + getitem_867: "f32[768][1]cuda:0" = _foreach_mul_2[127] + getitem_868: "f32[768][1]cuda:0" = _foreach_mul_2[128] + getitem_869: "f32[768][1]cuda:0" = _foreach_mul_2[129] + getitem_870: "f32[3072, 768][768, 1]cuda:0" = _foreach_mul_2[130] + getitem_871: "f32[3072][1]cuda:0" = _foreach_mul_2[131] + getitem_872: "f32[768, 3072][3072, 1]cuda:0" = _foreach_mul_2[132] + getitem_873: "f32[768][1]cuda:0" = _foreach_mul_2[133] + getitem_874: "f32[768][1]cuda:0" = _foreach_mul_2[134] + getitem_875: "f32[768][1]cuda:0" = _foreach_mul_2[135] + getitem_876: "f32[2304, 768][768, 1]cuda:0" = _foreach_mul_2[136] + getitem_877: "f32[2304][1]cuda:0" = _foreach_mul_2[137] + getitem_878: "f32[768, 768][768, 1]cuda:0" = _foreach_mul_2[138] + getitem_879: "f32[768][1]cuda:0" = _foreach_mul_2[139] + getitem_880: 
"f32[768][1]cuda:0" = _foreach_mul_2[140] + getitem_881: "f32[768][1]cuda:0" = _foreach_mul_2[141] + getitem_882: "f32[3072, 768][768, 1]cuda:0" = _foreach_mul_2[142] + getitem_883: "f32[3072][1]cuda:0" = _foreach_mul_2[143] + getitem_884: "f32[768, 3072][3072, 1]cuda:0" = _foreach_mul_2[144] + getitem_885: "f32[768][1]cuda:0" = _foreach_mul_2[145] + getitem_886: "f32[768][1]cuda:0" = _foreach_mul_2[146] + getitem_887: "f32[768][1]cuda:0" = _foreach_mul_2[147]; _foreach_mul_2 = None + _foreach_add_2 = torch.ops.aten._foreach_add.List([getitem_592, getitem_593, getitem_594, getitem_595, getitem_596, getitem_597, getitem_598, getitem_599, getitem_600, getitem_601, getitem_602, getitem_603, getitem_604, getitem_605, getitem_606, getitem_607, getitem_608, getitem_609, getitem_610, getitem_611, getitem_612, getitem_613, getitem_614, getitem_615, getitem_616, getitem_617, getitem_618, getitem_619, getitem_620, getitem_621, getitem_622, getitem_623, getitem_624, getitem_625, getitem_626, getitem_627, getitem_628, getitem_629, getitem_630, getitem_631, getitem_632, getitem_633, getitem_634, getitem_635, getitem_636, getitem_637, getitem_638, getitem_639, getitem_640, getitem_641, getitem_642, getitem_643, getitem_644, getitem_645, getitem_646, getitem_647, getitem_648, getitem_649, getitem_650, getitem_651, getitem_652, getitem_653, getitem_654, getitem_655, getitem_656, getitem_657, getitem_658, getitem_659, getitem_660, getitem_661, getitem_662, getitem_663, getitem_664, getitem_665, getitem_666, getitem_667, getitem_668, getitem_669, getitem_670, getitem_671, getitem_672, getitem_673, getitem_674, getitem_675, getitem_676, getitem_677, getitem_678, getitem_679, getitem_680, getitem_681, getitem_682, getitem_683, getitem_684, getitem_685, getitem_686, getitem_687, getitem_688, getitem_689, getitem_690, getitem_691, getitem_692, getitem_693, getitem_694, getitem_695, getitem_696, getitem_697, getitem_698, getitem_699, getitem_700, getitem_701, getitem_702, getitem_703, getitem_704, getitem_705, getitem_706, getitem_707, getitem_708, getitem_709, getitem_710, getitem_711, getitem_712, getitem_713, getitem_714, getitem_715, getitem_716, getitem_717, getitem_718, getitem_719, getitem_720, getitem_721, getitem_722, getitem_723, getitem_724, getitem_725, getitem_726, getitem_727, getitem_728, getitem_729, getitem_730, getitem_731, getitem_732, getitem_733, getitem_734, getitem_735, getitem_736, getitem_737, getitem_738, getitem_739], [getitem_740, getitem_741, getitem_742, getitem_743, getitem_744, getitem_745, getitem_746, getitem_747, getitem_748, getitem_749, getitem_750, getitem_751, getitem_752, getitem_753, getitem_754, getitem_755, getitem_756, getitem_757, getitem_758, getitem_759, getitem_760, getitem_761, getitem_762, getitem_763, getitem_764, getitem_765, getitem_766, getitem_767, getitem_768, getitem_769, getitem_770, getitem_771, getitem_772, getitem_773, getitem_774, getitem_775, getitem_776, getitem_777, getitem_778, getitem_779, getitem_780, getitem_781, getitem_782, getitem_783, getitem_784, getitem_785, getitem_786, getitem_787, getitem_788, getitem_789, getitem_790, getitem_791, getitem_792, getitem_793, getitem_794, getitem_795, getitem_796, getitem_797, getitem_798, getitem_799, getitem_800, getitem_801, getitem_802, getitem_803, getitem_804, getitem_805, getitem_806, getitem_807, getitem_808, getitem_809, getitem_810, getitem_811, getitem_812, getitem_813, getitem_814, getitem_815, getitem_816, getitem_817, getitem_818, getitem_819, getitem_820, getitem_821, getitem_822, 
getitem_823, getitem_824, getitem_825, getitem_826, getitem_827, getitem_828, getitem_829, getitem_830, getitem_831, getitem_832, getitem_833, getitem_834, getitem_835, getitem_836, getitem_837, getitem_838, getitem_839, getitem_840, getitem_841, getitem_842, getitem_843, getitem_844, getitem_845, getitem_846, getitem_847, getitem_848, getitem_849, getitem_850, getitem_851, getitem_852, getitem_853, getitem_854, getitem_855, getitem_856, getitem_857, getitem_858, getitem_859, getitem_860, getitem_861, getitem_862, getitem_863, getitem_864, getitem_865, getitem_866, getitem_867, getitem_868, getitem_869, getitem_870, getitem_871, getitem_872, getitem_873, getitem_874, getitem_875, getitem_876, getitem_877, getitem_878, getitem_879, getitem_880, getitem_881, getitem_882, getitem_883, getitem_884, getitem_885, getitem_886, getitem_887], alpha = 0.0010000000000000009); getitem_592 = getitem_593 = getitem_594 = getitem_595 = getitem_596 = getitem_597 = getitem_598 = getitem_599 = getitem_600 = getitem_601 = getitem_602 = getitem_603 = getitem_604 = getitem_605 = getitem_606 = getitem_607 = getitem_608 = getitem_609 = getitem_610 = getitem_611 = getitem_612 = getitem_613 = getitem_614 = getitem_615 = getitem_616 = getitem_617 = getitem_618 = getitem_619 = getitem_620 = getitem_621 = getitem_622 = getitem_623 = getitem_624 = getitem_625 = getitem_626 = getitem_627 = getitem_628 = getitem_629 = getitem_630 = getitem_631 = getitem_632 = getitem_633 = getitem_634 = getitem_635 = getitem_636 = getitem_637 = getitem_638 = getitem_639 = getitem_640 = getitem_641 = getitem_642 = getitem_643 = getitem_644 = getitem_645 = getitem_646 = getitem_647 = getitem_648 = getitem_649 = getitem_650 = getitem_651 = getitem_652 = getitem_653 = getitem_654 = getitem_655 = getitem_656 = getitem_657 = getitem_658 = getitem_659 = getitem_660 = getitem_661 = getitem_662 = getitem_663 = getitem_664 = getitem_665 = getitem_666 = getitem_667 = getitem_668 = getitem_669 = getitem_670 = getitem_671 = getitem_672 = getitem_673 = getitem_674 = getitem_675 = getitem_676 = getitem_677 = getitem_678 = getitem_679 = getitem_680 = getitem_681 = getitem_682 = getitem_683 = getitem_684 = getitem_685 = getitem_686 = getitem_687 = getitem_688 = getitem_689 = getitem_690 = getitem_691 = getitem_692 = getitem_693 = getitem_694 = getitem_695 = getitem_696 = getitem_697 = getitem_698 = getitem_699 = getitem_700 = getitem_701 = getitem_702 = getitem_703 = getitem_704 = getitem_705 = getitem_706 = getitem_707 = getitem_708 = getitem_709 = getitem_710 = getitem_711 = getitem_712 = getitem_713 = getitem_714 = getitem_715 = getitem_716 = getitem_717 = getitem_718 = getitem_719 = getitem_720 = getitem_721 = getitem_722 = getitem_723 = getitem_724 = getitem_725 = getitem_726 = getitem_727 = getitem_728 = getitem_729 = getitem_730 = getitem_731 = getitem_732 = getitem_733 = getitem_734 = getitem_735 = getitem_736 = getitem_737 = getitem_738 = getitem_739 = getitem_740 = getitem_741 = getitem_742 = getitem_743 = getitem_744 = getitem_745 = getitem_746 = getitem_747 = getitem_748 = getitem_749 = getitem_750 = getitem_751 = getitem_752 = getitem_753 = getitem_754 = getitem_755 = getitem_756 = getitem_757 = getitem_758 = getitem_759 = getitem_760 = getitem_761 = getitem_762 = getitem_763 = getitem_764 = getitem_765 = getitem_766 = getitem_767 = getitem_768 = getitem_769 = getitem_770 = getitem_771 = getitem_772 = getitem_773 = getitem_774 = getitem_775 = getitem_776 = getitem_777 = getitem_778 = getitem_779 = getitem_780 = getitem_781 = getitem_782 = 
getitem_783 = getitem_784 = getitem_785 = getitem_786 = getitem_787 = getitem_788 = getitem_789 = getitem_790 = getitem_791 = getitem_792 = getitem_793 = getitem_794 = getitem_795 = getitem_796 = getitem_797 = getitem_798 = getitem_799 = getitem_800 = getitem_801 = getitem_802 = getitem_803 = getitem_804 = getitem_805 = getitem_806 = getitem_807 = getitem_808 = getitem_809 = getitem_810 = getitem_811 = getitem_812 = getitem_813 = getitem_814 = getitem_815 = getitem_816 = getitem_817 = getitem_818 = getitem_819 = getitem_820 = getitem_821 = getitem_822 = getitem_823 = getitem_824 = getitem_825 = getitem_826 = getitem_827 = getitem_828 = getitem_829 = getitem_830 = getitem_831 = getitem_832 = getitem_833 = getitem_834 = getitem_835 = getitem_836 = getitem_837 = getitem_838 = getitem_839 = getitem_840 = getitem_841 = getitem_842 = getitem_843 = getitem_844 = getitem_845 = getitem_846 = getitem_847 = getitem_848 = getitem_849 = getitem_850 = getitem_851 = getitem_852 = getitem_853 = getitem_854 = getitem_855 = getitem_856 = getitem_857 = getitem_858 = getitem_859 = getitem_860 = getitem_861 = getitem_862 = getitem_863 = getitem_864 = getitem_865 = getitem_866 = getitem_867 = getitem_868 = getitem_869 = getitem_870 = getitem_871 = getitem_872 = getitem_873 = getitem_874 = getitem_875 = getitem_876 = getitem_877 = getitem_878 = getitem_879 = getitem_880 = getitem_881 = getitem_882 = getitem_883 = getitem_884 = getitem_885 = getitem_886 = getitem_887 = None + getitem_888: "f32[50304, 768][768, 1]cuda:0" = _foreach_add_2[0] + getitem_889: "f32[1024, 768][768, 1]cuda:0" = _foreach_add_2[1] + getitem_890: "f32[768][1]cuda:0" = _foreach_add_2[2] + getitem_891: "f32[768][1]cuda:0" = _foreach_add_2[3] + getitem_892: "f32[2304, 768][768, 1]cuda:0" = _foreach_add_2[4] + getitem_893: "f32[2304][1]cuda:0" = _foreach_add_2[5] + getitem_894: "f32[768, 768][768, 1]cuda:0" = _foreach_add_2[6] + getitem_895: "f32[768][1]cuda:0" = _foreach_add_2[7] + getitem_896: "f32[768][1]cuda:0" = _foreach_add_2[8] + getitem_897: "f32[768][1]cuda:0" = _foreach_add_2[9] + getitem_898: "f32[3072, 768][768, 1]cuda:0" = _foreach_add_2[10] + getitem_899: "f32[3072][1]cuda:0" = _foreach_add_2[11] + getitem_900: "f32[768, 3072][3072, 1]cuda:0" = _foreach_add_2[12] + getitem_901: "f32[768][1]cuda:0" = _foreach_add_2[13] + getitem_902: "f32[768][1]cuda:0" = _foreach_add_2[14] + getitem_903: "f32[768][1]cuda:0" = _foreach_add_2[15] + getitem_904: "f32[2304, 768][768, 1]cuda:0" = _foreach_add_2[16] + getitem_905: "f32[2304][1]cuda:0" = _foreach_add_2[17] + getitem_906: "f32[768, 768][768, 1]cuda:0" = _foreach_add_2[18] + getitem_907: "f32[768][1]cuda:0" = _foreach_add_2[19] + getitem_908: "f32[768][1]cuda:0" = _foreach_add_2[20] + getitem_909: "f32[768][1]cuda:0" = _foreach_add_2[21] + getitem_910: "f32[3072, 768][768, 1]cuda:0" = _foreach_add_2[22] + getitem_911: "f32[3072][1]cuda:0" = _foreach_add_2[23] + getitem_912: "f32[768, 3072][3072, 1]cuda:0" = _foreach_add_2[24] + getitem_913: "f32[768][1]cuda:0" = _foreach_add_2[25] + getitem_914: "f32[768][1]cuda:0" = _foreach_add_2[26] + getitem_915: "f32[768][1]cuda:0" = _foreach_add_2[27] + getitem_916: "f32[2304, 768][768, 1]cuda:0" = _foreach_add_2[28] + getitem_917: "f32[2304][1]cuda:0" = _foreach_add_2[29] + getitem_918: "f32[768, 768][768, 1]cuda:0" = _foreach_add_2[30] + getitem_919: "f32[768][1]cuda:0" = _foreach_add_2[31] + getitem_920: "f32[768][1]cuda:0" = _foreach_add_2[32] + getitem_921: "f32[768][1]cuda:0" = _foreach_add_2[33] + getitem_922: "f32[3072, 768][768, 1]cuda:0" = 
_foreach_add_2[34] + getitem_923: "f32[3072][1]cuda:0" = _foreach_add_2[35] + getitem_924: "f32[768, 3072][3072, 1]cuda:0" = _foreach_add_2[36] + getitem_925: "f32[768][1]cuda:0" = _foreach_add_2[37] + getitem_926: "f32[768][1]cuda:0" = _foreach_add_2[38] + getitem_927: "f32[768][1]cuda:0" = _foreach_add_2[39] + getitem_928: "f32[2304, 768][768, 1]cuda:0" = _foreach_add_2[40] + getitem_929: "f32[2304][1]cuda:0" = _foreach_add_2[41] + getitem_930: "f32[768, 768][768, 1]cuda:0" = _foreach_add_2[42] + getitem_931: "f32[768][1]cuda:0" = _foreach_add_2[43] + getitem_932: "f32[768][1]cuda:0" = _foreach_add_2[44] + getitem_933: "f32[768][1]cuda:0" = _foreach_add_2[45] + getitem_934: "f32[3072, 768][768, 1]cuda:0" = _foreach_add_2[46] + getitem_935: "f32[3072][1]cuda:0" = _foreach_add_2[47] + getitem_936: "f32[768, 3072][3072, 1]cuda:0" = _foreach_add_2[48] + getitem_937: "f32[768][1]cuda:0" = _foreach_add_2[49] + getitem_938: "f32[768][1]cuda:0" = _foreach_add_2[50] + getitem_939: "f32[768][1]cuda:0" = _foreach_add_2[51] + getitem_940: "f32[2304, 768][768, 1]cuda:0" = _foreach_add_2[52] + getitem_941: "f32[2304][1]cuda:0" = _foreach_add_2[53] + getitem_942: "f32[768, 768][768, 1]cuda:0" = _foreach_add_2[54] + getitem_943: "f32[768][1]cuda:0" = _foreach_add_2[55] + getitem_944: "f32[768][1]cuda:0" = _foreach_add_2[56] + getitem_945: "f32[768][1]cuda:0" = _foreach_add_2[57] + getitem_946: "f32[3072, 768][768, 1]cuda:0" = _foreach_add_2[58] + getitem_947: "f32[3072][1]cuda:0" = _foreach_add_2[59] + getitem_948: "f32[768, 3072][3072, 1]cuda:0" = _foreach_add_2[60] + getitem_949: "f32[768][1]cuda:0" = _foreach_add_2[61] + getitem_950: "f32[768][1]cuda:0" = _foreach_add_2[62] + getitem_951: "f32[768][1]cuda:0" = _foreach_add_2[63] + getitem_952: "f32[2304, 768][768, 1]cuda:0" = _foreach_add_2[64] + getitem_953: "f32[2304][1]cuda:0" = _foreach_add_2[65] + getitem_954: "f32[768, 768][768, 1]cuda:0" = _foreach_add_2[66] + getitem_955: "f32[768][1]cuda:0" = _foreach_add_2[67] + getitem_956: "f32[768][1]cuda:0" = _foreach_add_2[68] + getitem_957: "f32[768][1]cuda:0" = _foreach_add_2[69] + getitem_958: "f32[3072, 768][768, 1]cuda:0" = _foreach_add_2[70] + getitem_959: "f32[3072][1]cuda:0" = _foreach_add_2[71] + getitem_960: "f32[768, 3072][3072, 1]cuda:0" = _foreach_add_2[72] + getitem_961: "f32[768][1]cuda:0" = _foreach_add_2[73] + getitem_962: "f32[768][1]cuda:0" = _foreach_add_2[74] + getitem_963: "f32[768][1]cuda:0" = _foreach_add_2[75] + getitem_964: "f32[2304, 768][768, 1]cuda:0" = _foreach_add_2[76] + getitem_965: "f32[2304][1]cuda:0" = _foreach_add_2[77] + getitem_966: "f32[768, 768][768, 1]cuda:0" = _foreach_add_2[78] + getitem_967: "f32[768][1]cuda:0" = _foreach_add_2[79] + getitem_968: "f32[768][1]cuda:0" = _foreach_add_2[80] + getitem_969: "f32[768][1]cuda:0" = _foreach_add_2[81] + getitem_970: "f32[3072, 768][768, 1]cuda:0" = _foreach_add_2[82] + getitem_971: "f32[3072][1]cuda:0" = _foreach_add_2[83] + getitem_972: "f32[768, 3072][3072, 1]cuda:0" = _foreach_add_2[84] + getitem_973: "f32[768][1]cuda:0" = _foreach_add_2[85] + getitem_974: "f32[768][1]cuda:0" = _foreach_add_2[86] + getitem_975: "f32[768][1]cuda:0" = _foreach_add_2[87] + getitem_976: "f32[2304, 768][768, 1]cuda:0" = _foreach_add_2[88] + getitem_977: "f32[2304][1]cuda:0" = _foreach_add_2[89] + getitem_978: "f32[768, 768][768, 1]cuda:0" = _foreach_add_2[90] + getitem_979: "f32[768][1]cuda:0" = _foreach_add_2[91] + getitem_980: "f32[768][1]cuda:0" = _foreach_add_2[92] + getitem_981: "f32[768][1]cuda:0" = _foreach_add_2[93] + 
getitem_982: "f32[3072, 768][768, 1]cuda:0" = _foreach_add_2[94] + getitem_983: "f32[3072][1]cuda:0" = _foreach_add_2[95] + getitem_984: "f32[768, 3072][3072, 1]cuda:0" = _foreach_add_2[96] + getitem_985: "f32[768][1]cuda:0" = _foreach_add_2[97] + getitem_986: "f32[768][1]cuda:0" = _foreach_add_2[98] + getitem_987: "f32[768][1]cuda:0" = _foreach_add_2[99] + getitem_988: "f32[2304, 768][768, 1]cuda:0" = _foreach_add_2[100] + getitem_989: "f32[2304][1]cuda:0" = _foreach_add_2[101] + getitem_990: "f32[768, 768][768, 1]cuda:0" = _foreach_add_2[102] + getitem_991: "f32[768][1]cuda:0" = _foreach_add_2[103] + getitem_992: "f32[768][1]cuda:0" = _foreach_add_2[104] + getitem_993: "f32[768][1]cuda:0" = _foreach_add_2[105] + getitem_994: "f32[3072, 768][768, 1]cuda:0" = _foreach_add_2[106] + getitem_995: "f32[3072][1]cuda:0" = _foreach_add_2[107] + getitem_996: "f32[768, 3072][3072, 1]cuda:0" = _foreach_add_2[108] + getitem_997: "f32[768][1]cuda:0" = _foreach_add_2[109] + getitem_998: "f32[768][1]cuda:0" = _foreach_add_2[110] + getitem_999: "f32[768][1]cuda:0" = _foreach_add_2[111] + getitem_1000: "f32[2304, 768][768, 1]cuda:0" = _foreach_add_2[112] + getitem_1001: "f32[2304][1]cuda:0" = _foreach_add_2[113] + getitem_1002: "f32[768, 768][768, 1]cuda:0" = _foreach_add_2[114] + getitem_1003: "f32[768][1]cuda:0" = _foreach_add_2[115] + getitem_1004: "f32[768][1]cuda:0" = _foreach_add_2[116] + getitem_1005: "f32[768][1]cuda:0" = _foreach_add_2[117] + getitem_1006: "f32[3072, 768][768, 1]cuda:0" = _foreach_add_2[118] + getitem_1007: "f32[3072][1]cuda:0" = _foreach_add_2[119] + getitem_1008: "f32[768, 3072][3072, 1]cuda:0" = _foreach_add_2[120] + getitem_1009: "f32[768][1]cuda:0" = _foreach_add_2[121] + getitem_1010: "f32[768][1]cuda:0" = _foreach_add_2[122] + getitem_1011: "f32[768][1]cuda:0" = _foreach_add_2[123] + getitem_1012: "f32[2304, 768][768, 1]cuda:0" = _foreach_add_2[124] + getitem_1013: "f32[2304][1]cuda:0" = _foreach_add_2[125] + getitem_1014: "f32[768, 768][768, 1]cuda:0" = _foreach_add_2[126] + getitem_1015: "f32[768][1]cuda:0" = _foreach_add_2[127] + getitem_1016: "f32[768][1]cuda:0" = _foreach_add_2[128] + getitem_1017: "f32[768][1]cuda:0" = _foreach_add_2[129] + getitem_1018: "f32[3072, 768][768, 1]cuda:0" = _foreach_add_2[130] + getitem_1019: "f32[3072][1]cuda:0" = _foreach_add_2[131] + getitem_1020: "f32[768, 3072][3072, 1]cuda:0" = _foreach_add_2[132] + getitem_1021: "f32[768][1]cuda:0" = _foreach_add_2[133] + getitem_1022: "f32[768][1]cuda:0" = _foreach_add_2[134] + getitem_1023: "f32[768][1]cuda:0" = _foreach_add_2[135] + getitem_1024: "f32[2304, 768][768, 1]cuda:0" = _foreach_add_2[136] + getitem_1025: "f32[2304][1]cuda:0" = _foreach_add_2[137] + getitem_1026: "f32[768, 768][768, 1]cuda:0" = _foreach_add_2[138] + getitem_1027: "f32[768][1]cuda:0" = _foreach_add_2[139] + getitem_1028: "f32[768][1]cuda:0" = _foreach_add_2[140] + getitem_1029: "f32[768][1]cuda:0" = _foreach_add_2[141] + getitem_1030: "f32[3072, 768][768, 1]cuda:0" = _foreach_add_2[142] + getitem_1031: "f32[3072][1]cuda:0" = _foreach_add_2[143] + getitem_1032: "f32[768, 3072][3072, 1]cuda:0" = _foreach_add_2[144] + getitem_1033: "f32[768][1]cuda:0" = _foreach_add_2[145] + getitem_1034: "f32[768][1]cuda:0" = _foreach_add_2[146] + getitem_1035: "f32[768][1]cuda:0" = _foreach_add_2[147]; _foreach_add_2 = None + + # File: /data/users/jjwu/a/pytorch/torch/optim/adam.py:555 in _multi_tensor_adam, code: bias_correction1 = torch._foreach_pow(beta1, device_state_steps) + _foreach_pow = 
torch.ops.aten._foreach_pow.ScalarAndTensor(0.9, [getitem, getitem_1, getitem_2, getitem_3, getitem_4, getitem_5, getitem_6, getitem_7, getitem_8, getitem_9, getitem_10, getitem_11, getitem_12, getitem_13, getitem_14, getitem_15, getitem_16, getitem_17, getitem_18, getitem_19, getitem_20, getitem_21, getitem_22, getitem_23, getitem_24, getitem_25, getitem_26, getitem_27, getitem_28, getitem_29, getitem_30, getitem_31, getitem_32, getitem_33, getitem_34, getitem_35, getitem_36, getitem_37, getitem_38, getitem_39, getitem_40, getitem_41, getitem_42, getitem_43, getitem_44, getitem_45, getitem_46, getitem_47, getitem_48, getitem_49, getitem_50, getitem_51, getitem_52, getitem_53, getitem_54, getitem_55, getitem_56, getitem_57, getitem_58, getitem_59, getitem_60, getitem_61, getitem_62, getitem_63, getitem_64, getitem_65, getitem_66, getitem_67, getitem_68, getitem_69, getitem_70, getitem_71, getitem_72, getitem_73, getitem_74, getitem_75, getitem_76, getitem_77, getitem_78, getitem_79, getitem_80, getitem_81, getitem_82, getitem_83, getitem_84, getitem_85, getitem_86, getitem_87, getitem_88, getitem_89, getitem_90, getitem_91, getitem_92, getitem_93, getitem_94, getitem_95, getitem_96, getitem_97, getitem_98, getitem_99, getitem_100, getitem_101, getitem_102, getitem_103, getitem_104, getitem_105, getitem_106, getitem_107, getitem_108, getitem_109, getitem_110, getitem_111, getitem_112, getitem_113, getitem_114, getitem_115, getitem_116, getitem_117, getitem_118, getitem_119, getitem_120, getitem_121, getitem_122, getitem_123, getitem_124, getitem_125, getitem_126, getitem_127, getitem_128, getitem_129, getitem_130, getitem_131, getitem_132, getitem_133, getitem_134, getitem_135, getitem_136, getitem_137, getitem_138, getitem_139, getitem_140, getitem_141, getitem_142, getitem_143, getitem_144, getitem_145, getitem_146, getitem_147]) + getitem_1036: "f32[][]cuda:0" = _foreach_pow[0] + getitem_1037: "f32[][]cuda:0" = _foreach_pow[1] + getitem_1038: "f32[][]cuda:0" = _foreach_pow[2] + getitem_1039: "f32[][]cuda:0" = _foreach_pow[3] + getitem_1040: "f32[][]cuda:0" = _foreach_pow[4] + getitem_1041: "f32[][]cuda:0" = _foreach_pow[5] + getitem_1042: "f32[][]cuda:0" = _foreach_pow[6] + getitem_1043: "f32[][]cuda:0" = _foreach_pow[7] + getitem_1044: "f32[][]cuda:0" = _foreach_pow[8] + getitem_1045: "f32[][]cuda:0" = _foreach_pow[9] + getitem_1046: "f32[][]cuda:0" = _foreach_pow[10] + getitem_1047: "f32[][]cuda:0" = _foreach_pow[11] + getitem_1048: "f32[][]cuda:0" = _foreach_pow[12] + getitem_1049: "f32[][]cuda:0" = _foreach_pow[13] + getitem_1050: "f32[][]cuda:0" = _foreach_pow[14] + getitem_1051: "f32[][]cuda:0" = _foreach_pow[15] + getitem_1052: "f32[][]cuda:0" = _foreach_pow[16] + getitem_1053: "f32[][]cuda:0" = _foreach_pow[17] + getitem_1054: "f32[][]cuda:0" = _foreach_pow[18] + getitem_1055: "f32[][]cuda:0" = _foreach_pow[19] + getitem_1056: "f32[][]cuda:0" = _foreach_pow[20] + getitem_1057: "f32[][]cuda:0" = _foreach_pow[21] + getitem_1058: "f32[][]cuda:0" = _foreach_pow[22] + getitem_1059: "f32[][]cuda:0" = _foreach_pow[23] + getitem_1060: "f32[][]cuda:0" = _foreach_pow[24] + getitem_1061: "f32[][]cuda:0" = _foreach_pow[25] + getitem_1062: "f32[][]cuda:0" = _foreach_pow[26] + getitem_1063: "f32[][]cuda:0" = _foreach_pow[27] + getitem_1064: "f32[][]cuda:0" = _foreach_pow[28] + getitem_1065: "f32[][]cuda:0" = _foreach_pow[29] + getitem_1066: "f32[][]cuda:0" = _foreach_pow[30] + getitem_1067: "f32[][]cuda:0" = _foreach_pow[31] + getitem_1068: "f32[][]cuda:0" = _foreach_pow[32] + getitem_1069: 
"f32[][]cuda:0" = _foreach_pow[33] + getitem_1070: "f32[][]cuda:0" = _foreach_pow[34] + getitem_1071: "f32[][]cuda:0" = _foreach_pow[35] + getitem_1072: "f32[][]cuda:0" = _foreach_pow[36] + getitem_1073: "f32[][]cuda:0" = _foreach_pow[37] + getitem_1074: "f32[][]cuda:0" = _foreach_pow[38] + getitem_1075: "f32[][]cuda:0" = _foreach_pow[39] + getitem_1076: "f32[][]cuda:0" = _foreach_pow[40] + getitem_1077: "f32[][]cuda:0" = _foreach_pow[41] + getitem_1078: "f32[][]cuda:0" = _foreach_pow[42] + getitem_1079: "f32[][]cuda:0" = _foreach_pow[43] + getitem_1080: "f32[][]cuda:0" = _foreach_pow[44] + getitem_1081: "f32[][]cuda:0" = _foreach_pow[45] + getitem_1082: "f32[][]cuda:0" = _foreach_pow[46] + getitem_1083: "f32[][]cuda:0" = _foreach_pow[47] + getitem_1084: "f32[][]cuda:0" = _foreach_pow[48] + getitem_1085: "f32[][]cuda:0" = _foreach_pow[49] + getitem_1086: "f32[][]cuda:0" = _foreach_pow[50] + getitem_1087: "f32[][]cuda:0" = _foreach_pow[51] + getitem_1088: "f32[][]cuda:0" = _foreach_pow[52] + getitem_1089: "f32[][]cuda:0" = _foreach_pow[53] + getitem_1090: "f32[][]cuda:0" = _foreach_pow[54] + getitem_1091: "f32[][]cuda:0" = _foreach_pow[55] + getitem_1092: "f32[][]cuda:0" = _foreach_pow[56] + getitem_1093: "f32[][]cuda:0" = _foreach_pow[57] + getitem_1094: "f32[][]cuda:0" = _foreach_pow[58] + getitem_1095: "f32[][]cuda:0" = _foreach_pow[59] + getitem_1096: "f32[][]cuda:0" = _foreach_pow[60] + getitem_1097: "f32[][]cuda:0" = _foreach_pow[61] + getitem_1098: "f32[][]cuda:0" = _foreach_pow[62] + getitem_1099: "f32[][]cuda:0" = _foreach_pow[63] + getitem_1100: "f32[][]cuda:0" = _foreach_pow[64] + getitem_1101: "f32[][]cuda:0" = _foreach_pow[65] + getitem_1102: "f32[][]cuda:0" = _foreach_pow[66] + getitem_1103: "f32[][]cuda:0" = _foreach_pow[67] + getitem_1104: "f32[][]cuda:0" = _foreach_pow[68] + getitem_1105: "f32[][]cuda:0" = _foreach_pow[69] + getitem_1106: "f32[][]cuda:0" = _foreach_pow[70] + getitem_1107: "f32[][]cuda:0" = _foreach_pow[71] + getitem_1108: "f32[][]cuda:0" = _foreach_pow[72] + getitem_1109: "f32[][]cuda:0" = _foreach_pow[73] + getitem_1110: "f32[][]cuda:0" = _foreach_pow[74] + getitem_1111: "f32[][]cuda:0" = _foreach_pow[75] + getitem_1112: "f32[][]cuda:0" = _foreach_pow[76] + getitem_1113: "f32[][]cuda:0" = _foreach_pow[77] + getitem_1114: "f32[][]cuda:0" = _foreach_pow[78] + getitem_1115: "f32[][]cuda:0" = _foreach_pow[79] + getitem_1116: "f32[][]cuda:0" = _foreach_pow[80] + getitem_1117: "f32[][]cuda:0" = _foreach_pow[81] + getitem_1118: "f32[][]cuda:0" = _foreach_pow[82] + getitem_1119: "f32[][]cuda:0" = _foreach_pow[83] + getitem_1120: "f32[][]cuda:0" = _foreach_pow[84] + getitem_1121: "f32[][]cuda:0" = _foreach_pow[85] + getitem_1122: "f32[][]cuda:0" = _foreach_pow[86] + getitem_1123: "f32[][]cuda:0" = _foreach_pow[87] + getitem_1124: "f32[][]cuda:0" = _foreach_pow[88] + getitem_1125: "f32[][]cuda:0" = _foreach_pow[89] + getitem_1126: "f32[][]cuda:0" = _foreach_pow[90] + getitem_1127: "f32[][]cuda:0" = _foreach_pow[91] + getitem_1128: "f32[][]cuda:0" = _foreach_pow[92] + getitem_1129: "f32[][]cuda:0" = _foreach_pow[93] + getitem_1130: "f32[][]cuda:0" = _foreach_pow[94] + getitem_1131: "f32[][]cuda:0" = _foreach_pow[95] + getitem_1132: "f32[][]cuda:0" = _foreach_pow[96] + getitem_1133: "f32[][]cuda:0" = _foreach_pow[97] + getitem_1134: "f32[][]cuda:0" = _foreach_pow[98] + getitem_1135: "f32[][]cuda:0" = _foreach_pow[99] + getitem_1136: "f32[][]cuda:0" = _foreach_pow[100] + getitem_1137: "f32[][]cuda:0" = _foreach_pow[101] + getitem_1138: "f32[][]cuda:0" = 
_foreach_pow[102] + getitem_1139: "f32[][]cuda:0" = _foreach_pow[103] + getitem_1140: "f32[][]cuda:0" = _foreach_pow[104] + getitem_1141: "f32[][]cuda:0" = _foreach_pow[105] + getitem_1142: "f32[][]cuda:0" = _foreach_pow[106] + getitem_1143: "f32[][]cuda:0" = _foreach_pow[107] + getitem_1144: "f32[][]cuda:0" = _foreach_pow[108] + getitem_1145: "f32[][]cuda:0" = _foreach_pow[109] + getitem_1146: "f32[][]cuda:0" = _foreach_pow[110] + getitem_1147: "f32[][]cuda:0" = _foreach_pow[111] + getitem_1148: "f32[][]cuda:0" = _foreach_pow[112] + getitem_1149: "f32[][]cuda:0" = _foreach_pow[113] + getitem_1150: "f32[][]cuda:0" = _foreach_pow[114] + getitem_1151: "f32[][]cuda:0" = _foreach_pow[115] + getitem_1152: "f32[][]cuda:0" = _foreach_pow[116] + getitem_1153: "f32[][]cuda:0" = _foreach_pow[117] + getitem_1154: "f32[][]cuda:0" = _foreach_pow[118] + getitem_1155: "f32[][]cuda:0" = _foreach_pow[119] + getitem_1156: "f32[][]cuda:0" = _foreach_pow[120] + getitem_1157: "f32[][]cuda:0" = _foreach_pow[121] + getitem_1158: "f32[][]cuda:0" = _foreach_pow[122] + getitem_1159: "f32[][]cuda:0" = _foreach_pow[123] + getitem_1160: "f32[][]cuda:0" = _foreach_pow[124] + getitem_1161: "f32[][]cuda:0" = _foreach_pow[125] + getitem_1162: "f32[][]cuda:0" = _foreach_pow[126] + getitem_1163: "f32[][]cuda:0" = _foreach_pow[127] + getitem_1164: "f32[][]cuda:0" = _foreach_pow[128] + getitem_1165: "f32[][]cuda:0" = _foreach_pow[129] + getitem_1166: "f32[][]cuda:0" = _foreach_pow[130] + getitem_1167: "f32[][]cuda:0" = _foreach_pow[131] + getitem_1168: "f32[][]cuda:0" = _foreach_pow[132] + getitem_1169: "f32[][]cuda:0" = _foreach_pow[133] + getitem_1170: "f32[][]cuda:0" = _foreach_pow[134] + getitem_1171: "f32[][]cuda:0" = _foreach_pow[135] + getitem_1172: "f32[][]cuda:0" = _foreach_pow[136] + getitem_1173: "f32[][]cuda:0" = _foreach_pow[137] + getitem_1174: "f32[][]cuda:0" = _foreach_pow[138] + getitem_1175: "f32[][]cuda:0" = _foreach_pow[139] + getitem_1176: "f32[][]cuda:0" = _foreach_pow[140] + getitem_1177: "f32[][]cuda:0" = _foreach_pow[141] + getitem_1178: "f32[][]cuda:0" = _foreach_pow[142] + getitem_1179: "f32[][]cuda:0" = _foreach_pow[143] + getitem_1180: "f32[][]cuda:0" = _foreach_pow[144] + getitem_1181: "f32[][]cuda:0" = _foreach_pow[145] + getitem_1182: "f32[][]cuda:0" = _foreach_pow[146] + getitem_1183: "f32[][]cuda:0" = _foreach_pow[147]; _foreach_pow = None + + # File: /data/users/jjwu/a/pytorch/torch/optim/adam.py:556 in _multi_tensor_adam, code: bias_correction2 = torch._foreach_pow(beta2, device_state_steps) + _foreach_pow_1 = torch.ops.aten._foreach_pow.ScalarAndTensor(0.999, [getitem, getitem_1, getitem_2, getitem_3, getitem_4, getitem_5, getitem_6, getitem_7, getitem_8, getitem_9, getitem_10, getitem_11, getitem_12, getitem_13, getitem_14, getitem_15, getitem_16, getitem_17, getitem_18, getitem_19, getitem_20, getitem_21, getitem_22, getitem_23, getitem_24, getitem_25, getitem_26, getitem_27, getitem_28, getitem_29, getitem_30, getitem_31, getitem_32, getitem_33, getitem_34, getitem_35, getitem_36, getitem_37, getitem_38, getitem_39, getitem_40, getitem_41, getitem_42, getitem_43, getitem_44, getitem_45, getitem_46, getitem_47, getitem_48, getitem_49, getitem_50, getitem_51, getitem_52, getitem_53, getitem_54, getitem_55, getitem_56, getitem_57, getitem_58, getitem_59, getitem_60, getitem_61, getitem_62, getitem_63, getitem_64, getitem_65, getitem_66, getitem_67, getitem_68, getitem_69, getitem_70, getitem_71, getitem_72, getitem_73, getitem_74, getitem_75, getitem_76, getitem_77, getitem_78, getitem_79, 
getitem_80, getitem_81, getitem_82, getitem_83, getitem_84, getitem_85, getitem_86, getitem_87, getitem_88, getitem_89, getitem_90, getitem_91, getitem_92, getitem_93, getitem_94, getitem_95, getitem_96, getitem_97, getitem_98, getitem_99, getitem_100, getitem_101, getitem_102, getitem_103, getitem_104, getitem_105, getitem_106, getitem_107, getitem_108, getitem_109, getitem_110, getitem_111, getitem_112, getitem_113, getitem_114, getitem_115, getitem_116, getitem_117, getitem_118, getitem_119, getitem_120, getitem_121, getitem_122, getitem_123, getitem_124, getitem_125, getitem_126, getitem_127, getitem_128, getitem_129, getitem_130, getitem_131, getitem_132, getitem_133, getitem_134, getitem_135, getitem_136, getitem_137, getitem_138, getitem_139, getitem_140, getitem_141, getitem_142, getitem_143, getitem_144, getitem_145, getitem_146, getitem_147]) + getitem_1184: "f32[][]cuda:0" = _foreach_pow_1[0] + getitem_1185: "f32[][]cuda:0" = _foreach_pow_1[1] + getitem_1186: "f32[][]cuda:0" = _foreach_pow_1[2] + getitem_1187: "f32[][]cuda:0" = _foreach_pow_1[3] + getitem_1188: "f32[][]cuda:0" = _foreach_pow_1[4] + getitem_1189: "f32[][]cuda:0" = _foreach_pow_1[5] + getitem_1190: "f32[][]cuda:0" = _foreach_pow_1[6] + getitem_1191: "f32[][]cuda:0" = _foreach_pow_1[7] + getitem_1192: "f32[][]cuda:0" = _foreach_pow_1[8] + getitem_1193: "f32[][]cuda:0" = _foreach_pow_1[9] + getitem_1194: "f32[][]cuda:0" = _foreach_pow_1[10] + getitem_1195: "f32[][]cuda:0" = _foreach_pow_1[11] + getitem_1196: "f32[][]cuda:0" = _foreach_pow_1[12] + getitem_1197: "f32[][]cuda:0" = _foreach_pow_1[13] + getitem_1198: "f32[][]cuda:0" = _foreach_pow_1[14] + getitem_1199: "f32[][]cuda:0" = _foreach_pow_1[15] + getitem_1200: "f32[][]cuda:0" = _foreach_pow_1[16] + getitem_1201: "f32[][]cuda:0" = _foreach_pow_1[17] + getitem_1202: "f32[][]cuda:0" = _foreach_pow_1[18] + getitem_1203: "f32[][]cuda:0" = _foreach_pow_1[19] + getitem_1204: "f32[][]cuda:0" = _foreach_pow_1[20] + getitem_1205: "f32[][]cuda:0" = _foreach_pow_1[21] + getitem_1206: "f32[][]cuda:0" = _foreach_pow_1[22] + getitem_1207: "f32[][]cuda:0" = _foreach_pow_1[23] + getitem_1208: "f32[][]cuda:0" = _foreach_pow_1[24] + getitem_1209: "f32[][]cuda:0" = _foreach_pow_1[25] + getitem_1210: "f32[][]cuda:0" = _foreach_pow_1[26] + getitem_1211: "f32[][]cuda:0" = _foreach_pow_1[27] + getitem_1212: "f32[][]cuda:0" = _foreach_pow_1[28] + getitem_1213: "f32[][]cuda:0" = _foreach_pow_1[29] + getitem_1214: "f32[][]cuda:0" = _foreach_pow_1[30] + getitem_1215: "f32[][]cuda:0" = _foreach_pow_1[31] + getitem_1216: "f32[][]cuda:0" = _foreach_pow_1[32] + getitem_1217: "f32[][]cuda:0" = _foreach_pow_1[33] + getitem_1218: "f32[][]cuda:0" = _foreach_pow_1[34] + getitem_1219: "f32[][]cuda:0" = _foreach_pow_1[35] + getitem_1220: "f32[][]cuda:0" = _foreach_pow_1[36] + getitem_1221: "f32[][]cuda:0" = _foreach_pow_1[37] + getitem_1222: "f32[][]cuda:0" = _foreach_pow_1[38] + getitem_1223: "f32[][]cuda:0" = _foreach_pow_1[39] + getitem_1224: "f32[][]cuda:0" = _foreach_pow_1[40] + getitem_1225: "f32[][]cuda:0" = _foreach_pow_1[41] + getitem_1226: "f32[][]cuda:0" = _foreach_pow_1[42] + getitem_1227: "f32[][]cuda:0" = _foreach_pow_1[43] + getitem_1228: "f32[][]cuda:0" = _foreach_pow_1[44] + getitem_1229: "f32[][]cuda:0" = _foreach_pow_1[45] + getitem_1230: "f32[][]cuda:0" = _foreach_pow_1[46] + getitem_1231: "f32[][]cuda:0" = _foreach_pow_1[47] + getitem_1232: "f32[][]cuda:0" = _foreach_pow_1[48] + getitem_1233: "f32[][]cuda:0" = _foreach_pow_1[49] + getitem_1234: "f32[][]cuda:0" = 
_foreach_pow_1[50] + getitem_1235: "f32[][]cuda:0" = _foreach_pow_1[51] + getitem_1236: "f32[][]cuda:0" = _foreach_pow_1[52] + getitem_1237: "f32[][]cuda:0" = _foreach_pow_1[53] + getitem_1238: "f32[][]cuda:0" = _foreach_pow_1[54] + getitem_1239: "f32[][]cuda:0" = _foreach_pow_1[55] + getitem_1240: "f32[][]cuda:0" = _foreach_pow_1[56] + getitem_1241: "f32[][]cuda:0" = _foreach_pow_1[57] + getitem_1242: "f32[][]cuda:0" = _foreach_pow_1[58] + getitem_1243: "f32[][]cuda:0" = _foreach_pow_1[59] + getitem_1244: "f32[][]cuda:0" = _foreach_pow_1[60] + getitem_1245: "f32[][]cuda:0" = _foreach_pow_1[61] + getitem_1246: "f32[][]cuda:0" = _foreach_pow_1[62] + getitem_1247: "f32[][]cuda:0" = _foreach_pow_1[63] + getitem_1248: "f32[][]cuda:0" = _foreach_pow_1[64] + getitem_1249: "f32[][]cuda:0" = _foreach_pow_1[65] + getitem_1250: "f32[][]cuda:0" = _foreach_pow_1[66] + getitem_1251: "f32[][]cuda:0" = _foreach_pow_1[67] + getitem_1252: "f32[][]cuda:0" = _foreach_pow_1[68] + getitem_1253: "f32[][]cuda:0" = _foreach_pow_1[69] + getitem_1254: "f32[][]cuda:0" = _foreach_pow_1[70] + getitem_1255: "f32[][]cuda:0" = _foreach_pow_1[71] + getitem_1256: "f32[][]cuda:0" = _foreach_pow_1[72] + getitem_1257: "f32[][]cuda:0" = _foreach_pow_1[73] + getitem_1258: "f32[][]cuda:0" = _foreach_pow_1[74] + getitem_1259: "f32[][]cuda:0" = _foreach_pow_1[75] + getitem_1260: "f32[][]cuda:0" = _foreach_pow_1[76] + getitem_1261: "f32[][]cuda:0" = _foreach_pow_1[77] + getitem_1262: "f32[][]cuda:0" = _foreach_pow_1[78] + getitem_1263: "f32[][]cuda:0" = _foreach_pow_1[79] + getitem_1264: "f32[][]cuda:0" = _foreach_pow_1[80] + getitem_1265: "f32[][]cuda:0" = _foreach_pow_1[81] + getitem_1266: "f32[][]cuda:0" = _foreach_pow_1[82] + getitem_1267: "f32[][]cuda:0" = _foreach_pow_1[83] + getitem_1268: "f32[][]cuda:0" = _foreach_pow_1[84] + getitem_1269: "f32[][]cuda:0" = _foreach_pow_1[85] + getitem_1270: "f32[][]cuda:0" = _foreach_pow_1[86] + getitem_1271: "f32[][]cuda:0" = _foreach_pow_1[87] + getitem_1272: "f32[][]cuda:0" = _foreach_pow_1[88] + getitem_1273: "f32[][]cuda:0" = _foreach_pow_1[89] + getitem_1274: "f32[][]cuda:0" = _foreach_pow_1[90] + getitem_1275: "f32[][]cuda:0" = _foreach_pow_1[91] + getitem_1276: "f32[][]cuda:0" = _foreach_pow_1[92] + getitem_1277: "f32[][]cuda:0" = _foreach_pow_1[93] + getitem_1278: "f32[][]cuda:0" = _foreach_pow_1[94] + getitem_1279: "f32[][]cuda:0" = _foreach_pow_1[95] + getitem_1280: "f32[][]cuda:0" = _foreach_pow_1[96] + getitem_1281: "f32[][]cuda:0" = _foreach_pow_1[97] + getitem_1282: "f32[][]cuda:0" = _foreach_pow_1[98] + getitem_1283: "f32[][]cuda:0" = _foreach_pow_1[99] + getitem_1284: "f32[][]cuda:0" = _foreach_pow_1[100] + getitem_1285: "f32[][]cuda:0" = _foreach_pow_1[101] + getitem_1286: "f32[][]cuda:0" = _foreach_pow_1[102] + getitem_1287: "f32[][]cuda:0" = _foreach_pow_1[103] + getitem_1288: "f32[][]cuda:0" = _foreach_pow_1[104] + getitem_1289: "f32[][]cuda:0" = _foreach_pow_1[105] + getitem_1290: "f32[][]cuda:0" = _foreach_pow_1[106] + getitem_1291: "f32[][]cuda:0" = _foreach_pow_1[107] + getitem_1292: "f32[][]cuda:0" = _foreach_pow_1[108] + getitem_1293: "f32[][]cuda:0" = _foreach_pow_1[109] + getitem_1294: "f32[][]cuda:0" = _foreach_pow_1[110] + getitem_1295: "f32[][]cuda:0" = _foreach_pow_1[111] + getitem_1296: "f32[][]cuda:0" = _foreach_pow_1[112] + getitem_1297: "f32[][]cuda:0" = _foreach_pow_1[113] + getitem_1298: "f32[][]cuda:0" = _foreach_pow_1[114] + getitem_1299: "f32[][]cuda:0" = _foreach_pow_1[115] + getitem_1300: "f32[][]cuda:0" = _foreach_pow_1[116] + getitem_1301: 
"f32[][]cuda:0" = _foreach_pow_1[117] + getitem_1302: "f32[][]cuda:0" = _foreach_pow_1[118] + getitem_1303: "f32[][]cuda:0" = _foreach_pow_1[119] + getitem_1304: "f32[][]cuda:0" = _foreach_pow_1[120] + getitem_1305: "f32[][]cuda:0" = _foreach_pow_1[121] + getitem_1306: "f32[][]cuda:0" = _foreach_pow_1[122] + getitem_1307: "f32[][]cuda:0" = _foreach_pow_1[123] + getitem_1308: "f32[][]cuda:0" = _foreach_pow_1[124] + getitem_1309: "f32[][]cuda:0" = _foreach_pow_1[125] + getitem_1310: "f32[][]cuda:0" = _foreach_pow_1[126] + getitem_1311: "f32[][]cuda:0" = _foreach_pow_1[127] + getitem_1312: "f32[][]cuda:0" = _foreach_pow_1[128] + getitem_1313: "f32[][]cuda:0" = _foreach_pow_1[129] + getitem_1314: "f32[][]cuda:0" = _foreach_pow_1[130] + getitem_1315: "f32[][]cuda:0" = _foreach_pow_1[131] + getitem_1316: "f32[][]cuda:0" = _foreach_pow_1[132] + getitem_1317: "f32[][]cuda:0" = _foreach_pow_1[133] + getitem_1318: "f32[][]cuda:0" = _foreach_pow_1[134] + getitem_1319: "f32[][]cuda:0" = _foreach_pow_1[135] + getitem_1320: "f32[][]cuda:0" = _foreach_pow_1[136] + getitem_1321: "f32[][]cuda:0" = _foreach_pow_1[137] + getitem_1322: "f32[][]cuda:0" = _foreach_pow_1[138] + getitem_1323: "f32[][]cuda:0" = _foreach_pow_1[139] + getitem_1324: "f32[][]cuda:0" = _foreach_pow_1[140] + getitem_1325: "f32[][]cuda:0" = _foreach_pow_1[141] + getitem_1326: "f32[][]cuda:0" = _foreach_pow_1[142] + getitem_1327: "f32[][]cuda:0" = _foreach_pow_1[143] + getitem_1328: "f32[][]cuda:0" = _foreach_pow_1[144] + getitem_1329: "f32[][]cuda:0" = _foreach_pow_1[145] + getitem_1330: "f32[][]cuda:0" = _foreach_pow_1[146] + getitem_1331: "f32[][]cuda:0" = _foreach_pow_1[147]; _foreach_pow_1 = None + + # File: /data/users/jjwu/a/pytorch/torch/optim/adam.py:558 in _multi_tensor_adam, code: torch._foreach_sub_(bias_correction1, 1) + _foreach_sub_1 = torch.ops.aten._foreach_sub.Scalar([getitem_1036, getitem_1037, getitem_1038, getitem_1039, getitem_1040, getitem_1041, getitem_1042, getitem_1043, getitem_1044, getitem_1045, getitem_1046, getitem_1047, getitem_1048, getitem_1049, getitem_1050, getitem_1051, getitem_1052, getitem_1053, getitem_1054, getitem_1055, getitem_1056, getitem_1057, getitem_1058, getitem_1059, getitem_1060, getitem_1061, getitem_1062, getitem_1063, getitem_1064, getitem_1065, getitem_1066, getitem_1067, getitem_1068, getitem_1069, getitem_1070, getitem_1071, getitem_1072, getitem_1073, getitem_1074, getitem_1075, getitem_1076, getitem_1077, getitem_1078, getitem_1079, getitem_1080, getitem_1081, getitem_1082, getitem_1083, getitem_1084, getitem_1085, getitem_1086, getitem_1087, getitem_1088, getitem_1089, getitem_1090, getitem_1091, getitem_1092, getitem_1093, getitem_1094, getitem_1095, getitem_1096, getitem_1097, getitem_1098, getitem_1099, getitem_1100, getitem_1101, getitem_1102, getitem_1103, getitem_1104, getitem_1105, getitem_1106, getitem_1107, getitem_1108, getitem_1109, getitem_1110, getitem_1111, getitem_1112, getitem_1113, getitem_1114, getitem_1115, getitem_1116, getitem_1117, getitem_1118, getitem_1119, getitem_1120, getitem_1121, getitem_1122, getitem_1123, getitem_1124, getitem_1125, getitem_1126, getitem_1127, getitem_1128, getitem_1129, getitem_1130, getitem_1131, getitem_1132, getitem_1133, getitem_1134, getitem_1135, getitem_1136, getitem_1137, getitem_1138, getitem_1139, getitem_1140, getitem_1141, getitem_1142, getitem_1143, getitem_1144, getitem_1145, getitem_1146, getitem_1147, getitem_1148, getitem_1149, getitem_1150, getitem_1151, getitem_1152, getitem_1153, getitem_1154, getitem_1155, 
getitem_1156, getitem_1157, getitem_1158, getitem_1159, getitem_1160, getitem_1161, getitem_1162, getitem_1163, getitem_1164, getitem_1165, getitem_1166, getitem_1167, getitem_1168, getitem_1169, getitem_1170, getitem_1171, getitem_1172, getitem_1173, getitem_1174, getitem_1175, getitem_1176, getitem_1177, getitem_1178, getitem_1179, getitem_1180, getitem_1181, getitem_1182, getitem_1183], 1); getitem_1036 = getitem_1037 = getitem_1038 = getitem_1039 = getitem_1040 = getitem_1041 = getitem_1042 = getitem_1043 = getitem_1044 = getitem_1045 = getitem_1046 = getitem_1047 = getitem_1048 = getitem_1049 = getitem_1050 = getitem_1051 = getitem_1052 = getitem_1053 = getitem_1054 = getitem_1055 = getitem_1056 = getitem_1057 = getitem_1058 = getitem_1059 = getitem_1060 = getitem_1061 = getitem_1062 = getitem_1063 = getitem_1064 = getitem_1065 = getitem_1066 = getitem_1067 = getitem_1068 = getitem_1069 = getitem_1070 = getitem_1071 = getitem_1072 = getitem_1073 = getitem_1074 = getitem_1075 = getitem_1076 = getitem_1077 = getitem_1078 = getitem_1079 = getitem_1080 = getitem_1081 = getitem_1082 = getitem_1083 = getitem_1084 = getitem_1085 = getitem_1086 = getitem_1087 = getitem_1088 = getitem_1089 = getitem_1090 = getitem_1091 = getitem_1092 = getitem_1093 = getitem_1094 = getitem_1095 = getitem_1096 = getitem_1097 = getitem_1098 = getitem_1099 = getitem_1100 = getitem_1101 = getitem_1102 = getitem_1103 = getitem_1104 = getitem_1105 = getitem_1106 = getitem_1107 = getitem_1108 = getitem_1109 = getitem_1110 = getitem_1111 = getitem_1112 = getitem_1113 = getitem_1114 = getitem_1115 = getitem_1116 = getitem_1117 = getitem_1118 = getitem_1119 = getitem_1120 = getitem_1121 = getitem_1122 = getitem_1123 = getitem_1124 = getitem_1125 = getitem_1126 = getitem_1127 = getitem_1128 = getitem_1129 = getitem_1130 = getitem_1131 = getitem_1132 = getitem_1133 = getitem_1134 = getitem_1135 = getitem_1136 = getitem_1137 = getitem_1138 = getitem_1139 = getitem_1140 = getitem_1141 = getitem_1142 = getitem_1143 = getitem_1144 = getitem_1145 = getitem_1146 = getitem_1147 = getitem_1148 = getitem_1149 = getitem_1150 = getitem_1151 = getitem_1152 = getitem_1153 = getitem_1154 = getitem_1155 = getitem_1156 = getitem_1157 = getitem_1158 = getitem_1159 = getitem_1160 = getitem_1161 = getitem_1162 = getitem_1163 = getitem_1164 = getitem_1165 = getitem_1166 = getitem_1167 = getitem_1168 = getitem_1169 = getitem_1170 = getitem_1171 = getitem_1172 = getitem_1173 = getitem_1174 = getitem_1175 = getitem_1176 = getitem_1177 = getitem_1178 = getitem_1179 = getitem_1180 = getitem_1181 = getitem_1182 = getitem_1183 = None + getitem_1332: "f32[][]cuda:0" = _foreach_sub_1[0] + getitem_1333: "f32[][]cuda:0" = _foreach_sub_1[1] + getitem_1334: "f32[][]cuda:0" = _foreach_sub_1[2] + getitem_1335: "f32[][]cuda:0" = _foreach_sub_1[3] + getitem_1336: "f32[][]cuda:0" = _foreach_sub_1[4] + getitem_1337: "f32[][]cuda:0" = _foreach_sub_1[5] + getitem_1338: "f32[][]cuda:0" = _foreach_sub_1[6] + getitem_1339: "f32[][]cuda:0" = _foreach_sub_1[7] + getitem_1340: "f32[][]cuda:0" = _foreach_sub_1[8] + getitem_1341: "f32[][]cuda:0" = _foreach_sub_1[9] + getitem_1342: "f32[][]cuda:0" = _foreach_sub_1[10] + getitem_1343: "f32[][]cuda:0" = _foreach_sub_1[11] + getitem_1344: "f32[][]cuda:0" = _foreach_sub_1[12] + getitem_1345: "f32[][]cuda:0" = _foreach_sub_1[13] + getitem_1346: "f32[][]cuda:0" = _foreach_sub_1[14] + getitem_1347: "f32[][]cuda:0" = _foreach_sub_1[15] + getitem_1348: "f32[][]cuda:0" = _foreach_sub_1[16] + getitem_1349: "f32[][]cuda:0" = 
_foreach_sub_1[17] + getitem_1350: "f32[][]cuda:0" = _foreach_sub_1[18] + getitem_1351: "f32[][]cuda:0" = _foreach_sub_1[19] + getitem_1352: "f32[][]cuda:0" = _foreach_sub_1[20] + getitem_1353: "f32[][]cuda:0" = _foreach_sub_1[21] + getitem_1354: "f32[][]cuda:0" = _foreach_sub_1[22] + getitem_1355: "f32[][]cuda:0" = _foreach_sub_1[23] + getitem_1356: "f32[][]cuda:0" = _foreach_sub_1[24] + getitem_1357: "f32[][]cuda:0" = _foreach_sub_1[25] + getitem_1358: "f32[][]cuda:0" = _foreach_sub_1[26] + getitem_1359: "f32[][]cuda:0" = _foreach_sub_1[27] + getitem_1360: "f32[][]cuda:0" = _foreach_sub_1[28] + getitem_1361: "f32[][]cuda:0" = _foreach_sub_1[29] + getitem_1362: "f32[][]cuda:0" = _foreach_sub_1[30] + getitem_1363: "f32[][]cuda:0" = _foreach_sub_1[31] + getitem_1364: "f32[][]cuda:0" = _foreach_sub_1[32] + getitem_1365: "f32[][]cuda:0" = _foreach_sub_1[33] + getitem_1366: "f32[][]cuda:0" = _foreach_sub_1[34] + getitem_1367: "f32[][]cuda:0" = _foreach_sub_1[35] + getitem_1368: "f32[][]cuda:0" = _foreach_sub_1[36] + getitem_1369: "f32[][]cuda:0" = _foreach_sub_1[37] + getitem_1370: "f32[][]cuda:0" = _foreach_sub_1[38] + getitem_1371: "f32[][]cuda:0" = _foreach_sub_1[39] + getitem_1372: "f32[][]cuda:0" = _foreach_sub_1[40] + getitem_1373: "f32[][]cuda:0" = _foreach_sub_1[41] + getitem_1374: "f32[][]cuda:0" = _foreach_sub_1[42] + getitem_1375: "f32[][]cuda:0" = _foreach_sub_1[43] + getitem_1376: "f32[][]cuda:0" = _foreach_sub_1[44] + getitem_1377: "f32[][]cuda:0" = _foreach_sub_1[45] + getitem_1378: "f32[][]cuda:0" = _foreach_sub_1[46] + getitem_1379: "f32[][]cuda:0" = _foreach_sub_1[47] + getitem_1380: "f32[][]cuda:0" = _foreach_sub_1[48] + getitem_1381: "f32[][]cuda:0" = _foreach_sub_1[49] + getitem_1382: "f32[][]cuda:0" = _foreach_sub_1[50] + getitem_1383: "f32[][]cuda:0" = _foreach_sub_1[51] + getitem_1384: "f32[][]cuda:0" = _foreach_sub_1[52] + getitem_1385: "f32[][]cuda:0" = _foreach_sub_1[53] + getitem_1386: "f32[][]cuda:0" = _foreach_sub_1[54] + getitem_1387: "f32[][]cuda:0" = _foreach_sub_1[55] + getitem_1388: "f32[][]cuda:0" = _foreach_sub_1[56] + getitem_1389: "f32[][]cuda:0" = _foreach_sub_1[57] + getitem_1390: "f32[][]cuda:0" = _foreach_sub_1[58] + getitem_1391: "f32[][]cuda:0" = _foreach_sub_1[59] + getitem_1392: "f32[][]cuda:0" = _foreach_sub_1[60] + getitem_1393: "f32[][]cuda:0" = _foreach_sub_1[61] + getitem_1394: "f32[][]cuda:0" = _foreach_sub_1[62] + getitem_1395: "f32[][]cuda:0" = _foreach_sub_1[63] + getitem_1396: "f32[][]cuda:0" = _foreach_sub_1[64] + getitem_1397: "f32[][]cuda:0" = _foreach_sub_1[65] + getitem_1398: "f32[][]cuda:0" = _foreach_sub_1[66] + getitem_1399: "f32[][]cuda:0" = _foreach_sub_1[67] + getitem_1400: "f32[][]cuda:0" = _foreach_sub_1[68] + getitem_1401: "f32[][]cuda:0" = _foreach_sub_1[69] + getitem_1402: "f32[][]cuda:0" = _foreach_sub_1[70] + getitem_1403: "f32[][]cuda:0" = _foreach_sub_1[71] + getitem_1404: "f32[][]cuda:0" = _foreach_sub_1[72] + getitem_1405: "f32[][]cuda:0" = _foreach_sub_1[73] + getitem_1406: "f32[][]cuda:0" = _foreach_sub_1[74] + getitem_1407: "f32[][]cuda:0" = _foreach_sub_1[75] + getitem_1408: "f32[][]cuda:0" = _foreach_sub_1[76] + getitem_1409: "f32[][]cuda:0" = _foreach_sub_1[77] + getitem_1410: "f32[][]cuda:0" = _foreach_sub_1[78] + getitem_1411: "f32[][]cuda:0" = _foreach_sub_1[79] + getitem_1412: "f32[][]cuda:0" = _foreach_sub_1[80] + getitem_1413: "f32[][]cuda:0" = _foreach_sub_1[81] + getitem_1414: "f32[][]cuda:0" = _foreach_sub_1[82] + getitem_1415: "f32[][]cuda:0" = _foreach_sub_1[83] + getitem_1416: "f32[][]cuda:0" = 
_foreach_sub_1[84] + getitem_1417: "f32[][]cuda:0" = _foreach_sub_1[85] + getitem_1418: "f32[][]cuda:0" = _foreach_sub_1[86] + getitem_1419: "f32[][]cuda:0" = _foreach_sub_1[87] + getitem_1420: "f32[][]cuda:0" = _foreach_sub_1[88] + getitem_1421: "f32[][]cuda:0" = _foreach_sub_1[89] + getitem_1422: "f32[][]cuda:0" = _foreach_sub_1[90] + getitem_1423: "f32[][]cuda:0" = _foreach_sub_1[91] + getitem_1424: "f32[][]cuda:0" = _foreach_sub_1[92] + getitem_1425: "f32[][]cuda:0" = _foreach_sub_1[93] + getitem_1426: "f32[][]cuda:0" = _foreach_sub_1[94] + getitem_1427: "f32[][]cuda:0" = _foreach_sub_1[95] + getitem_1428: "f32[][]cuda:0" = _foreach_sub_1[96] + getitem_1429: "f32[][]cuda:0" = _foreach_sub_1[97] + getitem_1430: "f32[][]cuda:0" = _foreach_sub_1[98] + getitem_1431: "f32[][]cuda:0" = _foreach_sub_1[99] + getitem_1432: "f32[][]cuda:0" = _foreach_sub_1[100] + getitem_1433: "f32[][]cuda:0" = _foreach_sub_1[101] + getitem_1434: "f32[][]cuda:0" = _foreach_sub_1[102] + getitem_1435: "f32[][]cuda:0" = _foreach_sub_1[103] + getitem_1436: "f32[][]cuda:0" = _foreach_sub_1[104] + getitem_1437: "f32[][]cuda:0" = _foreach_sub_1[105] + getitem_1438: "f32[][]cuda:0" = _foreach_sub_1[106] + getitem_1439: "f32[][]cuda:0" = _foreach_sub_1[107] + getitem_1440: "f32[][]cuda:0" = _foreach_sub_1[108] + getitem_1441: "f32[][]cuda:0" = _foreach_sub_1[109] + getitem_1442: "f32[][]cuda:0" = _foreach_sub_1[110] + getitem_1443: "f32[][]cuda:0" = _foreach_sub_1[111] + getitem_1444: "f32[][]cuda:0" = _foreach_sub_1[112] + getitem_1445: "f32[][]cuda:0" = _foreach_sub_1[113] + getitem_1446: "f32[][]cuda:0" = _foreach_sub_1[114] + getitem_1447: "f32[][]cuda:0" = _foreach_sub_1[115] + getitem_1448: "f32[][]cuda:0" = _foreach_sub_1[116] + getitem_1449: "f32[][]cuda:0" = _foreach_sub_1[117] + getitem_1450: "f32[][]cuda:0" = _foreach_sub_1[118] + getitem_1451: "f32[][]cuda:0" = _foreach_sub_1[119] + getitem_1452: "f32[][]cuda:0" = _foreach_sub_1[120] + getitem_1453: "f32[][]cuda:0" = _foreach_sub_1[121] + getitem_1454: "f32[][]cuda:0" = _foreach_sub_1[122] + getitem_1455: "f32[][]cuda:0" = _foreach_sub_1[123] + getitem_1456: "f32[][]cuda:0" = _foreach_sub_1[124] + getitem_1457: "f32[][]cuda:0" = _foreach_sub_1[125] + getitem_1458: "f32[][]cuda:0" = _foreach_sub_1[126] + getitem_1459: "f32[][]cuda:0" = _foreach_sub_1[127] + getitem_1460: "f32[][]cuda:0" = _foreach_sub_1[128] + getitem_1461: "f32[][]cuda:0" = _foreach_sub_1[129] + getitem_1462: "f32[][]cuda:0" = _foreach_sub_1[130] + getitem_1463: "f32[][]cuda:0" = _foreach_sub_1[131] + getitem_1464: "f32[][]cuda:0" = _foreach_sub_1[132] + getitem_1465: "f32[][]cuda:0" = _foreach_sub_1[133] + getitem_1466: "f32[][]cuda:0" = _foreach_sub_1[134] + getitem_1467: "f32[][]cuda:0" = _foreach_sub_1[135] + getitem_1468: "f32[][]cuda:0" = _foreach_sub_1[136] + getitem_1469: "f32[][]cuda:0" = _foreach_sub_1[137] + getitem_1470: "f32[][]cuda:0" = _foreach_sub_1[138] + getitem_1471: "f32[][]cuda:0" = _foreach_sub_1[139] + getitem_1472: "f32[][]cuda:0" = _foreach_sub_1[140] + getitem_1473: "f32[][]cuda:0" = _foreach_sub_1[141] + getitem_1474: "f32[][]cuda:0" = _foreach_sub_1[142] + getitem_1475: "f32[][]cuda:0" = _foreach_sub_1[143] + getitem_1476: "f32[][]cuda:0" = _foreach_sub_1[144] + getitem_1477: "f32[][]cuda:0" = _foreach_sub_1[145] + getitem_1478: "f32[][]cuda:0" = _foreach_sub_1[146] + getitem_1479: "f32[][]cuda:0" = _foreach_sub_1[147]; _foreach_sub_1 = None + + # File: /data/users/jjwu/a/pytorch/torch/optim/adam.py:559 in _multi_tensor_adam, code: 
torch._foreach_sub_(bias_correction2, 1) + _foreach_sub_2 = torch.ops.aten._foreach_sub.Scalar([getitem_1184, getitem_1185, getitem_1186, getitem_1187, getitem_1188, getitem_1189, getitem_1190, getitem_1191, getitem_1192, getitem_1193, getitem_1194, getitem_1195, getitem_1196, getitem_1197, getitem_1198, getitem_1199, getitem_1200, getitem_1201, getitem_1202, getitem_1203, getitem_1204, getitem_1205, getitem_1206, getitem_1207, getitem_1208, getitem_1209, getitem_1210, getitem_1211, getitem_1212, getitem_1213, getitem_1214, getitem_1215, getitem_1216, getitem_1217, getitem_1218, getitem_1219, getitem_1220, getitem_1221, getitem_1222, getitem_1223, getitem_1224, getitem_1225, getitem_1226, getitem_1227, getitem_1228, getitem_1229, getitem_1230, getitem_1231, getitem_1232, getitem_1233, getitem_1234, getitem_1235, getitem_1236, getitem_1237, getitem_1238, getitem_1239, getitem_1240, getitem_1241, getitem_1242, getitem_1243, getitem_1244, getitem_1245, getitem_1246, getitem_1247, getitem_1248, getitem_1249, getitem_1250, getitem_1251, getitem_1252, getitem_1253, getitem_1254, getitem_1255, getitem_1256, getitem_1257, getitem_1258, getitem_1259, getitem_1260, getitem_1261, getitem_1262, getitem_1263, getitem_1264, getitem_1265, getitem_1266, getitem_1267, getitem_1268, getitem_1269, getitem_1270, getitem_1271, getitem_1272, getitem_1273, getitem_1274, getitem_1275, getitem_1276, getitem_1277, getitem_1278, getitem_1279, getitem_1280, getitem_1281, getitem_1282, getitem_1283, getitem_1284, getitem_1285, getitem_1286, getitem_1287, getitem_1288, getitem_1289, getitem_1290, getitem_1291, getitem_1292, getitem_1293, getitem_1294, getitem_1295, getitem_1296, getitem_1297, getitem_1298, getitem_1299, getitem_1300, getitem_1301, getitem_1302, getitem_1303, getitem_1304, getitem_1305, getitem_1306, getitem_1307, getitem_1308, getitem_1309, getitem_1310, getitem_1311, getitem_1312, getitem_1313, getitem_1314, getitem_1315, getitem_1316, getitem_1317, getitem_1318, getitem_1319, getitem_1320, getitem_1321, getitem_1322, getitem_1323, getitem_1324, getitem_1325, getitem_1326, getitem_1327, getitem_1328, getitem_1329, getitem_1330, getitem_1331], 1); getitem_1184 = getitem_1185 = getitem_1186 = getitem_1187 = getitem_1188 = getitem_1189 = getitem_1190 = getitem_1191 = getitem_1192 = getitem_1193 = getitem_1194 = getitem_1195 = getitem_1196 = getitem_1197 = getitem_1198 = getitem_1199 = getitem_1200 = getitem_1201 = getitem_1202 = getitem_1203 = getitem_1204 = getitem_1205 = getitem_1206 = getitem_1207 = getitem_1208 = getitem_1209 = getitem_1210 = getitem_1211 = getitem_1212 = getitem_1213 = getitem_1214 = getitem_1215 = getitem_1216 = getitem_1217 = getitem_1218 = getitem_1219 = getitem_1220 = getitem_1221 = getitem_1222 = getitem_1223 = getitem_1224 = getitem_1225 = getitem_1226 = getitem_1227 = getitem_1228 = getitem_1229 = getitem_1230 = getitem_1231 = getitem_1232 = getitem_1233 = getitem_1234 = getitem_1235 = getitem_1236 = getitem_1237 = getitem_1238 = getitem_1239 = getitem_1240 = getitem_1241 = getitem_1242 = getitem_1243 = getitem_1244 = getitem_1245 = getitem_1246 = getitem_1247 = getitem_1248 = getitem_1249 = getitem_1250 = getitem_1251 = getitem_1252 = getitem_1253 = getitem_1254 = getitem_1255 = getitem_1256 = getitem_1257 = getitem_1258 = getitem_1259 = getitem_1260 = getitem_1261 = getitem_1262 = getitem_1263 = getitem_1264 = getitem_1265 = getitem_1266 = getitem_1267 = getitem_1268 = getitem_1269 = getitem_1270 = getitem_1271 = getitem_1272 = getitem_1273 = getitem_1274 = getitem_1275 = 
getitem_1276 = getitem_1277 = getitem_1278 = getitem_1279 = getitem_1280 = getitem_1281 = getitem_1282 = getitem_1283 = getitem_1284 = getitem_1285 = getitem_1286 = getitem_1287 = getitem_1288 = getitem_1289 = getitem_1290 = getitem_1291 = getitem_1292 = getitem_1293 = getitem_1294 = getitem_1295 = getitem_1296 = getitem_1297 = getitem_1298 = getitem_1299 = getitem_1300 = getitem_1301 = getitem_1302 = getitem_1303 = getitem_1304 = getitem_1305 = getitem_1306 = getitem_1307 = getitem_1308 = getitem_1309 = getitem_1310 = getitem_1311 = getitem_1312 = getitem_1313 = getitem_1314 = getitem_1315 = getitem_1316 = getitem_1317 = getitem_1318 = getitem_1319 = getitem_1320 = getitem_1321 = getitem_1322 = getitem_1323 = getitem_1324 = getitem_1325 = getitem_1326 = getitem_1327 = getitem_1328 = getitem_1329 = getitem_1330 = getitem_1331 = None + getitem_1480: "f32[][]cuda:0" = _foreach_sub_2[0] + getitem_1481: "f32[][]cuda:0" = _foreach_sub_2[1] + getitem_1482: "f32[][]cuda:0" = _foreach_sub_2[2] + getitem_1483: "f32[][]cuda:0" = _foreach_sub_2[3] + getitem_1484: "f32[][]cuda:0" = _foreach_sub_2[4] + getitem_1485: "f32[][]cuda:0" = _foreach_sub_2[5] + getitem_1486: "f32[][]cuda:0" = _foreach_sub_2[6] + getitem_1487: "f32[][]cuda:0" = _foreach_sub_2[7] + getitem_1488: "f32[][]cuda:0" = _foreach_sub_2[8] + getitem_1489: "f32[][]cuda:0" = _foreach_sub_2[9] + getitem_1490: "f32[][]cuda:0" = _foreach_sub_2[10] + getitem_1491: "f32[][]cuda:0" = _foreach_sub_2[11] + getitem_1492: "f32[][]cuda:0" = _foreach_sub_2[12] + getitem_1493: "f32[][]cuda:0" = _foreach_sub_2[13] + getitem_1494: "f32[][]cuda:0" = _foreach_sub_2[14] + getitem_1495: "f32[][]cuda:0" = _foreach_sub_2[15] + getitem_1496: "f32[][]cuda:0" = _foreach_sub_2[16] + getitem_1497: "f32[][]cuda:0" = _foreach_sub_2[17] + getitem_1498: "f32[][]cuda:0" = _foreach_sub_2[18] + getitem_1499: "f32[][]cuda:0" = _foreach_sub_2[19] + getitem_1500: "f32[][]cuda:0" = _foreach_sub_2[20] + getitem_1501: "f32[][]cuda:0" = _foreach_sub_2[21] + getitem_1502: "f32[][]cuda:0" = _foreach_sub_2[22] + getitem_1503: "f32[][]cuda:0" = _foreach_sub_2[23] + getitem_1504: "f32[][]cuda:0" = _foreach_sub_2[24] + getitem_1505: "f32[][]cuda:0" = _foreach_sub_2[25] + getitem_1506: "f32[][]cuda:0" = _foreach_sub_2[26] + getitem_1507: "f32[][]cuda:0" = _foreach_sub_2[27] + getitem_1508: "f32[][]cuda:0" = _foreach_sub_2[28] + getitem_1509: "f32[][]cuda:0" = _foreach_sub_2[29] + getitem_1510: "f32[][]cuda:0" = _foreach_sub_2[30] + getitem_1511: "f32[][]cuda:0" = _foreach_sub_2[31] + getitem_1512: "f32[][]cuda:0" = _foreach_sub_2[32] + getitem_1513: "f32[][]cuda:0" = _foreach_sub_2[33] + getitem_1514: "f32[][]cuda:0" = _foreach_sub_2[34] + getitem_1515: "f32[][]cuda:0" = _foreach_sub_2[35] + getitem_1516: "f32[][]cuda:0" = _foreach_sub_2[36] + getitem_1517: "f32[][]cuda:0" = _foreach_sub_2[37] + getitem_1518: "f32[][]cuda:0" = _foreach_sub_2[38] + getitem_1519: "f32[][]cuda:0" = _foreach_sub_2[39] + getitem_1520: "f32[][]cuda:0" = _foreach_sub_2[40] + getitem_1521: "f32[][]cuda:0" = _foreach_sub_2[41] + getitem_1522: "f32[][]cuda:0" = _foreach_sub_2[42] + getitem_1523: "f32[][]cuda:0" = _foreach_sub_2[43] + getitem_1524: "f32[][]cuda:0" = _foreach_sub_2[44] + getitem_1525: "f32[][]cuda:0" = _foreach_sub_2[45] + getitem_1526: "f32[][]cuda:0" = _foreach_sub_2[46] + getitem_1527: "f32[][]cuda:0" = _foreach_sub_2[47] + getitem_1528: "f32[][]cuda:0" = _foreach_sub_2[48] + getitem_1529: "f32[][]cuda:0" = _foreach_sub_2[49] + getitem_1530: "f32[][]cuda:0" = _foreach_sub_2[50] + getitem_1531: 
"f32[][]cuda:0" = _foreach_sub_2[51] + getitem_1532: "f32[][]cuda:0" = _foreach_sub_2[52] + getitem_1533: "f32[][]cuda:0" = _foreach_sub_2[53] + getitem_1534: "f32[][]cuda:0" = _foreach_sub_2[54] + getitem_1535: "f32[][]cuda:0" = _foreach_sub_2[55] + getitem_1536: "f32[][]cuda:0" = _foreach_sub_2[56] + getitem_1537: "f32[][]cuda:0" = _foreach_sub_2[57] + getitem_1538: "f32[][]cuda:0" = _foreach_sub_2[58] + getitem_1539: "f32[][]cuda:0" = _foreach_sub_2[59] + getitem_1540: "f32[][]cuda:0" = _foreach_sub_2[60] + getitem_1541: "f32[][]cuda:0" = _foreach_sub_2[61] + getitem_1542: "f32[][]cuda:0" = _foreach_sub_2[62] + getitem_1543: "f32[][]cuda:0" = _foreach_sub_2[63] + getitem_1544: "f32[][]cuda:0" = _foreach_sub_2[64] + getitem_1545: "f32[][]cuda:0" = _foreach_sub_2[65] + getitem_1546: "f32[][]cuda:0" = _foreach_sub_2[66] + getitem_1547: "f32[][]cuda:0" = _foreach_sub_2[67] + getitem_1548: "f32[][]cuda:0" = _foreach_sub_2[68] + getitem_1549: "f32[][]cuda:0" = _foreach_sub_2[69] + getitem_1550: "f32[][]cuda:0" = _foreach_sub_2[70] + getitem_1551: "f32[][]cuda:0" = _foreach_sub_2[71] + getitem_1552: "f32[][]cuda:0" = _foreach_sub_2[72] + getitem_1553: "f32[][]cuda:0" = _foreach_sub_2[73] + getitem_1554: "f32[][]cuda:0" = _foreach_sub_2[74] + getitem_1555: "f32[][]cuda:0" = _foreach_sub_2[75] + getitem_1556: "f32[][]cuda:0" = _foreach_sub_2[76] + getitem_1557: "f32[][]cuda:0" = _foreach_sub_2[77] + getitem_1558: "f32[][]cuda:0" = _foreach_sub_2[78] + getitem_1559: "f32[][]cuda:0" = _foreach_sub_2[79] + getitem_1560: "f32[][]cuda:0" = _foreach_sub_2[80] + getitem_1561: "f32[][]cuda:0" = _foreach_sub_2[81] + getitem_1562: "f32[][]cuda:0" = _foreach_sub_2[82] + getitem_1563: "f32[][]cuda:0" = _foreach_sub_2[83] + getitem_1564: "f32[][]cuda:0" = _foreach_sub_2[84] + getitem_1565: "f32[][]cuda:0" = _foreach_sub_2[85] + getitem_1566: "f32[][]cuda:0" = _foreach_sub_2[86] + getitem_1567: "f32[][]cuda:0" = _foreach_sub_2[87] + getitem_1568: "f32[][]cuda:0" = _foreach_sub_2[88] + getitem_1569: "f32[][]cuda:0" = _foreach_sub_2[89] + getitem_1570: "f32[][]cuda:0" = _foreach_sub_2[90] + getitem_1571: "f32[][]cuda:0" = _foreach_sub_2[91] + getitem_1572: "f32[][]cuda:0" = _foreach_sub_2[92] + getitem_1573: "f32[][]cuda:0" = _foreach_sub_2[93] + getitem_1574: "f32[][]cuda:0" = _foreach_sub_2[94] + getitem_1575: "f32[][]cuda:0" = _foreach_sub_2[95] + getitem_1576: "f32[][]cuda:0" = _foreach_sub_2[96] + getitem_1577: "f32[][]cuda:0" = _foreach_sub_2[97] + getitem_1578: "f32[][]cuda:0" = _foreach_sub_2[98] + getitem_1579: "f32[][]cuda:0" = _foreach_sub_2[99] + getitem_1580: "f32[][]cuda:0" = _foreach_sub_2[100] + getitem_1581: "f32[][]cuda:0" = _foreach_sub_2[101] + getitem_1582: "f32[][]cuda:0" = _foreach_sub_2[102] + getitem_1583: "f32[][]cuda:0" = _foreach_sub_2[103] + getitem_1584: "f32[][]cuda:0" = _foreach_sub_2[104] + getitem_1585: "f32[][]cuda:0" = _foreach_sub_2[105] + getitem_1586: "f32[][]cuda:0" = _foreach_sub_2[106] + getitem_1587: "f32[][]cuda:0" = _foreach_sub_2[107] + getitem_1588: "f32[][]cuda:0" = _foreach_sub_2[108] + getitem_1589: "f32[][]cuda:0" = _foreach_sub_2[109] + getitem_1590: "f32[][]cuda:0" = _foreach_sub_2[110] + getitem_1591: "f32[][]cuda:0" = _foreach_sub_2[111] + getitem_1592: "f32[][]cuda:0" = _foreach_sub_2[112] + getitem_1593: "f32[][]cuda:0" = _foreach_sub_2[113] + getitem_1594: "f32[][]cuda:0" = _foreach_sub_2[114] + getitem_1595: "f32[][]cuda:0" = _foreach_sub_2[115] + getitem_1596: "f32[][]cuda:0" = _foreach_sub_2[116] + getitem_1597: "f32[][]cuda:0" = _foreach_sub_2[117] + 
getitem_1598: "f32[][]cuda:0" = _foreach_sub_2[118] + getitem_1599: "f32[][]cuda:0" = _foreach_sub_2[119] + getitem_1600: "f32[][]cuda:0" = _foreach_sub_2[120] + getitem_1601: "f32[][]cuda:0" = _foreach_sub_2[121] + getitem_1602: "f32[][]cuda:0" = _foreach_sub_2[122] + getitem_1603: "f32[][]cuda:0" = _foreach_sub_2[123] + getitem_1604: "f32[][]cuda:0" = _foreach_sub_2[124] + getitem_1605: "f32[][]cuda:0" = _foreach_sub_2[125] + getitem_1606: "f32[][]cuda:0" = _foreach_sub_2[126] + getitem_1607: "f32[][]cuda:0" = _foreach_sub_2[127] + getitem_1608: "f32[][]cuda:0" = _foreach_sub_2[128] + getitem_1609: "f32[][]cuda:0" = _foreach_sub_2[129] + getitem_1610: "f32[][]cuda:0" = _foreach_sub_2[130] + getitem_1611: "f32[][]cuda:0" = _foreach_sub_2[131] + getitem_1612: "f32[][]cuda:0" = _foreach_sub_2[132] + getitem_1613: "f32[][]cuda:0" = _foreach_sub_2[133] + getitem_1614: "f32[][]cuda:0" = _foreach_sub_2[134] + getitem_1615: "f32[][]cuda:0" = _foreach_sub_2[135] + getitem_1616: "f32[][]cuda:0" = _foreach_sub_2[136] + getitem_1617: "f32[][]cuda:0" = _foreach_sub_2[137] + getitem_1618: "f32[][]cuda:0" = _foreach_sub_2[138] + getitem_1619: "f32[][]cuda:0" = _foreach_sub_2[139] + getitem_1620: "f32[][]cuda:0" = _foreach_sub_2[140] + getitem_1621: "f32[][]cuda:0" = _foreach_sub_2[141] + getitem_1622: "f32[][]cuda:0" = _foreach_sub_2[142] + getitem_1623: "f32[][]cuda:0" = _foreach_sub_2[143] + getitem_1624: "f32[][]cuda:0" = _foreach_sub_2[144] + getitem_1625: "f32[][]cuda:0" = _foreach_sub_2[145] + getitem_1626: "f32[][]cuda:0" = _foreach_sub_2[146] + getitem_1627: "f32[][]cuda:0" = _foreach_sub_2[147]; _foreach_sub_2 = None + + # File: /data/users/jjwu/a/pytorch/torch/optim/adam.py:561 in _multi_tensor_adam, code: torch._foreach_neg_(bias_correction2) + _foreach_neg = torch.ops.aten._foreach_neg.default([getitem_1480, getitem_1481, getitem_1482, getitem_1483, getitem_1484, getitem_1485, getitem_1486, getitem_1487, getitem_1488, getitem_1489, getitem_1490, getitem_1491, getitem_1492, getitem_1493, getitem_1494, getitem_1495, getitem_1496, getitem_1497, getitem_1498, getitem_1499, getitem_1500, getitem_1501, getitem_1502, getitem_1503, getitem_1504, getitem_1505, getitem_1506, getitem_1507, getitem_1508, getitem_1509, getitem_1510, getitem_1511, getitem_1512, getitem_1513, getitem_1514, getitem_1515, getitem_1516, getitem_1517, getitem_1518, getitem_1519, getitem_1520, getitem_1521, getitem_1522, getitem_1523, getitem_1524, getitem_1525, getitem_1526, getitem_1527, getitem_1528, getitem_1529, getitem_1530, getitem_1531, getitem_1532, getitem_1533, getitem_1534, getitem_1535, getitem_1536, getitem_1537, getitem_1538, getitem_1539, getitem_1540, getitem_1541, getitem_1542, getitem_1543, getitem_1544, getitem_1545, getitem_1546, getitem_1547, getitem_1548, getitem_1549, getitem_1550, getitem_1551, getitem_1552, getitem_1553, getitem_1554, getitem_1555, getitem_1556, getitem_1557, getitem_1558, getitem_1559, getitem_1560, getitem_1561, getitem_1562, getitem_1563, getitem_1564, getitem_1565, getitem_1566, getitem_1567, getitem_1568, getitem_1569, getitem_1570, getitem_1571, getitem_1572, getitem_1573, getitem_1574, getitem_1575, getitem_1576, getitem_1577, getitem_1578, getitem_1579, getitem_1580, getitem_1581, getitem_1582, getitem_1583, getitem_1584, getitem_1585, getitem_1586, getitem_1587, getitem_1588, getitem_1589, getitem_1590, getitem_1591, getitem_1592, getitem_1593, getitem_1594, getitem_1595, getitem_1596, getitem_1597, getitem_1598, getitem_1599, getitem_1600, getitem_1601, getitem_1602, 
getitem_1603, getitem_1604, getitem_1605, getitem_1606, getitem_1607, getitem_1608, getitem_1609, getitem_1610, getitem_1611, getitem_1612, getitem_1613, getitem_1614, getitem_1615, getitem_1616, getitem_1617, getitem_1618, getitem_1619, getitem_1620, getitem_1621, getitem_1622, getitem_1623, getitem_1624, getitem_1625, getitem_1626, getitem_1627]); getitem_1480 = getitem_1481 = getitem_1482 = getitem_1483 = getitem_1484 = getitem_1485 = getitem_1486 = getitem_1487 = getitem_1488 = getitem_1489 = getitem_1490 = getitem_1491 = getitem_1492 = getitem_1493 = getitem_1494 = getitem_1495 = getitem_1496 = getitem_1497 = getitem_1498 = getitem_1499 = getitem_1500 = getitem_1501 = getitem_1502 = getitem_1503 = getitem_1504 = getitem_1505 = getitem_1506 = getitem_1507 = getitem_1508 = getitem_1509 = getitem_1510 = getitem_1511 = getitem_1512 = getitem_1513 = getitem_1514 = getitem_1515 = getitem_1516 = getitem_1517 = getitem_1518 = getitem_1519 = getitem_1520 = getitem_1521 = getitem_1522 = getitem_1523 = getitem_1524 = getitem_1525 = getitem_1526 = getitem_1527 = getitem_1528 = getitem_1529 = getitem_1530 = getitem_1531 = getitem_1532 = getitem_1533 = getitem_1534 = getitem_1535 = getitem_1536 = getitem_1537 = getitem_1538 = getitem_1539 = getitem_1540 = getitem_1541 = getitem_1542 = getitem_1543 = getitem_1544 = getitem_1545 = getitem_1546 = getitem_1547 = getitem_1548 = getitem_1549 = getitem_1550 = getitem_1551 = getitem_1552 = getitem_1553 = getitem_1554 = getitem_1555 = getitem_1556 = getitem_1557 = getitem_1558 = getitem_1559 = getitem_1560 = getitem_1561 = getitem_1562 = getitem_1563 = getitem_1564 = getitem_1565 = getitem_1566 = getitem_1567 = getitem_1568 = getitem_1569 = getitem_1570 = getitem_1571 = getitem_1572 = getitem_1573 = getitem_1574 = getitem_1575 = getitem_1576 = getitem_1577 = getitem_1578 = getitem_1579 = getitem_1580 = getitem_1581 = getitem_1582 = getitem_1583 = getitem_1584 = getitem_1585 = getitem_1586 = getitem_1587 = getitem_1588 = getitem_1589 = getitem_1590 = getitem_1591 = getitem_1592 = getitem_1593 = getitem_1594 = getitem_1595 = getitem_1596 = getitem_1597 = getitem_1598 = getitem_1599 = getitem_1600 = getitem_1601 = getitem_1602 = getitem_1603 = getitem_1604 = getitem_1605 = getitem_1606 = getitem_1607 = getitem_1608 = getitem_1609 = getitem_1610 = getitem_1611 = getitem_1612 = getitem_1613 = getitem_1614 = getitem_1615 = getitem_1616 = getitem_1617 = getitem_1618 = getitem_1619 = getitem_1620 = getitem_1621 = getitem_1622 = getitem_1623 = getitem_1624 = getitem_1625 = getitem_1626 = getitem_1627 = None + getitem_1628: "f32[][]cuda:0" = _foreach_neg[0] + getitem_1629: "f32[][]cuda:0" = _foreach_neg[1] + getitem_1630: "f32[][]cuda:0" = _foreach_neg[2] + getitem_1631: "f32[][]cuda:0" = _foreach_neg[3] + getitem_1632: "f32[][]cuda:0" = _foreach_neg[4] + getitem_1633: "f32[][]cuda:0" = _foreach_neg[5] + getitem_1634: "f32[][]cuda:0" = _foreach_neg[6] + getitem_1635: "f32[][]cuda:0" = _foreach_neg[7] + getitem_1636: "f32[][]cuda:0" = _foreach_neg[8] + getitem_1637: "f32[][]cuda:0" = _foreach_neg[9] + getitem_1638: "f32[][]cuda:0" = _foreach_neg[10] + getitem_1639: "f32[][]cuda:0" = _foreach_neg[11] + getitem_1640: "f32[][]cuda:0" = _foreach_neg[12] + getitem_1641: "f32[][]cuda:0" = _foreach_neg[13] + getitem_1642: "f32[][]cuda:0" = _foreach_neg[14] + getitem_1643: "f32[][]cuda:0" = _foreach_neg[15] + getitem_1644: "f32[][]cuda:0" = _foreach_neg[16] + getitem_1645: "f32[][]cuda:0" = _foreach_neg[17] + getitem_1646: "f32[][]cuda:0" = _foreach_neg[18] + getitem_1647: 
"f32[][]cuda:0" = _foreach_neg[19] + getitem_1648: "f32[][]cuda:0" = _foreach_neg[20] + getitem_1649: "f32[][]cuda:0" = _foreach_neg[21] + getitem_1650: "f32[][]cuda:0" = _foreach_neg[22] + getitem_1651: "f32[][]cuda:0" = _foreach_neg[23] + getitem_1652: "f32[][]cuda:0" = _foreach_neg[24] + getitem_1653: "f32[][]cuda:0" = _foreach_neg[25] + getitem_1654: "f32[][]cuda:0" = _foreach_neg[26] + getitem_1655: "f32[][]cuda:0" = _foreach_neg[27] + getitem_1656: "f32[][]cuda:0" = _foreach_neg[28] + getitem_1657: "f32[][]cuda:0" = _foreach_neg[29] + getitem_1658: "f32[][]cuda:0" = _foreach_neg[30] + getitem_1659: "f32[][]cuda:0" = _foreach_neg[31] + getitem_1660: "f32[][]cuda:0" = _foreach_neg[32] + getitem_1661: "f32[][]cuda:0" = _foreach_neg[33] + getitem_1662: "f32[][]cuda:0" = _foreach_neg[34] + getitem_1663: "f32[][]cuda:0" = _foreach_neg[35] + getitem_1664: "f32[][]cuda:0" = _foreach_neg[36] + getitem_1665: "f32[][]cuda:0" = _foreach_neg[37] + getitem_1666: "f32[][]cuda:0" = _foreach_neg[38] + getitem_1667: "f32[][]cuda:0" = _foreach_neg[39] + getitem_1668: "f32[][]cuda:0" = _foreach_neg[40] + getitem_1669: "f32[][]cuda:0" = _foreach_neg[41] + getitem_1670: "f32[][]cuda:0" = _foreach_neg[42] + getitem_1671: "f32[][]cuda:0" = _foreach_neg[43] + getitem_1672: "f32[][]cuda:0" = _foreach_neg[44] + getitem_1673: "f32[][]cuda:0" = _foreach_neg[45] + getitem_1674: "f32[][]cuda:0" = _foreach_neg[46] + getitem_1675: "f32[][]cuda:0" = _foreach_neg[47] + getitem_1676: "f32[][]cuda:0" = _foreach_neg[48] + getitem_1677: "f32[][]cuda:0" = _foreach_neg[49] + getitem_1678: "f32[][]cuda:0" = _foreach_neg[50] + getitem_1679: "f32[][]cuda:0" = _foreach_neg[51] + getitem_1680: "f32[][]cuda:0" = _foreach_neg[52] + getitem_1681: "f32[][]cuda:0" = _foreach_neg[53] + getitem_1682: "f32[][]cuda:0" = _foreach_neg[54] + getitem_1683: "f32[][]cuda:0" = _foreach_neg[55] + getitem_1684: "f32[][]cuda:0" = _foreach_neg[56] + getitem_1685: "f32[][]cuda:0" = _foreach_neg[57] + getitem_1686: "f32[][]cuda:0" = _foreach_neg[58] + getitem_1687: "f32[][]cuda:0" = _foreach_neg[59] + getitem_1688: "f32[][]cuda:0" = _foreach_neg[60] + getitem_1689: "f32[][]cuda:0" = _foreach_neg[61] + getitem_1690: "f32[][]cuda:0" = _foreach_neg[62] + getitem_1691: "f32[][]cuda:0" = _foreach_neg[63] + getitem_1692: "f32[][]cuda:0" = _foreach_neg[64] + getitem_1693: "f32[][]cuda:0" = _foreach_neg[65] + getitem_1694: "f32[][]cuda:0" = _foreach_neg[66] + getitem_1695: "f32[][]cuda:0" = _foreach_neg[67] + getitem_1696: "f32[][]cuda:0" = _foreach_neg[68] + getitem_1697: "f32[][]cuda:0" = _foreach_neg[69] + getitem_1698: "f32[][]cuda:0" = _foreach_neg[70] + getitem_1699: "f32[][]cuda:0" = _foreach_neg[71] + getitem_1700: "f32[][]cuda:0" = _foreach_neg[72] + getitem_1701: "f32[][]cuda:0" = _foreach_neg[73] + getitem_1702: "f32[][]cuda:0" = _foreach_neg[74] + getitem_1703: "f32[][]cuda:0" = _foreach_neg[75] + getitem_1704: "f32[][]cuda:0" = _foreach_neg[76] + getitem_1705: "f32[][]cuda:0" = _foreach_neg[77] + getitem_1706: "f32[][]cuda:0" = _foreach_neg[78] + getitem_1707: "f32[][]cuda:0" = _foreach_neg[79] + getitem_1708: "f32[][]cuda:0" = _foreach_neg[80] + getitem_1709: "f32[][]cuda:0" = _foreach_neg[81] + getitem_1710: "f32[][]cuda:0" = _foreach_neg[82] + getitem_1711: "f32[][]cuda:0" = _foreach_neg[83] + getitem_1712: "f32[][]cuda:0" = _foreach_neg[84] + getitem_1713: "f32[][]cuda:0" = _foreach_neg[85] + getitem_1714: "f32[][]cuda:0" = _foreach_neg[86] + getitem_1715: "f32[][]cuda:0" = _foreach_neg[87] + getitem_1716: "f32[][]cuda:0" = _foreach_neg[88] 
+ getitem_1717: "f32[][]cuda:0" = _foreach_neg[89] + getitem_1718: "f32[][]cuda:0" = _foreach_neg[90] + getitem_1719: "f32[][]cuda:0" = _foreach_neg[91] + getitem_1720: "f32[][]cuda:0" = _foreach_neg[92] + getitem_1721: "f32[][]cuda:0" = _foreach_neg[93] + getitem_1722: "f32[][]cuda:0" = _foreach_neg[94] + getitem_1723: "f32[][]cuda:0" = _foreach_neg[95] + getitem_1724: "f32[][]cuda:0" = _foreach_neg[96] + getitem_1725: "f32[][]cuda:0" = _foreach_neg[97] + getitem_1726: "f32[][]cuda:0" = _foreach_neg[98] + getitem_1727: "f32[][]cuda:0" = _foreach_neg[99] + getitem_1728: "f32[][]cuda:0" = _foreach_neg[100] + getitem_1729: "f32[][]cuda:0" = _foreach_neg[101] + getitem_1730: "f32[][]cuda:0" = _foreach_neg[102] + getitem_1731: "f32[][]cuda:0" = _foreach_neg[103] + getitem_1732: "f32[][]cuda:0" = _foreach_neg[104] + getitem_1733: "f32[][]cuda:0" = _foreach_neg[105] + getitem_1734: "f32[][]cuda:0" = _foreach_neg[106] + getitem_1735: "f32[][]cuda:0" = _foreach_neg[107] + getitem_1736: "f32[][]cuda:0" = _foreach_neg[108] + getitem_1737: "f32[][]cuda:0" = _foreach_neg[109] + getitem_1738: "f32[][]cuda:0" = _foreach_neg[110] + getitem_1739: "f32[][]cuda:0" = _foreach_neg[111] + getitem_1740: "f32[][]cuda:0" = _foreach_neg[112] + getitem_1741: "f32[][]cuda:0" = _foreach_neg[113] + getitem_1742: "f32[][]cuda:0" = _foreach_neg[114] + getitem_1743: "f32[][]cuda:0" = _foreach_neg[115] + getitem_1744: "f32[][]cuda:0" = _foreach_neg[116] + getitem_1745: "f32[][]cuda:0" = _foreach_neg[117] + getitem_1746: "f32[][]cuda:0" = _foreach_neg[118] + getitem_1747: "f32[][]cuda:0" = _foreach_neg[119] + getitem_1748: "f32[][]cuda:0" = _foreach_neg[120] + getitem_1749: "f32[][]cuda:0" = _foreach_neg[121] + getitem_1750: "f32[][]cuda:0" = _foreach_neg[122] + getitem_1751: "f32[][]cuda:0" = _foreach_neg[123] + getitem_1752: "f32[][]cuda:0" = _foreach_neg[124] + getitem_1753: "f32[][]cuda:0" = _foreach_neg[125] + getitem_1754: "f32[][]cuda:0" = _foreach_neg[126] + getitem_1755: "f32[][]cuda:0" = _foreach_neg[127] + getitem_1756: "f32[][]cuda:0" = _foreach_neg[128] + getitem_1757: "f32[][]cuda:0" = _foreach_neg[129] + getitem_1758: "f32[][]cuda:0" = _foreach_neg[130] + getitem_1759: "f32[][]cuda:0" = _foreach_neg[131] + getitem_1760: "f32[][]cuda:0" = _foreach_neg[132] + getitem_1761: "f32[][]cuda:0" = _foreach_neg[133] + getitem_1762: "f32[][]cuda:0" = _foreach_neg[134] + getitem_1763: "f32[][]cuda:0" = _foreach_neg[135] + getitem_1764: "f32[][]cuda:0" = _foreach_neg[136] + getitem_1765: "f32[][]cuda:0" = _foreach_neg[137] + getitem_1766: "f32[][]cuda:0" = _foreach_neg[138] + getitem_1767: "f32[][]cuda:0" = _foreach_neg[139] + getitem_1768: "f32[][]cuda:0" = _foreach_neg[140] + getitem_1769: "f32[][]cuda:0" = _foreach_neg[141] + getitem_1770: "f32[][]cuda:0" = _foreach_neg[142] + getitem_1771: "f32[][]cuda:0" = _foreach_neg[143] + getitem_1772: "f32[][]cuda:0" = _foreach_neg[144] + getitem_1773: "f32[][]cuda:0" = _foreach_neg[145] + getitem_1774: "f32[][]cuda:0" = _foreach_neg[146] + getitem_1775: "f32[][]cuda:0" = _foreach_neg[147]; _foreach_neg = None + + # File: /data/users/jjwu/a/pytorch/torch/optim/adam.py:564 in _multi_tensor_adam, code: torch._foreach_div_(bias_correction1, lr) + _foreach_div = torch.ops.aten._foreach_div.Scalar([getitem_1332, getitem_1333, getitem_1334, getitem_1335, getitem_1336, getitem_1337, getitem_1338, getitem_1339, getitem_1340, getitem_1341, getitem_1342, getitem_1343, getitem_1344, getitem_1345, getitem_1346, getitem_1347, getitem_1348, getitem_1349, getitem_1350, getitem_1351, 
getitem_1352, getitem_1353, getitem_1354, getitem_1355, getitem_1356, getitem_1357, getitem_1358, getitem_1359, getitem_1360, getitem_1361, getitem_1362, getitem_1363, getitem_1364, getitem_1365, getitem_1366, getitem_1367, getitem_1368, getitem_1369, getitem_1370, getitem_1371, getitem_1372, getitem_1373, getitem_1374, getitem_1375, getitem_1376, getitem_1377, getitem_1378, getitem_1379, getitem_1380, getitem_1381, getitem_1382, getitem_1383, getitem_1384, getitem_1385, getitem_1386, getitem_1387, getitem_1388, getitem_1389, getitem_1390, getitem_1391, getitem_1392, getitem_1393, getitem_1394, getitem_1395, getitem_1396, getitem_1397, getitem_1398, getitem_1399, getitem_1400, getitem_1401, getitem_1402, getitem_1403, getitem_1404, getitem_1405, getitem_1406, getitem_1407, getitem_1408, getitem_1409, getitem_1410, getitem_1411, getitem_1412, getitem_1413, getitem_1414, getitem_1415, getitem_1416, getitem_1417, getitem_1418, getitem_1419, getitem_1420, getitem_1421, getitem_1422, getitem_1423, getitem_1424, getitem_1425, getitem_1426, getitem_1427, getitem_1428, getitem_1429, getitem_1430, getitem_1431, getitem_1432, getitem_1433, getitem_1434, getitem_1435, getitem_1436, getitem_1437, getitem_1438, getitem_1439, getitem_1440, getitem_1441, getitem_1442, getitem_1443, getitem_1444, getitem_1445, getitem_1446, getitem_1447, getitem_1448, getitem_1449, getitem_1450, getitem_1451, getitem_1452, getitem_1453, getitem_1454, getitem_1455, getitem_1456, getitem_1457, getitem_1458, getitem_1459, getitem_1460, getitem_1461, getitem_1462, getitem_1463, getitem_1464, getitem_1465, getitem_1466, getitem_1467, getitem_1468, getitem_1469, getitem_1470, getitem_1471, getitem_1472, getitem_1473, getitem_1474, getitem_1475, getitem_1476, getitem_1477, getitem_1478, getitem_1479], 0.01); getitem_1332 = getitem_1333 = getitem_1334 = getitem_1335 = getitem_1336 = getitem_1337 = getitem_1338 = getitem_1339 = getitem_1340 = getitem_1341 = getitem_1342 = getitem_1343 = getitem_1344 = getitem_1345 = getitem_1346 = getitem_1347 = getitem_1348 = getitem_1349 = getitem_1350 = getitem_1351 = getitem_1352 = getitem_1353 = getitem_1354 = getitem_1355 = getitem_1356 = getitem_1357 = getitem_1358 = getitem_1359 = getitem_1360 = getitem_1361 = getitem_1362 = getitem_1363 = getitem_1364 = getitem_1365 = getitem_1366 = getitem_1367 = getitem_1368 = getitem_1369 = getitem_1370 = getitem_1371 = getitem_1372 = getitem_1373 = getitem_1374 = getitem_1375 = getitem_1376 = getitem_1377 = getitem_1378 = getitem_1379 = getitem_1380 = getitem_1381 = getitem_1382 = getitem_1383 = getitem_1384 = getitem_1385 = getitem_1386 = getitem_1387 = getitem_1388 = getitem_1389 = getitem_1390 = getitem_1391 = getitem_1392 = getitem_1393 = getitem_1394 = getitem_1395 = getitem_1396 = getitem_1397 = getitem_1398 = getitem_1399 = getitem_1400 = getitem_1401 = getitem_1402 = getitem_1403 = getitem_1404 = getitem_1405 = getitem_1406 = getitem_1407 = getitem_1408 = getitem_1409 = getitem_1410 = getitem_1411 = getitem_1412 = getitem_1413 = getitem_1414 = getitem_1415 = getitem_1416 = getitem_1417 = getitem_1418 = getitem_1419 = getitem_1420 = getitem_1421 = getitem_1422 = getitem_1423 = getitem_1424 = getitem_1425 = getitem_1426 = getitem_1427 = getitem_1428 = getitem_1429 = getitem_1430 = getitem_1431 = getitem_1432 = getitem_1433 = getitem_1434 = getitem_1435 = getitem_1436 = getitem_1437 = getitem_1438 = getitem_1439 = getitem_1440 = getitem_1441 = getitem_1442 = getitem_1443 = getitem_1444 = getitem_1445 = getitem_1446 = getitem_1447 = getitem_1448 = 
getitem_1449 = getitem_1450 = getitem_1451 = getitem_1452 = getitem_1453 = getitem_1454 = getitem_1455 = getitem_1456 = getitem_1457 = getitem_1458 = getitem_1459 = getitem_1460 = getitem_1461 = getitem_1462 = getitem_1463 = getitem_1464 = getitem_1465 = getitem_1466 = getitem_1467 = getitem_1468 = getitem_1469 = getitem_1470 = getitem_1471 = getitem_1472 = getitem_1473 = getitem_1474 = getitem_1475 = getitem_1476 = getitem_1477 = getitem_1478 = getitem_1479 = None + getitem_1776: "f32[][]cuda:0" = _foreach_div[0] + getitem_1777: "f32[][]cuda:0" = _foreach_div[1] + getitem_1778: "f32[][]cuda:0" = _foreach_div[2] + getitem_1779: "f32[][]cuda:0" = _foreach_div[3] + getitem_1780: "f32[][]cuda:0" = _foreach_div[4] + getitem_1781: "f32[][]cuda:0" = _foreach_div[5] + getitem_1782: "f32[][]cuda:0" = _foreach_div[6] + getitem_1783: "f32[][]cuda:0" = _foreach_div[7] + getitem_1784: "f32[][]cuda:0" = _foreach_div[8] + getitem_1785: "f32[][]cuda:0" = _foreach_div[9] + getitem_1786: "f32[][]cuda:0" = _foreach_div[10] + getitem_1787: "f32[][]cuda:0" = _foreach_div[11] + getitem_1788: "f32[][]cuda:0" = _foreach_div[12] + getitem_1789: "f32[][]cuda:0" = _foreach_div[13] + getitem_1790: "f32[][]cuda:0" = _foreach_div[14] + getitem_1791: "f32[][]cuda:0" = _foreach_div[15] + getitem_1792: "f32[][]cuda:0" = _foreach_div[16] + getitem_1793: "f32[][]cuda:0" = _foreach_div[17] + getitem_1794: "f32[][]cuda:0" = _foreach_div[18] + getitem_1795: "f32[][]cuda:0" = _foreach_div[19] + getitem_1796: "f32[][]cuda:0" = _foreach_div[20] + getitem_1797: "f32[][]cuda:0" = _foreach_div[21] + getitem_1798: "f32[][]cuda:0" = _foreach_div[22] + getitem_1799: "f32[][]cuda:0" = _foreach_div[23] + getitem_1800: "f32[][]cuda:0" = _foreach_div[24] + getitem_1801: "f32[][]cuda:0" = _foreach_div[25] + getitem_1802: "f32[][]cuda:0" = _foreach_div[26] + getitem_1803: "f32[][]cuda:0" = _foreach_div[27] + getitem_1804: "f32[][]cuda:0" = _foreach_div[28] + getitem_1805: "f32[][]cuda:0" = _foreach_div[29] + getitem_1806: "f32[][]cuda:0" = _foreach_div[30] + getitem_1807: "f32[][]cuda:0" = _foreach_div[31] + getitem_1808: "f32[][]cuda:0" = _foreach_div[32] + getitem_1809: "f32[][]cuda:0" = _foreach_div[33] + getitem_1810: "f32[][]cuda:0" = _foreach_div[34] + getitem_1811: "f32[][]cuda:0" = _foreach_div[35] + getitem_1812: "f32[][]cuda:0" = _foreach_div[36] + getitem_1813: "f32[][]cuda:0" = _foreach_div[37] + getitem_1814: "f32[][]cuda:0" = _foreach_div[38] + getitem_1815: "f32[][]cuda:0" = _foreach_div[39] + getitem_1816: "f32[][]cuda:0" = _foreach_div[40] + getitem_1817: "f32[][]cuda:0" = _foreach_div[41] + getitem_1818: "f32[][]cuda:0" = _foreach_div[42] + getitem_1819: "f32[][]cuda:0" = _foreach_div[43] + getitem_1820: "f32[][]cuda:0" = _foreach_div[44] + getitem_1821: "f32[][]cuda:0" = _foreach_div[45] + getitem_1822: "f32[][]cuda:0" = _foreach_div[46] + getitem_1823: "f32[][]cuda:0" = _foreach_div[47] + getitem_1824: "f32[][]cuda:0" = _foreach_div[48] + getitem_1825: "f32[][]cuda:0" = _foreach_div[49] + getitem_1826: "f32[][]cuda:0" = _foreach_div[50] + getitem_1827: "f32[][]cuda:0" = _foreach_div[51] + getitem_1828: "f32[][]cuda:0" = _foreach_div[52] + getitem_1829: "f32[][]cuda:0" = _foreach_div[53] + getitem_1830: "f32[][]cuda:0" = _foreach_div[54] + getitem_1831: "f32[][]cuda:0" = _foreach_div[55] + getitem_1832: "f32[][]cuda:0" = _foreach_div[56] + getitem_1833: "f32[][]cuda:0" = _foreach_div[57] + getitem_1834: "f32[][]cuda:0" = _foreach_div[58] + getitem_1835: "f32[][]cuda:0" = _foreach_div[59] + getitem_1836: "f32[][]cuda:0" = 
_foreach_div[60] + getitem_1837: "f32[][]cuda:0" = _foreach_div[61] + getitem_1838: "f32[][]cuda:0" = _foreach_div[62] + getitem_1839: "f32[][]cuda:0" = _foreach_div[63] + getitem_1840: "f32[][]cuda:0" = _foreach_div[64] + getitem_1841: "f32[][]cuda:0" = _foreach_div[65] + getitem_1842: "f32[][]cuda:0" = _foreach_div[66] + getitem_1843: "f32[][]cuda:0" = _foreach_div[67] + getitem_1844: "f32[][]cuda:0" = _foreach_div[68] + getitem_1845: "f32[][]cuda:0" = _foreach_div[69] + getitem_1846: "f32[][]cuda:0" = _foreach_div[70] + getitem_1847: "f32[][]cuda:0" = _foreach_div[71] + getitem_1848: "f32[][]cuda:0" = _foreach_div[72] + getitem_1849: "f32[][]cuda:0" = _foreach_div[73] + getitem_1850: "f32[][]cuda:0" = _foreach_div[74] + getitem_1851: "f32[][]cuda:0" = _foreach_div[75] + getitem_1852: "f32[][]cuda:0" = _foreach_div[76] + getitem_1853: "f32[][]cuda:0" = _foreach_div[77] + getitem_1854: "f32[][]cuda:0" = _foreach_div[78] + getitem_1855: "f32[][]cuda:0" = _foreach_div[79] + getitem_1856: "f32[][]cuda:0" = _foreach_div[80] + getitem_1857: "f32[][]cuda:0" = _foreach_div[81] + getitem_1858: "f32[][]cuda:0" = _foreach_div[82] + getitem_1859: "f32[][]cuda:0" = _foreach_div[83] + getitem_1860: "f32[][]cuda:0" = _foreach_div[84] + getitem_1861: "f32[][]cuda:0" = _foreach_div[85] + getitem_1862: "f32[][]cuda:0" = _foreach_div[86] + getitem_1863: "f32[][]cuda:0" = _foreach_div[87] + getitem_1864: "f32[][]cuda:0" = _foreach_div[88] + getitem_1865: "f32[][]cuda:0" = _foreach_div[89] + getitem_1866: "f32[][]cuda:0" = _foreach_div[90] + getitem_1867: "f32[][]cuda:0" = _foreach_div[91] + getitem_1868: "f32[][]cuda:0" = _foreach_div[92] + getitem_1869: "f32[][]cuda:0" = _foreach_div[93] + getitem_1870: "f32[][]cuda:0" = _foreach_div[94] + getitem_1871: "f32[][]cuda:0" = _foreach_div[95] + getitem_1872: "f32[][]cuda:0" = _foreach_div[96] + getitem_1873: "f32[][]cuda:0" = _foreach_div[97] + getitem_1874: "f32[][]cuda:0" = _foreach_div[98] + getitem_1875: "f32[][]cuda:0" = _foreach_div[99] + getitem_1876: "f32[][]cuda:0" = _foreach_div[100] + getitem_1877: "f32[][]cuda:0" = _foreach_div[101] + getitem_1878: "f32[][]cuda:0" = _foreach_div[102] + getitem_1879: "f32[][]cuda:0" = _foreach_div[103] + getitem_1880: "f32[][]cuda:0" = _foreach_div[104] + getitem_1881: "f32[][]cuda:0" = _foreach_div[105] + getitem_1882: "f32[][]cuda:0" = _foreach_div[106] + getitem_1883: "f32[][]cuda:0" = _foreach_div[107] + getitem_1884: "f32[][]cuda:0" = _foreach_div[108] + getitem_1885: "f32[][]cuda:0" = _foreach_div[109] + getitem_1886: "f32[][]cuda:0" = _foreach_div[110] + getitem_1887: "f32[][]cuda:0" = _foreach_div[111] + getitem_1888: "f32[][]cuda:0" = _foreach_div[112] + getitem_1889: "f32[][]cuda:0" = _foreach_div[113] + getitem_1890: "f32[][]cuda:0" = _foreach_div[114] + getitem_1891: "f32[][]cuda:0" = _foreach_div[115] + getitem_1892: "f32[][]cuda:0" = _foreach_div[116] + getitem_1893: "f32[][]cuda:0" = _foreach_div[117] + getitem_1894: "f32[][]cuda:0" = _foreach_div[118] + getitem_1895: "f32[][]cuda:0" = _foreach_div[119] + getitem_1896: "f32[][]cuda:0" = _foreach_div[120] + getitem_1897: "f32[][]cuda:0" = _foreach_div[121] + getitem_1898: "f32[][]cuda:0" = _foreach_div[122] + getitem_1899: "f32[][]cuda:0" = _foreach_div[123] + getitem_1900: "f32[][]cuda:0" = _foreach_div[124] + getitem_1901: "f32[][]cuda:0" = _foreach_div[125] + getitem_1902: "f32[][]cuda:0" = _foreach_div[126] + getitem_1903: "f32[][]cuda:0" = _foreach_div[127] + getitem_1904: "f32[][]cuda:0" = _foreach_div[128] + getitem_1905: "f32[][]cuda:0" = 
_foreach_div[129] + getitem_1906: "f32[][]cuda:0" = _foreach_div[130] + getitem_1907: "f32[][]cuda:0" = _foreach_div[131] + getitem_1908: "f32[][]cuda:0" = _foreach_div[132] + getitem_1909: "f32[][]cuda:0" = _foreach_div[133] + getitem_1910: "f32[][]cuda:0" = _foreach_div[134] + getitem_1911: "f32[][]cuda:0" = _foreach_div[135] + getitem_1912: "f32[][]cuda:0" = _foreach_div[136] + getitem_1913: "f32[][]cuda:0" = _foreach_div[137] + getitem_1914: "f32[][]cuda:0" = _foreach_div[138] + getitem_1915: "f32[][]cuda:0" = _foreach_div[139] + getitem_1916: "f32[][]cuda:0" = _foreach_div[140] + getitem_1917: "f32[][]cuda:0" = _foreach_div[141] + getitem_1918: "f32[][]cuda:0" = _foreach_div[142] + getitem_1919: "f32[][]cuda:0" = _foreach_div[143] + getitem_1920: "f32[][]cuda:0" = _foreach_div[144] + getitem_1921: "f32[][]cuda:0" = _foreach_div[145] + getitem_1922: "f32[][]cuda:0" = _foreach_div[146] + getitem_1923: "f32[][]cuda:0" = _foreach_div[147]; _foreach_div = None + + # File: /data/users/jjwu/a/pytorch/torch/optim/adam.py:565 in _multi_tensor_adam, code: torch._foreach_reciprocal_(bias_correction1) + _foreach_reciprocal = torch.ops.aten._foreach_reciprocal.default([getitem_1776, getitem_1777, getitem_1778, getitem_1779, getitem_1780, getitem_1781, getitem_1782, getitem_1783, getitem_1784, getitem_1785, getitem_1786, getitem_1787, getitem_1788, getitem_1789, getitem_1790, getitem_1791, getitem_1792, getitem_1793, getitem_1794, getitem_1795, getitem_1796, getitem_1797, getitem_1798, getitem_1799, getitem_1800, getitem_1801, getitem_1802, getitem_1803, getitem_1804, getitem_1805, getitem_1806, getitem_1807, getitem_1808, getitem_1809, getitem_1810, getitem_1811, getitem_1812, getitem_1813, getitem_1814, getitem_1815, getitem_1816, getitem_1817, getitem_1818, getitem_1819, getitem_1820, getitem_1821, getitem_1822, getitem_1823, getitem_1824, getitem_1825, getitem_1826, getitem_1827, getitem_1828, getitem_1829, getitem_1830, getitem_1831, getitem_1832, getitem_1833, getitem_1834, getitem_1835, getitem_1836, getitem_1837, getitem_1838, getitem_1839, getitem_1840, getitem_1841, getitem_1842, getitem_1843, getitem_1844, getitem_1845, getitem_1846, getitem_1847, getitem_1848, getitem_1849, getitem_1850, getitem_1851, getitem_1852, getitem_1853, getitem_1854, getitem_1855, getitem_1856, getitem_1857, getitem_1858, getitem_1859, getitem_1860, getitem_1861, getitem_1862, getitem_1863, getitem_1864, getitem_1865, getitem_1866, getitem_1867, getitem_1868, getitem_1869, getitem_1870, getitem_1871, getitem_1872, getitem_1873, getitem_1874, getitem_1875, getitem_1876, getitem_1877, getitem_1878, getitem_1879, getitem_1880, getitem_1881, getitem_1882, getitem_1883, getitem_1884, getitem_1885, getitem_1886, getitem_1887, getitem_1888, getitem_1889, getitem_1890, getitem_1891, getitem_1892, getitem_1893, getitem_1894, getitem_1895, getitem_1896, getitem_1897, getitem_1898, getitem_1899, getitem_1900, getitem_1901, getitem_1902, getitem_1903, getitem_1904, getitem_1905, getitem_1906, getitem_1907, getitem_1908, getitem_1909, getitem_1910, getitem_1911, getitem_1912, getitem_1913, getitem_1914, getitem_1915, getitem_1916, getitem_1917, getitem_1918, getitem_1919, getitem_1920, getitem_1921, getitem_1922, getitem_1923]); getitem_1776 = getitem_1777 = getitem_1778 = getitem_1779 = getitem_1780 = getitem_1781 = getitem_1782 = getitem_1783 = getitem_1784 = getitem_1785 = getitem_1786 = getitem_1787 = getitem_1788 = getitem_1789 = getitem_1790 = getitem_1791 = getitem_1792 = getitem_1793 = getitem_1794 = getitem_1795 = 
getitem_1796 = getitem_1797 = getitem_1798 = getitem_1799 = getitem_1800 = getitem_1801 = getitem_1802 = getitem_1803 = getitem_1804 = getitem_1805 = getitem_1806 = getitem_1807 = getitem_1808 = getitem_1809 = getitem_1810 = getitem_1811 = getitem_1812 = getitem_1813 = getitem_1814 = getitem_1815 = getitem_1816 = getitem_1817 = getitem_1818 = getitem_1819 = getitem_1820 = getitem_1821 = getitem_1822 = getitem_1823 = getitem_1824 = getitem_1825 = getitem_1826 = getitem_1827 = getitem_1828 = getitem_1829 = getitem_1830 = getitem_1831 = getitem_1832 = getitem_1833 = getitem_1834 = getitem_1835 = getitem_1836 = getitem_1837 = getitem_1838 = getitem_1839 = getitem_1840 = getitem_1841 = getitem_1842 = getitem_1843 = getitem_1844 = getitem_1845 = getitem_1846 = getitem_1847 = getitem_1848 = getitem_1849 = getitem_1850 = getitem_1851 = getitem_1852 = getitem_1853 = getitem_1854 = getitem_1855 = getitem_1856 = getitem_1857 = getitem_1858 = getitem_1859 = getitem_1860 = getitem_1861 = getitem_1862 = getitem_1863 = getitem_1864 = getitem_1865 = getitem_1866 = getitem_1867 = getitem_1868 = getitem_1869 = getitem_1870 = getitem_1871 = getitem_1872 = getitem_1873 = getitem_1874 = getitem_1875 = getitem_1876 = getitem_1877 = getitem_1878 = getitem_1879 = getitem_1880 = getitem_1881 = getitem_1882 = getitem_1883 = getitem_1884 = getitem_1885 = getitem_1886 = getitem_1887 = getitem_1888 = getitem_1889 = getitem_1890 = getitem_1891 = getitem_1892 = getitem_1893 = getitem_1894 = getitem_1895 = getitem_1896 = getitem_1897 = getitem_1898 = getitem_1899 = getitem_1900 = getitem_1901 = getitem_1902 = getitem_1903 = getitem_1904 = getitem_1905 = getitem_1906 = getitem_1907 = getitem_1908 = getitem_1909 = getitem_1910 = getitem_1911 = getitem_1912 = getitem_1913 = getitem_1914 = getitem_1915 = getitem_1916 = getitem_1917 = getitem_1918 = getitem_1919 = getitem_1920 = getitem_1921 = getitem_1922 = getitem_1923 = None + getitem_1924: "f32[][]cuda:0" = _foreach_reciprocal[0] + getitem_1925: "f32[][]cuda:0" = _foreach_reciprocal[1] + getitem_1926: "f32[][]cuda:0" = _foreach_reciprocal[2] + getitem_1927: "f32[][]cuda:0" = _foreach_reciprocal[3] + getitem_1928: "f32[][]cuda:0" = _foreach_reciprocal[4] + getitem_1929: "f32[][]cuda:0" = _foreach_reciprocal[5] + getitem_1930: "f32[][]cuda:0" = _foreach_reciprocal[6] + getitem_1931: "f32[][]cuda:0" = _foreach_reciprocal[7] + getitem_1932: "f32[][]cuda:0" = _foreach_reciprocal[8] + getitem_1933: "f32[][]cuda:0" = _foreach_reciprocal[9] + getitem_1934: "f32[][]cuda:0" = _foreach_reciprocal[10] + getitem_1935: "f32[][]cuda:0" = _foreach_reciprocal[11] + getitem_1936: "f32[][]cuda:0" = _foreach_reciprocal[12] + getitem_1937: "f32[][]cuda:0" = _foreach_reciprocal[13] + getitem_1938: "f32[][]cuda:0" = _foreach_reciprocal[14] + getitem_1939: "f32[][]cuda:0" = _foreach_reciprocal[15] + getitem_1940: "f32[][]cuda:0" = _foreach_reciprocal[16] + getitem_1941: "f32[][]cuda:0" = _foreach_reciprocal[17] + getitem_1942: "f32[][]cuda:0" = _foreach_reciprocal[18] + getitem_1943: "f32[][]cuda:0" = _foreach_reciprocal[19] + getitem_1944: "f32[][]cuda:0" = _foreach_reciprocal[20] + getitem_1945: "f32[][]cuda:0" = _foreach_reciprocal[21] + getitem_1946: "f32[][]cuda:0" = _foreach_reciprocal[22] + getitem_1947: "f32[][]cuda:0" = _foreach_reciprocal[23] + getitem_1948: "f32[][]cuda:0" = _foreach_reciprocal[24] + getitem_1949: "f32[][]cuda:0" = _foreach_reciprocal[25] + getitem_1950: "f32[][]cuda:0" = _foreach_reciprocal[26] + getitem_1951: "f32[][]cuda:0" = _foreach_reciprocal[27] + getitem_1952: 
"f32[][]cuda:0" = _foreach_reciprocal[28] + getitem_1953: "f32[][]cuda:0" = _foreach_reciprocal[29] + getitem_1954: "f32[][]cuda:0" = _foreach_reciprocal[30] + getitem_1955: "f32[][]cuda:0" = _foreach_reciprocal[31] + getitem_1956: "f32[][]cuda:0" = _foreach_reciprocal[32] + getitem_1957: "f32[][]cuda:0" = _foreach_reciprocal[33] + getitem_1958: "f32[][]cuda:0" = _foreach_reciprocal[34] + getitem_1959: "f32[][]cuda:0" = _foreach_reciprocal[35] + getitem_1960: "f32[][]cuda:0" = _foreach_reciprocal[36] + getitem_1961: "f32[][]cuda:0" = _foreach_reciprocal[37] + getitem_1962: "f32[][]cuda:0" = _foreach_reciprocal[38] + getitem_1963: "f32[][]cuda:0" = _foreach_reciprocal[39] + getitem_1964: "f32[][]cuda:0" = _foreach_reciprocal[40] + getitem_1965: "f32[][]cuda:0" = _foreach_reciprocal[41] + getitem_1966: "f32[][]cuda:0" = _foreach_reciprocal[42] + getitem_1967: "f32[][]cuda:0" = _foreach_reciprocal[43] + getitem_1968: "f32[][]cuda:0" = _foreach_reciprocal[44] + getitem_1969: "f32[][]cuda:0" = _foreach_reciprocal[45] + getitem_1970: "f32[][]cuda:0" = _foreach_reciprocal[46] + getitem_1971: "f32[][]cuda:0" = _foreach_reciprocal[47] + getitem_1972: "f32[][]cuda:0" = _foreach_reciprocal[48] + getitem_1973: "f32[][]cuda:0" = _foreach_reciprocal[49] + getitem_1974: "f32[][]cuda:0" = _foreach_reciprocal[50] + getitem_1975: "f32[][]cuda:0" = _foreach_reciprocal[51] + getitem_1976: "f32[][]cuda:0" = _foreach_reciprocal[52] + getitem_1977: "f32[][]cuda:0" = _foreach_reciprocal[53] + getitem_1978: "f32[][]cuda:0" = _foreach_reciprocal[54] + getitem_1979: "f32[][]cuda:0" = _foreach_reciprocal[55] + getitem_1980: "f32[][]cuda:0" = _foreach_reciprocal[56] + getitem_1981: "f32[][]cuda:0" = _foreach_reciprocal[57] + getitem_1982: "f32[][]cuda:0" = _foreach_reciprocal[58] + getitem_1983: "f32[][]cuda:0" = _foreach_reciprocal[59] + getitem_1984: "f32[][]cuda:0" = _foreach_reciprocal[60] + getitem_1985: "f32[][]cuda:0" = _foreach_reciprocal[61] + getitem_1986: "f32[][]cuda:0" = _foreach_reciprocal[62] + getitem_1987: "f32[][]cuda:0" = _foreach_reciprocal[63] + getitem_1988: "f32[][]cuda:0" = _foreach_reciprocal[64] + getitem_1989: "f32[][]cuda:0" = _foreach_reciprocal[65] + getitem_1990: "f32[][]cuda:0" = _foreach_reciprocal[66] + getitem_1991: "f32[][]cuda:0" = _foreach_reciprocal[67] + getitem_1992: "f32[][]cuda:0" = _foreach_reciprocal[68] + getitem_1993: "f32[][]cuda:0" = _foreach_reciprocal[69] + getitem_1994: "f32[][]cuda:0" = _foreach_reciprocal[70] + getitem_1995: "f32[][]cuda:0" = _foreach_reciprocal[71] + getitem_1996: "f32[][]cuda:0" = _foreach_reciprocal[72] + getitem_1997: "f32[][]cuda:0" = _foreach_reciprocal[73] + getitem_1998: "f32[][]cuda:0" = _foreach_reciprocal[74] + getitem_1999: "f32[][]cuda:0" = _foreach_reciprocal[75] + getitem_2000: "f32[][]cuda:0" = _foreach_reciprocal[76] + getitem_2001: "f32[][]cuda:0" = _foreach_reciprocal[77] + getitem_2002: "f32[][]cuda:0" = _foreach_reciprocal[78] + getitem_2003: "f32[][]cuda:0" = _foreach_reciprocal[79] + getitem_2004: "f32[][]cuda:0" = _foreach_reciprocal[80] + getitem_2005: "f32[][]cuda:0" = _foreach_reciprocal[81] + getitem_2006: "f32[][]cuda:0" = _foreach_reciprocal[82] + getitem_2007: "f32[][]cuda:0" = _foreach_reciprocal[83] + getitem_2008: "f32[][]cuda:0" = _foreach_reciprocal[84] + getitem_2009: "f32[][]cuda:0" = _foreach_reciprocal[85] + getitem_2010: "f32[][]cuda:0" = _foreach_reciprocal[86] + getitem_2011: "f32[][]cuda:0" = _foreach_reciprocal[87] + getitem_2012: "f32[][]cuda:0" = _foreach_reciprocal[88] + getitem_2013: "f32[][]cuda:0" 
= _foreach_reciprocal[89] + getitem_2014: "f32[][]cuda:0" = _foreach_reciprocal[90] + getitem_2015: "f32[][]cuda:0" = _foreach_reciprocal[91] + getitem_2016: "f32[][]cuda:0" = _foreach_reciprocal[92] + getitem_2017: "f32[][]cuda:0" = _foreach_reciprocal[93] + getitem_2018: "f32[][]cuda:0" = _foreach_reciprocal[94] + getitem_2019: "f32[][]cuda:0" = _foreach_reciprocal[95] + getitem_2020: "f32[][]cuda:0" = _foreach_reciprocal[96] + getitem_2021: "f32[][]cuda:0" = _foreach_reciprocal[97] + getitem_2022: "f32[][]cuda:0" = _foreach_reciprocal[98] + getitem_2023: "f32[][]cuda:0" = _foreach_reciprocal[99] + getitem_2024: "f32[][]cuda:0" = _foreach_reciprocal[100] + getitem_2025: "f32[][]cuda:0" = _foreach_reciprocal[101] + getitem_2026: "f32[][]cuda:0" = _foreach_reciprocal[102] + getitem_2027: "f32[][]cuda:0" = _foreach_reciprocal[103] + getitem_2028: "f32[][]cuda:0" = _foreach_reciprocal[104] + getitem_2029: "f32[][]cuda:0" = _foreach_reciprocal[105] + getitem_2030: "f32[][]cuda:0" = _foreach_reciprocal[106] + getitem_2031: "f32[][]cuda:0" = _foreach_reciprocal[107] + getitem_2032: "f32[][]cuda:0" = _foreach_reciprocal[108] + getitem_2033: "f32[][]cuda:0" = _foreach_reciprocal[109] + getitem_2034: "f32[][]cuda:0" = _foreach_reciprocal[110] + getitem_2035: "f32[][]cuda:0" = _foreach_reciprocal[111] + getitem_2036: "f32[][]cuda:0" = _foreach_reciprocal[112] + getitem_2037: "f32[][]cuda:0" = _foreach_reciprocal[113] + getitem_2038: "f32[][]cuda:0" = _foreach_reciprocal[114] + getitem_2039: "f32[][]cuda:0" = _foreach_reciprocal[115] + getitem_2040: "f32[][]cuda:0" = _foreach_reciprocal[116] + getitem_2041: "f32[][]cuda:0" = _foreach_reciprocal[117] + getitem_2042: "f32[][]cuda:0" = _foreach_reciprocal[118] + getitem_2043: "f32[][]cuda:0" = _foreach_reciprocal[119] + getitem_2044: "f32[][]cuda:0" = _foreach_reciprocal[120] + getitem_2045: "f32[][]cuda:0" = _foreach_reciprocal[121] + getitem_2046: "f32[][]cuda:0" = _foreach_reciprocal[122] + getitem_2047: "f32[][]cuda:0" = _foreach_reciprocal[123] + getitem_2048: "f32[][]cuda:0" = _foreach_reciprocal[124] + getitem_2049: "f32[][]cuda:0" = _foreach_reciprocal[125] + getitem_2050: "f32[][]cuda:0" = _foreach_reciprocal[126] + getitem_2051: "f32[][]cuda:0" = _foreach_reciprocal[127] + getitem_2052: "f32[][]cuda:0" = _foreach_reciprocal[128] + getitem_2053: "f32[][]cuda:0" = _foreach_reciprocal[129] + getitem_2054: "f32[][]cuda:0" = _foreach_reciprocal[130] + getitem_2055: "f32[][]cuda:0" = _foreach_reciprocal[131] + getitem_2056: "f32[][]cuda:0" = _foreach_reciprocal[132] + getitem_2057: "f32[][]cuda:0" = _foreach_reciprocal[133] + getitem_2058: "f32[][]cuda:0" = _foreach_reciprocal[134] + getitem_2059: "f32[][]cuda:0" = _foreach_reciprocal[135] + getitem_2060: "f32[][]cuda:0" = _foreach_reciprocal[136] + getitem_2061: "f32[][]cuda:0" = _foreach_reciprocal[137] + getitem_2062: "f32[][]cuda:0" = _foreach_reciprocal[138] + getitem_2063: "f32[][]cuda:0" = _foreach_reciprocal[139] + getitem_2064: "f32[][]cuda:0" = _foreach_reciprocal[140] + getitem_2065: "f32[][]cuda:0" = _foreach_reciprocal[141] + getitem_2066: "f32[][]cuda:0" = _foreach_reciprocal[142] + getitem_2067: "f32[][]cuda:0" = _foreach_reciprocal[143] + getitem_2068: "f32[][]cuda:0" = _foreach_reciprocal[144] + getitem_2069: "f32[][]cuda:0" = _foreach_reciprocal[145] + getitem_2070: "f32[][]cuda:0" = _foreach_reciprocal[146] + getitem_2071: "f32[][]cuda:0" = _foreach_reciprocal[147]; _foreach_reciprocal = None + + # File: /data/users/jjwu/a/pytorch/torch/optim/adam.py:567 in _multi_tensor_adam, 
code: torch._foreach_sqrt_(bias_correction2) + _foreach_sqrt = torch.ops.aten._foreach_sqrt.default([getitem_1628, getitem_1629, getitem_1630, getitem_1631, getitem_1632, getitem_1633, getitem_1634, getitem_1635, getitem_1636, getitem_1637, getitem_1638, getitem_1639, getitem_1640, getitem_1641, getitem_1642, getitem_1643, getitem_1644, getitem_1645, getitem_1646, getitem_1647, getitem_1648, getitem_1649, getitem_1650, getitem_1651, getitem_1652, getitem_1653, getitem_1654, getitem_1655, getitem_1656, getitem_1657, getitem_1658, getitem_1659, getitem_1660, getitem_1661, getitem_1662, getitem_1663, getitem_1664, getitem_1665, getitem_1666, getitem_1667, getitem_1668, getitem_1669, getitem_1670, getitem_1671, getitem_1672, getitem_1673, getitem_1674, getitem_1675, getitem_1676, getitem_1677, getitem_1678, getitem_1679, getitem_1680, getitem_1681, getitem_1682, getitem_1683, getitem_1684, getitem_1685, getitem_1686, getitem_1687, getitem_1688, getitem_1689, getitem_1690, getitem_1691, getitem_1692, getitem_1693, getitem_1694, getitem_1695, getitem_1696, getitem_1697, getitem_1698, getitem_1699, getitem_1700, getitem_1701, getitem_1702, getitem_1703, getitem_1704, getitem_1705, getitem_1706, getitem_1707, getitem_1708, getitem_1709, getitem_1710, getitem_1711, getitem_1712, getitem_1713, getitem_1714, getitem_1715, getitem_1716, getitem_1717, getitem_1718, getitem_1719, getitem_1720, getitem_1721, getitem_1722, getitem_1723, getitem_1724, getitem_1725, getitem_1726, getitem_1727, getitem_1728, getitem_1729, getitem_1730, getitem_1731, getitem_1732, getitem_1733, getitem_1734, getitem_1735, getitem_1736, getitem_1737, getitem_1738, getitem_1739, getitem_1740, getitem_1741, getitem_1742, getitem_1743, getitem_1744, getitem_1745, getitem_1746, getitem_1747, getitem_1748, getitem_1749, getitem_1750, getitem_1751, getitem_1752, getitem_1753, getitem_1754, getitem_1755, getitem_1756, getitem_1757, getitem_1758, getitem_1759, getitem_1760, getitem_1761, getitem_1762, getitem_1763, getitem_1764, getitem_1765, getitem_1766, getitem_1767, getitem_1768, getitem_1769, getitem_1770, getitem_1771, getitem_1772, getitem_1773, getitem_1774, getitem_1775]); getitem_1628 = getitem_1629 = getitem_1630 = getitem_1631 = getitem_1632 = getitem_1633 = getitem_1634 = getitem_1635 = getitem_1636 = getitem_1637 = getitem_1638 = getitem_1639 = getitem_1640 = getitem_1641 = getitem_1642 = getitem_1643 = getitem_1644 = getitem_1645 = getitem_1646 = getitem_1647 = getitem_1648 = getitem_1649 = getitem_1650 = getitem_1651 = getitem_1652 = getitem_1653 = getitem_1654 = getitem_1655 = getitem_1656 = getitem_1657 = getitem_1658 = getitem_1659 = getitem_1660 = getitem_1661 = getitem_1662 = getitem_1663 = getitem_1664 = getitem_1665 = getitem_1666 = getitem_1667 = getitem_1668 = getitem_1669 = getitem_1670 = getitem_1671 = getitem_1672 = getitem_1673 = getitem_1674 = getitem_1675 = getitem_1676 = getitem_1677 = getitem_1678 = getitem_1679 = getitem_1680 = getitem_1681 = getitem_1682 = getitem_1683 = getitem_1684 = getitem_1685 = getitem_1686 = getitem_1687 = getitem_1688 = getitem_1689 = getitem_1690 = getitem_1691 = getitem_1692 = getitem_1693 = getitem_1694 = getitem_1695 = getitem_1696 = getitem_1697 = getitem_1698 = getitem_1699 = getitem_1700 = getitem_1701 = getitem_1702 = getitem_1703 = getitem_1704 = getitem_1705 = getitem_1706 = getitem_1707 = getitem_1708 = getitem_1709 = getitem_1710 = getitem_1711 = getitem_1712 = getitem_1713 = getitem_1714 = getitem_1715 = getitem_1716 = getitem_1717 = getitem_1718 = getitem_1719 = 
getitem_1720 = getitem_1721 = getitem_1722 = getitem_1723 = getitem_1724 = getitem_1725 = getitem_1726 = getitem_1727 = getitem_1728 = getitem_1729 = getitem_1730 = getitem_1731 = getitem_1732 = getitem_1733 = getitem_1734 = getitem_1735 = getitem_1736 = getitem_1737 = getitem_1738 = getitem_1739 = getitem_1740 = getitem_1741 = getitem_1742 = getitem_1743 = getitem_1744 = getitem_1745 = getitem_1746 = getitem_1747 = getitem_1748 = getitem_1749 = getitem_1750 = getitem_1751 = getitem_1752 = getitem_1753 = getitem_1754 = getitem_1755 = getitem_1756 = getitem_1757 = getitem_1758 = getitem_1759 = getitem_1760 = getitem_1761 = getitem_1762 = getitem_1763 = getitem_1764 = getitem_1765 = getitem_1766 = getitem_1767 = getitem_1768 = getitem_1769 = getitem_1770 = getitem_1771 = getitem_1772 = getitem_1773 = getitem_1774 = getitem_1775 = None + getitem_2072: "f32[][]cuda:0" = _foreach_sqrt[0] + getitem_2073: "f32[][]cuda:0" = _foreach_sqrt[1] + getitem_2074: "f32[][]cuda:0" = _foreach_sqrt[2] + getitem_2075: "f32[][]cuda:0" = _foreach_sqrt[3] + getitem_2076: "f32[][]cuda:0" = _foreach_sqrt[4] + getitem_2077: "f32[][]cuda:0" = _foreach_sqrt[5] + getitem_2078: "f32[][]cuda:0" = _foreach_sqrt[6] + getitem_2079: "f32[][]cuda:0" = _foreach_sqrt[7] + getitem_2080: "f32[][]cuda:0" = _foreach_sqrt[8] + getitem_2081: "f32[][]cuda:0" = _foreach_sqrt[9] + getitem_2082: "f32[][]cuda:0" = _foreach_sqrt[10] + getitem_2083: "f32[][]cuda:0" = _foreach_sqrt[11] + getitem_2084: "f32[][]cuda:0" = _foreach_sqrt[12] + getitem_2085: "f32[][]cuda:0" = _foreach_sqrt[13] + getitem_2086: "f32[][]cuda:0" = _foreach_sqrt[14] + getitem_2087: "f32[][]cuda:0" = _foreach_sqrt[15] + getitem_2088: "f32[][]cuda:0" = _foreach_sqrt[16] + getitem_2089: "f32[][]cuda:0" = _foreach_sqrt[17] + getitem_2090: "f32[][]cuda:0" = _foreach_sqrt[18] + getitem_2091: "f32[][]cuda:0" = _foreach_sqrt[19] + getitem_2092: "f32[][]cuda:0" = _foreach_sqrt[20] + getitem_2093: "f32[][]cuda:0" = _foreach_sqrt[21] + getitem_2094: "f32[][]cuda:0" = _foreach_sqrt[22] + getitem_2095: "f32[][]cuda:0" = _foreach_sqrt[23] + getitem_2096: "f32[][]cuda:0" = _foreach_sqrt[24] + getitem_2097: "f32[][]cuda:0" = _foreach_sqrt[25] + getitem_2098: "f32[][]cuda:0" = _foreach_sqrt[26] + getitem_2099: "f32[][]cuda:0" = _foreach_sqrt[27] + getitem_2100: "f32[][]cuda:0" = _foreach_sqrt[28] + getitem_2101: "f32[][]cuda:0" = _foreach_sqrt[29] + getitem_2102: "f32[][]cuda:0" = _foreach_sqrt[30] + getitem_2103: "f32[][]cuda:0" = _foreach_sqrt[31] + getitem_2104: "f32[][]cuda:0" = _foreach_sqrt[32] + getitem_2105: "f32[][]cuda:0" = _foreach_sqrt[33] + getitem_2106: "f32[][]cuda:0" = _foreach_sqrt[34] + getitem_2107: "f32[][]cuda:0" = _foreach_sqrt[35] + getitem_2108: "f32[][]cuda:0" = _foreach_sqrt[36] + getitem_2109: "f32[][]cuda:0" = _foreach_sqrt[37] + getitem_2110: "f32[][]cuda:0" = _foreach_sqrt[38] + getitem_2111: "f32[][]cuda:0" = _foreach_sqrt[39] + getitem_2112: "f32[][]cuda:0" = _foreach_sqrt[40] + getitem_2113: "f32[][]cuda:0" = _foreach_sqrt[41] + getitem_2114: "f32[][]cuda:0" = _foreach_sqrt[42] + getitem_2115: "f32[][]cuda:0" = _foreach_sqrt[43] + getitem_2116: "f32[][]cuda:0" = _foreach_sqrt[44] + getitem_2117: "f32[][]cuda:0" = _foreach_sqrt[45] + getitem_2118: "f32[][]cuda:0" = _foreach_sqrt[46] + getitem_2119: "f32[][]cuda:0" = _foreach_sqrt[47] + getitem_2120: "f32[][]cuda:0" = _foreach_sqrt[48] + getitem_2121: "f32[][]cuda:0" = _foreach_sqrt[49] + getitem_2122: "f32[][]cuda:0" = _foreach_sqrt[50] + getitem_2123: "f32[][]cuda:0" = _foreach_sqrt[51] + getitem_2124: 
"f32[][]cuda:0" = _foreach_sqrt[52] + getitem_2125: "f32[][]cuda:0" = _foreach_sqrt[53] + getitem_2126: "f32[][]cuda:0" = _foreach_sqrt[54] + getitem_2127: "f32[][]cuda:0" = _foreach_sqrt[55] + getitem_2128: "f32[][]cuda:0" = _foreach_sqrt[56] + getitem_2129: "f32[][]cuda:0" = _foreach_sqrt[57] + getitem_2130: "f32[][]cuda:0" = _foreach_sqrt[58] + getitem_2131: "f32[][]cuda:0" = _foreach_sqrt[59] + getitem_2132: "f32[][]cuda:0" = _foreach_sqrt[60] + getitem_2133: "f32[][]cuda:0" = _foreach_sqrt[61] + getitem_2134: "f32[][]cuda:0" = _foreach_sqrt[62] + getitem_2135: "f32[][]cuda:0" = _foreach_sqrt[63] + getitem_2136: "f32[][]cuda:0" = _foreach_sqrt[64] + getitem_2137: "f32[][]cuda:0" = _foreach_sqrt[65] + getitem_2138: "f32[][]cuda:0" = _foreach_sqrt[66] + getitem_2139: "f32[][]cuda:0" = _foreach_sqrt[67] + getitem_2140: "f32[][]cuda:0" = _foreach_sqrt[68] + getitem_2141: "f32[][]cuda:0" = _foreach_sqrt[69] + getitem_2142: "f32[][]cuda:0" = _foreach_sqrt[70] + getitem_2143: "f32[][]cuda:0" = _foreach_sqrt[71] + getitem_2144: "f32[][]cuda:0" = _foreach_sqrt[72] + getitem_2145: "f32[][]cuda:0" = _foreach_sqrt[73] + getitem_2146: "f32[][]cuda:0" = _foreach_sqrt[74] + getitem_2147: "f32[][]cuda:0" = _foreach_sqrt[75] + getitem_2148: "f32[][]cuda:0" = _foreach_sqrt[76] + getitem_2149: "f32[][]cuda:0" = _foreach_sqrt[77] + getitem_2150: "f32[][]cuda:0" = _foreach_sqrt[78] + getitem_2151: "f32[][]cuda:0" = _foreach_sqrt[79] + getitem_2152: "f32[][]cuda:0" = _foreach_sqrt[80] + getitem_2153: "f32[][]cuda:0" = _foreach_sqrt[81] + getitem_2154: "f32[][]cuda:0" = _foreach_sqrt[82] + getitem_2155: "f32[][]cuda:0" = _foreach_sqrt[83] + getitem_2156: "f32[][]cuda:0" = _foreach_sqrt[84] + getitem_2157: "f32[][]cuda:0" = _foreach_sqrt[85] + getitem_2158: "f32[][]cuda:0" = _foreach_sqrt[86] + getitem_2159: "f32[][]cuda:0" = _foreach_sqrt[87] + getitem_2160: "f32[][]cuda:0" = _foreach_sqrt[88] + getitem_2161: "f32[][]cuda:0" = _foreach_sqrt[89] + getitem_2162: "f32[][]cuda:0" = _foreach_sqrt[90] + getitem_2163: "f32[][]cuda:0" = _foreach_sqrt[91] + getitem_2164: "f32[][]cuda:0" = _foreach_sqrt[92] + getitem_2165: "f32[][]cuda:0" = _foreach_sqrt[93] + getitem_2166: "f32[][]cuda:0" = _foreach_sqrt[94] + getitem_2167: "f32[][]cuda:0" = _foreach_sqrt[95] + getitem_2168: "f32[][]cuda:0" = _foreach_sqrt[96] + getitem_2169: "f32[][]cuda:0" = _foreach_sqrt[97] + getitem_2170: "f32[][]cuda:0" = _foreach_sqrt[98] + getitem_2171: "f32[][]cuda:0" = _foreach_sqrt[99] + getitem_2172: "f32[][]cuda:0" = _foreach_sqrt[100] + getitem_2173: "f32[][]cuda:0" = _foreach_sqrt[101] + getitem_2174: "f32[][]cuda:0" = _foreach_sqrt[102] + getitem_2175: "f32[][]cuda:0" = _foreach_sqrt[103] + getitem_2176: "f32[][]cuda:0" = _foreach_sqrt[104] + getitem_2177: "f32[][]cuda:0" = _foreach_sqrt[105] + getitem_2178: "f32[][]cuda:0" = _foreach_sqrt[106] + getitem_2179: "f32[][]cuda:0" = _foreach_sqrt[107] + getitem_2180: "f32[][]cuda:0" = _foreach_sqrt[108] + getitem_2181: "f32[][]cuda:0" = _foreach_sqrt[109] + getitem_2182: "f32[][]cuda:0" = _foreach_sqrt[110] + getitem_2183: "f32[][]cuda:0" = _foreach_sqrt[111] + getitem_2184: "f32[][]cuda:0" = _foreach_sqrt[112] + getitem_2185: "f32[][]cuda:0" = _foreach_sqrt[113] + getitem_2186: "f32[][]cuda:0" = _foreach_sqrt[114] + getitem_2187: "f32[][]cuda:0" = _foreach_sqrt[115] + getitem_2188: "f32[][]cuda:0" = _foreach_sqrt[116] + getitem_2189: "f32[][]cuda:0" = _foreach_sqrt[117] + getitem_2190: "f32[][]cuda:0" = _foreach_sqrt[118] + getitem_2191: "f32[][]cuda:0" = _foreach_sqrt[119] + 
getitem_2192: "f32[][]cuda:0" = _foreach_sqrt[120] + getitem_2193: "f32[][]cuda:0" = _foreach_sqrt[121] + getitem_2194: "f32[][]cuda:0" = _foreach_sqrt[122] + getitem_2195: "f32[][]cuda:0" = _foreach_sqrt[123] + getitem_2196: "f32[][]cuda:0" = _foreach_sqrt[124] + getitem_2197: "f32[][]cuda:0" = _foreach_sqrt[125] + getitem_2198: "f32[][]cuda:0" = _foreach_sqrt[126] + getitem_2199: "f32[][]cuda:0" = _foreach_sqrt[127] + getitem_2200: "f32[][]cuda:0" = _foreach_sqrt[128] + getitem_2201: "f32[][]cuda:0" = _foreach_sqrt[129] + getitem_2202: "f32[][]cuda:0" = _foreach_sqrt[130] + getitem_2203: "f32[][]cuda:0" = _foreach_sqrt[131] + getitem_2204: "f32[][]cuda:0" = _foreach_sqrt[132] + getitem_2205: "f32[][]cuda:0" = _foreach_sqrt[133] + getitem_2206: "f32[][]cuda:0" = _foreach_sqrt[134] + getitem_2207: "f32[][]cuda:0" = _foreach_sqrt[135] + getitem_2208: "f32[][]cuda:0" = _foreach_sqrt[136] + getitem_2209: "f32[][]cuda:0" = _foreach_sqrt[137] + getitem_2210: "f32[][]cuda:0" = _foreach_sqrt[138] + getitem_2211: "f32[][]cuda:0" = _foreach_sqrt[139] + getitem_2212: "f32[][]cuda:0" = _foreach_sqrt[140] + getitem_2213: "f32[][]cuda:0" = _foreach_sqrt[141] + getitem_2214: "f32[][]cuda:0" = _foreach_sqrt[142] + getitem_2215: "f32[][]cuda:0" = _foreach_sqrt[143] + getitem_2216: "f32[][]cuda:0" = _foreach_sqrt[144] + getitem_2217: "f32[][]cuda:0" = _foreach_sqrt[145] + getitem_2218: "f32[][]cuda:0" = _foreach_sqrt[146] + getitem_2219: "f32[][]cuda:0" = _foreach_sqrt[147]; _foreach_sqrt = None + + # File: /data/users/jjwu/a/pytorch/torch/optim/adam.py:582 in _multi_tensor_adam, code: exp_avg_sq_sqrt = torch._foreach_sqrt(device_exp_avg_sqs) + _foreach_sqrt_1 = torch.ops.aten._foreach_sqrt.default([getitem_888, getitem_889, getitem_890, getitem_891, getitem_892, getitem_893, getitem_894, getitem_895, getitem_896, getitem_897, getitem_898, getitem_899, getitem_900, getitem_901, getitem_902, getitem_903, getitem_904, getitem_905, getitem_906, getitem_907, getitem_908, getitem_909, getitem_910, getitem_911, getitem_912, getitem_913, getitem_914, getitem_915, getitem_916, getitem_917, getitem_918, getitem_919, getitem_920, getitem_921, getitem_922, getitem_923, getitem_924, getitem_925, getitem_926, getitem_927, getitem_928, getitem_929, getitem_930, getitem_931, getitem_932, getitem_933, getitem_934, getitem_935, getitem_936, getitem_937, getitem_938, getitem_939, getitem_940, getitem_941, getitem_942, getitem_943, getitem_944, getitem_945, getitem_946, getitem_947, getitem_948, getitem_949, getitem_950, getitem_951, getitem_952, getitem_953, getitem_954, getitem_955, getitem_956, getitem_957, getitem_958, getitem_959, getitem_960, getitem_961, getitem_962, getitem_963, getitem_964, getitem_965, getitem_966, getitem_967, getitem_968, getitem_969, getitem_970, getitem_971, getitem_972, getitem_973, getitem_974, getitem_975, getitem_976, getitem_977, getitem_978, getitem_979, getitem_980, getitem_981, getitem_982, getitem_983, getitem_984, getitem_985, getitem_986, getitem_987, getitem_988, getitem_989, getitem_990, getitem_991, getitem_992, getitem_993, getitem_994, getitem_995, getitem_996, getitem_997, getitem_998, getitem_999, getitem_1000, getitem_1001, getitem_1002, getitem_1003, getitem_1004, getitem_1005, getitem_1006, getitem_1007, getitem_1008, getitem_1009, getitem_1010, getitem_1011, getitem_1012, getitem_1013, getitem_1014, getitem_1015, getitem_1016, getitem_1017, getitem_1018, getitem_1019, getitem_1020, getitem_1021, getitem_1022, getitem_1023, getitem_1024, getitem_1025, getitem_1026, 
getitem_1027, getitem_1028, getitem_1029, getitem_1030, getitem_1031, getitem_1032, getitem_1033, getitem_1034, getitem_1035]) + getitem_2220: "f32[50304, 768][768, 1]cuda:0" = _foreach_sqrt_1[0] + getitem_2221: "f32[1024, 768][768, 1]cuda:0" = _foreach_sqrt_1[1] + getitem_2222: "f32[768][1]cuda:0" = _foreach_sqrt_1[2] + getitem_2223: "f32[768][1]cuda:0" = _foreach_sqrt_1[3] + getitem_2224: "f32[2304, 768][768, 1]cuda:0" = _foreach_sqrt_1[4] + getitem_2225: "f32[2304][1]cuda:0" = _foreach_sqrt_1[5] + getitem_2226: "f32[768, 768][768, 1]cuda:0" = _foreach_sqrt_1[6] + getitem_2227: "f32[768][1]cuda:0" = _foreach_sqrt_1[7] + getitem_2228: "f32[768][1]cuda:0" = _foreach_sqrt_1[8] + getitem_2229: "f32[768][1]cuda:0" = _foreach_sqrt_1[9] + getitem_2230: "f32[3072, 768][768, 1]cuda:0" = _foreach_sqrt_1[10] + getitem_2231: "f32[3072][1]cuda:0" = _foreach_sqrt_1[11] + getitem_2232: "f32[768, 3072][3072, 1]cuda:0" = _foreach_sqrt_1[12] + getitem_2233: "f32[768][1]cuda:0" = _foreach_sqrt_1[13] + getitem_2234: "f32[768][1]cuda:0" = _foreach_sqrt_1[14] + getitem_2235: "f32[768][1]cuda:0" = _foreach_sqrt_1[15] + getitem_2236: "f32[2304, 768][768, 1]cuda:0" = _foreach_sqrt_1[16] + getitem_2237: "f32[2304][1]cuda:0" = _foreach_sqrt_1[17] + getitem_2238: "f32[768, 768][768, 1]cuda:0" = _foreach_sqrt_1[18] + getitem_2239: "f32[768][1]cuda:0" = _foreach_sqrt_1[19] + getitem_2240: "f32[768][1]cuda:0" = _foreach_sqrt_1[20] + getitem_2241: "f32[768][1]cuda:0" = _foreach_sqrt_1[21] + getitem_2242: "f32[3072, 768][768, 1]cuda:0" = _foreach_sqrt_1[22] + getitem_2243: "f32[3072][1]cuda:0" = _foreach_sqrt_1[23] + getitem_2244: "f32[768, 3072][3072, 1]cuda:0" = _foreach_sqrt_1[24] + getitem_2245: "f32[768][1]cuda:0" = _foreach_sqrt_1[25] + getitem_2246: "f32[768][1]cuda:0" = _foreach_sqrt_1[26] + getitem_2247: "f32[768][1]cuda:0" = _foreach_sqrt_1[27] + getitem_2248: "f32[2304, 768][768, 1]cuda:0" = _foreach_sqrt_1[28] + getitem_2249: "f32[2304][1]cuda:0" = _foreach_sqrt_1[29] + getitem_2250: "f32[768, 768][768, 1]cuda:0" = _foreach_sqrt_1[30] + getitem_2251: "f32[768][1]cuda:0" = _foreach_sqrt_1[31] + getitem_2252: "f32[768][1]cuda:0" = _foreach_sqrt_1[32] + getitem_2253: "f32[768][1]cuda:0" = _foreach_sqrt_1[33] + getitem_2254: "f32[3072, 768][768, 1]cuda:0" = _foreach_sqrt_1[34] + getitem_2255: "f32[3072][1]cuda:0" = _foreach_sqrt_1[35] + getitem_2256: "f32[768, 3072][3072, 1]cuda:0" = _foreach_sqrt_1[36] + getitem_2257: "f32[768][1]cuda:0" = _foreach_sqrt_1[37] + getitem_2258: "f32[768][1]cuda:0" = _foreach_sqrt_1[38] + getitem_2259: "f32[768][1]cuda:0" = _foreach_sqrt_1[39] + getitem_2260: "f32[2304, 768][768, 1]cuda:0" = _foreach_sqrt_1[40] + getitem_2261: "f32[2304][1]cuda:0" = _foreach_sqrt_1[41] + getitem_2262: "f32[768, 768][768, 1]cuda:0" = _foreach_sqrt_1[42] + getitem_2263: "f32[768][1]cuda:0" = _foreach_sqrt_1[43] + getitem_2264: "f32[768][1]cuda:0" = _foreach_sqrt_1[44] + getitem_2265: "f32[768][1]cuda:0" = _foreach_sqrt_1[45] + getitem_2266: "f32[3072, 768][768, 1]cuda:0" = _foreach_sqrt_1[46] + getitem_2267: "f32[3072][1]cuda:0" = _foreach_sqrt_1[47] + getitem_2268: "f32[768, 3072][3072, 1]cuda:0" = _foreach_sqrt_1[48] + getitem_2269: "f32[768][1]cuda:0" = _foreach_sqrt_1[49] + getitem_2270: "f32[768][1]cuda:0" = _foreach_sqrt_1[50] + getitem_2271: "f32[768][1]cuda:0" = _foreach_sqrt_1[51] + getitem_2272: "f32[2304, 768][768, 1]cuda:0" = _foreach_sqrt_1[52] + getitem_2273: "f32[2304][1]cuda:0" = _foreach_sqrt_1[53] + getitem_2274: "f32[768, 768][768, 1]cuda:0" = _foreach_sqrt_1[54] + getitem_2275: 
"f32[768][1]cuda:0" = _foreach_sqrt_1[55] + getitem_2276: "f32[768][1]cuda:0" = _foreach_sqrt_1[56] + getitem_2277: "f32[768][1]cuda:0" = _foreach_sqrt_1[57] + getitem_2278: "f32[3072, 768][768, 1]cuda:0" = _foreach_sqrt_1[58] + getitem_2279: "f32[3072][1]cuda:0" = _foreach_sqrt_1[59] + getitem_2280: "f32[768, 3072][3072, 1]cuda:0" = _foreach_sqrt_1[60] + getitem_2281: "f32[768][1]cuda:0" = _foreach_sqrt_1[61] + getitem_2282: "f32[768][1]cuda:0" = _foreach_sqrt_1[62] + getitem_2283: "f32[768][1]cuda:0" = _foreach_sqrt_1[63] + getitem_2284: "f32[2304, 768][768, 1]cuda:0" = _foreach_sqrt_1[64] + getitem_2285: "f32[2304][1]cuda:0" = _foreach_sqrt_1[65] + getitem_2286: "f32[768, 768][768, 1]cuda:0" = _foreach_sqrt_1[66] + getitem_2287: "f32[768][1]cuda:0" = _foreach_sqrt_1[67] + getitem_2288: "f32[768][1]cuda:0" = _foreach_sqrt_1[68] + getitem_2289: "f32[768][1]cuda:0" = _foreach_sqrt_1[69] + getitem_2290: "f32[3072, 768][768, 1]cuda:0" = _foreach_sqrt_1[70] + getitem_2291: "f32[3072][1]cuda:0" = _foreach_sqrt_1[71] + getitem_2292: "f32[768, 3072][3072, 1]cuda:0" = _foreach_sqrt_1[72] + getitem_2293: "f32[768][1]cuda:0" = _foreach_sqrt_1[73] + getitem_2294: "f32[768][1]cuda:0" = _foreach_sqrt_1[74] + getitem_2295: "f32[768][1]cuda:0" = _foreach_sqrt_1[75] + getitem_2296: "f32[2304, 768][768, 1]cuda:0" = _foreach_sqrt_1[76] + getitem_2297: "f32[2304][1]cuda:0" = _foreach_sqrt_1[77] + getitem_2298: "f32[768, 768][768, 1]cuda:0" = _foreach_sqrt_1[78] + getitem_2299: "f32[768][1]cuda:0" = _foreach_sqrt_1[79] + getitem_2300: "f32[768][1]cuda:0" = _foreach_sqrt_1[80] + getitem_2301: "f32[768][1]cuda:0" = _foreach_sqrt_1[81] + getitem_2302: "f32[3072, 768][768, 1]cuda:0" = _foreach_sqrt_1[82] + getitem_2303: "f32[3072][1]cuda:0" = _foreach_sqrt_1[83] + getitem_2304: "f32[768, 3072][3072, 1]cuda:0" = _foreach_sqrt_1[84] + getitem_2305: "f32[768][1]cuda:0" = _foreach_sqrt_1[85] + getitem_2306: "f32[768][1]cuda:0" = _foreach_sqrt_1[86] + getitem_2307: "f32[768][1]cuda:0" = _foreach_sqrt_1[87] + getitem_2308: "f32[2304, 768][768, 1]cuda:0" = _foreach_sqrt_1[88] + getitem_2309: "f32[2304][1]cuda:0" = _foreach_sqrt_1[89] + getitem_2310: "f32[768, 768][768, 1]cuda:0" = _foreach_sqrt_1[90] + getitem_2311: "f32[768][1]cuda:0" = _foreach_sqrt_1[91] + getitem_2312: "f32[768][1]cuda:0" = _foreach_sqrt_1[92] + getitem_2313: "f32[768][1]cuda:0" = _foreach_sqrt_1[93] + getitem_2314: "f32[3072, 768][768, 1]cuda:0" = _foreach_sqrt_1[94] + getitem_2315: "f32[3072][1]cuda:0" = _foreach_sqrt_1[95] + getitem_2316: "f32[768, 3072][3072, 1]cuda:0" = _foreach_sqrt_1[96] + getitem_2317: "f32[768][1]cuda:0" = _foreach_sqrt_1[97] + getitem_2318: "f32[768][1]cuda:0" = _foreach_sqrt_1[98] + getitem_2319: "f32[768][1]cuda:0" = _foreach_sqrt_1[99] + getitem_2320: "f32[2304, 768][768, 1]cuda:0" = _foreach_sqrt_1[100] + getitem_2321: "f32[2304][1]cuda:0" = _foreach_sqrt_1[101] + getitem_2322: "f32[768, 768][768, 1]cuda:0" = _foreach_sqrt_1[102] + getitem_2323: "f32[768][1]cuda:0" = _foreach_sqrt_1[103] + getitem_2324: "f32[768][1]cuda:0" = _foreach_sqrt_1[104] + getitem_2325: "f32[768][1]cuda:0" = _foreach_sqrt_1[105] + getitem_2326: "f32[3072, 768][768, 1]cuda:0" = _foreach_sqrt_1[106] + getitem_2327: "f32[3072][1]cuda:0" = _foreach_sqrt_1[107] + getitem_2328: "f32[768, 3072][3072, 1]cuda:0" = _foreach_sqrt_1[108] + getitem_2329: "f32[768][1]cuda:0" = _foreach_sqrt_1[109] + getitem_2330: "f32[768][1]cuda:0" = _foreach_sqrt_1[110] + getitem_2331: "f32[768][1]cuda:0" = _foreach_sqrt_1[111] + getitem_2332: "f32[2304, 768][768, 
1]cuda:0" = _foreach_sqrt_1[112] + getitem_2333: "f32[2304][1]cuda:0" = _foreach_sqrt_1[113] + getitem_2334: "f32[768, 768][768, 1]cuda:0" = _foreach_sqrt_1[114] + getitem_2335: "f32[768][1]cuda:0" = _foreach_sqrt_1[115] + getitem_2336: "f32[768][1]cuda:0" = _foreach_sqrt_1[116] + getitem_2337: "f32[768][1]cuda:0" = _foreach_sqrt_1[117] + getitem_2338: "f32[3072, 768][768, 1]cuda:0" = _foreach_sqrt_1[118] + getitem_2339: "f32[3072][1]cuda:0" = _foreach_sqrt_1[119] + getitem_2340: "f32[768, 3072][3072, 1]cuda:0" = _foreach_sqrt_1[120] + getitem_2341: "f32[768][1]cuda:0" = _foreach_sqrt_1[121] + getitem_2342: "f32[768][1]cuda:0" = _foreach_sqrt_1[122] + getitem_2343: "f32[768][1]cuda:0" = _foreach_sqrt_1[123] + getitem_2344: "f32[2304, 768][768, 1]cuda:0" = _foreach_sqrt_1[124] + getitem_2345: "f32[2304][1]cuda:0" = _foreach_sqrt_1[125] + getitem_2346: "f32[768, 768][768, 1]cuda:0" = _foreach_sqrt_1[126] + getitem_2347: "f32[768][1]cuda:0" = _foreach_sqrt_1[127] + getitem_2348: "f32[768][1]cuda:0" = _foreach_sqrt_1[128] + getitem_2349: "f32[768][1]cuda:0" = _foreach_sqrt_1[129] + getitem_2350: "f32[3072, 768][768, 1]cuda:0" = _foreach_sqrt_1[130] + getitem_2351: "f32[3072][1]cuda:0" = _foreach_sqrt_1[131] + getitem_2352: "f32[768, 3072][3072, 1]cuda:0" = _foreach_sqrt_1[132] + getitem_2353: "f32[768][1]cuda:0" = _foreach_sqrt_1[133] + getitem_2354: "f32[768][1]cuda:0" = _foreach_sqrt_1[134] + getitem_2355: "f32[768][1]cuda:0" = _foreach_sqrt_1[135] + getitem_2356: "f32[2304, 768][768, 1]cuda:0" = _foreach_sqrt_1[136] + getitem_2357: "f32[2304][1]cuda:0" = _foreach_sqrt_1[137] + getitem_2358: "f32[768, 768][768, 1]cuda:0" = _foreach_sqrt_1[138] + getitem_2359: "f32[768][1]cuda:0" = _foreach_sqrt_1[139] + getitem_2360: "f32[768][1]cuda:0" = _foreach_sqrt_1[140] + getitem_2361: "f32[768][1]cuda:0" = _foreach_sqrt_1[141] + getitem_2362: "f32[3072, 768][768, 1]cuda:0" = _foreach_sqrt_1[142] + getitem_2363: "f32[3072][1]cuda:0" = _foreach_sqrt_1[143] + getitem_2364: "f32[768, 3072][3072, 1]cuda:0" = _foreach_sqrt_1[144] + getitem_2365: "f32[768][1]cuda:0" = _foreach_sqrt_1[145] + getitem_2366: "f32[768][1]cuda:0" = _foreach_sqrt_1[146] + getitem_2367: "f32[768][1]cuda:0" = _foreach_sqrt_1[147]; _foreach_sqrt_1 = None + + # File: /data/users/jjwu/a/pytorch/torch/optim/adam.py:584 in _multi_tensor_adam, code: torch._foreach_div_(exp_avg_sq_sqrt, bias_correction2_sqrt) + _foreach_div_1 = torch.ops.aten._foreach_div.List([getitem_2220, getitem_2221, getitem_2222, getitem_2223, getitem_2224, getitem_2225, getitem_2226, getitem_2227, getitem_2228, getitem_2229, getitem_2230, getitem_2231, getitem_2232, getitem_2233, getitem_2234, getitem_2235, getitem_2236, getitem_2237, getitem_2238, getitem_2239, getitem_2240, getitem_2241, getitem_2242, getitem_2243, getitem_2244, getitem_2245, getitem_2246, getitem_2247, getitem_2248, getitem_2249, getitem_2250, getitem_2251, getitem_2252, getitem_2253, getitem_2254, getitem_2255, getitem_2256, getitem_2257, getitem_2258, getitem_2259, getitem_2260, getitem_2261, getitem_2262, getitem_2263, getitem_2264, getitem_2265, getitem_2266, getitem_2267, getitem_2268, getitem_2269, getitem_2270, getitem_2271, getitem_2272, getitem_2273, getitem_2274, getitem_2275, getitem_2276, getitem_2277, getitem_2278, getitem_2279, getitem_2280, getitem_2281, getitem_2282, getitem_2283, getitem_2284, getitem_2285, getitem_2286, getitem_2287, getitem_2288, getitem_2289, getitem_2290, getitem_2291, getitem_2292, getitem_2293, getitem_2294, getitem_2295, getitem_2296, getitem_2297, 
getitem_2298, getitem_2299, getitem_2300, getitem_2301, getitem_2302, getitem_2303, getitem_2304, getitem_2305, getitem_2306, getitem_2307, getitem_2308, getitem_2309, getitem_2310, getitem_2311, getitem_2312, getitem_2313, getitem_2314, getitem_2315, getitem_2316, getitem_2317, getitem_2318, getitem_2319, getitem_2320, getitem_2321, getitem_2322, getitem_2323, getitem_2324, getitem_2325, getitem_2326, getitem_2327, getitem_2328, getitem_2329, getitem_2330, getitem_2331, getitem_2332, getitem_2333, getitem_2334, getitem_2335, getitem_2336, getitem_2337, getitem_2338, getitem_2339, getitem_2340, getitem_2341, getitem_2342, getitem_2343, getitem_2344, getitem_2345, getitem_2346, getitem_2347, getitem_2348, getitem_2349, getitem_2350, getitem_2351, getitem_2352, getitem_2353, getitem_2354, getitem_2355, getitem_2356, getitem_2357, getitem_2358, getitem_2359, getitem_2360, getitem_2361, getitem_2362, getitem_2363, getitem_2364, getitem_2365, getitem_2366, getitem_2367], [getitem_2072, getitem_2073, getitem_2074, getitem_2075, getitem_2076, getitem_2077, getitem_2078, getitem_2079, getitem_2080, getitem_2081, getitem_2082, getitem_2083, getitem_2084, getitem_2085, getitem_2086, getitem_2087, getitem_2088, getitem_2089, getitem_2090, getitem_2091, getitem_2092, getitem_2093, getitem_2094, getitem_2095, getitem_2096, getitem_2097, getitem_2098, getitem_2099, getitem_2100, getitem_2101, getitem_2102, getitem_2103, getitem_2104, getitem_2105, getitem_2106, getitem_2107, getitem_2108, getitem_2109, getitem_2110, getitem_2111, getitem_2112, getitem_2113, getitem_2114, getitem_2115, getitem_2116, getitem_2117, getitem_2118, getitem_2119, getitem_2120, getitem_2121, getitem_2122, getitem_2123, getitem_2124, getitem_2125, getitem_2126, getitem_2127, getitem_2128, getitem_2129, getitem_2130, getitem_2131, getitem_2132, getitem_2133, getitem_2134, getitem_2135, getitem_2136, getitem_2137, getitem_2138, getitem_2139, getitem_2140, getitem_2141, getitem_2142, getitem_2143, getitem_2144, getitem_2145, getitem_2146, getitem_2147, getitem_2148, getitem_2149, getitem_2150, getitem_2151, getitem_2152, getitem_2153, getitem_2154, getitem_2155, getitem_2156, getitem_2157, getitem_2158, getitem_2159, getitem_2160, getitem_2161, getitem_2162, getitem_2163, getitem_2164, getitem_2165, getitem_2166, getitem_2167, getitem_2168, getitem_2169, getitem_2170, getitem_2171, getitem_2172, getitem_2173, getitem_2174, getitem_2175, getitem_2176, getitem_2177, getitem_2178, getitem_2179, getitem_2180, getitem_2181, getitem_2182, getitem_2183, getitem_2184, getitem_2185, getitem_2186, getitem_2187, getitem_2188, getitem_2189, getitem_2190, getitem_2191, getitem_2192, getitem_2193, getitem_2194, getitem_2195, getitem_2196, getitem_2197, getitem_2198, getitem_2199, getitem_2200, getitem_2201, getitem_2202, getitem_2203, getitem_2204, getitem_2205, getitem_2206, getitem_2207, getitem_2208, getitem_2209, getitem_2210, getitem_2211, getitem_2212, getitem_2213, getitem_2214, getitem_2215, getitem_2216, getitem_2217, getitem_2218, getitem_2219]); getitem_2220 = getitem_2221 = getitem_2222 = getitem_2223 = getitem_2224 = getitem_2225 = getitem_2226 = getitem_2227 = getitem_2228 = getitem_2229 = getitem_2230 = getitem_2231 = getitem_2232 = getitem_2233 = getitem_2234 = getitem_2235 = getitem_2236 = getitem_2237 = getitem_2238 = getitem_2239 = getitem_2240 = getitem_2241 = getitem_2242 = getitem_2243 = getitem_2244 = getitem_2245 = getitem_2246 = getitem_2247 = getitem_2248 = getitem_2249 = getitem_2250 = getitem_2251 = getitem_2252 = 
getitem_2253 = getitem_2254 = getitem_2255 = getitem_2256 = getitem_2257 = getitem_2258 = getitem_2259 = getitem_2260 = getitem_2261 = getitem_2262 = getitem_2263 = getitem_2264 = getitem_2265 = getitem_2266 = getitem_2267 = getitem_2268 = getitem_2269 = getitem_2270 = getitem_2271 = getitem_2272 = getitem_2273 = getitem_2274 = getitem_2275 = getitem_2276 = getitem_2277 = getitem_2278 = getitem_2279 = getitem_2280 = getitem_2281 = getitem_2282 = getitem_2283 = getitem_2284 = getitem_2285 = getitem_2286 = getitem_2287 = getitem_2288 = getitem_2289 = getitem_2290 = getitem_2291 = getitem_2292 = getitem_2293 = getitem_2294 = getitem_2295 = getitem_2296 = getitem_2297 = getitem_2298 = getitem_2299 = getitem_2300 = getitem_2301 = getitem_2302 = getitem_2303 = getitem_2304 = getitem_2305 = getitem_2306 = getitem_2307 = getitem_2308 = getitem_2309 = getitem_2310 = getitem_2311 = getitem_2312 = getitem_2313 = getitem_2314 = getitem_2315 = getitem_2316 = getitem_2317 = getitem_2318 = getitem_2319 = getitem_2320 = getitem_2321 = getitem_2322 = getitem_2323 = getitem_2324 = getitem_2325 = getitem_2326 = getitem_2327 = getitem_2328 = getitem_2329 = getitem_2330 = getitem_2331 = getitem_2332 = getitem_2333 = getitem_2334 = getitem_2335 = getitem_2336 = getitem_2337 = getitem_2338 = getitem_2339 = getitem_2340 = getitem_2341 = getitem_2342 = getitem_2343 = getitem_2344 = getitem_2345 = getitem_2346 = getitem_2347 = getitem_2348 = getitem_2349 = getitem_2350 = getitem_2351 = getitem_2352 = getitem_2353 = getitem_2354 = getitem_2355 = getitem_2356 = getitem_2357 = getitem_2358 = getitem_2359 = getitem_2360 = getitem_2361 = getitem_2362 = getitem_2363 = getitem_2364 = getitem_2365 = getitem_2366 = getitem_2367 = getitem_2072 = getitem_2073 = getitem_2074 = getitem_2075 = getitem_2076 = getitem_2077 = getitem_2078 = getitem_2079 = getitem_2080 = getitem_2081 = getitem_2082 = getitem_2083 = getitem_2084 = getitem_2085 = getitem_2086 = getitem_2087 = getitem_2088 = getitem_2089 = getitem_2090 = getitem_2091 = getitem_2092 = getitem_2093 = getitem_2094 = getitem_2095 = getitem_2096 = getitem_2097 = getitem_2098 = getitem_2099 = getitem_2100 = getitem_2101 = getitem_2102 = getitem_2103 = getitem_2104 = getitem_2105 = getitem_2106 = getitem_2107 = getitem_2108 = getitem_2109 = getitem_2110 = getitem_2111 = getitem_2112 = getitem_2113 = getitem_2114 = getitem_2115 = getitem_2116 = getitem_2117 = getitem_2118 = getitem_2119 = getitem_2120 = getitem_2121 = getitem_2122 = getitem_2123 = getitem_2124 = getitem_2125 = getitem_2126 = getitem_2127 = getitem_2128 = getitem_2129 = getitem_2130 = getitem_2131 = getitem_2132 = getitem_2133 = getitem_2134 = getitem_2135 = getitem_2136 = getitem_2137 = getitem_2138 = getitem_2139 = getitem_2140 = getitem_2141 = getitem_2142 = getitem_2143 = getitem_2144 = getitem_2145 = getitem_2146 = getitem_2147 = getitem_2148 = getitem_2149 = getitem_2150 = getitem_2151 = getitem_2152 = getitem_2153 = getitem_2154 = getitem_2155 = getitem_2156 = getitem_2157 = getitem_2158 = getitem_2159 = getitem_2160 = getitem_2161 = getitem_2162 = getitem_2163 = getitem_2164 = getitem_2165 = getitem_2166 = getitem_2167 = getitem_2168 = getitem_2169 = getitem_2170 = getitem_2171 = getitem_2172 = getitem_2173 = getitem_2174 = getitem_2175 = getitem_2176 = getitem_2177 = getitem_2178 = getitem_2179 = getitem_2180 = getitem_2181 = getitem_2182 = getitem_2183 = getitem_2184 = getitem_2185 = getitem_2186 = getitem_2187 = getitem_2188 = getitem_2189 = getitem_2190 = getitem_2191 = getitem_2192 = getitem_2193 = 
getitem_2194 = getitem_2195 = getitem_2196 = getitem_2197 = getitem_2198 = getitem_2199 = getitem_2200 = getitem_2201 = getitem_2202 = getitem_2203 = getitem_2204 = getitem_2205 = getitem_2206 = getitem_2207 = getitem_2208 = getitem_2209 = getitem_2210 = getitem_2211 = getitem_2212 = getitem_2213 = getitem_2214 = getitem_2215 = getitem_2216 = getitem_2217 = getitem_2218 = getitem_2219 = None + getitem_2368: "f32[50304, 768][768, 1]cuda:0" = _foreach_div_1[0] + getitem_2369: "f32[1024, 768][768, 1]cuda:0" = _foreach_div_1[1] + getitem_2370: "f32[768][1]cuda:0" = _foreach_div_1[2] + getitem_2371: "f32[768][1]cuda:0" = _foreach_div_1[3] + getitem_2372: "f32[2304, 768][768, 1]cuda:0" = _foreach_div_1[4] + getitem_2373: "f32[2304][1]cuda:0" = _foreach_div_1[5] + getitem_2374: "f32[768, 768][768, 1]cuda:0" = _foreach_div_1[6] + getitem_2375: "f32[768][1]cuda:0" = _foreach_div_1[7] + getitem_2376: "f32[768][1]cuda:0" = _foreach_div_1[8] + getitem_2377: "f32[768][1]cuda:0" = _foreach_div_1[9] + getitem_2378: "f32[3072, 768][768, 1]cuda:0" = _foreach_div_1[10] + getitem_2379: "f32[3072][1]cuda:0" = _foreach_div_1[11] + getitem_2380: "f32[768, 3072][3072, 1]cuda:0" = _foreach_div_1[12] + getitem_2381: "f32[768][1]cuda:0" = _foreach_div_1[13] + getitem_2382: "f32[768][1]cuda:0" = _foreach_div_1[14] + getitem_2383: "f32[768][1]cuda:0" = _foreach_div_1[15] + getitem_2384: "f32[2304, 768][768, 1]cuda:0" = _foreach_div_1[16] + getitem_2385: "f32[2304][1]cuda:0" = _foreach_div_1[17] + getitem_2386: "f32[768, 768][768, 1]cuda:0" = _foreach_div_1[18] + getitem_2387: "f32[768][1]cuda:0" = _foreach_div_1[19] + getitem_2388: "f32[768][1]cuda:0" = _foreach_div_1[20] + getitem_2389: "f32[768][1]cuda:0" = _foreach_div_1[21] + getitem_2390: "f32[3072, 768][768, 1]cuda:0" = _foreach_div_1[22] + getitem_2391: "f32[3072][1]cuda:0" = _foreach_div_1[23] + getitem_2392: "f32[768, 3072][3072, 1]cuda:0" = _foreach_div_1[24] + getitem_2393: "f32[768][1]cuda:0" = _foreach_div_1[25] + getitem_2394: "f32[768][1]cuda:0" = _foreach_div_1[26] + getitem_2395: "f32[768][1]cuda:0" = _foreach_div_1[27] + getitem_2396: "f32[2304, 768][768, 1]cuda:0" = _foreach_div_1[28] + getitem_2397: "f32[2304][1]cuda:0" = _foreach_div_1[29] + getitem_2398: "f32[768, 768][768, 1]cuda:0" = _foreach_div_1[30] + getitem_2399: "f32[768][1]cuda:0" = _foreach_div_1[31] + getitem_2400: "f32[768][1]cuda:0" = _foreach_div_1[32] + getitem_2401: "f32[768][1]cuda:0" = _foreach_div_1[33] + getitem_2402: "f32[3072, 768][768, 1]cuda:0" = _foreach_div_1[34] + getitem_2403: "f32[3072][1]cuda:0" = _foreach_div_1[35] + getitem_2404: "f32[768, 3072][3072, 1]cuda:0" = _foreach_div_1[36] + getitem_2405: "f32[768][1]cuda:0" = _foreach_div_1[37] + getitem_2406: "f32[768][1]cuda:0" = _foreach_div_1[38] + getitem_2407: "f32[768][1]cuda:0" = _foreach_div_1[39] + getitem_2408: "f32[2304, 768][768, 1]cuda:0" = _foreach_div_1[40] + getitem_2409: "f32[2304][1]cuda:0" = _foreach_div_1[41] + getitem_2410: "f32[768, 768][768, 1]cuda:0" = _foreach_div_1[42] + getitem_2411: "f32[768][1]cuda:0" = _foreach_div_1[43] + getitem_2412: "f32[768][1]cuda:0" = _foreach_div_1[44] + getitem_2413: "f32[768][1]cuda:0" = _foreach_div_1[45] + getitem_2414: "f32[3072, 768][768, 1]cuda:0" = _foreach_div_1[46] + getitem_2415: "f32[3072][1]cuda:0" = _foreach_div_1[47] + getitem_2416: "f32[768, 3072][3072, 1]cuda:0" = _foreach_div_1[48] + getitem_2417: "f32[768][1]cuda:0" = _foreach_div_1[49] + getitem_2418: "f32[768][1]cuda:0" = _foreach_div_1[50] + getitem_2419: "f32[768][1]cuda:0" = 
_foreach_div_1[51] + getitem_2420: "f32[2304, 768][768, 1]cuda:0" = _foreach_div_1[52] + getitem_2421: "f32[2304][1]cuda:0" = _foreach_div_1[53] + getitem_2422: "f32[768, 768][768, 1]cuda:0" = _foreach_div_1[54] + getitem_2423: "f32[768][1]cuda:0" = _foreach_div_1[55] + getitem_2424: "f32[768][1]cuda:0" = _foreach_div_1[56] + getitem_2425: "f32[768][1]cuda:0" = _foreach_div_1[57] + getitem_2426: "f32[3072, 768][768, 1]cuda:0" = _foreach_div_1[58] + getitem_2427: "f32[3072][1]cuda:0" = _foreach_div_1[59] + getitem_2428: "f32[768, 3072][3072, 1]cuda:0" = _foreach_div_1[60] + getitem_2429: "f32[768][1]cuda:0" = _foreach_div_1[61] + getitem_2430: "f32[768][1]cuda:0" = _foreach_div_1[62] + getitem_2431: "f32[768][1]cuda:0" = _foreach_div_1[63] + getitem_2432: "f32[2304, 768][768, 1]cuda:0" = _foreach_div_1[64] + getitem_2433: "f32[2304][1]cuda:0" = _foreach_div_1[65] + getitem_2434: "f32[768, 768][768, 1]cuda:0" = _foreach_div_1[66] + getitem_2435: "f32[768][1]cuda:0" = _foreach_div_1[67] + getitem_2436: "f32[768][1]cuda:0" = _foreach_div_1[68] + getitem_2437: "f32[768][1]cuda:0" = _foreach_div_1[69] + getitem_2438: "f32[3072, 768][768, 1]cuda:0" = _foreach_div_1[70] + getitem_2439: "f32[3072][1]cuda:0" = _foreach_div_1[71] + getitem_2440: "f32[768, 3072][3072, 1]cuda:0" = _foreach_div_1[72] + getitem_2441: "f32[768][1]cuda:0" = _foreach_div_1[73] + getitem_2442: "f32[768][1]cuda:0" = _foreach_div_1[74] + getitem_2443: "f32[768][1]cuda:0" = _foreach_div_1[75] + getitem_2444: "f32[2304, 768][768, 1]cuda:0" = _foreach_div_1[76] + getitem_2445: "f32[2304][1]cuda:0" = _foreach_div_1[77] + getitem_2446: "f32[768, 768][768, 1]cuda:0" = _foreach_div_1[78] + getitem_2447: "f32[768][1]cuda:0" = _foreach_div_1[79] + getitem_2448: "f32[768][1]cuda:0" = _foreach_div_1[80] + getitem_2449: "f32[768][1]cuda:0" = _foreach_div_1[81] + getitem_2450: "f32[3072, 768][768, 1]cuda:0" = _foreach_div_1[82] + getitem_2451: "f32[3072][1]cuda:0" = _foreach_div_1[83] + getitem_2452: "f32[768, 3072][3072, 1]cuda:0" = _foreach_div_1[84] + getitem_2453: "f32[768][1]cuda:0" = _foreach_div_1[85] + getitem_2454: "f32[768][1]cuda:0" = _foreach_div_1[86] + getitem_2455: "f32[768][1]cuda:0" = _foreach_div_1[87] + getitem_2456: "f32[2304, 768][768, 1]cuda:0" = _foreach_div_1[88] + getitem_2457: "f32[2304][1]cuda:0" = _foreach_div_1[89] + getitem_2458: "f32[768, 768][768, 1]cuda:0" = _foreach_div_1[90] + getitem_2459: "f32[768][1]cuda:0" = _foreach_div_1[91] + getitem_2460: "f32[768][1]cuda:0" = _foreach_div_1[92] + getitem_2461: "f32[768][1]cuda:0" = _foreach_div_1[93] + getitem_2462: "f32[3072, 768][768, 1]cuda:0" = _foreach_div_1[94] + getitem_2463: "f32[3072][1]cuda:0" = _foreach_div_1[95] + getitem_2464: "f32[768, 3072][3072, 1]cuda:0" = _foreach_div_1[96] + getitem_2465: "f32[768][1]cuda:0" = _foreach_div_1[97] + getitem_2466: "f32[768][1]cuda:0" = _foreach_div_1[98] + getitem_2467: "f32[768][1]cuda:0" = _foreach_div_1[99] + getitem_2468: "f32[2304, 768][768, 1]cuda:0" = _foreach_div_1[100] + getitem_2469: "f32[2304][1]cuda:0" = _foreach_div_1[101] + getitem_2470: "f32[768, 768][768, 1]cuda:0" = _foreach_div_1[102] + getitem_2471: "f32[768][1]cuda:0" = _foreach_div_1[103] + getitem_2472: "f32[768][1]cuda:0" = _foreach_div_1[104] + getitem_2473: "f32[768][1]cuda:0" = _foreach_div_1[105] + getitem_2474: "f32[3072, 768][768, 1]cuda:0" = _foreach_div_1[106] + getitem_2475: "f32[3072][1]cuda:0" = _foreach_div_1[107] + getitem_2476: "f32[768, 3072][3072, 1]cuda:0" = _foreach_div_1[108] + getitem_2477: "f32[768][1]cuda:0" = 
_foreach_div_1[109] + getitem_2478: "f32[768][1]cuda:0" = _foreach_div_1[110] + getitem_2479: "f32[768][1]cuda:0" = _foreach_div_1[111] + getitem_2480: "f32[2304, 768][768, 1]cuda:0" = _foreach_div_1[112] + getitem_2481: "f32[2304][1]cuda:0" = _foreach_div_1[113] + getitem_2482: "f32[768, 768][768, 1]cuda:0" = _foreach_div_1[114] + getitem_2483: "f32[768][1]cuda:0" = _foreach_div_1[115] + getitem_2484: "f32[768][1]cuda:0" = _foreach_div_1[116] + getitem_2485: "f32[768][1]cuda:0" = _foreach_div_1[117] + getitem_2486: "f32[3072, 768][768, 1]cuda:0" = _foreach_div_1[118] + getitem_2487: "f32[3072][1]cuda:0" = _foreach_div_1[119] + getitem_2488: "f32[768, 3072][3072, 1]cuda:0" = _foreach_div_1[120] + getitem_2489: "f32[768][1]cuda:0" = _foreach_div_1[121] + getitem_2490: "f32[768][1]cuda:0" = _foreach_div_1[122] + getitem_2491: "f32[768][1]cuda:0" = _foreach_div_1[123] + getitem_2492: "f32[2304, 768][768, 1]cuda:0" = _foreach_div_1[124] + getitem_2493: "f32[2304][1]cuda:0" = _foreach_div_1[125] + getitem_2494: "f32[768, 768][768, 1]cuda:0" = _foreach_div_1[126] + getitem_2495: "f32[768][1]cuda:0" = _foreach_div_1[127] + getitem_2496: "f32[768][1]cuda:0" = _foreach_div_1[128] + getitem_2497: "f32[768][1]cuda:0" = _foreach_div_1[129] + getitem_2498: "f32[3072, 768][768, 1]cuda:0" = _foreach_div_1[130] + getitem_2499: "f32[3072][1]cuda:0" = _foreach_div_1[131] + getitem_2500: "f32[768, 3072][3072, 1]cuda:0" = _foreach_div_1[132] + getitem_2501: "f32[768][1]cuda:0" = _foreach_div_1[133] + getitem_2502: "f32[768][1]cuda:0" = _foreach_div_1[134] + getitem_2503: "f32[768][1]cuda:0" = _foreach_div_1[135] + getitem_2504: "f32[2304, 768][768, 1]cuda:0" = _foreach_div_1[136] + getitem_2505: "f32[2304][1]cuda:0" = _foreach_div_1[137] + getitem_2506: "f32[768, 768][768, 1]cuda:0" = _foreach_div_1[138] + getitem_2507: "f32[768][1]cuda:0" = _foreach_div_1[139] + getitem_2508: "f32[768][1]cuda:0" = _foreach_div_1[140] + getitem_2509: "f32[768][1]cuda:0" = _foreach_div_1[141] + getitem_2510: "f32[3072, 768][768, 1]cuda:0" = _foreach_div_1[142] + getitem_2511: "f32[3072][1]cuda:0" = _foreach_div_1[143] + getitem_2512: "f32[768, 3072][3072, 1]cuda:0" = _foreach_div_1[144] + getitem_2513: "f32[768][1]cuda:0" = _foreach_div_1[145] + getitem_2514: "f32[768][1]cuda:0" = _foreach_div_1[146] + getitem_2515: "f32[768][1]cuda:0" = _foreach_div_1[147]; _foreach_div_1 = None + + # File: /data/users/jjwu/a/pytorch/torch/optim/adam.py:585 in _multi_tensor_adam, code: torch._foreach_add_(exp_avg_sq_sqrt, eps) + _foreach_add_3 = torch.ops.aten._foreach_add.Scalar([getitem_2368, getitem_2369, getitem_2370, getitem_2371, getitem_2372, getitem_2373, getitem_2374, getitem_2375, getitem_2376, getitem_2377, getitem_2378, getitem_2379, getitem_2380, getitem_2381, getitem_2382, getitem_2383, getitem_2384, getitem_2385, getitem_2386, getitem_2387, getitem_2388, getitem_2389, getitem_2390, getitem_2391, getitem_2392, getitem_2393, getitem_2394, getitem_2395, getitem_2396, getitem_2397, getitem_2398, getitem_2399, getitem_2400, getitem_2401, getitem_2402, getitem_2403, getitem_2404, getitem_2405, getitem_2406, getitem_2407, getitem_2408, getitem_2409, getitem_2410, getitem_2411, getitem_2412, getitem_2413, getitem_2414, getitem_2415, getitem_2416, getitem_2417, getitem_2418, getitem_2419, getitem_2420, getitem_2421, getitem_2422, getitem_2423, getitem_2424, getitem_2425, getitem_2426, getitem_2427, getitem_2428, getitem_2429, getitem_2430, getitem_2431, getitem_2432, getitem_2433, getitem_2434, getitem_2435, getitem_2436, getitem_2437, 
getitem_2438, getitem_2439, getitem_2440, getitem_2441, getitem_2442, getitem_2443, getitem_2444, getitem_2445, getitem_2446, getitem_2447, getitem_2448, getitem_2449, getitem_2450, getitem_2451, getitem_2452, getitem_2453, getitem_2454, getitem_2455, getitem_2456, getitem_2457, getitem_2458, getitem_2459, getitem_2460, getitem_2461, getitem_2462, getitem_2463, getitem_2464, getitem_2465, getitem_2466, getitem_2467, getitem_2468, getitem_2469, getitem_2470, getitem_2471, getitem_2472, getitem_2473, getitem_2474, getitem_2475, getitem_2476, getitem_2477, getitem_2478, getitem_2479, getitem_2480, getitem_2481, getitem_2482, getitem_2483, getitem_2484, getitem_2485, getitem_2486, getitem_2487, getitem_2488, getitem_2489, getitem_2490, getitem_2491, getitem_2492, getitem_2493, getitem_2494, getitem_2495, getitem_2496, getitem_2497, getitem_2498, getitem_2499, getitem_2500, getitem_2501, getitem_2502, getitem_2503, getitem_2504, getitem_2505, getitem_2506, getitem_2507, getitem_2508, getitem_2509, getitem_2510, getitem_2511, getitem_2512, getitem_2513, getitem_2514, getitem_2515], 1e-08); getitem_2368 = getitem_2369 = getitem_2370 = getitem_2371 = getitem_2372 = getitem_2373 = getitem_2374 = getitem_2375 = getitem_2376 = getitem_2377 = getitem_2378 = getitem_2379 = getitem_2380 = getitem_2381 = getitem_2382 = getitem_2383 = getitem_2384 = getitem_2385 = getitem_2386 = getitem_2387 = getitem_2388 = getitem_2389 = getitem_2390 = getitem_2391 = getitem_2392 = getitem_2393 = getitem_2394 = getitem_2395 = getitem_2396 = getitem_2397 = getitem_2398 = getitem_2399 = getitem_2400 = getitem_2401 = getitem_2402 = getitem_2403 = getitem_2404 = getitem_2405 = getitem_2406 = getitem_2407 = getitem_2408 = getitem_2409 = getitem_2410 = getitem_2411 = getitem_2412 = getitem_2413 = getitem_2414 = getitem_2415 = getitem_2416 = getitem_2417 = getitem_2418 = getitem_2419 = getitem_2420 = getitem_2421 = getitem_2422 = getitem_2423 = getitem_2424 = getitem_2425 = getitem_2426 = getitem_2427 = getitem_2428 = getitem_2429 = getitem_2430 = getitem_2431 = getitem_2432 = getitem_2433 = getitem_2434 = getitem_2435 = getitem_2436 = getitem_2437 = getitem_2438 = getitem_2439 = getitem_2440 = getitem_2441 = getitem_2442 = getitem_2443 = getitem_2444 = getitem_2445 = getitem_2446 = getitem_2447 = getitem_2448 = getitem_2449 = getitem_2450 = getitem_2451 = getitem_2452 = getitem_2453 = getitem_2454 = getitem_2455 = getitem_2456 = getitem_2457 = getitem_2458 = getitem_2459 = getitem_2460 = getitem_2461 = getitem_2462 = getitem_2463 = getitem_2464 = getitem_2465 = getitem_2466 = getitem_2467 = getitem_2468 = getitem_2469 = getitem_2470 = getitem_2471 = getitem_2472 = getitem_2473 = getitem_2474 = getitem_2475 = getitem_2476 = getitem_2477 = getitem_2478 = getitem_2479 = getitem_2480 = getitem_2481 = getitem_2482 = getitem_2483 = getitem_2484 = getitem_2485 = getitem_2486 = getitem_2487 = getitem_2488 = getitem_2489 = getitem_2490 = getitem_2491 = getitem_2492 = getitem_2493 = getitem_2494 = getitem_2495 = getitem_2496 = getitem_2497 = getitem_2498 = getitem_2499 = getitem_2500 = getitem_2501 = getitem_2502 = getitem_2503 = getitem_2504 = getitem_2505 = getitem_2506 = getitem_2507 = getitem_2508 = getitem_2509 = getitem_2510 = getitem_2511 = getitem_2512 = getitem_2513 = getitem_2514 = getitem_2515 = None + getitem_2516: "f32[50304, 768][768, 1]cuda:0" = _foreach_add_3[0] + getitem_2517: "f32[1024, 768][768, 1]cuda:0" = _foreach_add_3[1] + getitem_2518: "f32[768][1]cuda:0" = _foreach_add_3[2] + getitem_2519: "f32[768][1]cuda:0" = 
_foreach_add_3[3] + getitem_2520: "f32[2304, 768][768, 1]cuda:0" = _foreach_add_3[4] + getitem_2521: "f32[2304][1]cuda:0" = _foreach_add_3[5] + getitem_2522: "f32[768, 768][768, 1]cuda:0" = _foreach_add_3[6] + getitem_2523: "f32[768][1]cuda:0" = _foreach_add_3[7] + getitem_2524: "f32[768][1]cuda:0" = _foreach_add_3[8] + getitem_2525: "f32[768][1]cuda:0" = _foreach_add_3[9] + getitem_2526: "f32[3072, 768][768, 1]cuda:0" = _foreach_add_3[10] + getitem_2527: "f32[3072][1]cuda:0" = _foreach_add_3[11] + getitem_2528: "f32[768, 3072][3072, 1]cuda:0" = _foreach_add_3[12] + getitem_2529: "f32[768][1]cuda:0" = _foreach_add_3[13] + getitem_2530: "f32[768][1]cuda:0" = _foreach_add_3[14] + getitem_2531: "f32[768][1]cuda:0" = _foreach_add_3[15] + getitem_2532: "f32[2304, 768][768, 1]cuda:0" = _foreach_add_3[16] + getitem_2533: "f32[2304][1]cuda:0" = _foreach_add_3[17] + getitem_2534: "f32[768, 768][768, 1]cuda:0" = _foreach_add_3[18] + getitem_2535: "f32[768][1]cuda:0" = _foreach_add_3[19] + getitem_2536: "f32[768][1]cuda:0" = _foreach_add_3[20] + getitem_2537: "f32[768][1]cuda:0" = _foreach_add_3[21] + getitem_2538: "f32[3072, 768][768, 1]cuda:0" = _foreach_add_3[22] + getitem_2539: "f32[3072][1]cuda:0" = _foreach_add_3[23] + getitem_2540: "f32[768, 3072][3072, 1]cuda:0" = _foreach_add_3[24] + getitem_2541: "f32[768][1]cuda:0" = _foreach_add_3[25] + getitem_2542: "f32[768][1]cuda:0" = _foreach_add_3[26] + getitem_2543: "f32[768][1]cuda:0" = _foreach_add_3[27] + getitem_2544: "f32[2304, 768][768, 1]cuda:0" = _foreach_add_3[28] + getitem_2545: "f32[2304][1]cuda:0" = _foreach_add_3[29] + getitem_2546: "f32[768, 768][768, 1]cuda:0" = _foreach_add_3[30] + getitem_2547: "f32[768][1]cuda:0" = _foreach_add_3[31] + getitem_2548: "f32[768][1]cuda:0" = _foreach_add_3[32] + getitem_2549: "f32[768][1]cuda:0" = _foreach_add_3[33] + getitem_2550: "f32[3072, 768][768, 1]cuda:0" = _foreach_add_3[34] + getitem_2551: "f32[3072][1]cuda:0" = _foreach_add_3[35] + getitem_2552: "f32[768, 3072][3072, 1]cuda:0" = _foreach_add_3[36] + getitem_2553: "f32[768][1]cuda:0" = _foreach_add_3[37] + getitem_2554: "f32[768][1]cuda:0" = _foreach_add_3[38] + getitem_2555: "f32[768][1]cuda:0" = _foreach_add_3[39] + getitem_2556: "f32[2304, 768][768, 1]cuda:0" = _foreach_add_3[40] + getitem_2557: "f32[2304][1]cuda:0" = _foreach_add_3[41] + getitem_2558: "f32[768, 768][768, 1]cuda:0" = _foreach_add_3[42] + getitem_2559: "f32[768][1]cuda:0" = _foreach_add_3[43] + getitem_2560: "f32[768][1]cuda:0" = _foreach_add_3[44] + getitem_2561: "f32[768][1]cuda:0" = _foreach_add_3[45] + getitem_2562: "f32[3072, 768][768, 1]cuda:0" = _foreach_add_3[46] + getitem_2563: "f32[3072][1]cuda:0" = _foreach_add_3[47] + getitem_2564: "f32[768, 3072][3072, 1]cuda:0" = _foreach_add_3[48] + getitem_2565: "f32[768][1]cuda:0" = _foreach_add_3[49] + getitem_2566: "f32[768][1]cuda:0" = _foreach_add_3[50] + getitem_2567: "f32[768][1]cuda:0" = _foreach_add_3[51] + getitem_2568: "f32[2304, 768][768, 1]cuda:0" = _foreach_add_3[52] + getitem_2569: "f32[2304][1]cuda:0" = _foreach_add_3[53] + getitem_2570: "f32[768, 768][768, 1]cuda:0" = _foreach_add_3[54] + getitem_2571: "f32[768][1]cuda:0" = _foreach_add_3[55] + getitem_2572: "f32[768][1]cuda:0" = _foreach_add_3[56] + getitem_2573: "f32[768][1]cuda:0" = _foreach_add_3[57] + getitem_2574: "f32[3072, 768][768, 1]cuda:0" = _foreach_add_3[58] + getitem_2575: "f32[3072][1]cuda:0" = _foreach_add_3[59] + getitem_2576: "f32[768, 3072][3072, 1]cuda:0" = _foreach_add_3[60] + getitem_2577: "f32[768][1]cuda:0" = _foreach_add_3[61] + 
getitem_2578: "f32[768][1]cuda:0" = _foreach_add_3[62] + getitem_2579: "f32[768][1]cuda:0" = _foreach_add_3[63] + getitem_2580: "f32[2304, 768][768, 1]cuda:0" = _foreach_add_3[64] + getitem_2581: "f32[2304][1]cuda:0" = _foreach_add_3[65] + getitem_2582: "f32[768, 768][768, 1]cuda:0" = _foreach_add_3[66] + getitem_2583: "f32[768][1]cuda:0" = _foreach_add_3[67] + getitem_2584: "f32[768][1]cuda:0" = _foreach_add_3[68] + getitem_2585: "f32[768][1]cuda:0" = _foreach_add_3[69] + getitem_2586: "f32[3072, 768][768, 1]cuda:0" = _foreach_add_3[70] + getitem_2587: "f32[3072][1]cuda:0" = _foreach_add_3[71] + getitem_2588: "f32[768, 3072][3072, 1]cuda:0" = _foreach_add_3[72] + getitem_2589: "f32[768][1]cuda:0" = _foreach_add_3[73] + getitem_2590: "f32[768][1]cuda:0" = _foreach_add_3[74] + getitem_2591: "f32[768][1]cuda:0" = _foreach_add_3[75] + getitem_2592: "f32[2304, 768][768, 1]cuda:0" = _foreach_add_3[76] + getitem_2593: "f32[2304][1]cuda:0" = _foreach_add_3[77] + getitem_2594: "f32[768, 768][768, 1]cuda:0" = _foreach_add_3[78] + getitem_2595: "f32[768][1]cuda:0" = _foreach_add_3[79] + getitem_2596: "f32[768][1]cuda:0" = _foreach_add_3[80] + getitem_2597: "f32[768][1]cuda:0" = _foreach_add_3[81] + getitem_2598: "f32[3072, 768][768, 1]cuda:0" = _foreach_add_3[82] + getitem_2599: "f32[3072][1]cuda:0" = _foreach_add_3[83] + getitem_2600: "f32[768, 3072][3072, 1]cuda:0" = _foreach_add_3[84] + getitem_2601: "f32[768][1]cuda:0" = _foreach_add_3[85] + getitem_2602: "f32[768][1]cuda:0" = _foreach_add_3[86] + getitem_2603: "f32[768][1]cuda:0" = _foreach_add_3[87] + getitem_2604: "f32[2304, 768][768, 1]cuda:0" = _foreach_add_3[88] + getitem_2605: "f32[2304][1]cuda:0" = _foreach_add_3[89] + getitem_2606: "f32[768, 768][768, 1]cuda:0" = _foreach_add_3[90] + getitem_2607: "f32[768][1]cuda:0" = _foreach_add_3[91] + getitem_2608: "f32[768][1]cuda:0" = _foreach_add_3[92] + getitem_2609: "f32[768][1]cuda:0" = _foreach_add_3[93] + getitem_2610: "f32[3072, 768][768, 1]cuda:0" = _foreach_add_3[94] + getitem_2611: "f32[3072][1]cuda:0" = _foreach_add_3[95] + getitem_2612: "f32[768, 3072][3072, 1]cuda:0" = _foreach_add_3[96] + getitem_2613: "f32[768][1]cuda:0" = _foreach_add_3[97] + getitem_2614: "f32[768][1]cuda:0" = _foreach_add_3[98] + getitem_2615: "f32[768][1]cuda:0" = _foreach_add_3[99] + getitem_2616: "f32[2304, 768][768, 1]cuda:0" = _foreach_add_3[100] + getitem_2617: "f32[2304][1]cuda:0" = _foreach_add_3[101] + getitem_2618: "f32[768, 768][768, 1]cuda:0" = _foreach_add_3[102] + getitem_2619: "f32[768][1]cuda:0" = _foreach_add_3[103] + getitem_2620: "f32[768][1]cuda:0" = _foreach_add_3[104] + getitem_2621: "f32[768][1]cuda:0" = _foreach_add_3[105] + getitem_2622: "f32[3072, 768][768, 1]cuda:0" = _foreach_add_3[106] + getitem_2623: "f32[3072][1]cuda:0" = _foreach_add_3[107] + getitem_2624: "f32[768, 3072][3072, 1]cuda:0" = _foreach_add_3[108] + getitem_2625: "f32[768][1]cuda:0" = _foreach_add_3[109] + getitem_2626: "f32[768][1]cuda:0" = _foreach_add_3[110] + getitem_2627: "f32[768][1]cuda:0" = _foreach_add_3[111] + getitem_2628: "f32[2304, 768][768, 1]cuda:0" = _foreach_add_3[112] + getitem_2629: "f32[2304][1]cuda:0" = _foreach_add_3[113] + getitem_2630: "f32[768, 768][768, 1]cuda:0" = _foreach_add_3[114] + getitem_2631: "f32[768][1]cuda:0" = _foreach_add_3[115] + getitem_2632: "f32[768][1]cuda:0" = _foreach_add_3[116] + getitem_2633: "f32[768][1]cuda:0" = _foreach_add_3[117] + getitem_2634: "f32[3072, 768][768, 1]cuda:0" = _foreach_add_3[118] + getitem_2635: "f32[3072][1]cuda:0" = _foreach_add_3[119] + 
getitem_2636: "f32[768, 3072][3072, 1]cuda:0" = _foreach_add_3[120] + getitem_2637: "f32[768][1]cuda:0" = _foreach_add_3[121] + getitem_2638: "f32[768][1]cuda:0" = _foreach_add_3[122] + getitem_2639: "f32[768][1]cuda:0" = _foreach_add_3[123] + getitem_2640: "f32[2304, 768][768, 1]cuda:0" = _foreach_add_3[124] + getitem_2641: "f32[2304][1]cuda:0" = _foreach_add_3[125] + getitem_2642: "f32[768, 768][768, 1]cuda:0" = _foreach_add_3[126] + getitem_2643: "f32[768][1]cuda:0" = _foreach_add_3[127] + getitem_2644: "f32[768][1]cuda:0" = _foreach_add_3[128] + getitem_2645: "f32[768][1]cuda:0" = _foreach_add_3[129] + getitem_2646: "f32[3072, 768][768, 1]cuda:0" = _foreach_add_3[130] + getitem_2647: "f32[3072][1]cuda:0" = _foreach_add_3[131] + getitem_2648: "f32[768, 3072][3072, 1]cuda:0" = _foreach_add_3[132] + getitem_2649: "f32[768][1]cuda:0" = _foreach_add_3[133] + getitem_2650: "f32[768][1]cuda:0" = _foreach_add_3[134] + getitem_2651: "f32[768][1]cuda:0" = _foreach_add_3[135] + getitem_2652: "f32[2304, 768][768, 1]cuda:0" = _foreach_add_3[136] + getitem_2653: "f32[2304][1]cuda:0" = _foreach_add_3[137] + getitem_2654: "f32[768, 768][768, 1]cuda:0" = _foreach_add_3[138] + getitem_2655: "f32[768][1]cuda:0" = _foreach_add_3[139] + getitem_2656: "f32[768][1]cuda:0" = _foreach_add_3[140] + getitem_2657: "f32[768][1]cuda:0" = _foreach_add_3[141] + getitem_2658: "f32[3072, 768][768, 1]cuda:0" = _foreach_add_3[142] + getitem_2659: "f32[3072][1]cuda:0" = _foreach_add_3[143] + getitem_2660: "f32[768, 3072][3072, 1]cuda:0" = _foreach_add_3[144] + getitem_2661: "f32[768][1]cuda:0" = _foreach_add_3[145] + getitem_2662: "f32[768][1]cuda:0" = _foreach_add_3[146] + getitem_2663: "f32[768][1]cuda:0" = _foreach_add_3[147]; _foreach_add_3 = None + + # File: /data/users/jjwu/a/pytorch/torch/optim/adam.py:586 in _multi_tensor_adam, code: torch._foreach_div_(exp_avg_sq_sqrt, step_size) + _foreach_div_2 = torch.ops.aten._foreach_div.List([getitem_2516, getitem_2517, getitem_2518, getitem_2519, getitem_2520, getitem_2521, getitem_2522, getitem_2523, getitem_2524, getitem_2525, getitem_2526, getitem_2527, getitem_2528, getitem_2529, getitem_2530, getitem_2531, getitem_2532, getitem_2533, getitem_2534, getitem_2535, getitem_2536, getitem_2537, getitem_2538, getitem_2539, getitem_2540, getitem_2541, getitem_2542, getitem_2543, getitem_2544, getitem_2545, getitem_2546, getitem_2547, getitem_2548, getitem_2549, getitem_2550, getitem_2551, getitem_2552, getitem_2553, getitem_2554, getitem_2555, getitem_2556, getitem_2557, getitem_2558, getitem_2559, getitem_2560, getitem_2561, getitem_2562, getitem_2563, getitem_2564, getitem_2565, getitem_2566, getitem_2567, getitem_2568, getitem_2569, getitem_2570, getitem_2571, getitem_2572, getitem_2573, getitem_2574, getitem_2575, getitem_2576, getitem_2577, getitem_2578, getitem_2579, getitem_2580, getitem_2581, getitem_2582, getitem_2583, getitem_2584, getitem_2585, getitem_2586, getitem_2587, getitem_2588, getitem_2589, getitem_2590, getitem_2591, getitem_2592, getitem_2593, getitem_2594, getitem_2595, getitem_2596, getitem_2597, getitem_2598, getitem_2599, getitem_2600, getitem_2601, getitem_2602, getitem_2603, getitem_2604, getitem_2605, getitem_2606, getitem_2607, getitem_2608, getitem_2609, getitem_2610, getitem_2611, getitem_2612, getitem_2613, getitem_2614, getitem_2615, getitem_2616, getitem_2617, getitem_2618, getitem_2619, getitem_2620, getitem_2621, getitem_2622, getitem_2623, getitem_2624, getitem_2625, getitem_2626, getitem_2627, getitem_2628, getitem_2629, getitem_2630, 
getitem_2631, getitem_2632, getitem_2633, getitem_2634, getitem_2635, getitem_2636, getitem_2637, getitem_2638, getitem_2639, getitem_2640, getitem_2641, getitem_2642, getitem_2643, getitem_2644, getitem_2645, getitem_2646, getitem_2647, getitem_2648, getitem_2649, getitem_2650, getitem_2651, getitem_2652, getitem_2653, getitem_2654, getitem_2655, getitem_2656, getitem_2657, getitem_2658, getitem_2659, getitem_2660, getitem_2661, getitem_2662, getitem_2663], [getitem_1924, getitem_1925, getitem_1926, getitem_1927, getitem_1928, getitem_1929, getitem_1930, getitem_1931, getitem_1932, getitem_1933, getitem_1934, getitem_1935, getitem_1936, getitem_1937, getitem_1938, getitem_1939, getitem_1940, getitem_1941, getitem_1942, getitem_1943, getitem_1944, getitem_1945, getitem_1946, getitem_1947, getitem_1948, getitem_1949, getitem_1950, getitem_1951, getitem_1952, getitem_1953, getitem_1954, getitem_1955, getitem_1956, getitem_1957, getitem_1958, getitem_1959, getitem_1960, getitem_1961, getitem_1962, getitem_1963, getitem_1964, getitem_1965, getitem_1966, getitem_1967, getitem_1968, getitem_1969, getitem_1970, getitem_1971, getitem_1972, getitem_1973, getitem_1974, getitem_1975, getitem_1976, getitem_1977, getitem_1978, getitem_1979, getitem_1980, getitem_1981, getitem_1982, getitem_1983, getitem_1984, getitem_1985, getitem_1986, getitem_1987, getitem_1988, getitem_1989, getitem_1990, getitem_1991, getitem_1992, getitem_1993, getitem_1994, getitem_1995, getitem_1996, getitem_1997, getitem_1998, getitem_1999, getitem_2000, getitem_2001, getitem_2002, getitem_2003, getitem_2004, getitem_2005, getitem_2006, getitem_2007, getitem_2008, getitem_2009, getitem_2010, getitem_2011, getitem_2012, getitem_2013, getitem_2014, getitem_2015, getitem_2016, getitem_2017, getitem_2018, getitem_2019, getitem_2020, getitem_2021, getitem_2022, getitem_2023, getitem_2024, getitem_2025, getitem_2026, getitem_2027, getitem_2028, getitem_2029, getitem_2030, getitem_2031, getitem_2032, getitem_2033, getitem_2034, getitem_2035, getitem_2036, getitem_2037, getitem_2038, getitem_2039, getitem_2040, getitem_2041, getitem_2042, getitem_2043, getitem_2044, getitem_2045, getitem_2046, getitem_2047, getitem_2048, getitem_2049, getitem_2050, getitem_2051, getitem_2052, getitem_2053, getitem_2054, getitem_2055, getitem_2056, getitem_2057, getitem_2058, getitem_2059, getitem_2060, getitem_2061, getitem_2062, getitem_2063, getitem_2064, getitem_2065, getitem_2066, getitem_2067, getitem_2068, getitem_2069, getitem_2070, getitem_2071]); getitem_2516 = getitem_2517 = getitem_2518 = getitem_2519 = getitem_2520 = getitem_2521 = getitem_2522 = getitem_2523 = getitem_2524 = getitem_2525 = getitem_2526 = getitem_2527 = getitem_2528 = getitem_2529 = getitem_2530 = getitem_2531 = getitem_2532 = getitem_2533 = getitem_2534 = getitem_2535 = getitem_2536 = getitem_2537 = getitem_2538 = getitem_2539 = getitem_2540 = getitem_2541 = getitem_2542 = getitem_2543 = getitem_2544 = getitem_2545 = getitem_2546 = getitem_2547 = getitem_2548 = getitem_2549 = getitem_2550 = getitem_2551 = getitem_2552 = getitem_2553 = getitem_2554 = getitem_2555 = getitem_2556 = getitem_2557 = getitem_2558 = getitem_2559 = getitem_2560 = getitem_2561 = getitem_2562 = getitem_2563 = getitem_2564 = getitem_2565 = getitem_2566 = getitem_2567 = getitem_2568 = getitem_2569 = getitem_2570 = getitem_2571 = getitem_2572 = getitem_2573 = getitem_2574 = getitem_2575 = getitem_2576 = getitem_2577 = getitem_2578 = getitem_2579 = getitem_2580 = getitem_2581 = getitem_2582 = 
getitem_2583 = getitem_2584 = getitem_2585 = getitem_2586 = getitem_2587 = getitem_2588 = getitem_2589 = getitem_2590 = getitem_2591 = getitem_2592 = getitem_2593 = getitem_2594 = getitem_2595 = getitem_2596 = getitem_2597 = getitem_2598 = getitem_2599 = getitem_2600 = getitem_2601 = getitem_2602 = getitem_2603 = getitem_2604 = getitem_2605 = getitem_2606 = getitem_2607 = getitem_2608 = getitem_2609 = getitem_2610 = getitem_2611 = getitem_2612 = getitem_2613 = getitem_2614 = getitem_2615 = getitem_2616 = getitem_2617 = getitem_2618 = getitem_2619 = getitem_2620 = getitem_2621 = getitem_2622 = getitem_2623 = getitem_2624 = getitem_2625 = getitem_2626 = getitem_2627 = getitem_2628 = getitem_2629 = getitem_2630 = getitem_2631 = getitem_2632 = getitem_2633 = getitem_2634 = getitem_2635 = getitem_2636 = getitem_2637 = getitem_2638 = getitem_2639 = getitem_2640 = getitem_2641 = getitem_2642 = getitem_2643 = getitem_2644 = getitem_2645 = getitem_2646 = getitem_2647 = getitem_2648 = getitem_2649 = getitem_2650 = getitem_2651 = getitem_2652 = getitem_2653 = getitem_2654 = getitem_2655 = getitem_2656 = getitem_2657 = getitem_2658 = getitem_2659 = getitem_2660 = getitem_2661 = getitem_2662 = getitem_2663 = getitem_1924 = getitem_1925 = getitem_1926 = getitem_1927 = getitem_1928 = getitem_1929 = getitem_1930 = getitem_1931 = getitem_1932 = getitem_1933 = getitem_1934 = getitem_1935 = getitem_1936 = getitem_1937 = getitem_1938 = getitem_1939 = getitem_1940 = getitem_1941 = getitem_1942 = getitem_1943 = getitem_1944 = getitem_1945 = getitem_1946 = getitem_1947 = getitem_1948 = getitem_1949 = getitem_1950 = getitem_1951 = getitem_1952 = getitem_1953 = getitem_1954 = getitem_1955 = getitem_1956 = getitem_1957 = getitem_1958 = getitem_1959 = getitem_1960 = getitem_1961 = getitem_1962 = getitem_1963 = getitem_1964 = getitem_1965 = getitem_1966 = getitem_1967 = getitem_1968 = getitem_1969 = getitem_1970 = getitem_1971 = getitem_1972 = getitem_1973 = getitem_1974 = getitem_1975 = getitem_1976 = getitem_1977 = getitem_1978 = getitem_1979 = getitem_1980 = getitem_1981 = getitem_1982 = getitem_1983 = getitem_1984 = getitem_1985 = getitem_1986 = getitem_1987 = getitem_1988 = getitem_1989 = getitem_1990 = getitem_1991 = getitem_1992 = getitem_1993 = getitem_1994 = getitem_1995 = getitem_1996 = getitem_1997 = getitem_1998 = getitem_1999 = getitem_2000 = getitem_2001 = getitem_2002 = getitem_2003 = getitem_2004 = getitem_2005 = getitem_2006 = getitem_2007 = getitem_2008 = getitem_2009 = getitem_2010 = getitem_2011 = getitem_2012 = getitem_2013 = getitem_2014 = getitem_2015 = getitem_2016 = getitem_2017 = getitem_2018 = getitem_2019 = getitem_2020 = getitem_2021 = getitem_2022 = getitem_2023 = getitem_2024 = getitem_2025 = getitem_2026 = getitem_2027 = getitem_2028 = getitem_2029 = getitem_2030 = getitem_2031 = getitem_2032 = getitem_2033 = getitem_2034 = getitem_2035 = getitem_2036 = getitem_2037 = getitem_2038 = getitem_2039 = getitem_2040 = getitem_2041 = getitem_2042 = getitem_2043 = getitem_2044 = getitem_2045 = getitem_2046 = getitem_2047 = getitem_2048 = getitem_2049 = getitem_2050 = getitem_2051 = getitem_2052 = getitem_2053 = getitem_2054 = getitem_2055 = getitem_2056 = getitem_2057 = getitem_2058 = getitem_2059 = getitem_2060 = getitem_2061 = getitem_2062 = getitem_2063 = getitem_2064 = getitem_2065 = getitem_2066 = getitem_2067 = getitem_2068 = getitem_2069 = getitem_2070 = getitem_2071 = None + getitem_2664: "f32[50304, 768][768, 1]cuda:0" = _foreach_div_2[0] + getitem_2665: "f32[1024, 768][768, 1]cuda:0" 
= _foreach_div_2[1] + getitem_2666: "f32[768][1]cuda:0" = _foreach_div_2[2] + getitem_2667: "f32[768][1]cuda:0" = _foreach_div_2[3] + getitem_2668: "f32[2304, 768][768, 1]cuda:0" = _foreach_div_2[4] + getitem_2669: "f32[2304][1]cuda:0" = _foreach_div_2[5] + getitem_2670: "f32[768, 768][768, 1]cuda:0" = _foreach_div_2[6] + getitem_2671: "f32[768][1]cuda:0" = _foreach_div_2[7] + getitem_2672: "f32[768][1]cuda:0" = _foreach_div_2[8] + getitem_2673: "f32[768][1]cuda:0" = _foreach_div_2[9] + getitem_2674: "f32[3072, 768][768, 1]cuda:0" = _foreach_div_2[10] + getitem_2675: "f32[3072][1]cuda:0" = _foreach_div_2[11] + getitem_2676: "f32[768, 3072][3072, 1]cuda:0" = _foreach_div_2[12] + getitem_2677: "f32[768][1]cuda:0" = _foreach_div_2[13] + getitem_2678: "f32[768][1]cuda:0" = _foreach_div_2[14] + getitem_2679: "f32[768][1]cuda:0" = _foreach_div_2[15] + getitem_2680: "f32[2304, 768][768, 1]cuda:0" = _foreach_div_2[16] + getitem_2681: "f32[2304][1]cuda:0" = _foreach_div_2[17] + getitem_2682: "f32[768, 768][768, 1]cuda:0" = _foreach_div_2[18] + getitem_2683: "f32[768][1]cuda:0" = _foreach_div_2[19] + getitem_2684: "f32[768][1]cuda:0" = _foreach_div_2[20] + getitem_2685: "f32[768][1]cuda:0" = _foreach_div_2[21] + getitem_2686: "f32[3072, 768][768, 1]cuda:0" = _foreach_div_2[22] + getitem_2687: "f32[3072][1]cuda:0" = _foreach_div_2[23] + getitem_2688: "f32[768, 3072][3072, 1]cuda:0" = _foreach_div_2[24] + getitem_2689: "f32[768][1]cuda:0" = _foreach_div_2[25] + getitem_2690: "f32[768][1]cuda:0" = _foreach_div_2[26] + getitem_2691: "f32[768][1]cuda:0" = _foreach_div_2[27] + getitem_2692: "f32[2304, 768][768, 1]cuda:0" = _foreach_div_2[28] + getitem_2693: "f32[2304][1]cuda:0" = _foreach_div_2[29] + getitem_2694: "f32[768, 768][768, 1]cuda:0" = _foreach_div_2[30] + getitem_2695: "f32[768][1]cuda:0" = _foreach_div_2[31] + getitem_2696: "f32[768][1]cuda:0" = _foreach_div_2[32] + getitem_2697: "f32[768][1]cuda:0" = _foreach_div_2[33] + getitem_2698: "f32[3072, 768][768, 1]cuda:0" = _foreach_div_2[34] + getitem_2699: "f32[3072][1]cuda:0" = _foreach_div_2[35] + getitem_2700: "f32[768, 3072][3072, 1]cuda:0" = _foreach_div_2[36] + getitem_2701: "f32[768][1]cuda:0" = _foreach_div_2[37] + getitem_2702: "f32[768][1]cuda:0" = _foreach_div_2[38] + getitem_2703: "f32[768][1]cuda:0" = _foreach_div_2[39] + getitem_2704: "f32[2304, 768][768, 1]cuda:0" = _foreach_div_2[40] + getitem_2705: "f32[2304][1]cuda:0" = _foreach_div_2[41] + getitem_2706: "f32[768, 768][768, 1]cuda:0" = _foreach_div_2[42] + getitem_2707: "f32[768][1]cuda:0" = _foreach_div_2[43] + getitem_2708: "f32[768][1]cuda:0" = _foreach_div_2[44] + getitem_2709: "f32[768][1]cuda:0" = _foreach_div_2[45] + getitem_2710: "f32[3072, 768][768, 1]cuda:0" = _foreach_div_2[46] + getitem_2711: "f32[3072][1]cuda:0" = _foreach_div_2[47] + getitem_2712: "f32[768, 3072][3072, 1]cuda:0" = _foreach_div_2[48] + getitem_2713: "f32[768][1]cuda:0" = _foreach_div_2[49] + getitem_2714: "f32[768][1]cuda:0" = _foreach_div_2[50] + getitem_2715: "f32[768][1]cuda:0" = _foreach_div_2[51] + getitem_2716: "f32[2304, 768][768, 1]cuda:0" = _foreach_div_2[52] + getitem_2717: "f32[2304][1]cuda:0" = _foreach_div_2[53] + getitem_2718: "f32[768, 768][768, 1]cuda:0" = _foreach_div_2[54] + getitem_2719: "f32[768][1]cuda:0" = _foreach_div_2[55] + getitem_2720: "f32[768][1]cuda:0" = _foreach_div_2[56] + getitem_2721: "f32[768][1]cuda:0" = _foreach_div_2[57] + getitem_2722: "f32[3072, 768][768, 1]cuda:0" = _foreach_div_2[58] + getitem_2723: "f32[3072][1]cuda:0" = _foreach_div_2[59] + getitem_2724: 
"f32[768, 3072][3072, 1]cuda:0" = _foreach_div_2[60] + getitem_2725: "f32[768][1]cuda:0" = _foreach_div_2[61] + getitem_2726: "f32[768][1]cuda:0" = _foreach_div_2[62] + getitem_2727: "f32[768][1]cuda:0" = _foreach_div_2[63] + getitem_2728: "f32[2304, 768][768, 1]cuda:0" = _foreach_div_2[64] + getitem_2729: "f32[2304][1]cuda:0" = _foreach_div_2[65] + getitem_2730: "f32[768, 768][768, 1]cuda:0" = _foreach_div_2[66] + getitem_2731: "f32[768][1]cuda:0" = _foreach_div_2[67] + getitem_2732: "f32[768][1]cuda:0" = _foreach_div_2[68] + getitem_2733: "f32[768][1]cuda:0" = _foreach_div_2[69] + getitem_2734: "f32[3072, 768][768, 1]cuda:0" = _foreach_div_2[70] + getitem_2735: "f32[3072][1]cuda:0" = _foreach_div_2[71] + getitem_2736: "f32[768, 3072][3072, 1]cuda:0" = _foreach_div_2[72] + getitem_2737: "f32[768][1]cuda:0" = _foreach_div_2[73] + getitem_2738: "f32[768][1]cuda:0" = _foreach_div_2[74] + getitem_2739: "f32[768][1]cuda:0" = _foreach_div_2[75] + getitem_2740: "f32[2304, 768][768, 1]cuda:0" = _foreach_div_2[76] + getitem_2741: "f32[2304][1]cuda:0" = _foreach_div_2[77] + getitem_2742: "f32[768, 768][768, 1]cuda:0" = _foreach_div_2[78] + getitem_2743: "f32[768][1]cuda:0" = _foreach_div_2[79] + getitem_2744: "f32[768][1]cuda:0" = _foreach_div_2[80] + getitem_2745: "f32[768][1]cuda:0" = _foreach_div_2[81] + getitem_2746: "f32[3072, 768][768, 1]cuda:0" = _foreach_div_2[82] + getitem_2747: "f32[3072][1]cuda:0" = _foreach_div_2[83] + getitem_2748: "f32[768, 3072][3072, 1]cuda:0" = _foreach_div_2[84] + getitem_2749: "f32[768][1]cuda:0" = _foreach_div_2[85] + getitem_2750: "f32[768][1]cuda:0" = _foreach_div_2[86] + getitem_2751: "f32[768][1]cuda:0" = _foreach_div_2[87] + getitem_2752: "f32[2304, 768][768, 1]cuda:0" = _foreach_div_2[88] + getitem_2753: "f32[2304][1]cuda:0" = _foreach_div_2[89] + getitem_2754: "f32[768, 768][768, 1]cuda:0" = _foreach_div_2[90] + getitem_2755: "f32[768][1]cuda:0" = _foreach_div_2[91] + getitem_2756: "f32[768][1]cuda:0" = _foreach_div_2[92] + getitem_2757: "f32[768][1]cuda:0" = _foreach_div_2[93] + getitem_2758: "f32[3072, 768][768, 1]cuda:0" = _foreach_div_2[94] + getitem_2759: "f32[3072][1]cuda:0" = _foreach_div_2[95] + getitem_2760: "f32[768, 3072][3072, 1]cuda:0" = _foreach_div_2[96] + getitem_2761: "f32[768][1]cuda:0" = _foreach_div_2[97] + getitem_2762: "f32[768][1]cuda:0" = _foreach_div_2[98] + getitem_2763: "f32[768][1]cuda:0" = _foreach_div_2[99] + getitem_2764: "f32[2304, 768][768, 1]cuda:0" = _foreach_div_2[100] + getitem_2765: "f32[2304][1]cuda:0" = _foreach_div_2[101] + getitem_2766: "f32[768, 768][768, 1]cuda:0" = _foreach_div_2[102] + getitem_2767: "f32[768][1]cuda:0" = _foreach_div_2[103] + getitem_2768: "f32[768][1]cuda:0" = _foreach_div_2[104] + getitem_2769: "f32[768][1]cuda:0" = _foreach_div_2[105] + getitem_2770: "f32[3072, 768][768, 1]cuda:0" = _foreach_div_2[106] + getitem_2771: "f32[3072][1]cuda:0" = _foreach_div_2[107] + getitem_2772: "f32[768, 3072][3072, 1]cuda:0" = _foreach_div_2[108] + getitem_2773: "f32[768][1]cuda:0" = _foreach_div_2[109] + getitem_2774: "f32[768][1]cuda:0" = _foreach_div_2[110] + getitem_2775: "f32[768][1]cuda:0" = _foreach_div_2[111] + getitem_2776: "f32[2304, 768][768, 1]cuda:0" = _foreach_div_2[112] + getitem_2777: "f32[2304][1]cuda:0" = _foreach_div_2[113] + getitem_2778: "f32[768, 768][768, 1]cuda:0" = _foreach_div_2[114] + getitem_2779: "f32[768][1]cuda:0" = _foreach_div_2[115] + getitem_2780: "f32[768][1]cuda:0" = _foreach_div_2[116] + getitem_2781: "f32[768][1]cuda:0" = _foreach_div_2[117] + getitem_2782: "f32[3072, 
768][768, 1]cuda:0" = _foreach_div_2[118] + getitem_2783: "f32[3072][1]cuda:0" = _foreach_div_2[119] + getitem_2784: "f32[768, 3072][3072, 1]cuda:0" = _foreach_div_2[120] + getitem_2785: "f32[768][1]cuda:0" = _foreach_div_2[121] + getitem_2786: "f32[768][1]cuda:0" = _foreach_div_2[122] + getitem_2787: "f32[768][1]cuda:0" = _foreach_div_2[123] + getitem_2788: "f32[2304, 768][768, 1]cuda:0" = _foreach_div_2[124] + getitem_2789: "f32[2304][1]cuda:0" = _foreach_div_2[125] + getitem_2790: "f32[768, 768][768, 1]cuda:0" = _foreach_div_2[126] + getitem_2791: "f32[768][1]cuda:0" = _foreach_div_2[127] + getitem_2792: "f32[768][1]cuda:0" = _foreach_div_2[128] + getitem_2793: "f32[768][1]cuda:0" = _foreach_div_2[129] + getitem_2794: "f32[3072, 768][768, 1]cuda:0" = _foreach_div_2[130] + getitem_2795: "f32[3072][1]cuda:0" = _foreach_div_2[131] + getitem_2796: "f32[768, 3072][3072, 1]cuda:0" = _foreach_div_2[132] + getitem_2797: "f32[768][1]cuda:0" = _foreach_div_2[133] + getitem_2798: "f32[768][1]cuda:0" = _foreach_div_2[134] + getitem_2799: "f32[768][1]cuda:0" = _foreach_div_2[135] + getitem_2800: "f32[2304, 768][768, 1]cuda:0" = _foreach_div_2[136] + getitem_2801: "f32[2304][1]cuda:0" = _foreach_div_2[137] + getitem_2802: "f32[768, 768][768, 1]cuda:0" = _foreach_div_2[138] + getitem_2803: "f32[768][1]cuda:0" = _foreach_div_2[139] + getitem_2804: "f32[768][1]cuda:0" = _foreach_div_2[140] + getitem_2805: "f32[768][1]cuda:0" = _foreach_div_2[141] + getitem_2806: "f32[3072, 768][768, 1]cuda:0" = _foreach_div_2[142] + getitem_2807: "f32[3072][1]cuda:0" = _foreach_div_2[143] + getitem_2808: "f32[768, 3072][3072, 1]cuda:0" = _foreach_div_2[144] + getitem_2809: "f32[768][1]cuda:0" = _foreach_div_2[145] + getitem_2810: "f32[768][1]cuda:0" = _foreach_div_2[146] + getitem_2811: "f32[768][1]cuda:0" = _foreach_div_2[147]; _foreach_div_2 = None + + # File: /data/users/jjwu/a/pytorch/torch/optim/adam.py:589 in _multi_tensor_adam, code: torch._foreach_addcdiv_(device_params, device_exp_avgs, exp_avg_sq_sqrt) + _foreach_div_3 = torch.ops.aten._foreach_div.List([getitem_444, getitem_445, getitem_446, getitem_447, getitem_448, getitem_449, getitem_450, getitem_451, getitem_452, getitem_453, getitem_454, getitem_455, getitem_456, getitem_457, getitem_458, getitem_459, getitem_460, getitem_461, getitem_462, getitem_463, getitem_464, getitem_465, getitem_466, getitem_467, getitem_468, getitem_469, getitem_470, getitem_471, getitem_472, getitem_473, getitem_474, getitem_475, getitem_476, getitem_477, getitem_478, getitem_479, getitem_480, getitem_481, getitem_482, getitem_483, getitem_484, getitem_485, getitem_486, getitem_487, getitem_488, getitem_489, getitem_490, getitem_491, getitem_492, getitem_493, getitem_494, getitem_495, getitem_496, getitem_497, getitem_498, getitem_499, getitem_500, getitem_501, getitem_502, getitem_503, getitem_504, getitem_505, getitem_506, getitem_507, getitem_508, getitem_509, getitem_510, getitem_511, getitem_512, getitem_513, getitem_514, getitem_515, getitem_516, getitem_517, getitem_518, getitem_519, getitem_520, getitem_521, getitem_522, getitem_523, getitem_524, getitem_525, getitem_526, getitem_527, getitem_528, getitem_529, getitem_530, getitem_531, getitem_532, getitem_533, getitem_534, getitem_535, getitem_536, getitem_537, getitem_538, getitem_539, getitem_540, getitem_541, getitem_542, getitem_543, getitem_544, getitem_545, getitem_546, getitem_547, getitem_548, getitem_549, getitem_550, getitem_551, getitem_552, getitem_553, getitem_554, getitem_555, getitem_556, getitem_557, 
getitem_558, getitem_559, getitem_560, getitem_561, getitem_562, getitem_563, getitem_564, getitem_565, getitem_566, getitem_567, getitem_568, getitem_569, getitem_570, getitem_571, getitem_572, getitem_573, getitem_574, getitem_575, getitem_576, getitem_577, getitem_578, getitem_579, getitem_580, getitem_581, getitem_582, getitem_583, getitem_584, getitem_585, getitem_586, getitem_587, getitem_588, getitem_589, getitem_590, getitem_591], [getitem_2664, getitem_2665, getitem_2666, getitem_2667, getitem_2668, getitem_2669, getitem_2670, getitem_2671, getitem_2672, getitem_2673, getitem_2674, getitem_2675, getitem_2676, getitem_2677, getitem_2678, getitem_2679, getitem_2680, getitem_2681, getitem_2682, getitem_2683, getitem_2684, getitem_2685, getitem_2686, getitem_2687, getitem_2688, getitem_2689, getitem_2690, getitem_2691, getitem_2692, getitem_2693, getitem_2694, getitem_2695, getitem_2696, getitem_2697, getitem_2698, getitem_2699, getitem_2700, getitem_2701, getitem_2702, getitem_2703, getitem_2704, getitem_2705, getitem_2706, getitem_2707, getitem_2708, getitem_2709, getitem_2710, getitem_2711, getitem_2712, getitem_2713, getitem_2714, getitem_2715, getitem_2716, getitem_2717, getitem_2718, getitem_2719, getitem_2720, getitem_2721, getitem_2722, getitem_2723, getitem_2724, getitem_2725, getitem_2726, getitem_2727, getitem_2728, getitem_2729, getitem_2730, getitem_2731, getitem_2732, getitem_2733, getitem_2734, getitem_2735, getitem_2736, getitem_2737, getitem_2738, getitem_2739, getitem_2740, getitem_2741, getitem_2742, getitem_2743, getitem_2744, getitem_2745, getitem_2746, getitem_2747, getitem_2748, getitem_2749, getitem_2750, getitem_2751, getitem_2752, getitem_2753, getitem_2754, getitem_2755, getitem_2756, getitem_2757, getitem_2758, getitem_2759, getitem_2760, getitem_2761, getitem_2762, getitem_2763, getitem_2764, getitem_2765, getitem_2766, getitem_2767, getitem_2768, getitem_2769, getitem_2770, getitem_2771, getitem_2772, getitem_2773, getitem_2774, getitem_2775, getitem_2776, getitem_2777, getitem_2778, getitem_2779, getitem_2780, getitem_2781, getitem_2782, getitem_2783, getitem_2784, getitem_2785, getitem_2786, getitem_2787, getitem_2788, getitem_2789, getitem_2790, getitem_2791, getitem_2792, getitem_2793, getitem_2794, getitem_2795, getitem_2796, getitem_2797, getitem_2798, getitem_2799, getitem_2800, getitem_2801, getitem_2802, getitem_2803, getitem_2804, getitem_2805, getitem_2806, getitem_2807, getitem_2808, getitem_2809, getitem_2810, getitem_2811]); getitem_2664 = getitem_2665 = getitem_2666 = getitem_2667 = getitem_2668 = getitem_2669 = getitem_2670 = getitem_2671 = getitem_2672 = getitem_2673 = getitem_2674 = getitem_2675 = getitem_2676 = getitem_2677 = getitem_2678 = getitem_2679 = getitem_2680 = getitem_2681 = getitem_2682 = getitem_2683 = getitem_2684 = getitem_2685 = getitem_2686 = getitem_2687 = getitem_2688 = getitem_2689 = getitem_2690 = getitem_2691 = getitem_2692 = getitem_2693 = getitem_2694 = getitem_2695 = getitem_2696 = getitem_2697 = getitem_2698 = getitem_2699 = getitem_2700 = getitem_2701 = getitem_2702 = getitem_2703 = getitem_2704 = getitem_2705 = getitem_2706 = getitem_2707 = getitem_2708 = getitem_2709 = getitem_2710 = getitem_2711 = getitem_2712 = getitem_2713 = getitem_2714 = getitem_2715 = getitem_2716 = getitem_2717 = getitem_2718 = getitem_2719 = getitem_2720 = getitem_2721 = getitem_2722 = getitem_2723 = getitem_2724 = getitem_2725 = getitem_2726 = getitem_2727 = getitem_2728 = getitem_2729 = getitem_2730 = getitem_2731 = getitem_2732 = 
getitem_2733 = getitem_2734 = getitem_2735 = getitem_2736 = getitem_2737 = getitem_2738 = getitem_2739 = getitem_2740 = getitem_2741 = getitem_2742 = getitem_2743 = getitem_2744 = getitem_2745 = getitem_2746 = getitem_2747 = getitem_2748 = getitem_2749 = getitem_2750 = getitem_2751 = getitem_2752 = getitem_2753 = getitem_2754 = getitem_2755 = getitem_2756 = getitem_2757 = getitem_2758 = getitem_2759 = getitem_2760 = getitem_2761 = getitem_2762 = getitem_2763 = getitem_2764 = getitem_2765 = getitem_2766 = getitem_2767 = getitem_2768 = getitem_2769 = getitem_2770 = getitem_2771 = getitem_2772 = getitem_2773 = getitem_2774 = getitem_2775 = getitem_2776 = getitem_2777 = getitem_2778 = getitem_2779 = getitem_2780 = getitem_2781 = getitem_2782 = getitem_2783 = getitem_2784 = getitem_2785 = getitem_2786 = getitem_2787 = getitem_2788 = getitem_2789 = getitem_2790 = getitem_2791 = getitem_2792 = getitem_2793 = getitem_2794 = getitem_2795 = getitem_2796 = getitem_2797 = getitem_2798 = getitem_2799 = getitem_2800 = getitem_2801 = getitem_2802 = getitem_2803 = getitem_2804 = getitem_2805 = getitem_2806 = getitem_2807 = getitem_2808 = getitem_2809 = getitem_2810 = getitem_2811 = None + getitem_2812: "f32[50304, 768][768, 1]cuda:0" = _foreach_div_3[0] + getitem_2813: "f32[1024, 768][768, 1]cuda:0" = _foreach_div_3[1] + getitem_2814: "f32[768][1]cuda:0" = _foreach_div_3[2] + getitem_2815: "f32[768][1]cuda:0" = _foreach_div_3[3] + getitem_2816: "f32[2304, 768][768, 1]cuda:0" = _foreach_div_3[4] + getitem_2817: "f32[2304][1]cuda:0" = _foreach_div_3[5] + getitem_2818: "f32[768, 768][768, 1]cuda:0" = _foreach_div_3[6] + getitem_2819: "f32[768][1]cuda:0" = _foreach_div_3[7] + getitem_2820: "f32[768][1]cuda:0" = _foreach_div_3[8] + getitem_2821: "f32[768][1]cuda:0" = _foreach_div_3[9] + getitem_2822: "f32[3072, 768][768, 1]cuda:0" = _foreach_div_3[10] + getitem_2823: "f32[3072][1]cuda:0" = _foreach_div_3[11] + getitem_2824: "f32[768, 3072][3072, 1]cuda:0" = _foreach_div_3[12] + getitem_2825: "f32[768][1]cuda:0" = _foreach_div_3[13] + getitem_2826: "f32[768][1]cuda:0" = _foreach_div_3[14] + getitem_2827: "f32[768][1]cuda:0" = _foreach_div_3[15] + getitem_2828: "f32[2304, 768][768, 1]cuda:0" = _foreach_div_3[16] + getitem_2829: "f32[2304][1]cuda:0" = _foreach_div_3[17] + getitem_2830: "f32[768, 768][768, 1]cuda:0" = _foreach_div_3[18] + getitem_2831: "f32[768][1]cuda:0" = _foreach_div_3[19] + getitem_2832: "f32[768][1]cuda:0" = _foreach_div_3[20] + getitem_2833: "f32[768][1]cuda:0" = _foreach_div_3[21] + getitem_2834: "f32[3072, 768][768, 1]cuda:0" = _foreach_div_3[22] + getitem_2835: "f32[3072][1]cuda:0" = _foreach_div_3[23] + getitem_2836: "f32[768, 3072][3072, 1]cuda:0" = _foreach_div_3[24] + getitem_2837: "f32[768][1]cuda:0" = _foreach_div_3[25] + getitem_2838: "f32[768][1]cuda:0" = _foreach_div_3[26] + getitem_2839: "f32[768][1]cuda:0" = _foreach_div_3[27] + getitem_2840: "f32[2304, 768][768, 1]cuda:0" = _foreach_div_3[28] + getitem_2841: "f32[2304][1]cuda:0" = _foreach_div_3[29] + getitem_2842: "f32[768, 768][768, 1]cuda:0" = _foreach_div_3[30] + getitem_2843: "f32[768][1]cuda:0" = _foreach_div_3[31] + getitem_2844: "f32[768][1]cuda:0" = _foreach_div_3[32] + getitem_2845: "f32[768][1]cuda:0" = _foreach_div_3[33] + getitem_2846: "f32[3072, 768][768, 1]cuda:0" = _foreach_div_3[34] + getitem_2847: "f32[3072][1]cuda:0" = _foreach_div_3[35] + getitem_2848: "f32[768, 3072][3072, 1]cuda:0" = _foreach_div_3[36] + getitem_2849: "f32[768][1]cuda:0" = _foreach_div_3[37] + getitem_2850: "f32[768][1]cuda:0" = 
_foreach_div_3[38] + getitem_2851: "f32[768][1]cuda:0" = _foreach_div_3[39] + getitem_2852: "f32[2304, 768][768, 1]cuda:0" = _foreach_div_3[40] + getitem_2853: "f32[2304][1]cuda:0" = _foreach_div_3[41] + getitem_2854: "f32[768, 768][768, 1]cuda:0" = _foreach_div_3[42] + getitem_2855: "f32[768][1]cuda:0" = _foreach_div_3[43] + getitem_2856: "f32[768][1]cuda:0" = _foreach_div_3[44] + getitem_2857: "f32[768][1]cuda:0" = _foreach_div_3[45] + getitem_2858: "f32[3072, 768][768, 1]cuda:0" = _foreach_div_3[46] + getitem_2859: "f32[3072][1]cuda:0" = _foreach_div_3[47] + getitem_2860: "f32[768, 3072][3072, 1]cuda:0" = _foreach_div_3[48] + getitem_2861: "f32[768][1]cuda:0" = _foreach_div_3[49] + getitem_2862: "f32[768][1]cuda:0" = _foreach_div_3[50] + getitem_2863: "f32[768][1]cuda:0" = _foreach_div_3[51] + getitem_2864: "f32[2304, 768][768, 1]cuda:0" = _foreach_div_3[52] + getitem_2865: "f32[2304][1]cuda:0" = _foreach_div_3[53] + getitem_2866: "f32[768, 768][768, 1]cuda:0" = _foreach_div_3[54] + getitem_2867: "f32[768][1]cuda:0" = _foreach_div_3[55] + getitem_2868: "f32[768][1]cuda:0" = _foreach_div_3[56] + getitem_2869: "f32[768][1]cuda:0" = _foreach_div_3[57] + getitem_2870: "f32[3072, 768][768, 1]cuda:0" = _foreach_div_3[58] + getitem_2871: "f32[3072][1]cuda:0" = _foreach_div_3[59] + getitem_2872: "f32[768, 3072][3072, 1]cuda:0" = _foreach_div_3[60] + getitem_2873: "f32[768][1]cuda:0" = _foreach_div_3[61] + getitem_2874: "f32[768][1]cuda:0" = _foreach_div_3[62] + getitem_2875: "f32[768][1]cuda:0" = _foreach_div_3[63] + getitem_2876: "f32[2304, 768][768, 1]cuda:0" = _foreach_div_3[64] + getitem_2877: "f32[2304][1]cuda:0" = _foreach_div_3[65] + getitem_2878: "f32[768, 768][768, 1]cuda:0" = _foreach_div_3[66] + getitem_2879: "f32[768][1]cuda:0" = _foreach_div_3[67] + getitem_2880: "f32[768][1]cuda:0" = _foreach_div_3[68] + getitem_2881: "f32[768][1]cuda:0" = _foreach_div_3[69] + getitem_2882: "f32[3072, 768][768, 1]cuda:0" = _foreach_div_3[70] + getitem_2883: "f32[3072][1]cuda:0" = _foreach_div_3[71] + getitem_2884: "f32[768, 3072][3072, 1]cuda:0" = _foreach_div_3[72] + getitem_2885: "f32[768][1]cuda:0" = _foreach_div_3[73] + getitem_2886: "f32[768][1]cuda:0" = _foreach_div_3[74] + getitem_2887: "f32[768][1]cuda:0" = _foreach_div_3[75] + getitem_2888: "f32[2304, 768][768, 1]cuda:0" = _foreach_div_3[76] + getitem_2889: "f32[2304][1]cuda:0" = _foreach_div_3[77] + getitem_2890: "f32[768, 768][768, 1]cuda:0" = _foreach_div_3[78] + getitem_2891: "f32[768][1]cuda:0" = _foreach_div_3[79] + getitem_2892: "f32[768][1]cuda:0" = _foreach_div_3[80] + getitem_2893: "f32[768][1]cuda:0" = _foreach_div_3[81] + getitem_2894: "f32[3072, 768][768, 1]cuda:0" = _foreach_div_3[82] + getitem_2895: "f32[3072][1]cuda:0" = _foreach_div_3[83] + getitem_2896: "f32[768, 3072][3072, 1]cuda:0" = _foreach_div_3[84] + getitem_2897: "f32[768][1]cuda:0" = _foreach_div_3[85] + getitem_2898: "f32[768][1]cuda:0" = _foreach_div_3[86] + getitem_2899: "f32[768][1]cuda:0" = _foreach_div_3[87] + getitem_2900: "f32[2304, 768][768, 1]cuda:0" = _foreach_div_3[88] + getitem_2901: "f32[2304][1]cuda:0" = _foreach_div_3[89] + getitem_2902: "f32[768, 768][768, 1]cuda:0" = _foreach_div_3[90] + getitem_2903: "f32[768][1]cuda:0" = _foreach_div_3[91] + getitem_2904: "f32[768][1]cuda:0" = _foreach_div_3[92] + getitem_2905: "f32[768][1]cuda:0" = _foreach_div_3[93] + getitem_2906: "f32[3072, 768][768, 1]cuda:0" = _foreach_div_3[94] + getitem_2907: "f32[3072][1]cuda:0" = _foreach_div_3[95] + getitem_2908: "f32[768, 3072][3072, 1]cuda:0" = _foreach_div_3[96] 
+ getitem_2909: "f32[768][1]cuda:0" = _foreach_div_3[97] + getitem_2910: "f32[768][1]cuda:0" = _foreach_div_3[98] + getitem_2911: "f32[768][1]cuda:0" = _foreach_div_3[99] + getitem_2912: "f32[2304, 768][768, 1]cuda:0" = _foreach_div_3[100] + getitem_2913: "f32[2304][1]cuda:0" = _foreach_div_3[101] + getitem_2914: "f32[768, 768][768, 1]cuda:0" = _foreach_div_3[102] + getitem_2915: "f32[768][1]cuda:0" = _foreach_div_3[103] + getitem_2916: "f32[768][1]cuda:0" = _foreach_div_3[104] + getitem_2917: "f32[768][1]cuda:0" = _foreach_div_3[105] + getitem_2918: "f32[3072, 768][768, 1]cuda:0" = _foreach_div_3[106] + getitem_2919: "f32[3072][1]cuda:0" = _foreach_div_3[107] + getitem_2920: "f32[768, 3072][3072, 1]cuda:0" = _foreach_div_3[108] + getitem_2921: "f32[768][1]cuda:0" = _foreach_div_3[109] + getitem_2922: "f32[768][1]cuda:0" = _foreach_div_3[110] + getitem_2923: "f32[768][1]cuda:0" = _foreach_div_3[111] + getitem_2924: "f32[2304, 768][768, 1]cuda:0" = _foreach_div_3[112] + getitem_2925: "f32[2304][1]cuda:0" = _foreach_div_3[113] + getitem_2926: "f32[768, 768][768, 1]cuda:0" = _foreach_div_3[114] + getitem_2927: "f32[768][1]cuda:0" = _foreach_div_3[115] + getitem_2928: "f32[768][1]cuda:0" = _foreach_div_3[116] + getitem_2929: "f32[768][1]cuda:0" = _foreach_div_3[117] + getitem_2930: "f32[3072, 768][768, 1]cuda:0" = _foreach_div_3[118] + getitem_2931: "f32[3072][1]cuda:0" = _foreach_div_3[119] + getitem_2932: "f32[768, 3072][3072, 1]cuda:0" = _foreach_div_3[120] + getitem_2933: "f32[768][1]cuda:0" = _foreach_div_3[121] + getitem_2934: "f32[768][1]cuda:0" = _foreach_div_3[122] + getitem_2935: "f32[768][1]cuda:0" = _foreach_div_3[123] + getitem_2936: "f32[2304, 768][768, 1]cuda:0" = _foreach_div_3[124] + getitem_2937: "f32[2304][1]cuda:0" = _foreach_div_3[125] + getitem_2938: "f32[768, 768][768, 1]cuda:0" = _foreach_div_3[126] + getitem_2939: "f32[768][1]cuda:0" = _foreach_div_3[127] + getitem_2940: "f32[768][1]cuda:0" = _foreach_div_3[128] + getitem_2941: "f32[768][1]cuda:0" = _foreach_div_3[129] + getitem_2942: "f32[3072, 768][768, 1]cuda:0" = _foreach_div_3[130] + getitem_2943: "f32[3072][1]cuda:0" = _foreach_div_3[131] + getitem_2944: "f32[768, 3072][3072, 1]cuda:0" = _foreach_div_3[132] + getitem_2945: "f32[768][1]cuda:0" = _foreach_div_3[133] + getitem_2946: "f32[768][1]cuda:0" = _foreach_div_3[134] + getitem_2947: "f32[768][1]cuda:0" = _foreach_div_3[135] + getitem_2948: "f32[2304, 768][768, 1]cuda:0" = _foreach_div_3[136] + getitem_2949: "f32[2304][1]cuda:0" = _foreach_div_3[137] + getitem_2950: "f32[768, 768][768, 1]cuda:0" = _foreach_div_3[138] + getitem_2951: "f32[768][1]cuda:0" = _foreach_div_3[139] + getitem_2952: "f32[768][1]cuda:0" = _foreach_div_3[140] + getitem_2953: "f32[768][1]cuda:0" = _foreach_div_3[141] + getitem_2954: "f32[3072, 768][768, 1]cuda:0" = _foreach_div_3[142] + getitem_2955: "f32[3072][1]cuda:0" = _foreach_div_3[143] + getitem_2956: "f32[768, 3072][3072, 1]cuda:0" = _foreach_div_3[144] + getitem_2957: "f32[768][1]cuda:0" = _foreach_div_3[145] + getitem_2958: "f32[768][1]cuda:0" = _foreach_div_3[146] + getitem_2959: "f32[768][1]cuda:0" = _foreach_div_3[147]; _foreach_div_3 = None + _foreach_add_4 = torch.ops.aten._foreach_add.List([arg0_1, arg1_1, arg2_1, arg3_1, arg4_1, arg5_1, arg6_1, arg7_1, arg8_1, arg9_1, arg10_1, arg11_1, arg12_1, arg13_1, arg14_1, arg15_1, arg16_1, arg17_1, arg18_1, arg19_1, arg20_1, arg21_1, arg22_1, arg23_1, arg24_1, arg25_1, arg26_1, arg27_1, arg28_1, arg29_1, arg30_1, arg31_1, arg32_1, arg33_1, arg34_1, arg35_1, arg36_1, arg37_1, 
arg38_1, arg39_1, arg40_1, arg41_1, arg42_1, arg43_1, arg44_1, arg45_1, arg46_1, arg47_1, arg48_1, arg49_1, arg50_1, arg51_1, arg52_1, arg53_1, arg54_1, arg55_1, arg56_1, arg57_1, arg58_1, arg59_1, arg60_1, arg61_1, arg62_1, arg63_1, arg64_1, arg65_1, arg66_1, arg67_1, arg68_1, arg69_1, arg70_1, arg71_1, arg72_1, arg73_1, arg74_1, arg75_1, arg76_1, arg77_1, arg78_1, arg79_1, arg80_1, arg81_1, arg82_1, arg83_1, arg84_1, arg85_1, arg86_1, arg87_1, arg88_1, arg89_1, arg90_1, arg91_1, arg92_1, arg93_1, arg94_1, arg95_1, arg96_1, arg97_1, arg98_1, arg99_1, arg100_1, arg101_1, arg102_1, arg103_1, arg104_1, arg105_1, arg106_1, arg107_1, arg108_1, arg109_1, arg110_1, arg111_1, arg112_1, arg113_1, arg114_1, arg115_1, arg116_1, arg117_1, arg118_1, arg119_1, arg120_1, arg121_1, arg122_1, arg123_1, arg124_1, arg125_1, arg126_1, arg127_1, arg128_1, arg129_1, arg130_1, arg131_1, arg132_1, arg133_1, arg134_1, arg135_1, arg136_1, arg137_1, arg138_1, arg139_1, arg140_1, arg141_1, arg142_1, arg143_1, arg144_1, arg145_1, arg146_1, arg147_1], [getitem_2812, getitem_2813, getitem_2814, getitem_2815, getitem_2816, getitem_2817, getitem_2818, getitem_2819, getitem_2820, getitem_2821, getitem_2822, getitem_2823, getitem_2824, getitem_2825, getitem_2826, getitem_2827, getitem_2828, getitem_2829, getitem_2830, getitem_2831, getitem_2832, getitem_2833, getitem_2834, getitem_2835, getitem_2836, getitem_2837, getitem_2838, getitem_2839, getitem_2840, getitem_2841, getitem_2842, getitem_2843, getitem_2844, getitem_2845, getitem_2846, getitem_2847, getitem_2848, getitem_2849, getitem_2850, getitem_2851, getitem_2852, getitem_2853, getitem_2854, getitem_2855, getitem_2856, getitem_2857, getitem_2858, getitem_2859, getitem_2860, getitem_2861, getitem_2862, getitem_2863, getitem_2864, getitem_2865, getitem_2866, getitem_2867, getitem_2868, getitem_2869, getitem_2870, getitem_2871, getitem_2872, getitem_2873, getitem_2874, getitem_2875, getitem_2876, getitem_2877, getitem_2878, getitem_2879, getitem_2880, getitem_2881, getitem_2882, getitem_2883, getitem_2884, getitem_2885, getitem_2886, getitem_2887, getitem_2888, getitem_2889, getitem_2890, getitem_2891, getitem_2892, getitem_2893, getitem_2894, getitem_2895, getitem_2896, getitem_2897, getitem_2898, getitem_2899, getitem_2900, getitem_2901, getitem_2902, getitem_2903, getitem_2904, getitem_2905, getitem_2906, getitem_2907, getitem_2908, getitem_2909, getitem_2910, getitem_2911, getitem_2912, getitem_2913, getitem_2914, getitem_2915, getitem_2916, getitem_2917, getitem_2918, getitem_2919, getitem_2920, getitem_2921, getitem_2922, getitem_2923, getitem_2924, getitem_2925, getitem_2926, getitem_2927, getitem_2928, getitem_2929, getitem_2930, getitem_2931, getitem_2932, getitem_2933, getitem_2934, getitem_2935, getitem_2936, getitem_2937, getitem_2938, getitem_2939, getitem_2940, getitem_2941, getitem_2942, getitem_2943, getitem_2944, getitem_2945, getitem_2946, getitem_2947, getitem_2948, getitem_2949, getitem_2950, getitem_2951, getitem_2952, getitem_2953, getitem_2954, getitem_2955, getitem_2956, getitem_2957, getitem_2958, getitem_2959]); getitem_2812 = getitem_2813 = getitem_2814 = getitem_2815 = getitem_2816 = getitem_2817 = getitem_2818 = getitem_2819 = getitem_2820 = getitem_2821 = getitem_2822 = getitem_2823 = getitem_2824 = getitem_2825 = getitem_2826 = getitem_2827 = getitem_2828 = getitem_2829 = getitem_2830 = getitem_2831 = getitem_2832 = getitem_2833 = getitem_2834 = getitem_2835 = getitem_2836 = getitem_2837 = getitem_2838 = getitem_2839 = getitem_2840 = 
getitem_2841 = getitem_2842 = getitem_2843 = getitem_2844 = getitem_2845 = getitem_2846 = getitem_2847 = getitem_2848 = getitem_2849 = getitem_2850 = getitem_2851 = getitem_2852 = getitem_2853 = getitem_2854 = getitem_2855 = getitem_2856 = getitem_2857 = getitem_2858 = getitem_2859 = getitem_2860 = getitem_2861 = getitem_2862 = getitem_2863 = getitem_2864 = getitem_2865 = getitem_2866 = getitem_2867 = getitem_2868 = getitem_2869 = getitem_2870 = getitem_2871 = getitem_2872 = getitem_2873 = getitem_2874 = getitem_2875 = getitem_2876 = getitem_2877 = getitem_2878 = getitem_2879 = getitem_2880 = getitem_2881 = getitem_2882 = getitem_2883 = getitem_2884 = getitem_2885 = getitem_2886 = getitem_2887 = getitem_2888 = getitem_2889 = getitem_2890 = getitem_2891 = getitem_2892 = getitem_2893 = getitem_2894 = getitem_2895 = getitem_2896 = getitem_2897 = getitem_2898 = getitem_2899 = getitem_2900 = getitem_2901 = getitem_2902 = getitem_2903 = getitem_2904 = getitem_2905 = getitem_2906 = getitem_2907 = getitem_2908 = getitem_2909 = getitem_2910 = getitem_2911 = getitem_2912 = getitem_2913 = getitem_2914 = getitem_2915 = getitem_2916 = getitem_2917 = getitem_2918 = getitem_2919 = getitem_2920 = getitem_2921 = getitem_2922 = getitem_2923 = getitem_2924 = getitem_2925 = getitem_2926 = getitem_2927 = getitem_2928 = getitem_2929 = getitem_2930 = getitem_2931 = getitem_2932 = getitem_2933 = getitem_2934 = getitem_2935 = getitem_2936 = getitem_2937 = getitem_2938 = getitem_2939 = getitem_2940 = getitem_2941 = getitem_2942 = getitem_2943 = getitem_2944 = getitem_2945 = getitem_2946 = getitem_2947 = getitem_2948 = getitem_2949 = getitem_2950 = getitem_2951 = getitem_2952 = getitem_2953 = getitem_2954 = getitem_2955 = getitem_2956 = getitem_2957 = getitem_2958 = getitem_2959 = None + getitem_2960: "f32[50304, 768][768, 1]cuda:0" = _foreach_add_4[0] + getitem_2961: "f32[1024, 768][768, 1]cuda:0" = _foreach_add_4[1] + getitem_2962: "f32[768][1]cuda:0" = _foreach_add_4[2] + getitem_2963: "f32[768][1]cuda:0" = _foreach_add_4[3] + getitem_2964: "f32[2304, 768][768, 1]cuda:0" = _foreach_add_4[4] + getitem_2965: "f32[2304][1]cuda:0" = _foreach_add_4[5] + getitem_2966: "f32[768, 768][768, 1]cuda:0" = _foreach_add_4[6] + getitem_2967: "f32[768][1]cuda:0" = _foreach_add_4[7] + getitem_2968: "f32[768][1]cuda:0" = _foreach_add_4[8] + getitem_2969: "f32[768][1]cuda:0" = _foreach_add_4[9] + getitem_2970: "f32[3072, 768][768, 1]cuda:0" = _foreach_add_4[10] + getitem_2971: "f32[3072][1]cuda:0" = _foreach_add_4[11] + getitem_2972: "f32[768, 3072][3072, 1]cuda:0" = _foreach_add_4[12] + getitem_2973: "f32[768][1]cuda:0" = _foreach_add_4[13] + getitem_2974: "f32[768][1]cuda:0" = _foreach_add_4[14] + getitem_2975: "f32[768][1]cuda:0" = _foreach_add_4[15] + getitem_2976: "f32[2304, 768][768, 1]cuda:0" = _foreach_add_4[16] + getitem_2977: "f32[2304][1]cuda:0" = _foreach_add_4[17] + getitem_2978: "f32[768, 768][768, 1]cuda:0" = _foreach_add_4[18] + getitem_2979: "f32[768][1]cuda:0" = _foreach_add_4[19] + getitem_2980: "f32[768][1]cuda:0" = _foreach_add_4[20] + getitem_2981: "f32[768][1]cuda:0" = _foreach_add_4[21] + getitem_2982: "f32[3072, 768][768, 1]cuda:0" = _foreach_add_4[22] + getitem_2983: "f32[3072][1]cuda:0" = _foreach_add_4[23] + getitem_2984: "f32[768, 3072][3072, 1]cuda:0" = _foreach_add_4[24] + getitem_2985: "f32[768][1]cuda:0" = _foreach_add_4[25] + getitem_2986: "f32[768][1]cuda:0" = _foreach_add_4[26] + getitem_2987: "f32[768][1]cuda:0" = _foreach_add_4[27] + getitem_2988: "f32[2304, 768][768, 1]cuda:0" = 
_foreach_add_4[28]
+ getitem_2989: "f32[2304][1]cuda:0" = _foreach_add_4[29]
+ getitem_2990: "f32[768, 768][768, 1]cuda:0" = _foreach_add_4[30]
+ getitem_2991: "f32[768][1]cuda:0" = _foreach_add_4[31]
+ getitem_2992: "f32[768][1]cuda:0" = _foreach_add_4[32]
+ getitem_2993: "f32[768][1]cuda:0" = _foreach_add_4[33]
+ getitem_2994: "f32[3072, 768][768, 1]cuda:0" = _foreach_add_4[34]
+ getitem_2995: "f32[3072][1]cuda:0" = _foreach_add_4[35]
+ getitem_2996: "f32[768, 3072][3072, 1]cuda:0" = _foreach_add_4[36]
+ getitem_2997: "f32[768][1]cuda:0" = _foreach_add_4[37]
+ getitem_2998: "f32[768][1]cuda:0" = _foreach_add_4[38]
+ getitem_2999: "f32[768][1]cuda:0" = _foreach_add_4[39]
+ getitem_3000: "f32[2304, 768][768, 1]cuda:0" = _foreach_add_4[40]
+ getitem_3001: "f32[2304][1]cuda:0" = _foreach_add_4[41]
+ getitem_3002: "f32[768, 768][768, 1]cuda:0" = _foreach_add_4[42]
+ getitem_3003: "f32[768][1]cuda:0" = _foreach_add_4[43]
+ getitem_3004: "f32[768][1]cuda:0" = _foreach_add_4[44]
+ getitem_3005: "f32[768][1]cuda:0" = _foreach_add_4[45]
+ getitem_3006: "f32[3072, 768][768, 1]cuda:0" = _foreach_add_4[46]
+ getitem_3007: "f32[3072][1]cuda:0" = _foreach_add_4[47]
+ getitem_3008: "f32[768, 3072][3072, 1]cuda:0" = _foreach_add_4[48]
+ getitem_3009: "f32[768][1]cuda:0" = _foreach_add_4[49]
+ getitem_3010: "f32[768][1]cuda:0" = _foreach_add_4[50]
+ getitem_3011: "f32[768][1]cuda:0" = _foreach_add_4[51]
+ getitem_3012: "f32[2304, 768][768, 1]cuda:0" = _foreach_add_4[52]
+ getitem_3013: "f32[2304][1]cuda:0" = _foreach_add_4[53]
+ getitem_3014: "f32[768, 768][768, 1]cuda:0" = _foreach_add_4[54]
+ getitem_3015: "f32[768][1]cuda:0" = _foreach_add_4[55]
+ getitem_3016: "f32[768][1]cuda:0" = _foreach_add_4[56]
+ getitem_3017: "f32[768][1]cuda:0" = _foreach_add_4[57]
+ getitem_3018: "f32[3072, 768][768, 1]cuda:0" = _foreach_add_4[58]
+ getitem_3019: "f32[3072][1]cuda:0" = _foreach_add_4[59]
+ getitem_3020: "f32[768, 3072][3072, 1]cuda:0" = _foreach_add_4[60]
+ getitem_3021: "f32[768][1]cuda:0" = _foreach_add_4[61]
+ getitem_3022: "f32[768][1]cuda:0" = _foreach_add_4[62]
+ getitem_3023: "f32[768][1]cuda:0" = _foreach_add_4[63]
+ getitem_3024: "f32[2304, 768][768, 1]cuda:0" = _foreach_add_4[64]
+ getitem_3025: "f32[2304][1]cuda:0" = _foreach_add_4[65]
+ getitem_3026: "f32[768, 768][768, 1]cuda:0" = _foreach_add_4[66]
+ getitem_3027: "f32[768][1]cuda:0" = _foreach_add_4[67]
+ getitem_3028: "f32[768][1]cuda:0" = _foreach_add_4[68]
+ getitem_3029: "f32[768][1]cuda:0" = _foreach_add_4[69]
+ getitem_3030: "f32[3072, 768][768, 1]cuda:0" = _foreach_add_4[70]
+ getitem_3031: "f32[3072][1]cuda:0" = _foreach_add_4[71]
+ getitem_3032: "f32[768, 3072][3072, 1]cuda:0" = _foreach_add_4[72]
+ getitem_3033: "f32[768][1]cuda:0" = _foreach_add_4[73]
+ getitem_3034: "f32[768][1]cuda:0" = _foreach_add_4[74]
+ getitem_3035: "f32[768][1]cuda:0" = _foreach_add_4[75]
+ getitem_3036: "f32[2304, 768][768, 1]cuda:0" = _foreach_add_4[76]
+ getitem_3037: "f32[2304][1]cuda:0" = _foreach_add_4[77]
+ getitem_3038: "f32[768, 768][768, 1]cuda:0" = _foreach_add_4[78]
+ getitem_3039: "f32[768][1]cuda:0" = _foreach_add_4[79]
+ getitem_3040: "f32[768][1]cuda:0" = _foreach_add_4[80]
+ getitem_3041: "f32[768][1]cuda:0" = _foreach_add_4[81]
+ getitem_3042: "f32[3072, 768][768, 1]cuda:0" = _foreach_add_4[82]
+ getitem_3043: "f32[3072][1]cuda:0" = _foreach_add_4[83]
+ getitem_3044: "f32[768, 3072][3072, 1]cuda:0" = _foreach_add_4[84]
+ getitem_3045: "f32[768][1]cuda:0" = _foreach_add_4[85]
+ getitem_3046: "f32[768][1]cuda:0" = _foreach_add_4[86]
+ getitem_3047: "f32[768][1]cuda:0" = _foreach_add_4[87]
+ getitem_3048: "f32[2304, 768][768, 1]cuda:0" = _foreach_add_4[88]
+ getitem_3049: "f32[2304][1]cuda:0" = _foreach_add_4[89]
+ getitem_3050: "f32[768, 768][768, 1]cuda:0" = _foreach_add_4[90]
+ getitem_3051: "f32[768][1]cuda:0" = _foreach_add_4[91]
+ getitem_3052: "f32[768][1]cuda:0" = _foreach_add_4[92]
+ getitem_3053: "f32[768][1]cuda:0" = _foreach_add_4[93]
+ getitem_3054: "f32[3072, 768][768, 1]cuda:0" = _foreach_add_4[94]
+ getitem_3055: "f32[3072][1]cuda:0" = _foreach_add_4[95]
+ getitem_3056: "f32[768, 3072][3072, 1]cuda:0" = _foreach_add_4[96]
+ getitem_3057: "f32[768][1]cuda:0" = _foreach_add_4[97]
+ getitem_3058: "f32[768][1]cuda:0" = _foreach_add_4[98]
+ getitem_3059: "f32[768][1]cuda:0" = _foreach_add_4[99]
+ getitem_3060: "f32[2304, 768][768, 1]cuda:0" = _foreach_add_4[100]
+ getitem_3061: "f32[2304][1]cuda:0" = _foreach_add_4[101]
+ getitem_3062: "f32[768, 768][768, 1]cuda:0" = _foreach_add_4[102]
+ getitem_3063: "f32[768][1]cuda:0" = _foreach_add_4[103]
+ getitem_3064: "f32[768][1]cuda:0" = _foreach_add_4[104]
+ getitem_3065: "f32[768][1]cuda:0" = _foreach_add_4[105]
+ getitem_3066: "f32[3072, 768][768, 1]cuda:0" = _foreach_add_4[106]
+ getitem_3067: "f32[3072][1]cuda:0" = _foreach_add_4[107]
+ getitem_3068: "f32[768, 3072][3072, 1]cuda:0" = _foreach_add_4[108]
+ getitem_3069: "f32[768][1]cuda:0" = _foreach_add_4[109]
+ getitem_3070: "f32[768][1]cuda:0" = _foreach_add_4[110]
+ getitem_3071: "f32[768][1]cuda:0" = _foreach_add_4[111]
+ getitem_3072: "f32[2304, 768][768, 1]cuda:0" = _foreach_add_4[112]
+ getitem_3073: "f32[2304][1]cuda:0" = _foreach_add_4[113]
+ getitem_3074: "f32[768, 768][768, 1]cuda:0" = _foreach_add_4[114]
+ getitem_3075: "f32[768][1]cuda:0" = _foreach_add_4[115]
+ getitem_3076: "f32[768][1]cuda:0" = _foreach_add_4[116]
+ getitem_3077: "f32[768][1]cuda:0" = _foreach_add_4[117]
+ getitem_3078: "f32[3072, 768][768, 1]cuda:0" = _foreach_add_4[118]
+ getitem_3079: "f32[3072][1]cuda:0" = _foreach_add_4[119]
+ getitem_3080: "f32[768, 3072][3072, 1]cuda:0" = _foreach_add_4[120]
+ getitem_3081: "f32[768][1]cuda:0" = _foreach_add_4[121]
+ getitem_3082: "f32[768][1]cuda:0" = _foreach_add_4[122]
+ getitem_3083: "f32[768][1]cuda:0" = _foreach_add_4[123]
+ getitem_3084: "f32[2304, 768][768, 1]cuda:0" = _foreach_add_4[124]
+ getitem_3085: "f32[2304][1]cuda:0" = _foreach_add_4[125]
+ getitem_3086: "f32[768, 768][768, 1]cuda:0" = _foreach_add_4[126]
+ getitem_3087: "f32[768][1]cuda:0" = _foreach_add_4[127]
+ getitem_3088: "f32[768][1]cuda:0" = _foreach_add_4[128]
+ getitem_3089: "f32[768][1]cuda:0" = _foreach_add_4[129]
+ getitem_3090: "f32[3072, 768][768, 1]cuda:0" = _foreach_add_4[130]
+ getitem_3091: "f32[3072][1]cuda:0" = _foreach_add_4[131]
+ getitem_3092: "f32[768, 3072][3072, 1]cuda:0" = _foreach_add_4[132]
+ getitem_3093: "f32[768][1]cuda:0" = _foreach_add_4[133]
+ getitem_3094: "f32[768][1]cuda:0" = _foreach_add_4[134]
+ getitem_3095: "f32[768][1]cuda:0" = _foreach_add_4[135]
+ getitem_3096: "f32[2304, 768][768, 1]cuda:0" = _foreach_add_4[136]
+ getitem_3097: "f32[2304][1]cuda:0" = _foreach_add_4[137]
+ getitem_3098: "f32[768, 768][768, 1]cuda:0" = _foreach_add_4[138]
+ getitem_3099: "f32[768][1]cuda:0" = _foreach_add_4[139]
+ getitem_3100: "f32[768][1]cuda:0" = _foreach_add_4[140]
+ getitem_3101: "f32[768][1]cuda:0" = _foreach_add_4[141]
+ getitem_3102: "f32[3072, 768][768, 1]cuda:0" = _foreach_add_4[142]
+ getitem_3103: "f32[3072][1]cuda:0" = _foreach_add_4[143]
+ getitem_3104: "f32[768, 3072][3072, 1]cuda:0" = _foreach_add_4[144]
+ getitem_3105: "f32[768][1]cuda:0" = _foreach_add_4[145]
+ getitem_3106: "f32[768][1]cuda:0" = _foreach_add_4[146]
+ getitem_3107: "f32[768][1]cuda:0" = _foreach_add_4[147]; _foreach_add_4 = None
+ copy_: "f32[50304, 768][768, 1]cuda:0" = torch.ops.aten.copy_.default(arg0_1, getitem_2960); arg0_1 = getitem_2960 = copy_ = None
+ copy__1: "f32[1024, 768][768, 1]cuda:0" = torch.ops.aten.copy_.default(arg1_1, getitem_2961); arg1_1 = getitem_2961 = copy__1 = None
+ copy__2: "f32[768][1]cuda:0" = torch.ops.aten.copy_.default(arg2_1, getitem_2962); arg2_1 = getitem_2962 = copy__2 = None
+ copy__3: "f32[768][1]cuda:0" = torch.ops.aten.copy_.default(arg3_1, getitem_2963); arg3_1 = getitem_2963 = copy__3 = None
+ copy__4: "f32[2304, 768][768, 1]cuda:0" = torch.ops.aten.copy_.default(arg4_1, getitem_2964); arg4_1 = getitem_2964 = copy__4 = None
+ copy__5: "f32[2304][1]cuda:0" = torch.ops.aten.copy_.default(arg5_1, getitem_2965); arg5_1 = getitem_2965 = copy__5 = None
+ copy__6: "f32[768, 768][768, 1]cuda:0" = torch.ops.aten.copy_.default(arg6_1, getitem_2966); arg6_1 = getitem_2966 = copy__6 = None
+ copy__7: "f32[768][1]cuda:0" = torch.ops.aten.copy_.default(arg7_1, getitem_2967); arg7_1 = getitem_2967 = copy__7 = None
+ copy__8: "f32[768][1]cuda:0" = torch.ops.aten.copy_.default(arg8_1, getitem_2968); arg8_1 = getitem_2968 = copy__8 = None
+ copy__9: "f32[768][1]cuda:0" = torch.ops.aten.copy_.default(arg9_1, getitem_2969); arg9_1 = getitem_2969 = copy__9 = None
+ copy__10: "f32[3072, 768][768, 1]cuda:0" = torch.ops.aten.copy_.default(arg10_1, getitem_2970); arg10_1 = getitem_2970 = copy__10 = None
+ copy__11: "f32[3072][1]cuda:0" = torch.ops.aten.copy_.default(arg11_1, getitem_2971); arg11_1 = getitem_2971 = copy__11 = None
+ copy__12: "f32[768, 3072][3072, 1]cuda:0" = torch.ops.aten.copy_.default(arg12_1, getitem_2972); arg12_1 = getitem_2972 = copy__12 = None
+ copy__13: "f32[768][1]cuda:0" = torch.ops.aten.copy_.default(arg13_1, getitem_2973); arg13_1 = getitem_2973 = copy__13 = None
+ copy__14: "f32[768][1]cuda:0" = torch.ops.aten.copy_.default(arg14_1, getitem_2974); arg14_1 = getitem_2974 = copy__14 = None
+ copy__15: "f32[768][1]cuda:0" = torch.ops.aten.copy_.default(arg15_1, getitem_2975); arg15_1 = getitem_2975 = copy__15 = None
+ copy__16: "f32[2304, 768][768, 1]cuda:0" = torch.ops.aten.copy_.default(arg16_1, getitem_2976); arg16_1 = getitem_2976 = copy__16 = None
+ copy__17: "f32[2304][1]cuda:0" = torch.ops.aten.copy_.default(arg17_1, getitem_2977); arg17_1 = getitem_2977 = copy__17 = None
+ copy__18: "f32[768, 768][768, 1]cuda:0" = torch.ops.aten.copy_.default(arg18_1, getitem_2978); arg18_1 = getitem_2978 = copy__18 = None
+ copy__19: "f32[768][1]cuda:0" = torch.ops.aten.copy_.default(arg19_1, getitem_2979); arg19_1 = getitem_2979 = copy__19 = None
+ copy__20: "f32[768][1]cuda:0" = torch.ops.aten.copy_.default(arg20_1, getitem_2980); arg20_1 = getitem_2980 = copy__20 = None
+ copy__21: "f32[768][1]cuda:0" = torch.ops.aten.copy_.default(arg21_1, getitem_2981); arg21_1 = getitem_2981 = copy__21 = None
+ copy__22: "f32[3072, 768][768, 1]cuda:0" = torch.ops.aten.copy_.default(arg22_1, getitem_2982); arg22_1 = getitem_2982 = copy__22 = None
+ copy__23: "f32[3072][1]cuda:0" = torch.ops.aten.copy_.default(arg23_1, getitem_2983); arg23_1 = getitem_2983 = copy__23 = None
+ copy__24: "f32[768, 3072][3072, 1]cuda:0" = torch.ops.aten.copy_.default(arg24_1, getitem_2984); arg24_1 = getitem_2984 = copy__24 = None
+ copy__25: "f32[768][1]cuda:0" = torch.ops.aten.copy_.default(arg25_1, getitem_2985); arg25_1 = getitem_2985 = copy__25 = None
+ copy__26: "f32[768][1]cuda:0" = torch.ops.aten.copy_.default(arg26_1, getitem_2986); arg26_1 = getitem_2986 = copy__26 = None
+ copy__27: "f32[768][1]cuda:0" = torch.ops.aten.copy_.default(arg27_1, getitem_2987); arg27_1 = getitem_2987 = copy__27 = None
+ copy__28: "f32[2304, 768][768, 1]cuda:0" = torch.ops.aten.copy_.default(arg28_1, getitem_2988); arg28_1 = getitem_2988 = copy__28 = None
+ copy__29: "f32[2304][1]cuda:0" = torch.ops.aten.copy_.default(arg29_1, getitem_2989); arg29_1 = getitem_2989 = copy__29 = None
+ copy__30: "f32[768, 768][768, 1]cuda:0" = torch.ops.aten.copy_.default(arg30_1, getitem_2990); arg30_1 = getitem_2990 = copy__30 = None
+ copy__31: "f32[768][1]cuda:0" = torch.ops.aten.copy_.default(arg31_1, getitem_2991); arg31_1 = getitem_2991 = copy__31 = None
+ copy__32: "f32[768][1]cuda:0" = torch.ops.aten.copy_.default(arg32_1, getitem_2992); arg32_1 = getitem_2992 = copy__32 = None
+ copy__33: "f32[768][1]cuda:0" = torch.ops.aten.copy_.default(arg33_1, getitem_2993); arg33_1 = getitem_2993 = copy__33 = None
+ copy__34: "f32[3072, 768][768, 1]cuda:0" = torch.ops.aten.copy_.default(arg34_1, getitem_2994); arg34_1 = getitem_2994 = copy__34 = None
+ copy__35: "f32[3072][1]cuda:0" = torch.ops.aten.copy_.default(arg35_1, getitem_2995); arg35_1 = getitem_2995 = copy__35 = None
+ copy__36: "f32[768, 3072][3072, 1]cuda:0" = torch.ops.aten.copy_.default(arg36_1, getitem_2996); arg36_1 = getitem_2996 = copy__36 = None
+ copy__37: "f32[768][1]cuda:0" = torch.ops.aten.copy_.default(arg37_1, getitem_2997); arg37_1 = getitem_2997 = copy__37 = None
+ copy__38: "f32[768][1]cuda:0" = torch.ops.aten.copy_.default(arg38_1, getitem_2998); arg38_1 = getitem_2998 = copy__38 = None
+ copy__39: "f32[768][1]cuda:0" = torch.ops.aten.copy_.default(arg39_1, getitem_2999); arg39_1 = getitem_2999 = copy__39 = None
+ copy__40: "f32[2304, 768][768, 1]cuda:0" = torch.ops.aten.copy_.default(arg40_1, getitem_3000); arg40_1 = getitem_3000 = copy__40 = None
+ copy__41: "f32[2304][1]cuda:0" = torch.ops.aten.copy_.default(arg41_1, getitem_3001); arg41_1 = getitem_3001 = copy__41 = None
+ copy__42: "f32[768, 768][768, 1]cuda:0" = torch.ops.aten.copy_.default(arg42_1, getitem_3002); arg42_1 = getitem_3002 = copy__42 = None
+ copy__43: "f32[768][1]cuda:0" = torch.ops.aten.copy_.default(arg43_1, getitem_3003); arg43_1 = getitem_3003 = copy__43 = None
+ copy__44: "f32[768][1]cuda:0" = torch.ops.aten.copy_.default(arg44_1, getitem_3004); arg44_1 = getitem_3004 = copy__44 = None
+ copy__45: "f32[768][1]cuda:0" = torch.ops.aten.copy_.default(arg45_1, getitem_3005); arg45_1 = getitem_3005 = copy__45 = None
+ copy__46: "f32[3072, 768][768, 1]cuda:0" = torch.ops.aten.copy_.default(arg46_1, getitem_3006); arg46_1 = getitem_3006 = copy__46 = None
+ copy__47: "f32[3072][1]cuda:0" = torch.ops.aten.copy_.default(arg47_1, getitem_3007); arg47_1 = getitem_3007 = copy__47 = None
+ copy__48: "f32[768, 3072][3072, 1]cuda:0" = torch.ops.aten.copy_.default(arg48_1, getitem_3008); arg48_1 = getitem_3008 = copy__48 = None
+ copy__49: "f32[768][1]cuda:0" = torch.ops.aten.copy_.default(arg49_1, getitem_3009); arg49_1 = getitem_3009 = copy__49 = None
+ copy__50: "f32[768][1]cuda:0" = torch.ops.aten.copy_.default(arg50_1, getitem_3010); arg50_1 = getitem_3010 = copy__50 = None
+ copy__51: "f32[768][1]cuda:0" = torch.ops.aten.copy_.default(arg51_1, getitem_3011); arg51_1 = getitem_3011 = copy__51 = None
+ copy__52: "f32[2304, 768][768, 1]cuda:0" = torch.ops.aten.copy_.default(arg52_1, getitem_3012); arg52_1 = getitem_3012 = copy__52 = None
+ copy__53: "f32[2304][1]cuda:0" = torch.ops.aten.copy_.default(arg53_1, getitem_3013); arg53_1 = getitem_3013 = copy__53 = None
+ copy__54: "f32[768, 768][768, 1]cuda:0" = torch.ops.aten.copy_.default(arg54_1, getitem_3014); arg54_1 = getitem_3014 = copy__54 = None
+ copy__55: "f32[768][1]cuda:0" = torch.ops.aten.copy_.default(arg55_1, getitem_3015); arg55_1 = getitem_3015 = copy__55 = None
+ copy__56: "f32[768][1]cuda:0" = torch.ops.aten.copy_.default(arg56_1, getitem_3016); arg56_1 = getitem_3016 = copy__56 = None
+ copy__57: "f32[768][1]cuda:0" = torch.ops.aten.copy_.default(arg57_1, getitem_3017); arg57_1 = getitem_3017 = copy__57 = None
+ copy__58: "f32[3072, 768][768, 1]cuda:0" = torch.ops.aten.copy_.default(arg58_1, getitem_3018); arg58_1 = getitem_3018 = copy__58 = None
+ copy__59: "f32[3072][1]cuda:0" = torch.ops.aten.copy_.default(arg59_1, getitem_3019); arg59_1 = getitem_3019 = copy__59 = None
+ copy__60: "f32[768, 3072][3072, 1]cuda:0" = torch.ops.aten.copy_.default(arg60_1, getitem_3020); arg60_1 = getitem_3020 = copy__60 = None
+ copy__61: "f32[768][1]cuda:0" = torch.ops.aten.copy_.default(arg61_1, getitem_3021); arg61_1 = getitem_3021 = copy__61 = None
+ copy__62: "f32[768][1]cuda:0" = torch.ops.aten.copy_.default(arg62_1, getitem_3022); arg62_1 = getitem_3022 = copy__62 = None
+ copy__63: "f32[768][1]cuda:0" = torch.ops.aten.copy_.default(arg63_1, getitem_3023); arg63_1 = getitem_3023 = copy__63 = None
+ copy__64: "f32[2304, 768][768, 1]cuda:0" = torch.ops.aten.copy_.default(arg64_1, getitem_3024); arg64_1 = getitem_3024 = copy__64 = None
+ copy__65: "f32[2304][1]cuda:0" = torch.ops.aten.copy_.default(arg65_1, getitem_3025); arg65_1 = getitem_3025 = copy__65 = None
+ copy__66: "f32[768, 768][768, 1]cuda:0" = torch.ops.aten.copy_.default(arg66_1, getitem_3026); arg66_1 = getitem_3026 = copy__66 = None
+ copy__67: "f32[768][1]cuda:0" = torch.ops.aten.copy_.default(arg67_1, getitem_3027); arg67_1 = getitem_3027 = copy__67 = None
+ copy__68: "f32[768][1]cuda:0" = torch.ops.aten.copy_.default(arg68_1, getitem_3028); arg68_1 = getitem_3028 = copy__68 = None
+ copy__69: "f32[768][1]cuda:0" = torch.ops.aten.copy_.default(arg69_1, getitem_3029); arg69_1 = getitem_3029 = copy__69 = None
+ copy__70: "f32[3072, 768][768, 1]cuda:0" = torch.ops.aten.copy_.default(arg70_1, getitem_3030); arg70_1 = getitem_3030 = copy__70 = None
+ copy__71: "f32[3072][1]cuda:0" = torch.ops.aten.copy_.default(arg71_1, getitem_3031); arg71_1 = getitem_3031 = copy__71 = None
+ copy__72: "f32[768, 3072][3072, 1]cuda:0" = torch.ops.aten.copy_.default(arg72_1, getitem_3032); arg72_1 = getitem_3032 = copy__72 = None
+ copy__73: "f32[768][1]cuda:0" = torch.ops.aten.copy_.default(arg73_1, getitem_3033); arg73_1 = getitem_3033 = copy__73 = None
+ copy__74: "f32[768][1]cuda:0" = torch.ops.aten.copy_.default(arg74_1, getitem_3034); arg74_1 = getitem_3034 = copy__74 = None
+ copy__75: "f32[768][1]cuda:0" = torch.ops.aten.copy_.default(arg75_1, getitem_3035); arg75_1 = getitem_3035 = copy__75 = None
+ copy__76: "f32[2304, 768][768, 1]cuda:0" = torch.ops.aten.copy_.default(arg76_1, getitem_3036); arg76_1 = getitem_3036 = copy__76 = None
+ copy__77: "f32[2304][1]cuda:0" = torch.ops.aten.copy_.default(arg77_1, getitem_3037); arg77_1 = getitem_3037 = copy__77 = None
+ copy__78: "f32[768, 768][768, 1]cuda:0" = torch.ops.aten.copy_.default(arg78_1, getitem_3038); arg78_1 = getitem_3038 = copy__78 = None
+ copy__79: "f32[768][1]cuda:0" = torch.ops.aten.copy_.default(arg79_1, getitem_3039); arg79_1 = getitem_3039 = copy__79 = None
+ copy__80: "f32[768][1]cuda:0" = torch.ops.aten.copy_.default(arg80_1, getitem_3040); arg80_1 = getitem_3040 = copy__80 = None
+ copy__81: "f32[768][1]cuda:0" = torch.ops.aten.copy_.default(arg81_1, getitem_3041); arg81_1 = getitem_3041 = copy__81 = None
+ copy__82: "f32[3072, 768][768, 1]cuda:0" = torch.ops.aten.copy_.default(arg82_1, getitem_3042); arg82_1 = getitem_3042 = copy__82 = None
+ copy__83: "f32[3072][1]cuda:0" = torch.ops.aten.copy_.default(arg83_1, getitem_3043); arg83_1 = getitem_3043 = copy__83 = None
+ copy__84: "f32[768, 3072][3072, 1]cuda:0" = torch.ops.aten.copy_.default(arg84_1, getitem_3044); arg84_1 = getitem_3044 = copy__84 = None
+ copy__85: "f32[768][1]cuda:0" = torch.ops.aten.copy_.default(arg85_1, getitem_3045); arg85_1 = getitem_3045 = copy__85 = None
+ copy__86: "f32[768][1]cuda:0" = torch.ops.aten.copy_.default(arg86_1, getitem_3046); arg86_1 = getitem_3046 = copy__86 = None
+ copy__87: "f32[768][1]cuda:0" = torch.ops.aten.copy_.default(arg87_1, getitem_3047); arg87_1 = getitem_3047 = copy__87 = None
+ copy__88: "f32[2304, 768][768, 1]cuda:0" = torch.ops.aten.copy_.default(arg88_1, getitem_3048); arg88_1 = getitem_3048 = copy__88 = None
+ copy__89: "f32[2304][1]cuda:0" = torch.ops.aten.copy_.default(arg89_1, getitem_3049); arg89_1 = getitem_3049 = copy__89 = None
+ copy__90: "f32[768, 768][768, 1]cuda:0" = torch.ops.aten.copy_.default(arg90_1, getitem_3050); arg90_1 = getitem_3050 = copy__90 = None
+ copy__91: "f32[768][1]cuda:0" = torch.ops.aten.copy_.default(arg91_1, getitem_3051); arg91_1 = getitem_3051 = copy__91 = None
+ copy__92: "f32[768][1]cuda:0" = torch.ops.aten.copy_.default(arg92_1, getitem_3052); arg92_1 = getitem_3052 = copy__92 = None
+ copy__93: "f32[768][1]cuda:0" = torch.ops.aten.copy_.default(arg93_1, getitem_3053); arg93_1 = getitem_3053 = copy__93 = None
+ copy__94: "f32[3072, 768][768, 1]cuda:0" = torch.ops.aten.copy_.default(arg94_1, getitem_3054); arg94_1 = getitem_3054 = copy__94 = None
+ copy__95: "f32[3072][1]cuda:0" = torch.ops.aten.copy_.default(arg95_1, getitem_3055); arg95_1 = getitem_3055 = copy__95 = None
+ copy__96: "f32[768, 3072][3072, 1]cuda:0" = torch.ops.aten.copy_.default(arg96_1, getitem_3056); arg96_1 = getitem_3056 = copy__96 = None
+ copy__97: "f32[768][1]cuda:0" = torch.ops.aten.copy_.default(arg97_1, getitem_3057); arg97_1 = getitem_3057 = copy__97 = None
+ copy__98: "f32[768][1]cuda:0" = torch.ops.aten.copy_.default(arg98_1, getitem_3058); arg98_1 = getitem_3058 = copy__98 = None
+ copy__99: "f32[768][1]cuda:0" = torch.ops.aten.copy_.default(arg99_1, getitem_3059); arg99_1 = getitem_3059 = copy__99 = None
+ copy__100: "f32[2304, 768][768, 1]cuda:0" = torch.ops.aten.copy_.default(arg100_1, getitem_3060); arg100_1 = getitem_3060 = copy__100 = None
+ copy__101: "f32[2304][1]cuda:0" = torch.ops.aten.copy_.default(arg101_1, getitem_3061); arg101_1 = getitem_3061 = copy__101 = None
+ copy__102: "f32[768, 768][768, 1]cuda:0" = torch.ops.aten.copy_.default(arg102_1, getitem_3062); arg102_1 = getitem_3062 = copy__102 = None
+ copy__103: "f32[768][1]cuda:0" = torch.ops.aten.copy_.default(arg103_1, getitem_3063); arg103_1 = getitem_3063 = copy__103 = None
+ copy__104: "f32[768][1]cuda:0" = torch.ops.aten.copy_.default(arg104_1, getitem_3064); arg104_1 = getitem_3064 = copy__104 = None
+ copy__105: "f32[768][1]cuda:0" = torch.ops.aten.copy_.default(arg105_1, getitem_3065); arg105_1 = getitem_3065 = copy__105 = None
+ copy__106: "f32[3072, 768][768, 1]cuda:0" = torch.ops.aten.copy_.default(arg106_1, getitem_3066); arg106_1 = getitem_3066 = copy__106 = None
+ copy__107: "f32[3072][1]cuda:0" = torch.ops.aten.copy_.default(arg107_1, getitem_3067); arg107_1 = getitem_3067 = copy__107 = None
+ copy__108: "f32[768, 3072][3072, 1]cuda:0" = torch.ops.aten.copy_.default(arg108_1, getitem_3068); arg108_1 = getitem_3068 = copy__108 = None
+ copy__109: "f32[768][1]cuda:0" = torch.ops.aten.copy_.default(arg109_1, getitem_3069); arg109_1 = getitem_3069 = copy__109 = None
+ copy__110: "f32[768][1]cuda:0" = torch.ops.aten.copy_.default(arg110_1, getitem_3070); arg110_1 = getitem_3070 = copy__110 = None
+ copy__111: "f32[768][1]cuda:0" = torch.ops.aten.copy_.default(arg111_1, getitem_3071); arg111_1 = getitem_3071 = copy__111 = None
+ copy__112: "f32[2304, 768][768, 1]cuda:0" = torch.ops.aten.copy_.default(arg112_1, getitem_3072); arg112_1 = getitem_3072 = copy__112 = None
+ copy__113: "f32[2304][1]cuda:0" = torch.ops.aten.copy_.default(arg113_1, getitem_3073); arg113_1 = getitem_3073 = copy__113 = None
+ copy__114: "f32[768, 768][768, 1]cuda:0" = torch.ops.aten.copy_.default(arg114_1, getitem_3074); arg114_1 = getitem_3074 = copy__114 = None
+ copy__115: "f32[768][1]cuda:0" = torch.ops.aten.copy_.default(arg115_1, getitem_3075); arg115_1 = getitem_3075 = copy__115 = None
+ copy__116: "f32[768][1]cuda:0" = torch.ops.aten.copy_.default(arg116_1, getitem_3076); arg116_1 = getitem_3076 = copy__116 = None
+ copy__117: "f32[768][1]cuda:0" = torch.ops.aten.copy_.default(arg117_1, getitem_3077); arg117_1 = getitem_3077 = copy__117 = None
+ copy__118: "f32[3072, 768][768, 1]cuda:0" = torch.ops.aten.copy_.default(arg118_1, getitem_3078); arg118_1 = getitem_3078 = copy__118 = None
+ copy__119: "f32[3072][1]cuda:0" = torch.ops.aten.copy_.default(arg119_1, getitem_3079); arg119_1 = getitem_3079 = copy__119 = None
+ copy__120: "f32[768, 3072][3072, 1]cuda:0" = torch.ops.aten.copy_.default(arg120_1, getitem_3080); arg120_1 = getitem_3080 = copy__120 = None
+ copy__121: "f32[768][1]cuda:0" = torch.ops.aten.copy_.default(arg121_1, getitem_3081); arg121_1 = getitem_3081 = copy__121 = None
+ copy__122: "f32[768][1]cuda:0" = torch.ops.aten.copy_.default(arg122_1, getitem_3082); arg122_1 = getitem_3082 = copy__122 = None
+ copy__123: "f32[768][1]cuda:0" = torch.ops.aten.copy_.default(arg123_1, getitem_3083); arg123_1 = getitem_3083 = copy__123 = None
+ copy__124: "f32[2304, 768][768, 1]cuda:0" = torch.ops.aten.copy_.default(arg124_1, getitem_3084); arg124_1 = getitem_3084 = copy__124 = None
+ copy__125: "f32[2304][1]cuda:0" = torch.ops.aten.copy_.default(arg125_1, getitem_3085); arg125_1 = getitem_3085 = copy__125 = None
+ copy__126: "f32[768, 768][768, 1]cuda:0" = torch.ops.aten.copy_.default(arg126_1, getitem_3086); arg126_1 = getitem_3086 = copy__126 = None
+ copy__127: "f32[768][1]cuda:0" = torch.ops.aten.copy_.default(arg127_1, getitem_3087); arg127_1 = getitem_3087 = copy__127 = None
+ copy__128: "f32[768][1]cuda:0" = torch.ops.aten.copy_.default(arg128_1, getitem_3088); arg128_1 = getitem_3088 = copy__128 = None
+ copy__129: "f32[768][1]cuda:0" = torch.ops.aten.copy_.default(arg129_1, getitem_3089); arg129_1 = getitem_3089 = copy__129 = None
+ copy__130: "f32[3072, 768][768, 1]cuda:0" = torch.ops.aten.copy_.default(arg130_1, getitem_3090); arg130_1 = getitem_3090 = copy__130 = None
+ copy__131: "f32[3072][1]cuda:0" = torch.ops.aten.copy_.default(arg131_1, getitem_3091); arg131_1 = getitem_3091 = copy__131 = None
+ copy__132: "f32[768, 3072][3072, 1]cuda:0" = torch.ops.aten.copy_.default(arg132_1, getitem_3092); arg132_1 = getitem_3092 = copy__132 = None
+ copy__133: "f32[768][1]cuda:0" = torch.ops.aten.copy_.default(arg133_1, getitem_3093); arg133_1 = getitem_3093 = copy__133 = None
+ copy__134: "f32[768][1]cuda:0" = torch.ops.aten.copy_.default(arg134_1, getitem_3094); arg134_1 = getitem_3094 = copy__134 = None
+ copy__135: "f32[768][1]cuda:0" = torch.ops.aten.copy_.default(arg135_1, getitem_3095); arg135_1 = getitem_3095 = copy__135 = None
+ copy__136: "f32[2304, 768][768, 1]cuda:0" = torch.ops.aten.copy_.default(arg136_1, getitem_3096); arg136_1 = getitem_3096 = copy__136 = None
+ copy__137: "f32[2304][1]cuda:0" = torch.ops.aten.copy_.default(arg137_1, getitem_3097); arg137_1 = getitem_3097 = copy__137 = None
+ copy__138: "f32[768, 768][768, 1]cuda:0" = torch.ops.aten.copy_.default(arg138_1, getitem_3098); arg138_1 = getitem_3098 = copy__138 = None
+ copy__139: "f32[768][1]cuda:0" = torch.ops.aten.copy_.default(arg139_1, getitem_3099); arg139_1 = getitem_3099 = copy__139 = None
+ copy__140: "f32[768][1]cuda:0" = torch.ops.aten.copy_.default(arg140_1, getitem_3100); arg140_1 = getitem_3100 = copy__140 = None
+ copy__141: "f32[768][1]cuda:0" = torch.ops.aten.copy_.default(arg141_1, getitem_3101); arg141_1 = getitem_3101 = copy__141 = None
+ copy__142: "f32[3072, 768][768, 1]cuda:0" = torch.ops.aten.copy_.default(arg142_1, getitem_3102); arg142_1 = getitem_3102 = copy__142 = None
+ copy__143: "f32[3072][1]cuda:0" = torch.ops.aten.copy_.default(arg143_1, getitem_3103); arg143_1 = getitem_3103 = copy__143 = None
+ copy__144: "f32[768, 3072][3072, 1]cuda:0" = torch.ops.aten.copy_.default(arg144_1, getitem_3104); arg144_1 = getitem_3104 = copy__144 = None
+ copy__145: "f32[768][1]cuda:0" = torch.ops.aten.copy_.default(arg145_1, getitem_3105); arg145_1 = getitem_3105 = copy__145 = None
+ copy__146: "f32[768][1]cuda:0" = torch.ops.aten.copy_.default(arg146_1, getitem_3106); arg146_1 = getitem_3106 = copy__146 = None
+ copy__147: "f32[768][1]cuda:0" = torch.ops.aten.copy_.default(arg147_1, getitem_3107); arg147_1 = getitem_3107 = copy__147 = None
+
+ # File: /data/users/jjwu/a/pytorch/torch/optim/adam.py:529 in _multi_tensor_adam, code: torch._foreach_add_(device_state_steps, 1)
+ copy__148: "f32[][]cuda:0" = torch.ops.aten.copy_.default(arg148_1, getitem_1); arg148_1 = getitem_1 = copy__148 = None
+
+ # File: /data/users/jjwu/a/pytorch/torch/optim/adam.py:541 in _multi_tensor_adam, code: torch._foreach_lerp_(device_exp_avgs, device_grads, 1 - beta1)
+ copy__149: "f32[1024, 768][768, 1]cuda:0" = torch.ops.aten.copy_.default(arg149_1, getitem_445); arg149_1 = getitem_445 = copy__149 = None
+
+ # File: /data/users/jjwu/a/pytorch/torch/optim/adam.py:544 in _multi_tensor_adam, code: torch._foreach_addcmul_(
+ copy__150: "f32[1024, 768][768, 1]cuda:0" = torch.ops.aten.copy_.default(arg150_1, getitem_889); arg150_1 = getitem_889 = copy__150 = None
+
+ # File: /data/users/jjwu/a/pytorch/torch/optim/adam.py:541 in _multi_tensor_adam, code: torch._foreach_lerp_(device_exp_avgs, device_grads, 1 - beta1)
+ copy__151: "f32[50304, 768][768, 1]cuda:0" = torch.ops.aten.copy_.default(arg299_1, getitem_444); arg299_1 = getitem_444 = copy__151 = None
+ copy__152: "f32[768][1]cuda:0" = torch.ops.aten.copy_.default(arg300_1, getitem_446); arg300_1 = getitem_446 = copy__152 = None
+ copy__153: "f32[768][1]cuda:0" = torch.ops.aten.copy_.default(arg301_1, getitem_447); arg301_1 = getitem_447 = copy__153 = None
+ copy__154: "f32[2304, 768][768, 1]cuda:0" = torch.ops.aten.copy_.default(arg302_1, getitem_448); arg302_1 = getitem_448 = copy__154 = None
+ copy__155: "f32[2304][1]cuda:0" = torch.ops.aten.copy_.default(arg303_1, getitem_449); arg303_1 = getitem_449 = copy__155 = None
+ copy__156: "f32[768, 768][768, 1]cuda:0" = torch.ops.aten.copy_.default(arg304_1, getitem_450); arg304_1 = getitem_450 = copy__156 = None
+ copy__157: "f32[768][1]cuda:0" = torch.ops.aten.copy_.default(arg305_1, getitem_451); arg305_1 = getitem_451 = copy__157 = None
+ copy__158: "f32[768][1]cuda:0" = torch.ops.aten.copy_.default(arg306_1, getitem_452); arg306_1 = getitem_452 = copy__158 = None
+ copy__159: "f32[768][1]cuda:0" = torch.ops.aten.copy_.default(arg307_1, getitem_453); arg307_1 = getitem_453 = copy__159 = None
+ copy__160: "f32[3072, 768][768, 1]cuda:0" = torch.ops.aten.copy_.default(arg308_1, getitem_454); arg308_1 = getitem_454 = copy__160 = None
+ copy__161: "f32[3072][1]cuda:0" = torch.ops.aten.copy_.default(arg309_1, getitem_455); arg309_1 = getitem_455 = copy__161 = None
+ copy__162: "f32[768, 3072][3072, 1]cuda:0" = torch.ops.aten.copy_.default(arg310_1, getitem_456); arg310_1 = getitem_456 = copy__162 = None
+ copy__163: "f32[768][1]cuda:0" = torch.ops.aten.copy_.default(arg311_1, getitem_457); arg311_1 = getitem_457 = copy__163 = None
+ copy__164: "f32[768][1]cuda:0" = torch.ops.aten.copy_.default(arg312_1, getitem_458); arg312_1 = getitem_458 = copy__164 = None
+ copy__165: "f32[768][1]cuda:0" = torch.ops.aten.copy_.default(arg313_1, getitem_459); arg313_1 = getitem_459 = copy__165 = None
+ copy__166: "f32[2304, 768][768, 1]cuda:0" = torch.ops.aten.copy_.default(arg314_1, getitem_460); arg314_1 = getitem_460 = copy__166 = None
+ copy__167: "f32[2304][1]cuda:0" = torch.ops.aten.copy_.default(arg315_1, getitem_461); arg315_1 = getitem_461 = copy__167 = None
+ copy__168: "f32[768, 768][768, 1]cuda:0" = torch.ops.aten.copy_.default(arg316_1, getitem_462); arg316_1 = getitem_462 = copy__168 = None
+ copy__169: "f32[768][1]cuda:0" = torch.ops.aten.copy_.default(arg317_1, getitem_463); arg317_1 = getitem_463 = copy__169 = None
+ copy__170: "f32[768][1]cuda:0" = torch.ops.aten.copy_.default(arg318_1, getitem_464); arg318_1 = getitem_464 = copy__170 = None
+ copy__171: "f32[768][1]cuda:0" = torch.ops.aten.copy_.default(arg319_1, getitem_465); arg319_1 = getitem_465 = copy__171 = None
+ copy__172: "f32[3072, 768][768, 1]cuda:0" = torch.ops.aten.copy_.default(arg320_1, getitem_466); arg320_1 = getitem_466 = copy__172 = None
+ copy__173: "f32[3072][1]cuda:0" = torch.ops.aten.copy_.default(arg321_1, getitem_467); arg321_1 = getitem_467 = copy__173 = None
+ copy__174: "f32[768, 3072][3072, 1]cuda:0" = torch.ops.aten.copy_.default(arg322_1, getitem_468); arg322_1 = getitem_468 = copy__174 = None
+ copy__175: "f32[768][1]cuda:0" = torch.ops.aten.copy_.default(arg323_1, getitem_469); arg323_1 = getitem_469 = copy__175 = None
+ copy__176: "f32[768][1]cuda:0" = torch.ops.aten.copy_.default(arg324_1, getitem_470); arg324_1 = getitem_470 = copy__176 = None
+ copy__177: "f32[768][1]cuda:0" = torch.ops.aten.copy_.default(arg325_1, getitem_471); arg325_1 = getitem_471 = copy__177 = None
+ copy__178: "f32[2304, 768][768, 1]cuda:0" = torch.ops.aten.copy_.default(arg326_1, getitem_472); arg326_1 = getitem_472 = copy__178 = None
+ copy__179: "f32[2304][1]cuda:0" = torch.ops.aten.copy_.default(arg327_1, getitem_473); arg327_1 = getitem_473 = copy__179 = None
+ copy__180: "f32[768, 768][768, 1]cuda:0" = torch.ops.aten.copy_.default(arg328_1, getitem_474); arg328_1 = getitem_474 = copy__180 = None
+ copy__181: "f32[768][1]cuda:0" = torch.ops.aten.copy_.default(arg329_1, getitem_475); arg329_1 = getitem_475 = copy__181 = None
+ copy__182: "f32[768][1]cuda:0" = torch.ops.aten.copy_.default(arg330_1, getitem_476); arg330_1 = getitem_476 = copy__182 = None
+ copy__183: "f32[768][1]cuda:0" = torch.ops.aten.copy_.default(arg331_1, getitem_477); arg331_1 = getitem_477 = copy__183 = None
+ copy__184: "f32[3072, 768][768, 1]cuda:0" = torch.ops.aten.copy_.default(arg332_1, getitem_478); arg332_1 = getitem_478 = copy__184 = None
+ copy__185: "f32[3072][1]cuda:0" = torch.ops.aten.copy_.default(arg333_1, getitem_479); arg333_1 = getitem_479 = copy__185 = None
+ copy__186: "f32[768, 3072][3072, 1]cuda:0" = torch.ops.aten.copy_.default(arg334_1, getitem_480); arg334_1 = getitem_480 = copy__186 = None
+ copy__187: "f32[768][1]cuda:0" = torch.ops.aten.copy_.default(arg335_1, getitem_481); arg335_1 = getitem_481 = copy__187 = None
+ copy__188: "f32[768][1]cuda:0" = torch.ops.aten.copy_.default(arg336_1, getitem_482); arg336_1 = getitem_482 = copy__188 = None
+ copy__189: "f32[768][1]cuda:0" = torch.ops.aten.copy_.default(arg337_1, getitem_483); arg337_1 = getitem_483 = copy__189 = None
+ copy__190: "f32[2304, 768][768, 1]cuda:0" = torch.ops.aten.copy_.default(arg338_1, getitem_484); arg338_1 = getitem_484 = copy__190 = None
+ copy__191: "f32[2304][1]cuda:0" = torch.ops.aten.copy_.default(arg339_1, getitem_485); arg339_1 = getitem_485 = copy__191 = None
+ copy__192: "f32[768, 768][768, 1]cuda:0" = torch.ops.aten.copy_.default(arg340_1, getitem_486); arg340_1 = getitem_486 = copy__192 = None
+ copy__193: "f32[768][1]cuda:0" = torch.ops.aten.copy_.default(arg341_1, getitem_487); arg341_1 = getitem_487 = copy__193 = None
+ copy__194: "f32[768][1]cuda:0" = torch.ops.aten.copy_.default(arg342_1, getitem_488); arg342_1 = getitem_488 = copy__194 = None
+ copy__195: "f32[768][1]cuda:0" = torch.ops.aten.copy_.default(arg343_1, getitem_489); arg343_1 = getitem_489 = copy__195 = None
+ copy__196: "f32[3072, 768][768, 1]cuda:0" = torch.ops.aten.copy_.default(arg344_1, getitem_490); arg344_1 = getitem_490 = copy__196 = None
+ copy__197: "f32[3072][1]cuda:0" = torch.ops.aten.copy_.default(arg345_1, getitem_491); arg345_1 = getitem_491 = copy__197 = None
+ copy__198: "f32[768, 3072][3072, 1]cuda:0" = torch.ops.aten.copy_.default(arg346_1, getitem_492); arg346_1 = getitem_492 = copy__198 = None
+ copy__199: "f32[768][1]cuda:0" = torch.ops.aten.copy_.default(arg347_1, getitem_493); arg347_1 = getitem_493 = copy__199 = None
+ copy__200: "f32[768][1]cuda:0" = torch.ops.aten.copy_.default(arg348_1, getitem_494); arg348_1 = getitem_494 = copy__200 = None
+ copy__201: "f32[768][1]cuda:0" = torch.ops.aten.copy_.default(arg349_1, getitem_495); arg349_1 = getitem_495 = copy__201 = None
+ copy__202: "f32[2304, 768][768, 1]cuda:0" = torch.ops.aten.copy_.default(arg350_1, getitem_496); arg350_1 = getitem_496 = copy__202 = None
+ copy__203: "f32[2304][1]cuda:0" = torch.ops.aten.copy_.default(arg351_1, getitem_497); arg351_1 = getitem_497 = copy__203 = None
+ copy__204: "f32[768, 768][768, 1]cuda:0" = torch.ops.aten.copy_.default(arg352_1, getitem_498); arg352_1 = getitem_498 = copy__204 = None
+ copy__205: "f32[768][1]cuda:0" = torch.ops.aten.copy_.default(arg353_1, getitem_499); arg353_1 = getitem_499 = copy__205 = None
+ copy__206: "f32[768][1]cuda:0" = torch.ops.aten.copy_.default(arg354_1, getitem_500); arg354_1 = getitem_500 = copy__206 = None
+ copy__207: "f32[768][1]cuda:0" = torch.ops.aten.copy_.default(arg355_1, getitem_501); arg355_1 = getitem_501 = copy__207 = None
+ copy__208: "f32[3072, 768][768, 1]cuda:0" = torch.ops.aten.copy_.default(arg356_1, getitem_502); arg356_1 = getitem_502 = copy__208 = None
+ copy__209: "f32[3072][1]cuda:0" = torch.ops.aten.copy_.default(arg357_1, getitem_503); arg357_1 = getitem_503 = copy__209 = None
+ copy__210: "f32[768, 3072][3072, 1]cuda:0" = torch.ops.aten.copy_.default(arg358_1, getitem_504); arg358_1 = getitem_504 = copy__210 = None
+ copy__211: "f32[768][1]cuda:0" = torch.ops.aten.copy_.default(arg359_1, getitem_505); arg359_1 = getitem_505 = copy__211 = None
+ copy__212: "f32[768][1]cuda:0" = torch.ops.aten.copy_.default(arg360_1, getitem_506); arg360_1 = getitem_506 = copy__212 = None
+ copy__213: "f32[768][1]cuda:0" = torch.ops.aten.copy_.default(arg361_1, getitem_507); arg361_1 = getitem_507 = copy__213 = None
+ copy__214: "f32[2304, 768][768, 1]cuda:0" = torch.ops.aten.copy_.default(arg362_1, getitem_508); arg362_1 = getitem_508 = copy__214 = None
+ copy__215: "f32[2304][1]cuda:0" = torch.ops.aten.copy_.default(arg363_1, getitem_509); arg363_1 = getitem_509 = copy__215 = None
+ copy__216: "f32[768, 768][768, 1]cuda:0" = torch.ops.aten.copy_.default(arg364_1, getitem_510); arg364_1 = getitem_510 = copy__216 = None
+ copy__217: "f32[768][1]cuda:0" = torch.ops.aten.copy_.default(arg365_1, getitem_511); arg365_1 = getitem_511 = copy__217 = None
+ copy__218: "f32[768][1]cuda:0" = torch.ops.aten.copy_.default(arg366_1, getitem_512); arg366_1 = getitem_512 = copy__218 = None
+ copy__219: "f32[768][1]cuda:0" = torch.ops.aten.copy_.default(arg367_1, getitem_513); arg367_1 = getitem_513 = copy__219 = None
+ copy__220: "f32[3072, 768][768, 1]cuda:0" = torch.ops.aten.copy_.default(arg368_1, getitem_514); arg368_1 = getitem_514 = copy__220 = None
+ copy__221: "f32[3072][1]cuda:0" = torch.ops.aten.copy_.default(arg369_1, getitem_515); arg369_1 = getitem_515 = copy__221 = None
+ copy__222: "f32[768, 3072][3072, 1]cuda:0" = torch.ops.aten.copy_.default(arg370_1, getitem_516); arg370_1 = getitem_516 = copy__222 = None
+ copy__223: "f32[768][1]cuda:0" = torch.ops.aten.copy_.default(arg371_1, getitem_517); arg371_1 = getitem_517 = copy__223 = None
+ copy__224: "f32[768][1]cuda:0" = torch.ops.aten.copy_.default(arg372_1, getitem_518); arg372_1 = getitem_518 = copy__224 = None
+ copy__225: "f32[768][1]cuda:0" = torch.ops.aten.copy_.default(arg373_1, getitem_519); arg373_1 = getitem_519 = copy__225 = None
+ copy__226: "f32[2304, 768][768, 1]cuda:0" = torch.ops.aten.copy_.default(arg374_1, getitem_520); arg374_1 = getitem_520 = copy__226 = None
+ copy__227: "f32[2304][1]cuda:0" = torch.ops.aten.copy_.default(arg375_1, getitem_521); arg375_1 = getitem_521 = copy__227 = None
+ copy__228: "f32[768, 768][768, 1]cuda:0" = torch.ops.aten.copy_.default(arg376_1, getitem_522); arg376_1 = getitem_522 = copy__228 = None
+ copy__229: "f32[768][1]cuda:0" = torch.ops.aten.copy_.default(arg377_1, getitem_523); arg377_1 = getitem_523 = copy__229 = None
+ copy__230: "f32[768][1]cuda:0" = torch.ops.aten.copy_.default(arg378_1, getitem_524); arg378_1 = getitem_524 = copy__230 = None
+ copy__231: "f32[768][1]cuda:0" = torch.ops.aten.copy_.default(arg379_1, getitem_525); arg379_1 = getitem_525 = copy__231 = None
+ copy__232: "f32[3072, 768][768, 1]cuda:0" = torch.ops.aten.copy_.default(arg380_1, getitem_526); arg380_1 = getitem_526 = copy__232 = None
+ copy__233: "f32[3072][1]cuda:0" = torch.ops.aten.copy_.default(arg381_1, getitem_527); arg381_1 = getitem_527 = copy__233 = None
+ copy__234: "f32[768, 3072][3072, 1]cuda:0" = torch.ops.aten.copy_.default(arg382_1, getitem_528); arg382_1 = getitem_528 = copy__234 = None
+ copy__235: "f32[768][1]cuda:0" = torch.ops.aten.copy_.default(arg383_1, getitem_529); arg383_1 = getitem_529 = copy__235 = None
+ copy__236: "f32[768][1]cuda:0" = torch.ops.aten.copy_.default(arg384_1, getitem_530); arg384_1 = getitem_530 = copy__236 = None
+ copy__237: "f32[768][1]cuda:0" = torch.ops.aten.copy_.default(arg385_1, getitem_531); arg385_1 = getitem_531 = copy__237 = None
+ copy__238: "f32[2304, 768][768, 1]cuda:0" = torch.ops.aten.copy_.default(arg386_1, getitem_532); arg386_1 = getitem_532 = copy__238 = None
+ copy__239: "f32[2304][1]cuda:0" = torch.ops.aten.copy_.default(arg387_1, getitem_533); arg387_1 = getitem_533 = copy__239 = None
+ copy__240: "f32[768, 768][768, 1]cuda:0" = torch.ops.aten.copy_.default(arg388_1, getitem_534); arg388_1 = getitem_534 = copy__240 = None
+ copy__241: "f32[768][1]cuda:0" = torch.ops.aten.copy_.default(arg389_1, getitem_535); arg389_1 = getitem_535 = copy__241 = None
+ copy__242: "f32[768][1]cuda:0" = torch.ops.aten.copy_.default(arg390_1, getitem_536); arg390_1 = getitem_536 = copy__242 = None
+ copy__243: "f32[768][1]cuda:0" = torch.ops.aten.copy_.default(arg391_1, getitem_537); arg391_1 = getitem_537 = copy__243 = None
+ copy__244: "f32[3072, 768][768, 1]cuda:0" = torch.ops.aten.copy_.default(arg392_1, getitem_538); arg392_1 = getitem_538 = copy__244 = None
+ copy__245: "f32[3072][1]cuda:0" = torch.ops.aten.copy_.default(arg393_1, getitem_539); arg393_1 = getitem_539 = copy__245 = None
+ copy__246: "f32[768, 3072][3072, 1]cuda:0" = torch.ops.aten.copy_.default(arg394_1, getitem_540); arg394_1 = getitem_540 = copy__246 = None
+ copy__247: "f32[768][1]cuda:0" = torch.ops.aten.copy_.default(arg395_1, getitem_541); arg395_1 = getitem_541 = copy__247 = None
+ copy__248: "f32[768][1]cuda:0" = torch.ops.aten.copy_.default(arg396_1, getitem_542); arg396_1 = getitem_542 = copy__248 = None
+ copy__249: "f32[768][1]cuda:0" = torch.ops.aten.copy_.default(arg397_1, getitem_543); arg397_1 = getitem_543 = copy__249 = None
+ copy__250: "f32[2304, 768][768, 1]cuda:0" = torch.ops.aten.copy_.default(arg398_1, getitem_544); arg398_1 = getitem_544 = copy__250 = None
+ copy__251: "f32[2304][1]cuda:0" = torch.ops.aten.copy_.default(arg399_1, getitem_545); arg399_1 = getitem_545 = copy__251 = None
+ copy__252: "f32[768, 768][768, 1]cuda:0" = torch.ops.aten.copy_.default(arg400_1, getitem_546); arg400_1 = getitem_546 = copy__252 = None
+ copy__253: "f32[768][1]cuda:0" = torch.ops.aten.copy_.default(arg401_1, getitem_547); arg401_1 = getitem_547 = copy__253 = None
+ copy__254: "f32[768][1]cuda:0" = torch.ops.aten.copy_.default(arg402_1, getitem_548); arg402_1 = getitem_548 = copy__254 = None
+ copy__255: "f32[768][1]cuda:0" = torch.ops.aten.copy_.default(arg403_1, getitem_549); arg403_1 = getitem_549 = copy__255 = None
+ copy__256: "f32[3072, 768][768, 1]cuda:0" = torch.ops.aten.copy_.default(arg404_1, getitem_550); arg404_1 = getitem_550 = copy__256 = None
+ copy__257: "f32[3072][1]cuda:0" = torch.ops.aten.copy_.default(arg405_1, getitem_551); arg405_1 = getitem_551 = copy__257 = None
+ copy__258: "f32[768, 3072][3072, 1]cuda:0" = torch.ops.aten.copy_.default(arg406_1, getitem_552); arg406_1 = getitem_552 = copy__258 = None
+ copy__259: "f32[768][1]cuda:0" = torch.ops.aten.copy_.default(arg407_1, getitem_553); arg407_1 = getitem_553 = copy__259 = None
+ copy__260: "f32[768][1]cuda:0" = torch.ops.aten.copy_.default(arg408_1, getitem_554); arg408_1 = getitem_554 = copy__260 = None
+ copy__261: "f32[768][1]cuda:0" = torch.ops.aten.copy_.default(arg409_1, getitem_555); arg409_1 = getitem_555 = copy__261 = None
+ copy__262: "f32[2304, 768][768, 1]cuda:0" = torch.ops.aten.copy_.default(arg410_1, getitem_556); arg410_1 = getitem_556 = copy__262 = None
+ copy__263: "f32[2304][1]cuda:0" = torch.ops.aten.copy_.default(arg411_1, getitem_557); arg411_1 = getitem_557 = copy__263 = None
+ copy__264: "f32[768, 768][768, 1]cuda:0" = torch.ops.aten.copy_.default(arg412_1, getitem_558); arg412_1 = getitem_558 = copy__264 = None
+ copy__265: "f32[768][1]cuda:0" = torch.ops.aten.copy_.default(arg413_1, getitem_559); arg413_1 = getitem_559 = copy__265 = None
+ copy__266: "f32[768][1]cuda:0" = torch.ops.aten.copy_.default(arg414_1, getitem_560); arg414_1 = getitem_560 = copy__266 = None
+ copy__267: "f32[768][1]cuda:0" = torch.ops.aten.copy_.default(arg415_1, getitem_561); arg415_1 = getitem_561 = copy__267 = None
+ copy__268: "f32[3072, 768][768, 1]cuda:0" = torch.ops.aten.copy_.default(arg416_1, getitem_562); arg416_1 = getitem_562 = copy__268 = None
+ copy__269: "f32[3072][1]cuda:0" = torch.ops.aten.copy_.default(arg417_1, getitem_563); arg417_1 = getitem_563 = copy__269 = None
+ copy__270: "f32[768, 3072][3072, 1]cuda:0" = torch.ops.aten.copy_.default(arg418_1, getitem_564); arg418_1 = getitem_564 = copy__270 = None
+ copy__271: "f32[768][1]cuda:0" = torch.ops.aten.copy_.default(arg419_1, getitem_565); arg419_1 = getitem_565 = copy__271 = None
+ copy__272: "f32[768][1]cuda:0" = torch.ops.aten.copy_.default(arg420_1, getitem_566); arg420_1 = getitem_566 = copy__272 = None
+ copy__273: "f32[768][1]cuda:0" = torch.ops.aten.copy_.default(arg421_1, getitem_567); arg421_1 = getitem_567 = copy__273 = None
+ copy__274: "f32[2304, 768][768, 1]cuda:0" = torch.ops.aten.copy_.default(arg422_1, getitem_568); arg422_1 = getitem_568 = copy__274 = None
+ copy__275: "f32[2304][1]cuda:0" = torch.ops.aten.copy_.default(arg423_1, getitem_569); arg423_1 = getitem_569 = copy__275 = None
+ copy__276: "f32[768, 768][768, 1]cuda:0" = torch.ops.aten.copy_.default(arg424_1, getitem_570); arg424_1 = getitem_570 = copy__276 = None
+ copy__277: "f32[768][1]cuda:0" = torch.ops.aten.copy_.default(arg425_1, getitem_571); arg425_1 = getitem_571 = copy__277 = None
+ copy__278: "f32[768][1]cuda:0" = torch.ops.aten.copy_.default(arg426_1, getitem_572); arg426_1 = getitem_572 = copy__278 = None
+ copy__279: "f32[768][1]cuda:0" = torch.ops.aten.copy_.default(arg427_1, getitem_573); arg427_1 = getitem_573 = copy__279 = None
+ copy__280: "f32[3072, 768][768, 1]cuda:0" = torch.ops.aten.copy_.default(arg428_1, getitem_574); arg428_1 = getitem_574 = copy__280 = None
+ copy__281: "f32[3072][1]cuda:0" = torch.ops.aten.copy_.default(arg429_1, getitem_575); arg429_1 = getitem_575 = copy__281 = None
+ copy__282: "f32[768, 3072][3072, 1]cuda:0" = torch.ops.aten.copy_.default(arg430_1, getitem_576); arg430_1 = getitem_576 = copy__282 = None
+ copy__283: "f32[768][1]cuda:0" = torch.ops.aten.copy_.default(arg431_1, getitem_577); arg431_1 = getitem_577 = copy__283 = None
+ copy__284: "f32[768][1]cuda:0" = torch.ops.aten.copy_.default(arg432_1, getitem_578); arg432_1 = getitem_578 = copy__284 = None
+ copy__285: "f32[768][1]cuda:0" = torch.ops.aten.copy_.default(arg433_1, getitem_579); arg433_1 = getitem_579 = copy__285 = None
+ copy__286: "f32[2304, 768][768, 1]cuda:0" = torch.ops.aten.copy_.default(arg434_1, getitem_580); arg434_1 = getitem_580 = copy__286 = None
+ copy__287: "f32[2304][1]cuda:0" = torch.ops.aten.copy_.default(arg435_1, getitem_581); arg435_1 = getitem_581 = copy__287 = None
+ copy__288: "f32[768, 768][768, 1]cuda:0" = torch.ops.aten.copy_.default(arg436_1, getitem_582); arg436_1 = getitem_582 = copy__288 = None
+ copy__289: "f32[768][1]cuda:0" = torch.ops.aten.copy_.default(arg437_1, getitem_583); arg437_1 = getitem_583 = copy__289 = None
+ copy__290: "f32[768][1]cuda:0" = torch.ops.aten.copy_.default(arg438_1, getitem_584); arg438_1 = getitem_584 = copy__290 = None
+ copy__291: "f32[768][1]cuda:0" = torch.ops.aten.copy_.default(arg439_1, getitem_585); arg439_1 = getitem_585 = copy__291 = None
+ copy__292: "f32[3072, 768][768, 1]cuda:0" = torch.ops.aten.copy_.default(arg440_1, getitem_586); arg440_1 = getitem_586 = copy__292 = None
+ copy__293: "f32[3072][1]cuda:0" = torch.ops.aten.copy_.default(arg441_1, getitem_587); arg441_1 = getitem_587 = copy__293 = None
+ copy__294: "f32[768, 3072][3072, 1]cuda:0" = torch.ops.aten.copy_.default(arg442_1, getitem_588); arg442_1 = getitem_588 = copy__294 = None
+ copy__295: "f32[768][1]cuda:0" = torch.ops.aten.copy_.default(arg443_1, getitem_589); arg443_1 = getitem_589 = copy__295 = None
+ copy__296: "f32[768][1]cuda:0" = torch.ops.aten.copy_.default(arg444_1, getitem_590); arg444_1 = getitem_590 = copy__296 = None
+ copy__297: "f32[768][1]cuda:0" = torch.ops.aten.copy_.default(arg445_1, getitem_591); arg445_1 = getitem_591 = copy__297 = None
+
+ # File: /data/users/jjwu/a/pytorch/torch/optim/adam.py:544 in _multi_tensor_adam, code: torch._foreach_addcmul_(
+ copy__298: "f32[50304, 768][768, 1]cuda:0" = torch.ops.aten.copy_.default(arg446_1, getitem_888); arg446_1 = getitem_888 = copy__298 = None
+ copy__299: "f32[768][1]cuda:0" = torch.ops.aten.copy_.default(arg447_1, getitem_890); arg447_1 = getitem_890 = copy__299 = None
+ copy__300: "f32[768][1]cuda:0" = torch.ops.aten.copy_.default(arg448_1, getitem_891); arg448_1 = getitem_891 = copy__300 = None
+ copy__301: "f32[2304, 768][768, 1]cuda:0" = torch.ops.aten.copy_.default(arg449_1, getitem_892); arg449_1 = getitem_892 = copy__301 = None
+ copy__302: "f32[2304][1]cuda:0" = torch.ops.aten.copy_.default(arg450_1, getitem_893); arg450_1 = getitem_893 = copy__302 = None
+ copy__303: "f32[768, 768][768, 1]cuda:0" = torch.ops.aten.copy_.default(arg451_1, getitem_894); arg451_1 = getitem_894 = copy__303 = None
+ copy__304: "f32[768][1]cuda:0" = torch.ops.aten.copy_.default(arg452_1, getitem_895); arg452_1 = getitem_895 = copy__304 = None
+ copy__305: "f32[768][1]cuda:0" = torch.ops.aten.copy_.default(arg453_1, getitem_896); arg453_1 = getitem_896 = copy__305 = None
+ copy__306: "f32[768][1]cuda:0" = torch.ops.aten.copy_.default(arg454_1, getitem_897); arg454_1 = getitem_897 = copy__306 = None
+ copy__307: "f32[3072, 768][768, 1]cuda:0" = torch.ops.aten.copy_.default(arg455_1, getitem_898); arg455_1 = getitem_898 = copy__307 = None
+ copy__308: "f32[3072][1]cuda:0" = torch.ops.aten.copy_.default(arg456_1, getitem_899); arg456_1 = getitem_899 = copy__308 = None
+ copy__309: "f32[768, 3072][3072, 1]cuda:0" = torch.ops.aten.copy_.default(arg457_1, getitem_900); arg457_1 = getitem_900 = copy__309 = None
+ copy__310: "f32[768][1]cuda:0" = torch.ops.aten.copy_.default(arg458_1, getitem_901); arg458_1 = getitem_901 = copy__310 = None
+ copy__311: "f32[768][1]cuda:0" = torch.ops.aten.copy_.default(arg459_1, getitem_902); arg459_1 = getitem_902 = copy__311 = None
+ copy__312: "f32[768][1]cuda:0" = torch.ops.aten.copy_.default(arg460_1, getitem_903); arg460_1 = getitem_903 = copy__312 = None
+ copy__313: "f32[2304, 768][768, 1]cuda:0" = torch.ops.aten.copy_.default(arg461_1, getitem_904); arg461_1 = getitem_904 = copy__313 = None
+ copy__314: "f32[2304][1]cuda:0" = torch.ops.aten.copy_.default(arg462_1, getitem_905); arg462_1 = getitem_905 = copy__314 = None
+ copy__315: "f32[768, 768][768, 1]cuda:0" = torch.ops.aten.copy_.default(arg463_1, getitem_906); arg463_1 = getitem_906 = copy__315 = None
+ copy__316: "f32[768][1]cuda:0" = torch.ops.aten.copy_.default(arg464_1, getitem_907); arg464_1 = getitem_907 = copy__316 = None
+ copy__317: "f32[768][1]cuda:0" = torch.ops.aten.copy_.default(arg465_1, getitem_908); arg465_1 = getitem_908 = copy__317 = None
+ copy__318: "f32[768][1]cuda:0" = torch.ops.aten.copy_.default(arg466_1, getitem_909); arg466_1 = getitem_909 = copy__318 = None
+ copy__319: "f32[3072, 768][768, 1]cuda:0" = torch.ops.aten.copy_.default(arg467_1, getitem_910); arg467_1 = getitem_910 = copy__319 = None
+ copy__320: "f32[3072][1]cuda:0" = torch.ops.aten.copy_.default(arg468_1, getitem_911); arg468_1 = getitem_911 = copy__320 = None
+ copy__321: "f32[768, 3072][3072, 1]cuda:0" = torch.ops.aten.copy_.default(arg469_1, getitem_912); arg469_1 = getitem_912 = copy__321 = None
+ copy__322: "f32[768][1]cuda:0" = torch.ops.aten.copy_.default(arg470_1, getitem_913); arg470_1 = getitem_913 = copy__322 = None
+ copy__323: "f32[768][1]cuda:0" = torch.ops.aten.copy_.default(arg471_1, getitem_914); arg471_1 = getitem_914 = copy__323 = None
+ copy__324: "f32[768][1]cuda:0" = torch.ops.aten.copy_.default(arg472_1, getitem_915); arg472_1 = getitem_915 = copy__324 = None
+ copy__325: "f32[2304, 768][768, 1]cuda:0" = torch.ops.aten.copy_.default(arg473_1, getitem_916); arg473_1 = getitem_916 = copy__325 = None
+ copy__326: "f32[2304][1]cuda:0" = torch.ops.aten.copy_.default(arg474_1, getitem_917); arg474_1 = getitem_917 = copy__326 = None
+ copy__327: "f32[768, 768][768, 1]cuda:0" = torch.ops.aten.copy_.default(arg475_1, getitem_918); arg475_1 = getitem_918 = copy__327 = None
+ copy__328: "f32[768][1]cuda:0" = torch.ops.aten.copy_.default(arg476_1, getitem_919); arg476_1 = getitem_919 = copy__328 = None
+ copy__329: "f32[768][1]cuda:0" = torch.ops.aten.copy_.default(arg477_1, getitem_920); arg477_1 = getitem_920 = copy__329 = None
+ copy__330: "f32[768][1]cuda:0" = torch.ops.aten.copy_.default(arg478_1, getitem_921); arg478_1 = getitem_921 = copy__330 = None
+ copy__331: "f32[3072, 768][768, 1]cuda:0" = torch.ops.aten.copy_.default(arg479_1, getitem_922); arg479_1 = getitem_922 = copy__331 = None
+ copy__332: "f32[3072][1]cuda:0" = torch.ops.aten.copy_.default(arg480_1, getitem_923); arg480_1 = getitem_923 = copy__332 = None
+ copy__333: "f32[768, 3072][3072, 1]cuda:0" = torch.ops.aten.copy_.default(arg481_1, getitem_924); arg481_1 = getitem_924 = copy__333 = None
+ copy__334: "f32[768][1]cuda:0" = torch.ops.aten.copy_.default(arg482_1, getitem_925); arg482_1 = getitem_925 = copy__334 = None
+ copy__335: "f32[768][1]cuda:0" = torch.ops.aten.copy_.default(arg483_1, getitem_926); arg483_1 = getitem_926 = copy__335 = None
+ copy__336: "f32[768][1]cuda:0" = torch.ops.aten.copy_.default(arg484_1, getitem_927); arg484_1 = getitem_927 = copy__336 = None
+ copy__337: "f32[2304, 768][768, 1]cuda:0" = torch.ops.aten.copy_.default(arg485_1, getitem_928); arg485_1 = getitem_928 = copy__337 = None
+ copy__338: "f32[2304][1]cuda:0" = torch.ops.aten.copy_.default(arg486_1, getitem_929); arg486_1 = getitem_929 = copy__338 = None
+ copy__339: "f32[768, 768][768, 1]cuda:0" = torch.ops.aten.copy_.default(arg487_1, getitem_930); arg487_1 = getitem_930 = copy__339 = None
+ copy__340: "f32[768][1]cuda:0" = torch.ops.aten.copy_.default(arg488_1, getitem_931); arg488_1 = getitem_931 = copy__340 = None
+ copy__341: "f32[768][1]cuda:0" = torch.ops.aten.copy_.default(arg489_1, getitem_932); arg489_1 = getitem_932 = copy__341 = None
+ copy__342: "f32[768][1]cuda:0" = torch.ops.aten.copy_.default(arg490_1, getitem_933); arg490_1 = getitem_933 = copy__342 = None
+ copy__343: "f32[3072, 768][768, 1]cuda:0" = torch.ops.aten.copy_.default(arg491_1, getitem_934); arg491_1 = getitem_934 = copy__343 = None
+ copy__344: "f32[3072][1]cuda:0" = torch.ops.aten.copy_.default(arg492_1, getitem_935); arg492_1 = getitem_935 = copy__344 = None
+ copy__345: "f32[768, 3072][3072, 1]cuda:0" = torch.ops.aten.copy_.default(arg493_1, getitem_936); arg493_1 = getitem_936 = copy__345 = None
+ copy__346: "f32[768][1]cuda:0" = torch.ops.aten.copy_.default(arg494_1, getitem_937); arg494_1 = getitem_937 = copy__346 = None
+ copy__347: "f32[768][1]cuda:0" = torch.ops.aten.copy_.default(arg495_1, getitem_938); arg495_1 = getitem_938 = copy__347 = None
+ copy__348: "f32[768][1]cuda:0" = torch.ops.aten.copy_.default(arg496_1, getitem_939); arg496_1 = getitem_939 = copy__348 = None
+ copy__349: "f32[2304, 768][768, 1]cuda:0" = torch.ops.aten.copy_.default(arg497_1, getitem_940); arg497_1 = getitem_940 = copy__349 = None
+ copy__350: "f32[2304][1]cuda:0" = torch.ops.aten.copy_.default(arg498_1, getitem_941); arg498_1 = getitem_941 = copy__350 = None
+ copy__351: "f32[768, 768][768, 1]cuda:0" = torch.ops.aten.copy_.default(arg499_1, getitem_942); arg499_1 = getitem_942 = copy__351 = None
+ copy__352: "f32[768][1]cuda:0" = torch.ops.aten.copy_.default(arg500_1, getitem_943); arg500_1 = getitem_943 = copy__352 = None
+ copy__353: "f32[768][1]cuda:0" = torch.ops.aten.copy_.default(arg501_1, getitem_944); arg501_1 = getitem_944 = copy__353 = None
+ copy__354: "f32[768][1]cuda:0" = torch.ops.aten.copy_.default(arg502_1, getitem_945); arg502_1 = getitem_945 = copy__354 = None
+ copy__355: "f32[3072, 768][768, 1]cuda:0" = torch.ops.aten.copy_.default(arg503_1, getitem_946); arg503_1 = getitem_946 = copy__355 = None
+ copy__356: "f32[3072][1]cuda:0" = torch.ops.aten.copy_.default(arg504_1, getitem_947); arg504_1 = getitem_947 = copy__356 = None
+ copy__357: "f32[768, 3072][3072, 1]cuda:0" = torch.ops.aten.copy_.default(arg505_1, getitem_948); arg505_1 = getitem_948 = copy__357 = None
+ copy__358: "f32[768][1]cuda:0" = torch.ops.aten.copy_.default(arg506_1, getitem_949); arg506_1 = getitem_949 = copy__358 = None
+ copy__359: "f32[768][1]cuda:0" = torch.ops.aten.copy_.default(arg507_1, getitem_950); arg507_1 = getitem_950 = copy__359 = None
+ copy__360: "f32[768][1]cuda:0" = torch.ops.aten.copy_.default(arg508_1, getitem_951); arg508_1 = getitem_951 = copy__360 = None
+ copy__361: "f32[2304, 768][768, 1]cuda:0" = torch.ops.aten.copy_.default(arg509_1, getitem_952); arg509_1 = getitem_952 = copy__361 = None
+ copy__362: "f32[2304][1]cuda:0" = torch.ops.aten.copy_.default(arg510_1, getitem_953); arg510_1 = getitem_953 = copy__362 = None
+ copy__363: "f32[768, 768][768, 1]cuda:0" = torch.ops.aten.copy_.default(arg511_1, getitem_954); arg511_1 = getitem_954 = copy__363 = None
+ copy__364: "f32[768][1]cuda:0" = torch.ops.aten.copy_.default(arg512_1, getitem_955); arg512_1 = getitem_955 = copy__364 = None
+ copy__365: "f32[768][1]cuda:0" = torch.ops.aten.copy_.default(arg513_1, getitem_956); arg513_1 = getitem_956 = copy__365 = None
+ copy__366: "f32[768][1]cuda:0" = torch.ops.aten.copy_.default(arg514_1, getitem_957); arg514_1 = getitem_957 = copy__366 = None
+ copy__367: "f32[3072, 768][768, 1]cuda:0" = torch.ops.aten.copy_.default(arg515_1, getitem_958); arg515_1 = getitem_958 = copy__367 = None
+ copy__368: "f32[3072][1]cuda:0" = torch.ops.aten.copy_.default(arg516_1, getitem_959); arg516_1 = getitem_959 = copy__368 = None
+ copy__369: "f32[768, 3072][3072, 1]cuda:0" = torch.ops.aten.copy_.default(arg517_1, getitem_960); arg517_1 = getitem_960 = copy__369 = None
+ copy__370: "f32[768][1]cuda:0" = torch.ops.aten.copy_.default(arg518_1, getitem_961); arg518_1 = getitem_961 = copy__370 = None
+ copy__371: "f32[768][1]cuda:0" = torch.ops.aten.copy_.default(arg519_1, getitem_962); arg519_1 = getitem_962 = copy__371 = None
+ copy__372: "f32[768][1]cuda:0" = torch.ops.aten.copy_.default(arg520_1, getitem_963); arg520_1 = getitem_963 = copy__372 = None
+ copy__373: "f32[2304, 768][768, 1]cuda:0" = torch.ops.aten.copy_.default(arg521_1, getitem_964); arg521_1 = getitem_964 = copy__373 = None
+ copy__374: "f32[2304][1]cuda:0" = torch.ops.aten.copy_.default(arg522_1, getitem_965); arg522_1 = getitem_965 = copy__374 = None
+ copy__375: "f32[768, 768][768, 1]cuda:0" = torch.ops.aten.copy_.default(arg523_1, getitem_966); arg523_1 = getitem_966 = copy__375 = None
+ copy__376: "f32[768][1]cuda:0" = torch.ops.aten.copy_.default(arg524_1, getitem_967); arg524_1 = getitem_967 = copy__376 = None
+ copy__377: "f32[768][1]cuda:0" = torch.ops.aten.copy_.default(arg525_1, getitem_968); arg525_1 = getitem_968 = copy__377 = None
+ copy__378: "f32[768][1]cuda:0" = torch.ops.aten.copy_.default(arg526_1, getitem_969); arg526_1 = getitem_969 = copy__378 = None
+ copy__379: "f32[3072, 768][768, 1]cuda:0" = torch.ops.aten.copy_.default(arg527_1, getitem_970); arg527_1 = getitem_970 = copy__379 = None
+ copy__380: "f32[3072][1]cuda:0" = torch.ops.aten.copy_.default(arg528_1, getitem_971); arg528_1 = getitem_971 = copy__380 = None
+ copy__381: "f32[768, 3072][3072, 1]cuda:0" = torch.ops.aten.copy_.default(arg529_1, getitem_972); arg529_1 = getitem_972 = copy__381 = None
+ copy__382: "f32[768][1]cuda:0" = torch.ops.aten.copy_.default(arg530_1, getitem_973); arg530_1 = getitem_973 = copy__382 = None
+ copy__383: "f32[768][1]cuda:0" = torch.ops.aten.copy_.default(arg531_1, getitem_974); arg531_1 = getitem_974 = copy__383 = None
+ copy__384: "f32[768][1]cuda:0" = torch.ops.aten.copy_.default(arg532_1, getitem_975); arg532_1 = getitem_975 = copy__384 = None
+ copy__385: "f32[2304, 768][768, 1]cuda:0" = torch.ops.aten.copy_.default(arg533_1, getitem_976); arg533_1 = getitem_976 = copy__385 = None
+ copy__386: "f32[2304][1]cuda:0" = torch.ops.aten.copy_.default(arg534_1, getitem_977); arg534_1 = getitem_977 = copy__386 = None
+ copy__387: "f32[768, 768][768, 1]cuda:0" = torch.ops.aten.copy_.default(arg535_1, getitem_978); arg535_1 = getitem_978 = copy__387 = None
+ copy__388: "f32[768][1]cuda:0" = torch.ops.aten.copy_.default(arg536_1, getitem_979); arg536_1 = getitem_979 = copy__388 = None
+ copy__389: "f32[768][1]cuda:0" = torch.ops.aten.copy_.default(arg537_1, getitem_980); arg537_1 = getitem_980 = copy__389 = None
+ copy__390: "f32[768][1]cuda:0" = torch.ops.aten.copy_.default(arg538_1, getitem_981); arg538_1 = getitem_981 = copy__390 = None
+ copy__391: "f32[3072, 768][768, 1]cuda:0" = torch.ops.aten.copy_.default(arg539_1, getitem_982); arg539_1 = getitem_982 = copy__391 = None
+ copy__392: "f32[3072][1]cuda:0" = torch.ops.aten.copy_.default(arg540_1, getitem_983); arg540_1 = getitem_983 = copy__392 = None
+ copy__393: "f32[768, 3072][3072, 1]cuda:0" = torch.ops.aten.copy_.default(arg541_1, getitem_984); arg541_1 = getitem_984 = copy__393 = None
+ copy__394: "f32[768][1]cuda:0" = torch.ops.aten.copy_.default(arg542_1, getitem_985); arg542_1 = getitem_985 = copy__394 = None
+ copy__395: "f32[768][1]cuda:0" = torch.ops.aten.copy_.default(arg543_1, getitem_986); arg543_1 = getitem_986 = copy__395 = None
+ copy__396: "f32[768][1]cuda:0" = torch.ops.aten.copy_.default(arg544_1, getitem_987); arg544_1 = getitem_987 = copy__396 = None
+ copy__397: "f32[2304, 768][768, 1]cuda:0" = torch.ops.aten.copy_.default(arg545_1, getitem_988); arg545_1 = getitem_988 = copy__397 = None
+ copy__398: "f32[2304][1]cuda:0" = torch.ops.aten.copy_.default(arg546_1, getitem_989); arg546_1 = getitem_989 = copy__398 = None
+ copy__399: "f32[768, 768][768, 1]cuda:0" = torch.ops.aten.copy_.default(arg547_1, getitem_990); arg547_1 = getitem_990 = copy__399 = None
+ copy__400: "f32[768][1]cuda:0" = torch.ops.aten.copy_.default(arg548_1, getitem_991); arg548_1 = getitem_991 = copy__400 = None
+ copy__401: "f32[768][1]cuda:0" = torch.ops.aten.copy_.default(arg549_1, getitem_992); arg549_1 = getitem_992 = copy__401 = None
+ copy__402: "f32[768][1]cuda:0" = torch.ops.aten.copy_.default(arg550_1, getitem_993); arg550_1 = getitem_993 = copy__402 = None
+ copy__403: "f32[3072, 768][768, 1]cuda:0" = torch.ops.aten.copy_.default(arg551_1, getitem_994); arg551_1 = getitem_994 = copy__403 = None
+ copy__404: "f32[3072][1]cuda:0" = torch.ops.aten.copy_.default(arg552_1, getitem_995); arg552_1 = getitem_995 = copy__404 = None
+ copy__405: "f32[768, 3072][3072, 1]cuda:0" = torch.ops.aten.copy_.default(arg553_1, getitem_996); arg553_1 = getitem_996 = copy__405 = None
+ copy__406: "f32[768][1]cuda:0" = torch.ops.aten.copy_.default(arg554_1, getitem_997); arg554_1 = getitem_997 = copy__406 = None
+ copy__407: "f32[768][1]cuda:0" = torch.ops.aten.copy_.default(arg555_1, getitem_998); arg555_1 = getitem_998 = copy__407 = None
+ copy__408: "f32[768][1]cuda:0" = torch.ops.aten.copy_.default(arg556_1, getitem_999); arg556_1 = getitem_999 = copy__408 = None
+ copy__409: "f32[2304, 768][768, 1]cuda:0" = torch.ops.aten.copy_.default(arg557_1, getitem_1000); arg557_1 = getitem_1000 = copy__409 = None
+ copy__410: "f32[2304][1]cuda:0" = torch.ops.aten.copy_.default(arg558_1, getitem_1001); arg558_1 = getitem_1001 = copy__410 = None
+ copy__411: "f32[768, 768][768, 1]cuda:0" = torch.ops.aten.copy_.default(arg559_1, getitem_1002); arg559_1 = getitem_1002 = copy__411 = None
+ copy__412: "f32[768][1]cuda:0" = torch.ops.aten.copy_.default(arg560_1, getitem_1003); arg560_1 = getitem_1003 = copy__412 = None
+ copy__413: "f32[768][1]cuda:0" = torch.ops.aten.copy_.default(arg561_1, getitem_1004); arg561_1 = getitem_1004 = copy__413 = None
+ copy__414: "f32[768][1]cuda:0" = torch.ops.aten.copy_.default(arg562_1, getitem_1005); arg562_1 = getitem_1005 = copy__414 = None
+ copy__415: "f32[3072, 768][768, 1]cuda:0" = torch.ops.aten.copy_.default(arg563_1, getitem_1006); arg563_1 = getitem_1006 = copy__415 = None
+ copy__416: "f32[3072][1]cuda:0" = torch.ops.aten.copy_.default(arg564_1, getitem_1007); arg564_1 = getitem_1007 = copy__416 = None
+ copy__417: "f32[768, 3072][3072, 1]cuda:0" = torch.ops.aten.copy_.default(arg565_1, getitem_1008); arg565_1 = getitem_1008 = copy__417 = None
+ copy__418: "f32[768][1]cuda:0" = torch.ops.aten.copy_.default(arg566_1, getitem_1009); arg566_1 = getitem_1009 = copy__418 = None
+ copy__419: "f32[768][1]cuda:0" = torch.ops.aten.copy_.default(arg567_1, getitem_1010); arg567_1 = getitem_1010 = copy__419 = None
+ copy__420: "f32[768][1]cuda:0" = torch.ops.aten.copy_.default(arg568_1, getitem_1011); arg568_1 = getitem_1011 = copy__420 = None
+ copy__421: "f32[2304, 768][768, 1]cuda:0" = torch.ops.aten.copy_.default(arg569_1, getitem_1012); arg569_1 = getitem_1012 = copy__421 = None
+ copy__422: "f32[2304][1]cuda:0" = torch.ops.aten.copy_.default(arg570_1, getitem_1013); arg570_1 = getitem_1013 = copy__422 = None
+ copy__423: "f32[768, 768][768, 1]cuda:0" = torch.ops.aten.copy_.default(arg571_1, getitem_1014); arg571_1 = getitem_1014 = copy__423 = None
+ copy__424: "f32[768][1]cuda:0" = torch.ops.aten.copy_.default(arg572_1, getitem_1015); arg572_1 = getitem_1015 = copy__424 = None
+ copy__425: "f32[768][1]cuda:0" = torch.ops.aten.copy_.default(arg573_1, getitem_1016); arg573_1 = getitem_1016 = copy__425 = None
+ copy__426: "f32[768][1]cuda:0" = torch.ops.aten.copy_.default(arg574_1, getitem_1017); arg574_1 = getitem_1017 = copy__426 = None
+ copy__427: "f32[3072, 768][768, 1]cuda:0" = torch.ops.aten.copy_.default(arg575_1, getitem_1018); arg575_1 = getitem_1018 = copy__427 = None
+ copy__428: "f32[3072][1]cuda:0" = torch.ops.aten.copy_.default(arg576_1, getitem_1019); arg576_1 = getitem_1019 = copy__428 = None
+ copy__429: "f32[768, 3072][3072, 1]cuda:0" = torch.ops.aten.copy_.default(arg577_1, getitem_1020); arg577_1 = getitem_1020 = copy__429 = None
+ copy__430: "f32[768][1]cuda:0" = torch.ops.aten.copy_.default(arg578_1, getitem_1021); arg578_1 = getitem_1021 = copy__430 = None
+ copy__431: "f32[768][1]cuda:0" = torch.ops.aten.copy_.default(arg579_1, getitem_1022); arg579_1 = getitem_1022 = copy__431 = None
+ copy__432: "f32[768][1]cuda:0" = torch.ops.aten.copy_.default(arg580_1, getitem_1023); arg580_1 = getitem_1023 = copy__432 = None
+ copy__433: "f32[2304, 768][768, 1]cuda:0" = torch.ops.aten.copy_.default(arg581_1, getitem_1024); arg581_1 = getitem_1024 = copy__433 = None
+ copy__434: "f32[2304][1]cuda:0" = torch.ops.aten.copy_.default(arg582_1, getitem_1025); arg582_1 = getitem_1025 = copy__434 = None
+ copy__435: "f32[768, 768][768, 1]cuda:0" = torch.ops.aten.copy_.default(arg583_1, getitem_1026); arg583_1 = getitem_1026 = copy__435 = None
+ copy__436: "f32[768][1]cuda:0" = torch.ops.aten.copy_.default(arg584_1, getitem_1027); arg584_1 = getitem_1027 = copy__436 = None
+ copy__437: "f32[768][1]cuda:0" = torch.ops.aten.copy_.default(arg585_1, getitem_1028); arg585_1 = getitem_1028 = copy__437 = None
+ copy__438: "f32[768][1]cuda:0" = torch.ops.aten.copy_.default(arg586_1, getitem_1029); arg586_1 = getitem_1029 = copy__438 = None
+ copy__439: "f32[3072, 768][768, 1]cuda:0" = torch.ops.aten.copy_.default(arg587_1, getitem_1030); arg587_1 = getitem_1030 = copy__439 = None
+ copy__440: "f32[3072][1]cuda:0" = torch.ops.aten.copy_.default(arg588_1, getitem_1031); arg588_1 = getitem_1031 = copy__440 = None
+ copy__441: "f32[768, 3072][3072, 1]cuda:0" = torch.ops.aten.copy_.default(arg589_1, getitem_1032); arg589_1 = getitem_1032 = copy__441 = None
+ copy__442: "f32[768][1]cuda:0" = torch.ops.aten.copy_.default(arg590_1, getitem_1033); arg590_1 = getitem_1033 = copy__442 = None
+ copy__443: "f32[768][1]cuda:0" = torch.ops.aten.copy_.default(arg591_1, getitem_1034); arg591_1 = getitem_1034 = copy__443 = None
+ copy__444: "f32[768][1]cuda:0" = torch.ops.aten.copy_.default(arg592_1, getitem_1035); arg592_1 = getitem_1035 = copy__444 = None
+
+ # File: /data/users/jjwu/a/pytorch/torch/optim/adam.py:529 in _multi_tensor_adam, code: torch._foreach_add_(device_state_steps, 1)
+ copy__445: "f32[][]cuda:0" = torch.ops.aten.copy_.default(arg593_1, getitem); arg593_1 = getitem = copy__445 = None
+ copy__446: "f32[][]cuda:0" = torch.ops.aten.copy_.default(arg594_1, getitem_2); arg594_1 = getitem_2 = copy__446 = None
+ copy__447: "f32[][]cuda:0" = torch.ops.aten.copy_.default(arg595_1, getitem_3); arg595_1 = getitem_3 = copy__447 = None
+ copy__448: "f32[][]cuda:0" = torch.ops.aten.copy_.default(arg596_1, getitem_4); arg596_1 = getitem_4 = copy__448 = None
+ copy__449: "f32[][]cuda:0" = torch.ops.aten.copy_.default(arg597_1, getitem_5); arg597_1 = getitem_5 = copy__449 = None
+ copy__450: "f32[][]cuda:0" = torch.ops.aten.copy_.default(arg598_1, getitem_6); arg598_1 = getitem_6 = copy__450 = None
+ copy__451: "f32[][]cuda:0" = torch.ops.aten.copy_.default(arg599_1, getitem_7); arg599_1 = getitem_7 = copy__451 = None
+ copy__452: "f32[][]cuda:0" = torch.ops.aten.copy_.default(arg600_1, getitem_8); arg600_1 = getitem_8 = copy__452 = None
+ copy__453: "f32[][]cuda:0" = torch.ops.aten.copy_.default(arg601_1, getitem_9); arg601_1 = getitem_9 = copy__453 = None
+ copy__454: "f32[][]cuda:0" = torch.ops.aten.copy_.default(arg602_1, getitem_10); arg602_1 = getitem_10 = copy__454 = None
+ copy__455: "f32[][]cuda:0" = torch.ops.aten.copy_.default(arg603_1, getitem_11); arg603_1 = getitem_11 = copy__455 = None
+ copy__456: "f32[][]cuda:0" = torch.ops.aten.copy_.default(arg604_1, getitem_12); arg604_1 = getitem_12 = copy__456 = None
+ copy__457: "f32[][]cuda:0" = torch.ops.aten.copy_.default(arg605_1, getitem_13); arg605_1 = getitem_13 = copy__457 = None
+ copy__458: "f32[][]cuda:0" = torch.ops.aten.copy_.default(arg606_1, getitem_14); arg606_1 = getitem_14 = copy__458 = None
+ copy__459: "f32[][]cuda:0" = torch.ops.aten.copy_.default(arg607_1, getitem_15); arg607_1 = getitem_15 = copy__459 = None
+ copy__460: "f32[][]cuda:0" = torch.ops.aten.copy_.default(arg608_1, getitem_16); arg608_1 = getitem_16 = copy__460 = None
+ copy__461: "f32[][]cuda:0" = torch.ops.aten.copy_.default(arg609_1, getitem_17); arg609_1 = getitem_17 = copy__461 = None
+ copy__462: "f32[][]cuda:0" = torch.ops.aten.copy_.default(arg610_1, getitem_18); arg610_1 = getitem_18 = copy__462 = None
+ copy__463: "f32[][]cuda:0" = torch.ops.aten.copy_.default(arg611_1, getitem_19); arg611_1 = getitem_19 = copy__463 = None
+ copy__464: "f32[][]cuda:0" = torch.ops.aten.copy_.default(arg612_1, getitem_20); arg612_1 = getitem_20 = copy__464 = None
+ copy__465: "f32[][]cuda:0" = torch.ops.aten.copy_.default(arg613_1, getitem_21); arg613_1 = getitem_21 = copy__465 = None
+ copy__466: "f32[][]cuda:0" = torch.ops.aten.copy_.default(arg614_1, getitem_22); arg614_1 = getitem_22 = copy__466 = None
+ copy__467: "f32[][]cuda:0" = torch.ops.aten.copy_.default(arg615_1, getitem_23); arg615_1 = getitem_23 = copy__467 = None
+ copy__468: "f32[][]cuda:0" = torch.ops.aten.copy_.default(arg616_1, getitem_24); arg616_1 = getitem_24 = copy__468 = None
+ copy__469: "f32[][]cuda:0" = torch.ops.aten.copy_.default(arg617_1, getitem_25); arg617_1 = getitem_25 = copy__469 = None
+ copy__470: "f32[][]cuda:0" = torch.ops.aten.copy_.default(arg618_1, getitem_26); arg618_1 = getitem_26 = copy__470 = None
+ copy__471: "f32[][]cuda:0" = torch.ops.aten.copy_.default(arg619_1, getitem_27); arg619_1 = getitem_27 = copy__471 = None
+ copy__472: "f32[][]cuda:0" = torch.ops.aten.copy_.default(arg620_1, getitem_28); arg620_1 = getitem_28 = copy__472 = None
+ copy__473: "f32[][]cuda:0" = torch.ops.aten.copy_.default(arg621_1, getitem_29); arg621_1 = getitem_29 = copy__473 = None
+ copy__474: "f32[][]cuda:0" = torch.ops.aten.copy_.default(arg622_1, getitem_30); arg622_1 = getitem_30 = copy__474 = None
+ copy__475: "f32[][]cuda:0" = torch.ops.aten.copy_.default(arg623_1, getitem_31); arg623_1 = getitem_31 = copy__475 = None
+ copy__476: "f32[][]cuda:0" = torch.ops.aten.copy_.default(arg624_1, getitem_32); arg624_1 = getitem_32 = copy__476 = None
+ copy__477: "f32[][]cuda:0" = torch.ops.aten.copy_.default(arg625_1, getitem_33); arg625_1 = getitem_33 = copy__477 = None
+ copy__478: "f32[][]cuda:0" = torch.ops.aten.copy_.default(arg626_1, getitem_34); arg626_1 = getitem_34 = copy__478 = None
+ copy__479: "f32[][]cuda:0" = torch.ops.aten.copy_.default(arg627_1, getitem_35); arg627_1 = getitem_35 = copy__479 = None
+ copy__480: "f32[][]cuda:0" = torch.ops.aten.copy_.default(arg628_1, getitem_36); arg628_1 = getitem_36 = copy__480 = None
+ copy__481: "f32[][]cuda:0" = torch.ops.aten.copy_.default(arg629_1, getitem_37); arg629_1 = getitem_37 = copy__481 = None
+ copy__482: "f32[][]cuda:0" = torch.ops.aten.copy_.default(arg630_1, getitem_38); arg630_1 = getitem_38 = copy__482 = None
+ copy__483: "f32[][]cuda:0" = torch.ops.aten.copy_.default(arg631_1, getitem_39); arg631_1 = getitem_39 = copy__483 = None
+ copy__484: "f32[][]cuda:0" = torch.ops.aten.copy_.default(arg632_1, getitem_40); arg632_1 = getitem_40 = copy__484 = None
+ copy__485: "f32[][]cuda:0" = torch.ops.aten.copy_.default(arg633_1, getitem_41); arg633_1 = getitem_41 = copy__485 = None
+ copy__486: "f32[][]cuda:0" = torch.ops.aten.copy_.default(arg634_1, getitem_42); arg634_1 = getitem_42 = copy__486 = None
+ copy__487: "f32[][]cuda:0" = torch.ops.aten.copy_.default(arg635_1, getitem_43); arg635_1 = getitem_43 = copy__487 = None
+ copy__488: "f32[][]cuda:0" = torch.ops.aten.copy_.default(arg636_1, getitem_44); arg636_1 = getitem_44 = copy__488 = None
+ copy__489: "f32[][]cuda:0" = torch.ops.aten.copy_.default(arg637_1, getitem_45); arg637_1 = getitem_45 = copy__489 = None
+ copy__490: "f32[][]cuda:0" = torch.ops.aten.copy_.default(arg638_1, getitem_46); arg638_1 = getitem_46 = copy__490 = None
+ copy__491: "f32[][]cuda:0" = torch.ops.aten.copy_.default(arg639_1, getitem_47); arg639_1 = getitem_47 = copy__491 = None
+ copy__492: "f32[][]cuda:0" = torch.ops.aten.copy_.default(arg640_1, getitem_48); arg640_1 = getitem_48 = copy__492 = None
+ copy__493: "f32[][]cuda:0" = torch.ops.aten.copy_.default(arg641_1, getitem_49); arg641_1 = getitem_49 = copy__493 = None
+ copy__494: "f32[][]cuda:0" = torch.ops.aten.copy_.default(arg642_1, getitem_50); arg642_1 = getitem_50 = copy__494 = None
+ copy__495: "f32[][]cuda:0" = torch.ops.aten.copy_.default(arg643_1, getitem_51); arg643_1 = getitem_51 = copy__495 = None
+ copy__496: "f32[][]cuda:0" = torch.ops.aten.copy_.default(arg644_1, getitem_52); arg644_1 = getitem_52 = copy__496 = None
+ copy__497: "f32[][]cuda:0" = torch.ops.aten.copy_.default(arg645_1, getitem_53); arg645_1 = getitem_53 = copy__497 = None
+ copy__498: "f32[][]cuda:0" = torch.ops.aten.copy_.default(arg646_1, getitem_54); arg646_1 = getitem_54 = copy__498 = None
+ copy__499: "f32[][]cuda:0" = torch.ops.aten.copy_.default(arg647_1, getitem_55); arg647_1 = getitem_55 = copy__499 = None
+ copy__500: "f32[][]cuda:0" = torch.ops.aten.copy_.default(arg648_1,
getitem_56); arg648_1 = getitem_56 = copy__500 = None + copy__501: "f32[][]cuda:0" = torch.ops.aten.copy_.default(arg649_1, getitem_57); arg649_1 = getitem_57 = copy__501 = None + copy__502: "f32[][]cuda:0" = torch.ops.aten.copy_.default(arg650_1, getitem_58); arg650_1 = getitem_58 = copy__502 = None + copy__503: "f32[][]cuda:0" = torch.ops.aten.copy_.default(arg651_1, getitem_59); arg651_1 = getitem_59 = copy__503 = None + copy__504: "f32[][]cuda:0" = torch.ops.aten.copy_.default(arg652_1, getitem_60); arg652_1 = getitem_60 = copy__504 = None + copy__505: "f32[][]cuda:0" = torch.ops.aten.copy_.default(arg653_1, getitem_61); arg653_1 = getitem_61 = copy__505 = None + copy__506: "f32[][]cuda:0" = torch.ops.aten.copy_.default(arg654_1, getitem_62); arg654_1 = getitem_62 = copy__506 = None + copy__507: "f32[][]cuda:0" = torch.ops.aten.copy_.default(arg655_1, getitem_63); arg655_1 = getitem_63 = copy__507 = None + copy__508: "f32[][]cuda:0" = torch.ops.aten.copy_.default(arg656_1, getitem_64); arg656_1 = getitem_64 = copy__508 = None + copy__509: "f32[][]cuda:0" = torch.ops.aten.copy_.default(arg657_1, getitem_65); arg657_1 = getitem_65 = copy__509 = None + copy__510: "f32[][]cuda:0" = torch.ops.aten.copy_.default(arg658_1, getitem_66); arg658_1 = getitem_66 = copy__510 = None + copy__511: "f32[][]cuda:0" = torch.ops.aten.copy_.default(arg659_1, getitem_67); arg659_1 = getitem_67 = copy__511 = None + copy__512: "f32[][]cuda:0" = torch.ops.aten.copy_.default(arg660_1, getitem_68); arg660_1 = getitem_68 = copy__512 = None + copy__513: "f32[][]cuda:0" = torch.ops.aten.copy_.default(arg661_1, getitem_69); arg661_1 = getitem_69 = copy__513 = None + copy__514: "f32[][]cuda:0" = torch.ops.aten.copy_.default(arg662_1, getitem_70); arg662_1 = getitem_70 = copy__514 = None + copy__515: "f32[][]cuda:0" = torch.ops.aten.copy_.default(arg663_1, getitem_71); arg663_1 = getitem_71 = copy__515 = None + copy__516: "f32[][]cuda:0" = torch.ops.aten.copy_.default(arg664_1, getitem_72); arg664_1 = getitem_72 = copy__516 = None + copy__517: "f32[][]cuda:0" = torch.ops.aten.copy_.default(arg665_1, getitem_73); arg665_1 = getitem_73 = copy__517 = None + copy__518: "f32[][]cuda:0" = torch.ops.aten.copy_.default(arg666_1, getitem_74); arg666_1 = getitem_74 = copy__518 = None + copy__519: "f32[][]cuda:0" = torch.ops.aten.copy_.default(arg667_1, getitem_75); arg667_1 = getitem_75 = copy__519 = None + copy__520: "f32[][]cuda:0" = torch.ops.aten.copy_.default(arg668_1, getitem_76); arg668_1 = getitem_76 = copy__520 = None + copy__521: "f32[][]cuda:0" = torch.ops.aten.copy_.default(arg669_1, getitem_77); arg669_1 = getitem_77 = copy__521 = None + copy__522: "f32[][]cuda:0" = torch.ops.aten.copy_.default(arg670_1, getitem_78); arg670_1 = getitem_78 = copy__522 = None + copy__523: "f32[][]cuda:0" = torch.ops.aten.copy_.default(arg671_1, getitem_79); arg671_1 = getitem_79 = copy__523 = None + copy__524: "f32[][]cuda:0" = torch.ops.aten.copy_.default(arg672_1, getitem_80); arg672_1 = getitem_80 = copy__524 = None + copy__525: "f32[][]cuda:0" = torch.ops.aten.copy_.default(arg673_1, getitem_81); arg673_1 = getitem_81 = copy__525 = None + copy__526: "f32[][]cuda:0" = torch.ops.aten.copy_.default(arg674_1, getitem_82); arg674_1 = getitem_82 = copy__526 = None + copy__527: "f32[][]cuda:0" = torch.ops.aten.copy_.default(arg675_1, getitem_83); arg675_1 = getitem_83 = copy__527 = None + copy__528: "f32[][]cuda:0" = torch.ops.aten.copy_.default(arg676_1, getitem_84); arg676_1 = getitem_84 = copy__528 = None + copy__529: "f32[][]cuda:0" 
= torch.ops.aten.copy_.default(arg677_1, getitem_85); arg677_1 = getitem_85 = copy__529 = None + copy__530: "f32[][]cuda:0" = torch.ops.aten.copy_.default(arg678_1, getitem_86); arg678_1 = getitem_86 = copy__530 = None + copy__531: "f32[][]cuda:0" = torch.ops.aten.copy_.default(arg679_1, getitem_87); arg679_1 = getitem_87 = copy__531 = None + copy__532: "f32[][]cuda:0" = torch.ops.aten.copy_.default(arg680_1, getitem_88); arg680_1 = getitem_88 = copy__532 = None + copy__533: "f32[][]cuda:0" = torch.ops.aten.copy_.default(arg681_1, getitem_89); arg681_1 = getitem_89 = copy__533 = None + copy__534: "f32[][]cuda:0" = torch.ops.aten.copy_.default(arg682_1, getitem_90); arg682_1 = getitem_90 = copy__534 = None + copy__535: "f32[][]cuda:0" = torch.ops.aten.copy_.default(arg683_1, getitem_91); arg683_1 = getitem_91 = copy__535 = None + copy__536: "f32[][]cuda:0" = torch.ops.aten.copy_.default(arg684_1, getitem_92); arg684_1 = getitem_92 = copy__536 = None + copy__537: "f32[][]cuda:0" = torch.ops.aten.copy_.default(arg685_1, getitem_93); arg685_1 = getitem_93 = copy__537 = None + copy__538: "f32[][]cuda:0" = torch.ops.aten.copy_.default(arg686_1, getitem_94); arg686_1 = getitem_94 = copy__538 = None + copy__539: "f32[][]cuda:0" = torch.ops.aten.copy_.default(arg687_1, getitem_95); arg687_1 = getitem_95 = copy__539 = None + copy__540: "f32[][]cuda:0" = torch.ops.aten.copy_.default(arg688_1, getitem_96); arg688_1 = getitem_96 = copy__540 = None + copy__541: "f32[][]cuda:0" = torch.ops.aten.copy_.default(arg689_1, getitem_97); arg689_1 = getitem_97 = copy__541 = None + copy__542: "f32[][]cuda:0" = torch.ops.aten.copy_.default(arg690_1, getitem_98); arg690_1 = getitem_98 = copy__542 = None + copy__543: "f32[][]cuda:0" = torch.ops.aten.copy_.default(arg691_1, getitem_99); arg691_1 = getitem_99 = copy__543 = None + copy__544: "f32[][]cuda:0" = torch.ops.aten.copy_.default(arg692_1, getitem_100); arg692_1 = getitem_100 = copy__544 = None + copy__545: "f32[][]cuda:0" = torch.ops.aten.copy_.default(arg693_1, getitem_101); arg693_1 = getitem_101 = copy__545 = None + copy__546: "f32[][]cuda:0" = torch.ops.aten.copy_.default(arg694_1, getitem_102); arg694_1 = getitem_102 = copy__546 = None + copy__547: "f32[][]cuda:0" = torch.ops.aten.copy_.default(arg695_1, getitem_103); arg695_1 = getitem_103 = copy__547 = None + copy__548: "f32[][]cuda:0" = torch.ops.aten.copy_.default(arg696_1, getitem_104); arg696_1 = getitem_104 = copy__548 = None + copy__549: "f32[][]cuda:0" = torch.ops.aten.copy_.default(arg697_1, getitem_105); arg697_1 = getitem_105 = copy__549 = None + copy__550: "f32[][]cuda:0" = torch.ops.aten.copy_.default(arg698_1, getitem_106); arg698_1 = getitem_106 = copy__550 = None + copy__551: "f32[][]cuda:0" = torch.ops.aten.copy_.default(arg699_1, getitem_107); arg699_1 = getitem_107 = copy__551 = None + copy__552: "f32[][]cuda:0" = torch.ops.aten.copy_.default(arg700_1, getitem_108); arg700_1 = getitem_108 = copy__552 = None + copy__553: "f32[][]cuda:0" = torch.ops.aten.copy_.default(arg701_1, getitem_109); arg701_1 = getitem_109 = copy__553 = None + copy__554: "f32[][]cuda:0" = torch.ops.aten.copy_.default(arg702_1, getitem_110); arg702_1 = getitem_110 = copy__554 = None + copy__555: "f32[][]cuda:0" = torch.ops.aten.copy_.default(arg703_1, getitem_111); arg703_1 = getitem_111 = copy__555 = None + copy__556: "f32[][]cuda:0" = torch.ops.aten.copy_.default(arg704_1, getitem_112); arg704_1 = getitem_112 = copy__556 = None + copy__557: "f32[][]cuda:0" = torch.ops.aten.copy_.default(arg705_1, getitem_113); 
arg705_1 = getitem_113 = copy__557 = None + copy__558: "f32[][]cuda:0" = torch.ops.aten.copy_.default(arg706_1, getitem_114); arg706_1 = getitem_114 = copy__558 = None + copy__559: "f32[][]cuda:0" = torch.ops.aten.copy_.default(arg707_1, getitem_115); arg707_1 = getitem_115 = copy__559 = None + copy__560: "f32[][]cuda:0" = torch.ops.aten.copy_.default(arg708_1, getitem_116); arg708_1 = getitem_116 = copy__560 = None + copy__561: "f32[][]cuda:0" = torch.ops.aten.copy_.default(arg709_1, getitem_117); arg709_1 = getitem_117 = copy__561 = None + copy__562: "f32[][]cuda:0" = torch.ops.aten.copy_.default(arg710_1, getitem_118); arg710_1 = getitem_118 = copy__562 = None + copy__563: "f32[][]cuda:0" = torch.ops.aten.copy_.default(arg711_1, getitem_119); arg711_1 = getitem_119 = copy__563 = None + copy__564: "f32[][]cuda:0" = torch.ops.aten.copy_.default(arg712_1, getitem_120); arg712_1 = getitem_120 = copy__564 = None + copy__565: "f32[][]cuda:0" = torch.ops.aten.copy_.default(arg713_1, getitem_121); arg713_1 = getitem_121 = copy__565 = None + copy__566: "f32[][]cuda:0" = torch.ops.aten.copy_.default(arg714_1, getitem_122); arg714_1 = getitem_122 = copy__566 = None + copy__567: "f32[][]cuda:0" = torch.ops.aten.copy_.default(arg715_1, getitem_123); arg715_1 = getitem_123 = copy__567 = None + copy__568: "f32[][]cuda:0" = torch.ops.aten.copy_.default(arg716_1, getitem_124); arg716_1 = getitem_124 = copy__568 = None + copy__569: "f32[][]cuda:0" = torch.ops.aten.copy_.default(arg717_1, getitem_125); arg717_1 = getitem_125 = copy__569 = None + copy__570: "f32[][]cuda:0" = torch.ops.aten.copy_.default(arg718_1, getitem_126); arg718_1 = getitem_126 = copy__570 = None + copy__571: "f32[][]cuda:0" = torch.ops.aten.copy_.default(arg719_1, getitem_127); arg719_1 = getitem_127 = copy__571 = None + copy__572: "f32[][]cuda:0" = torch.ops.aten.copy_.default(arg720_1, getitem_128); arg720_1 = getitem_128 = copy__572 = None + copy__573: "f32[][]cuda:0" = torch.ops.aten.copy_.default(arg721_1, getitem_129); arg721_1 = getitem_129 = copy__573 = None + copy__574: "f32[][]cuda:0" = torch.ops.aten.copy_.default(arg722_1, getitem_130); arg722_1 = getitem_130 = copy__574 = None + copy__575: "f32[][]cuda:0" = torch.ops.aten.copy_.default(arg723_1, getitem_131); arg723_1 = getitem_131 = copy__575 = None + copy__576: "f32[][]cuda:0" = torch.ops.aten.copy_.default(arg724_1, getitem_132); arg724_1 = getitem_132 = copy__576 = None + copy__577: "f32[][]cuda:0" = torch.ops.aten.copy_.default(arg725_1, getitem_133); arg725_1 = getitem_133 = copy__577 = None + copy__578: "f32[][]cuda:0" = torch.ops.aten.copy_.default(arg726_1, getitem_134); arg726_1 = getitem_134 = copy__578 = None + copy__579: "f32[][]cuda:0" = torch.ops.aten.copy_.default(arg727_1, getitem_135); arg727_1 = getitem_135 = copy__579 = None + copy__580: "f32[][]cuda:0" = torch.ops.aten.copy_.default(arg728_1, getitem_136); arg728_1 = getitem_136 = copy__580 = None + copy__581: "f32[][]cuda:0" = torch.ops.aten.copy_.default(arg729_1, getitem_137); arg729_1 = getitem_137 = copy__581 = None + copy__582: "f32[][]cuda:0" = torch.ops.aten.copy_.default(arg730_1, getitem_138); arg730_1 = getitem_138 = copy__582 = None + copy__583: "f32[][]cuda:0" = torch.ops.aten.copy_.default(arg731_1, getitem_139); arg731_1 = getitem_139 = copy__583 = None + copy__584: "f32[][]cuda:0" = torch.ops.aten.copy_.default(arg732_1, getitem_140); arg732_1 = getitem_140 = copy__584 = None + copy__585: "f32[][]cuda:0" = torch.ops.aten.copy_.default(arg733_1, getitem_141); arg733_1 = getitem_141 = 
copy__585 = None + copy__586: "f32[][]cuda:0" = torch.ops.aten.copy_.default(arg734_1, getitem_142); arg734_1 = getitem_142 = copy__586 = None + copy__587: "f32[][]cuda:0" = torch.ops.aten.copy_.default(arg735_1, getitem_143); arg735_1 = getitem_143 = copy__587 = None + copy__588: "f32[][]cuda:0" = torch.ops.aten.copy_.default(arg736_1, getitem_144); arg736_1 = getitem_144 = copy__588 = None + copy__589: "f32[][]cuda:0" = torch.ops.aten.copy_.default(arg737_1, getitem_145); arg737_1 = getitem_145 = copy__589 = None + copy__590: "f32[][]cuda:0" = torch.ops.aten.copy_.default(arg738_1, getitem_146); arg738_1 = getitem_146 = copy__590 = None + copy__591: "f32[][]cuda:0" = torch.ops.aten.copy_.default(arg739_1, getitem_147); arg739_1 = getitem_147 = copy__591 = None + return () + +V0806 13:56:10.005000 4107173 torch/_dynamo/utils.py:824] {"chromium_event": {}, "has_payload": "da6c5df15068ccb1dd8f153f4756d68b"} + { + "name": "compile_fx.<locals>.fw_compiler_base", + "ts": 1722977770005635.0, + "args": null, + "ph": "B", + "pid": 0 + } +V0806 13:56:10.110000 4107173 torch/_dynamo/utils.py:824] {"chromium_event": {}, "has_payload": "9b1896e9c51f43a22f7ad4255b477e14"} + { + "name": "compile_fx_inner", + "ts": 1722977770110168.0, + "args": null, + "ph": "B", + "pid": 0 + } +V0806 13:56:10.110000 4107173 torch/_dynamo/utils.py:824] {"chromium_event": {}, "has_payload": "81061d19d2bd08c1aaf530a4a0981740"} + { + "name": "inductor_compile", + "ts": 1722977770110407.2, + "args": null, + "ph": "B", + "pid": 0 + } +V0806 13:56:10.815000 4107173 torch/_inductor/compile_fx.py:719] {"artifact": {"name": "fx_graph_runnable", "encoding": "string"}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0, "has_payload": "0d4da5461779497f73f310d8bf96bf31"} + + import torch + from torch import tensor, device + import torch.fx as fx + from torch._dynamo.testing import rand_strided + from math import inf + import torch._inductor.inductor_prims + + import torch._dynamo.config + import torch._inductor.config + import torch._functorch.config + import torch.fx.experimental._config + + torch._inductor.config.triton.cudagraphs = True + torch._functorch.config.unlift_effect_tokens = True + + + + isolate_fails_code_str = None + + + + # torch version: 2.5.0a0+git6fbc72b + # torch cuda version: 12.0 + # torch git version: 6fbc72b6d764eaeb9ef896840c7996ca2a35188d + + + # CUDA Info: + # nvcc: NVIDIA (R) Cuda compiler driver + # Copyright (c) 2005-2023 NVIDIA Corporation + # Built on Fri_Jan__6_16:45:21_PST_2023 + # Cuda compilation tools, release 12.0, V12.0.140 + # Build cuda_12.0.r12.0/compiler.32267302_0 + + # GPU Hardware Info: + # NVIDIA H100 : 1 + + + from torch.nn import * + class Repro(torch.nn.Module): + def __init__(self) -> None: + super().__init__() + + + + def forward(self, arg0_1, arg1_1, arg2_1, arg3_1, arg4_1, arg5_1, arg6_1, arg7_1, arg8_1, arg9_1, arg10_1, arg11_1, arg12_1, arg13_1, arg14_1, arg15_1, arg16_1, arg17_1, arg18_1, arg19_1, arg20_1, arg21_1, arg22_1, arg23_1, arg24_1, arg25_1, arg26_1, arg27_1, arg28_1, arg29_1, arg30_1, arg31_1, arg32_1, arg33_1, arg34_1, arg35_1, arg36_1, arg37_1, arg38_1, arg39_1, arg40_1, arg41_1, arg42_1, arg43_1, arg44_1, arg45_1, arg46_1, arg47_1, arg48_1, arg49_1, arg50_1, arg51_1, arg52_1, arg53_1, arg54_1, arg55_1, arg56_1, arg57_1, arg58_1, arg59_1, arg60_1, arg61_1, arg62_1, arg63_1, arg64_1, arg65_1, arg66_1, arg67_1, arg68_1, arg69_1, arg70_1, arg71_1, arg72_1, arg73_1, arg74_1, arg75_1, arg76_1, arg77_1, arg78_1, arg79_1, arg80_1, arg81_1, arg82_1, arg83_1, arg84_1, arg85_1,
arg86_1, arg87_1, arg88_1, arg89_1, arg90_1, arg91_1, arg92_1, arg93_1, arg94_1, arg95_1, arg96_1, arg97_1, arg98_1, arg99_1, arg100_1, arg101_1, arg102_1, arg103_1, arg104_1, arg105_1, arg106_1, arg107_1, arg108_1, arg109_1, arg110_1, arg111_1, arg112_1, arg113_1, arg114_1, arg115_1, arg116_1, arg117_1, arg118_1, arg119_1, arg120_1, arg121_1, arg122_1, arg123_1, arg124_1, arg125_1, arg126_1, arg127_1, arg128_1, arg129_1, arg130_1, arg131_1, arg132_1, arg133_1, arg134_1, arg135_1, arg136_1, arg137_1, arg138_1, arg139_1, arg140_1, arg141_1, arg142_1, arg143_1, arg144_1, arg145_1, arg146_1, arg147_1, arg148_1, arg149_1, arg150_1, arg151_1, arg152_1, arg153_1, arg154_1, arg155_1, arg156_1, arg157_1, arg158_1, arg159_1, arg160_1, arg161_1, arg162_1, arg163_1, arg164_1, arg165_1, arg166_1, arg167_1, arg168_1, arg169_1, arg170_1, arg171_1, arg172_1, arg173_1, arg174_1, arg175_1, arg176_1, arg177_1, arg178_1, arg179_1, arg180_1, arg181_1, arg182_1, arg183_1, arg184_1, arg185_1, arg186_1, arg187_1, arg188_1, arg189_1, arg190_1, arg191_1, arg192_1, arg193_1, arg194_1, arg195_1, arg196_1, arg197_1, arg198_1, arg199_1, arg200_1, arg201_1, arg202_1, arg203_1, arg204_1, arg205_1, arg206_1, arg207_1, arg208_1, arg209_1, arg210_1, arg211_1, arg212_1, arg213_1, arg214_1, arg215_1, arg216_1, arg217_1, arg218_1, arg219_1, arg220_1, arg221_1, arg222_1, arg223_1, arg224_1, arg225_1, arg226_1, arg227_1, arg228_1, arg229_1, arg230_1, arg231_1, arg232_1, arg233_1, arg234_1, arg235_1, arg236_1, arg237_1, arg238_1, arg239_1, arg240_1, arg241_1, arg242_1, arg243_1, arg244_1, arg245_1, arg246_1, arg247_1, arg248_1, arg249_1, arg250_1, arg251_1, arg252_1, arg253_1, arg254_1, arg255_1, arg256_1, arg257_1, arg258_1, arg259_1, arg260_1, arg261_1, arg262_1, arg263_1, arg264_1, arg265_1, arg266_1, arg267_1, arg268_1, arg269_1, arg270_1, arg271_1, arg272_1, arg273_1, arg274_1, arg275_1, arg276_1, arg277_1, arg278_1, arg279_1, arg280_1, arg281_1, arg282_1, arg283_1, arg284_1, arg285_1, arg286_1, arg287_1, arg288_1, arg289_1, arg290_1, arg291_1, arg292_1, arg293_1, arg294_1, arg295_1, arg296_1, arg297_1, arg298_1, arg299_1, arg300_1, arg301_1, arg302_1, arg303_1, arg304_1, arg305_1, arg306_1, arg307_1, arg308_1, arg309_1, arg310_1, arg311_1, arg312_1, arg313_1, arg314_1, arg315_1, arg316_1, arg317_1, arg318_1, arg319_1, arg320_1, arg321_1, arg322_1, arg323_1, arg324_1, arg325_1, arg326_1, arg327_1, arg328_1, arg329_1, arg330_1, arg331_1, arg332_1, arg333_1, arg334_1, arg335_1, arg336_1, arg337_1, arg338_1, arg339_1, arg340_1, arg341_1, arg342_1, arg343_1, arg344_1, arg345_1, arg346_1, arg347_1, arg348_1, arg349_1, arg350_1, arg351_1, arg352_1, arg353_1, arg354_1, arg355_1, arg356_1, arg357_1, arg358_1, arg359_1, arg360_1, arg361_1, arg362_1, arg363_1, arg364_1, arg365_1, arg366_1, arg367_1, arg368_1, arg369_1, arg370_1, arg371_1, arg372_1, arg373_1, arg374_1, arg375_1, arg376_1, arg377_1, arg378_1, arg379_1, arg380_1, arg381_1, arg382_1, arg383_1, arg384_1, arg385_1, arg386_1, arg387_1, arg388_1, arg389_1, arg390_1, arg391_1, arg392_1, arg393_1, arg394_1, arg395_1, arg396_1, arg397_1, arg398_1, arg399_1, arg400_1, arg401_1, arg402_1, arg403_1, arg404_1, arg405_1, arg406_1, arg407_1, arg408_1, arg409_1, arg410_1, arg411_1, arg412_1, arg413_1, arg414_1, arg415_1, arg416_1, arg417_1, arg418_1, arg419_1, arg420_1, arg421_1, arg422_1, arg423_1, arg424_1, arg425_1, arg426_1, arg427_1, arg428_1, arg429_1, arg430_1, arg431_1, arg432_1, arg433_1, arg434_1, arg435_1, arg436_1, arg437_1, arg438_1, arg439_1, arg440_1, arg441_1, 
arg442_1, arg443_1, arg444_1, arg445_1, arg446_1, arg447_1, arg448_1, arg449_1, arg450_1, arg451_1, arg452_1, arg453_1, arg454_1, arg455_1, arg456_1, arg457_1, arg458_1, arg459_1, arg460_1, arg461_1, arg462_1, arg463_1, arg464_1, arg465_1, arg466_1, arg467_1, arg468_1, arg469_1, arg470_1, arg471_1, arg472_1, arg473_1, arg474_1, arg475_1, arg476_1, arg477_1, arg478_1, arg479_1, arg480_1, arg481_1, arg482_1, arg483_1, arg484_1, arg485_1, arg486_1, arg487_1, arg488_1, arg489_1, arg490_1, arg491_1, arg492_1, arg493_1, arg494_1, arg495_1, arg496_1, arg497_1, arg498_1, arg499_1, arg500_1, arg501_1, arg502_1, arg503_1, arg504_1, arg505_1, arg506_1, arg507_1, arg508_1, arg509_1, arg510_1, arg511_1, arg512_1, arg513_1, arg514_1, arg515_1, arg516_1, arg517_1, arg518_1, arg519_1, arg520_1, arg521_1, arg522_1, arg523_1, arg524_1, arg525_1, arg526_1, arg527_1, arg528_1, arg529_1, arg530_1, arg531_1, arg532_1, arg533_1, arg534_1, arg535_1, arg536_1, arg537_1, arg538_1, arg539_1, arg540_1, arg541_1, arg542_1, arg543_1, arg544_1, arg545_1, arg546_1, arg547_1, arg548_1, arg549_1, arg550_1, arg551_1, arg552_1, arg553_1, arg554_1, arg555_1, arg556_1, arg557_1, arg558_1, arg559_1, arg560_1, arg561_1, arg562_1, arg563_1, arg564_1, arg565_1, arg566_1, arg567_1, arg568_1, arg569_1, arg570_1, arg571_1, arg572_1, arg573_1, arg574_1, arg575_1, arg576_1, arg577_1, arg578_1, arg579_1, arg580_1, arg581_1, arg582_1, arg583_1, arg584_1, arg585_1, arg586_1, arg587_1, arg588_1, arg589_1, arg590_1, arg591_1, arg592_1, arg593_1, arg594_1, arg595_1, arg596_1, arg597_1, arg598_1, arg599_1, arg600_1, arg601_1, arg602_1, arg603_1, arg604_1, arg605_1, arg606_1, arg607_1, arg608_1, arg609_1, arg610_1, arg611_1, arg612_1, arg613_1, arg614_1, arg615_1, arg616_1, arg617_1, arg618_1, arg619_1, arg620_1, arg621_1, arg622_1, arg623_1, arg624_1, arg625_1, arg626_1, arg627_1, arg628_1, arg629_1, arg630_1, arg631_1, arg632_1, arg633_1, arg634_1, arg635_1, arg636_1, arg637_1, arg638_1, arg639_1, arg640_1, arg641_1, arg642_1, arg643_1, arg644_1, arg645_1, arg646_1, arg647_1, arg648_1, arg649_1, arg650_1, arg651_1, arg652_1, arg653_1, arg654_1, arg655_1, arg656_1, arg657_1, arg658_1, arg659_1, arg660_1, arg661_1, arg662_1, arg663_1, arg664_1, arg665_1, arg666_1, arg667_1, arg668_1, arg669_1, arg670_1, arg671_1, arg672_1, arg673_1, arg674_1, arg675_1, arg676_1, arg677_1, arg678_1, arg679_1, arg680_1, arg681_1, arg682_1, arg683_1, arg684_1, arg685_1, arg686_1, arg687_1, arg688_1, arg689_1, arg690_1, arg691_1, arg692_1, arg693_1, arg694_1, arg695_1, arg696_1, arg697_1, arg698_1, arg699_1, arg700_1, arg701_1, arg702_1, arg703_1, arg704_1, arg705_1, arg706_1, arg707_1, arg708_1, arg709_1, arg710_1, arg711_1, arg712_1, arg713_1, arg714_1, arg715_1, arg716_1, arg717_1, arg718_1, arg719_1, arg720_1, arg721_1, arg722_1, arg723_1, arg724_1, arg725_1, arg726_1, arg727_1, arg728_1, arg729_1, arg730_1, arg731_1, arg732_1, arg733_1, arg734_1, arg735_1, arg736_1, arg737_1, arg738_1, arg739_1): + _foreach_add = torch.ops.aten._foreach_add.Scalar([arg593_1, arg148_1, arg594_1, arg595_1, arg596_1, arg597_1, arg598_1, arg599_1, arg600_1, arg601_1, arg602_1, arg603_1, arg604_1, arg605_1, arg606_1, arg607_1, arg608_1, arg609_1, arg610_1, arg611_1, arg612_1, arg613_1, arg614_1, arg615_1, arg616_1, arg617_1, arg618_1, arg619_1, arg620_1, arg621_1, arg622_1, arg623_1, arg624_1, arg625_1, arg626_1, arg627_1, arg628_1, arg629_1, arg630_1, arg631_1, arg632_1, arg633_1, arg634_1, arg635_1, arg636_1, arg637_1, arg638_1, arg639_1, arg640_1, arg641_1, arg642_1, arg643_1, 
arg644_1, arg645_1, arg646_1, arg647_1, arg648_1, arg649_1, arg650_1, arg651_1, arg652_1, arg653_1, arg654_1, arg655_1, arg656_1, arg657_1, arg658_1, arg659_1, arg660_1, arg661_1, arg662_1, arg663_1, arg664_1, arg665_1, arg666_1, arg667_1, arg668_1, arg669_1, arg670_1, arg671_1, arg672_1, arg673_1, arg674_1, arg675_1, arg676_1, arg677_1, arg678_1, arg679_1, arg680_1, arg681_1, arg682_1, arg683_1, arg684_1, arg685_1, arg686_1, arg687_1, arg688_1, arg689_1, arg690_1, arg691_1, arg692_1, arg693_1, arg694_1, arg695_1, arg696_1, arg697_1, arg698_1, arg699_1, arg700_1, arg701_1, arg702_1, arg703_1, arg704_1, arg705_1, arg706_1, arg707_1, arg708_1, arg709_1, arg710_1, arg711_1, arg712_1, arg713_1, arg714_1, arg715_1, arg716_1, arg717_1, arg718_1, arg719_1, arg720_1, arg721_1, arg722_1, arg723_1, arg724_1, arg725_1, arg726_1, arg727_1, arg728_1, arg729_1, arg730_1, arg731_1, arg732_1, arg733_1, arg734_1, arg735_1, arg736_1, arg737_1, arg738_1, arg739_1], 1) + getitem = _foreach_add[0] + getitem_1 = _foreach_add[1] + getitem_2 = _foreach_add[2] + getitem_3 = _foreach_add[3] + getitem_4 = _foreach_add[4] + getitem_5 = _foreach_add[5] + getitem_6 = _foreach_add[6] + getitem_7 = _foreach_add[7] + getitem_8 = _foreach_add[8] + getitem_9 = _foreach_add[9] + getitem_10 = _foreach_add[10] + getitem_11 = _foreach_add[11] + getitem_12 = _foreach_add[12] + getitem_13 = _foreach_add[13] + getitem_14 = _foreach_add[14] + getitem_15 = _foreach_add[15] + getitem_16 = _foreach_add[16] + getitem_17 = _foreach_add[17] + getitem_18 = _foreach_add[18] + getitem_19 = _foreach_add[19] + getitem_20 = _foreach_add[20] + getitem_21 = _foreach_add[21] + getitem_22 = _foreach_add[22] + getitem_23 = _foreach_add[23] + getitem_24 = _foreach_add[24] + getitem_25 = _foreach_add[25] + getitem_26 = _foreach_add[26] + getitem_27 = _foreach_add[27] + getitem_28 = _foreach_add[28] + getitem_29 = _foreach_add[29] + getitem_30 = _foreach_add[30] + getitem_31 = _foreach_add[31] + getitem_32 = _foreach_add[32] + getitem_33 = _foreach_add[33] + getitem_34 = _foreach_add[34] + getitem_35 = _foreach_add[35] + getitem_36 = _foreach_add[36] + getitem_37 = _foreach_add[37] + getitem_38 = _foreach_add[38] + getitem_39 = _foreach_add[39] + getitem_40 = _foreach_add[40] + getitem_41 = _foreach_add[41] + getitem_42 = _foreach_add[42] + getitem_43 = _foreach_add[43] + getitem_44 = _foreach_add[44] + getitem_45 = _foreach_add[45] + getitem_46 = _foreach_add[46] + getitem_47 = _foreach_add[47] + getitem_48 = _foreach_add[48] + getitem_49 = _foreach_add[49] + getitem_50 = _foreach_add[50] + getitem_51 = _foreach_add[51] + getitem_52 = _foreach_add[52] + getitem_53 = _foreach_add[53] + getitem_54 = _foreach_add[54] + getitem_55 = _foreach_add[55] + getitem_56 = _foreach_add[56] + getitem_57 = _foreach_add[57] + getitem_58 = _foreach_add[58] + getitem_59 = _foreach_add[59] + getitem_60 = _foreach_add[60] + getitem_61 = _foreach_add[61] + getitem_62 = _foreach_add[62] + getitem_63 = _foreach_add[63] + getitem_64 = _foreach_add[64] + getitem_65 = _foreach_add[65] + getitem_66 = _foreach_add[66] + getitem_67 = _foreach_add[67] + getitem_68 = _foreach_add[68] + getitem_69 = _foreach_add[69] + getitem_70 = _foreach_add[70] + getitem_71 = _foreach_add[71] + getitem_72 = _foreach_add[72] + getitem_73 = _foreach_add[73] + getitem_74 = _foreach_add[74] + getitem_75 = _foreach_add[75] + getitem_76 = _foreach_add[76] + getitem_77 = _foreach_add[77] + getitem_78 = _foreach_add[78] + getitem_79 = _foreach_add[79] + getitem_80 = _foreach_add[80] + getitem_81 = 
_foreach_add[81] + getitem_82 = _foreach_add[82] + getitem_83 = _foreach_add[83] + getitem_84 = _foreach_add[84] + getitem_85 = _foreach_add[85] + getitem_86 = _foreach_add[86] + getitem_87 = _foreach_add[87] + getitem_88 = _foreach_add[88] + getitem_89 = _foreach_add[89] + getitem_90 = _foreach_add[90] + getitem_91 = _foreach_add[91] + getitem_92 = _foreach_add[92] + getitem_93 = _foreach_add[93] + getitem_94 = _foreach_add[94] + getitem_95 = _foreach_add[95] + getitem_96 = _foreach_add[96] + getitem_97 = _foreach_add[97] + getitem_98 = _foreach_add[98] + getitem_99 = _foreach_add[99] + getitem_100 = _foreach_add[100] + getitem_101 = _foreach_add[101] + getitem_102 = _foreach_add[102] + getitem_103 = _foreach_add[103] + getitem_104 = _foreach_add[104] + getitem_105 = _foreach_add[105] + getitem_106 = _foreach_add[106] + getitem_107 = _foreach_add[107] + getitem_108 = _foreach_add[108] + getitem_109 = _foreach_add[109] + getitem_110 = _foreach_add[110] + getitem_111 = _foreach_add[111] + getitem_112 = _foreach_add[112] + getitem_113 = _foreach_add[113] + getitem_114 = _foreach_add[114] + getitem_115 = _foreach_add[115] + getitem_116 = _foreach_add[116] + getitem_117 = _foreach_add[117] + getitem_118 = _foreach_add[118] + getitem_119 = _foreach_add[119] + getitem_120 = _foreach_add[120] + getitem_121 = _foreach_add[121] + getitem_122 = _foreach_add[122] + getitem_123 = _foreach_add[123] + getitem_124 = _foreach_add[124] + getitem_125 = _foreach_add[125] + getitem_126 = _foreach_add[126] + getitem_127 = _foreach_add[127] + getitem_128 = _foreach_add[128] + getitem_129 = _foreach_add[129] + getitem_130 = _foreach_add[130] + getitem_131 = _foreach_add[131] + getitem_132 = _foreach_add[132] + getitem_133 = _foreach_add[133] + getitem_134 = _foreach_add[134] + getitem_135 = _foreach_add[135] + getitem_136 = _foreach_add[136] + getitem_137 = _foreach_add[137] + getitem_138 = _foreach_add[138] + getitem_139 = _foreach_add[139] + getitem_140 = _foreach_add[140] + getitem_141 = _foreach_add[141] + getitem_142 = _foreach_add[142] + getitem_143 = _foreach_add[143] + getitem_144 = _foreach_add[144] + getitem_145 = _foreach_add[145] + getitem_146 = _foreach_add[146] + getitem_147 = _foreach_add[147]; _foreach_add = None + _foreach_sub = torch.ops.aten._foreach_sub.List([arg151_1, arg152_1, arg153_1, arg154_1, arg155_1, arg156_1, arg157_1, arg158_1, arg159_1, arg160_1, arg161_1, arg162_1, arg163_1, arg164_1, arg165_1, arg166_1, arg167_1, arg168_1, arg169_1, arg170_1, arg171_1, arg172_1, arg173_1, arg174_1, arg175_1, arg176_1, arg177_1, arg178_1, arg179_1, arg180_1, arg181_1, arg182_1, arg183_1, arg184_1, arg185_1, arg186_1, arg187_1, arg188_1, arg189_1, arg190_1, arg191_1, arg192_1, arg193_1, arg194_1, arg195_1, arg196_1, arg197_1, arg198_1, arg199_1, arg200_1, arg201_1, arg202_1, arg203_1, arg204_1, arg205_1, arg206_1, arg207_1, arg208_1, arg209_1, arg210_1, arg211_1, arg212_1, arg213_1, arg214_1, arg215_1, arg216_1, arg217_1, arg218_1, arg219_1, arg220_1, arg221_1, arg222_1, arg223_1, arg224_1, arg225_1, arg226_1, arg227_1, arg228_1, arg229_1, arg230_1, arg231_1, arg232_1, arg233_1, arg234_1, arg235_1, arg236_1, arg237_1, arg238_1, arg239_1, arg240_1, arg241_1, arg242_1, arg243_1, arg244_1, arg245_1, arg246_1, arg247_1, arg248_1, arg249_1, arg250_1, arg251_1, arg252_1, arg253_1, arg254_1, arg255_1, arg256_1, arg257_1, arg258_1, arg259_1, arg260_1, arg261_1, arg262_1, arg263_1, arg264_1, arg265_1, arg266_1, arg267_1, arg268_1, arg269_1, arg270_1, arg271_1, arg272_1, arg273_1, arg274_1, arg275_1, 
arg276_1, arg277_1, arg278_1, arg279_1, arg280_1, arg281_1, arg282_1, arg283_1, arg284_1, arg285_1, arg286_1, arg287_1, arg288_1, arg289_1, arg290_1, arg291_1, arg292_1, arg293_1, arg294_1, arg295_1, arg296_1, arg297_1, arg298_1], [arg299_1, arg149_1, arg300_1, arg301_1, arg302_1, arg303_1, arg304_1, arg305_1, arg306_1, arg307_1, arg308_1, arg309_1, arg310_1, arg311_1, arg312_1, arg313_1, arg314_1, arg315_1, arg316_1, arg317_1, arg318_1, arg319_1, arg320_1, arg321_1, arg322_1, arg323_1, arg324_1, arg325_1, arg326_1, arg327_1, arg328_1, arg329_1, arg330_1, arg331_1, arg332_1, arg333_1, arg334_1, arg335_1, arg336_1, arg337_1, arg338_1, arg339_1, arg340_1, arg341_1, arg342_1, arg343_1, arg344_1, arg345_1, arg346_1, arg347_1, arg348_1, arg349_1, arg350_1, arg351_1, arg352_1, arg353_1, arg354_1, arg355_1, arg356_1, arg357_1, arg358_1, arg359_1, arg360_1, arg361_1, arg362_1, arg363_1, arg364_1, arg365_1, arg366_1, arg367_1, arg368_1, arg369_1, arg370_1, arg371_1, arg372_1, arg373_1, arg374_1, arg375_1, arg376_1, arg377_1, arg378_1, arg379_1, arg380_1, arg381_1, arg382_1, arg383_1, arg384_1, arg385_1, arg386_1, arg387_1, arg388_1, arg389_1, arg390_1, arg391_1, arg392_1, arg393_1, arg394_1, arg395_1, arg396_1, arg397_1, arg398_1, arg399_1, arg400_1, arg401_1, arg402_1, arg403_1, arg404_1, arg405_1, arg406_1, arg407_1, arg408_1, arg409_1, arg410_1, arg411_1, arg412_1, arg413_1, arg414_1, arg415_1, arg416_1, arg417_1, arg418_1, arg419_1, arg420_1, arg421_1, arg422_1, arg423_1, arg424_1, arg425_1, arg426_1, arg427_1, arg428_1, arg429_1, arg430_1, arg431_1, arg432_1, arg433_1, arg434_1, arg435_1, arg436_1, arg437_1, arg438_1, arg439_1, arg440_1, arg441_1, arg442_1, arg443_1, arg444_1, arg445_1]) + getitem_148 = _foreach_sub[0] + getitem_149 = _foreach_sub[1] + getitem_150 = _foreach_sub[2] + getitem_151 = _foreach_sub[3] + getitem_152 = _foreach_sub[4] + getitem_153 = _foreach_sub[5] + getitem_154 = _foreach_sub[6] + getitem_155 = _foreach_sub[7] + getitem_156 = _foreach_sub[8] + getitem_157 = _foreach_sub[9] + getitem_158 = _foreach_sub[10] + getitem_159 = _foreach_sub[11] + getitem_160 = _foreach_sub[12] + getitem_161 = _foreach_sub[13] + getitem_162 = _foreach_sub[14] + getitem_163 = _foreach_sub[15] + getitem_164 = _foreach_sub[16] + getitem_165 = _foreach_sub[17] + getitem_166 = _foreach_sub[18] + getitem_167 = _foreach_sub[19] + getitem_168 = _foreach_sub[20] + getitem_169 = _foreach_sub[21] + getitem_170 = _foreach_sub[22] + getitem_171 = _foreach_sub[23] + getitem_172 = _foreach_sub[24] + getitem_173 = _foreach_sub[25] + getitem_174 = _foreach_sub[26] + getitem_175 = _foreach_sub[27] + getitem_176 = _foreach_sub[28] + getitem_177 = _foreach_sub[29] + getitem_178 = _foreach_sub[30] + getitem_179 = _foreach_sub[31] + getitem_180 = _foreach_sub[32] + getitem_181 = _foreach_sub[33] + getitem_182 = _foreach_sub[34] + getitem_183 = _foreach_sub[35] + getitem_184 = _foreach_sub[36] + getitem_185 = _foreach_sub[37] + getitem_186 = _foreach_sub[38] + getitem_187 = _foreach_sub[39] + getitem_188 = _foreach_sub[40] + getitem_189 = _foreach_sub[41] + getitem_190 = _foreach_sub[42] + getitem_191 = _foreach_sub[43] + getitem_192 = _foreach_sub[44] + getitem_193 = _foreach_sub[45] + getitem_194 = _foreach_sub[46] + getitem_195 = _foreach_sub[47] + getitem_196 = _foreach_sub[48] + getitem_197 = _foreach_sub[49] + getitem_198 = _foreach_sub[50] + getitem_199 = _foreach_sub[51] + getitem_200 = _foreach_sub[52] + getitem_201 = _foreach_sub[53] + getitem_202 = _foreach_sub[54] + getitem_203 = _foreach_sub[55] + 
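# NOTE (reader annotation, not captured log output; assumes Adam's default beta1 = 0.9): the _foreach_sub above computes grad - exp_avg for each of the 148 parameters, + # the _foreach_mul below scales that difference by 0.09999999999999998 (1 - beta1 in float64), and the _foreach_add.List after it adds the result back into exp_avg, + # which appears to be the functionalized decomposition of exp_avg.lerp_(grad, 1 - beta1) from _multi_tensor_adam: exp_avg = exp_avg + (grad - exp_avg) * (1 - beta1). +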
getitem_204 = _foreach_sub[56] + getitem_205 = _foreach_sub[57] + getitem_206 = _foreach_sub[58] + getitem_207 = _foreach_sub[59] + getitem_208 = _foreach_sub[60] + getitem_209 = _foreach_sub[61] + getitem_210 = _foreach_sub[62] + getitem_211 = _foreach_sub[63] + getitem_212 = _foreach_sub[64] + getitem_213 = _foreach_sub[65] + getitem_214 = _foreach_sub[66] + getitem_215 = _foreach_sub[67] + getitem_216 = _foreach_sub[68] + getitem_217 = _foreach_sub[69] + getitem_218 = _foreach_sub[70] + getitem_219 = _foreach_sub[71] + getitem_220 = _foreach_sub[72] + getitem_221 = _foreach_sub[73] + getitem_222 = _foreach_sub[74] + getitem_223 = _foreach_sub[75] + getitem_224 = _foreach_sub[76] + getitem_225 = _foreach_sub[77] + getitem_226 = _foreach_sub[78] + getitem_227 = _foreach_sub[79] + getitem_228 = _foreach_sub[80] + getitem_229 = _foreach_sub[81] + getitem_230 = _foreach_sub[82] + getitem_231 = _foreach_sub[83] + getitem_232 = _foreach_sub[84] + getitem_233 = _foreach_sub[85] + getitem_234 = _foreach_sub[86] + getitem_235 = _foreach_sub[87] + getitem_236 = _foreach_sub[88] + getitem_237 = _foreach_sub[89] + getitem_238 = _foreach_sub[90] + getitem_239 = _foreach_sub[91] + getitem_240 = _foreach_sub[92] + getitem_241 = _foreach_sub[93] + getitem_242 = _foreach_sub[94] + getitem_243 = _foreach_sub[95] + getitem_244 = _foreach_sub[96] + getitem_245 = _foreach_sub[97] + getitem_246 = _foreach_sub[98] + getitem_247 = _foreach_sub[99] + getitem_248 = _foreach_sub[100] + getitem_249 = _foreach_sub[101] + getitem_250 = _foreach_sub[102] + getitem_251 = _foreach_sub[103] + getitem_252 = _foreach_sub[104] + getitem_253 = _foreach_sub[105] + getitem_254 = _foreach_sub[106] + getitem_255 = _foreach_sub[107] + getitem_256 = _foreach_sub[108] + getitem_257 = _foreach_sub[109] + getitem_258 = _foreach_sub[110] + getitem_259 = _foreach_sub[111] + getitem_260 = _foreach_sub[112] + getitem_261 = _foreach_sub[113] + getitem_262 = _foreach_sub[114] + getitem_263 = _foreach_sub[115] + getitem_264 = _foreach_sub[116] + getitem_265 = _foreach_sub[117] + getitem_266 = _foreach_sub[118] + getitem_267 = _foreach_sub[119] + getitem_268 = _foreach_sub[120] + getitem_269 = _foreach_sub[121] + getitem_270 = _foreach_sub[122] + getitem_271 = _foreach_sub[123] + getitem_272 = _foreach_sub[124] + getitem_273 = _foreach_sub[125] + getitem_274 = _foreach_sub[126] + getitem_275 = _foreach_sub[127] + getitem_276 = _foreach_sub[128] + getitem_277 = _foreach_sub[129] + getitem_278 = _foreach_sub[130] + getitem_279 = _foreach_sub[131] + getitem_280 = _foreach_sub[132] + getitem_281 = _foreach_sub[133] + getitem_282 = _foreach_sub[134] + getitem_283 = _foreach_sub[135] + getitem_284 = _foreach_sub[136] + getitem_285 = _foreach_sub[137] + getitem_286 = _foreach_sub[138] + getitem_287 = _foreach_sub[139] + getitem_288 = _foreach_sub[140] + getitem_289 = _foreach_sub[141] + getitem_290 = _foreach_sub[142] + getitem_291 = _foreach_sub[143] + getitem_292 = _foreach_sub[144] + getitem_293 = _foreach_sub[145] + getitem_294 = _foreach_sub[146] + getitem_295 = _foreach_sub[147]; _foreach_sub = None + _foreach_mul = torch.ops.aten._foreach_mul.Scalar([getitem_148, getitem_149, getitem_150, getitem_151, getitem_152, getitem_153, getitem_154, getitem_155, getitem_156, getitem_157, getitem_158, getitem_159, getitem_160, getitem_161, getitem_162, getitem_163, getitem_164, getitem_165, getitem_166, getitem_167, getitem_168, getitem_169, getitem_170, getitem_171, getitem_172, getitem_173, getitem_174, getitem_175, getitem_176, getitem_177, 
getitem_178, getitem_179, getitem_180, getitem_181, getitem_182, getitem_183, getitem_184, getitem_185, getitem_186, getitem_187, getitem_188, getitem_189, getitem_190, getitem_191, getitem_192, getitem_193, getitem_194, getitem_195, getitem_196, getitem_197, getitem_198, getitem_199, getitem_200, getitem_201, getitem_202, getitem_203, getitem_204, getitem_205, getitem_206, getitem_207, getitem_208, getitem_209, getitem_210, getitem_211, getitem_212, getitem_213, getitem_214, getitem_215, getitem_216, getitem_217, getitem_218, getitem_219, getitem_220, getitem_221, getitem_222, getitem_223, getitem_224, getitem_225, getitem_226, getitem_227, getitem_228, getitem_229, getitem_230, getitem_231, getitem_232, getitem_233, getitem_234, getitem_235, getitem_236, getitem_237, getitem_238, getitem_239, getitem_240, getitem_241, getitem_242, getitem_243, getitem_244, getitem_245, getitem_246, getitem_247, getitem_248, getitem_249, getitem_250, getitem_251, getitem_252, getitem_253, getitem_254, getitem_255, getitem_256, getitem_257, getitem_258, getitem_259, getitem_260, getitem_261, getitem_262, getitem_263, getitem_264, getitem_265, getitem_266, getitem_267, getitem_268, getitem_269, getitem_270, getitem_271, getitem_272, getitem_273, getitem_274, getitem_275, getitem_276, getitem_277, getitem_278, getitem_279, getitem_280, getitem_281, getitem_282, getitem_283, getitem_284, getitem_285, getitem_286, getitem_287, getitem_288, getitem_289, getitem_290, getitem_291, getitem_292, getitem_293, getitem_294, getitem_295], 0.09999999999999998); getitem_148 = getitem_149 = getitem_150 = getitem_151 = getitem_152 = getitem_153 = getitem_154 = getitem_155 = getitem_156 = getitem_157 = getitem_158 = getitem_159 = getitem_160 = getitem_161 = getitem_162 = getitem_163 = getitem_164 = getitem_165 = getitem_166 = getitem_167 = getitem_168 = getitem_169 = getitem_170 = getitem_171 = getitem_172 = getitem_173 = getitem_174 = getitem_175 = getitem_176 = getitem_177 = getitem_178 = getitem_179 = getitem_180 = getitem_181 = getitem_182 = getitem_183 = getitem_184 = getitem_185 = getitem_186 = getitem_187 = getitem_188 = getitem_189 = getitem_190 = getitem_191 = getitem_192 = getitem_193 = getitem_194 = getitem_195 = getitem_196 = getitem_197 = getitem_198 = getitem_199 = getitem_200 = getitem_201 = getitem_202 = getitem_203 = getitem_204 = getitem_205 = getitem_206 = getitem_207 = getitem_208 = getitem_209 = getitem_210 = getitem_211 = getitem_212 = getitem_213 = getitem_214 = getitem_215 = getitem_216 = getitem_217 = getitem_218 = getitem_219 = getitem_220 = getitem_221 = getitem_222 = getitem_223 = getitem_224 = getitem_225 = getitem_226 = getitem_227 = getitem_228 = getitem_229 = getitem_230 = getitem_231 = getitem_232 = getitem_233 = getitem_234 = getitem_235 = getitem_236 = getitem_237 = getitem_238 = getitem_239 = getitem_240 = getitem_241 = getitem_242 = getitem_243 = getitem_244 = getitem_245 = getitem_246 = getitem_247 = getitem_248 = getitem_249 = getitem_250 = getitem_251 = getitem_252 = getitem_253 = getitem_254 = getitem_255 = getitem_256 = getitem_257 = getitem_258 = getitem_259 = getitem_260 = getitem_261 = getitem_262 = getitem_263 = getitem_264 = getitem_265 = getitem_266 = getitem_267 = getitem_268 = getitem_269 = getitem_270 = getitem_271 = getitem_272 = getitem_273 = getitem_274 = getitem_275 = getitem_276 = getitem_277 = getitem_278 = getitem_279 = getitem_280 = getitem_281 = getitem_282 = getitem_283 = getitem_284 = getitem_285 = getitem_286 = getitem_287 = getitem_288 = getitem_289 = 
getitem_290 = getitem_291 = getitem_292 = getitem_293 = getitem_294 = getitem_295 = None + getitem_296 = _foreach_mul[0] + getitem_297 = _foreach_mul[1] + getitem_298 = _foreach_mul[2] + getitem_299 = _foreach_mul[3] + getitem_300 = _foreach_mul[4] + getitem_301 = _foreach_mul[5] + getitem_302 = _foreach_mul[6] + getitem_303 = _foreach_mul[7] + getitem_304 = _foreach_mul[8] + getitem_305 = _foreach_mul[9] + getitem_306 = _foreach_mul[10] + getitem_307 = _foreach_mul[11] + getitem_308 = _foreach_mul[12] + getitem_309 = _foreach_mul[13] + getitem_310 = _foreach_mul[14] + getitem_311 = _foreach_mul[15] + getitem_312 = _foreach_mul[16] + getitem_313 = _foreach_mul[17] + getitem_314 = _foreach_mul[18] + getitem_315 = _foreach_mul[19] + getitem_316 = _foreach_mul[20] + getitem_317 = _foreach_mul[21] + getitem_318 = _foreach_mul[22] + getitem_319 = _foreach_mul[23] + getitem_320 = _foreach_mul[24] + getitem_321 = _foreach_mul[25] + getitem_322 = _foreach_mul[26] + getitem_323 = _foreach_mul[27] + getitem_324 = _foreach_mul[28] + getitem_325 = _foreach_mul[29] + getitem_326 = _foreach_mul[30] + getitem_327 = _foreach_mul[31] + getitem_328 = _foreach_mul[32] + getitem_329 = _foreach_mul[33] + getitem_330 = _foreach_mul[34] + getitem_331 = _foreach_mul[35] + getitem_332 = _foreach_mul[36] + getitem_333 = _foreach_mul[37] + getitem_334 = _foreach_mul[38] + getitem_335 = _foreach_mul[39] + getitem_336 = _foreach_mul[40] + getitem_337 = _foreach_mul[41] + getitem_338 = _foreach_mul[42] + getitem_339 = _foreach_mul[43] + getitem_340 = _foreach_mul[44] + getitem_341 = _foreach_mul[45] + getitem_342 = _foreach_mul[46] + getitem_343 = _foreach_mul[47] + getitem_344 = _foreach_mul[48] + getitem_345 = _foreach_mul[49] + getitem_346 = _foreach_mul[50] + getitem_347 = _foreach_mul[51] + getitem_348 = _foreach_mul[52] + getitem_349 = _foreach_mul[53] + getitem_350 = _foreach_mul[54] + getitem_351 = _foreach_mul[55] + getitem_352 = _foreach_mul[56] + getitem_353 = _foreach_mul[57] + getitem_354 = _foreach_mul[58] + getitem_355 = _foreach_mul[59] + getitem_356 = _foreach_mul[60] + getitem_357 = _foreach_mul[61] + getitem_358 = _foreach_mul[62] + getitem_359 = _foreach_mul[63] + getitem_360 = _foreach_mul[64] + getitem_361 = _foreach_mul[65] + getitem_362 = _foreach_mul[66] + getitem_363 = _foreach_mul[67] + getitem_364 = _foreach_mul[68] + getitem_365 = _foreach_mul[69] + getitem_366 = _foreach_mul[70] + getitem_367 = _foreach_mul[71] + getitem_368 = _foreach_mul[72] + getitem_369 = _foreach_mul[73] + getitem_370 = _foreach_mul[74] + getitem_371 = _foreach_mul[75] + getitem_372 = _foreach_mul[76] + getitem_373 = _foreach_mul[77] + getitem_374 = _foreach_mul[78] + getitem_375 = _foreach_mul[79] + getitem_376 = _foreach_mul[80] + getitem_377 = _foreach_mul[81] + getitem_378 = _foreach_mul[82] + getitem_379 = _foreach_mul[83] + getitem_380 = _foreach_mul[84] + getitem_381 = _foreach_mul[85] + getitem_382 = _foreach_mul[86] + getitem_383 = _foreach_mul[87] + getitem_384 = _foreach_mul[88] + getitem_385 = _foreach_mul[89] + getitem_386 = _foreach_mul[90] + getitem_387 = _foreach_mul[91] + getitem_388 = _foreach_mul[92] + getitem_389 = _foreach_mul[93] + getitem_390 = _foreach_mul[94] + getitem_391 = _foreach_mul[95] + getitem_392 = _foreach_mul[96] + getitem_393 = _foreach_mul[97] + getitem_394 = _foreach_mul[98] + getitem_395 = _foreach_mul[99] + getitem_396 = _foreach_mul[100] + getitem_397 = _foreach_mul[101] + getitem_398 = _foreach_mul[102] + getitem_399 = _foreach_mul[103] + getitem_400 = _foreach_mul[104] + 
getitem_401 = _foreach_mul[105] + getitem_402 = _foreach_mul[106] + getitem_403 = _foreach_mul[107] + getitem_404 = _foreach_mul[108] + getitem_405 = _foreach_mul[109] + getitem_406 = _foreach_mul[110] + getitem_407 = _foreach_mul[111] + getitem_408 = _foreach_mul[112] + getitem_409 = _foreach_mul[113] + getitem_410 = _foreach_mul[114] + getitem_411 = _foreach_mul[115] + getitem_412 = _foreach_mul[116] + getitem_413 = _foreach_mul[117] + getitem_414 = _foreach_mul[118] + getitem_415 = _foreach_mul[119] + getitem_416 = _foreach_mul[120] + getitem_417 = _foreach_mul[121] + getitem_418 = _foreach_mul[122] + getitem_419 = _foreach_mul[123] + getitem_420 = _foreach_mul[124] + getitem_421 = _foreach_mul[125] + getitem_422 = _foreach_mul[126] + getitem_423 = _foreach_mul[127] + getitem_424 = _foreach_mul[128] + getitem_425 = _foreach_mul[129] + getitem_426 = _foreach_mul[130] + getitem_427 = _foreach_mul[131] + getitem_428 = _foreach_mul[132] + getitem_429 = _foreach_mul[133] + getitem_430 = _foreach_mul[134] + getitem_431 = _foreach_mul[135] + getitem_432 = _foreach_mul[136] + getitem_433 = _foreach_mul[137] + getitem_434 = _foreach_mul[138] + getitem_435 = _foreach_mul[139] + getitem_436 = _foreach_mul[140] + getitem_437 = _foreach_mul[141] + getitem_438 = _foreach_mul[142] + getitem_439 = _foreach_mul[143] + getitem_440 = _foreach_mul[144] + getitem_441 = _foreach_mul[145] + getitem_442 = _foreach_mul[146] + getitem_443 = _foreach_mul[147]; _foreach_mul = None + _foreach_add_1 = torch.ops.aten._foreach_add.List([arg299_1, arg149_1, arg300_1, arg301_1, arg302_1, arg303_1, arg304_1, arg305_1, arg306_1, arg307_1, arg308_1, arg309_1, arg310_1, arg311_1, arg312_1, arg313_1, arg314_1, arg315_1, arg316_1, arg317_1, arg318_1, arg319_1, arg320_1, arg321_1, arg322_1, arg323_1, arg324_1, arg325_1, arg326_1, arg327_1, arg328_1, arg329_1, arg330_1, arg331_1, arg332_1, arg333_1, arg334_1, arg335_1, arg336_1, arg337_1, arg338_1, arg339_1, arg340_1, arg341_1, arg342_1, arg343_1, arg344_1, arg345_1, arg346_1, arg347_1, arg348_1, arg349_1, arg350_1, arg351_1, arg352_1, arg353_1, arg354_1, arg355_1, arg356_1, arg357_1, arg358_1, arg359_1, arg360_1, arg361_1, arg362_1, arg363_1, arg364_1, arg365_1, arg366_1, arg367_1, arg368_1, arg369_1, arg370_1, arg371_1, arg372_1, arg373_1, arg374_1, arg375_1, arg376_1, arg377_1, arg378_1, arg379_1, arg380_1, arg381_1, arg382_1, arg383_1, arg384_1, arg385_1, arg386_1, arg387_1, arg388_1, arg389_1, arg390_1, arg391_1, arg392_1, arg393_1, arg394_1, arg395_1, arg396_1, arg397_1, arg398_1, arg399_1, arg400_1, arg401_1, arg402_1, arg403_1, arg404_1, arg405_1, arg406_1, arg407_1, arg408_1, arg409_1, arg410_1, arg411_1, arg412_1, arg413_1, arg414_1, arg415_1, arg416_1, arg417_1, arg418_1, arg419_1, arg420_1, arg421_1, arg422_1, arg423_1, arg424_1, arg425_1, arg426_1, arg427_1, arg428_1, arg429_1, arg430_1, arg431_1, arg432_1, arg433_1, arg434_1, arg435_1, arg436_1, arg437_1, arg438_1, arg439_1, arg440_1, arg441_1, arg442_1, arg443_1, arg444_1, arg445_1], [getitem_296, getitem_297, getitem_298, getitem_299, getitem_300, getitem_301, getitem_302, getitem_303, getitem_304, getitem_305, getitem_306, getitem_307, getitem_308, getitem_309, getitem_310, getitem_311, getitem_312, getitem_313, getitem_314, getitem_315, getitem_316, getitem_317, getitem_318, getitem_319, getitem_320, getitem_321, getitem_322, getitem_323, getitem_324, getitem_325, getitem_326, getitem_327, getitem_328, getitem_329, getitem_330, getitem_331, getitem_332, getitem_333, getitem_334, getitem_335, getitem_336, 
getitem_337, getitem_338, getitem_339, getitem_340, getitem_341, getitem_342, getitem_343, getitem_344, getitem_345, getitem_346, getitem_347, getitem_348, getitem_349, getitem_350, getitem_351, getitem_352, getitem_353, getitem_354, getitem_355, getitem_356, getitem_357, getitem_358, getitem_359, getitem_360, getitem_361, getitem_362, getitem_363, getitem_364, getitem_365, getitem_366, getitem_367, getitem_368, getitem_369, getitem_370, getitem_371, getitem_372, getitem_373, getitem_374, getitem_375, getitem_376, getitem_377, getitem_378, getitem_379, getitem_380, getitem_381, getitem_382, getitem_383, getitem_384, getitem_385, getitem_386, getitem_387, getitem_388, getitem_389, getitem_390, getitem_391, getitem_392, getitem_393, getitem_394, getitem_395, getitem_396, getitem_397, getitem_398, getitem_399, getitem_400, getitem_401, getitem_402, getitem_403, getitem_404, getitem_405, getitem_406, getitem_407, getitem_408, getitem_409, getitem_410, getitem_411, getitem_412, getitem_413, getitem_414, getitem_415, getitem_416, getitem_417, getitem_418, getitem_419, getitem_420, getitem_421, getitem_422, getitem_423, getitem_424, getitem_425, getitem_426, getitem_427, getitem_428, getitem_429, getitem_430, getitem_431, getitem_432, getitem_433, getitem_434, getitem_435, getitem_436, getitem_437, getitem_438, getitem_439, getitem_440, getitem_441, getitem_442, getitem_443]); getitem_296 = getitem_297 = getitem_298 = getitem_299 = getitem_300 = getitem_301 = getitem_302 = getitem_303 = getitem_304 = getitem_305 = getitem_306 = getitem_307 = getitem_308 = getitem_309 = getitem_310 = getitem_311 = getitem_312 = getitem_313 = getitem_314 = getitem_315 = getitem_316 = getitem_317 = getitem_318 = getitem_319 = getitem_320 = getitem_321 = getitem_322 = getitem_323 = getitem_324 = getitem_325 = getitem_326 = getitem_327 = getitem_328 = getitem_329 = getitem_330 = getitem_331 = getitem_332 = getitem_333 = getitem_334 = getitem_335 = getitem_336 = getitem_337 = getitem_338 = getitem_339 = getitem_340 = getitem_341 = getitem_342 = getitem_343 = getitem_344 = getitem_345 = getitem_346 = getitem_347 = getitem_348 = getitem_349 = getitem_350 = getitem_351 = getitem_352 = getitem_353 = getitem_354 = getitem_355 = getitem_356 = getitem_357 = getitem_358 = getitem_359 = getitem_360 = getitem_361 = getitem_362 = getitem_363 = getitem_364 = getitem_365 = getitem_366 = getitem_367 = getitem_368 = getitem_369 = getitem_370 = getitem_371 = getitem_372 = getitem_373 = getitem_374 = getitem_375 = getitem_376 = getitem_377 = getitem_378 = getitem_379 = getitem_380 = getitem_381 = getitem_382 = getitem_383 = getitem_384 = getitem_385 = getitem_386 = getitem_387 = getitem_388 = getitem_389 = getitem_390 = getitem_391 = getitem_392 = getitem_393 = getitem_394 = getitem_395 = getitem_396 = getitem_397 = getitem_398 = getitem_399 = getitem_400 = getitem_401 = getitem_402 = getitem_403 = getitem_404 = getitem_405 = getitem_406 = getitem_407 = getitem_408 = getitem_409 = getitem_410 = getitem_411 = getitem_412 = getitem_413 = getitem_414 = getitem_415 = getitem_416 = getitem_417 = getitem_418 = getitem_419 = getitem_420 = getitem_421 = getitem_422 = getitem_423 = getitem_424 = getitem_425 = getitem_426 = getitem_427 = getitem_428 = getitem_429 = getitem_430 = getitem_431 = getitem_432 = getitem_433 = getitem_434 = getitem_435 = getitem_436 = getitem_437 = getitem_438 = getitem_439 = getitem_440 = getitem_441 = getitem_442 = getitem_443 = None + getitem_444 = _foreach_add_1[0] + getitem_445 = _foreach_add_1[1] + getitem_446 = 
_foreach_add_1[2] + getitem_447 = _foreach_add_1[3] + getitem_448 = _foreach_add_1[4] + getitem_449 = _foreach_add_1[5] + getitem_450 = _foreach_add_1[6] + getitem_451 = _foreach_add_1[7] + getitem_452 = _foreach_add_1[8] + getitem_453 = _foreach_add_1[9] + getitem_454 = _foreach_add_1[10] + getitem_455 = _foreach_add_1[11] + getitem_456 = _foreach_add_1[12] + getitem_457 = _foreach_add_1[13] + getitem_458 = _foreach_add_1[14] + getitem_459 = _foreach_add_1[15] + getitem_460 = _foreach_add_1[16] + getitem_461 = _foreach_add_1[17] + getitem_462 = _foreach_add_1[18] + getitem_463 = _foreach_add_1[19] + getitem_464 = _foreach_add_1[20] + getitem_465 = _foreach_add_1[21] + getitem_466 = _foreach_add_1[22] + getitem_467 = _foreach_add_1[23] + getitem_468 = _foreach_add_1[24] + getitem_469 = _foreach_add_1[25] + getitem_470 = _foreach_add_1[26] + getitem_471 = _foreach_add_1[27] + getitem_472 = _foreach_add_1[28] + getitem_473 = _foreach_add_1[29] + getitem_474 = _foreach_add_1[30] + getitem_475 = _foreach_add_1[31] + getitem_476 = _foreach_add_1[32] + getitem_477 = _foreach_add_1[33] + getitem_478 = _foreach_add_1[34] + getitem_479 = _foreach_add_1[35] + getitem_480 = _foreach_add_1[36] + getitem_481 = _foreach_add_1[37] + getitem_482 = _foreach_add_1[38] + getitem_483 = _foreach_add_1[39] + getitem_484 = _foreach_add_1[40] + getitem_485 = _foreach_add_1[41] + getitem_486 = _foreach_add_1[42] + getitem_487 = _foreach_add_1[43] + getitem_488 = _foreach_add_1[44] + getitem_489 = _foreach_add_1[45] + getitem_490 = _foreach_add_1[46] + getitem_491 = _foreach_add_1[47] + getitem_492 = _foreach_add_1[48] + getitem_493 = _foreach_add_1[49] + getitem_494 = _foreach_add_1[50] + getitem_495 = _foreach_add_1[51] + getitem_496 = _foreach_add_1[52] + getitem_497 = _foreach_add_1[53] + getitem_498 = _foreach_add_1[54] + getitem_499 = _foreach_add_1[55] + getitem_500 = _foreach_add_1[56] + getitem_501 = _foreach_add_1[57] + getitem_502 = _foreach_add_1[58] + getitem_503 = _foreach_add_1[59] + getitem_504 = _foreach_add_1[60] + getitem_505 = _foreach_add_1[61] + getitem_506 = _foreach_add_1[62] + getitem_507 = _foreach_add_1[63] + getitem_508 = _foreach_add_1[64] + getitem_509 = _foreach_add_1[65] + getitem_510 = _foreach_add_1[66] + getitem_511 = _foreach_add_1[67] + getitem_512 = _foreach_add_1[68] + getitem_513 = _foreach_add_1[69] + getitem_514 = _foreach_add_1[70] + getitem_515 = _foreach_add_1[71] + getitem_516 = _foreach_add_1[72] + getitem_517 = _foreach_add_1[73] + getitem_518 = _foreach_add_1[74] + getitem_519 = _foreach_add_1[75] + getitem_520 = _foreach_add_1[76] + getitem_521 = _foreach_add_1[77] + getitem_522 = _foreach_add_1[78] + getitem_523 = _foreach_add_1[79] + getitem_524 = _foreach_add_1[80] + getitem_525 = _foreach_add_1[81] + getitem_526 = _foreach_add_1[82] + getitem_527 = _foreach_add_1[83] + getitem_528 = _foreach_add_1[84] + getitem_529 = _foreach_add_1[85] + getitem_530 = _foreach_add_1[86] + getitem_531 = _foreach_add_1[87] + getitem_532 = _foreach_add_1[88] + getitem_533 = _foreach_add_1[89] + getitem_534 = _foreach_add_1[90] + getitem_535 = _foreach_add_1[91] + getitem_536 = _foreach_add_1[92] + getitem_537 = _foreach_add_1[93] + getitem_538 = _foreach_add_1[94] + getitem_539 = _foreach_add_1[95] + getitem_540 = _foreach_add_1[96] + getitem_541 = _foreach_add_1[97] + getitem_542 = _foreach_add_1[98] + getitem_543 = _foreach_add_1[99] + getitem_544 = _foreach_add_1[100] + getitem_545 = _foreach_add_1[101] + getitem_546 = _foreach_add_1[102] + getitem_547 = _foreach_add_1[103] + 
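# NOTE (reader annotation, not captured log output; assumes Adam's default beta2 = 0.999): the updated exp_avg tensors unpacked here are + # written back into the optimizer state by the copy_ ops at the end of the graph, while the _foreach_mul by 0.999 below scales the + # exp_avg_sq state by beta2, the first half of exp_avg_sq = beta2 * exp_avg_sq + (1 - beta2) * grad * grad + # (the addcmul half presumably appears further down in the graph). +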
getitem_548 = _foreach_add_1[104] + getitem_549 = _foreach_add_1[105] + getitem_550 = _foreach_add_1[106] + getitem_551 = _foreach_add_1[107] + getitem_552 = _foreach_add_1[108] + getitem_553 = _foreach_add_1[109] + getitem_554 = _foreach_add_1[110] + getitem_555 = _foreach_add_1[111] + getitem_556 = _foreach_add_1[112] + getitem_557 = _foreach_add_1[113] + getitem_558 = _foreach_add_1[114] + getitem_559 = _foreach_add_1[115] + getitem_560 = _foreach_add_1[116] + getitem_561 = _foreach_add_1[117] + getitem_562 = _foreach_add_1[118] + getitem_563 = _foreach_add_1[119] + getitem_564 = _foreach_add_1[120] + getitem_565 = _foreach_add_1[121] + getitem_566 = _foreach_add_1[122] + getitem_567 = _foreach_add_1[123] + getitem_568 = _foreach_add_1[124] + getitem_569 = _foreach_add_1[125] + getitem_570 = _foreach_add_1[126] + getitem_571 = _foreach_add_1[127] + getitem_572 = _foreach_add_1[128] + getitem_573 = _foreach_add_1[129] + getitem_574 = _foreach_add_1[130] + getitem_575 = _foreach_add_1[131] + getitem_576 = _foreach_add_1[132] + getitem_577 = _foreach_add_1[133] + getitem_578 = _foreach_add_1[134] + getitem_579 = _foreach_add_1[135] + getitem_580 = _foreach_add_1[136] + getitem_581 = _foreach_add_1[137] + getitem_582 = _foreach_add_1[138] + getitem_583 = _foreach_add_1[139] + getitem_584 = _foreach_add_1[140] + getitem_585 = _foreach_add_1[141] + getitem_586 = _foreach_add_1[142] + getitem_587 = _foreach_add_1[143] + getitem_588 = _foreach_add_1[144] + getitem_589 = _foreach_add_1[145] + getitem_590 = _foreach_add_1[146] + getitem_591 = _foreach_add_1[147]; _foreach_add_1 = None + _foreach_mul_1 = torch.ops.aten._foreach_mul.Scalar([arg446_1, arg150_1, arg447_1, arg448_1, arg449_1, arg450_1, arg451_1, arg452_1, arg453_1, arg454_1, arg455_1, arg456_1, arg457_1, arg458_1, arg459_1, arg460_1, arg461_1, arg462_1, arg463_1, arg464_1, arg465_1, arg466_1, arg467_1, arg468_1, arg469_1, arg470_1, arg471_1, arg472_1, arg473_1, arg474_1, arg475_1, arg476_1, arg477_1, arg478_1, arg479_1, arg480_1, arg481_1, arg482_1, arg483_1, arg484_1, arg485_1, arg486_1, arg487_1, arg488_1, arg489_1, arg490_1, arg491_1, arg492_1, arg493_1, arg494_1, arg495_1, arg496_1, arg497_1, arg498_1, arg499_1, arg500_1, arg501_1, arg502_1, arg503_1, arg504_1, arg505_1, arg506_1, arg507_1, arg508_1, arg509_1, arg510_1, arg511_1, arg512_1, arg513_1, arg514_1, arg515_1, arg516_1, arg517_1, arg518_1, arg519_1, arg520_1, arg521_1, arg522_1, arg523_1, arg524_1, arg525_1, arg526_1, arg527_1, arg528_1, arg529_1, arg530_1, arg531_1, arg532_1, arg533_1, arg534_1, arg535_1, arg536_1, arg537_1, arg538_1, arg539_1, arg540_1, arg541_1, arg542_1, arg543_1, arg544_1, arg545_1, arg546_1, arg547_1, arg548_1, arg549_1, arg550_1, arg551_1, arg552_1, arg553_1, arg554_1, arg555_1, arg556_1, arg557_1, arg558_1, arg559_1, arg560_1, arg561_1, arg562_1, arg563_1, arg564_1, arg565_1, arg566_1, arg567_1, arg568_1, arg569_1, arg570_1, arg571_1, arg572_1, arg573_1, arg574_1, arg575_1, arg576_1, arg577_1, arg578_1, arg579_1, arg580_1, arg581_1, arg582_1, arg583_1, arg584_1, arg585_1, arg586_1, arg587_1, arg588_1, arg589_1, arg590_1, arg591_1, arg592_1], 0.999) + getitem_592 = _foreach_mul_1[0] + getitem_593 = _foreach_mul_1[1] + getitem_594 = _foreach_mul_1[2] + getitem_595 = _foreach_mul_1[3] + getitem_596 = _foreach_mul_1[4] + getitem_597 = _foreach_mul_1[5] + getitem_598 = _foreach_mul_1[6] + getitem_599 = _foreach_mul_1[7] + getitem_600 = _foreach_mul_1[8] + getitem_601 = _foreach_mul_1[9] + getitem_602 = _foreach_mul_1[10] + getitem_603 = 
_foreach_mul_1[11] + getitem_604 = _foreach_mul_1[12] + getitem_605 = _foreach_mul_1[13] + getitem_606 = _foreach_mul_1[14] + getitem_607 = _foreach_mul_1[15] + getitem_608 = _foreach_mul_1[16] + getitem_609 = _foreach_mul_1[17] + getitem_610 = _foreach_mul_1[18] + getitem_611 = _foreach_mul_1[19] + getitem_612 = _foreach_mul_1[20] + getitem_613 = _foreach_mul_1[21] + getitem_614 = _foreach_mul_1[22] + getitem_615 = _foreach_mul_1[23] + getitem_616 = _foreach_mul_1[24] + getitem_617 = _foreach_mul_1[25] + getitem_618 = _foreach_mul_1[26] + getitem_619 = _foreach_mul_1[27] + getitem_620 = _foreach_mul_1[28] + getitem_621 = _foreach_mul_1[29] + getitem_622 = _foreach_mul_1[30] + getitem_623 = _foreach_mul_1[31] + getitem_624 = _foreach_mul_1[32] + getitem_625 = _foreach_mul_1[33] + getitem_626 = _foreach_mul_1[34] + getitem_627 = _foreach_mul_1[35] + getitem_628 = _foreach_mul_1[36] + getitem_629 = _foreach_mul_1[37] + getitem_630 = _foreach_mul_1[38] + getitem_631 = _foreach_mul_1[39] + getitem_632 = _foreach_mul_1[40] + getitem_633 = _foreach_mul_1[41] + getitem_634 = _foreach_mul_1[42] + getitem_635 = _foreach_mul_1[43] + getitem_636 = _foreach_mul_1[44] + getitem_637 = _foreach_mul_1[45] + getitem_638 = _foreach_mul_1[46] + getitem_639 = _foreach_mul_1[47] + getitem_640 = _foreach_mul_1[48] + getitem_641 = _foreach_mul_1[49] + getitem_642 = _foreach_mul_1[50] + getitem_643 = _foreach_mul_1[51] + getitem_644 = _foreach_mul_1[52] + getitem_645 = _foreach_mul_1[53] + getitem_646 = _foreach_mul_1[54] + getitem_647 = _foreach_mul_1[55] + getitem_648 = _foreach_mul_1[56] + getitem_649 = _foreach_mul_1[57] + getitem_650 = _foreach_mul_1[58] + getitem_651 = _foreach_mul_1[59] + getitem_652 = _foreach_mul_1[60] + getitem_653 = _foreach_mul_1[61] + getitem_654 = _foreach_mul_1[62] + getitem_655 = _foreach_mul_1[63] + getitem_656 = _foreach_mul_1[64] + getitem_657 = _foreach_mul_1[65] + getitem_658 = _foreach_mul_1[66] + getitem_659 = _foreach_mul_1[67] + getitem_660 = _foreach_mul_1[68] + getitem_661 = _foreach_mul_1[69] + getitem_662 = _foreach_mul_1[70] + getitem_663 = _foreach_mul_1[71] + getitem_664 = _foreach_mul_1[72] + getitem_665 = _foreach_mul_1[73] + getitem_666 = _foreach_mul_1[74] + getitem_667 = _foreach_mul_1[75] + getitem_668 = _foreach_mul_1[76] + getitem_669 = _foreach_mul_1[77] + getitem_670 = _foreach_mul_1[78] + getitem_671 = _foreach_mul_1[79] + getitem_672 = _foreach_mul_1[80] + getitem_673 = _foreach_mul_1[81] + getitem_674 = _foreach_mul_1[82] + getitem_675 = _foreach_mul_1[83] + getitem_676 = _foreach_mul_1[84] + getitem_677 = _foreach_mul_1[85] + getitem_678 = _foreach_mul_1[86] + getitem_679 = _foreach_mul_1[87] + getitem_680 = _foreach_mul_1[88] + getitem_681 = _foreach_mul_1[89] + getitem_682 = _foreach_mul_1[90] + getitem_683 = _foreach_mul_1[91] + getitem_684 = _foreach_mul_1[92] + getitem_685 = _foreach_mul_1[93] + getitem_686 = _foreach_mul_1[94] + getitem_687 = _foreach_mul_1[95] + getitem_688 = _foreach_mul_1[96] + getitem_689 = _foreach_mul_1[97] + getitem_690 = _foreach_mul_1[98] + getitem_691 = _foreach_mul_1[99] + getitem_692 = _foreach_mul_1[100] + getitem_693 = _foreach_mul_1[101] + getitem_694 = _foreach_mul_1[102] + getitem_695 = _foreach_mul_1[103] + getitem_696 = _foreach_mul_1[104] + getitem_697 = _foreach_mul_1[105] + getitem_698 = _foreach_mul_1[106] + getitem_699 = _foreach_mul_1[107] + getitem_700 = _foreach_mul_1[108] + getitem_701 = _foreach_mul_1[109] + getitem_702 = _foreach_mul_1[110] + getitem_703 = _foreach_mul_1[111] + getitem_704 = 
_foreach_mul_1[112] + getitem_705 = _foreach_mul_1[113] + getitem_706 = _foreach_mul_1[114] + getitem_707 = _foreach_mul_1[115] + getitem_708 = _foreach_mul_1[116] + getitem_709 = _foreach_mul_1[117] + getitem_710 = _foreach_mul_1[118] + getitem_711 = _foreach_mul_1[119] + getitem_712 = _foreach_mul_1[120] + getitem_713 = _foreach_mul_1[121] + getitem_714 = _foreach_mul_1[122] + getitem_715 = _foreach_mul_1[123] + getitem_716 = _foreach_mul_1[124] + getitem_717 = _foreach_mul_1[125] + getitem_718 = _foreach_mul_1[126] + getitem_719 = _foreach_mul_1[127] + getitem_720 = _foreach_mul_1[128] + getitem_721 = _foreach_mul_1[129] + getitem_722 = _foreach_mul_1[130] + getitem_723 = _foreach_mul_1[131] + getitem_724 = _foreach_mul_1[132] + getitem_725 = _foreach_mul_1[133] + getitem_726 = _foreach_mul_1[134] + getitem_727 = _foreach_mul_1[135] + getitem_728 = _foreach_mul_1[136] + getitem_729 = _foreach_mul_1[137] + getitem_730 = _foreach_mul_1[138] + getitem_731 = _foreach_mul_1[139] + getitem_732 = _foreach_mul_1[140] + getitem_733 = _foreach_mul_1[141] + getitem_734 = _foreach_mul_1[142] + getitem_735 = _foreach_mul_1[143] + getitem_736 = _foreach_mul_1[144] + getitem_737 = _foreach_mul_1[145] + getitem_738 = _foreach_mul_1[146] + getitem_739 = _foreach_mul_1[147]; _foreach_mul_1 = None + _foreach_mul_2 = torch.ops.aten._foreach_mul.List([arg151_1, arg152_1, arg153_1, arg154_1, arg155_1, arg156_1, arg157_1, arg158_1, arg159_1, arg160_1, arg161_1, arg162_1, arg163_1, arg164_1, arg165_1, arg166_1, arg167_1, arg168_1, arg169_1, arg170_1, arg171_1, arg172_1, arg173_1, arg174_1, arg175_1, arg176_1, arg177_1, arg178_1, arg179_1, arg180_1, arg181_1, arg182_1, arg183_1, arg184_1, arg185_1, arg186_1, arg187_1, arg188_1, arg189_1, arg190_1, arg191_1, arg192_1, arg193_1, arg194_1, arg195_1, arg196_1, arg197_1, arg198_1, arg199_1, arg200_1, arg201_1, arg202_1, arg203_1, arg204_1, arg205_1, arg206_1, arg207_1, arg208_1, arg209_1, arg210_1, arg211_1, arg212_1, arg213_1, arg214_1, arg215_1, arg216_1, arg217_1, arg218_1, arg219_1, arg220_1, arg221_1, arg222_1, arg223_1, arg224_1, arg225_1, arg226_1, arg227_1, arg228_1, arg229_1, arg230_1, arg231_1, arg232_1, arg233_1, arg234_1, arg235_1, arg236_1, arg237_1, arg238_1, arg239_1, arg240_1, arg241_1, arg242_1, arg243_1, arg244_1, arg245_1, arg246_1, arg247_1, arg248_1, arg249_1, arg250_1, arg251_1, arg252_1, arg253_1, arg254_1, arg255_1, arg256_1, arg257_1, arg258_1, arg259_1, arg260_1, arg261_1, arg262_1, arg263_1, arg264_1, arg265_1, arg266_1, arg267_1, arg268_1, arg269_1, arg270_1, arg271_1, arg272_1, arg273_1, arg274_1, arg275_1, arg276_1, arg277_1, arg278_1, arg279_1, arg280_1, arg281_1, arg282_1, arg283_1, arg284_1, arg285_1, arg286_1, arg287_1, arg288_1, arg289_1, arg290_1, arg291_1, arg292_1, arg293_1, arg294_1, arg295_1, arg296_1, arg297_1, arg298_1], [arg151_1, arg152_1, arg153_1, arg154_1, arg155_1, arg156_1, arg157_1, arg158_1, arg159_1, arg160_1, arg161_1, arg162_1, arg163_1, arg164_1, arg165_1, arg166_1, arg167_1, arg168_1, arg169_1, arg170_1, arg171_1, arg172_1, arg173_1, arg174_1, arg175_1, arg176_1, arg177_1, arg178_1, arg179_1, arg180_1, arg181_1, arg182_1, arg183_1, arg184_1, arg185_1, arg186_1, arg187_1, arg188_1, arg189_1, arg190_1, arg191_1, arg192_1, arg193_1, arg194_1, arg195_1, arg196_1, arg197_1, arg198_1, arg199_1, arg200_1, arg201_1, arg202_1, arg203_1, arg204_1, arg205_1, arg206_1, arg207_1, arg208_1, arg209_1, arg210_1, arg211_1, arg212_1, arg213_1, arg214_1, arg215_1, arg216_1, arg217_1, arg218_1, arg219_1, arg220_1, arg221_1, 
arg222_1, arg223_1, arg224_1, arg225_1, arg226_1, arg227_1, arg228_1, arg229_1, arg230_1, arg231_1, arg232_1, arg233_1, arg234_1, arg235_1, arg236_1, arg237_1, arg238_1, arg239_1, arg240_1, arg241_1, arg242_1, arg243_1, arg244_1, arg245_1, arg246_1, arg247_1, arg248_1, arg249_1, arg250_1, arg251_1, arg252_1, arg253_1, arg254_1, arg255_1, arg256_1, arg257_1, arg258_1, arg259_1, arg260_1, arg261_1, arg262_1, arg263_1, arg264_1, arg265_1, arg266_1, arg267_1, arg268_1, arg269_1, arg270_1, arg271_1, arg272_1, arg273_1, arg274_1, arg275_1, arg276_1, arg277_1, arg278_1, arg279_1, arg280_1, arg281_1, arg282_1, arg283_1, arg284_1, arg285_1, arg286_1, arg287_1, arg288_1, arg289_1, arg290_1, arg291_1, arg292_1, arg293_1, arg294_1, arg295_1, arg296_1, arg297_1, arg298_1]); arg151_1 = arg152_1 = arg153_1 = arg154_1 = arg155_1 = arg156_1 = arg157_1 = arg158_1 = arg159_1 = arg160_1 = arg161_1 = arg162_1 = arg163_1 = arg164_1 = arg165_1 = arg166_1 = arg167_1 = arg168_1 = arg169_1 = arg170_1 = arg171_1 = arg172_1 = arg173_1 = arg174_1 = arg175_1 = arg176_1 = arg177_1 = arg178_1 = arg179_1 = arg180_1 = arg181_1 = arg182_1 = arg183_1 = arg184_1 = arg185_1 = arg186_1 = arg187_1 = arg188_1 = arg189_1 = arg190_1 = arg191_1 = arg192_1 = arg193_1 = arg194_1 = arg195_1 = arg196_1 = arg197_1 = arg198_1 = arg199_1 = arg200_1 = arg201_1 = arg202_1 = arg203_1 = arg204_1 = arg205_1 = arg206_1 = arg207_1 = arg208_1 = arg209_1 = arg210_1 = arg211_1 = arg212_1 = arg213_1 = arg214_1 = arg215_1 = arg216_1 = arg217_1 = arg218_1 = arg219_1 = arg220_1 = arg221_1 = arg222_1 = arg223_1 = arg224_1 = arg225_1 = arg226_1 = arg227_1 = arg228_1 = arg229_1 = arg230_1 = arg231_1 = arg232_1 = arg233_1 = arg234_1 = arg235_1 = arg236_1 = arg237_1 = arg238_1 = arg239_1 = arg240_1 = arg241_1 = arg242_1 = arg243_1 = arg244_1 = arg245_1 = arg246_1 = arg247_1 = arg248_1 = arg249_1 = arg250_1 = arg251_1 = arg252_1 = arg253_1 = arg254_1 = arg255_1 = arg256_1 = arg257_1 = arg258_1 = arg259_1 = arg260_1 = arg261_1 = arg262_1 = arg263_1 = arg264_1 = arg265_1 = arg266_1 = arg267_1 = arg268_1 = arg269_1 = arg270_1 = arg271_1 = arg272_1 = arg273_1 = arg274_1 = arg275_1 = arg276_1 = arg277_1 = arg278_1 = arg279_1 = arg280_1 = arg281_1 = arg282_1 = arg283_1 = arg284_1 = arg285_1 = arg286_1 = arg287_1 = arg288_1 = arg289_1 = arg290_1 = arg291_1 = arg292_1 = arg293_1 = arg294_1 = arg295_1 = arg296_1 = arg297_1 = arg298_1 = None + getitem_740 = _foreach_mul_2[0] + getitem_741 = _foreach_mul_2[1] + getitem_742 = _foreach_mul_2[2] + getitem_743 = _foreach_mul_2[3] + getitem_744 = _foreach_mul_2[4] + getitem_745 = _foreach_mul_2[5] + getitem_746 = _foreach_mul_2[6] + getitem_747 = _foreach_mul_2[7] + getitem_748 = _foreach_mul_2[8] + getitem_749 = _foreach_mul_2[9] + getitem_750 = _foreach_mul_2[10] + getitem_751 = _foreach_mul_2[11] + getitem_752 = _foreach_mul_2[12] + getitem_753 = _foreach_mul_2[13] + getitem_754 = _foreach_mul_2[14] + getitem_755 = _foreach_mul_2[15] + getitem_756 = _foreach_mul_2[16] + getitem_757 = _foreach_mul_2[17] + getitem_758 = _foreach_mul_2[18] + getitem_759 = _foreach_mul_2[19] + getitem_760 = _foreach_mul_2[20] + getitem_761 = _foreach_mul_2[21] + getitem_762 = _foreach_mul_2[22] + getitem_763 = _foreach_mul_2[23] + getitem_764 = _foreach_mul_2[24] + getitem_765 = _foreach_mul_2[25] + getitem_766 = _foreach_mul_2[26] + getitem_767 = _foreach_mul_2[27] + getitem_768 = _foreach_mul_2[28] + getitem_769 = _foreach_mul_2[29] + getitem_770 = _foreach_mul_2[30] + getitem_771 = _foreach_mul_2[31] + getitem_772 = _foreach_mul_2[32] + 
getitem_773 = _foreach_mul_2[33] + getitem_774 = _foreach_mul_2[34] + getitem_775 = _foreach_mul_2[35] + getitem_776 = _foreach_mul_2[36] + getitem_777 = _foreach_mul_2[37] + getitem_778 = _foreach_mul_2[38] + getitem_779 = _foreach_mul_2[39] + getitem_780 = _foreach_mul_2[40] + getitem_781 = _foreach_mul_2[41] + getitem_782 = _foreach_mul_2[42] + getitem_783 = _foreach_mul_2[43] + getitem_784 = _foreach_mul_2[44] + getitem_785 = _foreach_mul_2[45] + getitem_786 = _foreach_mul_2[46] + getitem_787 = _foreach_mul_2[47] + getitem_788 = _foreach_mul_2[48] + getitem_789 = _foreach_mul_2[49] + getitem_790 = _foreach_mul_2[50] + getitem_791 = _foreach_mul_2[51] + getitem_792 = _foreach_mul_2[52] + getitem_793 = _foreach_mul_2[53] + getitem_794 = _foreach_mul_2[54] + getitem_795 = _foreach_mul_2[55] + getitem_796 = _foreach_mul_2[56] + getitem_797 = _foreach_mul_2[57] + getitem_798 = _foreach_mul_2[58] + getitem_799 = _foreach_mul_2[59] + getitem_800 = _foreach_mul_2[60] + getitem_801 = _foreach_mul_2[61] + getitem_802 = _foreach_mul_2[62] + getitem_803 = _foreach_mul_2[63] + getitem_804 = _foreach_mul_2[64] + getitem_805 = _foreach_mul_2[65] + getitem_806 = _foreach_mul_2[66] + getitem_807 = _foreach_mul_2[67] + getitem_808 = _foreach_mul_2[68] + getitem_809 = _foreach_mul_2[69] + getitem_810 = _foreach_mul_2[70] + getitem_811 = _foreach_mul_2[71] + getitem_812 = _foreach_mul_2[72] + getitem_813 = _foreach_mul_2[73] + getitem_814 = _foreach_mul_2[74] + getitem_815 = _foreach_mul_2[75] + getitem_816 = _foreach_mul_2[76] + getitem_817 = _foreach_mul_2[77] + getitem_818 = _foreach_mul_2[78] + getitem_819 = _foreach_mul_2[79] + getitem_820 = _foreach_mul_2[80] + getitem_821 = _foreach_mul_2[81] + getitem_822 = _foreach_mul_2[82] + getitem_823 = _foreach_mul_2[83] + getitem_824 = _foreach_mul_2[84] + getitem_825 = _foreach_mul_2[85] + getitem_826 = _foreach_mul_2[86] + getitem_827 = _foreach_mul_2[87] + getitem_828 = _foreach_mul_2[88] + getitem_829 = _foreach_mul_2[89] + getitem_830 = _foreach_mul_2[90] + getitem_831 = _foreach_mul_2[91] + getitem_832 = _foreach_mul_2[92] + getitem_833 = _foreach_mul_2[93] + getitem_834 = _foreach_mul_2[94] + getitem_835 = _foreach_mul_2[95] + getitem_836 = _foreach_mul_2[96] + getitem_837 = _foreach_mul_2[97] + getitem_838 = _foreach_mul_2[98] + getitem_839 = _foreach_mul_2[99] + getitem_840 = _foreach_mul_2[100] + getitem_841 = _foreach_mul_2[101] + getitem_842 = _foreach_mul_2[102] + getitem_843 = _foreach_mul_2[103] + getitem_844 = _foreach_mul_2[104] + getitem_845 = _foreach_mul_2[105] + getitem_846 = _foreach_mul_2[106] + getitem_847 = _foreach_mul_2[107] + getitem_848 = _foreach_mul_2[108] + getitem_849 = _foreach_mul_2[109] + getitem_850 = _foreach_mul_2[110] + getitem_851 = _foreach_mul_2[111] + getitem_852 = _foreach_mul_2[112] + getitem_853 = _foreach_mul_2[113] + getitem_854 = _foreach_mul_2[114] + getitem_855 = _foreach_mul_2[115] + getitem_856 = _foreach_mul_2[116] + getitem_857 = _foreach_mul_2[117] + getitem_858 = _foreach_mul_2[118] + getitem_859 = _foreach_mul_2[119] + getitem_860 = _foreach_mul_2[120] + getitem_861 = _foreach_mul_2[121] + getitem_862 = _foreach_mul_2[122] + getitem_863 = _foreach_mul_2[123] + getitem_864 = _foreach_mul_2[124] + getitem_865 = _foreach_mul_2[125] + getitem_866 = _foreach_mul_2[126] + getitem_867 = _foreach_mul_2[127] + getitem_868 = _foreach_mul_2[128] + getitem_869 = _foreach_mul_2[129] + getitem_870 = _foreach_mul_2[130] + getitem_871 = _foreach_mul_2[131] + getitem_872 = _foreach_mul_2[132] + getitem_873 = 
_foreach_mul_2[133] + getitem_874 = _foreach_mul_2[134] + getitem_875 = _foreach_mul_2[135] + getitem_876 = _foreach_mul_2[136] + getitem_877 = _foreach_mul_2[137] + getitem_878 = _foreach_mul_2[138] + getitem_879 = _foreach_mul_2[139] + getitem_880 = _foreach_mul_2[140] + getitem_881 = _foreach_mul_2[141] + getitem_882 = _foreach_mul_2[142] + getitem_883 = _foreach_mul_2[143] + getitem_884 = _foreach_mul_2[144] + getitem_885 = _foreach_mul_2[145] + getitem_886 = _foreach_mul_2[146] + getitem_887 = _foreach_mul_2[147]; _foreach_mul_2 = None + _foreach_add_2 = torch.ops.aten._foreach_add.List([getitem_592, getitem_593, getitem_594, getitem_595, getitem_596, getitem_597, getitem_598, getitem_599, getitem_600, getitem_601, getitem_602, getitem_603, getitem_604, getitem_605, getitem_606, getitem_607, getitem_608, getitem_609, getitem_610, getitem_611, getitem_612, getitem_613, getitem_614, getitem_615, getitem_616, getitem_617, getitem_618, getitem_619, getitem_620, getitem_621, getitem_622, getitem_623, getitem_624, getitem_625, getitem_626, getitem_627, getitem_628, getitem_629, getitem_630, getitem_631, getitem_632, getitem_633, getitem_634, getitem_635, getitem_636, getitem_637, getitem_638, getitem_639, getitem_640, getitem_641, getitem_642, getitem_643, getitem_644, getitem_645, getitem_646, getitem_647, getitem_648, getitem_649, getitem_650, getitem_651, getitem_652, getitem_653, getitem_654, getitem_655, getitem_656, getitem_657, getitem_658, getitem_659, getitem_660, getitem_661, getitem_662, getitem_663, getitem_664, getitem_665, getitem_666, getitem_667, getitem_668, getitem_669, getitem_670, getitem_671, getitem_672, getitem_673, getitem_674, getitem_675, getitem_676, getitem_677, getitem_678, getitem_679, getitem_680, getitem_681, getitem_682, getitem_683, getitem_684, getitem_685, getitem_686, getitem_687, getitem_688, getitem_689, getitem_690, getitem_691, getitem_692, getitem_693, getitem_694, getitem_695, getitem_696, getitem_697, getitem_698, getitem_699, getitem_700, getitem_701, getitem_702, getitem_703, getitem_704, getitem_705, getitem_706, getitem_707, getitem_708, getitem_709, getitem_710, getitem_711, getitem_712, getitem_713, getitem_714, getitem_715, getitem_716, getitem_717, getitem_718, getitem_719, getitem_720, getitem_721, getitem_722, getitem_723, getitem_724, getitem_725, getitem_726, getitem_727, getitem_728, getitem_729, getitem_730, getitem_731, getitem_732, getitem_733, getitem_734, getitem_735, getitem_736, getitem_737, getitem_738, getitem_739], [getitem_740, getitem_741, getitem_742, getitem_743, getitem_744, getitem_745, getitem_746, getitem_747, getitem_748, getitem_749, getitem_750, getitem_751, getitem_752, getitem_753, getitem_754, getitem_755, getitem_756, getitem_757, getitem_758, getitem_759, getitem_760, getitem_761, getitem_762, getitem_763, getitem_764, getitem_765, getitem_766, getitem_767, getitem_768, getitem_769, getitem_770, getitem_771, getitem_772, getitem_773, getitem_774, getitem_775, getitem_776, getitem_777, getitem_778, getitem_779, getitem_780, getitem_781, getitem_782, getitem_783, getitem_784, getitem_785, getitem_786, getitem_787, getitem_788, getitem_789, getitem_790, getitem_791, getitem_792, getitem_793, getitem_794, getitem_795, getitem_796, getitem_797, getitem_798, getitem_799, getitem_800, getitem_801, getitem_802, getitem_803, getitem_804, getitem_805, getitem_806, getitem_807, getitem_808, getitem_809, getitem_810, getitem_811, getitem_812, getitem_813, getitem_814, getitem_815, getitem_816, getitem_817, getitem_818, 
getitem_819, getitem_820, getitem_821, getitem_822, getitem_823, getitem_824, getitem_825, getitem_826, getitem_827, getitem_828, getitem_829, getitem_830, getitem_831, getitem_832, getitem_833, getitem_834, getitem_835, getitem_836, getitem_837, getitem_838, getitem_839, getitem_840, getitem_841, getitem_842, getitem_843, getitem_844, getitem_845, getitem_846, getitem_847, getitem_848, getitem_849, getitem_850, getitem_851, getitem_852, getitem_853, getitem_854, getitem_855, getitem_856, getitem_857, getitem_858, getitem_859, getitem_860, getitem_861, getitem_862, getitem_863, getitem_864, getitem_865, getitem_866, getitem_867, getitem_868, getitem_869, getitem_870, getitem_871, getitem_872, getitem_873, getitem_874, getitem_875, getitem_876, getitem_877, getitem_878, getitem_879, getitem_880, getitem_881, getitem_882, getitem_883, getitem_884, getitem_885, getitem_886, getitem_887], alpha = 0.0010000000000000009); getitem_592 = getitem_593 = getitem_594 = getitem_595 = getitem_596 = getitem_597 = getitem_598 = getitem_599 = getitem_600 = getitem_601 = getitem_602 = getitem_603 = getitem_604 = getitem_605 = getitem_606 = getitem_607 = getitem_608 = getitem_609 = getitem_610 = getitem_611 = getitem_612 = getitem_613 = getitem_614 = getitem_615 = getitem_616 = getitem_617 = getitem_618 = getitem_619 = getitem_620 = getitem_621 = getitem_622 = getitem_623 = getitem_624 = getitem_625 = getitem_626 = getitem_627 = getitem_628 = getitem_629 = getitem_630 = getitem_631 = getitem_632 = getitem_633 = getitem_634 = getitem_635 = getitem_636 = getitem_637 = getitem_638 = getitem_639 = getitem_640 = getitem_641 = getitem_642 = getitem_643 = getitem_644 = getitem_645 = getitem_646 = getitem_647 = getitem_648 = getitem_649 = getitem_650 = getitem_651 = getitem_652 = getitem_653 = getitem_654 = getitem_655 = getitem_656 = getitem_657 = getitem_658 = getitem_659 = getitem_660 = getitem_661 = getitem_662 = getitem_663 = getitem_664 = getitem_665 = getitem_666 = getitem_667 = getitem_668 = getitem_669 = getitem_670 = getitem_671 = getitem_672 = getitem_673 = getitem_674 = getitem_675 = getitem_676 = getitem_677 = getitem_678 = getitem_679 = getitem_680 = getitem_681 = getitem_682 = getitem_683 = getitem_684 = getitem_685 = getitem_686 = getitem_687 = getitem_688 = getitem_689 = getitem_690 = getitem_691 = getitem_692 = getitem_693 = getitem_694 = getitem_695 = getitem_696 = getitem_697 = getitem_698 = getitem_699 = getitem_700 = getitem_701 = getitem_702 = getitem_703 = getitem_704 = getitem_705 = getitem_706 = getitem_707 = getitem_708 = getitem_709 = getitem_710 = getitem_711 = getitem_712 = getitem_713 = getitem_714 = getitem_715 = getitem_716 = getitem_717 = getitem_718 = getitem_719 = getitem_720 = getitem_721 = getitem_722 = getitem_723 = getitem_724 = getitem_725 = getitem_726 = getitem_727 = getitem_728 = getitem_729 = getitem_730 = getitem_731 = getitem_732 = getitem_733 = getitem_734 = getitem_735 = getitem_736 = getitem_737 = getitem_738 = getitem_739 = getitem_740 = getitem_741 = getitem_742 = getitem_743 = getitem_744 = getitem_745 = getitem_746 = getitem_747 = getitem_748 = getitem_749 = getitem_750 = getitem_751 = getitem_752 = getitem_753 = getitem_754 = getitem_755 = getitem_756 = getitem_757 = getitem_758 = getitem_759 = getitem_760 = getitem_761 = getitem_762 = getitem_763 = getitem_764 = getitem_765 = getitem_766 = getitem_767 = getitem_768 = getitem_769 = getitem_770 = getitem_771 = getitem_772 = getitem_773 = getitem_774 = getitem_775 = getitem_776 = getitem_777 = getitem_778 = 
getitem_779 = getitem_780 = getitem_781 = getitem_782 = getitem_783 = getitem_784 = getitem_785 = getitem_786 = getitem_787 = getitem_788 = getitem_789 = getitem_790 = getitem_791 = getitem_792 = getitem_793 = getitem_794 = getitem_795 = getitem_796 = getitem_797 = getitem_798 = getitem_799 = getitem_800 = getitem_801 = getitem_802 = getitem_803 = getitem_804 = getitem_805 = getitem_806 = getitem_807 = getitem_808 = getitem_809 = getitem_810 = getitem_811 = getitem_812 = getitem_813 = getitem_814 = getitem_815 = getitem_816 = getitem_817 = getitem_818 = getitem_819 = getitem_820 = getitem_821 = getitem_822 = getitem_823 = getitem_824 = getitem_825 = getitem_826 = getitem_827 = getitem_828 = getitem_829 = getitem_830 = getitem_831 = getitem_832 = getitem_833 = getitem_834 = getitem_835 = getitem_836 = getitem_837 = getitem_838 = getitem_839 = getitem_840 = getitem_841 = getitem_842 = getitem_843 = getitem_844 = getitem_845 = getitem_846 = getitem_847 = getitem_848 = getitem_849 = getitem_850 = getitem_851 = getitem_852 = getitem_853 = getitem_854 = getitem_855 = getitem_856 = getitem_857 = getitem_858 = getitem_859 = getitem_860 = getitem_861 = getitem_862 = getitem_863 = getitem_864 = getitem_865 = getitem_866 = getitem_867 = getitem_868 = getitem_869 = getitem_870 = getitem_871 = getitem_872 = getitem_873 = getitem_874 = getitem_875 = getitem_876 = getitem_877 = getitem_878 = getitem_879 = getitem_880 = getitem_881 = getitem_882 = getitem_883 = getitem_884 = getitem_885 = getitem_886 = getitem_887 = None + getitem_888 = _foreach_add_2[0] + getitem_889 = _foreach_add_2[1] + getitem_890 = _foreach_add_2[2] + getitem_891 = _foreach_add_2[3] + getitem_892 = _foreach_add_2[4] + getitem_893 = _foreach_add_2[5] + getitem_894 = _foreach_add_2[6] + getitem_895 = _foreach_add_2[7] + getitem_896 = _foreach_add_2[8] + getitem_897 = _foreach_add_2[9] + getitem_898 = _foreach_add_2[10] + getitem_899 = _foreach_add_2[11] + getitem_900 = _foreach_add_2[12] + getitem_901 = _foreach_add_2[13] + getitem_902 = _foreach_add_2[14] + getitem_903 = _foreach_add_2[15] + getitem_904 = _foreach_add_2[16] + getitem_905 = _foreach_add_2[17] + getitem_906 = _foreach_add_2[18] + getitem_907 = _foreach_add_2[19] + getitem_908 = _foreach_add_2[20] + getitem_909 = _foreach_add_2[21] + getitem_910 = _foreach_add_2[22] + getitem_911 = _foreach_add_2[23] + getitem_912 = _foreach_add_2[24] + getitem_913 = _foreach_add_2[25] + getitem_914 = _foreach_add_2[26] + getitem_915 = _foreach_add_2[27] + getitem_916 = _foreach_add_2[28] + getitem_917 = _foreach_add_2[29] + getitem_918 = _foreach_add_2[30] + getitem_919 = _foreach_add_2[31] + getitem_920 = _foreach_add_2[32] + getitem_921 = _foreach_add_2[33] + getitem_922 = _foreach_add_2[34] + getitem_923 = _foreach_add_2[35] + getitem_924 = _foreach_add_2[36] + getitem_925 = _foreach_add_2[37] + getitem_926 = _foreach_add_2[38] + getitem_927 = _foreach_add_2[39] + getitem_928 = _foreach_add_2[40] + getitem_929 = _foreach_add_2[41] + getitem_930 = _foreach_add_2[42] + getitem_931 = _foreach_add_2[43] + getitem_932 = _foreach_add_2[44] + getitem_933 = _foreach_add_2[45] + getitem_934 = _foreach_add_2[46] + getitem_935 = _foreach_add_2[47] + getitem_936 = _foreach_add_2[48] + getitem_937 = _foreach_add_2[49] + getitem_938 = _foreach_add_2[50] + getitem_939 = _foreach_add_2[51] + getitem_940 = _foreach_add_2[52] + getitem_941 = _foreach_add_2[53] + getitem_942 = _foreach_add_2[54] + getitem_943 = _foreach_add_2[55] + getitem_944 = _foreach_add_2[56] + getitem_945 = _foreach_add_2[57] + 
getitem_946 = _foreach_add_2[58] + getitem_947 = _foreach_add_2[59] + getitem_948 = _foreach_add_2[60] + getitem_949 = _foreach_add_2[61] + getitem_950 = _foreach_add_2[62] + getitem_951 = _foreach_add_2[63] + getitem_952 = _foreach_add_2[64] + getitem_953 = _foreach_add_2[65] + getitem_954 = _foreach_add_2[66] + getitem_955 = _foreach_add_2[67] + getitem_956 = _foreach_add_2[68] + getitem_957 = _foreach_add_2[69] + getitem_958 = _foreach_add_2[70] + getitem_959 = _foreach_add_2[71] + getitem_960 = _foreach_add_2[72] + getitem_961 = _foreach_add_2[73] + getitem_962 = _foreach_add_2[74] + getitem_963 = _foreach_add_2[75] + getitem_964 = _foreach_add_2[76] + getitem_965 = _foreach_add_2[77] + getitem_966 = _foreach_add_2[78] + getitem_967 = _foreach_add_2[79] + getitem_968 = _foreach_add_2[80] + getitem_969 = _foreach_add_2[81] + getitem_970 = _foreach_add_2[82] + getitem_971 = _foreach_add_2[83] + getitem_972 = _foreach_add_2[84] + getitem_973 = _foreach_add_2[85] + getitem_974 = _foreach_add_2[86] + getitem_975 = _foreach_add_2[87] + getitem_976 = _foreach_add_2[88] + getitem_977 = _foreach_add_2[89] + getitem_978 = _foreach_add_2[90] + getitem_979 = _foreach_add_2[91] + getitem_980 = _foreach_add_2[92] + getitem_981 = _foreach_add_2[93] + getitem_982 = _foreach_add_2[94] + getitem_983 = _foreach_add_2[95] + getitem_984 = _foreach_add_2[96] + getitem_985 = _foreach_add_2[97] + getitem_986 = _foreach_add_2[98] + getitem_987 = _foreach_add_2[99] + getitem_988 = _foreach_add_2[100] + getitem_989 = _foreach_add_2[101] + getitem_990 = _foreach_add_2[102] + getitem_991 = _foreach_add_2[103] + getitem_992 = _foreach_add_2[104] + getitem_993 = _foreach_add_2[105] + getitem_994 = _foreach_add_2[106] + getitem_995 = _foreach_add_2[107] + getitem_996 = _foreach_add_2[108] + getitem_997 = _foreach_add_2[109] + getitem_998 = _foreach_add_2[110] + getitem_999 = _foreach_add_2[111] + getitem_1000 = _foreach_add_2[112] + getitem_1001 = _foreach_add_2[113] + getitem_1002 = _foreach_add_2[114] + getitem_1003 = _foreach_add_2[115] + getitem_1004 = _foreach_add_2[116] + getitem_1005 = _foreach_add_2[117] + getitem_1006 = _foreach_add_2[118] + getitem_1007 = _foreach_add_2[119] + getitem_1008 = _foreach_add_2[120] + getitem_1009 = _foreach_add_2[121] + getitem_1010 = _foreach_add_2[122] + getitem_1011 = _foreach_add_2[123] + getitem_1012 = _foreach_add_2[124] + getitem_1013 = _foreach_add_2[125] + getitem_1014 = _foreach_add_2[126] + getitem_1015 = _foreach_add_2[127] + getitem_1016 = _foreach_add_2[128] + getitem_1017 = _foreach_add_2[129] + getitem_1018 = _foreach_add_2[130] + getitem_1019 = _foreach_add_2[131] + getitem_1020 = _foreach_add_2[132] + getitem_1021 = _foreach_add_2[133] + getitem_1022 = _foreach_add_2[134] + getitem_1023 = _foreach_add_2[135] + getitem_1024 = _foreach_add_2[136] + getitem_1025 = _foreach_add_2[137] + getitem_1026 = _foreach_add_2[138] + getitem_1027 = _foreach_add_2[139] + getitem_1028 = _foreach_add_2[140] + getitem_1029 = _foreach_add_2[141] + getitem_1030 = _foreach_add_2[142] + getitem_1031 = _foreach_add_2[143] + getitem_1032 = _foreach_add_2[144] + getitem_1033 = _foreach_add_2[145] + getitem_1034 = _foreach_add_2[146] + getitem_1035 = _foreach_add_2[147]; _foreach_add_2 = None + _foreach_pow = torch.ops.aten._foreach_pow.ScalarAndTensor(0.9, [getitem, getitem_1, getitem_2, getitem_3, getitem_4, getitem_5, getitem_6, getitem_7, getitem_8, getitem_9, getitem_10, getitem_11, getitem_12, getitem_13, getitem_14, getitem_15, getitem_16, getitem_17, getitem_18, getitem_19, 
getitem_20, getitem_21, getitem_22, getitem_23, getitem_24, getitem_25, getitem_26, getitem_27, getitem_28, getitem_29, getitem_30, getitem_31, getitem_32, getitem_33, getitem_34, getitem_35, getitem_36, getitem_37, getitem_38, getitem_39, getitem_40, getitem_41, getitem_42, getitem_43, getitem_44, getitem_45, getitem_46, getitem_47, getitem_48, getitem_49, getitem_50, getitem_51, getitem_52, getitem_53, getitem_54, getitem_55, getitem_56, getitem_57, getitem_58, getitem_59, getitem_60, getitem_61, getitem_62, getitem_63, getitem_64, getitem_65, getitem_66, getitem_67, getitem_68, getitem_69, getitem_70, getitem_71, getitem_72, getitem_73, getitem_74, getitem_75, getitem_76, getitem_77, getitem_78, getitem_79, getitem_80, getitem_81, getitem_82, getitem_83, getitem_84, getitem_85, getitem_86, getitem_87, getitem_88, getitem_89, getitem_90, getitem_91, getitem_92, getitem_93, getitem_94, getitem_95, getitem_96, getitem_97, getitem_98, getitem_99, getitem_100, getitem_101, getitem_102, getitem_103, getitem_104, getitem_105, getitem_106, getitem_107, getitem_108, getitem_109, getitem_110, getitem_111, getitem_112, getitem_113, getitem_114, getitem_115, getitem_116, getitem_117, getitem_118, getitem_119, getitem_120, getitem_121, getitem_122, getitem_123, getitem_124, getitem_125, getitem_126, getitem_127, getitem_128, getitem_129, getitem_130, getitem_131, getitem_132, getitem_133, getitem_134, getitem_135, getitem_136, getitem_137, getitem_138, getitem_139, getitem_140, getitem_141, getitem_142, getitem_143, getitem_144, getitem_145, getitem_146, getitem_147]) + getitem_1036 = _foreach_pow[0] + getitem_1037 = _foreach_pow[1] + getitem_1038 = _foreach_pow[2] + getitem_1039 = _foreach_pow[3] + getitem_1040 = _foreach_pow[4] + getitem_1041 = _foreach_pow[5] + getitem_1042 = _foreach_pow[6] + getitem_1043 = _foreach_pow[7] + getitem_1044 = _foreach_pow[8] + getitem_1045 = _foreach_pow[9] + getitem_1046 = _foreach_pow[10] + getitem_1047 = _foreach_pow[11] + getitem_1048 = _foreach_pow[12] + getitem_1049 = _foreach_pow[13] + getitem_1050 = _foreach_pow[14] + getitem_1051 = _foreach_pow[15] + getitem_1052 = _foreach_pow[16] + getitem_1053 = _foreach_pow[17] + getitem_1054 = _foreach_pow[18] + getitem_1055 = _foreach_pow[19] + getitem_1056 = _foreach_pow[20] + getitem_1057 = _foreach_pow[21] + getitem_1058 = _foreach_pow[22] + getitem_1059 = _foreach_pow[23] + getitem_1060 = _foreach_pow[24] + getitem_1061 = _foreach_pow[25] + getitem_1062 = _foreach_pow[26] + getitem_1063 = _foreach_pow[27] + getitem_1064 = _foreach_pow[28] + getitem_1065 = _foreach_pow[29] + getitem_1066 = _foreach_pow[30] + getitem_1067 = _foreach_pow[31] + getitem_1068 = _foreach_pow[32] + getitem_1069 = _foreach_pow[33] + getitem_1070 = _foreach_pow[34] + getitem_1071 = _foreach_pow[35] + getitem_1072 = _foreach_pow[36] + getitem_1073 = _foreach_pow[37] + getitem_1074 = _foreach_pow[38] + getitem_1075 = _foreach_pow[39] + getitem_1076 = _foreach_pow[40] + getitem_1077 = _foreach_pow[41] + getitem_1078 = _foreach_pow[42] + getitem_1079 = _foreach_pow[43] + getitem_1080 = _foreach_pow[44] + getitem_1081 = _foreach_pow[45] + getitem_1082 = _foreach_pow[46] + getitem_1083 = _foreach_pow[47] + getitem_1084 = _foreach_pow[48] + getitem_1085 = _foreach_pow[49] + getitem_1086 = _foreach_pow[50] + getitem_1087 = _foreach_pow[51] + getitem_1088 = _foreach_pow[52] + getitem_1089 = _foreach_pow[53] + getitem_1090 = _foreach_pow[54] + getitem_1091 = _foreach_pow[55] + getitem_1092 = _foreach_pow[56] + getitem_1093 = _foreach_pow[57] + 
getitem_1094 = _foreach_pow[58] + getitem_1095 = _foreach_pow[59] + getitem_1096 = _foreach_pow[60] + getitem_1097 = _foreach_pow[61] + getitem_1098 = _foreach_pow[62] + getitem_1099 = _foreach_pow[63] + getitem_1100 = _foreach_pow[64] + getitem_1101 = _foreach_pow[65] + getitem_1102 = _foreach_pow[66] + getitem_1103 = _foreach_pow[67] + getitem_1104 = _foreach_pow[68] + getitem_1105 = _foreach_pow[69] + getitem_1106 = _foreach_pow[70] + getitem_1107 = _foreach_pow[71] + getitem_1108 = _foreach_pow[72] + getitem_1109 = _foreach_pow[73] + getitem_1110 = _foreach_pow[74] + getitem_1111 = _foreach_pow[75] + getitem_1112 = _foreach_pow[76] + getitem_1113 = _foreach_pow[77] + getitem_1114 = _foreach_pow[78] + getitem_1115 = _foreach_pow[79] + getitem_1116 = _foreach_pow[80] + getitem_1117 = _foreach_pow[81] + getitem_1118 = _foreach_pow[82] + getitem_1119 = _foreach_pow[83] + getitem_1120 = _foreach_pow[84] + getitem_1121 = _foreach_pow[85] + getitem_1122 = _foreach_pow[86] + getitem_1123 = _foreach_pow[87] + getitem_1124 = _foreach_pow[88] + getitem_1125 = _foreach_pow[89] + getitem_1126 = _foreach_pow[90] + getitem_1127 = _foreach_pow[91] + getitem_1128 = _foreach_pow[92] + getitem_1129 = _foreach_pow[93] + getitem_1130 = _foreach_pow[94] + getitem_1131 = _foreach_pow[95] + getitem_1132 = _foreach_pow[96] + getitem_1133 = _foreach_pow[97] + getitem_1134 = _foreach_pow[98] + getitem_1135 = _foreach_pow[99] + getitem_1136 = _foreach_pow[100] + getitem_1137 = _foreach_pow[101] + getitem_1138 = _foreach_pow[102] + getitem_1139 = _foreach_pow[103] + getitem_1140 = _foreach_pow[104] + getitem_1141 = _foreach_pow[105] + getitem_1142 = _foreach_pow[106] + getitem_1143 = _foreach_pow[107] + getitem_1144 = _foreach_pow[108] + getitem_1145 = _foreach_pow[109] + getitem_1146 = _foreach_pow[110] + getitem_1147 = _foreach_pow[111] + getitem_1148 = _foreach_pow[112] + getitem_1149 = _foreach_pow[113] + getitem_1150 = _foreach_pow[114] + getitem_1151 = _foreach_pow[115] + getitem_1152 = _foreach_pow[116] + getitem_1153 = _foreach_pow[117] + getitem_1154 = _foreach_pow[118] + getitem_1155 = _foreach_pow[119] + getitem_1156 = _foreach_pow[120] + getitem_1157 = _foreach_pow[121] + getitem_1158 = _foreach_pow[122] + getitem_1159 = _foreach_pow[123] + getitem_1160 = _foreach_pow[124] + getitem_1161 = _foreach_pow[125] + getitem_1162 = _foreach_pow[126] + getitem_1163 = _foreach_pow[127] + getitem_1164 = _foreach_pow[128] + getitem_1165 = _foreach_pow[129] + getitem_1166 = _foreach_pow[130] + getitem_1167 = _foreach_pow[131] + getitem_1168 = _foreach_pow[132] + getitem_1169 = _foreach_pow[133] + getitem_1170 = _foreach_pow[134] + getitem_1171 = _foreach_pow[135] + getitem_1172 = _foreach_pow[136] + getitem_1173 = _foreach_pow[137] + getitem_1174 = _foreach_pow[138] + getitem_1175 = _foreach_pow[139] + getitem_1176 = _foreach_pow[140] + getitem_1177 = _foreach_pow[141] + getitem_1178 = _foreach_pow[142] + getitem_1179 = _foreach_pow[143] + getitem_1180 = _foreach_pow[144] + getitem_1181 = _foreach_pow[145] + getitem_1182 = _foreach_pow[146] + getitem_1183 = _foreach_pow[147]; _foreach_pow = None + _foreach_pow_1 = torch.ops.aten._foreach_pow.ScalarAndTensor(0.999, [getitem, getitem_1, getitem_2, getitem_3, getitem_4, getitem_5, getitem_6, getitem_7, getitem_8, getitem_9, getitem_10, getitem_11, getitem_12, getitem_13, getitem_14, getitem_15, getitem_16, getitem_17, getitem_18, getitem_19, getitem_20, getitem_21, getitem_22, getitem_23, getitem_24, getitem_25, getitem_26, getitem_27, getitem_28, getitem_29, 
getitem_30, getitem_31, getitem_32, getitem_33, getitem_34, getitem_35, getitem_36, getitem_37, getitem_38, getitem_39, getitem_40, getitem_41, getitem_42, getitem_43, getitem_44, getitem_45, getitem_46, getitem_47, getitem_48, getitem_49, getitem_50, getitem_51, getitem_52, getitem_53, getitem_54, getitem_55, getitem_56, getitem_57, getitem_58, getitem_59, getitem_60, getitem_61, getitem_62, getitem_63, getitem_64, getitem_65, getitem_66, getitem_67, getitem_68, getitem_69, getitem_70, getitem_71, getitem_72, getitem_73, getitem_74, getitem_75, getitem_76, getitem_77, getitem_78, getitem_79, getitem_80, getitem_81, getitem_82, getitem_83, getitem_84, getitem_85, getitem_86, getitem_87, getitem_88, getitem_89, getitem_90, getitem_91, getitem_92, getitem_93, getitem_94, getitem_95, getitem_96, getitem_97, getitem_98, getitem_99, getitem_100, getitem_101, getitem_102, getitem_103, getitem_104, getitem_105, getitem_106, getitem_107, getitem_108, getitem_109, getitem_110, getitem_111, getitem_112, getitem_113, getitem_114, getitem_115, getitem_116, getitem_117, getitem_118, getitem_119, getitem_120, getitem_121, getitem_122, getitem_123, getitem_124, getitem_125, getitem_126, getitem_127, getitem_128, getitem_129, getitem_130, getitem_131, getitem_132, getitem_133, getitem_134, getitem_135, getitem_136, getitem_137, getitem_138, getitem_139, getitem_140, getitem_141, getitem_142, getitem_143, getitem_144, getitem_145, getitem_146, getitem_147]) + getitem_1184 = _foreach_pow_1[0] + getitem_1185 = _foreach_pow_1[1] + getitem_1186 = _foreach_pow_1[2] + getitem_1187 = _foreach_pow_1[3] + getitem_1188 = _foreach_pow_1[4] + getitem_1189 = _foreach_pow_1[5] + getitem_1190 = _foreach_pow_1[6] + getitem_1191 = _foreach_pow_1[7] + getitem_1192 = _foreach_pow_1[8] + getitem_1193 = _foreach_pow_1[9] + getitem_1194 = _foreach_pow_1[10] + getitem_1195 = _foreach_pow_1[11] + getitem_1196 = _foreach_pow_1[12] + getitem_1197 = _foreach_pow_1[13] + getitem_1198 = _foreach_pow_1[14] + getitem_1199 = _foreach_pow_1[15] + getitem_1200 = _foreach_pow_1[16] + getitem_1201 = _foreach_pow_1[17] + getitem_1202 = _foreach_pow_1[18] + getitem_1203 = _foreach_pow_1[19] + getitem_1204 = _foreach_pow_1[20] + getitem_1205 = _foreach_pow_1[21] + getitem_1206 = _foreach_pow_1[22] + getitem_1207 = _foreach_pow_1[23] + getitem_1208 = _foreach_pow_1[24] + getitem_1209 = _foreach_pow_1[25] + getitem_1210 = _foreach_pow_1[26] + getitem_1211 = _foreach_pow_1[27] + getitem_1212 = _foreach_pow_1[28] + getitem_1213 = _foreach_pow_1[29] + getitem_1214 = _foreach_pow_1[30] + getitem_1215 = _foreach_pow_1[31] + getitem_1216 = _foreach_pow_1[32] + getitem_1217 = _foreach_pow_1[33] + getitem_1218 = _foreach_pow_1[34] + getitem_1219 = _foreach_pow_1[35] + getitem_1220 = _foreach_pow_1[36] + getitem_1221 = _foreach_pow_1[37] + getitem_1222 = _foreach_pow_1[38] + getitem_1223 = _foreach_pow_1[39] + getitem_1224 = _foreach_pow_1[40] + getitem_1225 = _foreach_pow_1[41] + getitem_1226 = _foreach_pow_1[42] + getitem_1227 = _foreach_pow_1[43] + getitem_1228 = _foreach_pow_1[44] + getitem_1229 = _foreach_pow_1[45] + getitem_1230 = _foreach_pow_1[46] + getitem_1231 = _foreach_pow_1[47] + getitem_1232 = _foreach_pow_1[48] + getitem_1233 = _foreach_pow_1[49] + getitem_1234 = _foreach_pow_1[50] + getitem_1235 = _foreach_pow_1[51] + getitem_1236 = _foreach_pow_1[52] + getitem_1237 = _foreach_pow_1[53] + getitem_1238 = _foreach_pow_1[54] + getitem_1239 = _foreach_pow_1[55] + getitem_1240 = _foreach_pow_1[56] + getitem_1241 = _foreach_pow_1[57] + 
getitem_1242 = _foreach_pow_1[58] + getitem_1243 = _foreach_pow_1[59] + getitem_1244 = _foreach_pow_1[60] + getitem_1245 = _foreach_pow_1[61] + getitem_1246 = _foreach_pow_1[62] + getitem_1247 = _foreach_pow_1[63] + getitem_1248 = _foreach_pow_1[64] + getitem_1249 = _foreach_pow_1[65] + getitem_1250 = _foreach_pow_1[66] + getitem_1251 = _foreach_pow_1[67] + getitem_1252 = _foreach_pow_1[68] + getitem_1253 = _foreach_pow_1[69] + getitem_1254 = _foreach_pow_1[70] + getitem_1255 = _foreach_pow_1[71] + getitem_1256 = _foreach_pow_1[72] + getitem_1257 = _foreach_pow_1[73] + getitem_1258 = _foreach_pow_1[74] + getitem_1259 = _foreach_pow_1[75] + getitem_1260 = _foreach_pow_1[76] + getitem_1261 = _foreach_pow_1[77] + getitem_1262 = _foreach_pow_1[78] + getitem_1263 = _foreach_pow_1[79] + getitem_1264 = _foreach_pow_1[80] + getitem_1265 = _foreach_pow_1[81] + getitem_1266 = _foreach_pow_1[82] + getitem_1267 = _foreach_pow_1[83] + getitem_1268 = _foreach_pow_1[84] + getitem_1269 = _foreach_pow_1[85] + getitem_1270 = _foreach_pow_1[86] + getitem_1271 = _foreach_pow_1[87] + getitem_1272 = _foreach_pow_1[88] + getitem_1273 = _foreach_pow_1[89] + getitem_1274 = _foreach_pow_1[90] + getitem_1275 = _foreach_pow_1[91] + getitem_1276 = _foreach_pow_1[92] + getitem_1277 = _foreach_pow_1[93] + getitem_1278 = _foreach_pow_1[94] + getitem_1279 = _foreach_pow_1[95] + getitem_1280 = _foreach_pow_1[96] + getitem_1281 = _foreach_pow_1[97] + getitem_1282 = _foreach_pow_1[98] + getitem_1283 = _foreach_pow_1[99] + getitem_1284 = _foreach_pow_1[100] + getitem_1285 = _foreach_pow_1[101] + getitem_1286 = _foreach_pow_1[102] + getitem_1287 = _foreach_pow_1[103] + getitem_1288 = _foreach_pow_1[104] + getitem_1289 = _foreach_pow_1[105] + getitem_1290 = _foreach_pow_1[106] + getitem_1291 = _foreach_pow_1[107] + getitem_1292 = _foreach_pow_1[108] + getitem_1293 = _foreach_pow_1[109] + getitem_1294 = _foreach_pow_1[110] + getitem_1295 = _foreach_pow_1[111] + getitem_1296 = _foreach_pow_1[112] + getitem_1297 = _foreach_pow_1[113] + getitem_1298 = _foreach_pow_1[114] + getitem_1299 = _foreach_pow_1[115] + getitem_1300 = _foreach_pow_1[116] + getitem_1301 = _foreach_pow_1[117] + getitem_1302 = _foreach_pow_1[118] + getitem_1303 = _foreach_pow_1[119] + getitem_1304 = _foreach_pow_1[120] + getitem_1305 = _foreach_pow_1[121] + getitem_1306 = _foreach_pow_1[122] + getitem_1307 = _foreach_pow_1[123] + getitem_1308 = _foreach_pow_1[124] + getitem_1309 = _foreach_pow_1[125] + getitem_1310 = _foreach_pow_1[126] + getitem_1311 = _foreach_pow_1[127] + getitem_1312 = _foreach_pow_1[128] + getitem_1313 = _foreach_pow_1[129] + getitem_1314 = _foreach_pow_1[130] + getitem_1315 = _foreach_pow_1[131] + getitem_1316 = _foreach_pow_1[132] + getitem_1317 = _foreach_pow_1[133] + getitem_1318 = _foreach_pow_1[134] + getitem_1319 = _foreach_pow_1[135] + getitem_1320 = _foreach_pow_1[136] + getitem_1321 = _foreach_pow_1[137] + getitem_1322 = _foreach_pow_1[138] + getitem_1323 = _foreach_pow_1[139] + getitem_1324 = _foreach_pow_1[140] + getitem_1325 = _foreach_pow_1[141] + getitem_1326 = _foreach_pow_1[142] + getitem_1327 = _foreach_pow_1[143] + getitem_1328 = _foreach_pow_1[144] + getitem_1329 = _foreach_pow_1[145] + getitem_1330 = _foreach_pow_1[146] + getitem_1331 = _foreach_pow_1[147]; _foreach_pow_1 = None + _foreach_sub_1 = torch.ops.aten._foreach_sub.Scalar([getitem_1036, getitem_1037, getitem_1038, getitem_1039, getitem_1040, getitem_1041, getitem_1042, getitem_1043, getitem_1044, getitem_1045, getitem_1046, getitem_1047, getitem_1048, 
getitem_1049, getitem_1050, getitem_1051, getitem_1052, getitem_1053, getitem_1054, getitem_1055, getitem_1056, getitem_1057, getitem_1058, getitem_1059, getitem_1060, getitem_1061, getitem_1062, getitem_1063, getitem_1064, getitem_1065, getitem_1066, getitem_1067, getitem_1068, getitem_1069, getitem_1070, getitem_1071, getitem_1072, getitem_1073, getitem_1074, getitem_1075, getitem_1076, getitem_1077, getitem_1078, getitem_1079, getitem_1080, getitem_1081, getitem_1082, getitem_1083, getitem_1084, getitem_1085, getitem_1086, getitem_1087, getitem_1088, getitem_1089, getitem_1090, getitem_1091, getitem_1092, getitem_1093, getitem_1094, getitem_1095, getitem_1096, getitem_1097, getitem_1098, getitem_1099, getitem_1100, getitem_1101, getitem_1102, getitem_1103, getitem_1104, getitem_1105, getitem_1106, getitem_1107, getitem_1108, getitem_1109, getitem_1110, getitem_1111, getitem_1112, getitem_1113, getitem_1114, getitem_1115, getitem_1116, getitem_1117, getitem_1118, getitem_1119, getitem_1120, getitem_1121, getitem_1122, getitem_1123, getitem_1124, getitem_1125, getitem_1126, getitem_1127, getitem_1128, getitem_1129, getitem_1130, getitem_1131, getitem_1132, getitem_1133, getitem_1134, getitem_1135, getitem_1136, getitem_1137, getitem_1138, getitem_1139, getitem_1140, getitem_1141, getitem_1142, getitem_1143, getitem_1144, getitem_1145, getitem_1146, getitem_1147, getitem_1148, getitem_1149, getitem_1150, getitem_1151, getitem_1152, getitem_1153, getitem_1154, getitem_1155, getitem_1156, getitem_1157, getitem_1158, getitem_1159, getitem_1160, getitem_1161, getitem_1162, getitem_1163, getitem_1164, getitem_1165, getitem_1166, getitem_1167, getitem_1168, getitem_1169, getitem_1170, getitem_1171, getitem_1172, getitem_1173, getitem_1174, getitem_1175, getitem_1176, getitem_1177, getitem_1178, getitem_1179, getitem_1180, getitem_1181, getitem_1182, getitem_1183], 1); getitem_1036 = getitem_1037 = getitem_1038 = getitem_1039 = getitem_1040 = getitem_1041 = getitem_1042 = getitem_1043 = getitem_1044 = getitem_1045 = getitem_1046 = getitem_1047 = getitem_1048 = getitem_1049 = getitem_1050 = getitem_1051 = getitem_1052 = getitem_1053 = getitem_1054 = getitem_1055 = getitem_1056 = getitem_1057 = getitem_1058 = getitem_1059 = getitem_1060 = getitem_1061 = getitem_1062 = getitem_1063 = getitem_1064 = getitem_1065 = getitem_1066 = getitem_1067 = getitem_1068 = getitem_1069 = getitem_1070 = getitem_1071 = getitem_1072 = getitem_1073 = getitem_1074 = getitem_1075 = getitem_1076 = getitem_1077 = getitem_1078 = getitem_1079 = getitem_1080 = getitem_1081 = getitem_1082 = getitem_1083 = getitem_1084 = getitem_1085 = getitem_1086 = getitem_1087 = getitem_1088 = getitem_1089 = getitem_1090 = getitem_1091 = getitem_1092 = getitem_1093 = getitem_1094 = getitem_1095 = getitem_1096 = getitem_1097 = getitem_1098 = getitem_1099 = getitem_1100 = getitem_1101 = getitem_1102 = getitem_1103 = getitem_1104 = getitem_1105 = getitem_1106 = getitem_1107 = getitem_1108 = getitem_1109 = getitem_1110 = getitem_1111 = getitem_1112 = getitem_1113 = getitem_1114 = getitem_1115 = getitem_1116 = getitem_1117 = getitem_1118 = getitem_1119 = getitem_1120 = getitem_1121 = getitem_1122 = getitem_1123 = getitem_1124 = getitem_1125 = getitem_1126 = getitem_1127 = getitem_1128 = getitem_1129 = getitem_1130 = getitem_1131 = getitem_1132 = getitem_1133 = getitem_1134 = getitem_1135 = getitem_1136 = getitem_1137 = getitem_1138 = getitem_1139 = getitem_1140 = getitem_1141 = getitem_1142 = getitem_1143 = getitem_1144 = getitem_1145 = 
getitem_1146 = getitem_1147 = getitem_1148 = getitem_1149 = getitem_1150 = getitem_1151 = getitem_1152 = getitem_1153 = getitem_1154 = getitem_1155 = getitem_1156 = getitem_1157 = getitem_1158 = getitem_1159 = getitem_1160 = getitem_1161 = getitem_1162 = getitem_1163 = getitem_1164 = getitem_1165 = getitem_1166 = getitem_1167 = getitem_1168 = getitem_1169 = getitem_1170 = getitem_1171 = getitem_1172 = getitem_1173 = getitem_1174 = getitem_1175 = getitem_1176 = getitem_1177 = getitem_1178 = getitem_1179 = getitem_1180 = getitem_1181 = getitem_1182 = getitem_1183 = None + getitem_1332 = _foreach_sub_1[0] + getitem_1333 = _foreach_sub_1[1] + getitem_1334 = _foreach_sub_1[2] + getitem_1335 = _foreach_sub_1[3] + getitem_1336 = _foreach_sub_1[4] + getitem_1337 = _foreach_sub_1[5] + getitem_1338 = _foreach_sub_1[6] + getitem_1339 = _foreach_sub_1[7] + getitem_1340 = _foreach_sub_1[8] + getitem_1341 = _foreach_sub_1[9] + getitem_1342 = _foreach_sub_1[10] + getitem_1343 = _foreach_sub_1[11] + getitem_1344 = _foreach_sub_1[12] + getitem_1345 = _foreach_sub_1[13] + getitem_1346 = _foreach_sub_1[14] + getitem_1347 = _foreach_sub_1[15] + getitem_1348 = _foreach_sub_1[16] + getitem_1349 = _foreach_sub_1[17] + getitem_1350 = _foreach_sub_1[18] + getitem_1351 = _foreach_sub_1[19] + getitem_1352 = _foreach_sub_1[20] + getitem_1353 = _foreach_sub_1[21] + getitem_1354 = _foreach_sub_1[22] + getitem_1355 = _foreach_sub_1[23] + getitem_1356 = _foreach_sub_1[24] + getitem_1357 = _foreach_sub_1[25] + getitem_1358 = _foreach_sub_1[26] + getitem_1359 = _foreach_sub_1[27] + getitem_1360 = _foreach_sub_1[28] + getitem_1361 = _foreach_sub_1[29] + getitem_1362 = _foreach_sub_1[30] + getitem_1363 = _foreach_sub_1[31] + getitem_1364 = _foreach_sub_1[32] + getitem_1365 = _foreach_sub_1[33] + getitem_1366 = _foreach_sub_1[34] + getitem_1367 = _foreach_sub_1[35] + getitem_1368 = _foreach_sub_1[36] + getitem_1369 = _foreach_sub_1[37] + getitem_1370 = _foreach_sub_1[38] + getitem_1371 = _foreach_sub_1[39] + getitem_1372 = _foreach_sub_1[40] + getitem_1373 = _foreach_sub_1[41] + getitem_1374 = _foreach_sub_1[42] + getitem_1375 = _foreach_sub_1[43] + getitem_1376 = _foreach_sub_1[44] + getitem_1377 = _foreach_sub_1[45] + getitem_1378 = _foreach_sub_1[46] + getitem_1379 = _foreach_sub_1[47] + getitem_1380 = _foreach_sub_1[48] + getitem_1381 = _foreach_sub_1[49] + getitem_1382 = _foreach_sub_1[50] + getitem_1383 = _foreach_sub_1[51] + getitem_1384 = _foreach_sub_1[52] + getitem_1385 = _foreach_sub_1[53] + getitem_1386 = _foreach_sub_1[54] + getitem_1387 = _foreach_sub_1[55] + getitem_1388 = _foreach_sub_1[56] + getitem_1389 = _foreach_sub_1[57] + getitem_1390 = _foreach_sub_1[58] + getitem_1391 = _foreach_sub_1[59] + getitem_1392 = _foreach_sub_1[60] + getitem_1393 = _foreach_sub_1[61] + getitem_1394 = _foreach_sub_1[62] + getitem_1395 = _foreach_sub_1[63] + getitem_1396 = _foreach_sub_1[64] + getitem_1397 = _foreach_sub_1[65] + getitem_1398 = _foreach_sub_1[66] + getitem_1399 = _foreach_sub_1[67] + getitem_1400 = _foreach_sub_1[68] + getitem_1401 = _foreach_sub_1[69] + getitem_1402 = _foreach_sub_1[70] + getitem_1403 = _foreach_sub_1[71] + getitem_1404 = _foreach_sub_1[72] + getitem_1405 = _foreach_sub_1[73] + getitem_1406 = _foreach_sub_1[74] + getitem_1407 = _foreach_sub_1[75] + getitem_1408 = _foreach_sub_1[76] + getitem_1409 = _foreach_sub_1[77] + getitem_1410 = _foreach_sub_1[78] + getitem_1411 = _foreach_sub_1[79] + getitem_1412 = _foreach_sub_1[80] + getitem_1413 = _foreach_sub_1[81] + getitem_1414 = _foreach_sub_1[82] + 
getitem_1415 = _foreach_sub_1[83] + getitem_1416 = _foreach_sub_1[84] + getitem_1417 = _foreach_sub_1[85] + getitem_1418 = _foreach_sub_1[86] + getitem_1419 = _foreach_sub_1[87] + getitem_1420 = _foreach_sub_1[88] + getitem_1421 = _foreach_sub_1[89] + getitem_1422 = _foreach_sub_1[90] + getitem_1423 = _foreach_sub_1[91] + getitem_1424 = _foreach_sub_1[92] + getitem_1425 = _foreach_sub_1[93] + getitem_1426 = _foreach_sub_1[94] + getitem_1427 = _foreach_sub_1[95] + getitem_1428 = _foreach_sub_1[96] + getitem_1429 = _foreach_sub_1[97] + getitem_1430 = _foreach_sub_1[98] + getitem_1431 = _foreach_sub_1[99] + getitem_1432 = _foreach_sub_1[100] + getitem_1433 = _foreach_sub_1[101] + getitem_1434 = _foreach_sub_1[102] + getitem_1435 = _foreach_sub_1[103] + getitem_1436 = _foreach_sub_1[104] + getitem_1437 = _foreach_sub_1[105] + getitem_1438 = _foreach_sub_1[106] + getitem_1439 = _foreach_sub_1[107] + getitem_1440 = _foreach_sub_1[108] + getitem_1441 = _foreach_sub_1[109] + getitem_1442 = _foreach_sub_1[110] + getitem_1443 = _foreach_sub_1[111] + getitem_1444 = _foreach_sub_1[112] + getitem_1445 = _foreach_sub_1[113] + getitem_1446 = _foreach_sub_1[114] + getitem_1447 = _foreach_sub_1[115] + getitem_1448 = _foreach_sub_1[116] + getitem_1449 = _foreach_sub_1[117] + getitem_1450 = _foreach_sub_1[118] + getitem_1451 = _foreach_sub_1[119] + getitem_1452 = _foreach_sub_1[120] + getitem_1453 = _foreach_sub_1[121] + getitem_1454 = _foreach_sub_1[122] + getitem_1455 = _foreach_sub_1[123] + getitem_1456 = _foreach_sub_1[124] + getitem_1457 = _foreach_sub_1[125] + getitem_1458 = _foreach_sub_1[126] + getitem_1459 = _foreach_sub_1[127] + getitem_1460 = _foreach_sub_1[128] + getitem_1461 = _foreach_sub_1[129] + getitem_1462 = _foreach_sub_1[130] + getitem_1463 = _foreach_sub_1[131] + getitem_1464 = _foreach_sub_1[132] + getitem_1465 = _foreach_sub_1[133] + getitem_1466 = _foreach_sub_1[134] + getitem_1467 = _foreach_sub_1[135] + getitem_1468 = _foreach_sub_1[136] + getitem_1469 = _foreach_sub_1[137] + getitem_1470 = _foreach_sub_1[138] + getitem_1471 = _foreach_sub_1[139] + getitem_1472 = _foreach_sub_1[140] + getitem_1473 = _foreach_sub_1[141] + getitem_1474 = _foreach_sub_1[142] + getitem_1475 = _foreach_sub_1[143] + getitem_1476 = _foreach_sub_1[144] + getitem_1477 = _foreach_sub_1[145] + getitem_1478 = _foreach_sub_1[146] + getitem_1479 = _foreach_sub_1[147]; _foreach_sub_1 = None + _foreach_sub_2 = torch.ops.aten._foreach_sub.Scalar([getitem_1184, getitem_1185, getitem_1186, getitem_1187, getitem_1188, getitem_1189, getitem_1190, getitem_1191, getitem_1192, getitem_1193, getitem_1194, getitem_1195, getitem_1196, getitem_1197, getitem_1198, getitem_1199, getitem_1200, getitem_1201, getitem_1202, getitem_1203, getitem_1204, getitem_1205, getitem_1206, getitem_1207, getitem_1208, getitem_1209, getitem_1210, getitem_1211, getitem_1212, getitem_1213, getitem_1214, getitem_1215, getitem_1216, getitem_1217, getitem_1218, getitem_1219, getitem_1220, getitem_1221, getitem_1222, getitem_1223, getitem_1224, getitem_1225, getitem_1226, getitem_1227, getitem_1228, getitem_1229, getitem_1230, getitem_1231, getitem_1232, getitem_1233, getitem_1234, getitem_1235, getitem_1236, getitem_1237, getitem_1238, getitem_1239, getitem_1240, getitem_1241, getitem_1242, getitem_1243, getitem_1244, getitem_1245, getitem_1246, getitem_1247, getitem_1248, getitem_1249, getitem_1250, getitem_1251, getitem_1252, getitem_1253, getitem_1254, getitem_1255, getitem_1256, getitem_1257, getitem_1258, getitem_1259, getitem_1260, 
getitem_1261, getitem_1262, getitem_1263, getitem_1264, getitem_1265, getitem_1266, getitem_1267, getitem_1268, getitem_1269, getitem_1270, getitem_1271, getitem_1272, getitem_1273, getitem_1274, getitem_1275, getitem_1276, getitem_1277, getitem_1278, getitem_1279, getitem_1280, getitem_1281, getitem_1282, getitem_1283, getitem_1284, getitem_1285, getitem_1286, getitem_1287, getitem_1288, getitem_1289, getitem_1290, getitem_1291, getitem_1292, getitem_1293, getitem_1294, getitem_1295, getitem_1296, getitem_1297, getitem_1298, getitem_1299, getitem_1300, getitem_1301, getitem_1302, getitem_1303, getitem_1304, getitem_1305, getitem_1306, getitem_1307, getitem_1308, getitem_1309, getitem_1310, getitem_1311, getitem_1312, getitem_1313, getitem_1314, getitem_1315, getitem_1316, getitem_1317, getitem_1318, getitem_1319, getitem_1320, getitem_1321, getitem_1322, getitem_1323, getitem_1324, getitem_1325, getitem_1326, getitem_1327, getitem_1328, getitem_1329, getitem_1330, getitem_1331], 1); getitem_1184 = getitem_1185 = getitem_1186 = getitem_1187 = getitem_1188 = getitem_1189 = getitem_1190 = getitem_1191 = getitem_1192 = getitem_1193 = getitem_1194 = getitem_1195 = getitem_1196 = getitem_1197 = getitem_1198 = getitem_1199 = getitem_1200 = getitem_1201 = getitem_1202 = getitem_1203 = getitem_1204 = getitem_1205 = getitem_1206 = getitem_1207 = getitem_1208 = getitem_1209 = getitem_1210 = getitem_1211 = getitem_1212 = getitem_1213 = getitem_1214 = getitem_1215 = getitem_1216 = getitem_1217 = getitem_1218 = getitem_1219 = getitem_1220 = getitem_1221 = getitem_1222 = getitem_1223 = getitem_1224 = getitem_1225 = getitem_1226 = getitem_1227 = getitem_1228 = getitem_1229 = getitem_1230 = getitem_1231 = getitem_1232 = getitem_1233 = getitem_1234 = getitem_1235 = getitem_1236 = getitem_1237 = getitem_1238 = getitem_1239 = getitem_1240 = getitem_1241 = getitem_1242 = getitem_1243 = getitem_1244 = getitem_1245 = getitem_1246 = getitem_1247 = getitem_1248 = getitem_1249 = getitem_1250 = getitem_1251 = getitem_1252 = getitem_1253 = getitem_1254 = getitem_1255 = getitem_1256 = getitem_1257 = getitem_1258 = getitem_1259 = getitem_1260 = getitem_1261 = getitem_1262 = getitem_1263 = getitem_1264 = getitem_1265 = getitem_1266 = getitem_1267 = getitem_1268 = getitem_1269 = getitem_1270 = getitem_1271 = getitem_1272 = getitem_1273 = getitem_1274 = getitem_1275 = getitem_1276 = getitem_1277 = getitem_1278 = getitem_1279 = getitem_1280 = getitem_1281 = getitem_1282 = getitem_1283 = getitem_1284 = getitem_1285 = getitem_1286 = getitem_1287 = getitem_1288 = getitem_1289 = getitem_1290 = getitem_1291 = getitem_1292 = getitem_1293 = getitem_1294 = getitem_1295 = getitem_1296 = getitem_1297 = getitem_1298 = getitem_1299 = getitem_1300 = getitem_1301 = getitem_1302 = getitem_1303 = getitem_1304 = getitem_1305 = getitem_1306 = getitem_1307 = getitem_1308 = getitem_1309 = getitem_1310 = getitem_1311 = getitem_1312 = getitem_1313 = getitem_1314 = getitem_1315 = getitem_1316 = getitem_1317 = getitem_1318 = getitem_1319 = getitem_1320 = getitem_1321 = getitem_1322 = getitem_1323 = getitem_1324 = getitem_1325 = getitem_1326 = getitem_1327 = getitem_1328 = getitem_1329 = getitem_1330 = getitem_1331 = None + getitem_1480 = _foreach_sub_2[0] + getitem_1481 = _foreach_sub_2[1] + getitem_1482 = _foreach_sub_2[2] + getitem_1483 = _foreach_sub_2[3] + getitem_1484 = _foreach_sub_2[4] + getitem_1485 = _foreach_sub_2[5] + getitem_1486 = _foreach_sub_2[6] + getitem_1487 = _foreach_sub_2[7] + getitem_1488 = _foreach_sub_2[8] + getitem_1489 
= _foreach_sub_2[9] + getitem_1490 = _foreach_sub_2[10] + getitem_1491 = _foreach_sub_2[11] + getitem_1492 = _foreach_sub_2[12] + getitem_1493 = _foreach_sub_2[13] + getitem_1494 = _foreach_sub_2[14] + getitem_1495 = _foreach_sub_2[15] + getitem_1496 = _foreach_sub_2[16] + getitem_1497 = _foreach_sub_2[17] + getitem_1498 = _foreach_sub_2[18] + getitem_1499 = _foreach_sub_2[19] + getitem_1500 = _foreach_sub_2[20] + getitem_1501 = _foreach_sub_2[21] + getitem_1502 = _foreach_sub_2[22] + getitem_1503 = _foreach_sub_2[23] + getitem_1504 = _foreach_sub_2[24] + getitem_1505 = _foreach_sub_2[25] + getitem_1506 = _foreach_sub_2[26] + getitem_1507 = _foreach_sub_2[27] + getitem_1508 = _foreach_sub_2[28] + getitem_1509 = _foreach_sub_2[29] + getitem_1510 = _foreach_sub_2[30] + getitem_1511 = _foreach_sub_2[31] + getitem_1512 = _foreach_sub_2[32] + getitem_1513 = _foreach_sub_2[33] + getitem_1514 = _foreach_sub_2[34] + getitem_1515 = _foreach_sub_2[35] + getitem_1516 = _foreach_sub_2[36] + getitem_1517 = _foreach_sub_2[37] + getitem_1518 = _foreach_sub_2[38] + getitem_1519 = _foreach_sub_2[39] + getitem_1520 = _foreach_sub_2[40] + getitem_1521 = _foreach_sub_2[41] + getitem_1522 = _foreach_sub_2[42] + getitem_1523 = _foreach_sub_2[43] + getitem_1524 = _foreach_sub_2[44] + getitem_1525 = _foreach_sub_2[45] + getitem_1526 = _foreach_sub_2[46] + getitem_1527 = _foreach_sub_2[47] + getitem_1528 = _foreach_sub_2[48] + getitem_1529 = _foreach_sub_2[49] + getitem_1530 = _foreach_sub_2[50] + getitem_1531 = _foreach_sub_2[51] + getitem_1532 = _foreach_sub_2[52] + getitem_1533 = _foreach_sub_2[53] + getitem_1534 = _foreach_sub_2[54] + getitem_1535 = _foreach_sub_2[55] + getitem_1536 = _foreach_sub_2[56] + getitem_1537 = _foreach_sub_2[57] + getitem_1538 = _foreach_sub_2[58] + getitem_1539 = _foreach_sub_2[59] + getitem_1540 = _foreach_sub_2[60] + getitem_1541 = _foreach_sub_2[61] + getitem_1542 = _foreach_sub_2[62] + getitem_1543 = _foreach_sub_2[63] + getitem_1544 = _foreach_sub_2[64] + getitem_1545 = _foreach_sub_2[65] + getitem_1546 = _foreach_sub_2[66] + getitem_1547 = _foreach_sub_2[67] + getitem_1548 = _foreach_sub_2[68] + getitem_1549 = _foreach_sub_2[69] + getitem_1550 = _foreach_sub_2[70] + getitem_1551 = _foreach_sub_2[71] + getitem_1552 = _foreach_sub_2[72] + getitem_1553 = _foreach_sub_2[73] + getitem_1554 = _foreach_sub_2[74] + getitem_1555 = _foreach_sub_2[75] + getitem_1556 = _foreach_sub_2[76] + getitem_1557 = _foreach_sub_2[77] + getitem_1558 = _foreach_sub_2[78] + getitem_1559 = _foreach_sub_2[79] + getitem_1560 = _foreach_sub_2[80] + getitem_1561 = _foreach_sub_2[81] + getitem_1562 = _foreach_sub_2[82] + getitem_1563 = _foreach_sub_2[83] + getitem_1564 = _foreach_sub_2[84] + getitem_1565 = _foreach_sub_2[85] + getitem_1566 = _foreach_sub_2[86] + getitem_1567 = _foreach_sub_2[87] + getitem_1568 = _foreach_sub_2[88] + getitem_1569 = _foreach_sub_2[89] + getitem_1570 = _foreach_sub_2[90] + getitem_1571 = _foreach_sub_2[91] + getitem_1572 = _foreach_sub_2[92] + getitem_1573 = _foreach_sub_2[93] + getitem_1574 = _foreach_sub_2[94] + getitem_1575 = _foreach_sub_2[95] + getitem_1576 = _foreach_sub_2[96] + getitem_1577 = _foreach_sub_2[97] + getitem_1578 = _foreach_sub_2[98] + getitem_1579 = _foreach_sub_2[99] + getitem_1580 = _foreach_sub_2[100] + getitem_1581 = _foreach_sub_2[101] + getitem_1582 = _foreach_sub_2[102] + getitem_1583 = _foreach_sub_2[103] + getitem_1584 = _foreach_sub_2[104] + getitem_1585 = _foreach_sub_2[105] + getitem_1586 = _foreach_sub_2[106] + getitem_1587 = 
_foreach_sub_2[107] + getitem_1588 = _foreach_sub_2[108] + getitem_1589 = _foreach_sub_2[109] + getitem_1590 = _foreach_sub_2[110] + getitem_1591 = _foreach_sub_2[111] + getitem_1592 = _foreach_sub_2[112] + getitem_1593 = _foreach_sub_2[113] + getitem_1594 = _foreach_sub_2[114] + getitem_1595 = _foreach_sub_2[115] + getitem_1596 = _foreach_sub_2[116] + getitem_1597 = _foreach_sub_2[117] + getitem_1598 = _foreach_sub_2[118] + getitem_1599 = _foreach_sub_2[119] + getitem_1600 = _foreach_sub_2[120] + getitem_1601 = _foreach_sub_2[121] + getitem_1602 = _foreach_sub_2[122] + getitem_1603 = _foreach_sub_2[123] + getitem_1604 = _foreach_sub_2[124] + getitem_1605 = _foreach_sub_2[125] + getitem_1606 = _foreach_sub_2[126] + getitem_1607 = _foreach_sub_2[127] + getitem_1608 = _foreach_sub_2[128] + getitem_1609 = _foreach_sub_2[129] + getitem_1610 = _foreach_sub_2[130] + getitem_1611 = _foreach_sub_2[131] + getitem_1612 = _foreach_sub_2[132] + getitem_1613 = _foreach_sub_2[133] + getitem_1614 = _foreach_sub_2[134] + getitem_1615 = _foreach_sub_2[135] + getitem_1616 = _foreach_sub_2[136] + getitem_1617 = _foreach_sub_2[137] + getitem_1618 = _foreach_sub_2[138] + getitem_1619 = _foreach_sub_2[139] + getitem_1620 = _foreach_sub_2[140] + getitem_1621 = _foreach_sub_2[141] + getitem_1622 = _foreach_sub_2[142] + getitem_1623 = _foreach_sub_2[143] + getitem_1624 = _foreach_sub_2[144] + getitem_1625 = _foreach_sub_2[145] + getitem_1626 = _foreach_sub_2[146] + getitem_1627 = _foreach_sub_2[147]; _foreach_sub_2 = None + _foreach_neg = torch.ops.aten._foreach_neg.default([getitem_1480, getitem_1481, getitem_1482, getitem_1483, getitem_1484, getitem_1485, getitem_1486, getitem_1487, getitem_1488, getitem_1489, getitem_1490, getitem_1491, getitem_1492, getitem_1493, getitem_1494, getitem_1495, getitem_1496, getitem_1497, getitem_1498, getitem_1499, getitem_1500, getitem_1501, getitem_1502, getitem_1503, getitem_1504, getitem_1505, getitem_1506, getitem_1507, getitem_1508, getitem_1509, getitem_1510, getitem_1511, getitem_1512, getitem_1513, getitem_1514, getitem_1515, getitem_1516, getitem_1517, getitem_1518, getitem_1519, getitem_1520, getitem_1521, getitem_1522, getitem_1523, getitem_1524, getitem_1525, getitem_1526, getitem_1527, getitem_1528, getitem_1529, getitem_1530, getitem_1531, getitem_1532, getitem_1533, getitem_1534, getitem_1535, getitem_1536, getitem_1537, getitem_1538, getitem_1539, getitem_1540, getitem_1541, getitem_1542, getitem_1543, getitem_1544, getitem_1545, getitem_1546, getitem_1547, getitem_1548, getitem_1549, getitem_1550, getitem_1551, getitem_1552, getitem_1553, getitem_1554, getitem_1555, getitem_1556, getitem_1557, getitem_1558, getitem_1559, getitem_1560, getitem_1561, getitem_1562, getitem_1563, getitem_1564, getitem_1565, getitem_1566, getitem_1567, getitem_1568, getitem_1569, getitem_1570, getitem_1571, getitem_1572, getitem_1573, getitem_1574, getitem_1575, getitem_1576, getitem_1577, getitem_1578, getitem_1579, getitem_1580, getitem_1581, getitem_1582, getitem_1583, getitem_1584, getitem_1585, getitem_1586, getitem_1587, getitem_1588, getitem_1589, getitem_1590, getitem_1591, getitem_1592, getitem_1593, getitem_1594, getitem_1595, getitem_1596, getitem_1597, getitem_1598, getitem_1599, getitem_1600, getitem_1601, getitem_1602, getitem_1603, getitem_1604, getitem_1605, getitem_1606, getitem_1607, getitem_1608, getitem_1609, getitem_1610, getitem_1611, getitem_1612, getitem_1613, getitem_1614, getitem_1615, getitem_1616, getitem_1617, getitem_1618, getitem_1619, getitem_1620, 
getitem_1621, getitem_1622, getitem_1623, getitem_1624, getitem_1625, getitem_1626, getitem_1627]); getitem_1480 = getitem_1481 = getitem_1482 = getitem_1483 = getitem_1484 = getitem_1485 = getitem_1486 = getitem_1487 = getitem_1488 = getitem_1489 = getitem_1490 = getitem_1491 = getitem_1492 = getitem_1493 = getitem_1494 = getitem_1495 = getitem_1496 = getitem_1497 = getitem_1498 = getitem_1499 = getitem_1500 = getitem_1501 = getitem_1502 = getitem_1503 = getitem_1504 = getitem_1505 = getitem_1506 = getitem_1507 = getitem_1508 = getitem_1509 = getitem_1510 = getitem_1511 = getitem_1512 = getitem_1513 = getitem_1514 = getitem_1515 = getitem_1516 = getitem_1517 = getitem_1518 = getitem_1519 = getitem_1520 = getitem_1521 = getitem_1522 = getitem_1523 = getitem_1524 = getitem_1525 = getitem_1526 = getitem_1527 = getitem_1528 = getitem_1529 = getitem_1530 = getitem_1531 = getitem_1532 = getitem_1533 = getitem_1534 = getitem_1535 = getitem_1536 = getitem_1537 = getitem_1538 = getitem_1539 = getitem_1540 = getitem_1541 = getitem_1542 = getitem_1543 = getitem_1544 = getitem_1545 = getitem_1546 = getitem_1547 = getitem_1548 = getitem_1549 = getitem_1550 = getitem_1551 = getitem_1552 = getitem_1553 = getitem_1554 = getitem_1555 = getitem_1556 = getitem_1557 = getitem_1558 = getitem_1559 = getitem_1560 = getitem_1561 = getitem_1562 = getitem_1563 = getitem_1564 = getitem_1565 = getitem_1566 = getitem_1567 = getitem_1568 = getitem_1569 = getitem_1570 = getitem_1571 = getitem_1572 = getitem_1573 = getitem_1574 = getitem_1575 = getitem_1576 = getitem_1577 = getitem_1578 = getitem_1579 = getitem_1580 = getitem_1581 = getitem_1582 = getitem_1583 = getitem_1584 = getitem_1585 = getitem_1586 = getitem_1587 = getitem_1588 = getitem_1589 = getitem_1590 = getitem_1591 = getitem_1592 = getitem_1593 = getitem_1594 = getitem_1595 = getitem_1596 = getitem_1597 = getitem_1598 = getitem_1599 = getitem_1600 = getitem_1601 = getitem_1602 = getitem_1603 = getitem_1604 = getitem_1605 = getitem_1606 = getitem_1607 = getitem_1608 = getitem_1609 = getitem_1610 = getitem_1611 = getitem_1612 = getitem_1613 = getitem_1614 = getitem_1615 = getitem_1616 = getitem_1617 = getitem_1618 = getitem_1619 = getitem_1620 = getitem_1621 = getitem_1622 = getitem_1623 = getitem_1624 = getitem_1625 = getitem_1626 = getitem_1627 = None + getitem_1628 = _foreach_neg[0] + getitem_1629 = _foreach_neg[1] + getitem_1630 = _foreach_neg[2] + getitem_1631 = _foreach_neg[3] + getitem_1632 = _foreach_neg[4] + getitem_1633 = _foreach_neg[5] + getitem_1634 = _foreach_neg[6] + getitem_1635 = _foreach_neg[7] + getitem_1636 = _foreach_neg[8] + getitem_1637 = _foreach_neg[9] + getitem_1638 = _foreach_neg[10] + getitem_1639 = _foreach_neg[11] + getitem_1640 = _foreach_neg[12] + getitem_1641 = _foreach_neg[13] + getitem_1642 = _foreach_neg[14] + getitem_1643 = _foreach_neg[15] + getitem_1644 = _foreach_neg[16] + getitem_1645 = _foreach_neg[17] + getitem_1646 = _foreach_neg[18] + getitem_1647 = _foreach_neg[19] + getitem_1648 = _foreach_neg[20] + getitem_1649 = _foreach_neg[21] + getitem_1650 = _foreach_neg[22] + getitem_1651 = _foreach_neg[23] + getitem_1652 = _foreach_neg[24] + getitem_1653 = _foreach_neg[25] + getitem_1654 = _foreach_neg[26] + getitem_1655 = _foreach_neg[27] + getitem_1656 = _foreach_neg[28] + getitem_1657 = _foreach_neg[29] + getitem_1658 = _foreach_neg[30] + getitem_1659 = _foreach_neg[31] + getitem_1660 = _foreach_neg[32] + getitem_1661 = _foreach_neg[33] + getitem_1662 = _foreach_neg[34] + getitem_1663 = _foreach_neg[35] + getitem_1664 
= _foreach_neg[36] + getitem_1665 = _foreach_neg[37] + getitem_1666 = _foreach_neg[38] + getitem_1667 = _foreach_neg[39] + getitem_1668 = _foreach_neg[40] + getitem_1669 = _foreach_neg[41] + getitem_1670 = _foreach_neg[42] + getitem_1671 = _foreach_neg[43] + getitem_1672 = _foreach_neg[44] + getitem_1673 = _foreach_neg[45] + getitem_1674 = _foreach_neg[46] + getitem_1675 = _foreach_neg[47] + getitem_1676 = _foreach_neg[48] + getitem_1677 = _foreach_neg[49] + getitem_1678 = _foreach_neg[50] + getitem_1679 = _foreach_neg[51] + getitem_1680 = _foreach_neg[52] + getitem_1681 = _foreach_neg[53] + getitem_1682 = _foreach_neg[54] + getitem_1683 = _foreach_neg[55] + getitem_1684 = _foreach_neg[56] + getitem_1685 = _foreach_neg[57] + getitem_1686 = _foreach_neg[58] + getitem_1687 = _foreach_neg[59] + getitem_1688 = _foreach_neg[60] + getitem_1689 = _foreach_neg[61] + getitem_1690 = _foreach_neg[62] + getitem_1691 = _foreach_neg[63] + getitem_1692 = _foreach_neg[64] + getitem_1693 = _foreach_neg[65] + getitem_1694 = _foreach_neg[66] + getitem_1695 = _foreach_neg[67] + getitem_1696 = _foreach_neg[68] + getitem_1697 = _foreach_neg[69] + getitem_1698 = _foreach_neg[70] + getitem_1699 = _foreach_neg[71] + getitem_1700 = _foreach_neg[72] + getitem_1701 = _foreach_neg[73] + getitem_1702 = _foreach_neg[74] + getitem_1703 = _foreach_neg[75] + getitem_1704 = _foreach_neg[76] + getitem_1705 = _foreach_neg[77] + getitem_1706 = _foreach_neg[78] + getitem_1707 = _foreach_neg[79] + getitem_1708 = _foreach_neg[80] + getitem_1709 = _foreach_neg[81] + getitem_1710 = _foreach_neg[82] + getitem_1711 = _foreach_neg[83] + getitem_1712 = _foreach_neg[84] + getitem_1713 = _foreach_neg[85] + getitem_1714 = _foreach_neg[86] + getitem_1715 = _foreach_neg[87] + getitem_1716 = _foreach_neg[88] + getitem_1717 = _foreach_neg[89] + getitem_1718 = _foreach_neg[90] + getitem_1719 = _foreach_neg[91] + getitem_1720 = _foreach_neg[92] + getitem_1721 = _foreach_neg[93] + getitem_1722 = _foreach_neg[94] + getitem_1723 = _foreach_neg[95] + getitem_1724 = _foreach_neg[96] + getitem_1725 = _foreach_neg[97] + getitem_1726 = _foreach_neg[98] + getitem_1727 = _foreach_neg[99] + getitem_1728 = _foreach_neg[100] + getitem_1729 = _foreach_neg[101] + getitem_1730 = _foreach_neg[102] + getitem_1731 = _foreach_neg[103] + getitem_1732 = _foreach_neg[104] + getitem_1733 = _foreach_neg[105] + getitem_1734 = _foreach_neg[106] + getitem_1735 = _foreach_neg[107] + getitem_1736 = _foreach_neg[108] + getitem_1737 = _foreach_neg[109] + getitem_1738 = _foreach_neg[110] + getitem_1739 = _foreach_neg[111] + getitem_1740 = _foreach_neg[112] + getitem_1741 = _foreach_neg[113] + getitem_1742 = _foreach_neg[114] + getitem_1743 = _foreach_neg[115] + getitem_1744 = _foreach_neg[116] + getitem_1745 = _foreach_neg[117] + getitem_1746 = _foreach_neg[118] + getitem_1747 = _foreach_neg[119] + getitem_1748 = _foreach_neg[120] + getitem_1749 = _foreach_neg[121] + getitem_1750 = _foreach_neg[122] + getitem_1751 = _foreach_neg[123] + getitem_1752 = _foreach_neg[124] + getitem_1753 = _foreach_neg[125] + getitem_1754 = _foreach_neg[126] + getitem_1755 = _foreach_neg[127] + getitem_1756 = _foreach_neg[128] + getitem_1757 = _foreach_neg[129] + getitem_1758 = _foreach_neg[130] + getitem_1759 = _foreach_neg[131] + getitem_1760 = _foreach_neg[132] + getitem_1761 = _foreach_neg[133] + getitem_1762 = _foreach_neg[134] + getitem_1763 = _foreach_neg[135] + getitem_1764 = _foreach_neg[136] + getitem_1765 = _foreach_neg[137] + getitem_1766 = _foreach_neg[138] + getitem_1767 = 
_foreach_neg[139] + getitem_1768 = _foreach_neg[140] + getitem_1769 = _foreach_neg[141] + getitem_1770 = _foreach_neg[142] + getitem_1771 = _foreach_neg[143] + getitem_1772 = _foreach_neg[144] + getitem_1773 = _foreach_neg[145] + getitem_1774 = _foreach_neg[146] + getitem_1775 = _foreach_neg[147]; _foreach_neg = None + _foreach_div = torch.ops.aten._foreach_div.Scalar([getitem_1332, getitem_1333, getitem_1334, getitem_1335, getitem_1336, getitem_1337, getitem_1338, getitem_1339, getitem_1340, getitem_1341, getitem_1342, getitem_1343, getitem_1344, getitem_1345, getitem_1346, getitem_1347, getitem_1348, getitem_1349, getitem_1350, getitem_1351, getitem_1352, getitem_1353, getitem_1354, getitem_1355, getitem_1356, getitem_1357, getitem_1358, getitem_1359, getitem_1360, getitem_1361, getitem_1362, getitem_1363, getitem_1364, getitem_1365, getitem_1366, getitem_1367, getitem_1368, getitem_1369, getitem_1370, getitem_1371, getitem_1372, getitem_1373, getitem_1374, getitem_1375, getitem_1376, getitem_1377, getitem_1378, getitem_1379, getitem_1380, getitem_1381, getitem_1382, getitem_1383, getitem_1384, getitem_1385, getitem_1386, getitem_1387, getitem_1388, getitem_1389, getitem_1390, getitem_1391, getitem_1392, getitem_1393, getitem_1394, getitem_1395, getitem_1396, getitem_1397, getitem_1398, getitem_1399, getitem_1400, getitem_1401, getitem_1402, getitem_1403, getitem_1404, getitem_1405, getitem_1406, getitem_1407, getitem_1408, getitem_1409, getitem_1410, getitem_1411, getitem_1412, getitem_1413, getitem_1414, getitem_1415, getitem_1416, getitem_1417, getitem_1418, getitem_1419, getitem_1420, getitem_1421, getitem_1422, getitem_1423, getitem_1424, getitem_1425, getitem_1426, getitem_1427, getitem_1428, getitem_1429, getitem_1430, getitem_1431, getitem_1432, getitem_1433, getitem_1434, getitem_1435, getitem_1436, getitem_1437, getitem_1438, getitem_1439, getitem_1440, getitem_1441, getitem_1442, getitem_1443, getitem_1444, getitem_1445, getitem_1446, getitem_1447, getitem_1448, getitem_1449, getitem_1450, getitem_1451, getitem_1452, getitem_1453, getitem_1454, getitem_1455, getitem_1456, getitem_1457, getitem_1458, getitem_1459, getitem_1460, getitem_1461, getitem_1462, getitem_1463, getitem_1464, getitem_1465, getitem_1466, getitem_1467, getitem_1468, getitem_1469, getitem_1470, getitem_1471, getitem_1472, getitem_1473, getitem_1474, getitem_1475, getitem_1476, getitem_1477, getitem_1478, getitem_1479], 0.01); getitem_1332 = getitem_1333 = getitem_1334 = getitem_1335 = getitem_1336 = getitem_1337 = getitem_1338 = getitem_1339 = getitem_1340 = getitem_1341 = getitem_1342 = getitem_1343 = getitem_1344 = getitem_1345 = getitem_1346 = getitem_1347 = getitem_1348 = getitem_1349 = getitem_1350 = getitem_1351 = getitem_1352 = getitem_1353 = getitem_1354 = getitem_1355 = getitem_1356 = getitem_1357 = getitem_1358 = getitem_1359 = getitem_1360 = getitem_1361 = getitem_1362 = getitem_1363 = getitem_1364 = getitem_1365 = getitem_1366 = getitem_1367 = getitem_1368 = getitem_1369 = getitem_1370 = getitem_1371 = getitem_1372 = getitem_1373 = getitem_1374 = getitem_1375 = getitem_1376 = getitem_1377 = getitem_1378 = getitem_1379 = getitem_1380 = getitem_1381 = getitem_1382 = getitem_1383 = getitem_1384 = getitem_1385 = getitem_1386 = getitem_1387 = getitem_1388 = getitem_1389 = getitem_1390 = getitem_1391 = getitem_1392 = getitem_1393 = getitem_1394 = getitem_1395 = getitem_1396 = getitem_1397 = getitem_1398 = getitem_1399 = getitem_1400 = getitem_1401 = getitem_1402 = getitem_1403 = getitem_1404 = 
getitem_1405 = getitem_1406 = getitem_1407 = getitem_1408 = getitem_1409 = getitem_1410 = getitem_1411 = getitem_1412 = getitem_1413 = getitem_1414 = getitem_1415 = getitem_1416 = getitem_1417 = getitem_1418 = getitem_1419 = getitem_1420 = getitem_1421 = getitem_1422 = getitem_1423 = getitem_1424 = getitem_1425 = getitem_1426 = getitem_1427 = getitem_1428 = getitem_1429 = getitem_1430 = getitem_1431 = getitem_1432 = getitem_1433 = getitem_1434 = getitem_1435 = getitem_1436 = getitem_1437 = getitem_1438 = getitem_1439 = getitem_1440 = getitem_1441 = getitem_1442 = getitem_1443 = getitem_1444 = getitem_1445 = getitem_1446 = getitem_1447 = getitem_1448 = getitem_1449 = getitem_1450 = getitem_1451 = getitem_1452 = getitem_1453 = getitem_1454 = getitem_1455 = getitem_1456 = getitem_1457 = getitem_1458 = getitem_1459 = getitem_1460 = getitem_1461 = getitem_1462 = getitem_1463 = getitem_1464 = getitem_1465 = getitem_1466 = getitem_1467 = getitem_1468 = getitem_1469 = getitem_1470 = getitem_1471 = getitem_1472 = getitem_1473 = getitem_1474 = getitem_1475 = getitem_1476 = getitem_1477 = getitem_1478 = getitem_1479 = None + getitem_1776 = _foreach_div[0] + getitem_1777 = _foreach_div[1] + getitem_1778 = _foreach_div[2] + getitem_1779 = _foreach_div[3] + getitem_1780 = _foreach_div[4] + getitem_1781 = _foreach_div[5] + getitem_1782 = _foreach_div[6] + getitem_1783 = _foreach_div[7] + getitem_1784 = _foreach_div[8] + getitem_1785 = _foreach_div[9] + getitem_1786 = _foreach_div[10] + getitem_1787 = _foreach_div[11] + getitem_1788 = _foreach_div[12] + getitem_1789 = _foreach_div[13] + getitem_1790 = _foreach_div[14] + getitem_1791 = _foreach_div[15] + getitem_1792 = _foreach_div[16] + getitem_1793 = _foreach_div[17] + getitem_1794 = _foreach_div[18] + getitem_1795 = _foreach_div[19] + getitem_1796 = _foreach_div[20] + getitem_1797 = _foreach_div[21] + getitem_1798 = _foreach_div[22] + getitem_1799 = _foreach_div[23] + getitem_1800 = _foreach_div[24] + getitem_1801 = _foreach_div[25] + getitem_1802 = _foreach_div[26] + getitem_1803 = _foreach_div[27] + getitem_1804 = _foreach_div[28] + getitem_1805 = _foreach_div[29] + getitem_1806 = _foreach_div[30] + getitem_1807 = _foreach_div[31] + getitem_1808 = _foreach_div[32] + getitem_1809 = _foreach_div[33] + getitem_1810 = _foreach_div[34] + getitem_1811 = _foreach_div[35] + getitem_1812 = _foreach_div[36] + getitem_1813 = _foreach_div[37] + getitem_1814 = _foreach_div[38] + getitem_1815 = _foreach_div[39] + getitem_1816 = _foreach_div[40] + getitem_1817 = _foreach_div[41] + getitem_1818 = _foreach_div[42] + getitem_1819 = _foreach_div[43] + getitem_1820 = _foreach_div[44] + getitem_1821 = _foreach_div[45] + getitem_1822 = _foreach_div[46] + getitem_1823 = _foreach_div[47] + getitem_1824 = _foreach_div[48] + getitem_1825 = _foreach_div[49] + getitem_1826 = _foreach_div[50] + getitem_1827 = _foreach_div[51] + getitem_1828 = _foreach_div[52] + getitem_1829 = _foreach_div[53] + getitem_1830 = _foreach_div[54] + getitem_1831 = _foreach_div[55] + getitem_1832 = _foreach_div[56] + getitem_1833 = _foreach_div[57] + getitem_1834 = _foreach_div[58] + getitem_1835 = _foreach_div[59] + getitem_1836 = _foreach_div[60] + getitem_1837 = _foreach_div[61] + getitem_1838 = _foreach_div[62] + getitem_1839 = _foreach_div[63] + getitem_1840 = _foreach_div[64] + getitem_1841 = _foreach_div[65] + getitem_1842 = _foreach_div[66] + getitem_1843 = _foreach_div[67] + getitem_1844 = _foreach_div[68] + getitem_1845 = _foreach_div[69] + getitem_1846 = _foreach_div[70] + getitem_1847 = 
_foreach_div[71] + getitem_1848 = _foreach_div[72] + getitem_1849 = _foreach_div[73] + getitem_1850 = _foreach_div[74] + getitem_1851 = _foreach_div[75] + getitem_1852 = _foreach_div[76] + getitem_1853 = _foreach_div[77] + getitem_1854 = _foreach_div[78] + getitem_1855 = _foreach_div[79] + getitem_1856 = _foreach_div[80] + getitem_1857 = _foreach_div[81] + getitem_1858 = _foreach_div[82] + getitem_1859 = _foreach_div[83] + getitem_1860 = _foreach_div[84] + getitem_1861 = _foreach_div[85] + getitem_1862 = _foreach_div[86] + getitem_1863 = _foreach_div[87] + getitem_1864 = _foreach_div[88] + getitem_1865 = _foreach_div[89] + getitem_1866 = _foreach_div[90] + getitem_1867 = _foreach_div[91] + getitem_1868 = _foreach_div[92] + getitem_1869 = _foreach_div[93] + getitem_1870 = _foreach_div[94] + getitem_1871 = _foreach_div[95] + getitem_1872 = _foreach_div[96] + getitem_1873 = _foreach_div[97] + getitem_1874 = _foreach_div[98] + getitem_1875 = _foreach_div[99] + getitem_1876 = _foreach_div[100] + getitem_1877 = _foreach_div[101] + getitem_1878 = _foreach_div[102] + getitem_1879 = _foreach_div[103] + getitem_1880 = _foreach_div[104] + getitem_1881 = _foreach_div[105] + getitem_1882 = _foreach_div[106] + getitem_1883 = _foreach_div[107] + getitem_1884 = _foreach_div[108] + getitem_1885 = _foreach_div[109] + getitem_1886 = _foreach_div[110] + getitem_1887 = _foreach_div[111] + getitem_1888 = _foreach_div[112] + getitem_1889 = _foreach_div[113] + getitem_1890 = _foreach_div[114] + getitem_1891 = _foreach_div[115] + getitem_1892 = _foreach_div[116] + getitem_1893 = _foreach_div[117] + getitem_1894 = _foreach_div[118] + getitem_1895 = _foreach_div[119] + getitem_1896 = _foreach_div[120] + getitem_1897 = _foreach_div[121] + getitem_1898 = _foreach_div[122] + getitem_1899 = _foreach_div[123] + getitem_1900 = _foreach_div[124] + getitem_1901 = _foreach_div[125] + getitem_1902 = _foreach_div[126] + getitem_1903 = _foreach_div[127] + getitem_1904 = _foreach_div[128] + getitem_1905 = _foreach_div[129] + getitem_1906 = _foreach_div[130] + getitem_1907 = _foreach_div[131] + getitem_1908 = _foreach_div[132] + getitem_1909 = _foreach_div[133] + getitem_1910 = _foreach_div[134] + getitem_1911 = _foreach_div[135] + getitem_1912 = _foreach_div[136] + getitem_1913 = _foreach_div[137] + getitem_1914 = _foreach_div[138] + getitem_1915 = _foreach_div[139] + getitem_1916 = _foreach_div[140] + getitem_1917 = _foreach_div[141] + getitem_1918 = _foreach_div[142] + getitem_1919 = _foreach_div[143] + getitem_1920 = _foreach_div[144] + getitem_1921 = _foreach_div[145] + getitem_1922 = _foreach_div[146] + getitem_1923 = _foreach_div[147]; _foreach_div = None + _foreach_reciprocal = torch.ops.aten._foreach_reciprocal.default([getitem_1776, getitem_1777, getitem_1778, getitem_1779, getitem_1780, getitem_1781, getitem_1782, getitem_1783, getitem_1784, getitem_1785, getitem_1786, getitem_1787, getitem_1788, getitem_1789, getitem_1790, getitem_1791, getitem_1792, getitem_1793, getitem_1794, getitem_1795, getitem_1796, getitem_1797, getitem_1798, getitem_1799, getitem_1800, getitem_1801, getitem_1802, getitem_1803, getitem_1804, getitem_1805, getitem_1806, getitem_1807, getitem_1808, getitem_1809, getitem_1810, getitem_1811, getitem_1812, getitem_1813, getitem_1814, getitem_1815, getitem_1816, getitem_1817, getitem_1818, getitem_1819, getitem_1820, getitem_1821, getitem_1822, getitem_1823, getitem_1824, getitem_1825, getitem_1826, getitem_1827, getitem_1828, getitem_1829, getitem_1830, getitem_1831, getitem_1832, getitem_1833, 
getitem_1834, getitem_1835, getitem_1836, getitem_1837, getitem_1838, getitem_1839, getitem_1840, getitem_1841, getitem_1842, getitem_1843, getitem_1844, getitem_1845, getitem_1846, getitem_1847, getitem_1848, getitem_1849, getitem_1850, getitem_1851, getitem_1852, getitem_1853, getitem_1854, getitem_1855, getitem_1856, getitem_1857, getitem_1858, getitem_1859, getitem_1860, getitem_1861, getitem_1862, getitem_1863, getitem_1864, getitem_1865, getitem_1866, getitem_1867, getitem_1868, getitem_1869, getitem_1870, getitem_1871, getitem_1872, getitem_1873, getitem_1874, getitem_1875, getitem_1876, getitem_1877, getitem_1878, getitem_1879, getitem_1880, getitem_1881, getitem_1882, getitem_1883, getitem_1884, getitem_1885, getitem_1886, getitem_1887, getitem_1888, getitem_1889, getitem_1890, getitem_1891, getitem_1892, getitem_1893, getitem_1894, getitem_1895, getitem_1896, getitem_1897, getitem_1898, getitem_1899, getitem_1900, getitem_1901, getitem_1902, getitem_1903, getitem_1904, getitem_1905, getitem_1906, getitem_1907, getitem_1908, getitem_1909, getitem_1910, getitem_1911, getitem_1912, getitem_1913, getitem_1914, getitem_1915, getitem_1916, getitem_1917, getitem_1918, getitem_1919, getitem_1920, getitem_1921, getitem_1922, getitem_1923]); getitem_1776 = getitem_1777 = getitem_1778 = getitem_1779 = getitem_1780 = getitem_1781 = getitem_1782 = getitem_1783 = getitem_1784 = getitem_1785 = getitem_1786 = getitem_1787 = getitem_1788 = getitem_1789 = getitem_1790 = getitem_1791 = getitem_1792 = getitem_1793 = getitem_1794 = getitem_1795 = getitem_1796 = getitem_1797 = getitem_1798 = getitem_1799 = getitem_1800 = getitem_1801 = getitem_1802 = getitem_1803 = getitem_1804 = getitem_1805 = getitem_1806 = getitem_1807 = getitem_1808 = getitem_1809 = getitem_1810 = getitem_1811 = getitem_1812 = getitem_1813 = getitem_1814 = getitem_1815 = getitem_1816 = getitem_1817 = getitem_1818 = getitem_1819 = getitem_1820 = getitem_1821 = getitem_1822 = getitem_1823 = getitem_1824 = getitem_1825 = getitem_1826 = getitem_1827 = getitem_1828 = getitem_1829 = getitem_1830 = getitem_1831 = getitem_1832 = getitem_1833 = getitem_1834 = getitem_1835 = getitem_1836 = getitem_1837 = getitem_1838 = getitem_1839 = getitem_1840 = getitem_1841 = getitem_1842 = getitem_1843 = getitem_1844 = getitem_1845 = getitem_1846 = getitem_1847 = getitem_1848 = getitem_1849 = getitem_1850 = getitem_1851 = getitem_1852 = getitem_1853 = getitem_1854 = getitem_1855 = getitem_1856 = getitem_1857 = getitem_1858 = getitem_1859 = getitem_1860 = getitem_1861 = getitem_1862 = getitem_1863 = getitem_1864 = getitem_1865 = getitem_1866 = getitem_1867 = getitem_1868 = getitem_1869 = getitem_1870 = getitem_1871 = getitem_1872 = getitem_1873 = getitem_1874 = getitem_1875 = getitem_1876 = getitem_1877 = getitem_1878 = getitem_1879 = getitem_1880 = getitem_1881 = getitem_1882 = getitem_1883 = getitem_1884 = getitem_1885 = getitem_1886 = getitem_1887 = getitem_1888 = getitem_1889 = getitem_1890 = getitem_1891 = getitem_1892 = getitem_1893 = getitem_1894 = getitem_1895 = getitem_1896 = getitem_1897 = getitem_1898 = getitem_1899 = getitem_1900 = getitem_1901 = getitem_1902 = getitem_1903 = getitem_1904 = getitem_1905 = getitem_1906 = getitem_1907 = getitem_1908 = getitem_1909 = getitem_1910 = getitem_1911 = getitem_1912 = getitem_1913 = getitem_1914 = getitem_1915 = getitem_1916 = getitem_1917 = getitem_1918 = getitem_1919 = getitem_1920 = getitem_1921 = getitem_1922 = getitem_1923 = None + getitem_1924 = _foreach_reciprocal[0] + getitem_1925 = 
_foreach_reciprocal[1] + getitem_1926 = _foreach_reciprocal[2] + getitem_1927 = _foreach_reciprocal[3] + getitem_1928 = _foreach_reciprocal[4] + getitem_1929 = _foreach_reciprocal[5] + getitem_1930 = _foreach_reciprocal[6] + getitem_1931 = _foreach_reciprocal[7] + getitem_1932 = _foreach_reciprocal[8] + getitem_1933 = _foreach_reciprocal[9] + getitem_1934 = _foreach_reciprocal[10] + getitem_1935 = _foreach_reciprocal[11] + getitem_1936 = _foreach_reciprocal[12] + getitem_1937 = _foreach_reciprocal[13] + getitem_1938 = _foreach_reciprocal[14] + getitem_1939 = _foreach_reciprocal[15] + getitem_1940 = _foreach_reciprocal[16] + getitem_1941 = _foreach_reciprocal[17] + getitem_1942 = _foreach_reciprocal[18] + getitem_1943 = _foreach_reciprocal[19] + getitem_1944 = _foreach_reciprocal[20] + getitem_1945 = _foreach_reciprocal[21] + getitem_1946 = _foreach_reciprocal[22] + getitem_1947 = _foreach_reciprocal[23] + getitem_1948 = _foreach_reciprocal[24] + getitem_1949 = _foreach_reciprocal[25] + getitem_1950 = _foreach_reciprocal[26] + getitem_1951 = _foreach_reciprocal[27] + getitem_1952 = _foreach_reciprocal[28] + getitem_1953 = _foreach_reciprocal[29] + getitem_1954 = _foreach_reciprocal[30] + getitem_1955 = _foreach_reciprocal[31] + getitem_1956 = _foreach_reciprocal[32] + getitem_1957 = _foreach_reciprocal[33] + getitem_1958 = _foreach_reciprocal[34] + getitem_1959 = _foreach_reciprocal[35] + getitem_1960 = _foreach_reciprocal[36] + getitem_1961 = _foreach_reciprocal[37] + getitem_1962 = _foreach_reciprocal[38] + getitem_1963 = _foreach_reciprocal[39] + getitem_1964 = _foreach_reciprocal[40] + getitem_1965 = _foreach_reciprocal[41] + getitem_1966 = _foreach_reciprocal[42] + getitem_1967 = _foreach_reciprocal[43] + getitem_1968 = _foreach_reciprocal[44] + getitem_1969 = _foreach_reciprocal[45] + getitem_1970 = _foreach_reciprocal[46] + getitem_1971 = _foreach_reciprocal[47] + getitem_1972 = _foreach_reciprocal[48] + getitem_1973 = _foreach_reciprocal[49] + getitem_1974 = _foreach_reciprocal[50] + getitem_1975 = _foreach_reciprocal[51] + getitem_1976 = _foreach_reciprocal[52] + getitem_1977 = _foreach_reciprocal[53] + getitem_1978 = _foreach_reciprocal[54] + getitem_1979 = _foreach_reciprocal[55] + getitem_1980 = _foreach_reciprocal[56] + getitem_1981 = _foreach_reciprocal[57] + getitem_1982 = _foreach_reciprocal[58] + getitem_1983 = _foreach_reciprocal[59] + getitem_1984 = _foreach_reciprocal[60] + getitem_1985 = _foreach_reciprocal[61] + getitem_1986 = _foreach_reciprocal[62] + getitem_1987 = _foreach_reciprocal[63] + getitem_1988 = _foreach_reciprocal[64] + getitem_1989 = _foreach_reciprocal[65] + getitem_1990 = _foreach_reciprocal[66] + getitem_1991 = _foreach_reciprocal[67] + getitem_1992 = _foreach_reciprocal[68] + getitem_1993 = _foreach_reciprocal[69] + getitem_1994 = _foreach_reciprocal[70] + getitem_1995 = _foreach_reciprocal[71] + getitem_1996 = _foreach_reciprocal[72] + getitem_1997 = _foreach_reciprocal[73] + getitem_1998 = _foreach_reciprocal[74] + getitem_1999 = _foreach_reciprocal[75] + getitem_2000 = _foreach_reciprocal[76] + getitem_2001 = _foreach_reciprocal[77] + getitem_2002 = _foreach_reciprocal[78] + getitem_2003 = _foreach_reciprocal[79] + getitem_2004 = _foreach_reciprocal[80] + getitem_2005 = _foreach_reciprocal[81] + getitem_2006 = _foreach_reciprocal[82] + getitem_2007 = _foreach_reciprocal[83] + getitem_2008 = _foreach_reciprocal[84] + getitem_2009 = _foreach_reciprocal[85] + getitem_2010 = _foreach_reciprocal[86] + getitem_2011 = _foreach_reciprocal[87] + 
getitem_2012 = _foreach_reciprocal[88] + getitem_2013 = _foreach_reciprocal[89] + getitem_2014 = _foreach_reciprocal[90] + getitem_2015 = _foreach_reciprocal[91] + getitem_2016 = _foreach_reciprocal[92] + getitem_2017 = _foreach_reciprocal[93] + getitem_2018 = _foreach_reciprocal[94] + getitem_2019 = _foreach_reciprocal[95] + getitem_2020 = _foreach_reciprocal[96] + getitem_2021 = _foreach_reciprocal[97] + getitem_2022 = _foreach_reciprocal[98] + getitem_2023 = _foreach_reciprocal[99] + getitem_2024 = _foreach_reciprocal[100] + getitem_2025 = _foreach_reciprocal[101] + getitem_2026 = _foreach_reciprocal[102] + getitem_2027 = _foreach_reciprocal[103] + getitem_2028 = _foreach_reciprocal[104] + getitem_2029 = _foreach_reciprocal[105] + getitem_2030 = _foreach_reciprocal[106] + getitem_2031 = _foreach_reciprocal[107] + getitem_2032 = _foreach_reciprocal[108] + getitem_2033 = _foreach_reciprocal[109] + getitem_2034 = _foreach_reciprocal[110] + getitem_2035 = _foreach_reciprocal[111] + getitem_2036 = _foreach_reciprocal[112] + getitem_2037 = _foreach_reciprocal[113] + getitem_2038 = _foreach_reciprocal[114] + getitem_2039 = _foreach_reciprocal[115] + getitem_2040 = _foreach_reciprocal[116] + getitem_2041 = _foreach_reciprocal[117] + getitem_2042 = _foreach_reciprocal[118] + getitem_2043 = _foreach_reciprocal[119] + getitem_2044 = _foreach_reciprocal[120] + getitem_2045 = _foreach_reciprocal[121] + getitem_2046 = _foreach_reciprocal[122] + getitem_2047 = _foreach_reciprocal[123] + getitem_2048 = _foreach_reciprocal[124] + getitem_2049 = _foreach_reciprocal[125] + getitem_2050 = _foreach_reciprocal[126] + getitem_2051 = _foreach_reciprocal[127] + getitem_2052 = _foreach_reciprocal[128] + getitem_2053 = _foreach_reciprocal[129] + getitem_2054 = _foreach_reciprocal[130] + getitem_2055 = _foreach_reciprocal[131] + getitem_2056 = _foreach_reciprocal[132] + getitem_2057 = _foreach_reciprocal[133] + getitem_2058 = _foreach_reciprocal[134] + getitem_2059 = _foreach_reciprocal[135] + getitem_2060 = _foreach_reciprocal[136] + getitem_2061 = _foreach_reciprocal[137] + getitem_2062 = _foreach_reciprocal[138] + getitem_2063 = _foreach_reciprocal[139] + getitem_2064 = _foreach_reciprocal[140] + getitem_2065 = _foreach_reciprocal[141] + getitem_2066 = _foreach_reciprocal[142] + getitem_2067 = _foreach_reciprocal[143] + getitem_2068 = _foreach_reciprocal[144] + getitem_2069 = _foreach_reciprocal[145] + getitem_2070 = _foreach_reciprocal[146] + getitem_2071 = _foreach_reciprocal[147]; _foreach_reciprocal = None + _foreach_sqrt = torch.ops.aten._foreach_sqrt.default([getitem_1628, getitem_1629, getitem_1630, getitem_1631, getitem_1632, getitem_1633, getitem_1634, getitem_1635, getitem_1636, getitem_1637, getitem_1638, getitem_1639, getitem_1640, getitem_1641, getitem_1642, getitem_1643, getitem_1644, getitem_1645, getitem_1646, getitem_1647, getitem_1648, getitem_1649, getitem_1650, getitem_1651, getitem_1652, getitem_1653, getitem_1654, getitem_1655, getitem_1656, getitem_1657, getitem_1658, getitem_1659, getitem_1660, getitem_1661, getitem_1662, getitem_1663, getitem_1664, getitem_1665, getitem_1666, getitem_1667, getitem_1668, getitem_1669, getitem_1670, getitem_1671, getitem_1672, getitem_1673, getitem_1674, getitem_1675, getitem_1676, getitem_1677, getitem_1678, getitem_1679, getitem_1680, getitem_1681, getitem_1682, getitem_1683, getitem_1684, getitem_1685, getitem_1686, getitem_1687, getitem_1688, getitem_1689, getitem_1690, getitem_1691, getitem_1692, getitem_1693, getitem_1694, getitem_1695, 
getitem_1696, getitem_1697, getitem_1698, getitem_1699, getitem_1700, getitem_1701, getitem_1702, getitem_1703, getitem_1704, getitem_1705, getitem_1706, getitem_1707, getitem_1708, getitem_1709, getitem_1710, getitem_1711, getitem_1712, getitem_1713, getitem_1714, getitem_1715, getitem_1716, getitem_1717, getitem_1718, getitem_1719, getitem_1720, getitem_1721, getitem_1722, getitem_1723, getitem_1724, getitem_1725, getitem_1726, getitem_1727, getitem_1728, getitem_1729, getitem_1730, getitem_1731, getitem_1732, getitem_1733, getitem_1734, getitem_1735, getitem_1736, getitem_1737, getitem_1738, getitem_1739, getitem_1740, getitem_1741, getitem_1742, getitem_1743, getitem_1744, getitem_1745, getitem_1746, getitem_1747, getitem_1748, getitem_1749, getitem_1750, getitem_1751, getitem_1752, getitem_1753, getitem_1754, getitem_1755, getitem_1756, getitem_1757, getitem_1758, getitem_1759, getitem_1760, getitem_1761, getitem_1762, getitem_1763, getitem_1764, getitem_1765, getitem_1766, getitem_1767, getitem_1768, getitem_1769, getitem_1770, getitem_1771, getitem_1772, getitem_1773, getitem_1774, getitem_1775]); getitem_1628 = getitem_1629 = getitem_1630 = getitem_1631 = getitem_1632 = getitem_1633 = getitem_1634 = getitem_1635 = getitem_1636 = getitem_1637 = getitem_1638 = getitem_1639 = getitem_1640 = getitem_1641 = getitem_1642 = getitem_1643 = getitem_1644 = getitem_1645 = getitem_1646 = getitem_1647 = getitem_1648 = getitem_1649 = getitem_1650 = getitem_1651 = getitem_1652 = getitem_1653 = getitem_1654 = getitem_1655 = getitem_1656 = getitem_1657 = getitem_1658 = getitem_1659 = getitem_1660 = getitem_1661 = getitem_1662 = getitem_1663 = getitem_1664 = getitem_1665 = getitem_1666 = getitem_1667 = getitem_1668 = getitem_1669 = getitem_1670 = getitem_1671 = getitem_1672 = getitem_1673 = getitem_1674 = getitem_1675 = getitem_1676 = getitem_1677 = getitem_1678 = getitem_1679 = getitem_1680 = getitem_1681 = getitem_1682 = getitem_1683 = getitem_1684 = getitem_1685 = getitem_1686 = getitem_1687 = getitem_1688 = getitem_1689 = getitem_1690 = getitem_1691 = getitem_1692 = getitem_1693 = getitem_1694 = getitem_1695 = getitem_1696 = getitem_1697 = getitem_1698 = getitem_1699 = getitem_1700 = getitem_1701 = getitem_1702 = getitem_1703 = getitem_1704 = getitem_1705 = getitem_1706 = getitem_1707 = getitem_1708 = getitem_1709 = getitem_1710 = getitem_1711 = getitem_1712 = getitem_1713 = getitem_1714 = getitem_1715 = getitem_1716 = getitem_1717 = getitem_1718 = getitem_1719 = getitem_1720 = getitem_1721 = getitem_1722 = getitem_1723 = getitem_1724 = getitem_1725 = getitem_1726 = getitem_1727 = getitem_1728 = getitem_1729 = getitem_1730 = getitem_1731 = getitem_1732 = getitem_1733 = getitem_1734 = getitem_1735 = getitem_1736 = getitem_1737 = getitem_1738 = getitem_1739 = getitem_1740 = getitem_1741 = getitem_1742 = getitem_1743 = getitem_1744 = getitem_1745 = getitem_1746 = getitem_1747 = getitem_1748 = getitem_1749 = getitem_1750 = getitem_1751 = getitem_1752 = getitem_1753 = getitem_1754 = getitem_1755 = getitem_1756 = getitem_1757 = getitem_1758 = getitem_1759 = getitem_1760 = getitem_1761 = getitem_1762 = getitem_1763 = getitem_1764 = getitem_1765 = getitem_1766 = getitem_1767 = getitem_1768 = getitem_1769 = getitem_1770 = getitem_1771 = getitem_1772 = getitem_1773 = getitem_1774 = getitem_1775 = None + getitem_2072 = _foreach_sqrt[0] + getitem_2073 = _foreach_sqrt[1] + getitem_2074 = _foreach_sqrt[2] + getitem_2075 = _foreach_sqrt[3] + getitem_2076 = _foreach_sqrt[4] + getitem_2077 = _foreach_sqrt[5] + 
getitem_2078 = _foreach_sqrt[6] + getitem_2079 = _foreach_sqrt[7] + getitem_2080 = _foreach_sqrt[8] + getitem_2081 = _foreach_sqrt[9] + getitem_2082 = _foreach_sqrt[10] + getitem_2083 = _foreach_sqrt[11] + getitem_2084 = _foreach_sqrt[12] + getitem_2085 = _foreach_sqrt[13] + getitem_2086 = _foreach_sqrt[14] + getitem_2087 = _foreach_sqrt[15] + getitem_2088 = _foreach_sqrt[16] + getitem_2089 = _foreach_sqrt[17] + getitem_2090 = _foreach_sqrt[18] + getitem_2091 = _foreach_sqrt[19] + getitem_2092 = _foreach_sqrt[20] + getitem_2093 = _foreach_sqrt[21] + getitem_2094 = _foreach_sqrt[22] + getitem_2095 = _foreach_sqrt[23] + getitem_2096 = _foreach_sqrt[24] + getitem_2097 = _foreach_sqrt[25] + getitem_2098 = _foreach_sqrt[26] + getitem_2099 = _foreach_sqrt[27] + getitem_2100 = _foreach_sqrt[28] + getitem_2101 = _foreach_sqrt[29] + getitem_2102 = _foreach_sqrt[30] + getitem_2103 = _foreach_sqrt[31] + getitem_2104 = _foreach_sqrt[32] + getitem_2105 = _foreach_sqrt[33] + getitem_2106 = _foreach_sqrt[34] + getitem_2107 = _foreach_sqrt[35] + getitem_2108 = _foreach_sqrt[36] + getitem_2109 = _foreach_sqrt[37] + getitem_2110 = _foreach_sqrt[38] + getitem_2111 = _foreach_sqrt[39] + getitem_2112 = _foreach_sqrt[40] + getitem_2113 = _foreach_sqrt[41] + getitem_2114 = _foreach_sqrt[42] + getitem_2115 = _foreach_sqrt[43] + getitem_2116 = _foreach_sqrt[44] + getitem_2117 = _foreach_sqrt[45] + getitem_2118 = _foreach_sqrt[46] + getitem_2119 = _foreach_sqrt[47] + getitem_2120 = _foreach_sqrt[48] + getitem_2121 = _foreach_sqrt[49] + getitem_2122 = _foreach_sqrt[50] + getitem_2123 = _foreach_sqrt[51] + getitem_2124 = _foreach_sqrt[52] + getitem_2125 = _foreach_sqrt[53] + getitem_2126 = _foreach_sqrt[54] + getitem_2127 = _foreach_sqrt[55] + getitem_2128 = _foreach_sqrt[56] + getitem_2129 = _foreach_sqrt[57] + getitem_2130 = _foreach_sqrt[58] + getitem_2131 = _foreach_sqrt[59] + getitem_2132 = _foreach_sqrt[60] + getitem_2133 = _foreach_sqrt[61] + getitem_2134 = _foreach_sqrt[62] + getitem_2135 = _foreach_sqrt[63] + getitem_2136 = _foreach_sqrt[64] + getitem_2137 = _foreach_sqrt[65] + getitem_2138 = _foreach_sqrt[66] + getitem_2139 = _foreach_sqrt[67] + getitem_2140 = _foreach_sqrt[68] + getitem_2141 = _foreach_sqrt[69] + getitem_2142 = _foreach_sqrt[70] + getitem_2143 = _foreach_sqrt[71] + getitem_2144 = _foreach_sqrt[72] + getitem_2145 = _foreach_sqrt[73] + getitem_2146 = _foreach_sqrt[74] + getitem_2147 = _foreach_sqrt[75] + getitem_2148 = _foreach_sqrt[76] + getitem_2149 = _foreach_sqrt[77] + getitem_2150 = _foreach_sqrt[78] + getitem_2151 = _foreach_sqrt[79] + getitem_2152 = _foreach_sqrt[80] + getitem_2153 = _foreach_sqrt[81] + getitem_2154 = _foreach_sqrt[82] + getitem_2155 = _foreach_sqrt[83] + getitem_2156 = _foreach_sqrt[84] + getitem_2157 = _foreach_sqrt[85] + getitem_2158 = _foreach_sqrt[86] + getitem_2159 = _foreach_sqrt[87] + getitem_2160 = _foreach_sqrt[88] + getitem_2161 = _foreach_sqrt[89] + getitem_2162 = _foreach_sqrt[90] + getitem_2163 = _foreach_sqrt[91] + getitem_2164 = _foreach_sqrt[92] + getitem_2165 = _foreach_sqrt[93] + getitem_2166 = _foreach_sqrt[94] + getitem_2167 = _foreach_sqrt[95] + getitem_2168 = _foreach_sqrt[96] + getitem_2169 = _foreach_sqrt[97] + getitem_2170 = _foreach_sqrt[98] + getitem_2171 = _foreach_sqrt[99] + getitem_2172 = _foreach_sqrt[100] + getitem_2173 = _foreach_sqrt[101] + getitem_2174 = _foreach_sqrt[102] + getitem_2175 = _foreach_sqrt[103] + getitem_2176 = _foreach_sqrt[104] + getitem_2177 = _foreach_sqrt[105] + getitem_2178 = _foreach_sqrt[106] + getitem_2179 = 
_foreach_sqrt[107] + getitem_2180 = _foreach_sqrt[108] + getitem_2181 = _foreach_sqrt[109] + getitem_2182 = _foreach_sqrt[110] + getitem_2183 = _foreach_sqrt[111] + getitem_2184 = _foreach_sqrt[112] + getitem_2185 = _foreach_sqrt[113] + getitem_2186 = _foreach_sqrt[114] + getitem_2187 = _foreach_sqrt[115] + getitem_2188 = _foreach_sqrt[116] + getitem_2189 = _foreach_sqrt[117] + getitem_2190 = _foreach_sqrt[118] + getitem_2191 = _foreach_sqrt[119] + getitem_2192 = _foreach_sqrt[120] + getitem_2193 = _foreach_sqrt[121] + getitem_2194 = _foreach_sqrt[122] + getitem_2195 = _foreach_sqrt[123] + getitem_2196 = _foreach_sqrt[124] + getitem_2197 = _foreach_sqrt[125] + getitem_2198 = _foreach_sqrt[126] + getitem_2199 = _foreach_sqrt[127] + getitem_2200 = _foreach_sqrt[128] + getitem_2201 = _foreach_sqrt[129] + getitem_2202 = _foreach_sqrt[130] + getitem_2203 = _foreach_sqrt[131] + getitem_2204 = _foreach_sqrt[132] + getitem_2205 = _foreach_sqrt[133] + getitem_2206 = _foreach_sqrt[134] + getitem_2207 = _foreach_sqrt[135] + getitem_2208 = _foreach_sqrt[136] + getitem_2209 = _foreach_sqrt[137] + getitem_2210 = _foreach_sqrt[138] + getitem_2211 = _foreach_sqrt[139] + getitem_2212 = _foreach_sqrt[140] + getitem_2213 = _foreach_sqrt[141] + getitem_2214 = _foreach_sqrt[142] + getitem_2215 = _foreach_sqrt[143] + getitem_2216 = _foreach_sqrt[144] + getitem_2217 = _foreach_sqrt[145] + getitem_2218 = _foreach_sqrt[146] + getitem_2219 = _foreach_sqrt[147]; _foreach_sqrt = None + _foreach_sqrt_1 = torch.ops.aten._foreach_sqrt.default([getitem_888, getitem_889, getitem_890, getitem_891, getitem_892, getitem_893, getitem_894, getitem_895, getitem_896, getitem_897, getitem_898, getitem_899, getitem_900, getitem_901, getitem_902, getitem_903, getitem_904, getitem_905, getitem_906, getitem_907, getitem_908, getitem_909, getitem_910, getitem_911, getitem_912, getitem_913, getitem_914, getitem_915, getitem_916, getitem_917, getitem_918, getitem_919, getitem_920, getitem_921, getitem_922, getitem_923, getitem_924, getitem_925, getitem_926, getitem_927, getitem_928, getitem_929, getitem_930, getitem_931, getitem_932, getitem_933, getitem_934, getitem_935, getitem_936, getitem_937, getitem_938, getitem_939, getitem_940, getitem_941, getitem_942, getitem_943, getitem_944, getitem_945, getitem_946, getitem_947, getitem_948, getitem_949, getitem_950, getitem_951, getitem_952, getitem_953, getitem_954, getitem_955, getitem_956, getitem_957, getitem_958, getitem_959, getitem_960, getitem_961, getitem_962, getitem_963, getitem_964, getitem_965, getitem_966, getitem_967, getitem_968, getitem_969, getitem_970, getitem_971, getitem_972, getitem_973, getitem_974, getitem_975, getitem_976, getitem_977, getitem_978, getitem_979, getitem_980, getitem_981, getitem_982, getitem_983, getitem_984, getitem_985, getitem_986, getitem_987, getitem_988, getitem_989, getitem_990, getitem_991, getitem_992, getitem_993, getitem_994, getitem_995, getitem_996, getitem_997, getitem_998, getitem_999, getitem_1000, getitem_1001, getitem_1002, getitem_1003, getitem_1004, getitem_1005, getitem_1006, getitem_1007, getitem_1008, getitem_1009, getitem_1010, getitem_1011, getitem_1012, getitem_1013, getitem_1014, getitem_1015, getitem_1016, getitem_1017, getitem_1018, getitem_1019, getitem_1020, getitem_1021, getitem_1022, getitem_1023, getitem_1024, getitem_1025, getitem_1026, getitem_1027, getitem_1028, getitem_1029, getitem_1030, getitem_1031, getitem_1032, getitem_1033, getitem_1034, getitem_1035]) + getitem_2220 = _foreach_sqrt_1[0] + getitem_2221 = 
_foreach_sqrt_1[1] + getitem_2222 = _foreach_sqrt_1[2] + getitem_2223 = _foreach_sqrt_1[3] + getitem_2224 = _foreach_sqrt_1[4] + getitem_2225 = _foreach_sqrt_1[5] + getitem_2226 = _foreach_sqrt_1[6] + getitem_2227 = _foreach_sqrt_1[7] + getitem_2228 = _foreach_sqrt_1[8] + getitem_2229 = _foreach_sqrt_1[9] + getitem_2230 = _foreach_sqrt_1[10] + getitem_2231 = _foreach_sqrt_1[11] + getitem_2232 = _foreach_sqrt_1[12] + getitem_2233 = _foreach_sqrt_1[13] + getitem_2234 = _foreach_sqrt_1[14] + getitem_2235 = _foreach_sqrt_1[15] + getitem_2236 = _foreach_sqrt_1[16] + getitem_2237 = _foreach_sqrt_1[17] + getitem_2238 = _foreach_sqrt_1[18] + getitem_2239 = _foreach_sqrt_1[19] + getitem_2240 = _foreach_sqrt_1[20] + getitem_2241 = _foreach_sqrt_1[21] + getitem_2242 = _foreach_sqrt_1[22] + getitem_2243 = _foreach_sqrt_1[23] + getitem_2244 = _foreach_sqrt_1[24] + getitem_2245 = _foreach_sqrt_1[25] + getitem_2246 = _foreach_sqrt_1[26] + getitem_2247 = _foreach_sqrt_1[27] + getitem_2248 = _foreach_sqrt_1[28] + getitem_2249 = _foreach_sqrt_1[29] + getitem_2250 = _foreach_sqrt_1[30] + getitem_2251 = _foreach_sqrt_1[31] + getitem_2252 = _foreach_sqrt_1[32] + getitem_2253 = _foreach_sqrt_1[33] + getitem_2254 = _foreach_sqrt_1[34] + getitem_2255 = _foreach_sqrt_1[35] + getitem_2256 = _foreach_sqrt_1[36] + getitem_2257 = _foreach_sqrt_1[37] + getitem_2258 = _foreach_sqrt_1[38] + getitem_2259 = _foreach_sqrt_1[39] + getitem_2260 = _foreach_sqrt_1[40] + getitem_2261 = _foreach_sqrt_1[41] + getitem_2262 = _foreach_sqrt_1[42] + getitem_2263 = _foreach_sqrt_1[43] + getitem_2264 = _foreach_sqrt_1[44] + getitem_2265 = _foreach_sqrt_1[45] + getitem_2266 = _foreach_sqrt_1[46] + getitem_2267 = _foreach_sqrt_1[47] + getitem_2268 = _foreach_sqrt_1[48] + getitem_2269 = _foreach_sqrt_1[49] + getitem_2270 = _foreach_sqrt_1[50] + getitem_2271 = _foreach_sqrt_1[51] + getitem_2272 = _foreach_sqrt_1[52] + getitem_2273 = _foreach_sqrt_1[53] + getitem_2274 = _foreach_sqrt_1[54] + getitem_2275 = _foreach_sqrt_1[55] + getitem_2276 = _foreach_sqrt_1[56] + getitem_2277 = _foreach_sqrt_1[57] + getitem_2278 = _foreach_sqrt_1[58] + getitem_2279 = _foreach_sqrt_1[59] + getitem_2280 = _foreach_sqrt_1[60] + getitem_2281 = _foreach_sqrt_1[61] + getitem_2282 = _foreach_sqrt_1[62] + getitem_2283 = _foreach_sqrt_1[63] + getitem_2284 = _foreach_sqrt_1[64] + getitem_2285 = _foreach_sqrt_1[65] + getitem_2286 = _foreach_sqrt_1[66] + getitem_2287 = _foreach_sqrt_1[67] + getitem_2288 = _foreach_sqrt_1[68] + getitem_2289 = _foreach_sqrt_1[69] + getitem_2290 = _foreach_sqrt_1[70] + getitem_2291 = _foreach_sqrt_1[71] + getitem_2292 = _foreach_sqrt_1[72] + getitem_2293 = _foreach_sqrt_1[73] + getitem_2294 = _foreach_sqrt_1[74] + getitem_2295 = _foreach_sqrt_1[75] + getitem_2296 = _foreach_sqrt_1[76] + getitem_2297 = _foreach_sqrt_1[77] + getitem_2298 = _foreach_sqrt_1[78] + getitem_2299 = _foreach_sqrt_1[79] + getitem_2300 = _foreach_sqrt_1[80] + getitem_2301 = _foreach_sqrt_1[81] + getitem_2302 = _foreach_sqrt_1[82] + getitem_2303 = _foreach_sqrt_1[83] + getitem_2304 = _foreach_sqrt_1[84] + getitem_2305 = _foreach_sqrt_1[85] + getitem_2306 = _foreach_sqrt_1[86] + getitem_2307 = _foreach_sqrt_1[87] + getitem_2308 = _foreach_sqrt_1[88] + getitem_2309 = _foreach_sqrt_1[89] + getitem_2310 = _foreach_sqrt_1[90] + getitem_2311 = _foreach_sqrt_1[91] + getitem_2312 = _foreach_sqrt_1[92] + getitem_2313 = _foreach_sqrt_1[93] + getitem_2314 = _foreach_sqrt_1[94] + getitem_2315 = _foreach_sqrt_1[95] + getitem_2316 = _foreach_sqrt_1[96] + getitem_2317 = 
_foreach_sqrt_1[97] + getitem_2318 = _foreach_sqrt_1[98] + getitem_2319 = _foreach_sqrt_1[99] + getitem_2320 = _foreach_sqrt_1[100] + getitem_2321 = _foreach_sqrt_1[101] + getitem_2322 = _foreach_sqrt_1[102] + getitem_2323 = _foreach_sqrt_1[103] + getitem_2324 = _foreach_sqrt_1[104] + getitem_2325 = _foreach_sqrt_1[105] + getitem_2326 = _foreach_sqrt_1[106] + getitem_2327 = _foreach_sqrt_1[107] + getitem_2328 = _foreach_sqrt_1[108] + getitem_2329 = _foreach_sqrt_1[109] + getitem_2330 = _foreach_sqrt_1[110] + getitem_2331 = _foreach_sqrt_1[111] + getitem_2332 = _foreach_sqrt_1[112] + getitem_2333 = _foreach_sqrt_1[113] + getitem_2334 = _foreach_sqrt_1[114] + getitem_2335 = _foreach_sqrt_1[115] + getitem_2336 = _foreach_sqrt_1[116] + getitem_2337 = _foreach_sqrt_1[117] + getitem_2338 = _foreach_sqrt_1[118] + getitem_2339 = _foreach_sqrt_1[119] + getitem_2340 = _foreach_sqrt_1[120] + getitem_2341 = _foreach_sqrt_1[121] + getitem_2342 = _foreach_sqrt_1[122] + getitem_2343 = _foreach_sqrt_1[123] + getitem_2344 = _foreach_sqrt_1[124] + getitem_2345 = _foreach_sqrt_1[125] + getitem_2346 = _foreach_sqrt_1[126] + getitem_2347 = _foreach_sqrt_1[127] + getitem_2348 = _foreach_sqrt_1[128] + getitem_2349 = _foreach_sqrt_1[129] + getitem_2350 = _foreach_sqrt_1[130] + getitem_2351 = _foreach_sqrt_1[131] + getitem_2352 = _foreach_sqrt_1[132] + getitem_2353 = _foreach_sqrt_1[133] + getitem_2354 = _foreach_sqrt_1[134] + getitem_2355 = _foreach_sqrt_1[135] + getitem_2356 = _foreach_sqrt_1[136] + getitem_2357 = _foreach_sqrt_1[137] + getitem_2358 = _foreach_sqrt_1[138] + getitem_2359 = _foreach_sqrt_1[139] + getitem_2360 = _foreach_sqrt_1[140] + getitem_2361 = _foreach_sqrt_1[141] + getitem_2362 = _foreach_sqrt_1[142] + getitem_2363 = _foreach_sqrt_1[143] + getitem_2364 = _foreach_sqrt_1[144] + getitem_2365 = _foreach_sqrt_1[145] + getitem_2366 = _foreach_sqrt_1[146] + getitem_2367 = _foreach_sqrt_1[147]; _foreach_sqrt_1 = None + _foreach_div_1 = torch.ops.aten._foreach_div.List([getitem_2220, getitem_2221, getitem_2222, getitem_2223, getitem_2224, getitem_2225, getitem_2226, getitem_2227, getitem_2228, getitem_2229, getitem_2230, getitem_2231, getitem_2232, getitem_2233, getitem_2234, getitem_2235, getitem_2236, getitem_2237, getitem_2238, getitem_2239, getitem_2240, getitem_2241, getitem_2242, getitem_2243, getitem_2244, getitem_2245, getitem_2246, getitem_2247, getitem_2248, getitem_2249, getitem_2250, getitem_2251, getitem_2252, getitem_2253, getitem_2254, getitem_2255, getitem_2256, getitem_2257, getitem_2258, getitem_2259, getitem_2260, getitem_2261, getitem_2262, getitem_2263, getitem_2264, getitem_2265, getitem_2266, getitem_2267, getitem_2268, getitem_2269, getitem_2270, getitem_2271, getitem_2272, getitem_2273, getitem_2274, getitem_2275, getitem_2276, getitem_2277, getitem_2278, getitem_2279, getitem_2280, getitem_2281, getitem_2282, getitem_2283, getitem_2284, getitem_2285, getitem_2286, getitem_2287, getitem_2288, getitem_2289, getitem_2290, getitem_2291, getitem_2292, getitem_2293, getitem_2294, getitem_2295, getitem_2296, getitem_2297, getitem_2298, getitem_2299, getitem_2300, getitem_2301, getitem_2302, getitem_2303, getitem_2304, getitem_2305, getitem_2306, getitem_2307, getitem_2308, getitem_2309, getitem_2310, getitem_2311, getitem_2312, getitem_2313, getitem_2314, getitem_2315, getitem_2316, getitem_2317, getitem_2318, getitem_2319, getitem_2320, getitem_2321, getitem_2322, getitem_2323, getitem_2324, getitem_2325, getitem_2326, getitem_2327, getitem_2328, getitem_2329, getitem_2330, 
getitem_2331, getitem_2332, getitem_2333, getitem_2334, getitem_2335, getitem_2336, getitem_2337, getitem_2338, getitem_2339, getitem_2340, getitem_2341, getitem_2342, getitem_2343, getitem_2344, getitem_2345, getitem_2346, getitem_2347, getitem_2348, getitem_2349, getitem_2350, getitem_2351, getitem_2352, getitem_2353, getitem_2354, getitem_2355, getitem_2356, getitem_2357, getitem_2358, getitem_2359, getitem_2360, getitem_2361, getitem_2362, getitem_2363, getitem_2364, getitem_2365, getitem_2366, getitem_2367], [getitem_2072, getitem_2073, getitem_2074, getitem_2075, getitem_2076, getitem_2077, getitem_2078, getitem_2079, getitem_2080, getitem_2081, getitem_2082, getitem_2083, getitem_2084, getitem_2085, getitem_2086, getitem_2087, getitem_2088, getitem_2089, getitem_2090, getitem_2091, getitem_2092, getitem_2093, getitem_2094, getitem_2095, getitem_2096, getitem_2097, getitem_2098, getitem_2099, getitem_2100, getitem_2101, getitem_2102, getitem_2103, getitem_2104, getitem_2105, getitem_2106, getitem_2107, getitem_2108, getitem_2109, getitem_2110, getitem_2111, getitem_2112, getitem_2113, getitem_2114, getitem_2115, getitem_2116, getitem_2117, getitem_2118, getitem_2119, getitem_2120, getitem_2121, getitem_2122, getitem_2123, getitem_2124, getitem_2125, getitem_2126, getitem_2127, getitem_2128, getitem_2129, getitem_2130, getitem_2131, getitem_2132, getitem_2133, getitem_2134, getitem_2135, getitem_2136, getitem_2137, getitem_2138, getitem_2139, getitem_2140, getitem_2141, getitem_2142, getitem_2143, getitem_2144, getitem_2145, getitem_2146, getitem_2147, getitem_2148, getitem_2149, getitem_2150, getitem_2151, getitem_2152, getitem_2153, getitem_2154, getitem_2155, getitem_2156, getitem_2157, getitem_2158, getitem_2159, getitem_2160, getitem_2161, getitem_2162, getitem_2163, getitem_2164, getitem_2165, getitem_2166, getitem_2167, getitem_2168, getitem_2169, getitem_2170, getitem_2171, getitem_2172, getitem_2173, getitem_2174, getitem_2175, getitem_2176, getitem_2177, getitem_2178, getitem_2179, getitem_2180, getitem_2181, getitem_2182, getitem_2183, getitem_2184, getitem_2185, getitem_2186, getitem_2187, getitem_2188, getitem_2189, getitem_2190, getitem_2191, getitem_2192, getitem_2193, getitem_2194, getitem_2195, getitem_2196, getitem_2197, getitem_2198, getitem_2199, getitem_2200, getitem_2201, getitem_2202, getitem_2203, getitem_2204, getitem_2205, getitem_2206, getitem_2207, getitem_2208, getitem_2209, getitem_2210, getitem_2211, getitem_2212, getitem_2213, getitem_2214, getitem_2215, getitem_2216, getitem_2217, getitem_2218, getitem_2219]); getitem_2220 = getitem_2221 = getitem_2222 = getitem_2223 = getitem_2224 = getitem_2225 = getitem_2226 = getitem_2227 = getitem_2228 = getitem_2229 = getitem_2230 = getitem_2231 = getitem_2232 = getitem_2233 = getitem_2234 = getitem_2235 = getitem_2236 = getitem_2237 = getitem_2238 = getitem_2239 = getitem_2240 = getitem_2241 = getitem_2242 = getitem_2243 = getitem_2244 = getitem_2245 = getitem_2246 = getitem_2247 = getitem_2248 = getitem_2249 = getitem_2250 = getitem_2251 = getitem_2252 = getitem_2253 = getitem_2254 = getitem_2255 = getitem_2256 = getitem_2257 = getitem_2258 = getitem_2259 = getitem_2260 = getitem_2261 = getitem_2262 = getitem_2263 = getitem_2264 = getitem_2265 = getitem_2266 = getitem_2267 = getitem_2268 = getitem_2269 = getitem_2270 = getitem_2271 = getitem_2272 = getitem_2273 = getitem_2274 = getitem_2275 = getitem_2276 = getitem_2277 = getitem_2278 = getitem_2279 = getitem_2280 = getitem_2281 = getitem_2282 = getitem_2283 = 
getitem_2284 = getitem_2285 = ... = getitem_2367 = getitem_2072 = getitem_2073 = ... = getitem_2219 = None
+ getitem_2368 = _foreach_div_1[0]
+ getitem_2369 = _foreach_div_1[1]
+ ...
+ getitem_2515 = _foreach_div_1[147]; _foreach_div_1 = None
+ _foreach_add_3 = torch.ops.aten._foreach_add.Scalar([getitem_2368, getitem_2369, ..., getitem_2515], 1e-08); getitem_2368 = getitem_2369 = ... = getitem_2515 = None
+ getitem_2516 = _foreach_add_3[0]
+ getitem_2517 = _foreach_add_3[1]
+ ...
+ getitem_2663 = _foreach_add_3[147]; _foreach_add_3 = None
+ _foreach_div_2 = torch.ops.aten._foreach_div.List([getitem_2516, getitem_2517, ..., getitem_2663], [getitem_1924, getitem_1925, ..., getitem_2071]); getitem_2516 = ... = getitem_2663 = getitem_1924 = ... = getitem_2071 = None
+ getitem_2664 = _foreach_div_2[0]
+ getitem_2665 = _foreach_div_2[1]
+ ...
+ getitem_2811 = _foreach_div_2[147]; _foreach_div_2 = None
+ _foreach_div_3 = torch.ops.aten._foreach_div.List([getitem_444, getitem_445, ..., getitem_591], [getitem_2664, getitem_2665, ..., getitem_2811]); getitem_2664 = ... = getitem_2811 = None
+ getitem_2812 = _foreach_div_3[0]
+ getitem_2813 = _foreach_div_3[1]
+ ...
+ getitem_2959 = _foreach_div_3[147]; _foreach_div_3 = None
+ _foreach_add_4 = torch.ops.aten._foreach_add.List([arg0_1, arg1_1, ..., arg147_1], [getitem_2812, getitem_2813, ..., getitem_2959]); getitem_2812 = ... = getitem_2959 = None
+ getitem_2960 = _foreach_add_4[0]
+ getitem_2961 = _foreach_add_4[1]
+ ...
+ getitem_3107 = _foreach_add_4[147]; _foreach_add_4 = None
+ copy_ = torch.ops.aten.copy_.default(arg0_1, getitem_2960); arg0_1 = getitem_2960 = copy_ = None
+ copy__1 = torch.ops.aten.copy_.default(arg1_1, getitem_2961); arg1_1 = getitem_2961 = copy__1 = None
+ ...
+ copy__143 = torch.ops.aten.copy_.default(arg143_1, getitem_3103); arg143_1 = getitem_3103 =
copy__143 = None + copy__144 = torch.ops.aten.copy_.default(arg144_1, getitem_3104); arg144_1 = getitem_3104 = copy__144 = None + copy__145 = torch.ops.aten.copy_.default(arg145_1, getitem_3105); arg145_1 = getitem_3105 = copy__145 = None + copy__146 = torch.ops.aten.copy_.default(arg146_1, getitem_3106); arg146_1 = getitem_3106 = copy__146 = None + copy__147 = torch.ops.aten.copy_.default(arg147_1, getitem_3107); arg147_1 = getitem_3107 = copy__147 = None + copy__148 = torch.ops.aten.copy_.default(arg148_1, getitem_1); arg148_1 = getitem_1 = copy__148 = None + copy__149 = torch.ops.aten.copy_.default(arg149_1, getitem_445); arg149_1 = getitem_445 = copy__149 = None + copy__150 = torch.ops.aten.copy_.default(arg150_1, getitem_889); arg150_1 = getitem_889 = copy__150 = None + copy__151 = torch.ops.aten.copy_.default(arg299_1, getitem_444); arg299_1 = getitem_444 = copy__151 = None + copy__152 = torch.ops.aten.copy_.default(arg300_1, getitem_446); arg300_1 = getitem_446 = copy__152 = None + copy__153 = torch.ops.aten.copy_.default(arg301_1, getitem_447); arg301_1 = getitem_447 = copy__153 = None + copy__154 = torch.ops.aten.copy_.default(arg302_1, getitem_448); arg302_1 = getitem_448 = copy__154 = None + copy__155 = torch.ops.aten.copy_.default(arg303_1, getitem_449); arg303_1 = getitem_449 = copy__155 = None + copy__156 = torch.ops.aten.copy_.default(arg304_1, getitem_450); arg304_1 = getitem_450 = copy__156 = None + copy__157 = torch.ops.aten.copy_.default(arg305_1, getitem_451); arg305_1 = getitem_451 = copy__157 = None + copy__158 = torch.ops.aten.copy_.default(arg306_1, getitem_452); arg306_1 = getitem_452 = copy__158 = None + copy__159 = torch.ops.aten.copy_.default(arg307_1, getitem_453); arg307_1 = getitem_453 = copy__159 = None + copy__160 = torch.ops.aten.copy_.default(arg308_1, getitem_454); arg308_1 = getitem_454 = copy__160 = None + copy__161 = torch.ops.aten.copy_.default(arg309_1, getitem_455); arg309_1 = getitem_455 = copy__161 = None + copy__162 = torch.ops.aten.copy_.default(arg310_1, getitem_456); arg310_1 = getitem_456 = copy__162 = None + copy__163 = torch.ops.aten.copy_.default(arg311_1, getitem_457); arg311_1 = getitem_457 = copy__163 = None + copy__164 = torch.ops.aten.copy_.default(arg312_1, getitem_458); arg312_1 = getitem_458 = copy__164 = None + copy__165 = torch.ops.aten.copy_.default(arg313_1, getitem_459); arg313_1 = getitem_459 = copy__165 = None + copy__166 = torch.ops.aten.copy_.default(arg314_1, getitem_460); arg314_1 = getitem_460 = copy__166 = None + copy__167 = torch.ops.aten.copy_.default(arg315_1, getitem_461); arg315_1 = getitem_461 = copy__167 = None + copy__168 = torch.ops.aten.copy_.default(arg316_1, getitem_462); arg316_1 = getitem_462 = copy__168 = None + copy__169 = torch.ops.aten.copy_.default(arg317_1, getitem_463); arg317_1 = getitem_463 = copy__169 = None + copy__170 = torch.ops.aten.copy_.default(arg318_1, getitem_464); arg318_1 = getitem_464 = copy__170 = None + copy__171 = torch.ops.aten.copy_.default(arg319_1, getitem_465); arg319_1 = getitem_465 = copy__171 = None + copy__172 = torch.ops.aten.copy_.default(arg320_1, getitem_466); arg320_1 = getitem_466 = copy__172 = None + copy__173 = torch.ops.aten.copy_.default(arg321_1, getitem_467); arg321_1 = getitem_467 = copy__173 = None + copy__174 = torch.ops.aten.copy_.default(arg322_1, getitem_468); arg322_1 = getitem_468 = copy__174 = None + copy__175 = torch.ops.aten.copy_.default(arg323_1, getitem_469); arg323_1 = getitem_469 = copy__175 = None + copy__176 = 
torch.ops.aten.copy_.default(arg324_1, getitem_470); arg324_1 = getitem_470 = copy__176 = None + copy__177 = torch.ops.aten.copy_.default(arg325_1, getitem_471); arg325_1 = getitem_471 = copy__177 = None + copy__178 = torch.ops.aten.copy_.default(arg326_1, getitem_472); arg326_1 = getitem_472 = copy__178 = None + copy__179 = torch.ops.aten.copy_.default(arg327_1, getitem_473); arg327_1 = getitem_473 = copy__179 = None + copy__180 = torch.ops.aten.copy_.default(arg328_1, getitem_474); arg328_1 = getitem_474 = copy__180 = None + copy__181 = torch.ops.aten.copy_.default(arg329_1, getitem_475); arg329_1 = getitem_475 = copy__181 = None + copy__182 = torch.ops.aten.copy_.default(arg330_1, getitem_476); arg330_1 = getitem_476 = copy__182 = None + copy__183 = torch.ops.aten.copy_.default(arg331_1, getitem_477); arg331_1 = getitem_477 = copy__183 = None + copy__184 = torch.ops.aten.copy_.default(arg332_1, getitem_478); arg332_1 = getitem_478 = copy__184 = None + copy__185 = torch.ops.aten.copy_.default(arg333_1, getitem_479); arg333_1 = getitem_479 = copy__185 = None + copy__186 = torch.ops.aten.copy_.default(arg334_1, getitem_480); arg334_1 = getitem_480 = copy__186 = None + copy__187 = torch.ops.aten.copy_.default(arg335_1, getitem_481); arg335_1 = getitem_481 = copy__187 = None + copy__188 = torch.ops.aten.copy_.default(arg336_1, getitem_482); arg336_1 = getitem_482 = copy__188 = None + copy__189 = torch.ops.aten.copy_.default(arg337_1, getitem_483); arg337_1 = getitem_483 = copy__189 = None + copy__190 = torch.ops.aten.copy_.default(arg338_1, getitem_484); arg338_1 = getitem_484 = copy__190 = None + copy__191 = torch.ops.aten.copy_.default(arg339_1, getitem_485); arg339_1 = getitem_485 = copy__191 = None + copy__192 = torch.ops.aten.copy_.default(arg340_1, getitem_486); arg340_1 = getitem_486 = copy__192 = None + copy__193 = torch.ops.aten.copy_.default(arg341_1, getitem_487); arg341_1 = getitem_487 = copy__193 = None + copy__194 = torch.ops.aten.copy_.default(arg342_1, getitem_488); arg342_1 = getitem_488 = copy__194 = None + copy__195 = torch.ops.aten.copy_.default(arg343_1, getitem_489); arg343_1 = getitem_489 = copy__195 = None + copy__196 = torch.ops.aten.copy_.default(arg344_1, getitem_490); arg344_1 = getitem_490 = copy__196 = None + copy__197 = torch.ops.aten.copy_.default(arg345_1, getitem_491); arg345_1 = getitem_491 = copy__197 = None + copy__198 = torch.ops.aten.copy_.default(arg346_1, getitem_492); arg346_1 = getitem_492 = copy__198 = None + copy__199 = torch.ops.aten.copy_.default(arg347_1, getitem_493); arg347_1 = getitem_493 = copy__199 = None + copy__200 = torch.ops.aten.copy_.default(arg348_1, getitem_494); arg348_1 = getitem_494 = copy__200 = None + copy__201 = torch.ops.aten.copy_.default(arg349_1, getitem_495); arg349_1 = getitem_495 = copy__201 = None + copy__202 = torch.ops.aten.copy_.default(arg350_1, getitem_496); arg350_1 = getitem_496 = copy__202 = None + copy__203 = torch.ops.aten.copy_.default(arg351_1, getitem_497); arg351_1 = getitem_497 = copy__203 = None + copy__204 = torch.ops.aten.copy_.default(arg352_1, getitem_498); arg352_1 = getitem_498 = copy__204 = None + copy__205 = torch.ops.aten.copy_.default(arg353_1, getitem_499); arg353_1 = getitem_499 = copy__205 = None + copy__206 = torch.ops.aten.copy_.default(arg354_1, getitem_500); arg354_1 = getitem_500 = copy__206 = None + copy__207 = torch.ops.aten.copy_.default(arg355_1, getitem_501); arg355_1 = getitem_501 = copy__207 = None + copy__208 = torch.ops.aten.copy_.default(arg356_1, getitem_502); arg356_1 = 
getitem_502 = copy__208 = None + copy__209 = torch.ops.aten.copy_.default(arg357_1, getitem_503); arg357_1 = getitem_503 = copy__209 = None + copy__210 = torch.ops.aten.copy_.default(arg358_1, getitem_504); arg358_1 = getitem_504 = copy__210 = None + copy__211 = torch.ops.aten.copy_.default(arg359_1, getitem_505); arg359_1 = getitem_505 = copy__211 = None + copy__212 = torch.ops.aten.copy_.default(arg360_1, getitem_506); arg360_1 = getitem_506 = copy__212 = None + copy__213 = torch.ops.aten.copy_.default(arg361_1, getitem_507); arg361_1 = getitem_507 = copy__213 = None + copy__214 = torch.ops.aten.copy_.default(arg362_1, getitem_508); arg362_1 = getitem_508 = copy__214 = None + copy__215 = torch.ops.aten.copy_.default(arg363_1, getitem_509); arg363_1 = getitem_509 = copy__215 = None + copy__216 = torch.ops.aten.copy_.default(arg364_1, getitem_510); arg364_1 = getitem_510 = copy__216 = None + copy__217 = torch.ops.aten.copy_.default(arg365_1, getitem_511); arg365_1 = getitem_511 = copy__217 = None + copy__218 = torch.ops.aten.copy_.default(arg366_1, getitem_512); arg366_1 = getitem_512 = copy__218 = None + copy__219 = torch.ops.aten.copy_.default(arg367_1, getitem_513); arg367_1 = getitem_513 = copy__219 = None + copy__220 = torch.ops.aten.copy_.default(arg368_1, getitem_514); arg368_1 = getitem_514 = copy__220 = None + copy__221 = torch.ops.aten.copy_.default(arg369_1, getitem_515); arg369_1 = getitem_515 = copy__221 = None + copy__222 = torch.ops.aten.copy_.default(arg370_1, getitem_516); arg370_1 = getitem_516 = copy__222 = None + copy__223 = torch.ops.aten.copy_.default(arg371_1, getitem_517); arg371_1 = getitem_517 = copy__223 = None + copy__224 = torch.ops.aten.copy_.default(arg372_1, getitem_518); arg372_1 = getitem_518 = copy__224 = None + copy__225 = torch.ops.aten.copy_.default(arg373_1, getitem_519); arg373_1 = getitem_519 = copy__225 = None + copy__226 = torch.ops.aten.copy_.default(arg374_1, getitem_520); arg374_1 = getitem_520 = copy__226 = None + copy__227 = torch.ops.aten.copy_.default(arg375_1, getitem_521); arg375_1 = getitem_521 = copy__227 = None + copy__228 = torch.ops.aten.copy_.default(arg376_1, getitem_522); arg376_1 = getitem_522 = copy__228 = None + copy__229 = torch.ops.aten.copy_.default(arg377_1, getitem_523); arg377_1 = getitem_523 = copy__229 = None + copy__230 = torch.ops.aten.copy_.default(arg378_1, getitem_524); arg378_1 = getitem_524 = copy__230 = None + copy__231 = torch.ops.aten.copy_.default(arg379_1, getitem_525); arg379_1 = getitem_525 = copy__231 = None + copy__232 = torch.ops.aten.copy_.default(arg380_1, getitem_526); arg380_1 = getitem_526 = copy__232 = None + copy__233 = torch.ops.aten.copy_.default(arg381_1, getitem_527); arg381_1 = getitem_527 = copy__233 = None + copy__234 = torch.ops.aten.copy_.default(arg382_1, getitem_528); arg382_1 = getitem_528 = copy__234 = None + copy__235 = torch.ops.aten.copy_.default(arg383_1, getitem_529); arg383_1 = getitem_529 = copy__235 = None + copy__236 = torch.ops.aten.copy_.default(arg384_1, getitem_530); arg384_1 = getitem_530 = copy__236 = None + copy__237 = torch.ops.aten.copy_.default(arg385_1, getitem_531); arg385_1 = getitem_531 = copy__237 = None + copy__238 = torch.ops.aten.copy_.default(arg386_1, getitem_532); arg386_1 = getitem_532 = copy__238 = None + copy__239 = torch.ops.aten.copy_.default(arg387_1, getitem_533); arg387_1 = getitem_533 = copy__239 = None + copy__240 = torch.ops.aten.copy_.default(arg388_1, getitem_534); arg388_1 = getitem_534 = copy__240 = None + copy__241 = 
torch.ops.aten.copy_.default(arg389_1, getitem_535); arg389_1 = getitem_535 = copy__241 = None + copy__242 = torch.ops.aten.copy_.default(arg390_1, getitem_536); arg390_1 = getitem_536 = copy__242 = None + copy__243 = torch.ops.aten.copy_.default(arg391_1, getitem_537); arg391_1 = getitem_537 = copy__243 = None + copy__244 = torch.ops.aten.copy_.default(arg392_1, getitem_538); arg392_1 = getitem_538 = copy__244 = None + copy__245 = torch.ops.aten.copy_.default(arg393_1, getitem_539); arg393_1 = getitem_539 = copy__245 = None + copy__246 = torch.ops.aten.copy_.default(arg394_1, getitem_540); arg394_1 = getitem_540 = copy__246 = None + copy__247 = torch.ops.aten.copy_.default(arg395_1, getitem_541); arg395_1 = getitem_541 = copy__247 = None + copy__248 = torch.ops.aten.copy_.default(arg396_1, getitem_542); arg396_1 = getitem_542 = copy__248 = None + copy__249 = torch.ops.aten.copy_.default(arg397_1, getitem_543); arg397_1 = getitem_543 = copy__249 = None + copy__250 = torch.ops.aten.copy_.default(arg398_1, getitem_544); arg398_1 = getitem_544 = copy__250 = None + copy__251 = torch.ops.aten.copy_.default(arg399_1, getitem_545); arg399_1 = getitem_545 = copy__251 = None + copy__252 = torch.ops.aten.copy_.default(arg400_1, getitem_546); arg400_1 = getitem_546 = copy__252 = None + copy__253 = torch.ops.aten.copy_.default(arg401_1, getitem_547); arg401_1 = getitem_547 = copy__253 = None + copy__254 = torch.ops.aten.copy_.default(arg402_1, getitem_548); arg402_1 = getitem_548 = copy__254 = None + copy__255 = torch.ops.aten.copy_.default(arg403_1, getitem_549); arg403_1 = getitem_549 = copy__255 = None + copy__256 = torch.ops.aten.copy_.default(arg404_1, getitem_550); arg404_1 = getitem_550 = copy__256 = None + copy__257 = torch.ops.aten.copy_.default(arg405_1, getitem_551); arg405_1 = getitem_551 = copy__257 = None + copy__258 = torch.ops.aten.copy_.default(arg406_1, getitem_552); arg406_1 = getitem_552 = copy__258 = None + copy__259 = torch.ops.aten.copy_.default(arg407_1, getitem_553); arg407_1 = getitem_553 = copy__259 = None + copy__260 = torch.ops.aten.copy_.default(arg408_1, getitem_554); arg408_1 = getitem_554 = copy__260 = None + copy__261 = torch.ops.aten.copy_.default(arg409_1, getitem_555); arg409_1 = getitem_555 = copy__261 = None + copy__262 = torch.ops.aten.copy_.default(arg410_1, getitem_556); arg410_1 = getitem_556 = copy__262 = None + copy__263 = torch.ops.aten.copy_.default(arg411_1, getitem_557); arg411_1 = getitem_557 = copy__263 = None + copy__264 = torch.ops.aten.copy_.default(arg412_1, getitem_558); arg412_1 = getitem_558 = copy__264 = None + copy__265 = torch.ops.aten.copy_.default(arg413_1, getitem_559); arg413_1 = getitem_559 = copy__265 = None + copy__266 = torch.ops.aten.copy_.default(arg414_1, getitem_560); arg414_1 = getitem_560 = copy__266 = None + copy__267 = torch.ops.aten.copy_.default(arg415_1, getitem_561); arg415_1 = getitem_561 = copy__267 = None + copy__268 = torch.ops.aten.copy_.default(arg416_1, getitem_562); arg416_1 = getitem_562 = copy__268 = None + copy__269 = torch.ops.aten.copy_.default(arg417_1, getitem_563); arg417_1 = getitem_563 = copy__269 = None + copy__270 = torch.ops.aten.copy_.default(arg418_1, getitem_564); arg418_1 = getitem_564 = copy__270 = None + copy__271 = torch.ops.aten.copy_.default(arg419_1, getitem_565); arg419_1 = getitem_565 = copy__271 = None + copy__272 = torch.ops.aten.copy_.default(arg420_1, getitem_566); arg420_1 = getitem_566 = copy__272 = None + copy__273 = torch.ops.aten.copy_.default(arg421_1, getitem_567); arg421_1 = 
getitem_567 = copy__273 = None + copy__274 = torch.ops.aten.copy_.default(arg422_1, getitem_568); arg422_1 = getitem_568 = copy__274 = None + copy__275 = torch.ops.aten.copy_.default(arg423_1, getitem_569); arg423_1 = getitem_569 = copy__275 = None + copy__276 = torch.ops.aten.copy_.default(arg424_1, getitem_570); arg424_1 = getitem_570 = copy__276 = None + copy__277 = torch.ops.aten.copy_.default(arg425_1, getitem_571); arg425_1 = getitem_571 = copy__277 = None + copy__278 = torch.ops.aten.copy_.default(arg426_1, getitem_572); arg426_1 = getitem_572 = copy__278 = None + copy__279 = torch.ops.aten.copy_.default(arg427_1, getitem_573); arg427_1 = getitem_573 = copy__279 = None + copy__280 = torch.ops.aten.copy_.default(arg428_1, getitem_574); arg428_1 = getitem_574 = copy__280 = None + copy__281 = torch.ops.aten.copy_.default(arg429_1, getitem_575); arg429_1 = getitem_575 = copy__281 = None + copy__282 = torch.ops.aten.copy_.default(arg430_1, getitem_576); arg430_1 = getitem_576 = copy__282 = None + copy__283 = torch.ops.aten.copy_.default(arg431_1, getitem_577); arg431_1 = getitem_577 = copy__283 = None + copy__284 = torch.ops.aten.copy_.default(arg432_1, getitem_578); arg432_1 = getitem_578 = copy__284 = None + copy__285 = torch.ops.aten.copy_.default(arg433_1, getitem_579); arg433_1 = getitem_579 = copy__285 = None + copy__286 = torch.ops.aten.copy_.default(arg434_1, getitem_580); arg434_1 = getitem_580 = copy__286 = None + copy__287 = torch.ops.aten.copy_.default(arg435_1, getitem_581); arg435_1 = getitem_581 = copy__287 = None + copy__288 = torch.ops.aten.copy_.default(arg436_1, getitem_582); arg436_1 = getitem_582 = copy__288 = None + copy__289 = torch.ops.aten.copy_.default(arg437_1, getitem_583); arg437_1 = getitem_583 = copy__289 = None + copy__290 = torch.ops.aten.copy_.default(arg438_1, getitem_584); arg438_1 = getitem_584 = copy__290 = None + copy__291 = torch.ops.aten.copy_.default(arg439_1, getitem_585); arg439_1 = getitem_585 = copy__291 = None + copy__292 = torch.ops.aten.copy_.default(arg440_1, getitem_586); arg440_1 = getitem_586 = copy__292 = None + copy__293 = torch.ops.aten.copy_.default(arg441_1, getitem_587); arg441_1 = getitem_587 = copy__293 = None + copy__294 = torch.ops.aten.copy_.default(arg442_1, getitem_588); arg442_1 = getitem_588 = copy__294 = None + copy__295 = torch.ops.aten.copy_.default(arg443_1, getitem_589); arg443_1 = getitem_589 = copy__295 = None + copy__296 = torch.ops.aten.copy_.default(arg444_1, getitem_590); arg444_1 = getitem_590 = copy__296 = None + copy__297 = torch.ops.aten.copy_.default(arg445_1, getitem_591); arg445_1 = getitem_591 = copy__297 = None + copy__298 = torch.ops.aten.copy_.default(arg446_1, getitem_888); arg446_1 = getitem_888 = copy__298 = None + copy__299 = torch.ops.aten.copy_.default(arg447_1, getitem_890); arg447_1 = getitem_890 = copy__299 = None + copy__300 = torch.ops.aten.copy_.default(arg448_1, getitem_891); arg448_1 = getitem_891 = copy__300 = None + copy__301 = torch.ops.aten.copy_.default(arg449_1, getitem_892); arg449_1 = getitem_892 = copy__301 = None + copy__302 = torch.ops.aten.copy_.default(arg450_1, getitem_893); arg450_1 = getitem_893 = copy__302 = None + copy__303 = torch.ops.aten.copy_.default(arg451_1, getitem_894); arg451_1 = getitem_894 = copy__303 = None + copy__304 = torch.ops.aten.copy_.default(arg452_1, getitem_895); arg452_1 = getitem_895 = copy__304 = None + copy__305 = torch.ops.aten.copy_.default(arg453_1, getitem_896); arg453_1 = getitem_896 = copy__305 = None + copy__306 = 
torch.ops.aten.copy_.default(arg454_1, getitem_897); arg454_1 = getitem_897 = copy__306 = None + copy__307 = torch.ops.aten.copy_.default(arg455_1, getitem_898); arg455_1 = getitem_898 = copy__307 = None + copy__308 = torch.ops.aten.copy_.default(arg456_1, getitem_899); arg456_1 = getitem_899 = copy__308 = None + copy__309 = torch.ops.aten.copy_.default(arg457_1, getitem_900); arg457_1 = getitem_900 = copy__309 = None + copy__310 = torch.ops.aten.copy_.default(arg458_1, getitem_901); arg458_1 = getitem_901 = copy__310 = None + copy__311 = torch.ops.aten.copy_.default(arg459_1, getitem_902); arg459_1 = getitem_902 = copy__311 = None + copy__312 = torch.ops.aten.copy_.default(arg460_1, getitem_903); arg460_1 = getitem_903 = copy__312 = None + copy__313 = torch.ops.aten.copy_.default(arg461_1, getitem_904); arg461_1 = getitem_904 = copy__313 = None + copy__314 = torch.ops.aten.copy_.default(arg462_1, getitem_905); arg462_1 = getitem_905 = copy__314 = None + copy__315 = torch.ops.aten.copy_.default(arg463_1, getitem_906); arg463_1 = getitem_906 = copy__315 = None + copy__316 = torch.ops.aten.copy_.default(arg464_1, getitem_907); arg464_1 = getitem_907 = copy__316 = None + copy__317 = torch.ops.aten.copy_.default(arg465_1, getitem_908); arg465_1 = getitem_908 = copy__317 = None + copy__318 = torch.ops.aten.copy_.default(arg466_1, getitem_909); arg466_1 = getitem_909 = copy__318 = None + copy__319 = torch.ops.aten.copy_.default(arg467_1, getitem_910); arg467_1 = getitem_910 = copy__319 = None + copy__320 = torch.ops.aten.copy_.default(arg468_1, getitem_911); arg468_1 = getitem_911 = copy__320 = None + copy__321 = torch.ops.aten.copy_.default(arg469_1, getitem_912); arg469_1 = getitem_912 = copy__321 = None + copy__322 = torch.ops.aten.copy_.default(arg470_1, getitem_913); arg470_1 = getitem_913 = copy__322 = None + copy__323 = torch.ops.aten.copy_.default(arg471_1, getitem_914); arg471_1 = getitem_914 = copy__323 = None + copy__324 = torch.ops.aten.copy_.default(arg472_1, getitem_915); arg472_1 = getitem_915 = copy__324 = None + copy__325 = torch.ops.aten.copy_.default(arg473_1, getitem_916); arg473_1 = getitem_916 = copy__325 = None + copy__326 = torch.ops.aten.copy_.default(arg474_1, getitem_917); arg474_1 = getitem_917 = copy__326 = None + copy__327 = torch.ops.aten.copy_.default(arg475_1, getitem_918); arg475_1 = getitem_918 = copy__327 = None + copy__328 = torch.ops.aten.copy_.default(arg476_1, getitem_919); arg476_1 = getitem_919 = copy__328 = None + copy__329 = torch.ops.aten.copy_.default(arg477_1, getitem_920); arg477_1 = getitem_920 = copy__329 = None + copy__330 = torch.ops.aten.copy_.default(arg478_1, getitem_921); arg478_1 = getitem_921 = copy__330 = None + copy__331 = torch.ops.aten.copy_.default(arg479_1, getitem_922); arg479_1 = getitem_922 = copy__331 = None + copy__332 = torch.ops.aten.copy_.default(arg480_1, getitem_923); arg480_1 = getitem_923 = copy__332 = None + copy__333 = torch.ops.aten.copy_.default(arg481_1, getitem_924); arg481_1 = getitem_924 = copy__333 = None + copy__334 = torch.ops.aten.copy_.default(arg482_1, getitem_925); arg482_1 = getitem_925 = copy__334 = None + copy__335 = torch.ops.aten.copy_.default(arg483_1, getitem_926); arg483_1 = getitem_926 = copy__335 = None + copy__336 = torch.ops.aten.copy_.default(arg484_1, getitem_927); arg484_1 = getitem_927 = copy__336 = None + copy__337 = torch.ops.aten.copy_.default(arg485_1, getitem_928); arg485_1 = getitem_928 = copy__337 = None + copy__338 = torch.ops.aten.copy_.default(arg486_1, getitem_929); arg486_1 = 
getitem_929 = copy__338 = None + copy__339 = torch.ops.aten.copy_.default(arg487_1, getitem_930); arg487_1 = getitem_930 = copy__339 = None + copy__340 = torch.ops.aten.copy_.default(arg488_1, getitem_931); arg488_1 = getitem_931 = copy__340 = None + copy__341 = torch.ops.aten.copy_.default(arg489_1, getitem_932); arg489_1 = getitem_932 = copy__341 = None + copy__342 = torch.ops.aten.copy_.default(arg490_1, getitem_933); arg490_1 = getitem_933 = copy__342 = None + copy__343 = torch.ops.aten.copy_.default(arg491_1, getitem_934); arg491_1 = getitem_934 = copy__343 = None + copy__344 = torch.ops.aten.copy_.default(arg492_1, getitem_935); arg492_1 = getitem_935 = copy__344 = None + copy__345 = torch.ops.aten.copy_.default(arg493_1, getitem_936); arg493_1 = getitem_936 = copy__345 = None + copy__346 = torch.ops.aten.copy_.default(arg494_1, getitem_937); arg494_1 = getitem_937 = copy__346 = None + copy__347 = torch.ops.aten.copy_.default(arg495_1, getitem_938); arg495_1 = getitem_938 = copy__347 = None + copy__348 = torch.ops.aten.copy_.default(arg496_1, getitem_939); arg496_1 = getitem_939 = copy__348 = None + copy__349 = torch.ops.aten.copy_.default(arg497_1, getitem_940); arg497_1 = getitem_940 = copy__349 = None + copy__350 = torch.ops.aten.copy_.default(arg498_1, getitem_941); arg498_1 = getitem_941 = copy__350 = None + copy__351 = torch.ops.aten.copy_.default(arg499_1, getitem_942); arg499_1 = getitem_942 = copy__351 = None + copy__352 = torch.ops.aten.copy_.default(arg500_1, getitem_943); arg500_1 = getitem_943 = copy__352 = None + copy__353 = torch.ops.aten.copy_.default(arg501_1, getitem_944); arg501_1 = getitem_944 = copy__353 = None + copy__354 = torch.ops.aten.copy_.default(arg502_1, getitem_945); arg502_1 = getitem_945 = copy__354 = None + copy__355 = torch.ops.aten.copy_.default(arg503_1, getitem_946); arg503_1 = getitem_946 = copy__355 = None + copy__356 = torch.ops.aten.copy_.default(arg504_1, getitem_947); arg504_1 = getitem_947 = copy__356 = None + copy__357 = torch.ops.aten.copy_.default(arg505_1, getitem_948); arg505_1 = getitem_948 = copy__357 = None + copy__358 = torch.ops.aten.copy_.default(arg506_1, getitem_949); arg506_1 = getitem_949 = copy__358 = None + copy__359 = torch.ops.aten.copy_.default(arg507_1, getitem_950); arg507_1 = getitem_950 = copy__359 = None + copy__360 = torch.ops.aten.copy_.default(arg508_1, getitem_951); arg508_1 = getitem_951 = copy__360 = None + copy__361 = torch.ops.aten.copy_.default(arg509_1, getitem_952); arg509_1 = getitem_952 = copy__361 = None + copy__362 = torch.ops.aten.copy_.default(arg510_1, getitem_953); arg510_1 = getitem_953 = copy__362 = None + copy__363 = torch.ops.aten.copy_.default(arg511_1, getitem_954); arg511_1 = getitem_954 = copy__363 = None + copy__364 = torch.ops.aten.copy_.default(arg512_1, getitem_955); arg512_1 = getitem_955 = copy__364 = None + copy__365 = torch.ops.aten.copy_.default(arg513_1, getitem_956); arg513_1 = getitem_956 = copy__365 = None + copy__366 = torch.ops.aten.copy_.default(arg514_1, getitem_957); arg514_1 = getitem_957 = copy__366 = None + copy__367 = torch.ops.aten.copy_.default(arg515_1, getitem_958); arg515_1 = getitem_958 = copy__367 = None + copy__368 = torch.ops.aten.copy_.default(arg516_1, getitem_959); arg516_1 = getitem_959 = copy__368 = None + copy__369 = torch.ops.aten.copy_.default(arg517_1, getitem_960); arg517_1 = getitem_960 = copy__369 = None + copy__370 = torch.ops.aten.copy_.default(arg518_1, getitem_961); arg518_1 = getitem_961 = copy__370 = None + copy__371 = 
torch.ops.aten.copy_.default(arg519_1, getitem_962); arg519_1 = getitem_962 = copy__371 = None + copy__372 = torch.ops.aten.copy_.default(arg520_1, getitem_963); arg520_1 = getitem_963 = copy__372 = None + copy__373 = torch.ops.aten.copy_.default(arg521_1, getitem_964); arg521_1 = getitem_964 = copy__373 = None + copy__374 = torch.ops.aten.copy_.default(arg522_1, getitem_965); arg522_1 = getitem_965 = copy__374 = None + copy__375 = torch.ops.aten.copy_.default(arg523_1, getitem_966); arg523_1 = getitem_966 = copy__375 = None + copy__376 = torch.ops.aten.copy_.default(arg524_1, getitem_967); arg524_1 = getitem_967 = copy__376 = None + copy__377 = torch.ops.aten.copy_.default(arg525_1, getitem_968); arg525_1 = getitem_968 = copy__377 = None + copy__378 = torch.ops.aten.copy_.default(arg526_1, getitem_969); arg526_1 = getitem_969 = copy__378 = None + copy__379 = torch.ops.aten.copy_.default(arg527_1, getitem_970); arg527_1 = getitem_970 = copy__379 = None + copy__380 = torch.ops.aten.copy_.default(arg528_1, getitem_971); arg528_1 = getitem_971 = copy__380 = None + copy__381 = torch.ops.aten.copy_.default(arg529_1, getitem_972); arg529_1 = getitem_972 = copy__381 = None + copy__382 = torch.ops.aten.copy_.default(arg530_1, getitem_973); arg530_1 = getitem_973 = copy__382 = None + copy__383 = torch.ops.aten.copy_.default(arg531_1, getitem_974); arg531_1 = getitem_974 = copy__383 = None + copy__384 = torch.ops.aten.copy_.default(arg532_1, getitem_975); arg532_1 = getitem_975 = copy__384 = None + copy__385 = torch.ops.aten.copy_.default(arg533_1, getitem_976); arg533_1 = getitem_976 = copy__385 = None + copy__386 = torch.ops.aten.copy_.default(arg534_1, getitem_977); arg534_1 = getitem_977 = copy__386 = None + copy__387 = torch.ops.aten.copy_.default(arg535_1, getitem_978); arg535_1 = getitem_978 = copy__387 = None + copy__388 = torch.ops.aten.copy_.default(arg536_1, getitem_979); arg536_1 = getitem_979 = copy__388 = None + copy__389 = torch.ops.aten.copy_.default(arg537_1, getitem_980); arg537_1 = getitem_980 = copy__389 = None + copy__390 = torch.ops.aten.copy_.default(arg538_1, getitem_981); arg538_1 = getitem_981 = copy__390 = None + copy__391 = torch.ops.aten.copy_.default(arg539_1, getitem_982); arg539_1 = getitem_982 = copy__391 = None + copy__392 = torch.ops.aten.copy_.default(arg540_1, getitem_983); arg540_1 = getitem_983 = copy__392 = None + copy__393 = torch.ops.aten.copy_.default(arg541_1, getitem_984); arg541_1 = getitem_984 = copy__393 = None + copy__394 = torch.ops.aten.copy_.default(arg542_1, getitem_985); arg542_1 = getitem_985 = copy__394 = None + copy__395 = torch.ops.aten.copy_.default(arg543_1, getitem_986); arg543_1 = getitem_986 = copy__395 = None + copy__396 = torch.ops.aten.copy_.default(arg544_1, getitem_987); arg544_1 = getitem_987 = copy__396 = None + copy__397 = torch.ops.aten.copy_.default(arg545_1, getitem_988); arg545_1 = getitem_988 = copy__397 = None + copy__398 = torch.ops.aten.copy_.default(arg546_1, getitem_989); arg546_1 = getitem_989 = copy__398 = None + copy__399 = torch.ops.aten.copy_.default(arg547_1, getitem_990); arg547_1 = getitem_990 = copy__399 = None + copy__400 = torch.ops.aten.copy_.default(arg548_1, getitem_991); arg548_1 = getitem_991 = copy__400 = None + copy__401 = torch.ops.aten.copy_.default(arg549_1, getitem_992); arg549_1 = getitem_992 = copy__401 = None + copy__402 = torch.ops.aten.copy_.default(arg550_1, getitem_993); arg550_1 = getitem_993 = copy__402 = None + copy__403 = torch.ops.aten.copy_.default(arg551_1, getitem_994); arg551_1 = 
getitem_994 = copy__403 = None + copy__404 = torch.ops.aten.copy_.default(arg552_1, getitem_995); arg552_1 = getitem_995 = copy__404 = None + copy__405 = torch.ops.aten.copy_.default(arg553_1, getitem_996); arg553_1 = getitem_996 = copy__405 = None + copy__406 = torch.ops.aten.copy_.default(arg554_1, getitem_997); arg554_1 = getitem_997 = copy__406 = None + copy__407 = torch.ops.aten.copy_.default(arg555_1, getitem_998); arg555_1 = getitem_998 = copy__407 = None + copy__408 = torch.ops.aten.copy_.default(arg556_1, getitem_999); arg556_1 = getitem_999 = copy__408 = None + copy__409 = torch.ops.aten.copy_.default(arg557_1, getitem_1000); arg557_1 = getitem_1000 = copy__409 = None + copy__410 = torch.ops.aten.copy_.default(arg558_1, getitem_1001); arg558_1 = getitem_1001 = copy__410 = None + copy__411 = torch.ops.aten.copy_.default(arg559_1, getitem_1002); arg559_1 = getitem_1002 = copy__411 = None + copy__412 = torch.ops.aten.copy_.default(arg560_1, getitem_1003); arg560_1 = getitem_1003 = copy__412 = None + copy__413 = torch.ops.aten.copy_.default(arg561_1, getitem_1004); arg561_1 = getitem_1004 = copy__413 = None + copy__414 = torch.ops.aten.copy_.default(arg562_1, getitem_1005); arg562_1 = getitem_1005 = copy__414 = None + copy__415 = torch.ops.aten.copy_.default(arg563_1, getitem_1006); arg563_1 = getitem_1006 = copy__415 = None + copy__416 = torch.ops.aten.copy_.default(arg564_1, getitem_1007); arg564_1 = getitem_1007 = copy__416 = None + copy__417 = torch.ops.aten.copy_.default(arg565_1, getitem_1008); arg565_1 = getitem_1008 = copy__417 = None + copy__418 = torch.ops.aten.copy_.default(arg566_1, getitem_1009); arg566_1 = getitem_1009 = copy__418 = None + copy__419 = torch.ops.aten.copy_.default(arg567_1, getitem_1010); arg567_1 = getitem_1010 = copy__419 = None + copy__420 = torch.ops.aten.copy_.default(arg568_1, getitem_1011); arg568_1 = getitem_1011 = copy__420 = None + copy__421 = torch.ops.aten.copy_.default(arg569_1, getitem_1012); arg569_1 = getitem_1012 = copy__421 = None + copy__422 = torch.ops.aten.copy_.default(arg570_1, getitem_1013); arg570_1 = getitem_1013 = copy__422 = None + copy__423 = torch.ops.aten.copy_.default(arg571_1, getitem_1014); arg571_1 = getitem_1014 = copy__423 = None + copy__424 = torch.ops.aten.copy_.default(arg572_1, getitem_1015); arg572_1 = getitem_1015 = copy__424 = None + copy__425 = torch.ops.aten.copy_.default(arg573_1, getitem_1016); arg573_1 = getitem_1016 = copy__425 = None + copy__426 = torch.ops.aten.copy_.default(arg574_1, getitem_1017); arg574_1 = getitem_1017 = copy__426 = None + copy__427 = torch.ops.aten.copy_.default(arg575_1, getitem_1018); arg575_1 = getitem_1018 = copy__427 = None + copy__428 = torch.ops.aten.copy_.default(arg576_1, getitem_1019); arg576_1 = getitem_1019 = copy__428 = None + copy__429 = torch.ops.aten.copy_.default(arg577_1, getitem_1020); arg577_1 = getitem_1020 = copy__429 = None + copy__430 = torch.ops.aten.copy_.default(arg578_1, getitem_1021); arg578_1 = getitem_1021 = copy__430 = None + copy__431 = torch.ops.aten.copy_.default(arg579_1, getitem_1022); arg579_1 = getitem_1022 = copy__431 = None + copy__432 = torch.ops.aten.copy_.default(arg580_1, getitem_1023); arg580_1 = getitem_1023 = copy__432 = None + copy__433 = torch.ops.aten.copy_.default(arg581_1, getitem_1024); arg581_1 = getitem_1024 = copy__433 = None + copy__434 = torch.ops.aten.copy_.default(arg582_1, getitem_1025); arg582_1 = getitem_1025 = copy__434 = None + copy__435 = torch.ops.aten.copy_.default(arg583_1, getitem_1026); arg583_1 = getitem_1026 
= copy__435 = None + copy__436 = torch.ops.aten.copy_.default(arg584_1, getitem_1027); arg584_1 = getitem_1027 = copy__436 = None + copy__437 = torch.ops.aten.copy_.default(arg585_1, getitem_1028); arg585_1 = getitem_1028 = copy__437 = None + copy__438 = torch.ops.aten.copy_.default(arg586_1, getitem_1029); arg586_1 = getitem_1029 = copy__438 = None + copy__439 = torch.ops.aten.copy_.default(arg587_1, getitem_1030); arg587_1 = getitem_1030 = copy__439 = None + copy__440 = torch.ops.aten.copy_.default(arg588_1, getitem_1031); arg588_1 = getitem_1031 = copy__440 = None + copy__441 = torch.ops.aten.copy_.default(arg589_1, getitem_1032); arg589_1 = getitem_1032 = copy__441 = None + copy__442 = torch.ops.aten.copy_.default(arg590_1, getitem_1033); arg590_1 = getitem_1033 = copy__442 = None + copy__443 = torch.ops.aten.copy_.default(arg591_1, getitem_1034); arg591_1 = getitem_1034 = copy__443 = None + copy__444 = torch.ops.aten.copy_.default(arg592_1, getitem_1035); arg592_1 = getitem_1035 = copy__444 = None + copy__445 = torch.ops.aten.copy_.default(arg593_1, getitem); arg593_1 = getitem = copy__445 = None + copy__446 = torch.ops.aten.copy_.default(arg594_1, getitem_2); arg594_1 = getitem_2 = copy__446 = None + copy__447 = torch.ops.aten.copy_.default(arg595_1, getitem_3); arg595_1 = getitem_3 = copy__447 = None + copy__448 = torch.ops.aten.copy_.default(arg596_1, getitem_4); arg596_1 = getitem_4 = copy__448 = None + copy__449 = torch.ops.aten.copy_.default(arg597_1, getitem_5); arg597_1 = getitem_5 = copy__449 = None + copy__450 = torch.ops.aten.copy_.default(arg598_1, getitem_6); arg598_1 = getitem_6 = copy__450 = None + copy__451 = torch.ops.aten.copy_.default(arg599_1, getitem_7); arg599_1 = getitem_7 = copy__451 = None + copy__452 = torch.ops.aten.copy_.default(arg600_1, getitem_8); arg600_1 = getitem_8 = copy__452 = None + copy__453 = torch.ops.aten.copy_.default(arg601_1, getitem_9); arg601_1 = getitem_9 = copy__453 = None + copy__454 = torch.ops.aten.copy_.default(arg602_1, getitem_10); arg602_1 = getitem_10 = copy__454 = None + copy__455 = torch.ops.aten.copy_.default(arg603_1, getitem_11); arg603_1 = getitem_11 = copy__455 = None + copy__456 = torch.ops.aten.copy_.default(arg604_1, getitem_12); arg604_1 = getitem_12 = copy__456 = None + copy__457 = torch.ops.aten.copy_.default(arg605_1, getitem_13); arg605_1 = getitem_13 = copy__457 = None + copy__458 = torch.ops.aten.copy_.default(arg606_1, getitem_14); arg606_1 = getitem_14 = copy__458 = None + copy__459 = torch.ops.aten.copy_.default(arg607_1, getitem_15); arg607_1 = getitem_15 = copy__459 = None + copy__460 = torch.ops.aten.copy_.default(arg608_1, getitem_16); arg608_1 = getitem_16 = copy__460 = None + copy__461 = torch.ops.aten.copy_.default(arg609_1, getitem_17); arg609_1 = getitem_17 = copy__461 = None + copy__462 = torch.ops.aten.copy_.default(arg610_1, getitem_18); arg610_1 = getitem_18 = copy__462 = None + copy__463 = torch.ops.aten.copy_.default(arg611_1, getitem_19); arg611_1 = getitem_19 = copy__463 = None + copy__464 = torch.ops.aten.copy_.default(arg612_1, getitem_20); arg612_1 = getitem_20 = copy__464 = None + copy__465 = torch.ops.aten.copy_.default(arg613_1, getitem_21); arg613_1 = getitem_21 = copy__465 = None + copy__466 = torch.ops.aten.copy_.default(arg614_1, getitem_22); arg614_1 = getitem_22 = copy__466 = None + copy__467 = torch.ops.aten.copy_.default(arg615_1, getitem_23); arg615_1 = getitem_23 = copy__467 = None + copy__468 = torch.ops.aten.copy_.default(arg616_1, getitem_24); arg616_1 = getitem_24 = 
copy__468 = None + copy__469 = torch.ops.aten.copy_.default(arg617_1, getitem_25); arg617_1 = getitem_25 = copy__469 = None + copy__470 = torch.ops.aten.copy_.default(arg618_1, getitem_26); arg618_1 = getitem_26 = copy__470 = None + copy__471 = torch.ops.aten.copy_.default(arg619_1, getitem_27); arg619_1 = getitem_27 = copy__471 = None + copy__472 = torch.ops.aten.copy_.default(arg620_1, getitem_28); arg620_1 = getitem_28 = copy__472 = None + copy__473 = torch.ops.aten.copy_.default(arg621_1, getitem_29); arg621_1 = getitem_29 = copy__473 = None + copy__474 = torch.ops.aten.copy_.default(arg622_1, getitem_30); arg622_1 = getitem_30 = copy__474 = None + copy__475 = torch.ops.aten.copy_.default(arg623_1, getitem_31); arg623_1 = getitem_31 = copy__475 = None + copy__476 = torch.ops.aten.copy_.default(arg624_1, getitem_32); arg624_1 = getitem_32 = copy__476 = None + copy__477 = torch.ops.aten.copy_.default(arg625_1, getitem_33); arg625_1 = getitem_33 = copy__477 = None + copy__478 = torch.ops.aten.copy_.default(arg626_1, getitem_34); arg626_1 = getitem_34 = copy__478 = None + copy__479 = torch.ops.aten.copy_.default(arg627_1, getitem_35); arg627_1 = getitem_35 = copy__479 = None + copy__480 = torch.ops.aten.copy_.default(arg628_1, getitem_36); arg628_1 = getitem_36 = copy__480 = None + copy__481 = torch.ops.aten.copy_.default(arg629_1, getitem_37); arg629_1 = getitem_37 = copy__481 = None + copy__482 = torch.ops.aten.copy_.default(arg630_1, getitem_38); arg630_1 = getitem_38 = copy__482 = None + copy__483 = torch.ops.aten.copy_.default(arg631_1, getitem_39); arg631_1 = getitem_39 = copy__483 = None + copy__484 = torch.ops.aten.copy_.default(arg632_1, getitem_40); arg632_1 = getitem_40 = copy__484 = None + copy__485 = torch.ops.aten.copy_.default(arg633_1, getitem_41); arg633_1 = getitem_41 = copy__485 = None + copy__486 = torch.ops.aten.copy_.default(arg634_1, getitem_42); arg634_1 = getitem_42 = copy__486 = None + copy__487 = torch.ops.aten.copy_.default(arg635_1, getitem_43); arg635_1 = getitem_43 = copy__487 = None + copy__488 = torch.ops.aten.copy_.default(arg636_1, getitem_44); arg636_1 = getitem_44 = copy__488 = None + copy__489 = torch.ops.aten.copy_.default(arg637_1, getitem_45); arg637_1 = getitem_45 = copy__489 = None + copy__490 = torch.ops.aten.copy_.default(arg638_1, getitem_46); arg638_1 = getitem_46 = copy__490 = None + copy__491 = torch.ops.aten.copy_.default(arg639_1, getitem_47); arg639_1 = getitem_47 = copy__491 = None + copy__492 = torch.ops.aten.copy_.default(arg640_1, getitem_48); arg640_1 = getitem_48 = copy__492 = None + copy__493 = torch.ops.aten.copy_.default(arg641_1, getitem_49); arg641_1 = getitem_49 = copy__493 = None + copy__494 = torch.ops.aten.copy_.default(arg642_1, getitem_50); arg642_1 = getitem_50 = copy__494 = None + copy__495 = torch.ops.aten.copy_.default(arg643_1, getitem_51); arg643_1 = getitem_51 = copy__495 = None + copy__496 = torch.ops.aten.copy_.default(arg644_1, getitem_52); arg644_1 = getitem_52 = copy__496 = None + copy__497 = torch.ops.aten.copy_.default(arg645_1, getitem_53); arg645_1 = getitem_53 = copy__497 = None + copy__498 = torch.ops.aten.copy_.default(arg646_1, getitem_54); arg646_1 = getitem_54 = copy__498 = None + copy__499 = torch.ops.aten.copy_.default(arg647_1, getitem_55); arg647_1 = getitem_55 = copy__499 = None + copy__500 = torch.ops.aten.copy_.default(arg648_1, getitem_56); arg648_1 = getitem_56 = copy__500 = None + copy__501 = torch.ops.aten.copy_.default(arg649_1, getitem_57); arg649_1 = getitem_57 = copy__501 = None + 
copy__502 = torch.ops.aten.copy_.default(arg650_1, getitem_58); arg650_1 = getitem_58 = copy__502 = None + copy__503 = torch.ops.aten.copy_.default(arg651_1, getitem_59); arg651_1 = getitem_59 = copy__503 = None + copy__504 = torch.ops.aten.copy_.default(arg652_1, getitem_60); arg652_1 = getitem_60 = copy__504 = None + copy__505 = torch.ops.aten.copy_.default(arg653_1, getitem_61); arg653_1 = getitem_61 = copy__505 = None + copy__506 = torch.ops.aten.copy_.default(arg654_1, getitem_62); arg654_1 = getitem_62 = copy__506 = None + copy__507 = torch.ops.aten.copy_.default(arg655_1, getitem_63); arg655_1 = getitem_63 = copy__507 = None + copy__508 = torch.ops.aten.copy_.default(arg656_1, getitem_64); arg656_1 = getitem_64 = copy__508 = None + copy__509 = torch.ops.aten.copy_.default(arg657_1, getitem_65); arg657_1 = getitem_65 = copy__509 = None + copy__510 = torch.ops.aten.copy_.default(arg658_1, getitem_66); arg658_1 = getitem_66 = copy__510 = None + copy__511 = torch.ops.aten.copy_.default(arg659_1, getitem_67); arg659_1 = getitem_67 = copy__511 = None + copy__512 = torch.ops.aten.copy_.default(arg660_1, getitem_68); arg660_1 = getitem_68 = copy__512 = None + copy__513 = torch.ops.aten.copy_.default(arg661_1, getitem_69); arg661_1 = getitem_69 = copy__513 = None + copy__514 = torch.ops.aten.copy_.default(arg662_1, getitem_70); arg662_1 = getitem_70 = copy__514 = None + copy__515 = torch.ops.aten.copy_.default(arg663_1, getitem_71); arg663_1 = getitem_71 = copy__515 = None + copy__516 = torch.ops.aten.copy_.default(arg664_1, getitem_72); arg664_1 = getitem_72 = copy__516 = None + copy__517 = torch.ops.aten.copy_.default(arg665_1, getitem_73); arg665_1 = getitem_73 = copy__517 = None + copy__518 = torch.ops.aten.copy_.default(arg666_1, getitem_74); arg666_1 = getitem_74 = copy__518 = None + copy__519 = torch.ops.aten.copy_.default(arg667_1, getitem_75); arg667_1 = getitem_75 = copy__519 = None + copy__520 = torch.ops.aten.copy_.default(arg668_1, getitem_76); arg668_1 = getitem_76 = copy__520 = None + copy__521 = torch.ops.aten.copy_.default(arg669_1, getitem_77); arg669_1 = getitem_77 = copy__521 = None + copy__522 = torch.ops.aten.copy_.default(arg670_1, getitem_78); arg670_1 = getitem_78 = copy__522 = None + copy__523 = torch.ops.aten.copy_.default(arg671_1, getitem_79); arg671_1 = getitem_79 = copy__523 = None + copy__524 = torch.ops.aten.copy_.default(arg672_1, getitem_80); arg672_1 = getitem_80 = copy__524 = None + copy__525 = torch.ops.aten.copy_.default(arg673_1, getitem_81); arg673_1 = getitem_81 = copy__525 = None + copy__526 = torch.ops.aten.copy_.default(arg674_1, getitem_82); arg674_1 = getitem_82 = copy__526 = None + copy__527 = torch.ops.aten.copy_.default(arg675_1, getitem_83); arg675_1 = getitem_83 = copy__527 = None + copy__528 = torch.ops.aten.copy_.default(arg676_1, getitem_84); arg676_1 = getitem_84 = copy__528 = None + copy__529 = torch.ops.aten.copy_.default(arg677_1, getitem_85); arg677_1 = getitem_85 = copy__529 = None + copy__530 = torch.ops.aten.copy_.default(arg678_1, getitem_86); arg678_1 = getitem_86 = copy__530 = None + copy__531 = torch.ops.aten.copy_.default(arg679_1, getitem_87); arg679_1 = getitem_87 = copy__531 = None + copy__532 = torch.ops.aten.copy_.default(arg680_1, getitem_88); arg680_1 = getitem_88 = copy__532 = None + copy__533 = torch.ops.aten.copy_.default(arg681_1, getitem_89); arg681_1 = getitem_89 = copy__533 = None + copy__534 = torch.ops.aten.copy_.default(arg682_1, getitem_90); arg682_1 = getitem_90 = copy__534 = None + copy__535 = 
torch.ops.aten.copy_.default(arg683_1, getitem_91); arg683_1 = getitem_91 = copy__535 = None + copy__536 = torch.ops.aten.copy_.default(arg684_1, getitem_92); arg684_1 = getitem_92 = copy__536 = None + copy__537 = torch.ops.aten.copy_.default(arg685_1, getitem_93); arg685_1 = getitem_93 = copy__537 = None + copy__538 = torch.ops.aten.copy_.default(arg686_1, getitem_94); arg686_1 = getitem_94 = copy__538 = None + copy__539 = torch.ops.aten.copy_.default(arg687_1, getitem_95); arg687_1 = getitem_95 = copy__539 = None + copy__540 = torch.ops.aten.copy_.default(arg688_1, getitem_96); arg688_1 = getitem_96 = copy__540 = None + copy__541 = torch.ops.aten.copy_.default(arg689_1, getitem_97); arg689_1 = getitem_97 = copy__541 = None + copy__542 = torch.ops.aten.copy_.default(arg690_1, getitem_98); arg690_1 = getitem_98 = copy__542 = None + copy__543 = torch.ops.aten.copy_.default(arg691_1, getitem_99); arg691_1 = getitem_99 = copy__543 = None + copy__544 = torch.ops.aten.copy_.default(arg692_1, getitem_100); arg692_1 = getitem_100 = copy__544 = None + copy__545 = torch.ops.aten.copy_.default(arg693_1, getitem_101); arg693_1 = getitem_101 = copy__545 = None + copy__546 = torch.ops.aten.copy_.default(arg694_1, getitem_102); arg694_1 = getitem_102 = copy__546 = None + copy__547 = torch.ops.aten.copy_.default(arg695_1, getitem_103); arg695_1 = getitem_103 = copy__547 = None + copy__548 = torch.ops.aten.copy_.default(arg696_1, getitem_104); arg696_1 = getitem_104 = copy__548 = None + copy__549 = torch.ops.aten.copy_.default(arg697_1, getitem_105); arg697_1 = getitem_105 = copy__549 = None + copy__550 = torch.ops.aten.copy_.default(arg698_1, getitem_106); arg698_1 = getitem_106 = copy__550 = None + copy__551 = torch.ops.aten.copy_.default(arg699_1, getitem_107); arg699_1 = getitem_107 = copy__551 = None + copy__552 = torch.ops.aten.copy_.default(arg700_1, getitem_108); arg700_1 = getitem_108 = copy__552 = None + copy__553 = torch.ops.aten.copy_.default(arg701_1, getitem_109); arg701_1 = getitem_109 = copy__553 = None + copy__554 = torch.ops.aten.copy_.default(arg702_1, getitem_110); arg702_1 = getitem_110 = copy__554 = None + copy__555 = torch.ops.aten.copy_.default(arg703_1, getitem_111); arg703_1 = getitem_111 = copy__555 = None + copy__556 = torch.ops.aten.copy_.default(arg704_1, getitem_112); arg704_1 = getitem_112 = copy__556 = None + copy__557 = torch.ops.aten.copy_.default(arg705_1, getitem_113); arg705_1 = getitem_113 = copy__557 = None + copy__558 = torch.ops.aten.copy_.default(arg706_1, getitem_114); arg706_1 = getitem_114 = copy__558 = None + copy__559 = torch.ops.aten.copy_.default(arg707_1, getitem_115); arg707_1 = getitem_115 = copy__559 = None + copy__560 = torch.ops.aten.copy_.default(arg708_1, getitem_116); arg708_1 = getitem_116 = copy__560 = None + copy__561 = torch.ops.aten.copy_.default(arg709_1, getitem_117); arg709_1 = getitem_117 = copy__561 = None + copy__562 = torch.ops.aten.copy_.default(arg710_1, getitem_118); arg710_1 = getitem_118 = copy__562 = None + copy__563 = torch.ops.aten.copy_.default(arg711_1, getitem_119); arg711_1 = getitem_119 = copy__563 = None + copy__564 = torch.ops.aten.copy_.default(arg712_1, getitem_120); arg712_1 = getitem_120 = copy__564 = None + copy__565 = torch.ops.aten.copy_.default(arg713_1, getitem_121); arg713_1 = getitem_121 = copy__565 = None + copy__566 = torch.ops.aten.copy_.default(arg714_1, getitem_122); arg714_1 = getitem_122 = copy__566 = None + copy__567 = torch.ops.aten.copy_.default(arg715_1, getitem_123); arg715_1 = getitem_123 = 
copy__567 = None
+        copy__568 = torch.ops.aten.copy_.default(arg716_1, getitem_124); arg716_1 = getitem_124 = copy__568 = None
+        copy__569 = torch.ops.aten.copy_.default(arg717_1, getitem_125); arg717_1 = getitem_125 = copy__569 = None
+        copy__570 = torch.ops.aten.copy_.default(arg718_1, getitem_126); arg718_1 = getitem_126 = copy__570 = None
+        copy__571 = torch.ops.aten.copy_.default(arg719_1, getitem_127); arg719_1 = getitem_127 = copy__571 = None
+        copy__572 = torch.ops.aten.copy_.default(arg720_1, getitem_128); arg720_1 = getitem_128 = copy__572 = None
+        copy__573 = torch.ops.aten.copy_.default(arg721_1, getitem_129); arg721_1 = getitem_129 = copy__573 = None
+        copy__574 = torch.ops.aten.copy_.default(arg722_1, getitem_130); arg722_1 = getitem_130 = copy__574 = None
+        copy__575 = torch.ops.aten.copy_.default(arg723_1, getitem_131); arg723_1 = getitem_131 = copy__575 = None
+        copy__576 = torch.ops.aten.copy_.default(arg724_1, getitem_132); arg724_1 = getitem_132 = copy__576 = None
+        copy__577 = torch.ops.aten.copy_.default(arg725_1, getitem_133); arg725_1 = getitem_133 = copy__577 = None
+        copy__578 = torch.ops.aten.copy_.default(arg726_1, getitem_134); arg726_1 = getitem_134 = copy__578 = None
+        copy__579 = torch.ops.aten.copy_.default(arg727_1, getitem_135); arg727_1 = getitem_135 = copy__579 = None
+        copy__580 = torch.ops.aten.copy_.default(arg728_1, getitem_136); arg728_1 = getitem_136 = copy__580 = None
+        copy__581 = torch.ops.aten.copy_.default(arg729_1, getitem_137); arg729_1 = getitem_137 = copy__581 = None
+        copy__582 = torch.ops.aten.copy_.default(arg730_1, getitem_138); arg730_1 = getitem_138 = copy__582 = None
+        copy__583 = torch.ops.aten.copy_.default(arg731_1, getitem_139); arg731_1 = getitem_139 = copy__583 = None
+        copy__584 = torch.ops.aten.copy_.default(arg732_1, getitem_140); arg732_1 = getitem_140 = copy__584 = None
+        copy__585 = torch.ops.aten.copy_.default(arg733_1, getitem_141); arg733_1 = getitem_141 = copy__585 = None
+        copy__586 = torch.ops.aten.copy_.default(arg734_1, getitem_142); arg734_1 = getitem_142 = copy__586 = None
+        copy__587 = torch.ops.aten.copy_.default(arg735_1, getitem_143); arg735_1 = getitem_143 = copy__587 = None
+        copy__588 = torch.ops.aten.copy_.default(arg736_1, getitem_144); arg736_1 = getitem_144 = copy__588 = None
+        copy__589 = torch.ops.aten.copy_.default(arg737_1, getitem_145); arg737_1 = getitem_145 = copy__589 = None
+        copy__590 = torch.ops.aten.copy_.default(arg738_1, getitem_146); arg738_1 = getitem_146 = copy__590 = None
+        copy__591 = torch.ops.aten.copy_.default(arg739_1, getitem_147); arg739_1 = getitem_147 = copy__591 = None
+        return ()
+
+    def load_args(reader):
+        buf0 = reader.storage(None, 154533888, device=device(type='cuda', index=0))
+        reader.tensor(buf0, (50304, 768), is_leaf=True)  # arg0_1
+        buf1 = reader.storage(None, 3145728, device=device(type='cuda', index=0))
+        reader.tensor(buf1, (1024, 768), is_leaf=True)  # arg1_1
+        buf2 = reader.storage(None, 3072, device=device(type='cuda', index=0))
+        reader.tensor(buf2, (768,), is_leaf=True)  # arg2_1
+        buf3 = reader.storage(None, 3072, device=device(type='cuda', index=0))
+        reader.tensor(buf3, (768,), is_leaf=True)  # arg3_1
+        buf4 = reader.storage(None, 7077888, device=device(type='cuda', index=0))
+        reader.tensor(buf4, (2304, 768), is_leaf=True)  # arg4_1
+        buf5 = reader.storage(None, 9216, device=device(type='cuda', index=0))
+        reader.tensor(buf5, (2304,), is_leaf=True)  # arg5_1
+        buf6 = reader.storage(None, 2359296, device=device(type='cuda', index=0))
+        reader.tensor(buf6, (768, 768), is_leaf=True)  # arg6_1
+        buf7 = reader.storage(None, 3072, device=device(type='cuda', index=0))
+        reader.tensor(buf7, (768,), is_leaf=True)  # arg7_1
+        buf8 = reader.storage(None, 3072, device=device(type='cuda', index=0))
+        reader.tensor(buf8, (768,), is_leaf=True)  # arg8_1
+        buf9 = reader.storage(None, 3072, device=device(type='cuda', index=0))
+        reader.tensor(buf9, (768,), is_leaf=True)  # arg9_1
+        buf10 = reader.storage(None, 9437184, device=device(type='cuda', index=0))
+        reader.tensor(buf10, (3072, 768), is_leaf=True)  # arg10_1
+        buf11 = reader.storage(None, 12288, device=device(type='cuda', index=0))
+        reader.tensor(buf11, (3072,), is_leaf=True)  # arg11_1
+        buf12 = reader.storage(None, 9437184, device=device(type='cuda', index=0))
+        reader.tensor(buf12, (768, 3072), is_leaf=True)  # arg12_1
+        buf13 = reader.storage(None, 3072, device=device(type='cuda', index=0))
+        reader.tensor(buf13, (768,), is_leaf=True)  # arg13_1
+        buf14 = reader.storage(None, 3072, device=device(type='cuda', index=0))
+        reader.tensor(buf14, (768,), is_leaf=True)  # arg14_1
+        buf15 = reader.storage(None, 3072, device=device(type='cuda', index=0))
+        reader.tensor(buf15, (768,), is_leaf=True)  # arg15_1
+        buf16 = reader.storage(None, 7077888, device=device(type='cuda', index=0))
+        reader.tensor(buf16, (2304, 768), is_leaf=True)  # arg16_1
+        buf17 = reader.storage(None, 9216, device=device(type='cuda', index=0))
+        reader.tensor(buf17, (2304,), is_leaf=True)  # arg17_1
+        buf18 = reader.storage(None, 2359296, device=device(type='cuda', index=0))
+        reader.tensor(buf18, (768, 768), is_leaf=True)  # arg18_1
+        buf19 = reader.storage(None, 3072, device=device(type='cuda', index=0))
+        reader.tensor(buf19, (768,), is_leaf=True)  # arg19_1
+        buf20 = reader.storage(None, 3072, device=device(type='cuda', index=0))
+        reader.tensor(buf20, (768,), is_leaf=True)  # arg20_1
+        buf21 = reader.storage(None, 3072, device=device(type='cuda', index=0))
+        reader.tensor(buf21, (768,), is_leaf=True)  # arg21_1
+        buf22 = reader.storage(None, 9437184, device=device(type='cuda', index=0))
+        reader.tensor(buf22, (3072, 768), is_leaf=True)  # arg22_1
+        buf23 = reader.storage(None, 12288, device=device(type='cuda', index=0))
+        reader.tensor(buf23, (3072,), is_leaf=True)  # arg23_1
+        buf24 = reader.storage(None, 9437184, device=device(type='cuda', index=0))
+        reader.tensor(buf24, (768, 3072), is_leaf=True)  # arg24_1
+        buf25 = reader.storage(None, 3072, device=device(type='cuda', index=0))
+        reader.tensor(buf25, (768,), is_leaf=True)  # arg25_1
+        buf26 = reader.storage(None, 3072, device=device(type='cuda', index=0))
+        reader.tensor(buf26, (768,), is_leaf=True)  # arg26_1
+        buf27 = reader.storage(None, 3072, device=device(type='cuda', index=0))
+        reader.tensor(buf27, (768,), is_leaf=True)  # arg27_1
+        buf28 = reader.storage(None, 7077888, device=device(type='cuda', index=0))
+        reader.tensor(buf28, (2304, 768), is_leaf=True)  # arg28_1
+        buf29 = reader.storage(None, 9216, device=device(type='cuda', index=0))
+        reader.tensor(buf29, (2304,), is_leaf=True)  # arg29_1
+        buf30 = reader.storage(None, 2359296, device=device(type='cuda', index=0))
+        reader.tensor(buf30, (768, 768), is_leaf=True)  # arg30_1
+        buf31 = reader.storage(None, 3072, device=device(type='cuda', index=0))
+        reader.tensor(buf31, (768,), is_leaf=True)  # arg31_1
+        buf32 = reader.storage(None, 3072, device=device(type='cuda', index=0))
+        reader.tensor(buf32, (768,), is_leaf=True)  # arg32_1
+        buf33 = reader.storage(None, 3072, device=device(type='cuda', index=0))
+        reader.tensor(buf33, (768,), is_leaf=True)  # arg33_1
+        buf34 = reader.storage(None, 9437184, device=device(type='cuda', index=0))
+        reader.tensor(buf34, (3072, 768), is_leaf=True)  # arg34_1
+        buf35 = reader.storage(None, 12288, device=device(type='cuda', index=0))
+        reader.tensor(buf35, (3072,), is_leaf=True)  # arg35_1
+        buf36 = reader.storage(None, 9437184, device=device(type='cuda', index=0))
+        reader.tensor(buf36, (768, 3072), is_leaf=True)  # arg36_1
+        buf37 = reader.storage(None, 3072, device=device(type='cuda', index=0))
+        reader.tensor(buf37, (768,), is_leaf=True)  # arg37_1
+        buf38 = reader.storage(None, 3072, device=device(type='cuda', index=0))
+        reader.tensor(buf38, (768,), is_leaf=True)  # arg38_1
+        buf39 = reader.storage(None, 3072, device=device(type='cuda', index=0))
+        reader.tensor(buf39, (768,), is_leaf=True)  # arg39_1
+        buf40 = reader.storage(None, 7077888, device=device(type='cuda', index=0))
+        reader.tensor(buf40, (2304, 768), is_leaf=True)  # arg40_1
+        buf41 = reader.storage(None, 9216, device=device(type='cuda', index=0))
+        reader.tensor(buf41, (2304,), is_leaf=True)  # arg41_1
+        buf42 = reader.storage(None, 2359296, device=device(type='cuda', index=0))
+        reader.tensor(buf42, (768, 768), is_leaf=True)  # arg42_1
+        buf43 = reader.storage(None, 3072, device=device(type='cuda', index=0))
+        reader.tensor(buf43, (768,), is_leaf=True)  # arg43_1
+        buf44 = reader.storage(None, 3072, device=device(type='cuda', index=0))
+        reader.tensor(buf44, (768,), is_leaf=True)  # arg44_1
+        buf45 = reader.storage(None, 3072, device=device(type='cuda', index=0))
+        reader.tensor(buf45, (768,), is_leaf=True)  # arg45_1
+        buf46 = reader.storage(None, 9437184, device=device(type='cuda', index=0))
+        reader.tensor(buf46, (3072, 768), is_leaf=True)  # arg46_1
+        buf47 = reader.storage(None, 12288, device=device(type='cuda', index=0))
+        reader.tensor(buf47, (3072,), is_leaf=True)  # arg47_1
+        buf48 = reader.storage(None, 9437184, device=device(type='cuda', index=0))
+        reader.tensor(buf48, (768, 3072), is_leaf=True)  # arg48_1
+        buf49 = reader.storage(None, 3072, device=device(type='cuda', index=0))
+        reader.tensor(buf49, (768,), is_leaf=True)  # arg49_1
+        buf50 = reader.storage(None, 3072, device=device(type='cuda', index=0))
+        reader.tensor(buf50, (768,), is_leaf=True)  # arg50_1
+        buf51 = reader.storage(None, 3072, device=device(type='cuda', index=0))
+        reader.tensor(buf51, (768,), is_leaf=True)  # arg51_1
+        buf52 = reader.storage(None, 7077888, device=device(type='cuda', index=0))
+        reader.tensor(buf52, (2304, 768), is_leaf=True)  # arg52_1
+        buf53 = reader.storage(None, 9216, device=device(type='cuda', index=0))
+        reader.tensor(buf53, (2304,), is_leaf=True)  # arg53_1
+        buf54 = reader.storage(None, 2359296, device=device(type='cuda', index=0))
+        reader.tensor(buf54, (768, 768), is_leaf=True)  # arg54_1
+        buf55 = reader.storage(None, 3072, device=device(type='cuda', index=0))
+        reader.tensor(buf55, (768,), is_leaf=True)  # arg55_1
+        buf56 = reader.storage(None, 3072, device=device(type='cuda', index=0))
+        reader.tensor(buf56, (768,), is_leaf=True)  # arg56_1
+        buf57 = reader.storage(None, 3072, device=device(type='cuda', index=0))
+        reader.tensor(buf57, (768,), is_leaf=True)  # arg57_1
+        buf58 = reader.storage(None, 9437184, device=device(type='cuda', index=0))
+        reader.tensor(buf58, (3072, 768), is_leaf=True)  # arg58_1
+        buf59 = reader.storage(None, 12288, device=device(type='cuda', index=0))
+        reader.tensor(buf59, (3072,), is_leaf=True)  # arg59_1
+        buf60 = reader.storage(None, 9437184, device=device(type='cuda', index=0))
+        reader.tensor(buf60, (768, 3072), is_leaf=True)  # arg60_1
+        buf61 = reader.storage(None, 3072, device=device(type='cuda', index=0))
+        reader.tensor(buf61, (768,), is_leaf=True)  # arg61_1
+        buf62 = reader.storage(None, 3072, device=device(type='cuda', index=0))
+        reader.tensor(buf62, (768,), is_leaf=True)  # arg62_1
+        buf63 = reader.storage(None, 3072, device=device(type='cuda', index=0))
+        reader.tensor(buf63, (768,), is_leaf=True)  # arg63_1
+        buf64 = reader.storage(None, 7077888, device=device(type='cuda', index=0))
+        reader.tensor(buf64, (2304, 768), is_leaf=True)  # arg64_1
+        buf65 = reader.storage(None, 9216, device=device(type='cuda', index=0))
+        reader.tensor(buf65, (2304,), is_leaf=True)  # arg65_1
+        buf66 = reader.storage(None, 2359296, device=device(type='cuda', index=0))
+        reader.tensor(buf66, (768, 768), is_leaf=True)  # arg66_1
+        buf67 = reader.storage(None, 3072, device=device(type='cuda', index=0))
+        reader.tensor(buf67, (768,), is_leaf=True)  # arg67_1
+        buf68 = reader.storage(None, 3072, device=device(type='cuda', index=0))
+        reader.tensor(buf68, (768,), is_leaf=True)  # arg68_1
+        buf69 = reader.storage(None, 3072, device=device(type='cuda', index=0))
+        reader.tensor(buf69, (768,), is_leaf=True)  # arg69_1
+        buf70 = reader.storage(None, 9437184, device=device(type='cuda', index=0))
+        reader.tensor(buf70, (3072, 768), is_leaf=True)  # arg70_1
+        buf71 = reader.storage(None, 12288, device=device(type='cuda', index=0))
+        reader.tensor(buf71, (3072,), is_leaf=True)  # arg71_1
+        buf72 = reader.storage(None, 9437184, device=device(type='cuda', index=0))
+        reader.tensor(buf72, (768, 3072), is_leaf=True)  # arg72_1
+        buf73 = reader.storage(None, 3072, device=device(type='cuda', index=0))
+        reader.tensor(buf73, (768,), is_leaf=True)  # arg73_1
+        buf74 = reader.storage(None, 3072, device=device(type='cuda', index=0))
+        reader.tensor(buf74, (768,), is_leaf=True)  # arg74_1
+        buf75 = reader.storage(None, 3072, device=device(type='cuda', index=0))
+        reader.tensor(buf75, (768,), is_leaf=True)  # arg75_1
+        buf76 = reader.storage(None, 7077888, device=device(type='cuda', index=0))
+        reader.tensor(buf76, (2304, 768), is_leaf=True)  # arg76_1
+        buf77 = reader.storage(None, 9216, device=device(type='cuda', index=0))
+        reader.tensor(buf77, (2304,), is_leaf=True)  # arg77_1
+        buf78 = reader.storage(None, 2359296, device=device(type='cuda', index=0))
+        reader.tensor(buf78, (768, 768), is_leaf=True)  # arg78_1
+        buf79 = reader.storage(None, 3072, device=device(type='cuda', index=0))
+        reader.tensor(buf79, (768,), is_leaf=True)  # arg79_1
+        buf80 = reader.storage(None, 3072, device=device(type='cuda', index=0))
+        reader.tensor(buf80, (768,), is_leaf=True)  # arg80_1
+        buf81 = reader.storage(None, 3072, device=device(type='cuda', index=0))
+        reader.tensor(buf81, (768,), is_leaf=True)  # arg81_1
+        buf82 = reader.storage(None, 9437184, device=device(type='cuda', index=0))
+        reader.tensor(buf82, (3072, 768), is_leaf=True)  # arg82_1
+        buf83 = reader.storage(None, 12288, device=device(type='cuda', index=0))
+        reader.tensor(buf83, (3072,), is_leaf=True)  # arg83_1
+        buf84 = reader.storage(None, 9437184, device=device(type='cuda', index=0))
+        reader.tensor(buf84, (768, 3072), is_leaf=True)  # arg84_1
+        buf85 = reader.storage(None, 3072, device=device(type='cuda', index=0))
+        reader.tensor(buf85, (768,), is_leaf=True)  # arg85_1
+        buf86 = reader.storage(None, 3072, device=device(type='cuda', index=0))
+        reader.tensor(buf86, (768,), is_leaf=True)  # arg86_1
+        buf87 = reader.storage(None, 3072, device=device(type='cuda', index=0))
+        reader.tensor(buf87, (768,), is_leaf=True)  # arg87_1
+        buf88 = reader.storage(None, 7077888, device=device(type='cuda', index=0))
+        reader.tensor(buf88, (2304, 768), is_leaf=True)  # arg88_1
+        buf89 = reader.storage(None, 9216, device=device(type='cuda', index=0))
+        reader.tensor(buf89, (2304,), is_leaf=True)  # arg89_1
+        buf90 = reader.storage(None, 2359296, device=device(type='cuda', index=0))
+        reader.tensor(buf90, (768, 768), is_leaf=True)  # arg90_1
+        buf91 = reader.storage(None, 3072, device=device(type='cuda', index=0))
+        reader.tensor(buf91, (768,), is_leaf=True)  # arg91_1
+        buf92 = reader.storage(None, 3072, device=device(type='cuda', index=0))
+        reader.tensor(buf92, (768,), is_leaf=True)  # arg92_1
+        buf93 = reader.storage(None, 3072, device=device(type='cuda', index=0))
+        reader.tensor(buf93, (768,), is_leaf=True)  # arg93_1
+        buf94 = reader.storage(None, 9437184, device=device(type='cuda', index=0))
+        reader.tensor(buf94, (3072, 768), is_leaf=True)  # arg94_1
+        buf95 = reader.storage(None, 12288, device=device(type='cuda', index=0))
+        reader.tensor(buf95, (3072,), is_leaf=True)  # arg95_1
+        buf96 = reader.storage(None, 9437184, device=device(type='cuda', index=0))
+        reader.tensor(buf96, (768, 3072), is_leaf=True)  # arg96_1
+        buf97 = reader.storage(None, 3072, device=device(type='cuda', index=0))
+        reader.tensor(buf97, (768,), is_leaf=True)  # arg97_1
+        buf98 = reader.storage(None, 3072, device=device(type='cuda', index=0))
+        reader.tensor(buf98, (768,), is_leaf=True)  # arg98_1
+        buf99 = reader.storage(None, 3072, device=device(type='cuda', index=0))
+        reader.tensor(buf99, (768,), is_leaf=True)  # arg99_1
+        buf100 = reader.storage(None, 7077888, device=device(type='cuda', index=0))
+        reader.tensor(buf100, (2304, 768), is_leaf=True)  # arg100_1
+        buf101 = reader.storage(None, 9216, device=device(type='cuda', index=0))
+        reader.tensor(buf101, (2304,), is_leaf=True)  # arg101_1
+        buf102 = reader.storage(None, 2359296, device=device(type='cuda', index=0))
+        reader.tensor(buf102, (768, 768), is_leaf=True)  # arg102_1
+        buf103 = reader.storage(None, 3072, device=device(type='cuda', index=0))
+        reader.tensor(buf103, (768,), is_leaf=True)  # arg103_1
+        buf104 = reader.storage(None, 3072, device=device(type='cuda', index=0))
+        reader.tensor(buf104, (768,), is_leaf=True)  # arg104_1
+        buf105 = reader.storage(None, 3072, device=device(type='cuda', index=0))
+        reader.tensor(buf105, (768,), is_leaf=True)  # arg105_1
+        buf106 = reader.storage(None, 9437184, device=device(type='cuda', index=0))
+        reader.tensor(buf106, (3072, 768), is_leaf=True)  # arg106_1
+        buf107 = reader.storage(None, 12288, device=device(type='cuda', index=0))
+        reader.tensor(buf107, (3072,), is_leaf=True)  # arg107_1
+        buf108 = reader.storage(None, 9437184, device=device(type='cuda', index=0))
+        reader.tensor(buf108, (768, 3072), is_leaf=True)  # arg108_1
+        buf109 = reader.storage(None, 3072, device=device(type='cuda', index=0))
+        reader.tensor(buf109, (768,), is_leaf=True)  # arg109_1
+        buf110 = reader.storage(None, 3072, device=device(type='cuda', index=0))
+        reader.tensor(buf110, (768,), is_leaf=True)  # arg110_1
+        buf111 = reader.storage(None, 3072, device=device(type='cuda', index=0))
+        reader.tensor(buf111, (768,), is_leaf=True)  # arg111_1
+        buf112 = reader.storage(None, 7077888, device=device(type='cuda', index=0))
+        reader.tensor(buf112, (2304, 768), is_leaf=True)  # arg112_1
+        buf113 = reader.storage(None, 9216, device=device(type='cuda', index=0))
+        reader.tensor(buf113, (2304,), is_leaf=True)  # arg113_1
+        buf114 = reader.storage(None, 2359296, device=device(type='cuda', index=0))
+        reader.tensor(buf114, (768, 768), is_leaf=True)  # arg114_1
+        buf115 = reader.storage(None, 3072, device=device(type='cuda', index=0))
+        reader.tensor(buf115, (768,), is_leaf=True)  # arg115_1
+        buf116 = reader.storage(None, 3072, device=device(type='cuda', index=0))
+        reader.tensor(buf116, (768,), is_leaf=True)  # arg116_1
+        buf117 = reader.storage(None, 3072, device=device(type='cuda', index=0))
+        reader.tensor(buf117, (768,), is_leaf=True)  # arg117_1
+        buf118 = reader.storage(None, 9437184, device=device(type='cuda', index=0))
+        reader.tensor(buf118, (3072, 768), is_leaf=True)  # arg118_1
+        buf119 = reader.storage(None, 12288, device=device(type='cuda', index=0))
+        reader.tensor(buf119, (3072,), is_leaf=True)  # arg119_1
+        buf120 = reader.storage(None, 9437184, device=device(type='cuda', index=0))
+        reader.tensor(buf120, (768, 3072), is_leaf=True)  # arg120_1
+        buf121 = reader.storage(None, 3072, device=device(type='cuda', index=0))
+        reader.tensor(buf121, (768,), is_leaf=True)  # arg121_1
+        buf122 = reader.storage(None, 3072, device=device(type='cuda', index=0))
+        reader.tensor(buf122, (768,), is_leaf=True)  # arg122_1
+        buf123 = reader.storage(None, 3072, device=device(type='cuda', index=0))
+        reader.tensor(buf123, (768,), is_leaf=True)  # arg123_1
+        buf124 = reader.storage(None, 7077888, device=device(type='cuda', index=0))
+        reader.tensor(buf124, (2304, 768), is_leaf=True)  # arg124_1
+        buf125 = reader.storage(None, 9216, device=device(type='cuda', index=0))
+        reader.tensor(buf125, (2304,), is_leaf=True)  # arg125_1
+        buf126 = reader.storage(None, 2359296, device=device(type='cuda', index=0))
+        reader.tensor(buf126, (768, 768), is_leaf=True)  # arg126_1
+        buf127 = reader.storage(None, 3072, device=device(type='cuda', index=0))
+        reader.tensor(buf127, (768,), is_leaf=True)  # arg127_1
+        buf128 = reader.storage(None, 3072, device=device(type='cuda', index=0))
+        reader.tensor(buf128, (768,), is_leaf=True)  # arg128_1
+        buf129 = reader.storage(None, 3072, device=device(type='cuda', index=0))
+        reader.tensor(buf129, (768,), is_leaf=True)  # arg129_1
+        buf130 = reader.storage(None, 9437184, device=device(type='cuda', index=0))
+        reader.tensor(buf130, (3072, 768), is_leaf=True)  # arg130_1
+        buf131 = reader.storage(None, 12288, device=device(type='cuda', index=0))
+        reader.tensor(buf131, (3072,), is_leaf=True)  # arg131_1
+        buf132 = reader.storage(None, 9437184, device=device(type='cuda', index=0))
+        reader.tensor(buf132, (768, 3072), is_leaf=True)  # arg132_1
+        buf133 = reader.storage(None, 3072, device=device(type='cuda', index=0))
+        reader.tensor(buf133, (768,), is_leaf=True)  # arg133_1
+        buf134 = reader.storage(None, 3072, device=device(type='cuda', index=0))
+        reader.tensor(buf134, (768,), is_leaf=True)  # arg134_1
+        buf135 = reader.storage(None, 3072, device=device(type='cuda', index=0))
+        reader.tensor(buf135, (768,), is_leaf=True)  # arg135_1
+        buf136 = reader.storage(None, 7077888, device=device(type='cuda', index=0))
+        reader.tensor(buf136, (2304, 768), is_leaf=True)  # arg136_1
+        buf137 = reader.storage(None, 9216, device=device(type='cuda', index=0))
+        reader.tensor(buf137, (2304,), is_leaf=True)  # arg137_1
+        buf138 = reader.storage(None, 2359296, device=device(type='cuda', index=0))
+        reader.tensor(buf138, (768, 768), is_leaf=True)  # arg138_1
+        buf139 = reader.storage(None, 3072, device=device(type='cuda', index=0))
+        reader.tensor(buf139, (768,), is_leaf=True)  # arg139_1
+        buf140 = reader.storage(None, 3072, device=device(type='cuda', index=0))
+        reader.tensor(buf140, (768,), is_leaf=True)  # arg140_1
+        buf141 = reader.storage(None, 3072, device=device(type='cuda', index=0))
+        reader.tensor(buf141, (768,), is_leaf=True)  # arg141_1
+        buf142 = reader.storage(None, 9437184, device=device(type='cuda', index=0))
+        reader.tensor(buf142, (3072, 768), is_leaf=True)  # arg142_1
+        buf143 = reader.storage(None, 12288, device=device(type='cuda', index=0))
+        reader.tensor(buf143, (3072,), is_leaf=True)  # arg143_1
+        buf144 = reader.storage(None, 9437184, device=device(type='cuda', index=0))
+        reader.tensor(buf144, (768, 3072), is_leaf=True)  # arg144_1
+        buf145 = reader.storage(None, 3072, device=device(type='cuda', index=0))
+        reader.tensor(buf145, (768,), is_leaf=True)  # arg145_1
+        buf146 = reader.storage(None, 3072, device=device(type='cuda', index=0))
+        reader.tensor(buf146, (768,), is_leaf=True)  # arg146_1
+        buf147 = reader.storage(None, 3072, device=device(type='cuda', index=0))
+        reader.tensor(buf147, (768,), is_leaf=True)  # arg147_1
+        buf148 = reader.storage(None, 4, device=device(type='cuda', index=0))
+        reader.tensor(buf148, (), is_leaf=True)  # arg148_1
+        buf149 = reader.storage(None, 3145728, device=device(type='cuda', index=0))
+        reader.tensor(buf149, (1024, 768), is_leaf=True)  # arg149_1
+        buf150 = reader.storage(None, 3145728, device=device(type='cuda', index=0))
+        reader.tensor(buf150, (1024, 768), is_leaf=True)  # arg150_1
+        buf151 = reader.storage(None, 154533888, device=device(type='cuda', index=0))
+        reader.tensor(buf151, (50304, 768), is_leaf=True)  # arg151_1
+        buf152 = reader.storage(None, 3145728, device=device(type='cuda', index=0))
+        reader.tensor(buf152, (1024, 768), is_leaf=True)  # arg152_1
+        buf153 = reader.storage(None, 3072, device=device(type='cuda', index=0))
+        reader.tensor(buf153, (768,), is_leaf=True)  # arg153_1
+        buf154 = reader.storage(None, 3072, device=device(type='cuda', index=0))
+        reader.tensor(buf154, (768,), is_leaf=True)  # arg154_1
+        buf155 = reader.storage(None, 7077888, device=device(type='cuda', index=0))
+        reader.tensor(buf155, (2304, 768), is_leaf=True)  # arg155_1
+        buf156 = reader.storage(None, 9216, device=device(type='cuda', index=0))
+        reader.tensor(buf156, (2304,), is_leaf=True)  # arg156_1
+        buf157 = reader.storage(None, 2359296, device=device(type='cuda', index=0))
+        reader.tensor(buf157, (768, 768), is_leaf=True)  # arg157_1
+        buf158 = reader.storage(None, 3072, device=device(type='cuda', index=0))
+        reader.tensor(buf158, (768,), is_leaf=True)  # arg158_1
+        buf159 = reader.storage(None, 3072, device=device(type='cuda', index=0))
+        reader.tensor(buf159, (768,), is_leaf=True)  # arg159_1
+        buf160 = reader.storage(None, 3072, device=device(type='cuda', index=0))
+        reader.tensor(buf160, (768,), is_leaf=True)  # arg160_1
+        buf161 = reader.storage(None, 9437184, device=device(type='cuda', index=0))
+        reader.tensor(buf161, (3072, 768), is_leaf=True)  # arg161_1
+        buf162 = reader.storage(None, 12288, device=device(type='cuda', index=0))
+        reader.tensor(buf162, (3072,), is_leaf=True)  # arg162_1
+        buf163 = reader.storage(None, 9437184, device=device(type='cuda', index=0))
+        reader.tensor(buf163, (768, 3072), is_leaf=True)  # arg163_1
+        buf164 = reader.storage(None, 3072, device=device(type='cuda', index=0))
+        reader.tensor(buf164, (768,), is_leaf=True)  # arg164_1
+        buf165 = reader.storage(None, 3072, device=device(type='cuda', index=0))
+        reader.tensor(buf165, (768,), is_leaf=True)  # arg165_1
+        buf166 = reader.storage(None, 3072, device=device(type='cuda', index=0))
+        reader.tensor(buf166, (768,), is_leaf=True)  # arg166_1
+        buf167 = reader.storage(None, 7077888, device=device(type='cuda', index=0))
+        reader.tensor(buf167, (2304, 768), is_leaf=True)  # arg167_1
+        buf168 = reader.storage(None, 9216, device=device(type='cuda', index=0))
+        reader.tensor(buf168, (2304,), is_leaf=True)  # arg168_1
+        buf169 = reader.storage(None, 2359296, device=device(type='cuda', index=0))
+        reader.tensor(buf169, (768, 768), is_leaf=True)  # arg169_1
+        buf170 = reader.storage(None, 3072, device=device(type='cuda', index=0))
+        reader.tensor(buf170, (768,), is_leaf=True)  # arg170_1
+        buf171 = reader.storage(None, 3072, device=device(type='cuda', index=0))
+        reader.tensor(buf171, (768,), is_leaf=True)  # arg171_1
+        buf172 = reader.storage(None, 3072, device=device(type='cuda', index=0))
+        reader.tensor(buf172, (768,), is_leaf=True)  # arg172_1
+        buf173 = reader.storage(None, 9437184, device=device(type='cuda', index=0))
+        reader.tensor(buf173, (3072, 768), is_leaf=True)  # arg173_1
+        buf174 = reader.storage(None, 12288, device=device(type='cuda', index=0))
+        reader.tensor(buf174, (3072,), is_leaf=True)  # arg174_1
+        buf175 = reader.storage(None, 9437184, device=device(type='cuda', index=0))
+        reader.tensor(buf175, (768, 3072), is_leaf=True)  # arg175_1
+        buf176 = reader.storage(None, 3072, device=device(type='cuda', index=0))
+        reader.tensor(buf176, (768,), is_leaf=True)  # arg176_1
+        buf177 = reader.storage(None, 3072, device=device(type='cuda', index=0))
+        reader.tensor(buf177, (768,), is_leaf=True)  # arg177_1
+        buf178 = reader.storage(None, 3072, device=device(type='cuda', index=0))
+        reader.tensor(buf178, (768,), is_leaf=True)  # arg178_1
+        buf179 = reader.storage(None, 7077888, device=device(type='cuda', index=0))
+        reader.tensor(buf179, (2304, 768), is_leaf=True)  # arg179_1
+        buf180 = reader.storage(None, 9216, device=device(type='cuda', index=0))
+        reader.tensor(buf180, (2304,), is_leaf=True)  # arg180_1
+        buf181 = reader.storage(None, 2359296, device=device(type='cuda', index=0))
+        reader.tensor(buf181, (768, 768), is_leaf=True)  # arg181_1
+        buf182 = reader.storage(None, 3072, device=device(type='cuda', index=0))
+        reader.tensor(buf182, (768,), is_leaf=True)  # arg182_1
+        buf183 = reader.storage(None, 3072, device=device(type='cuda', index=0))
+        reader.tensor(buf183, (768,), is_leaf=True)  # arg183_1
+        buf184 = reader.storage(None, 3072, device=device(type='cuda', index=0))
+        reader.tensor(buf184, (768,), is_leaf=True)  # arg184_1
+        buf185 = reader.storage(None, 9437184, device=device(type='cuda', index=0))
+        reader.tensor(buf185, (3072, 768), is_leaf=True)  # arg185_1
+        buf186 = reader.storage(None, 12288, device=device(type='cuda', index=0))
+        reader.tensor(buf186, (3072,), is_leaf=True)  # arg186_1
+        buf187 = reader.storage(None, 9437184, device=device(type='cuda', index=0))
+        reader.tensor(buf187, (768, 3072), is_leaf=True)  # arg187_1
+        buf188 = reader.storage(None, 3072, device=device(type='cuda', index=0))
+        reader.tensor(buf188, (768,), is_leaf=True)  # arg188_1
+        buf189 = reader.storage(None, 3072, device=device(type='cuda', index=0))
+        reader.tensor(buf189, (768,), is_leaf=True)  # arg189_1
+        buf190 = reader.storage(None, 3072, device=device(type='cuda', index=0))
+        reader.tensor(buf190, (768,), is_leaf=True)  # arg190_1
+        buf191 = reader.storage(None, 7077888, device=device(type='cuda', index=0))
+        reader.tensor(buf191, (2304, 768), is_leaf=True)  # arg191_1
+        buf192 = reader.storage(None, 9216, device=device(type='cuda', index=0))
+        reader.tensor(buf192, (2304,), is_leaf=True)  # arg192_1
+        buf193 = reader.storage(None, 2359296, device=device(type='cuda', index=0))
+        reader.tensor(buf193, (768, 768), is_leaf=True)  # arg193_1
+        buf194 = reader.storage(None, 3072, device=device(type='cuda', index=0))
+        reader.tensor(buf194, (768,), is_leaf=True)  # arg194_1
+        buf195 = reader.storage(None, 3072, device=device(type='cuda', index=0))
+        reader.tensor(buf195, (768,), is_leaf=True)  # arg195_1
+        buf196 = reader.storage(None, 3072, device=device(type='cuda', index=0))
+        reader.tensor(buf196, (768,), is_leaf=True)  # arg196_1
+        buf197 = reader.storage(None, 9437184, device=device(type='cuda', index=0))
+        reader.tensor(buf197, (3072, 768), is_leaf=True)  # arg197_1
+        buf198 = reader.storage(None, 12288, device=device(type='cuda', index=0))
+        reader.tensor(buf198, (3072,), is_leaf=True)  # arg198_1
+        buf199 = reader.storage(None, 9437184, device=device(type='cuda', index=0))
+        reader.tensor(buf199, (768, 3072), is_leaf=True)  # arg199_1
+        buf200 = reader.storage(None, 3072, device=device(type='cuda', index=0))
+        reader.tensor(buf200, (768,), is_leaf=True)  # arg200_1
+        buf201 = reader.storage(None, 3072, device=device(type='cuda', index=0))
+        reader.tensor(buf201, (768,), is_leaf=True)  # arg201_1
+        buf202 = reader.storage(None, 3072, device=device(type='cuda', index=0))
+        reader.tensor(buf202, (768,), is_leaf=True)  # arg202_1
+        buf203 = reader.storage(None, 7077888, device=device(type='cuda', index=0))
+        reader.tensor(buf203, (2304, 768), is_leaf=True)  # arg203_1
+        buf204 = reader.storage(None, 9216, device=device(type='cuda', index=0))
+        reader.tensor(buf204, (2304,), is_leaf=True)  # arg204_1
+        buf205 = reader.storage(None, 2359296, device=device(type='cuda', index=0))
+        reader.tensor(buf205, (768, 768), is_leaf=True)  # arg205_1
+        buf206 = reader.storage(None, 3072, device=device(type='cuda', index=0))
+        reader.tensor(buf206, (768,), is_leaf=True)  # arg206_1
+        buf207 = reader.storage(None, 3072, device=device(type='cuda', index=0))
+        reader.tensor(buf207, (768,), is_leaf=True)  # arg207_1
+        buf208 = reader.storage(None, 3072, device=device(type='cuda', index=0))
+        reader.tensor(buf208, (768,), is_leaf=True)  # arg208_1
+        buf209 = reader.storage(None, 9437184, device=device(type='cuda', index=0))
+        reader.tensor(buf209, (3072, 768), is_leaf=True)  # arg209_1
+        buf210 = reader.storage(None, 12288, device=device(type='cuda', index=0))
+        reader.tensor(buf210, (3072,), is_leaf=True)  # arg210_1
+        buf211 = reader.storage(None, 9437184, device=device(type='cuda', index=0))
+        reader.tensor(buf211, (768, 3072), is_leaf=True)  # arg211_1
+        buf212 = reader.storage(None, 3072, device=device(type='cuda', index=0))
+        reader.tensor(buf212, (768,), is_leaf=True)  # arg212_1
+        buf213 = reader.storage(None, 3072, device=device(type='cuda', index=0))
+        reader.tensor(buf213, (768,), is_leaf=True)  # arg213_1
+        buf214 = reader.storage(None, 3072, device=device(type='cuda', index=0))
+        reader.tensor(buf214, (768,), is_leaf=True)  # arg214_1
+        buf215 = reader.storage(None, 7077888, device=device(type='cuda', index=0))
+        reader.tensor(buf215, (2304, 768), is_leaf=True)  # arg215_1
+        buf216 = reader.storage(None, 9216, device=device(type='cuda', index=0))
+        reader.tensor(buf216, (2304,), is_leaf=True)  # arg216_1
+        buf217 = reader.storage(None, 2359296, device=device(type='cuda', index=0))
+        reader.tensor(buf217, (768, 768), is_leaf=True)  # arg217_1
+        buf218 = reader.storage(None, 3072, device=device(type='cuda', index=0))
+        reader.tensor(buf218, (768,), is_leaf=True)  # arg218_1
+        buf219 = reader.storage(None, 3072, device=device(type='cuda', index=0))
+        reader.tensor(buf219, (768,), is_leaf=True)  # arg219_1
+        buf220 = reader.storage(None, 3072, device=device(type='cuda', index=0))
+        reader.tensor(buf220, (768,), is_leaf=True)  # arg220_1
+        buf221 = reader.storage(None, 9437184, device=device(type='cuda', index=0))
+        reader.tensor(buf221, (3072, 768), is_leaf=True)  # arg221_1
+        buf222 = reader.storage(None, 12288, device=device(type='cuda', index=0))
+        reader.tensor(buf222, (3072,), is_leaf=True)  # arg222_1
+        buf223 = reader.storage(None, 9437184, device=device(type='cuda', index=0))
+        reader.tensor(buf223, (768, 3072), is_leaf=True)  # arg223_1
+        buf224 = reader.storage(None, 3072, device=device(type='cuda', index=0))
+        reader.tensor(buf224, (768,), is_leaf=True)  # arg224_1
+        buf225 = reader.storage(None, 3072, device=device(type='cuda', index=0))
+        reader.tensor(buf225, (768,), is_leaf=True)  # arg225_1
+        buf226 = reader.storage(None, 3072, device=device(type='cuda', index=0))
+        reader.tensor(buf226, (768,), is_leaf=True)  # arg226_1
+        buf227 = reader.storage(None, 7077888, device=device(type='cuda', index=0))
+        reader.tensor(buf227, (2304, 768), is_leaf=True)  # arg227_1
+        buf228 = reader.storage(None, 9216, device=device(type='cuda', index=0))
+        reader.tensor(buf228, (2304,), is_leaf=True)  # arg228_1
+        buf229 = reader.storage(None, 2359296, device=device(type='cuda', index=0))
+        reader.tensor(buf229, (768, 768), is_leaf=True)  # arg229_1
+        buf230 = reader.storage(None, 3072, device=device(type='cuda', index=0))
+        reader.tensor(buf230, (768,), is_leaf=True)  # arg230_1
+        buf231 = reader.storage(None, 3072, device=device(type='cuda', index=0))
+        reader.tensor(buf231, (768,), is_leaf=True)  # arg231_1
+        buf232 = reader.storage(None, 3072, device=device(type='cuda', index=0))
+        reader.tensor(buf232, (768,), is_leaf=True)  # arg232_1
+        buf233 = reader.storage(None, 9437184, device=device(type='cuda', index=0))
+        reader.tensor(buf233, (3072, 768), is_leaf=True)  # arg233_1
+        buf234 = reader.storage(None, 12288, device=device(type='cuda', index=0))
+        reader.tensor(buf234, (3072,), is_leaf=True)  # arg234_1
+        buf235 = reader.storage(None, 9437184, device=device(type='cuda', index=0))
+        reader.tensor(buf235, (768, 3072), is_leaf=True)  # arg235_1
+        buf236 = reader.storage(None, 3072, device=device(type='cuda', index=0))
+        reader.tensor(buf236, (768,), is_leaf=True)  # arg236_1
+        buf237 = reader.storage(None, 3072, device=device(type='cuda', index=0))
+        reader.tensor(buf237, (768,), is_leaf=True)  # arg237_1
+        buf238 = reader.storage(None, 3072, device=device(type='cuda', index=0))
+        reader.tensor(buf238, (768,), is_leaf=True)  # arg238_1
+        buf239 = reader.storage(None, 7077888, device=device(type='cuda', index=0))
+        reader.tensor(buf239, (2304, 768), is_leaf=True)  # arg239_1
+        buf240 = reader.storage(None, 9216, device=device(type='cuda', index=0))
+        reader.tensor(buf240, (2304,), is_leaf=True)  # arg240_1
+        buf241 = reader.storage(None, 2359296, device=device(type='cuda', index=0))
+        reader.tensor(buf241, (768, 768), is_leaf=True)  # arg241_1
+        buf242 = reader.storage(None, 3072, device=device(type='cuda', index=0))
+        reader.tensor(buf242, (768,), is_leaf=True)  # arg242_1
+        buf243 = reader.storage(None, 3072, device=device(type='cuda', index=0))
+        reader.tensor(buf243, (768,), is_leaf=True)  # arg243_1
+        buf244 = reader.storage(None, 3072, device=device(type='cuda', index=0))
+        reader.tensor(buf244, (768,), is_leaf=True)  # arg244_1
+        buf245 = reader.storage(None, 9437184, device=device(type='cuda', index=0))
+        reader.tensor(buf245, (3072, 768), is_leaf=True)  # arg245_1
+        buf246 = reader.storage(None, 12288, device=device(type='cuda', index=0))
+        reader.tensor(buf246, (3072,), is_leaf=True)  # arg246_1
+        buf247 = reader.storage(None, 9437184, device=device(type='cuda', index=0))
+        reader.tensor(buf247, (768, 3072), is_leaf=True)  # arg247_1
+        buf248 = reader.storage(None, 3072, device=device(type='cuda', index=0))
+        reader.tensor(buf248, (768,), is_leaf=True)  # arg248_1
+        buf249 = reader.storage(None, 3072, device=device(type='cuda', index=0))
+        reader.tensor(buf249, (768,), is_leaf=True)  # arg249_1
+        buf250 = reader.storage(None, 3072, device=device(type='cuda', index=0))
+        reader.tensor(buf250, (768,), is_leaf=True)  # arg250_1
+        buf251 = reader.storage(None, 7077888, device=device(type='cuda', index=0))
+        reader.tensor(buf251, (2304, 768), is_leaf=True)  # arg251_1
+        buf252 = reader.storage(None, 9216, device=device(type='cuda', index=0))
+        reader.tensor(buf252, (2304,), is_leaf=True)  # arg252_1
+        buf253 = reader.storage(None, 2359296, device=device(type='cuda', index=0))
+        reader.tensor(buf253, (768, 768), is_leaf=True)  # arg253_1
+        buf254 = reader.storage(None, 3072, device=device(type='cuda', index=0))
+        reader.tensor(buf254, (768,), is_leaf=True)  # arg254_1
+        buf255 = reader.storage(None, 3072, device=device(type='cuda', index=0))
+        reader.tensor(buf255, (768,), is_leaf=True)  # arg255_1
+        buf256 = reader.storage(None, 3072, device=device(type='cuda', index=0))
+        reader.tensor(buf256, (768,), is_leaf=True)  # arg256_1
+        buf257 = reader.storage(None, 9437184, device=device(type='cuda', index=0))
+        reader.tensor(buf257, (3072, 768), is_leaf=True)  # arg257_1
+        buf258 = reader.storage(None, 12288, device=device(type='cuda', index=0))
+        reader.tensor(buf258, (3072,), is_leaf=True)  # arg258_1
+        buf259 = reader.storage(None, 9437184, device=device(type='cuda', index=0))
+        reader.tensor(buf259, (768, 3072), is_leaf=True)  # arg259_1
+        buf260 = reader.storage(None, 3072, device=device(type='cuda', index=0))
+        reader.tensor(buf260, (768,), is_leaf=True)  # arg260_1
+        buf261 = reader.storage(None, 3072, device=device(type='cuda', index=0))
+        reader.tensor(buf261, (768,), is_leaf=True)  # arg261_1
+        buf262 = reader.storage(None, 3072, device=device(type='cuda', index=0))
+        reader.tensor(buf262, (768,), is_leaf=True)  # arg262_1
+        buf263 = reader.storage(None, 7077888, device=device(type='cuda', index=0))
+        reader.tensor(buf263, (2304, 768), is_leaf=True)  # arg263_1
+        buf264 = reader.storage(None, 9216, device=device(type='cuda', index=0))
+        reader.tensor(buf264, (2304,), is_leaf=True)  # arg264_1
+        buf265 = reader.storage(None, 2359296, device=device(type='cuda', index=0))
+        reader.tensor(buf265, (768, 768), is_leaf=True)  # arg265_1
+        buf266 = reader.storage(None, 3072, device=device(type='cuda', index=0))
+        reader.tensor(buf266, (768,), is_leaf=True)  # arg266_1
+        buf267 = reader.storage(None, 3072, device=device(type='cuda', index=0))
+        reader.tensor(buf267, (768,), is_leaf=True)  # arg267_1
+        buf268 = reader.storage(None, 3072, device=device(type='cuda', index=0))
+        reader.tensor(buf268, (768,), is_leaf=True)  # arg268_1
+        buf269 = reader.storage(None, 9437184, device=device(type='cuda', index=0))
+        reader.tensor(buf269, (3072, 768), is_leaf=True)  # arg269_1
+        buf270 = reader.storage(None, 12288, device=device(type='cuda', index=0))
+        reader.tensor(buf270, (3072,), is_leaf=True)  # arg270_1
+        buf271 = reader.storage(None, 9437184, device=device(type='cuda', index=0))
+        reader.tensor(buf271, (768, 3072), is_leaf=True)  # arg271_1
+        buf272 = reader.storage(None, 3072, device=device(type='cuda', index=0))
+        reader.tensor(buf272, (768,), is_leaf=True)  # arg272_1
+        buf273 = reader.storage(None, 3072, device=device(type='cuda', index=0))
+        reader.tensor(buf273, (768,), is_leaf=True)  # arg273_1
+        buf274 = reader.storage(None, 3072, device=device(type='cuda', index=0))
+        reader.tensor(buf274, (768,), is_leaf=True)  # arg274_1
+        buf275 = reader.storage(None, 7077888, device=device(type='cuda', index=0))
+        reader.tensor(buf275, (2304, 768), is_leaf=True)  # arg275_1
+        buf276 = reader.storage(None, 9216, device=device(type='cuda', index=0))
+        reader.tensor(buf276, (2304,), is_leaf=True)  # arg276_1
+        buf277 = reader.storage(None, 2359296, device=device(type='cuda', index=0))
+        reader.tensor(buf277, (768, 768), is_leaf=True)  # arg277_1
+        buf278 = reader.storage(None, 3072, device=device(type='cuda', index=0))
+        reader.tensor(buf278, (768,), is_leaf=True)  # arg278_1
+        buf279 = reader.storage(None, 3072, device=device(type='cuda', index=0))
+        reader.tensor(buf279, (768,), is_leaf=True)  # arg279_1
+        buf280 = reader.storage(None, 3072, device=device(type='cuda', index=0))
+        reader.tensor(buf280, (768,), is_leaf=True)  # arg280_1
+        buf281 = reader.storage(None, 9437184, device=device(type='cuda', index=0))
+        reader.tensor(buf281, (3072, 768), is_leaf=True)  # arg281_1
+        buf282 = reader.storage(None, 12288, device=device(type='cuda', index=0))
+        reader.tensor(buf282, (3072,), is_leaf=True)  # arg282_1
+        buf283 = reader.storage(None, 9437184, device=device(type='cuda', index=0))
+        reader.tensor(buf283, (768, 3072), is_leaf=True)  # arg283_1
+        buf284 = reader.storage(None, 3072, device=device(type='cuda', index=0))
+        reader.tensor(buf284, (768,), is_leaf=True)  # arg284_1
+        buf285 = reader.storage(None, 3072, device=device(type='cuda', index=0))
+        reader.tensor(buf285, (768,), is_leaf=True)  # arg285_1
+        buf286 = reader.storage(None, 3072, device=device(type='cuda', index=0))
+        reader.tensor(buf286, (768,), is_leaf=True)  # arg286_1
+        buf287 = reader.storage(None, 7077888, device=device(type='cuda', index=0))
+        reader.tensor(buf287, (2304, 768), is_leaf=True)  # arg287_1
+        buf288 = reader.storage(None, 9216, device=device(type='cuda', index=0))
+        reader.tensor(buf288, (2304,), is_leaf=True)  # arg288_1
+        buf289 = reader.storage(None, 2359296, device=device(type='cuda', index=0))
+        reader.tensor(buf289, (768, 768), is_leaf=True)  # arg289_1
+        buf290 = reader.storage(None, 3072, device=device(type='cuda', index=0))
+        reader.tensor(buf290, (768,), is_leaf=True)  # arg290_1
+        buf291 = reader.storage(None, 3072, device=device(type='cuda', index=0))
+        reader.tensor(buf291, (768,), is_leaf=True)  # arg291_1
+        buf292 = reader.storage(None, 3072, device=device(type='cuda', index=0))
+        reader.tensor(buf292, (768,), is_leaf=True)  # arg292_1
+        buf293 = reader.storage(None, 9437184, device=device(type='cuda', index=0))
+        reader.tensor(buf293, (3072, 768), is_leaf=True)  # arg293_1
+        buf294 = reader.storage(None, 12288, device=device(type='cuda', index=0))
+        reader.tensor(buf294, (3072,), is_leaf=True)  # arg294_1
+        buf295 = reader.storage(None, 9437184, device=device(type='cuda', index=0))
+        reader.tensor(buf295, (768, 3072), is_leaf=True)  # arg295_1
+        buf296 = reader.storage(None, 3072, device=device(type='cuda', index=0))
+        reader.tensor(buf296, (768,), is_leaf=True)  # arg296_1
+        buf297 = reader.storage(None, 3072, device=device(type='cuda', index=0))
+        reader.tensor(buf297, (768,), is_leaf=True)  # arg297_1
+        buf298 = reader.storage(None, 3072, device=device(type='cuda', index=0))
+        reader.tensor(buf298, (768,), is_leaf=True)  # arg298_1
+        buf299 = reader.storage(None, 154533888, device=device(type='cuda', index=0))
+        reader.tensor(buf299, (50304, 768), is_leaf=True)  # arg299_1
+        buf300 = reader.storage(None, 3072, device=device(type='cuda', index=0))
+        reader.tensor(buf300, (768,), is_leaf=True)  # arg300_1
+        buf301 = reader.storage(None, 3072, device=device(type='cuda', index=0))
+        reader.tensor(buf301, (768,), is_leaf=True)  # arg301_1
+        buf302 = reader.storage(None, 7077888, device=device(type='cuda', index=0))
+        reader.tensor(buf302, (2304, 768), is_leaf=True)  # arg302_1
+        buf303 = reader.storage(None, 9216, device=device(type='cuda', index=0))
+        reader.tensor(buf303, (2304,), is_leaf=True)  # arg303_1
+        buf304 = reader.storage(None, 2359296, device=device(type='cuda', index=0))
+        reader.tensor(buf304, (768, 768), is_leaf=True)  # arg304_1
+        buf305 = reader.storage(None, 3072, device=device(type='cuda', index=0))
+        reader.tensor(buf305, (768,), is_leaf=True)  # arg305_1
+        buf306 = reader.storage(None, 3072, device=device(type='cuda', index=0))
+        reader.tensor(buf306, (768,), is_leaf=True)  # arg306_1
+        buf307 = reader.storage(None, 3072, device=device(type='cuda', index=0))
+        reader.tensor(buf307, (768,), is_leaf=True)  # arg307_1
+        buf308 = reader.storage(None, 9437184, device=device(type='cuda', index=0))
+        reader.tensor(buf308, (3072, 768), is_leaf=True)  # arg308_1
+        buf309 = reader.storage(None, 12288, device=device(type='cuda', index=0))
+        reader.tensor(buf309, (3072,), is_leaf=True)  # arg309_1
+        buf310 = reader.storage(None, 9437184, device=device(type='cuda', index=0))
+        reader.tensor(buf310, (768, 3072), is_leaf=True)  # arg310_1
+        buf311 = reader.storage(None, 3072, device=device(type='cuda', index=0))
+        reader.tensor(buf311, (768,), is_leaf=True)  # arg311_1
+        buf312 = reader.storage(None, 3072, device=device(type='cuda', index=0))
+        reader.tensor(buf312, (768,), is_leaf=True)  # arg312_1
+        buf313 = reader.storage(None, 3072, device=device(type='cuda', index=0))
+        reader.tensor(buf313, (768,), is_leaf=True)  # arg313_1
+        buf314 = reader.storage(None, 7077888, device=device(type='cuda', index=0))
+        reader.tensor(buf314, (2304, 768), is_leaf=True)  # arg314_1
+        buf315 = reader.storage(None, 9216, device=device(type='cuda', index=0))
+        reader.tensor(buf315, (2304,), is_leaf=True)  # arg315_1
+        buf316 = reader.storage(None, 2359296, device=device(type='cuda', index=0))
+        reader.tensor(buf316, (768, 768), is_leaf=True)  # arg316_1
+        buf317 = reader.storage(None, 3072, device=device(type='cuda', index=0))
+        reader.tensor(buf317, (768,), is_leaf=True)  # arg317_1
+        buf318 = reader.storage(None, 3072, device=device(type='cuda', index=0))
+        reader.tensor(buf318, (768,), is_leaf=True)  # arg318_1
+        buf319 = reader.storage(None, 3072, device=device(type='cuda', index=0))
+        reader.tensor(buf319, (768,), is_leaf=True)  # arg319_1
+        buf320 = reader.storage(None, 9437184, device=device(type='cuda', index=0))
+        reader.tensor(buf320, (3072, 768), is_leaf=True)  # arg320_1
+        buf321 = reader.storage(None, 12288, device=device(type='cuda', index=0))
+        reader.tensor(buf321, (3072,), is_leaf=True)  # arg321_1
+        buf322 = reader.storage(None, 9437184, device=device(type='cuda', index=0))
+        reader.tensor(buf322, (768, 3072), is_leaf=True)  # arg322_1
+        buf323 = reader.storage(None, 3072, device=device(type='cuda', index=0))
+        reader.tensor(buf323, (768,), is_leaf=True)  # arg323_1
+        buf324 = reader.storage(None, 3072, device=device(type='cuda', index=0))
+        reader.tensor(buf324, (768,), is_leaf=True)  # arg324_1
+        buf325 = reader.storage(None, 3072, device=device(type='cuda', index=0))
+        reader.tensor(buf325, (768,), is_leaf=True)  # arg325_1
+        buf326 = reader.storage(None, 7077888, device=device(type='cuda', index=0))
+        reader.tensor(buf326, (2304, 768), is_leaf=True)  # arg326_1
+        buf327 = reader.storage(None, 9216, device=device(type='cuda', index=0))
+        reader.tensor(buf327, (2304,), is_leaf=True)  # arg327_1
+        buf328 = reader.storage(None, 2359296, device=device(type='cuda', index=0))
+        reader.tensor(buf328, (768, 768), is_leaf=True)  # arg328_1
+        buf329 = reader.storage(None, 3072, device=device(type='cuda', index=0))
+        reader.tensor(buf329, (768,), is_leaf=True)  # arg329_1
+        buf330 = reader.storage(None, 3072, device=device(type='cuda', index=0))
+        reader.tensor(buf330, (768,), is_leaf=True)  # arg330_1
+        buf331 = reader.storage(None, 3072, device=device(type='cuda', index=0))
+        reader.tensor(buf331, (768,), is_leaf=True)  # arg331_1
+        buf332 = reader.storage(None, 9437184, device=device(type='cuda', index=0))
+        reader.tensor(buf332, (3072, 768), is_leaf=True)  # arg332_1
+        buf333 = reader.storage(None, 12288, device=device(type='cuda', index=0))
+        reader.tensor(buf333, (3072,), is_leaf=True)  # arg333_1
+        buf334 = reader.storage(None, 9437184, device=device(type='cuda', index=0))
+        reader.tensor(buf334, (768, 3072), is_leaf=True)  # arg334_1
+        buf335 = reader.storage(None, 3072, device=device(type='cuda', index=0))
+        reader.tensor(buf335, (768,), is_leaf=True)  # arg335_1
+        buf336 = reader.storage(None, 3072, device=device(type='cuda', index=0))
+        reader.tensor(buf336, (768,), is_leaf=True)  # arg336_1
+        buf337 = reader.storage(None, 3072, device=device(type='cuda', index=0))
+        reader.tensor(buf337, (768,), is_leaf=True)  # arg337_1
+        buf338 = reader.storage(None, 7077888, device=device(type='cuda', index=0))
+        reader.tensor(buf338, (2304, 768), is_leaf=True)  # arg338_1
+        buf339 = reader.storage(None, 9216, device=device(type='cuda', index=0))
+        reader.tensor(buf339, (2304,), is_leaf=True)  # arg339_1
+        buf340 = reader.storage(None, 2359296, device=device(type='cuda', index=0))
+        reader.tensor(buf340, (768, 768), is_leaf=True)  # arg340_1
+        buf341 = reader.storage(None, 3072, device=device(type='cuda', index=0))
+        reader.tensor(buf341, (768,), is_leaf=True)  # arg341_1
+        buf342 = reader.storage(None, 3072, device=device(type='cuda', index=0))
+        reader.tensor(buf342, (768,), is_leaf=True)  # arg342_1
+        buf343 = reader.storage(None, 3072, device=device(type='cuda', index=0))
+        reader.tensor(buf343, (768,), is_leaf=True)  # arg343_1
+        buf344 = reader.storage(None, 9437184, device=device(type='cuda', index=0))
+        reader.tensor(buf344, (3072, 768), is_leaf=True)  # arg344_1
+        buf345 = reader.storage(None, 12288, device=device(type='cuda', index=0))
+        reader.tensor(buf345, (3072,), is_leaf=True)  # arg345_1
+        buf346 = reader.storage(None, 9437184, device=device(type='cuda', index=0))
+        reader.tensor(buf346, (768, 3072), is_leaf=True)  # arg346_1
+        buf347 = reader.storage(None, 3072, device=device(type='cuda', index=0))
+        reader.tensor(buf347, (768,), is_leaf=True)  # arg347_1
+        buf348 = reader.storage(None, 3072, device=device(type='cuda', index=0))
+        reader.tensor(buf348, (768,), is_leaf=True)  # arg348_1
+        buf349 = reader.storage(None, 3072, device=device(type='cuda', index=0))
+        reader.tensor(buf349, (768,), is_leaf=True)  # arg349_1
+        buf350 = reader.storage(None, 7077888, device=device(type='cuda', index=0))
+        reader.tensor(buf350, (2304, 768), is_leaf=True)  # arg350_1
+        buf351 = reader.storage(None, 9216, device=device(type='cuda', index=0))
+        reader.tensor(buf351, (2304,), is_leaf=True)  # arg351_1
+        buf352 = reader.storage(None, 2359296, device=device(type='cuda', index=0))
+        reader.tensor(buf352, (768, 768), is_leaf=True)  # arg352_1
+        buf353 = reader.storage(None, 3072, device=device(type='cuda', index=0))
+        reader.tensor(buf353, (768,), is_leaf=True)  # arg353_1
+        buf354 = reader.storage(None, 3072, device=device(type='cuda', index=0))
+        reader.tensor(buf354, (768,), is_leaf=True)  # arg354_1
+        buf355 = reader.storage(None, 3072, device=device(type='cuda', index=0))
+        reader.tensor(buf355, (768,), is_leaf=True)  # arg355_1
+        buf356 = reader.storage(None, 9437184, device=device(type='cuda', index=0))
+        reader.tensor(buf356, (3072, 768), is_leaf=True)  # arg356_1
+        buf357 = reader.storage(None, 12288, device=device(type='cuda', index=0))
+        reader.tensor(buf357, (3072,), is_leaf=True)  # arg357_1
+        buf358 = reader.storage(None, 9437184, device=device(type='cuda', index=0))
+        reader.tensor(buf358, (768, 3072), is_leaf=True)  # arg358_1
+        buf359 = reader.storage(None, 3072, device=device(type='cuda', index=0))
+        reader.tensor(buf359, (768,), is_leaf=True)  # arg359_1
+        buf360 = reader.storage(None, 3072, device=device(type='cuda', index=0))
+        reader.tensor(buf360, (768,), is_leaf=True)  # arg360_1
+        buf361 = reader.storage(None, 3072, device=device(type='cuda', index=0))
+        reader.tensor(buf361, (768,), is_leaf=True)  # arg361_1
+        buf362 = reader.storage(None, 7077888, device=device(type='cuda', index=0))
+        reader.tensor(buf362, (2304, 768), is_leaf=True)  # arg362_1
+        buf363 = reader.storage(None, 9216, device=device(type='cuda', index=0))
+        reader.tensor(buf363, (2304,), is_leaf=True)  # arg363_1
+        buf364 = reader.storage(None, 2359296, device=device(type='cuda', index=0))
+        reader.tensor(buf364, (768, 768), is_leaf=True)  # arg364_1
+        buf365 = reader.storage(None, 3072, device=device(type='cuda', index=0))
+        reader.tensor(buf365, (768,), is_leaf=True)  # arg365_1
+        buf366 = reader.storage(None, 3072, device=device(type='cuda', index=0))
+        reader.tensor(buf366, (768,), is_leaf=True)  # arg366_1
+        buf367 = reader.storage(None, 3072, device=device(type='cuda', index=0))
+        reader.tensor(buf367, (768,), is_leaf=True)  # arg367_1
+        buf368 = reader.storage(None, 9437184, device=device(type='cuda', index=0))
+        reader.tensor(buf368, (3072, 768), is_leaf=True)  # arg368_1
+        buf369 = reader.storage(None, 12288, device=device(type='cuda', index=0))
+        reader.tensor(buf369, (3072,), is_leaf=True)  # arg369_1
+        buf370 = reader.storage(None, 9437184, device=device(type='cuda', index=0))
+        reader.tensor(buf370, (768, 3072), is_leaf=True)  # arg370_1
+        buf371 = reader.storage(None, 3072, device=device(type='cuda', index=0))
+        reader.tensor(buf371, (768,), is_leaf=True)  # arg371_1
+        buf372 = reader.storage(None, 3072, device=device(type='cuda', index=0))
+        reader.tensor(buf372, (768,), is_leaf=True)  # arg372_1
+        buf373 = reader.storage(None, 3072, device=device(type='cuda', index=0))
+        reader.tensor(buf373, (768,), is_leaf=True)  # arg373_1
+        buf374 = reader.storage(None, 7077888, device=device(type='cuda', index=0))
+        reader.tensor(buf374, (2304, 768), is_leaf=True)  # arg374_1
+        buf375 = reader.storage(None, 9216, device=device(type='cuda', index=0))
+        reader.tensor(buf375, (2304,), is_leaf=True)  # arg375_1
+        buf376 = reader.storage(None, 2359296, device=device(type='cuda', index=0))
+        reader.tensor(buf376, (768, 768), is_leaf=True)  # arg376_1
+        buf377 = reader.storage(None, 3072, device=device(type='cuda', index=0))
+        reader.tensor(buf377, (768,), is_leaf=True)  # arg377_1
+        buf378 = reader.storage(None, 3072, device=device(type='cuda', index=0))
+        reader.tensor(buf378, (768,), is_leaf=True)  # arg378_1
+        buf379 = reader.storage(None, 3072, device=device(type='cuda', index=0))
+        reader.tensor(buf379, (768,), is_leaf=True)  # arg379_1
+        buf380 = reader.storage(None, 9437184, device=device(type='cuda', index=0))
+        reader.tensor(buf380, (3072, 768), is_leaf=True)  # arg380_1
+        buf381 = reader.storage(None, 12288, device=device(type='cuda', index=0))
+        reader.tensor(buf381, (3072,), is_leaf=True)  # arg381_1
+        buf382 = reader.storage(None, 9437184, device=device(type='cuda', index=0))
+        reader.tensor(buf382, (768, 3072), is_leaf=True)  # arg382_1
+        buf383 = reader.storage(None, 3072, device=device(type='cuda', index=0))
+        reader.tensor(buf383, (768,), is_leaf=True)  # arg383_1
+        buf384 = reader.storage(None, 3072, device=device(type='cuda', index=0))
+        reader.tensor(buf384, (768,), is_leaf=True)  # arg384_1
+        buf385 = reader.storage(None, 3072, device=device(type='cuda', index=0))
+        reader.tensor(buf385, (768,), is_leaf=True)  # arg385_1
+        buf386 = reader.storage(None, 7077888, device=device(type='cuda', index=0))
+        reader.tensor(buf386, (2304, 768), is_leaf=True)  # arg386_1
+        buf387 = reader.storage(None, 9216, device=device(type='cuda', index=0))
+        reader.tensor(buf387, (2304,), is_leaf=True)  # arg387_1
+        buf388 = reader.storage(None, 2359296, device=device(type='cuda', index=0))
+        reader.tensor(buf388, (768, 768), is_leaf=True)  # arg388_1
+        buf389 = reader.storage(None, 3072, device=device(type='cuda', index=0))
+        reader.tensor(buf389, (768,), is_leaf=True)  # arg389_1
+        buf390 = reader.storage(None, 3072, device=device(type='cuda', index=0))
+        reader.tensor(buf390, (768,), is_leaf=True)  # arg390_1
+        buf391 = reader.storage(None, 3072, device=device(type='cuda', index=0))
+        reader.tensor(buf391, (768,), is_leaf=True)  # arg391_1
+        buf392 = reader.storage(None, 9437184, device=device(type='cuda', index=0))
+        reader.tensor(buf392, (3072, 768), is_leaf=True)  # arg392_1
+        buf393 = reader.storage(None, 12288, device=device(type='cuda', index=0))
+        reader.tensor(buf393, (3072,), is_leaf=True)  # arg393_1
+        buf394 = reader.storage(None, 9437184, device=device(type='cuda', index=0))
+        reader.tensor(buf394, (768, 3072), is_leaf=True)  # arg394_1
+        buf395 = reader.storage(None, 3072, device=device(type='cuda', index=0))
+        reader.tensor(buf395, (768,), is_leaf=True)  # arg395_1
+        buf396 = reader.storage(None, 3072, device=device(type='cuda', index=0))
+        reader.tensor(buf396, (768,), is_leaf=True)  # arg396_1
+        buf397 = reader.storage(None, 3072, device=device(type='cuda', index=0))
+        reader.tensor(buf397, (768,), is_leaf=True)  # arg397_1
+        buf398 = reader.storage(None, 7077888, device=device(type='cuda', index=0))
+        reader.tensor(buf398, (2304, 768), is_leaf=True)  # arg398_1
+        buf399 = reader.storage(None, 9216, device=device(type='cuda', index=0))
+        reader.tensor(buf399, (2304,), is_leaf=True)  # arg399_1
+        buf400 = reader.storage(None, 2359296, device=device(type='cuda', index=0))
+        reader.tensor(buf400, (768, 768), is_leaf=True)  # arg400_1
+        buf401 = reader.storage(None, 3072, device=device(type='cuda', index=0))
+        reader.tensor(buf401, (768,), is_leaf=True)  # arg401_1
+        buf402 = reader.storage(None, 3072, device=device(type='cuda', index=0))
+        reader.tensor(buf402, (768,), is_leaf=True)  # arg402_1
+        buf403 = reader.storage(None, 3072, device=device(type='cuda', index=0))
+        reader.tensor(buf403, (768,), is_leaf=True)  # arg403_1
+        buf404 = reader.storage(None, 9437184, device=device(type='cuda', index=0))
+        reader.tensor(buf404, (3072, 768), is_leaf=True)  # arg404_1
+        buf405 = reader.storage(None, 12288, device=device(type='cuda', index=0))
+        reader.tensor(buf405, (3072,), is_leaf=True)  # arg405_1
+        buf406 = reader.storage(None, 9437184, device=device(type='cuda', index=0))
+        reader.tensor(buf406, (768, 3072), is_leaf=True)  # arg406_1
+        buf407 = reader.storage(None, 3072, device=device(type='cuda', index=0))
+        reader.tensor(buf407, (768,), is_leaf=True)  # arg407_1
+        buf408 = reader.storage(None, 3072, device=device(type='cuda', index=0))
+        reader.tensor(buf408, (768,), is_leaf=True)  # arg408_1
+        buf409 = reader.storage(None, 3072, device=device(type='cuda', index=0))
+        reader.tensor(buf409, (768,), is_leaf=True)  # arg409_1
+        buf410 = reader.storage(None, 7077888, device=device(type='cuda', index=0))
+        reader.tensor(buf410, (2304, 768), is_leaf=True)  # arg410_1
+        buf411 = reader.storage(None, 9216, device=device(type='cuda', index=0))
+        reader.tensor(buf411, (2304,), is_leaf=True)  # arg411_1
+        buf412 = reader.storage(None, 2359296, device=device(type='cuda', index=0))
+        reader.tensor(buf412, (768, 768), is_leaf=True)  # arg412_1
+        buf413 = reader.storage(None, 3072, device=device(type='cuda', index=0))
+        reader.tensor(buf413, (768,), is_leaf=True)  # arg413_1
+        buf414 = reader.storage(None, 3072, device=device(type='cuda', index=0))
+        reader.tensor(buf414, (768,), is_leaf=True)  # arg414_1
+        buf415 = reader.storage(None, 3072, device=device(type='cuda', index=0))
+        reader.tensor(buf415, (768,), is_leaf=True)  # arg415_1
+        buf416 = reader.storage(None, 9437184, device=device(type='cuda', index=0))
+        reader.tensor(buf416, (3072, 768), is_leaf=True)  # arg416_1
+        buf417 = reader.storage(None, 12288, device=device(type='cuda', index=0))
+        reader.tensor(buf417, (3072,), is_leaf=True)  # arg417_1
+        buf418 = reader.storage(None, 9437184, device=device(type='cuda', index=0))
+        reader.tensor(buf418, (768, 3072), is_leaf=True)  # arg418_1
+        buf419 = reader.storage(None, 3072, device=device(type='cuda', index=0))
+        reader.tensor(buf419, (768,), is_leaf=True)  # arg419_1
+        buf420 = reader.storage(None, 3072, device=device(type='cuda', index=0))
+        reader.tensor(buf420, (768,), is_leaf=True)  # arg420_1
+        buf421 = reader.storage(None, 3072, device=device(type='cuda', index=0))
+        reader.tensor(buf421, (768,), is_leaf=True)  # arg421_1
+        buf422 = reader.storage(None, 7077888, device=device(type='cuda', index=0))
+        reader.tensor(buf422, (2304, 768), is_leaf=True)  # arg422_1
+        buf423 = reader.storage(None, 9216, device=device(type='cuda', index=0))
+        reader.tensor(buf423, (2304,), is_leaf=True)  # arg423_1
+        buf424 = reader.storage(None, 2359296, device=device(type='cuda', index=0))
+        reader.tensor(buf424, (768, 768), is_leaf=True)  # arg424_1
+        buf425 = reader.storage(None, 3072, device=device(type='cuda', index=0))
+        reader.tensor(buf425, (768,), is_leaf=True)  # arg425_1
+        buf426 = reader.storage(None, 3072, device=device(type='cuda', index=0))
+        reader.tensor(buf426, (768,), is_leaf=True)  # arg426_1
+        buf427 = reader.storage(None, 3072, device=device(type='cuda', index=0))
+        reader.tensor(buf427, (768,), is_leaf=True)  # arg427_1
+        buf428 = reader.storage(None, 9437184, device=device(type='cuda', index=0))
+        reader.tensor(buf428, (3072, 768), is_leaf=True)  # arg428_1
+        buf429 = reader.storage(None, 12288, device=device(type='cuda', index=0))
+        reader.tensor(buf429, (3072,), is_leaf=True)  # arg429_1
+        buf430 = reader.storage(None, 9437184, device=device(type='cuda', index=0))
+        reader.tensor(buf430, (768, 3072), is_leaf=True)  # arg430_1
+        buf431 = reader.storage(None, 3072, device=device(type='cuda', index=0))
+        reader.tensor(buf431, (768,), is_leaf=True)  # arg431_1
+        buf432 = reader.storage(None, 3072, device=device(type='cuda', index=0))
+        reader.tensor(buf432, (768,), is_leaf=True)  # arg432_1
+        buf433 = reader.storage(None, 3072, device=device(type='cuda', index=0))
+        reader.tensor(buf433, (768,), is_leaf=True)  # arg433_1
+        buf434 = reader.storage(None, 7077888, device=device(type='cuda', index=0))
+        reader.tensor(buf434, (2304, 768), is_leaf=True)  # arg434_1
+        buf435 = reader.storage(None, 9216, device=device(type='cuda', index=0))
+        reader.tensor(buf435, (2304,), is_leaf=True)  # arg435_1
+        buf436 = reader.storage(None, 2359296, device=device(type='cuda', index=0))
+        reader.tensor(buf436, (768, 768), is_leaf=True)  # arg436_1
+        buf437 = reader.storage(None, 3072, device=device(type='cuda', index=0))
+        reader.tensor(buf437, (768,), is_leaf=True)  # arg437_1
+        buf438 = reader.storage(None, 3072, device=device(type='cuda', index=0))
+        reader.tensor(buf438, (768,), is_leaf=True)  # arg438_1
+        buf439 = reader.storage(None, 3072, device=device(type='cuda', index=0))
+        reader.tensor(buf439, (768,), is_leaf=True)  # arg439_1
+        buf440 = reader.storage(None, 9437184, device=device(type='cuda', index=0))
+        reader.tensor(buf440, (3072, 768), is_leaf=True)  # arg440_1
+        buf441 = reader.storage(None, 12288, device=device(type='cuda', index=0))
+        reader.tensor(buf441, (3072,), is_leaf=True)  # arg441_1
+        buf442 = reader.storage(None, 9437184, device=device(type='cuda', index=0))
+        reader.tensor(buf442, (768, 3072), is_leaf=True)  # arg442_1
+        buf443 = reader.storage(None, 3072, device=device(type='cuda', index=0))
+        reader.tensor(buf443, (768,), is_leaf=True)  # arg443_1
+        buf444 = reader.storage(None, 3072, device=device(type='cuda', index=0))
+        reader.tensor(buf444, (768,), is_leaf=True)  # arg444_1
+        buf445 = reader.storage(None, 3072, device=device(type='cuda', index=0))
+        reader.tensor(buf445, (768,), is_leaf=True)  # arg445_1
+        buf446 = reader.storage(None, 154533888, device=device(type='cuda', index=0))
+        reader.tensor(buf446, (50304, 768), is_leaf=True)  # arg446_1
+        buf447 = reader.storage(None, 3072, device=device(type='cuda', index=0))
+        reader.tensor(buf447, (768,), is_leaf=True)  # arg447_1
+        buf448 = reader.storage(None, 3072, device=device(type='cuda', index=0))
+        reader.tensor(buf448, (768,), is_leaf=True)  # arg448_1
+        buf449 = reader.storage(None, 7077888, device=device(type='cuda', index=0))
+        reader.tensor(buf449, (2304, 768), is_leaf=True)  # arg449_1
+        buf450 = reader.storage(None, 9216, device=device(type='cuda', index=0))
+        reader.tensor(buf450, (2304,), is_leaf=True)  # arg450_1
+        buf451 = reader.storage(None, 2359296, device=device(type='cuda', index=0))
+        reader.tensor(buf451, (768, 768), is_leaf=True)  # arg451_1
+        buf452 = reader.storage(None, 3072, device=device(type='cuda', index=0))
+        reader.tensor(buf452, (768,), is_leaf=True)  # arg452_1
+        buf453 = reader.storage(None, 3072, device=device(type='cuda', index=0))
+        reader.tensor(buf453, (768,), is_leaf=True)  # arg453_1
+        buf454 = reader.storage(None, 3072, device=device(type='cuda', index=0))
+        reader.tensor(buf454, (768,), is_leaf=True)  # arg454_1
+        buf455 = reader.storage(None, 9437184, device=device(type='cuda', index=0))
+        reader.tensor(buf455, (3072, 768), is_leaf=True)  # arg455_1
+        buf456 = reader.storage(None, 12288, device=device(type='cuda', index=0))
+        reader.tensor(buf456, (3072,), is_leaf=True)  # arg456_1
+        buf457 = reader.storage(None, 9437184, device=device(type='cuda', index=0))
+        reader.tensor(buf457, (768, 3072), is_leaf=True)  # arg457_1
+        buf458 = reader.storage(None, 3072, device=device(type='cuda', index=0))
+        reader.tensor(buf458, (768,), is_leaf=True)  # arg458_1
+        buf459 = reader.storage(None, 3072, device=device(type='cuda', index=0))
+        reader.tensor(buf459, (768,), is_leaf=True)  # arg459_1
+        buf460 = reader.storage(None, 3072, device=device(type='cuda', index=0))
+        reader.tensor(buf460, (768,), is_leaf=True)  # arg460_1
+        buf461 = reader.storage(None, 7077888, device=device(type='cuda', index=0))
+        reader.tensor(buf461, (2304, 768), is_leaf=True)  # arg461_1
+        buf462 = reader.storage(None, 9216, device=device(type='cuda', index=0))
+        reader.tensor(buf462, (2304,), is_leaf=True)  # arg462_1
+        buf463 = reader.storage(None, 2359296, device=device(type='cuda', index=0))
+        reader.tensor(buf463, (768, 768), is_leaf=True)  # arg463_1
+        buf464 = reader.storage(None, 3072, device=device(type='cuda', index=0))
+        reader.tensor(buf464, (768,), is_leaf=True)  # arg464_1
+        buf465 = reader.storage(None, 3072, device=device(type='cuda', index=0))
+        reader.tensor(buf465, (768,), is_leaf=True)  # arg465_1
+        buf466 = reader.storage(None, 3072, device=device(type='cuda', index=0))
+        reader.tensor(buf466, (768,), is_leaf=True)  # arg466_1
+        buf467 = reader.storage(None, 9437184, device=device(type='cuda', index=0))
+        reader.tensor(buf467, (3072, 768), is_leaf=True)  # arg467_1
+        buf468 = reader.storage(None, 12288, device=device(type='cuda', index=0))
+        reader.tensor(buf468, (3072,), is_leaf=True)  # arg468_1
+        buf469 = reader.storage(None, 9437184, device=device(type='cuda', index=0))
+        reader.tensor(buf469, (768, 3072), is_leaf=True)  # arg469_1
+        buf470 = reader.storage(None, 3072, device=device(type='cuda', index=0))
+        reader.tensor(buf470, (768,), is_leaf=True)  # arg470_1
+        buf471 = reader.storage(None, 3072, device=device(type='cuda', index=0))
+        reader.tensor(buf471, (768,), is_leaf=True)  # arg471_1
+        buf472 = reader.storage(None, 3072, device=device(type='cuda', index=0))
+        reader.tensor(buf472, (768,), is_leaf=True)  # arg472_1
+        buf473 = reader.storage(None, 7077888, device=device(type='cuda', index=0))
+        reader.tensor(buf473, (2304, 768), is_leaf=True)  # arg473_1
+        buf474 = reader.storage(None, 9216, device=device(type='cuda', index=0))
+        reader.tensor(buf474, (2304,), is_leaf=True)  # arg474_1
+        buf475 = reader.storage(None, 2359296, device=device(type='cuda', index=0))
+        reader.tensor(buf475, (768, 768), is_leaf=True)  # arg475_1
+        buf476 = reader.storage(None, 3072, device=device(type='cuda', index=0))
+        reader.tensor(buf476, (768,), is_leaf=True)  # arg476_1
+        buf477 = reader.storage(None, 3072, device=device(type='cuda', index=0))
+        reader.tensor(buf477, (768,), is_leaf=True)  # arg477_1
+        buf478 = reader.storage(None, 3072, device=device(type='cuda', index=0))
+        reader.tensor(buf478, (768,), is_leaf=True)  # arg478_1
+        buf479 = reader.storage(None, 9437184, device=device(type='cuda', index=0))
+        reader.tensor(buf479, (3072, 768), is_leaf=True)  # arg479_1
+        buf480 = reader.storage(None, 12288, device=device(type='cuda', index=0))
+        reader.tensor(buf480, (3072,), is_leaf=True)  # arg480_1
+        buf481 = reader.storage(None, 9437184, device=device(type='cuda', index=0))
+        reader.tensor(buf481, (768, 3072), is_leaf=True)  # arg481_1
+        buf482 = reader.storage(None, 3072, device=device(type='cuda', index=0))
+        reader.tensor(buf482, (768,), is_leaf=True)  # arg482_1
+        buf483 = reader.storage(None, 3072, device=device(type='cuda', index=0))
+        reader.tensor(buf483, (768,), is_leaf=True)  # arg483_1
+        buf484 = reader.storage(None, 3072, device=device(type='cuda', index=0))
+        reader.tensor(buf484, (768,), is_leaf=True)  # arg484_1
+        buf485 = reader.storage(None, 7077888, device=device(type='cuda', index=0))
+        reader.tensor(buf485, (2304, 768), is_leaf=True)  # arg485_1
+        buf486 = reader.storage(None, 9216, device=device(type='cuda', index=0))
+        reader.tensor(buf486, (2304,), is_leaf=True)  # arg486_1
+        buf487 = reader.storage(None, 2359296, device=device(type='cuda', index=0))
+        reader.tensor(buf487, (768, 768), is_leaf=True)  # arg487_1
+        buf488 = reader.storage(None, 3072, device=device(type='cuda', index=0))
+        reader.tensor(buf488, (768,), is_leaf=True)  # arg488_1
+        buf489 = reader.storage(None, 3072, device=device(type='cuda', index=0))
+        reader.tensor(buf489, (768,), is_leaf=True)  # arg489_1
+        buf490 = reader.storage(None, 3072, device=device(type='cuda', index=0))
+        reader.tensor(buf490, (768,), is_leaf=True)  # arg490_1
+        buf491 = reader.storage(None, 9437184, device=device(type='cuda', index=0))
+        reader.tensor(buf491, (3072, 768), is_leaf=True)  # arg491_1
+        buf492 = reader.storage(None, 12288, device=device(type='cuda', index=0))
+        reader.tensor(buf492, (3072,), is_leaf=True)  # arg492_1
+        buf493 = reader.storage(None, 9437184, device=device(type='cuda', index=0))
+        reader.tensor(buf493, (768, 3072), is_leaf=True)  # arg493_1
+        buf494 = reader.storage(None, 3072, device=device(type='cuda', index=0))
+        reader.tensor(buf494, (768,), is_leaf=True)  # arg494_1
+        buf495 = reader.storage(None, 3072, device=device(type='cuda', index=0))
+        reader.tensor(buf495, (768,), is_leaf=True)  # arg495_1
+        buf496 = reader.storage(None, 3072, device=device(type='cuda', index=0))
+        reader.tensor(buf496, (768,), is_leaf=True)  # arg496_1
+        buf497 = reader.storage(None, 7077888, device=device(type='cuda', index=0))
+        reader.tensor(buf497, (2304, 768), is_leaf=True)  # arg497_1
+        buf498 = reader.storage(None, 9216, device=device(type='cuda', index=0))
+        reader.tensor(buf498, (2304,), is_leaf=True)  # arg498_1
+        buf499 = reader.storage(None, 2359296, device=device(type='cuda', index=0))
+        reader.tensor(buf499, (768, 768), is_leaf=True)  # arg499_1
+        buf500 = reader.storage(None, 3072, device=device(type='cuda', index=0))
+        reader.tensor(buf500, (768,), is_leaf=True)  # arg500_1
+        buf501 = reader.storage(None, 3072, device=device(type='cuda', index=0))
+        reader.tensor(buf501, (768,), is_leaf=True)  # arg501_1
+        buf502 = reader.storage(None, 3072, device=device(type='cuda', index=0))
+        reader.tensor(buf502, (768,), is_leaf=True)  # arg502_1
+        buf503 = reader.storage(None, 9437184, device=device(type='cuda', index=0))
+        reader.tensor(buf503, (3072, 768), is_leaf=True)  # arg503_1
+        buf504 = reader.storage(None, 12288, device=device(type='cuda', index=0))
+        reader.tensor(buf504, (3072,), is_leaf=True)  # arg504_1
+        buf505 = reader.storage(None, 9437184, device=device(type='cuda', index=0))
+        reader.tensor(buf505, (768, 3072), is_leaf=True)  # arg505_1
+        buf506 = reader.storage(None, 3072, device=device(type='cuda', index=0))
+        reader.tensor(buf506, (768,), is_leaf=True)  # arg506_1
+        buf507 = reader.storage(None, 3072, device=device(type='cuda', index=0))
+        reader.tensor(buf507, (768,), is_leaf=True)  # arg507_1
+        buf508 = reader.storage(None, 3072, device=device(type='cuda', index=0))
+        reader.tensor(buf508, (768,), is_leaf=True)  # arg508_1
+        buf509 = reader.storage(None, 7077888, device=device(type='cuda', index=0))
+        reader.tensor(buf509, (2304, 768), is_leaf=True)  # arg509_1
+        buf510 = reader.storage(None, 9216, device=device(type='cuda', index=0))
+        reader.tensor(buf510, (2304,), is_leaf=True)  # arg510_1
+        buf511 = reader.storage(None, 2359296, device=device(type='cuda', index=0))
+        reader.tensor(buf511, (768, 768), is_leaf=True)  # arg511_1
+        buf512 = reader.storage(None, 3072, device=device(type='cuda', index=0))
+        reader.tensor(buf512, (768,), is_leaf=True)  # arg512_1
+        buf513 = reader.storage(None, 3072, device=device(type='cuda', index=0))
+        reader.tensor(buf513, (768,), is_leaf=True)  # arg513_1
+        buf514 = reader.storage(None, 3072, device=device(type='cuda', index=0))
+        reader.tensor(buf514, (768,), is_leaf=True)  # arg514_1
+        buf515 = reader.storage(None, 9437184, device=device(type='cuda', index=0))
+        reader.tensor(buf515, (3072, 768), is_leaf=True)  # arg515_1
+        buf516 = reader.storage(None, 12288, device=device(type='cuda', index=0))
+        reader.tensor(buf516, (3072,), is_leaf=True)  # arg516_1
+        buf517 = reader.storage(None, 9437184, device=device(type='cuda', index=0))
+        reader.tensor(buf517, (768, 3072), is_leaf=True)  # arg517_1
+        buf518 = reader.storage(None, 3072, device=device(type='cuda', index=0))
+        reader.tensor(buf518, (768,), is_leaf=True)  # arg518_1
+        buf519 = reader.storage(None, 3072, device=device(type='cuda', index=0))
+        reader.tensor(buf519, (768,), is_leaf=True)  # arg519_1
+        buf520 = reader.storage(None, 3072, device=device(type='cuda', index=0))
+        reader.tensor(buf520, (768,), is_leaf=True)  # arg520_1
+        buf521 = reader.storage(None, 7077888, device=device(type='cuda', index=0))
+        reader.tensor(buf521, (2304, 768), is_leaf=True)  # arg521_1
+        buf522 = reader.storage(None, 9216, device=device(type='cuda', index=0))
+        reader.tensor(buf522, (2304,), is_leaf=True)  # arg522_1
+        buf523 = reader.storage(None, 2359296, device=device(type='cuda', index=0))
+        reader.tensor(buf523, (768, 768), is_leaf=True)  # arg523_1
+        buf524 = reader.storage(None, 3072, device=device(type='cuda', index=0))
+        reader.tensor(buf524, (768,), is_leaf=True)  # arg524_1
+        buf525 = reader.storage(None, 3072, device=device(type='cuda', index=0))
+        reader.tensor(buf525, (768,), is_leaf=True)  # arg525_1
+        buf526 = reader.storage(None, 3072, device=device(type='cuda', index=0))
+        reader.tensor(buf526, (768,), is_leaf=True)  # arg526_1
+        buf527 = reader.storage(None, 9437184, device=device(type='cuda', index=0))
+        reader.tensor(buf527, (3072, 768), is_leaf=True)  # arg527_1
+        buf528 = reader.storage(None, 12288, device=device(type='cuda', index=0))
+        reader.tensor(buf528, (3072,), is_leaf=True)  # arg528_1
+        buf529 = reader.storage(None, 9437184, device=device(type='cuda', index=0))
+        reader.tensor(buf529, (768, 3072), is_leaf=True)  # arg529_1
+        buf530 = reader.storage(None, 3072, device=device(type='cuda', index=0))
+        reader.tensor(buf530, (768,), is_leaf=True)  # arg530_1
+        buf531 = reader.storage(None, 3072, device=device(type='cuda', index=0))
+        reader.tensor(buf531, (768,), is_leaf=True)  # arg531_1
+        buf532 = reader.storage(None, 3072, device=device(type='cuda', index=0))
+        reader.tensor(buf532, (768,), is_leaf=True)  # arg532_1
+        buf533 = reader.storage(None, 7077888, device=device(type='cuda', index=0))
+        reader.tensor(buf533, (2304, 768), is_leaf=True)  # arg533_1
+        buf534 = reader.storage(None, 9216,
device=device(type='cuda', index=0)) + reader.tensor(buf534, (2304,), is_leaf=True) # arg534_1 + buf535 = reader.storage(None, 2359296, device=device(type='cuda', index=0)) + reader.tensor(buf535, (768, 768), is_leaf=True) # arg535_1 + buf536 = reader.storage(None, 3072, device=device(type='cuda', index=0)) + reader.tensor(buf536, (768,), is_leaf=True) # arg536_1 + buf537 = reader.storage(None, 3072, device=device(type='cuda', index=0)) + reader.tensor(buf537, (768,), is_leaf=True) # arg537_1 + buf538 = reader.storage(None, 3072, device=device(type='cuda', index=0)) + reader.tensor(buf538, (768,), is_leaf=True) # arg538_1 + buf539 = reader.storage(None, 9437184, device=device(type='cuda', index=0)) + reader.tensor(buf539, (3072, 768), is_leaf=True) # arg539_1 + buf540 = reader.storage(None, 12288, device=device(type='cuda', index=0)) + reader.tensor(buf540, (3072,), is_leaf=True) # arg540_1 + buf541 = reader.storage(None, 9437184, device=device(type='cuda', index=0)) + reader.tensor(buf541, (768, 3072), is_leaf=True) # arg541_1 + buf542 = reader.storage(None, 3072, device=device(type='cuda', index=0)) + reader.tensor(buf542, (768,), is_leaf=True) # arg542_1 + buf543 = reader.storage(None, 3072, device=device(type='cuda', index=0)) + reader.tensor(buf543, (768,), is_leaf=True) # arg543_1 + buf544 = reader.storage(None, 3072, device=device(type='cuda', index=0)) + reader.tensor(buf544, (768,), is_leaf=True) # arg544_1 + buf545 = reader.storage(None, 7077888, device=device(type='cuda', index=0)) + reader.tensor(buf545, (2304, 768), is_leaf=True) # arg545_1 + buf546 = reader.storage(None, 9216, device=device(type='cuda', index=0)) + reader.tensor(buf546, (2304,), is_leaf=True) # arg546_1 + buf547 = reader.storage(None, 2359296, device=device(type='cuda', index=0)) + reader.tensor(buf547, (768, 768), is_leaf=True) # arg547_1 + buf548 = reader.storage(None, 3072, device=device(type='cuda', index=0)) + reader.tensor(buf548, (768,), is_leaf=True) # arg548_1 + buf549 = reader.storage(None, 3072, device=device(type='cuda', index=0)) + reader.tensor(buf549, (768,), is_leaf=True) # arg549_1 + buf550 = reader.storage(None, 3072, device=device(type='cuda', index=0)) + reader.tensor(buf550, (768,), is_leaf=True) # arg550_1 + buf551 = reader.storage(None, 9437184, device=device(type='cuda', index=0)) + reader.tensor(buf551, (3072, 768), is_leaf=True) # arg551_1 + buf552 = reader.storage(None, 12288, device=device(type='cuda', index=0)) + reader.tensor(buf552, (3072,), is_leaf=True) # arg552_1 + buf553 = reader.storage(None, 9437184, device=device(type='cuda', index=0)) + reader.tensor(buf553, (768, 3072), is_leaf=True) # arg553_1 + buf554 = reader.storage(None, 3072, device=device(type='cuda', index=0)) + reader.tensor(buf554, (768,), is_leaf=True) # arg554_1 + buf555 = reader.storage(None, 3072, device=device(type='cuda', index=0)) + reader.tensor(buf555, (768,), is_leaf=True) # arg555_1 + buf556 = reader.storage(None, 3072, device=device(type='cuda', index=0)) + reader.tensor(buf556, (768,), is_leaf=True) # arg556_1 + buf557 = reader.storage(None, 7077888, device=device(type='cuda', index=0)) + reader.tensor(buf557, (2304, 768), is_leaf=True) # arg557_1 + buf558 = reader.storage(None, 9216, device=device(type='cuda', index=0)) + reader.tensor(buf558, (2304,), is_leaf=True) # arg558_1 + buf559 = reader.storage(None, 2359296, device=device(type='cuda', index=0)) + reader.tensor(buf559, (768, 768), is_leaf=True) # arg559_1 + buf560 = reader.storage(None, 3072, device=device(type='cuda', index=0)) + 
reader.tensor(buf560, (768,), is_leaf=True) # arg560_1 + buf561 = reader.storage(None, 3072, device=device(type='cuda', index=0)) + reader.tensor(buf561, (768,), is_leaf=True) # arg561_1 + buf562 = reader.storage(None, 3072, device=device(type='cuda', index=0)) + reader.tensor(buf562, (768,), is_leaf=True) # arg562_1 + buf563 = reader.storage(None, 9437184, device=device(type='cuda', index=0)) + reader.tensor(buf563, (3072, 768), is_leaf=True) # arg563_1 + buf564 = reader.storage(None, 12288, device=device(type='cuda', index=0)) + reader.tensor(buf564, (3072,), is_leaf=True) # arg564_1 + buf565 = reader.storage(None, 9437184, device=device(type='cuda', index=0)) + reader.tensor(buf565, (768, 3072), is_leaf=True) # arg565_1 + buf566 = reader.storage(None, 3072, device=device(type='cuda', index=0)) + reader.tensor(buf566, (768,), is_leaf=True) # arg566_1 + buf567 = reader.storage(None, 3072, device=device(type='cuda', index=0)) + reader.tensor(buf567, (768,), is_leaf=True) # arg567_1 + buf568 = reader.storage(None, 3072, device=device(type='cuda', index=0)) + reader.tensor(buf568, (768,), is_leaf=True) # arg568_1 + buf569 = reader.storage(None, 7077888, device=device(type='cuda', index=0)) + reader.tensor(buf569, (2304, 768), is_leaf=True) # arg569_1 + buf570 = reader.storage(None, 9216, device=device(type='cuda', index=0)) + reader.tensor(buf570, (2304,), is_leaf=True) # arg570_1 + buf571 = reader.storage(None, 2359296, device=device(type='cuda', index=0)) + reader.tensor(buf571, (768, 768), is_leaf=True) # arg571_1 + buf572 = reader.storage(None, 3072, device=device(type='cuda', index=0)) + reader.tensor(buf572, (768,), is_leaf=True) # arg572_1 + buf573 = reader.storage(None, 3072, device=device(type='cuda', index=0)) + reader.tensor(buf573, (768,), is_leaf=True) # arg573_1 + buf574 = reader.storage(None, 3072, device=device(type='cuda', index=0)) + reader.tensor(buf574, (768,), is_leaf=True) # arg574_1 + buf575 = reader.storage(None, 9437184, device=device(type='cuda', index=0)) + reader.tensor(buf575, (3072, 768), is_leaf=True) # arg575_1 + buf576 = reader.storage(None, 12288, device=device(type='cuda', index=0)) + reader.tensor(buf576, (3072,), is_leaf=True) # arg576_1 + buf577 = reader.storage(None, 9437184, device=device(type='cuda', index=0)) + reader.tensor(buf577, (768, 3072), is_leaf=True) # arg577_1 + buf578 = reader.storage(None, 3072, device=device(type='cuda', index=0)) + reader.tensor(buf578, (768,), is_leaf=True) # arg578_1 + buf579 = reader.storage(None, 3072, device=device(type='cuda', index=0)) + reader.tensor(buf579, (768,), is_leaf=True) # arg579_1 + buf580 = reader.storage(None, 3072, device=device(type='cuda', index=0)) + reader.tensor(buf580, (768,), is_leaf=True) # arg580_1 + buf581 = reader.storage(None, 7077888, device=device(type='cuda', index=0)) + reader.tensor(buf581, (2304, 768), is_leaf=True) # arg581_1 + buf582 = reader.storage(None, 9216, device=device(type='cuda', index=0)) + reader.tensor(buf582, (2304,), is_leaf=True) # arg582_1 + buf583 = reader.storage(None, 2359296, device=device(type='cuda', index=0)) + reader.tensor(buf583, (768, 768), is_leaf=True) # arg583_1 + buf584 = reader.storage(None, 3072, device=device(type='cuda', index=0)) + reader.tensor(buf584, (768,), is_leaf=True) # arg584_1 + buf585 = reader.storage(None, 3072, device=device(type='cuda', index=0)) + reader.tensor(buf585, (768,), is_leaf=True) # arg585_1 + buf586 = reader.storage(None, 3072, device=device(type='cuda', index=0)) + reader.tensor(buf586, (768,), is_leaf=True) # arg586_1 
+ buf587 = reader.storage(None, 9437184, device=device(type='cuda', index=0)) + reader.tensor(buf587, (3072, 768), is_leaf=True) # arg587_1 + buf588 = reader.storage(None, 12288, device=device(type='cuda', index=0)) + reader.tensor(buf588, (3072,), is_leaf=True) # arg588_1 + buf589 = reader.storage(None, 9437184, device=device(type='cuda', index=0)) + reader.tensor(buf589, (768, 3072), is_leaf=True) # arg589_1 + buf590 = reader.storage(None, 3072, device=device(type='cuda', index=0)) + reader.tensor(buf590, (768,), is_leaf=True) # arg590_1 + buf591 = reader.storage(None, 3072, device=device(type='cuda', index=0)) + reader.tensor(buf591, (768,), is_leaf=True) # arg591_1 + buf592 = reader.storage(None, 3072, device=device(type='cuda', index=0)) + reader.tensor(buf592, (768,), is_leaf=True) # arg592_1 + buf593 = reader.storage(None, 4, device=device(type='cuda', index=0)) + reader.tensor(buf593, (), is_leaf=True) # arg593_1 + buf594 = reader.storage(None, 4, device=device(type='cuda', index=0)) + reader.tensor(buf594, (), is_leaf=True) # arg594_1 + buf595 = reader.storage(None, 4, device=device(type='cuda', index=0)) + reader.tensor(buf595, (), is_leaf=True) # arg595_1 + buf596 = reader.storage(None, 4, device=device(type='cuda', index=0)) + reader.tensor(buf596, (), is_leaf=True) # arg596_1 + buf597 = reader.storage(None, 4, device=device(type='cuda', index=0)) + reader.tensor(buf597, (), is_leaf=True) # arg597_1 + buf598 = reader.storage(None, 4, device=device(type='cuda', index=0)) + reader.tensor(buf598, (), is_leaf=True) # arg598_1 + buf599 = reader.storage(None, 4, device=device(type='cuda', index=0)) + reader.tensor(buf599, (), is_leaf=True) # arg599_1 + buf600 = reader.storage(None, 4, device=device(type='cuda', index=0)) + reader.tensor(buf600, (), is_leaf=True) # arg600_1 + buf601 = reader.storage(None, 4, device=device(type='cuda', index=0)) + reader.tensor(buf601, (), is_leaf=True) # arg601_1 + buf602 = reader.storage(None, 4, device=device(type='cuda', index=0)) + reader.tensor(buf602, (), is_leaf=True) # arg602_1 + buf603 = reader.storage(None, 4, device=device(type='cuda', index=0)) + reader.tensor(buf603, (), is_leaf=True) # arg603_1 + buf604 = reader.storage(None, 4, device=device(type='cuda', index=0)) + reader.tensor(buf604, (), is_leaf=True) # arg604_1 + buf605 = reader.storage(None, 4, device=device(type='cuda', index=0)) + reader.tensor(buf605, (), is_leaf=True) # arg605_1 + buf606 = reader.storage(None, 4, device=device(type='cuda', index=0)) + reader.tensor(buf606, (), is_leaf=True) # arg606_1 + buf607 = reader.storage(None, 4, device=device(type='cuda', index=0)) + reader.tensor(buf607, (), is_leaf=True) # arg607_1 + buf608 = reader.storage(None, 4, device=device(type='cuda', index=0)) + reader.tensor(buf608, (), is_leaf=True) # arg608_1 + buf609 = reader.storage(None, 4, device=device(type='cuda', index=0)) + reader.tensor(buf609, (), is_leaf=True) # arg609_1 + buf610 = reader.storage(None, 4, device=device(type='cuda', index=0)) + reader.tensor(buf610, (), is_leaf=True) # arg610_1 + buf611 = reader.storage(None, 4, device=device(type='cuda', index=0)) + reader.tensor(buf611, (), is_leaf=True) # arg611_1 + buf612 = reader.storage(None, 4, device=device(type='cuda', index=0)) + reader.tensor(buf612, (), is_leaf=True) # arg612_1 + buf613 = reader.storage(None, 4, device=device(type='cuda', index=0)) + reader.tensor(buf613, (), is_leaf=True) # arg613_1 + buf614 = reader.storage(None, 4, device=device(type='cuda', index=0)) + reader.tensor(buf614, (), is_leaf=True) # 
arg614_1 + buf615 = reader.storage(None, 4, device=device(type='cuda', index=0)) + reader.tensor(buf615, (), is_leaf=True) # arg615_1 + buf616 = reader.storage(None, 4, device=device(type='cuda', index=0)) + reader.tensor(buf616, (), is_leaf=True) # arg616_1 + buf617 = reader.storage(None, 4, device=device(type='cuda', index=0)) + reader.tensor(buf617, (), is_leaf=True) # arg617_1 + buf618 = reader.storage(None, 4, device=device(type='cuda', index=0)) + reader.tensor(buf618, (), is_leaf=True) # arg618_1 + buf619 = reader.storage(None, 4, device=device(type='cuda', index=0)) + reader.tensor(buf619, (), is_leaf=True) # arg619_1 + buf620 = reader.storage(None, 4, device=device(type='cuda', index=0)) + reader.tensor(buf620, (), is_leaf=True) # arg620_1 + buf621 = reader.storage(None, 4, device=device(type='cuda', index=0)) + reader.tensor(buf621, (), is_leaf=True) # arg621_1 + buf622 = reader.storage(None, 4, device=device(type='cuda', index=0)) + reader.tensor(buf622, (), is_leaf=True) # arg622_1 + buf623 = reader.storage(None, 4, device=device(type='cuda', index=0)) + reader.tensor(buf623, (), is_leaf=True) # arg623_1 + buf624 = reader.storage(None, 4, device=device(type='cuda', index=0)) + reader.tensor(buf624, (), is_leaf=True) # arg624_1 + buf625 = reader.storage(None, 4, device=device(type='cuda', index=0)) + reader.tensor(buf625, (), is_leaf=True) # arg625_1 + buf626 = reader.storage(None, 4, device=device(type='cuda', index=0)) + reader.tensor(buf626, (), is_leaf=True) # arg626_1 + buf627 = reader.storage(None, 4, device=device(type='cuda', index=0)) + reader.tensor(buf627, (), is_leaf=True) # arg627_1 + buf628 = reader.storage(None, 4, device=device(type='cuda', index=0)) + reader.tensor(buf628, (), is_leaf=True) # arg628_1 + buf629 = reader.storage(None, 4, device=device(type='cuda', index=0)) + reader.tensor(buf629, (), is_leaf=True) # arg629_1 + buf630 = reader.storage(None, 4, device=device(type='cuda', index=0)) + reader.tensor(buf630, (), is_leaf=True) # arg630_1 + buf631 = reader.storage(None, 4, device=device(type='cuda', index=0)) + reader.tensor(buf631, (), is_leaf=True) # arg631_1 + buf632 = reader.storage(None, 4, device=device(type='cuda', index=0)) + reader.tensor(buf632, (), is_leaf=True) # arg632_1 + buf633 = reader.storage(None, 4, device=device(type='cuda', index=0)) + reader.tensor(buf633, (), is_leaf=True) # arg633_1 + buf634 = reader.storage(None, 4, device=device(type='cuda', index=0)) + reader.tensor(buf634, (), is_leaf=True) # arg634_1 + buf635 = reader.storage(None, 4, device=device(type='cuda', index=0)) + reader.tensor(buf635, (), is_leaf=True) # arg635_1 + buf636 = reader.storage(None, 4, device=device(type='cuda', index=0)) + reader.tensor(buf636, (), is_leaf=True) # arg636_1 + buf637 = reader.storage(None, 4, device=device(type='cuda', index=0)) + reader.tensor(buf637, (), is_leaf=True) # arg637_1 + buf638 = reader.storage(None, 4, device=device(type='cuda', index=0)) + reader.tensor(buf638, (), is_leaf=True) # arg638_1 + buf639 = reader.storage(None, 4, device=device(type='cuda', index=0)) + reader.tensor(buf639, (), is_leaf=True) # arg639_1 + buf640 = reader.storage(None, 4, device=device(type='cuda', index=0)) + reader.tensor(buf640, (), is_leaf=True) # arg640_1 + buf641 = reader.storage(None, 4, device=device(type='cuda', index=0)) + reader.tensor(buf641, (), is_leaf=True) # arg641_1 + buf642 = reader.storage(None, 4, device=device(type='cuda', index=0)) + reader.tensor(buf642, (), is_leaf=True) # arg642_1 + buf643 = reader.storage(None, 4, 
device=device(type='cuda', index=0)) + reader.tensor(buf643, (), is_leaf=True) # arg643_1 + buf644 = reader.storage(None, 4, device=device(type='cuda', index=0)) + reader.tensor(buf644, (), is_leaf=True) # arg644_1 + buf645 = reader.storage(None, 4, device=device(type='cuda', index=0)) + reader.tensor(buf645, (), is_leaf=True) # arg645_1 + buf646 = reader.storage(None, 4, device=device(type='cuda', index=0)) + reader.tensor(buf646, (), is_leaf=True) # arg646_1 + buf647 = reader.storage(None, 4, device=device(type='cuda', index=0)) + reader.tensor(buf647, (), is_leaf=True) # arg647_1 + buf648 = reader.storage(None, 4, device=device(type='cuda', index=0)) + reader.tensor(buf648, (), is_leaf=True) # arg648_1 + buf649 = reader.storage(None, 4, device=device(type='cuda', index=0)) + reader.tensor(buf649, (), is_leaf=True) # arg649_1 + buf650 = reader.storage(None, 4, device=device(type='cuda', index=0)) + reader.tensor(buf650, (), is_leaf=True) # arg650_1 + buf651 = reader.storage(None, 4, device=device(type='cuda', index=0)) + reader.tensor(buf651, (), is_leaf=True) # arg651_1 + buf652 = reader.storage(None, 4, device=device(type='cuda', index=0)) + reader.tensor(buf652, (), is_leaf=True) # arg652_1 + buf653 = reader.storage(None, 4, device=device(type='cuda', index=0)) + reader.tensor(buf653, (), is_leaf=True) # arg653_1 + buf654 = reader.storage(None, 4, device=device(type='cuda', index=0)) + reader.tensor(buf654, (), is_leaf=True) # arg654_1 + buf655 = reader.storage(None, 4, device=device(type='cuda', index=0)) + reader.tensor(buf655, (), is_leaf=True) # arg655_1 + buf656 = reader.storage(None, 4, device=device(type='cuda', index=0)) + reader.tensor(buf656, (), is_leaf=True) # arg656_1 + buf657 = reader.storage(None, 4, device=device(type='cuda', index=0)) + reader.tensor(buf657, (), is_leaf=True) # arg657_1 + buf658 = reader.storage(None, 4, device=device(type='cuda', index=0)) + reader.tensor(buf658, (), is_leaf=True) # arg658_1 + buf659 = reader.storage(None, 4, device=device(type='cuda', index=0)) + reader.tensor(buf659, (), is_leaf=True) # arg659_1 + buf660 = reader.storage(None, 4, device=device(type='cuda', index=0)) + reader.tensor(buf660, (), is_leaf=True) # arg660_1 + buf661 = reader.storage(None, 4, device=device(type='cuda', index=0)) + reader.tensor(buf661, (), is_leaf=True) # arg661_1 + buf662 = reader.storage(None, 4, device=device(type='cuda', index=0)) + reader.tensor(buf662, (), is_leaf=True) # arg662_1 + buf663 = reader.storage(None, 4, device=device(type='cuda', index=0)) + reader.tensor(buf663, (), is_leaf=True) # arg663_1 + buf664 = reader.storage(None, 4, device=device(type='cuda', index=0)) + reader.tensor(buf664, (), is_leaf=True) # arg664_1 + buf665 = reader.storage(None, 4, device=device(type='cuda', index=0)) + reader.tensor(buf665, (), is_leaf=True) # arg665_1 + buf666 = reader.storage(None, 4, device=device(type='cuda', index=0)) + reader.tensor(buf666, (), is_leaf=True) # arg666_1 + buf667 = reader.storage(None, 4, device=device(type='cuda', index=0)) + reader.tensor(buf667, (), is_leaf=True) # arg667_1 + buf668 = reader.storage(None, 4, device=device(type='cuda', index=0)) + reader.tensor(buf668, (), is_leaf=True) # arg668_1 + buf669 = reader.storage(None, 4, device=device(type='cuda', index=0)) + reader.tensor(buf669, (), is_leaf=True) # arg669_1 + buf670 = reader.storage(None, 4, device=device(type='cuda', index=0)) + reader.tensor(buf670, (), is_leaf=True) # arg670_1 + buf671 = reader.storage(None, 4, device=device(type='cuda', index=0)) + 
reader.tensor(buf671, (), is_leaf=True) # arg671_1 + buf672 = reader.storage(None, 4, device=device(type='cuda', index=0)) + reader.tensor(buf672, (), is_leaf=True) # arg672_1 + buf673 = reader.storage(None, 4, device=device(type='cuda', index=0)) + reader.tensor(buf673, (), is_leaf=True) # arg673_1 + buf674 = reader.storage(None, 4, device=device(type='cuda', index=0)) + reader.tensor(buf674, (), is_leaf=True) # arg674_1 + buf675 = reader.storage(None, 4, device=device(type='cuda', index=0)) + reader.tensor(buf675, (), is_leaf=True) # arg675_1 + buf676 = reader.storage(None, 4, device=device(type='cuda', index=0)) + reader.tensor(buf676, (), is_leaf=True) # arg676_1 + buf677 = reader.storage(None, 4, device=device(type='cuda', index=0)) + reader.tensor(buf677, (), is_leaf=True) # arg677_1 + buf678 = reader.storage(None, 4, device=device(type='cuda', index=0)) + reader.tensor(buf678, (), is_leaf=True) # arg678_1 + buf679 = reader.storage(None, 4, device=device(type='cuda', index=0)) + reader.tensor(buf679, (), is_leaf=True) # arg679_1 + buf680 = reader.storage(None, 4, device=device(type='cuda', index=0)) + reader.tensor(buf680, (), is_leaf=True) # arg680_1 + buf681 = reader.storage(None, 4, device=device(type='cuda', index=0)) + reader.tensor(buf681, (), is_leaf=True) # arg681_1 + buf682 = reader.storage(None, 4, device=device(type='cuda', index=0)) + reader.tensor(buf682, (), is_leaf=True) # arg682_1 + buf683 = reader.storage(None, 4, device=device(type='cuda', index=0)) + reader.tensor(buf683, (), is_leaf=True) # arg683_1 + buf684 = reader.storage(None, 4, device=device(type='cuda', index=0)) + reader.tensor(buf684, (), is_leaf=True) # arg684_1 + buf685 = reader.storage(None, 4, device=device(type='cuda', index=0)) + reader.tensor(buf685, (), is_leaf=True) # arg685_1 + buf686 = reader.storage(None, 4, device=device(type='cuda', index=0)) + reader.tensor(buf686, (), is_leaf=True) # arg686_1 + buf687 = reader.storage(None, 4, device=device(type='cuda', index=0)) + reader.tensor(buf687, (), is_leaf=True) # arg687_1 + buf688 = reader.storage(None, 4, device=device(type='cuda', index=0)) + reader.tensor(buf688, (), is_leaf=True) # arg688_1 + buf689 = reader.storage(None, 4, device=device(type='cuda', index=0)) + reader.tensor(buf689, (), is_leaf=True) # arg689_1 + buf690 = reader.storage(None, 4, device=device(type='cuda', index=0)) + reader.tensor(buf690, (), is_leaf=True) # arg690_1 + buf691 = reader.storage(None, 4, device=device(type='cuda', index=0)) + reader.tensor(buf691, (), is_leaf=True) # arg691_1 + buf692 = reader.storage(None, 4, device=device(type='cuda', index=0)) + reader.tensor(buf692, (), is_leaf=True) # arg692_1 + buf693 = reader.storage(None, 4, device=device(type='cuda', index=0)) + reader.tensor(buf693, (), is_leaf=True) # arg693_1 + buf694 = reader.storage(None, 4, device=device(type='cuda', index=0)) + reader.tensor(buf694, (), is_leaf=True) # arg694_1 + buf695 = reader.storage(None, 4, device=device(type='cuda', index=0)) + reader.tensor(buf695, (), is_leaf=True) # arg695_1 + buf696 = reader.storage(None, 4, device=device(type='cuda', index=0)) + reader.tensor(buf696, (), is_leaf=True) # arg696_1 + buf697 = reader.storage(None, 4, device=device(type='cuda', index=0)) + reader.tensor(buf697, (), is_leaf=True) # arg697_1 + buf698 = reader.storage(None, 4, device=device(type='cuda', index=0)) + reader.tensor(buf698, (), is_leaf=True) # arg698_1 + buf699 = reader.storage(None, 4, device=device(type='cuda', index=0)) + reader.tensor(buf699, (), is_leaf=True) # arg699_1 + 
buf700 = reader.storage(None, 4, device=device(type='cuda', index=0)) + reader.tensor(buf700, (), is_leaf=True) # arg700_1 + buf701 = reader.storage(None, 4, device=device(type='cuda', index=0)) + reader.tensor(buf701, (), is_leaf=True) # arg701_1 + buf702 = reader.storage(None, 4, device=device(type='cuda', index=0)) + reader.tensor(buf702, (), is_leaf=True) # arg702_1 + buf703 = reader.storage(None, 4, device=device(type='cuda', index=0)) + reader.tensor(buf703, (), is_leaf=True) # arg703_1 + buf704 = reader.storage(None, 4, device=device(type='cuda', index=0)) + reader.tensor(buf704, (), is_leaf=True) # arg704_1 + buf705 = reader.storage(None, 4, device=device(type='cuda', index=0)) + reader.tensor(buf705, (), is_leaf=True) # arg705_1 + buf706 = reader.storage(None, 4, device=device(type='cuda', index=0)) + reader.tensor(buf706, (), is_leaf=True) # arg706_1 + buf707 = reader.storage(None, 4, device=device(type='cuda', index=0)) + reader.tensor(buf707, (), is_leaf=True) # arg707_1 + buf708 = reader.storage(None, 4, device=device(type='cuda', index=0)) + reader.tensor(buf708, (), is_leaf=True) # arg708_1 + buf709 = reader.storage(None, 4, device=device(type='cuda', index=0)) + reader.tensor(buf709, (), is_leaf=True) # arg709_1 + buf710 = reader.storage(None, 4, device=device(type='cuda', index=0)) + reader.tensor(buf710, (), is_leaf=True) # arg710_1 + buf711 = reader.storage(None, 4, device=device(type='cuda', index=0)) + reader.tensor(buf711, (), is_leaf=True) # arg711_1 + buf712 = reader.storage(None, 4, device=device(type='cuda', index=0)) + reader.tensor(buf712, (), is_leaf=True) # arg712_1 + buf713 = reader.storage(None, 4, device=device(type='cuda', index=0)) + reader.tensor(buf713, (), is_leaf=True) # arg713_1 + buf714 = reader.storage(None, 4, device=device(type='cuda', index=0)) + reader.tensor(buf714, (), is_leaf=True) # arg714_1 + buf715 = reader.storage(None, 4, device=device(type='cuda', index=0)) + reader.tensor(buf715, (), is_leaf=True) # arg715_1 + buf716 = reader.storage(None, 4, device=device(type='cuda', index=0)) + reader.tensor(buf716, (), is_leaf=True) # arg716_1 + buf717 = reader.storage(None, 4, device=device(type='cuda', index=0)) + reader.tensor(buf717, (), is_leaf=True) # arg717_1 + buf718 = reader.storage(None, 4, device=device(type='cuda', index=0)) + reader.tensor(buf718, (), is_leaf=True) # arg718_1 + buf719 = reader.storage(None, 4, device=device(type='cuda', index=0)) + reader.tensor(buf719, (), is_leaf=True) # arg719_1 + buf720 = reader.storage(None, 4, device=device(type='cuda', index=0)) + reader.tensor(buf720, (), is_leaf=True) # arg720_1 + buf721 = reader.storage(None, 4, device=device(type='cuda', index=0)) + reader.tensor(buf721, (), is_leaf=True) # arg721_1 + buf722 = reader.storage(None, 4, device=device(type='cuda', index=0)) + reader.tensor(buf722, (), is_leaf=True) # arg722_1 + buf723 = reader.storage(None, 4, device=device(type='cuda', index=0)) + reader.tensor(buf723, (), is_leaf=True) # arg723_1 + buf724 = reader.storage(None, 4, device=device(type='cuda', index=0)) + reader.tensor(buf724, (), is_leaf=True) # arg724_1 + buf725 = reader.storage(None, 4, device=device(type='cuda', index=0)) + reader.tensor(buf725, (), is_leaf=True) # arg725_1 + buf726 = reader.storage(None, 4, device=device(type='cuda', index=0)) + reader.tensor(buf726, (), is_leaf=True) # arg726_1 + buf727 = reader.storage(None, 4, device=device(type='cuda', index=0)) + reader.tensor(buf727, (), is_leaf=True) # arg727_1 + buf728 = reader.storage(None, 4, 
device=device(type='cuda', index=0)) + reader.tensor(buf728, (), is_leaf=True) # arg728_1 + buf729 = reader.storage(None, 4, device=device(type='cuda', index=0)) + reader.tensor(buf729, (), is_leaf=True) # arg729_1 + buf730 = reader.storage(None, 4, device=device(type='cuda', index=0)) + reader.tensor(buf730, (), is_leaf=True) # arg730_1 + buf731 = reader.storage(None, 4, device=device(type='cuda', index=0)) + reader.tensor(buf731, (), is_leaf=True) # arg731_1 + buf732 = reader.storage(None, 4, device=device(type='cuda', index=0)) + reader.tensor(buf732, (), is_leaf=True) # arg732_1 + buf733 = reader.storage(None, 4, device=device(type='cuda', index=0)) + reader.tensor(buf733, (), is_leaf=True) # arg733_1 + buf734 = reader.storage(None, 4, device=device(type='cuda', index=0)) + reader.tensor(buf734, (), is_leaf=True) # arg734_1 + buf735 = reader.storage(None, 4, device=device(type='cuda', index=0)) + reader.tensor(buf735, (), is_leaf=True) # arg735_1 + buf736 = reader.storage(None, 4, device=device(type='cuda', index=0)) + reader.tensor(buf736, (), is_leaf=True) # arg736_1 + buf737 = reader.storage(None, 4, device=device(type='cuda', index=0)) + reader.tensor(buf737, (), is_leaf=True) # arg737_1 + buf738 = reader.storage(None, 4, device=device(type='cuda', index=0)) + reader.tensor(buf738, (), is_leaf=True) # arg738_1 + buf739 = reader.storage(None, 4, device=device(type='cuda', index=0)) + reader.tensor(buf739, (), is_leaf=True) # arg739_1 + load_args._version = 0 + mod = Repro() + if __name__ == '__main__': + from torch._dynamo.repro.after_aot import run_repro + with torch.no_grad(): + run_repro(mod, load_args, accuracy=False, command='run', save_dir=None, tracing_mode='real', check_str=None) + # To run it separately, do + # mod, args = run_repro(mod, load_args, accuracy=False, command='get_args', save_dir=None, tracing_mode='real', check_str=None) + # mod(*args) +V0806 13:56:11.466000 4107173 torch/_inductor/compile_fx.py:778] {"inductor_post_grad_graph": {}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0, "has_payload": "1871900304899d884f7ee96375f19aa0"} + class (torch.nn.Module): + def forward(self, arg0_1: "f32[50304, 768][768, 1]cuda:0", arg1_1: "f32[1024, 768][768, 1]cuda:0", arg2_1: "f32[768][1]cuda:0", arg3_1: "f32[768][1]cuda:0", arg4_1: "f32[2304, 768][768, 1]cuda:0", arg5_1: "f32[2304][1]cuda:0", arg6_1: "f32[768, 768][768, 1]cuda:0", arg7_1: "f32[768][1]cuda:0", arg8_1: "f32[768][1]cuda:0", arg9_1: "f32[768][1]cuda:0", arg10_1: "f32[3072, 768][768, 1]cuda:0", arg11_1: "f32[3072][1]cuda:0", arg12_1: "f32[768, 3072][3072, 1]cuda:0", arg13_1: "f32[768][1]cuda:0", arg14_1: "f32[768][1]cuda:0", arg15_1: "f32[768][1]cuda:0", arg16_1: "f32[2304, 768][768, 1]cuda:0", arg17_1: "f32[2304][1]cuda:0", arg18_1: "f32[768, 768][768, 1]cuda:0", arg19_1: "f32[768][1]cuda:0", arg20_1: "f32[768][1]cuda:0", arg21_1: "f32[768][1]cuda:0", arg22_1: "f32[3072, 768][768, 1]cuda:0", arg23_1: "f32[3072][1]cuda:0", arg24_1: "f32[768, 3072][3072, 1]cuda:0", arg25_1: "f32[768][1]cuda:0", arg26_1: "f32[768][1]cuda:0", arg27_1: "f32[768][1]cuda:0", arg28_1: "f32[2304, 768][768, 1]cuda:0", arg29_1: "f32[2304][1]cuda:0", arg30_1: "f32[768, 768][768, 1]cuda:0", arg31_1: "f32[768][1]cuda:0", arg32_1: "f32[768][1]cuda:0", arg33_1: "f32[768][1]cuda:0", arg34_1: "f32[3072, 768][768, 1]cuda:0", arg35_1: "f32[3072][1]cuda:0", arg36_1: "f32[768, 3072][3072, 1]cuda:0", arg37_1: "f32[768][1]cuda:0", arg38_1: "f32[768][1]cuda:0", arg39_1: "f32[768][1]cuda:0", arg40_1: "f32[2304, 768][768, 1]cuda:0", arg41_1: 
"f32[2304][1]cuda:0", arg42_1: "f32[768, 768][768, 1]cuda:0", arg43_1: "f32[768][1]cuda:0", arg44_1: "f32[768][1]cuda:0", arg45_1: "f32[768][1]cuda:0", arg46_1: "f32[3072, 768][768, 1]cuda:0", arg47_1: "f32[3072][1]cuda:0", arg48_1: "f32[768, 3072][3072, 1]cuda:0", arg49_1: "f32[768][1]cuda:0", arg50_1: "f32[768][1]cuda:0", arg51_1: "f32[768][1]cuda:0", arg52_1: "f32[2304, 768][768, 1]cuda:0", arg53_1: "f32[2304][1]cuda:0", arg54_1: "f32[768, 768][768, 1]cuda:0", arg55_1: "f32[768][1]cuda:0", arg56_1: "f32[768][1]cuda:0", arg57_1: "f32[768][1]cuda:0", arg58_1: "f32[3072, 768][768, 1]cuda:0", arg59_1: "f32[3072][1]cuda:0", arg60_1: "f32[768, 3072][3072, 1]cuda:0", arg61_1: "f32[768][1]cuda:0", arg62_1: "f32[768][1]cuda:0", arg63_1: "f32[768][1]cuda:0", arg64_1: "f32[2304, 768][768, 1]cuda:0", arg65_1: "f32[2304][1]cuda:0", arg66_1: "f32[768, 768][768, 1]cuda:0", arg67_1: "f32[768][1]cuda:0", arg68_1: "f32[768][1]cuda:0", arg69_1: "f32[768][1]cuda:0", arg70_1: "f32[3072, 768][768, 1]cuda:0", arg71_1: "f32[3072][1]cuda:0", arg72_1: "f32[768, 3072][3072, 1]cuda:0", arg73_1: "f32[768][1]cuda:0", arg74_1: "f32[768][1]cuda:0", arg75_1: "f32[768][1]cuda:0", arg76_1: "f32[2304, 768][768, 1]cuda:0", arg77_1: "f32[2304][1]cuda:0", arg78_1: "f32[768, 768][768, 1]cuda:0", arg79_1: "f32[768][1]cuda:0", arg80_1: "f32[768][1]cuda:0", arg81_1: "f32[768][1]cuda:0", arg82_1: "f32[3072, 768][768, 1]cuda:0", arg83_1: "f32[3072][1]cuda:0", arg84_1: "f32[768, 3072][3072, 1]cuda:0", arg85_1: "f32[768][1]cuda:0", arg86_1: "f32[768][1]cuda:0", arg87_1: "f32[768][1]cuda:0", arg88_1: "f32[2304, 768][768, 1]cuda:0", arg89_1: "f32[2304][1]cuda:0", arg90_1: "f32[768, 768][768, 1]cuda:0", arg91_1: "f32[768][1]cuda:0", arg92_1: "f32[768][1]cuda:0", arg93_1: "f32[768][1]cuda:0", arg94_1: "f32[3072, 768][768, 1]cuda:0", arg95_1: "f32[3072][1]cuda:0", arg96_1: "f32[768, 3072][3072, 1]cuda:0", arg97_1: "f32[768][1]cuda:0", arg98_1: "f32[768][1]cuda:0", arg99_1: "f32[768][1]cuda:0", arg100_1: "f32[2304, 768][768, 1]cuda:0", arg101_1: "f32[2304][1]cuda:0", arg102_1: "f32[768, 768][768, 1]cuda:0", arg103_1: "f32[768][1]cuda:0", arg104_1: "f32[768][1]cuda:0", arg105_1: "f32[768][1]cuda:0", arg106_1: "f32[3072, 768][768, 1]cuda:0", arg107_1: "f32[3072][1]cuda:0", arg108_1: "f32[768, 3072][3072, 1]cuda:0", arg109_1: "f32[768][1]cuda:0", arg110_1: "f32[768][1]cuda:0", arg111_1: "f32[768][1]cuda:0", arg112_1: "f32[2304, 768][768, 1]cuda:0", arg113_1: "f32[2304][1]cuda:0", arg114_1: "f32[768, 768][768, 1]cuda:0", arg115_1: "f32[768][1]cuda:0", arg116_1: "f32[768][1]cuda:0", arg117_1: "f32[768][1]cuda:0", arg118_1: "f32[3072, 768][768, 1]cuda:0", arg119_1: "f32[3072][1]cuda:0", arg120_1: "f32[768, 3072][3072, 1]cuda:0", arg121_1: "f32[768][1]cuda:0", arg122_1: "f32[768][1]cuda:0", arg123_1: "f32[768][1]cuda:0", arg124_1: "f32[2304, 768][768, 1]cuda:0", arg125_1: "f32[2304][1]cuda:0", arg126_1: "f32[768, 768][768, 1]cuda:0", arg127_1: "f32[768][1]cuda:0", arg128_1: "f32[768][1]cuda:0", arg129_1: "f32[768][1]cuda:0", arg130_1: "f32[3072, 768][768, 1]cuda:0", arg131_1: "f32[3072][1]cuda:0", arg132_1: "f32[768, 3072][3072, 1]cuda:0", arg133_1: "f32[768][1]cuda:0", arg134_1: "f32[768][1]cuda:0", arg135_1: "f32[768][1]cuda:0", arg136_1: "f32[2304, 768][768, 1]cuda:0", arg137_1: "f32[2304][1]cuda:0", arg138_1: "f32[768, 768][768, 1]cuda:0", arg139_1: "f32[768][1]cuda:0", arg140_1: "f32[768][1]cuda:0", arg141_1: "f32[768][1]cuda:0", arg142_1: "f32[3072, 768][768, 1]cuda:0", arg143_1: "f32[3072][1]cuda:0", arg144_1: "f32[768, 3072][3072, 
1]cuda:0", arg145_1: "f32[768][1]cuda:0", arg146_1: "f32[768][1]cuda:0", arg147_1: "f32[768][1]cuda:0", arg148_1: "f32[][]cuda:0", arg149_1: "f32[1024, 768][768, 1]cuda:0", arg150_1: "f32[1024, 768][768, 1]cuda:0", arg151_1: "f32[50304, 768][768, 1]cuda:0", arg152_1: "f32[1024, 768][768, 1]cuda:0", arg153_1: "f32[768][1]cuda:0", arg154_1: "f32[768][1]cuda:0", arg155_1: "f32[2304, 768][768, 1]cuda:0", arg156_1: "f32[2304][1]cuda:0", arg157_1: "f32[768, 768][768, 1]cuda:0", arg158_1: "f32[768][1]cuda:0", arg159_1: "f32[768][1]cuda:0", arg160_1: "f32[768][1]cuda:0", arg161_1: "f32[3072, 768][768, 1]cuda:0", arg162_1: "f32[3072][1]cuda:0", arg163_1: "f32[768, 3072][3072, 1]cuda:0", arg164_1: "f32[768][1]cuda:0", arg165_1: "f32[768][1]cuda:0", arg166_1: "f32[768][1]cuda:0", arg167_1: "f32[2304, 768][768, 1]cuda:0", arg168_1: "f32[2304][1]cuda:0", arg169_1: "f32[768, 768][768, 1]cuda:0", arg170_1: "f32[768][1]cuda:0", arg171_1: "f32[768][1]cuda:0", arg172_1: "f32[768][1]cuda:0", arg173_1: "f32[3072, 768][768, 1]cuda:0", arg174_1: "f32[3072][1]cuda:0", arg175_1: "f32[768, 3072][3072, 1]cuda:0", arg176_1: "f32[768][1]cuda:0", arg177_1: "f32[768][1]cuda:0", arg178_1: "f32[768][1]cuda:0", arg179_1: "f32[2304, 768][768, 1]cuda:0", arg180_1: "f32[2304][1]cuda:0", arg181_1: "f32[768, 768][768, 1]cuda:0", arg182_1: "f32[768][1]cuda:0", arg183_1: "f32[768][1]cuda:0", arg184_1: "f32[768][1]cuda:0", arg185_1: "f32[3072, 768][768, 1]cuda:0", arg186_1: "f32[3072][1]cuda:0", arg187_1: "f32[768, 3072][3072, 1]cuda:0", arg188_1: "f32[768][1]cuda:0", arg189_1: "f32[768][1]cuda:0", arg190_1: "f32[768][1]cuda:0", arg191_1: "f32[2304, 768][768, 1]cuda:0", arg192_1: "f32[2304][1]cuda:0", arg193_1: "f32[768, 768][768, 1]cuda:0", arg194_1: "f32[768][1]cuda:0", arg195_1: "f32[768][1]cuda:0", arg196_1: "f32[768][1]cuda:0", arg197_1: "f32[3072, 768][768, 1]cuda:0", arg198_1: "f32[3072][1]cuda:0", arg199_1: "f32[768, 3072][3072, 1]cuda:0", arg200_1: "f32[768][1]cuda:0", arg201_1: "f32[768][1]cuda:0", arg202_1: "f32[768][1]cuda:0", arg203_1: "f32[2304, 768][768, 1]cuda:0", arg204_1: "f32[2304][1]cuda:0", arg205_1: "f32[768, 768][768, 1]cuda:0", arg206_1: "f32[768][1]cuda:0", arg207_1: "f32[768][1]cuda:0", arg208_1: "f32[768][1]cuda:0", arg209_1: "f32[3072, 768][768, 1]cuda:0", arg210_1: "f32[3072][1]cuda:0", arg211_1: "f32[768, 3072][3072, 1]cuda:0", arg212_1: "f32[768][1]cuda:0", arg213_1: "f32[768][1]cuda:0", arg214_1: "f32[768][1]cuda:0", arg215_1: "f32[2304, 768][768, 1]cuda:0", arg216_1: "f32[2304][1]cuda:0", arg217_1: "f32[768, 768][768, 1]cuda:0", arg218_1: "f32[768][1]cuda:0", arg219_1: "f32[768][1]cuda:0", arg220_1: "f32[768][1]cuda:0", arg221_1: "f32[3072, 768][768, 1]cuda:0", arg222_1: "f32[3072][1]cuda:0", arg223_1: "f32[768, 3072][3072, 1]cuda:0", arg224_1: "f32[768][1]cuda:0", arg225_1: "f32[768][1]cuda:0", arg226_1: "f32[768][1]cuda:0", arg227_1: "f32[2304, 768][768, 1]cuda:0", arg228_1: "f32[2304][1]cuda:0", arg229_1: "f32[768, 768][768, 1]cuda:0", arg230_1: "f32[768][1]cuda:0", arg231_1: "f32[768][1]cuda:0", arg232_1: "f32[768][1]cuda:0", arg233_1: "f32[3072, 768][768, 1]cuda:0", arg234_1: "f32[3072][1]cuda:0", arg235_1: "f32[768, 3072][3072, 1]cuda:0", arg236_1: "f32[768][1]cuda:0", arg237_1: "f32[768][1]cuda:0", arg238_1: "f32[768][1]cuda:0", arg239_1: "f32[2304, 768][768, 1]cuda:0", arg240_1: "f32[2304][1]cuda:0", arg241_1: "f32[768, 768][768, 1]cuda:0", arg242_1: "f32[768][1]cuda:0", arg243_1: "f32[768][1]cuda:0", arg244_1: "f32[768][1]cuda:0", arg245_1: "f32[3072, 768][768, 1]cuda:0", arg246_1: 
"f32[3072][1]cuda:0", arg247_1: "f32[768, 3072][3072, 1]cuda:0", arg248_1: "f32[768][1]cuda:0", arg249_1: "f32[768][1]cuda:0", arg250_1: "f32[768][1]cuda:0", arg251_1: "f32[2304, 768][768, 1]cuda:0", arg252_1: "f32[2304][1]cuda:0", arg253_1: "f32[768, 768][768, 1]cuda:0", arg254_1: "f32[768][1]cuda:0", arg255_1: "f32[768][1]cuda:0", arg256_1: "f32[768][1]cuda:0", arg257_1: "f32[3072, 768][768, 1]cuda:0", arg258_1: "f32[3072][1]cuda:0", arg259_1: "f32[768, 3072][3072, 1]cuda:0", arg260_1: "f32[768][1]cuda:0", arg261_1: "f32[768][1]cuda:0", arg262_1: "f32[768][1]cuda:0", arg263_1: "f32[2304, 768][768, 1]cuda:0", arg264_1: "f32[2304][1]cuda:0", arg265_1: "f32[768, 768][768, 1]cuda:0", arg266_1: "f32[768][1]cuda:0", arg267_1: "f32[768][1]cuda:0", arg268_1: "f32[768][1]cuda:0", arg269_1: "f32[3072, 768][768, 1]cuda:0", arg270_1: "f32[3072][1]cuda:0", arg271_1: "f32[768, 3072][3072, 1]cuda:0", arg272_1: "f32[768][1]cuda:0", arg273_1: "f32[768][1]cuda:0", arg274_1: "f32[768][1]cuda:0", arg275_1: "f32[2304, 768][768, 1]cuda:0", arg276_1: "f32[2304][1]cuda:0", arg277_1: "f32[768, 768][768, 1]cuda:0", arg278_1: "f32[768][1]cuda:0", arg279_1: "f32[768][1]cuda:0", arg280_1: "f32[768][1]cuda:0", arg281_1: "f32[3072, 768][768, 1]cuda:0", arg282_1: "f32[3072][1]cuda:0", arg283_1: "f32[768, 3072][3072, 1]cuda:0", arg284_1: "f32[768][1]cuda:0", arg285_1: "f32[768][1]cuda:0", arg286_1: "f32[768][1]cuda:0", arg287_1: "f32[2304, 768][768, 1]cuda:0", arg288_1: "f32[2304][1]cuda:0", arg289_1: "f32[768, 768][768, 1]cuda:0", arg290_1: "f32[768][1]cuda:0", arg291_1: "f32[768][1]cuda:0", arg292_1: "f32[768][1]cuda:0", arg293_1: "f32[3072, 768][768, 1]cuda:0", arg294_1: "f32[3072][1]cuda:0", arg295_1: "f32[768, 3072][3072, 1]cuda:0", arg296_1: "f32[768][1]cuda:0", arg297_1: "f32[768][1]cuda:0", arg298_1: "f32[768][1]cuda:0", arg299_1: "f32[50304, 768][768, 1]cuda:0", arg300_1: "f32[768][1]cuda:0", arg301_1: "f32[768][1]cuda:0", arg302_1: "f32[2304, 768][768, 1]cuda:0", arg303_1: "f32[2304][1]cuda:0", arg304_1: "f32[768, 768][768, 1]cuda:0", arg305_1: "f32[768][1]cuda:0", arg306_1: "f32[768][1]cuda:0", arg307_1: "f32[768][1]cuda:0", arg308_1: "f32[3072, 768][768, 1]cuda:0", arg309_1: "f32[3072][1]cuda:0", arg310_1: "f32[768, 3072][3072, 1]cuda:0", arg311_1: "f32[768][1]cuda:0", arg312_1: "f32[768][1]cuda:0", arg313_1: "f32[768][1]cuda:0", arg314_1: "f32[2304, 768][768, 1]cuda:0", arg315_1: "f32[2304][1]cuda:0", arg316_1: "f32[768, 768][768, 1]cuda:0", arg317_1: "f32[768][1]cuda:0", arg318_1: "f32[768][1]cuda:0", arg319_1: "f32[768][1]cuda:0", arg320_1: "f32[3072, 768][768, 1]cuda:0", arg321_1: "f32[3072][1]cuda:0", arg322_1: "f32[768, 3072][3072, 1]cuda:0", arg323_1: "f32[768][1]cuda:0", arg324_1: "f32[768][1]cuda:0", arg325_1: "f32[768][1]cuda:0", arg326_1: "f32[2304, 768][768, 1]cuda:0", arg327_1: "f32[2304][1]cuda:0", arg328_1: "f32[768, 768][768, 1]cuda:0", arg329_1: "f32[768][1]cuda:0", arg330_1: "f32[768][1]cuda:0", arg331_1: "f32[768][1]cuda:0", arg332_1: "f32[3072, 768][768, 1]cuda:0", arg333_1: "f32[3072][1]cuda:0", arg334_1: "f32[768, 3072][3072, 1]cuda:0", arg335_1: "f32[768][1]cuda:0", arg336_1: "f32[768][1]cuda:0", arg337_1: "f32[768][1]cuda:0", arg338_1: "f32[2304, 768][768, 1]cuda:0", arg339_1: "f32[2304][1]cuda:0", arg340_1: "f32[768, 768][768, 1]cuda:0", arg341_1: "f32[768][1]cuda:0", arg342_1: "f32[768][1]cuda:0", arg343_1: "f32[768][1]cuda:0", arg344_1: "f32[3072, 768][768, 1]cuda:0", arg345_1: "f32[3072][1]cuda:0", arg346_1: "f32[768, 3072][3072, 1]cuda:0", arg347_1: "f32[768][1]cuda:0", arg348_1: 
"f32[768][1]cuda:0", arg349_1: "f32[768][1]cuda:0", arg350_1: "f32[2304, 768][768, 1]cuda:0", arg351_1: "f32[2304][1]cuda:0", arg352_1: "f32[768, 768][768, 1]cuda:0", arg353_1: "f32[768][1]cuda:0", arg354_1: "f32[768][1]cuda:0", arg355_1: "f32[768][1]cuda:0", arg356_1: "f32[3072, 768][768, 1]cuda:0", arg357_1: "f32[3072][1]cuda:0", arg358_1: "f32[768, 3072][3072, 1]cuda:0", arg359_1: "f32[768][1]cuda:0", arg360_1: "f32[768][1]cuda:0", arg361_1: "f32[768][1]cuda:0", arg362_1: "f32[2304, 768][768, 1]cuda:0", arg363_1: "f32[2304][1]cuda:0", arg364_1: "f32[768, 768][768, 1]cuda:0", arg365_1: "f32[768][1]cuda:0", arg366_1: "f32[768][1]cuda:0", arg367_1: "f32[768][1]cuda:0", arg368_1: "f32[3072, 768][768, 1]cuda:0", arg369_1: "f32[3072][1]cuda:0", arg370_1: "f32[768, 3072][3072, 1]cuda:0", arg371_1: "f32[768][1]cuda:0", arg372_1: "f32[768][1]cuda:0", arg373_1: "f32[768][1]cuda:0", arg374_1: "f32[2304, 768][768, 1]cuda:0", arg375_1: "f32[2304][1]cuda:0", arg376_1: "f32[768, 768][768, 1]cuda:0", arg377_1: "f32[768][1]cuda:0", arg378_1: "f32[768][1]cuda:0", arg379_1: "f32[768][1]cuda:0", arg380_1: "f32[3072, 768][768, 1]cuda:0", arg381_1: "f32[3072][1]cuda:0", arg382_1: "f32[768, 3072][3072, 1]cuda:0", arg383_1: "f32[768][1]cuda:0", arg384_1: "f32[768][1]cuda:0", arg385_1: "f32[768][1]cuda:0", arg386_1: "f32[2304, 768][768, 1]cuda:0", arg387_1: "f32[2304][1]cuda:0", arg388_1: "f32[768, 768][768, 1]cuda:0", arg389_1: "f32[768][1]cuda:0", arg390_1: "f32[768][1]cuda:0", arg391_1: "f32[768][1]cuda:0", arg392_1: "f32[3072, 768][768, 1]cuda:0", arg393_1: "f32[3072][1]cuda:0", arg394_1: "f32[768, 3072][3072, 1]cuda:0", arg395_1: "f32[768][1]cuda:0", arg396_1: "f32[768][1]cuda:0", arg397_1: "f32[768][1]cuda:0", arg398_1: "f32[2304, 768][768, 1]cuda:0", arg399_1: "f32[2304][1]cuda:0", arg400_1: "f32[768, 768][768, 1]cuda:0", arg401_1: "f32[768][1]cuda:0", arg402_1: "f32[768][1]cuda:0", arg403_1: "f32[768][1]cuda:0", arg404_1: "f32[3072, 768][768, 1]cuda:0", arg405_1: "f32[3072][1]cuda:0", arg406_1: "f32[768, 3072][3072, 1]cuda:0", arg407_1: "f32[768][1]cuda:0", arg408_1: "f32[768][1]cuda:0", arg409_1: "f32[768][1]cuda:0", arg410_1: "f32[2304, 768][768, 1]cuda:0", arg411_1: "f32[2304][1]cuda:0", arg412_1: "f32[768, 768][768, 1]cuda:0", arg413_1: "f32[768][1]cuda:0", arg414_1: "f32[768][1]cuda:0", arg415_1: "f32[768][1]cuda:0", arg416_1: "f32[3072, 768][768, 1]cuda:0", arg417_1: "f32[3072][1]cuda:0", arg418_1: "f32[768, 3072][3072, 1]cuda:0", arg419_1: "f32[768][1]cuda:0", arg420_1: "f32[768][1]cuda:0", arg421_1: "f32[768][1]cuda:0", arg422_1: "f32[2304, 768][768, 1]cuda:0", arg423_1: "f32[2304][1]cuda:0", arg424_1: "f32[768, 768][768, 1]cuda:0", arg425_1: "f32[768][1]cuda:0", arg426_1: "f32[768][1]cuda:0", arg427_1: "f32[768][1]cuda:0", arg428_1: "f32[3072, 768][768, 1]cuda:0", arg429_1: "f32[3072][1]cuda:0", arg430_1: "f32[768, 3072][3072, 1]cuda:0", arg431_1: "f32[768][1]cuda:0", arg432_1: "f32[768][1]cuda:0", arg433_1: "f32[768][1]cuda:0", arg434_1: "f32[2304, 768][768, 1]cuda:0", arg435_1: "f32[2304][1]cuda:0", arg436_1: "f32[768, 768][768, 1]cuda:0", arg437_1: "f32[768][1]cuda:0", arg438_1: "f32[768][1]cuda:0", arg439_1: "f32[768][1]cuda:0", arg440_1: "f32[3072, 768][768, 1]cuda:0", arg441_1: "f32[3072][1]cuda:0", arg442_1: "f32[768, 3072][3072, 1]cuda:0", arg443_1: "f32[768][1]cuda:0", arg444_1: "f32[768][1]cuda:0", arg445_1: "f32[768][1]cuda:0", arg446_1: "f32[50304, 768][768, 1]cuda:0", arg447_1: "f32[768][1]cuda:0", arg448_1: "f32[768][1]cuda:0", arg449_1: "f32[2304, 768][768, 1]cuda:0", arg450_1: 
"f32[2304][1]cuda:0", arg451_1: "f32[768, 768][768, 1]cuda:0", arg452_1: "f32[768][1]cuda:0", arg453_1: "f32[768][1]cuda:0", arg454_1: "f32[768][1]cuda:0", arg455_1: "f32[3072, 768][768, 1]cuda:0", arg456_1: "f32[3072][1]cuda:0", arg457_1: "f32[768, 3072][3072, 1]cuda:0", arg458_1: "f32[768][1]cuda:0", arg459_1: "f32[768][1]cuda:0", arg460_1: "f32[768][1]cuda:0", arg461_1: "f32[2304, 768][768, 1]cuda:0", arg462_1: "f32[2304][1]cuda:0", arg463_1: "f32[768, 768][768, 1]cuda:0", arg464_1: "f32[768][1]cuda:0", arg465_1: "f32[768][1]cuda:0", arg466_1: "f32[768][1]cuda:0", arg467_1: "f32[3072, 768][768, 1]cuda:0", arg468_1: "f32[3072][1]cuda:0", arg469_1: "f32[768, 3072][3072, 1]cuda:0", arg470_1: "f32[768][1]cuda:0", arg471_1: "f32[768][1]cuda:0", arg472_1: "f32[768][1]cuda:0", arg473_1: "f32[2304, 768][768, 1]cuda:0", arg474_1: "f32[2304][1]cuda:0", arg475_1: "f32[768, 768][768, 1]cuda:0", arg476_1: "f32[768][1]cuda:0", arg477_1: "f32[768][1]cuda:0", arg478_1: "f32[768][1]cuda:0", arg479_1: "f32[3072, 768][768, 1]cuda:0", arg480_1: "f32[3072][1]cuda:0", arg481_1: "f32[768, 3072][3072, 1]cuda:0", arg482_1: "f32[768][1]cuda:0", arg483_1: "f32[768][1]cuda:0", arg484_1: "f32[768][1]cuda:0", arg485_1: "f32[2304, 768][768, 1]cuda:0", arg486_1: "f32[2304][1]cuda:0", arg487_1: "f32[768, 768][768, 1]cuda:0", arg488_1: "f32[768][1]cuda:0", arg489_1: "f32[768][1]cuda:0", arg490_1: "f32[768][1]cuda:0", arg491_1: "f32[3072, 768][768, 1]cuda:0", arg492_1: "f32[3072][1]cuda:0", arg493_1: "f32[768, 3072][3072, 1]cuda:0", arg494_1: "f32[768][1]cuda:0", arg495_1: "f32[768][1]cuda:0", arg496_1: "f32[768][1]cuda:0", arg497_1: "f32[2304, 768][768, 1]cuda:0", arg498_1: "f32[2304][1]cuda:0", arg499_1: "f32[768, 768][768, 1]cuda:0", arg500_1: "f32[768][1]cuda:0", arg501_1: "f32[768][1]cuda:0", arg502_1: "f32[768][1]cuda:0", arg503_1: "f32[3072, 768][768, 1]cuda:0", arg504_1: "f32[3072][1]cuda:0", arg505_1: "f32[768, 3072][3072, 1]cuda:0", arg506_1: "f32[768][1]cuda:0", arg507_1: "f32[768][1]cuda:0", arg508_1: "f32[768][1]cuda:0", arg509_1: "f32[2304, 768][768, 1]cuda:0", arg510_1: "f32[2304][1]cuda:0", arg511_1: "f32[768, 768][768, 1]cuda:0", arg512_1: "f32[768][1]cuda:0", arg513_1: "f32[768][1]cuda:0", arg514_1: "f32[768][1]cuda:0", arg515_1: "f32[3072, 768][768, 1]cuda:0", arg516_1: "f32[3072][1]cuda:0", arg517_1: "f32[768, 3072][3072, 1]cuda:0", arg518_1: "f32[768][1]cuda:0", arg519_1: "f32[768][1]cuda:0", arg520_1: "f32[768][1]cuda:0", arg521_1: "f32[2304, 768][768, 1]cuda:0", arg522_1: "f32[2304][1]cuda:0", arg523_1: "f32[768, 768][768, 1]cuda:0", arg524_1: "f32[768][1]cuda:0", arg525_1: "f32[768][1]cuda:0", arg526_1: "f32[768][1]cuda:0", arg527_1: "f32[3072, 768][768, 1]cuda:0", arg528_1: "f32[3072][1]cuda:0", arg529_1: "f32[768, 3072][3072, 1]cuda:0", arg530_1: "f32[768][1]cuda:0", arg531_1: "f32[768][1]cuda:0", arg532_1: "f32[768][1]cuda:0", arg533_1: "f32[2304, 768][768, 1]cuda:0", arg534_1: "f32[2304][1]cuda:0", arg535_1: "f32[768, 768][768, 1]cuda:0", arg536_1: "f32[768][1]cuda:0", arg537_1: "f32[768][1]cuda:0", arg538_1: "f32[768][1]cuda:0", arg539_1: "f32[3072, 768][768, 1]cuda:0", arg540_1: "f32[3072][1]cuda:0", arg541_1: "f32[768, 3072][3072, 1]cuda:0", arg542_1: "f32[768][1]cuda:0", arg543_1: "f32[768][1]cuda:0", arg544_1: "f32[768][1]cuda:0", arg545_1: "f32[2304, 768][768, 1]cuda:0", arg546_1: "f32[2304][1]cuda:0", arg547_1: "f32[768, 768][768, 1]cuda:0", arg548_1: "f32[768][1]cuda:0", arg549_1: "f32[768][1]cuda:0", arg550_1: "f32[768][1]cuda:0", arg551_1: "f32[3072, 768][768, 1]cuda:0", arg552_1: 
"f32[3072][1]cuda:0", arg553_1: "f32[768, 3072][3072, 1]cuda:0", arg554_1: "f32[768][1]cuda:0", arg555_1: "f32[768][1]cuda:0", arg556_1: "f32[768][1]cuda:0", arg557_1: "f32[2304, 768][768, 1]cuda:0", arg558_1: "f32[2304][1]cuda:0", arg559_1: "f32[768, 768][768, 1]cuda:0", arg560_1: "f32[768][1]cuda:0", arg561_1: "f32[768][1]cuda:0", arg562_1: "f32[768][1]cuda:0", arg563_1: "f32[3072, 768][768, 1]cuda:0", arg564_1: "f32[3072][1]cuda:0", arg565_1: "f32[768, 3072][3072, 1]cuda:0", arg566_1: "f32[768][1]cuda:0", arg567_1: "f32[768][1]cuda:0", arg568_1: "f32[768][1]cuda:0", arg569_1: "f32[2304, 768][768, 1]cuda:0", arg570_1: "f32[2304][1]cuda:0", arg571_1: "f32[768, 768][768, 1]cuda:0", arg572_1: "f32[768][1]cuda:0", arg573_1: "f32[768][1]cuda:0", arg574_1: "f32[768][1]cuda:0", arg575_1: "f32[3072, 768][768, 1]cuda:0", arg576_1: "f32[3072][1]cuda:0", arg577_1: "f32[768, 3072][3072, 1]cuda:0", arg578_1: "f32[768][1]cuda:0", arg579_1: "f32[768][1]cuda:0", arg580_1: "f32[768][1]cuda:0", arg581_1: "f32[2304, 768][768, 1]cuda:0", arg582_1: "f32[2304][1]cuda:0", arg583_1: "f32[768, 768][768, 1]cuda:0", arg584_1: "f32[768][1]cuda:0", arg585_1: "f32[768][1]cuda:0", arg586_1: "f32[768][1]cuda:0", arg587_1: "f32[3072, 768][768, 1]cuda:0", arg588_1: "f32[3072][1]cuda:0", arg589_1: "f32[768, 3072][3072, 1]cuda:0", arg590_1: "f32[768][1]cuda:0", arg591_1: "f32[768][1]cuda:0", arg592_1: "f32[768][1]cuda:0", arg593_1: "f32[][]cuda:0", arg594_1: "f32[][]cuda:0", arg595_1: "f32[][]cuda:0", arg596_1: "f32[][]cuda:0", arg597_1: "f32[][]cuda:0", arg598_1: "f32[][]cuda:0", arg599_1: "f32[][]cuda:0", arg600_1: "f32[][]cuda:0", arg601_1: "f32[][]cuda:0", arg602_1: "f32[][]cuda:0", arg603_1: "f32[][]cuda:0", arg604_1: "f32[][]cuda:0", arg605_1: "f32[][]cuda:0", arg606_1: "f32[][]cuda:0", arg607_1: "f32[][]cuda:0", arg608_1: "f32[][]cuda:0", arg609_1: "f32[][]cuda:0", arg610_1: "f32[][]cuda:0", arg611_1: "f32[][]cuda:0", arg612_1: "f32[][]cuda:0", arg613_1: "f32[][]cuda:0", arg614_1: "f32[][]cuda:0", arg615_1: "f32[][]cuda:0", arg616_1: "f32[][]cuda:0", arg617_1: "f32[][]cuda:0", arg618_1: "f32[][]cuda:0", arg619_1: "f32[][]cuda:0", arg620_1: "f32[][]cuda:0", arg621_1: "f32[][]cuda:0", arg622_1: "f32[][]cuda:0", arg623_1: "f32[][]cuda:0", arg624_1: "f32[][]cuda:0", arg625_1: "f32[][]cuda:0", arg626_1: "f32[][]cuda:0", arg627_1: "f32[][]cuda:0", arg628_1: "f32[][]cuda:0", arg629_1: "f32[][]cuda:0", arg630_1: "f32[][]cuda:0", arg631_1: "f32[][]cuda:0", arg632_1: "f32[][]cuda:0", arg633_1: "f32[][]cuda:0", arg634_1: "f32[][]cuda:0", arg635_1: "f32[][]cuda:0", arg636_1: "f32[][]cuda:0", arg637_1: "f32[][]cuda:0", arg638_1: "f32[][]cuda:0", arg639_1: "f32[][]cuda:0", arg640_1: "f32[][]cuda:0", arg641_1: "f32[][]cuda:0", arg642_1: "f32[][]cuda:0", arg643_1: "f32[][]cuda:0", arg644_1: "f32[][]cuda:0", arg645_1: "f32[][]cuda:0", arg646_1: "f32[][]cuda:0", arg647_1: "f32[][]cuda:0", arg648_1: "f32[][]cuda:0", arg649_1: "f32[][]cuda:0", arg650_1: "f32[][]cuda:0", arg651_1: "f32[][]cuda:0", arg652_1: "f32[][]cuda:0", arg653_1: "f32[][]cuda:0", arg654_1: "f32[][]cuda:0", arg655_1: "f32[][]cuda:0", arg656_1: "f32[][]cuda:0", arg657_1: "f32[][]cuda:0", arg658_1: "f32[][]cuda:0", arg659_1: "f32[][]cuda:0", arg660_1: "f32[][]cuda:0", arg661_1: "f32[][]cuda:0", arg662_1: "f32[][]cuda:0", arg663_1: "f32[][]cuda:0", arg664_1: "f32[][]cuda:0", arg665_1: "f32[][]cuda:0", arg666_1: "f32[][]cuda:0", arg667_1: "f32[][]cuda:0", arg668_1: "f32[][]cuda:0", arg669_1: "f32[][]cuda:0", arg670_1: "f32[][]cuda:0", arg671_1: "f32[][]cuda:0", arg672_1: 
"f32[][]cuda:0", arg673_1: "f32[][]cuda:0", arg674_1: "f32[][]cuda:0", arg675_1: "f32[][]cuda:0", arg676_1: "f32[][]cuda:0", arg677_1: "f32[][]cuda:0", arg678_1: "f32[][]cuda:0", arg679_1: "f32[][]cuda:0", arg680_1: "f32[][]cuda:0", arg681_1: "f32[][]cuda:0", arg682_1: "f32[][]cuda:0", arg683_1: "f32[][]cuda:0", arg684_1: "f32[][]cuda:0", arg685_1: "f32[][]cuda:0", arg686_1: "f32[][]cuda:0", arg687_1: "f32[][]cuda:0", arg688_1: "f32[][]cuda:0", arg689_1: "f32[][]cuda:0", arg690_1: "f32[][]cuda:0", arg691_1: "f32[][]cuda:0", arg692_1: "f32[][]cuda:0", arg693_1: "f32[][]cuda:0", arg694_1: "f32[][]cuda:0", arg695_1: "f32[][]cuda:0", arg696_1: "f32[][]cuda:0", arg697_1: "f32[][]cuda:0", arg698_1: "f32[][]cuda:0", arg699_1: "f32[][]cuda:0", arg700_1: "f32[][]cuda:0", arg701_1: "f32[][]cuda:0", arg702_1: "f32[][]cuda:0", arg703_1: "f32[][]cuda:0", arg704_1: "f32[][]cuda:0", arg705_1: "f32[][]cuda:0", arg706_1: "f32[][]cuda:0", arg707_1: "f32[][]cuda:0", arg708_1: "f32[][]cuda:0", arg709_1: "f32[][]cuda:0", arg710_1: "f32[][]cuda:0", arg711_1: "f32[][]cuda:0", arg712_1: "f32[][]cuda:0", arg713_1: "f32[][]cuda:0", arg714_1: "f32[][]cuda:0", arg715_1: "f32[][]cuda:0", arg716_1: "f32[][]cuda:0", arg717_1: "f32[][]cuda:0", arg718_1: "f32[][]cuda:0", arg719_1: "f32[][]cuda:0", arg720_1: "f32[][]cuda:0", arg721_1: "f32[][]cuda:0", arg722_1: "f32[][]cuda:0", arg723_1: "f32[][]cuda:0", arg724_1: "f32[][]cuda:0", arg725_1: "f32[][]cuda:0", arg726_1: "f32[][]cuda:0", arg727_1: "f32[][]cuda:0", arg728_1: "f32[][]cuda:0", arg729_1: "f32[][]cuda:0", arg730_1: "f32[][]cuda:0", arg731_1: "f32[][]cuda:0", arg732_1: "f32[][]cuda:0", arg733_1: "f32[][]cuda:0", arg734_1: "f32[][]cuda:0", arg735_1: "f32[][]cuda:0", arg736_1: "f32[][]cuda:0", arg737_1: "f32[][]cuda:0", arg738_1: "f32[][]cuda:0", arg739_1: "f32[][]cuda:0"): + # File: /data/users/jjwu/a/pytorch/torch/optim/adam.py:529 in _multi_tensor_adam, code: torch._foreach_add_(device_state_steps, 1) + _foreach_add = torch.ops.aten._foreach_add_.Scalar([arg593_1, arg148_1, arg594_1, arg595_1, arg596_1, arg597_1, arg598_1, arg599_1, arg600_1, arg601_1, arg602_1, arg603_1, arg604_1, arg605_1, arg606_1, arg607_1, arg608_1, arg609_1, arg610_1, arg611_1, arg612_1, arg613_1, arg614_1, arg615_1, arg616_1, arg617_1, arg618_1, arg619_1, arg620_1, arg621_1, arg622_1, arg623_1, arg624_1, arg625_1, arg626_1, arg627_1, arg628_1, arg629_1, arg630_1, arg631_1, arg632_1, arg633_1, arg634_1, arg635_1, arg636_1, arg637_1, arg638_1, arg639_1, arg640_1, arg641_1, arg642_1, arg643_1, arg644_1, arg645_1, arg646_1, arg647_1, arg648_1, arg649_1, arg650_1, arg651_1, arg652_1, arg653_1, arg654_1, arg655_1, arg656_1, arg657_1, arg658_1, arg659_1, arg660_1, arg661_1, arg662_1, arg663_1, arg664_1, arg665_1, arg666_1, arg667_1, arg668_1, arg669_1, arg670_1, arg671_1, arg672_1, arg673_1, arg674_1, arg675_1, arg676_1, arg677_1, arg678_1, arg679_1, arg680_1, arg681_1, arg682_1, arg683_1, arg684_1, arg685_1, arg686_1, arg687_1, arg688_1, arg689_1, arg690_1, arg691_1, arg692_1, arg693_1, arg694_1, arg695_1, arg696_1, arg697_1, arg698_1, arg699_1, arg700_1, arg701_1, arg702_1, arg703_1, arg704_1, arg705_1, arg706_1, arg707_1, arg708_1, arg709_1, arg710_1, arg711_1, arg712_1, arg713_1, arg714_1, arg715_1, arg716_1, arg717_1, arg718_1, arg719_1, arg720_1, arg721_1, arg722_1, arg723_1, arg724_1, arg725_1, arg726_1, arg727_1, arg728_1, arg729_1, arg730_1, arg731_1, arg732_1, arg733_1, arg734_1, arg735_1, arg736_1, arg737_1, arg738_1, arg739_1], 1); arg593_1 = arg148_1 = arg594_1 = arg595_1 = arg596_1 = 
arg597_1 = arg598_1 = arg599_1 = arg600_1 = arg601_1 = arg602_1 = arg603_1 = arg604_1 = arg605_1 = arg606_1 = arg607_1 = arg608_1 = arg609_1 = arg610_1 = arg611_1 = arg612_1 = arg613_1 = arg614_1 = arg615_1 = arg616_1 = arg617_1 = arg618_1 = arg619_1 = arg620_1 = arg621_1 = arg622_1 = arg623_1 = arg624_1 = arg625_1 = arg626_1 = arg627_1 = arg628_1 = arg629_1 = arg630_1 = arg631_1 = arg632_1 = arg633_1 = arg634_1 = arg635_1 = arg636_1 = arg637_1 = arg638_1 = arg639_1 = arg640_1 = arg641_1 = arg642_1 = arg643_1 = arg644_1 = arg645_1 = arg646_1 = arg647_1 = arg648_1 = arg649_1 = arg650_1 = arg651_1 = arg652_1 = arg653_1 = arg654_1 = arg655_1 = arg656_1 = arg657_1 = arg658_1 = arg659_1 = arg660_1 = arg661_1 = arg662_1 = arg663_1 = arg664_1 = arg665_1 = arg666_1 = arg667_1 = arg668_1 = arg669_1 = arg670_1 = arg671_1 = arg672_1 = arg673_1 = arg674_1 = arg675_1 = arg676_1 = arg677_1 = arg678_1 = arg679_1 = arg680_1 = arg681_1 = arg682_1 = arg683_1 = arg684_1 = arg685_1 = arg686_1 = arg687_1 = arg688_1 = arg689_1 = arg690_1 = arg691_1 = arg692_1 = arg693_1 = arg694_1 = arg695_1 = arg696_1 = arg697_1 = arg698_1 = arg699_1 = arg700_1 = arg701_1 = arg702_1 = arg703_1 = arg704_1 = arg705_1 = arg706_1 = arg707_1 = arg708_1 = arg709_1 = arg710_1 = arg711_1 = arg712_1 = arg713_1 = arg714_1 = arg715_1 = arg716_1 = arg717_1 = arg718_1 = arg719_1 = arg720_1 = arg721_1 = arg722_1 = arg723_1 = arg724_1 = arg725_1 = arg726_1 = arg727_1 = arg728_1 = arg729_1 = arg730_1 = arg731_1 = arg732_1 = arg733_1 = arg734_1 = arg735_1 = arg736_1 = arg737_1 = arg738_1 = arg739_1 = None + getitem: "f32[][]cuda:0" = _foreach_add[0] + getitem_1: "f32[][]cuda:0" = _foreach_add[1] + getitem_2: "f32[][]cuda:0" = _foreach_add[2] + getitem_3: "f32[][]cuda:0" = _foreach_add[3] + getitem_4: "f32[][]cuda:0" = _foreach_add[4] + getitem_5: "f32[][]cuda:0" = _foreach_add[5] + getitem_6: "f32[][]cuda:0" = _foreach_add[6] + getitem_7: "f32[][]cuda:0" = _foreach_add[7] + getitem_8: "f32[][]cuda:0" = _foreach_add[8] + getitem_9: "f32[][]cuda:0" = _foreach_add[9] + getitem_10: "f32[][]cuda:0" = _foreach_add[10] + getitem_11: "f32[][]cuda:0" = _foreach_add[11] + getitem_12: "f32[][]cuda:0" = _foreach_add[12] + getitem_13: "f32[][]cuda:0" = _foreach_add[13] + getitem_14: "f32[][]cuda:0" = _foreach_add[14] + getitem_15: "f32[][]cuda:0" = _foreach_add[15] + getitem_16: "f32[][]cuda:0" = _foreach_add[16] + getitem_17: "f32[][]cuda:0" = _foreach_add[17] + getitem_18: "f32[][]cuda:0" = _foreach_add[18] + getitem_19: "f32[][]cuda:0" = _foreach_add[19] + getitem_20: "f32[][]cuda:0" = _foreach_add[20] + getitem_21: "f32[][]cuda:0" = _foreach_add[21] + getitem_22: "f32[][]cuda:0" = _foreach_add[22] + getitem_23: "f32[][]cuda:0" = _foreach_add[23] + getitem_24: "f32[][]cuda:0" = _foreach_add[24] + getitem_25: "f32[][]cuda:0" = _foreach_add[25] + getitem_26: "f32[][]cuda:0" = _foreach_add[26] + getitem_27: "f32[][]cuda:0" = _foreach_add[27] + getitem_28: "f32[][]cuda:0" = _foreach_add[28] + getitem_29: "f32[][]cuda:0" = _foreach_add[29] + getitem_30: "f32[][]cuda:0" = _foreach_add[30] + getitem_31: "f32[][]cuda:0" = _foreach_add[31] + getitem_32: "f32[][]cuda:0" = _foreach_add[32] + getitem_33: "f32[][]cuda:0" = _foreach_add[33] + getitem_34: "f32[][]cuda:0" = _foreach_add[34] + getitem_35: "f32[][]cuda:0" = _foreach_add[35] + getitem_36: "f32[][]cuda:0" = _foreach_add[36] + getitem_37: "f32[][]cuda:0" = _foreach_add[37] + getitem_38: "f32[][]cuda:0" = _foreach_add[38] + getitem_39: "f32[][]cuda:0" = _foreach_add[39] + getitem_40: "f32[][]cuda:0" = 
_foreach_add[40] + getitem_41: "f32[][]cuda:0" = _foreach_add[41] + getitem_42: "f32[][]cuda:0" = _foreach_add[42] + getitem_43: "f32[][]cuda:0" = _foreach_add[43] + getitem_44: "f32[][]cuda:0" = _foreach_add[44] + getitem_45: "f32[][]cuda:0" = _foreach_add[45] + getitem_46: "f32[][]cuda:0" = _foreach_add[46] + getitem_47: "f32[][]cuda:0" = _foreach_add[47] + getitem_48: "f32[][]cuda:0" = _foreach_add[48] + getitem_49: "f32[][]cuda:0" = _foreach_add[49] + getitem_50: "f32[][]cuda:0" = _foreach_add[50] + getitem_51: "f32[][]cuda:0" = _foreach_add[51] + getitem_52: "f32[][]cuda:0" = _foreach_add[52] + getitem_53: "f32[][]cuda:0" = _foreach_add[53] + getitem_54: "f32[][]cuda:0" = _foreach_add[54] + getitem_55: "f32[][]cuda:0" = _foreach_add[55] + getitem_56: "f32[][]cuda:0" = _foreach_add[56] + getitem_57: "f32[][]cuda:0" = _foreach_add[57] + getitem_58: "f32[][]cuda:0" = _foreach_add[58] + getitem_59: "f32[][]cuda:0" = _foreach_add[59] + getitem_60: "f32[][]cuda:0" = _foreach_add[60] + getitem_61: "f32[][]cuda:0" = _foreach_add[61] + getitem_62: "f32[][]cuda:0" = _foreach_add[62] + getitem_63: "f32[][]cuda:0" = _foreach_add[63] + getitem_64: "f32[][]cuda:0" = _foreach_add[64] + getitem_65: "f32[][]cuda:0" = _foreach_add[65] + getitem_66: "f32[][]cuda:0" = _foreach_add[66] + getitem_67: "f32[][]cuda:0" = _foreach_add[67] + getitem_68: "f32[][]cuda:0" = _foreach_add[68] + getitem_69: "f32[][]cuda:0" = _foreach_add[69] + getitem_70: "f32[][]cuda:0" = _foreach_add[70] + getitem_71: "f32[][]cuda:0" = _foreach_add[71] + getitem_72: "f32[][]cuda:0" = _foreach_add[72] + getitem_73: "f32[][]cuda:0" = _foreach_add[73] + getitem_74: "f32[][]cuda:0" = _foreach_add[74] + getitem_75: "f32[][]cuda:0" = _foreach_add[75] + getitem_76: "f32[][]cuda:0" = _foreach_add[76] + getitem_77: "f32[][]cuda:0" = _foreach_add[77] + getitem_78: "f32[][]cuda:0" = _foreach_add[78] + getitem_79: "f32[][]cuda:0" = _foreach_add[79] + getitem_80: "f32[][]cuda:0" = _foreach_add[80] + getitem_81: "f32[][]cuda:0" = _foreach_add[81] + getitem_82: "f32[][]cuda:0" = _foreach_add[82] + getitem_83: "f32[][]cuda:0" = _foreach_add[83] + getitem_84: "f32[][]cuda:0" = _foreach_add[84] + getitem_85: "f32[][]cuda:0" = _foreach_add[85] + getitem_86: "f32[][]cuda:0" = _foreach_add[86] + getitem_87: "f32[][]cuda:0" = _foreach_add[87] + getitem_88: "f32[][]cuda:0" = _foreach_add[88] + getitem_89: "f32[][]cuda:0" = _foreach_add[89] + getitem_90: "f32[][]cuda:0" = _foreach_add[90] + getitem_91: "f32[][]cuda:0" = _foreach_add[91] + getitem_92: "f32[][]cuda:0" = _foreach_add[92] + getitem_93: "f32[][]cuda:0" = _foreach_add[93] + getitem_94: "f32[][]cuda:0" = _foreach_add[94] + getitem_95: "f32[][]cuda:0" = _foreach_add[95] + getitem_96: "f32[][]cuda:0" = _foreach_add[96] + getitem_97: "f32[][]cuda:0" = _foreach_add[97] + getitem_98: "f32[][]cuda:0" = _foreach_add[98] + getitem_99: "f32[][]cuda:0" = _foreach_add[99] + getitem_100: "f32[][]cuda:0" = _foreach_add[100] + getitem_101: "f32[][]cuda:0" = _foreach_add[101] + getitem_102: "f32[][]cuda:0" = _foreach_add[102] + getitem_103: "f32[][]cuda:0" = _foreach_add[103] + getitem_104: "f32[][]cuda:0" = _foreach_add[104] + getitem_105: "f32[][]cuda:0" = _foreach_add[105] + getitem_106: "f32[][]cuda:0" = _foreach_add[106] + getitem_107: "f32[][]cuda:0" = _foreach_add[107] + getitem_108: "f32[][]cuda:0" = _foreach_add[108] + getitem_109: "f32[][]cuda:0" = _foreach_add[109] + getitem_110: "f32[][]cuda:0" = _foreach_add[110] + getitem_111: "f32[][]cuda:0" = _foreach_add[111] + getitem_112: "f32[][]cuda:0" = 
_foreach_add[112] + getitem_113: "f32[][]cuda:0" = _foreach_add[113] + getitem_114: "f32[][]cuda:0" = _foreach_add[114] + getitem_115: "f32[][]cuda:0" = _foreach_add[115] + getitem_116: "f32[][]cuda:0" = _foreach_add[116] + getitem_117: "f32[][]cuda:0" = _foreach_add[117] + getitem_118: "f32[][]cuda:0" = _foreach_add[118] + getitem_119: "f32[][]cuda:0" = _foreach_add[119] + getitem_120: "f32[][]cuda:0" = _foreach_add[120] + getitem_121: "f32[][]cuda:0" = _foreach_add[121] + getitem_122: "f32[][]cuda:0" = _foreach_add[122] + getitem_123: "f32[][]cuda:0" = _foreach_add[123] + getitem_124: "f32[][]cuda:0" = _foreach_add[124] + getitem_125: "f32[][]cuda:0" = _foreach_add[125] + getitem_126: "f32[][]cuda:0" = _foreach_add[126] + getitem_127: "f32[][]cuda:0" = _foreach_add[127] + getitem_128: "f32[][]cuda:0" = _foreach_add[128] + getitem_129: "f32[][]cuda:0" = _foreach_add[129] + getitem_130: "f32[][]cuda:0" = _foreach_add[130] + getitem_131: "f32[][]cuda:0" = _foreach_add[131] + getitem_132: "f32[][]cuda:0" = _foreach_add[132] + getitem_133: "f32[][]cuda:0" = _foreach_add[133] + getitem_134: "f32[][]cuda:0" = _foreach_add[134] + getitem_135: "f32[][]cuda:0" = _foreach_add[135] + getitem_136: "f32[][]cuda:0" = _foreach_add[136] + getitem_137: "f32[][]cuda:0" = _foreach_add[137] + getitem_138: "f32[][]cuda:0" = _foreach_add[138] + getitem_139: "f32[][]cuda:0" = _foreach_add[139] + getitem_140: "f32[][]cuda:0" = _foreach_add[140] + getitem_141: "f32[][]cuda:0" = _foreach_add[141] + getitem_142: "f32[][]cuda:0" = _foreach_add[142] + getitem_143: "f32[][]cuda:0" = _foreach_add[143] + getitem_144: "f32[][]cuda:0" = _foreach_add[144] + getitem_145: "f32[][]cuda:0" = _foreach_add[145] + getitem_146: "f32[][]cuda:0" = _foreach_add[146] + getitem_147: "f32[][]cuda:0" = _foreach_add[147]; _foreach_add = None + + # File: /data/users/jjwu/a/pytorch/torch/optim/adam.py:541 in _multi_tensor_adam, code: torch._foreach_lerp_(device_exp_avgs, device_grads, 1 - beta1) + _foreach_sub = torch.ops.aten._foreach_sub.List([arg151_1, arg152_1, arg153_1, arg154_1, arg155_1, arg156_1, arg157_1, arg158_1, arg159_1, arg160_1, arg161_1, arg162_1, arg163_1, arg164_1, arg165_1, arg166_1, arg167_1, arg168_1, arg169_1, arg170_1, arg171_1, arg172_1, arg173_1, arg174_1, arg175_1, arg176_1, arg177_1, arg178_1, arg179_1, arg180_1, arg181_1, arg182_1, arg183_1, arg184_1, arg185_1, arg186_1, arg187_1, arg188_1, arg189_1, arg190_1, arg191_1, arg192_1, arg193_1, arg194_1, arg195_1, arg196_1, arg197_1, arg198_1, arg199_1, arg200_1, arg201_1, arg202_1, arg203_1, arg204_1, arg205_1, arg206_1, arg207_1, arg208_1, arg209_1, arg210_1, arg211_1, arg212_1, arg213_1, arg214_1, arg215_1, arg216_1, arg217_1, arg218_1, arg219_1, arg220_1, arg221_1, arg222_1, arg223_1, arg224_1, arg225_1, arg226_1, arg227_1, arg228_1, arg229_1, arg230_1, arg231_1, arg232_1, arg233_1, arg234_1, arg235_1, arg236_1, arg237_1, arg238_1, arg239_1, arg240_1, arg241_1, arg242_1, arg243_1, arg244_1, arg245_1, arg246_1, arg247_1, arg248_1, arg249_1, arg250_1, arg251_1, arg252_1, arg253_1, arg254_1, arg255_1, arg256_1, arg257_1, arg258_1, arg259_1, arg260_1, arg261_1, arg262_1, arg263_1, arg264_1, arg265_1, arg266_1, arg267_1, arg268_1, arg269_1, arg270_1, arg271_1, arg272_1, arg273_1, arg274_1, arg275_1, arg276_1, arg277_1, arg278_1, arg279_1, arg280_1, arg281_1, arg282_1, arg283_1, arg284_1, arg285_1, arg286_1, arg287_1, arg288_1, arg289_1, arg290_1, arg291_1, arg292_1, arg293_1, arg294_1, arg295_1, arg296_1, arg297_1, arg298_1], [arg299_1, arg149_1, arg300_1, arg301_1, 
arg302_1, arg303_1, arg304_1, arg305_1, arg306_1, arg307_1, arg308_1, arg309_1, arg310_1, arg311_1, arg312_1, arg313_1, arg314_1, arg315_1, arg316_1, arg317_1, arg318_1, arg319_1, arg320_1, arg321_1, arg322_1, arg323_1, arg324_1, arg325_1, arg326_1, arg327_1, arg328_1, arg329_1, arg330_1, arg331_1, arg332_1, arg333_1, arg334_1, arg335_1, arg336_1, arg337_1, arg338_1, arg339_1, arg340_1, arg341_1, arg342_1, arg343_1, arg344_1, arg345_1, arg346_1, arg347_1, arg348_1, arg349_1, arg350_1, arg351_1, arg352_1, arg353_1, arg354_1, arg355_1, arg356_1, arg357_1, arg358_1, arg359_1, arg360_1, arg361_1, arg362_1, arg363_1, arg364_1, arg365_1, arg366_1, arg367_1, arg368_1, arg369_1, arg370_1, arg371_1, arg372_1, arg373_1, arg374_1, arg375_1, arg376_1, arg377_1, arg378_1, arg379_1, arg380_1, arg381_1, arg382_1, arg383_1, arg384_1, arg385_1, arg386_1, arg387_1, arg388_1, arg389_1, arg390_1, arg391_1, arg392_1, arg393_1, arg394_1, arg395_1, arg396_1, arg397_1, arg398_1, arg399_1, arg400_1, arg401_1, arg402_1, arg403_1, arg404_1, arg405_1, arg406_1, arg407_1, arg408_1, arg409_1, arg410_1, arg411_1, arg412_1, arg413_1, arg414_1, arg415_1, arg416_1, arg417_1, arg418_1, arg419_1, arg420_1, arg421_1, arg422_1, arg423_1, arg424_1, arg425_1, arg426_1, arg427_1, arg428_1, arg429_1, arg430_1, arg431_1, arg432_1, arg433_1, arg434_1, arg435_1, arg436_1, arg437_1, arg438_1, arg439_1, arg440_1, arg441_1, arg442_1, arg443_1, arg444_1, arg445_1]) + getitem_148: "f32[50304, 768][768, 1]cuda:0" = _foreach_sub[0] + getitem_149: "f32[1024, 768][768, 1]cuda:0" = _foreach_sub[1] + getitem_150: "f32[768][1]cuda:0" = _foreach_sub[2] + getitem_151: "f32[768][1]cuda:0" = _foreach_sub[3] + getitem_152: "f32[2304, 768][768, 1]cuda:0" = _foreach_sub[4] + getitem_153: "f32[2304][1]cuda:0" = _foreach_sub[5] + getitem_154: "f32[768, 768][768, 1]cuda:0" = _foreach_sub[6] + getitem_155: "f32[768][1]cuda:0" = _foreach_sub[7] + getitem_156: "f32[768][1]cuda:0" = _foreach_sub[8] + getitem_157: "f32[768][1]cuda:0" = _foreach_sub[9] + getitem_158: "f32[3072, 768][768, 1]cuda:0" = _foreach_sub[10] + getitem_159: "f32[3072][1]cuda:0" = _foreach_sub[11] + getitem_160: "f32[768, 3072][3072, 1]cuda:0" = _foreach_sub[12] + getitem_161: "f32[768][1]cuda:0" = _foreach_sub[13] + getitem_162: "f32[768][1]cuda:0" = _foreach_sub[14] + getitem_163: "f32[768][1]cuda:0" = _foreach_sub[15] + getitem_164: "f32[2304, 768][768, 1]cuda:0" = _foreach_sub[16] + getitem_165: "f32[2304][1]cuda:0" = _foreach_sub[17] + getitem_166: "f32[768, 768][768, 1]cuda:0" = _foreach_sub[18] + getitem_167: "f32[768][1]cuda:0" = _foreach_sub[19] + getitem_168: "f32[768][1]cuda:0" = _foreach_sub[20] + getitem_169: "f32[768][1]cuda:0" = _foreach_sub[21] + getitem_170: "f32[3072, 768][768, 1]cuda:0" = _foreach_sub[22] + getitem_171: "f32[3072][1]cuda:0" = _foreach_sub[23] + getitem_172: "f32[768, 3072][3072, 1]cuda:0" = _foreach_sub[24] + getitem_173: "f32[768][1]cuda:0" = _foreach_sub[25] + getitem_174: "f32[768][1]cuda:0" = _foreach_sub[26] + getitem_175: "f32[768][1]cuda:0" = _foreach_sub[27] + getitem_176: "f32[2304, 768][768, 1]cuda:0" = _foreach_sub[28] + getitem_177: "f32[2304][1]cuda:0" = _foreach_sub[29] + getitem_178: "f32[768, 768][768, 1]cuda:0" = _foreach_sub[30] + getitem_179: "f32[768][1]cuda:0" = _foreach_sub[31] + getitem_180: "f32[768][1]cuda:0" = _foreach_sub[32] + getitem_181: "f32[768][1]cuda:0" = _foreach_sub[33] + getitem_182: "f32[3072, 768][768, 1]cuda:0" = _foreach_sub[34] + getitem_183: "f32[3072][1]cuda:0" = _foreach_sub[35] + getitem_184: "f32[768, 
3072][3072, 1]cuda:0" = _foreach_sub[36] + getitem_185: "f32[768][1]cuda:0" = _foreach_sub[37] + getitem_186: "f32[768][1]cuda:0" = _foreach_sub[38] + getitem_187: "f32[768][1]cuda:0" = _foreach_sub[39] + getitem_188: "f32[2304, 768][768, 1]cuda:0" = _foreach_sub[40] + getitem_189: "f32[2304][1]cuda:0" = _foreach_sub[41] + getitem_190: "f32[768, 768][768, 1]cuda:0" = _foreach_sub[42] + getitem_191: "f32[768][1]cuda:0" = _foreach_sub[43] + getitem_192: "f32[768][1]cuda:0" = _foreach_sub[44] + getitem_193: "f32[768][1]cuda:0" = _foreach_sub[45] + getitem_194: "f32[3072, 768][768, 1]cuda:0" = _foreach_sub[46] + getitem_195: "f32[3072][1]cuda:0" = _foreach_sub[47] + getitem_196: "f32[768, 3072][3072, 1]cuda:0" = _foreach_sub[48] + getitem_197: "f32[768][1]cuda:0" = _foreach_sub[49] + getitem_198: "f32[768][1]cuda:0" = _foreach_sub[50] + getitem_199: "f32[768][1]cuda:0" = _foreach_sub[51] + getitem_200: "f32[2304, 768][768, 1]cuda:0" = _foreach_sub[52] + getitem_201: "f32[2304][1]cuda:0" = _foreach_sub[53] + getitem_202: "f32[768, 768][768, 1]cuda:0" = _foreach_sub[54] + getitem_203: "f32[768][1]cuda:0" = _foreach_sub[55] + getitem_204: "f32[768][1]cuda:0" = _foreach_sub[56] + getitem_205: "f32[768][1]cuda:0" = _foreach_sub[57] + getitem_206: "f32[3072, 768][768, 1]cuda:0" = _foreach_sub[58] + getitem_207: "f32[3072][1]cuda:0" = _foreach_sub[59] + getitem_208: "f32[768, 3072][3072, 1]cuda:0" = _foreach_sub[60] + getitem_209: "f32[768][1]cuda:0" = _foreach_sub[61] + getitem_210: "f32[768][1]cuda:0" = _foreach_sub[62] + getitem_211: "f32[768][1]cuda:0" = _foreach_sub[63] + getitem_212: "f32[2304, 768][768, 1]cuda:0" = _foreach_sub[64] + getitem_213: "f32[2304][1]cuda:0" = _foreach_sub[65] + getitem_214: "f32[768, 768][768, 1]cuda:0" = _foreach_sub[66] + getitem_215: "f32[768][1]cuda:0" = _foreach_sub[67] + getitem_216: "f32[768][1]cuda:0" = _foreach_sub[68] + getitem_217: "f32[768][1]cuda:0" = _foreach_sub[69] + getitem_218: "f32[3072, 768][768, 1]cuda:0" = _foreach_sub[70] + getitem_219: "f32[3072][1]cuda:0" = _foreach_sub[71] + getitem_220: "f32[768, 3072][3072, 1]cuda:0" = _foreach_sub[72] + getitem_221: "f32[768][1]cuda:0" = _foreach_sub[73] + getitem_222: "f32[768][1]cuda:0" = _foreach_sub[74] + getitem_223: "f32[768][1]cuda:0" = _foreach_sub[75] + getitem_224: "f32[2304, 768][768, 1]cuda:0" = _foreach_sub[76] + getitem_225: "f32[2304][1]cuda:0" = _foreach_sub[77] + getitem_226: "f32[768, 768][768, 1]cuda:0" = _foreach_sub[78] + getitem_227: "f32[768][1]cuda:0" = _foreach_sub[79] + getitem_228: "f32[768][1]cuda:0" = _foreach_sub[80] + getitem_229: "f32[768][1]cuda:0" = _foreach_sub[81] + getitem_230: "f32[3072, 768][768, 1]cuda:0" = _foreach_sub[82] + getitem_231: "f32[3072][1]cuda:0" = _foreach_sub[83] + getitem_232: "f32[768, 3072][3072, 1]cuda:0" = _foreach_sub[84] + getitem_233: "f32[768][1]cuda:0" = _foreach_sub[85] + getitem_234: "f32[768][1]cuda:0" = _foreach_sub[86] + getitem_235: "f32[768][1]cuda:0" = _foreach_sub[87] + getitem_236: "f32[2304, 768][768, 1]cuda:0" = _foreach_sub[88] + getitem_237: "f32[2304][1]cuda:0" = _foreach_sub[89] + getitem_238: "f32[768, 768][768, 1]cuda:0" = _foreach_sub[90] + getitem_239: "f32[768][1]cuda:0" = _foreach_sub[91] + getitem_240: "f32[768][1]cuda:0" = _foreach_sub[92] + getitem_241: "f32[768][1]cuda:0" = _foreach_sub[93] + getitem_242: "f32[3072, 768][768, 1]cuda:0" = _foreach_sub[94] + getitem_243: "f32[3072][1]cuda:0" = _foreach_sub[95] + getitem_244: "f32[768, 3072][3072, 1]cuda:0" = _foreach_sub[96] + getitem_245: "f32[768][1]cuda:0" = 
_foreach_sub[97] + getitem_246: "f32[768][1]cuda:0" = _foreach_sub[98] + getitem_247: "f32[768][1]cuda:0" = _foreach_sub[99] + getitem_248: "f32[2304, 768][768, 1]cuda:0" = _foreach_sub[100] + getitem_249: "f32[2304][1]cuda:0" = _foreach_sub[101] + getitem_250: "f32[768, 768][768, 1]cuda:0" = _foreach_sub[102] + getitem_251: "f32[768][1]cuda:0" = _foreach_sub[103] + getitem_252: "f32[768][1]cuda:0" = _foreach_sub[104] + getitem_253: "f32[768][1]cuda:0" = _foreach_sub[105] + getitem_254: "f32[3072, 768][768, 1]cuda:0" = _foreach_sub[106] + getitem_255: "f32[3072][1]cuda:0" = _foreach_sub[107] + getitem_256: "f32[768, 3072][3072, 1]cuda:0" = _foreach_sub[108] + getitem_257: "f32[768][1]cuda:0" = _foreach_sub[109] + getitem_258: "f32[768][1]cuda:0" = _foreach_sub[110] + getitem_259: "f32[768][1]cuda:0" = _foreach_sub[111] + getitem_260: "f32[2304, 768][768, 1]cuda:0" = _foreach_sub[112] + getitem_261: "f32[2304][1]cuda:0" = _foreach_sub[113] + getitem_262: "f32[768, 768][768, 1]cuda:0" = _foreach_sub[114] + getitem_263: "f32[768][1]cuda:0" = _foreach_sub[115] + getitem_264: "f32[768][1]cuda:0" = _foreach_sub[116] + getitem_265: "f32[768][1]cuda:0" = _foreach_sub[117] + getitem_266: "f32[3072, 768][768, 1]cuda:0" = _foreach_sub[118] + getitem_267: "f32[3072][1]cuda:0" = _foreach_sub[119] + getitem_268: "f32[768, 3072][3072, 1]cuda:0" = _foreach_sub[120] + getitem_269: "f32[768][1]cuda:0" = _foreach_sub[121] + getitem_270: "f32[768][1]cuda:0" = _foreach_sub[122] + getitem_271: "f32[768][1]cuda:0" = _foreach_sub[123] + getitem_272: "f32[2304, 768][768, 1]cuda:0" = _foreach_sub[124] + getitem_273: "f32[2304][1]cuda:0" = _foreach_sub[125] + getitem_274: "f32[768, 768][768, 1]cuda:0" = _foreach_sub[126] + getitem_275: "f32[768][1]cuda:0" = _foreach_sub[127] + getitem_276: "f32[768][1]cuda:0" = _foreach_sub[128] + getitem_277: "f32[768][1]cuda:0" = _foreach_sub[129] + getitem_278: "f32[3072, 768][768, 1]cuda:0" = _foreach_sub[130] + getitem_279: "f32[3072][1]cuda:0" = _foreach_sub[131] + getitem_280: "f32[768, 3072][3072, 1]cuda:0" = _foreach_sub[132] + getitem_281: "f32[768][1]cuda:0" = _foreach_sub[133] + getitem_282: "f32[768][1]cuda:0" = _foreach_sub[134] + getitem_283: "f32[768][1]cuda:0" = _foreach_sub[135] + getitem_284: "f32[2304, 768][768, 1]cuda:0" = _foreach_sub[136] + getitem_285: "f32[2304][1]cuda:0" = _foreach_sub[137] + getitem_286: "f32[768, 768][768, 1]cuda:0" = _foreach_sub[138] + getitem_287: "f32[768][1]cuda:0" = _foreach_sub[139] + getitem_288: "f32[768][1]cuda:0" = _foreach_sub[140] + getitem_289: "f32[768][1]cuda:0" = _foreach_sub[141] + getitem_290: "f32[3072, 768][768, 1]cuda:0" = _foreach_sub[142] + getitem_291: "f32[3072][1]cuda:0" = _foreach_sub[143] + getitem_292: "f32[768, 3072][3072, 1]cuda:0" = _foreach_sub[144] + getitem_293: "f32[768][1]cuda:0" = _foreach_sub[145] + getitem_294: "f32[768][1]cuda:0" = _foreach_sub[146] + getitem_295: "f32[768][1]cuda:0" = _foreach_sub[147]; _foreach_sub = None + _foreach_mul = torch.ops.aten._foreach_mul.Scalar([getitem_148, getitem_149, getitem_150, getitem_151, getitem_152, getitem_153, getitem_154, getitem_155, getitem_156, getitem_157, getitem_158, getitem_159, getitem_160, getitem_161, getitem_162, getitem_163, getitem_164, getitem_165, getitem_166, getitem_167, getitem_168, getitem_169, getitem_170, getitem_171, getitem_172, getitem_173, getitem_174, getitem_175, getitem_176, getitem_177, getitem_178, getitem_179, getitem_180, getitem_181, getitem_182, getitem_183, getitem_184, getitem_185, getitem_186, getitem_187, 
getitem_188, getitem_189, getitem_190, getitem_191, getitem_192, getitem_193, getitem_194, getitem_195, getitem_196, getitem_197, getitem_198, getitem_199, getitem_200, getitem_201, getitem_202, getitem_203, getitem_204, getitem_205, getitem_206, getitem_207, getitem_208, getitem_209, getitem_210, getitem_211, getitem_212, getitem_213, getitem_214, getitem_215, getitem_216, getitem_217, getitem_218, getitem_219, getitem_220, getitem_221, getitem_222, getitem_223, getitem_224, getitem_225, getitem_226, getitem_227, getitem_228, getitem_229, getitem_230, getitem_231, getitem_232, getitem_233, getitem_234, getitem_235, getitem_236, getitem_237, getitem_238, getitem_239, getitem_240, getitem_241, getitem_242, getitem_243, getitem_244, getitem_245, getitem_246, getitem_247, getitem_248, getitem_249, getitem_250, getitem_251, getitem_252, getitem_253, getitem_254, getitem_255, getitem_256, getitem_257, getitem_258, getitem_259, getitem_260, getitem_261, getitem_262, getitem_263, getitem_264, getitem_265, getitem_266, getitem_267, getitem_268, getitem_269, getitem_270, getitem_271, getitem_272, getitem_273, getitem_274, getitem_275, getitem_276, getitem_277, getitem_278, getitem_279, getitem_280, getitem_281, getitem_282, getitem_283, getitem_284, getitem_285, getitem_286, getitem_287, getitem_288, getitem_289, getitem_290, getitem_291, getitem_292, getitem_293, getitem_294, getitem_295], 0.09999999999999998); getitem_148 = getitem_149 = getitem_150 = getitem_151 = getitem_152 = getitem_153 = getitem_154 = getitem_155 = getitem_156 = getitem_157 = getitem_158 = getitem_159 = getitem_160 = getitem_161 = getitem_162 = getitem_163 = getitem_164 = getitem_165 = getitem_166 = getitem_167 = getitem_168 = getitem_169 = getitem_170 = getitem_171 = getitem_172 = getitem_173 = getitem_174 = getitem_175 = getitem_176 = getitem_177 = getitem_178 = getitem_179 = getitem_180 = getitem_181 = getitem_182 = getitem_183 = getitem_184 = getitem_185 = getitem_186 = getitem_187 = getitem_188 = getitem_189 = getitem_190 = getitem_191 = getitem_192 = getitem_193 = getitem_194 = getitem_195 = getitem_196 = getitem_197 = getitem_198 = getitem_199 = getitem_200 = getitem_201 = getitem_202 = getitem_203 = getitem_204 = getitem_205 = getitem_206 = getitem_207 = getitem_208 = getitem_209 = getitem_210 = getitem_211 = getitem_212 = getitem_213 = getitem_214 = getitem_215 = getitem_216 = getitem_217 = getitem_218 = getitem_219 = getitem_220 = getitem_221 = getitem_222 = getitem_223 = getitem_224 = getitem_225 = getitem_226 = getitem_227 = getitem_228 = getitem_229 = getitem_230 = getitem_231 = getitem_232 = getitem_233 = getitem_234 = getitem_235 = getitem_236 = getitem_237 = getitem_238 = getitem_239 = getitem_240 = getitem_241 = getitem_242 = getitem_243 = getitem_244 = getitem_245 = getitem_246 = getitem_247 = getitem_248 = getitem_249 = getitem_250 = getitem_251 = getitem_252 = getitem_253 = getitem_254 = getitem_255 = getitem_256 = getitem_257 = getitem_258 = getitem_259 = getitem_260 = getitem_261 = getitem_262 = getitem_263 = getitem_264 = getitem_265 = getitem_266 = getitem_267 = getitem_268 = getitem_269 = getitem_270 = getitem_271 = getitem_272 = getitem_273 = getitem_274 = getitem_275 = getitem_276 = getitem_277 = getitem_278 = getitem_279 = getitem_280 = getitem_281 = getitem_282 = getitem_283 = getitem_284 = getitem_285 = getitem_286 = getitem_287 = getitem_288 = getitem_289 = getitem_290 = getitem_291 = getitem_292 = getitem_293 = getitem_294 = getitem_295 = None + getitem_296: "f32[50304, 768][768, 1]cuda:0" = 
_foreach_mul[0] + getitem_297: "f32[1024, 768][768, 1]cuda:0" = _foreach_mul[1] + getitem_298: "f32[768][1]cuda:0" = _foreach_mul[2] + getitem_299: "f32[768][1]cuda:0" = _foreach_mul[3] + getitem_300: "f32[2304, 768][768, 1]cuda:0" = _foreach_mul[4] + getitem_301: "f32[2304][1]cuda:0" = _foreach_mul[5] + getitem_302: "f32[768, 768][768, 1]cuda:0" = _foreach_mul[6] + getitem_303: "f32[768][1]cuda:0" = _foreach_mul[7] + getitem_304: "f32[768][1]cuda:0" = _foreach_mul[8] + getitem_305: "f32[768][1]cuda:0" = _foreach_mul[9] + getitem_306: "f32[3072, 768][768, 1]cuda:0" = _foreach_mul[10] + getitem_307: "f32[3072][1]cuda:0" = _foreach_mul[11] + getitem_308: "f32[768, 3072][3072, 1]cuda:0" = _foreach_mul[12] + getitem_309: "f32[768][1]cuda:0" = _foreach_mul[13] + getitem_310: "f32[768][1]cuda:0" = _foreach_mul[14] + getitem_311: "f32[768][1]cuda:0" = _foreach_mul[15] + getitem_312: "f32[2304, 768][768, 1]cuda:0" = _foreach_mul[16] + getitem_313: "f32[2304][1]cuda:0" = _foreach_mul[17] + getitem_314: "f32[768, 768][768, 1]cuda:0" = _foreach_mul[18] + getitem_315: "f32[768][1]cuda:0" = _foreach_mul[19] + getitem_316: "f32[768][1]cuda:0" = _foreach_mul[20] + getitem_317: "f32[768][1]cuda:0" = _foreach_mul[21] + getitem_318: "f32[3072, 768][768, 1]cuda:0" = _foreach_mul[22] + getitem_319: "f32[3072][1]cuda:0" = _foreach_mul[23] + getitem_320: "f32[768, 3072][3072, 1]cuda:0" = _foreach_mul[24] + getitem_321: "f32[768][1]cuda:0" = _foreach_mul[25] + getitem_322: "f32[768][1]cuda:0" = _foreach_mul[26] + getitem_323: "f32[768][1]cuda:0" = _foreach_mul[27] + getitem_324: "f32[2304, 768][768, 1]cuda:0" = _foreach_mul[28] + getitem_325: "f32[2304][1]cuda:0" = _foreach_mul[29] + getitem_326: "f32[768, 768][768, 1]cuda:0" = _foreach_mul[30] + getitem_327: "f32[768][1]cuda:0" = _foreach_mul[31] + getitem_328: "f32[768][1]cuda:0" = _foreach_mul[32] + getitem_329: "f32[768][1]cuda:0" = _foreach_mul[33] + getitem_330: "f32[3072, 768][768, 1]cuda:0" = _foreach_mul[34] + getitem_331: "f32[3072][1]cuda:0" = _foreach_mul[35] + getitem_332: "f32[768, 3072][3072, 1]cuda:0" = _foreach_mul[36] + getitem_333: "f32[768][1]cuda:0" = _foreach_mul[37] + getitem_334: "f32[768][1]cuda:0" = _foreach_mul[38] + getitem_335: "f32[768][1]cuda:0" = _foreach_mul[39] + getitem_336: "f32[2304, 768][768, 1]cuda:0" = _foreach_mul[40] + getitem_337: "f32[2304][1]cuda:0" = _foreach_mul[41] + getitem_338: "f32[768, 768][768, 1]cuda:0" = _foreach_mul[42] + getitem_339: "f32[768][1]cuda:0" = _foreach_mul[43] + getitem_340: "f32[768][1]cuda:0" = _foreach_mul[44] + getitem_341: "f32[768][1]cuda:0" = _foreach_mul[45] + getitem_342: "f32[3072, 768][768, 1]cuda:0" = _foreach_mul[46] + getitem_343: "f32[3072][1]cuda:0" = _foreach_mul[47] + getitem_344: "f32[768, 3072][3072, 1]cuda:0" = _foreach_mul[48] + getitem_345: "f32[768][1]cuda:0" = _foreach_mul[49] + getitem_346: "f32[768][1]cuda:0" = _foreach_mul[50] + getitem_347: "f32[768][1]cuda:0" = _foreach_mul[51] + getitem_348: "f32[2304, 768][768, 1]cuda:0" = _foreach_mul[52] + getitem_349: "f32[2304][1]cuda:0" = _foreach_mul[53] + getitem_350: "f32[768, 768][768, 1]cuda:0" = _foreach_mul[54] + getitem_351: "f32[768][1]cuda:0" = _foreach_mul[55] + getitem_352: "f32[768][1]cuda:0" = _foreach_mul[56] + getitem_353: "f32[768][1]cuda:0" = _foreach_mul[57] + getitem_354: "f32[3072, 768][768, 1]cuda:0" = _foreach_mul[58] + getitem_355: "f32[3072][1]cuda:0" = _foreach_mul[59] + getitem_356: "f32[768, 3072][3072, 1]cuda:0" = _foreach_mul[60] + getitem_357: "f32[768][1]cuda:0" = _foreach_mul[61] + 
getitem_358: "f32[768][1]cuda:0" = _foreach_mul[62] + getitem_359: "f32[768][1]cuda:0" = _foreach_mul[63] + getitem_360: "f32[2304, 768][768, 1]cuda:0" = _foreach_mul[64] + getitem_361: "f32[2304][1]cuda:0" = _foreach_mul[65] + getitem_362: "f32[768, 768][768, 1]cuda:0" = _foreach_mul[66] + getitem_363: "f32[768][1]cuda:0" = _foreach_mul[67] + getitem_364: "f32[768][1]cuda:0" = _foreach_mul[68] + getitem_365: "f32[768][1]cuda:0" = _foreach_mul[69] + getitem_366: "f32[3072, 768][768, 1]cuda:0" = _foreach_mul[70] + getitem_367: "f32[3072][1]cuda:0" = _foreach_mul[71] + getitem_368: "f32[768, 3072][3072, 1]cuda:0" = _foreach_mul[72] + getitem_369: "f32[768][1]cuda:0" = _foreach_mul[73] + getitem_370: "f32[768][1]cuda:0" = _foreach_mul[74] + getitem_371: "f32[768][1]cuda:0" = _foreach_mul[75] + getitem_372: "f32[2304, 768][768, 1]cuda:0" = _foreach_mul[76] + getitem_373: "f32[2304][1]cuda:0" = _foreach_mul[77] + getitem_374: "f32[768, 768][768, 1]cuda:0" = _foreach_mul[78] + getitem_375: "f32[768][1]cuda:0" = _foreach_mul[79] + getitem_376: "f32[768][1]cuda:0" = _foreach_mul[80] + getitem_377: "f32[768][1]cuda:0" = _foreach_mul[81] + getitem_378: "f32[3072, 768][768, 1]cuda:0" = _foreach_mul[82] + getitem_379: "f32[3072][1]cuda:0" = _foreach_mul[83] + getitem_380: "f32[768, 3072][3072, 1]cuda:0" = _foreach_mul[84] + getitem_381: "f32[768][1]cuda:0" = _foreach_mul[85] + getitem_382: "f32[768][1]cuda:0" = _foreach_mul[86] + getitem_383: "f32[768][1]cuda:0" = _foreach_mul[87] + getitem_384: "f32[2304, 768][768, 1]cuda:0" = _foreach_mul[88] + getitem_385: "f32[2304][1]cuda:0" = _foreach_mul[89] + getitem_386: "f32[768, 768][768, 1]cuda:0" = _foreach_mul[90] + getitem_387: "f32[768][1]cuda:0" = _foreach_mul[91] + getitem_388: "f32[768][1]cuda:0" = _foreach_mul[92] + getitem_389: "f32[768][1]cuda:0" = _foreach_mul[93] + getitem_390: "f32[3072, 768][768, 1]cuda:0" = _foreach_mul[94] + getitem_391: "f32[3072][1]cuda:0" = _foreach_mul[95] + getitem_392: "f32[768, 3072][3072, 1]cuda:0" = _foreach_mul[96] + getitem_393: "f32[768][1]cuda:0" = _foreach_mul[97] + getitem_394: "f32[768][1]cuda:0" = _foreach_mul[98] + getitem_395: "f32[768][1]cuda:0" = _foreach_mul[99] + getitem_396: "f32[2304, 768][768, 1]cuda:0" = _foreach_mul[100] + getitem_397: "f32[2304][1]cuda:0" = _foreach_mul[101] + getitem_398: "f32[768, 768][768, 1]cuda:0" = _foreach_mul[102] + getitem_399: "f32[768][1]cuda:0" = _foreach_mul[103] + getitem_400: "f32[768][1]cuda:0" = _foreach_mul[104] + getitem_401: "f32[768][1]cuda:0" = _foreach_mul[105] + getitem_402: "f32[3072, 768][768, 1]cuda:0" = _foreach_mul[106] + getitem_403: "f32[3072][1]cuda:0" = _foreach_mul[107] + getitem_404: "f32[768, 3072][3072, 1]cuda:0" = _foreach_mul[108] + getitem_405: "f32[768][1]cuda:0" = _foreach_mul[109] + getitem_406: "f32[768][1]cuda:0" = _foreach_mul[110] + getitem_407: "f32[768][1]cuda:0" = _foreach_mul[111] + getitem_408: "f32[2304, 768][768, 1]cuda:0" = _foreach_mul[112] + getitem_409: "f32[2304][1]cuda:0" = _foreach_mul[113] + getitem_410: "f32[768, 768][768, 1]cuda:0" = _foreach_mul[114] + getitem_411: "f32[768][1]cuda:0" = _foreach_mul[115] + getitem_412: "f32[768][1]cuda:0" = _foreach_mul[116] + getitem_413: "f32[768][1]cuda:0" = _foreach_mul[117] + getitem_414: "f32[3072, 768][768, 1]cuda:0" = _foreach_mul[118] + getitem_415: "f32[3072][1]cuda:0" = _foreach_mul[119] + getitem_416: "f32[768, 3072][3072, 1]cuda:0" = _foreach_mul[120] + getitem_417: "f32[768][1]cuda:0" = _foreach_mul[121] + getitem_418: "f32[768][1]cuda:0" = _foreach_mul[122] + 
getitem_419: "f32[768][1]cuda:0" = _foreach_mul[123] + getitem_420: "f32[2304, 768][768, 1]cuda:0" = _foreach_mul[124] + getitem_421: "f32[2304][1]cuda:0" = _foreach_mul[125] + getitem_422: "f32[768, 768][768, 1]cuda:0" = _foreach_mul[126] + getitem_423: "f32[768][1]cuda:0" = _foreach_mul[127] + getitem_424: "f32[768][1]cuda:0" = _foreach_mul[128] + getitem_425: "f32[768][1]cuda:0" = _foreach_mul[129] + getitem_426: "f32[3072, 768][768, 1]cuda:0" = _foreach_mul[130] + getitem_427: "f32[3072][1]cuda:0" = _foreach_mul[131] + getitem_428: "f32[768, 3072][3072, 1]cuda:0" = _foreach_mul[132] + getitem_429: "f32[768][1]cuda:0" = _foreach_mul[133] + getitem_430: "f32[768][1]cuda:0" = _foreach_mul[134] + getitem_431: "f32[768][1]cuda:0" = _foreach_mul[135] + getitem_432: "f32[2304, 768][768, 1]cuda:0" = _foreach_mul[136] + getitem_433: "f32[2304][1]cuda:0" = _foreach_mul[137] + getitem_434: "f32[768, 768][768, 1]cuda:0" = _foreach_mul[138] + getitem_435: "f32[768][1]cuda:0" = _foreach_mul[139] + getitem_436: "f32[768][1]cuda:0" = _foreach_mul[140] + getitem_437: "f32[768][1]cuda:0" = _foreach_mul[141] + getitem_438: "f32[3072, 768][768, 1]cuda:0" = _foreach_mul[142] + getitem_439: "f32[3072][1]cuda:0" = _foreach_mul[143] + getitem_440: "f32[768, 3072][3072, 1]cuda:0" = _foreach_mul[144] + getitem_441: "f32[768][1]cuda:0" = _foreach_mul[145] + getitem_442: "f32[768][1]cuda:0" = _foreach_mul[146] + getitem_443: "f32[768][1]cuda:0" = _foreach_mul[147]; _foreach_mul = None + _foreach_add_1 = torch.ops.aten._foreach_add_.List([arg299_1, arg149_1, arg300_1, arg301_1, arg302_1, arg303_1, arg304_1, arg305_1, arg306_1, arg307_1, arg308_1, arg309_1, arg310_1, arg311_1, arg312_1, arg313_1, arg314_1, arg315_1, arg316_1, arg317_1, arg318_1, arg319_1, arg320_1, arg321_1, arg322_1, arg323_1, arg324_1, arg325_1, arg326_1, arg327_1, arg328_1, arg329_1, arg330_1, arg331_1, arg332_1, arg333_1, arg334_1, arg335_1, arg336_1, arg337_1, arg338_1, arg339_1, arg340_1, arg341_1, arg342_1, arg343_1, arg344_1, arg345_1, arg346_1, arg347_1, arg348_1, arg349_1, arg350_1, arg351_1, arg352_1, arg353_1, arg354_1, arg355_1, arg356_1, arg357_1, arg358_1, arg359_1, arg360_1, arg361_1, arg362_1, arg363_1, arg364_1, arg365_1, arg366_1, arg367_1, arg368_1, arg369_1, arg370_1, arg371_1, arg372_1, arg373_1, arg374_1, arg375_1, arg376_1, arg377_1, arg378_1, arg379_1, arg380_1, arg381_1, arg382_1, arg383_1, arg384_1, arg385_1, arg386_1, arg387_1, arg388_1, arg389_1, arg390_1, arg391_1, arg392_1, arg393_1, arg394_1, arg395_1, arg396_1, arg397_1, arg398_1, arg399_1, arg400_1, arg401_1, arg402_1, arg403_1, arg404_1, arg405_1, arg406_1, arg407_1, arg408_1, arg409_1, arg410_1, arg411_1, arg412_1, arg413_1, arg414_1, arg415_1, arg416_1, arg417_1, arg418_1, arg419_1, arg420_1, arg421_1, arg422_1, arg423_1, arg424_1, arg425_1, arg426_1, arg427_1, arg428_1, arg429_1, arg430_1, arg431_1, arg432_1, arg433_1, arg434_1, arg435_1, arg436_1, arg437_1, arg438_1, arg439_1, arg440_1, arg441_1, arg442_1, arg443_1, arg444_1, arg445_1], [getitem_296, getitem_297, getitem_298, getitem_299, getitem_300, getitem_301, getitem_302, getitem_303, getitem_304, getitem_305, getitem_306, getitem_307, getitem_308, getitem_309, getitem_310, getitem_311, getitem_312, getitem_313, getitem_314, getitem_315, getitem_316, getitem_317, getitem_318, getitem_319, getitem_320, getitem_321, getitem_322, getitem_323, getitem_324, getitem_325, getitem_326, getitem_327, getitem_328, getitem_329, getitem_330, getitem_331, getitem_332, getitem_333, getitem_334, getitem_335, getitem_336, 
getitem_337, getitem_338, getitem_339, getitem_340, getitem_341, getitem_342, getitem_343, getitem_344, getitem_345, getitem_346, getitem_347, getitem_348, getitem_349, getitem_350, getitem_351, getitem_352, getitem_353, getitem_354, getitem_355, getitem_356, getitem_357, getitem_358, getitem_359, getitem_360, getitem_361, getitem_362, getitem_363, getitem_364, getitem_365, getitem_366, getitem_367, getitem_368, getitem_369, getitem_370, getitem_371, getitem_372, getitem_373, getitem_374, getitem_375, getitem_376, getitem_377, getitem_378, getitem_379, getitem_380, getitem_381, getitem_382, getitem_383, getitem_384, getitem_385, getitem_386, getitem_387, getitem_388, getitem_389, getitem_390, getitem_391, getitem_392, getitem_393, getitem_394, getitem_395, getitem_396, getitem_397, getitem_398, getitem_399, getitem_400, getitem_401, getitem_402, getitem_403, getitem_404, getitem_405, getitem_406, getitem_407, getitem_408, getitem_409, getitem_410, getitem_411, getitem_412, getitem_413, getitem_414, getitem_415, getitem_416, getitem_417, getitem_418, getitem_419, getitem_420, getitem_421, getitem_422, getitem_423, getitem_424, getitem_425, getitem_426, getitem_427, getitem_428, getitem_429, getitem_430, getitem_431, getitem_432, getitem_433, getitem_434, getitem_435, getitem_436, getitem_437, getitem_438, getitem_439, getitem_440, getitem_441, getitem_442, getitem_443]); arg299_1 = arg149_1 = arg300_1 = arg301_1 = arg302_1 = arg303_1 = arg304_1 = arg305_1 = arg306_1 = arg307_1 = arg308_1 = arg309_1 = arg310_1 = arg311_1 = arg312_1 = arg313_1 = arg314_1 = arg315_1 = arg316_1 = arg317_1 = arg318_1 = arg319_1 = arg320_1 = arg321_1 = arg322_1 = arg323_1 = arg324_1 = arg325_1 = arg326_1 = arg327_1 = arg328_1 = arg329_1 = arg330_1 = arg331_1 = arg332_1 = arg333_1 = arg334_1 = arg335_1 = arg336_1 = arg337_1 = arg338_1 = arg339_1 = arg340_1 = arg341_1 = arg342_1 = arg343_1 = arg344_1 = arg345_1 = arg346_1 = arg347_1 = arg348_1 = arg349_1 = arg350_1 = arg351_1 = arg352_1 = arg353_1 = arg354_1 = arg355_1 = arg356_1 = arg357_1 = arg358_1 = arg359_1 = arg360_1 = arg361_1 = arg362_1 = arg363_1 = arg364_1 = arg365_1 = arg366_1 = arg367_1 = arg368_1 = arg369_1 = arg370_1 = arg371_1 = arg372_1 = arg373_1 = arg374_1 = arg375_1 = arg376_1 = arg377_1 = arg378_1 = arg379_1 = arg380_1 = arg381_1 = arg382_1 = arg383_1 = arg384_1 = arg385_1 = arg386_1 = arg387_1 = arg388_1 = arg389_1 = arg390_1 = arg391_1 = arg392_1 = arg393_1 = arg394_1 = arg395_1 = arg396_1 = arg397_1 = arg398_1 = arg399_1 = arg400_1 = arg401_1 = arg402_1 = arg403_1 = arg404_1 = arg405_1 = arg406_1 = arg407_1 = arg408_1 = arg409_1 = arg410_1 = arg411_1 = arg412_1 = arg413_1 = arg414_1 = arg415_1 = arg416_1 = arg417_1 = arg418_1 = arg419_1 = arg420_1 = arg421_1 = arg422_1 = arg423_1 = arg424_1 = arg425_1 = arg426_1 = arg427_1 = arg428_1 = arg429_1 = arg430_1 = arg431_1 = arg432_1 = arg433_1 = arg434_1 = arg435_1 = arg436_1 = arg437_1 = arg438_1 = arg439_1 = arg440_1 = arg441_1 = arg442_1 = arg443_1 = arg444_1 = arg445_1 = getitem_296 = getitem_297 = getitem_298 = getitem_299 = getitem_300 = getitem_301 = getitem_302 = getitem_303 = getitem_304 = getitem_305 = getitem_306 = getitem_307 = getitem_308 = getitem_309 = getitem_310 = getitem_311 = getitem_312 = getitem_313 = getitem_314 = getitem_315 = getitem_316 = getitem_317 = getitem_318 = getitem_319 = getitem_320 = getitem_321 = getitem_322 = getitem_323 = getitem_324 = getitem_325 = getitem_326 = getitem_327 = getitem_328 = getitem_329 = getitem_330 = getitem_331 = getitem_332 = getitem_333 = 
getitem_334 = getitem_335 = getitem_336 = getitem_337 = getitem_338 = getitem_339 = getitem_340 = getitem_341 = getitem_342 = getitem_343 = getitem_344 = getitem_345 = getitem_346 = getitem_347 = getitem_348 = getitem_349 = getitem_350 = getitem_351 = getitem_352 = getitem_353 = getitem_354 = getitem_355 = getitem_356 = getitem_357 = getitem_358 = getitem_359 = getitem_360 = getitem_361 = getitem_362 = getitem_363 = getitem_364 = getitem_365 = getitem_366 = getitem_367 = getitem_368 = getitem_369 = getitem_370 = getitem_371 = getitem_372 = getitem_373 = getitem_374 = getitem_375 = getitem_376 = getitem_377 = getitem_378 = getitem_379 = getitem_380 = getitem_381 = getitem_382 = getitem_383 = getitem_384 = getitem_385 = getitem_386 = getitem_387 = getitem_388 = getitem_389 = getitem_390 = getitem_391 = getitem_392 = getitem_393 = getitem_394 = getitem_395 = getitem_396 = getitem_397 = getitem_398 = getitem_399 = getitem_400 = getitem_401 = getitem_402 = getitem_403 = getitem_404 = getitem_405 = getitem_406 = getitem_407 = getitem_408 = getitem_409 = getitem_410 = getitem_411 = getitem_412 = getitem_413 = getitem_414 = getitem_415 = getitem_416 = getitem_417 = getitem_418 = getitem_419 = getitem_420 = getitem_421 = getitem_422 = getitem_423 = getitem_424 = getitem_425 = getitem_426 = getitem_427 = getitem_428 = getitem_429 = getitem_430 = getitem_431 = getitem_432 = getitem_433 = getitem_434 = getitem_435 = getitem_436 = getitem_437 = getitem_438 = getitem_439 = getitem_440 = getitem_441 = getitem_442 = getitem_443 = None + getitem_444: "f32[50304, 768][768, 1]cuda:0" = _foreach_add_1[0] + getitem_445: "f32[1024, 768][768, 1]cuda:0" = _foreach_add_1[1] + getitem_446: "f32[768][1]cuda:0" = _foreach_add_1[2] + getitem_447: "f32[768][1]cuda:0" = _foreach_add_1[3] + getitem_448: "f32[2304, 768][768, 1]cuda:0" = _foreach_add_1[4] + getitem_449: "f32[2304][1]cuda:0" = _foreach_add_1[5] + getitem_450: "f32[768, 768][768, 1]cuda:0" = _foreach_add_1[6] + getitem_451: "f32[768][1]cuda:0" = _foreach_add_1[7] + getitem_452: "f32[768][1]cuda:0" = _foreach_add_1[8] + getitem_453: "f32[768][1]cuda:0" = _foreach_add_1[9] + getitem_454: "f32[3072, 768][768, 1]cuda:0" = _foreach_add_1[10] + getitem_455: "f32[3072][1]cuda:0" = _foreach_add_1[11] + getitem_456: "f32[768, 3072][3072, 1]cuda:0" = _foreach_add_1[12] + getitem_457: "f32[768][1]cuda:0" = _foreach_add_1[13] + getitem_458: "f32[768][1]cuda:0" = _foreach_add_1[14] + getitem_459: "f32[768][1]cuda:0" = _foreach_add_1[15] + getitem_460: "f32[2304, 768][768, 1]cuda:0" = _foreach_add_1[16] + getitem_461: "f32[2304][1]cuda:0" = _foreach_add_1[17] + getitem_462: "f32[768, 768][768, 1]cuda:0" = _foreach_add_1[18] + getitem_463: "f32[768][1]cuda:0" = _foreach_add_1[19] + getitem_464: "f32[768][1]cuda:0" = _foreach_add_1[20] + getitem_465: "f32[768][1]cuda:0" = _foreach_add_1[21] + getitem_466: "f32[3072, 768][768, 1]cuda:0" = _foreach_add_1[22] + getitem_467: "f32[3072][1]cuda:0" = _foreach_add_1[23] + getitem_468: "f32[768, 3072][3072, 1]cuda:0" = _foreach_add_1[24] + getitem_469: "f32[768][1]cuda:0" = _foreach_add_1[25] + getitem_470: "f32[768][1]cuda:0" = _foreach_add_1[26] + getitem_471: "f32[768][1]cuda:0" = _foreach_add_1[27] + getitem_472: "f32[2304, 768][768, 1]cuda:0" = _foreach_add_1[28] + getitem_473: "f32[2304][1]cuda:0" = _foreach_add_1[29] + getitem_474: "f32[768, 768][768, 1]cuda:0" = _foreach_add_1[30] + getitem_475: "f32[768][1]cuda:0" = _foreach_add_1[31] + getitem_476: "f32[768][1]cuda:0" = _foreach_add_1[32] + getitem_477: "f32[768][1]cuda:0" 
= _foreach_add_1[33] + getitem_478: "f32[3072, 768][768, 1]cuda:0" = _foreach_add_1[34] + getitem_479: "f32[3072][1]cuda:0" = _foreach_add_1[35] + getitem_480: "f32[768, 3072][3072, 1]cuda:0" = _foreach_add_1[36] + getitem_481: "f32[768][1]cuda:0" = _foreach_add_1[37] + getitem_482: "f32[768][1]cuda:0" = _foreach_add_1[38] + getitem_483: "f32[768][1]cuda:0" = _foreach_add_1[39] + getitem_484: "f32[2304, 768][768, 1]cuda:0" = _foreach_add_1[40] + getitem_485: "f32[2304][1]cuda:0" = _foreach_add_1[41] + getitem_486: "f32[768, 768][768, 1]cuda:0" = _foreach_add_1[42] + getitem_487: "f32[768][1]cuda:0" = _foreach_add_1[43] + getitem_488: "f32[768][1]cuda:0" = _foreach_add_1[44] + getitem_489: "f32[768][1]cuda:0" = _foreach_add_1[45] + getitem_490: "f32[3072, 768][768, 1]cuda:0" = _foreach_add_1[46] + getitem_491: "f32[3072][1]cuda:0" = _foreach_add_1[47] + getitem_492: "f32[768, 3072][3072, 1]cuda:0" = _foreach_add_1[48] + getitem_493: "f32[768][1]cuda:0" = _foreach_add_1[49] + getitem_494: "f32[768][1]cuda:0" = _foreach_add_1[50] + getitem_495: "f32[768][1]cuda:0" = _foreach_add_1[51] + getitem_496: "f32[2304, 768][768, 1]cuda:0" = _foreach_add_1[52] + getitem_497: "f32[2304][1]cuda:0" = _foreach_add_1[53] + getitem_498: "f32[768, 768][768, 1]cuda:0" = _foreach_add_1[54] + getitem_499: "f32[768][1]cuda:0" = _foreach_add_1[55] + getitem_500: "f32[768][1]cuda:0" = _foreach_add_1[56] + getitem_501: "f32[768][1]cuda:0" = _foreach_add_1[57] + getitem_502: "f32[3072, 768][768, 1]cuda:0" = _foreach_add_1[58] + getitem_503: "f32[3072][1]cuda:0" = _foreach_add_1[59] + getitem_504: "f32[768, 3072][3072, 1]cuda:0" = _foreach_add_1[60] + getitem_505: "f32[768][1]cuda:0" = _foreach_add_1[61] + getitem_506: "f32[768][1]cuda:0" = _foreach_add_1[62] + getitem_507: "f32[768][1]cuda:0" = _foreach_add_1[63] + getitem_508: "f32[2304, 768][768, 1]cuda:0" = _foreach_add_1[64] + getitem_509: "f32[2304][1]cuda:0" = _foreach_add_1[65] + getitem_510: "f32[768, 768][768, 1]cuda:0" = _foreach_add_1[66] + getitem_511: "f32[768][1]cuda:0" = _foreach_add_1[67] + getitem_512: "f32[768][1]cuda:0" = _foreach_add_1[68] + getitem_513: "f32[768][1]cuda:0" = _foreach_add_1[69] + getitem_514: "f32[3072, 768][768, 1]cuda:0" = _foreach_add_1[70] + getitem_515: "f32[3072][1]cuda:0" = _foreach_add_1[71] + getitem_516: "f32[768, 3072][3072, 1]cuda:0" = _foreach_add_1[72] + getitem_517: "f32[768][1]cuda:0" = _foreach_add_1[73] + getitem_518: "f32[768][1]cuda:0" = _foreach_add_1[74] + getitem_519: "f32[768][1]cuda:0" = _foreach_add_1[75] + getitem_520: "f32[2304, 768][768, 1]cuda:0" = _foreach_add_1[76] + getitem_521: "f32[2304][1]cuda:0" = _foreach_add_1[77] + getitem_522: "f32[768, 768][768, 1]cuda:0" = _foreach_add_1[78] + getitem_523: "f32[768][1]cuda:0" = _foreach_add_1[79] + getitem_524: "f32[768][1]cuda:0" = _foreach_add_1[80] + getitem_525: "f32[768][1]cuda:0" = _foreach_add_1[81] + getitem_526: "f32[3072, 768][768, 1]cuda:0" = _foreach_add_1[82] + getitem_527: "f32[3072][1]cuda:0" = _foreach_add_1[83] + getitem_528: "f32[768, 3072][3072, 1]cuda:0" = _foreach_add_1[84] + getitem_529: "f32[768][1]cuda:0" = _foreach_add_1[85] + getitem_530: "f32[768][1]cuda:0" = _foreach_add_1[86] + getitem_531: "f32[768][1]cuda:0" = _foreach_add_1[87] + getitem_532: "f32[2304, 768][768, 1]cuda:0" = _foreach_add_1[88] + getitem_533: "f32[2304][1]cuda:0" = _foreach_add_1[89] + getitem_534: "f32[768, 768][768, 1]cuda:0" = _foreach_add_1[90] + getitem_535: "f32[768][1]cuda:0" = _foreach_add_1[91] + getitem_536: "f32[768][1]cuda:0" = _foreach_add_1[92] 
+ getitem_537: "f32[768][1]cuda:0" = _foreach_add_1[93] + getitem_538: "f32[3072, 768][768, 1]cuda:0" = _foreach_add_1[94] + getitem_539: "f32[3072][1]cuda:0" = _foreach_add_1[95] + getitem_540: "f32[768, 3072][3072, 1]cuda:0" = _foreach_add_1[96] + getitem_541: "f32[768][1]cuda:0" = _foreach_add_1[97] + getitem_542: "f32[768][1]cuda:0" = _foreach_add_1[98] + getitem_543: "f32[768][1]cuda:0" = _foreach_add_1[99] + getitem_544: "f32[2304, 768][768, 1]cuda:0" = _foreach_add_1[100] + getitem_545: "f32[2304][1]cuda:0" = _foreach_add_1[101] + getitem_546: "f32[768, 768][768, 1]cuda:0" = _foreach_add_1[102] + getitem_547: "f32[768][1]cuda:0" = _foreach_add_1[103] + getitem_548: "f32[768][1]cuda:0" = _foreach_add_1[104] + getitem_549: "f32[768][1]cuda:0" = _foreach_add_1[105] + getitem_550: "f32[3072, 768][768, 1]cuda:0" = _foreach_add_1[106] + getitem_551: "f32[3072][1]cuda:0" = _foreach_add_1[107] + getitem_552: "f32[768, 3072][3072, 1]cuda:0" = _foreach_add_1[108] + getitem_553: "f32[768][1]cuda:0" = _foreach_add_1[109] + getitem_554: "f32[768][1]cuda:0" = _foreach_add_1[110] + getitem_555: "f32[768][1]cuda:0" = _foreach_add_1[111] + getitem_556: "f32[2304, 768][768, 1]cuda:0" = _foreach_add_1[112] + getitem_557: "f32[2304][1]cuda:0" = _foreach_add_1[113] + getitem_558: "f32[768, 768][768, 1]cuda:0" = _foreach_add_1[114] + getitem_559: "f32[768][1]cuda:0" = _foreach_add_1[115] + getitem_560: "f32[768][1]cuda:0" = _foreach_add_1[116] + getitem_561: "f32[768][1]cuda:0" = _foreach_add_1[117] + getitem_562: "f32[3072, 768][768, 1]cuda:0" = _foreach_add_1[118] + getitem_563: "f32[3072][1]cuda:0" = _foreach_add_1[119] + getitem_564: "f32[768, 3072][3072, 1]cuda:0" = _foreach_add_1[120] + getitem_565: "f32[768][1]cuda:0" = _foreach_add_1[121] + getitem_566: "f32[768][1]cuda:0" = _foreach_add_1[122] + getitem_567: "f32[768][1]cuda:0" = _foreach_add_1[123] + getitem_568: "f32[2304, 768][768, 1]cuda:0" = _foreach_add_1[124] + getitem_569: "f32[2304][1]cuda:0" = _foreach_add_1[125] + getitem_570: "f32[768, 768][768, 1]cuda:0" = _foreach_add_1[126] + getitem_571: "f32[768][1]cuda:0" = _foreach_add_1[127] + getitem_572: "f32[768][1]cuda:0" = _foreach_add_1[128] + getitem_573: "f32[768][1]cuda:0" = _foreach_add_1[129] + getitem_574: "f32[3072, 768][768, 1]cuda:0" = _foreach_add_1[130] + getitem_575: "f32[3072][1]cuda:0" = _foreach_add_1[131] + getitem_576: "f32[768, 3072][3072, 1]cuda:0" = _foreach_add_1[132] + getitem_577: "f32[768][1]cuda:0" = _foreach_add_1[133] + getitem_578: "f32[768][1]cuda:0" = _foreach_add_1[134] + getitem_579: "f32[768][1]cuda:0" = _foreach_add_1[135] + getitem_580: "f32[2304, 768][768, 1]cuda:0" = _foreach_add_1[136] + getitem_581: "f32[2304][1]cuda:0" = _foreach_add_1[137] + getitem_582: "f32[768, 768][768, 1]cuda:0" = _foreach_add_1[138] + getitem_583: "f32[768][1]cuda:0" = _foreach_add_1[139] + getitem_584: "f32[768][1]cuda:0" = _foreach_add_1[140] + getitem_585: "f32[768][1]cuda:0" = _foreach_add_1[141] + getitem_586: "f32[3072, 768][768, 1]cuda:0" = _foreach_add_1[142] + getitem_587: "f32[3072][1]cuda:0" = _foreach_add_1[143] + getitem_588: "f32[768, 3072][3072, 1]cuda:0" = _foreach_add_1[144] + getitem_589: "f32[768][1]cuda:0" = _foreach_add_1[145] + getitem_590: "f32[768][1]cuda:0" = _foreach_add_1[146] + getitem_591: "f32[768][1]cuda:0" = _foreach_add_1[147]; _foreach_add_1 = None + + # File: /data/users/jjwu/a/pytorch/torch/optim/adam.py:543 in _multi_tensor_adam, code: torch._foreach_mul_(device_exp_avg_sqs, beta2) + _foreach_mul_1 = 
torch.ops.aten._foreach_mul.Scalar([arg446_1, arg150_1, arg447_1, arg448_1, arg449_1, arg450_1, arg451_1, arg452_1, arg453_1, arg454_1, arg455_1, arg456_1, arg457_1, arg458_1, arg459_1, arg460_1, arg461_1, arg462_1, arg463_1, arg464_1, arg465_1, arg466_1, arg467_1, arg468_1, arg469_1, arg470_1, arg471_1, arg472_1, arg473_1, arg474_1, arg475_1, arg476_1, arg477_1, arg478_1, arg479_1, arg480_1, arg481_1, arg482_1, arg483_1, arg484_1, arg485_1, arg486_1, arg487_1, arg488_1, arg489_1, arg490_1, arg491_1, arg492_1, arg493_1, arg494_1, arg495_1, arg496_1, arg497_1, arg498_1, arg499_1, arg500_1, arg501_1, arg502_1, arg503_1, arg504_1, arg505_1, arg506_1, arg507_1, arg508_1, arg509_1, arg510_1, arg511_1, arg512_1, arg513_1, arg514_1, arg515_1, arg516_1, arg517_1, arg518_1, arg519_1, arg520_1, arg521_1, arg522_1, arg523_1, arg524_1, arg525_1, arg526_1, arg527_1, arg528_1, arg529_1, arg530_1, arg531_1, arg532_1, arg533_1, arg534_1, arg535_1, arg536_1, arg537_1, arg538_1, arg539_1, arg540_1, arg541_1, arg542_1, arg543_1, arg544_1, arg545_1, arg546_1, arg547_1, arg548_1, arg549_1, arg550_1, arg551_1, arg552_1, arg553_1, arg554_1, arg555_1, arg556_1, arg557_1, arg558_1, arg559_1, arg560_1, arg561_1, arg562_1, arg563_1, arg564_1, arg565_1, arg566_1, arg567_1, arg568_1, arg569_1, arg570_1, arg571_1, arg572_1, arg573_1, arg574_1, arg575_1, arg576_1, arg577_1, arg578_1, arg579_1, arg580_1, arg581_1, arg582_1, arg583_1, arg584_1, arg585_1, arg586_1, arg587_1, arg588_1, arg589_1, arg590_1, arg591_1, arg592_1], 0.999) + getitem_592: "f32[50304, 768][768, 1]cuda:0" = _foreach_mul_1[0] + getitem_593: "f32[1024, 768][768, 1]cuda:0" = _foreach_mul_1[1] + getitem_594: "f32[768][1]cuda:0" = _foreach_mul_1[2] + getitem_595: "f32[768][1]cuda:0" = _foreach_mul_1[3] + getitem_596: "f32[2304, 768][768, 1]cuda:0" = _foreach_mul_1[4] + getitem_597: "f32[2304][1]cuda:0" = _foreach_mul_1[5] + getitem_598: "f32[768, 768][768, 1]cuda:0" = _foreach_mul_1[6] + getitem_599: "f32[768][1]cuda:0" = _foreach_mul_1[7] + getitem_600: "f32[768][1]cuda:0" = _foreach_mul_1[8] + getitem_601: "f32[768][1]cuda:0" = _foreach_mul_1[9] + getitem_602: "f32[3072, 768][768, 1]cuda:0" = _foreach_mul_1[10] + getitem_603: "f32[3072][1]cuda:0" = _foreach_mul_1[11] + getitem_604: "f32[768, 3072][3072, 1]cuda:0" = _foreach_mul_1[12] + getitem_605: "f32[768][1]cuda:0" = _foreach_mul_1[13] + getitem_606: "f32[768][1]cuda:0" = _foreach_mul_1[14] + getitem_607: "f32[768][1]cuda:0" = _foreach_mul_1[15] + getitem_608: "f32[2304, 768][768, 1]cuda:0" = _foreach_mul_1[16] + getitem_609: "f32[2304][1]cuda:0" = _foreach_mul_1[17] + getitem_610: "f32[768, 768][768, 1]cuda:0" = _foreach_mul_1[18] + getitem_611: "f32[768][1]cuda:0" = _foreach_mul_1[19] + getitem_612: "f32[768][1]cuda:0" = _foreach_mul_1[20] + getitem_613: "f32[768][1]cuda:0" = _foreach_mul_1[21] + getitem_614: "f32[3072, 768][768, 1]cuda:0" = _foreach_mul_1[22] + getitem_615: "f32[3072][1]cuda:0" = _foreach_mul_1[23] + getitem_616: "f32[768, 3072][3072, 1]cuda:0" = _foreach_mul_1[24] + getitem_617: "f32[768][1]cuda:0" = _foreach_mul_1[25] + getitem_618: "f32[768][1]cuda:0" = _foreach_mul_1[26] + getitem_619: "f32[768][1]cuda:0" = _foreach_mul_1[27] + getitem_620: "f32[2304, 768][768, 1]cuda:0" = _foreach_mul_1[28] + getitem_621: "f32[2304][1]cuda:0" = _foreach_mul_1[29] + getitem_622: "f32[768, 768][768, 1]cuda:0" = _foreach_mul_1[30] + getitem_623: "f32[768][1]cuda:0" = _foreach_mul_1[31] + getitem_624: "f32[768][1]cuda:0" = _foreach_mul_1[32] + getitem_625: "f32[768][1]cuda:0" = _foreach_mul_1[33] 
+ getitem_626: "f32[3072, 768][768, 1]cuda:0" = _foreach_mul_1[34] + getitem_627: "f32[3072][1]cuda:0" = _foreach_mul_1[35] + getitem_628: "f32[768, 3072][3072, 1]cuda:0" = _foreach_mul_1[36] + getitem_629: "f32[768][1]cuda:0" = _foreach_mul_1[37] + getitem_630: "f32[768][1]cuda:0" = _foreach_mul_1[38] + getitem_631: "f32[768][1]cuda:0" = _foreach_mul_1[39] + getitem_632: "f32[2304, 768][768, 1]cuda:0" = _foreach_mul_1[40] + getitem_633: "f32[2304][1]cuda:0" = _foreach_mul_1[41] + getitem_634: "f32[768, 768][768, 1]cuda:0" = _foreach_mul_1[42] + getitem_635: "f32[768][1]cuda:0" = _foreach_mul_1[43] + getitem_636: "f32[768][1]cuda:0" = _foreach_mul_1[44] + getitem_637: "f32[768][1]cuda:0" = _foreach_mul_1[45] + getitem_638: "f32[3072, 768][768, 1]cuda:0" = _foreach_mul_1[46] + getitem_639: "f32[3072][1]cuda:0" = _foreach_mul_1[47] + getitem_640: "f32[768, 3072][3072, 1]cuda:0" = _foreach_mul_1[48] + getitem_641: "f32[768][1]cuda:0" = _foreach_mul_1[49] + getitem_642: "f32[768][1]cuda:0" = _foreach_mul_1[50] + getitem_643: "f32[768][1]cuda:0" = _foreach_mul_1[51] + getitem_644: "f32[2304, 768][768, 1]cuda:0" = _foreach_mul_1[52] + getitem_645: "f32[2304][1]cuda:0" = _foreach_mul_1[53] + getitem_646: "f32[768, 768][768, 1]cuda:0" = _foreach_mul_1[54] + getitem_647: "f32[768][1]cuda:0" = _foreach_mul_1[55] + getitem_648: "f32[768][1]cuda:0" = _foreach_mul_1[56] + getitem_649: "f32[768][1]cuda:0" = _foreach_mul_1[57] + getitem_650: "f32[3072, 768][768, 1]cuda:0" = _foreach_mul_1[58] + getitem_651: "f32[3072][1]cuda:0" = _foreach_mul_1[59] + getitem_652: "f32[768, 3072][3072, 1]cuda:0" = _foreach_mul_1[60] + getitem_653: "f32[768][1]cuda:0" = _foreach_mul_1[61] + getitem_654: "f32[768][1]cuda:0" = _foreach_mul_1[62] + getitem_655: "f32[768][1]cuda:0" = _foreach_mul_1[63] + getitem_656: "f32[2304, 768][768, 1]cuda:0" = _foreach_mul_1[64] + getitem_657: "f32[2304][1]cuda:0" = _foreach_mul_1[65] + getitem_658: "f32[768, 768][768, 1]cuda:0" = _foreach_mul_1[66] + getitem_659: "f32[768][1]cuda:0" = _foreach_mul_1[67] + getitem_660: "f32[768][1]cuda:0" = _foreach_mul_1[68] + getitem_661: "f32[768][1]cuda:0" = _foreach_mul_1[69] + getitem_662: "f32[3072, 768][768, 1]cuda:0" = _foreach_mul_1[70] + getitem_663: "f32[3072][1]cuda:0" = _foreach_mul_1[71] + getitem_664: "f32[768, 3072][3072, 1]cuda:0" = _foreach_mul_1[72] + getitem_665: "f32[768][1]cuda:0" = _foreach_mul_1[73] + getitem_666: "f32[768][1]cuda:0" = _foreach_mul_1[74] + getitem_667: "f32[768][1]cuda:0" = _foreach_mul_1[75] + getitem_668: "f32[2304, 768][768, 1]cuda:0" = _foreach_mul_1[76] + getitem_669: "f32[2304][1]cuda:0" = _foreach_mul_1[77] + getitem_670: "f32[768, 768][768, 1]cuda:0" = _foreach_mul_1[78] + getitem_671: "f32[768][1]cuda:0" = _foreach_mul_1[79] + getitem_672: "f32[768][1]cuda:0" = _foreach_mul_1[80] + getitem_673: "f32[768][1]cuda:0" = _foreach_mul_1[81] + getitem_674: "f32[3072, 768][768, 1]cuda:0" = _foreach_mul_1[82] + getitem_675: "f32[3072][1]cuda:0" = _foreach_mul_1[83] + getitem_676: "f32[768, 3072][3072, 1]cuda:0" = _foreach_mul_1[84] + getitem_677: "f32[768][1]cuda:0" = _foreach_mul_1[85] + getitem_678: "f32[768][1]cuda:0" = _foreach_mul_1[86] + getitem_679: "f32[768][1]cuda:0" = _foreach_mul_1[87] + getitem_680: "f32[2304, 768][768, 1]cuda:0" = _foreach_mul_1[88] + getitem_681: "f32[2304][1]cuda:0" = _foreach_mul_1[89] + getitem_682: "f32[768, 768][768, 1]cuda:0" = _foreach_mul_1[90] + getitem_683: "f32[768][1]cuda:0" = _foreach_mul_1[91] + getitem_684: "f32[768][1]cuda:0" = _foreach_mul_1[92] + getitem_685: 
"f32[768][1]cuda:0" = _foreach_mul_1[93] + getitem_686: "f32[3072, 768][768, 1]cuda:0" = _foreach_mul_1[94] + getitem_687: "f32[3072][1]cuda:0" = _foreach_mul_1[95] + getitem_688: "f32[768, 3072][3072, 1]cuda:0" = _foreach_mul_1[96] + getitem_689: "f32[768][1]cuda:0" = _foreach_mul_1[97] + getitem_690: "f32[768][1]cuda:0" = _foreach_mul_1[98] + getitem_691: "f32[768][1]cuda:0" = _foreach_mul_1[99] + getitem_692: "f32[2304, 768][768, 1]cuda:0" = _foreach_mul_1[100] + getitem_693: "f32[2304][1]cuda:0" = _foreach_mul_1[101] + getitem_694: "f32[768, 768][768, 1]cuda:0" = _foreach_mul_1[102] + getitem_695: "f32[768][1]cuda:0" = _foreach_mul_1[103] + getitem_696: "f32[768][1]cuda:0" = _foreach_mul_1[104] + getitem_697: "f32[768][1]cuda:0" = _foreach_mul_1[105] + getitem_698: "f32[3072, 768][768, 1]cuda:0" = _foreach_mul_1[106] + getitem_699: "f32[3072][1]cuda:0" = _foreach_mul_1[107] + getitem_700: "f32[768, 3072][3072, 1]cuda:0" = _foreach_mul_1[108] + getitem_701: "f32[768][1]cuda:0" = _foreach_mul_1[109] + getitem_702: "f32[768][1]cuda:0" = _foreach_mul_1[110] + getitem_703: "f32[768][1]cuda:0" = _foreach_mul_1[111] + getitem_704: "f32[2304, 768][768, 1]cuda:0" = _foreach_mul_1[112] + getitem_705: "f32[2304][1]cuda:0" = _foreach_mul_1[113] + getitem_706: "f32[768, 768][768, 1]cuda:0" = _foreach_mul_1[114] + getitem_707: "f32[768][1]cuda:0" = _foreach_mul_1[115] + getitem_708: "f32[768][1]cuda:0" = _foreach_mul_1[116] + getitem_709: "f32[768][1]cuda:0" = _foreach_mul_1[117] + getitem_710: "f32[3072, 768][768, 1]cuda:0" = _foreach_mul_1[118] + getitem_711: "f32[3072][1]cuda:0" = _foreach_mul_1[119] + getitem_712: "f32[768, 3072][3072, 1]cuda:0" = _foreach_mul_1[120] + getitem_713: "f32[768][1]cuda:0" = _foreach_mul_1[121] + getitem_714: "f32[768][1]cuda:0" = _foreach_mul_1[122] + getitem_715: "f32[768][1]cuda:0" = _foreach_mul_1[123] + getitem_716: "f32[2304, 768][768, 1]cuda:0" = _foreach_mul_1[124] + getitem_717: "f32[2304][1]cuda:0" = _foreach_mul_1[125] + getitem_718: "f32[768, 768][768, 1]cuda:0" = _foreach_mul_1[126] + getitem_719: "f32[768][1]cuda:0" = _foreach_mul_1[127] + getitem_720: "f32[768][1]cuda:0" = _foreach_mul_1[128] + getitem_721: "f32[768][1]cuda:0" = _foreach_mul_1[129] + getitem_722: "f32[3072, 768][768, 1]cuda:0" = _foreach_mul_1[130] + getitem_723: "f32[3072][1]cuda:0" = _foreach_mul_1[131] + getitem_724: "f32[768, 3072][3072, 1]cuda:0" = _foreach_mul_1[132] + getitem_725: "f32[768][1]cuda:0" = _foreach_mul_1[133] + getitem_726: "f32[768][1]cuda:0" = _foreach_mul_1[134] + getitem_727: "f32[768][1]cuda:0" = _foreach_mul_1[135] + getitem_728: "f32[2304, 768][768, 1]cuda:0" = _foreach_mul_1[136] + getitem_729: "f32[2304][1]cuda:0" = _foreach_mul_1[137] + getitem_730: "f32[768, 768][768, 1]cuda:0" = _foreach_mul_1[138] + getitem_731: "f32[768][1]cuda:0" = _foreach_mul_1[139] + getitem_732: "f32[768][1]cuda:0" = _foreach_mul_1[140] + getitem_733: "f32[768][1]cuda:0" = _foreach_mul_1[141] + getitem_734: "f32[3072, 768][768, 1]cuda:0" = _foreach_mul_1[142] + getitem_735: "f32[3072][1]cuda:0" = _foreach_mul_1[143] + getitem_736: "f32[768, 3072][3072, 1]cuda:0" = _foreach_mul_1[144] + getitem_737: "f32[768][1]cuda:0" = _foreach_mul_1[145] + getitem_738: "f32[768][1]cuda:0" = _foreach_mul_1[146] + getitem_739: "f32[768][1]cuda:0" = _foreach_mul_1[147]; _foreach_mul_1 = None + + # File: /data/users/jjwu/a/pytorch/torch/optim/adam.py:544 in _multi_tensor_adam, code: torch._foreach_addcmul_( + _foreach_mul_2 = torch.ops.aten._foreach_mul.List([arg151_1, arg152_1, arg153_1, arg154_1, 
arg155_1, arg156_1, arg157_1, arg158_1, arg159_1, arg160_1, arg161_1, arg162_1, arg163_1, arg164_1, arg165_1, arg166_1, arg167_1, arg168_1, arg169_1, arg170_1, arg171_1, arg172_1, arg173_1, arg174_1, arg175_1, arg176_1, arg177_1, arg178_1, arg179_1, arg180_1, arg181_1, arg182_1, arg183_1, arg184_1, arg185_1, arg186_1, arg187_1, arg188_1, arg189_1, arg190_1, arg191_1, arg192_1, arg193_1, arg194_1, arg195_1, arg196_1, arg197_1, arg198_1, arg199_1, arg200_1, arg201_1, arg202_1, arg203_1, arg204_1, arg205_1, arg206_1, arg207_1, arg208_1, arg209_1, arg210_1, arg211_1, arg212_1, arg213_1, arg214_1, arg215_1, arg216_1, arg217_1, arg218_1, arg219_1, arg220_1, arg221_1, arg222_1, arg223_1, arg224_1, arg225_1, arg226_1, arg227_1, arg228_1, arg229_1, arg230_1, arg231_1, arg232_1, arg233_1, arg234_1, arg235_1, arg236_1, arg237_1, arg238_1, arg239_1, arg240_1, arg241_1, arg242_1, arg243_1, arg244_1, arg245_1, arg246_1, arg247_1, arg248_1, arg249_1, arg250_1, arg251_1, arg252_1, arg253_1, arg254_1, arg255_1, arg256_1, arg257_1, arg258_1, arg259_1, arg260_1, arg261_1, arg262_1, arg263_1, arg264_1, arg265_1, arg266_1, arg267_1, arg268_1, arg269_1, arg270_1, arg271_1, arg272_1, arg273_1, arg274_1, arg275_1, arg276_1, arg277_1, arg278_1, arg279_1, arg280_1, arg281_1, arg282_1, arg283_1, arg284_1, arg285_1, arg286_1, arg287_1, arg288_1, arg289_1, arg290_1, arg291_1, arg292_1, arg293_1, arg294_1, arg295_1, arg296_1, arg297_1, arg298_1], [arg151_1, arg152_1, arg153_1, arg154_1, arg155_1, arg156_1, arg157_1, arg158_1, arg159_1, arg160_1, arg161_1, arg162_1, arg163_1, arg164_1, arg165_1, arg166_1, arg167_1, arg168_1, arg169_1, arg170_1, arg171_1, arg172_1, arg173_1, arg174_1, arg175_1, arg176_1, arg177_1, arg178_1, arg179_1, arg180_1, arg181_1, arg182_1, arg183_1, arg184_1, arg185_1, arg186_1, arg187_1, arg188_1, arg189_1, arg190_1, arg191_1, arg192_1, arg193_1, arg194_1, arg195_1, arg196_1, arg197_1, arg198_1, arg199_1, arg200_1, arg201_1, arg202_1, arg203_1, arg204_1, arg205_1, arg206_1, arg207_1, arg208_1, arg209_1, arg210_1, arg211_1, arg212_1, arg213_1, arg214_1, arg215_1, arg216_1, arg217_1, arg218_1, arg219_1, arg220_1, arg221_1, arg222_1, arg223_1, arg224_1, arg225_1, arg226_1, arg227_1, arg228_1, arg229_1, arg230_1, arg231_1, arg232_1, arg233_1, arg234_1, arg235_1, arg236_1, arg237_1, arg238_1, arg239_1, arg240_1, arg241_1, arg242_1, arg243_1, arg244_1, arg245_1, arg246_1, arg247_1, arg248_1, arg249_1, arg250_1, arg251_1, arg252_1, arg253_1, arg254_1, arg255_1, arg256_1, arg257_1, arg258_1, arg259_1, arg260_1, arg261_1, arg262_1, arg263_1, arg264_1, arg265_1, arg266_1, arg267_1, arg268_1, arg269_1, arg270_1, arg271_1, arg272_1, arg273_1, arg274_1, arg275_1, arg276_1, arg277_1, arg278_1, arg279_1, arg280_1, arg281_1, arg282_1, arg283_1, arg284_1, arg285_1, arg286_1, arg287_1, arg288_1, arg289_1, arg290_1, arg291_1, arg292_1, arg293_1, arg294_1, arg295_1, arg296_1, arg297_1, arg298_1]); arg151_1 = arg152_1 = arg153_1 = arg154_1 = arg155_1 = arg156_1 = arg157_1 = arg158_1 = arg159_1 = arg160_1 = arg161_1 = arg162_1 = arg163_1 = arg164_1 = arg165_1 = arg166_1 = arg167_1 = arg168_1 = arg169_1 = arg170_1 = arg171_1 = arg172_1 = arg173_1 = arg174_1 = arg175_1 = arg176_1 = arg177_1 = arg178_1 = arg179_1 = arg180_1 = arg181_1 = arg182_1 = arg183_1 = arg184_1 = arg185_1 = arg186_1 = arg187_1 = arg188_1 = arg189_1 = arg190_1 = arg191_1 = arg192_1 = arg193_1 = arg194_1 = arg195_1 = arg196_1 = arg197_1 = arg198_1 = arg199_1 = arg200_1 = arg201_1 = arg202_1 = arg203_1 = arg204_1 = arg205_1 = arg206_1 = arg207_1 = 
arg208_1 = arg209_1 = arg210_1 = arg211_1 = arg212_1 = arg213_1 = arg214_1 = arg215_1 = arg216_1 = arg217_1 = arg218_1 = arg219_1 = arg220_1 = arg221_1 = arg222_1 = arg223_1 = arg224_1 = arg225_1 = arg226_1 = arg227_1 = arg228_1 = arg229_1 = arg230_1 = arg231_1 = arg232_1 = arg233_1 = arg234_1 = arg235_1 = arg236_1 = arg237_1 = arg238_1 = arg239_1 = arg240_1 = arg241_1 = arg242_1 = arg243_1 = arg244_1 = arg245_1 = arg246_1 = arg247_1 = arg248_1 = arg249_1 = arg250_1 = arg251_1 = arg252_1 = arg253_1 = arg254_1 = arg255_1 = arg256_1 = arg257_1 = arg258_1 = arg259_1 = arg260_1 = arg261_1 = arg262_1 = arg263_1 = arg264_1 = arg265_1 = arg266_1 = arg267_1 = arg268_1 = arg269_1 = arg270_1 = arg271_1 = arg272_1 = arg273_1 = arg274_1 = arg275_1 = arg276_1 = arg277_1 = arg278_1 = arg279_1 = arg280_1 = arg281_1 = arg282_1 = arg283_1 = arg284_1 = arg285_1 = arg286_1 = arg287_1 = arg288_1 = arg289_1 = arg290_1 = arg291_1 = arg292_1 = arg293_1 = arg294_1 = arg295_1 = arg296_1 = arg297_1 = arg298_1 = None + getitem_740: "f32[50304, 768][768, 1]cuda:0" = _foreach_mul_2[0] + getitem_741: "f32[1024, 768][768, 1]cuda:0" = _foreach_mul_2[1] + getitem_742: "f32[768][1]cuda:0" = _foreach_mul_2[2] + getitem_743: "f32[768][1]cuda:0" = _foreach_mul_2[3] + getitem_744: "f32[2304, 768][768, 1]cuda:0" = _foreach_mul_2[4] + getitem_745: "f32[2304][1]cuda:0" = _foreach_mul_2[5] + getitem_746: "f32[768, 768][768, 1]cuda:0" = _foreach_mul_2[6] + getitem_747: "f32[768][1]cuda:0" = _foreach_mul_2[7] + getitem_748: "f32[768][1]cuda:0" = _foreach_mul_2[8] + getitem_749: "f32[768][1]cuda:0" = _foreach_mul_2[9] + getitem_750: "f32[3072, 768][768, 1]cuda:0" = _foreach_mul_2[10] + getitem_751: "f32[3072][1]cuda:0" = _foreach_mul_2[11] + getitem_752: "f32[768, 3072][3072, 1]cuda:0" = _foreach_mul_2[12] + getitem_753: "f32[768][1]cuda:0" = _foreach_mul_2[13] + getitem_754: "f32[768][1]cuda:0" = _foreach_mul_2[14] + getitem_755: "f32[768][1]cuda:0" = _foreach_mul_2[15] + getitem_756: "f32[2304, 768][768, 1]cuda:0" = _foreach_mul_2[16] + getitem_757: "f32[2304][1]cuda:0" = _foreach_mul_2[17] + getitem_758: "f32[768, 768][768, 1]cuda:0" = _foreach_mul_2[18] + getitem_759: "f32[768][1]cuda:0" = _foreach_mul_2[19] + getitem_760: "f32[768][1]cuda:0" = _foreach_mul_2[20] + getitem_761: "f32[768][1]cuda:0" = _foreach_mul_2[21] + getitem_762: "f32[3072, 768][768, 1]cuda:0" = _foreach_mul_2[22] + getitem_763: "f32[3072][1]cuda:0" = _foreach_mul_2[23] + getitem_764: "f32[768, 3072][3072, 1]cuda:0" = _foreach_mul_2[24] + getitem_765: "f32[768][1]cuda:0" = _foreach_mul_2[25] + getitem_766: "f32[768][1]cuda:0" = _foreach_mul_2[26] + getitem_767: "f32[768][1]cuda:0" = _foreach_mul_2[27] + getitem_768: "f32[2304, 768][768, 1]cuda:0" = _foreach_mul_2[28] + getitem_769: "f32[2304][1]cuda:0" = _foreach_mul_2[29] + getitem_770: "f32[768, 768][768, 1]cuda:0" = _foreach_mul_2[30] + getitem_771: "f32[768][1]cuda:0" = _foreach_mul_2[31] + getitem_772: "f32[768][1]cuda:0" = _foreach_mul_2[32] + getitem_773: "f32[768][1]cuda:0" = _foreach_mul_2[33] + getitem_774: "f32[3072, 768][768, 1]cuda:0" = _foreach_mul_2[34] + getitem_775: "f32[3072][1]cuda:0" = _foreach_mul_2[35] + getitem_776: "f32[768, 3072][3072, 1]cuda:0" = _foreach_mul_2[36] + getitem_777: "f32[768][1]cuda:0" = _foreach_mul_2[37] + getitem_778: "f32[768][1]cuda:0" = _foreach_mul_2[38] + getitem_779: "f32[768][1]cuda:0" = _foreach_mul_2[39] + getitem_780: "f32[2304, 768][768, 1]cuda:0" = _foreach_mul_2[40] + getitem_781: "f32[2304][1]cuda:0" = _foreach_mul_2[41] + getitem_782: "f32[768, 
768][768, 1]cuda:0" = _foreach_mul_2[42] + getitem_783: "f32[768][1]cuda:0" = _foreach_mul_2[43] + getitem_784: "f32[768][1]cuda:0" = _foreach_mul_2[44] + getitem_785: "f32[768][1]cuda:0" = _foreach_mul_2[45] + getitem_786: "f32[3072, 768][768, 1]cuda:0" = _foreach_mul_2[46] + getitem_787: "f32[3072][1]cuda:0" = _foreach_mul_2[47] + getitem_788: "f32[768, 3072][3072, 1]cuda:0" = _foreach_mul_2[48] + getitem_789: "f32[768][1]cuda:0" = _foreach_mul_2[49] + getitem_790: "f32[768][1]cuda:0" = _foreach_mul_2[50] + getitem_791: "f32[768][1]cuda:0" = _foreach_mul_2[51] + getitem_792: "f32[2304, 768][768, 1]cuda:0" = _foreach_mul_2[52] + getitem_793: "f32[2304][1]cuda:0" = _foreach_mul_2[53] + getitem_794: "f32[768, 768][768, 1]cuda:0" = _foreach_mul_2[54] + getitem_795: "f32[768][1]cuda:0" = _foreach_mul_2[55] + getitem_796: "f32[768][1]cuda:0" = _foreach_mul_2[56] + getitem_797: "f32[768][1]cuda:0" = _foreach_mul_2[57] + getitem_798: "f32[3072, 768][768, 1]cuda:0" = _foreach_mul_2[58] + getitem_799: "f32[3072][1]cuda:0" = _foreach_mul_2[59] + getitem_800: "f32[768, 3072][3072, 1]cuda:0" = _foreach_mul_2[60] + getitem_801: "f32[768][1]cuda:0" = _foreach_mul_2[61] + getitem_802: "f32[768][1]cuda:0" = _foreach_mul_2[62] + getitem_803: "f32[768][1]cuda:0" = _foreach_mul_2[63] + getitem_804: "f32[2304, 768][768, 1]cuda:0" = _foreach_mul_2[64] + getitem_805: "f32[2304][1]cuda:0" = _foreach_mul_2[65] + getitem_806: "f32[768, 768][768, 1]cuda:0" = _foreach_mul_2[66] + getitem_807: "f32[768][1]cuda:0" = _foreach_mul_2[67] + getitem_808: "f32[768][1]cuda:0" = _foreach_mul_2[68] + getitem_809: "f32[768][1]cuda:0" = _foreach_mul_2[69] + getitem_810: "f32[3072, 768][768, 1]cuda:0" = _foreach_mul_2[70] + getitem_811: "f32[3072][1]cuda:0" = _foreach_mul_2[71] + getitem_812: "f32[768, 3072][3072, 1]cuda:0" = _foreach_mul_2[72] + getitem_813: "f32[768][1]cuda:0" = _foreach_mul_2[73] + getitem_814: "f32[768][1]cuda:0" = _foreach_mul_2[74] + getitem_815: "f32[768][1]cuda:0" = _foreach_mul_2[75] + getitem_816: "f32[2304, 768][768, 1]cuda:0" = _foreach_mul_2[76] + getitem_817: "f32[2304][1]cuda:0" = _foreach_mul_2[77] + getitem_818: "f32[768, 768][768, 1]cuda:0" = _foreach_mul_2[78] + getitem_819: "f32[768][1]cuda:0" = _foreach_mul_2[79] + getitem_820: "f32[768][1]cuda:0" = _foreach_mul_2[80] + getitem_821: "f32[768][1]cuda:0" = _foreach_mul_2[81] + getitem_822: "f32[3072, 768][768, 1]cuda:0" = _foreach_mul_2[82] + getitem_823: "f32[3072][1]cuda:0" = _foreach_mul_2[83] + getitem_824: "f32[768, 3072][3072, 1]cuda:0" = _foreach_mul_2[84] + getitem_825: "f32[768][1]cuda:0" = _foreach_mul_2[85] + getitem_826: "f32[768][1]cuda:0" = _foreach_mul_2[86] + getitem_827: "f32[768][1]cuda:0" = _foreach_mul_2[87] + getitem_828: "f32[2304, 768][768, 1]cuda:0" = _foreach_mul_2[88] + getitem_829: "f32[2304][1]cuda:0" = _foreach_mul_2[89] + getitem_830: "f32[768, 768][768, 1]cuda:0" = _foreach_mul_2[90] + getitem_831: "f32[768][1]cuda:0" = _foreach_mul_2[91] + getitem_832: "f32[768][1]cuda:0" = _foreach_mul_2[92] + getitem_833: "f32[768][1]cuda:0" = _foreach_mul_2[93] + getitem_834: "f32[3072, 768][768, 1]cuda:0" = _foreach_mul_2[94] + getitem_835: "f32[3072][1]cuda:0" = _foreach_mul_2[95] + getitem_836: "f32[768, 3072][3072, 1]cuda:0" = _foreach_mul_2[96] + getitem_837: "f32[768][1]cuda:0" = _foreach_mul_2[97] + getitem_838: "f32[768][1]cuda:0" = _foreach_mul_2[98] + getitem_839: "f32[768][1]cuda:0" = _foreach_mul_2[99] + getitem_840: "f32[2304, 768][768, 1]cuda:0" = _foreach_mul_2[100] + getitem_841: "f32[2304][1]cuda:0" = 
_foreach_mul_2[101] + getitem_842: "f32[768, 768][768, 1]cuda:0" = _foreach_mul_2[102] + getitem_843: "f32[768][1]cuda:0" = _foreach_mul_2[103] + getitem_844: "f32[768][1]cuda:0" = _foreach_mul_2[104] + getitem_845: "f32[768][1]cuda:0" = _foreach_mul_2[105] + getitem_846: "f32[3072, 768][768, 1]cuda:0" = _foreach_mul_2[106] + getitem_847: "f32[3072][1]cuda:0" = _foreach_mul_2[107] + getitem_848: "f32[768, 3072][3072, 1]cuda:0" = _foreach_mul_2[108] + getitem_849: "f32[768][1]cuda:0" = _foreach_mul_2[109] + getitem_850: "f32[768][1]cuda:0" = _foreach_mul_2[110] + getitem_851: "f32[768][1]cuda:0" = _foreach_mul_2[111] + getitem_852: "f32[2304, 768][768, 1]cuda:0" = _foreach_mul_2[112] + getitem_853: "f32[2304][1]cuda:0" = _foreach_mul_2[113] + getitem_854: "f32[768, 768][768, 1]cuda:0" = _foreach_mul_2[114] + getitem_855: "f32[768][1]cuda:0" = _foreach_mul_2[115] + getitem_856: "f32[768][1]cuda:0" = _foreach_mul_2[116] + getitem_857: "f32[768][1]cuda:0" = _foreach_mul_2[117] + getitem_858: "f32[3072, 768][768, 1]cuda:0" = _foreach_mul_2[118] + getitem_859: "f32[3072][1]cuda:0" = _foreach_mul_2[119] + getitem_860: "f32[768, 3072][3072, 1]cuda:0" = _foreach_mul_2[120] + getitem_861: "f32[768][1]cuda:0" = _foreach_mul_2[121] + getitem_862: "f32[768][1]cuda:0" = _foreach_mul_2[122] + getitem_863: "f32[768][1]cuda:0" = _foreach_mul_2[123] + getitem_864: "f32[2304, 768][768, 1]cuda:0" = _foreach_mul_2[124] + getitem_865: "f32[2304][1]cuda:0" = _foreach_mul_2[125] + getitem_866: "f32[768, 768][768, 1]cuda:0" = _foreach_mul_2[126] + getitem_867: "f32[768][1]cuda:0" = _foreach_mul_2[127] + getitem_868: "f32[768][1]cuda:0" = _foreach_mul_2[128] + getitem_869: "f32[768][1]cuda:0" = _foreach_mul_2[129] + getitem_870: "f32[3072, 768][768, 1]cuda:0" = _foreach_mul_2[130] + getitem_871: "f32[3072][1]cuda:0" = _foreach_mul_2[131] + getitem_872: "f32[768, 3072][3072, 1]cuda:0" = _foreach_mul_2[132] + getitem_873: "f32[768][1]cuda:0" = _foreach_mul_2[133] + getitem_874: "f32[768][1]cuda:0" = _foreach_mul_2[134] + getitem_875: "f32[768][1]cuda:0" = _foreach_mul_2[135] + getitem_876: "f32[2304, 768][768, 1]cuda:0" = _foreach_mul_2[136] + getitem_877: "f32[2304][1]cuda:0" = _foreach_mul_2[137] + getitem_878: "f32[768, 768][768, 1]cuda:0" = _foreach_mul_2[138] + getitem_879: "f32[768][1]cuda:0" = _foreach_mul_2[139] + getitem_880: "f32[768][1]cuda:0" = _foreach_mul_2[140] + getitem_881: "f32[768][1]cuda:0" = _foreach_mul_2[141] + getitem_882: "f32[3072, 768][768, 1]cuda:0" = _foreach_mul_2[142] + getitem_883: "f32[3072][1]cuda:0" = _foreach_mul_2[143] + getitem_884: "f32[768, 3072][3072, 1]cuda:0" = _foreach_mul_2[144] + getitem_885: "f32[768][1]cuda:0" = _foreach_mul_2[145] + getitem_886: "f32[768][1]cuda:0" = _foreach_mul_2[146] + getitem_887: "f32[768][1]cuda:0" = _foreach_mul_2[147]; _foreach_mul_2 = None + _foreach_add_2 = torch.ops.aten._foreach_add.List([getitem_592, getitem_593, getitem_594, getitem_595, getitem_596, getitem_597, getitem_598, getitem_599, getitem_600, getitem_601, getitem_602, getitem_603, getitem_604, getitem_605, getitem_606, getitem_607, getitem_608, getitem_609, getitem_610, getitem_611, getitem_612, getitem_613, getitem_614, getitem_615, getitem_616, getitem_617, getitem_618, getitem_619, getitem_620, getitem_621, getitem_622, getitem_623, getitem_624, getitem_625, getitem_626, getitem_627, getitem_628, getitem_629, getitem_630, getitem_631, getitem_632, getitem_633, getitem_634, getitem_635, getitem_636, getitem_637, getitem_638, getitem_639, getitem_640, getitem_641, getitem_642, 
getitem_643, getitem_644, getitem_645, getitem_646, getitem_647, getitem_648, getitem_649, getitem_650, getitem_651, getitem_652, getitem_653, getitem_654, getitem_655, getitem_656, getitem_657, getitem_658, getitem_659, getitem_660, getitem_661, getitem_662, getitem_663, getitem_664, getitem_665, getitem_666, getitem_667, getitem_668, getitem_669, getitem_670, getitem_671, getitem_672, getitem_673, getitem_674, getitem_675, getitem_676, getitem_677, getitem_678, getitem_679, getitem_680, getitem_681, getitem_682, getitem_683, getitem_684, getitem_685, getitem_686, getitem_687, getitem_688, getitem_689, getitem_690, getitem_691, getitem_692, getitem_693, getitem_694, getitem_695, getitem_696, getitem_697, getitem_698, getitem_699, getitem_700, getitem_701, getitem_702, getitem_703, getitem_704, getitem_705, getitem_706, getitem_707, getitem_708, getitem_709, getitem_710, getitem_711, getitem_712, getitem_713, getitem_714, getitem_715, getitem_716, getitem_717, getitem_718, getitem_719, getitem_720, getitem_721, getitem_722, getitem_723, getitem_724, getitem_725, getitem_726, getitem_727, getitem_728, getitem_729, getitem_730, getitem_731, getitem_732, getitem_733, getitem_734, getitem_735, getitem_736, getitem_737, getitem_738, getitem_739], [getitem_740, getitem_741, getitem_742, getitem_743, getitem_744, getitem_745, getitem_746, getitem_747, getitem_748, getitem_749, getitem_750, getitem_751, getitem_752, getitem_753, getitem_754, getitem_755, getitem_756, getitem_757, getitem_758, getitem_759, getitem_760, getitem_761, getitem_762, getitem_763, getitem_764, getitem_765, getitem_766, getitem_767, getitem_768, getitem_769, getitem_770, getitem_771, getitem_772, getitem_773, getitem_774, getitem_775, getitem_776, getitem_777, getitem_778, getitem_779, getitem_780, getitem_781, getitem_782, getitem_783, getitem_784, getitem_785, getitem_786, getitem_787, getitem_788, getitem_789, getitem_790, getitem_791, getitem_792, getitem_793, getitem_794, getitem_795, getitem_796, getitem_797, getitem_798, getitem_799, getitem_800, getitem_801, getitem_802, getitem_803, getitem_804, getitem_805, getitem_806, getitem_807, getitem_808, getitem_809, getitem_810, getitem_811, getitem_812, getitem_813, getitem_814, getitem_815, getitem_816, getitem_817, getitem_818, getitem_819, getitem_820, getitem_821, getitem_822, getitem_823, getitem_824, getitem_825, getitem_826, getitem_827, getitem_828, getitem_829, getitem_830, getitem_831, getitem_832, getitem_833, getitem_834, getitem_835, getitem_836, getitem_837, getitem_838, getitem_839, getitem_840, getitem_841, getitem_842, getitem_843, getitem_844, getitem_845, getitem_846, getitem_847, getitem_848, getitem_849, getitem_850, getitem_851, getitem_852, getitem_853, getitem_854, getitem_855, getitem_856, getitem_857, getitem_858, getitem_859, getitem_860, getitem_861, getitem_862, getitem_863, getitem_864, getitem_865, getitem_866, getitem_867, getitem_868, getitem_869, getitem_870, getitem_871, getitem_872, getitem_873, getitem_874, getitem_875, getitem_876, getitem_877, getitem_878, getitem_879, getitem_880, getitem_881, getitem_882, getitem_883, getitem_884, getitem_885, getitem_886, getitem_887], alpha = 0.0010000000000000009); getitem_592 = getitem_593 = getitem_594 = getitem_595 = getitem_596 = getitem_597 = getitem_598 = getitem_599 = getitem_600 = getitem_601 = getitem_602 = getitem_603 = getitem_604 = getitem_605 = getitem_606 = getitem_607 = getitem_608 = getitem_609 = getitem_610 = getitem_611 = getitem_612 = getitem_613 = getitem_614 = getitem_615 
= getitem_616 = getitem_617 = getitem_618 = getitem_619 = getitem_620 = getitem_621 = getitem_622 = getitem_623 = getitem_624 = getitem_625 = getitem_626 = getitem_627 = getitem_628 = getitem_629 = getitem_630 = getitem_631 = getitem_632 = getitem_633 = getitem_634 = getitem_635 = getitem_636 = getitem_637 = getitem_638 = getitem_639 = getitem_640 = getitem_641 = getitem_642 = getitem_643 = getitem_644 = getitem_645 = getitem_646 = getitem_647 = getitem_648 = getitem_649 = getitem_650 = getitem_651 = getitem_652 = getitem_653 = getitem_654 = getitem_655 = getitem_656 = getitem_657 = getitem_658 = getitem_659 = getitem_660 = getitem_661 = getitem_662 = getitem_663 = getitem_664 = getitem_665 = getitem_666 = getitem_667 = getitem_668 = getitem_669 = getitem_670 = getitem_671 = getitem_672 = getitem_673 = getitem_674 = getitem_675 = getitem_676 = getitem_677 = getitem_678 = getitem_679 = getitem_680 = getitem_681 = getitem_682 = getitem_683 = getitem_684 = getitem_685 = getitem_686 = getitem_687 = getitem_688 = getitem_689 = getitem_690 = getitem_691 = getitem_692 = getitem_693 = getitem_694 = getitem_695 = getitem_696 = getitem_697 = getitem_698 = getitem_699 = getitem_700 = getitem_701 = getitem_702 = getitem_703 = getitem_704 = getitem_705 = getitem_706 = getitem_707 = getitem_708 = getitem_709 = getitem_710 = getitem_711 = getitem_712 = getitem_713 = getitem_714 = getitem_715 = getitem_716 = getitem_717 = getitem_718 = getitem_719 = getitem_720 = getitem_721 = getitem_722 = getitem_723 = getitem_724 = getitem_725 = getitem_726 = getitem_727 = getitem_728 = getitem_729 = getitem_730 = getitem_731 = getitem_732 = getitem_733 = getitem_734 = getitem_735 = getitem_736 = getitem_737 = getitem_738 = getitem_739 = getitem_740 = getitem_741 = getitem_742 = getitem_743 = getitem_744 = getitem_745 = getitem_746 = getitem_747 = getitem_748 = getitem_749 = getitem_750 = getitem_751 = getitem_752 = getitem_753 = getitem_754 = getitem_755 = getitem_756 = getitem_757 = getitem_758 = getitem_759 = getitem_760 = getitem_761 = getitem_762 = getitem_763 = getitem_764 = getitem_765 = getitem_766 = getitem_767 = getitem_768 = getitem_769 = getitem_770 = getitem_771 = getitem_772 = getitem_773 = getitem_774 = getitem_775 = getitem_776 = getitem_777 = getitem_778 = getitem_779 = getitem_780 = getitem_781 = getitem_782 = getitem_783 = getitem_784 = getitem_785 = getitem_786 = getitem_787 = getitem_788 = getitem_789 = getitem_790 = getitem_791 = getitem_792 = getitem_793 = getitem_794 = getitem_795 = getitem_796 = getitem_797 = getitem_798 = getitem_799 = getitem_800 = getitem_801 = getitem_802 = getitem_803 = getitem_804 = getitem_805 = getitem_806 = getitem_807 = getitem_808 = getitem_809 = getitem_810 = getitem_811 = getitem_812 = getitem_813 = getitem_814 = getitem_815 = getitem_816 = getitem_817 = getitem_818 = getitem_819 = getitem_820 = getitem_821 = getitem_822 = getitem_823 = getitem_824 = getitem_825 = getitem_826 = getitem_827 = getitem_828 = getitem_829 = getitem_830 = getitem_831 = getitem_832 = getitem_833 = getitem_834 = getitem_835 = getitem_836 = getitem_837 = getitem_838 = getitem_839 = getitem_840 = getitem_841 = getitem_842 = getitem_843 = getitem_844 = getitem_845 = getitem_846 = getitem_847 = getitem_848 = getitem_849 = getitem_850 = getitem_851 = getitem_852 = getitem_853 = getitem_854 = getitem_855 = getitem_856 = getitem_857 = getitem_858 = getitem_859 = getitem_860 = getitem_861 = getitem_862 = getitem_863 = getitem_864 = getitem_865 = getitem_866 = getitem_867 = getitem_868 = 
getitem_869 = getitem_870 = getitem_871 = getitem_872 = getitem_873 = getitem_874 = getitem_875 = getitem_876 = getitem_877 = getitem_878 = getitem_879 = getitem_880 = getitem_881 = getitem_882 = getitem_883 = getitem_884 = getitem_885 = getitem_886 = getitem_887 = None + getitem_888: "f32[50304, 768][768, 1]cuda:0" = _foreach_add_2[0] + getitem_889: "f32[1024, 768][768, 1]cuda:0" = _foreach_add_2[1] + getitem_890: "f32[768][1]cuda:0" = _foreach_add_2[2] + getitem_891: "f32[768][1]cuda:0" = _foreach_add_2[3] + getitem_892: "f32[2304, 768][768, 1]cuda:0" = _foreach_add_2[4] + getitem_893: "f32[2304][1]cuda:0" = _foreach_add_2[5] + getitem_894: "f32[768, 768][768, 1]cuda:0" = _foreach_add_2[6] + getitem_895: "f32[768][1]cuda:0" = _foreach_add_2[7] + getitem_896: "f32[768][1]cuda:0" = _foreach_add_2[8] + getitem_897: "f32[768][1]cuda:0" = _foreach_add_2[9] + getitem_898: "f32[3072, 768][768, 1]cuda:0" = _foreach_add_2[10] + getitem_899: "f32[3072][1]cuda:0" = _foreach_add_2[11] + getitem_900: "f32[768, 3072][3072, 1]cuda:0" = _foreach_add_2[12] + getitem_901: "f32[768][1]cuda:0" = _foreach_add_2[13] + getitem_902: "f32[768][1]cuda:0" = _foreach_add_2[14] + getitem_903: "f32[768][1]cuda:0" = _foreach_add_2[15] + getitem_904: "f32[2304, 768][768, 1]cuda:0" = _foreach_add_2[16] + getitem_905: "f32[2304][1]cuda:0" = _foreach_add_2[17] + getitem_906: "f32[768, 768][768, 1]cuda:0" = _foreach_add_2[18] + getitem_907: "f32[768][1]cuda:0" = _foreach_add_2[19] + getitem_908: "f32[768][1]cuda:0" = _foreach_add_2[20] + getitem_909: "f32[768][1]cuda:0" = _foreach_add_2[21] + getitem_910: "f32[3072, 768][768, 1]cuda:0" = _foreach_add_2[22] + getitem_911: "f32[3072][1]cuda:0" = _foreach_add_2[23] + getitem_912: "f32[768, 3072][3072, 1]cuda:0" = _foreach_add_2[24] + getitem_913: "f32[768][1]cuda:0" = _foreach_add_2[25] + getitem_914: "f32[768][1]cuda:0" = _foreach_add_2[26] + getitem_915: "f32[768][1]cuda:0" = _foreach_add_2[27] + getitem_916: "f32[2304, 768][768, 1]cuda:0" = _foreach_add_2[28] + getitem_917: "f32[2304][1]cuda:0" = _foreach_add_2[29] + getitem_918: "f32[768, 768][768, 1]cuda:0" = _foreach_add_2[30] + getitem_919: "f32[768][1]cuda:0" = _foreach_add_2[31] + getitem_920: "f32[768][1]cuda:0" = _foreach_add_2[32] + getitem_921: "f32[768][1]cuda:0" = _foreach_add_2[33] + getitem_922: "f32[3072, 768][768, 1]cuda:0" = _foreach_add_2[34] + getitem_923: "f32[3072][1]cuda:0" = _foreach_add_2[35] + getitem_924: "f32[768, 3072][3072, 1]cuda:0" = _foreach_add_2[36] + getitem_925: "f32[768][1]cuda:0" = _foreach_add_2[37] + getitem_926: "f32[768][1]cuda:0" = _foreach_add_2[38] + getitem_927: "f32[768][1]cuda:0" = _foreach_add_2[39] + getitem_928: "f32[2304, 768][768, 1]cuda:0" = _foreach_add_2[40] + getitem_929: "f32[2304][1]cuda:0" = _foreach_add_2[41] + getitem_930: "f32[768, 768][768, 1]cuda:0" = _foreach_add_2[42] + getitem_931: "f32[768][1]cuda:0" = _foreach_add_2[43] + getitem_932: "f32[768][1]cuda:0" = _foreach_add_2[44] + getitem_933: "f32[768][1]cuda:0" = _foreach_add_2[45] + getitem_934: "f32[3072, 768][768, 1]cuda:0" = _foreach_add_2[46] + getitem_935: "f32[3072][1]cuda:0" = _foreach_add_2[47] + getitem_936: "f32[768, 3072][3072, 1]cuda:0" = _foreach_add_2[48] + getitem_937: "f32[768][1]cuda:0" = _foreach_add_2[49] + getitem_938: "f32[768][1]cuda:0" = _foreach_add_2[50] + getitem_939: "f32[768][1]cuda:0" = _foreach_add_2[51] + getitem_940: "f32[2304, 768][768, 1]cuda:0" = _foreach_add_2[52] + getitem_941: "f32[2304][1]cuda:0" = _foreach_add_2[53] + getitem_942: "f32[768, 768][768, 1]cuda:0" = 
_foreach_add_2[54] + getitem_943: "f32[768][1]cuda:0" = _foreach_add_2[55] + getitem_944: "f32[768][1]cuda:0" = _foreach_add_2[56] + getitem_945: "f32[768][1]cuda:0" = _foreach_add_2[57] + getitem_946: "f32[3072, 768][768, 1]cuda:0" = _foreach_add_2[58] + getitem_947: "f32[3072][1]cuda:0" = _foreach_add_2[59] + getitem_948: "f32[768, 3072][3072, 1]cuda:0" = _foreach_add_2[60] + getitem_949: "f32[768][1]cuda:0" = _foreach_add_2[61] + getitem_950: "f32[768][1]cuda:0" = _foreach_add_2[62] + getitem_951: "f32[768][1]cuda:0" = _foreach_add_2[63] + getitem_952: "f32[2304, 768][768, 1]cuda:0" = _foreach_add_2[64] + getitem_953: "f32[2304][1]cuda:0" = _foreach_add_2[65] + getitem_954: "f32[768, 768][768, 1]cuda:0" = _foreach_add_2[66] + getitem_955: "f32[768][1]cuda:0" = _foreach_add_2[67] + getitem_956: "f32[768][1]cuda:0" = _foreach_add_2[68] + getitem_957: "f32[768][1]cuda:0" = _foreach_add_2[69] + getitem_958: "f32[3072, 768][768, 1]cuda:0" = _foreach_add_2[70] + getitem_959: "f32[3072][1]cuda:0" = _foreach_add_2[71] + getitem_960: "f32[768, 3072][3072, 1]cuda:0" = _foreach_add_2[72] + getitem_961: "f32[768][1]cuda:0" = _foreach_add_2[73] + getitem_962: "f32[768][1]cuda:0" = _foreach_add_2[74] + getitem_963: "f32[768][1]cuda:0" = _foreach_add_2[75] + getitem_964: "f32[2304, 768][768, 1]cuda:0" = _foreach_add_2[76] + getitem_965: "f32[2304][1]cuda:0" = _foreach_add_2[77] + getitem_966: "f32[768, 768][768, 1]cuda:0" = _foreach_add_2[78] + getitem_967: "f32[768][1]cuda:0" = _foreach_add_2[79] + getitem_968: "f32[768][1]cuda:0" = _foreach_add_2[80] + getitem_969: "f32[768][1]cuda:0" = _foreach_add_2[81] + getitem_970: "f32[3072, 768][768, 1]cuda:0" = _foreach_add_2[82] + getitem_971: "f32[3072][1]cuda:0" = _foreach_add_2[83] + getitem_972: "f32[768, 3072][3072, 1]cuda:0" = _foreach_add_2[84] + getitem_973: "f32[768][1]cuda:0" = _foreach_add_2[85] + getitem_974: "f32[768][1]cuda:0" = _foreach_add_2[86] + getitem_975: "f32[768][1]cuda:0" = _foreach_add_2[87] + getitem_976: "f32[2304, 768][768, 1]cuda:0" = _foreach_add_2[88] + getitem_977: "f32[2304][1]cuda:0" = _foreach_add_2[89] + getitem_978: "f32[768, 768][768, 1]cuda:0" = _foreach_add_2[90] + getitem_979: "f32[768][1]cuda:0" = _foreach_add_2[91] + getitem_980: "f32[768][1]cuda:0" = _foreach_add_2[92] + getitem_981: "f32[768][1]cuda:0" = _foreach_add_2[93] + getitem_982: "f32[3072, 768][768, 1]cuda:0" = _foreach_add_2[94] + getitem_983: "f32[3072][1]cuda:0" = _foreach_add_2[95] + getitem_984: "f32[768, 3072][3072, 1]cuda:0" = _foreach_add_2[96] + getitem_985: "f32[768][1]cuda:0" = _foreach_add_2[97] + getitem_986: "f32[768][1]cuda:0" = _foreach_add_2[98] + getitem_987: "f32[768][1]cuda:0" = _foreach_add_2[99] + getitem_988: "f32[2304, 768][768, 1]cuda:0" = _foreach_add_2[100] + getitem_989: "f32[2304][1]cuda:0" = _foreach_add_2[101] + getitem_990: "f32[768, 768][768, 1]cuda:0" = _foreach_add_2[102] + getitem_991: "f32[768][1]cuda:0" = _foreach_add_2[103] + getitem_992: "f32[768][1]cuda:0" = _foreach_add_2[104] + getitem_993: "f32[768][1]cuda:0" = _foreach_add_2[105] + getitem_994: "f32[3072, 768][768, 1]cuda:0" = _foreach_add_2[106] + getitem_995: "f32[3072][1]cuda:0" = _foreach_add_2[107] + getitem_996: "f32[768, 3072][3072, 1]cuda:0" = _foreach_add_2[108] + getitem_997: "f32[768][1]cuda:0" = _foreach_add_2[109] + getitem_998: "f32[768][1]cuda:0" = _foreach_add_2[110] + getitem_999: "f32[768][1]cuda:0" = _foreach_add_2[111] + getitem_1000: "f32[2304, 768][768, 1]cuda:0" = _foreach_add_2[112] + getitem_1001: "f32[2304][1]cuda:0" = 
_foreach_add_2[113] + getitem_1002: "f32[768, 768][768, 1]cuda:0" = _foreach_add_2[114] + getitem_1003: "f32[768][1]cuda:0" = _foreach_add_2[115] + getitem_1004: "f32[768][1]cuda:0" = _foreach_add_2[116] + getitem_1005: "f32[768][1]cuda:0" = _foreach_add_2[117] + getitem_1006: "f32[3072, 768][768, 1]cuda:0" = _foreach_add_2[118] + getitem_1007: "f32[3072][1]cuda:0" = _foreach_add_2[119] + getitem_1008: "f32[768, 3072][3072, 1]cuda:0" = _foreach_add_2[120] + getitem_1009: "f32[768][1]cuda:0" = _foreach_add_2[121] + getitem_1010: "f32[768][1]cuda:0" = _foreach_add_2[122] + getitem_1011: "f32[768][1]cuda:0" = _foreach_add_2[123] + getitem_1012: "f32[2304, 768][768, 1]cuda:0" = _foreach_add_2[124] + getitem_1013: "f32[2304][1]cuda:0" = _foreach_add_2[125] + getitem_1014: "f32[768, 768][768, 1]cuda:0" = _foreach_add_2[126] + getitem_1015: "f32[768][1]cuda:0" = _foreach_add_2[127] + getitem_1016: "f32[768][1]cuda:0" = _foreach_add_2[128] + getitem_1017: "f32[768][1]cuda:0" = _foreach_add_2[129] + getitem_1018: "f32[3072, 768][768, 1]cuda:0" = _foreach_add_2[130] + getitem_1019: "f32[3072][1]cuda:0" = _foreach_add_2[131] + getitem_1020: "f32[768, 3072][3072, 1]cuda:0" = _foreach_add_2[132] + getitem_1021: "f32[768][1]cuda:0" = _foreach_add_2[133] + getitem_1022: "f32[768][1]cuda:0" = _foreach_add_2[134] + getitem_1023: "f32[768][1]cuda:0" = _foreach_add_2[135] + getitem_1024: "f32[2304, 768][768, 1]cuda:0" = _foreach_add_2[136] + getitem_1025: "f32[2304][1]cuda:0" = _foreach_add_2[137] + getitem_1026: "f32[768, 768][768, 1]cuda:0" = _foreach_add_2[138] + getitem_1027: "f32[768][1]cuda:0" = _foreach_add_2[139] + getitem_1028: "f32[768][1]cuda:0" = _foreach_add_2[140] + getitem_1029: "f32[768][1]cuda:0" = _foreach_add_2[141] + getitem_1030: "f32[3072, 768][768, 1]cuda:0" = _foreach_add_2[142] + getitem_1031: "f32[3072][1]cuda:0" = _foreach_add_2[143] + getitem_1032: "f32[768, 3072][3072, 1]cuda:0" = _foreach_add_2[144] + getitem_1033: "f32[768][1]cuda:0" = _foreach_add_2[145] + getitem_1034: "f32[768][1]cuda:0" = _foreach_add_2[146] + getitem_1035: "f32[768][1]cuda:0" = _foreach_add_2[147]; _foreach_add_2 = None + + # File: /data/users/jjwu/a/pytorch/torch/optim/adam.py:555 in _multi_tensor_adam, code: bias_correction1 = torch._foreach_pow(beta1, device_state_steps) + _foreach_pow = torch.ops.aten._foreach_pow.ScalarAndTensor(0.9, [getitem, getitem_1, getitem_2, getitem_3, getitem_4, getitem_5, getitem_6, getitem_7, getitem_8, getitem_9, getitem_10, getitem_11, getitem_12, getitem_13, getitem_14, getitem_15, getitem_16, getitem_17, getitem_18, getitem_19, getitem_20, getitem_21, getitem_22, getitem_23, getitem_24, getitem_25, getitem_26, getitem_27, getitem_28, getitem_29, getitem_30, getitem_31, getitem_32, getitem_33, getitem_34, getitem_35, getitem_36, getitem_37, getitem_38, getitem_39, getitem_40, getitem_41, getitem_42, getitem_43, getitem_44, getitem_45, getitem_46, getitem_47, getitem_48, getitem_49, getitem_50, getitem_51, getitem_52, getitem_53, getitem_54, getitem_55, getitem_56, getitem_57, getitem_58, getitem_59, getitem_60, getitem_61, getitem_62, getitem_63, getitem_64, getitem_65, getitem_66, getitem_67, getitem_68, getitem_69, getitem_70, getitem_71, getitem_72, getitem_73, getitem_74, getitem_75, getitem_76, getitem_77, getitem_78, getitem_79, getitem_80, getitem_81, getitem_82, getitem_83, getitem_84, getitem_85, getitem_86, getitem_87, getitem_88, getitem_89, getitem_90, getitem_91, getitem_92, getitem_93, getitem_94, getitem_95, getitem_96, getitem_97, getitem_98, getitem_99, 
getitem_100, getitem_101, getitem_102, getitem_103, getitem_104, getitem_105, getitem_106, getitem_107, getitem_108, getitem_109, getitem_110, getitem_111, getitem_112, getitem_113, getitem_114, getitem_115, getitem_116, getitem_117, getitem_118, getitem_119, getitem_120, getitem_121, getitem_122, getitem_123, getitem_124, getitem_125, getitem_126, getitem_127, getitem_128, getitem_129, getitem_130, getitem_131, getitem_132, getitem_133, getitem_134, getitem_135, getitem_136, getitem_137, getitem_138, getitem_139, getitem_140, getitem_141, getitem_142, getitem_143, getitem_144, getitem_145, getitem_146, getitem_147]) + getitem_1036: "f32[][]cuda:0" = _foreach_pow[0] + getitem_1037: "f32[][]cuda:0" = _foreach_pow[1] + getitem_1038: "f32[][]cuda:0" = _foreach_pow[2] + getitem_1039: "f32[][]cuda:0" = _foreach_pow[3] + getitem_1040: "f32[][]cuda:0" = _foreach_pow[4] + getitem_1041: "f32[][]cuda:0" = _foreach_pow[5] + getitem_1042: "f32[][]cuda:0" = _foreach_pow[6] + getitem_1043: "f32[][]cuda:0" = _foreach_pow[7] + getitem_1044: "f32[][]cuda:0" = _foreach_pow[8] + getitem_1045: "f32[][]cuda:0" = _foreach_pow[9] + getitem_1046: "f32[][]cuda:0" = _foreach_pow[10] + getitem_1047: "f32[][]cuda:0" = _foreach_pow[11] + getitem_1048: "f32[][]cuda:0" = _foreach_pow[12] + getitem_1049: "f32[][]cuda:0" = _foreach_pow[13] + getitem_1050: "f32[][]cuda:0" = _foreach_pow[14] + getitem_1051: "f32[][]cuda:0" = _foreach_pow[15] + getitem_1052: "f32[][]cuda:0" = _foreach_pow[16] + getitem_1053: "f32[][]cuda:0" = _foreach_pow[17] + getitem_1054: "f32[][]cuda:0" = _foreach_pow[18] + getitem_1055: "f32[][]cuda:0" = _foreach_pow[19] + getitem_1056: "f32[][]cuda:0" = _foreach_pow[20] + getitem_1057: "f32[][]cuda:0" = _foreach_pow[21] + getitem_1058: "f32[][]cuda:0" = _foreach_pow[22] + getitem_1059: "f32[][]cuda:0" = _foreach_pow[23] + getitem_1060: "f32[][]cuda:0" = _foreach_pow[24] + getitem_1061: "f32[][]cuda:0" = _foreach_pow[25] + getitem_1062: "f32[][]cuda:0" = _foreach_pow[26] + getitem_1063: "f32[][]cuda:0" = _foreach_pow[27] + getitem_1064: "f32[][]cuda:0" = _foreach_pow[28] + getitem_1065: "f32[][]cuda:0" = _foreach_pow[29] + getitem_1066: "f32[][]cuda:0" = _foreach_pow[30] + getitem_1067: "f32[][]cuda:0" = _foreach_pow[31] + getitem_1068: "f32[][]cuda:0" = _foreach_pow[32] + getitem_1069: "f32[][]cuda:0" = _foreach_pow[33] + getitem_1070: "f32[][]cuda:0" = _foreach_pow[34] + getitem_1071: "f32[][]cuda:0" = _foreach_pow[35] + getitem_1072: "f32[][]cuda:0" = _foreach_pow[36] + getitem_1073: "f32[][]cuda:0" = _foreach_pow[37] + getitem_1074: "f32[][]cuda:0" = _foreach_pow[38] + getitem_1075: "f32[][]cuda:0" = _foreach_pow[39] + getitem_1076: "f32[][]cuda:0" = _foreach_pow[40] + getitem_1077: "f32[][]cuda:0" = _foreach_pow[41] + getitem_1078: "f32[][]cuda:0" = _foreach_pow[42] + getitem_1079: "f32[][]cuda:0" = _foreach_pow[43] + getitem_1080: "f32[][]cuda:0" = _foreach_pow[44] + getitem_1081: "f32[][]cuda:0" = _foreach_pow[45] + getitem_1082: "f32[][]cuda:0" = _foreach_pow[46] + getitem_1083: "f32[][]cuda:0" = _foreach_pow[47] + getitem_1084: "f32[][]cuda:0" = _foreach_pow[48] + getitem_1085: "f32[][]cuda:0" = _foreach_pow[49] + getitem_1086: "f32[][]cuda:0" = _foreach_pow[50] + getitem_1087: "f32[][]cuda:0" = _foreach_pow[51] + getitem_1088: "f32[][]cuda:0" = _foreach_pow[52] + getitem_1089: "f32[][]cuda:0" = _foreach_pow[53] + getitem_1090: "f32[][]cuda:0" = _foreach_pow[54] + getitem_1091: "f32[][]cuda:0" = _foreach_pow[55] + getitem_1092: "f32[][]cuda:0" = _foreach_pow[56] + getitem_1093: "f32[][]cuda:0" 
= _foreach_pow[57] + getitem_1094: "f32[][]cuda:0" = _foreach_pow[58] + getitem_1095: "f32[][]cuda:0" = _foreach_pow[59] + getitem_1096: "f32[][]cuda:0" = _foreach_pow[60] + getitem_1097: "f32[][]cuda:0" = _foreach_pow[61] + getitem_1098: "f32[][]cuda:0" = _foreach_pow[62] + getitem_1099: "f32[][]cuda:0" = _foreach_pow[63] + getitem_1100: "f32[][]cuda:0" = _foreach_pow[64] + getitem_1101: "f32[][]cuda:0" = _foreach_pow[65] + getitem_1102: "f32[][]cuda:0" = _foreach_pow[66] + getitem_1103: "f32[][]cuda:0" = _foreach_pow[67] + getitem_1104: "f32[][]cuda:0" = _foreach_pow[68] + getitem_1105: "f32[][]cuda:0" = _foreach_pow[69] + getitem_1106: "f32[][]cuda:0" = _foreach_pow[70] + getitem_1107: "f32[][]cuda:0" = _foreach_pow[71] + getitem_1108: "f32[][]cuda:0" = _foreach_pow[72] + getitem_1109: "f32[][]cuda:0" = _foreach_pow[73] + getitem_1110: "f32[][]cuda:0" = _foreach_pow[74] + getitem_1111: "f32[][]cuda:0" = _foreach_pow[75] + getitem_1112: "f32[][]cuda:0" = _foreach_pow[76] + getitem_1113: "f32[][]cuda:0" = _foreach_pow[77] + getitem_1114: "f32[][]cuda:0" = _foreach_pow[78] + getitem_1115: "f32[][]cuda:0" = _foreach_pow[79] + getitem_1116: "f32[][]cuda:0" = _foreach_pow[80] + getitem_1117: "f32[][]cuda:0" = _foreach_pow[81] + getitem_1118: "f32[][]cuda:0" = _foreach_pow[82] + getitem_1119: "f32[][]cuda:0" = _foreach_pow[83] + getitem_1120: "f32[][]cuda:0" = _foreach_pow[84] + getitem_1121: "f32[][]cuda:0" = _foreach_pow[85] + getitem_1122: "f32[][]cuda:0" = _foreach_pow[86] + getitem_1123: "f32[][]cuda:0" = _foreach_pow[87] + getitem_1124: "f32[][]cuda:0" = _foreach_pow[88] + getitem_1125: "f32[][]cuda:0" = _foreach_pow[89] + getitem_1126: "f32[][]cuda:0" = _foreach_pow[90] + getitem_1127: "f32[][]cuda:0" = _foreach_pow[91] + getitem_1128: "f32[][]cuda:0" = _foreach_pow[92] + getitem_1129: "f32[][]cuda:0" = _foreach_pow[93] + getitem_1130: "f32[][]cuda:0" = _foreach_pow[94] + getitem_1131: "f32[][]cuda:0" = _foreach_pow[95] + getitem_1132: "f32[][]cuda:0" = _foreach_pow[96] + getitem_1133: "f32[][]cuda:0" = _foreach_pow[97] + getitem_1134: "f32[][]cuda:0" = _foreach_pow[98] + getitem_1135: "f32[][]cuda:0" = _foreach_pow[99] + getitem_1136: "f32[][]cuda:0" = _foreach_pow[100] + getitem_1137: "f32[][]cuda:0" = _foreach_pow[101] + getitem_1138: "f32[][]cuda:0" = _foreach_pow[102] + getitem_1139: "f32[][]cuda:0" = _foreach_pow[103] + getitem_1140: "f32[][]cuda:0" = _foreach_pow[104] + getitem_1141: "f32[][]cuda:0" = _foreach_pow[105] + getitem_1142: "f32[][]cuda:0" = _foreach_pow[106] + getitem_1143: "f32[][]cuda:0" = _foreach_pow[107] + getitem_1144: "f32[][]cuda:0" = _foreach_pow[108] + getitem_1145: "f32[][]cuda:0" = _foreach_pow[109] + getitem_1146: "f32[][]cuda:0" = _foreach_pow[110] + getitem_1147: "f32[][]cuda:0" = _foreach_pow[111] + getitem_1148: "f32[][]cuda:0" = _foreach_pow[112] + getitem_1149: "f32[][]cuda:0" = _foreach_pow[113] + getitem_1150: "f32[][]cuda:0" = _foreach_pow[114] + getitem_1151: "f32[][]cuda:0" = _foreach_pow[115] + getitem_1152: "f32[][]cuda:0" = _foreach_pow[116] + getitem_1153: "f32[][]cuda:0" = _foreach_pow[117] + getitem_1154: "f32[][]cuda:0" = _foreach_pow[118] + getitem_1155: "f32[][]cuda:0" = _foreach_pow[119] + getitem_1156: "f32[][]cuda:0" = _foreach_pow[120] + getitem_1157: "f32[][]cuda:0" = _foreach_pow[121] + getitem_1158: "f32[][]cuda:0" = _foreach_pow[122] + getitem_1159: "f32[][]cuda:0" = _foreach_pow[123] + getitem_1160: "f32[][]cuda:0" = _foreach_pow[124] + getitem_1161: "f32[][]cuda:0" = _foreach_pow[125] + getitem_1162: "f32[][]cuda:0" = 
_foreach_pow[126] + getitem_1163: "f32[][]cuda:0" = _foreach_pow[127] + getitem_1164: "f32[][]cuda:0" = _foreach_pow[128] + getitem_1165: "f32[][]cuda:0" = _foreach_pow[129] + getitem_1166: "f32[][]cuda:0" = _foreach_pow[130] + getitem_1167: "f32[][]cuda:0" = _foreach_pow[131] + getitem_1168: "f32[][]cuda:0" = _foreach_pow[132] + getitem_1169: "f32[][]cuda:0" = _foreach_pow[133] + getitem_1170: "f32[][]cuda:0" = _foreach_pow[134] + getitem_1171: "f32[][]cuda:0" = _foreach_pow[135] + getitem_1172: "f32[][]cuda:0" = _foreach_pow[136] + getitem_1173: "f32[][]cuda:0" = _foreach_pow[137] + getitem_1174: "f32[][]cuda:0" = _foreach_pow[138] + getitem_1175: "f32[][]cuda:0" = _foreach_pow[139] + getitem_1176: "f32[][]cuda:0" = _foreach_pow[140] + getitem_1177: "f32[][]cuda:0" = _foreach_pow[141] + getitem_1178: "f32[][]cuda:0" = _foreach_pow[142] + getitem_1179: "f32[][]cuda:0" = _foreach_pow[143] + getitem_1180: "f32[][]cuda:0" = _foreach_pow[144] + getitem_1181: "f32[][]cuda:0" = _foreach_pow[145] + getitem_1182: "f32[][]cuda:0" = _foreach_pow[146] + getitem_1183: "f32[][]cuda:0" = _foreach_pow[147]; _foreach_pow = None + + # File: /data/users/jjwu/a/pytorch/torch/optim/adam.py:556 in _multi_tensor_adam, code: bias_correction2 = torch._foreach_pow(beta2, device_state_steps) + _foreach_pow_1 = torch.ops.aten._foreach_pow.ScalarAndTensor(0.999, [getitem, getitem_1, getitem_2, getitem_3, getitem_4, getitem_5, getitem_6, getitem_7, getitem_8, getitem_9, getitem_10, getitem_11, getitem_12, getitem_13, getitem_14, getitem_15, getitem_16, getitem_17, getitem_18, getitem_19, getitem_20, getitem_21, getitem_22, getitem_23, getitem_24, getitem_25, getitem_26, getitem_27, getitem_28, getitem_29, getitem_30, getitem_31, getitem_32, getitem_33, getitem_34, getitem_35, getitem_36, getitem_37, getitem_38, getitem_39, getitem_40, getitem_41, getitem_42, getitem_43, getitem_44, getitem_45, getitem_46, getitem_47, getitem_48, getitem_49, getitem_50, getitem_51, getitem_52, getitem_53, getitem_54, getitem_55, getitem_56, getitem_57, getitem_58, getitem_59, getitem_60, getitem_61, getitem_62, getitem_63, getitem_64, getitem_65, getitem_66, getitem_67, getitem_68, getitem_69, getitem_70, getitem_71, getitem_72, getitem_73, getitem_74, getitem_75, getitem_76, getitem_77, getitem_78, getitem_79, getitem_80, getitem_81, getitem_82, getitem_83, getitem_84, getitem_85, getitem_86, getitem_87, getitem_88, getitem_89, getitem_90, getitem_91, getitem_92, getitem_93, getitem_94, getitem_95, getitem_96, getitem_97, getitem_98, getitem_99, getitem_100, getitem_101, getitem_102, getitem_103, getitem_104, getitem_105, getitem_106, getitem_107, getitem_108, getitem_109, getitem_110, getitem_111, getitem_112, getitem_113, getitem_114, getitem_115, getitem_116, getitem_117, getitem_118, getitem_119, getitem_120, getitem_121, getitem_122, getitem_123, getitem_124, getitem_125, getitem_126, getitem_127, getitem_128, getitem_129, getitem_130, getitem_131, getitem_132, getitem_133, getitem_134, getitem_135, getitem_136, getitem_137, getitem_138, getitem_139, getitem_140, getitem_141, getitem_142, getitem_143, getitem_144, getitem_145, getitem_146, getitem_147]); getitem = getitem_1 = getitem_2 = getitem_3 = getitem_4 = getitem_5 = getitem_6 = getitem_7 = getitem_8 = getitem_9 = getitem_10 = getitem_11 = getitem_12 = getitem_13 = getitem_14 = getitem_15 = getitem_16 = getitem_17 = getitem_18 = getitem_19 = getitem_20 = getitem_21 = getitem_22 = getitem_23 = getitem_24 = getitem_25 = getitem_26 = getitem_27 = getitem_28 = getitem_29 = 
getitem_30 = getitem_31 = getitem_32 = getitem_33 = getitem_34 = getitem_35 = getitem_36 = getitem_37 = getitem_38 = getitem_39 = getitem_40 = getitem_41 = getitem_42 = getitem_43 = getitem_44 = getitem_45 = getitem_46 = getitem_47 = getitem_48 = getitem_49 = getitem_50 = getitem_51 = getitem_52 = getitem_53 = getitem_54 = getitem_55 = getitem_56 = getitem_57 = getitem_58 = getitem_59 = getitem_60 = getitem_61 = getitem_62 = getitem_63 = getitem_64 = getitem_65 = getitem_66 = getitem_67 = getitem_68 = getitem_69 = getitem_70 = getitem_71 = getitem_72 = getitem_73 = getitem_74 = getitem_75 = getitem_76 = getitem_77 = getitem_78 = getitem_79 = getitem_80 = getitem_81 = getitem_82 = getitem_83 = getitem_84 = getitem_85 = getitem_86 = getitem_87 = getitem_88 = getitem_89 = getitem_90 = getitem_91 = getitem_92 = getitem_93 = getitem_94 = getitem_95 = getitem_96 = getitem_97 = getitem_98 = getitem_99 = getitem_100 = getitem_101 = getitem_102 = getitem_103 = getitem_104 = getitem_105 = getitem_106 = getitem_107 = getitem_108 = getitem_109 = getitem_110 = getitem_111 = getitem_112 = getitem_113 = getitem_114 = getitem_115 = getitem_116 = getitem_117 = getitem_118 = getitem_119 = getitem_120 = getitem_121 = getitem_122 = getitem_123 = getitem_124 = getitem_125 = getitem_126 = getitem_127 = getitem_128 = getitem_129 = getitem_130 = getitem_131 = getitem_132 = getitem_133 = getitem_134 = getitem_135 = getitem_136 = getitem_137 = getitem_138 = getitem_139 = getitem_140 = getitem_141 = getitem_142 = getitem_143 = getitem_144 = getitem_145 = getitem_146 = getitem_147 = None + getitem_1184: "f32[][]cuda:0" = _foreach_pow_1[0] + getitem_1185: "f32[][]cuda:0" = _foreach_pow_1[1] + getitem_1186: "f32[][]cuda:0" = _foreach_pow_1[2] + getitem_1187: "f32[][]cuda:0" = _foreach_pow_1[3] + getitem_1188: "f32[][]cuda:0" = _foreach_pow_1[4] + getitem_1189: "f32[][]cuda:0" = _foreach_pow_1[5] + getitem_1190: "f32[][]cuda:0" = _foreach_pow_1[6] + getitem_1191: "f32[][]cuda:0" = _foreach_pow_1[7] + getitem_1192: "f32[][]cuda:0" = _foreach_pow_1[8] + getitem_1193: "f32[][]cuda:0" = _foreach_pow_1[9] + getitem_1194: "f32[][]cuda:0" = _foreach_pow_1[10] + getitem_1195: "f32[][]cuda:0" = _foreach_pow_1[11] + getitem_1196: "f32[][]cuda:0" = _foreach_pow_1[12] + getitem_1197: "f32[][]cuda:0" = _foreach_pow_1[13] + getitem_1198: "f32[][]cuda:0" = _foreach_pow_1[14] + getitem_1199: "f32[][]cuda:0" = _foreach_pow_1[15] + getitem_1200: "f32[][]cuda:0" = _foreach_pow_1[16] + getitem_1201: "f32[][]cuda:0" = _foreach_pow_1[17] + getitem_1202: "f32[][]cuda:0" = _foreach_pow_1[18] + getitem_1203: "f32[][]cuda:0" = _foreach_pow_1[19] + getitem_1204: "f32[][]cuda:0" = _foreach_pow_1[20] + getitem_1205: "f32[][]cuda:0" = _foreach_pow_1[21] + getitem_1206: "f32[][]cuda:0" = _foreach_pow_1[22] + getitem_1207: "f32[][]cuda:0" = _foreach_pow_1[23] + getitem_1208: "f32[][]cuda:0" = _foreach_pow_1[24] + getitem_1209: "f32[][]cuda:0" = _foreach_pow_1[25] + getitem_1210: "f32[][]cuda:0" = _foreach_pow_1[26] + getitem_1211: "f32[][]cuda:0" = _foreach_pow_1[27] + getitem_1212: "f32[][]cuda:0" = _foreach_pow_1[28] + getitem_1213: "f32[][]cuda:0" = _foreach_pow_1[29] + getitem_1214: "f32[][]cuda:0" = _foreach_pow_1[30] + getitem_1215: "f32[][]cuda:0" = _foreach_pow_1[31] + getitem_1216: "f32[][]cuda:0" = _foreach_pow_1[32] + getitem_1217: "f32[][]cuda:0" = _foreach_pow_1[33] + getitem_1218: "f32[][]cuda:0" = _foreach_pow_1[34] + getitem_1219: "f32[][]cuda:0" = _foreach_pow_1[35] + getitem_1220: "f32[][]cuda:0" = _foreach_pow_1[36] + getitem_1221: 
"f32[][]cuda:0" = _foreach_pow_1[37] + getitem_1222: "f32[][]cuda:0" = _foreach_pow_1[38] + getitem_1223: "f32[][]cuda:0" = _foreach_pow_1[39] + getitem_1224: "f32[][]cuda:0" = _foreach_pow_1[40] + getitem_1225: "f32[][]cuda:0" = _foreach_pow_1[41] + getitem_1226: "f32[][]cuda:0" = _foreach_pow_1[42] + getitem_1227: "f32[][]cuda:0" = _foreach_pow_1[43] + getitem_1228: "f32[][]cuda:0" = _foreach_pow_1[44] + getitem_1229: "f32[][]cuda:0" = _foreach_pow_1[45] + getitem_1230: "f32[][]cuda:0" = _foreach_pow_1[46] + getitem_1231: "f32[][]cuda:0" = _foreach_pow_1[47] + getitem_1232: "f32[][]cuda:0" = _foreach_pow_1[48] + getitem_1233: "f32[][]cuda:0" = _foreach_pow_1[49] + getitem_1234: "f32[][]cuda:0" = _foreach_pow_1[50] + getitem_1235: "f32[][]cuda:0" = _foreach_pow_1[51] + getitem_1236: "f32[][]cuda:0" = _foreach_pow_1[52] + getitem_1237: "f32[][]cuda:0" = _foreach_pow_1[53] + getitem_1238: "f32[][]cuda:0" = _foreach_pow_1[54] + getitem_1239: "f32[][]cuda:0" = _foreach_pow_1[55] + getitem_1240: "f32[][]cuda:0" = _foreach_pow_1[56] + getitem_1241: "f32[][]cuda:0" = _foreach_pow_1[57] + getitem_1242: "f32[][]cuda:0" = _foreach_pow_1[58] + getitem_1243: "f32[][]cuda:0" = _foreach_pow_1[59] + getitem_1244: "f32[][]cuda:0" = _foreach_pow_1[60] + getitem_1245: "f32[][]cuda:0" = _foreach_pow_1[61] + getitem_1246: "f32[][]cuda:0" = _foreach_pow_1[62] + getitem_1247: "f32[][]cuda:0" = _foreach_pow_1[63] + getitem_1248: "f32[][]cuda:0" = _foreach_pow_1[64] + getitem_1249: "f32[][]cuda:0" = _foreach_pow_1[65] + getitem_1250: "f32[][]cuda:0" = _foreach_pow_1[66] + getitem_1251: "f32[][]cuda:0" = _foreach_pow_1[67] + getitem_1252: "f32[][]cuda:0" = _foreach_pow_1[68] + getitem_1253: "f32[][]cuda:0" = _foreach_pow_1[69] + getitem_1254: "f32[][]cuda:0" = _foreach_pow_1[70] + getitem_1255: "f32[][]cuda:0" = _foreach_pow_1[71] + getitem_1256: "f32[][]cuda:0" = _foreach_pow_1[72] + getitem_1257: "f32[][]cuda:0" = _foreach_pow_1[73] + getitem_1258: "f32[][]cuda:0" = _foreach_pow_1[74] + getitem_1259: "f32[][]cuda:0" = _foreach_pow_1[75] + getitem_1260: "f32[][]cuda:0" = _foreach_pow_1[76] + getitem_1261: "f32[][]cuda:0" = _foreach_pow_1[77] + getitem_1262: "f32[][]cuda:0" = _foreach_pow_1[78] + getitem_1263: "f32[][]cuda:0" = _foreach_pow_1[79] + getitem_1264: "f32[][]cuda:0" = _foreach_pow_1[80] + getitem_1265: "f32[][]cuda:0" = _foreach_pow_1[81] + getitem_1266: "f32[][]cuda:0" = _foreach_pow_1[82] + getitem_1267: "f32[][]cuda:0" = _foreach_pow_1[83] + getitem_1268: "f32[][]cuda:0" = _foreach_pow_1[84] + getitem_1269: "f32[][]cuda:0" = _foreach_pow_1[85] + getitem_1270: "f32[][]cuda:0" = _foreach_pow_1[86] + getitem_1271: "f32[][]cuda:0" = _foreach_pow_1[87] + getitem_1272: "f32[][]cuda:0" = _foreach_pow_1[88] + getitem_1273: "f32[][]cuda:0" = _foreach_pow_1[89] + getitem_1274: "f32[][]cuda:0" = _foreach_pow_1[90] + getitem_1275: "f32[][]cuda:0" = _foreach_pow_1[91] + getitem_1276: "f32[][]cuda:0" = _foreach_pow_1[92] + getitem_1277: "f32[][]cuda:0" = _foreach_pow_1[93] + getitem_1278: "f32[][]cuda:0" = _foreach_pow_1[94] + getitem_1279: "f32[][]cuda:0" = _foreach_pow_1[95] + getitem_1280: "f32[][]cuda:0" = _foreach_pow_1[96] + getitem_1281: "f32[][]cuda:0" = _foreach_pow_1[97] + getitem_1282: "f32[][]cuda:0" = _foreach_pow_1[98] + getitem_1283: "f32[][]cuda:0" = _foreach_pow_1[99] + getitem_1284: "f32[][]cuda:0" = _foreach_pow_1[100] + getitem_1285: "f32[][]cuda:0" = _foreach_pow_1[101] + getitem_1286: "f32[][]cuda:0" = _foreach_pow_1[102] + getitem_1287: "f32[][]cuda:0" = _foreach_pow_1[103] + getitem_1288: 
"f32[][]cuda:0" = _foreach_pow_1[104] + getitem_1289: "f32[][]cuda:0" = _foreach_pow_1[105] + getitem_1290: "f32[][]cuda:0" = _foreach_pow_1[106] + getitem_1291: "f32[][]cuda:0" = _foreach_pow_1[107] + getitem_1292: "f32[][]cuda:0" = _foreach_pow_1[108] + getitem_1293: "f32[][]cuda:0" = _foreach_pow_1[109] + getitem_1294: "f32[][]cuda:0" = _foreach_pow_1[110] + getitem_1295: "f32[][]cuda:0" = _foreach_pow_1[111] + getitem_1296: "f32[][]cuda:0" = _foreach_pow_1[112] + getitem_1297: "f32[][]cuda:0" = _foreach_pow_1[113] + getitem_1298: "f32[][]cuda:0" = _foreach_pow_1[114] + getitem_1299: "f32[][]cuda:0" = _foreach_pow_1[115] + getitem_1300: "f32[][]cuda:0" = _foreach_pow_1[116] + getitem_1301: "f32[][]cuda:0" = _foreach_pow_1[117] + getitem_1302: "f32[][]cuda:0" = _foreach_pow_1[118] + getitem_1303: "f32[][]cuda:0" = _foreach_pow_1[119] + getitem_1304: "f32[][]cuda:0" = _foreach_pow_1[120] + getitem_1305: "f32[][]cuda:0" = _foreach_pow_1[121] + getitem_1306: "f32[][]cuda:0" = _foreach_pow_1[122] + getitem_1307: "f32[][]cuda:0" = _foreach_pow_1[123] + getitem_1308: "f32[][]cuda:0" = _foreach_pow_1[124] + getitem_1309: "f32[][]cuda:0" = _foreach_pow_1[125] + getitem_1310: "f32[][]cuda:0" = _foreach_pow_1[126] + getitem_1311: "f32[][]cuda:0" = _foreach_pow_1[127] + getitem_1312: "f32[][]cuda:0" = _foreach_pow_1[128] + getitem_1313: "f32[][]cuda:0" = _foreach_pow_1[129] + getitem_1314: "f32[][]cuda:0" = _foreach_pow_1[130] + getitem_1315: "f32[][]cuda:0" = _foreach_pow_1[131] + getitem_1316: "f32[][]cuda:0" = _foreach_pow_1[132] + getitem_1317: "f32[][]cuda:0" = _foreach_pow_1[133] + getitem_1318: "f32[][]cuda:0" = _foreach_pow_1[134] + getitem_1319: "f32[][]cuda:0" = _foreach_pow_1[135] + getitem_1320: "f32[][]cuda:0" = _foreach_pow_1[136] + getitem_1321: "f32[][]cuda:0" = _foreach_pow_1[137] + getitem_1322: "f32[][]cuda:0" = _foreach_pow_1[138] + getitem_1323: "f32[][]cuda:0" = _foreach_pow_1[139] + getitem_1324: "f32[][]cuda:0" = _foreach_pow_1[140] + getitem_1325: "f32[][]cuda:0" = _foreach_pow_1[141] + getitem_1326: "f32[][]cuda:0" = _foreach_pow_1[142] + getitem_1327: "f32[][]cuda:0" = _foreach_pow_1[143] + getitem_1328: "f32[][]cuda:0" = _foreach_pow_1[144] + getitem_1329: "f32[][]cuda:0" = _foreach_pow_1[145] + getitem_1330: "f32[][]cuda:0" = _foreach_pow_1[146] + getitem_1331: "f32[][]cuda:0" = _foreach_pow_1[147]; _foreach_pow_1 = None + + # File: /data/users/jjwu/a/pytorch/torch/optim/adam.py:558 in _multi_tensor_adam, code: torch._foreach_sub_(bias_correction1, 1) + _foreach_sub_1 = torch.ops.aten._foreach_sub.Scalar([getitem_1036, getitem_1037, getitem_1038, getitem_1039, getitem_1040, getitem_1041, getitem_1042, getitem_1043, getitem_1044, getitem_1045, getitem_1046, getitem_1047, getitem_1048, getitem_1049, getitem_1050, getitem_1051, getitem_1052, getitem_1053, getitem_1054, getitem_1055, getitem_1056, getitem_1057, getitem_1058, getitem_1059, getitem_1060, getitem_1061, getitem_1062, getitem_1063, getitem_1064, getitem_1065, getitem_1066, getitem_1067, getitem_1068, getitem_1069, getitem_1070, getitem_1071, getitem_1072, getitem_1073, getitem_1074, getitem_1075, getitem_1076, getitem_1077, getitem_1078, getitem_1079, getitem_1080, getitem_1081, getitem_1082, getitem_1083, getitem_1084, getitem_1085, getitem_1086, getitem_1087, getitem_1088, getitem_1089, getitem_1090, getitem_1091, getitem_1092, getitem_1093, getitem_1094, getitem_1095, getitem_1096, getitem_1097, getitem_1098, getitem_1099, getitem_1100, getitem_1101, getitem_1102, getitem_1103, getitem_1104, getitem_1105, 
getitem_1106, getitem_1107, getitem_1108, getitem_1109, getitem_1110, getitem_1111, getitem_1112, getitem_1113, getitem_1114, getitem_1115, getitem_1116, getitem_1117, getitem_1118, getitem_1119, getitem_1120, getitem_1121, getitem_1122, getitem_1123, getitem_1124, getitem_1125, getitem_1126, getitem_1127, getitem_1128, getitem_1129, getitem_1130, getitem_1131, getitem_1132, getitem_1133, getitem_1134, getitem_1135, getitem_1136, getitem_1137, getitem_1138, getitem_1139, getitem_1140, getitem_1141, getitem_1142, getitem_1143, getitem_1144, getitem_1145, getitem_1146, getitem_1147, getitem_1148, getitem_1149, getitem_1150, getitem_1151, getitem_1152, getitem_1153, getitem_1154, getitem_1155, getitem_1156, getitem_1157, getitem_1158, getitem_1159, getitem_1160, getitem_1161, getitem_1162, getitem_1163, getitem_1164, getitem_1165, getitem_1166, getitem_1167, getitem_1168, getitem_1169, getitem_1170, getitem_1171, getitem_1172, getitem_1173, getitem_1174, getitem_1175, getitem_1176, getitem_1177, getitem_1178, getitem_1179, getitem_1180, getitem_1181, getitem_1182, getitem_1183], 1); getitem_1036 = getitem_1037 = getitem_1038 = getitem_1039 = getitem_1040 = getitem_1041 = getitem_1042 = getitem_1043 = getitem_1044 = getitem_1045 = getitem_1046 = getitem_1047 = getitem_1048 = getitem_1049 = getitem_1050 = getitem_1051 = getitem_1052 = getitem_1053 = getitem_1054 = getitem_1055 = getitem_1056 = getitem_1057 = getitem_1058 = getitem_1059 = getitem_1060 = getitem_1061 = getitem_1062 = getitem_1063 = getitem_1064 = getitem_1065 = getitem_1066 = getitem_1067 = getitem_1068 = getitem_1069 = getitem_1070 = getitem_1071 = getitem_1072 = getitem_1073 = getitem_1074 = getitem_1075 = getitem_1076 = getitem_1077 = getitem_1078 = getitem_1079 = getitem_1080 = getitem_1081 = getitem_1082 = getitem_1083 = getitem_1084 = getitem_1085 = getitem_1086 = getitem_1087 = getitem_1088 = getitem_1089 = getitem_1090 = getitem_1091 = getitem_1092 = getitem_1093 = getitem_1094 = getitem_1095 = getitem_1096 = getitem_1097 = getitem_1098 = getitem_1099 = getitem_1100 = getitem_1101 = getitem_1102 = getitem_1103 = getitem_1104 = getitem_1105 = getitem_1106 = getitem_1107 = getitem_1108 = getitem_1109 = getitem_1110 = getitem_1111 = getitem_1112 = getitem_1113 = getitem_1114 = getitem_1115 = getitem_1116 = getitem_1117 = getitem_1118 = getitem_1119 = getitem_1120 = getitem_1121 = getitem_1122 = getitem_1123 = getitem_1124 = getitem_1125 = getitem_1126 = getitem_1127 = getitem_1128 = getitem_1129 = getitem_1130 = getitem_1131 = getitem_1132 = getitem_1133 = getitem_1134 = getitem_1135 = getitem_1136 = getitem_1137 = getitem_1138 = getitem_1139 = getitem_1140 = getitem_1141 = getitem_1142 = getitem_1143 = getitem_1144 = getitem_1145 = getitem_1146 = getitem_1147 = getitem_1148 = getitem_1149 = getitem_1150 = getitem_1151 = getitem_1152 = getitem_1153 = getitem_1154 = getitem_1155 = getitem_1156 = getitem_1157 = getitem_1158 = getitem_1159 = getitem_1160 = getitem_1161 = getitem_1162 = getitem_1163 = getitem_1164 = getitem_1165 = getitem_1166 = getitem_1167 = getitem_1168 = getitem_1169 = getitem_1170 = getitem_1171 = getitem_1172 = getitem_1173 = getitem_1174 = getitem_1175 = getitem_1176 = getitem_1177 = getitem_1178 = getitem_1179 = getitem_1180 = getitem_1181 = getitem_1182 = getitem_1183 = None + getitem_1332: "f32[][]cuda:0" = _foreach_sub_1[0] + getitem_1333: "f32[][]cuda:0" = _foreach_sub_1[1] + getitem_1334: "f32[][]cuda:0" = _foreach_sub_1[2] + getitem_1335: "f32[][]cuda:0" = _foreach_sub_1[3] + getitem_1336: 
"f32[][]cuda:0" = _foreach_sub_1[4] + getitem_1337: "f32[][]cuda:0" = _foreach_sub_1[5] + getitem_1338: "f32[][]cuda:0" = _foreach_sub_1[6] + getitem_1339: "f32[][]cuda:0" = _foreach_sub_1[7] + getitem_1340: "f32[][]cuda:0" = _foreach_sub_1[8] + getitem_1341: "f32[][]cuda:0" = _foreach_sub_1[9] + getitem_1342: "f32[][]cuda:0" = _foreach_sub_1[10] + getitem_1343: "f32[][]cuda:0" = _foreach_sub_1[11] + getitem_1344: "f32[][]cuda:0" = _foreach_sub_1[12] + getitem_1345: "f32[][]cuda:0" = _foreach_sub_1[13] + getitem_1346: "f32[][]cuda:0" = _foreach_sub_1[14] + getitem_1347: "f32[][]cuda:0" = _foreach_sub_1[15] + getitem_1348: "f32[][]cuda:0" = _foreach_sub_1[16] + getitem_1349: "f32[][]cuda:0" = _foreach_sub_1[17] + getitem_1350: "f32[][]cuda:0" = _foreach_sub_1[18] + getitem_1351: "f32[][]cuda:0" = _foreach_sub_1[19] + getitem_1352: "f32[][]cuda:0" = _foreach_sub_1[20] + getitem_1353: "f32[][]cuda:0" = _foreach_sub_1[21] + getitem_1354: "f32[][]cuda:0" = _foreach_sub_1[22] + getitem_1355: "f32[][]cuda:0" = _foreach_sub_1[23] + getitem_1356: "f32[][]cuda:0" = _foreach_sub_1[24] + getitem_1357: "f32[][]cuda:0" = _foreach_sub_1[25] + getitem_1358: "f32[][]cuda:0" = _foreach_sub_1[26] + getitem_1359: "f32[][]cuda:0" = _foreach_sub_1[27] + getitem_1360: "f32[][]cuda:0" = _foreach_sub_1[28] + getitem_1361: "f32[][]cuda:0" = _foreach_sub_1[29] + getitem_1362: "f32[][]cuda:0" = _foreach_sub_1[30] + getitem_1363: "f32[][]cuda:0" = _foreach_sub_1[31] + getitem_1364: "f32[][]cuda:0" = _foreach_sub_1[32] + getitem_1365: "f32[][]cuda:0" = _foreach_sub_1[33] + getitem_1366: "f32[][]cuda:0" = _foreach_sub_1[34] + getitem_1367: "f32[][]cuda:0" = _foreach_sub_1[35] + getitem_1368: "f32[][]cuda:0" = _foreach_sub_1[36] + getitem_1369: "f32[][]cuda:0" = _foreach_sub_1[37] + getitem_1370: "f32[][]cuda:0" = _foreach_sub_1[38] + getitem_1371: "f32[][]cuda:0" = _foreach_sub_1[39] + getitem_1372: "f32[][]cuda:0" = _foreach_sub_1[40] + getitem_1373: "f32[][]cuda:0" = _foreach_sub_1[41] + getitem_1374: "f32[][]cuda:0" = _foreach_sub_1[42] + getitem_1375: "f32[][]cuda:0" = _foreach_sub_1[43] + getitem_1376: "f32[][]cuda:0" = _foreach_sub_1[44] + getitem_1377: "f32[][]cuda:0" = _foreach_sub_1[45] + getitem_1378: "f32[][]cuda:0" = _foreach_sub_1[46] + getitem_1379: "f32[][]cuda:0" = _foreach_sub_1[47] + getitem_1380: "f32[][]cuda:0" = _foreach_sub_1[48] + getitem_1381: "f32[][]cuda:0" = _foreach_sub_1[49] + getitem_1382: "f32[][]cuda:0" = _foreach_sub_1[50] + getitem_1383: "f32[][]cuda:0" = _foreach_sub_1[51] + getitem_1384: "f32[][]cuda:0" = _foreach_sub_1[52] + getitem_1385: "f32[][]cuda:0" = _foreach_sub_1[53] + getitem_1386: "f32[][]cuda:0" = _foreach_sub_1[54] + getitem_1387: "f32[][]cuda:0" = _foreach_sub_1[55] + getitem_1388: "f32[][]cuda:0" = _foreach_sub_1[56] + getitem_1389: "f32[][]cuda:0" = _foreach_sub_1[57] + getitem_1390: "f32[][]cuda:0" = _foreach_sub_1[58] + getitem_1391: "f32[][]cuda:0" = _foreach_sub_1[59] + getitem_1392: "f32[][]cuda:0" = _foreach_sub_1[60] + getitem_1393: "f32[][]cuda:0" = _foreach_sub_1[61] + getitem_1394: "f32[][]cuda:0" = _foreach_sub_1[62] + getitem_1395: "f32[][]cuda:0" = _foreach_sub_1[63] + getitem_1396: "f32[][]cuda:0" = _foreach_sub_1[64] + getitem_1397: "f32[][]cuda:0" = _foreach_sub_1[65] + getitem_1398: "f32[][]cuda:0" = _foreach_sub_1[66] + getitem_1399: "f32[][]cuda:0" = _foreach_sub_1[67] + getitem_1400: "f32[][]cuda:0" = _foreach_sub_1[68] + getitem_1401: "f32[][]cuda:0" = _foreach_sub_1[69] + getitem_1402: "f32[][]cuda:0" = _foreach_sub_1[70] + getitem_1403: 
"f32[][]cuda:0" = _foreach_sub_1[71] + getitem_1404: "f32[][]cuda:0" = _foreach_sub_1[72] + getitem_1405: "f32[][]cuda:0" = _foreach_sub_1[73] + getitem_1406: "f32[][]cuda:0" = _foreach_sub_1[74] + getitem_1407: "f32[][]cuda:0" = _foreach_sub_1[75] + getitem_1408: "f32[][]cuda:0" = _foreach_sub_1[76] + getitem_1409: "f32[][]cuda:0" = _foreach_sub_1[77] + getitem_1410: "f32[][]cuda:0" = _foreach_sub_1[78] + getitem_1411: "f32[][]cuda:0" = _foreach_sub_1[79] + getitem_1412: "f32[][]cuda:0" = _foreach_sub_1[80] + getitem_1413: "f32[][]cuda:0" = _foreach_sub_1[81] + getitem_1414: "f32[][]cuda:0" = _foreach_sub_1[82] + getitem_1415: "f32[][]cuda:0" = _foreach_sub_1[83] + getitem_1416: "f32[][]cuda:0" = _foreach_sub_1[84] + getitem_1417: "f32[][]cuda:0" = _foreach_sub_1[85] + getitem_1418: "f32[][]cuda:0" = _foreach_sub_1[86] + getitem_1419: "f32[][]cuda:0" = _foreach_sub_1[87] + getitem_1420: "f32[][]cuda:0" = _foreach_sub_1[88] + getitem_1421: "f32[][]cuda:0" = _foreach_sub_1[89] + getitem_1422: "f32[][]cuda:0" = _foreach_sub_1[90] + getitem_1423: "f32[][]cuda:0" = _foreach_sub_1[91] + getitem_1424: "f32[][]cuda:0" = _foreach_sub_1[92] + getitem_1425: "f32[][]cuda:0" = _foreach_sub_1[93] + getitem_1426: "f32[][]cuda:0" = _foreach_sub_1[94] + getitem_1427: "f32[][]cuda:0" = _foreach_sub_1[95] + getitem_1428: "f32[][]cuda:0" = _foreach_sub_1[96] + getitem_1429: "f32[][]cuda:0" = _foreach_sub_1[97] + getitem_1430: "f32[][]cuda:0" = _foreach_sub_1[98] + getitem_1431: "f32[][]cuda:0" = _foreach_sub_1[99] + getitem_1432: "f32[][]cuda:0" = _foreach_sub_1[100] + getitem_1433: "f32[][]cuda:0" = _foreach_sub_1[101] + getitem_1434: "f32[][]cuda:0" = _foreach_sub_1[102] + getitem_1435: "f32[][]cuda:0" = _foreach_sub_1[103] + getitem_1436: "f32[][]cuda:0" = _foreach_sub_1[104] + getitem_1437: "f32[][]cuda:0" = _foreach_sub_1[105] + getitem_1438: "f32[][]cuda:0" = _foreach_sub_1[106] + getitem_1439: "f32[][]cuda:0" = _foreach_sub_1[107] + getitem_1440: "f32[][]cuda:0" = _foreach_sub_1[108] + getitem_1441: "f32[][]cuda:0" = _foreach_sub_1[109] + getitem_1442: "f32[][]cuda:0" = _foreach_sub_1[110] + getitem_1443: "f32[][]cuda:0" = _foreach_sub_1[111] + getitem_1444: "f32[][]cuda:0" = _foreach_sub_1[112] + getitem_1445: "f32[][]cuda:0" = _foreach_sub_1[113] + getitem_1446: "f32[][]cuda:0" = _foreach_sub_1[114] + getitem_1447: "f32[][]cuda:0" = _foreach_sub_1[115] + getitem_1448: "f32[][]cuda:0" = _foreach_sub_1[116] + getitem_1449: "f32[][]cuda:0" = _foreach_sub_1[117] + getitem_1450: "f32[][]cuda:0" = _foreach_sub_1[118] + getitem_1451: "f32[][]cuda:0" = _foreach_sub_1[119] + getitem_1452: "f32[][]cuda:0" = _foreach_sub_1[120] + getitem_1453: "f32[][]cuda:0" = _foreach_sub_1[121] + getitem_1454: "f32[][]cuda:0" = _foreach_sub_1[122] + getitem_1455: "f32[][]cuda:0" = _foreach_sub_1[123] + getitem_1456: "f32[][]cuda:0" = _foreach_sub_1[124] + getitem_1457: "f32[][]cuda:0" = _foreach_sub_1[125] + getitem_1458: "f32[][]cuda:0" = _foreach_sub_1[126] + getitem_1459: "f32[][]cuda:0" = _foreach_sub_1[127] + getitem_1460: "f32[][]cuda:0" = _foreach_sub_1[128] + getitem_1461: "f32[][]cuda:0" = _foreach_sub_1[129] + getitem_1462: "f32[][]cuda:0" = _foreach_sub_1[130] + getitem_1463: "f32[][]cuda:0" = _foreach_sub_1[131] + getitem_1464: "f32[][]cuda:0" = _foreach_sub_1[132] + getitem_1465: "f32[][]cuda:0" = _foreach_sub_1[133] + getitem_1466: "f32[][]cuda:0" = _foreach_sub_1[134] + getitem_1467: "f32[][]cuda:0" = _foreach_sub_1[135] + getitem_1468: "f32[][]cuda:0" = _foreach_sub_1[136] + getitem_1469: "f32[][]cuda:0" = 
_foreach_sub_1[137] + getitem_1470: "f32[][]cuda:0" = _foreach_sub_1[138] + getitem_1471: "f32[][]cuda:0" = _foreach_sub_1[139] + getitem_1472: "f32[][]cuda:0" = _foreach_sub_1[140] + getitem_1473: "f32[][]cuda:0" = _foreach_sub_1[141] + getitem_1474: "f32[][]cuda:0" = _foreach_sub_1[142] + getitem_1475: "f32[][]cuda:0" = _foreach_sub_1[143] + getitem_1476: "f32[][]cuda:0" = _foreach_sub_1[144] + getitem_1477: "f32[][]cuda:0" = _foreach_sub_1[145] + getitem_1478: "f32[][]cuda:0" = _foreach_sub_1[146] + getitem_1479: "f32[][]cuda:0" = _foreach_sub_1[147]; _foreach_sub_1 = None + + # File: /data/users/jjwu/a/pytorch/torch/optim/adam.py:559 in _multi_tensor_adam, code: torch._foreach_sub_(bias_correction2, 1) + _foreach_sub_2 = torch.ops.aten._foreach_sub.Scalar([getitem_1184, getitem_1185, getitem_1186, getitem_1187, getitem_1188, getitem_1189, getitem_1190, getitem_1191, getitem_1192, getitem_1193, getitem_1194, getitem_1195, getitem_1196, getitem_1197, getitem_1198, getitem_1199, getitem_1200, getitem_1201, getitem_1202, getitem_1203, getitem_1204, getitem_1205, getitem_1206, getitem_1207, getitem_1208, getitem_1209, getitem_1210, getitem_1211, getitem_1212, getitem_1213, getitem_1214, getitem_1215, getitem_1216, getitem_1217, getitem_1218, getitem_1219, getitem_1220, getitem_1221, getitem_1222, getitem_1223, getitem_1224, getitem_1225, getitem_1226, getitem_1227, getitem_1228, getitem_1229, getitem_1230, getitem_1231, getitem_1232, getitem_1233, getitem_1234, getitem_1235, getitem_1236, getitem_1237, getitem_1238, getitem_1239, getitem_1240, getitem_1241, getitem_1242, getitem_1243, getitem_1244, getitem_1245, getitem_1246, getitem_1247, getitem_1248, getitem_1249, getitem_1250, getitem_1251, getitem_1252, getitem_1253, getitem_1254, getitem_1255, getitem_1256, getitem_1257, getitem_1258, getitem_1259, getitem_1260, getitem_1261, getitem_1262, getitem_1263, getitem_1264, getitem_1265, getitem_1266, getitem_1267, getitem_1268, getitem_1269, getitem_1270, getitem_1271, getitem_1272, getitem_1273, getitem_1274, getitem_1275, getitem_1276, getitem_1277, getitem_1278, getitem_1279, getitem_1280, getitem_1281, getitem_1282, getitem_1283, getitem_1284, getitem_1285, getitem_1286, getitem_1287, getitem_1288, getitem_1289, getitem_1290, getitem_1291, getitem_1292, getitem_1293, getitem_1294, getitem_1295, getitem_1296, getitem_1297, getitem_1298, getitem_1299, getitem_1300, getitem_1301, getitem_1302, getitem_1303, getitem_1304, getitem_1305, getitem_1306, getitem_1307, getitem_1308, getitem_1309, getitem_1310, getitem_1311, getitem_1312, getitem_1313, getitem_1314, getitem_1315, getitem_1316, getitem_1317, getitem_1318, getitem_1319, getitem_1320, getitem_1321, getitem_1322, getitem_1323, getitem_1324, getitem_1325, getitem_1326, getitem_1327, getitem_1328, getitem_1329, getitem_1330, getitem_1331], 1); getitem_1184 = getitem_1185 = getitem_1186 = getitem_1187 = getitem_1188 = getitem_1189 = getitem_1190 = getitem_1191 = getitem_1192 = getitem_1193 = getitem_1194 = getitem_1195 = getitem_1196 = getitem_1197 = getitem_1198 = getitem_1199 = getitem_1200 = getitem_1201 = getitem_1202 = getitem_1203 = getitem_1204 = getitem_1205 = getitem_1206 = getitem_1207 = getitem_1208 = getitem_1209 = getitem_1210 = getitem_1211 = getitem_1212 = getitem_1213 = getitem_1214 = getitem_1215 = getitem_1216 = getitem_1217 = getitem_1218 = getitem_1219 = getitem_1220 = getitem_1221 = getitem_1222 = getitem_1223 = getitem_1224 = getitem_1225 = getitem_1226 = getitem_1227 = getitem_1228 = getitem_1229 = getitem_1230 = 
getitem_1231 = getitem_1232 = getitem_1233 = getitem_1234 = getitem_1235 = getitem_1236 = getitem_1237 = getitem_1238 = getitem_1239 = getitem_1240 = getitem_1241 = getitem_1242 = getitem_1243 = getitem_1244 = getitem_1245 = getitem_1246 = getitem_1247 = getitem_1248 = getitem_1249 = getitem_1250 = getitem_1251 = getitem_1252 = getitem_1253 = getitem_1254 = getitem_1255 = getitem_1256 = getitem_1257 = getitem_1258 = getitem_1259 = getitem_1260 = getitem_1261 = getitem_1262 = getitem_1263 = getitem_1264 = getitem_1265 = getitem_1266 = getitem_1267 = getitem_1268 = getitem_1269 = getitem_1270 = getitem_1271 = getitem_1272 = getitem_1273 = getitem_1274 = getitem_1275 = getitem_1276 = getitem_1277 = getitem_1278 = getitem_1279 = getitem_1280 = getitem_1281 = getitem_1282 = getitem_1283 = getitem_1284 = getitem_1285 = getitem_1286 = getitem_1287 = getitem_1288 = getitem_1289 = getitem_1290 = getitem_1291 = getitem_1292 = getitem_1293 = getitem_1294 = getitem_1295 = getitem_1296 = getitem_1297 = getitem_1298 = getitem_1299 = getitem_1300 = getitem_1301 = getitem_1302 = getitem_1303 = getitem_1304 = getitem_1305 = getitem_1306 = getitem_1307 = getitem_1308 = getitem_1309 = getitem_1310 = getitem_1311 = getitem_1312 = getitem_1313 = getitem_1314 = getitem_1315 = getitem_1316 = getitem_1317 = getitem_1318 = getitem_1319 = getitem_1320 = getitem_1321 = getitem_1322 = getitem_1323 = getitem_1324 = getitem_1325 = getitem_1326 = getitem_1327 = getitem_1328 = getitem_1329 = getitem_1330 = getitem_1331 = None + getitem_1480: "f32[][]cuda:0" = _foreach_sub_2[0] + getitem_1481: "f32[][]cuda:0" = _foreach_sub_2[1] + getitem_1482: "f32[][]cuda:0" = _foreach_sub_2[2] + getitem_1483: "f32[][]cuda:0" = _foreach_sub_2[3] + getitem_1484: "f32[][]cuda:0" = _foreach_sub_2[4] + getitem_1485: "f32[][]cuda:0" = _foreach_sub_2[5] + getitem_1486: "f32[][]cuda:0" = _foreach_sub_2[6] + getitem_1487: "f32[][]cuda:0" = _foreach_sub_2[7] + getitem_1488: "f32[][]cuda:0" = _foreach_sub_2[8] + getitem_1489: "f32[][]cuda:0" = _foreach_sub_2[9] + getitem_1490: "f32[][]cuda:0" = _foreach_sub_2[10] + getitem_1491: "f32[][]cuda:0" = _foreach_sub_2[11] + getitem_1492: "f32[][]cuda:0" = _foreach_sub_2[12] + getitem_1493: "f32[][]cuda:0" = _foreach_sub_2[13] + getitem_1494: "f32[][]cuda:0" = _foreach_sub_2[14] + getitem_1495: "f32[][]cuda:0" = _foreach_sub_2[15] + getitem_1496: "f32[][]cuda:0" = _foreach_sub_2[16] + getitem_1497: "f32[][]cuda:0" = _foreach_sub_2[17] + getitem_1498: "f32[][]cuda:0" = _foreach_sub_2[18] + getitem_1499: "f32[][]cuda:0" = _foreach_sub_2[19] + getitem_1500: "f32[][]cuda:0" = _foreach_sub_2[20] + getitem_1501: "f32[][]cuda:0" = _foreach_sub_2[21] + getitem_1502: "f32[][]cuda:0" = _foreach_sub_2[22] + getitem_1503: "f32[][]cuda:0" = _foreach_sub_2[23] + getitem_1504: "f32[][]cuda:0" = _foreach_sub_2[24] + getitem_1505: "f32[][]cuda:0" = _foreach_sub_2[25] + getitem_1506: "f32[][]cuda:0" = _foreach_sub_2[26] + getitem_1507: "f32[][]cuda:0" = _foreach_sub_2[27] + getitem_1508: "f32[][]cuda:0" = _foreach_sub_2[28] + getitem_1509: "f32[][]cuda:0" = _foreach_sub_2[29] + getitem_1510: "f32[][]cuda:0" = _foreach_sub_2[30] + getitem_1511: "f32[][]cuda:0" = _foreach_sub_2[31] + getitem_1512: "f32[][]cuda:0" = _foreach_sub_2[32] + getitem_1513: "f32[][]cuda:0" = _foreach_sub_2[33] + getitem_1514: "f32[][]cuda:0" = _foreach_sub_2[34] + getitem_1515: "f32[][]cuda:0" = _foreach_sub_2[35] + getitem_1516: "f32[][]cuda:0" = _foreach_sub_2[36] + getitem_1517: "f32[][]cuda:0" = _foreach_sub_2[37] + getitem_1518: 
"f32[][]cuda:0" = _foreach_sub_2[38] + getitem_1519: "f32[][]cuda:0" = _foreach_sub_2[39] + getitem_1520: "f32[][]cuda:0" = _foreach_sub_2[40] + getitem_1521: "f32[][]cuda:0" = _foreach_sub_2[41] + getitem_1522: "f32[][]cuda:0" = _foreach_sub_2[42] + getitem_1523: "f32[][]cuda:0" = _foreach_sub_2[43] + getitem_1524: "f32[][]cuda:0" = _foreach_sub_2[44] + getitem_1525: "f32[][]cuda:0" = _foreach_sub_2[45] + getitem_1526: "f32[][]cuda:0" = _foreach_sub_2[46] + getitem_1527: "f32[][]cuda:0" = _foreach_sub_2[47] + getitem_1528: "f32[][]cuda:0" = _foreach_sub_2[48] + getitem_1529: "f32[][]cuda:0" = _foreach_sub_2[49] + getitem_1530: "f32[][]cuda:0" = _foreach_sub_2[50] + getitem_1531: "f32[][]cuda:0" = _foreach_sub_2[51] + getitem_1532: "f32[][]cuda:0" = _foreach_sub_2[52] + getitem_1533: "f32[][]cuda:0" = _foreach_sub_2[53] + getitem_1534: "f32[][]cuda:0" = _foreach_sub_2[54] + getitem_1535: "f32[][]cuda:0" = _foreach_sub_2[55] + getitem_1536: "f32[][]cuda:0" = _foreach_sub_2[56] + getitem_1537: "f32[][]cuda:0" = _foreach_sub_2[57] + getitem_1538: "f32[][]cuda:0" = _foreach_sub_2[58] + getitem_1539: "f32[][]cuda:0" = _foreach_sub_2[59] + getitem_1540: "f32[][]cuda:0" = _foreach_sub_2[60] + getitem_1541: "f32[][]cuda:0" = _foreach_sub_2[61] + getitem_1542: "f32[][]cuda:0" = _foreach_sub_2[62] + getitem_1543: "f32[][]cuda:0" = _foreach_sub_2[63] + getitem_1544: "f32[][]cuda:0" = _foreach_sub_2[64] + getitem_1545: "f32[][]cuda:0" = _foreach_sub_2[65] + getitem_1546: "f32[][]cuda:0" = _foreach_sub_2[66] + getitem_1547: "f32[][]cuda:0" = _foreach_sub_2[67] + getitem_1548: "f32[][]cuda:0" = _foreach_sub_2[68] + getitem_1549: "f32[][]cuda:0" = _foreach_sub_2[69] + getitem_1550: "f32[][]cuda:0" = _foreach_sub_2[70] + getitem_1551: "f32[][]cuda:0" = _foreach_sub_2[71] + getitem_1552: "f32[][]cuda:0" = _foreach_sub_2[72] + getitem_1553: "f32[][]cuda:0" = _foreach_sub_2[73] + getitem_1554: "f32[][]cuda:0" = _foreach_sub_2[74] + getitem_1555: "f32[][]cuda:0" = _foreach_sub_2[75] + getitem_1556: "f32[][]cuda:0" = _foreach_sub_2[76] + getitem_1557: "f32[][]cuda:0" = _foreach_sub_2[77] + getitem_1558: "f32[][]cuda:0" = _foreach_sub_2[78] + getitem_1559: "f32[][]cuda:0" = _foreach_sub_2[79] + getitem_1560: "f32[][]cuda:0" = _foreach_sub_2[80] + getitem_1561: "f32[][]cuda:0" = _foreach_sub_2[81] + getitem_1562: "f32[][]cuda:0" = _foreach_sub_2[82] + getitem_1563: "f32[][]cuda:0" = _foreach_sub_2[83] + getitem_1564: "f32[][]cuda:0" = _foreach_sub_2[84] + getitem_1565: "f32[][]cuda:0" = _foreach_sub_2[85] + getitem_1566: "f32[][]cuda:0" = _foreach_sub_2[86] + getitem_1567: "f32[][]cuda:0" = _foreach_sub_2[87] + getitem_1568: "f32[][]cuda:0" = _foreach_sub_2[88] + getitem_1569: "f32[][]cuda:0" = _foreach_sub_2[89] + getitem_1570: "f32[][]cuda:0" = _foreach_sub_2[90] + getitem_1571: "f32[][]cuda:0" = _foreach_sub_2[91] + getitem_1572: "f32[][]cuda:0" = _foreach_sub_2[92] + getitem_1573: "f32[][]cuda:0" = _foreach_sub_2[93] + getitem_1574: "f32[][]cuda:0" = _foreach_sub_2[94] + getitem_1575: "f32[][]cuda:0" = _foreach_sub_2[95] + getitem_1576: "f32[][]cuda:0" = _foreach_sub_2[96] + getitem_1577: "f32[][]cuda:0" = _foreach_sub_2[97] + getitem_1578: "f32[][]cuda:0" = _foreach_sub_2[98] + getitem_1579: "f32[][]cuda:0" = _foreach_sub_2[99] + getitem_1580: "f32[][]cuda:0" = _foreach_sub_2[100] + getitem_1581: "f32[][]cuda:0" = _foreach_sub_2[101] + getitem_1582: "f32[][]cuda:0" = _foreach_sub_2[102] + getitem_1583: "f32[][]cuda:0" = _foreach_sub_2[103] + getitem_1584: "f32[][]cuda:0" = _foreach_sub_2[104] + 
getitem_1585: "f32[][]cuda:0" = _foreach_sub_2[105] + getitem_1586: "f32[][]cuda:0" = _foreach_sub_2[106] + getitem_1587: "f32[][]cuda:0" = _foreach_sub_2[107] + getitem_1588: "f32[][]cuda:0" = _foreach_sub_2[108] + getitem_1589: "f32[][]cuda:0" = _foreach_sub_2[109] + getitem_1590: "f32[][]cuda:0" = _foreach_sub_2[110] + getitem_1591: "f32[][]cuda:0" = _foreach_sub_2[111] + getitem_1592: "f32[][]cuda:0" = _foreach_sub_2[112] + getitem_1593: "f32[][]cuda:0" = _foreach_sub_2[113] + getitem_1594: "f32[][]cuda:0" = _foreach_sub_2[114] + getitem_1595: "f32[][]cuda:0" = _foreach_sub_2[115] + getitem_1596: "f32[][]cuda:0" = _foreach_sub_2[116] + getitem_1597: "f32[][]cuda:0" = _foreach_sub_2[117] + getitem_1598: "f32[][]cuda:0" = _foreach_sub_2[118] + getitem_1599: "f32[][]cuda:0" = _foreach_sub_2[119] + getitem_1600: "f32[][]cuda:0" = _foreach_sub_2[120] + getitem_1601: "f32[][]cuda:0" = _foreach_sub_2[121] + getitem_1602: "f32[][]cuda:0" = _foreach_sub_2[122] + getitem_1603: "f32[][]cuda:0" = _foreach_sub_2[123] + getitem_1604: "f32[][]cuda:0" = _foreach_sub_2[124] + getitem_1605: "f32[][]cuda:0" = _foreach_sub_2[125] + getitem_1606: "f32[][]cuda:0" = _foreach_sub_2[126] + getitem_1607: "f32[][]cuda:0" = _foreach_sub_2[127] + getitem_1608: "f32[][]cuda:0" = _foreach_sub_2[128] + getitem_1609: "f32[][]cuda:0" = _foreach_sub_2[129] + getitem_1610: "f32[][]cuda:0" = _foreach_sub_2[130] + getitem_1611: "f32[][]cuda:0" = _foreach_sub_2[131] + getitem_1612: "f32[][]cuda:0" = _foreach_sub_2[132] + getitem_1613: "f32[][]cuda:0" = _foreach_sub_2[133] + getitem_1614: "f32[][]cuda:0" = _foreach_sub_2[134] + getitem_1615: "f32[][]cuda:0" = _foreach_sub_2[135] + getitem_1616: "f32[][]cuda:0" = _foreach_sub_2[136] + getitem_1617: "f32[][]cuda:0" = _foreach_sub_2[137] + getitem_1618: "f32[][]cuda:0" = _foreach_sub_2[138] + getitem_1619: "f32[][]cuda:0" = _foreach_sub_2[139] + getitem_1620: "f32[][]cuda:0" = _foreach_sub_2[140] + getitem_1621: "f32[][]cuda:0" = _foreach_sub_2[141] + getitem_1622: "f32[][]cuda:0" = _foreach_sub_2[142] + getitem_1623: "f32[][]cuda:0" = _foreach_sub_2[143] + getitem_1624: "f32[][]cuda:0" = _foreach_sub_2[144] + getitem_1625: "f32[][]cuda:0" = _foreach_sub_2[145] + getitem_1626: "f32[][]cuda:0" = _foreach_sub_2[146] + getitem_1627: "f32[][]cuda:0" = _foreach_sub_2[147]; _foreach_sub_2 = None + + # File: /data/users/jjwu/a/pytorch/torch/optim/adam.py:561 in _multi_tensor_adam, code: torch._foreach_neg_(bias_correction2) + _foreach_neg = torch.ops.aten._foreach_neg.default([getitem_1480, getitem_1481, getitem_1482, getitem_1483, getitem_1484, getitem_1485, getitem_1486, getitem_1487, getitem_1488, getitem_1489, getitem_1490, getitem_1491, getitem_1492, getitem_1493, getitem_1494, getitem_1495, getitem_1496, getitem_1497, getitem_1498, getitem_1499, getitem_1500, getitem_1501, getitem_1502, getitem_1503, getitem_1504, getitem_1505, getitem_1506, getitem_1507, getitem_1508, getitem_1509, getitem_1510, getitem_1511, getitem_1512, getitem_1513, getitem_1514, getitem_1515, getitem_1516, getitem_1517, getitem_1518, getitem_1519, getitem_1520, getitem_1521, getitem_1522, getitem_1523, getitem_1524, getitem_1525, getitem_1526, getitem_1527, getitem_1528, getitem_1529, getitem_1530, getitem_1531, getitem_1532, getitem_1533, getitem_1534, getitem_1535, getitem_1536, getitem_1537, getitem_1538, getitem_1539, getitem_1540, getitem_1541, getitem_1542, getitem_1543, getitem_1544, getitem_1545, getitem_1546, getitem_1547, getitem_1548, getitem_1549, getitem_1550, getitem_1551, getitem_1552, 
getitem_1553, getitem_1554, getitem_1555, getitem_1556, getitem_1557, getitem_1558, getitem_1559, getitem_1560, getitem_1561, getitem_1562, getitem_1563, getitem_1564, getitem_1565, getitem_1566, getitem_1567, getitem_1568, getitem_1569, getitem_1570, getitem_1571, getitem_1572, getitem_1573, getitem_1574, getitem_1575, getitem_1576, getitem_1577, getitem_1578, getitem_1579, getitem_1580, getitem_1581, getitem_1582, getitem_1583, getitem_1584, getitem_1585, getitem_1586, getitem_1587, getitem_1588, getitem_1589, getitem_1590, getitem_1591, getitem_1592, getitem_1593, getitem_1594, getitem_1595, getitem_1596, getitem_1597, getitem_1598, getitem_1599, getitem_1600, getitem_1601, getitem_1602, getitem_1603, getitem_1604, getitem_1605, getitem_1606, getitem_1607, getitem_1608, getitem_1609, getitem_1610, getitem_1611, getitem_1612, getitem_1613, getitem_1614, getitem_1615, getitem_1616, getitem_1617, getitem_1618, getitem_1619, getitem_1620, getitem_1621, getitem_1622, getitem_1623, getitem_1624, getitem_1625, getitem_1626, getitem_1627]); getitem_1480 = getitem_1481 = getitem_1482 = getitem_1483 = getitem_1484 = getitem_1485 = getitem_1486 = getitem_1487 = getitem_1488 = getitem_1489 = getitem_1490 = getitem_1491 = getitem_1492 = getitem_1493 = getitem_1494 = getitem_1495 = getitem_1496 = getitem_1497 = getitem_1498 = getitem_1499 = getitem_1500 = getitem_1501 = getitem_1502 = getitem_1503 = getitem_1504 = getitem_1505 = getitem_1506 = getitem_1507 = getitem_1508 = getitem_1509 = getitem_1510 = getitem_1511 = getitem_1512 = getitem_1513 = getitem_1514 = getitem_1515 = getitem_1516 = getitem_1517 = getitem_1518 = getitem_1519 = getitem_1520 = getitem_1521 = getitem_1522 = getitem_1523 = getitem_1524 = getitem_1525 = getitem_1526 = getitem_1527 = getitem_1528 = getitem_1529 = getitem_1530 = getitem_1531 = getitem_1532 = getitem_1533 = getitem_1534 = getitem_1535 = getitem_1536 = getitem_1537 = getitem_1538 = getitem_1539 = getitem_1540 = getitem_1541 = getitem_1542 = getitem_1543 = getitem_1544 = getitem_1545 = getitem_1546 = getitem_1547 = getitem_1548 = getitem_1549 = getitem_1550 = getitem_1551 = getitem_1552 = getitem_1553 = getitem_1554 = getitem_1555 = getitem_1556 = getitem_1557 = getitem_1558 = getitem_1559 = getitem_1560 = getitem_1561 = getitem_1562 = getitem_1563 = getitem_1564 = getitem_1565 = getitem_1566 = getitem_1567 = getitem_1568 = getitem_1569 = getitem_1570 = getitem_1571 = getitem_1572 = getitem_1573 = getitem_1574 = getitem_1575 = getitem_1576 = getitem_1577 = getitem_1578 = getitem_1579 = getitem_1580 = getitem_1581 = getitem_1582 = getitem_1583 = getitem_1584 = getitem_1585 = getitem_1586 = getitem_1587 = getitem_1588 = getitem_1589 = getitem_1590 = getitem_1591 = getitem_1592 = getitem_1593 = getitem_1594 = getitem_1595 = getitem_1596 = getitem_1597 = getitem_1598 = getitem_1599 = getitem_1600 = getitem_1601 = getitem_1602 = getitem_1603 = getitem_1604 = getitem_1605 = getitem_1606 = getitem_1607 = getitem_1608 = getitem_1609 = getitem_1610 = getitem_1611 = getitem_1612 = getitem_1613 = getitem_1614 = getitem_1615 = getitem_1616 = getitem_1617 = getitem_1618 = getitem_1619 = getitem_1620 = getitem_1621 = getitem_1622 = getitem_1623 = getitem_1624 = getitem_1625 = getitem_1626 = getitem_1627 = None + getitem_1628: "f32[][]cuda:0" = _foreach_neg[0] + getitem_1629: "f32[][]cuda:0" = _foreach_neg[1] + getitem_1630: "f32[][]cuda:0" = _foreach_neg[2] + getitem_1631: "f32[][]cuda:0" = _foreach_neg[3] + getitem_1632: "f32[][]cuda:0" = _foreach_neg[4] + getitem_1633: 
"f32[][]cuda:0" = _foreach_neg[5] + getitem_1634: "f32[][]cuda:0" = _foreach_neg[6] + getitem_1635: "f32[][]cuda:0" = _foreach_neg[7] + getitem_1636: "f32[][]cuda:0" = _foreach_neg[8] + getitem_1637: "f32[][]cuda:0" = _foreach_neg[9] + getitem_1638: "f32[][]cuda:0" = _foreach_neg[10] + getitem_1639: "f32[][]cuda:0" = _foreach_neg[11] + getitem_1640: "f32[][]cuda:0" = _foreach_neg[12] + getitem_1641: "f32[][]cuda:0" = _foreach_neg[13] + getitem_1642: "f32[][]cuda:0" = _foreach_neg[14] + getitem_1643: "f32[][]cuda:0" = _foreach_neg[15] + getitem_1644: "f32[][]cuda:0" = _foreach_neg[16] + getitem_1645: "f32[][]cuda:0" = _foreach_neg[17] + getitem_1646: "f32[][]cuda:0" = _foreach_neg[18] + getitem_1647: "f32[][]cuda:0" = _foreach_neg[19] + getitem_1648: "f32[][]cuda:0" = _foreach_neg[20] + getitem_1649: "f32[][]cuda:0" = _foreach_neg[21] + getitem_1650: "f32[][]cuda:0" = _foreach_neg[22] + getitem_1651: "f32[][]cuda:0" = _foreach_neg[23] + getitem_1652: "f32[][]cuda:0" = _foreach_neg[24] + getitem_1653: "f32[][]cuda:0" = _foreach_neg[25] + getitem_1654: "f32[][]cuda:0" = _foreach_neg[26] + getitem_1655: "f32[][]cuda:0" = _foreach_neg[27] + getitem_1656: "f32[][]cuda:0" = _foreach_neg[28] + getitem_1657: "f32[][]cuda:0" = _foreach_neg[29] + getitem_1658: "f32[][]cuda:0" = _foreach_neg[30] + getitem_1659: "f32[][]cuda:0" = _foreach_neg[31] + getitem_1660: "f32[][]cuda:0" = _foreach_neg[32] + getitem_1661: "f32[][]cuda:0" = _foreach_neg[33] + getitem_1662: "f32[][]cuda:0" = _foreach_neg[34] + getitem_1663: "f32[][]cuda:0" = _foreach_neg[35] + getitem_1664: "f32[][]cuda:0" = _foreach_neg[36] + getitem_1665: "f32[][]cuda:0" = _foreach_neg[37] + getitem_1666: "f32[][]cuda:0" = _foreach_neg[38] + getitem_1667: "f32[][]cuda:0" = _foreach_neg[39] + getitem_1668: "f32[][]cuda:0" = _foreach_neg[40] + getitem_1669: "f32[][]cuda:0" = _foreach_neg[41] + getitem_1670: "f32[][]cuda:0" = _foreach_neg[42] + getitem_1671: "f32[][]cuda:0" = _foreach_neg[43] + getitem_1672: "f32[][]cuda:0" = _foreach_neg[44] + getitem_1673: "f32[][]cuda:0" = _foreach_neg[45] + getitem_1674: "f32[][]cuda:0" = _foreach_neg[46] + getitem_1675: "f32[][]cuda:0" = _foreach_neg[47] + getitem_1676: "f32[][]cuda:0" = _foreach_neg[48] + getitem_1677: "f32[][]cuda:0" = _foreach_neg[49] + getitem_1678: "f32[][]cuda:0" = _foreach_neg[50] + getitem_1679: "f32[][]cuda:0" = _foreach_neg[51] + getitem_1680: "f32[][]cuda:0" = _foreach_neg[52] + getitem_1681: "f32[][]cuda:0" = _foreach_neg[53] + getitem_1682: "f32[][]cuda:0" = _foreach_neg[54] + getitem_1683: "f32[][]cuda:0" = _foreach_neg[55] + getitem_1684: "f32[][]cuda:0" = _foreach_neg[56] + getitem_1685: "f32[][]cuda:0" = _foreach_neg[57] + getitem_1686: "f32[][]cuda:0" = _foreach_neg[58] + getitem_1687: "f32[][]cuda:0" = _foreach_neg[59] + getitem_1688: "f32[][]cuda:0" = _foreach_neg[60] + getitem_1689: "f32[][]cuda:0" = _foreach_neg[61] + getitem_1690: "f32[][]cuda:0" = _foreach_neg[62] + getitem_1691: "f32[][]cuda:0" = _foreach_neg[63] + getitem_1692: "f32[][]cuda:0" = _foreach_neg[64] + getitem_1693: "f32[][]cuda:0" = _foreach_neg[65] + getitem_1694: "f32[][]cuda:0" = _foreach_neg[66] + getitem_1695: "f32[][]cuda:0" = _foreach_neg[67] + getitem_1696: "f32[][]cuda:0" = _foreach_neg[68] + getitem_1697: "f32[][]cuda:0" = _foreach_neg[69] + getitem_1698: "f32[][]cuda:0" = _foreach_neg[70] + getitem_1699: "f32[][]cuda:0" = _foreach_neg[71] + getitem_1700: "f32[][]cuda:0" = _foreach_neg[72] + getitem_1701: "f32[][]cuda:0" = _foreach_neg[73] + getitem_1702: "f32[][]cuda:0" = _foreach_neg[74] + 
getitem_1703: "f32[][]cuda:0" = _foreach_neg[75] + getitem_1704: "f32[][]cuda:0" = _foreach_neg[76] + getitem_1705: "f32[][]cuda:0" = _foreach_neg[77] + getitem_1706: "f32[][]cuda:0" = _foreach_neg[78] + getitem_1707: "f32[][]cuda:0" = _foreach_neg[79] + getitem_1708: "f32[][]cuda:0" = _foreach_neg[80] + getitem_1709: "f32[][]cuda:0" = _foreach_neg[81] + getitem_1710: "f32[][]cuda:0" = _foreach_neg[82] + getitem_1711: "f32[][]cuda:0" = _foreach_neg[83] + getitem_1712: "f32[][]cuda:0" = _foreach_neg[84] + getitem_1713: "f32[][]cuda:0" = _foreach_neg[85] + getitem_1714: "f32[][]cuda:0" = _foreach_neg[86] + getitem_1715: "f32[][]cuda:0" = _foreach_neg[87] + getitem_1716: "f32[][]cuda:0" = _foreach_neg[88] + getitem_1717: "f32[][]cuda:0" = _foreach_neg[89] + getitem_1718: "f32[][]cuda:0" = _foreach_neg[90] + getitem_1719: "f32[][]cuda:0" = _foreach_neg[91] + getitem_1720: "f32[][]cuda:0" = _foreach_neg[92] + getitem_1721: "f32[][]cuda:0" = _foreach_neg[93] + getitem_1722: "f32[][]cuda:0" = _foreach_neg[94] + getitem_1723: "f32[][]cuda:0" = _foreach_neg[95] + getitem_1724: "f32[][]cuda:0" = _foreach_neg[96] + getitem_1725: "f32[][]cuda:0" = _foreach_neg[97] + getitem_1726: "f32[][]cuda:0" = _foreach_neg[98] + getitem_1727: "f32[][]cuda:0" = _foreach_neg[99] + getitem_1728: "f32[][]cuda:0" = _foreach_neg[100] + getitem_1729: "f32[][]cuda:0" = _foreach_neg[101] + getitem_1730: "f32[][]cuda:0" = _foreach_neg[102] + getitem_1731: "f32[][]cuda:0" = _foreach_neg[103] + getitem_1732: "f32[][]cuda:0" = _foreach_neg[104] + getitem_1733: "f32[][]cuda:0" = _foreach_neg[105] + getitem_1734: "f32[][]cuda:0" = _foreach_neg[106] + getitem_1735: "f32[][]cuda:0" = _foreach_neg[107] + getitem_1736: "f32[][]cuda:0" = _foreach_neg[108] + getitem_1737: "f32[][]cuda:0" = _foreach_neg[109] + getitem_1738: "f32[][]cuda:0" = _foreach_neg[110] + getitem_1739: "f32[][]cuda:0" = _foreach_neg[111] + getitem_1740: "f32[][]cuda:0" = _foreach_neg[112] + getitem_1741: "f32[][]cuda:0" = _foreach_neg[113] + getitem_1742: "f32[][]cuda:0" = _foreach_neg[114] + getitem_1743: "f32[][]cuda:0" = _foreach_neg[115] + getitem_1744: "f32[][]cuda:0" = _foreach_neg[116] + getitem_1745: "f32[][]cuda:0" = _foreach_neg[117] + getitem_1746: "f32[][]cuda:0" = _foreach_neg[118] + getitem_1747: "f32[][]cuda:0" = _foreach_neg[119] + getitem_1748: "f32[][]cuda:0" = _foreach_neg[120] + getitem_1749: "f32[][]cuda:0" = _foreach_neg[121] + getitem_1750: "f32[][]cuda:0" = _foreach_neg[122] + getitem_1751: "f32[][]cuda:0" = _foreach_neg[123] + getitem_1752: "f32[][]cuda:0" = _foreach_neg[124] + getitem_1753: "f32[][]cuda:0" = _foreach_neg[125] + getitem_1754: "f32[][]cuda:0" = _foreach_neg[126] + getitem_1755: "f32[][]cuda:0" = _foreach_neg[127] + getitem_1756: "f32[][]cuda:0" = _foreach_neg[128] + getitem_1757: "f32[][]cuda:0" = _foreach_neg[129] + getitem_1758: "f32[][]cuda:0" = _foreach_neg[130] + getitem_1759: "f32[][]cuda:0" = _foreach_neg[131] + getitem_1760: "f32[][]cuda:0" = _foreach_neg[132] + getitem_1761: "f32[][]cuda:0" = _foreach_neg[133] + getitem_1762: "f32[][]cuda:0" = _foreach_neg[134] + getitem_1763: "f32[][]cuda:0" = _foreach_neg[135] + getitem_1764: "f32[][]cuda:0" = _foreach_neg[136] + getitem_1765: "f32[][]cuda:0" = _foreach_neg[137] + getitem_1766: "f32[][]cuda:0" = _foreach_neg[138] + getitem_1767: "f32[][]cuda:0" = _foreach_neg[139] + getitem_1768: "f32[][]cuda:0" = _foreach_neg[140] + getitem_1769: "f32[][]cuda:0" = _foreach_neg[141] + getitem_1770: "f32[][]cuda:0" = _foreach_neg[142] + getitem_1771: "f32[][]cuda:0" = 
_foreach_neg[143] + getitem_1772: "f32[][]cuda:0" = _foreach_neg[144] + getitem_1773: "f32[][]cuda:0" = _foreach_neg[145] + getitem_1774: "f32[][]cuda:0" = _foreach_neg[146] + getitem_1775: "f32[][]cuda:0" = _foreach_neg[147]; _foreach_neg = None + + # File: /data/users/jjwu/a/pytorch/torch/optim/adam.py:564 in _multi_tensor_adam, code: torch._foreach_div_(bias_correction1, lr) + _foreach_div = torch.ops.aten._foreach_div.Scalar([getitem_1332, getitem_1333, getitem_1334, getitem_1335, getitem_1336, getitem_1337, getitem_1338, getitem_1339, getitem_1340, getitem_1341, getitem_1342, getitem_1343, getitem_1344, getitem_1345, getitem_1346, getitem_1347, getitem_1348, getitem_1349, getitem_1350, getitem_1351, getitem_1352, getitem_1353, getitem_1354, getitem_1355, getitem_1356, getitem_1357, getitem_1358, getitem_1359, getitem_1360, getitem_1361, getitem_1362, getitem_1363, getitem_1364, getitem_1365, getitem_1366, getitem_1367, getitem_1368, getitem_1369, getitem_1370, getitem_1371, getitem_1372, getitem_1373, getitem_1374, getitem_1375, getitem_1376, getitem_1377, getitem_1378, getitem_1379, getitem_1380, getitem_1381, getitem_1382, getitem_1383, getitem_1384, getitem_1385, getitem_1386, getitem_1387, getitem_1388, getitem_1389, getitem_1390, getitem_1391, getitem_1392, getitem_1393, getitem_1394, getitem_1395, getitem_1396, getitem_1397, getitem_1398, getitem_1399, getitem_1400, getitem_1401, getitem_1402, getitem_1403, getitem_1404, getitem_1405, getitem_1406, getitem_1407, getitem_1408, getitem_1409, getitem_1410, getitem_1411, getitem_1412, getitem_1413, getitem_1414, getitem_1415, getitem_1416, getitem_1417, getitem_1418, getitem_1419, getitem_1420, getitem_1421, getitem_1422, getitem_1423, getitem_1424, getitem_1425, getitem_1426, getitem_1427, getitem_1428, getitem_1429, getitem_1430, getitem_1431, getitem_1432, getitem_1433, getitem_1434, getitem_1435, getitem_1436, getitem_1437, getitem_1438, getitem_1439, getitem_1440, getitem_1441, getitem_1442, getitem_1443, getitem_1444, getitem_1445, getitem_1446, getitem_1447, getitem_1448, getitem_1449, getitem_1450, getitem_1451, getitem_1452, getitem_1453, getitem_1454, getitem_1455, getitem_1456, getitem_1457, getitem_1458, getitem_1459, getitem_1460, getitem_1461, getitem_1462, getitem_1463, getitem_1464, getitem_1465, getitem_1466, getitem_1467, getitem_1468, getitem_1469, getitem_1470, getitem_1471, getitem_1472, getitem_1473, getitem_1474, getitem_1475, getitem_1476, getitem_1477, getitem_1478, getitem_1479], 0.01); getitem_1332 = getitem_1333 = getitem_1334 = getitem_1335 = getitem_1336 = getitem_1337 = getitem_1338 = getitem_1339 = getitem_1340 = getitem_1341 = getitem_1342 = getitem_1343 = getitem_1344 = getitem_1345 = getitem_1346 = getitem_1347 = getitem_1348 = getitem_1349 = getitem_1350 = getitem_1351 = getitem_1352 = getitem_1353 = getitem_1354 = getitem_1355 = getitem_1356 = getitem_1357 = getitem_1358 = getitem_1359 = getitem_1360 = getitem_1361 = getitem_1362 = getitem_1363 = getitem_1364 = getitem_1365 = getitem_1366 = getitem_1367 = getitem_1368 = getitem_1369 = getitem_1370 = getitem_1371 = getitem_1372 = getitem_1373 = getitem_1374 = getitem_1375 = getitem_1376 = getitem_1377 = getitem_1378 = getitem_1379 = getitem_1380 = getitem_1381 = getitem_1382 = getitem_1383 = getitem_1384 = getitem_1385 = getitem_1386 = getitem_1387 = getitem_1388 = getitem_1389 = getitem_1390 = getitem_1391 = getitem_1392 = getitem_1393 = getitem_1394 = getitem_1395 = getitem_1396 = getitem_1397 = getitem_1398 = getitem_1399 = getitem_1400 = 
getitem_1401 = getitem_1402 = getitem_1403 = getitem_1404 = getitem_1405 = getitem_1406 = getitem_1407 = getitem_1408 = getitem_1409 = getitem_1410 = getitem_1411 = getitem_1412 = getitem_1413 = getitem_1414 = getitem_1415 = getitem_1416 = getitem_1417 = getitem_1418 = getitem_1419 = getitem_1420 = getitem_1421 = getitem_1422 = getitem_1423 = getitem_1424 = getitem_1425 = getitem_1426 = getitem_1427 = getitem_1428 = getitem_1429 = getitem_1430 = getitem_1431 = getitem_1432 = getitem_1433 = getitem_1434 = getitem_1435 = getitem_1436 = getitem_1437 = getitem_1438 = getitem_1439 = getitem_1440 = getitem_1441 = getitem_1442 = getitem_1443 = getitem_1444 = getitem_1445 = getitem_1446 = getitem_1447 = getitem_1448 = getitem_1449 = getitem_1450 = getitem_1451 = getitem_1452 = getitem_1453 = getitem_1454 = getitem_1455 = getitem_1456 = getitem_1457 = getitem_1458 = getitem_1459 = getitem_1460 = getitem_1461 = getitem_1462 = getitem_1463 = getitem_1464 = getitem_1465 = getitem_1466 = getitem_1467 = getitem_1468 = getitem_1469 = getitem_1470 = getitem_1471 = getitem_1472 = getitem_1473 = getitem_1474 = getitem_1475 = getitem_1476 = getitem_1477 = getitem_1478 = getitem_1479 = None + getitem_1776: "f32[][]cuda:0" = _foreach_div[0] + getitem_1777: "f32[][]cuda:0" = _foreach_div[1] + getitem_1778: "f32[][]cuda:0" = _foreach_div[2] + getitem_1779: "f32[][]cuda:0" = _foreach_div[3] + getitem_1780: "f32[][]cuda:0" = _foreach_div[4] + getitem_1781: "f32[][]cuda:0" = _foreach_div[5] + getitem_1782: "f32[][]cuda:0" = _foreach_div[6] + getitem_1783: "f32[][]cuda:0" = _foreach_div[7] + getitem_1784: "f32[][]cuda:0" = _foreach_div[8] + getitem_1785: "f32[][]cuda:0" = _foreach_div[9] + getitem_1786: "f32[][]cuda:0" = _foreach_div[10] + getitem_1787: "f32[][]cuda:0" = _foreach_div[11] + getitem_1788: "f32[][]cuda:0" = _foreach_div[12] + getitem_1789: "f32[][]cuda:0" = _foreach_div[13] + getitem_1790: "f32[][]cuda:0" = _foreach_div[14] + getitem_1791: "f32[][]cuda:0" = _foreach_div[15] + getitem_1792: "f32[][]cuda:0" = _foreach_div[16] + getitem_1793: "f32[][]cuda:0" = _foreach_div[17] + getitem_1794: "f32[][]cuda:0" = _foreach_div[18] + getitem_1795: "f32[][]cuda:0" = _foreach_div[19] + getitem_1796: "f32[][]cuda:0" = _foreach_div[20] + getitem_1797: "f32[][]cuda:0" = _foreach_div[21] + getitem_1798: "f32[][]cuda:0" = _foreach_div[22] + getitem_1799: "f32[][]cuda:0" = _foreach_div[23] + getitem_1800: "f32[][]cuda:0" = _foreach_div[24] + getitem_1801: "f32[][]cuda:0" = _foreach_div[25] + getitem_1802: "f32[][]cuda:0" = _foreach_div[26] + getitem_1803: "f32[][]cuda:0" = _foreach_div[27] + getitem_1804: "f32[][]cuda:0" = _foreach_div[28] + getitem_1805: "f32[][]cuda:0" = _foreach_div[29] + getitem_1806: "f32[][]cuda:0" = _foreach_div[30] + getitem_1807: "f32[][]cuda:0" = _foreach_div[31] + getitem_1808: "f32[][]cuda:0" = _foreach_div[32] + getitem_1809: "f32[][]cuda:0" = _foreach_div[33] + getitem_1810: "f32[][]cuda:0" = _foreach_div[34] + getitem_1811: "f32[][]cuda:0" = _foreach_div[35] + getitem_1812: "f32[][]cuda:0" = _foreach_div[36] + getitem_1813: "f32[][]cuda:0" = _foreach_div[37] + getitem_1814: "f32[][]cuda:0" = _foreach_div[38] + getitem_1815: "f32[][]cuda:0" = _foreach_div[39] + getitem_1816: "f32[][]cuda:0" = _foreach_div[40] + getitem_1817: "f32[][]cuda:0" = _foreach_div[41] + getitem_1818: "f32[][]cuda:0" = _foreach_div[42] + getitem_1819: "f32[][]cuda:0" = _foreach_div[43] + getitem_1820: "f32[][]cuda:0" = _foreach_div[44] + getitem_1821: "f32[][]cuda:0" = _foreach_div[45] + getitem_1822: 
"f32[][]cuda:0" = _foreach_div[46] + getitem_1823: "f32[][]cuda:0" = _foreach_div[47] + getitem_1824: "f32[][]cuda:0" = _foreach_div[48] + getitem_1825: "f32[][]cuda:0" = _foreach_div[49] + getitem_1826: "f32[][]cuda:0" = _foreach_div[50] + getitem_1827: "f32[][]cuda:0" = _foreach_div[51] + getitem_1828: "f32[][]cuda:0" = _foreach_div[52] + getitem_1829: "f32[][]cuda:0" = _foreach_div[53] + getitem_1830: "f32[][]cuda:0" = _foreach_div[54] + getitem_1831: "f32[][]cuda:0" = _foreach_div[55] + getitem_1832: "f32[][]cuda:0" = _foreach_div[56] + getitem_1833: "f32[][]cuda:0" = _foreach_div[57] + getitem_1834: "f32[][]cuda:0" = _foreach_div[58] + getitem_1835: "f32[][]cuda:0" = _foreach_div[59] + getitem_1836: "f32[][]cuda:0" = _foreach_div[60] + getitem_1837: "f32[][]cuda:0" = _foreach_div[61] + getitem_1838: "f32[][]cuda:0" = _foreach_div[62] + getitem_1839: "f32[][]cuda:0" = _foreach_div[63] + getitem_1840: "f32[][]cuda:0" = _foreach_div[64] + getitem_1841: "f32[][]cuda:0" = _foreach_div[65] + getitem_1842: "f32[][]cuda:0" = _foreach_div[66] + getitem_1843: "f32[][]cuda:0" = _foreach_div[67] + getitem_1844: "f32[][]cuda:0" = _foreach_div[68] + getitem_1845: "f32[][]cuda:0" = _foreach_div[69] + getitem_1846: "f32[][]cuda:0" = _foreach_div[70] + getitem_1847: "f32[][]cuda:0" = _foreach_div[71] + getitem_1848: "f32[][]cuda:0" = _foreach_div[72] + getitem_1849: "f32[][]cuda:0" = _foreach_div[73] + getitem_1850: "f32[][]cuda:0" = _foreach_div[74] + getitem_1851: "f32[][]cuda:0" = _foreach_div[75] + getitem_1852: "f32[][]cuda:0" = _foreach_div[76] + getitem_1853: "f32[][]cuda:0" = _foreach_div[77] + getitem_1854: "f32[][]cuda:0" = _foreach_div[78] + getitem_1855: "f32[][]cuda:0" = _foreach_div[79] + getitem_1856: "f32[][]cuda:0" = _foreach_div[80] + getitem_1857: "f32[][]cuda:0" = _foreach_div[81] + getitem_1858: "f32[][]cuda:0" = _foreach_div[82] + getitem_1859: "f32[][]cuda:0" = _foreach_div[83] + getitem_1860: "f32[][]cuda:0" = _foreach_div[84] + getitem_1861: "f32[][]cuda:0" = _foreach_div[85] + getitem_1862: "f32[][]cuda:0" = _foreach_div[86] + getitem_1863: "f32[][]cuda:0" = _foreach_div[87] + getitem_1864: "f32[][]cuda:0" = _foreach_div[88] + getitem_1865: "f32[][]cuda:0" = _foreach_div[89] + getitem_1866: "f32[][]cuda:0" = _foreach_div[90] + getitem_1867: "f32[][]cuda:0" = _foreach_div[91] + getitem_1868: "f32[][]cuda:0" = _foreach_div[92] + getitem_1869: "f32[][]cuda:0" = _foreach_div[93] + getitem_1870: "f32[][]cuda:0" = _foreach_div[94] + getitem_1871: "f32[][]cuda:0" = _foreach_div[95] + getitem_1872: "f32[][]cuda:0" = _foreach_div[96] + getitem_1873: "f32[][]cuda:0" = _foreach_div[97] + getitem_1874: "f32[][]cuda:0" = _foreach_div[98] + getitem_1875: "f32[][]cuda:0" = _foreach_div[99] + getitem_1876: "f32[][]cuda:0" = _foreach_div[100] + getitem_1877: "f32[][]cuda:0" = _foreach_div[101] + getitem_1878: "f32[][]cuda:0" = _foreach_div[102] + getitem_1879: "f32[][]cuda:0" = _foreach_div[103] + getitem_1880: "f32[][]cuda:0" = _foreach_div[104] + getitem_1881: "f32[][]cuda:0" = _foreach_div[105] + getitem_1882: "f32[][]cuda:0" = _foreach_div[106] + getitem_1883: "f32[][]cuda:0" = _foreach_div[107] + getitem_1884: "f32[][]cuda:0" = _foreach_div[108] + getitem_1885: "f32[][]cuda:0" = _foreach_div[109] + getitem_1886: "f32[][]cuda:0" = _foreach_div[110] + getitem_1887: "f32[][]cuda:0" = _foreach_div[111] + getitem_1888: "f32[][]cuda:0" = _foreach_div[112] + getitem_1889: "f32[][]cuda:0" = _foreach_div[113] + getitem_1890: "f32[][]cuda:0" = _foreach_div[114] + getitem_1891: "f32[][]cuda:0" = 
_foreach_div[115] + getitem_1892: "f32[][]cuda:0" = _foreach_div[116] + getitem_1893: "f32[][]cuda:0" = _foreach_div[117] + getitem_1894: "f32[][]cuda:0" = _foreach_div[118] + getitem_1895: "f32[][]cuda:0" = _foreach_div[119] + getitem_1896: "f32[][]cuda:0" = _foreach_div[120] + getitem_1897: "f32[][]cuda:0" = _foreach_div[121] + getitem_1898: "f32[][]cuda:0" = _foreach_div[122] + getitem_1899: "f32[][]cuda:0" = _foreach_div[123] + getitem_1900: "f32[][]cuda:0" = _foreach_div[124] + getitem_1901: "f32[][]cuda:0" = _foreach_div[125] + getitem_1902: "f32[][]cuda:0" = _foreach_div[126] + getitem_1903: "f32[][]cuda:0" = _foreach_div[127] + getitem_1904: "f32[][]cuda:0" = _foreach_div[128] + getitem_1905: "f32[][]cuda:0" = _foreach_div[129] + getitem_1906: "f32[][]cuda:0" = _foreach_div[130] + getitem_1907: "f32[][]cuda:0" = _foreach_div[131] + getitem_1908: "f32[][]cuda:0" = _foreach_div[132] + getitem_1909: "f32[][]cuda:0" = _foreach_div[133] + getitem_1910: "f32[][]cuda:0" = _foreach_div[134] + getitem_1911: "f32[][]cuda:0" = _foreach_div[135] + getitem_1912: "f32[][]cuda:0" = _foreach_div[136] + getitem_1913: "f32[][]cuda:0" = _foreach_div[137] + getitem_1914: "f32[][]cuda:0" = _foreach_div[138] + getitem_1915: "f32[][]cuda:0" = _foreach_div[139] + getitem_1916: "f32[][]cuda:0" = _foreach_div[140] + getitem_1917: "f32[][]cuda:0" = _foreach_div[141] + getitem_1918: "f32[][]cuda:0" = _foreach_div[142] + getitem_1919: "f32[][]cuda:0" = _foreach_div[143] + getitem_1920: "f32[][]cuda:0" = _foreach_div[144] + getitem_1921: "f32[][]cuda:0" = _foreach_div[145] + getitem_1922: "f32[][]cuda:0" = _foreach_div[146] + getitem_1923: "f32[][]cuda:0" = _foreach_div[147]; _foreach_div = None + + # File: /data/users/jjwu/a/pytorch/torch/optim/adam.py:565 in _multi_tensor_adam, code: torch._foreach_reciprocal_(bias_correction1) + _foreach_reciprocal = torch.ops.aten._foreach_reciprocal.default([getitem_1776, getitem_1777, getitem_1778, getitem_1779, getitem_1780, getitem_1781, getitem_1782, getitem_1783, getitem_1784, getitem_1785, getitem_1786, getitem_1787, getitem_1788, getitem_1789, getitem_1790, getitem_1791, getitem_1792, getitem_1793, getitem_1794, getitem_1795, getitem_1796, getitem_1797, getitem_1798, getitem_1799, getitem_1800, getitem_1801, getitem_1802, getitem_1803, getitem_1804, getitem_1805, getitem_1806, getitem_1807, getitem_1808, getitem_1809, getitem_1810, getitem_1811, getitem_1812, getitem_1813, getitem_1814, getitem_1815, getitem_1816, getitem_1817, getitem_1818, getitem_1819, getitem_1820, getitem_1821, getitem_1822, getitem_1823, getitem_1824, getitem_1825, getitem_1826, getitem_1827, getitem_1828, getitem_1829, getitem_1830, getitem_1831, getitem_1832, getitem_1833, getitem_1834, getitem_1835, getitem_1836, getitem_1837, getitem_1838, getitem_1839, getitem_1840, getitem_1841, getitem_1842, getitem_1843, getitem_1844, getitem_1845, getitem_1846, getitem_1847, getitem_1848, getitem_1849, getitem_1850, getitem_1851, getitem_1852, getitem_1853, getitem_1854, getitem_1855, getitem_1856, getitem_1857, getitem_1858, getitem_1859, getitem_1860, getitem_1861, getitem_1862, getitem_1863, getitem_1864, getitem_1865, getitem_1866, getitem_1867, getitem_1868, getitem_1869, getitem_1870, getitem_1871, getitem_1872, getitem_1873, getitem_1874, getitem_1875, getitem_1876, getitem_1877, getitem_1878, getitem_1879, getitem_1880, getitem_1881, getitem_1882, getitem_1883, getitem_1884, getitem_1885, getitem_1886, getitem_1887, getitem_1888, getitem_1889, getitem_1890, getitem_1891, getitem_1892, 
getitem_1893, getitem_1894, getitem_1895, getitem_1896, getitem_1897, getitem_1898, getitem_1899, getitem_1900, getitem_1901, getitem_1902, getitem_1903, getitem_1904, getitem_1905, getitem_1906, getitem_1907, getitem_1908, getitem_1909, getitem_1910, getitem_1911, getitem_1912, getitem_1913, getitem_1914, getitem_1915, getitem_1916, getitem_1917, getitem_1918, getitem_1919, getitem_1920, getitem_1921, getitem_1922, getitem_1923]); getitem_1776 = getitem_1777 = getitem_1778 = getitem_1779 = getitem_1780 = getitem_1781 = getitem_1782 = getitem_1783 = getitem_1784 = getitem_1785 = getitem_1786 = getitem_1787 = getitem_1788 = getitem_1789 = getitem_1790 = getitem_1791 = getitem_1792 = getitem_1793 = getitem_1794 = getitem_1795 = getitem_1796 = getitem_1797 = getitem_1798 = getitem_1799 = getitem_1800 = getitem_1801 = getitem_1802 = getitem_1803 = getitem_1804 = getitem_1805 = getitem_1806 = getitem_1807 = getitem_1808 = getitem_1809 = getitem_1810 = getitem_1811 = getitem_1812 = getitem_1813 = getitem_1814 = getitem_1815 = getitem_1816 = getitem_1817 = getitem_1818 = getitem_1819 = getitem_1820 = getitem_1821 = getitem_1822 = getitem_1823 = getitem_1824 = getitem_1825 = getitem_1826 = getitem_1827 = getitem_1828 = getitem_1829 = getitem_1830 = getitem_1831 = getitem_1832 = getitem_1833 = getitem_1834 = getitem_1835 = getitem_1836 = getitem_1837 = getitem_1838 = getitem_1839 = getitem_1840 = getitem_1841 = getitem_1842 = getitem_1843 = getitem_1844 = getitem_1845 = getitem_1846 = getitem_1847 = getitem_1848 = getitem_1849 = getitem_1850 = getitem_1851 = getitem_1852 = getitem_1853 = getitem_1854 = getitem_1855 = getitem_1856 = getitem_1857 = getitem_1858 = getitem_1859 = getitem_1860 = getitem_1861 = getitem_1862 = getitem_1863 = getitem_1864 = getitem_1865 = getitem_1866 = getitem_1867 = getitem_1868 = getitem_1869 = getitem_1870 = getitem_1871 = getitem_1872 = getitem_1873 = getitem_1874 = getitem_1875 = getitem_1876 = getitem_1877 = getitem_1878 = getitem_1879 = getitem_1880 = getitem_1881 = getitem_1882 = getitem_1883 = getitem_1884 = getitem_1885 = getitem_1886 = getitem_1887 = getitem_1888 = getitem_1889 = getitem_1890 = getitem_1891 = getitem_1892 = getitem_1893 = getitem_1894 = getitem_1895 = getitem_1896 = getitem_1897 = getitem_1898 = getitem_1899 = getitem_1900 = getitem_1901 = getitem_1902 = getitem_1903 = getitem_1904 = getitem_1905 = getitem_1906 = getitem_1907 = getitem_1908 = getitem_1909 = getitem_1910 = getitem_1911 = getitem_1912 = getitem_1913 = getitem_1914 = getitem_1915 = getitem_1916 = getitem_1917 = getitem_1918 = getitem_1919 = getitem_1920 = getitem_1921 = getitem_1922 = getitem_1923 = None + getitem_1924: "f32[][]cuda:0" = _foreach_reciprocal[0] + getitem_1925: "f32[][]cuda:0" = _foreach_reciprocal[1] + getitem_1926: "f32[][]cuda:0" = _foreach_reciprocal[2] + getitem_1927: "f32[][]cuda:0" = _foreach_reciprocal[3] + getitem_1928: "f32[][]cuda:0" = _foreach_reciprocal[4] + getitem_1929: "f32[][]cuda:0" = _foreach_reciprocal[5] + getitem_1930: "f32[][]cuda:0" = _foreach_reciprocal[6] + getitem_1931: "f32[][]cuda:0" = _foreach_reciprocal[7] + getitem_1932: "f32[][]cuda:0" = _foreach_reciprocal[8] + getitem_1933: "f32[][]cuda:0" = _foreach_reciprocal[9] + getitem_1934: "f32[][]cuda:0" = _foreach_reciprocal[10] + getitem_1935: "f32[][]cuda:0" = _foreach_reciprocal[11] + getitem_1936: "f32[][]cuda:0" = _foreach_reciprocal[12] + getitem_1937: "f32[][]cuda:0" = _foreach_reciprocal[13] + getitem_1938: "f32[][]cuda:0" = _foreach_reciprocal[14] + getitem_1939: "f32[][]cuda:0" = 
_foreach_reciprocal[15] + getitem_1940: "f32[][]cuda:0" = _foreach_reciprocal[16] + getitem_1941: "f32[][]cuda:0" = _foreach_reciprocal[17] + getitem_1942: "f32[][]cuda:0" = _foreach_reciprocal[18] + getitem_1943: "f32[][]cuda:0" = _foreach_reciprocal[19] + getitem_1944: "f32[][]cuda:0" = _foreach_reciprocal[20] + getitem_1945: "f32[][]cuda:0" = _foreach_reciprocal[21] + getitem_1946: "f32[][]cuda:0" = _foreach_reciprocal[22] + getitem_1947: "f32[][]cuda:0" = _foreach_reciprocal[23] + getitem_1948: "f32[][]cuda:0" = _foreach_reciprocal[24] + getitem_1949: "f32[][]cuda:0" = _foreach_reciprocal[25] + getitem_1950: "f32[][]cuda:0" = _foreach_reciprocal[26] + getitem_1951: "f32[][]cuda:0" = _foreach_reciprocal[27] + getitem_1952: "f32[][]cuda:0" = _foreach_reciprocal[28] + getitem_1953: "f32[][]cuda:0" = _foreach_reciprocal[29] + getitem_1954: "f32[][]cuda:0" = _foreach_reciprocal[30] + getitem_1955: "f32[][]cuda:0" = _foreach_reciprocal[31] + getitem_1956: "f32[][]cuda:0" = _foreach_reciprocal[32] + getitem_1957: "f32[][]cuda:0" = _foreach_reciprocal[33] + getitem_1958: "f32[][]cuda:0" = _foreach_reciprocal[34] + getitem_1959: "f32[][]cuda:0" = _foreach_reciprocal[35] + getitem_1960: "f32[][]cuda:0" = _foreach_reciprocal[36] + getitem_1961: "f32[][]cuda:0" = _foreach_reciprocal[37] + getitem_1962: "f32[][]cuda:0" = _foreach_reciprocal[38] + getitem_1963: "f32[][]cuda:0" = _foreach_reciprocal[39] + getitem_1964: "f32[][]cuda:0" = _foreach_reciprocal[40] + getitem_1965: "f32[][]cuda:0" = _foreach_reciprocal[41] + getitem_1966: "f32[][]cuda:0" = _foreach_reciprocal[42] + getitem_1967: "f32[][]cuda:0" = _foreach_reciprocal[43] + getitem_1968: "f32[][]cuda:0" = _foreach_reciprocal[44] + getitem_1969: "f32[][]cuda:0" = _foreach_reciprocal[45] + getitem_1970: "f32[][]cuda:0" = _foreach_reciprocal[46] + getitem_1971: "f32[][]cuda:0" = _foreach_reciprocal[47] + getitem_1972: "f32[][]cuda:0" = _foreach_reciprocal[48] + getitem_1973: "f32[][]cuda:0" = _foreach_reciprocal[49] + getitem_1974: "f32[][]cuda:0" = _foreach_reciprocal[50] + getitem_1975: "f32[][]cuda:0" = _foreach_reciprocal[51] + getitem_1976: "f32[][]cuda:0" = _foreach_reciprocal[52] + getitem_1977: "f32[][]cuda:0" = _foreach_reciprocal[53] + getitem_1978: "f32[][]cuda:0" = _foreach_reciprocal[54] + getitem_1979: "f32[][]cuda:0" = _foreach_reciprocal[55] + getitem_1980: "f32[][]cuda:0" = _foreach_reciprocal[56] + getitem_1981: "f32[][]cuda:0" = _foreach_reciprocal[57] + getitem_1982: "f32[][]cuda:0" = _foreach_reciprocal[58] + getitem_1983: "f32[][]cuda:0" = _foreach_reciprocal[59] + getitem_1984: "f32[][]cuda:0" = _foreach_reciprocal[60] + getitem_1985: "f32[][]cuda:0" = _foreach_reciprocal[61] + getitem_1986: "f32[][]cuda:0" = _foreach_reciprocal[62] + getitem_1987: "f32[][]cuda:0" = _foreach_reciprocal[63] + getitem_1988: "f32[][]cuda:0" = _foreach_reciprocal[64] + getitem_1989: "f32[][]cuda:0" = _foreach_reciprocal[65] + getitem_1990: "f32[][]cuda:0" = _foreach_reciprocal[66] + getitem_1991: "f32[][]cuda:0" = _foreach_reciprocal[67] + getitem_1992: "f32[][]cuda:0" = _foreach_reciprocal[68] + getitem_1993: "f32[][]cuda:0" = _foreach_reciprocal[69] + getitem_1994: "f32[][]cuda:0" = _foreach_reciprocal[70] + getitem_1995: "f32[][]cuda:0" = _foreach_reciprocal[71] + getitem_1996: "f32[][]cuda:0" = _foreach_reciprocal[72] + getitem_1997: "f32[][]cuda:0" = _foreach_reciprocal[73] + getitem_1998: "f32[][]cuda:0" = _foreach_reciprocal[74] + getitem_1999: "f32[][]cuda:0" = _foreach_reciprocal[75] + getitem_2000: "f32[][]cuda:0" = 
_foreach_reciprocal[76] + getitem_2001: "f32[][]cuda:0" = _foreach_reciprocal[77] + getitem_2002: "f32[][]cuda:0" = _foreach_reciprocal[78] + getitem_2003: "f32[][]cuda:0" = _foreach_reciprocal[79] + getitem_2004: "f32[][]cuda:0" = _foreach_reciprocal[80] + getitem_2005: "f32[][]cuda:0" = _foreach_reciprocal[81] + getitem_2006: "f32[][]cuda:0" = _foreach_reciprocal[82] + getitem_2007: "f32[][]cuda:0" = _foreach_reciprocal[83] + getitem_2008: "f32[][]cuda:0" = _foreach_reciprocal[84] + getitem_2009: "f32[][]cuda:0" = _foreach_reciprocal[85] + getitem_2010: "f32[][]cuda:0" = _foreach_reciprocal[86] + getitem_2011: "f32[][]cuda:0" = _foreach_reciprocal[87] + getitem_2012: "f32[][]cuda:0" = _foreach_reciprocal[88] + getitem_2013: "f32[][]cuda:0" = _foreach_reciprocal[89] + getitem_2014: "f32[][]cuda:0" = _foreach_reciprocal[90] + getitem_2015: "f32[][]cuda:0" = _foreach_reciprocal[91] + getitem_2016: "f32[][]cuda:0" = _foreach_reciprocal[92] + getitem_2017: "f32[][]cuda:0" = _foreach_reciprocal[93] + getitem_2018: "f32[][]cuda:0" = _foreach_reciprocal[94] + getitem_2019: "f32[][]cuda:0" = _foreach_reciprocal[95] + getitem_2020: "f32[][]cuda:0" = _foreach_reciprocal[96] + getitem_2021: "f32[][]cuda:0" = _foreach_reciprocal[97] + getitem_2022: "f32[][]cuda:0" = _foreach_reciprocal[98] + getitem_2023: "f32[][]cuda:0" = _foreach_reciprocal[99] + getitem_2024: "f32[][]cuda:0" = _foreach_reciprocal[100] + getitem_2025: "f32[][]cuda:0" = _foreach_reciprocal[101] + getitem_2026: "f32[][]cuda:0" = _foreach_reciprocal[102] + getitem_2027: "f32[][]cuda:0" = _foreach_reciprocal[103] + getitem_2028: "f32[][]cuda:0" = _foreach_reciprocal[104] + getitem_2029: "f32[][]cuda:0" = _foreach_reciprocal[105] + getitem_2030: "f32[][]cuda:0" = _foreach_reciprocal[106] + getitem_2031: "f32[][]cuda:0" = _foreach_reciprocal[107] + getitem_2032: "f32[][]cuda:0" = _foreach_reciprocal[108] + getitem_2033: "f32[][]cuda:0" = _foreach_reciprocal[109] + getitem_2034: "f32[][]cuda:0" = _foreach_reciprocal[110] + getitem_2035: "f32[][]cuda:0" = _foreach_reciprocal[111] + getitem_2036: "f32[][]cuda:0" = _foreach_reciprocal[112] + getitem_2037: "f32[][]cuda:0" = _foreach_reciprocal[113] + getitem_2038: "f32[][]cuda:0" = _foreach_reciprocal[114] + getitem_2039: "f32[][]cuda:0" = _foreach_reciprocal[115] + getitem_2040: "f32[][]cuda:0" = _foreach_reciprocal[116] + getitem_2041: "f32[][]cuda:0" = _foreach_reciprocal[117] + getitem_2042: "f32[][]cuda:0" = _foreach_reciprocal[118] + getitem_2043: "f32[][]cuda:0" = _foreach_reciprocal[119] + getitem_2044: "f32[][]cuda:0" = _foreach_reciprocal[120] + getitem_2045: "f32[][]cuda:0" = _foreach_reciprocal[121] + getitem_2046: "f32[][]cuda:0" = _foreach_reciprocal[122] + getitem_2047: "f32[][]cuda:0" = _foreach_reciprocal[123] + getitem_2048: "f32[][]cuda:0" = _foreach_reciprocal[124] + getitem_2049: "f32[][]cuda:0" = _foreach_reciprocal[125] + getitem_2050: "f32[][]cuda:0" = _foreach_reciprocal[126] + getitem_2051: "f32[][]cuda:0" = _foreach_reciprocal[127] + getitem_2052: "f32[][]cuda:0" = _foreach_reciprocal[128] + getitem_2053: "f32[][]cuda:0" = _foreach_reciprocal[129] + getitem_2054: "f32[][]cuda:0" = _foreach_reciprocal[130] + getitem_2055: "f32[][]cuda:0" = _foreach_reciprocal[131] + getitem_2056: "f32[][]cuda:0" = _foreach_reciprocal[132] + getitem_2057: "f32[][]cuda:0" = _foreach_reciprocal[133] + getitem_2058: "f32[][]cuda:0" = _foreach_reciprocal[134] + getitem_2059: "f32[][]cuda:0" = _foreach_reciprocal[135] + getitem_2060: "f32[][]cuda:0" = _foreach_reciprocal[136] + 
getitem_2061: "f32[][]cuda:0" = _foreach_reciprocal[137] + getitem_2062: "f32[][]cuda:0" = _foreach_reciprocal[138] + getitem_2063: "f32[][]cuda:0" = _foreach_reciprocal[139] + getitem_2064: "f32[][]cuda:0" = _foreach_reciprocal[140] + getitem_2065: "f32[][]cuda:0" = _foreach_reciprocal[141] + getitem_2066: "f32[][]cuda:0" = _foreach_reciprocal[142] + getitem_2067: "f32[][]cuda:0" = _foreach_reciprocal[143] + getitem_2068: "f32[][]cuda:0" = _foreach_reciprocal[144] + getitem_2069: "f32[][]cuda:0" = _foreach_reciprocal[145] + getitem_2070: "f32[][]cuda:0" = _foreach_reciprocal[146] + getitem_2071: "f32[][]cuda:0" = _foreach_reciprocal[147]; _foreach_reciprocal = None + + # File: /data/users/jjwu/a/pytorch/torch/optim/adam.py:567 in _multi_tensor_adam, code: torch._foreach_sqrt_(bias_correction2) + _foreach_sqrt = torch.ops.aten._foreach_sqrt.default([getitem_1628, getitem_1629, getitem_1630, getitem_1631, getitem_1632, getitem_1633, getitem_1634, getitem_1635, getitem_1636, getitem_1637, getitem_1638, getitem_1639, getitem_1640, getitem_1641, getitem_1642, getitem_1643, getitem_1644, getitem_1645, getitem_1646, getitem_1647, getitem_1648, getitem_1649, getitem_1650, getitem_1651, getitem_1652, getitem_1653, getitem_1654, getitem_1655, getitem_1656, getitem_1657, getitem_1658, getitem_1659, getitem_1660, getitem_1661, getitem_1662, getitem_1663, getitem_1664, getitem_1665, getitem_1666, getitem_1667, getitem_1668, getitem_1669, getitem_1670, getitem_1671, getitem_1672, getitem_1673, getitem_1674, getitem_1675, getitem_1676, getitem_1677, getitem_1678, getitem_1679, getitem_1680, getitem_1681, getitem_1682, getitem_1683, getitem_1684, getitem_1685, getitem_1686, getitem_1687, getitem_1688, getitem_1689, getitem_1690, getitem_1691, getitem_1692, getitem_1693, getitem_1694, getitem_1695, getitem_1696, getitem_1697, getitem_1698, getitem_1699, getitem_1700, getitem_1701, getitem_1702, getitem_1703, getitem_1704, getitem_1705, getitem_1706, getitem_1707, getitem_1708, getitem_1709, getitem_1710, getitem_1711, getitem_1712, getitem_1713, getitem_1714, getitem_1715, getitem_1716, getitem_1717, getitem_1718, getitem_1719, getitem_1720, getitem_1721, getitem_1722, getitem_1723, getitem_1724, getitem_1725, getitem_1726, getitem_1727, getitem_1728, getitem_1729, getitem_1730, getitem_1731, getitem_1732, getitem_1733, getitem_1734, getitem_1735, getitem_1736, getitem_1737, getitem_1738, getitem_1739, getitem_1740, getitem_1741, getitem_1742, getitem_1743, getitem_1744, getitem_1745, getitem_1746, getitem_1747, getitem_1748, getitem_1749, getitem_1750, getitem_1751, getitem_1752, getitem_1753, getitem_1754, getitem_1755, getitem_1756, getitem_1757, getitem_1758, getitem_1759, getitem_1760, getitem_1761, getitem_1762, getitem_1763, getitem_1764, getitem_1765, getitem_1766, getitem_1767, getitem_1768, getitem_1769, getitem_1770, getitem_1771, getitem_1772, getitem_1773, getitem_1774, getitem_1775]); getitem_1628 = getitem_1629 = getitem_1630 = getitem_1631 = getitem_1632 = getitem_1633 = getitem_1634 = getitem_1635 = getitem_1636 = getitem_1637 = getitem_1638 = getitem_1639 = getitem_1640 = getitem_1641 = getitem_1642 = getitem_1643 = getitem_1644 = getitem_1645 = getitem_1646 = getitem_1647 = getitem_1648 = getitem_1649 = getitem_1650 = getitem_1651 = getitem_1652 = getitem_1653 = getitem_1654 = getitem_1655 = getitem_1656 = getitem_1657 = getitem_1658 = getitem_1659 = getitem_1660 = getitem_1661 = getitem_1662 = getitem_1663 = getitem_1664 = getitem_1665 = getitem_1666 = getitem_1667 = getitem_1668 = 
getitem_1669 = getitem_1670 = getitem_1671 = getitem_1672 = getitem_1673 = getitem_1674 = getitem_1675 = getitem_1676 = getitem_1677 = getitem_1678 = getitem_1679 = getitem_1680 = getitem_1681 = getitem_1682 = getitem_1683 = getitem_1684 = getitem_1685 = getitem_1686 = getitem_1687 = getitem_1688 = getitem_1689 = getitem_1690 = getitem_1691 = getitem_1692 = getitem_1693 = getitem_1694 = getitem_1695 = getitem_1696 = getitem_1697 = getitem_1698 = getitem_1699 = getitem_1700 = getitem_1701 = getitem_1702 = getitem_1703 = getitem_1704 = getitem_1705 = getitem_1706 = getitem_1707 = getitem_1708 = getitem_1709 = getitem_1710 = getitem_1711 = getitem_1712 = getitem_1713 = getitem_1714 = getitem_1715 = getitem_1716 = getitem_1717 = getitem_1718 = getitem_1719 = getitem_1720 = getitem_1721 = getitem_1722 = getitem_1723 = getitem_1724 = getitem_1725 = getitem_1726 = getitem_1727 = getitem_1728 = getitem_1729 = getitem_1730 = getitem_1731 = getitem_1732 = getitem_1733 = getitem_1734 = getitem_1735 = getitem_1736 = getitem_1737 = getitem_1738 = getitem_1739 = getitem_1740 = getitem_1741 = getitem_1742 = getitem_1743 = getitem_1744 = getitem_1745 = getitem_1746 = getitem_1747 = getitem_1748 = getitem_1749 = getitem_1750 = getitem_1751 = getitem_1752 = getitem_1753 = getitem_1754 = getitem_1755 = getitem_1756 = getitem_1757 = getitem_1758 = getitem_1759 = getitem_1760 = getitem_1761 = getitem_1762 = getitem_1763 = getitem_1764 = getitem_1765 = getitem_1766 = getitem_1767 = getitem_1768 = getitem_1769 = getitem_1770 = getitem_1771 = getitem_1772 = getitem_1773 = getitem_1774 = getitem_1775 = None + getitem_2072: "f32[][]cuda:0" = _foreach_sqrt[0] + getitem_2073: "f32[][]cuda:0" = _foreach_sqrt[1] + getitem_2074: "f32[][]cuda:0" = _foreach_sqrt[2] + getitem_2075: "f32[][]cuda:0" = _foreach_sqrt[3] + getitem_2076: "f32[][]cuda:0" = _foreach_sqrt[4] + getitem_2077: "f32[][]cuda:0" = _foreach_sqrt[5] + getitem_2078: "f32[][]cuda:0" = _foreach_sqrt[6] + getitem_2079: "f32[][]cuda:0" = _foreach_sqrt[7] + getitem_2080: "f32[][]cuda:0" = _foreach_sqrt[8] + getitem_2081: "f32[][]cuda:0" = _foreach_sqrt[9] + getitem_2082: "f32[][]cuda:0" = _foreach_sqrt[10] + getitem_2083: "f32[][]cuda:0" = _foreach_sqrt[11] + getitem_2084: "f32[][]cuda:0" = _foreach_sqrt[12] + getitem_2085: "f32[][]cuda:0" = _foreach_sqrt[13] + getitem_2086: "f32[][]cuda:0" = _foreach_sqrt[14] + getitem_2087: "f32[][]cuda:0" = _foreach_sqrt[15] + getitem_2088: "f32[][]cuda:0" = _foreach_sqrt[16] + getitem_2089: "f32[][]cuda:0" = _foreach_sqrt[17] + getitem_2090: "f32[][]cuda:0" = _foreach_sqrt[18] + getitem_2091: "f32[][]cuda:0" = _foreach_sqrt[19] + getitem_2092: "f32[][]cuda:0" = _foreach_sqrt[20] + getitem_2093: "f32[][]cuda:0" = _foreach_sqrt[21] + getitem_2094: "f32[][]cuda:0" = _foreach_sqrt[22] + getitem_2095: "f32[][]cuda:0" = _foreach_sqrt[23] + getitem_2096: "f32[][]cuda:0" = _foreach_sqrt[24] + getitem_2097: "f32[][]cuda:0" = _foreach_sqrt[25] + getitem_2098: "f32[][]cuda:0" = _foreach_sqrt[26] + getitem_2099: "f32[][]cuda:0" = _foreach_sqrt[27] + getitem_2100: "f32[][]cuda:0" = _foreach_sqrt[28] + getitem_2101: "f32[][]cuda:0" = _foreach_sqrt[29] + getitem_2102: "f32[][]cuda:0" = _foreach_sqrt[30] + getitem_2103: "f32[][]cuda:0" = _foreach_sqrt[31] + getitem_2104: "f32[][]cuda:0" = _foreach_sqrt[32] + getitem_2105: "f32[][]cuda:0" = _foreach_sqrt[33] + getitem_2106: "f32[][]cuda:0" = _foreach_sqrt[34] + getitem_2107: "f32[][]cuda:0" = _foreach_sqrt[35] + getitem_2108: "f32[][]cuda:0" = _foreach_sqrt[36] + getitem_2109: 
"f32[][]cuda:0" = _foreach_sqrt[37] + getitem_2110: "f32[][]cuda:0" = _foreach_sqrt[38] + getitem_2111: "f32[][]cuda:0" = _foreach_sqrt[39] + getitem_2112: "f32[][]cuda:0" = _foreach_sqrt[40] + getitem_2113: "f32[][]cuda:0" = _foreach_sqrt[41] + getitem_2114: "f32[][]cuda:0" = _foreach_sqrt[42] + getitem_2115: "f32[][]cuda:0" = _foreach_sqrt[43] + getitem_2116: "f32[][]cuda:0" = _foreach_sqrt[44] + getitem_2117: "f32[][]cuda:0" = _foreach_sqrt[45] + getitem_2118: "f32[][]cuda:0" = _foreach_sqrt[46] + getitem_2119: "f32[][]cuda:0" = _foreach_sqrt[47] + getitem_2120: "f32[][]cuda:0" = _foreach_sqrt[48] + getitem_2121: "f32[][]cuda:0" = _foreach_sqrt[49] + getitem_2122: "f32[][]cuda:0" = _foreach_sqrt[50] + getitem_2123: "f32[][]cuda:0" = _foreach_sqrt[51] + getitem_2124: "f32[][]cuda:0" = _foreach_sqrt[52] + getitem_2125: "f32[][]cuda:0" = _foreach_sqrt[53] + getitem_2126: "f32[][]cuda:0" = _foreach_sqrt[54] + getitem_2127: "f32[][]cuda:0" = _foreach_sqrt[55] + getitem_2128: "f32[][]cuda:0" = _foreach_sqrt[56] + getitem_2129: "f32[][]cuda:0" = _foreach_sqrt[57] + getitem_2130: "f32[][]cuda:0" = _foreach_sqrt[58] + getitem_2131: "f32[][]cuda:0" = _foreach_sqrt[59] + getitem_2132: "f32[][]cuda:0" = _foreach_sqrt[60] + getitem_2133: "f32[][]cuda:0" = _foreach_sqrt[61] + getitem_2134: "f32[][]cuda:0" = _foreach_sqrt[62] + getitem_2135: "f32[][]cuda:0" = _foreach_sqrt[63] + getitem_2136: "f32[][]cuda:0" = _foreach_sqrt[64] + getitem_2137: "f32[][]cuda:0" = _foreach_sqrt[65] + getitem_2138: "f32[][]cuda:0" = _foreach_sqrt[66] + getitem_2139: "f32[][]cuda:0" = _foreach_sqrt[67] + getitem_2140: "f32[][]cuda:0" = _foreach_sqrt[68] + getitem_2141: "f32[][]cuda:0" = _foreach_sqrt[69] + getitem_2142: "f32[][]cuda:0" = _foreach_sqrt[70] + getitem_2143: "f32[][]cuda:0" = _foreach_sqrt[71] + getitem_2144: "f32[][]cuda:0" = _foreach_sqrt[72] + getitem_2145: "f32[][]cuda:0" = _foreach_sqrt[73] + getitem_2146: "f32[][]cuda:0" = _foreach_sqrt[74] + getitem_2147: "f32[][]cuda:0" = _foreach_sqrt[75] + getitem_2148: "f32[][]cuda:0" = _foreach_sqrt[76] + getitem_2149: "f32[][]cuda:0" = _foreach_sqrt[77] + getitem_2150: "f32[][]cuda:0" = _foreach_sqrt[78] + getitem_2151: "f32[][]cuda:0" = _foreach_sqrt[79] + getitem_2152: "f32[][]cuda:0" = _foreach_sqrt[80] + getitem_2153: "f32[][]cuda:0" = _foreach_sqrt[81] + getitem_2154: "f32[][]cuda:0" = _foreach_sqrt[82] + getitem_2155: "f32[][]cuda:0" = _foreach_sqrt[83] + getitem_2156: "f32[][]cuda:0" = _foreach_sqrt[84] + getitem_2157: "f32[][]cuda:0" = _foreach_sqrt[85] + getitem_2158: "f32[][]cuda:0" = _foreach_sqrt[86] + getitem_2159: "f32[][]cuda:0" = _foreach_sqrt[87] + getitem_2160: "f32[][]cuda:0" = _foreach_sqrt[88] + getitem_2161: "f32[][]cuda:0" = _foreach_sqrt[89] + getitem_2162: "f32[][]cuda:0" = _foreach_sqrt[90] + getitem_2163: "f32[][]cuda:0" = _foreach_sqrt[91] + getitem_2164: "f32[][]cuda:0" = _foreach_sqrt[92] + getitem_2165: "f32[][]cuda:0" = _foreach_sqrt[93] + getitem_2166: "f32[][]cuda:0" = _foreach_sqrt[94] + getitem_2167: "f32[][]cuda:0" = _foreach_sqrt[95] + getitem_2168: "f32[][]cuda:0" = _foreach_sqrt[96] + getitem_2169: "f32[][]cuda:0" = _foreach_sqrt[97] + getitem_2170: "f32[][]cuda:0" = _foreach_sqrt[98] + getitem_2171: "f32[][]cuda:0" = _foreach_sqrt[99] + getitem_2172: "f32[][]cuda:0" = _foreach_sqrt[100] + getitem_2173: "f32[][]cuda:0" = _foreach_sqrt[101] + getitem_2174: "f32[][]cuda:0" = _foreach_sqrt[102] + getitem_2175: "f32[][]cuda:0" = _foreach_sqrt[103] + getitem_2176: "f32[][]cuda:0" = _foreach_sqrt[104] + getitem_2177: 
"f32[][]cuda:0" = _foreach_sqrt[105] + getitem_2178: "f32[][]cuda:0" = _foreach_sqrt[106] + getitem_2179: "f32[][]cuda:0" = _foreach_sqrt[107] + getitem_2180: "f32[][]cuda:0" = _foreach_sqrt[108] + getitem_2181: "f32[][]cuda:0" = _foreach_sqrt[109] + getitem_2182: "f32[][]cuda:0" = _foreach_sqrt[110] + getitem_2183: "f32[][]cuda:0" = _foreach_sqrt[111] + getitem_2184: "f32[][]cuda:0" = _foreach_sqrt[112] + getitem_2185: "f32[][]cuda:0" = _foreach_sqrt[113] + getitem_2186: "f32[][]cuda:0" = _foreach_sqrt[114] + getitem_2187: "f32[][]cuda:0" = _foreach_sqrt[115] + getitem_2188: "f32[][]cuda:0" = _foreach_sqrt[116] + getitem_2189: "f32[][]cuda:0" = _foreach_sqrt[117] + getitem_2190: "f32[][]cuda:0" = _foreach_sqrt[118] + getitem_2191: "f32[][]cuda:0" = _foreach_sqrt[119] + getitem_2192: "f32[][]cuda:0" = _foreach_sqrt[120] + getitem_2193: "f32[][]cuda:0" = _foreach_sqrt[121] + getitem_2194: "f32[][]cuda:0" = _foreach_sqrt[122] + getitem_2195: "f32[][]cuda:0" = _foreach_sqrt[123] + getitem_2196: "f32[][]cuda:0" = _foreach_sqrt[124] + getitem_2197: "f32[][]cuda:0" = _foreach_sqrt[125] + getitem_2198: "f32[][]cuda:0" = _foreach_sqrt[126] + getitem_2199: "f32[][]cuda:0" = _foreach_sqrt[127] + getitem_2200: "f32[][]cuda:0" = _foreach_sqrt[128] + getitem_2201: "f32[][]cuda:0" = _foreach_sqrt[129] + getitem_2202: "f32[][]cuda:0" = _foreach_sqrt[130] + getitem_2203: "f32[][]cuda:0" = _foreach_sqrt[131] + getitem_2204: "f32[][]cuda:0" = _foreach_sqrt[132] + getitem_2205: "f32[][]cuda:0" = _foreach_sqrt[133] + getitem_2206: "f32[][]cuda:0" = _foreach_sqrt[134] + getitem_2207: "f32[][]cuda:0" = _foreach_sqrt[135] + getitem_2208: "f32[][]cuda:0" = _foreach_sqrt[136] + getitem_2209: "f32[][]cuda:0" = _foreach_sqrt[137] + getitem_2210: "f32[][]cuda:0" = _foreach_sqrt[138] + getitem_2211: "f32[][]cuda:0" = _foreach_sqrt[139] + getitem_2212: "f32[][]cuda:0" = _foreach_sqrt[140] + getitem_2213: "f32[][]cuda:0" = _foreach_sqrt[141] + getitem_2214: "f32[][]cuda:0" = _foreach_sqrt[142] + getitem_2215: "f32[][]cuda:0" = _foreach_sqrt[143] + getitem_2216: "f32[][]cuda:0" = _foreach_sqrt[144] + getitem_2217: "f32[][]cuda:0" = _foreach_sqrt[145] + getitem_2218: "f32[][]cuda:0" = _foreach_sqrt[146] + getitem_2219: "f32[][]cuda:0" = _foreach_sqrt[147]; _foreach_sqrt = None + + # File: /data/users/jjwu/a/pytorch/torch/optim/adam.py:582 in _multi_tensor_adam, code: exp_avg_sq_sqrt = torch._foreach_sqrt(device_exp_avg_sqs) + _foreach_sqrt_1 = torch.ops.aten._foreach_sqrt.default([getitem_888, getitem_889, getitem_890, getitem_891, getitem_892, getitem_893, getitem_894, getitem_895, getitem_896, getitem_897, getitem_898, getitem_899, getitem_900, getitem_901, getitem_902, getitem_903, getitem_904, getitem_905, getitem_906, getitem_907, getitem_908, getitem_909, getitem_910, getitem_911, getitem_912, getitem_913, getitem_914, getitem_915, getitem_916, getitem_917, getitem_918, getitem_919, getitem_920, getitem_921, getitem_922, getitem_923, getitem_924, getitem_925, getitem_926, getitem_927, getitem_928, getitem_929, getitem_930, getitem_931, getitem_932, getitem_933, getitem_934, getitem_935, getitem_936, getitem_937, getitem_938, getitem_939, getitem_940, getitem_941, getitem_942, getitem_943, getitem_944, getitem_945, getitem_946, getitem_947, getitem_948, getitem_949, getitem_950, getitem_951, getitem_952, getitem_953, getitem_954, getitem_955, getitem_956, getitem_957, getitem_958, getitem_959, getitem_960, getitem_961, getitem_962, getitem_963, getitem_964, getitem_965, getitem_966, getitem_967, getitem_968, 
getitem_969, getitem_970, getitem_971, getitem_972, getitem_973, getitem_974, getitem_975, getitem_976, getitem_977, getitem_978, getitem_979, getitem_980, getitem_981, getitem_982, getitem_983, getitem_984, getitem_985, getitem_986, getitem_987, getitem_988, getitem_989, getitem_990, getitem_991, getitem_992, getitem_993, getitem_994, getitem_995, getitem_996, getitem_997, getitem_998, getitem_999, getitem_1000, getitem_1001, getitem_1002, getitem_1003, getitem_1004, getitem_1005, getitem_1006, getitem_1007, getitem_1008, getitem_1009, getitem_1010, getitem_1011, getitem_1012, getitem_1013, getitem_1014, getitem_1015, getitem_1016, getitem_1017, getitem_1018, getitem_1019, getitem_1020, getitem_1021, getitem_1022, getitem_1023, getitem_1024, getitem_1025, getitem_1026, getitem_1027, getitem_1028, getitem_1029, getitem_1030, getitem_1031, getitem_1032, getitem_1033, getitem_1034, getitem_1035]) + getitem_2220: "f32[50304, 768][768, 1]cuda:0" = _foreach_sqrt_1[0] + getitem_2221: "f32[1024, 768][768, 1]cuda:0" = _foreach_sqrt_1[1] + getitem_2222: "f32[768][1]cuda:0" = _foreach_sqrt_1[2] + getitem_2223: "f32[768][1]cuda:0" = _foreach_sqrt_1[3] + getitem_2224: "f32[2304, 768][768, 1]cuda:0" = _foreach_sqrt_1[4] + getitem_2225: "f32[2304][1]cuda:0" = _foreach_sqrt_1[5] + getitem_2226: "f32[768, 768][768, 1]cuda:0" = _foreach_sqrt_1[6] + getitem_2227: "f32[768][1]cuda:0" = _foreach_sqrt_1[7] + getitem_2228: "f32[768][1]cuda:0" = _foreach_sqrt_1[8] + getitem_2229: "f32[768][1]cuda:0" = _foreach_sqrt_1[9] + getitem_2230: "f32[3072, 768][768, 1]cuda:0" = _foreach_sqrt_1[10] + getitem_2231: "f32[3072][1]cuda:0" = _foreach_sqrt_1[11] + getitem_2232: "f32[768, 3072][3072, 1]cuda:0" = _foreach_sqrt_1[12] + getitem_2233: "f32[768][1]cuda:0" = _foreach_sqrt_1[13] + getitem_2234: "f32[768][1]cuda:0" = _foreach_sqrt_1[14] + getitem_2235: "f32[768][1]cuda:0" = _foreach_sqrt_1[15] + getitem_2236: "f32[2304, 768][768, 1]cuda:0" = _foreach_sqrt_1[16] + getitem_2237: "f32[2304][1]cuda:0" = _foreach_sqrt_1[17] + getitem_2238: "f32[768, 768][768, 1]cuda:0" = _foreach_sqrt_1[18] + getitem_2239: "f32[768][1]cuda:0" = _foreach_sqrt_1[19] + getitem_2240: "f32[768][1]cuda:0" = _foreach_sqrt_1[20] + getitem_2241: "f32[768][1]cuda:0" = _foreach_sqrt_1[21] + getitem_2242: "f32[3072, 768][768, 1]cuda:0" = _foreach_sqrt_1[22] + getitem_2243: "f32[3072][1]cuda:0" = _foreach_sqrt_1[23] + getitem_2244: "f32[768, 3072][3072, 1]cuda:0" = _foreach_sqrt_1[24] + getitem_2245: "f32[768][1]cuda:0" = _foreach_sqrt_1[25] + getitem_2246: "f32[768][1]cuda:0" = _foreach_sqrt_1[26] + getitem_2247: "f32[768][1]cuda:0" = _foreach_sqrt_1[27] + getitem_2248: "f32[2304, 768][768, 1]cuda:0" = _foreach_sqrt_1[28] + getitem_2249: "f32[2304][1]cuda:0" = _foreach_sqrt_1[29] + getitem_2250: "f32[768, 768][768, 1]cuda:0" = _foreach_sqrt_1[30] + getitem_2251: "f32[768][1]cuda:0" = _foreach_sqrt_1[31] + getitem_2252: "f32[768][1]cuda:0" = _foreach_sqrt_1[32] + getitem_2253: "f32[768][1]cuda:0" = _foreach_sqrt_1[33] + getitem_2254: "f32[3072, 768][768, 1]cuda:0" = _foreach_sqrt_1[34] + getitem_2255: "f32[3072][1]cuda:0" = _foreach_sqrt_1[35] + getitem_2256: "f32[768, 3072][3072, 1]cuda:0" = _foreach_sqrt_1[36] + getitem_2257: "f32[768][1]cuda:0" = _foreach_sqrt_1[37] + getitem_2258: "f32[768][1]cuda:0" = _foreach_sqrt_1[38] + getitem_2259: "f32[768][1]cuda:0" = _foreach_sqrt_1[39] + getitem_2260: "f32[2304, 768][768, 1]cuda:0" = _foreach_sqrt_1[40] + getitem_2261: "f32[2304][1]cuda:0" = _foreach_sqrt_1[41] + getitem_2262: "f32[768, 768][768, 1]cuda:0" = 
_foreach_sqrt_1[42] + getitem_2263: "f32[768][1]cuda:0" = _foreach_sqrt_1[43] + getitem_2264: "f32[768][1]cuda:0" = _foreach_sqrt_1[44] + getitem_2265: "f32[768][1]cuda:0" = _foreach_sqrt_1[45] + getitem_2266: "f32[3072, 768][768, 1]cuda:0" = _foreach_sqrt_1[46] + getitem_2267: "f32[3072][1]cuda:0" = _foreach_sqrt_1[47] + getitem_2268: "f32[768, 3072][3072, 1]cuda:0" = _foreach_sqrt_1[48] + getitem_2269: "f32[768][1]cuda:0" = _foreach_sqrt_1[49] + getitem_2270: "f32[768][1]cuda:0" = _foreach_sqrt_1[50] + getitem_2271: "f32[768][1]cuda:0" = _foreach_sqrt_1[51] + getitem_2272: "f32[2304, 768][768, 1]cuda:0" = _foreach_sqrt_1[52] + getitem_2273: "f32[2304][1]cuda:0" = _foreach_sqrt_1[53] + getitem_2274: "f32[768, 768][768, 1]cuda:0" = _foreach_sqrt_1[54] + getitem_2275: "f32[768][1]cuda:0" = _foreach_sqrt_1[55] + getitem_2276: "f32[768][1]cuda:0" = _foreach_sqrt_1[56] + getitem_2277: "f32[768][1]cuda:0" = _foreach_sqrt_1[57] + getitem_2278: "f32[3072, 768][768, 1]cuda:0" = _foreach_sqrt_1[58] + getitem_2279: "f32[3072][1]cuda:0" = _foreach_sqrt_1[59] + getitem_2280: "f32[768, 3072][3072, 1]cuda:0" = _foreach_sqrt_1[60] + getitem_2281: "f32[768][1]cuda:0" = _foreach_sqrt_1[61] + getitem_2282: "f32[768][1]cuda:0" = _foreach_sqrt_1[62] + getitem_2283: "f32[768][1]cuda:0" = _foreach_sqrt_1[63] + getitem_2284: "f32[2304, 768][768, 1]cuda:0" = _foreach_sqrt_1[64] + getitem_2285: "f32[2304][1]cuda:0" = _foreach_sqrt_1[65] + getitem_2286: "f32[768, 768][768, 1]cuda:0" = _foreach_sqrt_1[66] + getitem_2287: "f32[768][1]cuda:0" = _foreach_sqrt_1[67] + getitem_2288: "f32[768][1]cuda:0" = _foreach_sqrt_1[68] + getitem_2289: "f32[768][1]cuda:0" = _foreach_sqrt_1[69] + getitem_2290: "f32[3072, 768][768, 1]cuda:0" = _foreach_sqrt_1[70] + getitem_2291: "f32[3072][1]cuda:0" = _foreach_sqrt_1[71] + getitem_2292: "f32[768, 3072][3072, 1]cuda:0" = _foreach_sqrt_1[72] + getitem_2293: "f32[768][1]cuda:0" = _foreach_sqrt_1[73] + getitem_2294: "f32[768][1]cuda:0" = _foreach_sqrt_1[74] + getitem_2295: "f32[768][1]cuda:0" = _foreach_sqrt_1[75] + getitem_2296: "f32[2304, 768][768, 1]cuda:0" = _foreach_sqrt_1[76] + getitem_2297: "f32[2304][1]cuda:0" = _foreach_sqrt_1[77] + getitem_2298: "f32[768, 768][768, 1]cuda:0" = _foreach_sqrt_1[78] + getitem_2299: "f32[768][1]cuda:0" = _foreach_sqrt_1[79] + getitem_2300: "f32[768][1]cuda:0" = _foreach_sqrt_1[80] + getitem_2301: "f32[768][1]cuda:0" = _foreach_sqrt_1[81] + getitem_2302: "f32[3072, 768][768, 1]cuda:0" = _foreach_sqrt_1[82] + getitem_2303: "f32[3072][1]cuda:0" = _foreach_sqrt_1[83] + getitem_2304: "f32[768, 3072][3072, 1]cuda:0" = _foreach_sqrt_1[84] + getitem_2305: "f32[768][1]cuda:0" = _foreach_sqrt_1[85] + getitem_2306: "f32[768][1]cuda:0" = _foreach_sqrt_1[86] + getitem_2307: "f32[768][1]cuda:0" = _foreach_sqrt_1[87] + getitem_2308: "f32[2304, 768][768, 1]cuda:0" = _foreach_sqrt_1[88] + getitem_2309: "f32[2304][1]cuda:0" = _foreach_sqrt_1[89] + getitem_2310: "f32[768, 768][768, 1]cuda:0" = _foreach_sqrt_1[90] + getitem_2311: "f32[768][1]cuda:0" = _foreach_sqrt_1[91] + getitem_2312: "f32[768][1]cuda:0" = _foreach_sqrt_1[92] + getitem_2313: "f32[768][1]cuda:0" = _foreach_sqrt_1[93] + getitem_2314: "f32[3072, 768][768, 1]cuda:0" = _foreach_sqrt_1[94] + getitem_2315: "f32[3072][1]cuda:0" = _foreach_sqrt_1[95] + getitem_2316: "f32[768, 3072][3072, 1]cuda:0" = _foreach_sqrt_1[96] + getitem_2317: "f32[768][1]cuda:0" = _foreach_sqrt_1[97] + getitem_2318: "f32[768][1]cuda:0" = _foreach_sqrt_1[98] + getitem_2319: "f32[768][1]cuda:0" = _foreach_sqrt_1[99] + getitem_2320: 
"f32[2304, 768][768, 1]cuda:0" = _foreach_sqrt_1[100] + getitem_2321: "f32[2304][1]cuda:0" = _foreach_sqrt_1[101] + getitem_2322: "f32[768, 768][768, 1]cuda:0" = _foreach_sqrt_1[102] + getitem_2323: "f32[768][1]cuda:0" = _foreach_sqrt_1[103] + getitem_2324: "f32[768][1]cuda:0" = _foreach_sqrt_1[104] + getitem_2325: "f32[768][1]cuda:0" = _foreach_sqrt_1[105] + getitem_2326: "f32[3072, 768][768, 1]cuda:0" = _foreach_sqrt_1[106] + getitem_2327: "f32[3072][1]cuda:0" = _foreach_sqrt_1[107] + getitem_2328: "f32[768, 3072][3072, 1]cuda:0" = _foreach_sqrt_1[108] + getitem_2329: "f32[768][1]cuda:0" = _foreach_sqrt_1[109] + getitem_2330: "f32[768][1]cuda:0" = _foreach_sqrt_1[110] + getitem_2331: "f32[768][1]cuda:0" = _foreach_sqrt_1[111] + getitem_2332: "f32[2304, 768][768, 1]cuda:0" = _foreach_sqrt_1[112] + getitem_2333: "f32[2304][1]cuda:0" = _foreach_sqrt_1[113] + getitem_2334: "f32[768, 768][768, 1]cuda:0" = _foreach_sqrt_1[114] + getitem_2335: "f32[768][1]cuda:0" = _foreach_sqrt_1[115] + getitem_2336: "f32[768][1]cuda:0" = _foreach_sqrt_1[116] + getitem_2337: "f32[768][1]cuda:0" = _foreach_sqrt_1[117] + getitem_2338: "f32[3072, 768][768, 1]cuda:0" = _foreach_sqrt_1[118] + getitem_2339: "f32[3072][1]cuda:0" = _foreach_sqrt_1[119] + getitem_2340: "f32[768, 3072][3072, 1]cuda:0" = _foreach_sqrt_1[120] + getitem_2341: "f32[768][1]cuda:0" = _foreach_sqrt_1[121] + getitem_2342: "f32[768][1]cuda:0" = _foreach_sqrt_1[122] + getitem_2343: "f32[768][1]cuda:0" = _foreach_sqrt_1[123] + getitem_2344: "f32[2304, 768][768, 1]cuda:0" = _foreach_sqrt_1[124] + getitem_2345: "f32[2304][1]cuda:0" = _foreach_sqrt_1[125] + getitem_2346: "f32[768, 768][768, 1]cuda:0" = _foreach_sqrt_1[126] + getitem_2347: "f32[768][1]cuda:0" = _foreach_sqrt_1[127] + getitem_2348: "f32[768][1]cuda:0" = _foreach_sqrt_1[128] + getitem_2349: "f32[768][1]cuda:0" = _foreach_sqrt_1[129] + getitem_2350: "f32[3072, 768][768, 1]cuda:0" = _foreach_sqrt_1[130] + getitem_2351: "f32[3072][1]cuda:0" = _foreach_sqrt_1[131] + getitem_2352: "f32[768, 3072][3072, 1]cuda:0" = _foreach_sqrt_1[132] + getitem_2353: "f32[768][1]cuda:0" = _foreach_sqrt_1[133] + getitem_2354: "f32[768][1]cuda:0" = _foreach_sqrt_1[134] + getitem_2355: "f32[768][1]cuda:0" = _foreach_sqrt_1[135] + getitem_2356: "f32[2304, 768][768, 1]cuda:0" = _foreach_sqrt_1[136] + getitem_2357: "f32[2304][1]cuda:0" = _foreach_sqrt_1[137] + getitem_2358: "f32[768, 768][768, 1]cuda:0" = _foreach_sqrt_1[138] + getitem_2359: "f32[768][1]cuda:0" = _foreach_sqrt_1[139] + getitem_2360: "f32[768][1]cuda:0" = _foreach_sqrt_1[140] + getitem_2361: "f32[768][1]cuda:0" = _foreach_sqrt_1[141] + getitem_2362: "f32[3072, 768][768, 1]cuda:0" = _foreach_sqrt_1[142] + getitem_2363: "f32[3072][1]cuda:0" = _foreach_sqrt_1[143] + getitem_2364: "f32[768, 3072][3072, 1]cuda:0" = _foreach_sqrt_1[144] + getitem_2365: "f32[768][1]cuda:0" = _foreach_sqrt_1[145] + getitem_2366: "f32[768][1]cuda:0" = _foreach_sqrt_1[146] + getitem_2367: "f32[768][1]cuda:0" = _foreach_sqrt_1[147]; _foreach_sqrt_1 = None + + # File: /data/users/jjwu/a/pytorch/torch/optim/adam.py:584 in _multi_tensor_adam, code: torch._foreach_div_(exp_avg_sq_sqrt, bias_correction2_sqrt) + _foreach_div_1 = torch.ops.aten._foreach_div.List([getitem_2220, getitem_2221, getitem_2222, getitem_2223, getitem_2224, getitem_2225, getitem_2226, getitem_2227, getitem_2228, getitem_2229, getitem_2230, getitem_2231, getitem_2232, getitem_2233, getitem_2234, getitem_2235, getitem_2236, getitem_2237, getitem_2238, getitem_2239, getitem_2240, getitem_2241, getitem_2242, 
getitem_2243, getitem_2244, getitem_2245, getitem_2246, getitem_2247, getitem_2248, getitem_2249, getitem_2250, getitem_2251, getitem_2252, getitem_2253, getitem_2254, getitem_2255, getitem_2256, getitem_2257, getitem_2258, getitem_2259, getitem_2260, getitem_2261, getitem_2262, getitem_2263, getitem_2264, getitem_2265, getitem_2266, getitem_2267, getitem_2268, getitem_2269, getitem_2270, getitem_2271, getitem_2272, getitem_2273, getitem_2274, getitem_2275, getitem_2276, getitem_2277, getitem_2278, getitem_2279, getitem_2280, getitem_2281, getitem_2282, getitem_2283, getitem_2284, getitem_2285, getitem_2286, getitem_2287, getitem_2288, getitem_2289, getitem_2290, getitem_2291, getitem_2292, getitem_2293, getitem_2294, getitem_2295, getitem_2296, getitem_2297, getitem_2298, getitem_2299, getitem_2300, getitem_2301, getitem_2302, getitem_2303, getitem_2304, getitem_2305, getitem_2306, getitem_2307, getitem_2308, getitem_2309, getitem_2310, getitem_2311, getitem_2312, getitem_2313, getitem_2314, getitem_2315, getitem_2316, getitem_2317, getitem_2318, getitem_2319, getitem_2320, getitem_2321, getitem_2322, getitem_2323, getitem_2324, getitem_2325, getitem_2326, getitem_2327, getitem_2328, getitem_2329, getitem_2330, getitem_2331, getitem_2332, getitem_2333, getitem_2334, getitem_2335, getitem_2336, getitem_2337, getitem_2338, getitem_2339, getitem_2340, getitem_2341, getitem_2342, getitem_2343, getitem_2344, getitem_2345, getitem_2346, getitem_2347, getitem_2348, getitem_2349, getitem_2350, getitem_2351, getitem_2352, getitem_2353, getitem_2354, getitem_2355, getitem_2356, getitem_2357, getitem_2358, getitem_2359, getitem_2360, getitem_2361, getitem_2362, getitem_2363, getitem_2364, getitem_2365, getitem_2366, getitem_2367], [getitem_2072, getitem_2073, getitem_2074, getitem_2075, getitem_2076, getitem_2077, getitem_2078, getitem_2079, getitem_2080, getitem_2081, getitem_2082, getitem_2083, getitem_2084, getitem_2085, getitem_2086, getitem_2087, getitem_2088, getitem_2089, getitem_2090, getitem_2091, getitem_2092, getitem_2093, getitem_2094, getitem_2095, getitem_2096, getitem_2097, getitem_2098, getitem_2099, getitem_2100, getitem_2101, getitem_2102, getitem_2103, getitem_2104, getitem_2105, getitem_2106, getitem_2107, getitem_2108, getitem_2109, getitem_2110, getitem_2111, getitem_2112, getitem_2113, getitem_2114, getitem_2115, getitem_2116, getitem_2117, getitem_2118, getitem_2119, getitem_2120, getitem_2121, getitem_2122, getitem_2123, getitem_2124, getitem_2125, getitem_2126, getitem_2127, getitem_2128, getitem_2129, getitem_2130, getitem_2131, getitem_2132, getitem_2133, getitem_2134, getitem_2135, getitem_2136, getitem_2137, getitem_2138, getitem_2139, getitem_2140, getitem_2141, getitem_2142, getitem_2143, getitem_2144, getitem_2145, getitem_2146, getitem_2147, getitem_2148, getitem_2149, getitem_2150, getitem_2151, getitem_2152, getitem_2153, getitem_2154, getitem_2155, getitem_2156, getitem_2157, getitem_2158, getitem_2159, getitem_2160, getitem_2161, getitem_2162, getitem_2163, getitem_2164, getitem_2165, getitem_2166, getitem_2167, getitem_2168, getitem_2169, getitem_2170, getitem_2171, getitem_2172, getitem_2173, getitem_2174, getitem_2175, getitem_2176, getitem_2177, getitem_2178, getitem_2179, getitem_2180, getitem_2181, getitem_2182, getitem_2183, getitem_2184, getitem_2185, getitem_2186, getitem_2187, getitem_2188, getitem_2189, getitem_2190, getitem_2191, getitem_2192, getitem_2193, getitem_2194, getitem_2195, getitem_2196, getitem_2197, getitem_2198, getitem_2199, 
getitem_2200, getitem_2201, getitem_2202, getitem_2203, getitem_2204, getitem_2205, getitem_2206, getitem_2207, getitem_2208, getitem_2209, getitem_2210, getitem_2211, getitem_2212, getitem_2213, getitem_2214, getitem_2215, getitem_2216, getitem_2217, getitem_2218, getitem_2219]); getitem_2220 = getitem_2221 = getitem_2222 = getitem_2223 = getitem_2224 = getitem_2225 = getitem_2226 = getitem_2227 = getitem_2228 = getitem_2229 = getitem_2230 = getitem_2231 = getitem_2232 = getitem_2233 = getitem_2234 = getitem_2235 = getitem_2236 = getitem_2237 = getitem_2238 = getitem_2239 = getitem_2240 = getitem_2241 = getitem_2242 = getitem_2243 = getitem_2244 = getitem_2245 = getitem_2246 = getitem_2247 = getitem_2248 = getitem_2249 = getitem_2250 = getitem_2251 = getitem_2252 = getitem_2253 = getitem_2254 = getitem_2255 = getitem_2256 = getitem_2257 = getitem_2258 = getitem_2259 = getitem_2260 = getitem_2261 = getitem_2262 = getitem_2263 = getitem_2264 = getitem_2265 = getitem_2266 = getitem_2267 = getitem_2268 = getitem_2269 = getitem_2270 = getitem_2271 = getitem_2272 = getitem_2273 = getitem_2274 = getitem_2275 = getitem_2276 = getitem_2277 = getitem_2278 = getitem_2279 = getitem_2280 = getitem_2281 = getitem_2282 = getitem_2283 = getitem_2284 = getitem_2285 = getitem_2286 = getitem_2287 = getitem_2288 = getitem_2289 = getitem_2290 = getitem_2291 = getitem_2292 = getitem_2293 = getitem_2294 = getitem_2295 = getitem_2296 = getitem_2297 = getitem_2298 = getitem_2299 = getitem_2300 = getitem_2301 = getitem_2302 = getitem_2303 = getitem_2304 = getitem_2305 = getitem_2306 = getitem_2307 = getitem_2308 = getitem_2309 = getitem_2310 = getitem_2311 = getitem_2312 = getitem_2313 = getitem_2314 = getitem_2315 = getitem_2316 = getitem_2317 = getitem_2318 = getitem_2319 = getitem_2320 = getitem_2321 = getitem_2322 = getitem_2323 = getitem_2324 = getitem_2325 = getitem_2326 = getitem_2327 = getitem_2328 = getitem_2329 = getitem_2330 = getitem_2331 = getitem_2332 = getitem_2333 = getitem_2334 = getitem_2335 = getitem_2336 = getitem_2337 = getitem_2338 = getitem_2339 = getitem_2340 = getitem_2341 = getitem_2342 = getitem_2343 = getitem_2344 = getitem_2345 = getitem_2346 = getitem_2347 = getitem_2348 = getitem_2349 = getitem_2350 = getitem_2351 = getitem_2352 = getitem_2353 = getitem_2354 = getitem_2355 = getitem_2356 = getitem_2357 = getitem_2358 = getitem_2359 = getitem_2360 = getitem_2361 = getitem_2362 = getitem_2363 = getitem_2364 = getitem_2365 = getitem_2366 = getitem_2367 = getitem_2072 = getitem_2073 = getitem_2074 = getitem_2075 = getitem_2076 = getitem_2077 = getitem_2078 = getitem_2079 = getitem_2080 = getitem_2081 = getitem_2082 = getitem_2083 = getitem_2084 = getitem_2085 = getitem_2086 = getitem_2087 = getitem_2088 = getitem_2089 = getitem_2090 = getitem_2091 = getitem_2092 = getitem_2093 = getitem_2094 = getitem_2095 = getitem_2096 = getitem_2097 = getitem_2098 = getitem_2099 = getitem_2100 = getitem_2101 = getitem_2102 = getitem_2103 = getitem_2104 = getitem_2105 = getitem_2106 = getitem_2107 = getitem_2108 = getitem_2109 = getitem_2110 = getitem_2111 = getitem_2112 = getitem_2113 = getitem_2114 = getitem_2115 = getitem_2116 = getitem_2117 = getitem_2118 = getitem_2119 = getitem_2120 = getitem_2121 = getitem_2122 = getitem_2123 = getitem_2124 = getitem_2125 = getitem_2126 = getitem_2127 = getitem_2128 = getitem_2129 = getitem_2130 = getitem_2131 = getitem_2132 = getitem_2133 = getitem_2134 = getitem_2135 = getitem_2136 = getitem_2137 = getitem_2138 = getitem_2139 = getitem_2140 = getitem_2141 = 
getitem_2142 = getitem_2143 = getitem_2144 = getitem_2145 = getitem_2146 = getitem_2147 = getitem_2148 = getitem_2149 = getitem_2150 = getitem_2151 = getitem_2152 = getitem_2153 = getitem_2154 = getitem_2155 = getitem_2156 = getitem_2157 = getitem_2158 = getitem_2159 = getitem_2160 = getitem_2161 = getitem_2162 = getitem_2163 = getitem_2164 = getitem_2165 = getitem_2166 = getitem_2167 = getitem_2168 = getitem_2169 = getitem_2170 = getitem_2171 = getitem_2172 = getitem_2173 = getitem_2174 = getitem_2175 = getitem_2176 = getitem_2177 = getitem_2178 = getitem_2179 = getitem_2180 = getitem_2181 = getitem_2182 = getitem_2183 = getitem_2184 = getitem_2185 = getitem_2186 = getitem_2187 = getitem_2188 = getitem_2189 = getitem_2190 = getitem_2191 = getitem_2192 = getitem_2193 = getitem_2194 = getitem_2195 = getitem_2196 = getitem_2197 = getitem_2198 = getitem_2199 = getitem_2200 = getitem_2201 = getitem_2202 = getitem_2203 = getitem_2204 = getitem_2205 = getitem_2206 = getitem_2207 = getitem_2208 = getitem_2209 = getitem_2210 = getitem_2211 = getitem_2212 = getitem_2213 = getitem_2214 = getitem_2215 = getitem_2216 = getitem_2217 = getitem_2218 = getitem_2219 = None + getitem_2368: "f32[50304, 768][768, 1]cuda:0" = _foreach_div_1[0] + getitem_2369: "f32[1024, 768][768, 1]cuda:0" = _foreach_div_1[1] + getitem_2370: "f32[768][1]cuda:0" = _foreach_div_1[2] + getitem_2371: "f32[768][1]cuda:0" = _foreach_div_1[3] + getitem_2372: "f32[2304, 768][768, 1]cuda:0" = _foreach_div_1[4] + getitem_2373: "f32[2304][1]cuda:0" = _foreach_div_1[5] + getitem_2374: "f32[768, 768][768, 1]cuda:0" = _foreach_div_1[6] + getitem_2375: "f32[768][1]cuda:0" = _foreach_div_1[7] + getitem_2376: "f32[768][1]cuda:0" = _foreach_div_1[8] + getitem_2377: "f32[768][1]cuda:0" = _foreach_div_1[9] + getitem_2378: "f32[3072, 768][768, 1]cuda:0" = _foreach_div_1[10] + getitem_2379: "f32[3072][1]cuda:0" = _foreach_div_1[11] + getitem_2380: "f32[768, 3072][3072, 1]cuda:0" = _foreach_div_1[12] + getitem_2381: "f32[768][1]cuda:0" = _foreach_div_1[13] + getitem_2382: "f32[768][1]cuda:0" = _foreach_div_1[14] + getitem_2383: "f32[768][1]cuda:0" = _foreach_div_1[15] + getitem_2384: "f32[2304, 768][768, 1]cuda:0" = _foreach_div_1[16] + getitem_2385: "f32[2304][1]cuda:0" = _foreach_div_1[17] + getitem_2386: "f32[768, 768][768, 1]cuda:0" = _foreach_div_1[18] + getitem_2387: "f32[768][1]cuda:0" = _foreach_div_1[19] + getitem_2388: "f32[768][1]cuda:0" = _foreach_div_1[20] + getitem_2389: "f32[768][1]cuda:0" = _foreach_div_1[21] + getitem_2390: "f32[3072, 768][768, 1]cuda:0" = _foreach_div_1[22] + getitem_2391: "f32[3072][1]cuda:0" = _foreach_div_1[23] + getitem_2392: "f32[768, 3072][3072, 1]cuda:0" = _foreach_div_1[24] + getitem_2393: "f32[768][1]cuda:0" = _foreach_div_1[25] + getitem_2394: "f32[768][1]cuda:0" = _foreach_div_1[26] + getitem_2395: "f32[768][1]cuda:0" = _foreach_div_1[27] + getitem_2396: "f32[2304, 768][768, 1]cuda:0" = _foreach_div_1[28] + getitem_2397: "f32[2304][1]cuda:0" = _foreach_div_1[29] + getitem_2398: "f32[768, 768][768, 1]cuda:0" = _foreach_div_1[30] + getitem_2399: "f32[768][1]cuda:0" = _foreach_div_1[31] + getitem_2400: "f32[768][1]cuda:0" = _foreach_div_1[32] + getitem_2401: "f32[768][1]cuda:0" = _foreach_div_1[33] + getitem_2402: "f32[3072, 768][768, 1]cuda:0" = _foreach_div_1[34] + getitem_2403: "f32[3072][1]cuda:0" = _foreach_div_1[35] + getitem_2404: "f32[768, 3072][3072, 1]cuda:0" = _foreach_div_1[36] + getitem_2405: "f32[768][1]cuda:0" = _foreach_div_1[37] + getitem_2406: "f32[768][1]cuda:0" = _foreach_div_1[38] + 
getitem_2407: "f32[768][1]cuda:0" = _foreach_div_1[39] + getitem_2408: "f32[2304, 768][768, 1]cuda:0" = _foreach_div_1[40] + getitem_2409: "f32[2304][1]cuda:0" = _foreach_div_1[41] + getitem_2410: "f32[768, 768][768, 1]cuda:0" = _foreach_div_1[42] + getitem_2411: "f32[768][1]cuda:0" = _foreach_div_1[43] + getitem_2412: "f32[768][1]cuda:0" = _foreach_div_1[44] + getitem_2413: "f32[768][1]cuda:0" = _foreach_div_1[45] + getitem_2414: "f32[3072, 768][768, 1]cuda:0" = _foreach_div_1[46] + getitem_2415: "f32[3072][1]cuda:0" = _foreach_div_1[47] + getitem_2416: "f32[768, 3072][3072, 1]cuda:0" = _foreach_div_1[48] + getitem_2417: "f32[768][1]cuda:0" = _foreach_div_1[49] + getitem_2418: "f32[768][1]cuda:0" = _foreach_div_1[50] + getitem_2419: "f32[768][1]cuda:0" = _foreach_div_1[51] + getitem_2420: "f32[2304, 768][768, 1]cuda:0" = _foreach_div_1[52] + getitem_2421: "f32[2304][1]cuda:0" = _foreach_div_1[53] + getitem_2422: "f32[768, 768][768, 1]cuda:0" = _foreach_div_1[54] + getitem_2423: "f32[768][1]cuda:0" = _foreach_div_1[55] + getitem_2424: "f32[768][1]cuda:0" = _foreach_div_1[56] + getitem_2425: "f32[768][1]cuda:0" = _foreach_div_1[57] + getitem_2426: "f32[3072, 768][768, 1]cuda:0" = _foreach_div_1[58] + getitem_2427: "f32[3072][1]cuda:0" = _foreach_div_1[59] + getitem_2428: "f32[768, 3072][3072, 1]cuda:0" = _foreach_div_1[60] + getitem_2429: "f32[768][1]cuda:0" = _foreach_div_1[61] + getitem_2430: "f32[768][1]cuda:0" = _foreach_div_1[62] + getitem_2431: "f32[768][1]cuda:0" = _foreach_div_1[63] + getitem_2432: "f32[2304, 768][768, 1]cuda:0" = _foreach_div_1[64] + getitem_2433: "f32[2304][1]cuda:0" = _foreach_div_1[65] + getitem_2434: "f32[768, 768][768, 1]cuda:0" = _foreach_div_1[66] + getitem_2435: "f32[768][1]cuda:0" = _foreach_div_1[67] + getitem_2436: "f32[768][1]cuda:0" = _foreach_div_1[68] + getitem_2437: "f32[768][1]cuda:0" = _foreach_div_1[69] + getitem_2438: "f32[3072, 768][768, 1]cuda:0" = _foreach_div_1[70] + getitem_2439: "f32[3072][1]cuda:0" = _foreach_div_1[71] + getitem_2440: "f32[768, 3072][3072, 1]cuda:0" = _foreach_div_1[72] + getitem_2441: "f32[768][1]cuda:0" = _foreach_div_1[73] + getitem_2442: "f32[768][1]cuda:0" = _foreach_div_1[74] + getitem_2443: "f32[768][1]cuda:0" = _foreach_div_1[75] + getitem_2444: "f32[2304, 768][768, 1]cuda:0" = _foreach_div_1[76] + getitem_2445: "f32[2304][1]cuda:0" = _foreach_div_1[77] + getitem_2446: "f32[768, 768][768, 1]cuda:0" = _foreach_div_1[78] + getitem_2447: "f32[768][1]cuda:0" = _foreach_div_1[79] + getitem_2448: "f32[768][1]cuda:0" = _foreach_div_1[80] + getitem_2449: "f32[768][1]cuda:0" = _foreach_div_1[81] + getitem_2450: "f32[3072, 768][768, 1]cuda:0" = _foreach_div_1[82] + getitem_2451: "f32[3072][1]cuda:0" = _foreach_div_1[83] + getitem_2452: "f32[768, 3072][3072, 1]cuda:0" = _foreach_div_1[84] + getitem_2453: "f32[768][1]cuda:0" = _foreach_div_1[85] + getitem_2454: "f32[768][1]cuda:0" = _foreach_div_1[86] + getitem_2455: "f32[768][1]cuda:0" = _foreach_div_1[87] + getitem_2456: "f32[2304, 768][768, 1]cuda:0" = _foreach_div_1[88] + getitem_2457: "f32[2304][1]cuda:0" = _foreach_div_1[89] + getitem_2458: "f32[768, 768][768, 1]cuda:0" = _foreach_div_1[90] + getitem_2459: "f32[768][1]cuda:0" = _foreach_div_1[91] + getitem_2460: "f32[768][1]cuda:0" = _foreach_div_1[92] + getitem_2461: "f32[768][1]cuda:0" = _foreach_div_1[93] + getitem_2462: "f32[3072, 768][768, 1]cuda:0" = _foreach_div_1[94] + getitem_2463: "f32[3072][1]cuda:0" = _foreach_div_1[95] + getitem_2464: "f32[768, 3072][3072, 1]cuda:0" = _foreach_div_1[96] + getitem_2465: 
"f32[768][1]cuda:0" = _foreach_div_1[97] + getitem_2466: "f32[768][1]cuda:0" = _foreach_div_1[98] + getitem_2467: "f32[768][1]cuda:0" = _foreach_div_1[99] + getitem_2468: "f32[2304, 768][768, 1]cuda:0" = _foreach_div_1[100] + getitem_2469: "f32[2304][1]cuda:0" = _foreach_div_1[101] + getitem_2470: "f32[768, 768][768, 1]cuda:0" = _foreach_div_1[102] + getitem_2471: "f32[768][1]cuda:0" = _foreach_div_1[103] + getitem_2472: "f32[768][1]cuda:0" = _foreach_div_1[104] + getitem_2473: "f32[768][1]cuda:0" = _foreach_div_1[105] + getitem_2474: "f32[3072, 768][768, 1]cuda:0" = _foreach_div_1[106] + getitem_2475: "f32[3072][1]cuda:0" = _foreach_div_1[107] + getitem_2476: "f32[768, 3072][3072, 1]cuda:0" = _foreach_div_1[108] + getitem_2477: "f32[768][1]cuda:0" = _foreach_div_1[109] + getitem_2478: "f32[768][1]cuda:0" = _foreach_div_1[110] + getitem_2479: "f32[768][1]cuda:0" = _foreach_div_1[111] + getitem_2480: "f32[2304, 768][768, 1]cuda:0" = _foreach_div_1[112] + getitem_2481: "f32[2304][1]cuda:0" = _foreach_div_1[113] + getitem_2482: "f32[768, 768][768, 1]cuda:0" = _foreach_div_1[114] + getitem_2483: "f32[768][1]cuda:0" = _foreach_div_1[115] + getitem_2484: "f32[768][1]cuda:0" = _foreach_div_1[116] + getitem_2485: "f32[768][1]cuda:0" = _foreach_div_1[117] + getitem_2486: "f32[3072, 768][768, 1]cuda:0" = _foreach_div_1[118] + getitem_2487: "f32[3072][1]cuda:0" = _foreach_div_1[119] + getitem_2488: "f32[768, 3072][3072, 1]cuda:0" = _foreach_div_1[120] + getitem_2489: "f32[768][1]cuda:0" = _foreach_div_1[121] + getitem_2490: "f32[768][1]cuda:0" = _foreach_div_1[122] + getitem_2491: "f32[768][1]cuda:0" = _foreach_div_1[123] + getitem_2492: "f32[2304, 768][768, 1]cuda:0" = _foreach_div_1[124] + getitem_2493: "f32[2304][1]cuda:0" = _foreach_div_1[125] + getitem_2494: "f32[768, 768][768, 1]cuda:0" = _foreach_div_1[126] + getitem_2495: "f32[768][1]cuda:0" = _foreach_div_1[127] + getitem_2496: "f32[768][1]cuda:0" = _foreach_div_1[128] + getitem_2497: "f32[768][1]cuda:0" = _foreach_div_1[129] + getitem_2498: "f32[3072, 768][768, 1]cuda:0" = _foreach_div_1[130] + getitem_2499: "f32[3072][1]cuda:0" = _foreach_div_1[131] + getitem_2500: "f32[768, 3072][3072, 1]cuda:0" = _foreach_div_1[132] + getitem_2501: "f32[768][1]cuda:0" = _foreach_div_1[133] + getitem_2502: "f32[768][1]cuda:0" = _foreach_div_1[134] + getitem_2503: "f32[768][1]cuda:0" = _foreach_div_1[135] + getitem_2504: "f32[2304, 768][768, 1]cuda:0" = _foreach_div_1[136] + getitem_2505: "f32[2304][1]cuda:0" = _foreach_div_1[137] + getitem_2506: "f32[768, 768][768, 1]cuda:0" = _foreach_div_1[138] + getitem_2507: "f32[768][1]cuda:0" = _foreach_div_1[139] + getitem_2508: "f32[768][1]cuda:0" = _foreach_div_1[140] + getitem_2509: "f32[768][1]cuda:0" = _foreach_div_1[141] + getitem_2510: "f32[3072, 768][768, 1]cuda:0" = _foreach_div_1[142] + getitem_2511: "f32[3072][1]cuda:0" = _foreach_div_1[143] + getitem_2512: "f32[768, 3072][3072, 1]cuda:0" = _foreach_div_1[144] + getitem_2513: "f32[768][1]cuda:0" = _foreach_div_1[145] + getitem_2514: "f32[768][1]cuda:0" = _foreach_div_1[146] + getitem_2515: "f32[768][1]cuda:0" = _foreach_div_1[147]; _foreach_div_1 = None + + # File: /data/users/jjwu/a/pytorch/torch/optim/adam.py:585 in _multi_tensor_adam, code: torch._foreach_add_(exp_avg_sq_sqrt, eps) + _foreach_add_3 = torch.ops.aten._foreach_add.Scalar([getitem_2368, getitem_2369, getitem_2370, getitem_2371, getitem_2372, getitem_2373, getitem_2374, getitem_2375, getitem_2376, getitem_2377, getitem_2378, getitem_2379, getitem_2380, getitem_2381, getitem_2382, 
getitem_2383, getitem_2384, getitem_2385, getitem_2386, getitem_2387, getitem_2388, getitem_2389, getitem_2390, getitem_2391, getitem_2392, getitem_2393, getitem_2394, getitem_2395, getitem_2396, getitem_2397, getitem_2398, getitem_2399, getitem_2400, getitem_2401, getitem_2402, getitem_2403, getitem_2404, getitem_2405, getitem_2406, getitem_2407, getitem_2408, getitem_2409, getitem_2410, getitem_2411, getitem_2412, getitem_2413, getitem_2414, getitem_2415, getitem_2416, getitem_2417, getitem_2418, getitem_2419, getitem_2420, getitem_2421, getitem_2422, getitem_2423, getitem_2424, getitem_2425, getitem_2426, getitem_2427, getitem_2428, getitem_2429, getitem_2430, getitem_2431, getitem_2432, getitem_2433, getitem_2434, getitem_2435, getitem_2436, getitem_2437, getitem_2438, getitem_2439, getitem_2440, getitem_2441, getitem_2442, getitem_2443, getitem_2444, getitem_2445, getitem_2446, getitem_2447, getitem_2448, getitem_2449, getitem_2450, getitem_2451, getitem_2452, getitem_2453, getitem_2454, getitem_2455, getitem_2456, getitem_2457, getitem_2458, getitem_2459, getitem_2460, getitem_2461, getitem_2462, getitem_2463, getitem_2464, getitem_2465, getitem_2466, getitem_2467, getitem_2468, getitem_2469, getitem_2470, getitem_2471, getitem_2472, getitem_2473, getitem_2474, getitem_2475, getitem_2476, getitem_2477, getitem_2478, getitem_2479, getitem_2480, getitem_2481, getitem_2482, getitem_2483, getitem_2484, getitem_2485, getitem_2486, getitem_2487, getitem_2488, getitem_2489, getitem_2490, getitem_2491, getitem_2492, getitem_2493, getitem_2494, getitem_2495, getitem_2496, getitem_2497, getitem_2498, getitem_2499, getitem_2500, getitem_2501, getitem_2502, getitem_2503, getitem_2504, getitem_2505, getitem_2506, getitem_2507, getitem_2508, getitem_2509, getitem_2510, getitem_2511, getitem_2512, getitem_2513, getitem_2514, getitem_2515], 1e-08); getitem_2368 = getitem_2369 = getitem_2370 = getitem_2371 = getitem_2372 = getitem_2373 = getitem_2374 = getitem_2375 = getitem_2376 = getitem_2377 = getitem_2378 = getitem_2379 = getitem_2380 = getitem_2381 = getitem_2382 = getitem_2383 = getitem_2384 = getitem_2385 = getitem_2386 = getitem_2387 = getitem_2388 = getitem_2389 = getitem_2390 = getitem_2391 = getitem_2392 = getitem_2393 = getitem_2394 = getitem_2395 = getitem_2396 = getitem_2397 = getitem_2398 = getitem_2399 = getitem_2400 = getitem_2401 = getitem_2402 = getitem_2403 = getitem_2404 = getitem_2405 = getitem_2406 = getitem_2407 = getitem_2408 = getitem_2409 = getitem_2410 = getitem_2411 = getitem_2412 = getitem_2413 = getitem_2414 = getitem_2415 = getitem_2416 = getitem_2417 = getitem_2418 = getitem_2419 = getitem_2420 = getitem_2421 = getitem_2422 = getitem_2423 = getitem_2424 = getitem_2425 = getitem_2426 = getitem_2427 = getitem_2428 = getitem_2429 = getitem_2430 = getitem_2431 = getitem_2432 = getitem_2433 = getitem_2434 = getitem_2435 = getitem_2436 = getitem_2437 = getitem_2438 = getitem_2439 = getitem_2440 = getitem_2441 = getitem_2442 = getitem_2443 = getitem_2444 = getitem_2445 = getitem_2446 = getitem_2447 = getitem_2448 = getitem_2449 = getitem_2450 = getitem_2451 = getitem_2452 = getitem_2453 = getitem_2454 = getitem_2455 = getitem_2456 = getitem_2457 = getitem_2458 = getitem_2459 = getitem_2460 = getitem_2461 = getitem_2462 = getitem_2463 = getitem_2464 = getitem_2465 = getitem_2466 = getitem_2467 = getitem_2468 = getitem_2469 = getitem_2470 = getitem_2471 = getitem_2472 = getitem_2473 = getitem_2474 = getitem_2475 = getitem_2476 = getitem_2477 = getitem_2478 = getitem_2479 = 
getitem_2480 = getitem_2481 = getitem_2482 = getitem_2483 = getitem_2484 = getitem_2485 = getitem_2486 = getitem_2487 = getitem_2488 = getitem_2489 = getitem_2490 = getitem_2491 = getitem_2492 = getitem_2493 = getitem_2494 = getitem_2495 = getitem_2496 = getitem_2497 = getitem_2498 = getitem_2499 = getitem_2500 = getitem_2501 = getitem_2502 = getitem_2503 = getitem_2504 = getitem_2505 = getitem_2506 = getitem_2507 = getitem_2508 = getitem_2509 = getitem_2510 = getitem_2511 = getitem_2512 = getitem_2513 = getitem_2514 = getitem_2515 = None + getitem_2516: "f32[50304, 768][768, 1]cuda:0" = _foreach_add_3[0] + getitem_2517: "f32[1024, 768][768, 1]cuda:0" = _foreach_add_3[1] + getitem_2518: "f32[768][1]cuda:0" = _foreach_add_3[2] + getitem_2519: "f32[768][1]cuda:0" = _foreach_add_3[3] + getitem_2520: "f32[2304, 768][768, 1]cuda:0" = _foreach_add_3[4] + getitem_2521: "f32[2304][1]cuda:0" = _foreach_add_3[5] + getitem_2522: "f32[768, 768][768, 1]cuda:0" = _foreach_add_3[6] + getitem_2523: "f32[768][1]cuda:0" = _foreach_add_3[7] + getitem_2524: "f32[768][1]cuda:0" = _foreach_add_3[8] + getitem_2525: "f32[768][1]cuda:0" = _foreach_add_3[9] + getitem_2526: "f32[3072, 768][768, 1]cuda:0" = _foreach_add_3[10] + getitem_2527: "f32[3072][1]cuda:0" = _foreach_add_3[11] + getitem_2528: "f32[768, 3072][3072, 1]cuda:0" = _foreach_add_3[12] + getitem_2529: "f32[768][1]cuda:0" = _foreach_add_3[13] + getitem_2530: "f32[768][1]cuda:0" = _foreach_add_3[14] + getitem_2531: "f32[768][1]cuda:0" = _foreach_add_3[15] + getitem_2532: "f32[2304, 768][768, 1]cuda:0" = _foreach_add_3[16] + getitem_2533: "f32[2304][1]cuda:0" = _foreach_add_3[17] + getitem_2534: "f32[768, 768][768, 1]cuda:0" = _foreach_add_3[18] + getitem_2535: "f32[768][1]cuda:0" = _foreach_add_3[19] + getitem_2536: "f32[768][1]cuda:0" = _foreach_add_3[20] + getitem_2537: "f32[768][1]cuda:0" = _foreach_add_3[21] + getitem_2538: "f32[3072, 768][768, 1]cuda:0" = _foreach_add_3[22] + getitem_2539: "f32[3072][1]cuda:0" = _foreach_add_3[23] + getitem_2540: "f32[768, 3072][3072, 1]cuda:0" = _foreach_add_3[24] + getitem_2541: "f32[768][1]cuda:0" = _foreach_add_3[25] + getitem_2542: "f32[768][1]cuda:0" = _foreach_add_3[26] + getitem_2543: "f32[768][1]cuda:0" = _foreach_add_3[27] + getitem_2544: "f32[2304, 768][768, 1]cuda:0" = _foreach_add_3[28] + getitem_2545: "f32[2304][1]cuda:0" = _foreach_add_3[29] + getitem_2546: "f32[768, 768][768, 1]cuda:0" = _foreach_add_3[30] + getitem_2547: "f32[768][1]cuda:0" = _foreach_add_3[31] + getitem_2548: "f32[768][1]cuda:0" = _foreach_add_3[32] + getitem_2549: "f32[768][1]cuda:0" = _foreach_add_3[33] + getitem_2550: "f32[3072, 768][768, 1]cuda:0" = _foreach_add_3[34] + getitem_2551: "f32[3072][1]cuda:0" = _foreach_add_3[35] + getitem_2552: "f32[768, 3072][3072, 1]cuda:0" = _foreach_add_3[36] + getitem_2553: "f32[768][1]cuda:0" = _foreach_add_3[37] + getitem_2554: "f32[768][1]cuda:0" = _foreach_add_3[38] + getitem_2555: "f32[768][1]cuda:0" = _foreach_add_3[39] + getitem_2556: "f32[2304, 768][768, 1]cuda:0" = _foreach_add_3[40] + getitem_2557: "f32[2304][1]cuda:0" = _foreach_add_3[41] + getitem_2558: "f32[768, 768][768, 1]cuda:0" = _foreach_add_3[42] + getitem_2559: "f32[768][1]cuda:0" = _foreach_add_3[43] + getitem_2560: "f32[768][1]cuda:0" = _foreach_add_3[44] + getitem_2561: "f32[768][1]cuda:0" = _foreach_add_3[45] + getitem_2562: "f32[3072, 768][768, 1]cuda:0" = _foreach_add_3[46] + getitem_2563: "f32[3072][1]cuda:0" = _foreach_add_3[47] + getitem_2564: "f32[768, 3072][3072, 1]cuda:0" = _foreach_add_3[48] + getitem_2565: 
"f32[768][1]cuda:0" = _foreach_add_3[49] + getitem_2566: "f32[768][1]cuda:0" = _foreach_add_3[50] + getitem_2567: "f32[768][1]cuda:0" = _foreach_add_3[51] + getitem_2568: "f32[2304, 768][768, 1]cuda:0" = _foreach_add_3[52] + getitem_2569: "f32[2304][1]cuda:0" = _foreach_add_3[53] + getitem_2570: "f32[768, 768][768, 1]cuda:0" = _foreach_add_3[54] + getitem_2571: "f32[768][1]cuda:0" = _foreach_add_3[55] + getitem_2572: "f32[768][1]cuda:0" = _foreach_add_3[56] + getitem_2573: "f32[768][1]cuda:0" = _foreach_add_3[57] + getitem_2574: "f32[3072, 768][768, 1]cuda:0" = _foreach_add_3[58] + getitem_2575: "f32[3072][1]cuda:0" = _foreach_add_3[59] + getitem_2576: "f32[768, 3072][3072, 1]cuda:0" = _foreach_add_3[60] + getitem_2577: "f32[768][1]cuda:0" = _foreach_add_3[61] + getitem_2578: "f32[768][1]cuda:0" = _foreach_add_3[62] + getitem_2579: "f32[768][1]cuda:0" = _foreach_add_3[63] + getitem_2580: "f32[2304, 768][768, 1]cuda:0" = _foreach_add_3[64] + getitem_2581: "f32[2304][1]cuda:0" = _foreach_add_3[65] + getitem_2582: "f32[768, 768][768, 1]cuda:0" = _foreach_add_3[66] + getitem_2583: "f32[768][1]cuda:0" = _foreach_add_3[67] + getitem_2584: "f32[768][1]cuda:0" = _foreach_add_3[68] + getitem_2585: "f32[768][1]cuda:0" = _foreach_add_3[69] + getitem_2586: "f32[3072, 768][768, 1]cuda:0" = _foreach_add_3[70] + getitem_2587: "f32[3072][1]cuda:0" = _foreach_add_3[71] + getitem_2588: "f32[768, 3072][3072, 1]cuda:0" = _foreach_add_3[72] + getitem_2589: "f32[768][1]cuda:0" = _foreach_add_3[73] + getitem_2590: "f32[768][1]cuda:0" = _foreach_add_3[74] + getitem_2591: "f32[768][1]cuda:0" = _foreach_add_3[75] + getitem_2592: "f32[2304, 768][768, 1]cuda:0" = _foreach_add_3[76] + getitem_2593: "f32[2304][1]cuda:0" = _foreach_add_3[77] + getitem_2594: "f32[768, 768][768, 1]cuda:0" = _foreach_add_3[78] + getitem_2595: "f32[768][1]cuda:0" = _foreach_add_3[79] + getitem_2596: "f32[768][1]cuda:0" = _foreach_add_3[80] + getitem_2597: "f32[768][1]cuda:0" = _foreach_add_3[81] + getitem_2598: "f32[3072, 768][768, 1]cuda:0" = _foreach_add_3[82] + getitem_2599: "f32[3072][1]cuda:0" = _foreach_add_3[83] + getitem_2600: "f32[768, 3072][3072, 1]cuda:0" = _foreach_add_3[84] + getitem_2601: "f32[768][1]cuda:0" = _foreach_add_3[85] + getitem_2602: "f32[768][1]cuda:0" = _foreach_add_3[86] + getitem_2603: "f32[768][1]cuda:0" = _foreach_add_3[87] + getitem_2604: "f32[2304, 768][768, 1]cuda:0" = _foreach_add_3[88] + getitem_2605: "f32[2304][1]cuda:0" = _foreach_add_3[89] + getitem_2606: "f32[768, 768][768, 1]cuda:0" = _foreach_add_3[90] + getitem_2607: "f32[768][1]cuda:0" = _foreach_add_3[91] + getitem_2608: "f32[768][1]cuda:0" = _foreach_add_3[92] + getitem_2609: "f32[768][1]cuda:0" = _foreach_add_3[93] + getitem_2610: "f32[3072, 768][768, 1]cuda:0" = _foreach_add_3[94] + getitem_2611: "f32[3072][1]cuda:0" = _foreach_add_3[95] + getitem_2612: "f32[768, 3072][3072, 1]cuda:0" = _foreach_add_3[96] + getitem_2613: "f32[768][1]cuda:0" = _foreach_add_3[97] + getitem_2614: "f32[768][1]cuda:0" = _foreach_add_3[98] + getitem_2615: "f32[768][1]cuda:0" = _foreach_add_3[99] + getitem_2616: "f32[2304, 768][768, 1]cuda:0" = _foreach_add_3[100] + getitem_2617: "f32[2304][1]cuda:0" = _foreach_add_3[101] + getitem_2618: "f32[768, 768][768, 1]cuda:0" = _foreach_add_3[102] + getitem_2619: "f32[768][1]cuda:0" = _foreach_add_3[103] + getitem_2620: "f32[768][1]cuda:0" = _foreach_add_3[104] + getitem_2621: "f32[768][1]cuda:0" = _foreach_add_3[105] + getitem_2622: "f32[3072, 768][768, 1]cuda:0" = _foreach_add_3[106] + getitem_2623: "f32[3072][1]cuda:0" = 
_foreach_add_3[107] + getitem_2624: "f32[768, 3072][3072, 1]cuda:0" = _foreach_add_3[108] + getitem_2625: "f32[768][1]cuda:0" = _foreach_add_3[109] + getitem_2626: "f32[768][1]cuda:0" = _foreach_add_3[110] + getitem_2627: "f32[768][1]cuda:0" = _foreach_add_3[111] + getitem_2628: "f32[2304, 768][768, 1]cuda:0" = _foreach_add_3[112] + getitem_2629: "f32[2304][1]cuda:0" = _foreach_add_3[113] + getitem_2630: "f32[768, 768][768, 1]cuda:0" = _foreach_add_3[114] + getitem_2631: "f32[768][1]cuda:0" = _foreach_add_3[115] + getitem_2632: "f32[768][1]cuda:0" = _foreach_add_3[116] + getitem_2633: "f32[768][1]cuda:0" = _foreach_add_3[117] + getitem_2634: "f32[3072, 768][768, 1]cuda:0" = _foreach_add_3[118] + getitem_2635: "f32[3072][1]cuda:0" = _foreach_add_3[119] + getitem_2636: "f32[768, 3072][3072, 1]cuda:0" = _foreach_add_3[120] + getitem_2637: "f32[768][1]cuda:0" = _foreach_add_3[121] + getitem_2638: "f32[768][1]cuda:0" = _foreach_add_3[122] + getitem_2639: "f32[768][1]cuda:0" = _foreach_add_3[123] + getitem_2640: "f32[2304, 768][768, 1]cuda:0" = _foreach_add_3[124] + getitem_2641: "f32[2304][1]cuda:0" = _foreach_add_3[125] + getitem_2642: "f32[768, 768][768, 1]cuda:0" = _foreach_add_3[126] + getitem_2643: "f32[768][1]cuda:0" = _foreach_add_3[127] + getitem_2644: "f32[768][1]cuda:0" = _foreach_add_3[128] + getitem_2645: "f32[768][1]cuda:0" = _foreach_add_3[129] + getitem_2646: "f32[3072, 768][768, 1]cuda:0" = _foreach_add_3[130] + getitem_2647: "f32[3072][1]cuda:0" = _foreach_add_3[131] + getitem_2648: "f32[768, 3072][3072, 1]cuda:0" = _foreach_add_3[132] + getitem_2649: "f32[768][1]cuda:0" = _foreach_add_3[133] + getitem_2650: "f32[768][1]cuda:0" = _foreach_add_3[134] + getitem_2651: "f32[768][1]cuda:0" = _foreach_add_3[135] + getitem_2652: "f32[2304, 768][768, 1]cuda:0" = _foreach_add_3[136] + getitem_2653: "f32[2304][1]cuda:0" = _foreach_add_3[137] + getitem_2654: "f32[768, 768][768, 1]cuda:0" = _foreach_add_3[138] + getitem_2655: "f32[768][1]cuda:0" = _foreach_add_3[139] + getitem_2656: "f32[768][1]cuda:0" = _foreach_add_3[140] + getitem_2657: "f32[768][1]cuda:0" = _foreach_add_3[141] + getitem_2658: "f32[3072, 768][768, 1]cuda:0" = _foreach_add_3[142] + getitem_2659: "f32[3072][1]cuda:0" = _foreach_add_3[143] + getitem_2660: "f32[768, 3072][3072, 1]cuda:0" = _foreach_add_3[144] + getitem_2661: "f32[768][1]cuda:0" = _foreach_add_3[145] + getitem_2662: "f32[768][1]cuda:0" = _foreach_add_3[146] + getitem_2663: "f32[768][1]cuda:0" = _foreach_add_3[147]; _foreach_add_3 = None + + # File: /data/users/jjwu/a/pytorch/torch/optim/adam.py:586 in _multi_tensor_adam, code: torch._foreach_div_(exp_avg_sq_sqrt, step_size) + _foreach_div_2 = torch.ops.aten._foreach_div.List([getitem_2516, getitem_2517, getitem_2518, getitem_2519, getitem_2520, getitem_2521, getitem_2522, getitem_2523, getitem_2524, getitem_2525, getitem_2526, getitem_2527, getitem_2528, getitem_2529, getitem_2530, getitem_2531, getitem_2532, getitem_2533, getitem_2534, getitem_2535, getitem_2536, getitem_2537, getitem_2538, getitem_2539, getitem_2540, getitem_2541, getitem_2542, getitem_2543, getitem_2544, getitem_2545, getitem_2546, getitem_2547, getitem_2548, getitem_2549, getitem_2550, getitem_2551, getitem_2552, getitem_2553, getitem_2554, getitem_2555, getitem_2556, getitem_2557, getitem_2558, getitem_2559, getitem_2560, getitem_2561, getitem_2562, getitem_2563, getitem_2564, getitem_2565, getitem_2566, getitem_2567, getitem_2568, getitem_2569, getitem_2570, getitem_2571, getitem_2572, getitem_2573, getitem_2574, getitem_2575, 
getitem_2576, getitem_2577, getitem_2578, getitem_2579, getitem_2580, getitem_2581, getitem_2582, getitem_2583, getitem_2584, getitem_2585, getitem_2586, getitem_2587, getitem_2588, getitem_2589, getitem_2590, getitem_2591, getitem_2592, getitem_2593, getitem_2594, getitem_2595, getitem_2596, getitem_2597, getitem_2598, getitem_2599, getitem_2600, getitem_2601, getitem_2602, getitem_2603, getitem_2604, getitem_2605, getitem_2606, getitem_2607, getitem_2608, getitem_2609, getitem_2610, getitem_2611, getitem_2612, getitem_2613, getitem_2614, getitem_2615, getitem_2616, getitem_2617, getitem_2618, getitem_2619, getitem_2620, getitem_2621, getitem_2622, getitem_2623, getitem_2624, getitem_2625, getitem_2626, getitem_2627, getitem_2628, getitem_2629, getitem_2630, getitem_2631, getitem_2632, getitem_2633, getitem_2634, getitem_2635, getitem_2636, getitem_2637, getitem_2638, getitem_2639, getitem_2640, getitem_2641, getitem_2642, getitem_2643, getitem_2644, getitem_2645, getitem_2646, getitem_2647, getitem_2648, getitem_2649, getitem_2650, getitem_2651, getitem_2652, getitem_2653, getitem_2654, getitem_2655, getitem_2656, getitem_2657, getitem_2658, getitem_2659, getitem_2660, getitem_2661, getitem_2662, getitem_2663], [getitem_1924, getitem_1925, getitem_1926, getitem_1927, getitem_1928, getitem_1929, getitem_1930, getitem_1931, getitem_1932, getitem_1933, getitem_1934, getitem_1935, getitem_1936, getitem_1937, getitem_1938, getitem_1939, getitem_1940, getitem_1941, getitem_1942, getitem_1943, getitem_1944, getitem_1945, getitem_1946, getitem_1947, getitem_1948, getitem_1949, getitem_1950, getitem_1951, getitem_1952, getitem_1953, getitem_1954, getitem_1955, getitem_1956, getitem_1957, getitem_1958, getitem_1959, getitem_1960, getitem_1961, getitem_1962, getitem_1963, getitem_1964, getitem_1965, getitem_1966, getitem_1967, getitem_1968, getitem_1969, getitem_1970, getitem_1971, getitem_1972, getitem_1973, getitem_1974, getitem_1975, getitem_1976, getitem_1977, getitem_1978, getitem_1979, getitem_1980, getitem_1981, getitem_1982, getitem_1983, getitem_1984, getitem_1985, getitem_1986, getitem_1987, getitem_1988, getitem_1989, getitem_1990, getitem_1991, getitem_1992, getitem_1993, getitem_1994, getitem_1995, getitem_1996, getitem_1997, getitem_1998, getitem_1999, getitem_2000, getitem_2001, getitem_2002, getitem_2003, getitem_2004, getitem_2005, getitem_2006, getitem_2007, getitem_2008, getitem_2009, getitem_2010, getitem_2011, getitem_2012, getitem_2013, getitem_2014, getitem_2015, getitem_2016, getitem_2017, getitem_2018, getitem_2019, getitem_2020, getitem_2021, getitem_2022, getitem_2023, getitem_2024, getitem_2025, getitem_2026, getitem_2027, getitem_2028, getitem_2029, getitem_2030, getitem_2031, getitem_2032, getitem_2033, getitem_2034, getitem_2035, getitem_2036, getitem_2037, getitem_2038, getitem_2039, getitem_2040, getitem_2041, getitem_2042, getitem_2043, getitem_2044, getitem_2045, getitem_2046, getitem_2047, getitem_2048, getitem_2049, getitem_2050, getitem_2051, getitem_2052, getitem_2053, getitem_2054, getitem_2055, getitem_2056, getitem_2057, getitem_2058, getitem_2059, getitem_2060, getitem_2061, getitem_2062, getitem_2063, getitem_2064, getitem_2065, getitem_2066, getitem_2067, getitem_2068, getitem_2069, getitem_2070, getitem_2071]); getitem_2516 = getitem_2517 = getitem_2518 = getitem_2519 = getitem_2520 = getitem_2521 = getitem_2522 = getitem_2523 = getitem_2524 = getitem_2525 = getitem_2526 = getitem_2527 = getitem_2528 = getitem_2529 = getitem_2530 = getitem_2531 = 
getitem_2532 = getitem_2533 = getitem_2534 = getitem_2535 = getitem_2536 = getitem_2537 = getitem_2538 = getitem_2539 = getitem_2540 = getitem_2541 = getitem_2542 = getitem_2543 = getitem_2544 = getitem_2545 = getitem_2546 = getitem_2547 = getitem_2548 = getitem_2549 = getitem_2550 = getitem_2551 = getitem_2552 = getitem_2553 = getitem_2554 = getitem_2555 = getitem_2556 = getitem_2557 = getitem_2558 = getitem_2559 = getitem_2560 = getitem_2561 = getitem_2562 = getitem_2563 = getitem_2564 = getitem_2565 = getitem_2566 = getitem_2567 = getitem_2568 = getitem_2569 = getitem_2570 = getitem_2571 = getitem_2572 = getitem_2573 = getitem_2574 = getitem_2575 = getitem_2576 = getitem_2577 = getitem_2578 = getitem_2579 = getitem_2580 = getitem_2581 = getitem_2582 = getitem_2583 = getitem_2584 = getitem_2585 = getitem_2586 = getitem_2587 = getitem_2588 = getitem_2589 = getitem_2590 = getitem_2591 = getitem_2592 = getitem_2593 = getitem_2594 = getitem_2595 = getitem_2596 = getitem_2597 = getitem_2598 = getitem_2599 = getitem_2600 = getitem_2601 = getitem_2602 = getitem_2603 = getitem_2604 = getitem_2605 = getitem_2606 = getitem_2607 = getitem_2608 = getitem_2609 = getitem_2610 = getitem_2611 = getitem_2612 = getitem_2613 = getitem_2614 = getitem_2615 = getitem_2616 = getitem_2617 = getitem_2618 = getitem_2619 = getitem_2620 = getitem_2621 = getitem_2622 = getitem_2623 = getitem_2624 = getitem_2625 = getitem_2626 = getitem_2627 = getitem_2628 = getitem_2629 = getitem_2630 = getitem_2631 = getitem_2632 = getitem_2633 = getitem_2634 = getitem_2635 = getitem_2636 = getitem_2637 = getitem_2638 = getitem_2639 = getitem_2640 = getitem_2641 = getitem_2642 = getitem_2643 = getitem_2644 = getitem_2645 = getitem_2646 = getitem_2647 = getitem_2648 = getitem_2649 = getitem_2650 = getitem_2651 = getitem_2652 = getitem_2653 = getitem_2654 = getitem_2655 = getitem_2656 = getitem_2657 = getitem_2658 = getitem_2659 = getitem_2660 = getitem_2661 = getitem_2662 = getitem_2663 = getitem_1924 = getitem_1925 = getitem_1926 = getitem_1927 = getitem_1928 = getitem_1929 = getitem_1930 = getitem_1931 = getitem_1932 = getitem_1933 = getitem_1934 = getitem_1935 = getitem_1936 = getitem_1937 = getitem_1938 = getitem_1939 = getitem_1940 = getitem_1941 = getitem_1942 = getitem_1943 = getitem_1944 = getitem_1945 = getitem_1946 = getitem_1947 = getitem_1948 = getitem_1949 = getitem_1950 = getitem_1951 = getitem_1952 = getitem_1953 = getitem_1954 = getitem_1955 = getitem_1956 = getitem_1957 = getitem_1958 = getitem_1959 = getitem_1960 = getitem_1961 = getitem_1962 = getitem_1963 = getitem_1964 = getitem_1965 = getitem_1966 = getitem_1967 = getitem_1968 = getitem_1969 = getitem_1970 = getitem_1971 = getitem_1972 = getitem_1973 = getitem_1974 = getitem_1975 = getitem_1976 = getitem_1977 = getitem_1978 = getitem_1979 = getitem_1980 = getitem_1981 = getitem_1982 = getitem_1983 = getitem_1984 = getitem_1985 = getitem_1986 = getitem_1987 = getitem_1988 = getitem_1989 = getitem_1990 = getitem_1991 = getitem_1992 = getitem_1993 = getitem_1994 = getitem_1995 = getitem_1996 = getitem_1997 = getitem_1998 = getitem_1999 = getitem_2000 = getitem_2001 = getitem_2002 = getitem_2003 = getitem_2004 = getitem_2005 = getitem_2006 = getitem_2007 = getitem_2008 = getitem_2009 = getitem_2010 = getitem_2011 = getitem_2012 = getitem_2013 = getitem_2014 = getitem_2015 = getitem_2016 = getitem_2017 = getitem_2018 = getitem_2019 = getitem_2020 = getitem_2021 = getitem_2022 = getitem_2023 = getitem_2024 = getitem_2025 = getitem_2026 = getitem_2027 = getitem_2028 = 
getitem_2029 = getitem_2030 = getitem_2031 = getitem_2032 = getitem_2033 = getitem_2034 = getitem_2035 = getitem_2036 = getitem_2037 = getitem_2038 = getitem_2039 = getitem_2040 = getitem_2041 = getitem_2042 = getitem_2043 = getitem_2044 = getitem_2045 = getitem_2046 = getitem_2047 = getitem_2048 = getitem_2049 = getitem_2050 = getitem_2051 = getitem_2052 = getitem_2053 = getitem_2054 = getitem_2055 = getitem_2056 = getitem_2057 = getitem_2058 = getitem_2059 = getitem_2060 = getitem_2061 = getitem_2062 = getitem_2063 = getitem_2064 = getitem_2065 = getitem_2066 = getitem_2067 = getitem_2068 = getitem_2069 = getitem_2070 = getitem_2071 = None + getitem_2664: "f32[50304, 768][768, 1]cuda:0" = _foreach_div_2[0] + getitem_2665: "f32[1024, 768][768, 1]cuda:0" = _foreach_div_2[1] + getitem_2666: "f32[768][1]cuda:0" = _foreach_div_2[2] + getitem_2667: "f32[768][1]cuda:0" = _foreach_div_2[3] + getitem_2668: "f32[2304, 768][768, 1]cuda:0" = _foreach_div_2[4] + getitem_2669: "f32[2304][1]cuda:0" = _foreach_div_2[5] + getitem_2670: "f32[768, 768][768, 1]cuda:0" = _foreach_div_2[6] + getitem_2671: "f32[768][1]cuda:0" = _foreach_div_2[7] + getitem_2672: "f32[768][1]cuda:0" = _foreach_div_2[8] + getitem_2673: "f32[768][1]cuda:0" = _foreach_div_2[9] + getitem_2674: "f32[3072, 768][768, 1]cuda:0" = _foreach_div_2[10] + getitem_2675: "f32[3072][1]cuda:0" = _foreach_div_2[11] + getitem_2676: "f32[768, 3072][3072, 1]cuda:0" = _foreach_div_2[12] + getitem_2677: "f32[768][1]cuda:0" = _foreach_div_2[13] + getitem_2678: "f32[768][1]cuda:0" = _foreach_div_2[14] + getitem_2679: "f32[768][1]cuda:0" = _foreach_div_2[15] + getitem_2680: "f32[2304, 768][768, 1]cuda:0" = _foreach_div_2[16] + getitem_2681: "f32[2304][1]cuda:0" = _foreach_div_2[17] + getitem_2682: "f32[768, 768][768, 1]cuda:0" = _foreach_div_2[18] + getitem_2683: "f32[768][1]cuda:0" = _foreach_div_2[19] + getitem_2684: "f32[768][1]cuda:0" = _foreach_div_2[20] + getitem_2685: "f32[768][1]cuda:0" = _foreach_div_2[21] + getitem_2686: "f32[3072, 768][768, 1]cuda:0" = _foreach_div_2[22] + getitem_2687: "f32[3072][1]cuda:0" = _foreach_div_2[23] + getitem_2688: "f32[768, 3072][3072, 1]cuda:0" = _foreach_div_2[24] + getitem_2689: "f32[768][1]cuda:0" = _foreach_div_2[25] + getitem_2690: "f32[768][1]cuda:0" = _foreach_div_2[26] + getitem_2691: "f32[768][1]cuda:0" = _foreach_div_2[27] + getitem_2692: "f32[2304, 768][768, 1]cuda:0" = _foreach_div_2[28] + getitem_2693: "f32[2304][1]cuda:0" = _foreach_div_2[29] + getitem_2694: "f32[768, 768][768, 1]cuda:0" = _foreach_div_2[30] + getitem_2695: "f32[768][1]cuda:0" = _foreach_div_2[31] + getitem_2696: "f32[768][1]cuda:0" = _foreach_div_2[32] + getitem_2697: "f32[768][1]cuda:0" = _foreach_div_2[33] + getitem_2698: "f32[3072, 768][768, 1]cuda:0" = _foreach_div_2[34] + getitem_2699: "f32[3072][1]cuda:0" = _foreach_div_2[35] + getitem_2700: "f32[768, 3072][3072, 1]cuda:0" = _foreach_div_2[36] + getitem_2701: "f32[768][1]cuda:0" = _foreach_div_2[37] + getitem_2702: "f32[768][1]cuda:0" = _foreach_div_2[38] + getitem_2703: "f32[768][1]cuda:0" = _foreach_div_2[39] + getitem_2704: "f32[2304, 768][768, 1]cuda:0" = _foreach_div_2[40] + getitem_2705: "f32[2304][1]cuda:0" = _foreach_div_2[41] + getitem_2706: "f32[768, 768][768, 1]cuda:0" = _foreach_div_2[42] + getitem_2707: "f32[768][1]cuda:0" = _foreach_div_2[43] + getitem_2708: "f32[768][1]cuda:0" = _foreach_div_2[44] + getitem_2709: "f32[768][1]cuda:0" = _foreach_div_2[45] + getitem_2710: "f32[3072, 768][768, 1]cuda:0" = _foreach_div_2[46] + getitem_2711: "f32[3072][1]cuda:0" = 
_foreach_div_2[47] + getitem_2712: "f32[768, 3072][3072, 1]cuda:0" = _foreach_div_2[48] + getitem_2713: "f32[768][1]cuda:0" = _foreach_div_2[49] + getitem_2714: "f32[768][1]cuda:0" = _foreach_div_2[50] + getitem_2715: "f32[768][1]cuda:0" = _foreach_div_2[51] + getitem_2716: "f32[2304, 768][768, 1]cuda:0" = _foreach_div_2[52] + getitem_2717: "f32[2304][1]cuda:0" = _foreach_div_2[53] + getitem_2718: "f32[768, 768][768, 1]cuda:0" = _foreach_div_2[54] + getitem_2719: "f32[768][1]cuda:0" = _foreach_div_2[55] + getitem_2720: "f32[768][1]cuda:0" = _foreach_div_2[56] + getitem_2721: "f32[768][1]cuda:0" = _foreach_div_2[57] + getitem_2722: "f32[3072, 768][768, 1]cuda:0" = _foreach_div_2[58] + getitem_2723: "f32[3072][1]cuda:0" = _foreach_div_2[59] + getitem_2724: "f32[768, 3072][3072, 1]cuda:0" = _foreach_div_2[60] + getitem_2725: "f32[768][1]cuda:0" = _foreach_div_2[61] + getitem_2726: "f32[768][1]cuda:0" = _foreach_div_2[62] + getitem_2727: "f32[768][1]cuda:0" = _foreach_div_2[63] + getitem_2728: "f32[2304, 768][768, 1]cuda:0" = _foreach_div_2[64] + getitem_2729: "f32[2304][1]cuda:0" = _foreach_div_2[65] + getitem_2730: "f32[768, 768][768, 1]cuda:0" = _foreach_div_2[66] + getitem_2731: "f32[768][1]cuda:0" = _foreach_div_2[67] + getitem_2732: "f32[768][1]cuda:0" = _foreach_div_2[68] + getitem_2733: "f32[768][1]cuda:0" = _foreach_div_2[69] + getitem_2734: "f32[3072, 768][768, 1]cuda:0" = _foreach_div_2[70] + getitem_2735: "f32[3072][1]cuda:0" = _foreach_div_2[71] + getitem_2736: "f32[768, 3072][3072, 1]cuda:0" = _foreach_div_2[72] + getitem_2737: "f32[768][1]cuda:0" = _foreach_div_2[73] + getitem_2738: "f32[768][1]cuda:0" = _foreach_div_2[74] + getitem_2739: "f32[768][1]cuda:0" = _foreach_div_2[75] + getitem_2740: "f32[2304, 768][768, 1]cuda:0" = _foreach_div_2[76] + getitem_2741: "f32[2304][1]cuda:0" = _foreach_div_2[77] + getitem_2742: "f32[768, 768][768, 1]cuda:0" = _foreach_div_2[78] + getitem_2743: "f32[768][1]cuda:0" = _foreach_div_2[79] + getitem_2744: "f32[768][1]cuda:0" = _foreach_div_2[80] + getitem_2745: "f32[768][1]cuda:0" = _foreach_div_2[81] + getitem_2746: "f32[3072, 768][768, 1]cuda:0" = _foreach_div_2[82] + getitem_2747: "f32[3072][1]cuda:0" = _foreach_div_2[83] + getitem_2748: "f32[768, 3072][3072, 1]cuda:0" = _foreach_div_2[84] + getitem_2749: "f32[768][1]cuda:0" = _foreach_div_2[85] + getitem_2750: "f32[768][1]cuda:0" = _foreach_div_2[86] + getitem_2751: "f32[768][1]cuda:0" = _foreach_div_2[87] + getitem_2752: "f32[2304, 768][768, 1]cuda:0" = _foreach_div_2[88] + getitem_2753: "f32[2304][1]cuda:0" = _foreach_div_2[89] + getitem_2754: "f32[768, 768][768, 1]cuda:0" = _foreach_div_2[90] + getitem_2755: "f32[768][1]cuda:0" = _foreach_div_2[91] + getitem_2756: "f32[768][1]cuda:0" = _foreach_div_2[92] + getitem_2757: "f32[768][1]cuda:0" = _foreach_div_2[93] + getitem_2758: "f32[3072, 768][768, 1]cuda:0" = _foreach_div_2[94] + getitem_2759: "f32[3072][1]cuda:0" = _foreach_div_2[95] + getitem_2760: "f32[768, 3072][3072, 1]cuda:0" = _foreach_div_2[96] + getitem_2761: "f32[768][1]cuda:0" = _foreach_div_2[97] + getitem_2762: "f32[768][1]cuda:0" = _foreach_div_2[98] + getitem_2763: "f32[768][1]cuda:0" = _foreach_div_2[99] + getitem_2764: "f32[2304, 768][768, 1]cuda:0" = _foreach_div_2[100] + getitem_2765: "f32[2304][1]cuda:0" = _foreach_div_2[101] + getitem_2766: "f32[768, 768][768, 1]cuda:0" = _foreach_div_2[102] + getitem_2767: "f32[768][1]cuda:0" = _foreach_div_2[103] + getitem_2768: "f32[768][1]cuda:0" = _foreach_div_2[104] + getitem_2769: "f32[768][1]cuda:0" = _foreach_div_2[105] + 
getitem_2770: "f32[3072, 768][768, 1]cuda:0" = _foreach_div_2[106] + getitem_2771: "f32[3072][1]cuda:0" = _foreach_div_2[107] + getitem_2772: "f32[768, 3072][3072, 1]cuda:0" = _foreach_div_2[108] + getitem_2773: "f32[768][1]cuda:0" = _foreach_div_2[109] + getitem_2774: "f32[768][1]cuda:0" = _foreach_div_2[110] + getitem_2775: "f32[768][1]cuda:0" = _foreach_div_2[111] + getitem_2776: "f32[2304, 768][768, 1]cuda:0" = _foreach_div_2[112] + getitem_2777: "f32[2304][1]cuda:0" = _foreach_div_2[113] + getitem_2778: "f32[768, 768][768, 1]cuda:0" = _foreach_div_2[114] + getitem_2779: "f32[768][1]cuda:0" = _foreach_div_2[115] + getitem_2780: "f32[768][1]cuda:0" = _foreach_div_2[116] + getitem_2781: "f32[768][1]cuda:0" = _foreach_div_2[117] + getitem_2782: "f32[3072, 768][768, 1]cuda:0" = _foreach_div_2[118] + getitem_2783: "f32[3072][1]cuda:0" = _foreach_div_2[119] + getitem_2784: "f32[768, 3072][3072, 1]cuda:0" = _foreach_div_2[120] + getitem_2785: "f32[768][1]cuda:0" = _foreach_div_2[121] + getitem_2786: "f32[768][1]cuda:0" = _foreach_div_2[122] + getitem_2787: "f32[768][1]cuda:0" = _foreach_div_2[123] + getitem_2788: "f32[2304, 768][768, 1]cuda:0" = _foreach_div_2[124] + getitem_2789: "f32[2304][1]cuda:0" = _foreach_div_2[125] + getitem_2790: "f32[768, 768][768, 1]cuda:0" = _foreach_div_2[126] + getitem_2791: "f32[768][1]cuda:0" = _foreach_div_2[127] + getitem_2792: "f32[768][1]cuda:0" = _foreach_div_2[128] + getitem_2793: "f32[768][1]cuda:0" = _foreach_div_2[129] + getitem_2794: "f32[3072, 768][768, 1]cuda:0" = _foreach_div_2[130] + getitem_2795: "f32[3072][1]cuda:0" = _foreach_div_2[131] + getitem_2796: "f32[768, 3072][3072, 1]cuda:0" = _foreach_div_2[132] + getitem_2797: "f32[768][1]cuda:0" = _foreach_div_2[133] + getitem_2798: "f32[768][1]cuda:0" = _foreach_div_2[134] + getitem_2799: "f32[768][1]cuda:0" = _foreach_div_2[135] + getitem_2800: "f32[2304, 768][768, 1]cuda:0" = _foreach_div_2[136] + getitem_2801: "f32[2304][1]cuda:0" = _foreach_div_2[137] + getitem_2802: "f32[768, 768][768, 1]cuda:0" = _foreach_div_2[138] + getitem_2803: "f32[768][1]cuda:0" = _foreach_div_2[139] + getitem_2804: "f32[768][1]cuda:0" = _foreach_div_2[140] + getitem_2805: "f32[768][1]cuda:0" = _foreach_div_2[141] + getitem_2806: "f32[3072, 768][768, 1]cuda:0" = _foreach_div_2[142] + getitem_2807: "f32[3072][1]cuda:0" = _foreach_div_2[143] + getitem_2808: "f32[768, 3072][3072, 1]cuda:0" = _foreach_div_2[144] + getitem_2809: "f32[768][1]cuda:0" = _foreach_div_2[145] + getitem_2810: "f32[768][1]cuda:0" = _foreach_div_2[146] + getitem_2811: "f32[768][1]cuda:0" = _foreach_div_2[147]; _foreach_div_2 = None + + # File: /data/users/jjwu/a/pytorch/torch/optim/adam.py:589 in _multi_tensor_adam, code: torch._foreach_addcdiv_(device_params, device_exp_avgs, exp_avg_sq_sqrt) + _foreach_div_3 = torch.ops.aten._foreach_div.List([getitem_444, getitem_445, getitem_446, getitem_447, getitem_448, getitem_449, getitem_450, getitem_451, getitem_452, getitem_453, getitem_454, getitem_455, getitem_456, getitem_457, getitem_458, getitem_459, getitem_460, getitem_461, getitem_462, getitem_463, getitem_464, getitem_465, getitem_466, getitem_467, getitem_468, getitem_469, getitem_470, getitem_471, getitem_472, getitem_473, getitem_474, getitem_475, getitem_476, getitem_477, getitem_478, getitem_479, getitem_480, getitem_481, getitem_482, getitem_483, getitem_484, getitem_485, getitem_486, getitem_487, getitem_488, getitem_489, getitem_490, getitem_491, getitem_492, getitem_493, getitem_494, getitem_495, getitem_496, getitem_497, getitem_498, 
getitem_499, getitem_500, getitem_501, getitem_502, getitem_503, getitem_504, getitem_505, getitem_506, getitem_507, getitem_508, getitem_509, getitem_510, getitem_511, getitem_512, getitem_513, getitem_514, getitem_515, getitem_516, getitem_517, getitem_518, getitem_519, getitem_520, getitem_521, getitem_522, getitem_523, getitem_524, getitem_525, getitem_526, getitem_527, getitem_528, getitem_529, getitem_530, getitem_531, getitem_532, getitem_533, getitem_534, getitem_535, getitem_536, getitem_537, getitem_538, getitem_539, getitem_540, getitem_541, getitem_542, getitem_543, getitem_544, getitem_545, getitem_546, getitem_547, getitem_548, getitem_549, getitem_550, getitem_551, getitem_552, getitem_553, getitem_554, getitem_555, getitem_556, getitem_557, getitem_558, getitem_559, getitem_560, getitem_561, getitem_562, getitem_563, getitem_564, getitem_565, getitem_566, getitem_567, getitem_568, getitem_569, getitem_570, getitem_571, getitem_572, getitem_573, getitem_574, getitem_575, getitem_576, getitem_577, getitem_578, getitem_579, getitem_580, getitem_581, getitem_582, getitem_583, getitem_584, getitem_585, getitem_586, getitem_587, getitem_588, getitem_589, getitem_590, getitem_591], [getitem_2664, getitem_2665, getitem_2666, getitem_2667, getitem_2668, getitem_2669, getitem_2670, getitem_2671, getitem_2672, getitem_2673, getitem_2674, getitem_2675, getitem_2676, getitem_2677, getitem_2678, getitem_2679, getitem_2680, getitem_2681, getitem_2682, getitem_2683, getitem_2684, getitem_2685, getitem_2686, getitem_2687, getitem_2688, getitem_2689, getitem_2690, getitem_2691, getitem_2692, getitem_2693, getitem_2694, getitem_2695, getitem_2696, getitem_2697, getitem_2698, getitem_2699, getitem_2700, getitem_2701, getitem_2702, getitem_2703, getitem_2704, getitem_2705, getitem_2706, getitem_2707, getitem_2708, getitem_2709, getitem_2710, getitem_2711, getitem_2712, getitem_2713, getitem_2714, getitem_2715, getitem_2716, getitem_2717, getitem_2718, getitem_2719, getitem_2720, getitem_2721, getitem_2722, getitem_2723, getitem_2724, getitem_2725, getitem_2726, getitem_2727, getitem_2728, getitem_2729, getitem_2730, getitem_2731, getitem_2732, getitem_2733, getitem_2734, getitem_2735, getitem_2736, getitem_2737, getitem_2738, getitem_2739, getitem_2740, getitem_2741, getitem_2742, getitem_2743, getitem_2744, getitem_2745, getitem_2746, getitem_2747, getitem_2748, getitem_2749, getitem_2750, getitem_2751, getitem_2752, getitem_2753, getitem_2754, getitem_2755, getitem_2756, getitem_2757, getitem_2758, getitem_2759, getitem_2760, getitem_2761, getitem_2762, getitem_2763, getitem_2764, getitem_2765, getitem_2766, getitem_2767, getitem_2768, getitem_2769, getitem_2770, getitem_2771, getitem_2772, getitem_2773, getitem_2774, getitem_2775, getitem_2776, getitem_2777, getitem_2778, getitem_2779, getitem_2780, getitem_2781, getitem_2782, getitem_2783, getitem_2784, getitem_2785, getitem_2786, getitem_2787, getitem_2788, getitem_2789, getitem_2790, getitem_2791, getitem_2792, getitem_2793, getitem_2794, getitem_2795, getitem_2796, getitem_2797, getitem_2798, getitem_2799, getitem_2800, getitem_2801, getitem_2802, getitem_2803, getitem_2804, getitem_2805, getitem_2806, getitem_2807, getitem_2808, getitem_2809, getitem_2810, getitem_2811]); getitem_444 = getitem_445 = getitem_446 = getitem_447 = getitem_448 = getitem_449 = getitem_450 = getitem_451 = getitem_452 = getitem_453 = getitem_454 = getitem_455 = getitem_456 = getitem_457 = getitem_458 = getitem_459 = getitem_460 = getitem_461 = getitem_462 = 
getitem_463 = getitem_464 = getitem_465 = getitem_466 = getitem_467 = getitem_468 = getitem_469 = getitem_470 = getitem_471 = getitem_472 = getitem_473 = getitem_474 = getitem_475 = getitem_476 = getitem_477 = getitem_478 = getitem_479 = getitem_480 = getitem_481 = getitem_482 = getitem_483 = getitem_484 = getitem_485 = getitem_486 = getitem_487 = getitem_488 = getitem_489 = getitem_490 = getitem_491 = getitem_492 = getitem_493 = getitem_494 = getitem_495 = getitem_496 = getitem_497 = getitem_498 = getitem_499 = getitem_500 = getitem_501 = getitem_502 = getitem_503 = getitem_504 = getitem_505 = getitem_506 = getitem_507 = getitem_508 = getitem_509 = getitem_510 = getitem_511 = getitem_512 = getitem_513 = getitem_514 = getitem_515 = getitem_516 = getitem_517 = getitem_518 = getitem_519 = getitem_520 = getitem_521 = getitem_522 = getitem_523 = getitem_524 = getitem_525 = getitem_526 = getitem_527 = getitem_528 = getitem_529 = getitem_530 = getitem_531 = getitem_532 = getitem_533 = getitem_534 = getitem_535 = getitem_536 = getitem_537 = getitem_538 = getitem_539 = getitem_540 = getitem_541 = getitem_542 = getitem_543 = getitem_544 = getitem_545 = getitem_546 = getitem_547 = getitem_548 = getitem_549 = getitem_550 = getitem_551 = getitem_552 = getitem_553 = getitem_554 = getitem_555 = getitem_556 = getitem_557 = getitem_558 = getitem_559 = getitem_560 = getitem_561 = getitem_562 = getitem_563 = getitem_564 = getitem_565 = getitem_566 = getitem_567 = getitem_568 = getitem_569 = getitem_570 = getitem_571 = getitem_572 = getitem_573 = getitem_574 = getitem_575 = getitem_576 = getitem_577 = getitem_578 = getitem_579 = getitem_580 = getitem_581 = getitem_582 = getitem_583 = getitem_584 = getitem_585 = getitem_586 = getitem_587 = getitem_588 = getitem_589 = getitem_590 = getitem_591 = getitem_2664 = getitem_2665 = getitem_2666 = getitem_2667 = getitem_2668 = getitem_2669 = getitem_2670 = getitem_2671 = getitem_2672 = getitem_2673 = getitem_2674 = getitem_2675 = getitem_2676 = getitem_2677 = getitem_2678 = getitem_2679 = getitem_2680 = getitem_2681 = getitem_2682 = getitem_2683 = getitem_2684 = getitem_2685 = getitem_2686 = getitem_2687 = getitem_2688 = getitem_2689 = getitem_2690 = getitem_2691 = getitem_2692 = getitem_2693 = getitem_2694 = getitem_2695 = getitem_2696 = getitem_2697 = getitem_2698 = getitem_2699 = getitem_2700 = getitem_2701 = getitem_2702 = getitem_2703 = getitem_2704 = getitem_2705 = getitem_2706 = getitem_2707 = getitem_2708 = getitem_2709 = getitem_2710 = getitem_2711 = getitem_2712 = getitem_2713 = getitem_2714 = getitem_2715 = getitem_2716 = getitem_2717 = getitem_2718 = getitem_2719 = getitem_2720 = getitem_2721 = getitem_2722 = getitem_2723 = getitem_2724 = getitem_2725 = getitem_2726 = getitem_2727 = getitem_2728 = getitem_2729 = getitem_2730 = getitem_2731 = getitem_2732 = getitem_2733 = getitem_2734 = getitem_2735 = getitem_2736 = getitem_2737 = getitem_2738 = getitem_2739 = getitem_2740 = getitem_2741 = getitem_2742 = getitem_2743 = getitem_2744 = getitem_2745 = getitem_2746 = getitem_2747 = getitem_2748 = getitem_2749 = getitem_2750 = getitem_2751 = getitem_2752 = getitem_2753 = getitem_2754 = getitem_2755 = getitem_2756 = getitem_2757 = getitem_2758 = getitem_2759 = getitem_2760 = getitem_2761 = getitem_2762 = getitem_2763 = getitem_2764 = getitem_2765 = getitem_2766 = getitem_2767 = getitem_2768 = getitem_2769 = getitem_2770 = getitem_2771 = getitem_2772 = getitem_2773 = getitem_2774 = getitem_2775 = getitem_2776 = getitem_2777 = getitem_2778 = getitem_2779 = 
getitem_2780 = getitem_2781 = getitem_2782 = getitem_2783 = getitem_2784 = getitem_2785 = getitem_2786 = getitem_2787 = getitem_2788 = getitem_2789 = getitem_2790 = getitem_2791 = getitem_2792 = getitem_2793 = getitem_2794 = getitem_2795 = getitem_2796 = getitem_2797 = getitem_2798 = getitem_2799 = getitem_2800 = getitem_2801 = getitem_2802 = getitem_2803 = getitem_2804 = getitem_2805 = getitem_2806 = getitem_2807 = getitem_2808 = getitem_2809 = getitem_2810 = getitem_2811 = None + getitem_2812: "f32[50304, 768][768, 1]cuda:0" = _foreach_div_3[0] + getitem_2813: "f32[1024, 768][768, 1]cuda:0" = _foreach_div_3[1] + getitem_2814: "f32[768][1]cuda:0" = _foreach_div_3[2] + getitem_2815: "f32[768][1]cuda:0" = _foreach_div_3[3] + getitem_2816: "f32[2304, 768][768, 1]cuda:0" = _foreach_div_3[4] + getitem_2817: "f32[2304][1]cuda:0" = _foreach_div_3[5] + getitem_2818: "f32[768, 768][768, 1]cuda:0" = _foreach_div_3[6] + getitem_2819: "f32[768][1]cuda:0" = _foreach_div_3[7] + getitem_2820: "f32[768][1]cuda:0" = _foreach_div_3[8] + getitem_2821: "f32[768][1]cuda:0" = _foreach_div_3[9] + getitem_2822: "f32[3072, 768][768, 1]cuda:0" = _foreach_div_3[10] + getitem_2823: "f32[3072][1]cuda:0" = _foreach_div_3[11] + getitem_2824: "f32[768, 3072][3072, 1]cuda:0" = _foreach_div_3[12] + getitem_2825: "f32[768][1]cuda:0" = _foreach_div_3[13] + getitem_2826: "f32[768][1]cuda:0" = _foreach_div_3[14] + getitem_2827: "f32[768][1]cuda:0" = _foreach_div_3[15] + getitem_2828: "f32[2304, 768][768, 1]cuda:0" = _foreach_div_3[16] + getitem_2829: "f32[2304][1]cuda:0" = _foreach_div_3[17] + getitem_2830: "f32[768, 768][768, 1]cuda:0" = _foreach_div_3[18] + getitem_2831: "f32[768][1]cuda:0" = _foreach_div_3[19] + getitem_2832: "f32[768][1]cuda:0" = _foreach_div_3[20] + getitem_2833: "f32[768][1]cuda:0" = _foreach_div_3[21] + getitem_2834: "f32[3072, 768][768, 1]cuda:0" = _foreach_div_3[22] + getitem_2835: "f32[3072][1]cuda:0" = _foreach_div_3[23] + getitem_2836: "f32[768, 3072][3072, 1]cuda:0" = _foreach_div_3[24] + getitem_2837: "f32[768][1]cuda:0" = _foreach_div_3[25] + getitem_2838: "f32[768][1]cuda:0" = _foreach_div_3[26] + getitem_2839: "f32[768][1]cuda:0" = _foreach_div_3[27] + getitem_2840: "f32[2304, 768][768, 1]cuda:0" = _foreach_div_3[28] + getitem_2841: "f32[2304][1]cuda:0" = _foreach_div_3[29] + getitem_2842: "f32[768, 768][768, 1]cuda:0" = _foreach_div_3[30] + getitem_2843: "f32[768][1]cuda:0" = _foreach_div_3[31] + getitem_2844: "f32[768][1]cuda:0" = _foreach_div_3[32] + getitem_2845: "f32[768][1]cuda:0" = _foreach_div_3[33] + getitem_2846: "f32[3072, 768][768, 1]cuda:0" = _foreach_div_3[34] + getitem_2847: "f32[3072][1]cuda:0" = _foreach_div_3[35] + getitem_2848: "f32[768, 3072][3072, 1]cuda:0" = _foreach_div_3[36] + getitem_2849: "f32[768][1]cuda:0" = _foreach_div_3[37] + getitem_2850: "f32[768][1]cuda:0" = _foreach_div_3[38] + getitem_2851: "f32[768][1]cuda:0" = _foreach_div_3[39] + getitem_2852: "f32[2304, 768][768, 1]cuda:0" = _foreach_div_3[40] + getitem_2853: "f32[2304][1]cuda:0" = _foreach_div_3[41] + getitem_2854: "f32[768, 768][768, 1]cuda:0" = _foreach_div_3[42] + getitem_2855: "f32[768][1]cuda:0" = _foreach_div_3[43] + getitem_2856: "f32[768][1]cuda:0" = _foreach_div_3[44] + getitem_2857: "f32[768][1]cuda:0" = _foreach_div_3[45] + getitem_2858: "f32[3072, 768][768, 1]cuda:0" = _foreach_div_3[46] + getitem_2859: "f32[3072][1]cuda:0" = _foreach_div_3[47] + getitem_2860: "f32[768, 3072][3072, 1]cuda:0" = _foreach_div_3[48] + getitem_2861: "f32[768][1]cuda:0" = _foreach_div_3[49] + getitem_2862: 
"f32[768][1]cuda:0" = _foreach_div_3[50] + getitem_2863: "f32[768][1]cuda:0" = _foreach_div_3[51] + getitem_2864: "f32[2304, 768][768, 1]cuda:0" = _foreach_div_3[52] + getitem_2865: "f32[2304][1]cuda:0" = _foreach_div_3[53] + getitem_2866: "f32[768, 768][768, 1]cuda:0" = _foreach_div_3[54] + getitem_2867: "f32[768][1]cuda:0" = _foreach_div_3[55] + getitem_2868: "f32[768][1]cuda:0" = _foreach_div_3[56] + getitem_2869: "f32[768][1]cuda:0" = _foreach_div_3[57] + getitem_2870: "f32[3072, 768][768, 1]cuda:0" = _foreach_div_3[58] + getitem_2871: "f32[3072][1]cuda:0" = _foreach_div_3[59] + getitem_2872: "f32[768, 3072][3072, 1]cuda:0" = _foreach_div_3[60] + getitem_2873: "f32[768][1]cuda:0" = _foreach_div_3[61] + getitem_2874: "f32[768][1]cuda:0" = _foreach_div_3[62] + getitem_2875: "f32[768][1]cuda:0" = _foreach_div_3[63] + getitem_2876: "f32[2304, 768][768, 1]cuda:0" = _foreach_div_3[64] + getitem_2877: "f32[2304][1]cuda:0" = _foreach_div_3[65] + getitem_2878: "f32[768, 768][768, 1]cuda:0" = _foreach_div_3[66] + getitem_2879: "f32[768][1]cuda:0" = _foreach_div_3[67] + getitem_2880: "f32[768][1]cuda:0" = _foreach_div_3[68] + getitem_2881: "f32[768][1]cuda:0" = _foreach_div_3[69] + getitem_2882: "f32[3072, 768][768, 1]cuda:0" = _foreach_div_3[70] + getitem_2883: "f32[3072][1]cuda:0" = _foreach_div_3[71] + getitem_2884: "f32[768, 3072][3072, 1]cuda:0" = _foreach_div_3[72] + getitem_2885: "f32[768][1]cuda:0" = _foreach_div_3[73] + getitem_2886: "f32[768][1]cuda:0" = _foreach_div_3[74] + getitem_2887: "f32[768][1]cuda:0" = _foreach_div_3[75] + getitem_2888: "f32[2304, 768][768, 1]cuda:0" = _foreach_div_3[76] + getitem_2889: "f32[2304][1]cuda:0" = _foreach_div_3[77] + getitem_2890: "f32[768, 768][768, 1]cuda:0" = _foreach_div_3[78] + getitem_2891: "f32[768][1]cuda:0" = _foreach_div_3[79] + getitem_2892: "f32[768][1]cuda:0" = _foreach_div_3[80] + getitem_2893: "f32[768][1]cuda:0" = _foreach_div_3[81] + getitem_2894: "f32[3072, 768][768, 1]cuda:0" = _foreach_div_3[82] + getitem_2895: "f32[3072][1]cuda:0" = _foreach_div_3[83] + getitem_2896: "f32[768, 3072][3072, 1]cuda:0" = _foreach_div_3[84] + getitem_2897: "f32[768][1]cuda:0" = _foreach_div_3[85] + getitem_2898: "f32[768][1]cuda:0" = _foreach_div_3[86] + getitem_2899: "f32[768][1]cuda:0" = _foreach_div_3[87] + getitem_2900: "f32[2304, 768][768, 1]cuda:0" = _foreach_div_3[88] + getitem_2901: "f32[2304][1]cuda:0" = _foreach_div_3[89] + getitem_2902: "f32[768, 768][768, 1]cuda:0" = _foreach_div_3[90] + getitem_2903: "f32[768][1]cuda:0" = _foreach_div_3[91] + getitem_2904: "f32[768][1]cuda:0" = _foreach_div_3[92] + getitem_2905: "f32[768][1]cuda:0" = _foreach_div_3[93] + getitem_2906: "f32[3072, 768][768, 1]cuda:0" = _foreach_div_3[94] + getitem_2907: "f32[3072][1]cuda:0" = _foreach_div_3[95] + getitem_2908: "f32[768, 3072][3072, 1]cuda:0" = _foreach_div_3[96] + getitem_2909: "f32[768][1]cuda:0" = _foreach_div_3[97] + getitem_2910: "f32[768][1]cuda:0" = _foreach_div_3[98] + getitem_2911: "f32[768][1]cuda:0" = _foreach_div_3[99] + getitem_2912: "f32[2304, 768][768, 1]cuda:0" = _foreach_div_3[100] + getitem_2913: "f32[2304][1]cuda:0" = _foreach_div_3[101] + getitem_2914: "f32[768, 768][768, 1]cuda:0" = _foreach_div_3[102] + getitem_2915: "f32[768][1]cuda:0" = _foreach_div_3[103] + getitem_2916: "f32[768][1]cuda:0" = _foreach_div_3[104] + getitem_2917: "f32[768][1]cuda:0" = _foreach_div_3[105] + getitem_2918: "f32[3072, 768][768, 1]cuda:0" = _foreach_div_3[106] + getitem_2919: "f32[3072][1]cuda:0" = _foreach_div_3[107] + getitem_2920: "f32[768, 3072][3072, 
1]cuda:0" = _foreach_div_3[108] + getitem_2921: "f32[768][1]cuda:0" = _foreach_div_3[109] + getitem_2922: "f32[768][1]cuda:0" = _foreach_div_3[110] + getitem_2923: "f32[768][1]cuda:0" = _foreach_div_3[111] + getitem_2924: "f32[2304, 768][768, 1]cuda:0" = _foreach_div_3[112] + getitem_2925: "f32[2304][1]cuda:0" = _foreach_div_3[113] + getitem_2926: "f32[768, 768][768, 1]cuda:0" = _foreach_div_3[114] + getitem_2927: "f32[768][1]cuda:0" = _foreach_div_3[115] + getitem_2928: "f32[768][1]cuda:0" = _foreach_div_3[116] + getitem_2929: "f32[768][1]cuda:0" = _foreach_div_3[117] + getitem_2930: "f32[3072, 768][768, 1]cuda:0" = _foreach_div_3[118] + getitem_2931: "f32[3072][1]cuda:0" = _foreach_div_3[119] + getitem_2932: "f32[768, 3072][3072, 1]cuda:0" = _foreach_div_3[120] + getitem_2933: "f32[768][1]cuda:0" = _foreach_div_3[121] + getitem_2934: "f32[768][1]cuda:0" = _foreach_div_3[122] + getitem_2935: "f32[768][1]cuda:0" = _foreach_div_3[123] + getitem_2936: "f32[2304, 768][768, 1]cuda:0" = _foreach_div_3[124] + getitem_2937: "f32[2304][1]cuda:0" = _foreach_div_3[125] + getitem_2938: "f32[768, 768][768, 1]cuda:0" = _foreach_div_3[126] + getitem_2939: "f32[768][1]cuda:0" = _foreach_div_3[127] + getitem_2940: "f32[768][1]cuda:0" = _foreach_div_3[128] + getitem_2941: "f32[768][1]cuda:0" = _foreach_div_3[129] + getitem_2942: "f32[3072, 768][768, 1]cuda:0" = _foreach_div_3[130] + getitem_2943: "f32[3072][1]cuda:0" = _foreach_div_3[131] + getitem_2944: "f32[768, 3072][3072, 1]cuda:0" = _foreach_div_3[132] + getitem_2945: "f32[768][1]cuda:0" = _foreach_div_3[133] + getitem_2946: "f32[768][1]cuda:0" = _foreach_div_3[134] + getitem_2947: "f32[768][1]cuda:0" = _foreach_div_3[135] + getitem_2948: "f32[2304, 768][768, 1]cuda:0" = _foreach_div_3[136] + getitem_2949: "f32[2304][1]cuda:0" = _foreach_div_3[137] + getitem_2950: "f32[768, 768][768, 1]cuda:0" = _foreach_div_3[138] + getitem_2951: "f32[768][1]cuda:0" = _foreach_div_3[139] + getitem_2952: "f32[768][1]cuda:0" = _foreach_div_3[140] + getitem_2953: "f32[768][1]cuda:0" = _foreach_div_3[141] + getitem_2954: "f32[3072, 768][768, 1]cuda:0" = _foreach_div_3[142] + getitem_2955: "f32[3072][1]cuda:0" = _foreach_div_3[143] + getitem_2956: "f32[768, 3072][3072, 1]cuda:0" = _foreach_div_3[144] + getitem_2957: "f32[768][1]cuda:0" = _foreach_div_3[145] + getitem_2958: "f32[768][1]cuda:0" = _foreach_div_3[146] + getitem_2959: "f32[768][1]cuda:0" = _foreach_div_3[147]; _foreach_div_3 = None + _foreach_add_4 = torch.ops.aten._foreach_add_.List([arg0_1, arg1_1, arg2_1, arg3_1, arg4_1, arg5_1, arg6_1, arg7_1, arg8_1, arg9_1, arg10_1, arg11_1, arg12_1, arg13_1, arg14_1, arg15_1, arg16_1, arg17_1, arg18_1, arg19_1, arg20_1, arg21_1, arg22_1, arg23_1, arg24_1, arg25_1, arg26_1, arg27_1, arg28_1, arg29_1, arg30_1, arg31_1, arg32_1, arg33_1, arg34_1, arg35_1, arg36_1, arg37_1, arg38_1, arg39_1, arg40_1, arg41_1, arg42_1, arg43_1, arg44_1, arg45_1, arg46_1, arg47_1, arg48_1, arg49_1, arg50_1, arg51_1, arg52_1, arg53_1, arg54_1, arg55_1, arg56_1, arg57_1, arg58_1, arg59_1, arg60_1, arg61_1, arg62_1, arg63_1, arg64_1, arg65_1, arg66_1, arg67_1, arg68_1, arg69_1, arg70_1, arg71_1, arg72_1, arg73_1, arg74_1, arg75_1, arg76_1, arg77_1, arg78_1, arg79_1, arg80_1, arg81_1, arg82_1, arg83_1, arg84_1, arg85_1, arg86_1, arg87_1, arg88_1, arg89_1, arg90_1, arg91_1, arg92_1, arg93_1, arg94_1, arg95_1, arg96_1, arg97_1, arg98_1, arg99_1, arg100_1, arg101_1, arg102_1, arg103_1, arg104_1, arg105_1, arg106_1, arg107_1, arg108_1, arg109_1, arg110_1, arg111_1, arg112_1, arg113_1, arg114_1, 
arg115_1, arg116_1, arg117_1, arg118_1, arg119_1, arg120_1, arg121_1, arg122_1, arg123_1, arg124_1, arg125_1, arg126_1, arg127_1, arg128_1, arg129_1, arg130_1, arg131_1, arg132_1, arg133_1, arg134_1, arg135_1, arg136_1, arg137_1, arg138_1, arg139_1, arg140_1, arg141_1, arg142_1, arg143_1, arg144_1, arg145_1, arg146_1, arg147_1], [getitem_2812, getitem_2813, getitem_2814, getitem_2815, getitem_2816, getitem_2817, getitem_2818, getitem_2819, getitem_2820, getitem_2821, getitem_2822, getitem_2823, getitem_2824, getitem_2825, getitem_2826, getitem_2827, getitem_2828, getitem_2829, getitem_2830, getitem_2831, getitem_2832, getitem_2833, getitem_2834, getitem_2835, getitem_2836, getitem_2837, getitem_2838, getitem_2839, getitem_2840, getitem_2841, getitem_2842, getitem_2843, getitem_2844, getitem_2845, getitem_2846, getitem_2847, getitem_2848, getitem_2849, getitem_2850, getitem_2851, getitem_2852, getitem_2853, getitem_2854, getitem_2855, getitem_2856, getitem_2857, getitem_2858, getitem_2859, getitem_2860, getitem_2861, getitem_2862, getitem_2863, getitem_2864, getitem_2865, getitem_2866, getitem_2867, getitem_2868, getitem_2869, getitem_2870, getitem_2871, getitem_2872, getitem_2873, getitem_2874, getitem_2875, getitem_2876, getitem_2877, getitem_2878, getitem_2879, getitem_2880, getitem_2881, getitem_2882, getitem_2883, getitem_2884, getitem_2885, getitem_2886, getitem_2887, getitem_2888, getitem_2889, getitem_2890, getitem_2891, getitem_2892, getitem_2893, getitem_2894, getitem_2895, getitem_2896, getitem_2897, getitem_2898, getitem_2899, getitem_2900, getitem_2901, getitem_2902, getitem_2903, getitem_2904, getitem_2905, getitem_2906, getitem_2907, getitem_2908, getitem_2909, getitem_2910, getitem_2911, getitem_2912, getitem_2913, getitem_2914, getitem_2915, getitem_2916, getitem_2917, getitem_2918, getitem_2919, getitem_2920, getitem_2921, getitem_2922, getitem_2923, getitem_2924, getitem_2925, getitem_2926, getitem_2927, getitem_2928, getitem_2929, getitem_2930, getitem_2931, getitem_2932, getitem_2933, getitem_2934, getitem_2935, getitem_2936, getitem_2937, getitem_2938, getitem_2939, getitem_2940, getitem_2941, getitem_2942, getitem_2943, getitem_2944, getitem_2945, getitem_2946, getitem_2947, getitem_2948, getitem_2949, getitem_2950, getitem_2951, getitem_2952, getitem_2953, getitem_2954, getitem_2955, getitem_2956, getitem_2957, getitem_2958, getitem_2959]); arg0_1 = arg1_1 = arg2_1 = arg3_1 = arg4_1 = arg5_1 = arg6_1 = arg7_1 = arg8_1 = arg9_1 = arg10_1 = arg11_1 = arg12_1 = arg13_1 = arg14_1 = arg15_1 = arg16_1 = arg17_1 = arg18_1 = arg19_1 = arg20_1 = arg21_1 = arg22_1 = arg23_1 = arg24_1 = arg25_1 = arg26_1 = arg27_1 = arg28_1 = arg29_1 = arg30_1 = arg31_1 = arg32_1 = arg33_1 = arg34_1 = arg35_1 = arg36_1 = arg37_1 = arg38_1 = arg39_1 = arg40_1 = arg41_1 = arg42_1 = arg43_1 = arg44_1 = arg45_1 = arg46_1 = arg47_1 = arg48_1 = arg49_1 = arg50_1 = arg51_1 = arg52_1 = arg53_1 = arg54_1 = arg55_1 = arg56_1 = arg57_1 = arg58_1 = arg59_1 = arg60_1 = arg61_1 = arg62_1 = arg63_1 = arg64_1 = arg65_1 = arg66_1 = arg67_1 = arg68_1 = arg69_1 = arg70_1 = arg71_1 = arg72_1 = arg73_1 = arg74_1 = arg75_1 = arg76_1 = arg77_1 = arg78_1 = arg79_1 = arg80_1 = arg81_1 = arg82_1 = arg83_1 = arg84_1 = arg85_1 = arg86_1 = arg87_1 = arg88_1 = arg89_1 = arg90_1 = arg91_1 = arg92_1 = arg93_1 = arg94_1 = arg95_1 = arg96_1 = arg97_1 = arg98_1 = arg99_1 = arg100_1 = arg101_1 = arg102_1 = arg103_1 = arg104_1 = arg105_1 = arg106_1 = arg107_1 = arg108_1 = arg109_1 = arg110_1 = arg111_1 = arg112_1 = arg113_1 = 
arg114_1 = arg115_1 = arg116_1 = arg117_1 = arg118_1 = arg119_1 = arg120_1 = arg121_1 = arg122_1 = arg123_1 = arg124_1 = arg125_1 = arg126_1 = arg127_1 = arg128_1 = arg129_1 = arg130_1 = arg131_1 = arg132_1 = arg133_1 = arg134_1 = arg135_1 = arg136_1 = arg137_1 = arg138_1 = arg139_1 = arg140_1 = arg141_1 = arg142_1 = arg143_1 = arg144_1 = arg145_1 = arg146_1 = arg147_1 = getitem_2812 = getitem_2813 = getitem_2814 = getitem_2815 = getitem_2816 = getitem_2817 = getitem_2818 = getitem_2819 = getitem_2820 = getitem_2821 = getitem_2822 = getitem_2823 = getitem_2824 = getitem_2825 = getitem_2826 = getitem_2827 = getitem_2828 = getitem_2829 = getitem_2830 = getitem_2831 = getitem_2832 = getitem_2833 = getitem_2834 = getitem_2835 = getitem_2836 = getitem_2837 = getitem_2838 = getitem_2839 = getitem_2840 = getitem_2841 = getitem_2842 = getitem_2843 = getitem_2844 = getitem_2845 = getitem_2846 = getitem_2847 = getitem_2848 = getitem_2849 = getitem_2850 = getitem_2851 = getitem_2852 = getitem_2853 = getitem_2854 = getitem_2855 = getitem_2856 = getitem_2857 = getitem_2858 = getitem_2859 = getitem_2860 = getitem_2861 = getitem_2862 = getitem_2863 = getitem_2864 = getitem_2865 = getitem_2866 = getitem_2867 = getitem_2868 = getitem_2869 = getitem_2870 = getitem_2871 = getitem_2872 = getitem_2873 = getitem_2874 = getitem_2875 = getitem_2876 = getitem_2877 = getitem_2878 = getitem_2879 = getitem_2880 = getitem_2881 = getitem_2882 = getitem_2883 = getitem_2884 = getitem_2885 = getitem_2886 = getitem_2887 = getitem_2888 = getitem_2889 = getitem_2890 = getitem_2891 = getitem_2892 = getitem_2893 = getitem_2894 = getitem_2895 = getitem_2896 = getitem_2897 = getitem_2898 = getitem_2899 = getitem_2900 = getitem_2901 = getitem_2902 = getitem_2903 = getitem_2904 = getitem_2905 = getitem_2906 = getitem_2907 = getitem_2908 = getitem_2909 = getitem_2910 = getitem_2911 = getitem_2912 = getitem_2913 = getitem_2914 = getitem_2915 = getitem_2916 = getitem_2917 = getitem_2918 = getitem_2919 = getitem_2920 = getitem_2921 = getitem_2922 = getitem_2923 = getitem_2924 = getitem_2925 = getitem_2926 = getitem_2927 = getitem_2928 = getitem_2929 = getitem_2930 = getitem_2931 = getitem_2932 = getitem_2933 = getitem_2934 = getitem_2935 = getitem_2936 = getitem_2937 = getitem_2938 = getitem_2939 = getitem_2940 = getitem_2941 = getitem_2942 = getitem_2943 = getitem_2944 = getitem_2945 = getitem_2946 = getitem_2947 = getitem_2948 = getitem_2949 = getitem_2950 = getitem_2951 = getitem_2952 = getitem_2953 = getitem_2954 = getitem_2955 = getitem_2956 = getitem_2957 = getitem_2958 = getitem_2959 = None + getitem_2960: "f32[50304, 768][768, 1]cuda:0" = _foreach_add_4[0]; getitem_2960 = None + getitem_2961: "f32[1024, 768][768, 1]cuda:0" = _foreach_add_4[1]; getitem_2961 = None + getitem_2962: "f32[768][1]cuda:0" = _foreach_add_4[2]; getitem_2962 = None + getitem_2963: "f32[768][1]cuda:0" = _foreach_add_4[3]; getitem_2963 = None + getitem_2964: "f32[2304, 768][768, 1]cuda:0" = _foreach_add_4[4]; getitem_2964 = None + getitem_2965: "f32[2304][1]cuda:0" = _foreach_add_4[5]; getitem_2965 = None + getitem_2966: "f32[768, 768][768, 1]cuda:0" = _foreach_add_4[6]; getitem_2966 = None + getitem_2967: "f32[768][1]cuda:0" = _foreach_add_4[7]; getitem_2967 = None + getitem_2968: "f32[768][1]cuda:0" = _foreach_add_4[8]; getitem_2968 = None + getitem_2969: "f32[768][1]cuda:0" = _foreach_add_4[9]; getitem_2969 = None + getitem_2970: "f32[3072, 768][768, 1]cuda:0" = _foreach_add_4[10]; getitem_2970 = None + getitem_2971: "f32[3072][1]cuda:0" = 
_foreach_add_4[11]; getitem_2971 = None + getitem_2972: "f32[768, 3072][3072, 1]cuda:0" = _foreach_add_4[12]; getitem_2972 = None + getitem_2973: "f32[768][1]cuda:0" = _foreach_add_4[13]; getitem_2973 = None + getitem_2974: "f32[768][1]cuda:0" = _foreach_add_4[14]; getitem_2974 = None + getitem_2975: "f32[768][1]cuda:0" = _foreach_add_4[15]; getitem_2975 = None + getitem_2976: "f32[2304, 768][768, 1]cuda:0" = _foreach_add_4[16]; getitem_2976 = None + getitem_2977: "f32[2304][1]cuda:0" = _foreach_add_4[17]; getitem_2977 = None + getitem_2978: "f32[768, 768][768, 1]cuda:0" = _foreach_add_4[18]; getitem_2978 = None + getitem_2979: "f32[768][1]cuda:0" = _foreach_add_4[19]; getitem_2979 = None + getitem_2980: "f32[768][1]cuda:0" = _foreach_add_4[20]; getitem_2980 = None + getitem_2981: "f32[768][1]cuda:0" = _foreach_add_4[21]; getitem_2981 = None + getitem_2982: "f32[3072, 768][768, 1]cuda:0" = _foreach_add_4[22]; getitem_2982 = None + getitem_2983: "f32[3072][1]cuda:0" = _foreach_add_4[23]; getitem_2983 = None + getitem_2984: "f32[768, 3072][3072, 1]cuda:0" = _foreach_add_4[24]; getitem_2984 = None + getitem_2985: "f32[768][1]cuda:0" = _foreach_add_4[25]; getitem_2985 = None + getitem_2986: "f32[768][1]cuda:0" = _foreach_add_4[26]; getitem_2986 = None + getitem_2987: "f32[768][1]cuda:0" = _foreach_add_4[27]; getitem_2987 = None + getitem_2988: "f32[2304, 768][768, 1]cuda:0" = _foreach_add_4[28]; getitem_2988 = None + getitem_2989: "f32[2304][1]cuda:0" = _foreach_add_4[29]; getitem_2989 = None + getitem_2990: "f32[768, 768][768, 1]cuda:0" = _foreach_add_4[30]; getitem_2990 = None + getitem_2991: "f32[768][1]cuda:0" = _foreach_add_4[31]; getitem_2991 = None + getitem_2992: "f32[768][1]cuda:0" = _foreach_add_4[32]; getitem_2992 = None + getitem_2993: "f32[768][1]cuda:0" = _foreach_add_4[33]; getitem_2993 = None + getitem_2994: "f32[3072, 768][768, 1]cuda:0" = _foreach_add_4[34]; getitem_2994 = None + getitem_2995: "f32[3072][1]cuda:0" = _foreach_add_4[35]; getitem_2995 = None + getitem_2996: "f32[768, 3072][3072, 1]cuda:0" = _foreach_add_4[36]; getitem_2996 = None + getitem_2997: "f32[768][1]cuda:0" = _foreach_add_4[37]; getitem_2997 = None + getitem_2998: "f32[768][1]cuda:0" = _foreach_add_4[38]; getitem_2998 = None + getitem_2999: "f32[768][1]cuda:0" = _foreach_add_4[39]; getitem_2999 = None + getitem_3000: "f32[2304, 768][768, 1]cuda:0" = _foreach_add_4[40]; getitem_3000 = None + getitem_3001: "f32[2304][1]cuda:0" = _foreach_add_4[41]; getitem_3001 = None + getitem_3002: "f32[768, 768][768, 1]cuda:0" = _foreach_add_4[42]; getitem_3002 = None + getitem_3003: "f32[768][1]cuda:0" = _foreach_add_4[43]; getitem_3003 = None + getitem_3004: "f32[768][1]cuda:0" = _foreach_add_4[44]; getitem_3004 = None + getitem_3005: "f32[768][1]cuda:0" = _foreach_add_4[45]; getitem_3005 = None + getitem_3006: "f32[3072, 768][768, 1]cuda:0" = _foreach_add_4[46]; getitem_3006 = None + getitem_3007: "f32[3072][1]cuda:0" = _foreach_add_4[47]; getitem_3007 = None + getitem_3008: "f32[768, 3072][3072, 1]cuda:0" = _foreach_add_4[48]; getitem_3008 = None + getitem_3009: "f32[768][1]cuda:0" = _foreach_add_4[49]; getitem_3009 = None + getitem_3010: "f32[768][1]cuda:0" = _foreach_add_4[50]; getitem_3010 = None + getitem_3011: "f32[768][1]cuda:0" = _foreach_add_4[51]; getitem_3011 = None + getitem_3012: "f32[2304, 768][768, 1]cuda:0" = _foreach_add_4[52]; getitem_3012 = None + getitem_3013: "f32[2304][1]cuda:0" = _foreach_add_4[53]; getitem_3013 = None + getitem_3014: "f32[768, 768][768, 1]cuda:0" = _foreach_add_4[54]; 
getitem_3014 = None + getitem_3015: "f32[768][1]cuda:0" = _foreach_add_4[55]; getitem_3015 = None + getitem_3016: "f32[768][1]cuda:0" = _foreach_add_4[56]; getitem_3016 = None + getitem_3017: "f32[768][1]cuda:0" = _foreach_add_4[57]; getitem_3017 = None + getitem_3018: "f32[3072, 768][768, 1]cuda:0" = _foreach_add_4[58]; getitem_3018 = None + getitem_3019: "f32[3072][1]cuda:0" = _foreach_add_4[59]; getitem_3019 = None + getitem_3020: "f32[768, 3072][3072, 1]cuda:0" = _foreach_add_4[60]; getitem_3020 = None + getitem_3021: "f32[768][1]cuda:0" = _foreach_add_4[61]; getitem_3021 = None + getitem_3022: "f32[768][1]cuda:0" = _foreach_add_4[62]; getitem_3022 = None + getitem_3023: "f32[768][1]cuda:0" = _foreach_add_4[63]; getitem_3023 = None + getitem_3024: "f32[2304, 768][768, 1]cuda:0" = _foreach_add_4[64]; getitem_3024 = None + getitem_3025: "f32[2304][1]cuda:0" = _foreach_add_4[65]; getitem_3025 = None + getitem_3026: "f32[768, 768][768, 1]cuda:0" = _foreach_add_4[66]; getitem_3026 = None + getitem_3027: "f32[768][1]cuda:0" = _foreach_add_4[67]; getitem_3027 = None + getitem_3028: "f32[768][1]cuda:0" = _foreach_add_4[68]; getitem_3028 = None + getitem_3029: "f32[768][1]cuda:0" = _foreach_add_4[69]; getitem_3029 = None + getitem_3030: "f32[3072, 768][768, 1]cuda:0" = _foreach_add_4[70]; getitem_3030 = None + getitem_3031: "f32[3072][1]cuda:0" = _foreach_add_4[71]; getitem_3031 = None + getitem_3032: "f32[768, 3072][3072, 1]cuda:0" = _foreach_add_4[72]; getitem_3032 = None + getitem_3033: "f32[768][1]cuda:0" = _foreach_add_4[73]; getitem_3033 = None + getitem_3034: "f32[768][1]cuda:0" = _foreach_add_4[74]; getitem_3034 = None + getitem_3035: "f32[768][1]cuda:0" = _foreach_add_4[75]; getitem_3035 = None + getitem_3036: "f32[2304, 768][768, 1]cuda:0" = _foreach_add_4[76]; getitem_3036 = None + getitem_3037: "f32[2304][1]cuda:0" = _foreach_add_4[77]; getitem_3037 = None + getitem_3038: "f32[768, 768][768, 1]cuda:0" = _foreach_add_4[78]; getitem_3038 = None + getitem_3039: "f32[768][1]cuda:0" = _foreach_add_4[79]; getitem_3039 = None + getitem_3040: "f32[768][1]cuda:0" = _foreach_add_4[80]; getitem_3040 = None + getitem_3041: "f32[768][1]cuda:0" = _foreach_add_4[81]; getitem_3041 = None + getitem_3042: "f32[3072, 768][768, 1]cuda:0" = _foreach_add_4[82]; getitem_3042 = None + getitem_3043: "f32[3072][1]cuda:0" = _foreach_add_4[83]; getitem_3043 = None + getitem_3044: "f32[768, 3072][3072, 1]cuda:0" = _foreach_add_4[84]; getitem_3044 = None + getitem_3045: "f32[768][1]cuda:0" = _foreach_add_4[85]; getitem_3045 = None + getitem_3046: "f32[768][1]cuda:0" = _foreach_add_4[86]; getitem_3046 = None + getitem_3047: "f32[768][1]cuda:0" = _foreach_add_4[87]; getitem_3047 = None + getitem_3048: "f32[2304, 768][768, 1]cuda:0" = _foreach_add_4[88]; getitem_3048 = None + getitem_3049: "f32[2304][1]cuda:0" = _foreach_add_4[89]; getitem_3049 = None + getitem_3050: "f32[768, 768][768, 1]cuda:0" = _foreach_add_4[90]; getitem_3050 = None + getitem_3051: "f32[768][1]cuda:0" = _foreach_add_4[91]; getitem_3051 = None + getitem_3052: "f32[768][1]cuda:0" = _foreach_add_4[92]; getitem_3052 = None + getitem_3053: "f32[768][1]cuda:0" = _foreach_add_4[93]; getitem_3053 = None + getitem_3054: "f32[3072, 768][768, 1]cuda:0" = _foreach_add_4[94]; getitem_3054 = None + getitem_3055: "f32[3072][1]cuda:0" = _foreach_add_4[95]; getitem_3055 = None + getitem_3056: "f32[768, 3072][3072, 1]cuda:0" = _foreach_add_4[96]; getitem_3056 = None + getitem_3057: "f32[768][1]cuda:0" = _foreach_add_4[97]; getitem_3057 = None + getitem_3058: 
"f32[768][1]cuda:0" = _foreach_add_4[98]; getitem_3058 = None + getitem_3059: "f32[768][1]cuda:0" = _foreach_add_4[99]; getitem_3059 = None + getitem_3060: "f32[2304, 768][768, 1]cuda:0" = _foreach_add_4[100]; getitem_3060 = None + getitem_3061: "f32[2304][1]cuda:0" = _foreach_add_4[101]; getitem_3061 = None + getitem_3062: "f32[768, 768][768, 1]cuda:0" = _foreach_add_4[102]; getitem_3062 = None + getitem_3063: "f32[768][1]cuda:0" = _foreach_add_4[103]; getitem_3063 = None + getitem_3064: "f32[768][1]cuda:0" = _foreach_add_4[104]; getitem_3064 = None + getitem_3065: "f32[768][1]cuda:0" = _foreach_add_4[105]; getitem_3065 = None + getitem_3066: "f32[3072, 768][768, 1]cuda:0" = _foreach_add_4[106]; getitem_3066 = None + getitem_3067: "f32[3072][1]cuda:0" = _foreach_add_4[107]; getitem_3067 = None + getitem_3068: "f32[768, 3072][3072, 1]cuda:0" = _foreach_add_4[108]; getitem_3068 = None + getitem_3069: "f32[768][1]cuda:0" = _foreach_add_4[109]; getitem_3069 = None + getitem_3070: "f32[768][1]cuda:0" = _foreach_add_4[110]; getitem_3070 = None + getitem_3071: "f32[768][1]cuda:0" = _foreach_add_4[111]; getitem_3071 = None + getitem_3072: "f32[2304, 768][768, 1]cuda:0" = _foreach_add_4[112]; getitem_3072 = None + getitem_3073: "f32[2304][1]cuda:0" = _foreach_add_4[113]; getitem_3073 = None + getitem_3074: "f32[768, 768][768, 1]cuda:0" = _foreach_add_4[114]; getitem_3074 = None + getitem_3075: "f32[768][1]cuda:0" = _foreach_add_4[115]; getitem_3075 = None + getitem_3076: "f32[768][1]cuda:0" = _foreach_add_4[116]; getitem_3076 = None + getitem_3077: "f32[768][1]cuda:0" = _foreach_add_4[117]; getitem_3077 = None + getitem_3078: "f32[3072, 768][768, 1]cuda:0" = _foreach_add_4[118]; getitem_3078 = None + getitem_3079: "f32[3072][1]cuda:0" = _foreach_add_4[119]; getitem_3079 = None + getitem_3080: "f32[768, 3072][3072, 1]cuda:0" = _foreach_add_4[120]; getitem_3080 = None + getitem_3081: "f32[768][1]cuda:0" = _foreach_add_4[121]; getitem_3081 = None + getitem_3082: "f32[768][1]cuda:0" = _foreach_add_4[122]; getitem_3082 = None + getitem_3083: "f32[768][1]cuda:0" = _foreach_add_4[123]; getitem_3083 = None + getitem_3084: "f32[2304, 768][768, 1]cuda:0" = _foreach_add_4[124]; getitem_3084 = None + getitem_3085: "f32[2304][1]cuda:0" = _foreach_add_4[125]; getitem_3085 = None + getitem_3086: "f32[768, 768][768, 1]cuda:0" = _foreach_add_4[126]; getitem_3086 = None + getitem_3087: "f32[768][1]cuda:0" = _foreach_add_4[127]; getitem_3087 = None + getitem_3088: "f32[768][1]cuda:0" = _foreach_add_4[128]; getitem_3088 = None + getitem_3089: "f32[768][1]cuda:0" = _foreach_add_4[129]; getitem_3089 = None + getitem_3090: "f32[3072, 768][768, 1]cuda:0" = _foreach_add_4[130]; getitem_3090 = None + getitem_3091: "f32[3072][1]cuda:0" = _foreach_add_4[131]; getitem_3091 = None + getitem_3092: "f32[768, 3072][3072, 1]cuda:0" = _foreach_add_4[132]; getitem_3092 = None + getitem_3093: "f32[768][1]cuda:0" = _foreach_add_4[133]; getitem_3093 = None + getitem_3094: "f32[768][1]cuda:0" = _foreach_add_4[134]; getitem_3094 = None + getitem_3095: "f32[768][1]cuda:0" = _foreach_add_4[135]; getitem_3095 = None + getitem_3096: "f32[2304, 768][768, 1]cuda:0" = _foreach_add_4[136]; getitem_3096 = None + getitem_3097: "f32[2304][1]cuda:0" = _foreach_add_4[137]; getitem_3097 = None + getitem_3098: "f32[768, 768][768, 1]cuda:0" = _foreach_add_4[138]; getitem_3098 = None + getitem_3099: "f32[768][1]cuda:0" = _foreach_add_4[139]; getitem_3099 = None + getitem_3100: "f32[768][1]cuda:0" = _foreach_add_4[140]; getitem_3100 = None + getitem_3101: 
"f32[768][1]cuda:0" = _foreach_add_4[141]; getitem_3101 = None + getitem_3102: "f32[3072, 768][768, 1]cuda:0" = _foreach_add_4[142]; getitem_3102 = None + getitem_3103: "f32[3072][1]cuda:0" = _foreach_add_4[143]; getitem_3103 = None + getitem_3104: "f32[768, 3072][3072, 1]cuda:0" = _foreach_add_4[144]; getitem_3104 = None + getitem_3105: "f32[768][1]cuda:0" = _foreach_add_4[145]; getitem_3105 = None + getitem_3106: "f32[768][1]cuda:0" = _foreach_add_4[146]; getitem_3106 = None + getitem_3107: "f32[768][1]cuda:0" = _foreach_add_4[147]; _foreach_add_4 = getitem_3107 = None + + # File: /data/users/jjwu/a/pytorch/torch/optim/adam.py:544 in _multi_tensor_adam, code: torch._foreach_addcmul_( + copy__150: "f32[1024, 768][768, 1]cuda:0" = torch.ops.aten.copy_.default(arg150_1, getitem_889); arg150_1 = getitem_889 = copy__150 = None + copy__298: "f32[50304, 768][768, 1]cuda:0" = torch.ops.aten.copy_.default(arg446_1, getitem_888); arg446_1 = getitem_888 = copy__298 = None + copy__299: "f32[768][1]cuda:0" = torch.ops.aten.copy_.default(arg447_1, getitem_890); arg447_1 = getitem_890 = copy__299 = None + copy__300: "f32[768][1]cuda:0" = torch.ops.aten.copy_.default(arg448_1, getitem_891); arg448_1 = getitem_891 = copy__300 = None + copy__301: "f32[2304, 768][768, 1]cuda:0" = torch.ops.aten.copy_.default(arg449_1, getitem_892); arg449_1 = getitem_892 = copy__301 = None + copy__302: "f32[2304][1]cuda:0" = torch.ops.aten.copy_.default(arg450_1, getitem_893); arg450_1 = getitem_893 = copy__302 = None + copy__303: "f32[768, 768][768, 1]cuda:0" = torch.ops.aten.copy_.default(arg451_1, getitem_894); arg451_1 = getitem_894 = copy__303 = None + copy__304: "f32[768][1]cuda:0" = torch.ops.aten.copy_.default(arg452_1, getitem_895); arg452_1 = getitem_895 = copy__304 = None + copy__305: "f32[768][1]cuda:0" = torch.ops.aten.copy_.default(arg453_1, getitem_896); arg453_1 = getitem_896 = copy__305 = None + copy__306: "f32[768][1]cuda:0" = torch.ops.aten.copy_.default(arg454_1, getitem_897); arg454_1 = getitem_897 = copy__306 = None + copy__307: "f32[3072, 768][768, 1]cuda:0" = torch.ops.aten.copy_.default(arg455_1, getitem_898); arg455_1 = getitem_898 = copy__307 = None + copy__308: "f32[3072][1]cuda:0" = torch.ops.aten.copy_.default(arg456_1, getitem_899); arg456_1 = getitem_899 = copy__308 = None + copy__309: "f32[768, 3072][3072, 1]cuda:0" = torch.ops.aten.copy_.default(arg457_1, getitem_900); arg457_1 = getitem_900 = copy__309 = None + copy__310: "f32[768][1]cuda:0" = torch.ops.aten.copy_.default(arg458_1, getitem_901); arg458_1 = getitem_901 = copy__310 = None + copy__311: "f32[768][1]cuda:0" = torch.ops.aten.copy_.default(arg459_1, getitem_902); arg459_1 = getitem_902 = copy__311 = None + copy__312: "f32[768][1]cuda:0" = torch.ops.aten.copy_.default(arg460_1, getitem_903); arg460_1 = getitem_903 = copy__312 = None + copy__313: "f32[2304, 768][768, 1]cuda:0" = torch.ops.aten.copy_.default(arg461_1, getitem_904); arg461_1 = getitem_904 = copy__313 = None + copy__314: "f32[2304][1]cuda:0" = torch.ops.aten.copy_.default(arg462_1, getitem_905); arg462_1 = getitem_905 = copy__314 = None + copy__315: "f32[768, 768][768, 1]cuda:0" = torch.ops.aten.copy_.default(arg463_1, getitem_906); arg463_1 = getitem_906 = copy__315 = None + copy__316: "f32[768][1]cuda:0" = torch.ops.aten.copy_.default(arg464_1, getitem_907); arg464_1 = getitem_907 = copy__316 = None + copy__317: "f32[768][1]cuda:0" = torch.ops.aten.copy_.default(arg465_1, getitem_908); arg465_1 = getitem_908 = copy__317 = None + copy__318: "f32[768][1]cuda:0" = 
torch.ops.aten.copy_.default(arg466_1, getitem_909); arg466_1 = getitem_909 = copy__318 = None + copy__319: "f32[3072, 768][768, 1]cuda:0" = torch.ops.aten.copy_.default(arg467_1, getitem_910); arg467_1 = getitem_910 = copy__319 = None + copy__320: "f32[3072][1]cuda:0" = torch.ops.aten.copy_.default(arg468_1, getitem_911); arg468_1 = getitem_911 = copy__320 = None + copy__321: "f32[768, 3072][3072, 1]cuda:0" = torch.ops.aten.copy_.default(arg469_1, getitem_912); arg469_1 = getitem_912 = copy__321 = None + copy__322: "f32[768][1]cuda:0" = torch.ops.aten.copy_.default(arg470_1, getitem_913); arg470_1 = getitem_913 = copy__322 = None + copy__323: "f32[768][1]cuda:0" = torch.ops.aten.copy_.default(arg471_1, getitem_914); arg471_1 = getitem_914 = copy__323 = None + copy__324: "f32[768][1]cuda:0" = torch.ops.aten.copy_.default(arg472_1, getitem_915); arg472_1 = getitem_915 = copy__324 = None + copy__325: "f32[2304, 768][768, 1]cuda:0" = torch.ops.aten.copy_.default(arg473_1, getitem_916); arg473_1 = getitem_916 = copy__325 = None + copy__326: "f32[2304][1]cuda:0" = torch.ops.aten.copy_.default(arg474_1, getitem_917); arg474_1 = getitem_917 = copy__326 = None + copy__327: "f32[768, 768][768, 1]cuda:0" = torch.ops.aten.copy_.default(arg475_1, getitem_918); arg475_1 = getitem_918 = copy__327 = None + copy__328: "f32[768][1]cuda:0" = torch.ops.aten.copy_.default(arg476_1, getitem_919); arg476_1 = getitem_919 = copy__328 = None + copy__329: "f32[768][1]cuda:0" = torch.ops.aten.copy_.default(arg477_1, getitem_920); arg477_1 = getitem_920 = copy__329 = None + copy__330: "f32[768][1]cuda:0" = torch.ops.aten.copy_.default(arg478_1, getitem_921); arg478_1 = getitem_921 = copy__330 = None + copy__331: "f32[3072, 768][768, 1]cuda:0" = torch.ops.aten.copy_.default(arg479_1, getitem_922); arg479_1 = getitem_922 = copy__331 = None + copy__332: "f32[3072][1]cuda:0" = torch.ops.aten.copy_.default(arg480_1, getitem_923); arg480_1 = getitem_923 = copy__332 = None + copy__333: "f32[768, 3072][3072, 1]cuda:0" = torch.ops.aten.copy_.default(arg481_1, getitem_924); arg481_1 = getitem_924 = copy__333 = None + copy__334: "f32[768][1]cuda:0" = torch.ops.aten.copy_.default(arg482_1, getitem_925); arg482_1 = getitem_925 = copy__334 = None + copy__335: "f32[768][1]cuda:0" = torch.ops.aten.copy_.default(arg483_1, getitem_926); arg483_1 = getitem_926 = copy__335 = None + copy__336: "f32[768][1]cuda:0" = torch.ops.aten.copy_.default(arg484_1, getitem_927); arg484_1 = getitem_927 = copy__336 = None + copy__337: "f32[2304, 768][768, 1]cuda:0" = torch.ops.aten.copy_.default(arg485_1, getitem_928); arg485_1 = getitem_928 = copy__337 = None + copy__338: "f32[2304][1]cuda:0" = torch.ops.aten.copy_.default(arg486_1, getitem_929); arg486_1 = getitem_929 = copy__338 = None + copy__339: "f32[768, 768][768, 1]cuda:0" = torch.ops.aten.copy_.default(arg487_1, getitem_930); arg487_1 = getitem_930 = copy__339 = None + copy__340: "f32[768][1]cuda:0" = torch.ops.aten.copy_.default(arg488_1, getitem_931); arg488_1 = getitem_931 = copy__340 = None + copy__341: "f32[768][1]cuda:0" = torch.ops.aten.copy_.default(arg489_1, getitem_932); arg489_1 = getitem_932 = copy__341 = None + copy__342: "f32[768][1]cuda:0" = torch.ops.aten.copy_.default(arg490_1, getitem_933); arg490_1 = getitem_933 = copy__342 = None + copy__343: "f32[3072, 768][768, 1]cuda:0" = torch.ops.aten.copy_.default(arg491_1, getitem_934); arg491_1 = getitem_934 = copy__343 = None + copy__344: "f32[3072][1]cuda:0" = torch.ops.aten.copy_.default(arg492_1, getitem_935); arg492_1 = 
getitem_935 = copy__344 = None + copy__345: "f32[768, 3072][3072, 1]cuda:0" = torch.ops.aten.copy_.default(arg493_1, getitem_936); arg493_1 = getitem_936 = copy__345 = None + copy__346: "f32[768][1]cuda:0" = torch.ops.aten.copy_.default(arg494_1, getitem_937); arg494_1 = getitem_937 = copy__346 = None + copy__347: "f32[768][1]cuda:0" = torch.ops.aten.copy_.default(arg495_1, getitem_938); arg495_1 = getitem_938 = copy__347 = None + copy__348: "f32[768][1]cuda:0" = torch.ops.aten.copy_.default(arg496_1, getitem_939); arg496_1 = getitem_939 = copy__348 = None + copy__349: "f32[2304, 768][768, 1]cuda:0" = torch.ops.aten.copy_.default(arg497_1, getitem_940); arg497_1 = getitem_940 = copy__349 = None + copy__350: "f32[2304][1]cuda:0" = torch.ops.aten.copy_.default(arg498_1, getitem_941); arg498_1 = getitem_941 = copy__350 = None + copy__351: "f32[768, 768][768, 1]cuda:0" = torch.ops.aten.copy_.default(arg499_1, getitem_942); arg499_1 = getitem_942 = copy__351 = None + copy__352: "f32[768][1]cuda:0" = torch.ops.aten.copy_.default(arg500_1, getitem_943); arg500_1 = getitem_943 = copy__352 = None + copy__353: "f32[768][1]cuda:0" = torch.ops.aten.copy_.default(arg501_1, getitem_944); arg501_1 = getitem_944 = copy__353 = None + copy__354: "f32[768][1]cuda:0" = torch.ops.aten.copy_.default(arg502_1, getitem_945); arg502_1 = getitem_945 = copy__354 = None + copy__355: "f32[3072, 768][768, 1]cuda:0" = torch.ops.aten.copy_.default(arg503_1, getitem_946); arg503_1 = getitem_946 = copy__355 = None + copy__356: "f32[3072][1]cuda:0" = torch.ops.aten.copy_.default(arg504_1, getitem_947); arg504_1 = getitem_947 = copy__356 = None + copy__357: "f32[768, 3072][3072, 1]cuda:0" = torch.ops.aten.copy_.default(arg505_1, getitem_948); arg505_1 = getitem_948 = copy__357 = None + copy__358: "f32[768][1]cuda:0" = torch.ops.aten.copy_.default(arg506_1, getitem_949); arg506_1 = getitem_949 = copy__358 = None + copy__359: "f32[768][1]cuda:0" = torch.ops.aten.copy_.default(arg507_1, getitem_950); arg507_1 = getitem_950 = copy__359 = None + copy__360: "f32[768][1]cuda:0" = torch.ops.aten.copy_.default(arg508_1, getitem_951); arg508_1 = getitem_951 = copy__360 = None + copy__361: "f32[2304, 768][768, 1]cuda:0" = torch.ops.aten.copy_.default(arg509_1, getitem_952); arg509_1 = getitem_952 = copy__361 = None + copy__362: "f32[2304][1]cuda:0" = torch.ops.aten.copy_.default(arg510_1, getitem_953); arg510_1 = getitem_953 = copy__362 = None + copy__363: "f32[768, 768][768, 1]cuda:0" = torch.ops.aten.copy_.default(arg511_1, getitem_954); arg511_1 = getitem_954 = copy__363 = None + copy__364: "f32[768][1]cuda:0" = torch.ops.aten.copy_.default(arg512_1, getitem_955); arg512_1 = getitem_955 = copy__364 = None + copy__365: "f32[768][1]cuda:0" = torch.ops.aten.copy_.default(arg513_1, getitem_956); arg513_1 = getitem_956 = copy__365 = None + copy__366: "f32[768][1]cuda:0" = torch.ops.aten.copy_.default(arg514_1, getitem_957); arg514_1 = getitem_957 = copy__366 = None + copy__367: "f32[3072, 768][768, 1]cuda:0" = torch.ops.aten.copy_.default(arg515_1, getitem_958); arg515_1 = getitem_958 = copy__367 = None + copy__368: "f32[3072][1]cuda:0" = torch.ops.aten.copy_.default(arg516_1, getitem_959); arg516_1 = getitem_959 = copy__368 = None + copy__369: "f32[768, 3072][3072, 1]cuda:0" = torch.ops.aten.copy_.default(arg517_1, getitem_960); arg517_1 = getitem_960 = copy__369 = None + copy__370: "f32[768][1]cuda:0" = torch.ops.aten.copy_.default(arg518_1, getitem_961); arg518_1 = getitem_961 = copy__370 = None + copy__371: "f32[768][1]cuda:0" = 
torch.ops.aten.copy_.default(arg519_1, getitem_962); arg519_1 = getitem_962 = copy__371 = None
+ copy__372: "f32[768][1]cuda:0" = torch.ops.aten.copy_.default(arg520_1, getitem_963); arg520_1 = getitem_963 = copy__372 = None
+ copy__373: "f32[2304, 768][768, 1]cuda:0" = torch.ops.aten.copy_.default(arg521_1, getitem_964); arg521_1 = getitem_964 = copy__373 = None
+ copy__374: "f32[2304][1]cuda:0" = torch.ops.aten.copy_.default(arg522_1, getitem_965); arg522_1 = getitem_965 = copy__374 = None
+ copy__375: "f32[768, 768][768, 1]cuda:0" = torch.ops.aten.copy_.default(arg523_1, getitem_966); arg523_1 = getitem_966 = copy__375 = None
+ copy__376: "f32[768][1]cuda:0" = torch.ops.aten.copy_.default(arg524_1, getitem_967); arg524_1 = getitem_967 = copy__376 = None
+ copy__377: "f32[768][1]cuda:0" = torch.ops.aten.copy_.default(arg525_1, getitem_968); arg525_1 = getitem_968 = copy__377 = None
+ copy__378: "f32[768][1]cuda:0" = torch.ops.aten.copy_.default(arg526_1, getitem_969); arg526_1 = getitem_969 = copy__378 = None
+ copy__379: "f32[3072, 768][768, 1]cuda:0" = torch.ops.aten.copy_.default(arg527_1, getitem_970); arg527_1 = getitem_970 = copy__379 = None
+ copy__380: "f32[3072][1]cuda:0" = torch.ops.aten.copy_.default(arg528_1, getitem_971); arg528_1 = getitem_971 = copy__380 = None
+ copy__381: "f32[768, 3072][3072, 1]cuda:0" = torch.ops.aten.copy_.default(arg529_1, getitem_972); arg529_1 = getitem_972 = copy__381 = None
+ copy__382: "f32[768][1]cuda:0" = torch.ops.aten.copy_.default(arg530_1, getitem_973); arg530_1 = getitem_973 = copy__382 = None
+ copy__383: "f32[768][1]cuda:0" = torch.ops.aten.copy_.default(arg531_1, getitem_974); arg531_1 = getitem_974 = copy__383 = None
+ copy__384: "f32[768][1]cuda:0" = torch.ops.aten.copy_.default(arg532_1, getitem_975); arg532_1 = getitem_975 = copy__384 = None
+ copy__385: "f32[2304, 768][768, 1]cuda:0" = torch.ops.aten.copy_.default(arg533_1, getitem_976); arg533_1 = getitem_976 = copy__385 = None
+ copy__386: "f32[2304][1]cuda:0" = torch.ops.aten.copy_.default(arg534_1, getitem_977); arg534_1 = getitem_977 = copy__386 = None
+ copy__387: "f32[768, 768][768, 1]cuda:0" = torch.ops.aten.copy_.default(arg535_1, getitem_978); arg535_1 = getitem_978 = copy__387 = None
+ copy__388: "f32[768][1]cuda:0" = torch.ops.aten.copy_.default(arg536_1, getitem_979); arg536_1 = getitem_979 = copy__388 = None
+ copy__389: "f32[768][1]cuda:0" = torch.ops.aten.copy_.default(arg537_1, getitem_980); arg537_1 = getitem_980 = copy__389 = None
+ copy__390: "f32[768][1]cuda:0" = torch.ops.aten.copy_.default(arg538_1, getitem_981); arg538_1 = getitem_981 = copy__390 = None
+ copy__391: "f32[3072, 768][768, 1]cuda:0" = torch.ops.aten.copy_.default(arg539_1, getitem_982); arg539_1 = getitem_982 = copy__391 = None
+ copy__392: "f32[3072][1]cuda:0" = torch.ops.aten.copy_.default(arg540_1, getitem_983); arg540_1 = getitem_983 = copy__392 = None
+ copy__393: "f32[768, 3072][3072, 1]cuda:0" = torch.ops.aten.copy_.default(arg541_1, getitem_984); arg541_1 = getitem_984 = copy__393 = None
+ copy__394: "f32[768][1]cuda:0" = torch.ops.aten.copy_.default(arg542_1, getitem_985); arg542_1 = getitem_985 = copy__394 = None
+ copy__395: "f32[768][1]cuda:0" = torch.ops.aten.copy_.default(arg543_1, getitem_986); arg543_1 = getitem_986 = copy__395 = None
+ copy__396: "f32[768][1]cuda:0" = torch.ops.aten.copy_.default(arg544_1, getitem_987); arg544_1 = getitem_987 = copy__396 = None
+ copy__397: "f32[2304, 768][768, 1]cuda:0" = torch.ops.aten.copy_.default(arg545_1, getitem_988); arg545_1 = getitem_988 = copy__397 = None
+ copy__398: "f32[2304][1]cuda:0" = torch.ops.aten.copy_.default(arg546_1, getitem_989); arg546_1 = getitem_989 = copy__398 = None
+ copy__399: "f32[768, 768][768, 1]cuda:0" = torch.ops.aten.copy_.default(arg547_1, getitem_990); arg547_1 = getitem_990 = copy__399 = None
+ copy__400: "f32[768][1]cuda:0" = torch.ops.aten.copy_.default(arg548_1, getitem_991); arg548_1 = getitem_991 = copy__400 = None
+ copy__401: "f32[768][1]cuda:0" = torch.ops.aten.copy_.default(arg549_1, getitem_992); arg549_1 = getitem_992 = copy__401 = None
+ copy__402: "f32[768][1]cuda:0" = torch.ops.aten.copy_.default(arg550_1, getitem_993); arg550_1 = getitem_993 = copy__402 = None
+ copy__403: "f32[3072, 768][768, 1]cuda:0" = torch.ops.aten.copy_.default(arg551_1, getitem_994); arg551_1 = getitem_994 = copy__403 = None
+ copy__404: "f32[3072][1]cuda:0" = torch.ops.aten.copy_.default(arg552_1, getitem_995); arg552_1 = getitem_995 = copy__404 = None
+ copy__405: "f32[768, 3072][3072, 1]cuda:0" = torch.ops.aten.copy_.default(arg553_1, getitem_996); arg553_1 = getitem_996 = copy__405 = None
+ copy__406: "f32[768][1]cuda:0" = torch.ops.aten.copy_.default(arg554_1, getitem_997); arg554_1 = getitem_997 = copy__406 = None
+ copy__407: "f32[768][1]cuda:0" = torch.ops.aten.copy_.default(arg555_1, getitem_998); arg555_1 = getitem_998 = copy__407 = None
+ copy__408: "f32[768][1]cuda:0" = torch.ops.aten.copy_.default(arg556_1, getitem_999); arg556_1 = getitem_999 = copy__408 = None
+ copy__409: "f32[2304, 768][768, 1]cuda:0" = torch.ops.aten.copy_.default(arg557_1, getitem_1000); arg557_1 = getitem_1000 = copy__409 = None
+ copy__410: "f32[2304][1]cuda:0" = torch.ops.aten.copy_.default(arg558_1, getitem_1001); arg558_1 = getitem_1001 = copy__410 = None
+ copy__411: "f32[768, 768][768, 1]cuda:0" = torch.ops.aten.copy_.default(arg559_1, getitem_1002); arg559_1 = getitem_1002 = copy__411 = None
+ copy__412: "f32[768][1]cuda:0" = torch.ops.aten.copy_.default(arg560_1, getitem_1003); arg560_1 = getitem_1003 = copy__412 = None
+ copy__413: "f32[768][1]cuda:0" = torch.ops.aten.copy_.default(arg561_1, getitem_1004); arg561_1 = getitem_1004 = copy__413 = None
+ copy__414: "f32[768][1]cuda:0" = torch.ops.aten.copy_.default(arg562_1, getitem_1005); arg562_1 = getitem_1005 = copy__414 = None
+ copy__415: "f32[3072, 768][768, 1]cuda:0" = torch.ops.aten.copy_.default(arg563_1, getitem_1006); arg563_1 = getitem_1006 = copy__415 = None
+ copy__416: "f32[3072][1]cuda:0" = torch.ops.aten.copy_.default(arg564_1, getitem_1007); arg564_1 = getitem_1007 = copy__416 = None
+ copy__417: "f32[768, 3072][3072, 1]cuda:0" = torch.ops.aten.copy_.default(arg565_1, getitem_1008); arg565_1 = getitem_1008 = copy__417 = None
+ copy__418: "f32[768][1]cuda:0" = torch.ops.aten.copy_.default(arg566_1, getitem_1009); arg566_1 = getitem_1009 = copy__418 = None
+ copy__419: "f32[768][1]cuda:0" = torch.ops.aten.copy_.default(arg567_1, getitem_1010); arg567_1 = getitem_1010 = copy__419 = None
+ copy__420: "f32[768][1]cuda:0" = torch.ops.aten.copy_.default(arg568_1, getitem_1011); arg568_1 = getitem_1011 = copy__420 = None
+ copy__421: "f32[2304, 768][768, 1]cuda:0" = torch.ops.aten.copy_.default(arg569_1, getitem_1012); arg569_1 = getitem_1012 = copy__421 = None
+ copy__422: "f32[2304][1]cuda:0" = torch.ops.aten.copy_.default(arg570_1, getitem_1013); arg570_1 = getitem_1013 = copy__422 = None
+ copy__423: "f32[768, 768][768, 1]cuda:0" = torch.ops.aten.copy_.default(arg571_1, getitem_1014); arg571_1 = getitem_1014 = copy__423 = None
+ copy__424: "f32[768][1]cuda:0" = torch.ops.aten.copy_.default(arg572_1, getitem_1015); arg572_1 = getitem_1015 = copy__424 = None
+ copy__425: "f32[768][1]cuda:0" = torch.ops.aten.copy_.default(arg573_1, getitem_1016); arg573_1 = getitem_1016 = copy__425 = None
+ copy__426: "f32[768][1]cuda:0" = torch.ops.aten.copy_.default(arg574_1, getitem_1017); arg574_1 = getitem_1017 = copy__426 = None
+ copy__427: "f32[3072, 768][768, 1]cuda:0" = torch.ops.aten.copy_.default(arg575_1, getitem_1018); arg575_1 = getitem_1018 = copy__427 = None
+ copy__428: "f32[3072][1]cuda:0" = torch.ops.aten.copy_.default(arg576_1, getitem_1019); arg576_1 = getitem_1019 = copy__428 = None
+ copy__429: "f32[768, 3072][3072, 1]cuda:0" = torch.ops.aten.copy_.default(arg577_1, getitem_1020); arg577_1 = getitem_1020 = copy__429 = None
+ copy__430: "f32[768][1]cuda:0" = torch.ops.aten.copy_.default(arg578_1, getitem_1021); arg578_1 = getitem_1021 = copy__430 = None
+ copy__431: "f32[768][1]cuda:0" = torch.ops.aten.copy_.default(arg579_1, getitem_1022); arg579_1 = getitem_1022 = copy__431 = None
+ copy__432: "f32[768][1]cuda:0" = torch.ops.aten.copy_.default(arg580_1, getitem_1023); arg580_1 = getitem_1023 = copy__432 = None
+ copy__433: "f32[2304, 768][768, 1]cuda:0" = torch.ops.aten.copy_.default(arg581_1, getitem_1024); arg581_1 = getitem_1024 = copy__433 = None
+ copy__434: "f32[2304][1]cuda:0" = torch.ops.aten.copy_.default(arg582_1, getitem_1025); arg582_1 = getitem_1025 = copy__434 = None
+ copy__435: "f32[768, 768][768, 1]cuda:0" = torch.ops.aten.copy_.default(arg583_1, getitem_1026); arg583_1 = getitem_1026 = copy__435 = None
+ copy__436: "f32[768][1]cuda:0" = torch.ops.aten.copy_.default(arg584_1, getitem_1027); arg584_1 = getitem_1027 = copy__436 = None
+ copy__437: "f32[768][1]cuda:0" = torch.ops.aten.copy_.default(arg585_1, getitem_1028); arg585_1 = getitem_1028 = copy__437 = None
+ copy__438: "f32[768][1]cuda:0" = torch.ops.aten.copy_.default(arg586_1, getitem_1029); arg586_1 = getitem_1029 = copy__438 = None
+ copy__439: "f32[3072, 768][768, 1]cuda:0" = torch.ops.aten.copy_.default(arg587_1, getitem_1030); arg587_1 = getitem_1030 = copy__439 = None
+ copy__440: "f32[3072][1]cuda:0" = torch.ops.aten.copy_.default(arg588_1, getitem_1031); arg588_1 = getitem_1031 = copy__440 = None
+ copy__441: "f32[768, 3072][3072, 1]cuda:0" = torch.ops.aten.copy_.default(arg589_1, getitem_1032); arg589_1 = getitem_1032 = copy__441 = None
+ copy__442: "f32[768][1]cuda:0" = torch.ops.aten.copy_.default(arg590_1, getitem_1033); arg590_1 = getitem_1033 = copy__442 = None
+ copy__443: "f32[768][1]cuda:0" = torch.ops.aten.copy_.default(arg591_1, getitem_1034); arg591_1 = getitem_1034 = copy__443 = None
+ copy__444: "f32[768][1]cuda:0" = torch.ops.aten.copy_.default(arg592_1, getitem_1035); arg592_1 = getitem_1035 = copy__444 = None
+ return ()
+
+V0806 13:56:11.480000 4107173 torch/_dynamo/utils.py:824] {"chromium_event": {}, "has_payload": "4e04a19e840301102c3e8b2b088b3abf"}
+ {
+     "name": "GraphLowering.run",
+     "ts": 1722977771480079.5,
+     "args": null,
+     "ph": "B",
+     "pid": 0
+ }
+V0806 13:56:13.522000 4107173 torch/_dynamo/utils.py:824] {"chromium_event": {}, "has_payload": "9f758bbcb2d7a7a4a46da10659086286"}
+ {
+     "name": "GraphLowering.run",
+     "ts": 1722977773522011.5,
+     "args": null,
+     "ph": "E",
+     "pid": 0
+ }
+V0806 13:56:13.523000 4107173 torch/_dynamo/utils.py:824] {"chromium_event": {}, "has_payload": "2fa9ac69896097e4eb4ed2ad86d20e4f"}
+ {
+     "name": "GraphLowering.compile_to_module",
+     "ts": 1722977773523916.2,
+     "args": null,
+     "ph": "B",
+     "pid": 0
+ }
+V0806 13:56:13.524000 4107173 torch/_dynamo/utils.py:824] {"chromium_event": {}, "has_payload": "47abc55786f5a7bcd127c7ef1ae29b62"}
+ {
+     "name": "code_gen",
+     "ts": 1722977773524017.8,
+     "args": null,
+     "ph": "B",
+     "pid": 0
+ }
+V0806 13:56:13.543000 4107173 torch/_dynamo/utils.py:824] {"chromium_event": {}, "has_payload": "67d3bdf78f3e6ff33c008448c7b9f286"}
+ {
+     "name": "Scheduler.__init__",
+     "ts": 1722977773542979.5,
+     "args": null,
+     "ph": "B",
+     "pid": 0
+ }
+V0806 13:56:17.133000 4107173 torch/_dynamo/utils.py:824] {"chromium_event": {}, "has_payload": "2a4699e4806de3efa88d475f85b1d1cb"}
+ {
+     "name": "Scheduler.__init__",
+     "ts": 1722977777133789.0,
+     "args": null,
+     "ph": "E",
+     "pid": 0
+ }
+V0806 13:56:17.134000 4107173 torch/_dynamo/utils.py:824] {"chromium_event": {}, "has_payload": "c4dccb40c589272602b6ba5f0a9c3016"}
+ {
+     "name": "Scheduler.codegen",
+     "ts": 1722977777134222.5,
+     "args": null,
+     "ph": "B",
+     "pid": 0
+ }
+V0806 13:56:18.888000 4107173 torch/_dynamo/utils.py:824] {"chromium_event": {}, "has_payload": "cdce945b72aad78fb46a9c332c97cf2e"}
+ {
+     "name": "Scheduler.codegen",
+     "ts": 1722977778887957.5,
+     "args": null,
+     "ph": "E",
+     "pid": 0
+ }
+V0806 13:56:18.888000 4107173 torch/_dynamo/utils.py:824] {"chromium_event": {}, "has_payload": "1c72e346bbb451706a43eaeadd1da1d2"}
+ {
+     "name": "WrapperCodeGen.generate",
+     "ts": 1722977778888304.8,
+     "args": null,
+     "ph": "B",
+     "pid": 0
+ }
+V0806 13:56:18.910000 4107173 torch/_dynamo/utils.py:824] {"chromium_event": {}, "has_payload": "cc5032c0230219adb044a52018b70d03"}
+ {
+     "name": "WrapperCodeGen.generate",
+     "ts": 1722977778910246.5,
+     "args": null,
+     "ph": "E",
+     "pid": 0
+ }
+V0806 13:56:18.912000 4107173 torch/_inductor/graph.py:1792] {"inductor_output_code": {"filename": "/tmp/tmp2ln889l5/yo/cyopfy25nuerjsxpoyw3h27mzwbtrvws3a3ylbzpnbpdmyrquagq.py"}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0, "has_payload": "73fb67ed2c75d446b0061a6d9bb8d8a9"}
+
+ # AOT ID: ['1_inference']
+ from ctypes import c_void_p, c_long
+ import torch
+ import math
+ import random
+ import os
+ import tempfile
+ from math import inf, nan
+ from torch._inductor.hooks import run_intermediate_hooks
+ from torch._inductor.utils import maybe_profile
+ from torch._inductor.codegen.memory_planning import _align as align
+
+ from torch import device, empty_strided
+ from torch._inductor.async_compile import AsyncCompile
+ from torch._inductor.select_algorithm import extern_kernels
+ from torch._inductor.codegen.multi_kernel import MultiKernelCall
+
+ aten = torch.ops.aten
+ inductor_ops = torch.ops.inductor
+ _quantized = torch.ops._quantized
+ assert_size_stride = torch._C._dynamo.guards.assert_size_stride
+ empty_strided_cpu = torch._C._dynamo.guards._empty_strided_cpu
+ empty_strided_cuda = torch._C._dynamo.guards._empty_strided_cuda
+ reinterpret_tensor = torch._C._dynamo.guards._reinterpret_tensor
+ alloc_from_pool = torch.ops.inductor._alloc_from_pool
+ async_compile = AsyncCompile()
+
+
+ # kernel path: /tmp/tmp2ln889l5/qe/cqegvx7pet6zxtze7xi2d3ife3fsc73adn4fimsnammc2efrmybh.py
+ # Source Nodes: [], Original ATen: []
+
+ triton_for_fused_0 = async_compile.triton('triton_', '''
+ import triton
+ import triton.language as tl
+ from triton.compiler.compiler import AttrsDescriptor
+
+ from torch._inductor.runtime import triton_helpers, triton_heuristics
+ from torch._inductor.runtime.triton_helpers import libdevice, math as tl_math
+ from torch._inductor.runtime.hints import AutotuneHint, ReductionHint, TileHint, instance_descriptor, DeviceProperties
+
+ @triton_heuristics.foreach(
+     num_warps=8,
+     triton_meta={'signature': {0: '*fp32', 1: '*fp32', 2: '*fp32', 3: '*fp32', 4: '*fp32', 5: '*fp32', 6: '*fp32', 7: '*fp32', 8: '*fp32', 9: '*fp32', 10: '*fp32', 11: '*fp32', 12: '*fp32', 13: '*fp32', 14: '*fp32', 15: '*fp32', 16: '*fp32', 17: '*fp32', 18: '*fp32', 19: '*fp32', 20: '*fp32', 21: '*fp32', 22: '*fp32', 23: '*fp32', 24: '*fp32', 25: '*fp32', 26: '*fp32', 27: '*fp32', 28: '*fp32', 29: '*fp32', 30: '*fp32', 31: '*fp32', 32: '*fp32', 33: '*fp32', 34: '*fp32', 35: '*fp32', 36: '*fp32', 37: '*fp32', 38: '*fp32', 39: '*fp32', 40: '*fp32', 41: '*fp32', 42: '*fp32', 43: '*fp32', 44: '*fp32', 45: '*fp32', 46: '*fp32', 47: '*fp32', 48: '*fp32', 49: '*fp32', 50: '*fp32', 51: '*fp32', 52: '*fp32', 53: '*fp32', 54: '*fp32', 55: '*fp32', 56: '*fp32', 57: '*fp32', 58: '*fp32', 59: '*fp32', 60: '*fp32', 61: '*fp32', 62: '*fp32', 63: '*fp32', 64: '*fp32', 65: '*fp32', 66: '*fp32', 67: '*fp32', 68: '*fp32', 69: '*fp32', 70: '*fp32', 71: '*fp32', 72: '*fp32', 73: '*fp32', 74: '*fp32', 75: '*fp32', 76: '*fp32', 77: '*fp32', 78: '*fp32', 79: '*fp32', 80: '*fp32', 81: '*fp32', 82: '*fp32', 83: '*fp32', 84: '*fp32', 85: '*fp32', 86: '*fp32', 87: '*fp32', 88: '*fp32', 89: '*fp32', 90: '*fp32', 91: '*fp32', 92: '*fp32', 93: '*fp32', 94: '*fp32', 95: '*fp32', 96: '*fp32', 97: '*fp32', 98: '*fp32', 99: '*fp32', 100: '*fp32', 101: '*fp32', 102: '*fp32', 103: '*fp32', 104: '*fp32', 105: '*fp32', 106: '*fp32', 107: '*fp32', 108: '*fp32', 109: '*fp32', 110: '*fp32', 111: '*fp32', 112: '*fp32', 113: '*fp32', 114: '*fp32', 115: '*fp32', 116: '*fp32', 117: '*fp32', 118: '*fp32', 119: '*fp32', 120: '*fp32', 121: '*fp32', 122: '*fp32', 123: '*fp32', 124: '*fp32', 125: '*fp32', 126: '*fp32', 127: '*fp32', 128: '*fp32', 129: '*fp32', 130: '*fp32', 131: '*fp32', 132: '*fp32', 133: '*fp32', 134: '*fp32', 135: '*fp32', 136: '*fp32', 137: '*fp32', 138: '*fp32', 139: '*fp32', 140: '*fp32', 141: '*fp32', 142: '*fp32', 143: '*fp32', 144: '*fp32', 145: '*fp32', 146: '*fp32', 147: '*fp32', 148: '*fp32', 149: '*fp32', 150: '*fp32', 151: '*fp32', 152: '*fp32', 153: '*fp32', 154: '*fp32', 155: '*fp32', 156: '*fp32', 157: '*fp32', 158: '*fp32', 159: '*fp32', 160: '*fp32', 161: '*fp32', 162: '*fp32', 163: '*fp32', 164: '*fp32', 165: '*fp32'}, 'device': DeviceProperties(type='cuda', index=0, cc=90, major=9, regs_per_multiprocessor=65536, max_threads_per_multi_processor=2048, multi_processor_count=132), 'constants': {}, 'configs': [AttrsDescriptor(divisible_by_16=(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76, 77, 78, 79, 80, 81, 82, 83, 84, 85, 86, 87, 88, 89, 90, 91, 92, 93, 94, 95, 96, 97, 98, 99, 100, 101, 102, 103, 104, 105, 106, 107, 108, 109, 110, 111, 112, 113, 114, 115, 116, 117, 118, 119, 120, 121, 122, 123, 124, 125, 126, 127, 128, 129, 130, 131, 132, 133, 134, 135, 136, 137, 138, 139, 140, 141, 142, 143, 144, 145, 146, 147, 148, 149, 150, 151, 152, 153, 154, 155, 156, 157, 158, 159, 160, 161, 162, 163, 164, 165), equal_to_1=())]},
+     inductor_meta={'kernel_name': 'triton_for_fused_0', 'mutated_arg_names': ['in_ptr0', 'in_ptr1', 'in_ptr10', 'in_ptr11', 'in_ptr12', 'in_ptr13', 'in_ptr14', 'in_ptr15', 'in_ptr16', 'in_ptr17', 'in_ptr18', 'in_ptr19', 'in_ptr2', 'in_ptr20', 'in_ptr21', 'in_ptr22', 'in_ptr23', 'in_ptr24', 'in_ptr25', 'in_ptr26', 'in_ptr27', 'in_ptr28', 'in_ptr29', 'in_ptr3', 'in_ptr30', 'in_ptr31', 'in_ptr32', 'in_ptr33', 'in_ptr34', 'in_ptr35', 'in_ptr36', 'in_ptr37', 'in_ptr38', 'in_ptr39', 'in_ptr4', 'in_ptr40', 'in_ptr41', 'in_ptr42', 'in_ptr43', 'in_ptr44', 'in_ptr45', 'in_ptr46', 'in_ptr47', 'in_ptr48', 'in_ptr49', 'in_ptr5', 'in_ptr50', 'in_ptr51', 'in_ptr52', 'in_ptr53', 'in_ptr54', 'in_ptr55', 'in_ptr56', 'in_ptr57', 'in_ptr58', 'in_ptr59', 'in_ptr6', 'in_ptr60', 'in_ptr61', 'in_ptr62', 'in_ptr63', 'in_ptr64', 'in_ptr65', 'in_ptr66', 'in_ptr67', 'in_ptr68', 'in_ptr69', 'in_ptr7', 'in_ptr70', 'in_ptr71', 'in_ptr72', 'in_ptr73', 'in_ptr74', 'in_ptr75', 'in_ptr76', 'in_ptr77', 'in_ptr78', 'in_ptr79', 'in_ptr8', 'in_ptr80', 'in_ptr81', 'in_ptr82', 'in_ptr9', 'out_ptr0', 'out_ptr1', 'out_ptr10', 'out_ptr11', 'out_ptr12', 'out_ptr13', 'out_ptr14', 'out_ptr15', 'out_ptr16', 'out_ptr17', 'out_ptr18', 'out_ptr19', 'out_ptr2', 'out_ptr20', 'out_ptr21', 'out_ptr22', 'out_ptr23', 'out_ptr24', 'out_ptr25', 'out_ptr26', 'out_ptr27', 'out_ptr28', 'out_ptr29', 'out_ptr3', 'out_ptr30', 'out_ptr31', 'out_ptr32', 'out_ptr33', 'out_ptr34', 'out_ptr35', 'out_ptr36', 'out_ptr37', 'out_ptr38', 'out_ptr39', 'out_ptr4', 'out_ptr40', 'out_ptr41', 'out_ptr42', 'out_ptr43', 'out_ptr44', 'out_ptr45', 'out_ptr46', 'out_ptr47', 'out_ptr48', 'out_ptr49', 'out_ptr5', 'out_ptr50', 'out_ptr51', 'out_ptr52', 'out_ptr53', 'out_ptr54', 'out_ptr55', 'out_ptr56', 'out_ptr57', 'out_ptr58', 'out_ptr59', 'out_ptr6', 'out_ptr60', 'out_ptr61', 'out_ptr62', 'out_ptr63', 'out_ptr64', 'out_ptr65', 'out_ptr66', 'out_ptr67', 'out_ptr68', 'out_ptr69', 'out_ptr7', 'out_ptr70', 'out_ptr71', 'out_ptr72', 'out_ptr73', 'out_ptr74', 'out_ptr75', 'out_ptr76', 'out_ptr77', 'out_ptr78', 'out_ptr79', 'out_ptr8', 'out_ptr80', 'out_ptr81', 'out_ptr82', 'out_ptr9'], 'backend_hash': 'B0714AF1B5CA55D285C0AA74C7669268EFE45FDD9B7A0A70183B8D89AC8FCAC4', 'are_deterministic_algorithms_enabled': False, 'assert_indirect_indexing': True, 'autotune_local_cache': True, 'autotune_pointwise': True, 'autotune_remote_cache': None, 'force_disable_caches': False, 'dynamic_scale_rblock': True, 'max_autotune': False, 'max_autotune_pointwise': False, 'min_split_scan_rblock': 256, 'spill_threshold': 16, 'store_cubin': False},
+ )
+ @triton.jit
+ def triton_(in_ptr0, in_ptr1, in_ptr2, in_ptr3, in_ptr4, in_ptr5, in_ptr6, in_ptr7, in_ptr8, in_ptr9, in_ptr10, in_ptr11, in_ptr12, in_ptr13, in_ptr14, in_ptr15, in_ptr16, in_ptr17, in_ptr18, in_ptr19, in_ptr20, in_ptr21, in_ptr22, in_ptr23, in_ptr24, in_ptr25, in_ptr26, in_ptr27, in_ptr28, in_ptr29, in_ptr30, in_ptr31, in_ptr32, in_ptr33, in_ptr34, in_ptr35, in_ptr36, in_ptr37, in_ptr38, in_ptr39, in_ptr40, in_ptr41, in_ptr42, in_ptr43, in_ptr44, in_ptr45, in_ptr46, in_ptr47, in_ptr48, in_ptr49, in_ptr50, in_ptr51, in_ptr52, in_ptr53, in_ptr54, in_ptr55, in_ptr56, in_ptr57, in_ptr58, in_ptr59, in_ptr60, in_ptr61, in_ptr62, in_ptr63, in_ptr64, in_ptr65, in_ptr66, in_ptr67, in_ptr68, in_ptr69, in_ptr70, in_ptr71, in_ptr72, in_ptr73, in_ptr74, in_ptr75, in_ptr76, in_ptr77, in_ptr78, in_ptr79, in_ptr80, in_ptr81, in_ptr82, out_ptr0, out_ptr1, out_ptr2, out_ptr3, out_ptr4, out_ptr5, out_ptr6, out_ptr7, out_ptr8, out_ptr9, out_ptr10, out_ptr11, out_ptr12, out_ptr13, out_ptr14, out_ptr15, out_ptr16, out_ptr17, out_ptr18, out_ptr19, out_ptr20, out_ptr21, out_ptr22, out_ptr23, out_ptr24, out_ptr25, out_ptr26, out_ptr27, out_ptr28, out_ptr29, out_ptr30, out_ptr31, out_ptr32, out_ptr33, out_ptr34, out_ptr35, out_ptr36, out_ptr37, out_ptr38, out_ptr39, out_ptr40, out_ptr41, out_ptr42, out_ptr43, out_ptr44, out_ptr45, out_ptr46, out_ptr47, out_ptr48, out_ptr49, out_ptr50, out_ptr51, out_ptr52, out_ptr53, out_ptr54, out_ptr55, out_ptr56, out_ptr57, out_ptr58, out_ptr59, out_ptr60, out_ptr61, out_ptr62, out_ptr63, out_ptr64, out_ptr65, out_ptr66, out_ptr67, out_ptr68, out_ptr69, out_ptr70, out_ptr71, out_ptr72, out_ptr73, out_ptr74, out_ptr75, out_ptr76, out_ptr77, out_ptr78, out_ptr79, out_ptr80, out_ptr81, out_ptr82):
+     pid = tl.program_id(0)
+     XBLOCK: tl.constexpr = 1024
+     num_xblocks_0 = tl.cdiv(1, XBLOCK)
+     num_xblocks_1 = num_xblocks_0 + tl.cdiv(1, XBLOCK)
+     num_xblocks_2 = num_xblocks_1 + tl.cdiv(1, XBLOCK)
+     num_xblocks_3 = num_xblocks_2 + tl.cdiv(1, XBLOCK)
+     num_xblocks_4 = num_xblocks_3 + tl.cdiv(1, XBLOCK)
+     num_xblocks_5 = num_xblocks_4 + tl.cdiv(1, XBLOCK)
+     num_xblocks_6 = num_xblocks_5 + tl.cdiv(1, XBLOCK)
+     num_xblocks_7 = num_xblocks_6 + tl.cdiv(1, XBLOCK)
+     num_xblocks_8 = num_xblocks_7 + tl.cdiv(1, XBLOCK)
+     num_xblocks_9 = num_xblocks_8 + tl.cdiv(1, XBLOCK)
+     num_xblocks_10 = num_xblocks_9 + tl.cdiv(1, XBLOCK)
+     num_xblocks_11 = num_xblocks_10 + tl.cdiv(1, XBLOCK)
+     num_xblocks_12 = num_xblocks_11 + tl.cdiv(1, XBLOCK)
+     num_xblocks_13 = num_xblocks_12 + tl.cdiv(1, XBLOCK)
+     num_xblocks_14 = num_xblocks_13 + tl.cdiv(1, XBLOCK)
+     num_xblocks_15 = num_xblocks_14 + tl.cdiv(1, XBLOCK)
+     num_xblocks_16 = num_xblocks_15 + tl.cdiv(1, XBLOCK)
+     num_xblocks_17 = num_xblocks_16 + tl.cdiv(1, XBLOCK)
+     num_xblocks_18 = num_xblocks_17 + tl.cdiv(1, XBLOCK)
+     num_xblocks_19 = num_xblocks_18 + tl.cdiv(1, XBLOCK)
+     num_xblocks_20 = num_xblocks_19 + tl.cdiv(1, XBLOCK)
+     num_xblocks_21 = num_xblocks_20 + tl.cdiv(1, XBLOCK)
+     num_xblocks_22 = num_xblocks_21 + tl.cdiv(1, XBLOCK)
+     num_xblocks_23 = num_xblocks_22 + tl.cdiv(1, XBLOCK)
+     num_xblocks_24 = num_xblocks_23 + tl.cdiv(1, XBLOCK)
+     num_xblocks_25 = num_xblocks_24 + tl.cdiv(1, XBLOCK)
+     num_xblocks_26 = num_xblocks_25 + tl.cdiv(1, XBLOCK)
+     num_xblocks_27 = num_xblocks_26 + tl.cdiv(1, XBLOCK)
+     num_xblocks_28 = num_xblocks_27 + tl.cdiv(1, XBLOCK)
+     num_xblocks_29 = num_xblocks_28 + tl.cdiv(1, XBLOCK)
+     num_xblocks_30 = num_xblocks_29 + tl.cdiv(1, XBLOCK)
+     num_xblocks_31 = num_xblocks_30 + tl.cdiv(1, XBLOCK)
+     num_xblocks_32 = num_xblocks_31 + tl.cdiv(1, XBLOCK)
+     num_xblocks_33 = num_xblocks_32 + tl.cdiv(1, XBLOCK)
+     num_xblocks_34 = num_xblocks_33 + tl.cdiv(1, XBLOCK)
+     num_xblocks_35 = num_xblocks_34 + tl.cdiv(1, XBLOCK)
+     num_xblocks_36 = num_xblocks_35 + tl.cdiv(1, XBLOCK)
+     num_xblocks_37 = num_xblocks_36 + tl.cdiv(1, XBLOCK)
+     num_xblocks_38 = num_xblocks_37 + tl.cdiv(1, XBLOCK)
+     num_xblocks_39 = num_xblocks_38 + tl.cdiv(1, XBLOCK)
+     num_xblocks_40 = num_xblocks_39 + tl.cdiv(1, XBLOCK)
+     num_xblocks_41 = num_xblocks_40 + tl.cdiv(1, XBLOCK)
+     num_xblocks_42 = num_xblocks_41 + tl.cdiv(1, XBLOCK)
+     num_xblocks_43 = num_xblocks_42 + tl.cdiv(1, XBLOCK)
+     num_xblocks_44 = num_xblocks_43 + tl.cdiv(1, XBLOCK)
+     num_xblocks_45 = num_xblocks_44 + tl.cdiv(1, XBLOCK)
+     num_xblocks_46 = num_xblocks_45 + tl.cdiv(1, XBLOCK)
+     num_xblocks_47 = num_xblocks_46 + tl.cdiv(1, XBLOCK)
+     num_xblocks_48 = num_xblocks_47 + tl.cdiv(1, XBLOCK)
+     num_xblocks_49 = num_xblocks_48 + tl.cdiv(1, XBLOCK)
+     num_xblocks_50 = num_xblocks_49 + tl.cdiv(1, XBLOCK)
+     num_xblocks_51 = num_xblocks_50 + tl.cdiv(1, XBLOCK)
+     num_xblocks_52 = num_xblocks_51 + tl.cdiv(1, XBLOCK)
+     num_xblocks_53 = num_xblocks_52 + tl.cdiv(1, XBLOCK)
+     num_xblocks_54 = num_xblocks_53 + tl.cdiv(1, XBLOCK)
+     num_xblocks_55 = num_xblocks_54 + tl.cdiv(1, XBLOCK)
+     num_xblocks_56 = num_xblocks_55 + tl.cdiv(1, XBLOCK)
+     num_xblocks_57 = num_xblocks_56 + tl.cdiv(1, XBLOCK)
+     num_xblocks_58 = num_xblocks_57 + tl.cdiv(1, XBLOCK)
+     num_xblocks_59 = num_xblocks_58 + tl.cdiv(1, XBLOCK)
+     num_xblocks_60 = num_xblocks_59 + tl.cdiv(1, XBLOCK)
+     num_xblocks_61 = num_xblocks_60 + tl.cdiv(1, XBLOCK)
+     num_xblocks_62 = num_xblocks_61 + tl.cdiv(1, XBLOCK)
+     num_xblocks_63 = num_xblocks_62 + tl.cdiv(1, XBLOCK)
+     num_xblocks_64 = num_xblocks_63 + tl.cdiv(1, XBLOCK)
+     num_xblocks_65 = num_xblocks_64 + tl.cdiv(1, XBLOCK)
+     num_xblocks_66 = num_xblocks_65 + tl.cdiv(1, XBLOCK)
+     num_xblocks_67 = num_xblocks_66 + tl.cdiv(1, XBLOCK)
+     num_xblocks_68 = num_xblocks_67 + tl.cdiv(1, XBLOCK)
+     num_xblocks_69 = num_xblocks_68 + tl.cdiv(1, XBLOCK)
+     num_xblocks_70 = num_xblocks_69 + tl.cdiv(1, XBLOCK)
+     num_xblocks_71 = num_xblocks_70 + tl.cdiv(1, XBLOCK)
+     num_xblocks_72 = num_xblocks_71 + tl.cdiv(1, XBLOCK)
+     num_xblocks_73 = num_xblocks_72 + tl.cdiv(1, XBLOCK)
+     num_xblocks_74 = num_xblocks_73 + tl.cdiv(1, XBLOCK)
+     num_xblocks_75 = num_xblocks_74 + tl.cdiv(1, XBLOCK)
+     num_xblocks_76 = num_xblocks_75 + tl.cdiv(1, XBLOCK)
+     num_xblocks_77 = num_xblocks_76 + tl.cdiv(1, XBLOCK)
+     num_xblocks_78 = num_xblocks_77 + tl.cdiv(1, XBLOCK)
+     num_xblocks_79 = num_xblocks_78 + tl.cdiv(1, XBLOCK)
+     num_xblocks_80 = num_xblocks_79 + tl.cdiv(1, XBLOCK)
+     num_xblocks_81 = num_xblocks_80 + tl.cdiv(1, XBLOCK)
+     num_xblocks_82 = num_xblocks_81 + tl.cdiv(1, XBLOCK)
+     if pid < num_xblocks_0:
+         pid_offset = pid
+         xnumel = 1
+         rnumel = 1
+         xoffset = pid_offset * XBLOCK
+         xindex = xoffset + tl.arange(0, XBLOCK)[:]
+         xmask = tl.full([XBLOCK], True, tl.int1)
+         tmp0 = tl.load(in_ptr0 + (0))
+         tmp1 = tl.broadcast_to(tmp0, [XBLOCK])
+         tmp2 = 1.0
+         tmp3 = tmp1 + tmp2
+         tl.store(out_ptr0 + (tl.full([XBLOCK], 0, tl.int32)), tmp3, None)
+     elif pid < num_xblocks_1:
+         pid_offset = pid - num_xblocks_0
+         xnumel = 1
+         rnumel = 1
+         xoffset = pid_offset * XBLOCK
+         xindex = xoffset + tl.arange(0, XBLOCK)[:]
+         xmask = tl.full([XBLOCK], True, tl.int1)
+         tmp4 = tl.load(in_ptr1 + (0))
+         tmp5 = tl.broadcast_to(tmp4, [XBLOCK])
+         tmp6 = 1.0
+         tmp7 = tmp5 + tmp6
+         tl.store(out_ptr1 + (tl.full([XBLOCK], 0, tl.int32)), tmp7, None)
+     elif pid < num_xblocks_2:
+         pid_offset = pid - num_xblocks_1
+         xnumel = 1
+         rnumel = 1
+         xoffset = pid_offset * XBLOCK
+         xindex = xoffset + tl.arange(0, XBLOCK)[:]
+         xmask = tl.full([XBLOCK], True, tl.int1)
+         tmp8 = tl.load(in_ptr2 + (0))
+         tmp9 = tl.broadcast_to(tmp8, [XBLOCK])
+         tmp10 = 1.0
+         tmp11 = tmp9 + tmp10
+         tl.store(out_ptr2 + (tl.full([XBLOCK], 0, tl.int32)), tmp11, None)
+     elif pid < num_xblocks_3:
+         pid_offset = pid - num_xblocks_2
+         xnumel = 1
+         rnumel = 1
+         xoffset = pid_offset * XBLOCK
+         xindex = xoffset + tl.arange(0, XBLOCK)[:]
+         xmask = tl.full([XBLOCK], True, tl.int1)
+         tmp12 = tl.load(in_ptr3 + (0))
+         tmp13 = tl.broadcast_to(tmp12, [XBLOCK])
+         tmp14 = 1.0
+         tmp15 = tmp13 + tmp14
+         tl.store(out_ptr3 + (tl.full([XBLOCK], 0, tl.int32)), tmp15, None)
+     elif pid < num_xblocks_4:
+         pid_offset = pid - num_xblocks_3
+         xnumel = 1
+         rnumel = 1
+         xoffset = pid_offset * XBLOCK
+         xindex = xoffset + tl.arange(0, XBLOCK)[:]
+         xmask = tl.full([XBLOCK], True, tl.int1)
+         tmp16 = tl.load(in_ptr4 + (0))
+         tmp17 = tl.broadcast_to(tmp16, [XBLOCK])
+         tmp18 = 1.0
+         tmp19 = tmp17 + tmp18
+         tl.store(out_ptr4 + (tl.full([XBLOCK], 0, tl.int32)), tmp19, None)
+     elif pid < num_xblocks_5:
+         pid_offset = pid - num_xblocks_4
+         xnumel = 1
+         rnumel = 1
+         xoffset = pid_offset * XBLOCK
+         xindex = xoffset + tl.arange(0, XBLOCK)[:]
+         xmask = tl.full([XBLOCK], True, tl.int1)
+         tmp20 = tl.load(in_ptr5 + (0))
+         tmp21 = tl.broadcast_to(tmp20, [XBLOCK])
+         tmp22 = 1.0
+         tmp23 = tmp21 + tmp22
+         tl.store(out_ptr5 + (tl.full([XBLOCK], 0, tl.int32)), tmp23, None)
+     elif pid < num_xblocks_6:
+         pid_offset = pid - num_xblocks_5
+         xnumel = 1
+         rnumel = 1
+         xoffset = pid_offset * XBLOCK
+         xindex = xoffset + tl.arange(0, XBLOCK)[:]
+         xmask = tl.full([XBLOCK], True, tl.int1)
+         tmp24 = tl.load(in_ptr6 + (0))
+         tmp25 = tl.broadcast_to(tmp24, [XBLOCK])
+         tmp26 = 1.0
+         tmp27 = tmp25 + tmp26
+         tl.store(out_ptr6 + (tl.full([XBLOCK], 0, tl.int32)), tmp27, None)
+     elif pid < num_xblocks_7:
+         pid_offset = pid - num_xblocks_6
+         xnumel = 1
+         rnumel = 1
+         xoffset = pid_offset * XBLOCK
+         xindex = xoffset + tl.arange(0, XBLOCK)[:]
+         xmask = tl.full([XBLOCK], True, tl.int1)
+         tmp28 = tl.load(in_ptr7 + (0))
+         tmp29 = tl.broadcast_to(tmp28, [XBLOCK])
+         tmp30 = 1.0
+         tmp31 = tmp29 + tmp30
+         tl.store(out_ptr7 + (tl.full([XBLOCK], 0, tl.int32)), tmp31, None)
+     elif pid < num_xblocks_8:
+         pid_offset = pid - num_xblocks_7
+         xnumel = 1
+         rnumel = 1
+         xoffset = pid_offset * XBLOCK
+         xindex = xoffset + tl.arange(0, XBLOCK)[:]
+         xmask = tl.full([XBLOCK], True, tl.int1)
+         tmp32 = tl.load(in_ptr8 + (0))
+         tmp33 = tl.broadcast_to(tmp32, [XBLOCK])
+         tmp34 = 1.0
+         tmp35 = tmp33 + tmp34
+         tl.store(out_ptr8 + (tl.full([XBLOCK], 0, tl.int32)), tmp35, None)
+     elif pid < num_xblocks_9:
+         pid_offset = pid - num_xblocks_8
+         xnumel = 1
+         rnumel = 1
+         xoffset = pid_offset * XBLOCK
+         xindex = xoffset + tl.arange(0, XBLOCK)[:]
+         xmask = tl.full([XBLOCK], True, tl.int1)
+         tmp36 = tl.load(in_ptr9 + (0))
+         tmp37 = tl.broadcast_to(tmp36, [XBLOCK])
+         tmp38 = 1.0
+         tmp39 = tmp37 + tmp38
+         tl.store(out_ptr9 + (tl.full([XBLOCK], 0, tl.int32)), tmp39, None)
+     elif pid < num_xblocks_10:
+         pid_offset = pid - num_xblocks_9
+         xnumel = 1
+         rnumel = 1
+         xoffset = pid_offset * XBLOCK
+         xindex = xoffset + tl.arange(0, XBLOCK)[:]
+         xmask = tl.full([XBLOCK], True, tl.int1)
+         tmp40 = tl.load(in_ptr10 + (0))
+         tmp41 = tl.broadcast_to(tmp40, [XBLOCK])
+         tmp42 = 1.0
+         tmp43 = tmp41 + tmp42
+         tl.store(out_ptr10 + (tl.full([XBLOCK], 0, tl.int32)), tmp43, None)
+     elif pid < num_xblocks_11:
+         pid_offset = pid - num_xblocks_10
+         xnumel = 1
+         rnumel = 1
+         xoffset = pid_offset * XBLOCK
+         xindex = xoffset + tl.arange(0, XBLOCK)[:]
+         xmask = tl.full([XBLOCK], True, tl.int1)
+         tmp44 = tl.load(in_ptr11 + (0))
+         tmp45 = tl.broadcast_to(tmp44, [XBLOCK])
+         tmp46 = 1.0
+         tmp47 = tmp45 + tmp46
+         tl.store(out_ptr11 + (tl.full([XBLOCK], 0, tl.int32)), tmp47, None)
+     elif pid < num_xblocks_12:
+         pid_offset = pid - num_xblocks_11
+         xnumel = 1
+         rnumel = 1
+         xoffset = pid_offset * XBLOCK
+         xindex = xoffset + tl.arange(0, XBLOCK)[:]
+         xmask = tl.full([XBLOCK], True, tl.int1)
+         tmp48 = tl.load(in_ptr12 + (0))
+         tmp49 = tl.broadcast_to(tmp48, [XBLOCK])
+         tmp50 = 1.0
+         tmp51 = tmp49 + tmp50
+         tl.store(out_ptr12 + (tl.full([XBLOCK], 0, tl.int32)), tmp51, None)
+     elif pid < num_xblocks_13:
+         pid_offset = pid - num_xblocks_12
+         xnumel = 1
+         rnumel = 1
+         xoffset = pid_offset * XBLOCK
+         xindex = xoffset + tl.arange(0, XBLOCK)[:]
+         xmask = tl.full([XBLOCK], True, tl.int1)
+         tmp52 = tl.load(in_ptr13 + (0))
+         tmp53 = tl.broadcast_to(tmp52, [XBLOCK])
+         tmp54 = 1.0
+         tmp55 = tmp53 + tmp54
+         tl.store(out_ptr13 + (tl.full([XBLOCK], 0, tl.int32)), tmp55, None)
+     elif pid < num_xblocks_14:
+         pid_offset = pid - num_xblocks_13
+         xnumel = 1
+         rnumel = 1
+         xoffset = pid_offset * XBLOCK
+         xindex = xoffset + tl.arange(0, XBLOCK)[:]
+         xmask = tl.full([XBLOCK], True, tl.int1)
+         tmp56 = tl.load(in_ptr14 + (0))
+         tmp57 = tl.broadcast_to(tmp56, [XBLOCK])
+         tmp58 = 1.0
+         tmp59 = tmp57 + tmp58
+         tl.store(out_ptr14 + (tl.full([XBLOCK], 0, tl.int32)), tmp59, None)
+     elif pid < num_xblocks_15:
+         pid_offset = pid - num_xblocks_14
+         xnumel = 1
+         rnumel = 1
+         xoffset = pid_offset * XBLOCK
+         xindex = xoffset + tl.arange(0, XBLOCK)[:]
+         xmask = tl.full([XBLOCK], True, tl.int1)
+         tmp60 = tl.load(in_ptr15 + (0))
+         tmp61 = tl.broadcast_to(tmp60, [XBLOCK])
+         tmp62 = 1.0
+         tmp63 = tmp61 + tmp62
+         tl.store(out_ptr15 + (tl.full([XBLOCK], 0, tl.int32)), tmp63, None)
+     elif pid < num_xblocks_16:
+         pid_offset = pid - num_xblocks_15
+         xnumel = 1
+         rnumel = 1
+         xoffset = pid_offset * XBLOCK
+         xindex = xoffset + tl.arange(0, XBLOCK)[:]
+         xmask = tl.full([XBLOCK], True, tl.int1)
+         tmp64 = tl.load(in_ptr16 + (0))
+         tmp65 = tl.broadcast_to(tmp64, [XBLOCK])
+         tmp66 = 1.0
+         tmp67 = tmp65 + tmp66
+         tl.store(out_ptr16 + (tl.full([XBLOCK], 0, tl.int32)), tmp67, None)
+     elif pid < num_xblocks_17:
+         pid_offset = pid - num_xblocks_16
+         xnumel = 1
+         rnumel = 1
+         xoffset = pid_offset * XBLOCK
+         xindex = xoffset + tl.arange(0, XBLOCK)[:]
+         xmask = tl.full([XBLOCK], True, tl.int1)
+         tmp68 = tl.load(in_ptr17 + (0))
+         tmp69 = tl.broadcast_to(tmp68, [XBLOCK])
+         tmp70 = 1.0
+         tmp71 = tmp69 + tmp70
+         tl.store(out_ptr17 + (tl.full([XBLOCK], 0, tl.int32)), tmp71, None)
+     elif pid < num_xblocks_18:
+         pid_offset = pid - num_xblocks_17
+         xnumel = 1
+         rnumel = 1
+         xoffset = pid_offset * XBLOCK
+         xindex = xoffset + tl.arange(0, XBLOCK)[:]
+         xmask = tl.full([XBLOCK], True, tl.int1)
+         tmp72 = tl.load(in_ptr18 + (0))
+         tmp73 = tl.broadcast_to(tmp72, [XBLOCK])
+         tmp74 = 1.0
+         tmp75 = tmp73 + tmp74
+         tl.store(out_ptr18 + (tl.full([XBLOCK], 0, tl.int32)), tmp75, None)
+     elif pid < num_xblocks_19:
+         pid_offset = pid - num_xblocks_18
+         xnumel = 1
+         rnumel = 1
+         xoffset = pid_offset * XBLOCK
+         xindex = xoffset + tl.arange(0, XBLOCK)[:]
+         xmask = tl.full([XBLOCK], True, tl.int1)
+         tmp76 = tl.load(in_ptr19 + (0))
+         tmp77 = tl.broadcast_to(tmp76, [XBLOCK])
+         tmp78 = 1.0
+         tmp79 = tmp77 + tmp78
+         tl.store(out_ptr19 + (tl.full([XBLOCK], 0, tl.int32)), tmp79, None)
+     elif pid < num_xblocks_20:
+         pid_offset = pid - num_xblocks_19
+         xnumel = 1
+         rnumel = 1
+         xoffset = pid_offset * XBLOCK
+         xindex = xoffset + tl.arange(0, XBLOCK)[:]
+         xmask = tl.full([XBLOCK], True, tl.int1)
+         tmp80 = tl.load(in_ptr20 + (0))
+         tmp81 = tl.broadcast_to(tmp80, [XBLOCK])
+         tmp82 = 1.0
+         tmp83 = tmp81 + tmp82
+         tl.store(out_ptr20 + (tl.full([XBLOCK], 0, tl.int32)), tmp83, None)
+     elif pid < num_xblocks_21:
+         pid_offset = pid - num_xblocks_20
+         xnumel = 1
+         rnumel = 1
+         xoffset = pid_offset * XBLOCK
+         xindex = xoffset + tl.arange(0, XBLOCK)[:]
+         xmask = tl.full([XBLOCK], True, tl.int1)
+         tmp84 = tl.load(in_ptr21 + (0))
+         tmp85 = tl.broadcast_to(tmp84, [XBLOCK])
+         tmp86 = 1.0
+         tmp87 = tmp85 + tmp86
+         tl.store(out_ptr21 + (tl.full([XBLOCK], 0, tl.int32)), tmp87, None)
+     elif pid < num_xblocks_22:
+         pid_offset = pid - num_xblocks_21
+         xnumel = 1
+         rnumel = 1
+         xoffset = pid_offset * XBLOCK
+         xindex = xoffset + tl.arange(0, XBLOCK)[:]
+         xmask = tl.full([XBLOCK], True, tl.int1)
+         tmp88 = tl.load(in_ptr22 + (0))
+         tmp89 = tl.broadcast_to(tmp88, [XBLOCK])
+         tmp90 = 1.0
+         tmp91 = tmp89 + tmp90
+         tl.store(out_ptr22 + (tl.full([XBLOCK], 0, tl.int32)), tmp91, None)
+     elif pid < num_xblocks_23:
+         pid_offset = pid - num_xblocks_22
+         xnumel = 1
+         rnumel = 1
+         xoffset = pid_offset * XBLOCK
+         xindex = xoffset + tl.arange(0, XBLOCK)[:]
+         xmask = tl.full([XBLOCK], True, tl.int1)
+         tmp92 = tl.load(in_ptr23 + (0))
+         tmp93 = tl.broadcast_to(tmp92, [XBLOCK])
+         tmp94 = 1.0
+         tmp95 = tmp93 + tmp94
+         tl.store(out_ptr23 + (tl.full([XBLOCK], 0, tl.int32)), tmp95, None)
+     elif pid < num_xblocks_24:
+         pid_offset = pid - num_xblocks_23
+         xnumel = 1
+         rnumel = 1
+         xoffset = pid_offset * XBLOCK
+         xindex = xoffset + tl.arange(0, XBLOCK)[:]
+         xmask = tl.full([XBLOCK], True, tl.int1)
+         tmp96 = tl.load(in_ptr24 + (0))
+         tmp97 = tl.broadcast_to(tmp96, [XBLOCK])
+         tmp98 = 1.0
+         tmp99 = tmp97 + tmp98
+         tl.store(out_ptr24 + (tl.full([XBLOCK], 0, tl.int32)), tmp99, None)
+     elif pid < num_xblocks_25:
+         pid_offset = pid - num_xblocks_24
+         xnumel = 1
+         rnumel = 1
+         xoffset = pid_offset * XBLOCK
+         xindex = xoffset + tl.arange(0, XBLOCK)[:]
+         xmask = tl.full([XBLOCK], True, tl.int1)
+         tmp100 = tl.load(in_ptr25 + (0))
+         tmp101 = tl.broadcast_to(tmp100, [XBLOCK])
+         tmp102 = 1.0
+         tmp103 = tmp101 + tmp102
+         tl.store(out_ptr25 + (tl.full([XBLOCK], 0, tl.int32)), tmp103, None)
+     elif pid < num_xblocks_26:
+         pid_offset = pid - num_xblocks_25
+         xnumel = 1
+         rnumel = 1
+         xoffset = pid_offset * XBLOCK
+         xindex = xoffset + tl.arange(0, XBLOCK)[:]
+         xmask = tl.full([XBLOCK], True, tl.int1)
+         tmp104 = tl.load(in_ptr26 + (0))
+         tmp105 = tl.broadcast_to(tmp104, [XBLOCK])
+         tmp106 = 1.0
+         tmp107 = tmp105 + tmp106
+         tl.store(out_ptr26 + (tl.full([XBLOCK], 0, tl.int32)), tmp107, None)
+     elif pid < num_xblocks_27:
+         pid_offset = pid - num_xblocks_26
+         xnumel = 1
+         rnumel = 1
+         xoffset = pid_offset * XBLOCK
+         xindex = xoffset + tl.arange(0, XBLOCK)[:]
+         xmask = tl.full([XBLOCK], True, tl.int1)
+         tmp108 = tl.load(in_ptr27 + (0))
+         tmp109 = tl.broadcast_to(tmp108, [XBLOCK])
+         tmp110 = 1.0
+         tmp111 = tmp109 + tmp110
+         tl.store(out_ptr27 + (tl.full([XBLOCK], 0, tl.int32)), tmp111, None)
+     elif pid < num_xblocks_28:
+         pid_offset = pid - num_xblocks_27
+         xnumel = 1
+         rnumel = 1
+         xoffset = pid_offset * XBLOCK
+         xindex = xoffset + tl.arange(0, XBLOCK)[:]
+         xmask = tl.full([XBLOCK], True, tl.int1)
+         tmp112 = tl.load(in_ptr28 + (0))
+         tmp113 = tl.broadcast_to(tmp112, [XBLOCK])
+         tmp114 = 1.0
+         tmp115 = tmp113 + tmp114
+         tl.store(out_ptr28 + (tl.full([XBLOCK], 0, tl.int32)), tmp115, None)
+     elif pid < num_xblocks_29:
+         pid_offset = pid - num_xblocks_28
+         xnumel = 1
+         rnumel = 1
+         xoffset = pid_offset * XBLOCK
+         xindex = xoffset + tl.arange(0, XBLOCK)[:]
+         xmask = tl.full([XBLOCK], True, tl.int1)
+         tmp116 = tl.load(in_ptr29 + (0))
+         tmp117 = tl.broadcast_to(tmp116, [XBLOCK])
+         tmp118 = 1.0
+         tmp119 = tmp117 + tmp118
+         tl.store(out_ptr29 + (tl.full([XBLOCK], 0, tl.int32)), tmp119, None)
+     elif pid < num_xblocks_30:
+         pid_offset = pid - num_xblocks_29
+         xnumel = 1
+         rnumel = 1
+         xoffset = pid_offset * XBLOCK
+         xindex = xoffset + tl.arange(0, XBLOCK)[:]
+         xmask = tl.full([XBLOCK], True, tl.int1)
+         tmp120 = tl.load(in_ptr30 + (0))
+         tmp121 = tl.broadcast_to(tmp120, [XBLOCK])
+         tmp122 = 1.0
+         tmp123 = tmp121 + tmp122
+         tl.store(out_ptr30 + (tl.full([XBLOCK], 0, tl.int32)), tmp123, None)
+     elif pid < num_xblocks_31:
+         pid_offset = pid - num_xblocks_30
+         xnumel = 1
+         rnumel = 1
+         xoffset = pid_offset * XBLOCK
+         xindex = xoffset + tl.arange(0, XBLOCK)[:]
+         xmask = tl.full([XBLOCK], True, tl.int1)
+         tmp124 = tl.load(in_ptr31 + (0))
+         tmp125 = tl.broadcast_to(tmp124, [XBLOCK])
+         tmp126 = 1.0
+         tmp127 = tmp125 + tmp126
+         tl.store(out_ptr31 + (tl.full([XBLOCK], 0, tl.int32)), tmp127, None)
+     elif pid < num_xblocks_32:
+         pid_offset = pid - num_xblocks_31
+         xnumel = 1
+         rnumel = 1
+         xoffset = pid_offset * XBLOCK
+         xindex = xoffset + tl.arange(0, XBLOCK)[:]
+         xmask = tl.full([XBLOCK], True, tl.int1)
+         tmp128 = tl.load(in_ptr32 + (0))
+         tmp129 = tl.broadcast_to(tmp128, [XBLOCK])
+         tmp130 = 1.0
+         tmp131 = tmp129 + tmp130
+         tl.store(out_ptr32 + (tl.full([XBLOCK], 0, tl.int32)), tmp131, None)
+     elif pid < num_xblocks_33:
+         pid_offset = pid - num_xblocks_32
+         xnumel = 1
+         rnumel = 1
+         xoffset = pid_offset * XBLOCK
+         xindex = xoffset + tl.arange(0, XBLOCK)[:]
+         xmask = tl.full([XBLOCK], True, tl.int1)
+         tmp132 = tl.load(in_ptr33 + (0))
+         tmp133 = tl.broadcast_to(tmp132, [XBLOCK])
+         tmp134 = 1.0
+         tmp135 = tmp133 + tmp134
+         tl.store(out_ptr33 + (tl.full([XBLOCK], 0, tl.int32)), tmp135, None)
+     elif pid < num_xblocks_34:
+         pid_offset = pid - num_xblocks_33
+         xnumel = 1
+         rnumel = 1
+         xoffset = pid_offset * XBLOCK
+         xindex = xoffset + tl.arange(0, XBLOCK)[:]
+         xmask = tl.full([XBLOCK], True, tl.int1)
+         tmp136 = tl.load(in_ptr34 + (0))
+         tmp137 = tl.broadcast_to(tmp136, [XBLOCK])
+         tmp138 = 1.0
+         tmp139 = tmp137 + tmp138
+         tl.store(out_ptr34 + (tl.full([XBLOCK], 0, tl.int32)), tmp139, None)
+     elif pid < num_xblocks_35:
+         pid_offset = pid - num_xblocks_34
+         xnumel = 1
+         rnumel = 1
+         xoffset = pid_offset * XBLOCK
+         xindex = xoffset + tl.arange(0, XBLOCK)[:]
+         xmask = tl.full([XBLOCK], True, tl.int1)
+         tmp140 = tl.load(in_ptr35 + (0))
+         tmp141 = tl.broadcast_to(tmp140, [XBLOCK])
+         tmp142 = 1.0
+         tmp143 = tmp141 + tmp142
+         tl.store(out_ptr35 + (tl.full([XBLOCK], 0, tl.int32)), tmp143, None)
+     elif pid < num_xblocks_36:
+         pid_offset = pid - num_xblocks_35
+         xnumel = 1
+         rnumel = 1
+         xoffset = pid_offset * XBLOCK
+         xindex = xoffset + tl.arange(0, XBLOCK)[:]
+         xmask = tl.full([XBLOCK], True, tl.int1)
+         tmp144 = tl.load(in_ptr36 + (0))
+         tmp145 = tl.broadcast_to(tmp144, [XBLOCK])
+         tmp146 = 1.0
+         tmp147 = tmp145 + tmp146
+         tl.store(out_ptr36 + (tl.full([XBLOCK], 0, tl.int32)), tmp147, None)
+     elif pid < num_xblocks_37:
+         pid_offset = pid - num_xblocks_36
+         xnumel = 1
+         rnumel = 1
+         xoffset = pid_offset * XBLOCK
+         xindex = xoffset + tl.arange(0, XBLOCK)[:]
+         xmask = tl.full([XBLOCK], True, tl.int1)
+         tmp148 = tl.load(in_ptr37 + (0))
+         tmp149 = tl.broadcast_to(tmp148, [XBLOCK])
+         tmp150 = 1.0
+         tmp151 = tmp149 + tmp150
+         tl.store(out_ptr37 + (tl.full([XBLOCK], 0, tl.int32)), tmp151, None)
+     elif pid < num_xblocks_38:
+         pid_offset = pid - num_xblocks_37
+         xnumel = 1
+         rnumel = 1
+         xoffset = pid_offset * XBLOCK
+         xindex = xoffset + tl.arange(0, XBLOCK)[:]
+         xmask = tl.full([XBLOCK], True, tl.int1)
+         tmp152 = tl.load(in_ptr38 + (0))
+         tmp153 = tl.broadcast_to(tmp152, [XBLOCK])
+         tmp154 = 1.0
+         tmp155 = tmp153 + tmp154
+         tl.store(out_ptr38 + (tl.full([XBLOCK], 0, tl.int32)), tmp155, None)
+     elif pid < num_xblocks_39:
+         pid_offset = pid - num_xblocks_38
+         xnumel = 1
+         rnumel = 1
+         xoffset = pid_offset * XBLOCK
+         xindex = xoffset + tl.arange(0, XBLOCK)[:]
+         xmask = tl.full([XBLOCK], True, tl.int1)
+         tmp156 = tl.load(in_ptr39 + (0))
+         tmp157 = tl.broadcast_to(tmp156, [XBLOCK])
+         tmp158 = 1.0
+         tmp159 = tmp157 + tmp158
+         tl.store(out_ptr39 + (tl.full([XBLOCK], 0, tl.int32)), tmp159, None)
+     elif pid < num_xblocks_40:
+         pid_offset = pid - num_xblocks_39
+         xnumel = 1
+         rnumel = 1
+         xoffset = pid_offset * XBLOCK
+         xindex = xoffset + tl.arange(0, XBLOCK)[:]
+         xmask = tl.full([XBLOCK], True, tl.int1)
+         tmp160 = tl.load(in_ptr40 + (0))
+         tmp161 = tl.broadcast_to(tmp160, [XBLOCK])
+         tmp162 = 1.0
+         tmp163 = tmp161 + tmp162
+         tl.store(out_ptr40 + (tl.full([XBLOCK], 0, tl.int32)), tmp163, None)
+     elif pid < num_xblocks_41:
+         pid_offset = pid - num_xblocks_40
+         xnumel = 1
+         rnumel = 1
+         xoffset = pid_offset * XBLOCK
+         xindex = xoffset + tl.arange(0, XBLOCK)[:]
+         xmask = tl.full([XBLOCK], True, tl.int1)
+         tmp164 = tl.load(in_ptr41 + (0))
+         tmp165 = tl.broadcast_to(tmp164, [XBLOCK])
+         tmp166 = 1.0
+         tmp167 = tmp165 + tmp166
+         tl.store(out_ptr41 + (tl.full([XBLOCK], 0, tl.int32)), tmp167, None)
+     elif pid < num_xblocks_42:
+         pid_offset = pid - num_xblocks_41
+         xnumel = 1
+         rnumel = 1
+         xoffset = pid_offset * XBLOCK
+         xindex = xoffset + tl.arange(0, XBLOCK)[:]
+         xmask = tl.full([XBLOCK], True, tl.int1)
+         tmp168 = tl.load(in_ptr42 + (0))
+         tmp169 = tl.broadcast_to(tmp168, [XBLOCK])
+         tmp170 = 1.0
+         tmp171 = tmp169 + tmp170
+         tl.store(out_ptr42 + (tl.full([XBLOCK], 0, tl.int32)), tmp171, None)
+     elif pid < num_xblocks_43:
+         pid_offset = pid - num_xblocks_42
+         xnumel = 1
+         rnumel = 1
+         xoffset = pid_offset * XBLOCK
+         xindex = xoffset + tl.arange(0, XBLOCK)[:]
+         xmask = tl.full([XBLOCK], True, tl.int1)
+         tmp172 = tl.load(in_ptr43 + (0))
+         tmp173 = tl.broadcast_to(tmp172, [XBLOCK])
+         tmp174 = 1.0
+         tmp175 = tmp173 + tmp174
+         tl.store(out_ptr43 + (tl.full([XBLOCK], 0, tl.int32)), tmp175, None)
+     elif pid < num_xblocks_44:
+         pid_offset = pid - num_xblocks_43
+         xnumel = 1
+         rnumel = 1
+         xoffset = pid_offset * XBLOCK
+         xindex = xoffset + tl.arange(0, XBLOCK)[:]
+         xmask = tl.full([XBLOCK], True, tl.int1)
+         tmp176 = tl.load(in_ptr44 + (0))
+         tmp177 = tl.broadcast_to(tmp176, [XBLOCK])
+         tmp178 = 1.0
+         tmp179 = tmp177 + tmp178
+         tl.store(out_ptr44 + (tl.full([XBLOCK], 0, tl.int32)), tmp179, None)
+     elif pid < num_xblocks_45:
+         pid_offset = pid - num_xblocks_44
+         xnumel = 1
+         rnumel = 1
+         xoffset = pid_offset * XBLOCK
+         xindex = xoffset + tl.arange(0, XBLOCK)[:]
+         xmask = tl.full([XBLOCK], True, tl.int1)
+         tmp180 = tl.load(in_ptr45 + (0))
+         tmp181 = tl.broadcast_to(tmp180, [XBLOCK])
+         tmp182 = 1.0
+         tmp183 = tmp181 + tmp182
+         tl.store(out_ptr45 + (tl.full([XBLOCK], 0, tl.int32)), tmp183, None)
+     elif pid < num_xblocks_46:
+         pid_offset = pid - num_xblocks_45
+         xnumel = 1
+         rnumel = 1
+         xoffset = pid_offset * XBLOCK
+         xindex = xoffset + tl.arange(0, XBLOCK)[:]
+         xmask = tl.full([XBLOCK], True, tl.int1)
+         tmp184 = tl.load(in_ptr46 + (0))
+         tmp185 = tl.broadcast_to(tmp184, [XBLOCK])
+         tmp186 = 1.0
+         tmp187 = tmp185 + tmp186
+         tl.store(out_ptr46 + (tl.full([XBLOCK], 0, tl.int32)), tmp187, None)
+     elif pid < num_xblocks_47:
+         pid_offset = pid - num_xblocks_46
+         xnumel = 1
+         rnumel = 1
+         xoffset = pid_offset * XBLOCK
+         xindex = xoffset + tl.arange(0, XBLOCK)[:]
+         xmask = tl.full([XBLOCK], True, tl.int1)
+         tmp188 = tl.load(in_ptr47 + (0))
+         tmp189 = tl.broadcast_to(tmp188, [XBLOCK])
+         tmp190 = 1.0
+         tmp191 = tmp189 + tmp190
+         tl.store(out_ptr47 + (tl.full([XBLOCK], 0, tl.int32)), tmp191, None)
+     elif pid < num_xblocks_48:
+         pid_offset = pid - num_xblocks_47
+         xnumel = 1
+         rnumel = 1
+         xoffset = pid_offset * XBLOCK
+         xindex = xoffset + tl.arange(0, XBLOCK)[:]
+         xmask = tl.full([XBLOCK], True, tl.int1)
+         tmp192 = tl.load(in_ptr48 + (0))
+         tmp193 = tl.broadcast_to(tmp192, [XBLOCK])
+         tmp194 = 1.0
+         tmp195 = tmp193 + tmp194
+         tl.store(out_ptr48 + (tl.full([XBLOCK], 0, tl.int32)), tmp195, None)
+     elif pid < num_xblocks_49:
+         pid_offset = pid - num_xblocks_48
+         xnumel = 1
+         rnumel = 1
+         xoffset = pid_offset * XBLOCK
+         xindex = xoffset + tl.arange(0, XBLOCK)[:]
+         xmask = tl.full([XBLOCK], True, tl.int1)
+         tmp196 = tl.load(in_ptr49 + (0))
+         tmp197 = tl.broadcast_to(tmp196, [XBLOCK])
+         tmp198 = 1.0
+         tmp199 = tmp197 + tmp198
+         tl.store(out_ptr49 + (tl.full([XBLOCK], 0, tl.int32)), tmp199, None)
+     elif pid < num_xblocks_50:
+         pid_offset = pid - num_xblocks_49
+         xnumel = 1
+         rnumel = 1
+         xoffset = pid_offset * XBLOCK
+         xindex = xoffset + tl.arange(0, XBLOCK)[:]
+         xmask = tl.full([XBLOCK], True, tl.int1)
+         tmp200 = tl.load(in_ptr50 + (0))
+         tmp201 = tl.broadcast_to(tmp200, [XBLOCK])
+         tmp202 = 1.0
+         tmp203 = tmp201 + tmp202
+         tl.store(out_ptr50 + (tl.full([XBLOCK], 0, tl.int32)), tmp203, None)
+     elif pid < num_xblocks_51:
+         pid_offset = pid - num_xblocks_50
+         xnumel = 1
+         rnumel = 1
+         xoffset = pid_offset * XBLOCK
+         xindex = xoffset + tl.arange(0, XBLOCK)[:]
+         xmask = tl.full([XBLOCK], True, tl.int1)
+         tmp204 = tl.load(in_ptr51 + (0))
+         tmp205 = tl.broadcast_to(tmp204, [XBLOCK])
+         tmp206 = 1.0
+         tmp207 = tmp205 + tmp206
+         tl.store(out_ptr51 + (tl.full([XBLOCK], 0, tl.int32)), tmp207, None)
+     elif pid < num_xblocks_52:
+         pid_offset = pid - num_xblocks_51
+         xnumel = 1
+         rnumel = 1
+         xoffset = pid_offset * XBLOCK
+         xindex = xoffset + tl.arange(0, XBLOCK)[:]
+         xmask = tl.full([XBLOCK], True, tl.int1)
+         tmp208 = tl.load(in_ptr52 + (0))
+         tmp209 = tl.broadcast_to(tmp208, [XBLOCK])
+         tmp210 = 1.0
+         tmp211 = tmp209 + tmp210
+         tl.store(out_ptr52 + (tl.full([XBLOCK], 0, tl.int32)), tmp211, None)
+     elif pid < num_xblocks_53:
+         pid_offset = pid - num_xblocks_52
+         xnumel = 1
+         rnumel = 1
+         xoffset = pid_offset * XBLOCK
+         xindex = xoffset + tl.arange(0, XBLOCK)[:]
+         xmask = tl.full([XBLOCK], True, tl.int1)
+         tmp212 = tl.load(in_ptr53 + (0))
+         tmp213 = tl.broadcast_to(tmp212, [XBLOCK])
+         tmp214 = 1.0
+         tmp215 = tmp213 + tmp214
+         tl.store(out_ptr53 + (tl.full([XBLOCK], 0, tl.int32)), tmp215, None)
+     elif pid < num_xblocks_54:
+         pid_offset = pid - num_xblocks_53
+         xnumel = 1
+         rnumel = 1
+         xoffset = pid_offset * XBLOCK
+         xindex = xoffset + tl.arange(0, XBLOCK)[:]
+         xmask = tl.full([XBLOCK], True, tl.int1)
+         tmp216 = tl.load(in_ptr54 + (0))
+         tmp217 = tl.broadcast_to(tmp216, [XBLOCK])
+         tmp218 = 1.0
+         tmp219 = tmp217 + tmp218
+         tl.store(out_ptr54 + (tl.full([XBLOCK], 0, tl.int32)), tmp219, None)
+     elif pid < num_xblocks_55:
+         pid_offset = pid - num_xblocks_54
+         xnumel = 1
+         rnumel = 1
+         xoffset = pid_offset * XBLOCK
+         xindex = xoffset + tl.arange(0, XBLOCK)[:]
+         xmask = tl.full([XBLOCK], True, tl.int1)
+         tmp220 = tl.load(in_ptr55 + (0))
+         tmp221 = tl.broadcast_to(tmp220, [XBLOCK])
+         tmp222 = 1.0
+         tmp223 = tmp221 + tmp222
+         tl.store(out_ptr55 + (tl.full([XBLOCK], 0, tl.int32)), tmp223, None)
+     elif pid < num_xblocks_56:
+         pid_offset = pid - num_xblocks_55
+         xnumel = 1
+         rnumel = 1
+         xoffset = pid_offset * XBLOCK
+         xindex = xoffset + tl.arange(0, XBLOCK)[:]
+         xmask = tl.full([XBLOCK], True, tl.int1)
+         tmp224 = tl.load(in_ptr56 + (0))
+         tmp225 = tl.broadcast_to(tmp224, [XBLOCK])
+         tmp226 = 1.0
+         tmp227 = tmp225 + tmp226
+         tl.store(out_ptr56 + (tl.full([XBLOCK], 0, tl.int32)), tmp227, None)
+     elif pid < num_xblocks_57:
+         pid_offset = pid - num_xblocks_56
+         xnumel = 1
+         rnumel = 1
+         xoffset = pid_offset * XBLOCK
+         xindex = xoffset + tl.arange(0, XBLOCK)[:]
+         xmask = tl.full([XBLOCK], True, tl.int1)
+         tmp228 = tl.load(in_ptr57 + (0))
+         tmp229 = tl.broadcast_to(tmp228, [XBLOCK])
+         tmp230 = 1.0
+         tmp231 = tmp229 + tmp230
+         tl.store(out_ptr57 + (tl.full([XBLOCK], 0, tl.int32)), tmp231, None)
+     elif pid < num_xblocks_58:
+         pid_offset = pid - num_xblocks_57
+         xnumel = 1
+         rnumel = 1
+         xoffset = pid_offset * XBLOCK
+         xindex = xoffset + tl.arange(0, XBLOCK)[:]
+         xmask = tl.full([XBLOCK], True, tl.int1)
+         tmp232 = tl.load(in_ptr58 + (0))
+         tmp233 = tl.broadcast_to(tmp232, [XBLOCK])
+         tmp234 = 1.0
+         tmp235 = tmp233 + tmp234
+         tl.store(out_ptr58 + (tl.full([XBLOCK], 0, tl.int32)), tmp235, None)
+     elif pid < num_xblocks_59:
+         pid_offset = pid - num_xblocks_58
+         xnumel = 1
+         rnumel = 1
+         xoffset = pid_offset * XBLOCK
+         xindex = xoffset + tl.arange(0, XBLOCK)[:]
+         xmask = tl.full([XBLOCK], True, tl.int1)
+         tmp236 = tl.load(in_ptr59 + (0))
+         tmp237 = tl.broadcast_to(tmp236, [XBLOCK])
+         tmp238 = 1.0
+         tmp239 = tmp237 + tmp238
+         tl.store(out_ptr59 + (tl.full([XBLOCK], 0, tl.int32)), tmp239, None)
+     elif pid < num_xblocks_60:
+         pid_offset = pid - num_xblocks_59
+         xnumel = 1
+         rnumel = 1
+         xoffset = pid_offset * XBLOCK
+         xindex = xoffset + tl.arange(0, XBLOCK)[:]
+         xmask = tl.full([XBLOCK], True, tl.int1)
+         tmp240 = tl.load(in_ptr60 + (0))
+         tmp241 = tl.broadcast_to(tmp240, [XBLOCK])
+         tmp242 = 1.0
+         tmp243 = tmp241 + tmp242
+         tl.store(out_ptr60 + (tl.full([XBLOCK], 0, tl.int32)), tmp243, None)
+     elif pid < num_xblocks_61:
+         pid_offset = pid - num_xblocks_60
+         xnumel = 1
+         rnumel = 1
+         xoffset = pid_offset * XBLOCK
+         xindex = xoffset + tl.arange(0, XBLOCK)[:]
+         xmask = tl.full([XBLOCK], True, tl.int1)
+         tmp244 = tl.load(in_ptr61 + (0))
+         tmp245 = tl.broadcast_to(tmp244, [XBLOCK])
+         tmp246 = 1.0
+         tmp247 = tmp245 + tmp246
+         tl.store(out_ptr61 + (tl.full([XBLOCK], 0, tl.int32)), tmp247, None)
+     elif pid < num_xblocks_62:
+         pid_offset = pid - num_xblocks_61
+         xnumel = 1
+         rnumel = 1
+         xoffset = pid_offset * XBLOCK
+         xindex = xoffset + tl.arange(0, XBLOCK)[:]
+         xmask = tl.full([XBLOCK], True, tl.int1)
+         tmp248 = tl.load(in_ptr62 + (0))
+         tmp249 = tl.broadcast_to(tmp248, [XBLOCK])
+         tmp250 = 1.0
+         tmp251 = tmp249 + tmp250
+         tl.store(out_ptr62 + (tl.full([XBLOCK], 0, tl.int32)), tmp251, None)
+     elif pid < num_xblocks_63:
+         pid_offset = pid - num_xblocks_62
+         xnumel = 1
+         rnumel = 1
+         xoffset = pid_offset * XBLOCK
+         xindex = xoffset + tl.arange(0, XBLOCK)[:]
+         xmask = tl.full([XBLOCK], True, tl.int1)
+         tmp252 = tl.load(in_ptr63 + (0))
+         tmp253 = tl.broadcast_to(tmp252, [XBLOCK])
+         tmp254 = 1.0
+         tmp255 = tmp253 + tmp254
+         tl.store(out_ptr63 + (tl.full([XBLOCK], 0, tl.int32)), tmp255, None)
+     elif pid < num_xblocks_64:
+         pid_offset = pid - num_xblocks_63
+         xnumel = 1
+         rnumel = 1
+         xoffset = pid_offset * XBLOCK
+         xindex = xoffset + tl.arange(0, XBLOCK)[:]
+         xmask = tl.full([XBLOCK], True, tl.int1)
+         tmp256 = tl.load(in_ptr64 + (0))
+         tmp257 = tl.broadcast_to(tmp256, [XBLOCK])
+         tmp258 = 1.0
+         tmp259 = tmp257 + tmp258
+         tl.store(out_ptr64 + (tl.full([XBLOCK], 0, tl.int32)), tmp259, None)
+     elif pid < num_xblocks_65:
+         pid_offset = pid - num_xblocks_64
+         xnumel = 1
+         rnumel = 1
+         xoffset = pid_offset * XBLOCK
+         xindex = xoffset + tl.arange(0, XBLOCK)[:]
+         xmask = tl.full([XBLOCK], True, tl.int1)
+         tmp260 = tl.load(in_ptr65 + (0))
+         tmp261 = tl.broadcast_to(tmp260, [XBLOCK])
+         tmp262 = 1.0
+         tmp263 = tmp261 + tmp262
+         tl.store(out_ptr65 + (tl.full([XBLOCK], 0, tl.int32)), tmp263, None)
+     elif pid < num_xblocks_66:
+         pid_offset = pid - num_xblocks_65
+         xnumel = 1
+         rnumel = 1
+         xoffset = pid_offset * XBLOCK
+         xindex = xoffset + tl.arange(0, XBLOCK)[:]
+         xmask = tl.full([XBLOCK], True, tl.int1)
+         tmp264 = tl.load(in_ptr66 + (0))
+         tmp265 = tl.broadcast_to(tmp264, [XBLOCK])
+         tmp266 = 1.0
+         tmp267 = tmp265 + tmp266
+         tl.store(out_ptr66 + (tl.full([XBLOCK], 0, tl.int32)), tmp267, None)
+     elif pid < num_xblocks_67:
+         pid_offset = pid - num_xblocks_66
+         xnumel = 1
+         rnumel = 1
+         xoffset = pid_offset * XBLOCK
+         xindex = xoffset + tl.arange(0, XBLOCK)[:]
+         xmask = tl.full([XBLOCK], True, tl.int1)
+         tmp268 = tl.load(in_ptr67 + (0))
+         tmp269 = tl.broadcast_to(tmp268, [XBLOCK])
+         tmp270 = 1.0
+         tmp271 = tmp269 + tmp270
+         tl.store(out_ptr67 + (tl.full([XBLOCK], 0, tl.int32)), tmp271, None)
+     elif pid < num_xblocks_68:
+         pid_offset = pid - num_xblocks_67
+         xnumel = 1
+         rnumel = 1
+         xoffset = pid_offset * XBLOCK
+         xindex = xoffset + tl.arange(0, XBLOCK)[:]
+         xmask = tl.full([XBLOCK], True, tl.int1)
+         tmp272 = tl.load(in_ptr68 + (0))
+         tmp273 = tl.broadcast_to(tmp272, [XBLOCK])
+         tmp274 = 1.0
+         tmp275 = tmp273 + tmp274
+         tl.store(out_ptr68 + (tl.full([XBLOCK], 0, tl.int32)), tmp275, None)
+     elif pid < num_xblocks_69:
+         pid_offset = pid - num_xblocks_68
+         xnumel = 1
+         rnumel = 1
+         xoffset = pid_offset * XBLOCK
+         xindex = xoffset + tl.arange(0, XBLOCK)[:]
+         xmask = tl.full([XBLOCK], True, tl.int1)
+         tmp276 = tl.load(in_ptr69 + (0))
+         tmp277 = tl.broadcast_to(tmp276, [XBLOCK])
+         tmp278 = 1.0
+         tmp279 = tmp277 + tmp278
+         tl.store(out_ptr69 + (tl.full([XBLOCK], 0, tl.int32)), tmp279, None)
+     elif pid < num_xblocks_70:
+         pid_offset = pid - num_xblocks_69
+         xnumel = 1
+         rnumel = 1
+         xoffset = pid_offset * XBLOCK
+         xindex = xoffset + tl.arange(0, XBLOCK)[:]
+         xmask = tl.full([XBLOCK], True, tl.int1)
+         tmp280 = tl.load(in_ptr70 + (0))
+         tmp281 = tl.broadcast_to(tmp280, [XBLOCK])
+         tmp282 = 1.0
+         tmp283 = tmp281 + tmp282
+         tl.store(out_ptr70 + (tl.full([XBLOCK], 0, tl.int32)), tmp283, None)
+     elif pid < num_xblocks_71:
+         pid_offset = pid - num_xblocks_70
+         xnumel = 1
+         rnumel = 1
+         xoffset = pid_offset * XBLOCK
+         xindex = xoffset + tl.arange(0, XBLOCK)[:]
+         xmask = tl.full([XBLOCK], True, tl.int1)
+         tmp284 = tl.load(in_ptr71 + (0))
+         tmp285 = tl.broadcast_to(tmp284, [XBLOCK])
+         tmp286 = 1.0
+         tmp287 = tmp285 + tmp286
+         tl.store(out_ptr71 + (tl.full([XBLOCK], 0, tl.int32)), tmp287, None)
+     elif pid < num_xblocks_72:
+         pid_offset = pid - num_xblocks_71
+         xnumel = 1
+         rnumel = 1
+         xoffset = pid_offset * XBLOCK
+         xindex = xoffset + tl.arange(0, XBLOCK)[:]
+         xmask = tl.full([XBLOCK], True, tl.int1)
+         tmp288 = tl.load(in_ptr72 + (0))
+         tmp289 = tl.broadcast_to(tmp288, [XBLOCK])
+         tmp290 = 1.0
+         tmp291 = tmp289 + tmp290
+         tl.store(out_ptr72 + (tl.full([XBLOCK], 0, tl.int32)), tmp291, None)
+     elif pid < num_xblocks_73:
+         pid_offset = pid - num_xblocks_72
+         xnumel = 1
+         rnumel = 1
+         xoffset = pid_offset * XBLOCK
+         xindex = xoffset + tl.arange(0, XBLOCK)[:]
+         xmask = tl.full([XBLOCK], True, tl.int1)
+         tmp292 = tl.load(in_ptr73 + (0))
+         tmp293 = tl.broadcast_to(tmp292, [XBLOCK])
+         tmp294 = 1.0
+         tmp295 = tmp293 + tmp294
+         tl.store(out_ptr73 + (tl.full([XBLOCK], 0, tl.int32)), tmp295, None)
+     elif pid < num_xblocks_74:
+         pid_offset = pid - num_xblocks_73
+         xnumel = 1
+         rnumel = 1
+         xoffset = pid_offset * XBLOCK
+         xindex = xoffset + tl.arange(0, XBLOCK)[:]
+         xmask = tl.full([XBLOCK], True, tl.int1)
+         tmp296 = tl.load(in_ptr74 + (0))
+         tmp297 = tl.broadcast_to(tmp296, [XBLOCK])
+         tmp298 = 1.0
+         tmp299 = tmp297 + tmp298
+         tl.store(out_ptr74 + (tl.full([XBLOCK], 0, tl.int32)), tmp299, None)
+     elif pid < num_xblocks_75:
+         pid_offset = pid - num_xblocks_74
+         xnumel = 1
+         rnumel = 1
+         xoffset = pid_offset * XBLOCK
+         xindex = xoffset + tl.arange(0, XBLOCK)[:]
+         xmask = tl.full([XBLOCK], True, tl.int1)
+         tmp300 = tl.load(in_ptr75 + (0))
+         tmp301 = tl.broadcast_to(tmp300, [XBLOCK])
+         tmp302 = 1.0
+         tmp303 = tmp301 + tmp302
+         tl.store(out_ptr75 + (tl.full([XBLOCK], 0, tl.int32)), tmp303, None)
+     elif pid < num_xblocks_76:
+         pid_offset = pid - num_xblocks_75
+         xnumel = 1
+         rnumel = 1
+         xoffset = pid_offset * XBLOCK
+         xindex = xoffset + tl.arange(0, XBLOCK)[:]
+         xmask = tl.full([XBLOCK], True, tl.int1)
+         tmp304 = tl.load(in_ptr76 + (0))
+         tmp305 = tl.broadcast_to(tmp304, [XBLOCK])
+         tmp306 = 1.0
+         tmp307 = tmp305 + tmp306
+         tl.store(out_ptr76 + (tl.full([XBLOCK], 0, tl.int32)), tmp307, None)
+     elif pid < num_xblocks_77:
+         pid_offset = pid - num_xblocks_76
+         xnumel = 1
+         rnumel = 1
+         xoffset = pid_offset * XBLOCK
+         xindex = xoffset + tl.arange(0, XBLOCK)[:]
+         xmask = tl.full([XBLOCK], True, tl.int1)
+         tmp308 = tl.load(in_ptr77 + (0))
+         tmp309 = tl.broadcast_to(tmp308, [XBLOCK])
+         tmp310 = 1.0
+         tmp311 = tmp309 + tmp310
+         tl.store(out_ptr77 + (tl.full([XBLOCK], 0, tl.int32)), tmp311, None)
+     elif pid < num_xblocks_78:
+         pid_offset = pid - num_xblocks_77
+         xnumel = 1
+         rnumel = 1
+         xoffset = pid_offset * XBLOCK
+         xindex = xoffset + tl.arange(0, XBLOCK)[:]
+         xmask = tl.full([XBLOCK], True, tl.int1)
+         tmp312 = tl.load(in_ptr78 + (0))
+         tmp313 = tl.broadcast_to(tmp312, [XBLOCK])
+         tmp314 = 1.0
+         tmp315 = tmp313 + tmp314
+         tl.store(out_ptr78 + (tl.full([XBLOCK], 0, tl.int32)), tmp315, None)
+     elif pid < num_xblocks_79:
+         pid_offset = pid - num_xblocks_78
+         xnumel = 1
+         rnumel = 1
+         xoffset = pid_offset * XBLOCK
+         xindex = xoffset + tl.arange(0, XBLOCK)[:]
+         xmask = tl.full([XBLOCK], True, tl.int1)
+         tmp316 = tl.load(in_ptr79 + (0))
+         tmp317 = tl.broadcast_to(tmp316, [XBLOCK])
+         tmp318 = 1.0
+         tmp319 = tmp317 + tmp318
+         tl.store(out_ptr79 + (tl.full([XBLOCK], 0, tl.int32)), tmp319, None)
+     elif pid < num_xblocks_80:
+         pid_offset = pid - num_xblocks_79
+         xnumel = 1
+         rnumel = 1
+         xoffset = pid_offset * XBLOCK
+         xindex = xoffset + tl.arange(0, XBLOCK)[:]
+         xmask = tl.full([XBLOCK], True, tl.int1)
+         tmp320 = tl.load(in_ptr80 + (0))
+         tmp321 = tl.broadcast_to(tmp320, [XBLOCK])
+         tmp322 = 1.0
+         tmp323 = tmp321 + tmp322
+         tl.store(out_ptr80 + (tl.full([XBLOCK], 0, tl.int32)), tmp323, None)
+     elif pid < num_xblocks_81:
+         pid_offset = pid - num_xblocks_80
+         xnumel = 1
+         rnumel = 1
+         xoffset = pid_offset * XBLOCK
+         xindex = xoffset + tl.arange(0, XBLOCK)[:]
+         xmask = tl.full([XBLOCK], True, tl.int1)
+         tmp324 = tl.load(in_ptr81 + (0))
+         tmp325 = tl.broadcast_to(tmp324, [XBLOCK])
+         tmp326 = 1.0
+         tmp327 = tmp325 + tmp326
+         tl.store(out_ptr81 + (tl.full([XBLOCK], 0, tl.int32)), tmp327, None)
+     elif pid < num_xblocks_82:
+         pid_offset = pid - num_xblocks_81
+         xnumel = 1
+         rnumel = 1
+         xoffset = pid_offset * XBLOCK
+         xindex = xoffset + tl.arange(0, XBLOCK)[:]
+         xmask = tl.full([XBLOCK], True, tl.int1)
+         tmp328 = tl.load(in_ptr82 + (0))
+         tmp329 = tl.broadcast_to(tmp328, [XBLOCK])
+         tmp330 = 1.0
+         tmp331 = tmp329 + tmp330
+         tl.store(out_ptr82 + (tl.full([XBLOCK], 0, tl.int32)), tmp331, None)
+     else:
+         pass
+ ''', device_str='cuda')
+ from torch._C import _cuda_getCurrentRawStream as get_raw_stream
+
+ import triton
+ import triton.language as tl
+ from torch._inductor.runtime.triton_heuristics import grid, split_scan_grid, grid_combo_kernels, start_graph, end_graph
+
+
+ # kernel path: /tmp/tmp2ln889l5/qy/cqyob3vusnsr6m3lyzdsbv63hei53t3lq7sf2utuh2x5menn4h2w.py
+ # Source Nodes: [], Original ATen: []
+
+ triton_for_fused_1 = async_compile.triton('triton_', '''
+ import triton
+ import triton.language as tl
+ from triton.compiler.compiler import AttrsDescriptor
+
+ from torch._inductor.runtime import triton_helpers, triton_heuristics
+ from torch._inductor.runtime.triton_helpers import libdevice, math as tl_math
+ from torch._inductor.runtime.hints import AutotuneHint, ReductionHint, TileHint, instance_descriptor, DeviceProperties
+
+ @triton_heuristics.foreach(
+     num_warps=8,
+     triton_meta={'signature': {0: '*fp32', 1: '*fp32', 2: '*fp32', 3: '*fp32', 4: '*fp32', 5: '*fp32', 6: '*fp32', 7: '*fp32', 8: '*fp32', 9: '*fp32', 10: '*fp32', 11: '*fp32', 12: '*fp32', 13: '*fp32', 14: '*fp32', 15: '*fp32', 16: '*fp32', 17: '*fp32', 18: '*fp32', 19: '*fp32', 20: '*fp32', 21: '*fp32', 22: '*fp32', 23: '*fp32', 24: '*fp32', 25: '*fp32', 26: '*fp32', 27: '*fp32', 28: '*fp32', 29: '*fp32', 30: '*fp32', 31: '*fp32', 32: '*fp32', 33: '*fp32', 34: '*fp32', 35: '*fp32', 36: '*fp32', 37: '*fp32', 38: '*fp32', 39: '*fp32', 40: '*fp32', 41: '*fp32', 42: '*fp32', 43: '*fp32', 44: '*fp32', 45: '*fp32', 46: '*fp32', 47: '*fp32', 48: '*fp32', 49: '*fp32', 50: '*fp32', 51: '*fp32', 52: '*fp32', 53: '*fp32', 54: '*fp32', 55: '*fp32', 56: '*fp32', 57: '*fp32', 58: '*fp32', 59: '*fp32', 60: '*fp32', 61: '*fp32', 62: '*fp32', 63: '*fp32', 64: '*fp32', 65: '*fp32', 66: '*fp32', 67: '*fp32', 68: '*fp32', 69: '*fp32', 70: '*fp32', 71: '*fp32', 72: '*fp32', 73: '*fp32', 74: '*fp32', 75: '*fp32', 76: '*fp32', 77: '*fp32', 78: '*fp32', 79: '*fp32', 80: '*fp32', 81: '*fp32', 82: '*fp32', 83: '*fp32', 84: '*fp32', 85: '*fp32', 86: '*fp32', 87: '*fp32', 88: '*fp32', 89: '*fp32', 90: '*fp32', 91: '*fp32', 92: '*fp32', 93: '*fp32', 94: '*fp32', 95: '*fp32', 96: '*fp32', 97: '*fp32', 98: '*fp32', 99: '*fp32', 100: '*fp32', 101: '*fp32', 102: '*fp32', 103: '*fp32', 104: '*fp32', 105: '*fp32', 106: '*fp32', 107: '*fp32', 108: '*fp32', 109: '*fp32', 110: '*fp32', 111: '*fp32', 112: '*fp32', 113: '*fp32', 114: '*fp32', 115: '*fp32', 116: '*fp32', 117: '*fp32', 118: '*fp32', 119: '*fp32', 120: '*fp32', 121: '*fp32', 122: '*fp32', 123: '*fp32', 124: '*fp32', 125: '*fp32', 126: '*fp32', 127: '*fp32', 128: '*fp32', 129: '*fp32'}, 'device': DeviceProperties(type='cuda', index=0, cc=90, major=9, regs_per_multiprocessor=65536, max_threads_per_multi_processor=2048, multi_processor_count=132), 'constants': {}, 'configs': [AttrsDescriptor(divisible_by_16=(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76, 77, 78, 79, 80, 81, 82, 83, 84, 85, 86, 87, 88, 89, 90, 91, 92, 93, 94, 95, 96, 97, 98, 99, 100, 101, 102, 103, 104, 105, 106, 107, 108, 109, 110, 111, 112, 113, 114, 115, 116, 117, 118, 119, 120, 121, 122, 123, 124, 125, 126, 127, 128, 129), equal_to_1=())]},
+     inductor_meta={'kernel_name': 'triton_for_fused_1', 'mutated_arg_names': ['in_ptr0', 'in_ptr1', 'in_ptr10', 'in_ptr11', 'in_ptr12', 'in_ptr13', 'in_ptr14', 'in_ptr15', 'in_ptr16', 'in_ptr17', 'in_ptr18', 'in_ptr19', 'in_ptr2', 'in_ptr20', 'in_ptr21', 'in_ptr22', 'in_ptr23', 'in_ptr24', 'in_ptr25', 'in_ptr26', 'in_ptr27', 'in_ptr28', 'in_ptr29', 'in_ptr3', 'in_ptr30', 'in_ptr31', 'in_ptr32', 'in_ptr33', 'in_ptr34', 'in_ptr35', 'in_ptr36', 'in_ptr37', 'in_ptr38', 'in_ptr39', 'in_ptr4', 'in_ptr40', 'in_ptr41', 'in_ptr42', 'in_ptr43', 'in_ptr44', 'in_ptr45', 'in_ptr46', 'in_ptr47', 'in_ptr48', 'in_ptr49', 'in_ptr5', 'in_ptr50', 'in_ptr51', 'in_ptr52', 'in_ptr53', 'in_ptr54', 'in_ptr55', 'in_ptr56', 'in_ptr57', 'in_ptr58', 'in_ptr59', 'in_ptr6', 'in_ptr60', 'in_ptr61', 'in_ptr62', 'in_ptr63', 'in_ptr64', 'in_ptr7', 'in_ptr8', 'in_ptr9', 'out_ptr0', 'out_ptr1', 'out_ptr10', 'out_ptr11', 'out_ptr12', 'out_ptr13', 'out_ptr14', 'out_ptr15', 'out_ptr16', 'out_ptr17', 'out_ptr18', 'out_ptr19', 'out_ptr2', 'out_ptr20', 'out_ptr21', 'out_ptr22', 'out_ptr23', 'out_ptr24', 'out_ptr25', 'out_ptr26', 'out_ptr27', 'out_ptr28', 'out_ptr29', 'out_ptr3', 'out_ptr30', 'out_ptr31', 'out_ptr32', 'out_ptr33', 'out_ptr34', 'out_ptr35', 'out_ptr36', 'out_ptr37', 'out_ptr38', 'out_ptr39', 'out_ptr4', 'out_ptr40', 'out_ptr41', 'out_ptr42', 'out_ptr43', 'out_ptr44', 'out_ptr45', 'out_ptr46', 'out_ptr47', 'out_ptr48', 'out_ptr49', 'out_ptr5', 'out_ptr50', 'out_ptr51', 'out_ptr52', 'out_ptr53', 'out_ptr54', 'out_ptr55', 'out_ptr56', 'out_ptr57', 'out_ptr58', 'out_ptr59', 'out_ptr6', 'out_ptr60', 'out_ptr61', 'out_ptr62', 'out_ptr63', 'out_ptr64', 'out_ptr7', 'out_ptr8', 'out_ptr9'], 'backend_hash': 'B0714AF1B5CA55D285C0AA74C7669268EFE45FDD9B7A0A70183B8D89AC8FCAC4', 'are_deterministic_algorithms_enabled': False, 'assert_indirect_indexing': True, 'autotune_local_cache': True, 'autotune_pointwise': True, 'autotune_remote_cache': None, 'force_disable_caches': False, 'dynamic_scale_rblock': True, 'max_autotune': False, 'max_autotune_pointwise': False, 'min_split_scan_rblock': 256, 'spill_threshold': 16, 'store_cubin': False},
+ )
+ @triton.jit
+ def triton_(in_ptr0, in_ptr1, in_ptr2, in_ptr3, in_ptr4, in_ptr5, in_ptr6, in_ptr7, in_ptr8, in_ptr9, in_ptr10, in_ptr11, in_ptr12, in_ptr13, in_ptr14, in_ptr15, in_ptr16, in_ptr17, in_ptr18, in_ptr19, in_ptr20, in_ptr21, in_ptr22, in_ptr23, in_ptr24, in_ptr25, in_ptr26, in_ptr27, in_ptr28, in_ptr29, in_ptr30, in_ptr31, in_ptr32, in_ptr33, in_ptr34, in_ptr35, in_ptr36, in_ptr37, in_ptr38, in_ptr39, in_ptr40, in_ptr41, in_ptr42, in_ptr43, in_ptr44, in_ptr45, in_ptr46, in_ptr47, in_ptr48, in_ptr49, in_ptr50, in_ptr51, in_ptr52, in_ptr53, in_ptr54, in_ptr55, in_ptr56, in_ptr57, in_ptr58, in_ptr59, in_ptr60, in_ptr61, in_ptr62, in_ptr63, in_ptr64, out_ptr0, out_ptr1, out_ptr2, out_ptr3, out_ptr4, out_ptr5, out_ptr6, out_ptr7, out_ptr8, out_ptr9, out_ptr10, out_ptr11, out_ptr12, out_ptr13, out_ptr14, out_ptr15, out_ptr16, out_ptr17, out_ptr18, out_ptr19, out_ptr20, out_ptr21, out_ptr22, out_ptr23, out_ptr24, out_ptr25, out_ptr26, out_ptr27, out_ptr28, out_ptr29, out_ptr30, out_ptr31, out_ptr32, out_ptr33, out_ptr34, out_ptr35, out_ptr36, out_ptr37, out_ptr38, out_ptr39, out_ptr40, out_ptr41, out_ptr42, out_ptr43, out_ptr44, out_ptr45, out_ptr46, out_ptr47, out_ptr48, out_ptr49, out_ptr50, out_ptr51, out_ptr52, out_ptr53, out_ptr54, out_ptr55, out_ptr56, out_ptr57, out_ptr58, out_ptr59, out_ptr60, out_ptr61, out_ptr62, out_ptr63, out_ptr64):
+     pid = tl.program_id(0)
+     XBLOCK: tl.constexpr = 1024
+     num_xblocks_0 = tl.cdiv(1, XBLOCK)
+     num_xblocks_1 = num_xblocks_0 + tl.cdiv(1, XBLOCK)
+     num_xblocks_2 = num_xblocks_1 + tl.cdiv(1, XBLOCK)
+     num_xblocks_3 = num_xblocks_2 + tl.cdiv(1, XBLOCK)
+     num_xblocks_4 = num_xblocks_3 + tl.cdiv(1, XBLOCK)
+     num_xblocks_5 = num_xblocks_4 + tl.cdiv(1, XBLOCK)
+     num_xblocks_6 = num_xblocks_5 + tl.cdiv(1, XBLOCK)
+     num_xblocks_7 = num_xblocks_6 + tl.cdiv(1, XBLOCK)
+     num_xblocks_8 = num_xblocks_7 + tl.cdiv(1, XBLOCK)
+     num_xblocks_9 = num_xblocks_8 + tl.cdiv(1, XBLOCK)
+     num_xblocks_10 = num_xblocks_9 + tl.cdiv(1, XBLOCK)
+     num_xblocks_11 = num_xblocks_10 + tl.cdiv(1, XBLOCK)
+     num_xblocks_12 = num_xblocks_11 + tl.cdiv(1, XBLOCK)
+     num_xblocks_13 = num_xblocks_12 + tl.cdiv(1, XBLOCK)
+     num_xblocks_14 = num_xblocks_13 + tl.cdiv(1, XBLOCK)
tl.cdiv(1, XBLOCK) + num_xblocks_15 = num_xblocks_14 + tl.cdiv(1, XBLOCK) + num_xblocks_16 = num_xblocks_15 + tl.cdiv(1, XBLOCK) + num_xblocks_17 = num_xblocks_16 + tl.cdiv(1, XBLOCK) + num_xblocks_18 = num_xblocks_17 + tl.cdiv(1, XBLOCK) + num_xblocks_19 = num_xblocks_18 + tl.cdiv(1, XBLOCK) + num_xblocks_20 = num_xblocks_19 + tl.cdiv(1, XBLOCK) + num_xblocks_21 = num_xblocks_20 + tl.cdiv(1, XBLOCK) + num_xblocks_22 = num_xblocks_21 + tl.cdiv(1, XBLOCK) + num_xblocks_23 = num_xblocks_22 + tl.cdiv(1, XBLOCK) + num_xblocks_24 = num_xblocks_23 + tl.cdiv(1, XBLOCK) + num_xblocks_25 = num_xblocks_24 + tl.cdiv(1, XBLOCK) + num_xblocks_26 = num_xblocks_25 + tl.cdiv(1, XBLOCK) + num_xblocks_27 = num_xblocks_26 + tl.cdiv(1, XBLOCK) + num_xblocks_28 = num_xblocks_27 + tl.cdiv(1, XBLOCK) + num_xblocks_29 = num_xblocks_28 + tl.cdiv(1, XBLOCK) + num_xblocks_30 = num_xblocks_29 + tl.cdiv(1, XBLOCK) + num_xblocks_31 = num_xblocks_30 + tl.cdiv(1, XBLOCK) + num_xblocks_32 = num_xblocks_31 + tl.cdiv(1, XBLOCK) + num_xblocks_33 = num_xblocks_32 + tl.cdiv(1, XBLOCK) + num_xblocks_34 = num_xblocks_33 + tl.cdiv(1, XBLOCK) + num_xblocks_35 = num_xblocks_34 + tl.cdiv(1, XBLOCK) + num_xblocks_36 = num_xblocks_35 + tl.cdiv(1, XBLOCK) + num_xblocks_37 = num_xblocks_36 + tl.cdiv(1, XBLOCK) + num_xblocks_38 = num_xblocks_37 + tl.cdiv(1, XBLOCK) + num_xblocks_39 = num_xblocks_38 + tl.cdiv(1, XBLOCK) + num_xblocks_40 = num_xblocks_39 + tl.cdiv(1, XBLOCK) + num_xblocks_41 = num_xblocks_40 + tl.cdiv(1, XBLOCK) + num_xblocks_42 = num_xblocks_41 + tl.cdiv(1, XBLOCK) + num_xblocks_43 = num_xblocks_42 + tl.cdiv(1, XBLOCK) + num_xblocks_44 = num_xblocks_43 + tl.cdiv(1, XBLOCK) + num_xblocks_45 = num_xblocks_44 + tl.cdiv(1, XBLOCK) + num_xblocks_46 = num_xblocks_45 + tl.cdiv(1, XBLOCK) + num_xblocks_47 = num_xblocks_46 + tl.cdiv(1, XBLOCK) + num_xblocks_48 = num_xblocks_47 + tl.cdiv(1, XBLOCK) + num_xblocks_49 = num_xblocks_48 + tl.cdiv(1, XBLOCK) + num_xblocks_50 = num_xblocks_49 + tl.cdiv(1, XBLOCK) + num_xblocks_51 = num_xblocks_50 + tl.cdiv(1, XBLOCK) + num_xblocks_52 = num_xblocks_51 + tl.cdiv(1, XBLOCK) + num_xblocks_53 = num_xblocks_52 + tl.cdiv(1, XBLOCK) + num_xblocks_54 = num_xblocks_53 + tl.cdiv(1, XBLOCK) + num_xblocks_55 = num_xblocks_54 + tl.cdiv(1, XBLOCK) + num_xblocks_56 = num_xblocks_55 + tl.cdiv(1, XBLOCK) + num_xblocks_57 = num_xblocks_56 + tl.cdiv(1, XBLOCK) + num_xblocks_58 = num_xblocks_57 + tl.cdiv(1, XBLOCK) + num_xblocks_59 = num_xblocks_58 + tl.cdiv(1, XBLOCK) + num_xblocks_60 = num_xblocks_59 + tl.cdiv(1, XBLOCK) + num_xblocks_61 = num_xblocks_60 + tl.cdiv(1, XBLOCK) + num_xblocks_62 = num_xblocks_61 + tl.cdiv(1, XBLOCK) + num_xblocks_63 = num_xblocks_62 + tl.cdiv(1, XBLOCK) + num_xblocks_64 = num_xblocks_63 + tl.cdiv(1, XBLOCK) + if pid < num_xblocks_0: + pid_offset = pid + xnumel = 1 + rnumel = 1 + xoffset = pid_offset * XBLOCK + xindex = xoffset + tl.arange(0, XBLOCK)[:] + xmask = tl.full([XBLOCK], True, tl.int1) + tmp0 = tl.load(in_ptr0 + (0)) + tmp1 = tl.broadcast_to(tmp0, [XBLOCK]) + tmp2 = 1.0 + tmp3 = tmp1 + tmp2 + tl.store(out_ptr0 + (tl.full([XBLOCK], 0, tl.int32)), tmp3, None) + elif pid < num_xblocks_1: + pid_offset = pid - num_xblocks_0 + xnumel = 1 + rnumel = 1 + xoffset = pid_offset * XBLOCK + xindex = xoffset + tl.arange(0, XBLOCK)[:] + xmask = tl.full([XBLOCK], True, tl.int1) + tmp4 = tl.load(in_ptr1 + (0)) + tmp5 = tl.broadcast_to(tmp4, [XBLOCK]) + tmp6 = 1.0 + tmp7 = tmp5 + tmp6 + tl.store(out_ptr1 + (tl.full([XBLOCK], 0, tl.int32)), tmp7, None) + elif pid < num_xblocks_2: + 
pid_offset = pid - num_xblocks_1 + xnumel = 1 + rnumel = 1 + xoffset = pid_offset * XBLOCK + xindex = xoffset + tl.arange(0, XBLOCK)[:] + xmask = tl.full([XBLOCK], True, tl.int1) + tmp8 = tl.load(in_ptr2 + (0)) + tmp9 = tl.broadcast_to(tmp8, [XBLOCK]) + tmp10 = 1.0 + tmp11 = tmp9 + tmp10 + tl.store(out_ptr2 + (tl.full([XBLOCK], 0, tl.int32)), tmp11, None) + elif pid < num_xblocks_3: + pid_offset = pid - num_xblocks_2 + xnumel = 1 + rnumel = 1 + xoffset = pid_offset * XBLOCK + xindex = xoffset + tl.arange(0, XBLOCK)[:] + xmask = tl.full([XBLOCK], True, tl.int1) + tmp12 = tl.load(in_ptr3 + (0)) + tmp13 = tl.broadcast_to(tmp12, [XBLOCK]) + tmp14 = 1.0 + tmp15 = tmp13 + tmp14 + tl.store(out_ptr3 + (tl.full([XBLOCK], 0, tl.int32)), tmp15, None) + elif pid < num_xblocks_4: + pid_offset = pid - num_xblocks_3 + xnumel = 1 + rnumel = 1 + xoffset = pid_offset * XBLOCK + xindex = xoffset + tl.arange(0, XBLOCK)[:] + xmask = tl.full([XBLOCK], True, tl.int1) + tmp16 = tl.load(in_ptr4 + (0)) + tmp17 = tl.broadcast_to(tmp16, [XBLOCK]) + tmp18 = 1.0 + tmp19 = tmp17 + tmp18 + tl.store(out_ptr4 + (tl.full([XBLOCK], 0, tl.int32)), tmp19, None) + elif pid < num_xblocks_5: + pid_offset = pid - num_xblocks_4 + xnumel = 1 + rnumel = 1 + xoffset = pid_offset * XBLOCK + xindex = xoffset + tl.arange(0, XBLOCK)[:] + xmask = tl.full([XBLOCK], True, tl.int1) + tmp20 = tl.load(in_ptr5 + (0)) + tmp21 = tl.broadcast_to(tmp20, [XBLOCK]) + tmp22 = 1.0 + tmp23 = tmp21 + tmp22 + tl.store(out_ptr5 + (tl.full([XBLOCK], 0, tl.int32)), tmp23, None) + elif pid < num_xblocks_6: + pid_offset = pid - num_xblocks_5 + xnumel = 1 + rnumel = 1 + xoffset = pid_offset * XBLOCK + xindex = xoffset + tl.arange(0, XBLOCK)[:] + xmask = tl.full([XBLOCK], True, tl.int1) + tmp24 = tl.load(in_ptr6 + (0)) + tmp25 = tl.broadcast_to(tmp24, [XBLOCK]) + tmp26 = 1.0 + tmp27 = tmp25 + tmp26 + tl.store(out_ptr6 + (tl.full([XBLOCK], 0, tl.int32)), tmp27, None) + elif pid < num_xblocks_7: + pid_offset = pid - num_xblocks_6 + xnumel = 1 + rnumel = 1 + xoffset = pid_offset * XBLOCK + xindex = xoffset + tl.arange(0, XBLOCK)[:] + xmask = tl.full([XBLOCK], True, tl.int1) + tmp28 = tl.load(in_ptr7 + (0)) + tmp29 = tl.broadcast_to(tmp28, [XBLOCK]) + tmp30 = 1.0 + tmp31 = tmp29 + tmp30 + tl.store(out_ptr7 + (tl.full([XBLOCK], 0, tl.int32)), tmp31, None) + elif pid < num_xblocks_8: + pid_offset = pid - num_xblocks_7 + xnumel = 1 + rnumel = 1 + xoffset = pid_offset * XBLOCK + xindex = xoffset + tl.arange(0, XBLOCK)[:] + xmask = tl.full([XBLOCK], True, tl.int1) + tmp32 = tl.load(in_ptr8 + (0)) + tmp33 = tl.broadcast_to(tmp32, [XBLOCK]) + tmp34 = 1.0 + tmp35 = tmp33 + tmp34 + tl.store(out_ptr8 + (tl.full([XBLOCK], 0, tl.int32)), tmp35, None) + elif pid < num_xblocks_9: + pid_offset = pid - num_xblocks_8 + xnumel = 1 + rnumel = 1 + xoffset = pid_offset * XBLOCK + xindex = xoffset + tl.arange(0, XBLOCK)[:] + xmask = tl.full([XBLOCK], True, tl.int1) + tmp36 = tl.load(in_ptr9 + (0)) + tmp37 = tl.broadcast_to(tmp36, [XBLOCK]) + tmp38 = 1.0 + tmp39 = tmp37 + tmp38 + tl.store(out_ptr9 + (tl.full([XBLOCK], 0, tl.int32)), tmp39, None) + elif pid < num_xblocks_10: + pid_offset = pid - num_xblocks_9 + xnumel = 1 + rnumel = 1 + xoffset = pid_offset * XBLOCK + xindex = xoffset + tl.arange(0, XBLOCK)[:] + xmask = tl.full([XBLOCK], True, tl.int1) + tmp40 = tl.load(in_ptr10 + (0)) + tmp41 = tl.broadcast_to(tmp40, [XBLOCK]) + tmp42 = 1.0 + tmp43 = tmp41 + tmp42 + tl.store(out_ptr10 + (tl.full([XBLOCK], 0, tl.int32)), tmp43, None) + elif pid < num_xblocks_11: + pid_offset = pid - 
num_xblocks_10 + xnumel = 1 + rnumel = 1 + xoffset = pid_offset * XBLOCK + xindex = xoffset + tl.arange(0, XBLOCK)[:] + xmask = tl.full([XBLOCK], True, tl.int1) + tmp44 = tl.load(in_ptr11 + (0)) + tmp45 = tl.broadcast_to(tmp44, [XBLOCK]) + tmp46 = 1.0 + tmp47 = tmp45 + tmp46 + tl.store(out_ptr11 + (tl.full([XBLOCK], 0, tl.int32)), tmp47, None) + elif pid < num_xblocks_12: + pid_offset = pid - num_xblocks_11 + xnumel = 1 + rnumel = 1 + xoffset = pid_offset * XBLOCK + xindex = xoffset + tl.arange(0, XBLOCK)[:] + xmask = tl.full([XBLOCK], True, tl.int1) + tmp48 = tl.load(in_ptr12 + (0)) + tmp49 = tl.broadcast_to(tmp48, [XBLOCK]) + tmp50 = 1.0 + tmp51 = tmp49 + tmp50 + tl.store(out_ptr12 + (tl.full([XBLOCK], 0, tl.int32)), tmp51, None) + elif pid < num_xblocks_13: + pid_offset = pid - num_xblocks_12 + xnumel = 1 + rnumel = 1 + xoffset = pid_offset * XBLOCK + xindex = xoffset + tl.arange(0, XBLOCK)[:] + xmask = tl.full([XBLOCK], True, tl.int1) + tmp52 = tl.load(in_ptr13 + (0)) + tmp53 = tl.broadcast_to(tmp52, [XBLOCK]) + tmp54 = 1.0 + tmp55 = tmp53 + tmp54 + tl.store(out_ptr13 + (tl.full([XBLOCK], 0, tl.int32)), tmp55, None) + elif pid < num_xblocks_14: + pid_offset = pid - num_xblocks_13 + xnumel = 1 + rnumel = 1 + xoffset = pid_offset * XBLOCK + xindex = xoffset + tl.arange(0, XBLOCK)[:] + xmask = tl.full([XBLOCK], True, tl.int1) + tmp56 = tl.load(in_ptr14 + (0)) + tmp57 = tl.broadcast_to(tmp56, [XBLOCK]) + tmp58 = 1.0 + tmp59 = tmp57 + tmp58 + tl.store(out_ptr14 + (tl.full([XBLOCK], 0, tl.int32)), tmp59, None) + elif pid < num_xblocks_15: + pid_offset = pid - num_xblocks_14 + xnumel = 1 + rnumel = 1 + xoffset = pid_offset * XBLOCK + xindex = xoffset + tl.arange(0, XBLOCK)[:] + xmask = tl.full([XBLOCK], True, tl.int1) + tmp60 = tl.load(in_ptr15 + (0)) + tmp61 = tl.broadcast_to(tmp60, [XBLOCK]) + tmp62 = 1.0 + tmp63 = tmp61 + tmp62 + tl.store(out_ptr15 + (tl.full([XBLOCK], 0, tl.int32)), tmp63, None) + elif pid < num_xblocks_16: + pid_offset = pid - num_xblocks_15 + xnumel = 1 + rnumel = 1 + xoffset = pid_offset * XBLOCK + xindex = xoffset + tl.arange(0, XBLOCK)[:] + xmask = tl.full([XBLOCK], True, tl.int1) + tmp64 = tl.load(in_ptr16 + (0)) + tmp65 = tl.broadcast_to(tmp64, [XBLOCK]) + tmp66 = 1.0 + tmp67 = tmp65 + tmp66 + tl.store(out_ptr16 + (tl.full([XBLOCK], 0, tl.int32)), tmp67, None) + elif pid < num_xblocks_17: + pid_offset = pid - num_xblocks_16 + xnumel = 1 + rnumel = 1 + xoffset = pid_offset * XBLOCK + xindex = xoffset + tl.arange(0, XBLOCK)[:] + xmask = tl.full([XBLOCK], True, tl.int1) + tmp68 = tl.load(in_ptr17 + (0)) + tmp69 = tl.broadcast_to(tmp68, [XBLOCK]) + tmp70 = 1.0 + tmp71 = tmp69 + tmp70 + tl.store(out_ptr17 + (tl.full([XBLOCK], 0, tl.int32)), tmp71, None) + elif pid < num_xblocks_18: + pid_offset = pid - num_xblocks_17 + xnumel = 1 + rnumel = 1 + xoffset = pid_offset * XBLOCK + xindex = xoffset + tl.arange(0, XBLOCK)[:] + xmask = tl.full([XBLOCK], True, tl.int1) + tmp72 = tl.load(in_ptr18 + (0)) + tmp73 = tl.broadcast_to(tmp72, [XBLOCK]) + tmp74 = 1.0 + tmp75 = tmp73 + tmp74 + tl.store(out_ptr18 + (tl.full([XBLOCK], 0, tl.int32)), tmp75, None) + elif pid < num_xblocks_19: + pid_offset = pid - num_xblocks_18 + xnumel = 1 + rnumel = 1 + xoffset = pid_offset * XBLOCK + xindex = xoffset + tl.arange(0, XBLOCK)[:] + xmask = tl.full([XBLOCK], True, tl.int1) + tmp76 = tl.load(in_ptr19 + (0)) + tmp77 = tl.broadcast_to(tmp76, [XBLOCK]) + tmp78 = 1.0 + tmp79 = tmp77 + tmp78 + tl.store(out_ptr19 + (tl.full([XBLOCK], 0, tl.int32)), tmp79, None) + elif pid < num_xblocks_20: + 
pid_offset = pid - num_xblocks_19 + xnumel = 1 + rnumel = 1 + xoffset = pid_offset * XBLOCK + xindex = xoffset + tl.arange(0, XBLOCK)[:] + xmask = tl.full([XBLOCK], True, tl.int1) + tmp80 = tl.load(in_ptr20 + (0)) + tmp81 = tl.broadcast_to(tmp80, [XBLOCK]) + tmp82 = 1.0 + tmp83 = tmp81 + tmp82 + tl.store(out_ptr20 + (tl.full([XBLOCK], 0, tl.int32)), tmp83, None) + elif pid < num_xblocks_21: + pid_offset = pid - num_xblocks_20 + xnumel = 1 + rnumel = 1 + xoffset = pid_offset * XBLOCK + xindex = xoffset + tl.arange(0, XBLOCK)[:] + xmask = tl.full([XBLOCK], True, tl.int1) + tmp84 = tl.load(in_ptr21 + (0)) + tmp85 = tl.broadcast_to(tmp84, [XBLOCK]) + tmp86 = 1.0 + tmp87 = tmp85 + tmp86 + tl.store(out_ptr21 + (tl.full([XBLOCK], 0, tl.int32)), tmp87, None) + elif pid < num_xblocks_22: + pid_offset = pid - num_xblocks_21 + xnumel = 1 + rnumel = 1 + xoffset = pid_offset * XBLOCK + xindex = xoffset + tl.arange(0, XBLOCK)[:] + xmask = tl.full([XBLOCK], True, tl.int1) + tmp88 = tl.load(in_ptr22 + (0)) + tmp89 = tl.broadcast_to(tmp88, [XBLOCK]) + tmp90 = 1.0 + tmp91 = tmp89 + tmp90 + tl.store(out_ptr22 + (tl.full([XBLOCK], 0, tl.int32)), tmp91, None) + elif pid < num_xblocks_23: + pid_offset = pid - num_xblocks_22 + xnumel = 1 + rnumel = 1 + xoffset = pid_offset * XBLOCK + xindex = xoffset + tl.arange(0, XBLOCK)[:] + xmask = tl.full([XBLOCK], True, tl.int1) + tmp92 = tl.load(in_ptr23 + (0)) + tmp93 = tl.broadcast_to(tmp92, [XBLOCK]) + tmp94 = 1.0 + tmp95 = tmp93 + tmp94 + tl.store(out_ptr23 + (tl.full([XBLOCK], 0, tl.int32)), tmp95, None) + elif pid < num_xblocks_24: + pid_offset = pid - num_xblocks_23 + xnumel = 1 + rnumel = 1 + xoffset = pid_offset * XBLOCK + xindex = xoffset + tl.arange(0, XBLOCK)[:] + xmask = tl.full([XBLOCK], True, tl.int1) + tmp96 = tl.load(in_ptr24 + (0)) + tmp97 = tl.broadcast_to(tmp96, [XBLOCK]) + tmp98 = 1.0 + tmp99 = tmp97 + tmp98 + tl.store(out_ptr24 + (tl.full([XBLOCK], 0, tl.int32)), tmp99, None) + elif pid < num_xblocks_25: + pid_offset = pid - num_xblocks_24 + xnumel = 1 + rnumel = 1 + xoffset = pid_offset * XBLOCK + xindex = xoffset + tl.arange(0, XBLOCK)[:] + xmask = tl.full([XBLOCK], True, tl.int1) + tmp100 = tl.load(in_ptr25 + (0)) + tmp101 = tl.broadcast_to(tmp100, [XBLOCK]) + tmp102 = 1.0 + tmp103 = tmp101 + tmp102 + tl.store(out_ptr25 + (tl.full([XBLOCK], 0, tl.int32)), tmp103, None) + elif pid < num_xblocks_26: + pid_offset = pid - num_xblocks_25 + xnumel = 1 + rnumel = 1 + xoffset = pid_offset * XBLOCK + xindex = xoffset + tl.arange(0, XBLOCK)[:] + xmask = tl.full([XBLOCK], True, tl.int1) + tmp104 = tl.load(in_ptr26 + (0)) + tmp105 = tl.broadcast_to(tmp104, [XBLOCK]) + tmp106 = 1.0 + tmp107 = tmp105 + tmp106 + tl.store(out_ptr26 + (tl.full([XBLOCK], 0, tl.int32)), tmp107, None) + elif pid < num_xblocks_27: + pid_offset = pid - num_xblocks_26 + xnumel = 1 + rnumel = 1 + xoffset = pid_offset * XBLOCK + xindex = xoffset + tl.arange(0, XBLOCK)[:] + xmask = tl.full([XBLOCK], True, tl.int1) + tmp108 = tl.load(in_ptr27 + (0)) + tmp109 = tl.broadcast_to(tmp108, [XBLOCK]) + tmp110 = 1.0 + tmp111 = tmp109 + tmp110 + tl.store(out_ptr27 + (tl.full([XBLOCK], 0, tl.int32)), tmp111, None) + elif pid < num_xblocks_28: + pid_offset = pid - num_xblocks_27 + xnumel = 1 + rnumel = 1 + xoffset = pid_offset * XBLOCK + xindex = xoffset + tl.arange(0, XBLOCK)[:] + xmask = tl.full([XBLOCK], True, tl.int1) + tmp112 = tl.load(in_ptr28 + (0)) + tmp113 = tl.broadcast_to(tmp112, [XBLOCK]) + tmp114 = 1.0 + tmp115 = tmp113 + tmp114 + tl.store(out_ptr28 + (tl.full([XBLOCK], 0, tl.int32)), 
tmp115, None) + elif pid < num_xblocks_29: + pid_offset = pid - num_xblocks_28 + xnumel = 1 + rnumel = 1 + xoffset = pid_offset * XBLOCK + xindex = xoffset + tl.arange(0, XBLOCK)[:] + xmask = tl.full([XBLOCK], True, tl.int1) + tmp116 = tl.load(in_ptr29 + (0)) + tmp117 = tl.broadcast_to(tmp116, [XBLOCK]) + tmp118 = 1.0 + tmp119 = tmp117 + tmp118 + tl.store(out_ptr29 + (tl.full([XBLOCK], 0, tl.int32)), tmp119, None) + elif pid < num_xblocks_30: + pid_offset = pid - num_xblocks_29 + xnumel = 1 + rnumel = 1 + xoffset = pid_offset * XBLOCK + xindex = xoffset + tl.arange(0, XBLOCK)[:] + xmask = tl.full([XBLOCK], True, tl.int1) + tmp120 = tl.load(in_ptr30 + (0)) + tmp121 = tl.broadcast_to(tmp120, [XBLOCK]) + tmp122 = 1.0 + tmp123 = tmp121 + tmp122 + tl.store(out_ptr30 + (tl.full([XBLOCK], 0, tl.int32)), tmp123, None) + elif pid < num_xblocks_31: + pid_offset = pid - num_xblocks_30 + xnumel = 1 + rnumel = 1 + xoffset = pid_offset * XBLOCK + xindex = xoffset + tl.arange(0, XBLOCK)[:] + xmask = tl.full([XBLOCK], True, tl.int1) + tmp124 = tl.load(in_ptr31 + (0)) + tmp125 = tl.broadcast_to(tmp124, [XBLOCK]) + tmp126 = 1.0 + tmp127 = tmp125 + tmp126 + tl.store(out_ptr31 + (tl.full([XBLOCK], 0, tl.int32)), tmp127, None) + elif pid < num_xblocks_32: + pid_offset = pid - num_xblocks_31 + xnumel = 1 + rnumel = 1 + xoffset = pid_offset * XBLOCK + xindex = xoffset + tl.arange(0, XBLOCK)[:] + xmask = tl.full([XBLOCK], True, tl.int1) + tmp128 = tl.load(in_ptr32 + (0)) + tmp129 = tl.broadcast_to(tmp128, [XBLOCK]) + tmp130 = 1.0 + tmp131 = tmp129 + tmp130 + tl.store(out_ptr32 + (tl.full([XBLOCK], 0, tl.int32)), tmp131, None) + elif pid < num_xblocks_33: + pid_offset = pid - num_xblocks_32 + xnumel = 1 + rnumel = 1 + xoffset = pid_offset * XBLOCK + xindex = xoffset + tl.arange(0, XBLOCK)[:] + xmask = tl.full([XBLOCK], True, tl.int1) + tmp132 = tl.load(in_ptr33 + (0)) + tmp133 = tl.broadcast_to(tmp132, [XBLOCK]) + tmp134 = 1.0 + tmp135 = tmp133 + tmp134 + tl.store(out_ptr33 + (tl.full([XBLOCK], 0, tl.int32)), tmp135, None) + elif pid < num_xblocks_34: + pid_offset = pid - num_xblocks_33 + xnumel = 1 + rnumel = 1 + xoffset = pid_offset * XBLOCK + xindex = xoffset + tl.arange(0, XBLOCK)[:] + xmask = tl.full([XBLOCK], True, tl.int1) + tmp136 = tl.load(in_ptr34 + (0)) + tmp137 = tl.broadcast_to(tmp136, [XBLOCK]) + tmp138 = 1.0 + tmp139 = tmp137 + tmp138 + tl.store(out_ptr34 + (tl.full([XBLOCK], 0, tl.int32)), tmp139, None) + elif pid < num_xblocks_35: + pid_offset = pid - num_xblocks_34 + xnumel = 1 + rnumel = 1 + xoffset = pid_offset * XBLOCK + xindex = xoffset + tl.arange(0, XBLOCK)[:] + xmask = tl.full([XBLOCK], True, tl.int1) + tmp140 = tl.load(in_ptr35 + (0)) + tmp141 = tl.broadcast_to(tmp140, [XBLOCK]) + tmp142 = 1.0 + tmp143 = tmp141 + tmp142 + tl.store(out_ptr35 + (tl.full([XBLOCK], 0, tl.int32)), tmp143, None) + elif pid < num_xblocks_36: + pid_offset = pid - num_xblocks_35 + xnumel = 1 + rnumel = 1 + xoffset = pid_offset * XBLOCK + xindex = xoffset + tl.arange(0, XBLOCK)[:] + xmask = tl.full([XBLOCK], True, tl.int1) + tmp144 = tl.load(in_ptr36 + (0)) + tmp145 = tl.broadcast_to(tmp144, [XBLOCK]) + tmp146 = 1.0 + tmp147 = tmp145 + tmp146 + tl.store(out_ptr36 + (tl.full([XBLOCK], 0, tl.int32)), tmp147, None) + elif pid < num_xblocks_37: + pid_offset = pid - num_xblocks_36 + xnumel = 1 + rnumel = 1 + xoffset = pid_offset * XBLOCK + xindex = xoffset + tl.arange(0, XBLOCK)[:] + xmask = tl.full([XBLOCK], True, tl.int1) + tmp148 = tl.load(in_ptr37 + (0)) + tmp149 = tl.broadcast_to(tmp148, [XBLOCK]) + tmp150 = 1.0 + 
tmp151 = tmp149 + tmp150 + tl.store(out_ptr37 + (tl.full([XBLOCK], 0, tl.int32)), tmp151, None) + elif pid < num_xblocks_38: + pid_offset = pid - num_xblocks_37 + xnumel = 1 + rnumel = 1 + xoffset = pid_offset * XBLOCK + xindex = xoffset + tl.arange(0, XBLOCK)[:] + xmask = tl.full([XBLOCK], True, tl.int1) + tmp152 = tl.load(in_ptr38 + (0)) + tmp153 = tl.broadcast_to(tmp152, [XBLOCK]) + tmp154 = 1.0 + tmp155 = tmp153 + tmp154 + tl.store(out_ptr38 + (tl.full([XBLOCK], 0, tl.int32)), tmp155, None) + elif pid < num_xblocks_39: + pid_offset = pid - num_xblocks_38 + xnumel = 1 + rnumel = 1 + xoffset = pid_offset * XBLOCK + xindex = xoffset + tl.arange(0, XBLOCK)[:] + xmask = tl.full([XBLOCK], True, tl.int1) + tmp156 = tl.load(in_ptr39 + (0)) + tmp157 = tl.broadcast_to(tmp156, [XBLOCK]) + tmp158 = 1.0 + tmp159 = tmp157 + tmp158 + tl.store(out_ptr39 + (tl.full([XBLOCK], 0, tl.int32)), tmp159, None) + elif pid < num_xblocks_40: + pid_offset = pid - num_xblocks_39 + xnumel = 1 + rnumel = 1 + xoffset = pid_offset * XBLOCK + xindex = xoffset + tl.arange(0, XBLOCK)[:] + xmask = tl.full([XBLOCK], True, tl.int1) + tmp160 = tl.load(in_ptr40 + (0)) + tmp161 = tl.broadcast_to(tmp160, [XBLOCK]) + tmp162 = 1.0 + tmp163 = tmp161 + tmp162 + tl.store(out_ptr40 + (tl.full([XBLOCK], 0, tl.int32)), tmp163, None) + elif pid < num_xblocks_41: + pid_offset = pid - num_xblocks_40 + xnumel = 1 + rnumel = 1 + xoffset = pid_offset * XBLOCK + xindex = xoffset + tl.arange(0, XBLOCK)[:] + xmask = tl.full([XBLOCK], True, tl.int1) + tmp164 = tl.load(in_ptr41 + (0)) + tmp165 = tl.broadcast_to(tmp164, [XBLOCK]) + tmp166 = 1.0 + tmp167 = tmp165 + tmp166 + tl.store(out_ptr41 + (tl.full([XBLOCK], 0, tl.int32)), tmp167, None) + elif pid < num_xblocks_42: + pid_offset = pid - num_xblocks_41 + xnumel = 1 + rnumel = 1 + xoffset = pid_offset * XBLOCK + xindex = xoffset + tl.arange(0, XBLOCK)[:] + xmask = tl.full([XBLOCK], True, tl.int1) + tmp168 = tl.load(in_ptr42 + (0)) + tmp169 = tl.broadcast_to(tmp168, [XBLOCK]) + tmp170 = 1.0 + tmp171 = tmp169 + tmp170 + tl.store(out_ptr42 + (tl.full([XBLOCK], 0, tl.int32)), tmp171, None) + elif pid < num_xblocks_43: + pid_offset = pid - num_xblocks_42 + xnumel = 1 + rnumel = 1 + xoffset = pid_offset * XBLOCK + xindex = xoffset + tl.arange(0, XBLOCK)[:] + xmask = tl.full([XBLOCK], True, tl.int1) + tmp172 = tl.load(in_ptr43 + (0)) + tmp173 = tl.broadcast_to(tmp172, [XBLOCK]) + tmp174 = 1.0 + tmp175 = tmp173 + tmp174 + tl.store(out_ptr43 + (tl.full([XBLOCK], 0, tl.int32)), tmp175, None) + elif pid < num_xblocks_44: + pid_offset = pid - num_xblocks_43 + xnumel = 1 + rnumel = 1 + xoffset = pid_offset * XBLOCK + xindex = xoffset + tl.arange(0, XBLOCK)[:] + xmask = tl.full([XBLOCK], True, tl.int1) + tmp176 = tl.load(in_ptr44 + (0)) + tmp177 = tl.broadcast_to(tmp176, [XBLOCK]) + tmp178 = 1.0 + tmp179 = tmp177 + tmp178 + tl.store(out_ptr44 + (tl.full([XBLOCK], 0, tl.int32)), tmp179, None) + elif pid < num_xblocks_45: + pid_offset = pid - num_xblocks_44 + xnumel = 1 + rnumel = 1 + xoffset = pid_offset * XBLOCK + xindex = xoffset + tl.arange(0, XBLOCK)[:] + xmask = tl.full([XBLOCK], True, tl.int1) + tmp180 = tl.load(in_ptr45 + (0)) + tmp181 = tl.broadcast_to(tmp180, [XBLOCK]) + tmp182 = 1.0 + tmp183 = tmp181 + tmp182 + tl.store(out_ptr45 + (tl.full([XBLOCK], 0, tl.int32)), tmp183, None) + elif pid < num_xblocks_46: + pid_offset = pid - num_xblocks_45 + xnumel = 1 + rnumel = 1 + xoffset = pid_offset * XBLOCK + xindex = xoffset + tl.arange(0, XBLOCK)[:] + xmask = tl.full([XBLOCK], True, tl.int1) + tmp184 = 
tl.load(in_ptr46 + (0)) + tmp185 = tl.broadcast_to(tmp184, [XBLOCK]) + tmp186 = 1.0 + tmp187 = tmp185 + tmp186 + tl.store(out_ptr46 + (tl.full([XBLOCK], 0, tl.int32)), tmp187, None) + elif pid < num_xblocks_47: + pid_offset = pid - num_xblocks_46 + xnumel = 1 + rnumel = 1 + xoffset = pid_offset * XBLOCK + xindex = xoffset + tl.arange(0, XBLOCK)[:] + xmask = tl.full([XBLOCK], True, tl.int1) + tmp188 = tl.load(in_ptr47 + (0)) + tmp189 = tl.broadcast_to(tmp188, [XBLOCK]) + tmp190 = 1.0 + tmp191 = tmp189 + tmp190 + tl.store(out_ptr47 + (tl.full([XBLOCK], 0, tl.int32)), tmp191, None) + elif pid < num_xblocks_48: + pid_offset = pid - num_xblocks_47 + xnumel = 1 + rnumel = 1 + xoffset = pid_offset * XBLOCK + xindex = xoffset + tl.arange(0, XBLOCK)[:] + xmask = tl.full([XBLOCK], True, tl.int1) + tmp192 = tl.load(in_ptr48 + (0)) + tmp193 = tl.broadcast_to(tmp192, [XBLOCK]) + tmp194 = 1.0 + tmp195 = tmp193 + tmp194 + tl.store(out_ptr48 + (tl.full([XBLOCK], 0, tl.int32)), tmp195, None) + elif pid < num_xblocks_49: + pid_offset = pid - num_xblocks_48 + xnumel = 1 + rnumel = 1 + xoffset = pid_offset * XBLOCK + xindex = xoffset + tl.arange(0, XBLOCK)[:] + xmask = tl.full([XBLOCK], True, tl.int1) + tmp196 = tl.load(in_ptr49 + (0)) + tmp197 = tl.broadcast_to(tmp196, [XBLOCK]) + tmp198 = 1.0 + tmp199 = tmp197 + tmp198 + tl.store(out_ptr49 + (tl.full([XBLOCK], 0, tl.int32)), tmp199, None) + elif pid < num_xblocks_50: + pid_offset = pid - num_xblocks_49 + xnumel = 1 + rnumel = 1 + xoffset = pid_offset * XBLOCK + xindex = xoffset + tl.arange(0, XBLOCK)[:] + xmask = tl.full([XBLOCK], True, tl.int1) + tmp200 = tl.load(in_ptr50 + (0)) + tmp201 = tl.broadcast_to(tmp200, [XBLOCK]) + tmp202 = 1.0 + tmp203 = tmp201 + tmp202 + tl.store(out_ptr50 + (tl.full([XBLOCK], 0, tl.int32)), tmp203, None) + elif pid < num_xblocks_51: + pid_offset = pid - num_xblocks_50 + xnumel = 1 + rnumel = 1 + xoffset = pid_offset * XBLOCK + xindex = xoffset + tl.arange(0, XBLOCK)[:] + xmask = tl.full([XBLOCK], True, tl.int1) + tmp204 = tl.load(in_ptr51 + (0)) + tmp205 = tl.broadcast_to(tmp204, [XBLOCK]) + tmp206 = 1.0 + tmp207 = tmp205 + tmp206 + tl.store(out_ptr51 + (tl.full([XBLOCK], 0, tl.int32)), tmp207, None) + elif pid < num_xblocks_52: + pid_offset = pid - num_xblocks_51 + xnumel = 1 + rnumel = 1 + xoffset = pid_offset * XBLOCK + xindex = xoffset + tl.arange(0, XBLOCK)[:] + xmask = tl.full([XBLOCK], True, tl.int1) + tmp208 = tl.load(in_ptr52 + (0)) + tmp209 = tl.broadcast_to(tmp208, [XBLOCK]) + tmp210 = 1.0 + tmp211 = tmp209 + tmp210 + tl.store(out_ptr52 + (tl.full([XBLOCK], 0, tl.int32)), tmp211, None) + elif pid < num_xblocks_53: + pid_offset = pid - num_xblocks_52 + xnumel = 1 + rnumel = 1 + xoffset = pid_offset * XBLOCK + xindex = xoffset + tl.arange(0, XBLOCK)[:] + xmask = tl.full([XBLOCK], True, tl.int1) + tmp212 = tl.load(in_ptr53 + (0)) + tmp213 = tl.broadcast_to(tmp212, [XBLOCK]) + tmp214 = 1.0 + tmp215 = tmp213 + tmp214 + tl.store(out_ptr53 + (tl.full([XBLOCK], 0, tl.int32)), tmp215, None) + elif pid < num_xblocks_54: + pid_offset = pid - num_xblocks_53 + xnumel = 1 + rnumel = 1 + xoffset = pid_offset * XBLOCK + xindex = xoffset + tl.arange(0, XBLOCK)[:] + xmask = tl.full([XBLOCK], True, tl.int1) + tmp216 = tl.load(in_ptr54 + (0)) + tmp217 = tl.broadcast_to(tmp216, [XBLOCK]) + tmp218 = 1.0 + tmp219 = tmp217 + tmp218 + tl.store(out_ptr54 + (tl.full([XBLOCK], 0, tl.int32)), tmp219, None) + elif pid < num_xblocks_55: + pid_offset = pid - num_xblocks_54 + xnumel = 1 + rnumel = 1 + xoffset = pid_offset * XBLOCK + xindex = 
xoffset + tl.arange(0, XBLOCK)[:] + xmask = tl.full([XBLOCK], True, tl.int1) + tmp220 = tl.load(in_ptr55 + (0)) + tmp221 = tl.broadcast_to(tmp220, [XBLOCK]) + tmp222 = 1.0 + tmp223 = tmp221 + tmp222 + tl.store(out_ptr55 + (tl.full([XBLOCK], 0, tl.int32)), tmp223, None) + elif pid < num_xblocks_56: + pid_offset = pid - num_xblocks_55 + xnumel = 1 + rnumel = 1 + xoffset = pid_offset * XBLOCK + xindex = xoffset + tl.arange(0, XBLOCK)[:] + xmask = tl.full([XBLOCK], True, tl.int1) + tmp224 = tl.load(in_ptr56 + (0)) + tmp225 = tl.broadcast_to(tmp224, [XBLOCK]) + tmp226 = 1.0 + tmp227 = tmp225 + tmp226 + tl.store(out_ptr56 + (tl.full([XBLOCK], 0, tl.int32)), tmp227, None) + elif pid < num_xblocks_57: + pid_offset = pid - num_xblocks_56 + xnumel = 1 + rnumel = 1 + xoffset = pid_offset * XBLOCK + xindex = xoffset + tl.arange(0, XBLOCK)[:] + xmask = tl.full([XBLOCK], True, tl.int1) + tmp228 = tl.load(in_ptr57 + (0)) + tmp229 = tl.broadcast_to(tmp228, [XBLOCK]) + tmp230 = 1.0 + tmp231 = tmp229 + tmp230 + tl.store(out_ptr57 + (tl.full([XBLOCK], 0, tl.int32)), tmp231, None) + elif pid < num_xblocks_58: + pid_offset = pid - num_xblocks_57 + xnumel = 1 + rnumel = 1 + xoffset = pid_offset * XBLOCK + xindex = xoffset + tl.arange(0, XBLOCK)[:] + xmask = tl.full([XBLOCK], True, tl.int1) + tmp232 = tl.load(in_ptr58 + (0)) + tmp233 = tl.broadcast_to(tmp232, [XBLOCK]) + tmp234 = 1.0 + tmp235 = tmp233 + tmp234 + tl.store(out_ptr58 + (tl.full([XBLOCK], 0, tl.int32)), tmp235, None) + elif pid < num_xblocks_59: + pid_offset = pid - num_xblocks_58 + xnumel = 1 + rnumel = 1 + xoffset = pid_offset * XBLOCK + xindex = xoffset + tl.arange(0, XBLOCK)[:] + xmask = tl.full([XBLOCK], True, tl.int1) + tmp236 = tl.load(in_ptr59 + (0)) + tmp237 = tl.broadcast_to(tmp236, [XBLOCK]) + tmp238 = 1.0 + tmp239 = tmp237 + tmp238 + tl.store(out_ptr59 + (tl.full([XBLOCK], 0, tl.int32)), tmp239, None) + elif pid < num_xblocks_60: + pid_offset = pid - num_xblocks_59 + xnumel = 1 + rnumel = 1 + xoffset = pid_offset * XBLOCK + xindex = xoffset + tl.arange(0, XBLOCK)[:] + xmask = tl.full([XBLOCK], True, tl.int1) + tmp240 = tl.load(in_ptr60 + (0)) + tmp241 = tl.broadcast_to(tmp240, [XBLOCK]) + tmp242 = 1.0 + tmp243 = tmp241 + tmp242 + tl.store(out_ptr60 + (tl.full([XBLOCK], 0, tl.int32)), tmp243, None) + elif pid < num_xblocks_61: + pid_offset = pid - num_xblocks_60 + xnumel = 1 + rnumel = 1 + xoffset = pid_offset * XBLOCK + xindex = xoffset + tl.arange(0, XBLOCK)[:] + xmask = tl.full([XBLOCK], True, tl.int1) + tmp244 = tl.load(in_ptr61 + (0)) + tmp245 = tl.broadcast_to(tmp244, [XBLOCK]) + tmp246 = 1.0 + tmp247 = tmp245 + tmp246 + tl.store(out_ptr61 + (tl.full([XBLOCK], 0, tl.int32)), tmp247, None) + elif pid < num_xblocks_62: + pid_offset = pid - num_xblocks_61 + xnumel = 1 + rnumel = 1 + xoffset = pid_offset * XBLOCK + xindex = xoffset + tl.arange(0, XBLOCK)[:] + xmask = tl.full([XBLOCK], True, tl.int1) + tmp248 = tl.load(in_ptr62 + (0)) + tmp249 = tl.broadcast_to(tmp248, [XBLOCK]) + tmp250 = 1.0 + tmp251 = tmp249 + tmp250 + tl.store(out_ptr62 + (tl.full([XBLOCK], 0, tl.int32)), tmp251, None) + elif pid < num_xblocks_63: + pid_offset = pid - num_xblocks_62 + xnumel = 1 + rnumel = 1 + xoffset = pid_offset * XBLOCK + xindex = xoffset + tl.arange(0, XBLOCK)[:] + xmask = tl.full([XBLOCK], True, tl.int1) + tmp252 = tl.load(in_ptr63 + (0)) + tmp253 = tl.broadcast_to(tmp252, [XBLOCK]) + tmp254 = 1.0 + tmp255 = tmp253 + tmp254 + tl.store(out_ptr63 + (tl.full([XBLOCK], 0, tl.int32)), tmp255, None) + elif pid < num_xblocks_64: + pid_offset = pid - 
num_xblocks_63 + xnumel = 1 + rnumel = 1 + xoffset = pid_offset * XBLOCK + xindex = xoffset + tl.arange(0, XBLOCK)[:] + xmask = tl.full([XBLOCK], True, tl.int1) + tmp256 = tl.load(in_ptr64 + (0)) + tmp257 = tl.broadcast_to(tmp256, [XBLOCK]) + tmp258 = 1.0 + tmp259 = tmp257 + tmp258 + tl.store(out_ptr64 + (tl.full([XBLOCK], 0, tl.int32)), tmp259, None) + else: + pass + ''', device_str='cuda') + + + # kernel path: /tmp/tmp2ln889l5/m5/cm5mdw5oawz5pdxxz24qte3du6nt7a74gsscrft2gcgexsa4agxx.py + # Source Nodes: [], Original ATen: [] + + triton_for_fused_2 = async_compile.triton('triton_', ''' + import triton + import triton.language as tl + from triton.compiler.compiler import AttrsDescriptor + + from torch._inductor.runtime import triton_helpers, triton_heuristics + from torch._inductor.runtime.triton_helpers import libdevice, math as tl_math + from torch._inductor.runtime.hints import AutotuneHint, ReductionHint, TileHint, instance_descriptor, DeviceProperties + + @triton_heuristics.foreach( + num_warps=8, + triton_meta={'signature': {0: '*fp32', 1: '*fp32', 2: '*fp32', 3: '*fp32', 4: '*fp32', 5: '*fp32', 6: '*fp32', 7: '*fp32', 8: '*fp32', 9: '*fp32', 10: '*fp32', 11: '*fp32', 12: '*fp32', 13: '*fp32', 14: '*fp32', 15: '*fp32', 16: '*fp32', 17: '*fp32', 18: '*fp32', 19: '*fp32', 20: '*fp32', 21: '*fp32', 22: '*fp32', 23: '*fp32', 24: '*fp32', 25: '*fp32', 26: '*fp32', 27: '*fp32', 28: '*fp32', 29: '*fp32', 30: '*fp32', 31: '*fp32', 32: '*fp32', 33: '*fp32', 34: '*fp32', 35: '*fp32', 36: '*fp32', 37: '*fp32', 38: '*fp32', 39: '*fp32', 40: '*fp32', 41: '*fp32', 42: '*fp32', 43: '*fp32', 44: '*fp32', 45: '*fp32', 46: '*fp32', 47: '*fp32', 48: '*fp32', 49: '*fp32', 50: '*fp32', 51: '*fp32', 52: '*fp32', 53: '*fp32', 54: '*fp32', 55: '*fp32', 56: '*fp32', 57: '*fp32', 58: '*fp32', 59: '*fp32', 60: '*fp32', 61: '*fp32', 62: '*fp32', 63: '*fp32', 64: '*fp32', 65: '*fp32', 66: '*fp32', 67: '*fp32', 68: '*fp32', 69: '*fp32', 70: '*fp32', 71: '*fp32', 72: '*fp32', 73: '*fp32', 74: '*fp32', 75: '*fp32', 76: '*fp32', 77: '*fp32', 78: '*fp32', 79: '*fp32', 80: '*fp32', 81: '*fp32', 82: '*fp32', 83: '*fp32', 84: '*fp32', 85: '*fp32', 86: '*fp32', 87: '*fp32', 88: '*fp32', 89: '*fp32', 90: '*fp32', 91: '*fp32', 92: '*fp32', 93: '*fp32', 94: '*fp32', 95: '*fp32', 96: '*fp32', 97: '*fp32', 98: '*fp32', 99: '*fp32', 100: '*fp32', 101: '*fp32', 102: '*fp32', 103: '*fp32', 104: '*fp32', 105: '*fp32', 106: '*fp32', 107: '*fp32', 108: '*fp32', 109: '*fp32', 110: '*fp32', 111: '*fp32', 112: '*fp32', 113: '*fp32', 114: '*fp32', 115: '*fp32', 116: '*fp32', 117: '*fp32', 118: '*fp32', 119: '*fp32', 120: '*fp32', 121: '*fp32', 122: '*fp32', 123: '*fp32', 124: '*fp32', 125: '*fp32', 126: '*fp32', 127: '*fp32', 128: '*fp32', 129: '*fp32', 130: '*fp32', 131: '*fp32', 132: '*fp32', 133: '*fp32', 134: '*fp32', 135: '*fp32', 136: '*fp32', 137: '*fp32', 138: '*fp32', 139: '*fp32', 140: '*fp32', 141: '*fp32', 142: '*fp32', 143: '*fp32', 144: '*fp32', 145: '*fp32', 146: '*fp32', 147: '*fp32', 148: '*fp32', 149: '*fp32', 150: '*fp32', 151: '*fp32'}, 'device': DeviceProperties(type='cuda', index=0, cc=90, major=9, regs_per_multiprocessor=65536, max_threads_per_multi_processor=2048, multi_processor_count=132), 'constants': {}, 'configs': [AttrsDescriptor(divisible_by_16=(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64, 65, 66, 67, 68, 
69, 70, 71, 72, 73, 74, 75, 76, 77, 78, 79, 80, 81, 82, 83, 84, 85, 86, 87, 88, 89, 90, 91, 92, 93, 94, 95, 96, 97, 98, 99, 100, 101, 102, 103, 104, 105, 106, 107, 108, 109, 110, 111, 112, 113, 114, 115, 116, 117, 118, 119, 120, 121, 122, 123, 124, 125, 126, 127, 128, 129, 130, 131, 132, 133, 134, 135, 136, 137, 138, 139, 140, 141, 142, 143, 144, 145, 146, 147, 148, 149, 150, 151), equal_to_1=())]}, + inductor_meta={'kernel_name': 'triton_for_fused_2', 'mutated_arg_names': ['in_ptr0', 'in_ptr10', 'in_ptr12', 'in_ptr13', 'in_ptr15', 'in_ptr17', 'in_ptr18', 'in_ptr2', 'in_ptr20', 'in_ptr22', 'in_ptr23', 'in_ptr25', 'in_ptr27', 'in_ptr28', 'in_ptr3', 'in_ptr30', 'in_ptr32', 'in_ptr33', 'in_ptr35', 'in_ptr37', 'in_ptr38', 'in_ptr40', 'in_ptr42', 'in_ptr43', 'in_ptr45', 'in_ptr47', 'in_ptr48', 'in_ptr5', 'in_ptr50', 'in_ptr52', 'in_ptr53', 'in_ptr55', 'in_ptr57', 'in_ptr58', 'in_ptr60', 'in_ptr62', 'in_ptr63', 'in_ptr65', 'in_ptr67', 'in_ptr68', 'in_ptr7', 'in_ptr70', 'in_ptr72', 'in_ptr73', 'in_ptr75', 'in_ptr77', 'in_ptr78', 'in_ptr8', 'in_ptr80', 'in_ptr82', 'in_ptr83', 'in_ptr85', 'in_ptr87', 'in_ptr88', 'in_ptr90', 'in_ptr92', 'in_ptr93', 'out_ptr0', 'out_ptr10', 'out_ptr11', 'out_ptr12', 'out_ptr14', 'out_ptr15', 'out_ptr16', 'out_ptr18', 'out_ptr19', 'out_ptr2', 'out_ptr20', 'out_ptr22', 'out_ptr23', 'out_ptr24', 'out_ptr26', 'out_ptr27', 'out_ptr28', 'out_ptr3', 'out_ptr30', 'out_ptr31', 'out_ptr32', 'out_ptr34', 'out_ptr35', 'out_ptr36', 'out_ptr38', 'out_ptr39', 'out_ptr4', 'out_ptr40', 'out_ptr42', 'out_ptr43', 'out_ptr44', 'out_ptr46', 'out_ptr47', 'out_ptr48', 'out_ptr50', 'out_ptr51', 'out_ptr52', 'out_ptr54', 'out_ptr55', 'out_ptr56', 'out_ptr58', 'out_ptr59', 'out_ptr6', 'out_ptr60', 'out_ptr62', 'out_ptr63', 'out_ptr64', 'out_ptr66', 'out_ptr67', 'out_ptr68', 'out_ptr7', 'out_ptr70', 'out_ptr71', 'out_ptr72', 'out_ptr74', 'out_ptr75', 'out_ptr8'], 'backend_hash': 'B0714AF1B5CA55D285C0AA74C7669268EFE45FDD9B7A0A70183B8D89AC8FCAC4', 'are_deterministic_algorithms_enabled': False, 'assert_indirect_indexing': True, 'autotune_local_cache': True, 'autotune_pointwise': True, 'autotune_remote_cache': None, 'force_disable_caches': False, 'dynamic_scale_rblock': True, 'max_autotune': False, 'max_autotune_pointwise': False, 'min_split_scan_rblock': 256, 'spill_threshold': 16, 'store_cubin': False}, + ) + @triton.jit + def triton_(in_ptr0, in_ptr1, in_ptr2, in_ptr3, in_ptr4, in_ptr5, in_ptr6, in_ptr7, in_ptr8, in_ptr9, in_ptr10, in_ptr11, in_ptr12, in_ptr13, in_ptr14, in_ptr15, in_ptr16, in_ptr17, in_ptr18, in_ptr19, in_ptr20, in_ptr21, in_ptr22, in_ptr23, in_ptr24, in_ptr25, in_ptr26, in_ptr27, in_ptr28, in_ptr29, in_ptr30, in_ptr31, in_ptr32, in_ptr33, in_ptr34, in_ptr35, in_ptr36, in_ptr37, in_ptr38, in_ptr39, in_ptr40, in_ptr41, in_ptr42, in_ptr43, in_ptr44, in_ptr45, in_ptr46, in_ptr47, in_ptr48, in_ptr49, in_ptr50, in_ptr51, in_ptr52, in_ptr53, in_ptr54, in_ptr55, in_ptr56, in_ptr57, in_ptr58, in_ptr59, in_ptr60, in_ptr61, in_ptr62, in_ptr63, in_ptr64, in_ptr65, in_ptr66, in_ptr67, in_ptr68, in_ptr69, in_ptr70, in_ptr71, in_ptr72, in_ptr73, in_ptr74, in_ptr75, in_ptr76, in_ptr77, in_ptr78, in_ptr79, in_ptr80, in_ptr81, in_ptr82, in_ptr83, in_ptr84, in_ptr85, in_ptr86, in_ptr87, in_ptr88, in_ptr89, in_ptr90, in_ptr91, in_ptr92, in_ptr93, in_ptr94, out_ptr0, out_ptr2, out_ptr3, out_ptr4, out_ptr6, out_ptr7, out_ptr8, out_ptr10, out_ptr11, out_ptr12, out_ptr14, out_ptr15, out_ptr16, out_ptr18, out_ptr19, out_ptr20, out_ptr22, out_ptr23, out_ptr24, out_ptr26, out_ptr27, out_ptr28, 
out_ptr30, out_ptr31, out_ptr32, out_ptr34, out_ptr35, out_ptr36, out_ptr38, out_ptr39, out_ptr40, out_ptr42, out_ptr43, out_ptr44, out_ptr46, out_ptr47, out_ptr48, out_ptr50, out_ptr51, out_ptr52, out_ptr54, out_ptr55, out_ptr56, out_ptr58, out_ptr59, out_ptr60, out_ptr62, out_ptr63, out_ptr64, out_ptr66, out_ptr67, out_ptr68, out_ptr70, out_ptr71, out_ptr72, out_ptr74, out_ptr75): + pid = tl.program_id(0) + XBLOCK: tl.constexpr = 1024 + num_xblocks_0 = tl.cdiv(38633472, XBLOCK) + num_xblocks_1 = num_xblocks_0 + tl.cdiv(786432, XBLOCK) + num_xblocks_2 = num_xblocks_1 + tl.cdiv(768, XBLOCK) + num_xblocks_3 = num_xblocks_2 + tl.cdiv(768, XBLOCK) + num_xblocks_4 = num_xblocks_3 + tl.cdiv(1769472, XBLOCK) + num_xblocks_5 = num_xblocks_4 + tl.cdiv(2304, XBLOCK) + num_xblocks_6 = num_xblocks_5 + tl.cdiv(589824, XBLOCK) + num_xblocks_7 = num_xblocks_6 + tl.cdiv(768, XBLOCK) + num_xblocks_8 = num_xblocks_7 + tl.cdiv(768, XBLOCK) + num_xblocks_9 = num_xblocks_8 + tl.cdiv(768, XBLOCK) + num_xblocks_10 = num_xblocks_9 + tl.cdiv(2359296, XBLOCK) + num_xblocks_11 = num_xblocks_10 + tl.cdiv(3072, XBLOCK) + num_xblocks_12 = num_xblocks_11 + tl.cdiv(2359296, XBLOCK) + num_xblocks_13 = num_xblocks_12 + tl.cdiv(768, XBLOCK) + num_xblocks_14 = num_xblocks_13 + tl.cdiv(768, XBLOCK) + num_xblocks_15 = num_xblocks_14 + tl.cdiv(768, XBLOCK) + num_xblocks_16 = num_xblocks_15 + tl.cdiv(1769472, XBLOCK) + num_xblocks_17 = num_xblocks_16 + tl.cdiv(2304, XBLOCK) + num_xblocks_18 = num_xblocks_17 + tl.cdiv(589824, XBLOCK) + if pid < num_xblocks_0: + pid_offset = pid + xnumel = 38633472 + rnumel = 1 + xoffset = pid_offset * XBLOCK + xindex = xoffset + tl.arange(0, XBLOCK)[:] + xmask = tl.full([XBLOCK], True, tl.int1) + x0 = xindex + tmp0 = tl.load(in_ptr0 + (x0), None) + tmp1 = tl.load(in_ptr1 + (x0), None) + tmp6 = tl.load(in_ptr2 + (x0), None) + tmp13 = tl.load(in_ptr3 + (x0), None) + tmp15 = tl.load(in_ptr4 + (0)) + tmp16 = tl.broadcast_to(tmp15, [XBLOCK]) + tmp2 = tmp1 - tmp0 + tmp3 = 0.09999999999999998 + tmp4 = tmp2 * tmp3 + tmp5 = tmp0 + tmp4 + tmp7 = 0.999 + tmp8 = tmp6 * tmp7 + tmp9 = tmp1 * tmp1 + tmp10 = 0.0010000000000000009 + tmp11 = tmp9 * tmp10 + tmp12 = tmp8 + tmp11 + tmp14 = libdevice.sqrt(tmp12) + tmp17 = libdevice.pow(tmp7, tmp16) + tmp18 = 1.0 + tmp19 = tmp17 - tmp18 + tmp20 = -tmp19 + tmp21 = libdevice.sqrt(tmp20) + tmp22 = tmp14 / tmp21 + tmp23 = 1e-08 + tmp24 = tmp22 + tmp23 + tmp25 = 0.9 + tmp26 = libdevice.pow(tmp25, tmp16) + tmp27 = tmp26 - tmp18 + tmp28 = 100.0 + tmp29 = tmp27 * tmp28 + tmp30 = tl.full([1], 1, tl.int32) + tmp31 = tmp30 / tmp29 + tmp32 = tmp24 / tmp31 + tmp33 = tmp5 / tmp32 + tmp34 = tmp13 + tmp33 + tl.store(out_ptr0 + (x0), tmp5, None) + tl.store(out_ptr2 + (x0), tmp34, None) + tl.store(out_ptr3 + (x0), tmp12, None) + elif pid < num_xblocks_1: + pid_offset = pid - num_xblocks_0 + xnumel = 786432 + rnumel = 1 + xoffset = pid_offset * XBLOCK + xindex = xoffset + tl.arange(0, XBLOCK)[:] + xmask = tl.full([XBLOCK], True, tl.int1) + x1 = xindex + tmp35 = tl.load(in_ptr5 + (x1), None) + tmp36 = tl.load(in_ptr6 + (x1), None) + tmp41 = tl.load(in_ptr7 + (x1), None) + tmp48 = tl.load(in_ptr8 + (x1), None) + tmp50 = tl.load(in_ptr9 + (0)) + tmp51 = tl.broadcast_to(tmp50, [XBLOCK]) + tmp37 = tmp36 - tmp35 + tmp38 = 0.09999999999999998 + tmp39 = tmp37 * tmp38 + tmp40 = tmp35 + tmp39 + tmp42 = 0.999 + tmp43 = tmp41 * tmp42 + tmp44 = tmp36 * tmp36 + tmp45 = 0.0010000000000000009 + tmp46 = tmp44 * tmp45 + tmp47 = tmp43 + tmp46 + tmp49 = libdevice.sqrt(tmp47) + tmp52 = libdevice.pow(tmp42, 
tmp51) + tmp53 = 1.0 + tmp54 = tmp52 - tmp53 + tmp55 = -tmp54 + tmp56 = libdevice.sqrt(tmp55) + tmp57 = tmp49 / tmp56 + tmp58 = 1e-08 + tmp59 = tmp57 + tmp58 + tmp60 = 0.9 + tmp61 = libdevice.pow(tmp60, tmp51) + tmp62 = tmp61 - tmp53 + tmp63 = 100.0 + tmp64 = tmp62 * tmp63 + tmp65 = tl.full([1], 1, tl.int32) + tmp66 = tmp65 / tmp64 + tmp67 = tmp59 / tmp66 + tmp68 = tmp40 / tmp67 + tmp69 = tmp48 + tmp68 + tl.store(out_ptr4 + (x1), tmp40, None) + tl.store(out_ptr6 + (x1), tmp69, None) + tl.store(out_ptr7 + (x1), tmp47, None) + elif pid < num_xblocks_2: + pid_offset = pid - num_xblocks_1 + xnumel = 768 + rnumel = 1 + xoffset = pid_offset * XBLOCK + xindex = xoffset + tl.arange(0, XBLOCK)[:] + xmask = xindex < xnumel + x2 = xindex + tmp70 = tl.load(in_ptr10 + (x2), xmask) + tmp71 = tl.load(in_ptr11 + (x2), xmask) + tmp76 = tl.load(in_ptr12 + (x2), xmask) + tmp83 = tl.load(in_ptr13 + (x2), xmask) + tmp85 = tl.load(in_ptr14 + (0)) + tmp86 = tl.broadcast_to(tmp85, [XBLOCK]) + tmp72 = tmp71 - tmp70 + tmp73 = 0.09999999999999998 + tmp74 = tmp72 * tmp73 + tmp75 = tmp70 + tmp74 + tmp77 = 0.999 + tmp78 = tmp76 * tmp77 + tmp79 = tmp71 * tmp71 + tmp80 = 0.0010000000000000009 + tmp81 = tmp79 * tmp80 + tmp82 = tmp78 + tmp81 + tmp84 = libdevice.sqrt(tmp82) + tmp87 = libdevice.pow(tmp77, tmp86) + tmp88 = 1.0 + tmp89 = tmp87 - tmp88 + tmp90 = -tmp89 + tmp91 = libdevice.sqrt(tmp90) + tmp92 = tmp84 / tmp91 + tmp93 = 1e-08 + tmp94 = tmp92 + tmp93 + tmp95 = 0.9 + tmp96 = libdevice.pow(tmp95, tmp86) + tmp97 = tmp96 - tmp88 + tmp98 = 100.0 + tmp99 = tmp97 * tmp98 + tmp100 = tl.full([1], 1, tl.int32) + tmp101 = tmp100 / tmp99 + tmp102 = tmp94 / tmp101 + tmp103 = tmp75 / tmp102 + tmp104 = tmp83 + tmp103 + tl.store(out_ptr8 + (x2), tmp75, xmask) + tl.store(out_ptr10 + (x2), tmp104, xmask) + tl.store(out_ptr11 + (x2), tmp82, xmask) + elif pid < num_xblocks_3: + pid_offset = pid - num_xblocks_2 + xnumel = 768 + rnumel = 1 + xoffset = pid_offset * XBLOCK + xindex = xoffset + tl.arange(0, XBLOCK)[:] + xmask = xindex < xnumel + x3 = xindex + tmp105 = tl.load(in_ptr15 + (x3), xmask) + tmp106 = tl.load(in_ptr16 + (x3), xmask) + tmp111 = tl.load(in_ptr17 + (x3), xmask) + tmp118 = tl.load(in_ptr18 + (x3), xmask) + tmp120 = tl.load(in_ptr19 + (0)) + tmp121 = tl.broadcast_to(tmp120, [XBLOCK]) + tmp107 = tmp106 - tmp105 + tmp108 = 0.09999999999999998 + tmp109 = tmp107 * tmp108 + tmp110 = tmp105 + tmp109 + tmp112 = 0.999 + tmp113 = tmp111 * tmp112 + tmp114 = tmp106 * tmp106 + tmp115 = 0.0010000000000000009 + tmp116 = tmp114 * tmp115 + tmp117 = tmp113 + tmp116 + tmp119 = libdevice.sqrt(tmp117) + tmp122 = libdevice.pow(tmp112, tmp121) + tmp123 = 1.0 + tmp124 = tmp122 - tmp123 + tmp125 = -tmp124 + tmp126 = libdevice.sqrt(tmp125) + tmp127 = tmp119 / tmp126 + tmp128 = 1e-08 + tmp129 = tmp127 + tmp128 + tmp130 = 0.9 + tmp131 = libdevice.pow(tmp130, tmp121) + tmp132 = tmp131 - tmp123 + tmp133 = 100.0 + tmp134 = tmp132 * tmp133 + tmp135 = tl.full([1], 1, tl.int32) + tmp136 = tmp135 / tmp134 + tmp137 = tmp129 / tmp136 + tmp138 = tmp110 / tmp137 + tmp139 = tmp118 + tmp138 + tl.store(out_ptr12 + (x3), tmp110, xmask) + tl.store(out_ptr14 + (x3), tmp139, xmask) + tl.store(out_ptr15 + (x3), tmp117, xmask) + elif pid < num_xblocks_4: + pid_offset = pid - num_xblocks_3 + xnumel = 1769472 + rnumel = 1 + xoffset = pid_offset * XBLOCK + xindex = xoffset + tl.arange(0, XBLOCK)[:] + xmask = tl.full([XBLOCK], True, tl.int1) + x4 = xindex + tmp140 = tl.load(in_ptr20 + (x4), None) + tmp141 = tl.load(in_ptr21 + (x4), None) + tmp146 = tl.load(in_ptr22 + 
(x4), None) + tmp153 = tl.load(in_ptr23 + (x4), None) + tmp155 = tl.load(in_ptr24 + (0)) + tmp156 = tl.broadcast_to(tmp155, [XBLOCK]) + tmp142 = tmp141 - tmp140 + tmp143 = 0.09999999999999998 + tmp144 = tmp142 * tmp143 + tmp145 = tmp140 + tmp144 + tmp147 = 0.999 + tmp148 = tmp146 * tmp147 + tmp149 = tmp141 * tmp141 + tmp150 = 0.0010000000000000009 + tmp151 = tmp149 * tmp150 + tmp152 = tmp148 + tmp151 + tmp154 = libdevice.sqrt(tmp152) + tmp157 = libdevice.pow(tmp147, tmp156) + tmp158 = 1.0 + tmp159 = tmp157 - tmp158 + tmp160 = -tmp159 + tmp161 = libdevice.sqrt(tmp160) + tmp162 = tmp154 / tmp161 + tmp163 = 1e-08 + tmp164 = tmp162 + tmp163 + tmp165 = 0.9 + tmp166 = libdevice.pow(tmp165, tmp156) + tmp167 = tmp166 - tmp158 + tmp168 = 100.0 + tmp169 = tmp167 * tmp168 + tmp170 = tl.full([1], 1, tl.int32) + tmp171 = tmp170 / tmp169 + tmp172 = tmp164 / tmp171 + tmp173 = tmp145 / tmp172 + tmp174 = tmp153 + tmp173 + tl.store(out_ptr16 + (x4), tmp145, None) + tl.store(out_ptr18 + (x4), tmp174, None) + tl.store(out_ptr19 + (x4), tmp152, None) + elif pid < num_xblocks_5: + pid_offset = pid - num_xblocks_4 + xnumel = 2304 + rnumel = 1 + xoffset = pid_offset * XBLOCK + xindex = xoffset + tl.arange(0, XBLOCK)[:] + xmask = xindex < xnumel + x5 = xindex + tmp175 = tl.load(in_ptr25 + (x5), xmask) + tmp176 = tl.load(in_ptr26 + (x5), xmask) + tmp181 = tl.load(in_ptr27 + (x5), xmask) + tmp188 = tl.load(in_ptr28 + (x5), xmask) + tmp190 = tl.load(in_ptr29 + (0)) + tmp191 = tl.broadcast_to(tmp190, [XBLOCK]) + tmp177 = tmp176 - tmp175 + tmp178 = 0.09999999999999998 + tmp179 = tmp177 * tmp178 + tmp180 = tmp175 + tmp179 + tmp182 = 0.999 + tmp183 = tmp181 * tmp182 + tmp184 = tmp176 * tmp176 + tmp185 = 0.0010000000000000009 + tmp186 = tmp184 * tmp185 + tmp187 = tmp183 + tmp186 + tmp189 = libdevice.sqrt(tmp187) + tmp192 = libdevice.pow(tmp182, tmp191) + tmp193 = 1.0 + tmp194 = tmp192 - tmp193 + tmp195 = -tmp194 + tmp196 = libdevice.sqrt(tmp195) + tmp197 = tmp189 / tmp196 + tmp198 = 1e-08 + tmp199 = tmp197 + tmp198 + tmp200 = 0.9 + tmp201 = libdevice.pow(tmp200, tmp191) + tmp202 = tmp201 - tmp193 + tmp203 = 100.0 + tmp204 = tmp202 * tmp203 + tmp205 = tl.full([1], 1, tl.int32) + tmp206 = tmp205 / tmp204 + tmp207 = tmp199 / tmp206 + tmp208 = tmp180 / tmp207 + tmp209 = tmp188 + tmp208 + tl.store(out_ptr20 + (x5), tmp180, xmask) + tl.store(out_ptr22 + (x5), tmp209, xmask) + tl.store(out_ptr23 + (x5), tmp187, xmask) + elif pid < num_xblocks_6: + pid_offset = pid - num_xblocks_5 + xnumel = 589824 + rnumel = 1 + xoffset = pid_offset * XBLOCK + xindex = xoffset + tl.arange(0, XBLOCK)[:] + xmask = tl.full([XBLOCK], True, tl.int1) + x6 = xindex + tmp210 = tl.load(in_ptr30 + (x6), None) + tmp211 = tl.load(in_ptr31 + (x6), None) + tmp216 = tl.load(in_ptr32 + (x6), None) + tmp223 = tl.load(in_ptr33 + (x6), None) + tmp225 = tl.load(in_ptr34 + (0)) + tmp226 = tl.broadcast_to(tmp225, [XBLOCK]) + tmp212 = tmp211 - tmp210 + tmp213 = 0.09999999999999998 + tmp214 = tmp212 * tmp213 + tmp215 = tmp210 + tmp214 + tmp217 = 0.999 + tmp218 = tmp216 * tmp217 + tmp219 = tmp211 * tmp211 + tmp220 = 0.0010000000000000009 + tmp221 = tmp219 * tmp220 + tmp222 = tmp218 + tmp221 + tmp224 = libdevice.sqrt(tmp222) + tmp227 = libdevice.pow(tmp217, tmp226) + tmp228 = 1.0 + tmp229 = tmp227 - tmp228 + tmp230 = -tmp229 + tmp231 = libdevice.sqrt(tmp230) + tmp232 = tmp224 / tmp231 + tmp233 = 1e-08 + tmp234 = tmp232 + tmp233 + tmp235 = 0.9 + tmp236 = libdevice.pow(tmp235, tmp226) + tmp237 = tmp236 - tmp228 + tmp238 = 100.0 + tmp239 = tmp237 * tmp238 + tmp240 = 
tl.full([1], 1, tl.int32) + tmp241 = tmp240 / tmp239 + tmp242 = tmp234 / tmp241 + tmp243 = tmp215 / tmp242 + tmp244 = tmp223 + tmp243 + tl.store(out_ptr24 + (x6), tmp215, None) + tl.store(out_ptr26 + (x6), tmp244, None) + tl.store(out_ptr27 + (x6), tmp222, None) + elif pid < num_xblocks_7: + pid_offset = pid - num_xblocks_6 + xnumel = 768 + rnumel = 1 + xoffset = pid_offset * XBLOCK + xindex = xoffset + tl.arange(0, XBLOCK)[:] + xmask = xindex < xnumel + x7 = xindex + tmp245 = tl.load(in_ptr35 + (x7), xmask) + tmp246 = tl.load(in_ptr36 + (x7), xmask) + tmp251 = tl.load(in_ptr37 + (x7), xmask) + tmp258 = tl.load(in_ptr38 + (x7), xmask) + tmp260 = tl.load(in_ptr39 + (0)) + tmp261 = tl.broadcast_to(tmp260, [XBLOCK]) + tmp247 = tmp246 - tmp245 + tmp248 = 0.09999999999999998 + tmp249 = tmp247 * tmp248 + tmp250 = tmp245 + tmp249 + tmp252 = 0.999 + tmp253 = tmp251 * tmp252 + tmp254 = tmp246 * tmp246 + tmp255 = 0.0010000000000000009 + tmp256 = tmp254 * tmp255 + tmp257 = tmp253 + tmp256 + tmp259 = libdevice.sqrt(tmp257) + tmp262 = libdevice.pow(tmp252, tmp261) + tmp263 = 1.0 + tmp264 = tmp262 - tmp263 + tmp265 = -tmp264 + tmp266 = libdevice.sqrt(tmp265) + tmp267 = tmp259 / tmp266 + tmp268 = 1e-08 + tmp269 = tmp267 + tmp268 + tmp270 = 0.9 + tmp271 = libdevice.pow(tmp270, tmp261) + tmp272 = tmp271 - tmp263 + tmp273 = 100.0 + tmp274 = tmp272 * tmp273 + tmp275 = tl.full([1], 1, tl.int32) + tmp276 = tmp275 / tmp274 + tmp277 = tmp269 / tmp276 + tmp278 = tmp250 / tmp277 + tmp279 = tmp258 + tmp278 + tl.store(out_ptr28 + (x7), tmp250, xmask) + tl.store(out_ptr30 + (x7), tmp279, xmask) + tl.store(out_ptr31 + (x7), tmp257, xmask) + elif pid < num_xblocks_8: + pid_offset = pid - num_xblocks_7 + xnumel = 768 + rnumel = 1 + xoffset = pid_offset * XBLOCK + xindex = xoffset + tl.arange(0, XBLOCK)[:] + xmask = xindex < xnumel + x8 = xindex + tmp280 = tl.load(in_ptr40 + (x8), xmask) + tmp281 = tl.load(in_ptr41 + (x8), xmask) + tmp286 = tl.load(in_ptr42 + (x8), xmask) + tmp293 = tl.load(in_ptr43 + (x8), xmask) + tmp295 = tl.load(in_ptr44 + (0)) + tmp296 = tl.broadcast_to(tmp295, [XBLOCK]) + tmp282 = tmp281 - tmp280 + tmp283 = 0.09999999999999998 + tmp284 = tmp282 * tmp283 + tmp285 = tmp280 + tmp284 + tmp287 = 0.999 + tmp288 = tmp286 * tmp287 + tmp289 = tmp281 * tmp281 + tmp290 = 0.0010000000000000009 + tmp291 = tmp289 * tmp290 + tmp292 = tmp288 + tmp291 + tmp294 = libdevice.sqrt(tmp292) + tmp297 = libdevice.pow(tmp287, tmp296) + tmp298 = 1.0 + tmp299 = tmp297 - tmp298 + tmp300 = -tmp299 + tmp301 = libdevice.sqrt(tmp300) + tmp302 = tmp294 / tmp301 + tmp303 = 1e-08 + tmp304 = tmp302 + tmp303 + tmp305 = 0.9 + tmp306 = libdevice.pow(tmp305, tmp296) + tmp307 = tmp306 - tmp298 + tmp308 = 100.0 + tmp309 = tmp307 * tmp308 + tmp310 = tl.full([1], 1, tl.int32) + tmp311 = tmp310 / tmp309 + tmp312 = tmp304 / tmp311 + tmp313 = tmp285 / tmp312 + tmp314 = tmp293 + tmp313 + tl.store(out_ptr32 + (x8), tmp285, xmask) + tl.store(out_ptr34 + (x8), tmp314, xmask) + tl.store(out_ptr35 + (x8), tmp292, xmask) + elif pid < num_xblocks_9: + pid_offset = pid - num_xblocks_8 + xnumel = 768 + rnumel = 1 + xoffset = pid_offset * XBLOCK + xindex = xoffset + tl.arange(0, XBLOCK)[:] + xmask = xindex < xnumel + x9 = xindex + tmp315 = tl.load(in_ptr45 + (x9), xmask) + tmp316 = tl.load(in_ptr46 + (x9), xmask) + tmp321 = tl.load(in_ptr47 + (x9), xmask) + tmp328 = tl.load(in_ptr48 + (x9), xmask) + tmp330 = tl.load(in_ptr49 + (0)) + tmp331 = tl.broadcast_to(tmp330, [XBLOCK]) + tmp317 = tmp316 - tmp315 + tmp318 = 0.09999999999999998 + tmp319 = tmp317 * 
tmp318 + tmp320 = tmp315 + tmp319 + tmp322 = 0.999 + tmp323 = tmp321 * tmp322 + tmp324 = tmp316 * tmp316 + tmp325 = 0.0010000000000000009 + tmp326 = tmp324 * tmp325 + tmp327 = tmp323 + tmp326 + tmp329 = libdevice.sqrt(tmp327) + tmp332 = libdevice.pow(tmp322, tmp331) + tmp333 = 1.0 + tmp334 = tmp332 - tmp333 + tmp335 = -tmp334 + tmp336 = libdevice.sqrt(tmp335) + tmp337 = tmp329 / tmp336 + tmp338 = 1e-08 + tmp339 = tmp337 + tmp338 + tmp340 = 0.9 + tmp341 = libdevice.pow(tmp340, tmp331) + tmp342 = tmp341 - tmp333 + tmp343 = 100.0 + tmp344 = tmp342 * tmp343 + tmp345 = tl.full([1], 1, tl.int32) + tmp346 = tmp345 / tmp344 + tmp347 = tmp339 / tmp346 + tmp348 = tmp320 / tmp347 + tmp349 = tmp328 + tmp348 + tl.store(out_ptr36 + (x9), tmp320, xmask) + tl.store(out_ptr38 + (x9), tmp349, xmask) + tl.store(out_ptr39 + (x9), tmp327, xmask) + elif pid < num_xblocks_10: + pid_offset = pid - num_xblocks_9 + xnumel = 2359296 + rnumel = 1 + xoffset = pid_offset * XBLOCK + xindex = xoffset + tl.arange(0, XBLOCK)[:] + xmask = tl.full([XBLOCK], True, tl.int1) + x10 = xindex + tmp350 = tl.load(in_ptr50 + (x10), None) + tmp351 = tl.load(in_ptr51 + (x10), None) + tmp356 = tl.load(in_ptr52 + (x10), None) + tmp363 = tl.load(in_ptr53 + (x10), None) + tmp365 = tl.load(in_ptr54 + (0)) + tmp366 = tl.broadcast_to(tmp365, [XBLOCK]) + tmp352 = tmp351 - tmp350 + tmp353 = 0.09999999999999998 + tmp354 = tmp352 * tmp353 + tmp355 = tmp350 + tmp354 + tmp357 = 0.999 + tmp358 = tmp356 * tmp357 + tmp359 = tmp351 * tmp351 + tmp360 = 0.0010000000000000009 + tmp361 = tmp359 * tmp360 + tmp362 = tmp358 + tmp361 + tmp364 = libdevice.sqrt(tmp362) + tmp367 = libdevice.pow(tmp357, tmp366) + tmp368 = 1.0 + tmp369 = tmp367 - tmp368 + tmp370 = -tmp369 + tmp371 = libdevice.sqrt(tmp370) + tmp372 = tmp364 / tmp371 + tmp373 = 1e-08 + tmp374 = tmp372 + tmp373 + tmp375 = 0.9 + tmp376 = libdevice.pow(tmp375, tmp366) + tmp377 = tmp376 - tmp368 + tmp378 = 100.0 + tmp379 = tmp377 * tmp378 + tmp380 = tl.full([1], 1, tl.int32) + tmp381 = tmp380 / tmp379 + tmp382 = tmp374 / tmp381 + tmp383 = tmp355 / tmp382 + tmp384 = tmp363 + tmp383 + tl.store(out_ptr40 + (x10), tmp355, None) + tl.store(out_ptr42 + (x10), tmp384, None) + tl.store(out_ptr43 + (x10), tmp362, None) + elif pid < num_xblocks_11: + pid_offset = pid - num_xblocks_10 + xnumel = 3072 + rnumel = 1 + xoffset = pid_offset * XBLOCK + xindex = xoffset + tl.arange(0, XBLOCK)[:] + xmask = xindex < xnumel + x11 = xindex + tmp385 = tl.load(in_ptr55 + (x11), xmask) + tmp386 = tl.load(in_ptr56 + (x11), xmask) + tmp391 = tl.load(in_ptr57 + (x11), xmask) + tmp398 = tl.load(in_ptr58 + (x11), xmask) + tmp400 = tl.load(in_ptr59 + (0)) + tmp401 = tl.broadcast_to(tmp400, [XBLOCK]) + tmp387 = tmp386 - tmp385 + tmp388 = 0.09999999999999998 + tmp389 = tmp387 * tmp388 + tmp390 = tmp385 + tmp389 + tmp392 = 0.999 + tmp393 = tmp391 * tmp392 + tmp394 = tmp386 * tmp386 + tmp395 = 0.0010000000000000009 + tmp396 = tmp394 * tmp395 + tmp397 = tmp393 + tmp396 + tmp399 = libdevice.sqrt(tmp397) + tmp402 = libdevice.pow(tmp392, tmp401) + tmp403 = 1.0 + tmp404 = tmp402 - tmp403 + tmp405 = -tmp404 + tmp406 = libdevice.sqrt(tmp405) + tmp407 = tmp399 / tmp406 + tmp408 = 1e-08 + tmp409 = tmp407 + tmp408 + tmp410 = 0.9 + tmp411 = libdevice.pow(tmp410, tmp401) + tmp412 = tmp411 - tmp403 + tmp413 = 100.0 + tmp414 = tmp412 * tmp413 + tmp415 = tl.full([1], 1, tl.int32) + tmp416 = tmp415 / tmp414 + tmp417 = tmp409 / tmp416 + tmp418 = tmp390 / tmp417 + tmp419 = tmp398 + tmp418 + tl.store(out_ptr44 + (x11), tmp390, xmask) + tl.store(out_ptr46 + 
(x11), tmp419, xmask) + tl.store(out_ptr47 + (x11), tmp397, xmask) + elif pid < num_xblocks_12: + pid_offset = pid - num_xblocks_11 + xnumel = 2359296 + rnumel = 1 + xoffset = pid_offset * XBLOCK + xindex = xoffset + tl.arange(0, XBLOCK)[:] + xmask = tl.full([XBLOCK], True, tl.int1) + x12 = xindex + tmp420 = tl.load(in_ptr60 + (x12), None) + tmp421 = tl.load(in_ptr61 + (x12), None) + tmp426 = tl.load(in_ptr62 + (x12), None) + tmp433 = tl.load(in_ptr63 + (x12), None) + tmp435 = tl.load(in_ptr64 + (0)) + tmp436 = tl.broadcast_to(tmp435, [XBLOCK]) + tmp422 = tmp421 - tmp420 + tmp423 = 0.09999999999999998 + tmp424 = tmp422 * tmp423 + tmp425 = tmp420 + tmp424 + tmp427 = 0.999 + tmp428 = tmp426 * tmp427 + tmp429 = tmp421 * tmp421 + tmp430 = 0.0010000000000000009 + tmp431 = tmp429 * tmp430 + tmp432 = tmp428 + tmp431 + tmp434 = libdevice.sqrt(tmp432) + tmp437 = libdevice.pow(tmp427, tmp436) + tmp438 = 1.0 + tmp439 = tmp437 - tmp438 + tmp440 = -tmp439 + tmp441 = libdevice.sqrt(tmp440) + tmp442 = tmp434 / tmp441 + tmp443 = 1e-08 + tmp444 = tmp442 + tmp443 + tmp445 = 0.9 + tmp446 = libdevice.pow(tmp445, tmp436) + tmp447 = tmp446 - tmp438 + tmp448 = 100.0 + tmp449 = tmp447 * tmp448 + tmp450 = tl.full([1], 1, tl.int32) + tmp451 = tmp450 / tmp449 + tmp452 = tmp444 / tmp451 + tmp453 = tmp425 / tmp452 + tmp454 = tmp433 + tmp453 + tl.store(out_ptr48 + (x12), tmp425, None) + tl.store(out_ptr50 + (x12), tmp454, None) + tl.store(out_ptr51 + (x12), tmp432, None) + elif pid < num_xblocks_13: + pid_offset = pid - num_xblocks_12 + xnumel = 768 + rnumel = 1 + xoffset = pid_offset * XBLOCK + xindex = xoffset + tl.arange(0, XBLOCK)[:] + xmask = xindex < xnumel + x13 = xindex + tmp455 = tl.load(in_ptr65 + (x13), xmask) + tmp456 = tl.load(in_ptr66 + (x13), xmask) + tmp461 = tl.load(in_ptr67 + (x13), xmask) + tmp468 = tl.load(in_ptr68 + (x13), xmask) + tmp470 = tl.load(in_ptr69 + (0)) + tmp471 = tl.broadcast_to(tmp470, [XBLOCK]) + tmp457 = tmp456 - tmp455 + tmp458 = 0.09999999999999998 + tmp459 = tmp457 * tmp458 + tmp460 = tmp455 + tmp459 + tmp462 = 0.999 + tmp463 = tmp461 * tmp462 + tmp464 = tmp456 * tmp456 + tmp465 = 0.0010000000000000009 + tmp466 = tmp464 * tmp465 + tmp467 = tmp463 + tmp466 + tmp469 = libdevice.sqrt(tmp467) + tmp472 = libdevice.pow(tmp462, tmp471) + tmp473 = 1.0 + tmp474 = tmp472 - tmp473 + tmp475 = -tmp474 + tmp476 = libdevice.sqrt(tmp475) + tmp477 = tmp469 / tmp476 + tmp478 = 1e-08 + tmp479 = tmp477 + tmp478 + tmp480 = 0.9 + tmp481 = libdevice.pow(tmp480, tmp471) + tmp482 = tmp481 - tmp473 + tmp483 = 100.0 + tmp484 = tmp482 * tmp483 + tmp485 = tl.full([1], 1, tl.int32) + tmp486 = tmp485 / tmp484 + tmp487 = tmp479 / tmp486 + tmp488 = tmp460 / tmp487 + tmp489 = tmp468 + tmp488 + tl.store(out_ptr52 + (x13), tmp460, xmask) + tl.store(out_ptr54 + (x13), tmp489, xmask) + tl.store(out_ptr55 + (x13), tmp467, xmask) + elif pid < num_xblocks_14: + pid_offset = pid - num_xblocks_13 + xnumel = 768 + rnumel = 1 + xoffset = pid_offset * XBLOCK + xindex = xoffset + tl.arange(0, XBLOCK)[:] + xmask = xindex < xnumel + x14 = xindex + tmp490 = tl.load(in_ptr70 + (x14), xmask) + tmp491 = tl.load(in_ptr71 + (x14), xmask) + tmp496 = tl.load(in_ptr72 + (x14), xmask) + tmp503 = tl.load(in_ptr73 + (x14), xmask) + tmp505 = tl.load(in_ptr74 + (0)) + tmp506 = tl.broadcast_to(tmp505, [XBLOCK]) + tmp492 = tmp491 - tmp490 + tmp493 = 0.09999999999999998 + tmp494 = tmp492 * tmp493 + tmp495 = tmp490 + tmp494 + tmp497 = 0.999 + tmp498 = tmp496 * tmp497 + tmp499 = tmp491 * tmp491 + tmp500 = 0.0010000000000000009 + tmp501 = tmp499 * 
tmp500 + tmp502 = tmp498 + tmp501 + tmp504 = libdevice.sqrt(tmp502) + tmp507 = libdevice.pow(tmp497, tmp506) + tmp508 = 1.0 + tmp509 = tmp507 - tmp508 + tmp510 = -tmp509 + tmp511 = libdevice.sqrt(tmp510) + tmp512 = tmp504 / tmp511 + tmp513 = 1e-08 + tmp514 = tmp512 + tmp513 + tmp515 = 0.9 + tmp516 = libdevice.pow(tmp515, tmp506) + tmp517 = tmp516 - tmp508 + tmp518 = 100.0 + tmp519 = tmp517 * tmp518 + tmp520 = tl.full([1], 1, tl.int32) + tmp521 = tmp520 / tmp519 + tmp522 = tmp514 / tmp521 + tmp523 = tmp495 / tmp522 + tmp524 = tmp503 + tmp523 + tl.store(out_ptr56 + (x14), tmp495, xmask) + tl.store(out_ptr58 + (x14), tmp524, xmask) + tl.store(out_ptr59 + (x14), tmp502, xmask) + elif pid < num_xblocks_15: + pid_offset = pid - num_xblocks_14 + xnumel = 768 + rnumel = 1 + xoffset = pid_offset * XBLOCK + xindex = xoffset + tl.arange(0, XBLOCK)[:] + xmask = xindex < xnumel + x15 = xindex + tmp525 = tl.load(in_ptr75 + (x15), xmask) + tmp526 = tl.load(in_ptr76 + (x15), xmask) + tmp531 = tl.load(in_ptr77 + (x15), xmask) + tmp538 = tl.load(in_ptr78 + (x15), xmask) + tmp540 = tl.load(in_ptr79 + (0)) + tmp541 = tl.broadcast_to(tmp540, [XBLOCK]) + tmp527 = tmp526 - tmp525 + tmp528 = 0.09999999999999998 + tmp529 = tmp527 * tmp528 + tmp530 = tmp525 + tmp529 + tmp532 = 0.999 + tmp533 = tmp531 * tmp532 + tmp534 = tmp526 * tmp526 + tmp535 = 0.0010000000000000009 + tmp536 = tmp534 * tmp535 + tmp537 = tmp533 + tmp536 + tmp539 = libdevice.sqrt(tmp537) + tmp542 = libdevice.pow(tmp532, tmp541) + tmp543 = 1.0 + tmp544 = tmp542 - tmp543 + tmp545 = -tmp544 + tmp546 = libdevice.sqrt(tmp545) + tmp547 = tmp539 / tmp546 + tmp548 = 1e-08 + tmp549 = tmp547 + tmp548 + tmp550 = 0.9 + tmp551 = libdevice.pow(tmp550, tmp541) + tmp552 = tmp551 - tmp543 + tmp553 = 100.0 + tmp554 = tmp552 * tmp553 + tmp555 = tl.full([1], 1, tl.int32) + tmp556 = tmp555 / tmp554 + tmp557 = tmp549 / tmp556 + tmp558 = tmp530 / tmp557 + tmp559 = tmp538 + tmp558 + tl.store(out_ptr60 + (x15), tmp530, xmask) + tl.store(out_ptr62 + (x15), tmp559, xmask) + tl.store(out_ptr63 + (x15), tmp537, xmask) + elif pid < num_xblocks_16: + pid_offset = pid - num_xblocks_15 + xnumel = 1769472 + rnumel = 1 + xoffset = pid_offset * XBLOCK + xindex = xoffset + tl.arange(0, XBLOCK)[:] + xmask = tl.full([XBLOCK], True, tl.int1) + x16 = xindex + tmp560 = tl.load(in_ptr80 + (x16), None) + tmp561 = tl.load(in_ptr81 + (x16), None) + tmp566 = tl.load(in_ptr82 + (x16), None) + tmp573 = tl.load(in_ptr83 + (x16), None) + tmp575 = tl.load(in_ptr84 + (0)) + tmp576 = tl.broadcast_to(tmp575, [XBLOCK]) + tmp562 = tmp561 - tmp560 + tmp563 = 0.09999999999999998 + tmp564 = tmp562 * tmp563 + tmp565 = tmp560 + tmp564 + tmp567 = 0.999 + tmp568 = tmp566 * tmp567 + tmp569 = tmp561 * tmp561 + tmp570 = 0.0010000000000000009 + tmp571 = tmp569 * tmp570 + tmp572 = tmp568 + tmp571 + tmp574 = libdevice.sqrt(tmp572) + tmp577 = libdevice.pow(tmp567, tmp576) + tmp578 = 1.0 + tmp579 = tmp577 - tmp578 + tmp580 = -tmp579 + tmp581 = libdevice.sqrt(tmp580) + tmp582 = tmp574 / tmp581 + tmp583 = 1e-08 + tmp584 = tmp582 + tmp583 + tmp585 = 0.9 + tmp586 = libdevice.pow(tmp585, tmp576) + tmp587 = tmp586 - tmp578 + tmp588 = 100.0 + tmp589 = tmp587 * tmp588 + tmp590 = tl.full([1], 1, tl.int32) + tmp591 = tmp590 / tmp589 + tmp592 = tmp584 / tmp591 + tmp593 = tmp565 / tmp592 + tmp594 = tmp573 + tmp593 + tl.store(out_ptr64 + (x16), tmp565, None) + tl.store(out_ptr66 + (x16), tmp594, None) + tl.store(out_ptr67 + (x16), tmp572, None) + elif pid < num_xblocks_17: + pid_offset = pid - num_xblocks_16 + xnumel = 2304 + 
rnumel = 1 + xoffset = pid_offset * XBLOCK + xindex = xoffset + tl.arange(0, XBLOCK)[:] + xmask = xindex < xnumel + x17 = xindex + tmp595 = tl.load(in_ptr85 + (x17), xmask) + tmp596 = tl.load(in_ptr86 + (x17), xmask) + tmp601 = tl.load(in_ptr87 + (x17), xmask) + tmp608 = tl.load(in_ptr88 + (x17), xmask) + tmp610 = tl.load(in_ptr89 + (0)) + tmp611 = tl.broadcast_to(tmp610, [XBLOCK]) + tmp597 = tmp596 - tmp595 + tmp598 = 0.09999999999999998 + tmp599 = tmp597 * tmp598 + tmp600 = tmp595 + tmp599 + tmp602 = 0.999 + tmp603 = tmp601 * tmp602 + tmp604 = tmp596 * tmp596 + tmp605 = 0.0010000000000000009 + tmp606 = tmp604 * tmp605 + tmp607 = tmp603 + tmp606 + tmp609 = libdevice.sqrt(tmp607) + tmp612 = libdevice.pow(tmp602, tmp611) + tmp613 = 1.0 + tmp614 = tmp612 - tmp613 + tmp615 = -tmp614 + tmp616 = libdevice.sqrt(tmp615) + tmp617 = tmp609 / tmp616 + tmp618 = 1e-08 + tmp619 = tmp617 + tmp618 + tmp620 = 0.9 + tmp621 = libdevice.pow(tmp620, tmp611) + tmp622 = tmp621 - tmp613 + tmp623 = 100.0 + tmp624 = tmp622 * tmp623 + tmp625 = tl.full([1], 1, tl.int32) + tmp626 = tmp625 / tmp624 + tmp627 = tmp619 / tmp626 + tmp628 = tmp600 / tmp627 + tmp629 = tmp608 + tmp628 + tl.store(out_ptr68 + (x17), tmp600, xmask) + tl.store(out_ptr70 + (x17), tmp629, xmask) + tl.store(out_ptr71 + (x17), tmp607, xmask) + elif pid < num_xblocks_18: + pid_offset = pid - num_xblocks_17 + xnumel = 589824 + rnumel = 1 + xoffset = pid_offset * XBLOCK + xindex = xoffset + tl.arange(0, XBLOCK)[:] + xmask = tl.full([XBLOCK], True, tl.int1) + x18 = xindex + tmp630 = tl.load(in_ptr90 + (x18), None) + tmp631 = tl.load(in_ptr91 + (x18), None) + tmp636 = tl.load(in_ptr92 + (x18), None) + tmp643 = tl.load(in_ptr93 + (x18), None) + tmp645 = tl.load(in_ptr94 + (0)) + tmp646 = tl.broadcast_to(tmp645, [XBLOCK]) + tmp632 = tmp631 - tmp630 + tmp633 = 0.09999999999999998 + tmp634 = tmp632 * tmp633 + tmp635 = tmp630 + tmp634 + tmp637 = 0.999 + tmp638 = tmp636 * tmp637 + tmp639 = tmp631 * tmp631 + tmp640 = 0.0010000000000000009 + tmp641 = tmp639 * tmp640 + tmp642 = tmp638 + tmp641 + tmp644 = libdevice.sqrt(tmp642) + tmp647 = libdevice.pow(tmp637, tmp646) + tmp648 = 1.0 + tmp649 = tmp647 - tmp648 + tmp650 = -tmp649 + tmp651 = libdevice.sqrt(tmp650) + tmp652 = tmp644 / tmp651 + tmp653 = 1e-08 + tmp654 = tmp652 + tmp653 + tmp655 = 0.9 + tmp656 = libdevice.pow(tmp655, tmp646) + tmp657 = tmp656 - tmp648 + tmp658 = 100.0 + tmp659 = tmp657 * tmp658 + tmp660 = tl.full([1], 1, tl.int32) + tmp661 = tmp660 / tmp659 + tmp662 = tmp654 / tmp661 + tmp663 = tmp635 / tmp662 + tmp664 = tmp643 + tmp663 + tl.store(out_ptr72 + (x18), tmp635, None) + tl.store(out_ptr74 + (x18), tmp664, None) + tl.store(out_ptr75 + (x18), tmp642, None) + else: + pass + ''', device_str='cuda') + + + # kernel path: /tmp/tmp2ln889l5/uc/cucov6bdfoahzje6orumwjboz53njy6qiq76hwevxfqpml6gkhy7.py + # Source Nodes: [], Original ATen: [] + + triton_for_fused_3 = async_compile.triton('triton_', ''' + import triton + import triton.language as tl + from triton.compiler.compiler import AttrsDescriptor + + from torch._inductor.runtime import triton_helpers, triton_heuristics + from torch._inductor.runtime.triton_helpers import libdevice, math as tl_math + from torch._inductor.runtime.hints import AutotuneHint, ReductionHint, TileHint, instance_descriptor, DeviceProperties + + @triton_heuristics.foreach( + num_warps=8, + triton_meta={'signature': {0: '*fp32', 1: '*fp32', 2: '*fp32', 3: '*fp32', 4: '*fp32', 5: '*fp32', 6: '*fp32', 7: '*fp32', 8: '*fp32', 9: '*fp32', 10: '*fp32', 11: '*fp32', 12: '*fp32', 
13: '*fp32', 14: '*fp32', 15: '*fp32', 16: '*fp32', 17: '*fp32', 18: '*fp32', 19: '*fp32', 20: '*fp32', 21: '*fp32', 22: '*fp32', 23: '*fp32', 24: '*fp32', 25: '*fp32', 26: '*fp32', 27: '*fp32', 28: '*fp32', 29: '*fp32', 30: '*fp32', 31: '*fp32', 32: '*fp32', 33: '*fp32', 34: '*fp32', 35: '*fp32', 36: '*fp32', 37: '*fp32', 38: '*fp32', 39: '*fp32', 40: '*fp32', 41: '*fp32', 42: '*fp32', 43: '*fp32', 44: '*fp32', 45: '*fp32', 46: '*fp32', 47: '*fp32', 48: '*fp32', 49: '*fp32', 50: '*fp32', 51: '*fp32', 52: '*fp32', 53: '*fp32', 54: '*fp32', 55: '*fp32', 56: '*fp32', 57: '*fp32', 58: '*fp32', 59: '*fp32', 60: '*fp32', 61: '*fp32', 62: '*fp32', 63: '*fp32', 64: '*fp32', 65: '*fp32', 66: '*fp32', 67: '*fp32', 68: '*fp32', 69: '*fp32', 70: '*fp32', 71: '*fp32', 72: '*fp32', 73: '*fp32', 74: '*fp32', 75: '*fp32', 76: '*fp32', 77: '*fp32', 78: '*fp32', 79: '*fp32', 80: '*fp32', 81: '*fp32', 82: '*fp32', 83: '*fp32', 84: '*fp32', 85: '*fp32', 86: '*fp32', 87: '*fp32', 88: '*fp32', 89: '*fp32', 90: '*fp32', 91: '*fp32', 92: '*fp32', 93: '*fp32', 94: '*fp32', 95: '*fp32', 96: '*fp32', 97: '*fp32', 98: '*fp32', 99: '*fp32', 100: '*fp32', 101: '*fp32', 102: '*fp32', 103: '*fp32', 104: '*fp32', 105: '*fp32', 106: '*fp32', 107: '*fp32', 108: '*fp32', 109: '*fp32', 110: '*fp32', 111: '*fp32', 112: '*fp32', 113: '*fp32', 114: '*fp32', 115: '*fp32', 116: '*fp32', 117: '*fp32', 118: '*fp32', 119: '*fp32', 120: '*fp32', 121: '*fp32', 122: '*fp32', 123: '*fp32', 124: '*fp32', 125: '*fp32', 126: '*fp32', 127: '*fp32', 128: '*fp32', 129: '*fp32', 130: '*fp32', 131: '*fp32', 132: '*fp32', 133: '*fp32', 134: '*fp32', 135: '*fp32', 136: '*fp32', 137: '*fp32', 138: '*fp32', 139: '*fp32', 140: '*fp32', 141: '*fp32', 142: '*fp32', 143: '*fp32', 144: '*fp32', 145: '*fp32', 146: '*fp32', 147: '*fp32', 148: '*fp32', 149: '*fp32', 150: '*fp32', 151: '*fp32'}, 'device': DeviceProperties(type='cuda', index=0, cc=90, major=9, regs_per_multiprocessor=65536, max_threads_per_multi_processor=2048, multi_processor_count=132), 'constants': {}, 'configs': [AttrsDescriptor(divisible_by_16=(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76, 77, 78, 79, 80, 81, 82, 83, 84, 85, 86, 87, 88, 89, 90, 91, 92, 93, 94, 95, 96, 97, 98, 99, 100, 101, 102, 103, 104, 105, 106, 107, 108, 109, 110, 111, 112, 113, 114, 115, 116, 117, 118, 119, 120, 121, 122, 123, 124, 125, 126, 127, 128, 129, 130, 131, 132, 133, 134, 135, 136, 137, 138, 139, 140, 141, 142, 143, 144, 145, 146, 147, 148, 149, 150, 151), equal_to_1=())]}, + inductor_meta={'kernel_name': 'triton_for_fused_3', 'mutated_arg_names': ['in_ptr0', 'in_ptr10', 'in_ptr12', 'in_ptr13', 'in_ptr15', 'in_ptr17', 'in_ptr18', 'in_ptr2', 'in_ptr20', 'in_ptr22', 'in_ptr23', 'in_ptr25', 'in_ptr27', 'in_ptr28', 'in_ptr3', 'in_ptr30', 'in_ptr32', 'in_ptr33', 'in_ptr35', 'in_ptr37', 'in_ptr38', 'in_ptr40', 'in_ptr42', 'in_ptr43', 'in_ptr45', 'in_ptr47', 'in_ptr48', 'in_ptr5', 'in_ptr50', 'in_ptr52', 'in_ptr53', 'in_ptr55', 'in_ptr57', 'in_ptr58', 'in_ptr60', 'in_ptr62', 'in_ptr63', 'in_ptr65', 'in_ptr67', 'in_ptr68', 'in_ptr7', 'in_ptr70', 'in_ptr72', 'in_ptr73', 'in_ptr75', 'in_ptr77', 'in_ptr78', 'in_ptr8', 'in_ptr80', 'in_ptr82', 'in_ptr83', 'in_ptr85', 'in_ptr87', 'in_ptr88', 'in_ptr90', 'in_ptr92', 'in_ptr93', 'out_ptr0', 'out_ptr10', 'out_ptr11', 
'out_ptr12', 'out_ptr14', 'out_ptr15', 'out_ptr16', 'out_ptr18', 'out_ptr19', 'out_ptr2', 'out_ptr20', 'out_ptr22', 'out_ptr23', 'out_ptr24', 'out_ptr26', 'out_ptr27', 'out_ptr28', 'out_ptr3', 'out_ptr30', 'out_ptr31', 'out_ptr32', 'out_ptr34', 'out_ptr35', 'out_ptr36', 'out_ptr38', 'out_ptr39', 'out_ptr4', 'out_ptr40', 'out_ptr42', 'out_ptr43', 'out_ptr44', 'out_ptr46', 'out_ptr47', 'out_ptr48', 'out_ptr50', 'out_ptr51', 'out_ptr52', 'out_ptr54', 'out_ptr55', 'out_ptr56', 'out_ptr58', 'out_ptr59', 'out_ptr6', 'out_ptr60', 'out_ptr62', 'out_ptr63', 'out_ptr64', 'out_ptr66', 'out_ptr67', 'out_ptr68', 'out_ptr7', 'out_ptr70', 'out_ptr71', 'out_ptr72', 'out_ptr74', 'out_ptr75', 'out_ptr8'], 'backend_hash': 'B0714AF1B5CA55D285C0AA74C7669268EFE45FDD9B7A0A70183B8D89AC8FCAC4', 'are_deterministic_algorithms_enabled': False, 'assert_indirect_indexing': True, 'autotune_local_cache': True, 'autotune_pointwise': True, 'autotune_remote_cache': None, 'force_disable_caches': False, 'dynamic_scale_rblock': True, 'max_autotune': False, 'max_autotune_pointwise': False, 'min_split_scan_rblock': 256, 'spill_threshold': 16, 'store_cubin': False}, + ) + @triton.jit + def triton_(in_ptr0, in_ptr1, in_ptr2, in_ptr3, in_ptr4, in_ptr5, in_ptr6, in_ptr7, in_ptr8, in_ptr9, in_ptr10, in_ptr11, in_ptr12, in_ptr13, in_ptr14, in_ptr15, in_ptr16, in_ptr17, in_ptr18, in_ptr19, in_ptr20, in_ptr21, in_ptr22, in_ptr23, in_ptr24, in_ptr25, in_ptr26, in_ptr27, in_ptr28, in_ptr29, in_ptr30, in_ptr31, in_ptr32, in_ptr33, in_ptr34, in_ptr35, in_ptr36, in_ptr37, in_ptr38, in_ptr39, in_ptr40, in_ptr41, in_ptr42, in_ptr43, in_ptr44, in_ptr45, in_ptr46, in_ptr47, in_ptr48, in_ptr49, in_ptr50, in_ptr51, in_ptr52, in_ptr53, in_ptr54, in_ptr55, in_ptr56, in_ptr57, in_ptr58, in_ptr59, in_ptr60, in_ptr61, in_ptr62, in_ptr63, in_ptr64, in_ptr65, in_ptr66, in_ptr67, in_ptr68, in_ptr69, in_ptr70, in_ptr71, in_ptr72, in_ptr73, in_ptr74, in_ptr75, in_ptr76, in_ptr77, in_ptr78, in_ptr79, in_ptr80, in_ptr81, in_ptr82, in_ptr83, in_ptr84, in_ptr85, in_ptr86, in_ptr87, in_ptr88, in_ptr89, in_ptr90, in_ptr91, in_ptr92, in_ptr93, in_ptr94, out_ptr0, out_ptr2, out_ptr3, out_ptr4, out_ptr6, out_ptr7, out_ptr8, out_ptr10, out_ptr11, out_ptr12, out_ptr14, out_ptr15, out_ptr16, out_ptr18, out_ptr19, out_ptr20, out_ptr22, out_ptr23, out_ptr24, out_ptr26, out_ptr27, out_ptr28, out_ptr30, out_ptr31, out_ptr32, out_ptr34, out_ptr35, out_ptr36, out_ptr38, out_ptr39, out_ptr40, out_ptr42, out_ptr43, out_ptr44, out_ptr46, out_ptr47, out_ptr48, out_ptr50, out_ptr51, out_ptr52, out_ptr54, out_ptr55, out_ptr56, out_ptr58, out_ptr59, out_ptr60, out_ptr62, out_ptr63, out_ptr64, out_ptr66, out_ptr67, out_ptr68, out_ptr70, out_ptr71, out_ptr72, out_ptr74, out_ptr75): + pid = tl.program_id(0) + XBLOCK: tl.constexpr = 1024 + num_xblocks_0 = tl.cdiv(768, XBLOCK) + num_xblocks_1 = num_xblocks_0 + tl.cdiv(768, XBLOCK) + num_xblocks_2 = num_xblocks_1 + tl.cdiv(768, XBLOCK) + num_xblocks_3 = num_xblocks_2 + tl.cdiv(2359296, XBLOCK) + num_xblocks_4 = num_xblocks_3 + tl.cdiv(3072, XBLOCK) + num_xblocks_5 = num_xblocks_4 + tl.cdiv(2359296, XBLOCK) + num_xblocks_6 = num_xblocks_5 + tl.cdiv(768, XBLOCK) + num_xblocks_7 = num_xblocks_6 + tl.cdiv(768, XBLOCK) + num_xblocks_8 = num_xblocks_7 + tl.cdiv(768, XBLOCK) + num_xblocks_9 = num_xblocks_8 + tl.cdiv(1769472, XBLOCK) + num_xblocks_10 = num_xblocks_9 + tl.cdiv(2304, XBLOCK) + num_xblocks_11 = num_xblocks_10 + tl.cdiv(589824, XBLOCK) + num_xblocks_12 = num_xblocks_11 + tl.cdiv(768, XBLOCK) + num_xblocks_13 = num_xblocks_12 + 
tl.cdiv(768, XBLOCK) + num_xblocks_14 = num_xblocks_13 + tl.cdiv(768, XBLOCK) + num_xblocks_15 = num_xblocks_14 + tl.cdiv(2359296, XBLOCK) + num_xblocks_16 = num_xblocks_15 + tl.cdiv(3072, XBLOCK) + num_xblocks_17 = num_xblocks_16 + tl.cdiv(2359296, XBLOCK) + num_xblocks_18 = num_xblocks_17 + tl.cdiv(768, XBLOCK) + if pid < num_xblocks_0: + pid_offset = pid + xnumel = 768 + rnumel = 1 + xoffset = pid_offset * XBLOCK + xindex = xoffset + tl.arange(0, XBLOCK)[:] + xmask = xindex < xnumel + x0 = xindex + tmp0 = tl.load(in_ptr0 + (x0), xmask) + tmp1 = tl.load(in_ptr1 + (x0), xmask) + tmp6 = tl.load(in_ptr2 + (x0), xmask) + tmp13 = tl.load(in_ptr3 + (x0), xmask) + tmp15 = tl.load(in_ptr4 + (0)) + tmp16 = tl.broadcast_to(tmp15, [XBLOCK]) + tmp2 = tmp1 - tmp0 + tmp3 = 0.09999999999999998 + tmp4 = tmp2 * tmp3 + tmp5 = tmp0 + tmp4 + tmp7 = 0.999 + tmp8 = tmp6 * tmp7 + tmp9 = tmp1 * tmp1 + tmp10 = 0.0010000000000000009 + tmp11 = tmp9 * tmp10 + tmp12 = tmp8 + tmp11 + tmp14 = libdevice.sqrt(tmp12) + tmp17 = libdevice.pow(tmp7, tmp16) + tmp18 = 1.0 + tmp19 = tmp17 - tmp18 + tmp20 = -tmp19 + tmp21 = libdevice.sqrt(tmp20) + tmp22 = tmp14 / tmp21 + tmp23 = 1e-08 + tmp24 = tmp22 + tmp23 + tmp25 = 0.9 + tmp26 = libdevice.pow(tmp25, tmp16) + tmp27 = tmp26 - tmp18 + tmp28 = 100.0 + tmp29 = tmp27 * tmp28 + tmp30 = tl.full([1], 1, tl.int32) + tmp31 = tmp30 / tmp29 + tmp32 = tmp24 / tmp31 + tmp33 = tmp5 / tmp32 + tmp34 = tmp13 + tmp33 + tl.store(out_ptr0 + (x0), tmp5, xmask) + tl.store(out_ptr2 + (x0), tmp34, xmask) + tl.store(out_ptr3 + (x0), tmp12, xmask) + elif pid < num_xblocks_1: + pid_offset = pid - num_xblocks_0 + xnumel = 768 + rnumel = 1 + xoffset = pid_offset * XBLOCK + xindex = xoffset + tl.arange(0, XBLOCK)[:] + xmask = xindex < xnumel + x1 = xindex + tmp35 = tl.load(in_ptr5 + (x1), xmask) + tmp36 = tl.load(in_ptr6 + (x1), xmask) + tmp41 = tl.load(in_ptr7 + (x1), xmask) + tmp48 = tl.load(in_ptr8 + (x1), xmask) + tmp50 = tl.load(in_ptr9 + (0)) + tmp51 = tl.broadcast_to(tmp50, [XBLOCK]) + tmp37 = tmp36 - tmp35 + tmp38 = 0.09999999999999998 + tmp39 = tmp37 * tmp38 + tmp40 = tmp35 + tmp39 + tmp42 = 0.999 + tmp43 = tmp41 * tmp42 + tmp44 = tmp36 * tmp36 + tmp45 = 0.0010000000000000009 + tmp46 = tmp44 * tmp45 + tmp47 = tmp43 + tmp46 + tmp49 = libdevice.sqrt(tmp47) + tmp52 = libdevice.pow(tmp42, tmp51) + tmp53 = 1.0 + tmp54 = tmp52 - tmp53 + tmp55 = -tmp54 + tmp56 = libdevice.sqrt(tmp55) + tmp57 = tmp49 / tmp56 + tmp58 = 1e-08 + tmp59 = tmp57 + tmp58 + tmp60 = 0.9 + tmp61 = libdevice.pow(tmp60, tmp51) + tmp62 = tmp61 - tmp53 + tmp63 = 100.0 + tmp64 = tmp62 * tmp63 + tmp65 = tl.full([1], 1, tl.int32) + tmp66 = tmp65 / tmp64 + tmp67 = tmp59 / tmp66 + tmp68 = tmp40 / tmp67 + tmp69 = tmp48 + tmp68 + tl.store(out_ptr4 + (x1), tmp40, xmask) + tl.store(out_ptr6 + (x1), tmp69, xmask) + tl.store(out_ptr7 + (x1), tmp47, xmask) + elif pid < num_xblocks_2: + pid_offset = pid - num_xblocks_1 + xnumel = 768 + rnumel = 1 + xoffset = pid_offset * XBLOCK + xindex = xoffset + tl.arange(0, XBLOCK)[:] + xmask = xindex < xnumel + x2 = xindex + tmp70 = tl.load(in_ptr10 + (x2), xmask) + tmp71 = tl.load(in_ptr11 + (x2), xmask) + tmp76 = tl.load(in_ptr12 + (x2), xmask) + tmp83 = tl.load(in_ptr13 + (x2), xmask) + tmp85 = tl.load(in_ptr14 + (0)) + tmp86 = tl.broadcast_to(tmp85, [XBLOCK]) + tmp72 = tmp71 - tmp70 + tmp73 = 0.09999999999999998 + tmp74 = tmp72 * tmp73 + tmp75 = tmp70 + tmp74 + tmp77 = 0.999 + tmp78 = tmp76 * tmp77 + tmp79 = tmp71 * tmp71 + tmp80 = 0.0010000000000000009 + tmp81 = tmp79 * tmp80 + tmp82 = tmp78 + tmp81 + 
tmp84 = libdevice.sqrt(tmp82) + tmp87 = libdevice.pow(tmp77, tmp86) + tmp88 = 1.0 + tmp89 = tmp87 - tmp88 + tmp90 = -tmp89 + tmp91 = libdevice.sqrt(tmp90) + tmp92 = tmp84 / tmp91 + tmp93 = 1e-08 + tmp94 = tmp92 + tmp93 + tmp95 = 0.9 + tmp96 = libdevice.pow(tmp95, tmp86) + tmp97 = tmp96 - tmp88 + tmp98 = 100.0 + tmp99 = tmp97 * tmp98 + tmp100 = tl.full([1], 1, tl.int32) + tmp101 = tmp100 / tmp99 + tmp102 = tmp94 / tmp101 + tmp103 = tmp75 / tmp102 + tmp104 = tmp83 + tmp103 + tl.store(out_ptr8 + (x2), tmp75, xmask) + tl.store(out_ptr10 + (x2), tmp104, xmask) + tl.store(out_ptr11 + (x2), tmp82, xmask) + elif pid < num_xblocks_3: + pid_offset = pid - num_xblocks_2 + xnumel = 2359296 + rnumel = 1 + xoffset = pid_offset * XBLOCK + xindex = xoffset + tl.arange(0, XBLOCK)[:] + xmask = tl.full([XBLOCK], True, tl.int1) + x3 = xindex + tmp105 = tl.load(in_ptr15 + (x3), None) + tmp106 = tl.load(in_ptr16 + (x3), None) + tmp111 = tl.load(in_ptr17 + (x3), None) + tmp118 = tl.load(in_ptr18 + (x3), None) + tmp120 = tl.load(in_ptr19 + (0)) + tmp121 = tl.broadcast_to(tmp120, [XBLOCK]) + tmp107 = tmp106 - tmp105 + tmp108 = 0.09999999999999998 + tmp109 = tmp107 * tmp108 + tmp110 = tmp105 + tmp109 + tmp112 = 0.999 + tmp113 = tmp111 * tmp112 + tmp114 = tmp106 * tmp106 + tmp115 = 0.0010000000000000009 + tmp116 = tmp114 * tmp115 + tmp117 = tmp113 + tmp116 + tmp119 = libdevice.sqrt(tmp117) + tmp122 = libdevice.pow(tmp112, tmp121) + tmp123 = 1.0 + tmp124 = tmp122 - tmp123 + tmp125 = -tmp124 + tmp126 = libdevice.sqrt(tmp125) + tmp127 = tmp119 / tmp126 + tmp128 = 1e-08 + tmp129 = tmp127 + tmp128 + tmp130 = 0.9 + tmp131 = libdevice.pow(tmp130, tmp121) + tmp132 = tmp131 - tmp123 + tmp133 = 100.0 + tmp134 = tmp132 * tmp133 + tmp135 = tl.full([1], 1, tl.int32) + tmp136 = tmp135 / tmp134 + tmp137 = tmp129 / tmp136 + tmp138 = tmp110 / tmp137 + tmp139 = tmp118 + tmp138 + tl.store(out_ptr12 + (x3), tmp110, None) + tl.store(out_ptr14 + (x3), tmp139, None) + tl.store(out_ptr15 + (x3), tmp117, None) + elif pid < num_xblocks_4: + pid_offset = pid - num_xblocks_3 + xnumel = 3072 + rnumel = 1 + xoffset = pid_offset * XBLOCK + xindex = xoffset + tl.arange(0, XBLOCK)[:] + xmask = xindex < xnumel + x4 = xindex + tmp140 = tl.load(in_ptr20 + (x4), xmask) + tmp141 = tl.load(in_ptr21 + (x4), xmask) + tmp146 = tl.load(in_ptr22 + (x4), xmask) + tmp153 = tl.load(in_ptr23 + (x4), xmask) + tmp155 = tl.load(in_ptr24 + (0)) + tmp156 = tl.broadcast_to(tmp155, [XBLOCK]) + tmp142 = tmp141 - tmp140 + tmp143 = 0.09999999999999998 + tmp144 = tmp142 * tmp143 + tmp145 = tmp140 + tmp144 + tmp147 = 0.999 + tmp148 = tmp146 * tmp147 + tmp149 = tmp141 * tmp141 + tmp150 = 0.0010000000000000009 + tmp151 = tmp149 * tmp150 + tmp152 = tmp148 + tmp151 + tmp154 = libdevice.sqrt(tmp152) + tmp157 = libdevice.pow(tmp147, tmp156) + tmp158 = 1.0 + tmp159 = tmp157 - tmp158 + tmp160 = -tmp159 + tmp161 = libdevice.sqrt(tmp160) + tmp162 = tmp154 / tmp161 + tmp163 = 1e-08 + tmp164 = tmp162 + tmp163 + tmp165 = 0.9 + tmp166 = libdevice.pow(tmp165, tmp156) + tmp167 = tmp166 - tmp158 + tmp168 = 100.0 + tmp169 = tmp167 * tmp168 + tmp170 = tl.full([1], 1, tl.int32) + tmp171 = tmp170 / tmp169 + tmp172 = tmp164 / tmp171 + tmp173 = tmp145 / tmp172 + tmp174 = tmp153 + tmp173 + tl.store(out_ptr16 + (x4), tmp145, xmask) + tl.store(out_ptr18 + (x4), tmp174, xmask) + tl.store(out_ptr19 + (x4), tmp152, xmask) + elif pid < num_xblocks_5: + pid_offset = pid - num_xblocks_4 + xnumel = 2359296 + rnumel = 1 + xoffset = pid_offset * XBLOCK + xindex = xoffset + tl.arange(0, XBLOCK)[:] + xmask = 
tl.full([XBLOCK], True, tl.int1) + x5 = xindex + tmp175 = tl.load(in_ptr25 + (x5), None) + tmp176 = tl.load(in_ptr26 + (x5), None) + tmp181 = tl.load(in_ptr27 + (x5), None) + tmp188 = tl.load(in_ptr28 + (x5), None) + tmp190 = tl.load(in_ptr29 + (0)) + tmp191 = tl.broadcast_to(tmp190, [XBLOCK]) + tmp177 = tmp176 - tmp175 + tmp178 = 0.09999999999999998 + tmp179 = tmp177 * tmp178 + tmp180 = tmp175 + tmp179 + tmp182 = 0.999 + tmp183 = tmp181 * tmp182 + tmp184 = tmp176 * tmp176 + tmp185 = 0.0010000000000000009 + tmp186 = tmp184 * tmp185 + tmp187 = tmp183 + tmp186 + tmp189 = libdevice.sqrt(tmp187) + tmp192 = libdevice.pow(tmp182, tmp191) + tmp193 = 1.0 + tmp194 = tmp192 - tmp193 + tmp195 = -tmp194 + tmp196 = libdevice.sqrt(tmp195) + tmp197 = tmp189 / tmp196 + tmp198 = 1e-08 + tmp199 = tmp197 + tmp198 + tmp200 = 0.9 + tmp201 = libdevice.pow(tmp200, tmp191) + tmp202 = tmp201 - tmp193 + tmp203 = 100.0 + tmp204 = tmp202 * tmp203 + tmp205 = tl.full([1], 1, tl.int32) + tmp206 = tmp205 / tmp204 + tmp207 = tmp199 / tmp206 + tmp208 = tmp180 / tmp207 + tmp209 = tmp188 + tmp208 + tl.store(out_ptr20 + (x5), tmp180, None) + tl.store(out_ptr22 + (x5), tmp209, None) + tl.store(out_ptr23 + (x5), tmp187, None) + elif pid < num_xblocks_6: + pid_offset = pid - num_xblocks_5 + xnumel = 768 + rnumel = 1 + xoffset = pid_offset * XBLOCK + xindex = xoffset + tl.arange(0, XBLOCK)[:] + xmask = xindex < xnumel + x6 = xindex + tmp210 = tl.load(in_ptr30 + (x6), xmask) + tmp211 = tl.load(in_ptr31 + (x6), xmask) + tmp216 = tl.load(in_ptr32 + (x6), xmask) + tmp223 = tl.load(in_ptr33 + (x6), xmask) + tmp225 = tl.load(in_ptr34 + (0)) + tmp226 = tl.broadcast_to(tmp225, [XBLOCK]) + tmp212 = tmp211 - tmp210 + tmp213 = 0.09999999999999998 + tmp214 = tmp212 * tmp213 + tmp215 = tmp210 + tmp214 + tmp217 = 0.999 + tmp218 = tmp216 * tmp217 + tmp219 = tmp211 * tmp211 + tmp220 = 0.0010000000000000009 + tmp221 = tmp219 * tmp220 + tmp222 = tmp218 + tmp221 + tmp224 = libdevice.sqrt(tmp222) + tmp227 = libdevice.pow(tmp217, tmp226) + tmp228 = 1.0 + tmp229 = tmp227 - tmp228 + tmp230 = -tmp229 + tmp231 = libdevice.sqrt(tmp230) + tmp232 = tmp224 / tmp231 + tmp233 = 1e-08 + tmp234 = tmp232 + tmp233 + tmp235 = 0.9 + tmp236 = libdevice.pow(tmp235, tmp226) + tmp237 = tmp236 - tmp228 + tmp238 = 100.0 + tmp239 = tmp237 * tmp238 + tmp240 = tl.full([1], 1, tl.int32) + tmp241 = tmp240 / tmp239 + tmp242 = tmp234 / tmp241 + tmp243 = tmp215 / tmp242 + tmp244 = tmp223 + tmp243 + tl.store(out_ptr24 + (x6), tmp215, xmask) + tl.store(out_ptr26 + (x6), tmp244, xmask) + tl.store(out_ptr27 + (x6), tmp222, xmask) + elif pid < num_xblocks_7: + pid_offset = pid - num_xblocks_6 + xnumel = 768 + rnumel = 1 + xoffset = pid_offset * XBLOCK + xindex = xoffset + tl.arange(0, XBLOCK)[:] + xmask = xindex < xnumel + x7 = xindex + tmp245 = tl.load(in_ptr35 + (x7), xmask) + tmp246 = tl.load(in_ptr36 + (x7), xmask) + tmp251 = tl.load(in_ptr37 + (x7), xmask) + tmp258 = tl.load(in_ptr38 + (x7), xmask) + tmp260 = tl.load(in_ptr39 + (0)) + tmp261 = tl.broadcast_to(tmp260, [XBLOCK]) + tmp247 = tmp246 - tmp245 + tmp248 = 0.09999999999999998 + tmp249 = tmp247 * tmp248 + tmp250 = tmp245 + tmp249 + tmp252 = 0.999 + tmp253 = tmp251 * tmp252 + tmp254 = tmp246 * tmp246 + tmp255 = 0.0010000000000000009 + tmp256 = tmp254 * tmp255 + tmp257 = tmp253 + tmp256 + tmp259 = libdevice.sqrt(tmp257) + tmp262 = libdevice.pow(tmp252, tmp261) + tmp263 = 1.0 + tmp264 = tmp262 - tmp263 + tmp265 = -tmp264 + tmp266 = libdevice.sqrt(tmp265) + tmp267 = tmp259 / tmp266 + tmp268 = 1e-08 + tmp269 = tmp267 + tmp268 + 
tmp270 = 0.9 + tmp271 = libdevice.pow(tmp270, tmp261) + tmp272 = tmp271 - tmp263 + tmp273 = 100.0 + tmp274 = tmp272 * tmp273 + tmp275 = tl.full([1], 1, tl.int32) + tmp276 = tmp275 / tmp274 + tmp277 = tmp269 / tmp276 + tmp278 = tmp250 / tmp277 + tmp279 = tmp258 + tmp278 + tl.store(out_ptr28 + (x7), tmp250, xmask) + tl.store(out_ptr30 + (x7), tmp279, xmask) + tl.store(out_ptr31 + (x7), tmp257, xmask) + elif pid < num_xblocks_8: + pid_offset = pid - num_xblocks_7 + xnumel = 768 + rnumel = 1 + xoffset = pid_offset * XBLOCK + xindex = xoffset + tl.arange(0, XBLOCK)[:] + xmask = xindex < xnumel + x8 = xindex + tmp280 = tl.load(in_ptr40 + (x8), xmask) + tmp281 = tl.load(in_ptr41 + (x8), xmask) + tmp286 = tl.load(in_ptr42 + (x8), xmask) + tmp293 = tl.load(in_ptr43 + (x8), xmask) + tmp295 = tl.load(in_ptr44 + (0)) + tmp296 = tl.broadcast_to(tmp295, [XBLOCK]) + tmp282 = tmp281 - tmp280 + tmp283 = 0.09999999999999998 + tmp284 = tmp282 * tmp283 + tmp285 = tmp280 + tmp284 + tmp287 = 0.999 + tmp288 = tmp286 * tmp287 + tmp289 = tmp281 * tmp281 + tmp290 = 0.0010000000000000009 + tmp291 = tmp289 * tmp290 + tmp292 = tmp288 + tmp291 + tmp294 = libdevice.sqrt(tmp292) + tmp297 = libdevice.pow(tmp287, tmp296) + tmp298 = 1.0 + tmp299 = tmp297 - tmp298 + tmp300 = -tmp299 + tmp301 = libdevice.sqrt(tmp300) + tmp302 = tmp294 / tmp301 + tmp303 = 1e-08 + tmp304 = tmp302 + tmp303 + tmp305 = 0.9 + tmp306 = libdevice.pow(tmp305, tmp296) + tmp307 = tmp306 - tmp298 + tmp308 = 100.0 + tmp309 = tmp307 * tmp308 + tmp310 = tl.full([1], 1, tl.int32) + tmp311 = tmp310 / tmp309 + tmp312 = tmp304 / tmp311 + tmp313 = tmp285 / tmp312 + tmp314 = tmp293 + tmp313 + tl.store(out_ptr32 + (x8), tmp285, xmask) + tl.store(out_ptr34 + (x8), tmp314, xmask) + tl.store(out_ptr35 + (x8), tmp292, xmask) + elif pid < num_xblocks_9: + pid_offset = pid - num_xblocks_8 + xnumel = 1769472 + rnumel = 1 + xoffset = pid_offset * XBLOCK + xindex = xoffset + tl.arange(0, XBLOCK)[:] + xmask = tl.full([XBLOCK], True, tl.int1) + x9 = xindex + tmp315 = tl.load(in_ptr45 + (x9), None) + tmp316 = tl.load(in_ptr46 + (x9), None) + tmp321 = tl.load(in_ptr47 + (x9), None) + tmp328 = tl.load(in_ptr48 + (x9), None) + tmp330 = tl.load(in_ptr49 + (0)) + tmp331 = tl.broadcast_to(tmp330, [XBLOCK]) + tmp317 = tmp316 - tmp315 + tmp318 = 0.09999999999999998 + tmp319 = tmp317 * tmp318 + tmp320 = tmp315 + tmp319 + tmp322 = 0.999 + tmp323 = tmp321 * tmp322 + tmp324 = tmp316 * tmp316 + tmp325 = 0.0010000000000000009 + tmp326 = tmp324 * tmp325 + tmp327 = tmp323 + tmp326 + tmp329 = libdevice.sqrt(tmp327) + tmp332 = libdevice.pow(tmp322, tmp331) + tmp333 = 1.0 + tmp334 = tmp332 - tmp333 + tmp335 = -tmp334 + tmp336 = libdevice.sqrt(tmp335) + tmp337 = tmp329 / tmp336 + tmp338 = 1e-08 + tmp339 = tmp337 + tmp338 + tmp340 = 0.9 + tmp341 = libdevice.pow(tmp340, tmp331) + tmp342 = tmp341 - tmp333 + tmp343 = 100.0 + tmp344 = tmp342 * tmp343 + tmp345 = tl.full([1], 1, tl.int32) + tmp346 = tmp345 / tmp344 + tmp347 = tmp339 / tmp346 + tmp348 = tmp320 / tmp347 + tmp349 = tmp328 + tmp348 + tl.store(out_ptr36 + (x9), tmp320, None) + tl.store(out_ptr38 + (x9), tmp349, None) + tl.store(out_ptr39 + (x9), tmp327, None) + elif pid < num_xblocks_10: + pid_offset = pid - num_xblocks_9 + xnumel = 2304 + rnumel = 1 + xoffset = pid_offset * XBLOCK + xindex = xoffset + tl.arange(0, XBLOCK)[:] + xmask = xindex < xnumel + x10 = xindex + tmp350 = tl.load(in_ptr50 + (x10), xmask) + tmp351 = tl.load(in_ptr51 + (x10), xmask) + tmp356 = tl.load(in_ptr52 + (x10), xmask) + tmp363 = tl.load(in_ptr53 + (x10), xmask) + 
tmp365 = tl.load(in_ptr54 + (0)) + tmp366 = tl.broadcast_to(tmp365, [XBLOCK]) + tmp352 = tmp351 - tmp350 + tmp353 = 0.09999999999999998 + tmp354 = tmp352 * tmp353 + tmp355 = tmp350 + tmp354 + tmp357 = 0.999 + tmp358 = tmp356 * tmp357 + tmp359 = tmp351 * tmp351 + tmp360 = 0.0010000000000000009 + tmp361 = tmp359 * tmp360 + tmp362 = tmp358 + tmp361 + tmp364 = libdevice.sqrt(tmp362) + tmp367 = libdevice.pow(tmp357, tmp366) + tmp368 = 1.0 + tmp369 = tmp367 - tmp368 + tmp370 = -tmp369 + tmp371 = libdevice.sqrt(tmp370) + tmp372 = tmp364 / tmp371 + tmp373 = 1e-08 + tmp374 = tmp372 + tmp373 + tmp375 = 0.9 + tmp376 = libdevice.pow(tmp375, tmp366) + tmp377 = tmp376 - tmp368 + tmp378 = 100.0 + tmp379 = tmp377 * tmp378 + tmp380 = tl.full([1], 1, tl.int32) + tmp381 = tmp380 / tmp379 + tmp382 = tmp374 / tmp381 + tmp383 = tmp355 / tmp382 + tmp384 = tmp363 + tmp383 + tl.store(out_ptr40 + (x10), tmp355, xmask) + tl.store(out_ptr42 + (x10), tmp384, xmask) + tl.store(out_ptr43 + (x10), tmp362, xmask) + elif pid < num_xblocks_11: + pid_offset = pid - num_xblocks_10 + xnumel = 589824 + rnumel = 1 + xoffset = pid_offset * XBLOCK + xindex = xoffset + tl.arange(0, XBLOCK)[:] + xmask = tl.full([XBLOCK], True, tl.int1) + x11 = xindex + tmp385 = tl.load(in_ptr55 + (x11), None) + tmp386 = tl.load(in_ptr56 + (x11), None) + tmp391 = tl.load(in_ptr57 + (x11), None) + tmp398 = tl.load(in_ptr58 + (x11), None) + tmp400 = tl.load(in_ptr59 + (0)) + tmp401 = tl.broadcast_to(tmp400, [XBLOCK]) + tmp387 = tmp386 - tmp385 + tmp388 = 0.09999999999999998 + tmp389 = tmp387 * tmp388 + tmp390 = tmp385 + tmp389 + tmp392 = 0.999 + tmp393 = tmp391 * tmp392 + tmp394 = tmp386 * tmp386 + tmp395 = 0.0010000000000000009 + tmp396 = tmp394 * tmp395 + tmp397 = tmp393 + tmp396 + tmp399 = libdevice.sqrt(tmp397) + tmp402 = libdevice.pow(tmp392, tmp401) + tmp403 = 1.0 + tmp404 = tmp402 - tmp403 + tmp405 = -tmp404 + tmp406 = libdevice.sqrt(tmp405) + tmp407 = tmp399 / tmp406 + tmp408 = 1e-08 + tmp409 = tmp407 + tmp408 + tmp410 = 0.9 + tmp411 = libdevice.pow(tmp410, tmp401) + tmp412 = tmp411 - tmp403 + tmp413 = 100.0 + tmp414 = tmp412 * tmp413 + tmp415 = tl.full([1], 1, tl.int32) + tmp416 = tmp415 / tmp414 + tmp417 = tmp409 / tmp416 + tmp418 = tmp390 / tmp417 + tmp419 = tmp398 + tmp418 + tl.store(out_ptr44 + (x11), tmp390, None) + tl.store(out_ptr46 + (x11), tmp419, None) + tl.store(out_ptr47 + (x11), tmp397, None) + elif pid < num_xblocks_12: + pid_offset = pid - num_xblocks_11 + xnumel = 768 + rnumel = 1 + xoffset = pid_offset * XBLOCK + xindex = xoffset + tl.arange(0, XBLOCK)[:] + xmask = xindex < xnumel + x12 = xindex + tmp420 = tl.load(in_ptr60 + (x12), xmask) + tmp421 = tl.load(in_ptr61 + (x12), xmask) + tmp426 = tl.load(in_ptr62 + (x12), xmask) + tmp433 = tl.load(in_ptr63 + (x12), xmask) + tmp435 = tl.load(in_ptr64 + (0)) + tmp436 = tl.broadcast_to(tmp435, [XBLOCK]) + tmp422 = tmp421 - tmp420 + tmp423 = 0.09999999999999998 + tmp424 = tmp422 * tmp423 + tmp425 = tmp420 + tmp424 + tmp427 = 0.999 + tmp428 = tmp426 * tmp427 + tmp429 = tmp421 * tmp421 + tmp430 = 0.0010000000000000009 + tmp431 = tmp429 * tmp430 + tmp432 = tmp428 + tmp431 + tmp434 = libdevice.sqrt(tmp432) + tmp437 = libdevice.pow(tmp427, tmp436) + tmp438 = 1.0 + tmp439 = tmp437 - tmp438 + tmp440 = -tmp439 + tmp441 = libdevice.sqrt(tmp440) + tmp442 = tmp434 / tmp441 + tmp443 = 1e-08 + tmp444 = tmp442 + tmp443 + tmp445 = 0.9 + tmp446 = libdevice.pow(tmp445, tmp436) + tmp447 = tmp446 - tmp438 + tmp448 = 100.0 + tmp449 = tmp447 * tmp448 + tmp450 = tl.full([1], 1, tl.int32) + tmp451 = tmp450 
/ tmp449 + tmp452 = tmp444 / tmp451 + tmp453 = tmp425 / tmp452 + tmp454 = tmp433 + tmp453 + tl.store(out_ptr48 + (x12), tmp425, xmask) + tl.store(out_ptr50 + (x12), tmp454, xmask) + tl.store(out_ptr51 + (x12), tmp432, xmask) + elif pid < num_xblocks_13: + pid_offset = pid - num_xblocks_12 + xnumel = 768 + rnumel = 1 + xoffset = pid_offset * XBLOCK + xindex = xoffset + tl.arange(0, XBLOCK)[:] + xmask = xindex < xnumel + x13 = xindex + tmp455 = tl.load(in_ptr65 + (x13), xmask) + tmp456 = tl.load(in_ptr66 + (x13), xmask) + tmp461 = tl.load(in_ptr67 + (x13), xmask) + tmp468 = tl.load(in_ptr68 + (x13), xmask) + tmp470 = tl.load(in_ptr69 + (0)) + tmp471 = tl.broadcast_to(tmp470, [XBLOCK]) + tmp457 = tmp456 - tmp455 + tmp458 = 0.09999999999999998 + tmp459 = tmp457 * tmp458 + tmp460 = tmp455 + tmp459 + tmp462 = 0.999 + tmp463 = tmp461 * tmp462 + tmp464 = tmp456 * tmp456 + tmp465 = 0.0010000000000000009 + tmp466 = tmp464 * tmp465 + tmp467 = tmp463 + tmp466 + tmp469 = libdevice.sqrt(tmp467) + tmp472 = libdevice.pow(tmp462, tmp471) + tmp473 = 1.0 + tmp474 = tmp472 - tmp473 + tmp475 = -tmp474 + tmp476 = libdevice.sqrt(tmp475) + tmp477 = tmp469 / tmp476 + tmp478 = 1e-08 + tmp479 = tmp477 + tmp478 + tmp480 = 0.9 + tmp481 = libdevice.pow(tmp480, tmp471) + tmp482 = tmp481 - tmp473 + tmp483 = 100.0 + tmp484 = tmp482 * tmp483 + tmp485 = tl.full([1], 1, tl.int32) + tmp486 = tmp485 / tmp484 + tmp487 = tmp479 / tmp486 + tmp488 = tmp460 / tmp487 + tmp489 = tmp468 + tmp488 + tl.store(out_ptr52 + (x13), tmp460, xmask) + tl.store(out_ptr54 + (x13), tmp489, xmask) + tl.store(out_ptr55 + (x13), tmp467, xmask) + elif pid < num_xblocks_14: + pid_offset = pid - num_xblocks_13 + xnumel = 768 + rnumel = 1 + xoffset = pid_offset * XBLOCK + xindex = xoffset + tl.arange(0, XBLOCK)[:] + xmask = xindex < xnumel + x14 = xindex + tmp490 = tl.load(in_ptr70 + (x14), xmask) + tmp491 = tl.load(in_ptr71 + (x14), xmask) + tmp496 = tl.load(in_ptr72 + (x14), xmask) + tmp503 = tl.load(in_ptr73 + (x14), xmask) + tmp505 = tl.load(in_ptr74 + (0)) + tmp506 = tl.broadcast_to(tmp505, [XBLOCK]) + tmp492 = tmp491 - tmp490 + tmp493 = 0.09999999999999998 + tmp494 = tmp492 * tmp493 + tmp495 = tmp490 + tmp494 + tmp497 = 0.999 + tmp498 = tmp496 * tmp497 + tmp499 = tmp491 * tmp491 + tmp500 = 0.0010000000000000009 + tmp501 = tmp499 * tmp500 + tmp502 = tmp498 + tmp501 + tmp504 = libdevice.sqrt(tmp502) + tmp507 = libdevice.pow(tmp497, tmp506) + tmp508 = 1.0 + tmp509 = tmp507 - tmp508 + tmp510 = -tmp509 + tmp511 = libdevice.sqrt(tmp510) + tmp512 = tmp504 / tmp511 + tmp513 = 1e-08 + tmp514 = tmp512 + tmp513 + tmp515 = 0.9 + tmp516 = libdevice.pow(tmp515, tmp506) + tmp517 = tmp516 - tmp508 + tmp518 = 100.0 + tmp519 = tmp517 * tmp518 + tmp520 = tl.full([1], 1, tl.int32) + tmp521 = tmp520 / tmp519 + tmp522 = tmp514 / tmp521 + tmp523 = tmp495 / tmp522 + tmp524 = tmp503 + tmp523 + tl.store(out_ptr56 + (x14), tmp495, xmask) + tl.store(out_ptr58 + (x14), tmp524, xmask) + tl.store(out_ptr59 + (x14), tmp502, xmask) + elif pid < num_xblocks_15: + pid_offset = pid - num_xblocks_14 + xnumel = 2359296 + rnumel = 1 + xoffset = pid_offset * XBLOCK + xindex = xoffset + tl.arange(0, XBLOCK)[:] + xmask = tl.full([XBLOCK], True, tl.int1) + x15 = xindex + tmp525 = tl.load(in_ptr75 + (x15), None) + tmp526 = tl.load(in_ptr76 + (x15), None) + tmp531 = tl.load(in_ptr77 + (x15), None) + tmp538 = tl.load(in_ptr78 + (x15), None) + tmp540 = tl.load(in_ptr79 + (0)) + tmp541 = tl.broadcast_to(tmp540, [XBLOCK]) + tmp527 = tmp526 - tmp525 + tmp528 = 0.09999999999999998 + tmp529 = tmp527 
* tmp528 + tmp530 = tmp525 + tmp529 + tmp532 = 0.999 + tmp533 = tmp531 * tmp532 + tmp534 = tmp526 * tmp526 + tmp535 = 0.0010000000000000009 + tmp536 = tmp534 * tmp535 + tmp537 = tmp533 + tmp536 + tmp539 = libdevice.sqrt(tmp537) + tmp542 = libdevice.pow(tmp532, tmp541) + tmp543 = 1.0 + tmp544 = tmp542 - tmp543 + tmp545 = -tmp544 + tmp546 = libdevice.sqrt(tmp545) + tmp547 = tmp539 / tmp546 + tmp548 = 1e-08 + tmp549 = tmp547 + tmp548 + tmp550 = 0.9 + tmp551 = libdevice.pow(tmp550, tmp541) + tmp552 = tmp551 - tmp543 + tmp553 = 100.0 + tmp554 = tmp552 * tmp553 + tmp555 = tl.full([1], 1, tl.int32) + tmp556 = tmp555 / tmp554 + tmp557 = tmp549 / tmp556 + tmp558 = tmp530 / tmp557 + tmp559 = tmp538 + tmp558 + tl.store(out_ptr60 + (x15), tmp530, None) + tl.store(out_ptr62 + (x15), tmp559, None) + tl.store(out_ptr63 + (x15), tmp537, None) + elif pid < num_xblocks_16: + pid_offset = pid - num_xblocks_15 + xnumel = 3072 + rnumel = 1 + xoffset = pid_offset * XBLOCK + xindex = xoffset + tl.arange(0, XBLOCK)[:] + xmask = xindex < xnumel + x16 = xindex + tmp560 = tl.load(in_ptr80 + (x16), xmask) + tmp561 = tl.load(in_ptr81 + (x16), xmask) + tmp566 = tl.load(in_ptr82 + (x16), xmask) + tmp573 = tl.load(in_ptr83 + (x16), xmask) + tmp575 = tl.load(in_ptr84 + (0)) + tmp576 = tl.broadcast_to(tmp575, [XBLOCK]) + tmp562 = tmp561 - tmp560 + tmp563 = 0.09999999999999998 + tmp564 = tmp562 * tmp563 + tmp565 = tmp560 + tmp564 + tmp567 = 0.999 + tmp568 = tmp566 * tmp567 + tmp569 = tmp561 * tmp561 + tmp570 = 0.0010000000000000009 + tmp571 = tmp569 * tmp570 + tmp572 = tmp568 + tmp571 + tmp574 = libdevice.sqrt(tmp572) + tmp577 = libdevice.pow(tmp567, tmp576) + tmp578 = 1.0 + tmp579 = tmp577 - tmp578 + tmp580 = -tmp579 + tmp581 = libdevice.sqrt(tmp580) + tmp582 = tmp574 / tmp581 + tmp583 = 1e-08 + tmp584 = tmp582 + tmp583 + tmp585 = 0.9 + tmp586 = libdevice.pow(tmp585, tmp576) + tmp587 = tmp586 - tmp578 + tmp588 = 100.0 + tmp589 = tmp587 * tmp588 + tmp590 = tl.full([1], 1, tl.int32) + tmp591 = tmp590 / tmp589 + tmp592 = tmp584 / tmp591 + tmp593 = tmp565 / tmp592 + tmp594 = tmp573 + tmp593 + tl.store(out_ptr64 + (x16), tmp565, xmask) + tl.store(out_ptr66 + (x16), tmp594, xmask) + tl.store(out_ptr67 + (x16), tmp572, xmask) + elif pid < num_xblocks_17: + pid_offset = pid - num_xblocks_16 + xnumel = 2359296 + rnumel = 1 + xoffset = pid_offset * XBLOCK + xindex = xoffset + tl.arange(0, XBLOCK)[:] + xmask = tl.full([XBLOCK], True, tl.int1) + x17 = xindex + tmp595 = tl.load(in_ptr85 + (x17), None) + tmp596 = tl.load(in_ptr86 + (x17), None) + tmp601 = tl.load(in_ptr87 + (x17), None) + tmp608 = tl.load(in_ptr88 + (x17), None) + tmp610 = tl.load(in_ptr89 + (0)) + tmp611 = tl.broadcast_to(tmp610, [XBLOCK]) + tmp597 = tmp596 - tmp595 + tmp598 = 0.09999999999999998 + tmp599 = tmp597 * tmp598 + tmp600 = tmp595 + tmp599 + tmp602 = 0.999 + tmp603 = tmp601 * tmp602 + tmp604 = tmp596 * tmp596 + tmp605 = 0.0010000000000000009 + tmp606 = tmp604 * tmp605 + tmp607 = tmp603 + tmp606 + tmp609 = libdevice.sqrt(tmp607) + tmp612 = libdevice.pow(tmp602, tmp611) + tmp613 = 1.0 + tmp614 = tmp612 - tmp613 + tmp615 = -tmp614 + tmp616 = libdevice.sqrt(tmp615) + tmp617 = tmp609 / tmp616 + tmp618 = 1e-08 + tmp619 = tmp617 + tmp618 + tmp620 = 0.9 + tmp621 = libdevice.pow(tmp620, tmp611) + tmp622 = tmp621 - tmp613 + tmp623 = 100.0 + tmp624 = tmp622 * tmp623 + tmp625 = tl.full([1], 1, tl.int32) + tmp626 = tmp625 / tmp624 + tmp627 = tmp619 / tmp626 + tmp628 = tmp600 / tmp627 + tmp629 = tmp608 + tmp628 + tl.store(out_ptr68 + (x17), tmp600, None) + 
tl.store(out_ptr70 + (x17), tmp629, None) + tl.store(out_ptr71 + (x17), tmp607, None) + elif pid < num_xblocks_18: + pid_offset = pid - num_xblocks_17 + xnumel = 768 + rnumel = 1 + xoffset = pid_offset * XBLOCK + xindex = xoffset + tl.arange(0, XBLOCK)[:] + xmask = xindex < xnumel + x18 = xindex + tmp630 = tl.load(in_ptr90 + (x18), xmask) + tmp631 = tl.load(in_ptr91 + (x18), xmask) + tmp636 = tl.load(in_ptr92 + (x18), xmask) + tmp643 = tl.load(in_ptr93 + (x18), xmask) + tmp645 = tl.load(in_ptr94 + (0)) + tmp646 = tl.broadcast_to(tmp645, [XBLOCK]) + tmp632 = tmp631 - tmp630 + tmp633 = 0.09999999999999998 + tmp634 = tmp632 * tmp633 + tmp635 = tmp630 + tmp634 + tmp637 = 0.999 + tmp638 = tmp636 * tmp637 + tmp639 = tmp631 * tmp631 + tmp640 = 0.0010000000000000009 + tmp641 = tmp639 * tmp640 + tmp642 = tmp638 + tmp641 + tmp644 = libdevice.sqrt(tmp642) + tmp647 = libdevice.pow(tmp637, tmp646) + tmp648 = 1.0 + tmp649 = tmp647 - tmp648 + tmp650 = -tmp649 + tmp651 = libdevice.sqrt(tmp650) + tmp652 = tmp644 / tmp651 + tmp653 = 1e-08 + tmp654 = tmp652 + tmp653 + tmp655 = 0.9 + tmp656 = libdevice.pow(tmp655, tmp646) + tmp657 = tmp656 - tmp648 + tmp658 = 100.0 + tmp659 = tmp657 * tmp658 + tmp660 = tl.full([1], 1, tl.int32) + tmp661 = tmp660 / tmp659 + tmp662 = tmp654 / tmp661 + tmp663 = tmp635 / tmp662 + tmp664 = tmp643 + tmp663 + tl.store(out_ptr72 + (x18), tmp635, xmask) + tl.store(out_ptr74 + (x18), tmp664, xmask) + tl.store(out_ptr75 + (x18), tmp642, xmask) + else: + pass + ''', device_str='cuda') + + + # kernel path: /tmp/tmp2ln889l5/bh/cbhrsnjoafi77bxs2ehpwzsnbhqnh3f6deudav2zfp3cry6aeyeg.py + # Source Nodes: [], Original ATen: [] + + triton_for_fused_4 = async_compile.triton('triton_', ''' + import triton + import triton.language as tl + from triton.compiler.compiler import AttrsDescriptor + + from torch._inductor.runtime import triton_helpers, triton_heuristics + from torch._inductor.runtime.triton_helpers import libdevice, math as tl_math + from torch._inductor.runtime.hints import AutotuneHint, ReductionHint, TileHint, instance_descriptor, DeviceProperties + + @triton_heuristics.foreach( + num_warps=8, + triton_meta={'signature': {0: '*fp32', 1: '*fp32', 2: '*fp32', 3: '*fp32', 4: '*fp32', 5: '*fp32', 6: '*fp32', 7: '*fp32', 8: '*fp32', 9: '*fp32', 10: '*fp32', 11: '*fp32', 12: '*fp32', 13: '*fp32', 14: '*fp32', 15: '*fp32', 16: '*fp32', 17: '*fp32', 18: '*fp32', 19: '*fp32', 20: '*fp32', 21: '*fp32', 22: '*fp32', 23: '*fp32', 24: '*fp32', 25: '*fp32', 26: '*fp32', 27: '*fp32', 28: '*fp32', 29: '*fp32', 30: '*fp32', 31: '*fp32', 32: '*fp32', 33: '*fp32', 34: '*fp32', 35: '*fp32', 36: '*fp32', 37: '*fp32', 38: '*fp32', 39: '*fp32', 40: '*fp32', 41: '*fp32', 42: '*fp32', 43: '*fp32', 44: '*fp32', 45: '*fp32', 46: '*fp32', 47: '*fp32', 48: '*fp32', 49: '*fp32', 50: '*fp32', 51: '*fp32', 52: '*fp32', 53: '*fp32', 54: '*fp32', 55: '*fp32', 56: '*fp32', 57: '*fp32', 58: '*fp32', 59: '*fp32', 60: '*fp32', 61: '*fp32', 62: '*fp32', 63: '*fp32', 64: '*fp32', 65: '*fp32', 66: '*fp32', 67: '*fp32', 68: '*fp32', 69: '*fp32', 70: '*fp32', 71: '*fp32', 72: '*fp32', 73: '*fp32', 74: '*fp32', 75: '*fp32', 76: '*fp32', 77: '*fp32', 78: '*fp32', 79: '*fp32', 80: '*fp32', 81: '*fp32', 82: '*fp32', 83: '*fp32', 84: '*fp32', 85: '*fp32', 86: '*fp32', 87: '*fp32', 88: '*fp32', 89: '*fp32', 90: '*fp32', 91: '*fp32', 92: '*fp32', 93: '*fp32', 94: '*fp32', 95: '*fp32', 96: '*fp32', 97: '*fp32', 98: '*fp32', 99: '*fp32', 100: '*fp32', 101: '*fp32', 102: '*fp32', 103: '*fp32', 104: '*fp32', 105: '*fp32', 106: '*fp32', 
107: '*fp32', 108: '*fp32', 109: '*fp32', 110: '*fp32', 111: '*fp32', 112: '*fp32', 113: '*fp32', 114: '*fp32', 115: '*fp32', 116: '*fp32', 117: '*fp32', 118: '*fp32', 119: '*fp32', 120: '*fp32', 121: '*fp32', 122: '*fp32', 123: '*fp32', 124: '*fp32', 125: '*fp32', 126: '*fp32', 127: '*fp32', 128: '*fp32', 129: '*fp32', 130: '*fp32', 131: '*fp32', 132: '*fp32', 133: '*fp32', 134: '*fp32', 135: '*fp32', 136: '*fp32', 137: '*fp32', 138: '*fp32', 139: '*fp32', 140: '*fp32', 141: '*fp32', 142: '*fp32', 143: '*fp32', 144: '*fp32', 145: '*fp32', 146: '*fp32', 147: '*fp32', 148: '*fp32', 149: '*fp32', 150: '*fp32', 151: '*fp32'}, 'device': DeviceProperties(type='cuda', index=0, cc=90, major=9, regs_per_multiprocessor=65536, max_threads_per_multi_processor=2048, multi_processor_count=132), 'constants': {}, 'configs': [AttrsDescriptor(divisible_by_16=(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76, 77, 78, 79, 80, 81, 82, 83, 84, 85, 86, 87, 88, 89, 90, 91, 92, 93, 94, 95, 96, 97, 98, 99, 100, 101, 102, 103, 104, 105, 106, 107, 108, 109, 110, 111, 112, 113, 114, 115, 116, 117, 118, 119, 120, 121, 122, 123, 124, 125, 126, 127, 128, 129, 130, 131, 132, 133, 134, 135, 136, 137, 138, 139, 140, 141, 142, 143, 144, 145, 146, 147, 148, 149, 150, 151), equal_to_1=())]}, + inductor_meta={'kernel_name': 'triton_for_fused_4', 'mutated_arg_names': ['in_ptr0', 'in_ptr10', 'in_ptr12', 'in_ptr13', 'in_ptr15', 'in_ptr17', 'in_ptr18', 'in_ptr2', 'in_ptr20', 'in_ptr22', 'in_ptr23', 'in_ptr25', 'in_ptr27', 'in_ptr28', 'in_ptr3', 'in_ptr30', 'in_ptr32', 'in_ptr33', 'in_ptr35', 'in_ptr37', 'in_ptr38', 'in_ptr40', 'in_ptr42', 'in_ptr43', 'in_ptr45', 'in_ptr47', 'in_ptr48', 'in_ptr5', 'in_ptr50', 'in_ptr52', 'in_ptr53', 'in_ptr55', 'in_ptr57', 'in_ptr58', 'in_ptr60', 'in_ptr62', 'in_ptr63', 'in_ptr65', 'in_ptr67', 'in_ptr68', 'in_ptr7', 'in_ptr70', 'in_ptr72', 'in_ptr73', 'in_ptr75', 'in_ptr77', 'in_ptr78', 'in_ptr8', 'in_ptr80', 'in_ptr82', 'in_ptr83', 'in_ptr85', 'in_ptr87', 'in_ptr88', 'in_ptr90', 'in_ptr92', 'in_ptr93', 'out_ptr0', 'out_ptr10', 'out_ptr11', 'out_ptr12', 'out_ptr14', 'out_ptr15', 'out_ptr16', 'out_ptr18', 'out_ptr19', 'out_ptr2', 'out_ptr20', 'out_ptr22', 'out_ptr23', 'out_ptr24', 'out_ptr26', 'out_ptr27', 'out_ptr28', 'out_ptr3', 'out_ptr30', 'out_ptr31', 'out_ptr32', 'out_ptr34', 'out_ptr35', 'out_ptr36', 'out_ptr38', 'out_ptr39', 'out_ptr4', 'out_ptr40', 'out_ptr42', 'out_ptr43', 'out_ptr44', 'out_ptr46', 'out_ptr47', 'out_ptr48', 'out_ptr50', 'out_ptr51', 'out_ptr52', 'out_ptr54', 'out_ptr55', 'out_ptr56', 'out_ptr58', 'out_ptr59', 'out_ptr6', 'out_ptr60', 'out_ptr62', 'out_ptr63', 'out_ptr64', 'out_ptr66', 'out_ptr67', 'out_ptr68', 'out_ptr7', 'out_ptr70', 'out_ptr71', 'out_ptr72', 'out_ptr74', 'out_ptr75', 'out_ptr8'], 'backend_hash': 'B0714AF1B5CA55D285C0AA74C7669268EFE45FDD9B7A0A70183B8D89AC8FCAC4', 'are_deterministic_algorithms_enabled': False, 'assert_indirect_indexing': True, 'autotune_local_cache': True, 'autotune_pointwise': True, 'autotune_remote_cache': None, 'force_disable_caches': False, 'dynamic_scale_rblock': True, 'max_autotune': False, 'max_autotune_pointwise': False, 'min_split_scan_rblock': 256, 'spill_threshold': 16, 'store_cubin': False}, + ) + @triton.jit + def triton_(in_ptr0, in_ptr1, in_ptr2, in_ptr3, in_ptr4, in_ptr5, in_ptr6, 
in_ptr7, in_ptr8, in_ptr9, in_ptr10, in_ptr11, in_ptr12, in_ptr13, in_ptr14, in_ptr15, in_ptr16, in_ptr17, in_ptr18, in_ptr19, in_ptr20, in_ptr21, in_ptr22, in_ptr23, in_ptr24, in_ptr25, in_ptr26, in_ptr27, in_ptr28, in_ptr29, in_ptr30, in_ptr31, in_ptr32, in_ptr33, in_ptr34, in_ptr35, in_ptr36, in_ptr37, in_ptr38, in_ptr39, in_ptr40, in_ptr41, in_ptr42, in_ptr43, in_ptr44, in_ptr45, in_ptr46, in_ptr47, in_ptr48, in_ptr49, in_ptr50, in_ptr51, in_ptr52, in_ptr53, in_ptr54, in_ptr55, in_ptr56, in_ptr57, in_ptr58, in_ptr59, in_ptr60, in_ptr61, in_ptr62, in_ptr63, in_ptr64, in_ptr65, in_ptr66, in_ptr67, in_ptr68, in_ptr69, in_ptr70, in_ptr71, in_ptr72, in_ptr73, in_ptr74, in_ptr75, in_ptr76, in_ptr77, in_ptr78, in_ptr79, in_ptr80, in_ptr81, in_ptr82, in_ptr83, in_ptr84, in_ptr85, in_ptr86, in_ptr87, in_ptr88, in_ptr89, in_ptr90, in_ptr91, in_ptr92, in_ptr93, in_ptr94, out_ptr0, out_ptr2, out_ptr3, out_ptr4, out_ptr6, out_ptr7, out_ptr8, out_ptr10, out_ptr11, out_ptr12, out_ptr14, out_ptr15, out_ptr16, out_ptr18, out_ptr19, out_ptr20, out_ptr22, out_ptr23, out_ptr24, out_ptr26, out_ptr27, out_ptr28, out_ptr30, out_ptr31, out_ptr32, out_ptr34, out_ptr35, out_ptr36, out_ptr38, out_ptr39, out_ptr40, out_ptr42, out_ptr43, out_ptr44, out_ptr46, out_ptr47, out_ptr48, out_ptr50, out_ptr51, out_ptr52, out_ptr54, out_ptr55, out_ptr56, out_ptr58, out_ptr59, out_ptr60, out_ptr62, out_ptr63, out_ptr64, out_ptr66, out_ptr67, out_ptr68, out_ptr70, out_ptr71, out_ptr72, out_ptr74, out_ptr75): + pid = tl.program_id(0) + XBLOCK: tl.constexpr = 1024 + num_xblocks_0 = tl.cdiv(768, XBLOCK) + num_xblocks_1 = num_xblocks_0 + tl.cdiv(768, XBLOCK) + num_xblocks_2 = num_xblocks_1 + tl.cdiv(1769472, XBLOCK) + num_xblocks_3 = num_xblocks_2 + tl.cdiv(2304, XBLOCK) + num_xblocks_4 = num_xblocks_3 + tl.cdiv(589824, XBLOCK) + num_xblocks_5 = num_xblocks_4 + tl.cdiv(768, XBLOCK) + num_xblocks_6 = num_xblocks_5 + tl.cdiv(768, XBLOCK) + num_xblocks_7 = num_xblocks_6 + tl.cdiv(768, XBLOCK) + num_xblocks_8 = num_xblocks_7 + tl.cdiv(2359296, XBLOCK) + num_xblocks_9 = num_xblocks_8 + tl.cdiv(3072, XBLOCK) + num_xblocks_10 = num_xblocks_9 + tl.cdiv(2359296, XBLOCK) + num_xblocks_11 = num_xblocks_10 + tl.cdiv(768, XBLOCK) + num_xblocks_12 = num_xblocks_11 + tl.cdiv(768, XBLOCK) + num_xblocks_13 = num_xblocks_12 + tl.cdiv(768, XBLOCK) + num_xblocks_14 = num_xblocks_13 + tl.cdiv(1769472, XBLOCK) + num_xblocks_15 = num_xblocks_14 + tl.cdiv(2304, XBLOCK) + num_xblocks_16 = num_xblocks_15 + tl.cdiv(589824, XBLOCK) + num_xblocks_17 = num_xblocks_16 + tl.cdiv(768, XBLOCK) + num_xblocks_18 = num_xblocks_17 + tl.cdiv(768, XBLOCK) + if pid < num_xblocks_0: + pid_offset = pid + xnumel = 768 + rnumel = 1 + xoffset = pid_offset * XBLOCK + xindex = xoffset + tl.arange(0, XBLOCK)[:] + xmask = xindex < xnumel + x0 = xindex + tmp0 = tl.load(in_ptr0 + (x0), xmask) + tmp1 = tl.load(in_ptr1 + (x0), xmask) + tmp6 = tl.load(in_ptr2 + (x0), xmask) + tmp13 = tl.load(in_ptr3 + (x0), xmask) + tmp15 = tl.load(in_ptr4 + (0)) + tmp16 = tl.broadcast_to(tmp15, [XBLOCK]) + tmp2 = tmp1 - tmp0 + tmp3 = 0.09999999999999998 + tmp4 = tmp2 * tmp3 + tmp5 = tmp0 + tmp4 + tmp7 = 0.999 + tmp8 = tmp6 * tmp7 + tmp9 = tmp1 * tmp1 + tmp10 = 0.0010000000000000009 + tmp11 = tmp9 * tmp10 + tmp12 = tmp8 + tmp11 + tmp14 = libdevice.sqrt(tmp12) + tmp17 = libdevice.pow(tmp7, tmp16) + tmp18 = 1.0 + tmp19 = tmp17 - tmp18 + tmp20 = -tmp19 + tmp21 = libdevice.sqrt(tmp20) + tmp22 = tmp14 / tmp21 + tmp23 = 1e-08 + tmp24 = tmp22 + tmp23 + tmp25 = 0.9 + tmp26 = libdevice.pow(tmp25, tmp16) + 
tmp27 = tmp26 - tmp18 + tmp28 = 100.0 + tmp29 = tmp27 * tmp28 + tmp30 = tl.full([1], 1, tl.int32) + tmp31 = tmp30 / tmp29 + tmp32 = tmp24 / tmp31 + tmp33 = tmp5 / tmp32 + tmp34 = tmp13 + tmp33 + tl.store(out_ptr0 + (x0), tmp5, xmask) + tl.store(out_ptr2 + (x0), tmp34, xmask) + tl.store(out_ptr3 + (x0), tmp12, xmask) + elif pid < num_xblocks_1: + pid_offset = pid - num_xblocks_0 + xnumel = 768 + rnumel = 1 + xoffset = pid_offset * XBLOCK + xindex = xoffset + tl.arange(0, XBLOCK)[:] + xmask = xindex < xnumel + x1 = xindex + tmp35 = tl.load(in_ptr5 + (x1), xmask) + tmp36 = tl.load(in_ptr6 + (x1), xmask) + tmp41 = tl.load(in_ptr7 + (x1), xmask) + tmp48 = tl.load(in_ptr8 + (x1), xmask) + tmp50 = tl.load(in_ptr9 + (0)) + tmp51 = tl.broadcast_to(tmp50, [XBLOCK]) + tmp37 = tmp36 - tmp35 + tmp38 = 0.09999999999999998 + tmp39 = tmp37 * tmp38 + tmp40 = tmp35 + tmp39 + tmp42 = 0.999 + tmp43 = tmp41 * tmp42 + tmp44 = tmp36 * tmp36 + tmp45 = 0.0010000000000000009 + tmp46 = tmp44 * tmp45 + tmp47 = tmp43 + tmp46 + tmp49 = libdevice.sqrt(tmp47) + tmp52 = libdevice.pow(tmp42, tmp51) + tmp53 = 1.0 + tmp54 = tmp52 - tmp53 + tmp55 = -tmp54 + tmp56 = libdevice.sqrt(tmp55) + tmp57 = tmp49 / tmp56 + tmp58 = 1e-08 + tmp59 = tmp57 + tmp58 + tmp60 = 0.9 + tmp61 = libdevice.pow(tmp60, tmp51) + tmp62 = tmp61 - tmp53 + tmp63 = 100.0 + tmp64 = tmp62 * tmp63 + tmp65 = tl.full([1], 1, tl.int32) + tmp66 = tmp65 / tmp64 + tmp67 = tmp59 / tmp66 + tmp68 = tmp40 / tmp67 + tmp69 = tmp48 + tmp68 + tl.store(out_ptr4 + (x1), tmp40, xmask) + tl.store(out_ptr6 + (x1), tmp69, xmask) + tl.store(out_ptr7 + (x1), tmp47, xmask) + elif pid < num_xblocks_2: + pid_offset = pid - num_xblocks_1 + xnumel = 1769472 + rnumel = 1 + xoffset = pid_offset * XBLOCK + xindex = xoffset + tl.arange(0, XBLOCK)[:] + xmask = tl.full([XBLOCK], True, tl.int1) + x2 = xindex + tmp70 = tl.load(in_ptr10 + (x2), None) + tmp71 = tl.load(in_ptr11 + (x2), None) + tmp76 = tl.load(in_ptr12 + (x2), None) + tmp83 = tl.load(in_ptr13 + (x2), None) + tmp85 = tl.load(in_ptr14 + (0)) + tmp86 = tl.broadcast_to(tmp85, [XBLOCK]) + tmp72 = tmp71 - tmp70 + tmp73 = 0.09999999999999998 + tmp74 = tmp72 * tmp73 + tmp75 = tmp70 + tmp74 + tmp77 = 0.999 + tmp78 = tmp76 * tmp77 + tmp79 = tmp71 * tmp71 + tmp80 = 0.0010000000000000009 + tmp81 = tmp79 * tmp80 + tmp82 = tmp78 + tmp81 + tmp84 = libdevice.sqrt(tmp82) + tmp87 = libdevice.pow(tmp77, tmp86) + tmp88 = 1.0 + tmp89 = tmp87 - tmp88 + tmp90 = -tmp89 + tmp91 = libdevice.sqrt(tmp90) + tmp92 = tmp84 / tmp91 + tmp93 = 1e-08 + tmp94 = tmp92 + tmp93 + tmp95 = 0.9 + tmp96 = libdevice.pow(tmp95, tmp86) + tmp97 = tmp96 - tmp88 + tmp98 = 100.0 + tmp99 = tmp97 * tmp98 + tmp100 = tl.full([1], 1, tl.int32) + tmp101 = tmp100 / tmp99 + tmp102 = tmp94 / tmp101 + tmp103 = tmp75 / tmp102 + tmp104 = tmp83 + tmp103 + tl.store(out_ptr8 + (x2), tmp75, None) + tl.store(out_ptr10 + (x2), tmp104, None) + tl.store(out_ptr11 + (x2), tmp82, None) + elif pid < num_xblocks_3: + pid_offset = pid - num_xblocks_2 + xnumel = 2304 + rnumel = 1 + xoffset = pid_offset * XBLOCK + xindex = xoffset + tl.arange(0, XBLOCK)[:] + xmask = xindex < xnumel + x3 = xindex + tmp105 = tl.load(in_ptr15 + (x3), xmask) + tmp106 = tl.load(in_ptr16 + (x3), xmask) + tmp111 = tl.load(in_ptr17 + (x3), xmask) + tmp118 = tl.load(in_ptr18 + (x3), xmask) + tmp120 = tl.load(in_ptr19 + (0)) + tmp121 = tl.broadcast_to(tmp120, [XBLOCK]) + tmp107 = tmp106 - tmp105 + tmp108 = 0.09999999999999998 + tmp109 = tmp107 * tmp108 + tmp110 = tmp105 + tmp109 + tmp112 = 0.999 + tmp113 = tmp111 * tmp112 + tmp114 = 
tmp106 * tmp106 + tmp115 = 0.0010000000000000009 + tmp116 = tmp114 * tmp115 + tmp117 = tmp113 + tmp116 + tmp119 = libdevice.sqrt(tmp117) + tmp122 = libdevice.pow(tmp112, tmp121) + tmp123 = 1.0 + tmp124 = tmp122 - tmp123 + tmp125 = -tmp124 + tmp126 = libdevice.sqrt(tmp125) + tmp127 = tmp119 / tmp126 + tmp128 = 1e-08 + tmp129 = tmp127 + tmp128 + tmp130 = 0.9 + tmp131 = libdevice.pow(tmp130, tmp121) + tmp132 = tmp131 - tmp123 + tmp133 = 100.0 + tmp134 = tmp132 * tmp133 + tmp135 = tl.full([1], 1, tl.int32) + tmp136 = tmp135 / tmp134 + tmp137 = tmp129 / tmp136 + tmp138 = tmp110 / tmp137 + tmp139 = tmp118 + tmp138 + tl.store(out_ptr12 + (x3), tmp110, xmask) + tl.store(out_ptr14 + (x3), tmp139, xmask) + tl.store(out_ptr15 + (x3), tmp117, xmask) + elif pid < num_xblocks_4: + pid_offset = pid - num_xblocks_3 + xnumel = 589824 + rnumel = 1 + xoffset = pid_offset * XBLOCK + xindex = xoffset + tl.arange(0, XBLOCK)[:] + xmask = tl.full([XBLOCK], True, tl.int1) + x4 = xindex + tmp140 = tl.load(in_ptr20 + (x4), None) + tmp141 = tl.load(in_ptr21 + (x4), None) + tmp146 = tl.load(in_ptr22 + (x4), None) + tmp153 = tl.load(in_ptr23 + (x4), None) + tmp155 = tl.load(in_ptr24 + (0)) + tmp156 = tl.broadcast_to(tmp155, [XBLOCK]) + tmp142 = tmp141 - tmp140 + tmp143 = 0.09999999999999998 + tmp144 = tmp142 * tmp143 + tmp145 = tmp140 + tmp144 + tmp147 = 0.999 + tmp148 = tmp146 * tmp147 + tmp149 = tmp141 * tmp141 + tmp150 = 0.0010000000000000009 + tmp151 = tmp149 * tmp150 + tmp152 = tmp148 + tmp151 + tmp154 = libdevice.sqrt(tmp152) + tmp157 = libdevice.pow(tmp147, tmp156) + tmp158 = 1.0 + tmp159 = tmp157 - tmp158 + tmp160 = -tmp159 + tmp161 = libdevice.sqrt(tmp160) + tmp162 = tmp154 / tmp161 + tmp163 = 1e-08 + tmp164 = tmp162 + tmp163 + tmp165 = 0.9 + tmp166 = libdevice.pow(tmp165, tmp156) + tmp167 = tmp166 - tmp158 + tmp168 = 100.0 + tmp169 = tmp167 * tmp168 + tmp170 = tl.full([1], 1, tl.int32) + tmp171 = tmp170 / tmp169 + tmp172 = tmp164 / tmp171 + tmp173 = tmp145 / tmp172 + tmp174 = tmp153 + tmp173 + tl.store(out_ptr16 + (x4), tmp145, None) + tl.store(out_ptr18 + (x4), tmp174, None) + tl.store(out_ptr19 + (x4), tmp152, None) + elif pid < num_xblocks_5: + pid_offset = pid - num_xblocks_4 + xnumel = 768 + rnumel = 1 + xoffset = pid_offset * XBLOCK + xindex = xoffset + tl.arange(0, XBLOCK)[:] + xmask = xindex < xnumel + x5 = xindex + tmp175 = tl.load(in_ptr25 + (x5), xmask) + tmp176 = tl.load(in_ptr26 + (x5), xmask) + tmp181 = tl.load(in_ptr27 + (x5), xmask) + tmp188 = tl.load(in_ptr28 + (x5), xmask) + tmp190 = tl.load(in_ptr29 + (0)) + tmp191 = tl.broadcast_to(tmp190, [XBLOCK]) + tmp177 = tmp176 - tmp175 + tmp178 = 0.09999999999999998 + tmp179 = tmp177 * tmp178 + tmp180 = tmp175 + tmp179 + tmp182 = 0.999 + tmp183 = tmp181 * tmp182 + tmp184 = tmp176 * tmp176 + tmp185 = 0.0010000000000000009 + tmp186 = tmp184 * tmp185 + tmp187 = tmp183 + tmp186 + tmp189 = libdevice.sqrt(tmp187) + tmp192 = libdevice.pow(tmp182, tmp191) + tmp193 = 1.0 + tmp194 = tmp192 - tmp193 + tmp195 = -tmp194 + tmp196 = libdevice.sqrt(tmp195) + tmp197 = tmp189 / tmp196 + tmp198 = 1e-08 + tmp199 = tmp197 + tmp198 + tmp200 = 0.9 + tmp201 = libdevice.pow(tmp200, tmp191) + tmp202 = tmp201 - tmp193 + tmp203 = 100.0 + tmp204 = tmp202 * tmp203 + tmp205 = tl.full([1], 1, tl.int32) + tmp206 = tmp205 / tmp204 + tmp207 = tmp199 / tmp206 + tmp208 = tmp180 / tmp207 + tmp209 = tmp188 + tmp208 + tl.store(out_ptr20 + (x5), tmp180, xmask) + tl.store(out_ptr22 + (x5), tmp209, xmask) + tl.store(out_ptr23 + (x5), tmp187, xmask) + elif pid < num_xblocks_6: + pid_offset = 
pid - num_xblocks_5 + xnumel = 768 + rnumel = 1 + xoffset = pid_offset * XBLOCK + xindex = xoffset + tl.arange(0, XBLOCK)[:] + xmask = xindex < xnumel + x6 = xindex + tmp210 = tl.load(in_ptr30 + (x6), xmask) + tmp211 = tl.load(in_ptr31 + (x6), xmask) + tmp216 = tl.load(in_ptr32 + (x6), xmask) + tmp223 = tl.load(in_ptr33 + (x6), xmask) + tmp225 = tl.load(in_ptr34 + (0)) + tmp226 = tl.broadcast_to(tmp225, [XBLOCK]) + tmp212 = tmp211 - tmp210 + tmp213 = 0.09999999999999998 + tmp214 = tmp212 * tmp213 + tmp215 = tmp210 + tmp214 + tmp217 = 0.999 + tmp218 = tmp216 * tmp217 + tmp219 = tmp211 * tmp211 + tmp220 = 0.0010000000000000009 + tmp221 = tmp219 * tmp220 + tmp222 = tmp218 + tmp221 + tmp224 = libdevice.sqrt(tmp222) + tmp227 = libdevice.pow(tmp217, tmp226) + tmp228 = 1.0 + tmp229 = tmp227 - tmp228 + tmp230 = -tmp229 + tmp231 = libdevice.sqrt(tmp230) + tmp232 = tmp224 / tmp231 + tmp233 = 1e-08 + tmp234 = tmp232 + tmp233 + tmp235 = 0.9 + tmp236 = libdevice.pow(tmp235, tmp226) + tmp237 = tmp236 - tmp228 + tmp238 = 100.0 + tmp239 = tmp237 * tmp238 + tmp240 = tl.full([1], 1, tl.int32) + tmp241 = tmp240 / tmp239 + tmp242 = tmp234 / tmp241 + tmp243 = tmp215 / tmp242 + tmp244 = tmp223 + tmp243 + tl.store(out_ptr24 + (x6), tmp215, xmask) + tl.store(out_ptr26 + (x6), tmp244, xmask) + tl.store(out_ptr27 + (x6), tmp222, xmask) + elif pid < num_xblocks_7: + pid_offset = pid - num_xblocks_6 + xnumel = 768 + rnumel = 1 + xoffset = pid_offset * XBLOCK + xindex = xoffset + tl.arange(0, XBLOCK)[:] + xmask = xindex < xnumel + x7 = xindex + tmp245 = tl.load(in_ptr35 + (x7), xmask) + tmp246 = tl.load(in_ptr36 + (x7), xmask) + tmp251 = tl.load(in_ptr37 + (x7), xmask) + tmp258 = tl.load(in_ptr38 + (x7), xmask) + tmp260 = tl.load(in_ptr39 + (0)) + tmp261 = tl.broadcast_to(tmp260, [XBLOCK]) + tmp247 = tmp246 - tmp245 + tmp248 = 0.09999999999999998 + tmp249 = tmp247 * tmp248 + tmp250 = tmp245 + tmp249 + tmp252 = 0.999 + tmp253 = tmp251 * tmp252 + tmp254 = tmp246 * tmp246 + tmp255 = 0.0010000000000000009 + tmp256 = tmp254 * tmp255 + tmp257 = tmp253 + tmp256 + tmp259 = libdevice.sqrt(tmp257) + tmp262 = libdevice.pow(tmp252, tmp261) + tmp263 = 1.0 + tmp264 = tmp262 - tmp263 + tmp265 = -tmp264 + tmp266 = libdevice.sqrt(tmp265) + tmp267 = tmp259 / tmp266 + tmp268 = 1e-08 + tmp269 = tmp267 + tmp268 + tmp270 = 0.9 + tmp271 = libdevice.pow(tmp270, tmp261) + tmp272 = tmp271 - tmp263 + tmp273 = 100.0 + tmp274 = tmp272 * tmp273 + tmp275 = tl.full([1], 1, tl.int32) + tmp276 = tmp275 / tmp274 + tmp277 = tmp269 / tmp276 + tmp278 = tmp250 / tmp277 + tmp279 = tmp258 + tmp278 + tl.store(out_ptr28 + (x7), tmp250, xmask) + tl.store(out_ptr30 + (x7), tmp279, xmask) + tl.store(out_ptr31 + (x7), tmp257, xmask) + elif pid < num_xblocks_8: + pid_offset = pid - num_xblocks_7 + xnumel = 2359296 + rnumel = 1 + xoffset = pid_offset * XBLOCK + xindex = xoffset + tl.arange(0, XBLOCK)[:] + xmask = tl.full([XBLOCK], True, tl.int1) + x8 = xindex + tmp280 = tl.load(in_ptr40 + (x8), None) + tmp281 = tl.load(in_ptr41 + (x8), None) + tmp286 = tl.load(in_ptr42 + (x8), None) + tmp293 = tl.load(in_ptr43 + (x8), None) + tmp295 = tl.load(in_ptr44 + (0)) + tmp296 = tl.broadcast_to(tmp295, [XBLOCK]) + tmp282 = tmp281 - tmp280 + tmp283 = 0.09999999999999998 + tmp284 = tmp282 * tmp283 + tmp285 = tmp280 + tmp284 + tmp287 = 0.999 + tmp288 = tmp286 * tmp287 + tmp289 = tmp281 * tmp281 + tmp290 = 0.0010000000000000009 + tmp291 = tmp289 * tmp290 + tmp292 = tmp288 + tmp291 + tmp294 = libdevice.sqrt(tmp292) + tmp297 = libdevice.pow(tmp287, tmp296) + tmp298 = 1.0 + tmp299 = 
tmp297 - tmp298 + tmp300 = -tmp299 + tmp301 = libdevice.sqrt(tmp300) + tmp302 = tmp294 / tmp301 + tmp303 = 1e-08 + tmp304 = tmp302 + tmp303 + tmp305 = 0.9 + tmp306 = libdevice.pow(tmp305, tmp296) + tmp307 = tmp306 - tmp298 + tmp308 = 100.0 + tmp309 = tmp307 * tmp308 + tmp310 = tl.full([1], 1, tl.int32) + tmp311 = tmp310 / tmp309 + tmp312 = tmp304 / tmp311 + tmp313 = tmp285 / tmp312 + tmp314 = tmp293 + tmp313 + tl.store(out_ptr32 + (x8), tmp285, None) + tl.store(out_ptr34 + (x8), tmp314, None) + tl.store(out_ptr35 + (x8), tmp292, None) + elif pid < num_xblocks_9: + pid_offset = pid - num_xblocks_8 + xnumel = 3072 + rnumel = 1 + xoffset = pid_offset * XBLOCK + xindex = xoffset + tl.arange(0, XBLOCK)[:] + xmask = xindex < xnumel + x9 = xindex + tmp315 = tl.load(in_ptr45 + (x9), xmask) + tmp316 = tl.load(in_ptr46 + (x9), xmask) + tmp321 = tl.load(in_ptr47 + (x9), xmask) + tmp328 = tl.load(in_ptr48 + (x9), xmask) + tmp330 = tl.load(in_ptr49 + (0)) + tmp331 = tl.broadcast_to(tmp330, [XBLOCK]) + tmp317 = tmp316 - tmp315 + tmp318 = 0.09999999999999998 + tmp319 = tmp317 * tmp318 + tmp320 = tmp315 + tmp319 + tmp322 = 0.999 + tmp323 = tmp321 * tmp322 + tmp324 = tmp316 * tmp316 + tmp325 = 0.0010000000000000009 + tmp326 = tmp324 * tmp325 + tmp327 = tmp323 + tmp326 + tmp329 = libdevice.sqrt(tmp327) + tmp332 = libdevice.pow(tmp322, tmp331) + tmp333 = 1.0 + tmp334 = tmp332 - tmp333 + tmp335 = -tmp334 + tmp336 = libdevice.sqrt(tmp335) + tmp337 = tmp329 / tmp336 + tmp338 = 1e-08 + tmp339 = tmp337 + tmp338 + tmp340 = 0.9 + tmp341 = libdevice.pow(tmp340, tmp331) + tmp342 = tmp341 - tmp333 + tmp343 = 100.0 + tmp344 = tmp342 * tmp343 + tmp345 = tl.full([1], 1, tl.int32) + tmp346 = tmp345 / tmp344 + tmp347 = tmp339 / tmp346 + tmp348 = tmp320 / tmp347 + tmp349 = tmp328 + tmp348 + tl.store(out_ptr36 + (x9), tmp320, xmask) + tl.store(out_ptr38 + (x9), tmp349, xmask) + tl.store(out_ptr39 + (x9), tmp327, xmask) + elif pid < num_xblocks_10: + pid_offset = pid - num_xblocks_9 + xnumel = 2359296 + rnumel = 1 + xoffset = pid_offset * XBLOCK + xindex = xoffset + tl.arange(0, XBLOCK)[:] + xmask = tl.full([XBLOCK], True, tl.int1) + x10 = xindex + tmp350 = tl.load(in_ptr50 + (x10), None) + tmp351 = tl.load(in_ptr51 + (x10), None) + tmp356 = tl.load(in_ptr52 + (x10), None) + tmp363 = tl.load(in_ptr53 + (x10), None) + tmp365 = tl.load(in_ptr54 + (0)) + tmp366 = tl.broadcast_to(tmp365, [XBLOCK]) + tmp352 = tmp351 - tmp350 + tmp353 = 0.09999999999999998 + tmp354 = tmp352 * tmp353 + tmp355 = tmp350 + tmp354 + tmp357 = 0.999 + tmp358 = tmp356 * tmp357 + tmp359 = tmp351 * tmp351 + tmp360 = 0.0010000000000000009 + tmp361 = tmp359 * tmp360 + tmp362 = tmp358 + tmp361 + tmp364 = libdevice.sqrt(tmp362) + tmp367 = libdevice.pow(tmp357, tmp366) + tmp368 = 1.0 + tmp369 = tmp367 - tmp368 + tmp370 = -tmp369 + tmp371 = libdevice.sqrt(tmp370) + tmp372 = tmp364 / tmp371 + tmp373 = 1e-08 + tmp374 = tmp372 + tmp373 + tmp375 = 0.9 + tmp376 = libdevice.pow(tmp375, tmp366) + tmp377 = tmp376 - tmp368 + tmp378 = 100.0 + tmp379 = tmp377 * tmp378 + tmp380 = tl.full([1], 1, tl.int32) + tmp381 = tmp380 / tmp379 + tmp382 = tmp374 / tmp381 + tmp383 = tmp355 / tmp382 + tmp384 = tmp363 + tmp383 + tl.store(out_ptr40 + (x10), tmp355, None) + tl.store(out_ptr42 + (x10), tmp384, None) + tl.store(out_ptr43 + (x10), tmp362, None) + elif pid < num_xblocks_11: + pid_offset = pid - num_xblocks_10 + xnumel = 768 + rnumel = 1 + xoffset = pid_offset * XBLOCK + xindex = xoffset + tl.arange(0, XBLOCK)[:] + xmask = xindex < xnumel + x11 = xindex + tmp385 = tl.load(in_ptr55 
+ (x11), xmask) + tmp386 = tl.load(in_ptr56 + (x11), xmask) + tmp391 = tl.load(in_ptr57 + (x11), xmask) + tmp398 = tl.load(in_ptr58 + (x11), xmask) + tmp400 = tl.load(in_ptr59 + (0)) + tmp401 = tl.broadcast_to(tmp400, [XBLOCK]) + tmp387 = tmp386 - tmp385 + tmp388 = 0.09999999999999998 + tmp389 = tmp387 * tmp388 + tmp390 = tmp385 + tmp389 + tmp392 = 0.999 + tmp393 = tmp391 * tmp392 + tmp394 = tmp386 * tmp386 + tmp395 = 0.0010000000000000009 + tmp396 = tmp394 * tmp395 + tmp397 = tmp393 + tmp396 + tmp399 = libdevice.sqrt(tmp397) + tmp402 = libdevice.pow(tmp392, tmp401) + tmp403 = 1.0 + tmp404 = tmp402 - tmp403 + tmp405 = -tmp404 + tmp406 = libdevice.sqrt(tmp405) + tmp407 = tmp399 / tmp406 + tmp408 = 1e-08 + tmp409 = tmp407 + tmp408 + tmp410 = 0.9 + tmp411 = libdevice.pow(tmp410, tmp401) + tmp412 = tmp411 - tmp403 + tmp413 = 100.0 + tmp414 = tmp412 * tmp413 + tmp415 = tl.full([1], 1, tl.int32) + tmp416 = tmp415 / tmp414 + tmp417 = tmp409 / tmp416 + tmp418 = tmp390 / tmp417 + tmp419 = tmp398 + tmp418 + tl.store(out_ptr44 + (x11), tmp390, xmask) + tl.store(out_ptr46 + (x11), tmp419, xmask) + tl.store(out_ptr47 + (x11), tmp397, xmask) + elif pid < num_xblocks_12: + pid_offset = pid - num_xblocks_11 + xnumel = 768 + rnumel = 1 + xoffset = pid_offset * XBLOCK + xindex = xoffset + tl.arange(0, XBLOCK)[:] + xmask = xindex < xnumel + x12 = xindex + tmp420 = tl.load(in_ptr60 + (x12), xmask) + tmp421 = tl.load(in_ptr61 + (x12), xmask) + tmp426 = tl.load(in_ptr62 + (x12), xmask) + tmp433 = tl.load(in_ptr63 + (x12), xmask) + tmp435 = tl.load(in_ptr64 + (0)) + tmp436 = tl.broadcast_to(tmp435, [XBLOCK]) + tmp422 = tmp421 - tmp420 + tmp423 = 0.09999999999999998 + tmp424 = tmp422 * tmp423 + tmp425 = tmp420 + tmp424 + tmp427 = 0.999 + tmp428 = tmp426 * tmp427 + tmp429 = tmp421 * tmp421 + tmp430 = 0.0010000000000000009 + tmp431 = tmp429 * tmp430 + tmp432 = tmp428 + tmp431 + tmp434 = libdevice.sqrt(tmp432) + tmp437 = libdevice.pow(tmp427, tmp436) + tmp438 = 1.0 + tmp439 = tmp437 - tmp438 + tmp440 = -tmp439 + tmp441 = libdevice.sqrt(tmp440) + tmp442 = tmp434 / tmp441 + tmp443 = 1e-08 + tmp444 = tmp442 + tmp443 + tmp445 = 0.9 + tmp446 = libdevice.pow(tmp445, tmp436) + tmp447 = tmp446 - tmp438 + tmp448 = 100.0 + tmp449 = tmp447 * tmp448 + tmp450 = tl.full([1], 1, tl.int32) + tmp451 = tmp450 / tmp449 + tmp452 = tmp444 / tmp451 + tmp453 = tmp425 / tmp452 + tmp454 = tmp433 + tmp453 + tl.store(out_ptr48 + (x12), tmp425, xmask) + tl.store(out_ptr50 + (x12), tmp454, xmask) + tl.store(out_ptr51 + (x12), tmp432, xmask) + elif pid < num_xblocks_13: + pid_offset = pid - num_xblocks_12 + xnumel = 768 + rnumel = 1 + xoffset = pid_offset * XBLOCK + xindex = xoffset + tl.arange(0, XBLOCK)[:] + xmask = xindex < xnumel + x13 = xindex + tmp455 = tl.load(in_ptr65 + (x13), xmask) + tmp456 = tl.load(in_ptr66 + (x13), xmask) + tmp461 = tl.load(in_ptr67 + (x13), xmask) + tmp468 = tl.load(in_ptr68 + (x13), xmask) + tmp470 = tl.load(in_ptr69 + (0)) + tmp471 = tl.broadcast_to(tmp470, [XBLOCK]) + tmp457 = tmp456 - tmp455 + tmp458 = 0.09999999999999998 + tmp459 = tmp457 * tmp458 + tmp460 = tmp455 + tmp459 + tmp462 = 0.999 + tmp463 = tmp461 * tmp462 + tmp464 = tmp456 * tmp456 + tmp465 = 0.0010000000000000009 + tmp466 = tmp464 * tmp465 + tmp467 = tmp463 + tmp466 + tmp469 = libdevice.sqrt(tmp467) + tmp472 = libdevice.pow(tmp462, tmp471) + tmp473 = 1.0 + tmp474 = tmp472 - tmp473 + tmp475 = -tmp474 + tmp476 = libdevice.sqrt(tmp475) + tmp477 = tmp469 / tmp476 + tmp478 = 1e-08 + tmp479 = tmp477 + tmp478 + tmp480 = 0.9 + tmp481 = 
libdevice.pow(tmp480, tmp471) + tmp482 = tmp481 - tmp473 + tmp483 = 100.0 + tmp484 = tmp482 * tmp483 + tmp485 = tl.full([1], 1, tl.int32) + tmp486 = tmp485 / tmp484 + tmp487 = tmp479 / tmp486 + tmp488 = tmp460 / tmp487 + tmp489 = tmp468 + tmp488 + tl.store(out_ptr52 + (x13), tmp460, xmask) + tl.store(out_ptr54 + (x13), tmp489, xmask) + tl.store(out_ptr55 + (x13), tmp467, xmask) + elif pid < num_xblocks_14: + pid_offset = pid - num_xblocks_13 + xnumel = 1769472 + rnumel = 1 + xoffset = pid_offset * XBLOCK + xindex = xoffset + tl.arange(0, XBLOCK)[:] + xmask = tl.full([XBLOCK], True, tl.int1) + x14 = xindex + tmp490 = tl.load(in_ptr70 + (x14), None) + tmp491 = tl.load(in_ptr71 + (x14), None) + tmp496 = tl.load(in_ptr72 + (x14), None) + tmp503 = tl.load(in_ptr73 + (x14), None) + tmp505 = tl.load(in_ptr74 + (0)) + tmp506 = tl.broadcast_to(tmp505, [XBLOCK]) + tmp492 = tmp491 - tmp490 + tmp493 = 0.09999999999999998 + tmp494 = tmp492 * tmp493 + tmp495 = tmp490 + tmp494 + tmp497 = 0.999 + tmp498 = tmp496 * tmp497 + tmp499 = tmp491 * tmp491 + tmp500 = 0.0010000000000000009 + tmp501 = tmp499 * tmp500 + tmp502 = tmp498 + tmp501 + tmp504 = libdevice.sqrt(tmp502) + tmp507 = libdevice.pow(tmp497, tmp506) + tmp508 = 1.0 + tmp509 = tmp507 - tmp508 + tmp510 = -tmp509 + tmp511 = libdevice.sqrt(tmp510) + tmp512 = tmp504 / tmp511 + tmp513 = 1e-08 + tmp514 = tmp512 + tmp513 + tmp515 = 0.9 + tmp516 = libdevice.pow(tmp515, tmp506) + tmp517 = tmp516 - tmp508 + tmp518 = 100.0 + tmp519 = tmp517 * tmp518 + tmp520 = tl.full([1], 1, tl.int32) + tmp521 = tmp520 / tmp519 + tmp522 = tmp514 / tmp521 + tmp523 = tmp495 / tmp522 + tmp524 = tmp503 + tmp523 + tl.store(out_ptr56 + (x14), tmp495, None) + tl.store(out_ptr58 + (x14), tmp524, None) + tl.store(out_ptr59 + (x14), tmp502, None) + elif pid < num_xblocks_15: + pid_offset = pid - num_xblocks_14 + xnumel = 2304 + rnumel = 1 + xoffset = pid_offset * XBLOCK + xindex = xoffset + tl.arange(0, XBLOCK)[:] + xmask = xindex < xnumel + x15 = xindex + tmp525 = tl.load(in_ptr75 + (x15), xmask) + tmp526 = tl.load(in_ptr76 + (x15), xmask) + tmp531 = tl.load(in_ptr77 + (x15), xmask) + tmp538 = tl.load(in_ptr78 + (x15), xmask) + tmp540 = tl.load(in_ptr79 + (0)) + tmp541 = tl.broadcast_to(tmp540, [XBLOCK]) + tmp527 = tmp526 - tmp525 + tmp528 = 0.09999999999999998 + tmp529 = tmp527 * tmp528 + tmp530 = tmp525 + tmp529 + tmp532 = 0.999 + tmp533 = tmp531 * tmp532 + tmp534 = tmp526 * tmp526 + tmp535 = 0.0010000000000000009 + tmp536 = tmp534 * tmp535 + tmp537 = tmp533 + tmp536 + tmp539 = libdevice.sqrt(tmp537) + tmp542 = libdevice.pow(tmp532, tmp541) + tmp543 = 1.0 + tmp544 = tmp542 - tmp543 + tmp545 = -tmp544 + tmp546 = libdevice.sqrt(tmp545) + tmp547 = tmp539 / tmp546 + tmp548 = 1e-08 + tmp549 = tmp547 + tmp548 + tmp550 = 0.9 + tmp551 = libdevice.pow(tmp550, tmp541) + tmp552 = tmp551 - tmp543 + tmp553 = 100.0 + tmp554 = tmp552 * tmp553 + tmp555 = tl.full([1], 1, tl.int32) + tmp556 = tmp555 / tmp554 + tmp557 = tmp549 / tmp556 + tmp558 = tmp530 / tmp557 + tmp559 = tmp538 + tmp558 + tl.store(out_ptr60 + (x15), tmp530, xmask) + tl.store(out_ptr62 + (x15), tmp559, xmask) + tl.store(out_ptr63 + (x15), tmp537, xmask) + elif pid < num_xblocks_16: + pid_offset = pid - num_xblocks_15 + xnumel = 589824 + rnumel = 1 + xoffset = pid_offset * XBLOCK + xindex = xoffset + tl.arange(0, XBLOCK)[:] + xmask = tl.full([XBLOCK], True, tl.int1) + x16 = xindex + tmp560 = tl.load(in_ptr80 + (x16), None) + tmp561 = tl.load(in_ptr81 + (x16), None) + tmp566 = tl.load(in_ptr82 + (x16), None) + tmp573 = tl.load(in_ptr83 
+ (x16), None) + tmp575 = tl.load(in_ptr84 + (0)) + tmp576 = tl.broadcast_to(tmp575, [XBLOCK]) + tmp562 = tmp561 - tmp560 + tmp563 = 0.09999999999999998 + tmp564 = tmp562 * tmp563 + tmp565 = tmp560 + tmp564 + tmp567 = 0.999 + tmp568 = tmp566 * tmp567 + tmp569 = tmp561 * tmp561 + tmp570 = 0.0010000000000000009 + tmp571 = tmp569 * tmp570 + tmp572 = tmp568 + tmp571 + tmp574 = libdevice.sqrt(tmp572) + tmp577 = libdevice.pow(tmp567, tmp576) + tmp578 = 1.0 + tmp579 = tmp577 - tmp578 + tmp580 = -tmp579 + tmp581 = libdevice.sqrt(tmp580) + tmp582 = tmp574 / tmp581 + tmp583 = 1e-08 + tmp584 = tmp582 + tmp583 + tmp585 = 0.9 + tmp586 = libdevice.pow(tmp585, tmp576) + tmp587 = tmp586 - tmp578 + tmp588 = 100.0 + tmp589 = tmp587 * tmp588 + tmp590 = tl.full([1], 1, tl.int32) + tmp591 = tmp590 / tmp589 + tmp592 = tmp584 / tmp591 + tmp593 = tmp565 / tmp592 + tmp594 = tmp573 + tmp593 + tl.store(out_ptr64 + (x16), tmp565, None) + tl.store(out_ptr66 + (x16), tmp594, None) + tl.store(out_ptr67 + (x16), tmp572, None) + elif pid < num_xblocks_17: + pid_offset = pid - num_xblocks_16 + xnumel = 768 + rnumel = 1 + xoffset = pid_offset * XBLOCK + xindex = xoffset + tl.arange(0, XBLOCK)[:] + xmask = xindex < xnumel + x17 = xindex + tmp595 = tl.load(in_ptr85 + (x17), xmask) + tmp596 = tl.load(in_ptr86 + (x17), xmask) + tmp601 = tl.load(in_ptr87 + (x17), xmask) + tmp608 = tl.load(in_ptr88 + (x17), xmask) + tmp610 = tl.load(in_ptr89 + (0)) + tmp611 = tl.broadcast_to(tmp610, [XBLOCK]) + tmp597 = tmp596 - tmp595 + tmp598 = 0.09999999999999998 + tmp599 = tmp597 * tmp598 + tmp600 = tmp595 + tmp599 + tmp602 = 0.999 + tmp603 = tmp601 * tmp602 + tmp604 = tmp596 * tmp596 + tmp605 = 0.0010000000000000009 + tmp606 = tmp604 * tmp605 + tmp607 = tmp603 + tmp606 + tmp609 = libdevice.sqrt(tmp607) + tmp612 = libdevice.pow(tmp602, tmp611) + tmp613 = 1.0 + tmp614 = tmp612 - tmp613 + tmp615 = -tmp614 + tmp616 = libdevice.sqrt(tmp615) + tmp617 = tmp609 / tmp616 + tmp618 = 1e-08 + tmp619 = tmp617 + tmp618 + tmp620 = 0.9 + tmp621 = libdevice.pow(tmp620, tmp611) + tmp622 = tmp621 - tmp613 + tmp623 = 100.0 + tmp624 = tmp622 * tmp623 + tmp625 = tl.full([1], 1, tl.int32) + tmp626 = tmp625 / tmp624 + tmp627 = tmp619 / tmp626 + tmp628 = tmp600 / tmp627 + tmp629 = tmp608 + tmp628 + tl.store(out_ptr68 + (x17), tmp600, xmask) + tl.store(out_ptr70 + (x17), tmp629, xmask) + tl.store(out_ptr71 + (x17), tmp607, xmask) + elif pid < num_xblocks_18: + pid_offset = pid - num_xblocks_17 + xnumel = 768 + rnumel = 1 + xoffset = pid_offset * XBLOCK + xindex = xoffset + tl.arange(0, XBLOCK)[:] + xmask = xindex < xnumel + x18 = xindex + tmp630 = tl.load(in_ptr90 + (x18), xmask) + tmp631 = tl.load(in_ptr91 + (x18), xmask) + tmp636 = tl.load(in_ptr92 + (x18), xmask) + tmp643 = tl.load(in_ptr93 + (x18), xmask) + tmp645 = tl.load(in_ptr94 + (0)) + tmp646 = tl.broadcast_to(tmp645, [XBLOCK]) + tmp632 = tmp631 - tmp630 + tmp633 = 0.09999999999999998 + tmp634 = tmp632 * tmp633 + tmp635 = tmp630 + tmp634 + tmp637 = 0.999 + tmp638 = tmp636 * tmp637 + tmp639 = tmp631 * tmp631 + tmp640 = 0.0010000000000000009 + tmp641 = tmp639 * tmp640 + tmp642 = tmp638 + tmp641 + tmp644 = libdevice.sqrt(tmp642) + tmp647 = libdevice.pow(tmp637, tmp646) + tmp648 = 1.0 + tmp649 = tmp647 - tmp648 + tmp650 = -tmp649 + tmp651 = libdevice.sqrt(tmp650) + tmp652 = tmp644 / tmp651 + tmp653 = 1e-08 + tmp654 = tmp652 + tmp653 + tmp655 = 0.9 + tmp656 = libdevice.pow(tmp655, tmp646) + tmp657 = tmp656 - tmp648 + tmp658 = 100.0 + tmp659 = tmp657 * tmp658 + tmp660 = tl.full([1], 1, tl.int32) + tmp661 = tmp660 
/ tmp659 + tmp662 = tmp654 / tmp661 + tmp663 = tmp635 / tmp662 + tmp664 = tmp643 + tmp663 + tl.store(out_ptr72 + (x18), tmp635, xmask) + tl.store(out_ptr74 + (x18), tmp664, xmask) + tl.store(out_ptr75 + (x18), tmp642, xmask) + else: + pass + ''', device_str='cuda') + + + # kernel path: /tmp/tmp2ln889l5/st/cstrucu2uxk3ht2xjqayfn4a2bmj6fq5izilkcg3bjyjuhtnj5nc.py + # Source Nodes: [], Original ATen: [] + + triton_for_fused_5 = async_compile.triton('triton_', ''' + import triton + import triton.language as tl + from triton.compiler.compiler import AttrsDescriptor + + from torch._inductor.runtime import triton_helpers, triton_heuristics + from torch._inductor.runtime.triton_helpers import libdevice, math as tl_math + from torch._inductor.runtime.hints import AutotuneHint, ReductionHint, TileHint, instance_descriptor, DeviceProperties + + @triton_heuristics.foreach( + num_warps=8, + triton_meta={'signature': {0: '*fp32', 1: '*fp32', 2: '*fp32', 3: '*fp32', 4: '*fp32', 5: '*fp32', 6: '*fp32', 7: '*fp32', 8: '*fp32', 9: '*fp32', 10: '*fp32', 11: '*fp32', 12: '*fp32', 13: '*fp32', 14: '*fp32', 15: '*fp32', 16: '*fp32', 17: '*fp32', 18: '*fp32', 19: '*fp32', 20: '*fp32', 21: '*fp32', 22: '*fp32', 23: '*fp32', 24: '*fp32', 25: '*fp32', 26: '*fp32', 27: '*fp32', 28: '*fp32', 29: '*fp32', 30: '*fp32', 31: '*fp32', 32: '*fp32', 33: '*fp32', 34: '*fp32', 35: '*fp32', 36: '*fp32', 37: '*fp32', 38: '*fp32', 39: '*fp32', 40: '*fp32', 41: '*fp32', 42: '*fp32', 43: '*fp32', 44: '*fp32', 45: '*fp32', 46: '*fp32', 47: '*fp32', 48: '*fp32', 49: '*fp32', 50: '*fp32', 51: '*fp32', 52: '*fp32', 53: '*fp32', 54: '*fp32', 55: '*fp32', 56: '*fp32', 57: '*fp32', 58: '*fp32', 59: '*fp32', 60: '*fp32', 61: '*fp32', 62: '*fp32', 63: '*fp32', 64: '*fp32', 65: '*fp32', 66: '*fp32', 67: '*fp32', 68: '*fp32', 69: '*fp32', 70: '*fp32', 71: '*fp32', 72: '*fp32', 73: '*fp32', 74: '*fp32', 75: '*fp32', 76: '*fp32', 77: '*fp32', 78: '*fp32', 79: '*fp32', 80: '*fp32', 81: '*fp32', 82: '*fp32', 83: '*fp32', 84: '*fp32', 85: '*fp32', 86: '*fp32', 87: '*fp32', 88: '*fp32', 89: '*fp32', 90: '*fp32', 91: '*fp32', 92: '*fp32', 93: '*fp32', 94: '*fp32', 95: '*fp32', 96: '*fp32', 97: '*fp32', 98: '*fp32', 99: '*fp32', 100: '*fp32', 101: '*fp32', 102: '*fp32', 103: '*fp32', 104: '*fp32', 105: '*fp32', 106: '*fp32', 107: '*fp32', 108: '*fp32', 109: '*fp32', 110: '*fp32', 111: '*fp32', 112: '*fp32', 113: '*fp32', 114: '*fp32', 115: '*fp32', 116: '*fp32', 117: '*fp32', 118: '*fp32', 119: '*fp32', 120: '*fp32', 121: '*fp32', 122: '*fp32', 123: '*fp32', 124: '*fp32', 125: '*fp32', 126: '*fp32', 127: '*fp32', 128: '*fp32', 129: '*fp32', 130: '*fp32', 131: '*fp32', 132: '*fp32', 133: '*fp32', 134: '*fp32', 135: '*fp32', 136: '*fp32', 137: '*fp32', 138: '*fp32', 139: '*fp32', 140: '*fp32', 141: '*fp32', 142: '*fp32', 143: '*fp32', 144: '*fp32', 145: '*fp32', 146: '*fp32', 147: '*fp32', 148: '*fp32', 149: '*fp32', 150: '*fp32', 151: '*fp32'}, 'device': DeviceProperties(type='cuda', index=0, cc=90, major=9, regs_per_multiprocessor=65536, max_threads_per_multi_processor=2048, multi_processor_count=132), 'constants': {}, 'configs': [AttrsDescriptor(divisible_by_16=(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76, 77, 78, 79, 80, 81, 82, 83, 84, 85, 86, 87, 88, 89, 90, 91, 92, 93, 94, 95, 96, 97, 98, 99, 100, 
101, 102, 103, 104, 105, 106, 107, 108, 109, 110, 111, 112, 113, 114, 115, 116, 117, 118, 119, 120, 121, 122, 123, 124, 125, 126, 127, 128, 129, 130, 131, 132, 133, 134, 135, 136, 137, 138, 139, 140, 141, 142, 143, 144, 145, 146, 147, 148, 149, 150, 151), equal_to_1=())]}, + inductor_meta={'kernel_name': 'triton_for_fused_5', 'mutated_arg_names': ['in_ptr0', 'in_ptr10', 'in_ptr12', 'in_ptr13', 'in_ptr15', 'in_ptr17', 'in_ptr18', 'in_ptr2', 'in_ptr20', 'in_ptr22', 'in_ptr23', 'in_ptr25', 'in_ptr27', 'in_ptr28', 'in_ptr3', 'in_ptr30', 'in_ptr32', 'in_ptr33', 'in_ptr35', 'in_ptr37', 'in_ptr38', 'in_ptr40', 'in_ptr42', 'in_ptr43', 'in_ptr45', 'in_ptr47', 'in_ptr48', 'in_ptr5', 'in_ptr50', 'in_ptr52', 'in_ptr53', 'in_ptr55', 'in_ptr57', 'in_ptr58', 'in_ptr60', 'in_ptr62', 'in_ptr63', 'in_ptr65', 'in_ptr67', 'in_ptr68', 'in_ptr7', 'in_ptr70', 'in_ptr72', 'in_ptr73', 'in_ptr75', 'in_ptr77', 'in_ptr78', 'in_ptr8', 'in_ptr80', 'in_ptr82', 'in_ptr83', 'in_ptr85', 'in_ptr87', 'in_ptr88', 'in_ptr90', 'in_ptr92', 'in_ptr93', 'out_ptr0', 'out_ptr10', 'out_ptr11', 'out_ptr12', 'out_ptr14', 'out_ptr15', 'out_ptr16', 'out_ptr18', 'out_ptr19', 'out_ptr2', 'out_ptr20', 'out_ptr22', 'out_ptr23', 'out_ptr24', 'out_ptr26', 'out_ptr27', 'out_ptr28', 'out_ptr3', 'out_ptr30', 'out_ptr31', 'out_ptr32', 'out_ptr34', 'out_ptr35', 'out_ptr36', 'out_ptr38', 'out_ptr39', 'out_ptr4', 'out_ptr40', 'out_ptr42', 'out_ptr43', 'out_ptr44', 'out_ptr46', 'out_ptr47', 'out_ptr48', 'out_ptr50', 'out_ptr51', 'out_ptr52', 'out_ptr54', 'out_ptr55', 'out_ptr56', 'out_ptr58', 'out_ptr59', 'out_ptr6', 'out_ptr60', 'out_ptr62', 'out_ptr63', 'out_ptr64', 'out_ptr66', 'out_ptr67', 'out_ptr68', 'out_ptr7', 'out_ptr70', 'out_ptr71', 'out_ptr72', 'out_ptr74', 'out_ptr75', 'out_ptr8'], 'backend_hash': 'B0714AF1B5CA55D285C0AA74C7669268EFE45FDD9B7A0A70183B8D89AC8FCAC4', 'are_deterministic_algorithms_enabled': False, 'assert_indirect_indexing': True, 'autotune_local_cache': True, 'autotune_pointwise': True, 'autotune_remote_cache': None, 'force_disable_caches': False, 'dynamic_scale_rblock': True, 'max_autotune': False, 'max_autotune_pointwise': False, 'min_split_scan_rblock': 256, 'spill_threshold': 16, 'store_cubin': False}, + ) + @triton.jit + def triton_(in_ptr0, in_ptr1, in_ptr2, in_ptr3, in_ptr4, in_ptr5, in_ptr6, in_ptr7, in_ptr8, in_ptr9, in_ptr10, in_ptr11, in_ptr12, in_ptr13, in_ptr14, in_ptr15, in_ptr16, in_ptr17, in_ptr18, in_ptr19, in_ptr20, in_ptr21, in_ptr22, in_ptr23, in_ptr24, in_ptr25, in_ptr26, in_ptr27, in_ptr28, in_ptr29, in_ptr30, in_ptr31, in_ptr32, in_ptr33, in_ptr34, in_ptr35, in_ptr36, in_ptr37, in_ptr38, in_ptr39, in_ptr40, in_ptr41, in_ptr42, in_ptr43, in_ptr44, in_ptr45, in_ptr46, in_ptr47, in_ptr48, in_ptr49, in_ptr50, in_ptr51, in_ptr52, in_ptr53, in_ptr54, in_ptr55, in_ptr56, in_ptr57, in_ptr58, in_ptr59, in_ptr60, in_ptr61, in_ptr62, in_ptr63, in_ptr64, in_ptr65, in_ptr66, in_ptr67, in_ptr68, in_ptr69, in_ptr70, in_ptr71, in_ptr72, in_ptr73, in_ptr74, in_ptr75, in_ptr76, in_ptr77, in_ptr78, in_ptr79, in_ptr80, in_ptr81, in_ptr82, in_ptr83, in_ptr84, in_ptr85, in_ptr86, in_ptr87, in_ptr88, in_ptr89, in_ptr90, in_ptr91, in_ptr92, in_ptr93, in_ptr94, out_ptr0, out_ptr2, out_ptr3, out_ptr4, out_ptr6, out_ptr7, out_ptr8, out_ptr10, out_ptr11, out_ptr12, out_ptr14, out_ptr15, out_ptr16, out_ptr18, out_ptr19, out_ptr20, out_ptr22, out_ptr23, out_ptr24, out_ptr26, out_ptr27, out_ptr28, out_ptr30, out_ptr31, out_ptr32, out_ptr34, out_ptr35, out_ptr36, out_ptr38, out_ptr39, out_ptr40, out_ptr42, out_ptr43, out_ptr44, 
out_ptr46, out_ptr47, out_ptr48, out_ptr50, out_ptr51, out_ptr52, out_ptr54, out_ptr55, out_ptr56, out_ptr58, out_ptr59, out_ptr60, out_ptr62, out_ptr63, out_ptr64, out_ptr66, out_ptr67, out_ptr68, out_ptr70, out_ptr71, out_ptr72, out_ptr74, out_ptr75): + pid = tl.program_id(0) + XBLOCK: tl.constexpr = 1024 + num_xblocks_0 = tl.cdiv(768, XBLOCK) + num_xblocks_1 = num_xblocks_0 + tl.cdiv(2359296, XBLOCK) + num_xblocks_2 = num_xblocks_1 + tl.cdiv(3072, XBLOCK) + num_xblocks_3 = num_xblocks_2 + tl.cdiv(2359296, XBLOCK) + num_xblocks_4 = num_xblocks_3 + tl.cdiv(768, XBLOCK) + num_xblocks_5 = num_xblocks_4 + tl.cdiv(768, XBLOCK) + num_xblocks_6 = num_xblocks_5 + tl.cdiv(768, XBLOCK) + num_xblocks_7 = num_xblocks_6 + tl.cdiv(1769472, XBLOCK) + num_xblocks_8 = num_xblocks_7 + tl.cdiv(2304, XBLOCK) + num_xblocks_9 = num_xblocks_8 + tl.cdiv(589824, XBLOCK) + num_xblocks_10 = num_xblocks_9 + tl.cdiv(768, XBLOCK) + num_xblocks_11 = num_xblocks_10 + tl.cdiv(768, XBLOCK) + num_xblocks_12 = num_xblocks_11 + tl.cdiv(768, XBLOCK) + num_xblocks_13 = num_xblocks_12 + tl.cdiv(2359296, XBLOCK) + num_xblocks_14 = num_xblocks_13 + tl.cdiv(3072, XBLOCK) + num_xblocks_15 = num_xblocks_14 + tl.cdiv(2359296, XBLOCK) + num_xblocks_16 = num_xblocks_15 + tl.cdiv(768, XBLOCK) + num_xblocks_17 = num_xblocks_16 + tl.cdiv(768, XBLOCK) + num_xblocks_18 = num_xblocks_17 + tl.cdiv(768, XBLOCK) + if pid < num_xblocks_0: + pid_offset = pid + xnumel = 768 + rnumel = 1 + xoffset = pid_offset * XBLOCK + xindex = xoffset + tl.arange(0, XBLOCK)[:] + xmask = xindex < xnumel + x0 = xindex + tmp0 = tl.load(in_ptr0 + (x0), xmask) + tmp1 = tl.load(in_ptr1 + (x0), xmask) + tmp6 = tl.load(in_ptr2 + (x0), xmask) + tmp13 = tl.load(in_ptr3 + (x0), xmask) + tmp15 = tl.load(in_ptr4 + (0)) + tmp16 = tl.broadcast_to(tmp15, [XBLOCK]) + tmp2 = tmp1 - tmp0 + tmp3 = 0.09999999999999998 + tmp4 = tmp2 * tmp3 + tmp5 = tmp0 + tmp4 + tmp7 = 0.999 + tmp8 = tmp6 * tmp7 + tmp9 = tmp1 * tmp1 + tmp10 = 0.0010000000000000009 + tmp11 = tmp9 * tmp10 + tmp12 = tmp8 + tmp11 + tmp14 = libdevice.sqrt(tmp12) + tmp17 = libdevice.pow(tmp7, tmp16) + tmp18 = 1.0 + tmp19 = tmp17 - tmp18 + tmp20 = -tmp19 + tmp21 = libdevice.sqrt(tmp20) + tmp22 = tmp14 / tmp21 + tmp23 = 1e-08 + tmp24 = tmp22 + tmp23 + tmp25 = 0.9 + tmp26 = libdevice.pow(tmp25, tmp16) + tmp27 = tmp26 - tmp18 + tmp28 = 100.0 + tmp29 = tmp27 * tmp28 + tmp30 = tl.full([1], 1, tl.int32) + tmp31 = tmp30 / tmp29 + tmp32 = tmp24 / tmp31 + tmp33 = tmp5 / tmp32 + tmp34 = tmp13 + tmp33 + tl.store(out_ptr0 + (x0), tmp5, xmask) + tl.store(out_ptr2 + (x0), tmp34, xmask) + tl.store(out_ptr3 + (x0), tmp12, xmask) + elif pid < num_xblocks_1: + pid_offset = pid - num_xblocks_0 + xnumel = 2359296 + rnumel = 1 + xoffset = pid_offset * XBLOCK + xindex = xoffset + tl.arange(0, XBLOCK)[:] + xmask = tl.full([XBLOCK], True, tl.int1) + x1 = xindex + tmp35 = tl.load(in_ptr5 + (x1), None) + tmp36 = tl.load(in_ptr6 + (x1), None) + tmp41 = tl.load(in_ptr7 + (x1), None) + tmp48 = tl.load(in_ptr8 + (x1), None) + tmp50 = tl.load(in_ptr9 + (0)) + tmp51 = tl.broadcast_to(tmp50, [XBLOCK]) + tmp37 = tmp36 - tmp35 + tmp38 = 0.09999999999999998 + tmp39 = tmp37 * tmp38 + tmp40 = tmp35 + tmp39 + tmp42 = 0.999 + tmp43 = tmp41 * tmp42 + tmp44 = tmp36 * tmp36 + tmp45 = 0.0010000000000000009 + tmp46 = tmp44 * tmp45 + tmp47 = tmp43 + tmp46 + tmp49 = libdevice.sqrt(tmp47) + tmp52 = libdevice.pow(tmp42, tmp51) + tmp53 = 1.0 + tmp54 = tmp52 - tmp53 + tmp55 = -tmp54 + tmp56 = libdevice.sqrt(tmp55) + tmp57 = tmp49 / tmp56 + tmp58 = 1e-08 + tmp59 = tmp57 + 
tmp58 + tmp60 = 0.9 + tmp61 = libdevice.pow(tmp60, tmp51) + tmp62 = tmp61 - tmp53 + tmp63 = 100.0 + tmp64 = tmp62 * tmp63 + tmp65 = tl.full([1], 1, tl.int32) + tmp66 = tmp65 / tmp64 + tmp67 = tmp59 / tmp66 + tmp68 = tmp40 / tmp67 + tmp69 = tmp48 + tmp68 + tl.store(out_ptr4 + (x1), tmp40, None) + tl.store(out_ptr6 + (x1), tmp69, None) + tl.store(out_ptr7 + (x1), tmp47, None) + elif pid < num_xblocks_2: + pid_offset = pid - num_xblocks_1 + xnumel = 3072 + rnumel = 1 + xoffset = pid_offset * XBLOCK + xindex = xoffset + tl.arange(0, XBLOCK)[:] + xmask = xindex < xnumel + x2 = xindex + tmp70 = tl.load(in_ptr10 + (x2), xmask) + tmp71 = tl.load(in_ptr11 + (x2), xmask) + tmp76 = tl.load(in_ptr12 + (x2), xmask) + tmp83 = tl.load(in_ptr13 + (x2), xmask) + tmp85 = tl.load(in_ptr14 + (0)) + tmp86 = tl.broadcast_to(tmp85, [XBLOCK]) + tmp72 = tmp71 - tmp70 + tmp73 = 0.09999999999999998 + tmp74 = tmp72 * tmp73 + tmp75 = tmp70 + tmp74 + tmp77 = 0.999 + tmp78 = tmp76 * tmp77 + tmp79 = tmp71 * tmp71 + tmp80 = 0.0010000000000000009 + tmp81 = tmp79 * tmp80 + tmp82 = tmp78 + tmp81 + tmp84 = libdevice.sqrt(tmp82) + tmp87 = libdevice.pow(tmp77, tmp86) + tmp88 = 1.0 + tmp89 = tmp87 - tmp88 + tmp90 = -tmp89 + tmp91 = libdevice.sqrt(tmp90) + tmp92 = tmp84 / tmp91 + tmp93 = 1e-08 + tmp94 = tmp92 + tmp93 + tmp95 = 0.9 + tmp96 = libdevice.pow(tmp95, tmp86) + tmp97 = tmp96 - tmp88 + tmp98 = 100.0 + tmp99 = tmp97 * tmp98 + tmp100 = tl.full([1], 1, tl.int32) + tmp101 = tmp100 / tmp99 + tmp102 = tmp94 / tmp101 + tmp103 = tmp75 / tmp102 + tmp104 = tmp83 + tmp103 + tl.store(out_ptr8 + (x2), tmp75, xmask) + tl.store(out_ptr10 + (x2), tmp104, xmask) + tl.store(out_ptr11 + (x2), tmp82, xmask) + elif pid < num_xblocks_3: + pid_offset = pid - num_xblocks_2 + xnumel = 2359296 + rnumel = 1 + xoffset = pid_offset * XBLOCK + xindex = xoffset + tl.arange(0, XBLOCK)[:] + xmask = tl.full([XBLOCK], True, tl.int1) + x3 = xindex + tmp105 = tl.load(in_ptr15 + (x3), None) + tmp106 = tl.load(in_ptr16 + (x3), None) + tmp111 = tl.load(in_ptr17 + (x3), None) + tmp118 = tl.load(in_ptr18 + (x3), None) + tmp120 = tl.load(in_ptr19 + (0)) + tmp121 = tl.broadcast_to(tmp120, [XBLOCK]) + tmp107 = tmp106 - tmp105 + tmp108 = 0.09999999999999998 + tmp109 = tmp107 * tmp108 + tmp110 = tmp105 + tmp109 + tmp112 = 0.999 + tmp113 = tmp111 * tmp112 + tmp114 = tmp106 * tmp106 + tmp115 = 0.0010000000000000009 + tmp116 = tmp114 * tmp115 + tmp117 = tmp113 + tmp116 + tmp119 = libdevice.sqrt(tmp117) + tmp122 = libdevice.pow(tmp112, tmp121) + tmp123 = 1.0 + tmp124 = tmp122 - tmp123 + tmp125 = -tmp124 + tmp126 = libdevice.sqrt(tmp125) + tmp127 = tmp119 / tmp126 + tmp128 = 1e-08 + tmp129 = tmp127 + tmp128 + tmp130 = 0.9 + tmp131 = libdevice.pow(tmp130, tmp121) + tmp132 = tmp131 - tmp123 + tmp133 = 100.0 + tmp134 = tmp132 * tmp133 + tmp135 = tl.full([1], 1, tl.int32) + tmp136 = tmp135 / tmp134 + tmp137 = tmp129 / tmp136 + tmp138 = tmp110 / tmp137 + tmp139 = tmp118 + tmp138 + tl.store(out_ptr12 + (x3), tmp110, None) + tl.store(out_ptr14 + (x3), tmp139, None) + tl.store(out_ptr15 + (x3), tmp117, None) + elif pid < num_xblocks_4: + pid_offset = pid - num_xblocks_3 + xnumel = 768 + rnumel = 1 + xoffset = pid_offset * XBLOCK + xindex = xoffset + tl.arange(0, XBLOCK)[:] + xmask = xindex < xnumel + x4 = xindex + tmp140 = tl.load(in_ptr20 + (x4), xmask) + tmp141 = tl.load(in_ptr21 + (x4), xmask) + tmp146 = tl.load(in_ptr22 + (x4), xmask) + tmp153 = tl.load(in_ptr23 + (x4), xmask) + tmp155 = tl.load(in_ptr24 + (0)) + tmp156 = tl.broadcast_to(tmp155, [XBLOCK]) + tmp142 = tmp141 - 
tmp140 + tmp143 = 0.09999999999999998 + tmp144 = tmp142 * tmp143 + tmp145 = tmp140 + tmp144 + tmp147 = 0.999 + tmp148 = tmp146 * tmp147 + tmp149 = tmp141 * tmp141 + tmp150 = 0.0010000000000000009 + tmp151 = tmp149 * tmp150 + tmp152 = tmp148 + tmp151 + tmp154 = libdevice.sqrt(tmp152) + tmp157 = libdevice.pow(tmp147, tmp156) + tmp158 = 1.0 + tmp159 = tmp157 - tmp158 + tmp160 = -tmp159 + tmp161 = libdevice.sqrt(tmp160) + tmp162 = tmp154 / tmp161 + tmp163 = 1e-08 + tmp164 = tmp162 + tmp163 + tmp165 = 0.9 + tmp166 = libdevice.pow(tmp165, tmp156) + tmp167 = tmp166 - tmp158 + tmp168 = 100.0 + tmp169 = tmp167 * tmp168 + tmp170 = tl.full([1], 1, tl.int32) + tmp171 = tmp170 / tmp169 + tmp172 = tmp164 / tmp171 + tmp173 = tmp145 / tmp172 + tmp174 = tmp153 + tmp173 + tl.store(out_ptr16 + (x4), tmp145, xmask) + tl.store(out_ptr18 + (x4), tmp174, xmask) + tl.store(out_ptr19 + (x4), tmp152, xmask) + elif pid < num_xblocks_5: + pid_offset = pid - num_xblocks_4 + xnumel = 768 + rnumel = 1 + xoffset = pid_offset * XBLOCK + xindex = xoffset + tl.arange(0, XBLOCK)[:] + xmask = xindex < xnumel + x5 = xindex + tmp175 = tl.load(in_ptr25 + (x5), xmask) + tmp176 = tl.load(in_ptr26 + (x5), xmask) + tmp181 = tl.load(in_ptr27 + (x5), xmask) + tmp188 = tl.load(in_ptr28 + (x5), xmask) + tmp190 = tl.load(in_ptr29 + (0)) + tmp191 = tl.broadcast_to(tmp190, [XBLOCK]) + tmp177 = tmp176 - tmp175 + tmp178 = 0.09999999999999998 + tmp179 = tmp177 * tmp178 + tmp180 = tmp175 + tmp179 + tmp182 = 0.999 + tmp183 = tmp181 * tmp182 + tmp184 = tmp176 * tmp176 + tmp185 = 0.0010000000000000009 + tmp186 = tmp184 * tmp185 + tmp187 = tmp183 + tmp186 + tmp189 = libdevice.sqrt(tmp187) + tmp192 = libdevice.pow(tmp182, tmp191) + tmp193 = 1.0 + tmp194 = tmp192 - tmp193 + tmp195 = -tmp194 + tmp196 = libdevice.sqrt(tmp195) + tmp197 = tmp189 / tmp196 + tmp198 = 1e-08 + tmp199 = tmp197 + tmp198 + tmp200 = 0.9 + tmp201 = libdevice.pow(tmp200, tmp191) + tmp202 = tmp201 - tmp193 + tmp203 = 100.0 + tmp204 = tmp202 * tmp203 + tmp205 = tl.full([1], 1, tl.int32) + tmp206 = tmp205 / tmp204 + tmp207 = tmp199 / tmp206 + tmp208 = tmp180 / tmp207 + tmp209 = tmp188 + tmp208 + tl.store(out_ptr20 + (x5), tmp180, xmask) + tl.store(out_ptr22 + (x5), tmp209, xmask) + tl.store(out_ptr23 + (x5), tmp187, xmask) + elif pid < num_xblocks_6: + pid_offset = pid - num_xblocks_5 + xnumel = 768 + rnumel = 1 + xoffset = pid_offset * XBLOCK + xindex = xoffset + tl.arange(0, XBLOCK)[:] + xmask = xindex < xnumel + x6 = xindex + tmp210 = tl.load(in_ptr30 + (x6), xmask) + tmp211 = tl.load(in_ptr31 + (x6), xmask) + tmp216 = tl.load(in_ptr32 + (x6), xmask) + tmp223 = tl.load(in_ptr33 + (x6), xmask) + tmp225 = tl.load(in_ptr34 + (0)) + tmp226 = tl.broadcast_to(tmp225, [XBLOCK]) + tmp212 = tmp211 - tmp210 + tmp213 = 0.09999999999999998 + tmp214 = tmp212 * tmp213 + tmp215 = tmp210 + tmp214 + tmp217 = 0.999 + tmp218 = tmp216 * tmp217 + tmp219 = tmp211 * tmp211 + tmp220 = 0.0010000000000000009 + tmp221 = tmp219 * tmp220 + tmp222 = tmp218 + tmp221 + tmp224 = libdevice.sqrt(tmp222) + tmp227 = libdevice.pow(tmp217, tmp226) + tmp228 = 1.0 + tmp229 = tmp227 - tmp228 + tmp230 = -tmp229 + tmp231 = libdevice.sqrt(tmp230) + tmp232 = tmp224 / tmp231 + tmp233 = 1e-08 + tmp234 = tmp232 + tmp233 + tmp235 = 0.9 + tmp236 = libdevice.pow(tmp235, tmp226) + tmp237 = tmp236 - tmp228 + tmp238 = 100.0 + tmp239 = tmp237 * tmp238 + tmp240 = tl.full([1], 1, tl.int32) + tmp241 = tmp240 / tmp239 + tmp242 = tmp234 / tmp241 + tmp243 = tmp215 / tmp242 + tmp244 = tmp223 + tmp243 + tl.store(out_ptr24 + (x6), tmp215, 
xmask) + tl.store(out_ptr26 + (x6), tmp244, xmask) + tl.store(out_ptr27 + (x6), tmp222, xmask) + elif pid < num_xblocks_7: + pid_offset = pid - num_xblocks_6 + xnumel = 1769472 + rnumel = 1 + xoffset = pid_offset * XBLOCK + xindex = xoffset + tl.arange(0, XBLOCK)[:] + xmask = tl.full([XBLOCK], True, tl.int1) + x7 = xindex + tmp245 = tl.load(in_ptr35 + (x7), None) + tmp246 = tl.load(in_ptr36 + (x7), None) + tmp251 = tl.load(in_ptr37 + (x7), None) + tmp258 = tl.load(in_ptr38 + (x7), None) + tmp260 = tl.load(in_ptr39 + (0)) + tmp261 = tl.broadcast_to(tmp260, [XBLOCK]) + tmp247 = tmp246 - tmp245 + tmp248 = 0.09999999999999998 + tmp249 = tmp247 * tmp248 + tmp250 = tmp245 + tmp249 + tmp252 = 0.999 + tmp253 = tmp251 * tmp252 + tmp254 = tmp246 * tmp246 + tmp255 = 0.0010000000000000009 + tmp256 = tmp254 * tmp255 + tmp257 = tmp253 + tmp256 + tmp259 = libdevice.sqrt(tmp257) + tmp262 = libdevice.pow(tmp252, tmp261) + tmp263 = 1.0 + tmp264 = tmp262 - tmp263 + tmp265 = -tmp264 + tmp266 = libdevice.sqrt(tmp265) + tmp267 = tmp259 / tmp266 + tmp268 = 1e-08 + tmp269 = tmp267 + tmp268 + tmp270 = 0.9 + tmp271 = libdevice.pow(tmp270, tmp261) + tmp272 = tmp271 - tmp263 + tmp273 = 100.0 + tmp274 = tmp272 * tmp273 + tmp275 = tl.full([1], 1, tl.int32) + tmp276 = tmp275 / tmp274 + tmp277 = tmp269 / tmp276 + tmp278 = tmp250 / tmp277 + tmp279 = tmp258 + tmp278 + tl.store(out_ptr28 + (x7), tmp250, None) + tl.store(out_ptr30 + (x7), tmp279, None) + tl.store(out_ptr31 + (x7), tmp257, None) + elif pid < num_xblocks_8: + pid_offset = pid - num_xblocks_7 + xnumel = 2304 + rnumel = 1 + xoffset = pid_offset * XBLOCK + xindex = xoffset + tl.arange(0, XBLOCK)[:] + xmask = xindex < xnumel + x8 = xindex + tmp280 = tl.load(in_ptr40 + (x8), xmask) + tmp281 = tl.load(in_ptr41 + (x8), xmask) + tmp286 = tl.load(in_ptr42 + (x8), xmask) + tmp293 = tl.load(in_ptr43 + (x8), xmask) + tmp295 = tl.load(in_ptr44 + (0)) + tmp296 = tl.broadcast_to(tmp295, [XBLOCK]) + tmp282 = tmp281 - tmp280 + tmp283 = 0.09999999999999998 + tmp284 = tmp282 * tmp283 + tmp285 = tmp280 + tmp284 + tmp287 = 0.999 + tmp288 = tmp286 * tmp287 + tmp289 = tmp281 * tmp281 + tmp290 = 0.0010000000000000009 + tmp291 = tmp289 * tmp290 + tmp292 = tmp288 + tmp291 + tmp294 = libdevice.sqrt(tmp292) + tmp297 = libdevice.pow(tmp287, tmp296) + tmp298 = 1.0 + tmp299 = tmp297 - tmp298 + tmp300 = -tmp299 + tmp301 = libdevice.sqrt(tmp300) + tmp302 = tmp294 / tmp301 + tmp303 = 1e-08 + tmp304 = tmp302 + tmp303 + tmp305 = 0.9 + tmp306 = libdevice.pow(tmp305, tmp296) + tmp307 = tmp306 - tmp298 + tmp308 = 100.0 + tmp309 = tmp307 * tmp308 + tmp310 = tl.full([1], 1, tl.int32) + tmp311 = tmp310 / tmp309 + tmp312 = tmp304 / tmp311 + tmp313 = tmp285 / tmp312 + tmp314 = tmp293 + tmp313 + tl.store(out_ptr32 + (x8), tmp285, xmask) + tl.store(out_ptr34 + (x8), tmp314, xmask) + tl.store(out_ptr35 + (x8), tmp292, xmask) + elif pid < num_xblocks_9: + pid_offset = pid - num_xblocks_8 + xnumel = 589824 + rnumel = 1 + xoffset = pid_offset * XBLOCK + xindex = xoffset + tl.arange(0, XBLOCK)[:] + xmask = tl.full([XBLOCK], True, tl.int1) + x9 = xindex + tmp315 = tl.load(in_ptr45 + (x9), None) + tmp316 = tl.load(in_ptr46 + (x9), None) + tmp321 = tl.load(in_ptr47 + (x9), None) + tmp328 = tl.load(in_ptr48 + (x9), None) + tmp330 = tl.load(in_ptr49 + (0)) + tmp331 = tl.broadcast_to(tmp330, [XBLOCK]) + tmp317 = tmp316 - tmp315 + tmp318 = 0.09999999999999998 + tmp319 = tmp317 * tmp318 + tmp320 = tmp315 + tmp319 + tmp322 = 0.999 + tmp323 = tmp321 * tmp322 + tmp324 = tmp316 * tmp316 + tmp325 = 0.0010000000000000009 + 
tmp326 = tmp324 * tmp325 + tmp327 = tmp323 + tmp326 + tmp329 = libdevice.sqrt(tmp327) + tmp332 = libdevice.pow(tmp322, tmp331) + tmp333 = 1.0 + tmp334 = tmp332 - tmp333 + tmp335 = -tmp334 + tmp336 = libdevice.sqrt(tmp335) + tmp337 = tmp329 / tmp336 + tmp338 = 1e-08 + tmp339 = tmp337 + tmp338 + tmp340 = 0.9 + tmp341 = libdevice.pow(tmp340, tmp331) + tmp342 = tmp341 - tmp333 + tmp343 = 100.0 + tmp344 = tmp342 * tmp343 + tmp345 = tl.full([1], 1, tl.int32) + tmp346 = tmp345 / tmp344 + tmp347 = tmp339 / tmp346 + tmp348 = tmp320 / tmp347 + tmp349 = tmp328 + tmp348 + tl.store(out_ptr36 + (x9), tmp320, None) + tl.store(out_ptr38 + (x9), tmp349, None) + tl.store(out_ptr39 + (x9), tmp327, None) + elif pid < num_xblocks_10: + pid_offset = pid - num_xblocks_9 + xnumel = 768 + rnumel = 1 + xoffset = pid_offset * XBLOCK + xindex = xoffset + tl.arange(0, XBLOCK)[:] + xmask = xindex < xnumel + x10 = xindex + tmp350 = tl.load(in_ptr50 + (x10), xmask) + tmp351 = tl.load(in_ptr51 + (x10), xmask) + tmp356 = tl.load(in_ptr52 + (x10), xmask) + tmp363 = tl.load(in_ptr53 + (x10), xmask) + tmp365 = tl.load(in_ptr54 + (0)) + tmp366 = tl.broadcast_to(tmp365, [XBLOCK]) + tmp352 = tmp351 - tmp350 + tmp353 = 0.09999999999999998 + tmp354 = tmp352 * tmp353 + tmp355 = tmp350 + tmp354 + tmp357 = 0.999 + tmp358 = tmp356 * tmp357 + tmp359 = tmp351 * tmp351 + tmp360 = 0.0010000000000000009 + tmp361 = tmp359 * tmp360 + tmp362 = tmp358 + tmp361 + tmp364 = libdevice.sqrt(tmp362) + tmp367 = libdevice.pow(tmp357, tmp366) + tmp368 = 1.0 + tmp369 = tmp367 - tmp368 + tmp370 = -tmp369 + tmp371 = libdevice.sqrt(tmp370) + tmp372 = tmp364 / tmp371 + tmp373 = 1e-08 + tmp374 = tmp372 + tmp373 + tmp375 = 0.9 + tmp376 = libdevice.pow(tmp375, tmp366) + tmp377 = tmp376 - tmp368 + tmp378 = 100.0 + tmp379 = tmp377 * tmp378 + tmp380 = tl.full([1], 1, tl.int32) + tmp381 = tmp380 / tmp379 + tmp382 = tmp374 / tmp381 + tmp383 = tmp355 / tmp382 + tmp384 = tmp363 + tmp383 + tl.store(out_ptr40 + (x10), tmp355, xmask) + tl.store(out_ptr42 + (x10), tmp384, xmask) + tl.store(out_ptr43 + (x10), tmp362, xmask) + elif pid < num_xblocks_11: + pid_offset = pid - num_xblocks_10 + xnumel = 768 + rnumel = 1 + xoffset = pid_offset * XBLOCK + xindex = xoffset + tl.arange(0, XBLOCK)[:] + xmask = xindex < xnumel + x11 = xindex + tmp385 = tl.load(in_ptr55 + (x11), xmask) + tmp386 = tl.load(in_ptr56 + (x11), xmask) + tmp391 = tl.load(in_ptr57 + (x11), xmask) + tmp398 = tl.load(in_ptr58 + (x11), xmask) + tmp400 = tl.load(in_ptr59 + (0)) + tmp401 = tl.broadcast_to(tmp400, [XBLOCK]) + tmp387 = tmp386 - tmp385 + tmp388 = 0.09999999999999998 + tmp389 = tmp387 * tmp388 + tmp390 = tmp385 + tmp389 + tmp392 = 0.999 + tmp393 = tmp391 * tmp392 + tmp394 = tmp386 * tmp386 + tmp395 = 0.0010000000000000009 + tmp396 = tmp394 * tmp395 + tmp397 = tmp393 + tmp396 + tmp399 = libdevice.sqrt(tmp397) + tmp402 = libdevice.pow(tmp392, tmp401) + tmp403 = 1.0 + tmp404 = tmp402 - tmp403 + tmp405 = -tmp404 + tmp406 = libdevice.sqrt(tmp405) + tmp407 = tmp399 / tmp406 + tmp408 = 1e-08 + tmp409 = tmp407 + tmp408 + tmp410 = 0.9 + tmp411 = libdevice.pow(tmp410, tmp401) + tmp412 = tmp411 - tmp403 + tmp413 = 100.0 + tmp414 = tmp412 * tmp413 + tmp415 = tl.full([1], 1, tl.int32) + tmp416 = tmp415 / tmp414 + tmp417 = tmp409 / tmp416 + tmp418 = tmp390 / tmp417 + tmp419 = tmp398 + tmp418 + tl.store(out_ptr44 + (x11), tmp390, xmask) + tl.store(out_ptr46 + (x11), tmp419, xmask) + tl.store(out_ptr47 + (x11), tmp397, xmask) + elif pid < num_xblocks_12: + pid_offset = pid - num_xblocks_11 + xnumel = 768 + rnumel = 
1 + xoffset = pid_offset * XBLOCK + xindex = xoffset + tl.arange(0, XBLOCK)[:] + xmask = xindex < xnumel + x12 = xindex + tmp420 = tl.load(in_ptr60 + (x12), xmask) + tmp421 = tl.load(in_ptr61 + (x12), xmask) + tmp426 = tl.load(in_ptr62 + (x12), xmask) + tmp433 = tl.load(in_ptr63 + (x12), xmask) + tmp435 = tl.load(in_ptr64 + (0)) + tmp436 = tl.broadcast_to(tmp435, [XBLOCK]) + tmp422 = tmp421 - tmp420 + tmp423 = 0.09999999999999998 + tmp424 = tmp422 * tmp423 + tmp425 = tmp420 + tmp424 + tmp427 = 0.999 + tmp428 = tmp426 * tmp427 + tmp429 = tmp421 * tmp421 + tmp430 = 0.0010000000000000009 + tmp431 = tmp429 * tmp430 + tmp432 = tmp428 + tmp431 + tmp434 = libdevice.sqrt(tmp432) + tmp437 = libdevice.pow(tmp427, tmp436) + tmp438 = 1.0 + tmp439 = tmp437 - tmp438 + tmp440 = -tmp439 + tmp441 = libdevice.sqrt(tmp440) + tmp442 = tmp434 / tmp441 + tmp443 = 1e-08 + tmp444 = tmp442 + tmp443 + tmp445 = 0.9 + tmp446 = libdevice.pow(tmp445, tmp436) + tmp447 = tmp446 - tmp438 + tmp448 = 100.0 + tmp449 = tmp447 * tmp448 + tmp450 = tl.full([1], 1, tl.int32) + tmp451 = tmp450 / tmp449 + tmp452 = tmp444 / tmp451 + tmp453 = tmp425 / tmp452 + tmp454 = tmp433 + tmp453 + tl.store(out_ptr48 + (x12), tmp425, xmask) + tl.store(out_ptr50 + (x12), tmp454, xmask) + tl.store(out_ptr51 + (x12), tmp432, xmask) + elif pid < num_xblocks_13: + pid_offset = pid - num_xblocks_12 + xnumel = 2359296 + rnumel = 1 + xoffset = pid_offset * XBLOCK + xindex = xoffset + tl.arange(0, XBLOCK)[:] + xmask = tl.full([XBLOCK], True, tl.int1) + x13 = xindex + tmp455 = tl.load(in_ptr65 + (x13), None) + tmp456 = tl.load(in_ptr66 + (x13), None) + tmp461 = tl.load(in_ptr67 + (x13), None) + tmp468 = tl.load(in_ptr68 + (x13), None) + tmp470 = tl.load(in_ptr69 + (0)) + tmp471 = tl.broadcast_to(tmp470, [XBLOCK]) + tmp457 = tmp456 - tmp455 + tmp458 = 0.09999999999999998 + tmp459 = tmp457 * tmp458 + tmp460 = tmp455 + tmp459 + tmp462 = 0.999 + tmp463 = tmp461 * tmp462 + tmp464 = tmp456 * tmp456 + tmp465 = 0.0010000000000000009 + tmp466 = tmp464 * tmp465 + tmp467 = tmp463 + tmp466 + tmp469 = libdevice.sqrt(tmp467) + tmp472 = libdevice.pow(tmp462, tmp471) + tmp473 = 1.0 + tmp474 = tmp472 - tmp473 + tmp475 = -tmp474 + tmp476 = libdevice.sqrt(tmp475) + tmp477 = tmp469 / tmp476 + tmp478 = 1e-08 + tmp479 = tmp477 + tmp478 + tmp480 = 0.9 + tmp481 = libdevice.pow(tmp480, tmp471) + tmp482 = tmp481 - tmp473 + tmp483 = 100.0 + tmp484 = tmp482 * tmp483 + tmp485 = tl.full([1], 1, tl.int32) + tmp486 = tmp485 / tmp484 + tmp487 = tmp479 / tmp486 + tmp488 = tmp460 / tmp487 + tmp489 = tmp468 + tmp488 + tl.store(out_ptr52 + (x13), tmp460, None) + tl.store(out_ptr54 + (x13), tmp489, None) + tl.store(out_ptr55 + (x13), tmp467, None) + elif pid < num_xblocks_14: + pid_offset = pid - num_xblocks_13 + xnumel = 3072 + rnumel = 1 + xoffset = pid_offset * XBLOCK + xindex = xoffset + tl.arange(0, XBLOCK)[:] + xmask = xindex < xnumel + x14 = xindex + tmp490 = tl.load(in_ptr70 + (x14), xmask) + tmp491 = tl.load(in_ptr71 + (x14), xmask) + tmp496 = tl.load(in_ptr72 + (x14), xmask) + tmp503 = tl.load(in_ptr73 + (x14), xmask) + tmp505 = tl.load(in_ptr74 + (0)) + tmp506 = tl.broadcast_to(tmp505, [XBLOCK]) + tmp492 = tmp491 - tmp490 + tmp493 = 0.09999999999999998 + tmp494 = tmp492 * tmp493 + tmp495 = tmp490 + tmp494 + tmp497 = 0.999 + tmp498 = tmp496 * tmp497 + tmp499 = tmp491 * tmp491 + tmp500 = 0.0010000000000000009 + tmp501 = tmp499 * tmp500 + tmp502 = tmp498 + tmp501 + tmp504 = libdevice.sqrt(tmp502) + tmp507 = libdevice.pow(tmp497, tmp506) + tmp508 = 1.0 + tmp509 = tmp507 - tmp508 + 
tmp510 = -tmp509 + tmp511 = libdevice.sqrt(tmp510) + tmp512 = tmp504 / tmp511 + tmp513 = 1e-08 + tmp514 = tmp512 + tmp513 + tmp515 = 0.9 + tmp516 = libdevice.pow(tmp515, tmp506) + tmp517 = tmp516 - tmp508 + tmp518 = 100.0 + tmp519 = tmp517 * tmp518 + tmp520 = tl.full([1], 1, tl.int32) + tmp521 = tmp520 / tmp519 + tmp522 = tmp514 / tmp521 + tmp523 = tmp495 / tmp522 + tmp524 = tmp503 + tmp523 + tl.store(out_ptr56 + (x14), tmp495, xmask) + tl.store(out_ptr58 + (x14), tmp524, xmask) + tl.store(out_ptr59 + (x14), tmp502, xmask) + elif pid < num_xblocks_15: + pid_offset = pid - num_xblocks_14 + xnumel = 2359296 + rnumel = 1 + xoffset = pid_offset * XBLOCK + xindex = xoffset + tl.arange(0, XBLOCK)[:] + xmask = tl.full([XBLOCK], True, tl.int1) + x15 = xindex + tmp525 = tl.load(in_ptr75 + (x15), None) + tmp526 = tl.load(in_ptr76 + (x15), None) + tmp531 = tl.load(in_ptr77 + (x15), None) + tmp538 = tl.load(in_ptr78 + (x15), None) + tmp540 = tl.load(in_ptr79 + (0)) + tmp541 = tl.broadcast_to(tmp540, [XBLOCK]) + tmp527 = tmp526 - tmp525 + tmp528 = 0.09999999999999998 + tmp529 = tmp527 * tmp528 + tmp530 = tmp525 + tmp529 + tmp532 = 0.999 + tmp533 = tmp531 * tmp532 + tmp534 = tmp526 * tmp526 + tmp535 = 0.0010000000000000009 + tmp536 = tmp534 * tmp535 + tmp537 = tmp533 + tmp536 + tmp539 = libdevice.sqrt(tmp537) + tmp542 = libdevice.pow(tmp532, tmp541) + tmp543 = 1.0 + tmp544 = tmp542 - tmp543 + tmp545 = -tmp544 + tmp546 = libdevice.sqrt(tmp545) + tmp547 = tmp539 / tmp546 + tmp548 = 1e-08 + tmp549 = tmp547 + tmp548 + tmp550 = 0.9 + tmp551 = libdevice.pow(tmp550, tmp541) + tmp552 = tmp551 - tmp543 + tmp553 = 100.0 + tmp554 = tmp552 * tmp553 + tmp555 = tl.full([1], 1, tl.int32) + tmp556 = tmp555 / tmp554 + tmp557 = tmp549 / tmp556 + tmp558 = tmp530 / tmp557 + tmp559 = tmp538 + tmp558 + tl.store(out_ptr60 + (x15), tmp530, None) + tl.store(out_ptr62 + (x15), tmp559, None) + tl.store(out_ptr63 + (x15), tmp537, None) + elif pid < num_xblocks_16: + pid_offset = pid - num_xblocks_15 + xnumel = 768 + rnumel = 1 + xoffset = pid_offset * XBLOCK + xindex = xoffset + tl.arange(0, XBLOCK)[:] + xmask = xindex < xnumel + x16 = xindex + tmp560 = tl.load(in_ptr80 + (x16), xmask) + tmp561 = tl.load(in_ptr81 + (x16), xmask) + tmp566 = tl.load(in_ptr82 + (x16), xmask) + tmp573 = tl.load(in_ptr83 + (x16), xmask) + tmp575 = tl.load(in_ptr84 + (0)) + tmp576 = tl.broadcast_to(tmp575, [XBLOCK]) + tmp562 = tmp561 - tmp560 + tmp563 = 0.09999999999999998 + tmp564 = tmp562 * tmp563 + tmp565 = tmp560 + tmp564 + tmp567 = 0.999 + tmp568 = tmp566 * tmp567 + tmp569 = tmp561 * tmp561 + tmp570 = 0.0010000000000000009 + tmp571 = tmp569 * tmp570 + tmp572 = tmp568 + tmp571 + tmp574 = libdevice.sqrt(tmp572) + tmp577 = libdevice.pow(tmp567, tmp576) + tmp578 = 1.0 + tmp579 = tmp577 - tmp578 + tmp580 = -tmp579 + tmp581 = libdevice.sqrt(tmp580) + tmp582 = tmp574 / tmp581 + tmp583 = 1e-08 + tmp584 = tmp582 + tmp583 + tmp585 = 0.9 + tmp586 = libdevice.pow(tmp585, tmp576) + tmp587 = tmp586 - tmp578 + tmp588 = 100.0 + tmp589 = tmp587 * tmp588 + tmp590 = tl.full([1], 1, tl.int32) + tmp591 = tmp590 / tmp589 + tmp592 = tmp584 / tmp591 + tmp593 = tmp565 / tmp592 + tmp594 = tmp573 + tmp593 + tl.store(out_ptr64 + (x16), tmp565, xmask) + tl.store(out_ptr66 + (x16), tmp594, xmask) + tl.store(out_ptr67 + (x16), tmp572, xmask) + elif pid < num_xblocks_17: + pid_offset = pid - num_xblocks_16 + xnumel = 768 + rnumel = 1 + xoffset = pid_offset * XBLOCK + xindex = xoffset + tl.arange(0, XBLOCK)[:] + xmask = xindex < xnumel + x17 = xindex + tmp595 = tl.load(in_ptr85 + 
(x17), xmask) + tmp596 = tl.load(in_ptr86 + (x17), xmask) + tmp601 = tl.load(in_ptr87 + (x17), xmask) + tmp608 = tl.load(in_ptr88 + (x17), xmask) + tmp610 = tl.load(in_ptr89 + (0)) + tmp611 = tl.broadcast_to(tmp610, [XBLOCK]) + tmp597 = tmp596 - tmp595 + tmp598 = 0.09999999999999998 + tmp599 = tmp597 * tmp598 + tmp600 = tmp595 + tmp599 + tmp602 = 0.999 + tmp603 = tmp601 * tmp602 + tmp604 = tmp596 * tmp596 + tmp605 = 0.0010000000000000009 + tmp606 = tmp604 * tmp605 + tmp607 = tmp603 + tmp606 + tmp609 = libdevice.sqrt(tmp607) + tmp612 = libdevice.pow(tmp602, tmp611) + tmp613 = 1.0 + tmp614 = tmp612 - tmp613 + tmp615 = -tmp614 + tmp616 = libdevice.sqrt(tmp615) + tmp617 = tmp609 / tmp616 + tmp618 = 1e-08 + tmp619 = tmp617 + tmp618 + tmp620 = 0.9 + tmp621 = libdevice.pow(tmp620, tmp611) + tmp622 = tmp621 - tmp613 + tmp623 = 100.0 + tmp624 = tmp622 * tmp623 + tmp625 = tl.full([1], 1, tl.int32) + tmp626 = tmp625 / tmp624 + tmp627 = tmp619 / tmp626 + tmp628 = tmp600 / tmp627 + tmp629 = tmp608 + tmp628 + tl.store(out_ptr68 + (x17), tmp600, xmask) + tl.store(out_ptr70 + (x17), tmp629, xmask) + tl.store(out_ptr71 + (x17), tmp607, xmask) + elif pid < num_xblocks_18: + pid_offset = pid - num_xblocks_17 + xnumel = 768 + rnumel = 1 + xoffset = pid_offset * XBLOCK + xindex = xoffset + tl.arange(0, XBLOCK)[:] + xmask = xindex < xnumel + x18 = xindex + tmp630 = tl.load(in_ptr90 + (x18), xmask) + tmp631 = tl.load(in_ptr91 + (x18), xmask) + tmp636 = tl.load(in_ptr92 + (x18), xmask) + tmp643 = tl.load(in_ptr93 + (x18), xmask) + tmp645 = tl.load(in_ptr94 + (0)) + tmp646 = tl.broadcast_to(tmp645, [XBLOCK]) + tmp632 = tmp631 - tmp630 + tmp633 = 0.09999999999999998 + tmp634 = tmp632 * tmp633 + tmp635 = tmp630 + tmp634 + tmp637 = 0.999 + tmp638 = tmp636 * tmp637 + tmp639 = tmp631 * tmp631 + tmp640 = 0.0010000000000000009 + tmp641 = tmp639 * tmp640 + tmp642 = tmp638 + tmp641 + tmp644 = libdevice.sqrt(tmp642) + tmp647 = libdevice.pow(tmp637, tmp646) + tmp648 = 1.0 + tmp649 = tmp647 - tmp648 + tmp650 = -tmp649 + tmp651 = libdevice.sqrt(tmp650) + tmp652 = tmp644 / tmp651 + tmp653 = 1e-08 + tmp654 = tmp652 + tmp653 + tmp655 = 0.9 + tmp656 = libdevice.pow(tmp655, tmp646) + tmp657 = tmp656 - tmp648 + tmp658 = 100.0 + tmp659 = tmp657 * tmp658 + tmp660 = tl.full([1], 1, tl.int32) + tmp661 = tmp660 / tmp659 + tmp662 = tmp654 / tmp661 + tmp663 = tmp635 / tmp662 + tmp664 = tmp643 + tmp663 + tl.store(out_ptr72 + (x18), tmp635, xmask) + tl.store(out_ptr74 + (x18), tmp664, xmask) + tl.store(out_ptr75 + (x18), tmp642, xmask) + else: + pass + ''', device_str='cuda') + + + # kernel path: /tmp/tmp2ln889l5/i7/ci7zf4ub54qutpo6uq7b2vzte3edxvvknb4h5fur6mesuyjgze47.py + # Source Nodes: [], Original ATen: [] + + triton_for_fused_6 = async_compile.triton('triton_', ''' + import triton + import triton.language as tl + from triton.compiler.compiler import AttrsDescriptor + + from torch._inductor.runtime import triton_helpers, triton_heuristics + from torch._inductor.runtime.triton_helpers import libdevice, math as tl_math + from torch._inductor.runtime.hints import AutotuneHint, ReductionHint, TileHint, instance_descriptor, DeviceProperties + + @triton_heuristics.foreach( + num_warps=8, + triton_meta={'signature': {0: '*fp32', 1: '*fp32', 2: '*fp32', 3: '*fp32', 4: '*fp32', 5: '*fp32', 6: '*fp32', 7: '*fp32', 8: '*fp32', 9: '*fp32', 10: '*fp32', 11: '*fp32', 12: '*fp32', 13: '*fp32', 14: '*fp32', 15: '*fp32', 16: '*fp32', 17: '*fp32', 18: '*fp32', 19: '*fp32', 20: '*fp32', 21: '*fp32', 22: '*fp32', 23: '*fp32', 24: '*fp32', 25: '*fp32', 26: 
'*fp32', 27: '*fp32', 28: '*fp32', 29: '*fp32', 30: '*fp32', 31: '*fp32', 32: '*fp32', 33: '*fp32', 34: '*fp32', 35: '*fp32', 36: '*fp32', 37: '*fp32', 38: '*fp32', 39: '*fp32', 40: '*fp32', 41: '*fp32', 42: '*fp32', 43: '*fp32', 44: '*fp32', 45: '*fp32', 46: '*fp32', 47: '*fp32', 48: '*fp32', 49: '*fp32', 50: '*fp32', 51: '*fp32', 52: '*fp32', 53: '*fp32', 54: '*fp32', 55: '*fp32', 56: '*fp32', 57: '*fp32', 58: '*fp32', 59: '*fp32', 60: '*fp32', 61: '*fp32', 62: '*fp32', 63: '*fp32', 64: '*fp32', 65: '*fp32', 66: '*fp32', 67: '*fp32', 68: '*fp32', 69: '*fp32', 70: '*fp32', 71: '*fp32', 72: '*fp32', 73: '*fp32', 74: '*fp32', 75: '*fp32', 76: '*fp32', 77: '*fp32', 78: '*fp32', 79: '*fp32', 80: '*fp32', 81: '*fp32', 82: '*fp32', 83: '*fp32', 84: '*fp32', 85: '*fp32', 86: '*fp32', 87: '*fp32', 88: '*fp32', 89: '*fp32', 90: '*fp32', 91: '*fp32', 92: '*fp32', 93: '*fp32', 94: '*fp32', 95: '*fp32', 96: '*fp32', 97: '*fp32', 98: '*fp32', 99: '*fp32', 100: '*fp32', 101: '*fp32', 102: '*fp32', 103: '*fp32', 104: '*fp32', 105: '*fp32', 106: '*fp32', 107: '*fp32', 108: '*fp32', 109: '*fp32', 110: '*fp32', 111: '*fp32', 112: '*fp32', 113: '*fp32', 114: '*fp32', 115: '*fp32', 116: '*fp32', 117: '*fp32', 118: '*fp32', 119: '*fp32', 120: '*fp32', 121: '*fp32', 122: '*fp32', 123: '*fp32', 124: '*fp32', 125: '*fp32', 126: '*fp32', 127: '*fp32', 128: '*fp32', 129: '*fp32', 130: '*fp32', 131: '*fp32', 132: '*fp32', 133: '*fp32', 134: '*fp32', 135: '*fp32', 136: '*fp32', 137: '*fp32', 138: '*fp32', 139: '*fp32', 140: '*fp32', 141: '*fp32', 142: '*fp32', 143: '*fp32', 144: '*fp32', 145: '*fp32', 146: '*fp32', 147: '*fp32', 148: '*fp32', 149: '*fp32', 150: '*fp32', 151: '*fp32'}, 'device': DeviceProperties(type='cuda', index=0, cc=90, major=9, regs_per_multiprocessor=65536, max_threads_per_multi_processor=2048, multi_processor_count=132), 'constants': {}, 'configs': [AttrsDescriptor(divisible_by_16=(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76, 77, 78, 79, 80, 81, 82, 83, 84, 85, 86, 87, 88, 89, 90, 91, 92, 93, 94, 95, 96, 97, 98, 99, 100, 101, 102, 103, 104, 105, 106, 107, 108, 109, 110, 111, 112, 113, 114, 115, 116, 117, 118, 119, 120, 121, 122, 123, 124, 125, 126, 127, 128, 129, 130, 131, 132, 133, 134, 135, 136, 137, 138, 139, 140, 141, 142, 143, 144, 145, 146, 147, 148, 149, 150, 151), equal_to_1=())]}, + inductor_meta={'kernel_name': 'triton_for_fused_6', 'mutated_arg_names': ['in_ptr0', 'in_ptr10', 'in_ptr12', 'in_ptr13', 'in_ptr15', 'in_ptr17', 'in_ptr18', 'in_ptr2', 'in_ptr20', 'in_ptr22', 'in_ptr23', 'in_ptr25', 'in_ptr27', 'in_ptr28', 'in_ptr3', 'in_ptr30', 'in_ptr32', 'in_ptr33', 'in_ptr35', 'in_ptr37', 'in_ptr38', 'in_ptr40', 'in_ptr42', 'in_ptr43', 'in_ptr45', 'in_ptr47', 'in_ptr48', 'in_ptr5', 'in_ptr50', 'in_ptr52', 'in_ptr53', 'in_ptr55', 'in_ptr57', 'in_ptr58', 'in_ptr60', 'in_ptr62', 'in_ptr63', 'in_ptr65', 'in_ptr67', 'in_ptr68', 'in_ptr7', 'in_ptr70', 'in_ptr72', 'in_ptr73', 'in_ptr75', 'in_ptr77', 'in_ptr78', 'in_ptr8', 'in_ptr80', 'in_ptr82', 'in_ptr83', 'in_ptr85', 'in_ptr87', 'in_ptr88', 'in_ptr90', 'in_ptr92', 'in_ptr93', 'out_ptr0', 'out_ptr10', 'out_ptr11', 'out_ptr12', 'out_ptr14', 'out_ptr15', 'out_ptr16', 'out_ptr18', 'out_ptr19', 'out_ptr2', 'out_ptr20', 'out_ptr22', 'out_ptr23', 'out_ptr24', 'out_ptr26', 'out_ptr27', 'out_ptr28', 
'out_ptr3', 'out_ptr30', 'out_ptr31', 'out_ptr32', 'out_ptr34', 'out_ptr35', 'out_ptr36', 'out_ptr38', 'out_ptr39', 'out_ptr4', 'out_ptr40', 'out_ptr42', 'out_ptr43', 'out_ptr44', 'out_ptr46', 'out_ptr47', 'out_ptr48', 'out_ptr50', 'out_ptr51', 'out_ptr52', 'out_ptr54', 'out_ptr55', 'out_ptr56', 'out_ptr58', 'out_ptr59', 'out_ptr6', 'out_ptr60', 'out_ptr62', 'out_ptr63', 'out_ptr64', 'out_ptr66', 'out_ptr67', 'out_ptr68', 'out_ptr7', 'out_ptr70', 'out_ptr71', 'out_ptr72', 'out_ptr74', 'out_ptr75', 'out_ptr8'], 'backend_hash': 'B0714AF1B5CA55D285C0AA74C7669268EFE45FDD9B7A0A70183B8D89AC8FCAC4', 'are_deterministic_algorithms_enabled': False, 'assert_indirect_indexing': True, 'autotune_local_cache': True, 'autotune_pointwise': True, 'autotune_remote_cache': None, 'force_disable_caches': False, 'dynamic_scale_rblock': True, 'max_autotune': False, 'max_autotune_pointwise': False, 'min_split_scan_rblock': 256, 'spill_threshold': 16, 'store_cubin': False}, + ) + @triton.jit + def triton_(in_ptr0, in_ptr1, in_ptr2, in_ptr3, in_ptr4, in_ptr5, in_ptr6, in_ptr7, in_ptr8, in_ptr9, in_ptr10, in_ptr11, in_ptr12, in_ptr13, in_ptr14, in_ptr15, in_ptr16, in_ptr17, in_ptr18, in_ptr19, in_ptr20, in_ptr21, in_ptr22, in_ptr23, in_ptr24, in_ptr25, in_ptr26, in_ptr27, in_ptr28, in_ptr29, in_ptr30, in_ptr31, in_ptr32, in_ptr33, in_ptr34, in_ptr35, in_ptr36, in_ptr37, in_ptr38, in_ptr39, in_ptr40, in_ptr41, in_ptr42, in_ptr43, in_ptr44, in_ptr45, in_ptr46, in_ptr47, in_ptr48, in_ptr49, in_ptr50, in_ptr51, in_ptr52, in_ptr53, in_ptr54, in_ptr55, in_ptr56, in_ptr57, in_ptr58, in_ptr59, in_ptr60, in_ptr61, in_ptr62, in_ptr63, in_ptr64, in_ptr65, in_ptr66, in_ptr67, in_ptr68, in_ptr69, in_ptr70, in_ptr71, in_ptr72, in_ptr73, in_ptr74, in_ptr75, in_ptr76, in_ptr77, in_ptr78, in_ptr79, in_ptr80, in_ptr81, in_ptr82, in_ptr83, in_ptr84, in_ptr85, in_ptr86, in_ptr87, in_ptr88, in_ptr89, in_ptr90, in_ptr91, in_ptr92, in_ptr93, in_ptr94, out_ptr0, out_ptr2, out_ptr3, out_ptr4, out_ptr6, out_ptr7, out_ptr8, out_ptr10, out_ptr11, out_ptr12, out_ptr14, out_ptr15, out_ptr16, out_ptr18, out_ptr19, out_ptr20, out_ptr22, out_ptr23, out_ptr24, out_ptr26, out_ptr27, out_ptr28, out_ptr30, out_ptr31, out_ptr32, out_ptr34, out_ptr35, out_ptr36, out_ptr38, out_ptr39, out_ptr40, out_ptr42, out_ptr43, out_ptr44, out_ptr46, out_ptr47, out_ptr48, out_ptr50, out_ptr51, out_ptr52, out_ptr54, out_ptr55, out_ptr56, out_ptr58, out_ptr59, out_ptr60, out_ptr62, out_ptr63, out_ptr64, out_ptr66, out_ptr67, out_ptr68, out_ptr70, out_ptr71, out_ptr72, out_ptr74, out_ptr75): + pid = tl.program_id(0) + XBLOCK: tl.constexpr = 1024 + num_xblocks_0 = tl.cdiv(1769472, XBLOCK) + num_xblocks_1 = num_xblocks_0 + tl.cdiv(2304, XBLOCK) + num_xblocks_2 = num_xblocks_1 + tl.cdiv(589824, XBLOCK) + num_xblocks_3 = num_xblocks_2 + tl.cdiv(768, XBLOCK) + num_xblocks_4 = num_xblocks_3 + tl.cdiv(768, XBLOCK) + num_xblocks_5 = num_xblocks_4 + tl.cdiv(768, XBLOCK) + num_xblocks_6 = num_xblocks_5 + tl.cdiv(2359296, XBLOCK) + num_xblocks_7 = num_xblocks_6 + tl.cdiv(3072, XBLOCK) + num_xblocks_8 = num_xblocks_7 + tl.cdiv(2359296, XBLOCK) + num_xblocks_9 = num_xblocks_8 + tl.cdiv(768, XBLOCK) + num_xblocks_10 = num_xblocks_9 + tl.cdiv(768, XBLOCK) + num_xblocks_11 = num_xblocks_10 + tl.cdiv(768, XBLOCK) + num_xblocks_12 = num_xblocks_11 + tl.cdiv(1769472, XBLOCK) + num_xblocks_13 = num_xblocks_12 + tl.cdiv(2304, XBLOCK) + num_xblocks_14 = num_xblocks_13 + tl.cdiv(589824, XBLOCK) + num_xblocks_15 = num_xblocks_14 + tl.cdiv(768, XBLOCK) + num_xblocks_16 = num_xblocks_15 + 
tl.cdiv(768, XBLOCK) + num_xblocks_17 = num_xblocks_16 + tl.cdiv(768, XBLOCK) + num_xblocks_18 = num_xblocks_17 + tl.cdiv(2359296, XBLOCK) + if pid < num_xblocks_0: + pid_offset = pid + xnumel = 1769472 + rnumel = 1 + xoffset = pid_offset * XBLOCK + xindex = xoffset + tl.arange(0, XBLOCK)[:] + xmask = tl.full([XBLOCK], True, tl.int1) + x0 = xindex + tmp0 = tl.load(in_ptr0 + (x0), None) + tmp1 = tl.load(in_ptr1 + (x0), None) + tmp6 = tl.load(in_ptr2 + (x0), None) + tmp13 = tl.load(in_ptr3 + (x0), None) + tmp15 = tl.load(in_ptr4 + (0)) + tmp16 = tl.broadcast_to(tmp15, [XBLOCK]) + tmp2 = tmp1 - tmp0 + tmp3 = 0.09999999999999998 + tmp4 = tmp2 * tmp3 + tmp5 = tmp0 + tmp4 + tmp7 = 0.999 + tmp8 = tmp6 * tmp7 + tmp9 = tmp1 * tmp1 + tmp10 = 0.0010000000000000009 + tmp11 = tmp9 * tmp10 + tmp12 = tmp8 + tmp11 + tmp14 = libdevice.sqrt(tmp12) + tmp17 = libdevice.pow(tmp7, tmp16) + tmp18 = 1.0 + tmp19 = tmp17 - tmp18 + tmp20 = -tmp19 + tmp21 = libdevice.sqrt(tmp20) + tmp22 = tmp14 / tmp21 + tmp23 = 1e-08 + tmp24 = tmp22 + tmp23 + tmp25 = 0.9 + tmp26 = libdevice.pow(tmp25, tmp16) + tmp27 = tmp26 - tmp18 + tmp28 = 100.0 + tmp29 = tmp27 * tmp28 + tmp30 = tl.full([1], 1, tl.int32) + tmp31 = tmp30 / tmp29 + tmp32 = tmp24 / tmp31 + tmp33 = tmp5 / tmp32 + tmp34 = tmp13 + tmp33 + tl.store(out_ptr0 + (x0), tmp5, None) + tl.store(out_ptr2 + (x0), tmp34, None) + tl.store(out_ptr3 + (x0), tmp12, None) + elif pid < num_xblocks_1: + pid_offset = pid - num_xblocks_0 + xnumel = 2304 + rnumel = 1 + xoffset = pid_offset * XBLOCK + xindex = xoffset + tl.arange(0, XBLOCK)[:] + xmask = xindex < xnumel + x1 = xindex + tmp35 = tl.load(in_ptr5 + (x1), xmask) + tmp36 = tl.load(in_ptr6 + (x1), xmask) + tmp41 = tl.load(in_ptr7 + (x1), xmask) + tmp48 = tl.load(in_ptr8 + (x1), xmask) + tmp50 = tl.load(in_ptr9 + (0)) + tmp51 = tl.broadcast_to(tmp50, [XBLOCK]) + tmp37 = tmp36 - tmp35 + tmp38 = 0.09999999999999998 + tmp39 = tmp37 * tmp38 + tmp40 = tmp35 + tmp39 + tmp42 = 0.999 + tmp43 = tmp41 * tmp42 + tmp44 = tmp36 * tmp36 + tmp45 = 0.0010000000000000009 + tmp46 = tmp44 * tmp45 + tmp47 = tmp43 + tmp46 + tmp49 = libdevice.sqrt(tmp47) + tmp52 = libdevice.pow(tmp42, tmp51) + tmp53 = 1.0 + tmp54 = tmp52 - tmp53 + tmp55 = -tmp54 + tmp56 = libdevice.sqrt(tmp55) + tmp57 = tmp49 / tmp56 + tmp58 = 1e-08 + tmp59 = tmp57 + tmp58 + tmp60 = 0.9 + tmp61 = libdevice.pow(tmp60, tmp51) + tmp62 = tmp61 - tmp53 + tmp63 = 100.0 + tmp64 = tmp62 * tmp63 + tmp65 = tl.full([1], 1, tl.int32) + tmp66 = tmp65 / tmp64 + tmp67 = tmp59 / tmp66 + tmp68 = tmp40 / tmp67 + tmp69 = tmp48 + tmp68 + tl.store(out_ptr4 + (x1), tmp40, xmask) + tl.store(out_ptr6 + (x1), tmp69, xmask) + tl.store(out_ptr7 + (x1), tmp47, xmask) + elif pid < num_xblocks_2: + pid_offset = pid - num_xblocks_1 + xnumel = 589824 + rnumel = 1 + xoffset = pid_offset * XBLOCK + xindex = xoffset + tl.arange(0, XBLOCK)[:] + xmask = tl.full([XBLOCK], True, tl.int1) + x2 = xindex + tmp70 = tl.load(in_ptr10 + (x2), None) + tmp71 = tl.load(in_ptr11 + (x2), None) + tmp76 = tl.load(in_ptr12 + (x2), None) + tmp83 = tl.load(in_ptr13 + (x2), None) + tmp85 = tl.load(in_ptr14 + (0)) + tmp86 = tl.broadcast_to(tmp85, [XBLOCK]) + tmp72 = tmp71 - tmp70 + tmp73 = 0.09999999999999998 + tmp74 = tmp72 * tmp73 + tmp75 = tmp70 + tmp74 + tmp77 = 0.999 + tmp78 = tmp76 * tmp77 + tmp79 = tmp71 * tmp71 + tmp80 = 0.0010000000000000009 + tmp81 = tmp79 * tmp80 + tmp82 = tmp78 + tmp81 + tmp84 = libdevice.sqrt(tmp82) + tmp87 = libdevice.pow(tmp77, tmp86) + tmp88 = 1.0 + tmp89 = tmp87 - tmp88 + tmp90 = -tmp89 + tmp91 = 
libdevice.sqrt(tmp90) + tmp92 = tmp84 / tmp91 + tmp93 = 1e-08 + tmp94 = tmp92 + tmp93 + tmp95 = 0.9 + tmp96 = libdevice.pow(tmp95, tmp86) + tmp97 = tmp96 - tmp88 + tmp98 = 100.0 + tmp99 = tmp97 * tmp98 + tmp100 = tl.full([1], 1, tl.int32) + tmp101 = tmp100 / tmp99 + tmp102 = tmp94 / tmp101 + tmp103 = tmp75 / tmp102 + tmp104 = tmp83 + tmp103 + tl.store(out_ptr8 + (x2), tmp75, None) + tl.store(out_ptr10 + (x2), tmp104, None) + tl.store(out_ptr11 + (x2), tmp82, None) + elif pid < num_xblocks_3: + pid_offset = pid - num_xblocks_2 + xnumel = 768 + rnumel = 1 + xoffset = pid_offset * XBLOCK + xindex = xoffset + tl.arange(0, XBLOCK)[:] + xmask = xindex < xnumel + x3 = xindex + tmp105 = tl.load(in_ptr15 + (x3), xmask) + tmp106 = tl.load(in_ptr16 + (x3), xmask) + tmp111 = tl.load(in_ptr17 + (x3), xmask) + tmp118 = tl.load(in_ptr18 + (x3), xmask) + tmp120 = tl.load(in_ptr19 + (0)) + tmp121 = tl.broadcast_to(tmp120, [XBLOCK]) + tmp107 = tmp106 - tmp105 + tmp108 = 0.09999999999999998 + tmp109 = tmp107 * tmp108 + tmp110 = tmp105 + tmp109 + tmp112 = 0.999 + tmp113 = tmp111 * tmp112 + tmp114 = tmp106 * tmp106 + tmp115 = 0.0010000000000000009 + tmp116 = tmp114 * tmp115 + tmp117 = tmp113 + tmp116 + tmp119 = libdevice.sqrt(tmp117) + tmp122 = libdevice.pow(tmp112, tmp121) + tmp123 = 1.0 + tmp124 = tmp122 - tmp123 + tmp125 = -tmp124 + tmp126 = libdevice.sqrt(tmp125) + tmp127 = tmp119 / tmp126 + tmp128 = 1e-08 + tmp129 = tmp127 + tmp128 + tmp130 = 0.9 + tmp131 = libdevice.pow(tmp130, tmp121) + tmp132 = tmp131 - tmp123 + tmp133 = 100.0 + tmp134 = tmp132 * tmp133 + tmp135 = tl.full([1], 1, tl.int32) + tmp136 = tmp135 / tmp134 + tmp137 = tmp129 / tmp136 + tmp138 = tmp110 / tmp137 + tmp139 = tmp118 + tmp138 + tl.store(out_ptr12 + (x3), tmp110, xmask) + tl.store(out_ptr14 + (x3), tmp139, xmask) + tl.store(out_ptr15 + (x3), tmp117, xmask) + elif pid < num_xblocks_4: + pid_offset = pid - num_xblocks_3 + xnumel = 768 + rnumel = 1 + xoffset = pid_offset * XBLOCK + xindex = xoffset + tl.arange(0, XBLOCK)[:] + xmask = xindex < xnumel + x4 = xindex + tmp140 = tl.load(in_ptr20 + (x4), xmask) + tmp141 = tl.load(in_ptr21 + (x4), xmask) + tmp146 = tl.load(in_ptr22 + (x4), xmask) + tmp153 = tl.load(in_ptr23 + (x4), xmask) + tmp155 = tl.load(in_ptr24 + (0)) + tmp156 = tl.broadcast_to(tmp155, [XBLOCK]) + tmp142 = tmp141 - tmp140 + tmp143 = 0.09999999999999998 + tmp144 = tmp142 * tmp143 + tmp145 = tmp140 + tmp144 + tmp147 = 0.999 + tmp148 = tmp146 * tmp147 + tmp149 = tmp141 * tmp141 + tmp150 = 0.0010000000000000009 + tmp151 = tmp149 * tmp150 + tmp152 = tmp148 + tmp151 + tmp154 = libdevice.sqrt(tmp152) + tmp157 = libdevice.pow(tmp147, tmp156) + tmp158 = 1.0 + tmp159 = tmp157 - tmp158 + tmp160 = -tmp159 + tmp161 = libdevice.sqrt(tmp160) + tmp162 = tmp154 / tmp161 + tmp163 = 1e-08 + tmp164 = tmp162 + tmp163 + tmp165 = 0.9 + tmp166 = libdevice.pow(tmp165, tmp156) + tmp167 = tmp166 - tmp158 + tmp168 = 100.0 + tmp169 = tmp167 * tmp168 + tmp170 = tl.full([1], 1, tl.int32) + tmp171 = tmp170 / tmp169 + tmp172 = tmp164 / tmp171 + tmp173 = tmp145 / tmp172 + tmp174 = tmp153 + tmp173 + tl.store(out_ptr16 + (x4), tmp145, xmask) + tl.store(out_ptr18 + (x4), tmp174, xmask) + tl.store(out_ptr19 + (x4), tmp152, xmask) + elif pid < num_xblocks_5: + pid_offset = pid - num_xblocks_4 + xnumel = 768 + rnumel = 1 + xoffset = pid_offset * XBLOCK + xindex = xoffset + tl.arange(0, XBLOCK)[:] + xmask = xindex < xnumel + x5 = xindex + tmp175 = tl.load(in_ptr25 + (x5), xmask) + tmp176 = tl.load(in_ptr26 + (x5), xmask) + tmp181 = tl.load(in_ptr27 + (x5), 
xmask) + tmp188 = tl.load(in_ptr28 + (x5), xmask) + tmp190 = tl.load(in_ptr29 + (0)) + tmp191 = tl.broadcast_to(tmp190, [XBLOCK]) + tmp177 = tmp176 - tmp175 + tmp178 = 0.09999999999999998 + tmp179 = tmp177 * tmp178 + tmp180 = tmp175 + tmp179 + tmp182 = 0.999 + tmp183 = tmp181 * tmp182 + tmp184 = tmp176 * tmp176 + tmp185 = 0.0010000000000000009 + tmp186 = tmp184 * tmp185 + tmp187 = tmp183 + tmp186 + tmp189 = libdevice.sqrt(tmp187) + tmp192 = libdevice.pow(tmp182, tmp191) + tmp193 = 1.0 + tmp194 = tmp192 - tmp193 + tmp195 = -tmp194 + tmp196 = libdevice.sqrt(tmp195) + tmp197 = tmp189 / tmp196 + tmp198 = 1e-08 + tmp199 = tmp197 + tmp198 + tmp200 = 0.9 + tmp201 = libdevice.pow(tmp200, tmp191) + tmp202 = tmp201 - tmp193 + tmp203 = 100.0 + tmp204 = tmp202 * tmp203 + tmp205 = tl.full([1], 1, tl.int32) + tmp206 = tmp205 / tmp204 + tmp207 = tmp199 / tmp206 + tmp208 = tmp180 / tmp207 + tmp209 = tmp188 + tmp208 + tl.store(out_ptr20 + (x5), tmp180, xmask) + tl.store(out_ptr22 + (x5), tmp209, xmask) + tl.store(out_ptr23 + (x5), tmp187, xmask) + elif pid < num_xblocks_6: + pid_offset = pid - num_xblocks_5 + xnumel = 2359296 + rnumel = 1 + xoffset = pid_offset * XBLOCK + xindex = xoffset + tl.arange(0, XBLOCK)[:] + xmask = tl.full([XBLOCK], True, tl.int1) + x6 = xindex + tmp210 = tl.load(in_ptr30 + (x6), None) + tmp211 = tl.load(in_ptr31 + (x6), None) + tmp216 = tl.load(in_ptr32 + (x6), None) + tmp223 = tl.load(in_ptr33 + (x6), None) + tmp225 = tl.load(in_ptr34 + (0)) + tmp226 = tl.broadcast_to(tmp225, [XBLOCK]) + tmp212 = tmp211 - tmp210 + tmp213 = 0.09999999999999998 + tmp214 = tmp212 * tmp213 + tmp215 = tmp210 + tmp214 + tmp217 = 0.999 + tmp218 = tmp216 * tmp217 + tmp219 = tmp211 * tmp211 + tmp220 = 0.0010000000000000009 + tmp221 = tmp219 * tmp220 + tmp222 = tmp218 + tmp221 + tmp224 = libdevice.sqrt(tmp222) + tmp227 = libdevice.pow(tmp217, tmp226) + tmp228 = 1.0 + tmp229 = tmp227 - tmp228 + tmp230 = -tmp229 + tmp231 = libdevice.sqrt(tmp230) + tmp232 = tmp224 / tmp231 + tmp233 = 1e-08 + tmp234 = tmp232 + tmp233 + tmp235 = 0.9 + tmp236 = libdevice.pow(tmp235, tmp226) + tmp237 = tmp236 - tmp228 + tmp238 = 100.0 + tmp239 = tmp237 * tmp238 + tmp240 = tl.full([1], 1, tl.int32) + tmp241 = tmp240 / tmp239 + tmp242 = tmp234 / tmp241 + tmp243 = tmp215 / tmp242 + tmp244 = tmp223 + tmp243 + tl.store(out_ptr24 + (x6), tmp215, None) + tl.store(out_ptr26 + (x6), tmp244, None) + tl.store(out_ptr27 + (x6), tmp222, None) + elif pid < num_xblocks_7: + pid_offset = pid - num_xblocks_6 + xnumel = 3072 + rnumel = 1 + xoffset = pid_offset * XBLOCK + xindex = xoffset + tl.arange(0, XBLOCK)[:] + xmask = xindex < xnumel + x7 = xindex + tmp245 = tl.load(in_ptr35 + (x7), xmask) + tmp246 = tl.load(in_ptr36 + (x7), xmask) + tmp251 = tl.load(in_ptr37 + (x7), xmask) + tmp258 = tl.load(in_ptr38 + (x7), xmask) + tmp260 = tl.load(in_ptr39 + (0)) + tmp261 = tl.broadcast_to(tmp260, [XBLOCK]) + tmp247 = tmp246 - tmp245 + tmp248 = 0.09999999999999998 + tmp249 = tmp247 * tmp248 + tmp250 = tmp245 + tmp249 + tmp252 = 0.999 + tmp253 = tmp251 * tmp252 + tmp254 = tmp246 * tmp246 + tmp255 = 0.0010000000000000009 + tmp256 = tmp254 * tmp255 + tmp257 = tmp253 + tmp256 + tmp259 = libdevice.sqrt(tmp257) + tmp262 = libdevice.pow(tmp252, tmp261) + tmp263 = 1.0 + tmp264 = tmp262 - tmp263 + tmp265 = -tmp264 + tmp266 = libdevice.sqrt(tmp265) + tmp267 = tmp259 / tmp266 + tmp268 = 1e-08 + tmp269 = tmp267 + tmp268 + tmp270 = 0.9 + tmp271 = libdevice.pow(tmp270, tmp261) + tmp272 = tmp271 - tmp263 + tmp273 = 100.0 + tmp274 = tmp272 * tmp273 + tmp275 = 
tl.full([1], 1, tl.int32) + tmp276 = tmp275 / tmp274 + tmp277 = tmp269 / tmp276 + tmp278 = tmp250 / tmp277 + tmp279 = tmp258 + tmp278 + tl.store(out_ptr28 + (x7), tmp250, xmask) + tl.store(out_ptr30 + (x7), tmp279, xmask) + tl.store(out_ptr31 + (x7), tmp257, xmask) + elif pid < num_xblocks_8: + pid_offset = pid - num_xblocks_7 + xnumel = 2359296 + rnumel = 1 + xoffset = pid_offset * XBLOCK + xindex = xoffset + tl.arange(0, XBLOCK)[:] + xmask = tl.full([XBLOCK], True, tl.int1) + x8 = xindex + tmp280 = tl.load(in_ptr40 + (x8), None) + tmp281 = tl.load(in_ptr41 + (x8), None) + tmp286 = tl.load(in_ptr42 + (x8), None) + tmp293 = tl.load(in_ptr43 + (x8), None) + tmp295 = tl.load(in_ptr44 + (0)) + tmp296 = tl.broadcast_to(tmp295, [XBLOCK]) + tmp282 = tmp281 - tmp280 + tmp283 = 0.09999999999999998 + tmp284 = tmp282 * tmp283 + tmp285 = tmp280 + tmp284 + tmp287 = 0.999 + tmp288 = tmp286 * tmp287 + tmp289 = tmp281 * tmp281 + tmp290 = 0.0010000000000000009 + tmp291 = tmp289 * tmp290 + tmp292 = tmp288 + tmp291 + tmp294 = libdevice.sqrt(tmp292) + tmp297 = libdevice.pow(tmp287, tmp296) + tmp298 = 1.0 + tmp299 = tmp297 - tmp298 + tmp300 = -tmp299 + tmp301 = libdevice.sqrt(tmp300) + tmp302 = tmp294 / tmp301 + tmp303 = 1e-08 + tmp304 = tmp302 + tmp303 + tmp305 = 0.9 + tmp306 = libdevice.pow(tmp305, tmp296) + tmp307 = tmp306 - tmp298 + tmp308 = 100.0 + tmp309 = tmp307 * tmp308 + tmp310 = tl.full([1], 1, tl.int32) + tmp311 = tmp310 / tmp309 + tmp312 = tmp304 / tmp311 + tmp313 = tmp285 / tmp312 + tmp314 = tmp293 + tmp313 + tl.store(out_ptr32 + (x8), tmp285, None) + tl.store(out_ptr34 + (x8), tmp314, None) + tl.store(out_ptr35 + (x8), tmp292, None) + elif pid < num_xblocks_9: + pid_offset = pid - num_xblocks_8 + xnumel = 768 + rnumel = 1 + xoffset = pid_offset * XBLOCK + xindex = xoffset + tl.arange(0, XBLOCK)[:] + xmask = xindex < xnumel + x9 = xindex + tmp315 = tl.load(in_ptr45 + (x9), xmask) + tmp316 = tl.load(in_ptr46 + (x9), xmask) + tmp321 = tl.load(in_ptr47 + (x9), xmask) + tmp328 = tl.load(in_ptr48 + (x9), xmask) + tmp330 = tl.load(in_ptr49 + (0)) + tmp331 = tl.broadcast_to(tmp330, [XBLOCK]) + tmp317 = tmp316 - tmp315 + tmp318 = 0.09999999999999998 + tmp319 = tmp317 * tmp318 + tmp320 = tmp315 + tmp319 + tmp322 = 0.999 + tmp323 = tmp321 * tmp322 + tmp324 = tmp316 * tmp316 + tmp325 = 0.0010000000000000009 + tmp326 = tmp324 * tmp325 + tmp327 = tmp323 + tmp326 + tmp329 = libdevice.sqrt(tmp327) + tmp332 = libdevice.pow(tmp322, tmp331) + tmp333 = 1.0 + tmp334 = tmp332 - tmp333 + tmp335 = -tmp334 + tmp336 = libdevice.sqrt(tmp335) + tmp337 = tmp329 / tmp336 + tmp338 = 1e-08 + tmp339 = tmp337 + tmp338 + tmp340 = 0.9 + tmp341 = libdevice.pow(tmp340, tmp331) + tmp342 = tmp341 - tmp333 + tmp343 = 100.0 + tmp344 = tmp342 * tmp343 + tmp345 = tl.full([1], 1, tl.int32) + tmp346 = tmp345 / tmp344 + tmp347 = tmp339 / tmp346 + tmp348 = tmp320 / tmp347 + tmp349 = tmp328 + tmp348 + tl.store(out_ptr36 + (x9), tmp320, xmask) + tl.store(out_ptr38 + (x9), tmp349, xmask) + tl.store(out_ptr39 + (x9), tmp327, xmask) + elif pid < num_xblocks_10: + pid_offset = pid - num_xblocks_9 + xnumel = 768 + rnumel = 1 + xoffset = pid_offset * XBLOCK + xindex = xoffset + tl.arange(0, XBLOCK)[:] + xmask = xindex < xnumel + x10 = xindex + tmp350 = tl.load(in_ptr50 + (x10), xmask) + tmp351 = tl.load(in_ptr51 + (x10), xmask) + tmp356 = tl.load(in_ptr52 + (x10), xmask) + tmp363 = tl.load(in_ptr53 + (x10), xmask) + tmp365 = tl.load(in_ptr54 + (0)) + tmp366 = tl.broadcast_to(tmp365, [XBLOCK]) + tmp352 = tmp351 - tmp350 + tmp353 = 0.09999999999999998 
+ tmp354 = tmp352 * tmp353 + tmp355 = tmp350 + tmp354 + tmp357 = 0.999 + tmp358 = tmp356 * tmp357 + tmp359 = tmp351 * tmp351 + tmp360 = 0.0010000000000000009 + tmp361 = tmp359 * tmp360 + tmp362 = tmp358 + tmp361 + tmp364 = libdevice.sqrt(tmp362) + tmp367 = libdevice.pow(tmp357, tmp366) + tmp368 = 1.0 + tmp369 = tmp367 - tmp368 + tmp370 = -tmp369 + tmp371 = libdevice.sqrt(tmp370) + tmp372 = tmp364 / tmp371 + tmp373 = 1e-08 + tmp374 = tmp372 + tmp373 + tmp375 = 0.9 + tmp376 = libdevice.pow(tmp375, tmp366) + tmp377 = tmp376 - tmp368 + tmp378 = 100.0 + tmp379 = tmp377 * tmp378 + tmp380 = tl.full([1], 1, tl.int32) + tmp381 = tmp380 / tmp379 + tmp382 = tmp374 / tmp381 + tmp383 = tmp355 / tmp382 + tmp384 = tmp363 + tmp383 + tl.store(out_ptr40 + (x10), tmp355, xmask) + tl.store(out_ptr42 + (x10), tmp384, xmask) + tl.store(out_ptr43 + (x10), tmp362, xmask) + elif pid < num_xblocks_11: + pid_offset = pid - num_xblocks_10 + xnumel = 768 + rnumel = 1 + xoffset = pid_offset * XBLOCK + xindex = xoffset + tl.arange(0, XBLOCK)[:] + xmask = xindex < xnumel + x11 = xindex + tmp385 = tl.load(in_ptr55 + (x11), xmask) + tmp386 = tl.load(in_ptr56 + (x11), xmask) + tmp391 = tl.load(in_ptr57 + (x11), xmask) + tmp398 = tl.load(in_ptr58 + (x11), xmask) + tmp400 = tl.load(in_ptr59 + (0)) + tmp401 = tl.broadcast_to(tmp400, [XBLOCK]) + tmp387 = tmp386 - tmp385 + tmp388 = 0.09999999999999998 + tmp389 = tmp387 * tmp388 + tmp390 = tmp385 + tmp389 + tmp392 = 0.999 + tmp393 = tmp391 * tmp392 + tmp394 = tmp386 * tmp386 + tmp395 = 0.0010000000000000009 + tmp396 = tmp394 * tmp395 + tmp397 = tmp393 + tmp396 + tmp399 = libdevice.sqrt(tmp397) + tmp402 = libdevice.pow(tmp392, tmp401) + tmp403 = 1.0 + tmp404 = tmp402 - tmp403 + tmp405 = -tmp404 + tmp406 = libdevice.sqrt(tmp405) + tmp407 = tmp399 / tmp406 + tmp408 = 1e-08 + tmp409 = tmp407 + tmp408 + tmp410 = 0.9 + tmp411 = libdevice.pow(tmp410, tmp401) + tmp412 = tmp411 - tmp403 + tmp413 = 100.0 + tmp414 = tmp412 * tmp413 + tmp415 = tl.full([1], 1, tl.int32) + tmp416 = tmp415 / tmp414 + tmp417 = tmp409 / tmp416 + tmp418 = tmp390 / tmp417 + tmp419 = tmp398 + tmp418 + tl.store(out_ptr44 + (x11), tmp390, xmask) + tl.store(out_ptr46 + (x11), tmp419, xmask) + tl.store(out_ptr47 + (x11), tmp397, xmask) + elif pid < num_xblocks_12: + pid_offset = pid - num_xblocks_11 + xnumel = 1769472 + rnumel = 1 + xoffset = pid_offset * XBLOCK + xindex = xoffset + tl.arange(0, XBLOCK)[:] + xmask = tl.full([XBLOCK], True, tl.int1) + x12 = xindex + tmp420 = tl.load(in_ptr60 + (x12), None) + tmp421 = tl.load(in_ptr61 + (x12), None) + tmp426 = tl.load(in_ptr62 + (x12), None) + tmp433 = tl.load(in_ptr63 + (x12), None) + tmp435 = tl.load(in_ptr64 + (0)) + tmp436 = tl.broadcast_to(tmp435, [XBLOCK]) + tmp422 = tmp421 - tmp420 + tmp423 = 0.09999999999999998 + tmp424 = tmp422 * tmp423 + tmp425 = tmp420 + tmp424 + tmp427 = 0.999 + tmp428 = tmp426 * tmp427 + tmp429 = tmp421 * tmp421 + tmp430 = 0.0010000000000000009 + tmp431 = tmp429 * tmp430 + tmp432 = tmp428 + tmp431 + tmp434 = libdevice.sqrt(tmp432) + tmp437 = libdevice.pow(tmp427, tmp436) + tmp438 = 1.0 + tmp439 = tmp437 - tmp438 + tmp440 = -tmp439 + tmp441 = libdevice.sqrt(tmp440) + tmp442 = tmp434 / tmp441 + tmp443 = 1e-08 + tmp444 = tmp442 + tmp443 + tmp445 = 0.9 + tmp446 = libdevice.pow(tmp445, tmp436) + tmp447 = tmp446 - tmp438 + tmp448 = 100.0 + tmp449 = tmp447 * tmp448 + tmp450 = tl.full([1], 1, tl.int32) + tmp451 = tmp450 / tmp449 + tmp452 = tmp444 / tmp451 + tmp453 = tmp425 / tmp452 + tmp454 = tmp433 + tmp453 + tl.store(out_ptr48 + (x12), tmp425, 
None) + tl.store(out_ptr50 + (x12), tmp454, None) + tl.store(out_ptr51 + (x12), tmp432, None) + elif pid < num_xblocks_13: + pid_offset = pid - num_xblocks_12 + xnumel = 2304 + rnumel = 1 + xoffset = pid_offset * XBLOCK + xindex = xoffset + tl.arange(0, XBLOCK)[:] + xmask = xindex < xnumel + x13 = xindex + tmp455 = tl.load(in_ptr65 + (x13), xmask) + tmp456 = tl.load(in_ptr66 + (x13), xmask) + tmp461 = tl.load(in_ptr67 + (x13), xmask) + tmp468 = tl.load(in_ptr68 + (x13), xmask) + tmp470 = tl.load(in_ptr69 + (0)) + tmp471 = tl.broadcast_to(tmp470, [XBLOCK]) + tmp457 = tmp456 - tmp455 + tmp458 = 0.09999999999999998 + tmp459 = tmp457 * tmp458 + tmp460 = tmp455 + tmp459 + tmp462 = 0.999 + tmp463 = tmp461 * tmp462 + tmp464 = tmp456 * tmp456 + tmp465 = 0.0010000000000000009 + tmp466 = tmp464 * tmp465 + tmp467 = tmp463 + tmp466 + tmp469 = libdevice.sqrt(tmp467) + tmp472 = libdevice.pow(tmp462, tmp471) + tmp473 = 1.0 + tmp474 = tmp472 - tmp473 + tmp475 = -tmp474 + tmp476 = libdevice.sqrt(tmp475) + tmp477 = tmp469 / tmp476 + tmp478 = 1e-08 + tmp479 = tmp477 + tmp478 + tmp480 = 0.9 + tmp481 = libdevice.pow(tmp480, tmp471) + tmp482 = tmp481 - tmp473 + tmp483 = 100.0 + tmp484 = tmp482 * tmp483 + tmp485 = tl.full([1], 1, tl.int32) + tmp486 = tmp485 / tmp484 + tmp487 = tmp479 / tmp486 + tmp488 = tmp460 / tmp487 + tmp489 = tmp468 + tmp488 + tl.store(out_ptr52 + (x13), tmp460, xmask) + tl.store(out_ptr54 + (x13), tmp489, xmask) + tl.store(out_ptr55 + (x13), tmp467, xmask) + elif pid < num_xblocks_14: + pid_offset = pid - num_xblocks_13 + xnumel = 589824 + rnumel = 1 + xoffset = pid_offset * XBLOCK + xindex = xoffset + tl.arange(0, XBLOCK)[:] + xmask = tl.full([XBLOCK], True, tl.int1) + x14 = xindex + tmp490 = tl.load(in_ptr70 + (x14), None) + tmp491 = tl.load(in_ptr71 + (x14), None) + tmp496 = tl.load(in_ptr72 + (x14), None) + tmp503 = tl.load(in_ptr73 + (x14), None) + tmp505 = tl.load(in_ptr74 + (0)) + tmp506 = tl.broadcast_to(tmp505, [XBLOCK]) + tmp492 = tmp491 - tmp490 + tmp493 = 0.09999999999999998 + tmp494 = tmp492 * tmp493 + tmp495 = tmp490 + tmp494 + tmp497 = 0.999 + tmp498 = tmp496 * tmp497 + tmp499 = tmp491 * tmp491 + tmp500 = 0.0010000000000000009 + tmp501 = tmp499 * tmp500 + tmp502 = tmp498 + tmp501 + tmp504 = libdevice.sqrt(tmp502) + tmp507 = libdevice.pow(tmp497, tmp506) + tmp508 = 1.0 + tmp509 = tmp507 - tmp508 + tmp510 = -tmp509 + tmp511 = libdevice.sqrt(tmp510) + tmp512 = tmp504 / tmp511 + tmp513 = 1e-08 + tmp514 = tmp512 + tmp513 + tmp515 = 0.9 + tmp516 = libdevice.pow(tmp515, tmp506) + tmp517 = tmp516 - tmp508 + tmp518 = 100.0 + tmp519 = tmp517 * tmp518 + tmp520 = tl.full([1], 1, tl.int32) + tmp521 = tmp520 / tmp519 + tmp522 = tmp514 / tmp521 + tmp523 = tmp495 / tmp522 + tmp524 = tmp503 + tmp523 + tl.store(out_ptr56 + (x14), tmp495, None) + tl.store(out_ptr58 + (x14), tmp524, None) + tl.store(out_ptr59 + (x14), tmp502, None) + elif pid < num_xblocks_15: + pid_offset = pid - num_xblocks_14 + xnumel = 768 + rnumel = 1 + xoffset = pid_offset * XBLOCK + xindex = xoffset + tl.arange(0, XBLOCK)[:] + xmask = xindex < xnumel + x15 = xindex + tmp525 = tl.load(in_ptr75 + (x15), xmask) + tmp526 = tl.load(in_ptr76 + (x15), xmask) + tmp531 = tl.load(in_ptr77 + (x15), xmask) + tmp538 = tl.load(in_ptr78 + (x15), xmask) + tmp540 = tl.load(in_ptr79 + (0)) + tmp541 = tl.broadcast_to(tmp540, [XBLOCK]) + tmp527 = tmp526 - tmp525 + tmp528 = 0.09999999999999998 + tmp529 = tmp527 * tmp528 + tmp530 = tmp525 + tmp529 + tmp532 = 0.999 + tmp533 = tmp531 * tmp532 + tmp534 = tmp526 * tmp526 + tmp535 = 
0.0010000000000000009 + tmp536 = tmp534 * tmp535 + tmp537 = tmp533 + tmp536 + tmp539 = libdevice.sqrt(tmp537) + tmp542 = libdevice.pow(tmp532, tmp541) + tmp543 = 1.0 + tmp544 = tmp542 - tmp543 + tmp545 = -tmp544 + tmp546 = libdevice.sqrt(tmp545) + tmp547 = tmp539 / tmp546 + tmp548 = 1e-08 + tmp549 = tmp547 + tmp548 + tmp550 = 0.9 + tmp551 = libdevice.pow(tmp550, tmp541) + tmp552 = tmp551 - tmp543 + tmp553 = 100.0 + tmp554 = tmp552 * tmp553 + tmp555 = tl.full([1], 1, tl.int32) + tmp556 = tmp555 / tmp554 + tmp557 = tmp549 / tmp556 + tmp558 = tmp530 / tmp557 + tmp559 = tmp538 + tmp558 + tl.store(out_ptr60 + (x15), tmp530, xmask) + tl.store(out_ptr62 + (x15), tmp559, xmask) + tl.store(out_ptr63 + (x15), tmp537, xmask) + elif pid < num_xblocks_16: + pid_offset = pid - num_xblocks_15 + xnumel = 768 + rnumel = 1 + xoffset = pid_offset * XBLOCK + xindex = xoffset + tl.arange(0, XBLOCK)[:] + xmask = xindex < xnumel + x16 = xindex + tmp560 = tl.load(in_ptr80 + (x16), xmask) + tmp561 = tl.load(in_ptr81 + (x16), xmask) + tmp566 = tl.load(in_ptr82 + (x16), xmask) + tmp573 = tl.load(in_ptr83 + (x16), xmask) + tmp575 = tl.load(in_ptr84 + (0)) + tmp576 = tl.broadcast_to(tmp575, [XBLOCK]) + tmp562 = tmp561 - tmp560 + tmp563 = 0.09999999999999998 + tmp564 = tmp562 * tmp563 + tmp565 = tmp560 + tmp564 + tmp567 = 0.999 + tmp568 = tmp566 * tmp567 + tmp569 = tmp561 * tmp561 + tmp570 = 0.0010000000000000009 + tmp571 = tmp569 * tmp570 + tmp572 = tmp568 + tmp571 + tmp574 = libdevice.sqrt(tmp572) + tmp577 = libdevice.pow(tmp567, tmp576) + tmp578 = 1.0 + tmp579 = tmp577 - tmp578 + tmp580 = -tmp579 + tmp581 = libdevice.sqrt(tmp580) + tmp582 = tmp574 / tmp581 + tmp583 = 1e-08 + tmp584 = tmp582 + tmp583 + tmp585 = 0.9 + tmp586 = libdevice.pow(tmp585, tmp576) + tmp587 = tmp586 - tmp578 + tmp588 = 100.0 + tmp589 = tmp587 * tmp588 + tmp590 = tl.full([1], 1, tl.int32) + tmp591 = tmp590 / tmp589 + tmp592 = tmp584 / tmp591 + tmp593 = tmp565 / tmp592 + tmp594 = tmp573 + tmp593 + tl.store(out_ptr64 + (x16), tmp565, xmask) + tl.store(out_ptr66 + (x16), tmp594, xmask) + tl.store(out_ptr67 + (x16), tmp572, xmask) + elif pid < num_xblocks_17: + pid_offset = pid - num_xblocks_16 + xnumel = 768 + rnumel = 1 + xoffset = pid_offset * XBLOCK + xindex = xoffset + tl.arange(0, XBLOCK)[:] + xmask = xindex < xnumel + x17 = xindex + tmp595 = tl.load(in_ptr85 + (x17), xmask) + tmp596 = tl.load(in_ptr86 + (x17), xmask) + tmp601 = tl.load(in_ptr87 + (x17), xmask) + tmp608 = tl.load(in_ptr88 + (x17), xmask) + tmp610 = tl.load(in_ptr89 + (0)) + tmp611 = tl.broadcast_to(tmp610, [XBLOCK]) + tmp597 = tmp596 - tmp595 + tmp598 = 0.09999999999999998 + tmp599 = tmp597 * tmp598 + tmp600 = tmp595 + tmp599 + tmp602 = 0.999 + tmp603 = tmp601 * tmp602 + tmp604 = tmp596 * tmp596 + tmp605 = 0.0010000000000000009 + tmp606 = tmp604 * tmp605 + tmp607 = tmp603 + tmp606 + tmp609 = libdevice.sqrt(tmp607) + tmp612 = libdevice.pow(tmp602, tmp611) + tmp613 = 1.0 + tmp614 = tmp612 - tmp613 + tmp615 = -tmp614 + tmp616 = libdevice.sqrt(tmp615) + tmp617 = tmp609 / tmp616 + tmp618 = 1e-08 + tmp619 = tmp617 + tmp618 + tmp620 = 0.9 + tmp621 = libdevice.pow(tmp620, tmp611) + tmp622 = tmp621 - tmp613 + tmp623 = 100.0 + tmp624 = tmp622 * tmp623 + tmp625 = tl.full([1], 1, tl.int32) + tmp626 = tmp625 / tmp624 + tmp627 = tmp619 / tmp626 + tmp628 = tmp600 / tmp627 + tmp629 = tmp608 + tmp628 + tl.store(out_ptr68 + (x17), tmp600, xmask) + tl.store(out_ptr70 + (x17), tmp629, xmask) + tl.store(out_ptr71 + (x17), tmp607, xmask) + elif pid < num_xblocks_18: + pid_offset = pid - 
num_xblocks_17 + xnumel = 2359296 + rnumel = 1 + xoffset = pid_offset * XBLOCK + xindex = xoffset + tl.arange(0, XBLOCK)[:] + xmask = tl.full([XBLOCK], True, tl.int1) + x18 = xindex + tmp630 = tl.load(in_ptr90 + (x18), None) + tmp631 = tl.load(in_ptr91 + (x18), None) + tmp636 = tl.load(in_ptr92 + (x18), None) + tmp643 = tl.load(in_ptr93 + (x18), None) + tmp645 = tl.load(in_ptr94 + (0)) + tmp646 = tl.broadcast_to(tmp645, [XBLOCK]) + tmp632 = tmp631 - tmp630 + tmp633 = 0.09999999999999998 + tmp634 = tmp632 * tmp633 + tmp635 = tmp630 + tmp634 + tmp637 = 0.999 + tmp638 = tmp636 * tmp637 + tmp639 = tmp631 * tmp631 + tmp640 = 0.0010000000000000009 + tmp641 = tmp639 * tmp640 + tmp642 = tmp638 + tmp641 + tmp644 = libdevice.sqrt(tmp642) + tmp647 = libdevice.pow(tmp637, tmp646) + tmp648 = 1.0 + tmp649 = tmp647 - tmp648 + tmp650 = -tmp649 + tmp651 = libdevice.sqrt(tmp650) + tmp652 = tmp644 / tmp651 + tmp653 = 1e-08 + tmp654 = tmp652 + tmp653 + tmp655 = 0.9 + tmp656 = libdevice.pow(tmp655, tmp646) + tmp657 = tmp656 - tmp648 + tmp658 = 100.0 + tmp659 = tmp657 * tmp658 + tmp660 = tl.full([1], 1, tl.int32) + tmp661 = tmp660 / tmp659 + tmp662 = tmp654 / tmp661 + tmp663 = tmp635 / tmp662 + tmp664 = tmp643 + tmp663 + tl.store(out_ptr72 + (x18), tmp635, None) + tl.store(out_ptr74 + (x18), tmp664, None) + tl.store(out_ptr75 + (x18), tmp642, None) + else: + pass + ''', device_str='cuda') + + + # kernel path: /tmp/tmp2ln889l5/jn/cjnlfnxlu6k55o3ttjkcyltkwk6qkjiz7qsvfo3lxai6avs4elew.py + # Source Nodes: [], Original ATen: [] + + triton_for_fused_7 = async_compile.triton('triton_', ''' + import triton + import triton.language as tl + from triton.compiler.compiler import AttrsDescriptor + + from torch._inductor.runtime import triton_helpers, triton_heuristics + from torch._inductor.runtime.triton_helpers import libdevice, math as tl_math + from torch._inductor.runtime.hints import AutotuneHint, ReductionHint, TileHint, instance_descriptor, DeviceProperties + + @triton_heuristics.foreach( + num_warps=8, + triton_meta={'signature': {0: '*fp32', 1: '*fp32', 2: '*fp32', 3: '*fp32', 4: '*fp32', 5: '*fp32', 6: '*fp32', 7: '*fp32', 8: '*fp32', 9: '*fp32', 10: '*fp32', 11: '*fp32', 12: '*fp32', 13: '*fp32', 14: '*fp32', 15: '*fp32', 16: '*fp32', 17: '*fp32', 18: '*fp32', 19: '*fp32', 20: '*fp32', 21: '*fp32', 22: '*fp32', 23: '*fp32', 24: '*fp32', 25: '*fp32', 26: '*fp32', 27: '*fp32', 28: '*fp32', 29: '*fp32', 30: '*fp32', 31: '*fp32', 32: '*fp32', 33: '*fp32', 34: '*fp32', 35: '*fp32', 36: '*fp32', 37: '*fp32', 38: '*fp32', 39: '*fp32', 40: '*fp32', 41: '*fp32', 42: '*fp32', 43: '*fp32', 44: '*fp32', 45: '*fp32', 46: '*fp32', 47: '*fp32', 48: '*fp32', 49: '*fp32', 50: '*fp32', 51: '*fp32', 52: '*fp32', 53: '*fp32', 54: '*fp32', 55: '*fp32', 56: '*fp32', 57: '*fp32', 58: '*fp32', 59: '*fp32', 60: '*fp32', 61: '*fp32', 62: '*fp32', 63: '*fp32', 64: '*fp32', 65: '*fp32', 66: '*fp32', 67: '*fp32', 68: '*fp32', 69: '*fp32', 70: '*fp32', 71: '*fp32', 72: '*fp32', 73: '*fp32', 74: '*fp32', 75: '*fp32', 76: '*fp32', 77: '*fp32', 78: '*fp32', 79: '*fp32', 80: '*fp32', 81: '*fp32', 82: '*fp32', 83: '*fp32', 84: '*fp32', 85: '*fp32', 86: '*fp32', 87: '*fp32', 88: '*fp32', 89: '*fp32', 90: '*fp32', 91: '*fp32', 92: '*fp32', 93: '*fp32', 94: '*fp32', 95: '*fp32', 96: '*fp32', 97: '*fp32', 98: '*fp32', 99: '*fp32', 100: '*fp32', 101: '*fp32', 102: '*fp32', 103: '*fp32', 104: '*fp32', 105: '*fp32', 106: '*fp32', 107: '*fp32', 108: '*fp32', 109: '*fp32', 110: '*fp32', 111: '*fp32', 112: '*fp32', 113: '*fp32', 114: '*fp32', 115: '*fp32', 
116: '*fp32', 117: '*fp32', 118: '*fp32', 119: '*fp32', 120: '*fp32', 121: '*fp32', 122: '*fp32', 123: '*fp32', 124: '*fp32', 125: '*fp32', 126: '*fp32', 127: '*fp32', 128: '*fp32', 129: '*fp32', 130: '*fp32', 131: '*fp32', 132: '*fp32', 133: '*fp32', 134: '*fp32', 135: '*fp32', 136: '*fp32', 137: '*fp32', 138: '*fp32', 139: '*fp32', 140: '*fp32', 141: '*fp32', 142: '*fp32', 143: '*fp32', 144: '*fp32', 145: '*fp32', 146: '*fp32', 147: '*fp32', 148: '*fp32', 149: '*fp32', 150: '*fp32', 151: '*fp32'}, 'device': DeviceProperties(type='cuda', index=0, cc=90, major=9, regs_per_multiprocessor=65536, max_threads_per_multi_processor=2048, multi_processor_count=132), 'constants': {}, 'configs': [AttrsDescriptor(divisible_by_16=(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76, 77, 78, 79, 80, 81, 82, 83, 84, 85, 86, 87, 88, 89, 90, 91, 92, 93, 94, 95, 96, 97, 98, 99, 100, 101, 102, 103, 104, 105, 106, 107, 108, 109, 110, 111, 112, 113, 114, 115, 116, 117, 118, 119, 120, 121, 122, 123, 124, 125, 126, 127, 128, 129, 130, 131, 132, 133, 134, 135, 136, 137, 138, 139, 140, 141, 142, 143, 144, 145, 146, 147, 148, 149, 150, 151), equal_to_1=())]}, + inductor_meta={'kernel_name': 'triton_for_fused_7', 'mutated_arg_names': ['in_ptr0', 'in_ptr10', 'in_ptr12', 'in_ptr13', 'in_ptr15', 'in_ptr17', 'in_ptr18', 'in_ptr2', 'in_ptr20', 'in_ptr22', 'in_ptr23', 'in_ptr25', 'in_ptr27', 'in_ptr28', 'in_ptr3', 'in_ptr30', 'in_ptr32', 'in_ptr33', 'in_ptr35', 'in_ptr37', 'in_ptr38', 'in_ptr40', 'in_ptr42', 'in_ptr43', 'in_ptr45', 'in_ptr47', 'in_ptr48', 'in_ptr5', 'in_ptr50', 'in_ptr52', 'in_ptr53', 'in_ptr55', 'in_ptr57', 'in_ptr58', 'in_ptr60', 'in_ptr62', 'in_ptr63', 'in_ptr65', 'in_ptr67', 'in_ptr68', 'in_ptr7', 'in_ptr70', 'in_ptr72', 'in_ptr73', 'in_ptr75', 'in_ptr77', 'in_ptr78', 'in_ptr8', 'in_ptr80', 'in_ptr82', 'in_ptr83', 'in_ptr85', 'in_ptr87', 'in_ptr88', 'in_ptr90', 'in_ptr92', 'in_ptr93', 'out_ptr0', 'out_ptr10', 'out_ptr11', 'out_ptr12', 'out_ptr14', 'out_ptr15', 'out_ptr16', 'out_ptr18', 'out_ptr19', 'out_ptr2', 'out_ptr20', 'out_ptr22', 'out_ptr23', 'out_ptr24', 'out_ptr26', 'out_ptr27', 'out_ptr28', 'out_ptr3', 'out_ptr30', 'out_ptr31', 'out_ptr32', 'out_ptr34', 'out_ptr35', 'out_ptr36', 'out_ptr38', 'out_ptr39', 'out_ptr4', 'out_ptr40', 'out_ptr42', 'out_ptr43', 'out_ptr44', 'out_ptr46', 'out_ptr47', 'out_ptr48', 'out_ptr50', 'out_ptr51', 'out_ptr52', 'out_ptr54', 'out_ptr55', 'out_ptr56', 'out_ptr58', 'out_ptr59', 'out_ptr6', 'out_ptr60', 'out_ptr62', 'out_ptr63', 'out_ptr64', 'out_ptr66', 'out_ptr67', 'out_ptr68', 'out_ptr7', 'out_ptr70', 'out_ptr71', 'out_ptr72', 'out_ptr74', 'out_ptr75', 'out_ptr8'], 'backend_hash': 'B0714AF1B5CA55D285C0AA74C7669268EFE45FDD9B7A0A70183B8D89AC8FCAC4', 'are_deterministic_algorithms_enabled': False, 'assert_indirect_indexing': True, 'autotune_local_cache': True, 'autotune_pointwise': True, 'autotune_remote_cache': None, 'force_disable_caches': False, 'dynamic_scale_rblock': True, 'max_autotune': False, 'max_autotune_pointwise': False, 'min_split_scan_rblock': 256, 'spill_threshold': 16, 'store_cubin': False}, + ) + @triton.jit + def triton_(in_ptr0, in_ptr1, in_ptr2, in_ptr3, in_ptr4, in_ptr5, in_ptr6, in_ptr7, in_ptr8, in_ptr9, in_ptr10, in_ptr11, in_ptr12, in_ptr13, in_ptr14, in_ptr15, in_ptr16, in_ptr17, in_ptr18, 
in_ptr19, in_ptr20, in_ptr21, in_ptr22, in_ptr23, in_ptr24, in_ptr25, in_ptr26, in_ptr27, in_ptr28, in_ptr29, in_ptr30, in_ptr31, in_ptr32, in_ptr33, in_ptr34, in_ptr35, in_ptr36, in_ptr37, in_ptr38, in_ptr39, in_ptr40, in_ptr41, in_ptr42, in_ptr43, in_ptr44, in_ptr45, in_ptr46, in_ptr47, in_ptr48, in_ptr49, in_ptr50, in_ptr51, in_ptr52, in_ptr53, in_ptr54, in_ptr55, in_ptr56, in_ptr57, in_ptr58, in_ptr59, in_ptr60, in_ptr61, in_ptr62, in_ptr63, in_ptr64, in_ptr65, in_ptr66, in_ptr67, in_ptr68, in_ptr69, in_ptr70, in_ptr71, in_ptr72, in_ptr73, in_ptr74, in_ptr75, in_ptr76, in_ptr77, in_ptr78, in_ptr79, in_ptr80, in_ptr81, in_ptr82, in_ptr83, in_ptr84, in_ptr85, in_ptr86, in_ptr87, in_ptr88, in_ptr89, in_ptr90, in_ptr91, in_ptr92, in_ptr93, in_ptr94, out_ptr0, out_ptr2, out_ptr3, out_ptr4, out_ptr6, out_ptr7, out_ptr8, out_ptr10, out_ptr11, out_ptr12, out_ptr14, out_ptr15, out_ptr16, out_ptr18, out_ptr19, out_ptr20, out_ptr22, out_ptr23, out_ptr24, out_ptr26, out_ptr27, out_ptr28, out_ptr30, out_ptr31, out_ptr32, out_ptr34, out_ptr35, out_ptr36, out_ptr38, out_ptr39, out_ptr40, out_ptr42, out_ptr43, out_ptr44, out_ptr46, out_ptr47, out_ptr48, out_ptr50, out_ptr51, out_ptr52, out_ptr54, out_ptr55, out_ptr56, out_ptr58, out_ptr59, out_ptr60, out_ptr62, out_ptr63, out_ptr64, out_ptr66, out_ptr67, out_ptr68, out_ptr70, out_ptr71, out_ptr72, out_ptr74, out_ptr75): + pid = tl.program_id(0) + XBLOCK: tl.constexpr = 1024 + num_xblocks_0 = tl.cdiv(3072, XBLOCK) + num_xblocks_1 = num_xblocks_0 + tl.cdiv(2359296, XBLOCK) + num_xblocks_2 = num_xblocks_1 + tl.cdiv(768, XBLOCK) + num_xblocks_3 = num_xblocks_2 + tl.cdiv(768, XBLOCK) + num_xblocks_4 = num_xblocks_3 + tl.cdiv(768, XBLOCK) + num_xblocks_5 = num_xblocks_4 + tl.cdiv(1769472, XBLOCK) + num_xblocks_6 = num_xblocks_5 + tl.cdiv(2304, XBLOCK) + num_xblocks_7 = num_xblocks_6 + tl.cdiv(589824, XBLOCK) + num_xblocks_8 = num_xblocks_7 + tl.cdiv(768, XBLOCK) + num_xblocks_9 = num_xblocks_8 + tl.cdiv(768, XBLOCK) + num_xblocks_10 = num_xblocks_9 + tl.cdiv(768, XBLOCK) + num_xblocks_11 = num_xblocks_10 + tl.cdiv(2359296, XBLOCK) + num_xblocks_12 = num_xblocks_11 + tl.cdiv(3072, XBLOCK) + num_xblocks_13 = num_xblocks_12 + tl.cdiv(2359296, XBLOCK) + num_xblocks_14 = num_xblocks_13 + tl.cdiv(768, XBLOCK) + num_xblocks_15 = num_xblocks_14 + tl.cdiv(768, XBLOCK) + num_xblocks_16 = num_xblocks_15 + tl.cdiv(768, XBLOCK) + num_xblocks_17 = num_xblocks_16 + tl.cdiv(1769472, XBLOCK) + num_xblocks_18 = num_xblocks_17 + tl.cdiv(2304, XBLOCK) + if pid < num_xblocks_0: + pid_offset = pid + xnumel = 3072 + rnumel = 1 + xoffset = pid_offset * XBLOCK + xindex = xoffset + tl.arange(0, XBLOCK)[:] + xmask = xindex < xnumel + x0 = xindex + tmp0 = tl.load(in_ptr0 + (x0), xmask) + tmp1 = tl.load(in_ptr1 + (x0), xmask) + tmp6 = tl.load(in_ptr2 + (x0), xmask) + tmp13 = tl.load(in_ptr3 + (x0), xmask) + tmp15 = tl.load(in_ptr4 + (0)) + tmp16 = tl.broadcast_to(tmp15, [XBLOCK]) + tmp2 = tmp1 - tmp0 + tmp3 = 0.09999999999999998 + tmp4 = tmp2 * tmp3 + tmp5 = tmp0 + tmp4 + tmp7 = 0.999 + tmp8 = tmp6 * tmp7 + tmp9 = tmp1 * tmp1 + tmp10 = 0.0010000000000000009 + tmp11 = tmp9 * tmp10 + tmp12 = tmp8 + tmp11 + tmp14 = libdevice.sqrt(tmp12) + tmp17 = libdevice.pow(tmp7, tmp16) + tmp18 = 1.0 + tmp19 = tmp17 - tmp18 + tmp20 = -tmp19 + tmp21 = libdevice.sqrt(tmp20) + tmp22 = tmp14 / tmp21 + tmp23 = 1e-08 + tmp24 = tmp22 + tmp23 + tmp25 = 0.9 + tmp26 = libdevice.pow(tmp25, tmp16) + tmp27 = tmp26 - tmp18 + tmp28 = 100.0 + tmp29 = tmp27 * tmp28 + tmp30 = tl.full([1], 1, tl.int32) + tmp31 = tmp30 / 
tmp29 + tmp32 = tmp24 / tmp31 + tmp33 = tmp5 / tmp32 + tmp34 = tmp13 + tmp33 + tl.store(out_ptr0 + (x0), tmp5, xmask) + tl.store(out_ptr2 + (x0), tmp34, xmask) + tl.store(out_ptr3 + (x0), tmp12, xmask) + elif pid < num_xblocks_1: + pid_offset = pid - num_xblocks_0 + xnumel = 2359296 + rnumel = 1 + xoffset = pid_offset * XBLOCK + xindex = xoffset + tl.arange(0, XBLOCK)[:] + xmask = tl.full([XBLOCK], True, tl.int1) + x1 = xindex + tmp35 = tl.load(in_ptr5 + (x1), None) + tmp36 = tl.load(in_ptr6 + (x1), None) + tmp41 = tl.load(in_ptr7 + (x1), None) + tmp48 = tl.load(in_ptr8 + (x1), None) + tmp50 = tl.load(in_ptr9 + (0)) + tmp51 = tl.broadcast_to(tmp50, [XBLOCK]) + tmp37 = tmp36 - tmp35 + tmp38 = 0.09999999999999998 + tmp39 = tmp37 * tmp38 + tmp40 = tmp35 + tmp39 + tmp42 = 0.999 + tmp43 = tmp41 * tmp42 + tmp44 = tmp36 * tmp36 + tmp45 = 0.0010000000000000009 + tmp46 = tmp44 * tmp45 + tmp47 = tmp43 + tmp46 + tmp49 = libdevice.sqrt(tmp47) + tmp52 = libdevice.pow(tmp42, tmp51) + tmp53 = 1.0 + tmp54 = tmp52 - tmp53 + tmp55 = -tmp54 + tmp56 = libdevice.sqrt(tmp55) + tmp57 = tmp49 / tmp56 + tmp58 = 1e-08 + tmp59 = tmp57 + tmp58 + tmp60 = 0.9 + tmp61 = libdevice.pow(tmp60, tmp51) + tmp62 = tmp61 - tmp53 + tmp63 = 100.0 + tmp64 = tmp62 * tmp63 + tmp65 = tl.full([1], 1, tl.int32) + tmp66 = tmp65 / tmp64 + tmp67 = tmp59 / tmp66 + tmp68 = tmp40 / tmp67 + tmp69 = tmp48 + tmp68 + tl.store(out_ptr4 + (x1), tmp40, None) + tl.store(out_ptr6 + (x1), tmp69, None) + tl.store(out_ptr7 + (x1), tmp47, None) + elif pid < num_xblocks_2: + pid_offset = pid - num_xblocks_1 + xnumel = 768 + rnumel = 1 + xoffset = pid_offset * XBLOCK + xindex = xoffset + tl.arange(0, XBLOCK)[:] + xmask = xindex < xnumel + x2 = xindex + tmp70 = tl.load(in_ptr10 + (x2), xmask) + tmp71 = tl.load(in_ptr11 + (x2), xmask) + tmp76 = tl.load(in_ptr12 + (x2), xmask) + tmp83 = tl.load(in_ptr13 + (x2), xmask) + tmp85 = tl.load(in_ptr14 + (0)) + tmp86 = tl.broadcast_to(tmp85, [XBLOCK]) + tmp72 = tmp71 - tmp70 + tmp73 = 0.09999999999999998 + tmp74 = tmp72 * tmp73 + tmp75 = tmp70 + tmp74 + tmp77 = 0.999 + tmp78 = tmp76 * tmp77 + tmp79 = tmp71 * tmp71 + tmp80 = 0.0010000000000000009 + tmp81 = tmp79 * tmp80 + tmp82 = tmp78 + tmp81 + tmp84 = libdevice.sqrt(tmp82) + tmp87 = libdevice.pow(tmp77, tmp86) + tmp88 = 1.0 + tmp89 = tmp87 - tmp88 + tmp90 = -tmp89 + tmp91 = libdevice.sqrt(tmp90) + tmp92 = tmp84 / tmp91 + tmp93 = 1e-08 + tmp94 = tmp92 + tmp93 + tmp95 = 0.9 + tmp96 = libdevice.pow(tmp95, tmp86) + tmp97 = tmp96 - tmp88 + tmp98 = 100.0 + tmp99 = tmp97 * tmp98 + tmp100 = tl.full([1], 1, tl.int32) + tmp101 = tmp100 / tmp99 + tmp102 = tmp94 / tmp101 + tmp103 = tmp75 / tmp102 + tmp104 = tmp83 + tmp103 + tl.store(out_ptr8 + (x2), tmp75, xmask) + tl.store(out_ptr10 + (x2), tmp104, xmask) + tl.store(out_ptr11 + (x2), tmp82, xmask) + elif pid < num_xblocks_3: + pid_offset = pid - num_xblocks_2 + xnumel = 768 + rnumel = 1 + xoffset = pid_offset * XBLOCK + xindex = xoffset + tl.arange(0, XBLOCK)[:] + xmask = xindex < xnumel + x3 = xindex + tmp105 = tl.load(in_ptr15 + (x3), xmask) + tmp106 = tl.load(in_ptr16 + (x3), xmask) + tmp111 = tl.load(in_ptr17 + (x3), xmask) + tmp118 = tl.load(in_ptr18 + (x3), xmask) + tmp120 = tl.load(in_ptr19 + (0)) + tmp121 = tl.broadcast_to(tmp120, [XBLOCK]) + tmp107 = tmp106 - tmp105 + tmp108 = 0.09999999999999998 + tmp109 = tmp107 * tmp108 + tmp110 = tmp105 + tmp109 + tmp112 = 0.999 + tmp113 = tmp111 * tmp112 + tmp114 = tmp106 * tmp106 + tmp115 = 0.0010000000000000009 + tmp116 = tmp114 * tmp115 + tmp117 = tmp113 + tmp116 + tmp119 = 
libdevice.sqrt(tmp117) + tmp122 = libdevice.pow(tmp112, tmp121) + tmp123 = 1.0 + tmp124 = tmp122 - tmp123 + tmp125 = -tmp124 + tmp126 = libdevice.sqrt(tmp125) + tmp127 = tmp119 / tmp126 + tmp128 = 1e-08 + tmp129 = tmp127 + tmp128 + tmp130 = 0.9 + tmp131 = libdevice.pow(tmp130, tmp121) + tmp132 = tmp131 - tmp123 + tmp133 = 100.0 + tmp134 = tmp132 * tmp133 + tmp135 = tl.full([1], 1, tl.int32) + tmp136 = tmp135 / tmp134 + tmp137 = tmp129 / tmp136 + tmp138 = tmp110 / tmp137 + tmp139 = tmp118 + tmp138 + tl.store(out_ptr12 + (x3), tmp110, xmask) + tl.store(out_ptr14 + (x3), tmp139, xmask) + tl.store(out_ptr15 + (x3), tmp117, xmask) + elif pid < num_xblocks_4: + pid_offset = pid - num_xblocks_3 + xnumel = 768 + rnumel = 1 + xoffset = pid_offset * XBLOCK + xindex = xoffset + tl.arange(0, XBLOCK)[:] + xmask = xindex < xnumel + x4 = xindex + tmp140 = tl.load(in_ptr20 + (x4), xmask) + tmp141 = tl.load(in_ptr21 + (x4), xmask) + tmp146 = tl.load(in_ptr22 + (x4), xmask) + tmp153 = tl.load(in_ptr23 + (x4), xmask) + tmp155 = tl.load(in_ptr24 + (0)) + tmp156 = tl.broadcast_to(tmp155, [XBLOCK]) + tmp142 = tmp141 - tmp140 + tmp143 = 0.09999999999999998 + tmp144 = tmp142 * tmp143 + tmp145 = tmp140 + tmp144 + tmp147 = 0.999 + tmp148 = tmp146 * tmp147 + tmp149 = tmp141 * tmp141 + tmp150 = 0.0010000000000000009 + tmp151 = tmp149 * tmp150 + tmp152 = tmp148 + tmp151 + tmp154 = libdevice.sqrt(tmp152) + tmp157 = libdevice.pow(tmp147, tmp156) + tmp158 = 1.0 + tmp159 = tmp157 - tmp158 + tmp160 = -tmp159 + tmp161 = libdevice.sqrt(tmp160) + tmp162 = tmp154 / tmp161 + tmp163 = 1e-08 + tmp164 = tmp162 + tmp163 + tmp165 = 0.9 + tmp166 = libdevice.pow(tmp165, tmp156) + tmp167 = tmp166 - tmp158 + tmp168 = 100.0 + tmp169 = tmp167 * tmp168 + tmp170 = tl.full([1], 1, tl.int32) + tmp171 = tmp170 / tmp169 + tmp172 = tmp164 / tmp171 + tmp173 = tmp145 / tmp172 + tmp174 = tmp153 + tmp173 + tl.store(out_ptr16 + (x4), tmp145, xmask) + tl.store(out_ptr18 + (x4), tmp174, xmask) + tl.store(out_ptr19 + (x4), tmp152, xmask) + elif pid < num_xblocks_5: + pid_offset = pid - num_xblocks_4 + xnumel = 1769472 + rnumel = 1 + xoffset = pid_offset * XBLOCK + xindex = xoffset + tl.arange(0, XBLOCK)[:] + xmask = tl.full([XBLOCK], True, tl.int1) + x5 = xindex + tmp175 = tl.load(in_ptr25 + (x5), None) + tmp176 = tl.load(in_ptr26 + (x5), None) + tmp181 = tl.load(in_ptr27 + (x5), None) + tmp188 = tl.load(in_ptr28 + (x5), None) + tmp190 = tl.load(in_ptr29 + (0)) + tmp191 = tl.broadcast_to(tmp190, [XBLOCK]) + tmp177 = tmp176 - tmp175 + tmp178 = 0.09999999999999998 + tmp179 = tmp177 * tmp178 + tmp180 = tmp175 + tmp179 + tmp182 = 0.999 + tmp183 = tmp181 * tmp182 + tmp184 = tmp176 * tmp176 + tmp185 = 0.0010000000000000009 + tmp186 = tmp184 * tmp185 + tmp187 = tmp183 + tmp186 + tmp189 = libdevice.sqrt(tmp187) + tmp192 = libdevice.pow(tmp182, tmp191) + tmp193 = 1.0 + tmp194 = tmp192 - tmp193 + tmp195 = -tmp194 + tmp196 = libdevice.sqrt(tmp195) + tmp197 = tmp189 / tmp196 + tmp198 = 1e-08 + tmp199 = tmp197 + tmp198 + tmp200 = 0.9 + tmp201 = libdevice.pow(tmp200, tmp191) + tmp202 = tmp201 - tmp193 + tmp203 = 100.0 + tmp204 = tmp202 * tmp203 + tmp205 = tl.full([1], 1, tl.int32) + tmp206 = tmp205 / tmp204 + tmp207 = tmp199 / tmp206 + tmp208 = tmp180 / tmp207 + tmp209 = tmp188 + tmp208 + tl.store(out_ptr20 + (x5), tmp180, None) + tl.store(out_ptr22 + (x5), tmp209, None) + tl.store(out_ptr23 + (x5), tmp187, None) + elif pid < num_xblocks_6: + pid_offset = pid - num_xblocks_5 + xnumel = 2304 + rnumel = 1 + xoffset = pid_offset * XBLOCK + xindex = xoffset + 
tl.arange(0, XBLOCK)[:] + xmask = xindex < xnumel + x6 = xindex + tmp210 = tl.load(in_ptr30 + (x6), xmask) + tmp211 = tl.load(in_ptr31 + (x6), xmask) + tmp216 = tl.load(in_ptr32 + (x6), xmask) + tmp223 = tl.load(in_ptr33 + (x6), xmask) + tmp225 = tl.load(in_ptr34 + (0)) + tmp226 = tl.broadcast_to(tmp225, [XBLOCK]) + tmp212 = tmp211 - tmp210 + tmp213 = 0.09999999999999998 + tmp214 = tmp212 * tmp213 + tmp215 = tmp210 + tmp214 + tmp217 = 0.999 + tmp218 = tmp216 * tmp217 + tmp219 = tmp211 * tmp211 + tmp220 = 0.0010000000000000009 + tmp221 = tmp219 * tmp220 + tmp222 = tmp218 + tmp221 + tmp224 = libdevice.sqrt(tmp222) + tmp227 = libdevice.pow(tmp217, tmp226) + tmp228 = 1.0 + tmp229 = tmp227 - tmp228 + tmp230 = -tmp229 + tmp231 = libdevice.sqrt(tmp230) + tmp232 = tmp224 / tmp231 + tmp233 = 1e-08 + tmp234 = tmp232 + tmp233 + tmp235 = 0.9 + tmp236 = libdevice.pow(tmp235, tmp226) + tmp237 = tmp236 - tmp228 + tmp238 = 100.0 + tmp239 = tmp237 * tmp238 + tmp240 = tl.full([1], 1, tl.int32) + tmp241 = tmp240 / tmp239 + tmp242 = tmp234 / tmp241 + tmp243 = tmp215 / tmp242 + tmp244 = tmp223 + tmp243 + tl.store(out_ptr24 + (x6), tmp215, xmask) + tl.store(out_ptr26 + (x6), tmp244, xmask) + tl.store(out_ptr27 + (x6), tmp222, xmask) + elif pid < num_xblocks_7: + pid_offset = pid - num_xblocks_6 + xnumel = 589824 + rnumel = 1 + xoffset = pid_offset * XBLOCK + xindex = xoffset + tl.arange(0, XBLOCK)[:] + xmask = tl.full([XBLOCK], True, tl.int1) + x7 = xindex + tmp245 = tl.load(in_ptr35 + (x7), None) + tmp246 = tl.load(in_ptr36 + (x7), None) + tmp251 = tl.load(in_ptr37 + (x7), None) + tmp258 = tl.load(in_ptr38 + (x7), None) + tmp260 = tl.load(in_ptr39 + (0)) + tmp261 = tl.broadcast_to(tmp260, [XBLOCK]) + tmp247 = tmp246 - tmp245 + tmp248 = 0.09999999999999998 + tmp249 = tmp247 * tmp248 + tmp250 = tmp245 + tmp249 + tmp252 = 0.999 + tmp253 = tmp251 * tmp252 + tmp254 = tmp246 * tmp246 + tmp255 = 0.0010000000000000009 + tmp256 = tmp254 * tmp255 + tmp257 = tmp253 + tmp256 + tmp259 = libdevice.sqrt(tmp257) + tmp262 = libdevice.pow(tmp252, tmp261) + tmp263 = 1.0 + tmp264 = tmp262 - tmp263 + tmp265 = -tmp264 + tmp266 = libdevice.sqrt(tmp265) + tmp267 = tmp259 / tmp266 + tmp268 = 1e-08 + tmp269 = tmp267 + tmp268 + tmp270 = 0.9 + tmp271 = libdevice.pow(tmp270, tmp261) + tmp272 = tmp271 - tmp263 + tmp273 = 100.0 + tmp274 = tmp272 * tmp273 + tmp275 = tl.full([1], 1, tl.int32) + tmp276 = tmp275 / tmp274 + tmp277 = tmp269 / tmp276 + tmp278 = tmp250 / tmp277 + tmp279 = tmp258 + tmp278 + tl.store(out_ptr28 + (x7), tmp250, None) + tl.store(out_ptr30 + (x7), tmp279, None) + tl.store(out_ptr31 + (x7), tmp257, None) + elif pid < num_xblocks_8: + pid_offset = pid - num_xblocks_7 + xnumel = 768 + rnumel = 1 + xoffset = pid_offset * XBLOCK + xindex = xoffset + tl.arange(0, XBLOCK)[:] + xmask = xindex < xnumel + x8 = xindex + tmp280 = tl.load(in_ptr40 + (x8), xmask) + tmp281 = tl.load(in_ptr41 + (x8), xmask) + tmp286 = tl.load(in_ptr42 + (x8), xmask) + tmp293 = tl.load(in_ptr43 + (x8), xmask) + tmp295 = tl.load(in_ptr44 + (0)) + tmp296 = tl.broadcast_to(tmp295, [XBLOCK]) + tmp282 = tmp281 - tmp280 + tmp283 = 0.09999999999999998 + tmp284 = tmp282 * tmp283 + tmp285 = tmp280 + tmp284 + tmp287 = 0.999 + tmp288 = tmp286 * tmp287 + tmp289 = tmp281 * tmp281 + tmp290 = 0.0010000000000000009 + tmp291 = tmp289 * tmp290 + tmp292 = tmp288 + tmp291 + tmp294 = libdevice.sqrt(tmp292) + tmp297 = libdevice.pow(tmp287, tmp296) + tmp298 = 1.0 + tmp299 = tmp297 - tmp298 + tmp300 = -tmp299 + tmp301 = libdevice.sqrt(tmp300) + tmp302 = tmp294 / tmp301 + tmp303 
= 1e-08 + tmp304 = tmp302 + tmp303 + tmp305 = 0.9 + tmp306 = libdevice.pow(tmp305, tmp296) + tmp307 = tmp306 - tmp298 + tmp308 = 100.0 + tmp309 = tmp307 * tmp308 + tmp310 = tl.full([1], 1, tl.int32) + tmp311 = tmp310 / tmp309 + tmp312 = tmp304 / tmp311 + tmp313 = tmp285 / tmp312 + tmp314 = tmp293 + tmp313 + tl.store(out_ptr32 + (x8), tmp285, xmask) + tl.store(out_ptr34 + (x8), tmp314, xmask) + tl.store(out_ptr35 + (x8), tmp292, xmask) + elif pid < num_xblocks_9: + pid_offset = pid - num_xblocks_8 + xnumel = 768 + rnumel = 1 + xoffset = pid_offset * XBLOCK + xindex = xoffset + tl.arange(0, XBLOCK)[:] + xmask = xindex < xnumel + x9 = xindex + tmp315 = tl.load(in_ptr45 + (x9), xmask) + tmp316 = tl.load(in_ptr46 + (x9), xmask) + tmp321 = tl.load(in_ptr47 + (x9), xmask) + tmp328 = tl.load(in_ptr48 + (x9), xmask) + tmp330 = tl.load(in_ptr49 + (0)) + tmp331 = tl.broadcast_to(tmp330, [XBLOCK]) + tmp317 = tmp316 - tmp315 + tmp318 = 0.09999999999999998 + tmp319 = tmp317 * tmp318 + tmp320 = tmp315 + tmp319 + tmp322 = 0.999 + tmp323 = tmp321 * tmp322 + tmp324 = tmp316 * tmp316 + tmp325 = 0.0010000000000000009 + tmp326 = tmp324 * tmp325 + tmp327 = tmp323 + tmp326 + tmp329 = libdevice.sqrt(tmp327) + tmp332 = libdevice.pow(tmp322, tmp331) + tmp333 = 1.0 + tmp334 = tmp332 - tmp333 + tmp335 = -tmp334 + tmp336 = libdevice.sqrt(tmp335) + tmp337 = tmp329 / tmp336 + tmp338 = 1e-08 + tmp339 = tmp337 + tmp338 + tmp340 = 0.9 + tmp341 = libdevice.pow(tmp340, tmp331) + tmp342 = tmp341 - tmp333 + tmp343 = 100.0 + tmp344 = tmp342 * tmp343 + tmp345 = tl.full([1], 1, tl.int32) + tmp346 = tmp345 / tmp344 + tmp347 = tmp339 / tmp346 + tmp348 = tmp320 / tmp347 + tmp349 = tmp328 + tmp348 + tl.store(out_ptr36 + (x9), tmp320, xmask) + tl.store(out_ptr38 + (x9), tmp349, xmask) + tl.store(out_ptr39 + (x9), tmp327, xmask) + elif pid < num_xblocks_10: + pid_offset = pid - num_xblocks_9 + xnumel = 768 + rnumel = 1 + xoffset = pid_offset * XBLOCK + xindex = xoffset + tl.arange(0, XBLOCK)[:] + xmask = xindex < xnumel + x10 = xindex + tmp350 = tl.load(in_ptr50 + (x10), xmask) + tmp351 = tl.load(in_ptr51 + (x10), xmask) + tmp356 = tl.load(in_ptr52 + (x10), xmask) + tmp363 = tl.load(in_ptr53 + (x10), xmask) + tmp365 = tl.load(in_ptr54 + (0)) + tmp366 = tl.broadcast_to(tmp365, [XBLOCK]) + tmp352 = tmp351 - tmp350 + tmp353 = 0.09999999999999998 + tmp354 = tmp352 * tmp353 + tmp355 = tmp350 + tmp354 + tmp357 = 0.999 + tmp358 = tmp356 * tmp357 + tmp359 = tmp351 * tmp351 + tmp360 = 0.0010000000000000009 + tmp361 = tmp359 * tmp360 + tmp362 = tmp358 + tmp361 + tmp364 = libdevice.sqrt(tmp362) + tmp367 = libdevice.pow(tmp357, tmp366) + tmp368 = 1.0 + tmp369 = tmp367 - tmp368 + tmp370 = -tmp369 + tmp371 = libdevice.sqrt(tmp370) + tmp372 = tmp364 / tmp371 + tmp373 = 1e-08 + tmp374 = tmp372 + tmp373 + tmp375 = 0.9 + tmp376 = libdevice.pow(tmp375, tmp366) + tmp377 = tmp376 - tmp368 + tmp378 = 100.0 + tmp379 = tmp377 * tmp378 + tmp380 = tl.full([1], 1, tl.int32) + tmp381 = tmp380 / tmp379 + tmp382 = tmp374 / tmp381 + tmp383 = tmp355 / tmp382 + tmp384 = tmp363 + tmp383 + tl.store(out_ptr40 + (x10), tmp355, xmask) + tl.store(out_ptr42 + (x10), tmp384, xmask) + tl.store(out_ptr43 + (x10), tmp362, xmask) + elif pid < num_xblocks_11: + pid_offset = pid - num_xblocks_10 + xnumel = 2359296 + rnumel = 1 + xoffset = pid_offset * XBLOCK + xindex = xoffset + tl.arange(0, XBLOCK)[:] + xmask = tl.full([XBLOCK], True, tl.int1) + x11 = xindex + tmp385 = tl.load(in_ptr55 + (x11), None) + tmp386 = tl.load(in_ptr56 + (x11), None) + tmp391 = tl.load(in_ptr57 + (x11), 
None) + tmp398 = tl.load(in_ptr58 + (x11), None) + tmp400 = tl.load(in_ptr59 + (0)) + tmp401 = tl.broadcast_to(tmp400, [XBLOCK]) + tmp387 = tmp386 - tmp385 + tmp388 = 0.09999999999999998 + tmp389 = tmp387 * tmp388 + tmp390 = tmp385 + tmp389 + tmp392 = 0.999 + tmp393 = tmp391 * tmp392 + tmp394 = tmp386 * tmp386 + tmp395 = 0.0010000000000000009 + tmp396 = tmp394 * tmp395 + tmp397 = tmp393 + tmp396 + tmp399 = libdevice.sqrt(tmp397) + tmp402 = libdevice.pow(tmp392, tmp401) + tmp403 = 1.0 + tmp404 = tmp402 - tmp403 + tmp405 = -tmp404 + tmp406 = libdevice.sqrt(tmp405) + tmp407 = tmp399 / tmp406 + tmp408 = 1e-08 + tmp409 = tmp407 + tmp408 + tmp410 = 0.9 + tmp411 = libdevice.pow(tmp410, tmp401) + tmp412 = tmp411 - tmp403 + tmp413 = 100.0 + tmp414 = tmp412 * tmp413 + tmp415 = tl.full([1], 1, tl.int32) + tmp416 = tmp415 / tmp414 + tmp417 = tmp409 / tmp416 + tmp418 = tmp390 / tmp417 + tmp419 = tmp398 + tmp418 + tl.store(out_ptr44 + (x11), tmp390, None) + tl.store(out_ptr46 + (x11), tmp419, None) + tl.store(out_ptr47 + (x11), tmp397, None) + elif pid < num_xblocks_12: + pid_offset = pid - num_xblocks_11 + xnumel = 3072 + rnumel = 1 + xoffset = pid_offset * XBLOCK + xindex = xoffset + tl.arange(0, XBLOCK)[:] + xmask = xindex < xnumel + x12 = xindex + tmp420 = tl.load(in_ptr60 + (x12), xmask) + tmp421 = tl.load(in_ptr61 + (x12), xmask) + tmp426 = tl.load(in_ptr62 + (x12), xmask) + tmp433 = tl.load(in_ptr63 + (x12), xmask) + tmp435 = tl.load(in_ptr64 + (0)) + tmp436 = tl.broadcast_to(tmp435, [XBLOCK]) + tmp422 = tmp421 - tmp420 + tmp423 = 0.09999999999999998 + tmp424 = tmp422 * tmp423 + tmp425 = tmp420 + tmp424 + tmp427 = 0.999 + tmp428 = tmp426 * tmp427 + tmp429 = tmp421 * tmp421 + tmp430 = 0.0010000000000000009 + tmp431 = tmp429 * tmp430 + tmp432 = tmp428 + tmp431 + tmp434 = libdevice.sqrt(tmp432) + tmp437 = libdevice.pow(tmp427, tmp436) + tmp438 = 1.0 + tmp439 = tmp437 - tmp438 + tmp440 = -tmp439 + tmp441 = libdevice.sqrt(tmp440) + tmp442 = tmp434 / tmp441 + tmp443 = 1e-08 + tmp444 = tmp442 + tmp443 + tmp445 = 0.9 + tmp446 = libdevice.pow(tmp445, tmp436) + tmp447 = tmp446 - tmp438 + tmp448 = 100.0 + tmp449 = tmp447 * tmp448 + tmp450 = tl.full([1], 1, tl.int32) + tmp451 = tmp450 / tmp449 + tmp452 = tmp444 / tmp451 + tmp453 = tmp425 / tmp452 + tmp454 = tmp433 + tmp453 + tl.store(out_ptr48 + (x12), tmp425, xmask) + tl.store(out_ptr50 + (x12), tmp454, xmask) + tl.store(out_ptr51 + (x12), tmp432, xmask) + elif pid < num_xblocks_13: + pid_offset = pid - num_xblocks_12 + xnumel = 2359296 + rnumel = 1 + xoffset = pid_offset * XBLOCK + xindex = xoffset + tl.arange(0, XBLOCK)[:] + xmask = tl.full([XBLOCK], True, tl.int1) + x13 = xindex + tmp455 = tl.load(in_ptr65 + (x13), None) + tmp456 = tl.load(in_ptr66 + (x13), None) + tmp461 = tl.load(in_ptr67 + (x13), None) + tmp468 = tl.load(in_ptr68 + (x13), None) + tmp470 = tl.load(in_ptr69 + (0)) + tmp471 = tl.broadcast_to(tmp470, [XBLOCK]) + tmp457 = tmp456 - tmp455 + tmp458 = 0.09999999999999998 + tmp459 = tmp457 * tmp458 + tmp460 = tmp455 + tmp459 + tmp462 = 0.999 + tmp463 = tmp461 * tmp462 + tmp464 = tmp456 * tmp456 + tmp465 = 0.0010000000000000009 + tmp466 = tmp464 * tmp465 + tmp467 = tmp463 + tmp466 + tmp469 = libdevice.sqrt(tmp467) + tmp472 = libdevice.pow(tmp462, tmp471) + tmp473 = 1.0 + tmp474 = tmp472 - tmp473 + tmp475 = -tmp474 + tmp476 = libdevice.sqrt(tmp475) + tmp477 = tmp469 / tmp476 + tmp478 = 1e-08 + tmp479 = tmp477 + tmp478 + tmp480 = 0.9 + tmp481 = libdevice.pow(tmp480, tmp471) + tmp482 = tmp481 - tmp473 + tmp483 = 100.0 + tmp484 = tmp482 * tmp483 + 
tmp485 = tl.full([1], 1, tl.int32) + tmp486 = tmp485 / tmp484 + tmp487 = tmp479 / tmp486 + tmp488 = tmp460 / tmp487 + tmp489 = tmp468 + tmp488 + tl.store(out_ptr52 + (x13), tmp460, None) + tl.store(out_ptr54 + (x13), tmp489, None) + tl.store(out_ptr55 + (x13), tmp467, None) + elif pid < num_xblocks_14: + pid_offset = pid - num_xblocks_13 + xnumel = 768 + rnumel = 1 + xoffset = pid_offset * XBLOCK + xindex = xoffset + tl.arange(0, XBLOCK)[:] + xmask = xindex < xnumel + x14 = xindex + tmp490 = tl.load(in_ptr70 + (x14), xmask) + tmp491 = tl.load(in_ptr71 + (x14), xmask) + tmp496 = tl.load(in_ptr72 + (x14), xmask) + tmp503 = tl.load(in_ptr73 + (x14), xmask) + tmp505 = tl.load(in_ptr74 + (0)) + tmp506 = tl.broadcast_to(tmp505, [XBLOCK]) + tmp492 = tmp491 - tmp490 + tmp493 = 0.09999999999999998 + tmp494 = tmp492 * tmp493 + tmp495 = tmp490 + tmp494 + tmp497 = 0.999 + tmp498 = tmp496 * tmp497 + tmp499 = tmp491 * tmp491 + tmp500 = 0.0010000000000000009 + tmp501 = tmp499 * tmp500 + tmp502 = tmp498 + tmp501 + tmp504 = libdevice.sqrt(tmp502) + tmp507 = libdevice.pow(tmp497, tmp506) + tmp508 = 1.0 + tmp509 = tmp507 - tmp508 + tmp510 = -tmp509 + tmp511 = libdevice.sqrt(tmp510) + tmp512 = tmp504 / tmp511 + tmp513 = 1e-08 + tmp514 = tmp512 + tmp513 + tmp515 = 0.9 + tmp516 = libdevice.pow(tmp515, tmp506) + tmp517 = tmp516 - tmp508 + tmp518 = 100.0 + tmp519 = tmp517 * tmp518 + tmp520 = tl.full([1], 1, tl.int32) + tmp521 = tmp520 / tmp519 + tmp522 = tmp514 / tmp521 + tmp523 = tmp495 / tmp522 + tmp524 = tmp503 + tmp523 + tl.store(out_ptr56 + (x14), tmp495, xmask) + tl.store(out_ptr58 + (x14), tmp524, xmask) + tl.store(out_ptr59 + (x14), tmp502, xmask) + elif pid < num_xblocks_15: + pid_offset = pid - num_xblocks_14 + xnumel = 768 + rnumel = 1 + xoffset = pid_offset * XBLOCK + xindex = xoffset + tl.arange(0, XBLOCK)[:] + xmask = xindex < xnumel + x15 = xindex + tmp525 = tl.load(in_ptr75 + (x15), xmask) + tmp526 = tl.load(in_ptr76 + (x15), xmask) + tmp531 = tl.load(in_ptr77 + (x15), xmask) + tmp538 = tl.load(in_ptr78 + (x15), xmask) + tmp540 = tl.load(in_ptr79 + (0)) + tmp541 = tl.broadcast_to(tmp540, [XBLOCK]) + tmp527 = tmp526 - tmp525 + tmp528 = 0.09999999999999998 + tmp529 = tmp527 * tmp528 + tmp530 = tmp525 + tmp529 + tmp532 = 0.999 + tmp533 = tmp531 * tmp532 + tmp534 = tmp526 * tmp526 + tmp535 = 0.0010000000000000009 + tmp536 = tmp534 * tmp535 + tmp537 = tmp533 + tmp536 + tmp539 = libdevice.sqrt(tmp537) + tmp542 = libdevice.pow(tmp532, tmp541) + tmp543 = 1.0 + tmp544 = tmp542 - tmp543 + tmp545 = -tmp544 + tmp546 = libdevice.sqrt(tmp545) + tmp547 = tmp539 / tmp546 + tmp548 = 1e-08 + tmp549 = tmp547 + tmp548 + tmp550 = 0.9 + tmp551 = libdevice.pow(tmp550, tmp541) + tmp552 = tmp551 - tmp543 + tmp553 = 100.0 + tmp554 = tmp552 * tmp553 + tmp555 = tl.full([1], 1, tl.int32) + tmp556 = tmp555 / tmp554 + tmp557 = tmp549 / tmp556 + tmp558 = tmp530 / tmp557 + tmp559 = tmp538 + tmp558 + tl.store(out_ptr60 + (x15), tmp530, xmask) + tl.store(out_ptr62 + (x15), tmp559, xmask) + tl.store(out_ptr63 + (x15), tmp537, xmask) + elif pid < num_xblocks_16: + pid_offset = pid - num_xblocks_15 + xnumel = 768 + rnumel = 1 + xoffset = pid_offset * XBLOCK + xindex = xoffset + tl.arange(0, XBLOCK)[:] + xmask = xindex < xnumel + x16 = xindex + tmp560 = tl.load(in_ptr80 + (x16), xmask) + tmp561 = tl.load(in_ptr81 + (x16), xmask) + tmp566 = tl.load(in_ptr82 + (x16), xmask) + tmp573 = tl.load(in_ptr83 + (x16), xmask) + tmp575 = tl.load(in_ptr84 + (0)) + tmp576 = tl.broadcast_to(tmp575, [XBLOCK]) + tmp562 = tmp561 - tmp560 + tmp563 = 
0.09999999999999998 + tmp564 = tmp562 * tmp563 + tmp565 = tmp560 + tmp564 + tmp567 = 0.999 + tmp568 = tmp566 * tmp567 + tmp569 = tmp561 * tmp561 + tmp570 = 0.0010000000000000009 + tmp571 = tmp569 * tmp570 + tmp572 = tmp568 + tmp571 + tmp574 = libdevice.sqrt(tmp572) + tmp577 = libdevice.pow(tmp567, tmp576) + tmp578 = 1.0 + tmp579 = tmp577 - tmp578 + tmp580 = -tmp579 + tmp581 = libdevice.sqrt(tmp580) + tmp582 = tmp574 / tmp581 + tmp583 = 1e-08 + tmp584 = tmp582 + tmp583 + tmp585 = 0.9 + tmp586 = libdevice.pow(tmp585, tmp576) + tmp587 = tmp586 - tmp578 + tmp588 = 100.0 + tmp589 = tmp587 * tmp588 + tmp590 = tl.full([1], 1, tl.int32) + tmp591 = tmp590 / tmp589 + tmp592 = tmp584 / tmp591 + tmp593 = tmp565 / tmp592 + tmp594 = tmp573 + tmp593 + tl.store(out_ptr64 + (x16), tmp565, xmask) + tl.store(out_ptr66 + (x16), tmp594, xmask) + tl.store(out_ptr67 + (x16), tmp572, xmask) + elif pid < num_xblocks_17: + pid_offset = pid - num_xblocks_16 + xnumel = 1769472 + rnumel = 1 + xoffset = pid_offset * XBLOCK + xindex = xoffset + tl.arange(0, XBLOCK)[:] + xmask = tl.full([XBLOCK], True, tl.int1) + x17 = xindex + tmp595 = tl.load(in_ptr85 + (x17), None) + tmp596 = tl.load(in_ptr86 + (x17), None) + tmp601 = tl.load(in_ptr87 + (x17), None) + tmp608 = tl.load(in_ptr88 + (x17), None) + tmp610 = tl.load(in_ptr89 + (0)) + tmp611 = tl.broadcast_to(tmp610, [XBLOCK]) + tmp597 = tmp596 - tmp595 + tmp598 = 0.09999999999999998 + tmp599 = tmp597 * tmp598 + tmp600 = tmp595 + tmp599 + tmp602 = 0.999 + tmp603 = tmp601 * tmp602 + tmp604 = tmp596 * tmp596 + tmp605 = 0.0010000000000000009 + tmp606 = tmp604 * tmp605 + tmp607 = tmp603 + tmp606 + tmp609 = libdevice.sqrt(tmp607) + tmp612 = libdevice.pow(tmp602, tmp611) + tmp613 = 1.0 + tmp614 = tmp612 - tmp613 + tmp615 = -tmp614 + tmp616 = libdevice.sqrt(tmp615) + tmp617 = tmp609 / tmp616 + tmp618 = 1e-08 + tmp619 = tmp617 + tmp618 + tmp620 = 0.9 + tmp621 = libdevice.pow(tmp620, tmp611) + tmp622 = tmp621 - tmp613 + tmp623 = 100.0 + tmp624 = tmp622 * tmp623 + tmp625 = tl.full([1], 1, tl.int32) + tmp626 = tmp625 / tmp624 + tmp627 = tmp619 / tmp626 + tmp628 = tmp600 / tmp627 + tmp629 = tmp608 + tmp628 + tl.store(out_ptr68 + (x17), tmp600, None) + tl.store(out_ptr70 + (x17), tmp629, None) + tl.store(out_ptr71 + (x17), tmp607, None) + elif pid < num_xblocks_18: + pid_offset = pid - num_xblocks_17 + xnumel = 2304 + rnumel = 1 + xoffset = pid_offset * XBLOCK + xindex = xoffset + tl.arange(0, XBLOCK)[:] + xmask = xindex < xnumel + x18 = xindex + tmp630 = tl.load(in_ptr90 + (x18), xmask) + tmp631 = tl.load(in_ptr91 + (x18), xmask) + tmp636 = tl.load(in_ptr92 + (x18), xmask) + tmp643 = tl.load(in_ptr93 + (x18), xmask) + tmp645 = tl.load(in_ptr94 + (0)) + tmp646 = tl.broadcast_to(tmp645, [XBLOCK]) + tmp632 = tmp631 - tmp630 + tmp633 = 0.09999999999999998 + tmp634 = tmp632 * tmp633 + tmp635 = tmp630 + tmp634 + tmp637 = 0.999 + tmp638 = tmp636 * tmp637 + tmp639 = tmp631 * tmp631 + tmp640 = 0.0010000000000000009 + tmp641 = tmp639 * tmp640 + tmp642 = tmp638 + tmp641 + tmp644 = libdevice.sqrt(tmp642) + tmp647 = libdevice.pow(tmp637, tmp646) + tmp648 = 1.0 + tmp649 = tmp647 - tmp648 + tmp650 = -tmp649 + tmp651 = libdevice.sqrt(tmp650) + tmp652 = tmp644 / tmp651 + tmp653 = 1e-08 + tmp654 = tmp652 + tmp653 + tmp655 = 0.9 + tmp656 = libdevice.pow(tmp655, tmp646) + tmp657 = tmp656 - tmp648 + tmp658 = 100.0 + tmp659 = tmp657 * tmp658 + tmp660 = tl.full([1], 1, tl.int32) + tmp661 = tmp660 / tmp659 + tmp662 = tmp654 / tmp661 + tmp663 = tmp635 / tmp662 + tmp664 = tmp643 + tmp663 + tl.store(out_ptr72 + 
(x18), tmp635, xmask) + tl.store(out_ptr74 + (x18), tmp664, xmask) + tl.store(out_ptr75 + (x18), tmp642, xmask) + else: + pass + ''', device_str='cuda') + + + # kernel path: /tmp/tmp2ln889l5/ot/cotx3jbpug3oh3qmmhribixmalckqjrqblcah6fbsskmomye67hg.py + # Source Nodes: [], Original ATen: [] + + triton_for_fused_8 = async_compile.triton('triton_', ''' + import triton + import triton.language as tl + from triton.compiler.compiler import AttrsDescriptor + + from torch._inductor.runtime import triton_helpers, triton_heuristics + from torch._inductor.runtime.triton_helpers import libdevice, math as tl_math + from torch._inductor.runtime.hints import AutotuneHint, ReductionHint, TileHint, instance_descriptor, DeviceProperties + + @triton_heuristics.foreach( + num_warps=8, + triton_meta={'signature': {0: '*fp32', 1: '*fp32', 2: '*fp32', 3: '*fp32', 4: '*fp32', 5: '*fp32', 6: '*fp32', 7: '*fp32', 8: '*fp32', 9: '*fp32', 10: '*fp32', 11: '*fp32', 12: '*fp32', 13: '*fp32', 14: '*fp32', 15: '*fp32', 16: '*fp32', 17: '*fp32', 18: '*fp32', 19: '*fp32', 20: '*fp32', 21: '*fp32', 22: '*fp32', 23: '*fp32', 24: '*fp32', 25: '*fp32', 26: '*fp32', 27: '*fp32', 28: '*fp32', 29: '*fp32', 30: '*fp32', 31: '*fp32', 32: '*fp32', 33: '*fp32', 34: '*fp32', 35: '*fp32', 36: '*fp32', 37: '*fp32', 38: '*fp32', 39: '*fp32', 40: '*fp32', 41: '*fp32', 42: '*fp32', 43: '*fp32', 44: '*fp32', 45: '*fp32', 46: '*fp32', 47: '*fp32', 48: '*fp32', 49: '*fp32', 50: '*fp32', 51: '*fp32', 52: '*fp32', 53: '*fp32', 54: '*fp32', 55: '*fp32', 56: '*fp32', 57: '*fp32', 58: '*fp32', 59: '*fp32', 60: '*fp32', 61: '*fp32', 62: '*fp32', 63: '*fp32', 64: '*fp32', 65: '*fp32', 66: '*fp32', 67: '*fp32', 68: '*fp32', 69: '*fp32', 70: '*fp32', 71: '*fp32', 72: '*fp32', 73: '*fp32', 74: '*fp32', 75: '*fp32', 76: '*fp32', 77: '*fp32', 78: '*fp32', 79: '*fp32', 80: '*fp32', 81: '*fp32', 82: '*fp32', 83: '*fp32', 84: '*fp32', 85: '*fp32', 86: '*fp32', 87: '*fp32', 88: '*fp32', 89: '*fp32', 90: '*fp32', 91: '*fp32', 92: '*fp32', 93: '*fp32', 94: '*fp32', 95: '*fp32', 96: '*fp32', 97: '*fp32', 98: '*fp32', 99: '*fp32', 100: '*fp32', 101: '*fp32', 102: '*fp32', 103: '*fp32', 104: '*fp32', 105: '*fp32', 106: '*fp32', 107: '*fp32', 108: '*fp32', 109: '*fp32', 110: '*fp32', 111: '*fp32', 112: '*fp32', 113: '*fp32', 114: '*fp32', 115: '*fp32', 116: '*fp32', 117: '*fp32', 118: '*fp32', 119: '*fp32', 120: '*fp32', 121: '*fp32', 122: '*fp32', 123: '*fp32', 124: '*fp32', 125: '*fp32', 126: '*fp32', 127: '*fp32', 128: '*fp32', 129: '*fp32', 130: '*fp32', 131: '*fp32', 132: '*fp32', 133: '*fp32', 134: '*fp32', 135: '*fp32', 136: '*fp32', 137: '*fp32', 138: '*fp32', 139: '*fp32', 140: '*fp32', 141: '*fp32', 142: '*fp32', 143: '*fp32', 144: '*fp32', 145: '*fp32', 146: '*fp32', 147: '*fp32', 148: '*fp32', 149: '*fp32', 150: '*fp32', 151: '*fp32'}, 'device': DeviceProperties(type='cuda', index=0, cc=90, major=9, regs_per_multiprocessor=65536, max_threads_per_multi_processor=2048, multi_processor_count=132), 'constants': {}, 'configs': [AttrsDescriptor(divisible_by_16=(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76, 77, 78, 79, 80, 81, 82, 83, 84, 85, 86, 87, 88, 89, 90, 91, 92, 93, 94, 95, 96, 97, 98, 99, 100, 101, 102, 103, 104, 105, 106, 107, 108, 109, 110, 111, 112, 113, 114, 115, 116, 117, 118, 119, 120, 121, 122, 
123, 124, 125, 126, 127, 128, 129, 130, 131, 132, 133, 134, 135, 136, 137, 138, 139, 140, 141, 142, 143, 144, 145, 146, 147, 148, 149, 150, 151), equal_to_1=())]}, + inductor_meta={'kernel_name': 'triton_for_fused_8', 'mutated_arg_names': ['in_ptr0', 'in_ptr10', 'in_ptr12', 'in_ptr13', 'in_ptr15', 'in_ptr17', 'in_ptr18', 'in_ptr2', 'in_ptr20', 'in_ptr22', 'in_ptr23', 'in_ptr25', 'in_ptr27', 'in_ptr28', 'in_ptr3', 'in_ptr30', 'in_ptr32', 'in_ptr33', 'in_ptr35', 'in_ptr37', 'in_ptr38', 'in_ptr40', 'in_ptr42', 'in_ptr43', 'in_ptr45', 'in_ptr47', 'in_ptr48', 'in_ptr5', 'in_ptr50', 'in_ptr52', 'in_ptr53', 'in_ptr55', 'in_ptr57', 'in_ptr58', 'in_ptr60', 'in_ptr62', 'in_ptr63', 'in_ptr65', 'in_ptr67', 'in_ptr68', 'in_ptr7', 'in_ptr70', 'in_ptr72', 'in_ptr73', 'in_ptr75', 'in_ptr77', 'in_ptr78', 'in_ptr8', 'in_ptr80', 'in_ptr82', 'in_ptr83', 'in_ptr85', 'in_ptr87', 'in_ptr88', 'in_ptr90', 'in_ptr92', 'in_ptr93', 'out_ptr0', 'out_ptr10', 'out_ptr11', 'out_ptr12', 'out_ptr14', 'out_ptr15', 'out_ptr16', 'out_ptr18', 'out_ptr19', 'out_ptr2', 'out_ptr20', 'out_ptr22', 'out_ptr23', 'out_ptr24', 'out_ptr26', 'out_ptr27', 'out_ptr28', 'out_ptr3', 'out_ptr30', 'out_ptr31', 'out_ptr32', 'out_ptr34', 'out_ptr35', 'out_ptr36', 'out_ptr38', 'out_ptr39', 'out_ptr4', 'out_ptr40', 'out_ptr42', 'out_ptr43', 'out_ptr44', 'out_ptr46', 'out_ptr47', 'out_ptr48', 'out_ptr50', 'out_ptr51', 'out_ptr52', 'out_ptr54', 'out_ptr55', 'out_ptr56', 'out_ptr58', 'out_ptr59', 'out_ptr6', 'out_ptr60', 'out_ptr62', 'out_ptr63', 'out_ptr64', 'out_ptr66', 'out_ptr67', 'out_ptr68', 'out_ptr7', 'out_ptr70', 'out_ptr71', 'out_ptr72', 'out_ptr74', 'out_ptr75', 'out_ptr8'], 'backend_hash': 'B0714AF1B5CA55D285C0AA74C7669268EFE45FDD9B7A0A70183B8D89AC8FCAC4', 'are_deterministic_algorithms_enabled': False, 'assert_indirect_indexing': True, 'autotune_local_cache': True, 'autotune_pointwise': True, 'autotune_remote_cache': None, 'force_disable_caches': False, 'dynamic_scale_rblock': True, 'max_autotune': False, 'max_autotune_pointwise': False, 'min_split_scan_rblock': 256, 'spill_threshold': 16, 'store_cubin': False}, + ) + @triton.jit + def triton_(in_ptr0, in_ptr1, in_ptr2, in_ptr3, in_ptr4, in_ptr5, in_ptr6, in_ptr7, in_ptr8, in_ptr9, in_ptr10, in_ptr11, in_ptr12, in_ptr13, in_ptr14, in_ptr15, in_ptr16, in_ptr17, in_ptr18, in_ptr19, in_ptr20, in_ptr21, in_ptr22, in_ptr23, in_ptr24, in_ptr25, in_ptr26, in_ptr27, in_ptr28, in_ptr29, in_ptr30, in_ptr31, in_ptr32, in_ptr33, in_ptr34, in_ptr35, in_ptr36, in_ptr37, in_ptr38, in_ptr39, in_ptr40, in_ptr41, in_ptr42, in_ptr43, in_ptr44, in_ptr45, in_ptr46, in_ptr47, in_ptr48, in_ptr49, in_ptr50, in_ptr51, in_ptr52, in_ptr53, in_ptr54, in_ptr55, in_ptr56, in_ptr57, in_ptr58, in_ptr59, in_ptr60, in_ptr61, in_ptr62, in_ptr63, in_ptr64, in_ptr65, in_ptr66, in_ptr67, in_ptr68, in_ptr69, in_ptr70, in_ptr71, in_ptr72, in_ptr73, in_ptr74, in_ptr75, in_ptr76, in_ptr77, in_ptr78, in_ptr79, in_ptr80, in_ptr81, in_ptr82, in_ptr83, in_ptr84, in_ptr85, in_ptr86, in_ptr87, in_ptr88, in_ptr89, in_ptr90, in_ptr91, in_ptr92, in_ptr93, in_ptr94, out_ptr0, out_ptr2, out_ptr3, out_ptr4, out_ptr6, out_ptr7, out_ptr8, out_ptr10, out_ptr11, out_ptr12, out_ptr14, out_ptr15, out_ptr16, out_ptr18, out_ptr19, out_ptr20, out_ptr22, out_ptr23, out_ptr24, out_ptr26, out_ptr27, out_ptr28, out_ptr30, out_ptr31, out_ptr32, out_ptr34, out_ptr35, out_ptr36, out_ptr38, out_ptr39, out_ptr40, out_ptr42, out_ptr43, out_ptr44, out_ptr46, out_ptr47, out_ptr48, out_ptr50, out_ptr51, out_ptr52, out_ptr54, out_ptr55, out_ptr56, out_ptr58, 
out_ptr59, out_ptr60, out_ptr62, out_ptr63, out_ptr64, out_ptr66, out_ptr67, out_ptr68, out_ptr70, out_ptr71, out_ptr72, out_ptr74, out_ptr75): + pid = tl.program_id(0) + XBLOCK: tl.constexpr = 1024 + num_xblocks_0 = tl.cdiv(589824, XBLOCK) + num_xblocks_1 = num_xblocks_0 + tl.cdiv(768, XBLOCK) + num_xblocks_2 = num_xblocks_1 + tl.cdiv(768, XBLOCK) + num_xblocks_3 = num_xblocks_2 + tl.cdiv(768, XBLOCK) + num_xblocks_4 = num_xblocks_3 + tl.cdiv(2359296, XBLOCK) + num_xblocks_5 = num_xblocks_4 + tl.cdiv(3072, XBLOCK) + num_xblocks_6 = num_xblocks_5 + tl.cdiv(2359296, XBLOCK) + num_xblocks_7 = num_xblocks_6 + tl.cdiv(768, XBLOCK) + num_xblocks_8 = num_xblocks_7 + tl.cdiv(768, XBLOCK) + num_xblocks_9 = num_xblocks_8 + tl.cdiv(768, XBLOCK) + num_xblocks_10 = num_xblocks_9 + tl.cdiv(1769472, XBLOCK) + num_xblocks_11 = num_xblocks_10 + tl.cdiv(2304, XBLOCK) + num_xblocks_12 = num_xblocks_11 + tl.cdiv(589824, XBLOCK) + num_xblocks_13 = num_xblocks_12 + tl.cdiv(768, XBLOCK) + num_xblocks_14 = num_xblocks_13 + tl.cdiv(768, XBLOCK) + num_xblocks_15 = num_xblocks_14 + tl.cdiv(768, XBLOCK) + num_xblocks_16 = num_xblocks_15 + tl.cdiv(2359296, XBLOCK) + num_xblocks_17 = num_xblocks_16 + tl.cdiv(3072, XBLOCK) + num_xblocks_18 = num_xblocks_17 + tl.cdiv(2359296, XBLOCK) + if pid < num_xblocks_0: + pid_offset = pid + xnumel = 589824 + rnumel = 1 + xoffset = pid_offset * XBLOCK + xindex = xoffset + tl.arange(0, XBLOCK)[:] + xmask = tl.full([XBLOCK], True, tl.int1) + x0 = xindex + tmp0 = tl.load(in_ptr0 + (x0), None) + tmp1 = tl.load(in_ptr1 + (x0), None) + tmp6 = tl.load(in_ptr2 + (x0), None) + tmp13 = tl.load(in_ptr3 + (x0), None) + tmp15 = tl.load(in_ptr4 + (0)) + tmp16 = tl.broadcast_to(tmp15, [XBLOCK]) + tmp2 = tmp1 - tmp0 + tmp3 = 0.09999999999999998 + tmp4 = tmp2 * tmp3 + tmp5 = tmp0 + tmp4 + tmp7 = 0.999 + tmp8 = tmp6 * tmp7 + tmp9 = tmp1 * tmp1 + tmp10 = 0.0010000000000000009 + tmp11 = tmp9 * tmp10 + tmp12 = tmp8 + tmp11 + tmp14 = libdevice.sqrt(tmp12) + tmp17 = libdevice.pow(tmp7, tmp16) + tmp18 = 1.0 + tmp19 = tmp17 - tmp18 + tmp20 = -tmp19 + tmp21 = libdevice.sqrt(tmp20) + tmp22 = tmp14 / tmp21 + tmp23 = 1e-08 + tmp24 = tmp22 + tmp23 + tmp25 = 0.9 + tmp26 = libdevice.pow(tmp25, tmp16) + tmp27 = tmp26 - tmp18 + tmp28 = 100.0 + tmp29 = tmp27 * tmp28 + tmp30 = tl.full([1], 1, tl.int32) + tmp31 = tmp30 / tmp29 + tmp32 = tmp24 / tmp31 + tmp33 = tmp5 / tmp32 + tmp34 = tmp13 + tmp33 + tl.store(out_ptr0 + (x0), tmp5, None) + tl.store(out_ptr2 + (x0), tmp34, None) + tl.store(out_ptr3 + (x0), tmp12, None) + elif pid < num_xblocks_1: + pid_offset = pid - num_xblocks_0 + xnumel = 768 + rnumel = 1 + xoffset = pid_offset * XBLOCK + xindex = xoffset + tl.arange(0, XBLOCK)[:] + xmask = xindex < xnumel + x1 = xindex + tmp35 = tl.load(in_ptr5 + (x1), xmask) + tmp36 = tl.load(in_ptr6 + (x1), xmask) + tmp41 = tl.load(in_ptr7 + (x1), xmask) + tmp48 = tl.load(in_ptr8 + (x1), xmask) + tmp50 = tl.load(in_ptr9 + (0)) + tmp51 = tl.broadcast_to(tmp50, [XBLOCK]) + tmp37 = tmp36 - tmp35 + tmp38 = 0.09999999999999998 + tmp39 = tmp37 * tmp38 + tmp40 = tmp35 + tmp39 + tmp42 = 0.999 + tmp43 = tmp41 * tmp42 + tmp44 = tmp36 * tmp36 + tmp45 = 0.0010000000000000009 + tmp46 = tmp44 * tmp45 + tmp47 = tmp43 + tmp46 + tmp49 = libdevice.sqrt(tmp47) + tmp52 = libdevice.pow(tmp42, tmp51) + tmp53 = 1.0 + tmp54 = tmp52 - tmp53 + tmp55 = -tmp54 + tmp56 = libdevice.sqrt(tmp55) + tmp57 = tmp49 / tmp56 + tmp58 = 1e-08 + tmp59 = tmp57 + tmp58 + tmp60 = 0.9 + tmp61 = libdevice.pow(tmp60, tmp51) + tmp62 = tmp61 - tmp53 + tmp63 = 100.0 + tmp64 = 
tmp62 * tmp63 + tmp65 = tl.full([1], 1, tl.int32) + tmp66 = tmp65 / tmp64 + tmp67 = tmp59 / tmp66 + tmp68 = tmp40 / tmp67 + tmp69 = tmp48 + tmp68 + tl.store(out_ptr4 + (x1), tmp40, xmask) + tl.store(out_ptr6 + (x1), tmp69, xmask) + tl.store(out_ptr7 + (x1), tmp47, xmask) + elif pid < num_xblocks_2: + pid_offset = pid - num_xblocks_1 + xnumel = 768 + rnumel = 1 + xoffset = pid_offset * XBLOCK + xindex = xoffset + tl.arange(0, XBLOCK)[:] + xmask = xindex < xnumel + x2 = xindex + tmp70 = tl.load(in_ptr10 + (x2), xmask) + tmp71 = tl.load(in_ptr11 + (x2), xmask) + tmp76 = tl.load(in_ptr12 + (x2), xmask) + tmp83 = tl.load(in_ptr13 + (x2), xmask) + tmp85 = tl.load(in_ptr14 + (0)) + tmp86 = tl.broadcast_to(tmp85, [XBLOCK]) + tmp72 = tmp71 - tmp70 + tmp73 = 0.09999999999999998 + tmp74 = tmp72 * tmp73 + tmp75 = tmp70 + tmp74 + tmp77 = 0.999 + tmp78 = tmp76 * tmp77 + tmp79 = tmp71 * tmp71 + tmp80 = 0.0010000000000000009 + tmp81 = tmp79 * tmp80 + tmp82 = tmp78 + tmp81 + tmp84 = libdevice.sqrt(tmp82) + tmp87 = libdevice.pow(tmp77, tmp86) + tmp88 = 1.0 + tmp89 = tmp87 - tmp88 + tmp90 = -tmp89 + tmp91 = libdevice.sqrt(tmp90) + tmp92 = tmp84 / tmp91 + tmp93 = 1e-08 + tmp94 = tmp92 + tmp93 + tmp95 = 0.9 + tmp96 = libdevice.pow(tmp95, tmp86) + tmp97 = tmp96 - tmp88 + tmp98 = 100.0 + tmp99 = tmp97 * tmp98 + tmp100 = tl.full([1], 1, tl.int32) + tmp101 = tmp100 / tmp99 + tmp102 = tmp94 / tmp101 + tmp103 = tmp75 / tmp102 + tmp104 = tmp83 + tmp103 + tl.store(out_ptr8 + (x2), tmp75, xmask) + tl.store(out_ptr10 + (x2), tmp104, xmask) + tl.store(out_ptr11 + (x2), tmp82, xmask) + elif pid < num_xblocks_3: + pid_offset = pid - num_xblocks_2 + xnumel = 768 + rnumel = 1 + xoffset = pid_offset * XBLOCK + xindex = xoffset + tl.arange(0, XBLOCK)[:] + xmask = xindex < xnumel + x3 = xindex + tmp105 = tl.load(in_ptr15 + (x3), xmask) + tmp106 = tl.load(in_ptr16 + (x3), xmask) + tmp111 = tl.load(in_ptr17 + (x3), xmask) + tmp118 = tl.load(in_ptr18 + (x3), xmask) + tmp120 = tl.load(in_ptr19 + (0)) + tmp121 = tl.broadcast_to(tmp120, [XBLOCK]) + tmp107 = tmp106 - tmp105 + tmp108 = 0.09999999999999998 + tmp109 = tmp107 * tmp108 + tmp110 = tmp105 + tmp109 + tmp112 = 0.999 + tmp113 = tmp111 * tmp112 + tmp114 = tmp106 * tmp106 + tmp115 = 0.0010000000000000009 + tmp116 = tmp114 * tmp115 + tmp117 = tmp113 + tmp116 + tmp119 = libdevice.sqrt(tmp117) + tmp122 = libdevice.pow(tmp112, tmp121) + tmp123 = 1.0 + tmp124 = tmp122 - tmp123 + tmp125 = -tmp124 + tmp126 = libdevice.sqrt(tmp125) + tmp127 = tmp119 / tmp126 + tmp128 = 1e-08 + tmp129 = tmp127 + tmp128 + tmp130 = 0.9 + tmp131 = libdevice.pow(tmp130, tmp121) + tmp132 = tmp131 - tmp123 + tmp133 = 100.0 + tmp134 = tmp132 * tmp133 + tmp135 = tl.full([1], 1, tl.int32) + tmp136 = tmp135 / tmp134 + tmp137 = tmp129 / tmp136 + tmp138 = tmp110 / tmp137 + tmp139 = tmp118 + tmp138 + tl.store(out_ptr12 + (x3), tmp110, xmask) + tl.store(out_ptr14 + (x3), tmp139, xmask) + tl.store(out_ptr15 + (x3), tmp117, xmask) + elif pid < num_xblocks_4: + pid_offset = pid - num_xblocks_3 + xnumel = 2359296 + rnumel = 1 + xoffset = pid_offset * XBLOCK + xindex = xoffset + tl.arange(0, XBLOCK)[:] + xmask = tl.full([XBLOCK], True, tl.int1) + x4 = xindex + tmp140 = tl.load(in_ptr20 + (x4), None) + tmp141 = tl.load(in_ptr21 + (x4), None) + tmp146 = tl.load(in_ptr22 + (x4), None) + tmp153 = tl.load(in_ptr23 + (x4), None) + tmp155 = tl.load(in_ptr24 + (0)) + tmp156 = tl.broadcast_to(tmp155, [XBLOCK]) + tmp142 = tmp141 - tmp140 + tmp143 = 0.09999999999999998 + tmp144 = tmp142 * tmp143 + tmp145 = tmp140 + tmp144 + tmp147 = 
0.999 + tmp148 = tmp146 * tmp147 + tmp149 = tmp141 * tmp141 + tmp150 = 0.0010000000000000009 + tmp151 = tmp149 * tmp150 + tmp152 = tmp148 + tmp151 + tmp154 = libdevice.sqrt(tmp152) + tmp157 = libdevice.pow(tmp147, tmp156) + tmp158 = 1.0 + tmp159 = tmp157 - tmp158 + tmp160 = -tmp159 + tmp161 = libdevice.sqrt(tmp160) + tmp162 = tmp154 / tmp161 + tmp163 = 1e-08 + tmp164 = tmp162 + tmp163 + tmp165 = 0.9 + tmp166 = libdevice.pow(tmp165, tmp156) + tmp167 = tmp166 - tmp158 + tmp168 = 100.0 + tmp169 = tmp167 * tmp168 + tmp170 = tl.full([1], 1, tl.int32) + tmp171 = tmp170 / tmp169 + tmp172 = tmp164 / tmp171 + tmp173 = tmp145 / tmp172 + tmp174 = tmp153 + tmp173 + tl.store(out_ptr16 + (x4), tmp145, None) + tl.store(out_ptr18 + (x4), tmp174, None) + tl.store(out_ptr19 + (x4), tmp152, None) + elif pid < num_xblocks_5: + pid_offset = pid - num_xblocks_4 + xnumel = 3072 + rnumel = 1 + xoffset = pid_offset * XBLOCK + xindex = xoffset + tl.arange(0, XBLOCK)[:] + xmask = xindex < xnumel + x5 = xindex + tmp175 = tl.load(in_ptr25 + (x5), xmask) + tmp176 = tl.load(in_ptr26 + (x5), xmask) + tmp181 = tl.load(in_ptr27 + (x5), xmask) + tmp188 = tl.load(in_ptr28 + (x5), xmask) + tmp190 = tl.load(in_ptr29 + (0)) + tmp191 = tl.broadcast_to(tmp190, [XBLOCK]) + tmp177 = tmp176 - tmp175 + tmp178 = 0.09999999999999998 + tmp179 = tmp177 * tmp178 + tmp180 = tmp175 + tmp179 + tmp182 = 0.999 + tmp183 = tmp181 * tmp182 + tmp184 = tmp176 * tmp176 + tmp185 = 0.0010000000000000009 + tmp186 = tmp184 * tmp185 + tmp187 = tmp183 + tmp186 + tmp189 = libdevice.sqrt(tmp187) + tmp192 = libdevice.pow(tmp182, tmp191) + tmp193 = 1.0 + tmp194 = tmp192 - tmp193 + tmp195 = -tmp194 + tmp196 = libdevice.sqrt(tmp195) + tmp197 = tmp189 / tmp196 + tmp198 = 1e-08 + tmp199 = tmp197 + tmp198 + tmp200 = 0.9 + tmp201 = libdevice.pow(tmp200, tmp191) + tmp202 = tmp201 - tmp193 + tmp203 = 100.0 + tmp204 = tmp202 * tmp203 + tmp205 = tl.full([1], 1, tl.int32) + tmp206 = tmp205 / tmp204 + tmp207 = tmp199 / tmp206 + tmp208 = tmp180 / tmp207 + tmp209 = tmp188 + tmp208 + tl.store(out_ptr20 + (x5), tmp180, xmask) + tl.store(out_ptr22 + (x5), tmp209, xmask) + tl.store(out_ptr23 + (x5), tmp187, xmask) + elif pid < num_xblocks_6: + pid_offset = pid - num_xblocks_5 + xnumel = 2359296 + rnumel = 1 + xoffset = pid_offset * XBLOCK + xindex = xoffset + tl.arange(0, XBLOCK)[:] + xmask = tl.full([XBLOCK], True, tl.int1) + x6 = xindex + tmp210 = tl.load(in_ptr30 + (x6), None) + tmp211 = tl.load(in_ptr31 + (x6), None) + tmp216 = tl.load(in_ptr32 + (x6), None) + tmp223 = tl.load(in_ptr33 + (x6), None) + tmp225 = tl.load(in_ptr34 + (0)) + tmp226 = tl.broadcast_to(tmp225, [XBLOCK]) + tmp212 = tmp211 - tmp210 + tmp213 = 0.09999999999999998 + tmp214 = tmp212 * tmp213 + tmp215 = tmp210 + tmp214 + tmp217 = 0.999 + tmp218 = tmp216 * tmp217 + tmp219 = tmp211 * tmp211 + tmp220 = 0.0010000000000000009 + tmp221 = tmp219 * tmp220 + tmp222 = tmp218 + tmp221 + tmp224 = libdevice.sqrt(tmp222) + tmp227 = libdevice.pow(tmp217, tmp226) + tmp228 = 1.0 + tmp229 = tmp227 - tmp228 + tmp230 = -tmp229 + tmp231 = libdevice.sqrt(tmp230) + tmp232 = tmp224 / tmp231 + tmp233 = 1e-08 + tmp234 = tmp232 + tmp233 + tmp235 = 0.9 + tmp236 = libdevice.pow(tmp235, tmp226) + tmp237 = tmp236 - tmp228 + tmp238 = 100.0 + tmp239 = tmp237 * tmp238 + tmp240 = tl.full([1], 1, tl.int32) + tmp241 = tmp240 / tmp239 + tmp242 = tmp234 / tmp241 + tmp243 = tmp215 / tmp242 + tmp244 = tmp223 + tmp243 + tl.store(out_ptr24 + (x6), tmp215, None) + tl.store(out_ptr26 + (x6), tmp244, None) + tl.store(out_ptr27 + (x6), tmp222, None) 
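+ # Note on the branches above and below (tensor roles inferred from the
+ # arithmetic, not stated anywhere in the log): each `elif` repeats the same
+ # fused single-tensor Adam update on the next parameter tensor; only the
+ # pointer names, the element count (xnumel), and the temporary numbering
+ # change. The constants are consistent with beta1=0.9, beta2=0.999,
+ # eps=1e-8, and lr=0.01 (the 100.0 factor reads as 1/lr folded into the
+ # bias-corrected step size):
+ #   exp_avg    <- exp_avg + (grad - exp_avg) * (1 - beta1)
+ #   exp_avg_sq <- exp_avg_sq * beta2 + grad * grad * (1 - beta2)
+ #   param     <- param - lr * exp_avg /
+ #                ((1 - beta1**step) * (sqrt(exp_avg_sq) / sqrt(1 - beta2**step) + eps))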
+ elif pid < num_xblocks_7: + pid_offset = pid - num_xblocks_6 + xnumel = 768 + rnumel = 1 + xoffset = pid_offset * XBLOCK + xindex = xoffset + tl.arange(0, XBLOCK)[:] + xmask = xindex < xnumel + x7 = xindex + tmp245 = tl.load(in_ptr35 + (x7), xmask) + tmp246 = tl.load(in_ptr36 + (x7), xmask) + tmp251 = tl.load(in_ptr37 + (x7), xmask) + tmp258 = tl.load(in_ptr38 + (x7), xmask) + tmp260 = tl.load(in_ptr39 + (0)) + tmp261 = tl.broadcast_to(tmp260, [XBLOCK]) + tmp247 = tmp246 - tmp245 + tmp248 = 0.09999999999999998 + tmp249 = tmp247 * tmp248 + tmp250 = tmp245 + tmp249 + tmp252 = 0.999 + tmp253 = tmp251 * tmp252 + tmp254 = tmp246 * tmp246 + tmp255 = 0.0010000000000000009 + tmp256 = tmp254 * tmp255 + tmp257 = tmp253 + tmp256 + tmp259 = libdevice.sqrt(tmp257) + tmp262 = libdevice.pow(tmp252, tmp261) + tmp263 = 1.0 + tmp264 = tmp262 - tmp263 + tmp265 = -tmp264 + tmp266 = libdevice.sqrt(tmp265) + tmp267 = tmp259 / tmp266 + tmp268 = 1e-08 + tmp269 = tmp267 + tmp268 + tmp270 = 0.9 + tmp271 = libdevice.pow(tmp270, tmp261) + tmp272 = tmp271 - tmp263 + tmp273 = 100.0 + tmp274 = tmp272 * tmp273 + tmp275 = tl.full([1], 1, tl.int32) + tmp276 = tmp275 / tmp274 + tmp277 = tmp269 / tmp276 + tmp278 = tmp250 / tmp277 + tmp279 = tmp258 + tmp278 + tl.store(out_ptr28 + (x7), tmp250, xmask) + tl.store(out_ptr30 + (x7), tmp279, xmask) + tl.store(out_ptr31 + (x7), tmp257, xmask) + elif pid < num_xblocks_8: + pid_offset = pid - num_xblocks_7 + xnumel = 768 + rnumel = 1 + xoffset = pid_offset * XBLOCK + xindex = xoffset + tl.arange(0, XBLOCK)[:] + xmask = xindex < xnumel + x8 = xindex + tmp280 = tl.load(in_ptr40 + (x8), xmask) + tmp281 = tl.load(in_ptr41 + (x8), xmask) + tmp286 = tl.load(in_ptr42 + (x8), xmask) + tmp293 = tl.load(in_ptr43 + (x8), xmask) + tmp295 = tl.load(in_ptr44 + (0)) + tmp296 = tl.broadcast_to(tmp295, [XBLOCK]) + tmp282 = tmp281 - tmp280 + tmp283 = 0.09999999999999998 + tmp284 = tmp282 * tmp283 + tmp285 = tmp280 + tmp284 + tmp287 = 0.999 + tmp288 = tmp286 * tmp287 + tmp289 = tmp281 * tmp281 + tmp290 = 0.0010000000000000009 + tmp291 = tmp289 * tmp290 + tmp292 = tmp288 + tmp291 + tmp294 = libdevice.sqrt(tmp292) + tmp297 = libdevice.pow(tmp287, tmp296) + tmp298 = 1.0 + tmp299 = tmp297 - tmp298 + tmp300 = -tmp299 + tmp301 = libdevice.sqrt(tmp300) + tmp302 = tmp294 / tmp301 + tmp303 = 1e-08 + tmp304 = tmp302 + tmp303 + tmp305 = 0.9 + tmp306 = libdevice.pow(tmp305, tmp296) + tmp307 = tmp306 - tmp298 + tmp308 = 100.0 + tmp309 = tmp307 * tmp308 + tmp310 = tl.full([1], 1, tl.int32) + tmp311 = tmp310 / tmp309 + tmp312 = tmp304 / tmp311 + tmp313 = tmp285 / tmp312 + tmp314 = tmp293 + tmp313 + tl.store(out_ptr32 + (x8), tmp285, xmask) + tl.store(out_ptr34 + (x8), tmp314, xmask) + tl.store(out_ptr35 + (x8), tmp292, xmask) + elif pid < num_xblocks_9: + pid_offset = pid - num_xblocks_8 + xnumel = 768 + rnumel = 1 + xoffset = pid_offset * XBLOCK + xindex = xoffset + tl.arange(0, XBLOCK)[:] + xmask = xindex < xnumel + x9 = xindex + tmp315 = tl.load(in_ptr45 + (x9), xmask) + tmp316 = tl.load(in_ptr46 + (x9), xmask) + tmp321 = tl.load(in_ptr47 + (x9), xmask) + tmp328 = tl.load(in_ptr48 + (x9), xmask) + tmp330 = tl.load(in_ptr49 + (0)) + tmp331 = tl.broadcast_to(tmp330, [XBLOCK]) + tmp317 = tmp316 - tmp315 + tmp318 = 0.09999999999999998 + tmp319 = tmp317 * tmp318 + tmp320 = tmp315 + tmp319 + tmp322 = 0.999 + tmp323 = tmp321 * tmp322 + tmp324 = tmp316 * tmp316 + tmp325 = 0.0010000000000000009 + tmp326 = tmp324 * tmp325 + tmp327 = tmp323 + tmp326 + tmp329 = libdevice.sqrt(tmp327) + tmp332 = libdevice.pow(tmp322, tmp331) 
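+ # Inferred bias correction: tmp332 is beta2**step, so -(tmp332 - 1.0) just
+ # below recovers 1 - beta2**step; its square root rescales sqrt(exp_avg_sq)
+ # before eps is added to form the Adam denominator.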
+ tmp333 = 1.0 + tmp334 = tmp332 - tmp333 + tmp335 = -tmp334 + tmp336 = libdevice.sqrt(tmp335) + tmp337 = tmp329 / tmp336 + tmp338 = 1e-08 + tmp339 = tmp337 + tmp338 + tmp340 = 0.9 + tmp341 = libdevice.pow(tmp340, tmp331) + tmp342 = tmp341 - tmp333 + tmp343 = 100.0 + tmp344 = tmp342 * tmp343 + tmp345 = tl.full([1], 1, tl.int32) + tmp346 = tmp345 / tmp344 + tmp347 = tmp339 / tmp346 + tmp348 = tmp320 / tmp347 + tmp349 = tmp328 + tmp348 + tl.store(out_ptr36 + (x9), tmp320, xmask) + tl.store(out_ptr38 + (x9), tmp349, xmask) + tl.store(out_ptr39 + (x9), tmp327, xmask) + elif pid < num_xblocks_10: + pid_offset = pid - num_xblocks_9 + xnumel = 1769472 + rnumel = 1 + xoffset = pid_offset * XBLOCK + xindex = xoffset + tl.arange(0, XBLOCK)[:] + xmask = tl.full([XBLOCK], True, tl.int1) + x10 = xindex + tmp350 = tl.load(in_ptr50 + (x10), None) + tmp351 = tl.load(in_ptr51 + (x10), None) + tmp356 = tl.load(in_ptr52 + (x10), None) + tmp363 = tl.load(in_ptr53 + (x10), None) + tmp365 = tl.load(in_ptr54 + (0)) + tmp366 = tl.broadcast_to(tmp365, [XBLOCK]) + tmp352 = tmp351 - tmp350 + tmp353 = 0.09999999999999998 + tmp354 = tmp352 * tmp353 + tmp355 = tmp350 + tmp354 + tmp357 = 0.999 + tmp358 = tmp356 * tmp357 + tmp359 = tmp351 * tmp351 + tmp360 = 0.0010000000000000009 + tmp361 = tmp359 * tmp360 + tmp362 = tmp358 + tmp361 + tmp364 = libdevice.sqrt(tmp362) + tmp367 = libdevice.pow(tmp357, tmp366) + tmp368 = 1.0 + tmp369 = tmp367 - tmp368 + tmp370 = -tmp369 + tmp371 = libdevice.sqrt(tmp370) + tmp372 = tmp364 / tmp371 + tmp373 = 1e-08 + tmp374 = tmp372 + tmp373 + tmp375 = 0.9 + tmp376 = libdevice.pow(tmp375, tmp366) + tmp377 = tmp376 - tmp368 + tmp378 = 100.0 + tmp379 = tmp377 * tmp378 + tmp380 = tl.full([1], 1, tl.int32) + tmp381 = tmp380 / tmp379 + tmp382 = tmp374 / tmp381 + tmp383 = tmp355 / tmp382 + tmp384 = tmp363 + tmp383 + tl.store(out_ptr40 + (x10), tmp355, None) + tl.store(out_ptr42 + (x10), tmp384, None) + tl.store(out_ptr43 + (x10), tmp362, None) + elif pid < num_xblocks_11: + pid_offset = pid - num_xblocks_10 + xnumel = 2304 + rnumel = 1 + xoffset = pid_offset * XBLOCK + xindex = xoffset + tl.arange(0, XBLOCK)[:] + xmask = xindex < xnumel + x11 = xindex + tmp385 = tl.load(in_ptr55 + (x11), xmask) + tmp386 = tl.load(in_ptr56 + (x11), xmask) + tmp391 = tl.load(in_ptr57 + (x11), xmask) + tmp398 = tl.load(in_ptr58 + (x11), xmask) + tmp400 = tl.load(in_ptr59 + (0)) + tmp401 = tl.broadcast_to(tmp400, [XBLOCK]) + tmp387 = tmp386 - tmp385 + tmp388 = 0.09999999999999998 + tmp389 = tmp387 * tmp388 + tmp390 = tmp385 + tmp389 + tmp392 = 0.999 + tmp393 = tmp391 * tmp392 + tmp394 = tmp386 * tmp386 + tmp395 = 0.0010000000000000009 + tmp396 = tmp394 * tmp395 + tmp397 = tmp393 + tmp396 + tmp399 = libdevice.sqrt(tmp397) + tmp402 = libdevice.pow(tmp392, tmp401) + tmp403 = 1.0 + tmp404 = tmp402 - tmp403 + tmp405 = -tmp404 + tmp406 = libdevice.sqrt(tmp405) + tmp407 = tmp399 / tmp406 + tmp408 = 1e-08 + tmp409 = tmp407 + tmp408 + tmp410 = 0.9 + tmp411 = libdevice.pow(tmp410, tmp401) + tmp412 = tmp411 - tmp403 + tmp413 = 100.0 + tmp414 = tmp412 * tmp413 + tmp415 = tl.full([1], 1, tl.int32) + tmp416 = tmp415 / tmp414 + tmp417 = tmp409 / tmp416 + tmp418 = tmp390 / tmp417 + tmp419 = tmp398 + tmp418 + tl.store(out_ptr44 + (x11), tmp390, xmask) + tl.store(out_ptr46 + (x11), tmp419, xmask) + tl.store(out_ptr47 + (x11), tmp397, xmask) + elif pid < num_xblocks_12: + pid_offset = pid - num_xblocks_11 + xnumel = 589824 + rnumel = 1 + xoffset = pid_offset * XBLOCK + xindex = xoffset + tl.arange(0, XBLOCK)[:] + xmask = tl.full([XBLOCK], 
True, tl.int1) + x12 = xindex + tmp420 = tl.load(in_ptr60 + (x12), None) + tmp421 = tl.load(in_ptr61 + (x12), None) + tmp426 = tl.load(in_ptr62 + (x12), None) + tmp433 = tl.load(in_ptr63 + (x12), None) + tmp435 = tl.load(in_ptr64 + (0)) + tmp436 = tl.broadcast_to(tmp435, [XBLOCK]) + tmp422 = tmp421 - tmp420 + tmp423 = 0.09999999999999998 + tmp424 = tmp422 * tmp423 + tmp425 = tmp420 + tmp424 + tmp427 = 0.999 + tmp428 = tmp426 * tmp427 + tmp429 = tmp421 * tmp421 + tmp430 = 0.0010000000000000009 + tmp431 = tmp429 * tmp430 + tmp432 = tmp428 + tmp431 + tmp434 = libdevice.sqrt(tmp432) + tmp437 = libdevice.pow(tmp427, tmp436) + tmp438 = 1.0 + tmp439 = tmp437 - tmp438 + tmp440 = -tmp439 + tmp441 = libdevice.sqrt(tmp440) + tmp442 = tmp434 / tmp441 + tmp443 = 1e-08 + tmp444 = tmp442 + tmp443 + tmp445 = 0.9 + tmp446 = libdevice.pow(tmp445, tmp436) + tmp447 = tmp446 - tmp438 + tmp448 = 100.0 + tmp449 = tmp447 * tmp448 + tmp450 = tl.full([1], 1, tl.int32) + tmp451 = tmp450 / tmp449 + tmp452 = tmp444 / tmp451 + tmp453 = tmp425 / tmp452 + tmp454 = tmp433 + tmp453 + tl.store(out_ptr48 + (x12), tmp425, None) + tl.store(out_ptr50 + (x12), tmp454, None) + tl.store(out_ptr51 + (x12), tmp432, None) + elif pid < num_xblocks_13: + pid_offset = pid - num_xblocks_12 + xnumel = 768 + rnumel = 1 + xoffset = pid_offset * XBLOCK + xindex = xoffset + tl.arange(0, XBLOCK)[:] + xmask = xindex < xnumel + x13 = xindex + tmp455 = tl.load(in_ptr65 + (x13), xmask) + tmp456 = tl.load(in_ptr66 + (x13), xmask) + tmp461 = tl.load(in_ptr67 + (x13), xmask) + tmp468 = tl.load(in_ptr68 + (x13), xmask) + tmp470 = tl.load(in_ptr69 + (0)) + tmp471 = tl.broadcast_to(tmp470, [XBLOCK]) + tmp457 = tmp456 - tmp455 + tmp458 = 0.09999999999999998 + tmp459 = tmp457 * tmp458 + tmp460 = tmp455 + tmp459 + tmp462 = 0.999 + tmp463 = tmp461 * tmp462 + tmp464 = tmp456 * tmp456 + tmp465 = 0.0010000000000000009 + tmp466 = tmp464 * tmp465 + tmp467 = tmp463 + tmp466 + tmp469 = libdevice.sqrt(tmp467) + tmp472 = libdevice.pow(tmp462, tmp471) + tmp473 = 1.0 + tmp474 = tmp472 - tmp473 + tmp475 = -tmp474 + tmp476 = libdevice.sqrt(tmp475) + tmp477 = tmp469 / tmp476 + tmp478 = 1e-08 + tmp479 = tmp477 + tmp478 + tmp480 = 0.9 + tmp481 = libdevice.pow(tmp480, tmp471) + tmp482 = tmp481 - tmp473 + tmp483 = 100.0 + tmp484 = tmp482 * tmp483 + tmp485 = tl.full([1], 1, tl.int32) + tmp486 = tmp485 / tmp484 + tmp487 = tmp479 / tmp486 + tmp488 = tmp460 / tmp487 + tmp489 = tmp468 + tmp488 + tl.store(out_ptr52 + (x13), tmp460, xmask) + tl.store(out_ptr54 + (x13), tmp489, xmask) + tl.store(out_ptr55 + (x13), tmp467, xmask) + elif pid < num_xblocks_14: + pid_offset = pid - num_xblocks_13 + xnumel = 768 + rnumel = 1 + xoffset = pid_offset * XBLOCK + xindex = xoffset + tl.arange(0, XBLOCK)[:] + xmask = xindex < xnumel + x14 = xindex + tmp490 = tl.load(in_ptr70 + (x14), xmask) + tmp491 = tl.load(in_ptr71 + (x14), xmask) + tmp496 = tl.load(in_ptr72 + (x14), xmask) + tmp503 = tl.load(in_ptr73 + (x14), xmask) + tmp505 = tl.load(in_ptr74 + (0)) + tmp506 = tl.broadcast_to(tmp505, [XBLOCK]) + tmp492 = tmp491 - tmp490 + tmp493 = 0.09999999999999998 + tmp494 = tmp492 * tmp493 + tmp495 = tmp490 + tmp494 + tmp497 = 0.999 + tmp498 = tmp496 * tmp497 + tmp499 = tmp491 * tmp491 + tmp500 = 0.0010000000000000009 + tmp501 = tmp499 * tmp500 + tmp502 = tmp498 + tmp501 + tmp504 = libdevice.sqrt(tmp502) + tmp507 = libdevice.pow(tmp497, tmp506) + tmp508 = 1.0 + tmp509 = tmp507 - tmp508 + tmp510 = -tmp509 + tmp511 = libdevice.sqrt(tmp510) + tmp512 = tmp504 / tmp511 + tmp513 = 1e-08 + tmp514 = tmp512 + 
tmp513 + tmp515 = 0.9 + tmp516 = libdevice.pow(tmp515, tmp506) + tmp517 = tmp516 - tmp508 + tmp518 = 100.0 + tmp519 = tmp517 * tmp518 + tmp520 = tl.full([1], 1, tl.int32) + tmp521 = tmp520 / tmp519 + tmp522 = tmp514 / tmp521 + tmp523 = tmp495 / tmp522 + tmp524 = tmp503 + tmp523 + tl.store(out_ptr56 + (x14), tmp495, xmask) + tl.store(out_ptr58 + (x14), tmp524, xmask) + tl.store(out_ptr59 + (x14), tmp502, xmask) + elif pid < num_xblocks_15: + pid_offset = pid - num_xblocks_14 + xnumel = 768 + rnumel = 1 + xoffset = pid_offset * XBLOCK + xindex = xoffset + tl.arange(0, XBLOCK)[:] + xmask = xindex < xnumel + x15 = xindex + tmp525 = tl.load(in_ptr75 + (x15), xmask) + tmp526 = tl.load(in_ptr76 + (x15), xmask) + tmp531 = tl.load(in_ptr77 + (x15), xmask) + tmp538 = tl.load(in_ptr78 + (x15), xmask) + tmp540 = tl.load(in_ptr79 + (0)) + tmp541 = tl.broadcast_to(tmp540, [XBLOCK]) + tmp527 = tmp526 - tmp525 + tmp528 = 0.09999999999999998 + tmp529 = tmp527 * tmp528 + tmp530 = tmp525 + tmp529 + tmp532 = 0.999 + tmp533 = tmp531 * tmp532 + tmp534 = tmp526 * tmp526 + tmp535 = 0.0010000000000000009 + tmp536 = tmp534 * tmp535 + tmp537 = tmp533 + tmp536 + tmp539 = libdevice.sqrt(tmp537) + tmp542 = libdevice.pow(tmp532, tmp541) + tmp543 = 1.0 + tmp544 = tmp542 - tmp543 + tmp545 = -tmp544 + tmp546 = libdevice.sqrt(tmp545) + tmp547 = tmp539 / tmp546 + tmp548 = 1e-08 + tmp549 = tmp547 + tmp548 + tmp550 = 0.9 + tmp551 = libdevice.pow(tmp550, tmp541) + tmp552 = tmp551 - tmp543 + tmp553 = 100.0 + tmp554 = tmp552 * tmp553 + tmp555 = tl.full([1], 1, tl.int32) + tmp556 = tmp555 / tmp554 + tmp557 = tmp549 / tmp556 + tmp558 = tmp530 / tmp557 + tmp559 = tmp538 + tmp558 + tl.store(out_ptr60 + (x15), tmp530, xmask) + tl.store(out_ptr62 + (x15), tmp559, xmask) + tl.store(out_ptr63 + (x15), tmp537, xmask) + elif pid < num_xblocks_16: + pid_offset = pid - num_xblocks_15 + xnumel = 2359296 + rnumel = 1 + xoffset = pid_offset * XBLOCK + xindex = xoffset + tl.arange(0, XBLOCK)[:] + xmask = tl.full([XBLOCK], True, tl.int1) + x16 = xindex + tmp560 = tl.load(in_ptr80 + (x16), None) + tmp561 = tl.load(in_ptr81 + (x16), None) + tmp566 = tl.load(in_ptr82 + (x16), None) + tmp573 = tl.load(in_ptr83 + (x16), None) + tmp575 = tl.load(in_ptr84 + (0)) + tmp576 = tl.broadcast_to(tmp575, [XBLOCK]) + tmp562 = tmp561 - tmp560 + tmp563 = 0.09999999999999998 + tmp564 = tmp562 * tmp563 + tmp565 = tmp560 + tmp564 + tmp567 = 0.999 + tmp568 = tmp566 * tmp567 + tmp569 = tmp561 * tmp561 + tmp570 = 0.0010000000000000009 + tmp571 = tmp569 * tmp570 + tmp572 = tmp568 + tmp571 + tmp574 = libdevice.sqrt(tmp572) + tmp577 = libdevice.pow(tmp567, tmp576) + tmp578 = 1.0 + tmp579 = tmp577 - tmp578 + tmp580 = -tmp579 + tmp581 = libdevice.sqrt(tmp580) + tmp582 = tmp574 / tmp581 + tmp583 = 1e-08 + tmp584 = tmp582 + tmp583 + tmp585 = 0.9 + tmp586 = libdevice.pow(tmp585, tmp576) + tmp587 = tmp586 - tmp578 + tmp588 = 100.0 + tmp589 = tmp587 * tmp588 + tmp590 = tl.full([1], 1, tl.int32) + tmp591 = tmp590 / tmp589 + tmp592 = tmp584 / tmp591 + tmp593 = tmp565 / tmp592 + tmp594 = tmp573 + tmp593 + tl.store(out_ptr64 + (x16), tmp565, None) + tl.store(out_ptr66 + (x16), tmp594, None) + tl.store(out_ptr67 + (x16), tmp572, None) + elif pid < num_xblocks_17: + pid_offset = pid - num_xblocks_16 + xnumel = 3072 + rnumel = 1 + xoffset = pid_offset * XBLOCK + xindex = xoffset + tl.arange(0, XBLOCK)[:] + xmask = xindex < xnumel + x17 = xindex + tmp595 = tl.load(in_ptr85 + (x17), xmask) + tmp596 = tl.load(in_ptr86 + (x17), xmask) + tmp601 = tl.load(in_ptr87 + (x17), xmask) + tmp608 = 
tl.load(in_ptr88 + (x17), xmask) + tmp610 = tl.load(in_ptr89 + (0)) + tmp611 = tl.broadcast_to(tmp610, [XBLOCK]) + tmp597 = tmp596 - tmp595 + tmp598 = 0.09999999999999998 + tmp599 = tmp597 * tmp598 + tmp600 = tmp595 + tmp599 + tmp602 = 0.999 + tmp603 = tmp601 * tmp602 + tmp604 = tmp596 * tmp596 + tmp605 = 0.0010000000000000009 + tmp606 = tmp604 * tmp605 + tmp607 = tmp603 + tmp606 + tmp609 = libdevice.sqrt(tmp607) + tmp612 = libdevice.pow(tmp602, tmp611) + tmp613 = 1.0 + tmp614 = tmp612 - tmp613 + tmp615 = -tmp614 + tmp616 = libdevice.sqrt(tmp615) + tmp617 = tmp609 / tmp616 + tmp618 = 1e-08 + tmp619 = tmp617 + tmp618 + tmp620 = 0.9 + tmp621 = libdevice.pow(tmp620, tmp611) + tmp622 = tmp621 - tmp613 + tmp623 = 100.0 + tmp624 = tmp622 * tmp623 + tmp625 = tl.full([1], 1, tl.int32) + tmp626 = tmp625 / tmp624 + tmp627 = tmp619 / tmp626 + tmp628 = tmp600 / tmp627 + tmp629 = tmp608 + tmp628 + tl.store(out_ptr68 + (x17), tmp600, xmask) + tl.store(out_ptr70 + (x17), tmp629, xmask) + tl.store(out_ptr71 + (x17), tmp607, xmask) + elif pid < num_xblocks_18: + pid_offset = pid - num_xblocks_17 + xnumel = 2359296 + rnumel = 1 + xoffset = pid_offset * XBLOCK + xindex = xoffset + tl.arange(0, XBLOCK)[:] + xmask = tl.full([XBLOCK], True, tl.int1) + x18 = xindex + tmp630 = tl.load(in_ptr90 + (x18), None) + tmp631 = tl.load(in_ptr91 + (x18), None) + tmp636 = tl.load(in_ptr92 + (x18), None) + tmp643 = tl.load(in_ptr93 + (x18), None) + tmp645 = tl.load(in_ptr94 + (0)) + tmp646 = tl.broadcast_to(tmp645, [XBLOCK]) + tmp632 = tmp631 - tmp630 + tmp633 = 0.09999999999999998 + tmp634 = tmp632 * tmp633 + tmp635 = tmp630 + tmp634 + tmp637 = 0.999 + tmp638 = tmp636 * tmp637 + tmp639 = tmp631 * tmp631 + tmp640 = 0.0010000000000000009 + tmp641 = tmp639 * tmp640 + tmp642 = tmp638 + tmp641 + tmp644 = libdevice.sqrt(tmp642) + tmp647 = libdevice.pow(tmp637, tmp646) + tmp648 = 1.0 + tmp649 = tmp647 - tmp648 + tmp650 = -tmp649 + tmp651 = libdevice.sqrt(tmp650) + tmp652 = tmp644 / tmp651 + tmp653 = 1e-08 + tmp654 = tmp652 + tmp653 + tmp655 = 0.9 + tmp656 = libdevice.pow(tmp655, tmp646) + tmp657 = tmp656 - tmp648 + tmp658 = 100.0 + tmp659 = tmp657 * tmp658 + tmp660 = tl.full([1], 1, tl.int32) + tmp661 = tmp660 / tmp659 + tmp662 = tmp654 / tmp661 + tmp663 = tmp635 / tmp662 + tmp664 = tmp643 + tmp663 + tl.store(out_ptr72 + (x18), tmp635, None) + tl.store(out_ptr74 + (x18), tmp664, None) + tl.store(out_ptr75 + (x18), tmp642, None) + else: + pass + ''', device_str='cuda') + + + # kernel path: /tmp/tmp2ln889l5/4r/c4rqbae7wfx3e3mqatzcxcyp2mum6rfdbywgpqy7ptg44uaonbzr.py + # Source Nodes: [], Original ATen: [] + + triton_for_fused_9 = async_compile.triton('triton_', ''' + import triton + import triton.language as tl + from triton.compiler.compiler import AttrsDescriptor + + from torch._inductor.runtime import triton_helpers, triton_heuristics + from torch._inductor.runtime.triton_helpers import libdevice, math as tl_math + from torch._inductor.runtime.hints import AutotuneHint, ReductionHint, TileHint, instance_descriptor, DeviceProperties + + @triton_heuristics.foreach( + num_warps=8, + triton_meta={'signature': {0: '*fp32', 1: '*fp32', 2: '*fp32', 3: '*fp32', 4: '*fp32', 5: '*fp32', 6: '*fp32', 7: '*fp32', 8: '*fp32', 9: '*fp32', 10: '*fp32', 11: '*fp32', 12: '*fp32', 13: '*fp32', 14: '*fp32', 15: '*fp32', 16: '*fp32', 17: '*fp32', 18: '*fp32', 19: '*fp32', 20: '*fp32', 21: '*fp32', 22: '*fp32', 23: '*fp32', 24: '*fp32', 25: '*fp32', 26: '*fp32', 27: '*fp32', 28: '*fp32', 29: '*fp32', 30: '*fp32', 31: '*fp32', 32: '*fp32', 33: '*fp32', 
34: '*fp32', 35: '*fp32', 36: '*fp32', 37: '*fp32', 38: '*fp32', 39: '*fp32', 40: '*fp32', 41: '*fp32', 42: '*fp32', 43: '*fp32', 44: '*fp32', 45: '*fp32', 46: '*fp32', 47: '*fp32', 48: '*fp32', 49: '*fp32', 50: '*fp32', 51: '*fp32', 52: '*fp32', 53: '*fp32', 54: '*fp32', 55: '*fp32', 56: '*fp32', 57: '*fp32', 58: '*fp32', 59: '*fp32', 60: '*fp32', 61: '*fp32', 62: '*fp32', 63: '*fp32', 64: '*fp32', 65: '*fp32', 66: '*fp32', 67: '*fp32', 68: '*fp32', 69: '*fp32', 70: '*fp32', 71: '*fp32', 72: '*fp32', 73: '*fp32', 74: '*fp32', 75: '*fp32', 76: '*fp32', 77: '*fp32', 78: '*fp32', 79: '*fp32', 80: '*fp32', 81: '*fp32', 82: '*fp32', 83: '*fp32', 84: '*fp32', 85: '*fp32', 86: '*fp32', 87: '*fp32', 88: '*fp32', 89: '*fp32', 90: '*fp32', 91: '*fp32', 92: '*fp32', 93: '*fp32', 94: '*fp32', 95: '*fp32', 96: '*fp32', 97: '*fp32', 98: '*fp32', 99: '*fp32', 100: '*fp32', 101: '*fp32', 102: '*fp32', 103: '*fp32', 104: '*fp32', 105: '*fp32', 106: '*fp32', 107: '*fp32', 108: '*fp32', 109: '*fp32', 110: '*fp32', 111: '*fp32', 112: '*fp32', 113: '*fp32', 114: '*fp32', 115: '*fp32', 116: '*fp32', 117: '*fp32', 118: '*fp32', 119: '*fp32'}, 'device': DeviceProperties(type='cuda', index=0, cc=90, major=9, regs_per_multiprocessor=65536, max_threads_per_multi_processor=2048, multi_processor_count=132), 'constants': {}, 'configs': [AttrsDescriptor(divisible_by_16=(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76, 77, 78, 79, 80, 81, 82, 83, 84, 85, 86, 87, 88, 89, 90, 91, 92, 93, 94, 95, 96, 97, 98, 99, 100, 101, 102, 103, 104, 105, 106, 107, 108, 109, 110, 111, 112, 113, 114, 115, 116, 117, 118, 119), equal_to_1=())]}, + inductor_meta={'kernel_name': 'triton_for_fused_9', 'mutated_arg_names': ['in_ptr0', 'in_ptr10', 'in_ptr12', 'in_ptr13', 'in_ptr15', 'in_ptr17', 'in_ptr18', 'in_ptr2', 'in_ptr20', 'in_ptr22', 'in_ptr23', 'in_ptr25', 'in_ptr27', 'in_ptr28', 'in_ptr3', 'in_ptr30', 'in_ptr32', 'in_ptr33', 'in_ptr35', 'in_ptr37', 'in_ptr38', 'in_ptr40', 'in_ptr42', 'in_ptr43', 'in_ptr45', 'in_ptr47', 'in_ptr48', 'in_ptr5', 'in_ptr50', 'in_ptr52', 'in_ptr53', 'in_ptr55', 'in_ptr57', 'in_ptr58', 'in_ptr60', 'in_ptr62', 'in_ptr63', 'in_ptr65', 'in_ptr67', 'in_ptr68', 'in_ptr7', 'in_ptr70', 'in_ptr72', 'in_ptr73', 'in_ptr8', 'out_ptr0', 'out_ptr10', 'out_ptr11', 'out_ptr12', 'out_ptr14', 'out_ptr15', 'out_ptr16', 'out_ptr18', 'out_ptr19', 'out_ptr2', 'out_ptr20', 'out_ptr22', 'out_ptr23', 'out_ptr24', 'out_ptr26', 'out_ptr27', 'out_ptr28', 'out_ptr3', 'out_ptr30', 'out_ptr31', 'out_ptr32', 'out_ptr34', 'out_ptr35', 'out_ptr36', 'out_ptr38', 'out_ptr39', 'out_ptr4', 'out_ptr40', 'out_ptr42', 'out_ptr43', 'out_ptr44', 'out_ptr46', 'out_ptr47', 'out_ptr48', 'out_ptr50', 'out_ptr51', 'out_ptr52', 'out_ptr54', 'out_ptr55', 'out_ptr56', 'out_ptr58', 'out_ptr59', 'out_ptr6', 'out_ptr7', 'out_ptr8'], 'backend_hash': 'B0714AF1B5CA55D285C0AA74C7669268EFE45FDD9B7A0A70183B8D89AC8FCAC4', 'are_deterministic_algorithms_enabled': False, 'assert_indirect_indexing': True, 'autotune_local_cache': True, 'autotune_pointwise': True, 'autotune_remote_cache': None, 'force_disable_caches': False, 'dynamic_scale_rblock': True, 'max_autotune': False, 'max_autotune_pointwise': False, 'min_split_scan_rblock': 256, 'spill_threshold': 16, 'store_cubin': False}, + ) + @triton.jit + def triton_(in_ptr0, 
in_ptr1, in_ptr2, in_ptr3, in_ptr4, in_ptr5, in_ptr6, in_ptr7, in_ptr8, in_ptr9, in_ptr10, in_ptr11, in_ptr12, in_ptr13, in_ptr14, in_ptr15, in_ptr16, in_ptr17, in_ptr18, in_ptr19, in_ptr20, in_ptr21, in_ptr22, in_ptr23, in_ptr24, in_ptr25, in_ptr26, in_ptr27, in_ptr28, in_ptr29, in_ptr30, in_ptr31, in_ptr32, in_ptr33, in_ptr34, in_ptr35, in_ptr36, in_ptr37, in_ptr38, in_ptr39, in_ptr40, in_ptr41, in_ptr42, in_ptr43, in_ptr44, in_ptr45, in_ptr46, in_ptr47, in_ptr48, in_ptr49, in_ptr50, in_ptr51, in_ptr52, in_ptr53, in_ptr54, in_ptr55, in_ptr56, in_ptr57, in_ptr58, in_ptr59, in_ptr60, in_ptr61, in_ptr62, in_ptr63, in_ptr64, in_ptr65, in_ptr66, in_ptr67, in_ptr68, in_ptr69, in_ptr70, in_ptr71, in_ptr72, in_ptr73, in_ptr74, out_ptr0, out_ptr2, out_ptr3, out_ptr4, out_ptr6, out_ptr7, out_ptr8, out_ptr10, out_ptr11, out_ptr12, out_ptr14, out_ptr15, out_ptr16, out_ptr18, out_ptr19, out_ptr20, out_ptr22, out_ptr23, out_ptr24, out_ptr26, out_ptr27, out_ptr28, out_ptr30, out_ptr31, out_ptr32, out_ptr34, out_ptr35, out_ptr36, out_ptr38, out_ptr39, out_ptr40, out_ptr42, out_ptr43, out_ptr44, out_ptr46, out_ptr47, out_ptr48, out_ptr50, out_ptr51, out_ptr52, out_ptr54, out_ptr55, out_ptr56, out_ptr58, out_ptr59): + pid = tl.program_id(0) + XBLOCK: tl.constexpr = 1024 + num_xblocks_0 = tl.cdiv(768, XBLOCK) + num_xblocks_1 = num_xblocks_0 + tl.cdiv(768, XBLOCK) + num_xblocks_2 = num_xblocks_1 + tl.cdiv(768, XBLOCK) + num_xblocks_3 = num_xblocks_2 + tl.cdiv(1769472, XBLOCK) + num_xblocks_4 = num_xblocks_3 + tl.cdiv(2304, XBLOCK) + num_xblocks_5 = num_xblocks_4 + tl.cdiv(589824, XBLOCK) + num_xblocks_6 = num_xblocks_5 + tl.cdiv(768, XBLOCK) + num_xblocks_7 = num_xblocks_6 + tl.cdiv(768, XBLOCK) + num_xblocks_8 = num_xblocks_7 + tl.cdiv(768, XBLOCK) + num_xblocks_9 = num_xblocks_8 + tl.cdiv(2359296, XBLOCK) + num_xblocks_10 = num_xblocks_9 + tl.cdiv(3072, XBLOCK) + num_xblocks_11 = num_xblocks_10 + tl.cdiv(2359296, XBLOCK) + num_xblocks_12 = num_xblocks_11 + tl.cdiv(768, XBLOCK) + num_xblocks_13 = num_xblocks_12 + tl.cdiv(768, XBLOCK) + num_xblocks_14 = num_xblocks_13 + tl.cdiv(768, XBLOCK) + if pid < num_xblocks_0: + pid_offset = pid + xnumel = 768 + rnumel = 1 + xoffset = pid_offset * XBLOCK + xindex = xoffset + tl.arange(0, XBLOCK)[:] + xmask = xindex < xnumel + x0 = xindex + tmp0 = tl.load(in_ptr0 + (x0), xmask) + tmp1 = tl.load(in_ptr1 + (x0), xmask) + tmp6 = tl.load(in_ptr2 + (x0), xmask) + tmp13 = tl.load(in_ptr3 + (x0), xmask) + tmp15 = tl.load(in_ptr4 + (0)) + tmp16 = tl.broadcast_to(tmp15, [XBLOCK]) + tmp2 = tmp1 - tmp0 + tmp3 = 0.09999999999999998 + tmp4 = tmp2 * tmp3 + tmp5 = tmp0 + tmp4 + tmp7 = 0.999 + tmp8 = tmp6 * tmp7 + tmp9 = tmp1 * tmp1 + tmp10 = 0.0010000000000000009 + tmp11 = tmp9 * tmp10 + tmp12 = tmp8 + tmp11 + tmp14 = libdevice.sqrt(tmp12) + tmp17 = libdevice.pow(tmp7, tmp16) + tmp18 = 1.0 + tmp19 = tmp17 - tmp18 + tmp20 = -tmp19 + tmp21 = libdevice.sqrt(tmp20) + tmp22 = tmp14 / tmp21 + tmp23 = 1e-08 + tmp24 = tmp22 + tmp23 + tmp25 = 0.9 + tmp26 = libdevice.pow(tmp25, tmp16) + tmp27 = tmp26 - tmp18 + tmp28 = 100.0 + tmp29 = tmp27 * tmp28 + tmp30 = tl.full([1], 1, tl.int32) + tmp31 = tmp30 / tmp29 + tmp32 = tmp24 / tmp31 + tmp33 = tmp5 / tmp32 + tmp34 = tmp13 + tmp33 + tl.store(out_ptr0 + (x0), tmp5, xmask) + tl.store(out_ptr2 + (x0), tmp34, xmask) + tl.store(out_ptr3 + (x0), tmp12, xmask) + elif pid < num_xblocks_1: + pid_offset = pid - num_xblocks_0 + xnumel = 768 + rnumel = 1 + xoffset = pid_offset * XBLOCK + xindex = xoffset + tl.arange(0, XBLOCK)[:] + xmask = xindex < xnumel + x1 
= xindex + tmp35 = tl.load(in_ptr5 + (x1), xmask) + tmp36 = tl.load(in_ptr6 + (x1), xmask) + tmp41 = tl.load(in_ptr7 + (x1), xmask) + tmp48 = tl.load(in_ptr8 + (x1), xmask) + tmp50 = tl.load(in_ptr9 + (0)) + tmp51 = tl.broadcast_to(tmp50, [XBLOCK]) + tmp37 = tmp36 - tmp35 + tmp38 = 0.09999999999999998 + tmp39 = tmp37 * tmp38 + tmp40 = tmp35 + tmp39 + tmp42 = 0.999 + tmp43 = tmp41 * tmp42 + tmp44 = tmp36 * tmp36 + tmp45 = 0.0010000000000000009 + tmp46 = tmp44 * tmp45 + tmp47 = tmp43 + tmp46 + tmp49 = libdevice.sqrt(tmp47) + tmp52 = libdevice.pow(tmp42, tmp51) + tmp53 = 1.0 + tmp54 = tmp52 - tmp53 + tmp55 = -tmp54 + tmp56 = libdevice.sqrt(tmp55) + tmp57 = tmp49 / tmp56 + tmp58 = 1e-08 + tmp59 = tmp57 + tmp58 + tmp60 = 0.9 + tmp61 = libdevice.pow(tmp60, tmp51) + tmp62 = tmp61 - tmp53 + tmp63 = 100.0 + tmp64 = tmp62 * tmp63 + tmp65 = tl.full([1], 1, tl.int32) + tmp66 = tmp65 / tmp64 + tmp67 = tmp59 / tmp66 + tmp68 = tmp40 / tmp67 + tmp69 = tmp48 + tmp68 + tl.store(out_ptr4 + (x1), tmp40, xmask) + tl.store(out_ptr6 + (x1), tmp69, xmask) + tl.store(out_ptr7 + (x1), tmp47, xmask) + elif pid < num_xblocks_2: + pid_offset = pid - num_xblocks_1 + xnumel = 768 + rnumel = 1 + xoffset = pid_offset * XBLOCK + xindex = xoffset + tl.arange(0, XBLOCK)[:] + xmask = xindex < xnumel + x2 = xindex + tmp70 = tl.load(in_ptr10 + (x2), xmask) + tmp71 = tl.load(in_ptr11 + (x2), xmask) + tmp76 = tl.load(in_ptr12 + (x2), xmask) + tmp83 = tl.load(in_ptr13 + (x2), xmask) + tmp85 = tl.load(in_ptr14 + (0)) + tmp86 = tl.broadcast_to(tmp85, [XBLOCK]) + tmp72 = tmp71 - tmp70 + tmp73 = 0.09999999999999998 + tmp74 = tmp72 * tmp73 + tmp75 = tmp70 + tmp74 + tmp77 = 0.999 + tmp78 = tmp76 * tmp77 + tmp79 = tmp71 * tmp71 + tmp80 = 0.0010000000000000009 + tmp81 = tmp79 * tmp80 + tmp82 = tmp78 + tmp81 + tmp84 = libdevice.sqrt(tmp82) + tmp87 = libdevice.pow(tmp77, tmp86) + tmp88 = 1.0 + tmp89 = tmp87 - tmp88 + tmp90 = -tmp89 + tmp91 = libdevice.sqrt(tmp90) + tmp92 = tmp84 / tmp91 + tmp93 = 1e-08 + tmp94 = tmp92 + tmp93 + tmp95 = 0.9 + tmp96 = libdevice.pow(tmp95, tmp86) + tmp97 = tmp96 - tmp88 + tmp98 = 100.0 + tmp99 = tmp97 * tmp98 + tmp100 = tl.full([1], 1, tl.int32) + tmp101 = tmp100 / tmp99 + tmp102 = tmp94 / tmp101 + tmp103 = tmp75 / tmp102 + tmp104 = tmp83 + tmp103 + tl.store(out_ptr8 + (x2), tmp75, xmask) + tl.store(out_ptr10 + (x2), tmp104, xmask) + tl.store(out_ptr11 + (x2), tmp82, xmask) + elif pid < num_xblocks_3: + pid_offset = pid - num_xblocks_2 + xnumel = 1769472 + rnumel = 1 + xoffset = pid_offset * XBLOCK + xindex = xoffset + tl.arange(0, XBLOCK)[:] + xmask = tl.full([XBLOCK], True, tl.int1) + x3 = xindex + tmp105 = tl.load(in_ptr15 + (x3), None) + tmp106 = tl.load(in_ptr16 + (x3), None) + tmp111 = tl.load(in_ptr17 + (x3), None) + tmp118 = tl.load(in_ptr18 + (x3), None) + tmp120 = tl.load(in_ptr19 + (0)) + tmp121 = tl.broadcast_to(tmp120, [XBLOCK]) + tmp107 = tmp106 - tmp105 + tmp108 = 0.09999999999999998 + tmp109 = tmp107 * tmp108 + tmp110 = tmp105 + tmp109 + tmp112 = 0.999 + tmp113 = tmp111 * tmp112 + tmp114 = tmp106 * tmp106 + tmp115 = 0.0010000000000000009 + tmp116 = tmp114 * tmp115 + tmp117 = tmp113 + tmp116 + tmp119 = libdevice.sqrt(tmp117) + tmp122 = libdevice.pow(tmp112, tmp121) + tmp123 = 1.0 + tmp124 = tmp122 - tmp123 + tmp125 = -tmp124 + tmp126 = libdevice.sqrt(tmp125) + tmp127 = tmp119 / tmp126 + tmp128 = 1e-08 + tmp129 = tmp127 + tmp128 + tmp130 = 0.9 + tmp131 = libdevice.pow(tmp130, tmp121) + tmp132 = tmp131 - tmp123 + tmp133 = 100.0 + tmp134 = tmp132 * tmp133 + tmp135 = tl.full([1], 1, tl.int32) + 
tmp136 = tmp135 / tmp134 + tmp137 = tmp129 / tmp136 + tmp138 = tmp110 / tmp137 + tmp139 = tmp118 + tmp138 + tl.store(out_ptr12 + (x3), tmp110, None) + tl.store(out_ptr14 + (x3), tmp139, None) + tl.store(out_ptr15 + (x3), tmp117, None) + elif pid < num_xblocks_4: + pid_offset = pid - num_xblocks_3 + xnumel = 2304 + rnumel = 1 + xoffset = pid_offset * XBLOCK + xindex = xoffset + tl.arange(0, XBLOCK)[:] + xmask = xindex < xnumel + x4 = xindex + tmp140 = tl.load(in_ptr20 + (x4), xmask) + tmp141 = tl.load(in_ptr21 + (x4), xmask) + tmp146 = tl.load(in_ptr22 + (x4), xmask) + tmp153 = tl.load(in_ptr23 + (x4), xmask) + tmp155 = tl.load(in_ptr24 + (0)) + tmp156 = tl.broadcast_to(tmp155, [XBLOCK]) + tmp142 = tmp141 - tmp140 + tmp143 = 0.09999999999999998 + tmp144 = tmp142 * tmp143 + tmp145 = tmp140 + tmp144 + tmp147 = 0.999 + tmp148 = tmp146 * tmp147 + tmp149 = tmp141 * tmp141 + tmp150 = 0.0010000000000000009 + tmp151 = tmp149 * tmp150 + tmp152 = tmp148 + tmp151 + tmp154 = libdevice.sqrt(tmp152) + tmp157 = libdevice.pow(tmp147, tmp156) + tmp158 = 1.0 + tmp159 = tmp157 - tmp158 + tmp160 = -tmp159 + tmp161 = libdevice.sqrt(tmp160) + tmp162 = tmp154 / tmp161 + tmp163 = 1e-08 + tmp164 = tmp162 + tmp163 + tmp165 = 0.9 + tmp166 = libdevice.pow(tmp165, tmp156) + tmp167 = tmp166 - tmp158 + tmp168 = 100.0 + tmp169 = tmp167 * tmp168 + tmp170 = tl.full([1], 1, tl.int32) + tmp171 = tmp170 / tmp169 + tmp172 = tmp164 / tmp171 + tmp173 = tmp145 / tmp172 + tmp174 = tmp153 + tmp173 + tl.store(out_ptr16 + (x4), tmp145, xmask) + tl.store(out_ptr18 + (x4), tmp174, xmask) + tl.store(out_ptr19 + (x4), tmp152, xmask) + elif pid < num_xblocks_5: + pid_offset = pid - num_xblocks_4 + xnumel = 589824 + rnumel = 1 + xoffset = pid_offset * XBLOCK + xindex = xoffset + tl.arange(0, XBLOCK)[:] + xmask = tl.full([XBLOCK], True, tl.int1) + x5 = xindex + tmp175 = tl.load(in_ptr25 + (x5), None) + tmp176 = tl.load(in_ptr26 + (x5), None) + tmp181 = tl.load(in_ptr27 + (x5), None) + tmp188 = tl.load(in_ptr28 + (x5), None) + tmp190 = tl.load(in_ptr29 + (0)) + tmp191 = tl.broadcast_to(tmp190, [XBLOCK]) + tmp177 = tmp176 - tmp175 + tmp178 = 0.09999999999999998 + tmp179 = tmp177 * tmp178 + tmp180 = tmp175 + tmp179 + tmp182 = 0.999 + tmp183 = tmp181 * tmp182 + tmp184 = tmp176 * tmp176 + tmp185 = 0.0010000000000000009 + tmp186 = tmp184 * tmp185 + tmp187 = tmp183 + tmp186 + tmp189 = libdevice.sqrt(tmp187) + tmp192 = libdevice.pow(tmp182, tmp191) + tmp193 = 1.0 + tmp194 = tmp192 - tmp193 + tmp195 = -tmp194 + tmp196 = libdevice.sqrt(tmp195) + tmp197 = tmp189 / tmp196 + tmp198 = 1e-08 + tmp199 = tmp197 + tmp198 + tmp200 = 0.9 + tmp201 = libdevice.pow(tmp200, tmp191) + tmp202 = tmp201 - tmp193 + tmp203 = 100.0 + tmp204 = tmp202 * tmp203 + tmp205 = tl.full([1], 1, tl.int32) + tmp206 = tmp205 / tmp204 + tmp207 = tmp199 / tmp206 + tmp208 = tmp180 / tmp207 + tmp209 = tmp188 + tmp208 + tl.store(out_ptr20 + (x5), tmp180, None) + tl.store(out_ptr22 + (x5), tmp209, None) + tl.store(out_ptr23 + (x5), tmp187, None) + elif pid < num_xblocks_6: + pid_offset = pid - num_xblocks_5 + xnumel = 768 + rnumel = 1 + xoffset = pid_offset * XBLOCK + xindex = xoffset + tl.arange(0, XBLOCK)[:] + xmask = xindex < xnumel + x6 = xindex + tmp210 = tl.load(in_ptr30 + (x6), xmask) + tmp211 = tl.load(in_ptr31 + (x6), xmask) + tmp216 = tl.load(in_ptr32 + (x6), xmask) + tmp223 = tl.load(in_ptr33 + (x6), xmask) + tmp225 = tl.load(in_ptr34 + (0)) + tmp226 = tl.broadcast_to(tmp225, [XBLOCK]) + tmp212 = tmp211 - tmp210 + tmp213 = 0.09999999999999998 + tmp214 = tmp212 * tmp213 + tmp215 = 
tmp210 + tmp214 + tmp217 = 0.999 + tmp218 = tmp216 * tmp217 + tmp219 = tmp211 * tmp211 + tmp220 = 0.0010000000000000009 + tmp221 = tmp219 * tmp220 + tmp222 = tmp218 + tmp221 + tmp224 = libdevice.sqrt(tmp222) + tmp227 = libdevice.pow(tmp217, tmp226) + tmp228 = 1.0 + tmp229 = tmp227 - tmp228 + tmp230 = -tmp229 + tmp231 = libdevice.sqrt(tmp230) + tmp232 = tmp224 / tmp231 + tmp233 = 1e-08 + tmp234 = tmp232 + tmp233 + tmp235 = 0.9 + tmp236 = libdevice.pow(tmp235, tmp226) + tmp237 = tmp236 - tmp228 + tmp238 = 100.0 + tmp239 = tmp237 * tmp238 + tmp240 = tl.full([1], 1, tl.int32) + tmp241 = tmp240 / tmp239 + tmp242 = tmp234 / tmp241 + tmp243 = tmp215 / tmp242 + tmp244 = tmp223 + tmp243 + tl.store(out_ptr24 + (x6), tmp215, xmask) + tl.store(out_ptr26 + (x6), tmp244, xmask) + tl.store(out_ptr27 + (x6), tmp222, xmask) + elif pid < num_xblocks_7: + pid_offset = pid - num_xblocks_6 + xnumel = 768 + rnumel = 1 + xoffset = pid_offset * XBLOCK + xindex = xoffset + tl.arange(0, XBLOCK)[:] + xmask = xindex < xnumel + x7 = xindex + tmp245 = tl.load(in_ptr35 + (x7), xmask) + tmp246 = tl.load(in_ptr36 + (x7), xmask) + tmp251 = tl.load(in_ptr37 + (x7), xmask) + tmp258 = tl.load(in_ptr38 + (x7), xmask) + tmp260 = tl.load(in_ptr39 + (0)) + tmp261 = tl.broadcast_to(tmp260, [XBLOCK]) + tmp247 = tmp246 - tmp245 + tmp248 = 0.09999999999999998 + tmp249 = tmp247 * tmp248 + tmp250 = tmp245 + tmp249 + tmp252 = 0.999 + tmp253 = tmp251 * tmp252 + tmp254 = tmp246 * tmp246 + tmp255 = 0.0010000000000000009 + tmp256 = tmp254 * tmp255 + tmp257 = tmp253 + tmp256 + tmp259 = libdevice.sqrt(tmp257) + tmp262 = libdevice.pow(tmp252, tmp261) + tmp263 = 1.0 + tmp264 = tmp262 - tmp263 + tmp265 = -tmp264 + tmp266 = libdevice.sqrt(tmp265) + tmp267 = tmp259 / tmp266 + tmp268 = 1e-08 + tmp269 = tmp267 + tmp268 + tmp270 = 0.9 + tmp271 = libdevice.pow(tmp270, tmp261) + tmp272 = tmp271 - tmp263 + tmp273 = 100.0 + tmp274 = tmp272 * tmp273 + tmp275 = tl.full([1], 1, tl.int32) + tmp276 = tmp275 / tmp274 + tmp277 = tmp269 / tmp276 + tmp278 = tmp250 / tmp277 + tmp279 = tmp258 + tmp278 + tl.store(out_ptr28 + (x7), tmp250, xmask) + tl.store(out_ptr30 + (x7), tmp279, xmask) + tl.store(out_ptr31 + (x7), tmp257, xmask) + elif pid < num_xblocks_8: + pid_offset = pid - num_xblocks_7 + xnumel = 768 + rnumel = 1 + xoffset = pid_offset * XBLOCK + xindex = xoffset + tl.arange(0, XBLOCK)[:] + xmask = xindex < xnumel + x8 = xindex + tmp280 = tl.load(in_ptr40 + (x8), xmask) + tmp281 = tl.load(in_ptr41 + (x8), xmask) + tmp286 = tl.load(in_ptr42 + (x8), xmask) + tmp293 = tl.load(in_ptr43 + (x8), xmask) + tmp295 = tl.load(in_ptr44 + (0)) + tmp296 = tl.broadcast_to(tmp295, [XBLOCK]) + tmp282 = tmp281 - tmp280 + tmp283 = 0.09999999999999998 + tmp284 = tmp282 * tmp283 + tmp285 = tmp280 + tmp284 + tmp287 = 0.999 + tmp288 = tmp286 * tmp287 + tmp289 = tmp281 * tmp281 + tmp290 = 0.0010000000000000009 + tmp291 = tmp289 * tmp290 + tmp292 = tmp288 + tmp291 + tmp294 = libdevice.sqrt(tmp292) + tmp297 = libdevice.pow(tmp287, tmp296) + tmp298 = 1.0 + tmp299 = tmp297 - tmp298 + tmp300 = -tmp299 + tmp301 = libdevice.sqrt(tmp300) + tmp302 = tmp294 / tmp301 + tmp303 = 1e-08 + tmp304 = tmp302 + tmp303 + tmp305 = 0.9 + tmp306 = libdevice.pow(tmp305, tmp296) + tmp307 = tmp306 - tmp298 + tmp308 = 100.0 + tmp309 = tmp307 * tmp308 + tmp310 = tl.full([1], 1, tl.int32) + tmp311 = tmp310 / tmp309 + tmp312 = tmp304 / tmp311 + tmp313 = tmp285 / tmp312 + tmp314 = tmp293 + tmp313 + tl.store(out_ptr32 + (x8), tmp285, xmask) + tl.store(out_ptr34 + (x8), tmp314, xmask) + tl.store(out_ptr35 + (x8), 
tmp292, xmask) + elif pid < num_xblocks_9: + pid_offset = pid - num_xblocks_8 + xnumel = 2359296 + rnumel = 1 + xoffset = pid_offset * XBLOCK + xindex = xoffset + tl.arange(0, XBLOCK)[:] + xmask = tl.full([XBLOCK], True, tl.int1) + x9 = xindex + tmp315 = tl.load(in_ptr45 + (x9), None) + tmp316 = tl.load(in_ptr46 + (x9), None) + tmp321 = tl.load(in_ptr47 + (x9), None) + tmp328 = tl.load(in_ptr48 + (x9), None) + tmp330 = tl.load(in_ptr49 + (0)) + tmp331 = tl.broadcast_to(tmp330, [XBLOCK]) + tmp317 = tmp316 - tmp315 + tmp318 = 0.09999999999999998 + tmp319 = tmp317 * tmp318 + tmp320 = tmp315 + tmp319 + tmp322 = 0.999 + tmp323 = tmp321 * tmp322 + tmp324 = tmp316 * tmp316 + tmp325 = 0.0010000000000000009 + tmp326 = tmp324 * tmp325 + tmp327 = tmp323 + tmp326 + tmp329 = libdevice.sqrt(tmp327) + tmp332 = libdevice.pow(tmp322, tmp331) + tmp333 = 1.0 + tmp334 = tmp332 - tmp333 + tmp335 = -tmp334 + tmp336 = libdevice.sqrt(tmp335) + tmp337 = tmp329 / tmp336 + tmp338 = 1e-08 + tmp339 = tmp337 + tmp338 + tmp340 = 0.9 + tmp341 = libdevice.pow(tmp340, tmp331) + tmp342 = tmp341 - tmp333 + tmp343 = 100.0 + tmp344 = tmp342 * tmp343 + tmp345 = tl.full([1], 1, tl.int32) + tmp346 = tmp345 / tmp344 + tmp347 = tmp339 / tmp346 + tmp348 = tmp320 / tmp347 + tmp349 = tmp328 + tmp348 + tl.store(out_ptr36 + (x9), tmp320, None) + tl.store(out_ptr38 + (x9), tmp349, None) + tl.store(out_ptr39 + (x9), tmp327, None) + elif pid < num_xblocks_10: + pid_offset = pid - num_xblocks_9 + xnumel = 3072 + rnumel = 1 + xoffset = pid_offset * XBLOCK + xindex = xoffset + tl.arange(0, XBLOCK)[:] + xmask = xindex < xnumel + x10 = xindex + tmp350 = tl.load(in_ptr50 + (x10), xmask) + tmp351 = tl.load(in_ptr51 + (x10), xmask) + tmp356 = tl.load(in_ptr52 + (x10), xmask) + tmp363 = tl.load(in_ptr53 + (x10), xmask) + tmp365 = tl.load(in_ptr54 + (0)) + tmp366 = tl.broadcast_to(tmp365, [XBLOCK]) + tmp352 = tmp351 - tmp350 + tmp353 = 0.09999999999999998 + tmp354 = tmp352 * tmp353 + tmp355 = tmp350 + tmp354 + tmp357 = 0.999 + tmp358 = tmp356 * tmp357 + tmp359 = tmp351 * tmp351 + tmp360 = 0.0010000000000000009 + tmp361 = tmp359 * tmp360 + tmp362 = tmp358 + tmp361 + tmp364 = libdevice.sqrt(tmp362) + tmp367 = libdevice.pow(tmp357, tmp366) + tmp368 = 1.0 + tmp369 = tmp367 - tmp368 + tmp370 = -tmp369 + tmp371 = libdevice.sqrt(tmp370) + tmp372 = tmp364 / tmp371 + tmp373 = 1e-08 + tmp374 = tmp372 + tmp373 + tmp375 = 0.9 + tmp376 = libdevice.pow(tmp375, tmp366) + tmp377 = tmp376 - tmp368 + tmp378 = 100.0 + tmp379 = tmp377 * tmp378 + tmp380 = tl.full([1], 1, tl.int32) + tmp381 = tmp380 / tmp379 + tmp382 = tmp374 / tmp381 + tmp383 = tmp355 / tmp382 + tmp384 = tmp363 + tmp383 + tl.store(out_ptr40 + (x10), tmp355, xmask) + tl.store(out_ptr42 + (x10), tmp384, xmask) + tl.store(out_ptr43 + (x10), tmp362, xmask) + elif pid < num_xblocks_11: + pid_offset = pid - num_xblocks_10 + xnumel = 2359296 + rnumel = 1 + xoffset = pid_offset * XBLOCK + xindex = xoffset + tl.arange(0, XBLOCK)[:] + xmask = tl.full([XBLOCK], True, tl.int1) + x11 = xindex + tmp385 = tl.load(in_ptr55 + (x11), None) + tmp386 = tl.load(in_ptr56 + (x11), None) + tmp391 = tl.load(in_ptr57 + (x11), None) + tmp398 = tl.load(in_ptr58 + (x11), None) + tmp400 = tl.load(in_ptr59 + (0)) + tmp401 = tl.broadcast_to(tmp400, [XBLOCK]) + tmp387 = tmp386 - tmp385 + tmp388 = 0.09999999999999998 + tmp389 = tmp387 * tmp388 + tmp390 = tmp385 + tmp389 + tmp392 = 0.999 + tmp393 = tmp391 * tmp392 + tmp394 = tmp386 * tmp386 + tmp395 = 0.0010000000000000009 + tmp396 = tmp394 * tmp395 + tmp397 = tmp393 + tmp396 + tmp399 = 
libdevice.sqrt(tmp397) + tmp402 = libdevice.pow(tmp392, tmp401) + tmp403 = 1.0 + tmp404 = tmp402 - tmp403 + tmp405 = -tmp404 + tmp406 = libdevice.sqrt(tmp405) + tmp407 = tmp399 / tmp406 + tmp408 = 1e-08 + tmp409 = tmp407 + tmp408 + tmp410 = 0.9 + tmp411 = libdevice.pow(tmp410, tmp401) + tmp412 = tmp411 - tmp403 + tmp413 = 100.0 + tmp414 = tmp412 * tmp413 + tmp415 = tl.full([1], 1, tl.int32) + tmp416 = tmp415 / tmp414 + tmp417 = tmp409 / tmp416 + tmp418 = tmp390 / tmp417 + tmp419 = tmp398 + tmp418 + tl.store(out_ptr44 + (x11), tmp390, None) + tl.store(out_ptr46 + (x11), tmp419, None) + tl.store(out_ptr47 + (x11), tmp397, None) + elif pid < num_xblocks_12: + pid_offset = pid - num_xblocks_11 + xnumel = 768 + rnumel = 1 + xoffset = pid_offset * XBLOCK + xindex = xoffset + tl.arange(0, XBLOCK)[:] + xmask = xindex < xnumel + x12 = xindex + tmp420 = tl.load(in_ptr60 + (x12), xmask) + tmp421 = tl.load(in_ptr61 + (x12), xmask) + tmp426 = tl.load(in_ptr62 + (x12), xmask) + tmp433 = tl.load(in_ptr63 + (x12), xmask) + tmp435 = tl.load(in_ptr64 + (0)) + tmp436 = tl.broadcast_to(tmp435, [XBLOCK]) + tmp422 = tmp421 - tmp420 + tmp423 = 0.09999999999999998 + tmp424 = tmp422 * tmp423 + tmp425 = tmp420 + tmp424 + tmp427 = 0.999 + tmp428 = tmp426 * tmp427 + tmp429 = tmp421 * tmp421 + tmp430 = 0.0010000000000000009 + tmp431 = tmp429 * tmp430 + tmp432 = tmp428 + tmp431 + tmp434 = libdevice.sqrt(tmp432) + tmp437 = libdevice.pow(tmp427, tmp436) + tmp438 = 1.0 + tmp439 = tmp437 - tmp438 + tmp440 = -tmp439 + tmp441 = libdevice.sqrt(tmp440) + tmp442 = tmp434 / tmp441 + tmp443 = 1e-08 + tmp444 = tmp442 + tmp443 + tmp445 = 0.9 + tmp446 = libdevice.pow(tmp445, tmp436) + tmp447 = tmp446 - tmp438 + tmp448 = 100.0 + tmp449 = tmp447 * tmp448 + tmp450 = tl.full([1], 1, tl.int32) + tmp451 = tmp450 / tmp449 + tmp452 = tmp444 / tmp451 + tmp453 = tmp425 / tmp452 + tmp454 = tmp433 + tmp453 + tl.store(out_ptr48 + (x12), tmp425, xmask) + tl.store(out_ptr50 + (x12), tmp454, xmask) + tl.store(out_ptr51 + (x12), tmp432, xmask) + elif pid < num_xblocks_13: + pid_offset = pid - num_xblocks_12 + xnumel = 768 + rnumel = 1 + xoffset = pid_offset * XBLOCK + xindex = xoffset + tl.arange(0, XBLOCK)[:] + xmask = xindex < xnumel + x13 = xindex + tmp455 = tl.load(in_ptr65 + (x13), xmask) + tmp456 = tl.load(in_ptr66 + (x13), xmask) + tmp461 = tl.load(in_ptr67 + (x13), xmask) + tmp468 = tl.load(in_ptr68 + (x13), xmask) + tmp470 = tl.load(in_ptr69 + (0)) + tmp471 = tl.broadcast_to(tmp470, [XBLOCK]) + tmp457 = tmp456 - tmp455 + tmp458 = 0.09999999999999998 + tmp459 = tmp457 * tmp458 + tmp460 = tmp455 + tmp459 + tmp462 = 0.999 + tmp463 = tmp461 * tmp462 + tmp464 = tmp456 * tmp456 + tmp465 = 0.0010000000000000009 + tmp466 = tmp464 * tmp465 + tmp467 = tmp463 + tmp466 + tmp469 = libdevice.sqrt(tmp467) + tmp472 = libdevice.pow(tmp462, tmp471) + tmp473 = 1.0 + tmp474 = tmp472 - tmp473 + tmp475 = -tmp474 + tmp476 = libdevice.sqrt(tmp475) + tmp477 = tmp469 / tmp476 + tmp478 = 1e-08 + tmp479 = tmp477 + tmp478 + tmp480 = 0.9 + tmp481 = libdevice.pow(tmp480, tmp471) + tmp482 = tmp481 - tmp473 + tmp483 = 100.0 + tmp484 = tmp482 * tmp483 + tmp485 = tl.full([1], 1, tl.int32) + tmp486 = tmp485 / tmp484 + tmp487 = tmp479 / tmp486 + tmp488 = tmp460 / tmp487 + tmp489 = tmp468 + tmp488 + tl.store(out_ptr52 + (x13), tmp460, xmask) + tl.store(out_ptr54 + (x13), tmp489, xmask) + tl.store(out_ptr55 + (x13), tmp467, xmask) + elif pid < num_xblocks_14: + pid_offset = pid - num_xblocks_13 + xnumel = 768 + rnumel = 1 + xoffset = pid_offset * XBLOCK + xindex = xoffset + 
tl.arange(0, XBLOCK)[:] + xmask = xindex < xnumel + x14 = xindex + tmp490 = tl.load(in_ptr70 + (x14), xmask) + tmp491 = tl.load(in_ptr71 + (x14), xmask) + tmp496 = tl.load(in_ptr72 + (x14), xmask) + tmp503 = tl.load(in_ptr73 + (x14), xmask) + tmp505 = tl.load(in_ptr74 + (0)) + tmp506 = tl.broadcast_to(tmp505, [XBLOCK]) + tmp492 = tmp491 - tmp490 + tmp493 = 0.09999999999999998 + tmp494 = tmp492 * tmp493 + tmp495 = tmp490 + tmp494 + tmp497 = 0.999 + tmp498 = tmp496 * tmp497 + tmp499 = tmp491 * tmp491 + tmp500 = 0.0010000000000000009 + tmp501 = tmp499 * tmp500 + tmp502 = tmp498 + tmp501 + tmp504 = libdevice.sqrt(tmp502) + tmp507 = libdevice.pow(tmp497, tmp506) + tmp508 = 1.0 + tmp509 = tmp507 - tmp508 + tmp510 = -tmp509 + tmp511 = libdevice.sqrt(tmp510) + tmp512 = tmp504 / tmp511 + tmp513 = 1e-08 + tmp514 = tmp512 + tmp513 + tmp515 = 0.9 + tmp516 = libdevice.pow(tmp515, tmp506) + tmp517 = tmp516 - tmp508 + tmp518 = 100.0 + tmp519 = tmp517 * tmp518 + tmp520 = tl.full([1], 1, tl.int32) + tmp521 = tmp520 / tmp519 + tmp522 = tmp514 / tmp521 + tmp523 = tmp495 / tmp522 + tmp524 = tmp503 + tmp523 + tl.store(out_ptr56 + (x14), tmp495, xmask) + tl.store(out_ptr58 + (x14), tmp524, xmask) + tl.store(out_ptr59 + (x14), tmp502, xmask) + else: + pass + ''', device_str='cuda') + + + async_compile.wait(globals()) + del async_compile + + def call(args): + arg0_1, arg1_1, arg2_1, arg3_1, arg4_1, arg5_1, arg6_1, arg7_1, arg8_1, arg9_1, arg10_1, arg11_1, arg12_1, arg13_1, arg14_1, arg15_1, arg16_1, arg17_1, arg18_1, arg19_1, arg20_1, arg21_1, arg22_1, arg23_1, arg24_1, arg25_1, arg26_1, arg27_1, arg28_1, arg29_1, arg30_1, arg31_1, arg32_1, arg33_1, arg34_1, arg35_1, arg36_1, arg37_1, arg38_1, arg39_1, arg40_1, arg41_1, arg42_1, arg43_1, arg44_1, arg45_1, arg46_1, arg47_1, arg48_1, arg49_1, arg50_1, arg51_1, arg52_1, arg53_1, arg54_1, arg55_1, arg56_1, arg57_1, arg58_1, arg59_1, arg60_1, arg61_1, arg62_1, arg63_1, arg64_1, arg65_1, arg66_1, arg67_1, arg68_1, arg69_1, arg70_1, arg71_1, arg72_1, arg73_1, arg74_1, arg75_1, arg76_1, arg77_1, arg78_1, arg79_1, arg80_1, arg81_1, arg82_1, arg83_1, arg84_1, arg85_1, arg86_1, arg87_1, arg88_1, arg89_1, arg90_1, arg91_1, arg92_1, arg93_1, arg94_1, arg95_1, arg96_1, arg97_1, arg98_1, arg99_1, arg100_1, arg101_1, arg102_1, arg103_1, arg104_1, arg105_1, arg106_1, arg107_1, arg108_1, arg109_1, arg110_1, arg111_1, arg112_1, arg113_1, arg114_1, arg115_1, arg116_1, arg117_1, arg118_1, arg119_1, arg120_1, arg121_1, arg122_1, arg123_1, arg124_1, arg125_1, arg126_1, arg127_1, arg128_1, arg129_1, arg130_1, arg131_1, arg132_1, arg133_1, arg134_1, arg135_1, arg136_1, arg137_1, arg138_1, arg139_1, arg140_1, arg141_1, arg142_1, arg143_1, arg144_1, arg145_1, arg146_1, arg147_1, arg148_1, arg149_1, arg150_1, arg151_1, arg152_1, arg153_1, arg154_1, arg155_1, arg156_1, arg157_1, arg158_1, arg159_1, arg160_1, arg161_1, arg162_1, arg163_1, arg164_1, arg165_1, arg166_1, arg167_1, arg168_1, arg169_1, arg170_1, arg171_1, arg172_1, arg173_1, arg174_1, arg175_1, arg176_1, arg177_1, arg178_1, arg179_1, arg180_1, arg181_1, arg182_1, arg183_1, arg184_1, arg185_1, arg186_1, arg187_1, arg188_1, arg189_1, arg190_1, arg191_1, arg192_1, arg193_1, arg194_1, arg195_1, arg196_1, arg197_1, arg198_1, arg199_1, arg200_1, arg201_1, arg202_1, arg203_1, arg204_1, arg205_1, arg206_1, arg207_1, arg208_1, arg209_1, arg210_1, arg211_1, arg212_1, arg213_1, arg214_1, arg215_1, arg216_1, arg217_1, arg218_1, arg219_1, arg220_1, arg221_1, arg222_1, arg223_1, arg224_1, arg225_1, arg226_1, arg227_1, arg228_1, arg229_1, 
arg230_1, arg231_1, arg232_1, arg233_1, arg234_1, arg235_1, arg236_1, arg237_1, arg238_1, arg239_1, arg240_1, arg241_1, arg242_1, arg243_1, arg244_1, arg245_1, arg246_1, arg247_1, arg248_1, arg249_1, arg250_1, arg251_1, arg252_1, arg253_1, arg254_1, arg255_1, arg256_1, arg257_1, arg258_1, arg259_1, arg260_1, arg261_1, arg262_1, arg263_1, arg264_1, arg265_1, arg266_1, arg267_1, arg268_1, arg269_1, arg270_1, arg271_1, arg272_1, arg273_1, arg274_1, arg275_1, arg276_1, arg277_1, arg278_1, arg279_1, arg280_1, arg281_1, arg282_1, arg283_1, arg284_1, arg285_1, arg286_1, arg287_1, arg288_1, arg289_1, arg290_1, arg291_1, arg292_1, arg293_1, arg294_1, arg295_1, arg296_1, arg297_1, arg298_1, arg299_1, arg300_1, arg301_1, arg302_1, arg303_1, arg304_1, arg305_1, arg306_1, arg307_1, arg308_1, arg309_1, arg310_1, arg311_1, arg312_1, arg313_1, arg314_1, arg315_1, arg316_1, arg317_1, arg318_1, arg319_1, arg320_1, arg321_1, arg322_1, arg323_1, arg324_1, arg325_1, arg326_1, arg327_1, arg328_1, arg329_1, arg330_1, arg331_1, arg332_1, arg333_1, arg334_1, arg335_1, arg336_1, arg337_1, arg338_1, arg339_1, arg340_1, arg341_1, arg342_1, arg343_1, arg344_1, arg345_1, arg346_1, arg347_1, arg348_1, arg349_1, arg350_1, arg351_1, arg352_1, arg353_1, arg354_1, arg355_1, arg356_1, arg357_1, arg358_1, arg359_1, arg360_1, arg361_1, arg362_1, arg363_1, arg364_1, arg365_1, arg366_1, arg367_1, arg368_1, arg369_1, arg370_1, arg371_1, arg372_1, arg373_1, arg374_1, arg375_1, arg376_1, arg377_1, arg378_1, arg379_1, arg380_1, arg381_1, arg382_1, arg383_1, arg384_1, arg385_1, arg386_1, arg387_1, arg388_1, arg389_1, arg390_1, arg391_1, arg392_1, arg393_1, arg394_1, arg395_1, arg396_1, arg397_1, arg398_1, arg399_1, arg400_1, arg401_1, arg402_1, arg403_1, arg404_1, arg405_1, arg406_1, arg407_1, arg408_1, arg409_1, arg410_1, arg411_1, arg412_1, arg413_1, arg414_1, arg415_1, arg416_1, arg417_1, arg418_1, arg419_1, arg420_1, arg421_1, arg422_1, arg423_1, arg424_1, arg425_1, arg426_1, arg427_1, arg428_1, arg429_1, arg430_1, arg431_1, arg432_1, arg433_1, arg434_1, arg435_1, arg436_1, arg437_1, arg438_1, arg439_1, arg440_1, arg441_1, arg442_1, arg443_1, arg444_1, arg445_1, arg446_1, arg447_1, arg448_1, arg449_1, arg450_1, arg451_1, arg452_1, arg453_1, arg454_1, arg455_1, arg456_1, arg457_1, arg458_1, arg459_1, arg460_1, arg461_1, arg462_1, arg463_1, arg464_1, arg465_1, arg466_1, arg467_1, arg468_1, arg469_1, arg470_1, arg471_1, arg472_1, arg473_1, arg474_1, arg475_1, arg476_1, arg477_1, arg478_1, arg479_1, arg480_1, arg481_1, arg482_1, arg483_1, arg484_1, arg485_1, arg486_1, arg487_1, arg488_1, arg489_1, arg490_1, arg491_1, arg492_1, arg493_1, arg494_1, arg495_1, arg496_1, arg497_1, arg498_1, arg499_1, arg500_1, arg501_1, arg502_1, arg503_1, arg504_1, arg505_1, arg506_1, arg507_1, arg508_1, arg509_1, arg510_1, arg511_1, arg512_1, arg513_1, arg514_1, arg515_1, arg516_1, arg517_1, arg518_1, arg519_1, arg520_1, arg521_1, arg522_1, arg523_1, arg524_1, arg525_1, arg526_1, arg527_1, arg528_1, arg529_1, arg530_1, arg531_1, arg532_1, arg533_1, arg534_1, arg535_1, arg536_1, arg537_1, arg538_1, arg539_1, arg540_1, arg541_1, arg542_1, arg543_1, arg544_1, arg545_1, arg546_1, arg547_1, arg548_1, arg549_1, arg550_1, arg551_1, arg552_1, arg553_1, arg554_1, arg555_1, arg556_1, arg557_1, arg558_1, arg559_1, arg560_1, arg561_1, arg562_1, arg563_1, arg564_1, arg565_1, arg566_1, arg567_1, arg568_1, arg569_1, arg570_1, arg571_1, arg572_1, arg573_1, arg574_1, arg575_1, arg576_1, arg577_1, arg578_1, arg579_1, arg580_1, arg581_1, arg582_1, arg583_1, arg584_1, 
arg585_1, arg586_1, arg587_1, arg588_1, arg589_1, arg590_1, arg591_1, arg592_1, arg593_1, arg594_1, arg595_1, arg596_1, arg597_1, arg598_1, arg599_1, arg600_1, arg601_1, arg602_1, arg603_1, arg604_1, arg605_1, arg606_1, arg607_1, arg608_1, arg609_1, arg610_1, arg611_1, arg612_1, arg613_1, arg614_1, arg615_1, arg616_1, arg617_1, arg618_1, arg619_1, arg620_1, arg621_1, arg622_1, arg623_1, arg624_1, arg625_1, arg626_1, arg627_1, arg628_1, arg629_1, arg630_1, arg631_1, arg632_1, arg633_1, arg634_1, arg635_1, arg636_1, arg637_1, arg638_1, arg639_1, arg640_1, arg641_1, arg642_1, arg643_1, arg644_1, arg645_1, arg646_1, arg647_1, arg648_1, arg649_1, arg650_1, arg651_1, arg652_1, arg653_1, arg654_1, arg655_1, arg656_1, arg657_1, arg658_1, arg659_1, arg660_1, arg661_1, arg662_1, arg663_1, arg664_1, arg665_1, arg666_1, arg667_1, arg668_1, arg669_1, arg670_1, arg671_1, arg672_1, arg673_1, arg674_1, arg675_1, arg676_1, arg677_1, arg678_1, arg679_1, arg680_1, arg681_1, arg682_1, arg683_1, arg684_1, arg685_1, arg686_1, arg687_1, arg688_1, arg689_1, arg690_1, arg691_1, arg692_1, arg693_1, arg694_1, arg695_1, arg696_1, arg697_1, arg698_1, arg699_1, arg700_1, arg701_1, arg702_1, arg703_1, arg704_1, arg705_1, arg706_1, arg707_1, arg708_1, arg709_1, arg710_1, arg711_1, arg712_1, arg713_1, arg714_1, arg715_1, arg716_1, arg717_1, arg718_1, arg719_1, arg720_1, arg721_1, arg722_1, arg723_1, arg724_1, arg725_1, arg726_1, arg727_1, arg728_1, arg729_1, arg730_1, arg731_1, arg732_1, arg733_1, arg734_1, arg735_1, arg736_1, arg737_1, arg738_1, arg739_1 = args + args.clear() + assert_size_stride(arg0_1, (50304, 768), (768, 1)) + assert_size_stride(arg1_1, (1024, 768), (768, 1)) + assert_size_stride(arg2_1, (768, ), (1, )) + assert_size_stride(arg3_1, (768, ), (1, )) + assert_size_stride(arg4_1, (2304, 768), (768, 1)) + assert_size_stride(arg5_1, (2304, ), (1, )) + assert_size_stride(arg6_1, (768, 768), (768, 1)) + assert_size_stride(arg7_1, (768, ), (1, )) + assert_size_stride(arg8_1, (768, ), (1, )) + assert_size_stride(arg9_1, (768, ), (1, )) + assert_size_stride(arg10_1, (3072, 768), (768, 1)) + assert_size_stride(arg11_1, (3072, ), (1, )) + assert_size_stride(arg12_1, (768, 3072), (3072, 1)) + assert_size_stride(arg13_1, (768, ), (1, )) + assert_size_stride(arg14_1, (768, ), (1, )) + assert_size_stride(arg15_1, (768, ), (1, )) + assert_size_stride(arg16_1, (2304, 768), (768, 1)) + assert_size_stride(arg17_1, (2304, ), (1, )) + assert_size_stride(arg18_1, (768, 768), (768, 1)) + assert_size_stride(arg19_1, (768, ), (1, )) + assert_size_stride(arg20_1, (768, ), (1, )) + assert_size_stride(arg21_1, (768, ), (1, )) + assert_size_stride(arg22_1, (3072, 768), (768, 1)) + assert_size_stride(arg23_1, (3072, ), (1, )) + assert_size_stride(arg24_1, (768, 3072), (3072, 1)) + assert_size_stride(arg25_1, (768, ), (1, )) + assert_size_stride(arg26_1, (768, ), (1, )) + assert_size_stride(arg27_1, (768, ), (1, )) + assert_size_stride(arg28_1, (2304, 768), (768, 1)) + assert_size_stride(arg29_1, (2304, ), (1, )) + assert_size_stride(arg30_1, (768, 768), (768, 1)) + assert_size_stride(arg31_1, (768, ), (1, )) + assert_size_stride(arg32_1, (768, ), (1, )) + assert_size_stride(arg33_1, (768, ), (1, )) + assert_size_stride(arg34_1, (3072, 768), (768, 1)) + assert_size_stride(arg35_1, (3072, ), (1, )) + assert_size_stride(arg36_1, (768, 3072), (3072, 1)) + assert_size_stride(arg37_1, (768, ), (1, )) + assert_size_stride(arg38_1, (768, ), (1, )) + assert_size_stride(arg39_1, (768, ), (1, )) + assert_size_stride(arg40_1, (2304, 768), (768, 
1)) + assert_size_stride(arg41_1, (2304, ), (1, )) + assert_size_stride(arg42_1, (768, 768), (768, 1)) + assert_size_stride(arg43_1, (768, ), (1, )) + assert_size_stride(arg44_1, (768, ), (1, )) + assert_size_stride(arg45_1, (768, ), (1, )) + assert_size_stride(arg46_1, (3072, 768), (768, 1)) + assert_size_stride(arg47_1, (3072, ), (1, )) + assert_size_stride(arg48_1, (768, 3072), (3072, 1)) + assert_size_stride(arg49_1, (768, ), (1, )) + assert_size_stride(arg50_1, (768, ), (1, )) + assert_size_stride(arg51_1, (768, ), (1, )) + assert_size_stride(arg52_1, (2304, 768), (768, 1)) + assert_size_stride(arg53_1, (2304, ), (1, )) + assert_size_stride(arg54_1, (768, 768), (768, 1)) + assert_size_stride(arg55_1, (768, ), (1, )) + assert_size_stride(arg56_1, (768, ), (1, )) + assert_size_stride(arg57_1, (768, ), (1, )) + assert_size_stride(arg58_1, (3072, 768), (768, 1)) + assert_size_stride(arg59_1, (3072, ), (1, )) + assert_size_stride(arg60_1, (768, 3072), (3072, 1)) + assert_size_stride(arg61_1, (768, ), (1, )) + assert_size_stride(arg62_1, (768, ), (1, )) + assert_size_stride(arg63_1, (768, ), (1, )) + assert_size_stride(arg64_1, (2304, 768), (768, 1)) + assert_size_stride(arg65_1, (2304, ), (1, )) + assert_size_stride(arg66_1, (768, 768), (768, 1)) + assert_size_stride(arg67_1, (768, ), (1, )) + assert_size_stride(arg68_1, (768, ), (1, )) + assert_size_stride(arg69_1, (768, ), (1, )) + assert_size_stride(arg70_1, (3072, 768), (768, 1)) + assert_size_stride(arg71_1, (3072, ), (1, )) + assert_size_stride(arg72_1, (768, 3072), (3072, 1)) + assert_size_stride(arg73_1, (768, ), (1, )) + assert_size_stride(arg74_1, (768, ), (1, )) + assert_size_stride(arg75_1, (768, ), (1, )) + assert_size_stride(arg76_1, (2304, 768), (768, 1)) + assert_size_stride(arg77_1, (2304, ), (1, )) + assert_size_stride(arg78_1, (768, 768), (768, 1)) + assert_size_stride(arg79_1, (768, ), (1, )) + assert_size_stride(arg80_1, (768, ), (1, )) + assert_size_stride(arg81_1, (768, ), (1, )) + assert_size_stride(arg82_1, (3072, 768), (768, 1)) + assert_size_stride(arg83_1, (3072, ), (1, )) + assert_size_stride(arg84_1, (768, 3072), (3072, 1)) + assert_size_stride(arg85_1, (768, ), (1, )) + assert_size_stride(arg86_1, (768, ), (1, )) + assert_size_stride(arg87_1, (768, ), (1, )) + assert_size_stride(arg88_1, (2304, 768), (768, 1)) + assert_size_stride(arg89_1, (2304, ), (1, )) + assert_size_stride(arg90_1, (768, 768), (768, 1)) + assert_size_stride(arg91_1, (768, ), (1, )) + assert_size_stride(arg92_1, (768, ), (1, )) + assert_size_stride(arg93_1, (768, ), (1, )) + assert_size_stride(arg94_1, (3072, 768), (768, 1)) + assert_size_stride(arg95_1, (3072, ), (1, )) + assert_size_stride(arg96_1, (768, 3072), (3072, 1)) + assert_size_stride(arg97_1, (768, ), (1, )) + assert_size_stride(arg98_1, (768, ), (1, )) + assert_size_stride(arg99_1, (768, ), (1, )) + assert_size_stride(arg100_1, (2304, 768), (768, 1)) + assert_size_stride(arg101_1, (2304, ), (1, )) + assert_size_stride(arg102_1, (768, 768), (768, 1)) + assert_size_stride(arg103_1, (768, ), (1, )) + assert_size_stride(arg104_1, (768, ), (1, )) + assert_size_stride(arg105_1, (768, ), (1, )) + assert_size_stride(arg106_1, (3072, 768), (768, 1)) + assert_size_stride(arg107_1, (3072, ), (1, )) + assert_size_stride(arg108_1, (768, 3072), (3072, 1)) + assert_size_stride(arg109_1, (768, ), (1, )) + assert_size_stride(arg110_1, (768, ), (1, )) + assert_size_stride(arg111_1, (768, ), (1, )) + assert_size_stride(arg112_1, (2304, 768), (768, 1)) + assert_size_stride(arg113_1, (2304, ), (1, 
)) + assert_size_stride(arg114_1, (768, 768), (768, 1)) + assert_size_stride(arg115_1, (768, ), (1, )) + assert_size_stride(arg116_1, (768, ), (1, )) + assert_size_stride(arg117_1, (768, ), (1, )) + assert_size_stride(arg118_1, (3072, 768), (768, 1)) + assert_size_stride(arg119_1, (3072, ), (1, )) + assert_size_stride(arg120_1, (768, 3072), (3072, 1)) + assert_size_stride(arg121_1, (768, ), (1, )) + assert_size_stride(arg122_1, (768, ), (1, )) + assert_size_stride(arg123_1, (768, ), (1, )) + assert_size_stride(arg124_1, (2304, 768), (768, 1)) + assert_size_stride(arg125_1, (2304, ), (1, )) + assert_size_stride(arg126_1, (768, 768), (768, 1)) + assert_size_stride(arg127_1, (768, ), (1, )) + assert_size_stride(arg128_1, (768, ), (1, )) + assert_size_stride(arg129_1, (768, ), (1, )) + assert_size_stride(arg130_1, (3072, 768), (768, 1)) + assert_size_stride(arg131_1, (3072, ), (1, )) + assert_size_stride(arg132_1, (768, 3072), (3072, 1)) + assert_size_stride(arg133_1, (768, ), (1, )) + assert_size_stride(arg134_1, (768, ), (1, )) + assert_size_stride(arg135_1, (768, ), (1, )) + assert_size_stride(arg136_1, (2304, 768), (768, 1)) + assert_size_stride(arg137_1, (2304, ), (1, )) + assert_size_stride(arg138_1, (768, 768), (768, 1)) + assert_size_stride(arg139_1, (768, ), (1, )) + assert_size_stride(arg140_1, (768, ), (1, )) + assert_size_stride(arg141_1, (768, ), (1, )) + assert_size_stride(arg142_1, (3072, 768), (768, 1)) + assert_size_stride(arg143_1, (3072, ), (1, )) + assert_size_stride(arg144_1, (768, 3072), (3072, 1)) + assert_size_stride(arg145_1, (768, ), (1, )) + assert_size_stride(arg146_1, (768, ), (1, )) + assert_size_stride(arg147_1, (768, ), (1, )) + assert_size_stride(arg148_1, (), ()) + assert_size_stride(arg149_1, (1024, 768), (768, 1)) + assert_size_stride(arg150_1, (1024, 768), (768, 1)) + assert_size_stride(arg151_1, (50304, 768), (768, 1)) + assert_size_stride(arg152_1, (1024, 768), (768, 1)) + assert_size_stride(arg153_1, (768, ), (1, )) + assert_size_stride(arg154_1, (768, ), (1, )) + assert_size_stride(arg155_1, (2304, 768), (768, 1)) + assert_size_stride(arg156_1, (2304, ), (1, )) + assert_size_stride(arg157_1, (768, 768), (768, 1)) + assert_size_stride(arg158_1, (768, ), (1, )) + assert_size_stride(arg159_1, (768, ), (1, )) + assert_size_stride(arg160_1, (768, ), (1, )) + assert_size_stride(arg161_1, (3072, 768), (768, 1)) + assert_size_stride(arg162_1, (3072, ), (1, )) + assert_size_stride(arg163_1, (768, 3072), (3072, 1)) + assert_size_stride(arg164_1, (768, ), (1, )) + assert_size_stride(arg165_1, (768, ), (1, )) + assert_size_stride(arg166_1, (768, ), (1, )) + assert_size_stride(arg167_1, (2304, 768), (768, 1)) + assert_size_stride(arg168_1, (2304, ), (1, )) + assert_size_stride(arg169_1, (768, 768), (768, 1)) + assert_size_stride(arg170_1, (768, ), (1, )) + assert_size_stride(arg171_1, (768, ), (1, )) + assert_size_stride(arg172_1, (768, ), (1, )) + assert_size_stride(arg173_1, (3072, 768), (768, 1)) + assert_size_stride(arg174_1, (3072, ), (1, )) + assert_size_stride(arg175_1, (768, 3072), (3072, 1)) + assert_size_stride(arg176_1, (768, ), (1, )) + assert_size_stride(arg177_1, (768, ), (1, )) + assert_size_stride(arg178_1, (768, ), (1, )) + assert_size_stride(arg179_1, (2304, 768), (768, 1)) + assert_size_stride(arg180_1, (2304, ), (1, )) + assert_size_stride(arg181_1, (768, 768), (768, 1)) + assert_size_stride(arg182_1, (768, ), (1, )) + assert_size_stride(arg183_1, (768, ), (1, )) + assert_size_stride(arg184_1, (768, ), (1, )) + assert_size_stride(arg185_1, (3072, 
768), (768, 1)) + assert_size_stride(arg186_1, (3072, ), (1, )) + assert_size_stride(arg187_1, (768, 3072), (3072, 1)) + assert_size_stride(arg188_1, (768, ), (1, )) + assert_size_stride(arg189_1, (768, ), (1, )) + assert_size_stride(arg190_1, (768, ), (1, )) + assert_size_stride(arg191_1, (2304, 768), (768, 1)) + assert_size_stride(arg192_1, (2304, ), (1, )) + assert_size_stride(arg193_1, (768, 768), (768, 1)) + assert_size_stride(arg194_1, (768, ), (1, )) + assert_size_stride(arg195_1, (768, ), (1, )) + assert_size_stride(arg196_1, (768, ), (1, )) + assert_size_stride(arg197_1, (3072, 768), (768, 1)) + assert_size_stride(arg198_1, (3072, ), (1, )) + assert_size_stride(arg199_1, (768, 3072), (3072, 1)) + assert_size_stride(arg200_1, (768, ), (1, )) + assert_size_stride(arg201_1, (768, ), (1, )) + assert_size_stride(arg202_1, (768, ), (1, )) + assert_size_stride(arg203_1, (2304, 768), (768, 1)) + assert_size_stride(arg204_1, (2304, ), (1, )) + assert_size_stride(arg205_1, (768, 768), (768, 1)) + assert_size_stride(arg206_1, (768, ), (1, )) + assert_size_stride(arg207_1, (768, ), (1, )) + assert_size_stride(arg208_1, (768, ), (1, )) + assert_size_stride(arg209_1, (3072, 768), (768, 1)) + assert_size_stride(arg210_1, (3072, ), (1, )) + assert_size_stride(arg211_1, (768, 3072), (3072, 1)) + assert_size_stride(arg212_1, (768, ), (1, )) + assert_size_stride(arg213_1, (768, ), (1, )) + assert_size_stride(arg214_1, (768, ), (1, )) + assert_size_stride(arg215_1, (2304, 768), (768, 1)) + assert_size_stride(arg216_1, (2304, ), (1, )) + assert_size_stride(arg217_1, (768, 768), (768, 1)) + assert_size_stride(arg218_1, (768, ), (1, )) + assert_size_stride(arg219_1, (768, ), (1, )) + assert_size_stride(arg220_1, (768, ), (1, )) + assert_size_stride(arg221_1, (3072, 768), (768, 1)) + assert_size_stride(arg222_1, (3072, ), (1, )) + assert_size_stride(arg223_1, (768, 3072), (3072, 1)) + assert_size_stride(arg224_1, (768, ), (1, )) + assert_size_stride(arg225_1, (768, ), (1, )) + assert_size_stride(arg226_1, (768, ), (1, )) + assert_size_stride(arg227_1, (2304, 768), (768, 1)) + assert_size_stride(arg228_1, (2304, ), (1, )) + assert_size_stride(arg229_1, (768, 768), (768, 1)) + assert_size_stride(arg230_1, (768, ), (1, )) + assert_size_stride(arg231_1, (768, ), (1, )) + assert_size_stride(arg232_1, (768, ), (1, )) + assert_size_stride(arg233_1, (3072, 768), (768, 1)) + assert_size_stride(arg234_1, (3072, ), (1, )) + assert_size_stride(arg235_1, (768, 3072), (3072, 1)) + assert_size_stride(arg236_1, (768, ), (1, )) + assert_size_stride(arg237_1, (768, ), (1, )) + assert_size_stride(arg238_1, (768, ), (1, )) + assert_size_stride(arg239_1, (2304, 768), (768, 1)) + assert_size_stride(arg240_1, (2304, ), (1, )) + assert_size_stride(arg241_1, (768, 768), (768, 1)) + assert_size_stride(arg242_1, (768, ), (1, )) + assert_size_stride(arg243_1, (768, ), (1, )) + assert_size_stride(arg244_1, (768, ), (1, )) + assert_size_stride(arg245_1, (3072, 768), (768, 1)) + assert_size_stride(arg246_1, (3072, ), (1, )) + assert_size_stride(arg247_1, (768, 3072), (3072, 1)) + assert_size_stride(arg248_1, (768, ), (1, )) + assert_size_stride(arg249_1, (768, ), (1, )) + assert_size_stride(arg250_1, (768, ), (1, )) + assert_size_stride(arg251_1, (2304, 768), (768, 1)) + assert_size_stride(arg252_1, (2304, ), (1, )) + assert_size_stride(arg253_1, (768, 768), (768, 1)) + assert_size_stride(arg254_1, (768, ), (1, )) + assert_size_stride(arg255_1, (768, ), (1, )) + assert_size_stride(arg256_1, (768, ), (1, )) + 
assert_size_stride(arg257_1, (3072, 768), (768, 1)) + assert_size_stride(arg258_1, (3072, ), (1, )) + assert_size_stride(arg259_1, (768, 3072), (3072, 1)) + assert_size_stride(arg260_1, (768, ), (1, )) + assert_size_stride(arg261_1, (768, ), (1, )) + assert_size_stride(arg262_1, (768, ), (1, )) + assert_size_stride(arg263_1, (2304, 768), (768, 1)) + assert_size_stride(arg264_1, (2304, ), (1, )) + assert_size_stride(arg265_1, (768, 768), (768, 1)) + assert_size_stride(arg266_1, (768, ), (1, )) + assert_size_stride(arg267_1, (768, ), (1, )) + assert_size_stride(arg268_1, (768, ), (1, )) + assert_size_stride(arg269_1, (3072, 768), (768, 1)) + assert_size_stride(arg270_1, (3072, ), (1, )) + assert_size_stride(arg271_1, (768, 3072), (3072, 1)) + assert_size_stride(arg272_1, (768, ), (1, )) + assert_size_stride(arg273_1, (768, ), (1, )) + assert_size_stride(arg274_1, (768, ), (1, )) + assert_size_stride(arg275_1, (2304, 768), (768, 1)) + assert_size_stride(arg276_1, (2304, ), (1, )) + assert_size_stride(arg277_1, (768, 768), (768, 1)) + assert_size_stride(arg278_1, (768, ), (1, )) + assert_size_stride(arg279_1, (768, ), (1, )) + assert_size_stride(arg280_1, (768, ), (1, )) + assert_size_stride(arg281_1, (3072, 768), (768, 1)) + assert_size_stride(arg282_1, (3072, ), (1, )) + assert_size_stride(arg283_1, (768, 3072), (3072, 1)) + assert_size_stride(arg284_1, (768, ), (1, )) + assert_size_stride(arg285_1, (768, ), (1, )) + assert_size_stride(arg286_1, (768, ), (1, )) + assert_size_stride(arg287_1, (2304, 768), (768, 1)) + assert_size_stride(arg288_1, (2304, ), (1, )) + assert_size_stride(arg289_1, (768, 768), (768, 1)) + assert_size_stride(arg290_1, (768, ), (1, )) + assert_size_stride(arg291_1, (768, ), (1, )) + assert_size_stride(arg292_1, (768, ), (1, )) + assert_size_stride(arg293_1, (3072, 768), (768, 1)) + assert_size_stride(arg294_1, (3072, ), (1, )) + assert_size_stride(arg295_1, (768, 3072), (3072, 1)) + assert_size_stride(arg296_1, (768, ), (1, )) + assert_size_stride(arg297_1, (768, ), (1, )) + assert_size_stride(arg298_1, (768, ), (1, )) + assert_size_stride(arg299_1, (50304, 768), (768, 1)) + assert_size_stride(arg300_1, (768, ), (1, )) + assert_size_stride(arg301_1, (768, ), (1, )) + assert_size_stride(arg302_1, (2304, 768), (768, 1)) + assert_size_stride(arg303_1, (2304, ), (1, )) + assert_size_stride(arg304_1, (768, 768), (768, 1)) + assert_size_stride(arg305_1, (768, ), (1, )) + assert_size_stride(arg306_1, (768, ), (1, )) + assert_size_stride(arg307_1, (768, ), (1, )) + assert_size_stride(arg308_1, (3072, 768), (768, 1)) + assert_size_stride(arg309_1, (3072, ), (1, )) + assert_size_stride(arg310_1, (768, 3072), (3072, 1)) + assert_size_stride(arg311_1, (768, ), (1, )) + assert_size_stride(arg312_1, (768, ), (1, )) + assert_size_stride(arg313_1, (768, ), (1, )) + assert_size_stride(arg314_1, (2304, 768), (768, 1)) + assert_size_stride(arg315_1, (2304, ), (1, )) + assert_size_stride(arg316_1, (768, 768), (768, 1)) + assert_size_stride(arg317_1, (768, ), (1, )) + assert_size_stride(arg318_1, (768, ), (1, )) + assert_size_stride(arg319_1, (768, ), (1, )) + assert_size_stride(arg320_1, (3072, 768), (768, 1)) + assert_size_stride(arg321_1, (3072, ), (1, )) + assert_size_stride(arg322_1, (768, 3072), (3072, 1)) + assert_size_stride(arg323_1, (768, ), (1, )) + assert_size_stride(arg324_1, (768, ), (1, )) + assert_size_stride(arg325_1, (768, ), (1, )) + assert_size_stride(arg326_1, (2304, 768), (768, 1)) + assert_size_stride(arg327_1, (2304, ), (1, )) + assert_size_stride(arg328_1, (768, 
768), (768, 1)) + assert_size_stride(arg329_1, (768, ), (1, )) + assert_size_stride(arg330_1, (768, ), (1, )) + assert_size_stride(arg331_1, (768, ), (1, )) + assert_size_stride(arg332_1, (3072, 768), (768, 1)) + assert_size_stride(arg333_1, (3072, ), (1, )) + assert_size_stride(arg334_1, (768, 3072), (3072, 1)) + assert_size_stride(arg335_1, (768, ), (1, )) + assert_size_stride(arg336_1, (768, ), (1, )) + assert_size_stride(arg337_1, (768, ), (1, )) + assert_size_stride(arg338_1, (2304, 768), (768, 1)) + assert_size_stride(arg339_1, (2304, ), (1, )) + assert_size_stride(arg340_1, (768, 768), (768, 1)) + assert_size_stride(arg341_1, (768, ), (1, )) + assert_size_stride(arg342_1, (768, ), (1, )) + assert_size_stride(arg343_1, (768, ), (1, )) + assert_size_stride(arg344_1, (3072, 768), (768, 1)) + assert_size_stride(arg345_1, (3072, ), (1, )) + assert_size_stride(arg346_1, (768, 3072), (3072, 1)) + assert_size_stride(arg347_1, (768, ), (1, )) + assert_size_stride(arg348_1, (768, ), (1, )) + assert_size_stride(arg349_1, (768, ), (1, )) + assert_size_stride(arg350_1, (2304, 768), (768, 1)) + assert_size_stride(arg351_1, (2304, ), (1, )) + assert_size_stride(arg352_1, (768, 768), (768, 1)) + assert_size_stride(arg353_1, (768, ), (1, )) + assert_size_stride(arg354_1, (768, ), (1, )) + assert_size_stride(arg355_1, (768, ), (1, )) + assert_size_stride(arg356_1, (3072, 768), (768, 1)) + assert_size_stride(arg357_1, (3072, ), (1, )) + assert_size_stride(arg358_1, (768, 3072), (3072, 1)) + assert_size_stride(arg359_1, (768, ), (1, )) + assert_size_stride(arg360_1, (768, ), (1, )) + assert_size_stride(arg361_1, (768, ), (1, )) + assert_size_stride(arg362_1, (2304, 768), (768, 1)) + assert_size_stride(arg363_1, (2304, ), (1, )) + assert_size_stride(arg364_1, (768, 768), (768, 1)) + assert_size_stride(arg365_1, (768, ), (1, )) + assert_size_stride(arg366_1, (768, ), (1, )) + assert_size_stride(arg367_1, (768, ), (1, )) + assert_size_stride(arg368_1, (3072, 768), (768, 1)) + assert_size_stride(arg369_1, (3072, ), (1, )) + assert_size_stride(arg370_1, (768, 3072), (3072, 1)) + assert_size_stride(arg371_1, (768, ), (1, )) + assert_size_stride(arg372_1, (768, ), (1, )) + assert_size_stride(arg373_1, (768, ), (1, )) + assert_size_stride(arg374_1, (2304, 768), (768, 1)) + assert_size_stride(arg375_1, (2304, ), (1, )) + assert_size_stride(arg376_1, (768, 768), (768, 1)) + assert_size_stride(arg377_1, (768, ), (1, )) + assert_size_stride(arg378_1, (768, ), (1, )) + assert_size_stride(arg379_1, (768, ), (1, )) + assert_size_stride(arg380_1, (3072, 768), (768, 1)) + assert_size_stride(arg381_1, (3072, ), (1, )) + assert_size_stride(arg382_1, (768, 3072), (3072, 1)) + assert_size_stride(arg383_1, (768, ), (1, )) + assert_size_stride(arg384_1, (768, ), (1, )) + assert_size_stride(arg385_1, (768, ), (1, )) + assert_size_stride(arg386_1, (2304, 768), (768, 1)) + assert_size_stride(arg387_1, (2304, ), (1, )) + assert_size_stride(arg388_1, (768, 768), (768, 1)) + assert_size_stride(arg389_1, (768, ), (1, )) + assert_size_stride(arg390_1, (768, ), (1, )) + assert_size_stride(arg391_1, (768, ), (1, )) + assert_size_stride(arg392_1, (3072, 768), (768, 1)) + assert_size_stride(arg393_1, (3072, ), (1, )) + assert_size_stride(arg394_1, (768, 3072), (3072, 1)) + assert_size_stride(arg395_1, (768, ), (1, )) + assert_size_stride(arg396_1, (768, ), (1, )) + assert_size_stride(arg397_1, (768, ), (1, )) + assert_size_stride(arg398_1, (2304, 768), (768, 1)) + assert_size_stride(arg399_1, (2304, ), (1, )) + 
assert_size_stride(arg400_1, (768, 768), (768, 1)) + assert_size_stride(arg401_1, (768, ), (1, )) + assert_size_stride(arg402_1, (768, ), (1, )) + assert_size_stride(arg403_1, (768, ), (1, )) + assert_size_stride(arg404_1, (3072, 768), (768, 1)) + assert_size_stride(arg405_1, (3072, ), (1, )) + assert_size_stride(arg406_1, (768, 3072), (3072, 1)) + assert_size_stride(arg407_1, (768, ), (1, )) + assert_size_stride(arg408_1, (768, ), (1, )) + assert_size_stride(arg409_1, (768, ), (1, )) + assert_size_stride(arg410_1, (2304, 768), (768, 1)) + assert_size_stride(arg411_1, (2304, ), (1, )) + assert_size_stride(arg412_1, (768, 768), (768, 1)) + assert_size_stride(arg413_1, (768, ), (1, )) + assert_size_stride(arg414_1, (768, ), (1, )) + assert_size_stride(arg415_1, (768, ), (1, )) + assert_size_stride(arg416_1, (3072, 768), (768, 1)) + assert_size_stride(arg417_1, (3072, ), (1, )) + assert_size_stride(arg418_1, (768, 3072), (3072, 1)) + assert_size_stride(arg419_1, (768, ), (1, )) + assert_size_stride(arg420_1, (768, ), (1, )) + assert_size_stride(arg421_1, (768, ), (1, )) + assert_size_stride(arg422_1, (2304, 768), (768, 1)) + assert_size_stride(arg423_1, (2304, ), (1, )) + assert_size_stride(arg424_1, (768, 768), (768, 1)) + assert_size_stride(arg425_1, (768, ), (1, )) + assert_size_stride(arg426_1, (768, ), (1, )) + assert_size_stride(arg427_1, (768, ), (1, )) + assert_size_stride(arg428_1, (3072, 768), (768, 1)) + assert_size_stride(arg429_1, (3072, ), (1, )) + assert_size_stride(arg430_1, (768, 3072), (3072, 1)) + assert_size_stride(arg431_1, (768, ), (1, )) + assert_size_stride(arg432_1, (768, ), (1, )) + assert_size_stride(arg433_1, (768, ), (1, )) + assert_size_stride(arg434_1, (2304, 768), (768, 1)) + assert_size_stride(arg435_1, (2304, ), (1, )) + assert_size_stride(arg436_1, (768, 768), (768, 1)) + assert_size_stride(arg437_1, (768, ), (1, )) + assert_size_stride(arg438_1, (768, ), (1, )) + assert_size_stride(arg439_1, (768, ), (1, )) + assert_size_stride(arg440_1, (3072, 768), (768, 1)) + assert_size_stride(arg441_1, (3072, ), (1, )) + assert_size_stride(arg442_1, (768, 3072), (3072, 1)) + assert_size_stride(arg443_1, (768, ), (1, )) + assert_size_stride(arg444_1, (768, ), (1, )) + assert_size_stride(arg445_1, (768, ), (1, )) + assert_size_stride(arg446_1, (50304, 768), (768, 1)) + assert_size_stride(arg447_1, (768, ), (1, )) + assert_size_stride(arg448_1, (768, ), (1, )) + assert_size_stride(arg449_1, (2304, 768), (768, 1)) + assert_size_stride(arg450_1, (2304, ), (1, )) + assert_size_stride(arg451_1, (768, 768), (768, 1)) + assert_size_stride(arg452_1, (768, ), (1, )) + assert_size_stride(arg453_1, (768, ), (1, )) + assert_size_stride(arg454_1, (768, ), (1, )) + assert_size_stride(arg455_1, (3072, 768), (768, 1)) + assert_size_stride(arg456_1, (3072, ), (1, )) + assert_size_stride(arg457_1, (768, 3072), (3072, 1)) + assert_size_stride(arg458_1, (768, ), (1, )) + assert_size_stride(arg459_1, (768, ), (1, )) + assert_size_stride(arg460_1, (768, ), (1, )) + assert_size_stride(arg461_1, (2304, 768), (768, 1)) + assert_size_stride(arg462_1, (2304, ), (1, )) + assert_size_stride(arg463_1, (768, 768), (768, 1)) + assert_size_stride(arg464_1, (768, ), (1, )) + assert_size_stride(arg465_1, (768, ), (1, )) + assert_size_stride(arg466_1, (768, ), (1, )) + assert_size_stride(arg467_1, (3072, 768), (768, 1)) + assert_size_stride(arg468_1, (3072, ), (1, )) + assert_size_stride(arg469_1, (768, 3072), (3072, 1)) + assert_size_stride(arg470_1, (768, ), (1, )) + assert_size_stride(arg471_1, (768, ), 
(1, )) + assert_size_stride(arg472_1, (768, ), (1, )) + assert_size_stride(arg473_1, (2304, 768), (768, 1)) + assert_size_stride(arg474_1, (2304, ), (1, )) + assert_size_stride(arg475_1, (768, 768), (768, 1)) + assert_size_stride(arg476_1, (768, ), (1, )) + assert_size_stride(arg477_1, (768, ), (1, )) + assert_size_stride(arg478_1, (768, ), (1, )) + assert_size_stride(arg479_1, (3072, 768), (768, 1)) + assert_size_stride(arg480_1, (3072, ), (1, )) + assert_size_stride(arg481_1, (768, 3072), (3072, 1)) + assert_size_stride(arg482_1, (768, ), (1, )) + assert_size_stride(arg483_1, (768, ), (1, )) + assert_size_stride(arg484_1, (768, ), (1, )) + assert_size_stride(arg485_1, (2304, 768), (768, 1)) + assert_size_stride(arg486_1, (2304, ), (1, )) + assert_size_stride(arg487_1, (768, 768), (768, 1)) + assert_size_stride(arg488_1, (768, ), (1, )) + assert_size_stride(arg489_1, (768, ), (1, )) + assert_size_stride(arg490_1, (768, ), (1, )) + assert_size_stride(arg491_1, (3072, 768), (768, 1)) + assert_size_stride(arg492_1, (3072, ), (1, )) + assert_size_stride(arg493_1, (768, 3072), (3072, 1)) + assert_size_stride(arg494_1, (768, ), (1, )) + assert_size_stride(arg495_1, (768, ), (1, )) + assert_size_stride(arg496_1, (768, ), (1, )) + assert_size_stride(arg497_1, (2304, 768), (768, 1)) + assert_size_stride(arg498_1, (2304, ), (1, )) + assert_size_stride(arg499_1, (768, 768), (768, 1)) + assert_size_stride(arg500_1, (768, ), (1, )) + assert_size_stride(arg501_1, (768, ), (1, )) + assert_size_stride(arg502_1, (768, ), (1, )) + assert_size_stride(arg503_1, (3072, 768), (768, 1)) + assert_size_stride(arg504_1, (3072, ), (1, )) + assert_size_stride(arg505_1, (768, 3072), (3072, 1)) + assert_size_stride(arg506_1, (768, ), (1, )) + assert_size_stride(arg507_1, (768, ), (1, )) + assert_size_stride(arg508_1, (768, ), (1, )) + assert_size_stride(arg509_1, (2304, 768), (768, 1)) + assert_size_stride(arg510_1, (2304, ), (1, )) + assert_size_stride(arg511_1, (768, 768), (768, 1)) + assert_size_stride(arg512_1, (768, ), (1, )) + assert_size_stride(arg513_1, (768, ), (1, )) + assert_size_stride(arg514_1, (768, ), (1, )) + assert_size_stride(arg515_1, (3072, 768), (768, 1)) + assert_size_stride(arg516_1, (3072, ), (1, )) + assert_size_stride(arg517_1, (768, 3072), (3072, 1)) + assert_size_stride(arg518_1, (768, ), (1, )) + assert_size_stride(arg519_1, (768, ), (1, )) + assert_size_stride(arg520_1, (768, ), (1, )) + assert_size_stride(arg521_1, (2304, 768), (768, 1)) + assert_size_stride(arg522_1, (2304, ), (1, )) + assert_size_stride(arg523_1, (768, 768), (768, 1)) + assert_size_stride(arg524_1, (768, ), (1, )) + assert_size_stride(arg525_1, (768, ), (1, )) + assert_size_stride(arg526_1, (768, ), (1, )) + assert_size_stride(arg527_1, (3072, 768), (768, 1)) + assert_size_stride(arg528_1, (3072, ), (1, )) + assert_size_stride(arg529_1, (768, 3072), (3072, 1)) + assert_size_stride(arg530_1, (768, ), (1, )) + assert_size_stride(arg531_1, (768, ), (1, )) + assert_size_stride(arg532_1, (768, ), (1, )) + assert_size_stride(arg533_1, (2304, 768), (768, 1)) + assert_size_stride(arg534_1, (2304, ), (1, )) + assert_size_stride(arg535_1, (768, 768), (768, 1)) + assert_size_stride(arg536_1, (768, ), (1, )) + assert_size_stride(arg537_1, (768, ), (1, )) + assert_size_stride(arg538_1, (768, ), (1, )) + assert_size_stride(arg539_1, (3072, 768), (768, 1)) + assert_size_stride(arg540_1, (3072, ), (1, )) + assert_size_stride(arg541_1, (768, 3072), (3072, 1)) + assert_size_stride(arg542_1, (768, ), (1, )) + assert_size_stride(arg543_1, 
(768, ), (1, )) + assert_size_stride(arg544_1, (768, ), (1, )) + assert_size_stride(arg545_1, (2304, 768), (768, 1)) + assert_size_stride(arg546_1, (2304, ), (1, )) + assert_size_stride(arg547_1, (768, 768), (768, 1)) + assert_size_stride(arg548_1, (768, ), (1, )) + assert_size_stride(arg549_1, (768, ), (1, )) + assert_size_stride(arg550_1, (768, ), (1, )) + assert_size_stride(arg551_1, (3072, 768), (768, 1)) + assert_size_stride(arg552_1, (3072, ), (1, )) + assert_size_stride(arg553_1, (768, 3072), (3072, 1)) + assert_size_stride(arg554_1, (768, ), (1, )) + assert_size_stride(arg555_1, (768, ), (1, )) + assert_size_stride(arg556_1, (768, ), (1, )) + assert_size_stride(arg557_1, (2304, 768), (768, 1)) + assert_size_stride(arg558_1, (2304, ), (1, )) + assert_size_stride(arg559_1, (768, 768), (768, 1)) + assert_size_stride(arg560_1, (768, ), (1, )) + assert_size_stride(arg561_1, (768, ), (1, )) + assert_size_stride(arg562_1, (768, ), (1, )) + assert_size_stride(arg563_1, (3072, 768), (768, 1)) + assert_size_stride(arg564_1, (3072, ), (1, )) + assert_size_stride(arg565_1, (768, 3072), (3072, 1)) + assert_size_stride(arg566_1, (768, ), (1, )) + assert_size_stride(arg567_1, (768, ), (1, )) + assert_size_stride(arg568_1, (768, ), (1, )) + assert_size_stride(arg569_1, (2304, 768), (768, 1)) + assert_size_stride(arg570_1, (2304, ), (1, )) + assert_size_stride(arg571_1, (768, 768), (768, 1)) + assert_size_stride(arg572_1, (768, ), (1, )) + assert_size_stride(arg573_1, (768, ), (1, )) + assert_size_stride(arg574_1, (768, ), (1, )) + assert_size_stride(arg575_1, (3072, 768), (768, 1)) + assert_size_stride(arg576_1, (3072, ), (1, )) + assert_size_stride(arg577_1, (768, 3072), (3072, 1)) + assert_size_stride(arg578_1, (768, ), (1, )) + assert_size_stride(arg579_1, (768, ), (1, )) + assert_size_stride(arg580_1, (768, ), (1, )) + assert_size_stride(arg581_1, (2304, 768), (768, 1)) + assert_size_stride(arg582_1, (2304, ), (1, )) + assert_size_stride(arg583_1, (768, 768), (768, 1)) + assert_size_stride(arg584_1, (768, ), (1, )) + assert_size_stride(arg585_1, (768, ), (1, )) + assert_size_stride(arg586_1, (768, ), (1, )) + assert_size_stride(arg587_1, (3072, 768), (768, 1)) + assert_size_stride(arg588_1, (3072, ), (1, )) + assert_size_stride(arg589_1, (768, 3072), (3072, 1)) + assert_size_stride(arg590_1, (768, ), (1, )) + assert_size_stride(arg591_1, (768, ), (1, )) + assert_size_stride(arg592_1, (768, ), (1, )) + assert_size_stride(arg593_1, (), ()) + assert_size_stride(arg594_1, (), ()) + assert_size_stride(arg595_1, (), ()) + assert_size_stride(arg596_1, (), ()) + assert_size_stride(arg597_1, (), ()) + assert_size_stride(arg598_1, (), ()) + assert_size_stride(arg599_1, (), ()) + assert_size_stride(arg600_1, (), ()) + assert_size_stride(arg601_1, (), ()) + assert_size_stride(arg602_1, (), ()) + assert_size_stride(arg603_1, (), ()) + assert_size_stride(arg604_1, (), ()) + assert_size_stride(arg605_1, (), ()) + assert_size_stride(arg606_1, (), ()) + assert_size_stride(arg607_1, (), ()) + assert_size_stride(arg608_1, (), ()) + assert_size_stride(arg609_1, (), ()) + assert_size_stride(arg610_1, (), ()) + assert_size_stride(arg611_1, (), ()) + assert_size_stride(arg612_1, (), ()) + assert_size_stride(arg613_1, (), ()) + assert_size_stride(arg614_1, (), ()) + assert_size_stride(arg615_1, (), ()) + assert_size_stride(arg616_1, (), ()) + assert_size_stride(arg617_1, (), ()) + assert_size_stride(arg618_1, (), ()) + assert_size_stride(arg619_1, (), ()) + assert_size_stride(arg620_1, (), ()) + 
assert_size_stride(arg621_1, (), ()) + assert_size_stride(arg622_1, (), ()) + assert_size_stride(arg623_1, (), ()) + assert_size_stride(arg624_1, (), ()) + assert_size_stride(arg625_1, (), ()) + assert_size_stride(arg626_1, (), ()) + assert_size_stride(arg627_1, (), ()) + assert_size_stride(arg628_1, (), ()) + assert_size_stride(arg629_1, (), ()) + assert_size_stride(arg630_1, (), ()) + assert_size_stride(arg631_1, (), ()) + assert_size_stride(arg632_1, (), ()) + assert_size_stride(arg633_1, (), ()) + assert_size_stride(arg634_1, (), ()) + assert_size_stride(arg635_1, (), ()) + assert_size_stride(arg636_1, (), ()) + assert_size_stride(arg637_1, (), ()) + assert_size_stride(arg638_1, (), ()) + assert_size_stride(arg639_1, (), ()) + assert_size_stride(arg640_1, (), ()) + assert_size_stride(arg641_1, (), ()) + assert_size_stride(arg642_1, (), ()) + assert_size_stride(arg643_1, (), ()) + assert_size_stride(arg644_1, (), ()) + assert_size_stride(arg645_1, (), ()) + assert_size_stride(arg646_1, (), ()) + assert_size_stride(arg647_1, (), ()) + assert_size_stride(arg648_1, (), ()) + assert_size_stride(arg649_1, (), ()) + assert_size_stride(arg650_1, (), ()) + assert_size_stride(arg651_1, (), ()) + assert_size_stride(arg652_1, (), ()) + assert_size_stride(arg653_1, (), ()) + assert_size_stride(arg654_1, (), ()) + assert_size_stride(arg655_1, (), ()) + assert_size_stride(arg656_1, (), ()) + assert_size_stride(arg657_1, (), ()) + assert_size_stride(arg658_1, (), ()) + assert_size_stride(arg659_1, (), ()) + assert_size_stride(arg660_1, (), ()) + assert_size_stride(arg661_1, (), ()) + assert_size_stride(arg662_1, (), ()) + assert_size_stride(arg663_1, (), ()) + assert_size_stride(arg664_1, (), ()) + assert_size_stride(arg665_1, (), ()) + assert_size_stride(arg666_1, (), ()) + assert_size_stride(arg667_1, (), ()) + assert_size_stride(arg668_1, (), ()) + assert_size_stride(arg669_1, (), ()) + assert_size_stride(arg670_1, (), ()) + assert_size_stride(arg671_1, (), ()) + assert_size_stride(arg672_1, (), ()) + assert_size_stride(arg673_1, (), ()) + assert_size_stride(arg674_1, (), ()) + assert_size_stride(arg675_1, (), ()) + assert_size_stride(arg676_1, (), ()) + assert_size_stride(arg677_1, (), ()) + assert_size_stride(arg678_1, (), ()) + assert_size_stride(arg679_1, (), ()) + assert_size_stride(arg680_1, (), ()) + assert_size_stride(arg681_1, (), ()) + assert_size_stride(arg682_1, (), ()) + assert_size_stride(arg683_1, (), ()) + assert_size_stride(arg684_1, (), ()) + assert_size_stride(arg685_1, (), ()) + assert_size_stride(arg686_1, (), ()) + assert_size_stride(arg687_1, (), ()) + assert_size_stride(arg688_1, (), ()) + assert_size_stride(arg689_1, (), ()) + assert_size_stride(arg690_1, (), ()) + assert_size_stride(arg691_1, (), ()) + assert_size_stride(arg692_1, (), ()) + assert_size_stride(arg693_1, (), ()) + assert_size_stride(arg694_1, (), ()) + assert_size_stride(arg695_1, (), ()) + assert_size_stride(arg696_1, (), ()) + assert_size_stride(arg697_1, (), ()) + assert_size_stride(arg698_1, (), ()) + assert_size_stride(arg699_1, (), ()) + assert_size_stride(arg700_1, (), ()) + assert_size_stride(arg701_1, (), ()) + assert_size_stride(arg702_1, (), ()) + assert_size_stride(arg703_1, (), ()) + assert_size_stride(arg704_1, (), ()) + assert_size_stride(arg705_1, (), ()) + assert_size_stride(arg706_1, (), ()) + assert_size_stride(arg707_1, (), ()) + assert_size_stride(arg708_1, (), ()) + assert_size_stride(arg709_1, (), ()) + assert_size_stride(arg710_1, (), ()) + assert_size_stride(arg711_1, (), ()) + 
assert_size_stride(arg712_1, (), ()) + assert_size_stride(arg713_1, (), ()) + assert_size_stride(arg714_1, (), ()) + assert_size_stride(arg715_1, (), ()) + assert_size_stride(arg716_1, (), ()) + assert_size_stride(arg717_1, (), ()) + assert_size_stride(arg718_1, (), ()) + assert_size_stride(arg719_1, (), ()) + assert_size_stride(arg720_1, (), ()) + assert_size_stride(arg721_1, (), ()) + assert_size_stride(arg722_1, (), ()) + assert_size_stride(arg723_1, (), ()) + assert_size_stride(arg724_1, (), ()) + assert_size_stride(arg725_1, (), ()) + assert_size_stride(arg726_1, (), ()) + assert_size_stride(arg727_1, (), ()) + assert_size_stride(arg728_1, (), ()) + assert_size_stride(arg729_1, (), ()) + assert_size_stride(arg730_1, (), ()) + assert_size_stride(arg731_1, (), ()) + assert_size_stride(arg732_1, (), ()) + assert_size_stride(arg733_1, (), ()) + assert_size_stride(arg734_1, (), ()) + assert_size_stride(arg735_1, (), ()) + assert_size_stride(arg736_1, (), ()) + assert_size_stride(arg737_1, (), ()) + assert_size_stride(arg738_1, (), ()) + assert_size_stride(arg739_1, (), ()) + with torch.cuda._DeviceGuard(0): + torch.cuda.set_device(0) + # Source Nodes: [], Original ATen: [] + stream0 = get_raw_stream(0) + triton_for_fused_0.run(arg593_1, arg148_1, arg594_1, arg595_1, arg596_1, arg597_1, arg598_1, arg599_1, arg600_1, arg601_1, arg602_1, arg603_1, arg604_1, arg605_1, arg606_1, arg607_1, arg608_1, arg609_1, arg610_1, arg611_1, arg612_1, arg613_1, arg614_1, arg615_1, arg616_1, arg617_1, arg618_1, arg619_1, arg620_1, arg621_1, arg622_1, arg623_1, arg624_1, arg625_1, arg626_1, arg627_1, arg628_1, arg629_1, arg630_1, arg631_1, arg632_1, arg633_1, arg634_1, arg635_1, arg636_1, arg637_1, arg638_1, arg639_1, arg640_1, arg641_1, arg642_1, arg643_1, arg644_1, arg645_1, arg646_1, arg647_1, arg648_1, arg649_1, arg650_1, arg651_1, arg652_1, arg653_1, arg654_1, arg655_1, arg656_1, arg657_1, arg658_1, arg659_1, arg660_1, arg661_1, arg662_1, arg663_1, arg664_1, arg665_1, arg666_1, arg667_1, arg668_1, arg669_1, arg670_1, arg671_1, arg672_1, arg673_1, arg674_1, arg593_1, arg148_1, arg594_1, arg595_1, arg596_1, arg597_1, arg598_1, arg599_1, arg600_1, arg601_1, arg602_1, arg603_1, arg604_1, arg605_1, arg606_1, arg607_1, arg608_1, arg609_1, arg610_1, arg611_1, arg612_1, arg613_1, arg614_1, arg615_1, arg616_1, arg617_1, arg618_1, arg619_1, arg620_1, arg621_1, arg622_1, arg623_1, arg624_1, arg625_1, arg626_1, arg627_1, arg628_1, arg629_1, arg630_1, arg631_1, arg632_1, arg633_1, arg634_1, arg635_1, arg636_1, arg637_1, arg638_1, arg639_1, arg640_1, arg641_1, arg642_1, arg643_1, arg644_1, arg645_1, arg646_1, arg647_1, arg648_1, arg649_1, arg650_1, arg651_1, arg652_1, arg653_1, arg654_1, arg655_1, arg656_1, arg657_1, arg658_1, arg659_1, arg660_1, arg661_1, arg662_1, arg663_1, arg664_1, arg665_1, arg666_1, arg667_1, arg668_1, arg669_1, arg670_1, arg671_1, arg672_1, arg673_1, arg674_1, grid=(83, 1, 1), stream=stream0) + # Source Nodes: [], Original ATen: [] + triton_for_fused_1.run(arg675_1, arg676_1, arg677_1, arg678_1, arg679_1, arg680_1, arg681_1, arg682_1, arg683_1, arg684_1, arg685_1, arg686_1, arg687_1, arg688_1, arg689_1, arg690_1, arg691_1, arg692_1, arg693_1, arg694_1, arg695_1, arg696_1, arg697_1, arg698_1, arg699_1, arg700_1, arg701_1, arg702_1, arg703_1, arg704_1, arg705_1, arg706_1, arg707_1, arg708_1, arg709_1, arg710_1, arg711_1, arg712_1, arg713_1, arg714_1, arg715_1, arg716_1, arg717_1, arg718_1, arg719_1, arg720_1, arg721_1, arg722_1, arg723_1, arg724_1, arg725_1, arg726_1, arg727_1, arg728_1, arg729_1, 
arg730_1, arg731_1, arg732_1, arg733_1, arg734_1, arg735_1, arg736_1, arg737_1, arg738_1, arg739_1, arg675_1, arg676_1, arg677_1, arg678_1, arg679_1, arg680_1, arg681_1, arg682_1, arg683_1, arg684_1, arg685_1, arg686_1, arg687_1, arg688_1, arg689_1, arg690_1, arg691_1, arg692_1, arg693_1, arg694_1, arg695_1, arg696_1, arg697_1, arg698_1, arg699_1, arg700_1, arg701_1, arg702_1, arg703_1, arg704_1, arg705_1, arg706_1, arg707_1, arg708_1, arg709_1, arg710_1, arg711_1, arg712_1, arg713_1, arg714_1, arg715_1, arg716_1, arg717_1, arg718_1, arg719_1, arg720_1, arg721_1, arg722_1, arg723_1, arg724_1, arg725_1, arg726_1, arg727_1, arg728_1, arg729_1, arg730_1, arg731_1, arg732_1, arg733_1, arg734_1, arg735_1, arg736_1, arg737_1, arg738_1, arg739_1, grid=(65, 1, 1), stream=stream0) + # Source Nodes: [], Original ATen: [] + triton_for_fused_2.run(arg299_1, arg151_1, arg446_1, arg0_1, arg593_1, arg149_1, arg152_1, arg150_1, arg1_1, arg148_1, arg300_1, arg153_1, arg447_1, arg2_1, arg594_1, arg301_1, arg154_1, arg448_1, arg3_1, arg595_1, arg302_1, arg155_1, arg449_1, arg4_1, arg596_1, arg303_1, arg156_1, arg450_1, arg5_1, arg597_1, arg304_1, arg157_1, arg451_1, arg6_1, arg598_1, arg305_1, arg158_1, arg452_1, arg7_1, arg599_1, arg306_1, arg159_1, arg453_1, arg8_1, arg600_1, arg307_1, arg160_1, arg454_1, arg9_1, arg601_1, arg308_1, arg161_1, arg455_1, arg10_1, arg602_1, arg309_1, arg162_1, arg456_1, arg11_1, arg603_1, arg310_1, arg163_1, arg457_1, arg12_1, arg604_1, arg311_1, arg164_1, arg458_1, arg13_1, arg605_1, arg312_1, arg165_1, arg459_1, arg14_1, arg606_1, arg313_1, arg166_1, arg460_1, arg15_1, arg607_1, arg314_1, arg167_1, arg461_1, arg16_1, arg608_1, arg315_1, arg168_1, arg462_1, arg17_1, arg609_1, arg316_1, arg169_1, arg463_1, arg18_1, arg610_1, arg299_1, arg0_1, arg446_1, arg149_1, arg1_1, arg150_1, arg300_1, arg2_1, arg447_1, arg301_1, arg3_1, arg448_1, arg302_1, arg4_1, arg449_1, arg303_1, arg5_1, arg450_1, arg304_1, arg6_1, arg451_1, arg305_1, arg7_1, arg452_1, arg306_1, arg8_1, arg453_1, arg307_1, arg9_1, arg454_1, arg308_1, arg10_1, arg455_1, arg309_1, arg11_1, arg456_1, arg310_1, arg12_1, arg457_1, arg311_1, arg13_1, arg458_1, arg312_1, arg14_1, arg459_1, arg313_1, arg15_1, arg460_1, arg314_1, arg16_1, arg461_1, arg315_1, arg17_1, arg462_1, arg316_1, arg18_1, arg463_1, grid=(47729, 1, 1), stream=stream0) + # Source Nodes: [], Original ATen: [] + triton_for_fused_3.run(arg317_1, arg170_1, arg464_1, arg19_1, arg611_1, arg318_1, arg171_1, arg465_1, arg20_1, arg612_1, arg319_1, arg172_1, arg466_1, arg21_1, arg613_1, arg320_1, arg173_1, arg467_1, arg22_1, arg614_1, arg321_1, arg174_1, arg468_1, arg23_1, arg615_1, arg322_1, arg175_1, arg469_1, arg24_1, arg616_1, arg323_1, arg176_1, arg470_1, arg25_1, arg617_1, arg324_1, arg177_1, arg471_1, arg26_1, arg618_1, arg325_1, arg178_1, arg472_1, arg27_1, arg619_1, arg326_1, arg179_1, arg473_1, arg28_1, arg620_1, arg327_1, arg180_1, arg474_1, arg29_1, arg621_1, arg328_1, arg181_1, arg475_1, arg30_1, arg622_1, arg329_1, arg182_1, arg476_1, arg31_1, arg623_1, arg330_1, arg183_1, arg477_1, arg32_1, arg624_1, arg331_1, arg184_1, arg478_1, arg33_1, arg625_1, arg332_1, arg185_1, arg479_1, arg34_1, arg626_1, arg333_1, arg186_1, arg480_1, arg35_1, arg627_1, arg334_1, arg187_1, arg481_1, arg36_1, arg628_1, arg335_1, arg188_1, arg482_1, arg37_1, arg629_1, arg317_1, arg19_1, arg464_1, arg318_1, arg20_1, arg465_1, arg319_1, arg21_1, arg466_1, arg320_1, arg22_1, arg467_1, arg321_1, arg23_1, arg468_1, arg322_1, arg24_1, arg469_1, arg323_1, arg25_1, arg470_1, arg324_1, 
arg26_1, arg471_1, arg325_1, arg27_1, arg472_1, arg326_1, arg28_1, arg473_1, arg327_1, arg29_1, arg474_1, arg328_1, arg30_1, arg475_1, arg329_1, arg31_1, arg476_1, arg330_1, arg32_1, arg477_1, arg331_1, arg33_1, arg478_1, arg332_1, arg34_1, arg479_1, arg333_1, arg35_1, arg480_1, arg334_1, arg36_1, arg481_1, arg335_1, arg37_1, arg482_1, grid=(11539, 1, 1), stream=stream0) + # Source Nodes: [], Original ATen: [] + triton_for_fused_4.run(arg336_1, arg189_1, arg483_1, arg38_1, arg630_1, arg337_1, arg190_1, arg484_1, arg39_1, arg631_1, arg338_1, arg191_1, arg485_1, arg40_1, arg632_1, arg339_1, arg192_1, arg486_1, arg41_1, arg633_1, arg340_1, arg193_1, arg487_1, arg42_1, arg634_1, arg341_1, arg194_1, arg488_1, arg43_1, arg635_1, arg342_1, arg195_1, arg489_1, arg44_1, arg636_1, arg343_1, arg196_1, arg490_1, arg45_1, arg637_1, arg344_1, arg197_1, arg491_1, arg46_1, arg638_1, arg345_1, arg198_1, arg492_1, arg47_1, arg639_1, arg346_1, arg199_1, arg493_1, arg48_1, arg640_1, arg347_1, arg200_1, arg494_1, arg49_1, arg641_1, arg348_1, arg201_1, arg495_1, arg50_1, arg642_1, arg349_1, arg202_1, arg496_1, arg51_1, arg643_1, arg350_1, arg203_1, arg497_1, arg52_1, arg644_1, arg351_1, arg204_1, arg498_1, arg53_1, arg645_1, arg352_1, arg205_1, arg499_1, arg54_1, arg646_1, arg353_1, arg206_1, arg500_1, arg55_1, arg647_1, arg354_1, arg207_1, arg501_1, arg56_1, arg648_1, arg336_1, arg38_1, arg483_1, arg337_1, arg39_1, arg484_1, arg338_1, arg40_1, arg485_1, arg339_1, arg41_1, arg486_1, arg340_1, arg42_1, arg487_1, arg341_1, arg43_1, arg488_1, arg342_1, arg44_1, arg489_1, arg343_1, arg45_1, arg490_1, arg344_1, arg46_1, arg491_1, arg345_1, arg47_1, arg492_1, arg346_1, arg48_1, arg493_1, arg347_1, arg49_1, arg494_1, arg348_1, arg50_1, arg495_1, arg349_1, arg51_1, arg496_1, arg350_1, arg52_1, arg497_1, arg351_1, arg53_1, arg498_1, arg352_1, arg54_1, arg499_1, arg353_1, arg55_1, arg500_1, arg354_1, arg56_1, arg501_1, grid=(9235, 1, 1), stream=stream0) + # Source Nodes: [], Original ATen: [] + triton_for_fused_5.run(arg355_1, arg208_1, arg502_1, arg57_1, arg649_1, arg356_1, arg209_1, arg503_1, arg58_1, arg650_1, arg357_1, arg210_1, arg504_1, arg59_1, arg651_1, arg358_1, arg211_1, arg505_1, arg60_1, arg652_1, arg359_1, arg212_1, arg506_1, arg61_1, arg653_1, arg360_1, arg213_1, arg507_1, arg62_1, arg654_1, arg361_1, arg214_1, arg508_1, arg63_1, arg655_1, arg362_1, arg215_1, arg509_1, arg64_1, arg656_1, arg363_1, arg216_1, arg510_1, arg65_1, arg657_1, arg364_1, arg217_1, arg511_1, arg66_1, arg658_1, arg365_1, arg218_1, arg512_1, arg67_1, arg659_1, arg366_1, arg219_1, arg513_1, arg68_1, arg660_1, arg367_1, arg220_1, arg514_1, arg69_1, arg661_1, arg368_1, arg221_1, arg515_1, arg70_1, arg662_1, arg369_1, arg222_1, arg516_1, arg71_1, arg663_1, arg370_1, arg223_1, arg517_1, arg72_1, arg664_1, arg371_1, arg224_1, arg518_1, arg73_1, arg665_1, arg372_1, arg225_1, arg519_1, arg74_1, arg666_1, arg373_1, arg226_1, arg520_1, arg75_1, arg667_1, arg355_1, arg57_1, arg502_1, arg356_1, arg58_1, arg503_1, arg357_1, arg59_1, arg504_1, arg358_1, arg60_1, arg505_1, arg359_1, arg61_1, arg506_1, arg360_1, arg62_1, arg507_1, arg361_1, arg63_1, arg508_1, arg362_1, arg64_1, arg509_1, arg363_1, arg65_1, arg510_1, arg364_1, arg66_1, arg511_1, arg365_1, arg67_1, arg512_1, arg366_1, arg68_1, arg513_1, arg367_1, arg69_1, arg514_1, arg368_1, arg70_1, arg515_1, arg369_1, arg71_1, arg516_1, arg370_1, arg72_1, arg517_1, arg371_1, arg73_1, arg518_1, arg372_1, arg74_1, arg519_1, arg373_1, arg75_1, arg520_1, grid=(11539, 1, 1), stream=stream0) + # Source 
Nodes: [], Original ATen: [] + triton_for_fused_6.run(arg374_1, arg227_1, arg521_1, arg76_1, arg668_1, arg375_1, arg228_1, arg522_1, arg77_1, arg669_1, arg376_1, arg229_1, arg523_1, arg78_1, arg670_1, arg377_1, arg230_1, arg524_1, arg79_1, arg671_1, arg378_1, arg231_1, arg525_1, arg80_1, arg672_1, arg379_1, arg232_1, arg526_1, arg81_1, arg673_1, arg380_1, arg233_1, arg527_1, arg82_1, arg674_1, arg381_1, arg234_1, arg528_1, arg83_1, arg675_1, arg382_1, arg235_1, arg529_1, arg84_1, arg676_1, arg383_1, arg236_1, arg530_1, arg85_1, arg677_1, arg384_1, arg237_1, arg531_1, arg86_1, arg678_1, arg385_1, arg238_1, arg532_1, arg87_1, arg679_1, arg386_1, arg239_1, arg533_1, arg88_1, arg680_1, arg387_1, arg240_1, arg534_1, arg89_1, arg681_1, arg388_1, arg241_1, arg535_1, arg90_1, arg682_1, arg389_1, arg242_1, arg536_1, arg91_1, arg683_1, arg390_1, arg243_1, arg537_1, arg92_1, arg684_1, arg391_1, arg244_1, arg538_1, arg93_1, arg685_1, arg392_1, arg245_1, arg539_1, arg94_1, arg686_1, arg374_1, arg76_1, arg521_1, arg375_1, arg77_1, arg522_1, arg376_1, arg78_1, arg523_1, arg377_1, arg79_1, arg524_1, arg378_1, arg80_1, arg525_1, arg379_1, arg81_1, arg526_1, arg380_1, arg82_1, arg527_1, arg381_1, arg83_1, arg528_1, arg382_1, arg84_1, arg529_1, arg383_1, arg85_1, arg530_1, arg384_1, arg86_1, arg531_1, arg385_1, arg87_1, arg532_1, arg386_1, arg88_1, arg533_1, arg387_1, arg89_1, arg534_1, arg388_1, arg90_1, arg535_1, arg389_1, arg91_1, arg536_1, arg390_1, arg92_1, arg537_1, arg391_1, arg93_1, arg538_1, arg392_1, arg94_1, arg539_1, grid=(11538, 1, 1), stream=stream0) + # Source Nodes: [], Original ATen: [] + triton_for_fused_7.run(arg393_1, arg246_1, arg540_1, arg95_1, arg687_1, arg394_1, arg247_1, arg541_1, arg96_1, arg688_1, arg395_1, arg248_1, arg542_1, arg97_1, arg689_1, arg396_1, arg249_1, arg543_1, arg98_1, arg690_1, arg397_1, arg250_1, arg544_1, arg99_1, arg691_1, arg398_1, arg251_1, arg545_1, arg100_1, arg692_1, arg399_1, arg252_1, arg546_1, arg101_1, arg693_1, arg400_1, arg253_1, arg547_1, arg102_1, arg694_1, arg401_1, arg254_1, arg548_1, arg103_1, arg695_1, arg402_1, arg255_1, arg549_1, arg104_1, arg696_1, arg403_1, arg256_1, arg550_1, arg105_1, arg697_1, arg404_1, arg257_1, arg551_1, arg106_1, arg698_1, arg405_1, arg258_1, arg552_1, arg107_1, arg699_1, arg406_1, arg259_1, arg553_1, arg108_1, arg700_1, arg407_1, arg260_1, arg554_1, arg109_1, arg701_1, arg408_1, arg261_1, arg555_1, arg110_1, arg702_1, arg409_1, arg262_1, arg556_1, arg111_1, arg703_1, arg410_1, arg263_1, arg557_1, arg112_1, arg704_1, arg411_1, arg264_1, arg558_1, arg113_1, arg705_1, arg393_1, arg95_1, arg540_1, arg394_1, arg96_1, arg541_1, arg395_1, arg97_1, arg542_1, arg396_1, arg98_1, arg543_1, arg397_1, arg99_1, arg544_1, arg398_1, arg100_1, arg545_1, arg399_1, arg101_1, arg546_1, arg400_1, arg102_1, arg547_1, arg401_1, arg103_1, arg548_1, arg402_1, arg104_1, arg549_1, arg403_1, arg105_1, arg550_1, arg404_1, arg106_1, arg551_1, arg405_1, arg107_1, arg552_1, arg406_1, arg108_1, arg553_1, arg407_1, arg109_1, arg554_1, arg408_1, arg110_1, arg555_1, arg409_1, arg111_1, arg556_1, arg410_1, arg112_1, arg557_1, arg411_1, arg113_1, arg558_1, grid=(10965, 1, 1), stream=stream0) + # Source Nodes: [], Original ATen: [] + triton_for_fused_8.run(arg412_1, arg265_1, arg559_1, arg114_1, arg706_1, arg413_1, arg266_1, arg560_1, arg115_1, arg707_1, arg414_1, arg267_1, arg561_1, arg116_1, arg708_1, arg415_1, arg268_1, arg562_1, arg117_1, arg709_1, arg416_1, arg269_1, arg563_1, arg118_1, arg710_1, arg417_1, arg270_1, arg564_1, arg119_1, arg711_1, 
arg418_1, arg271_1, arg565_1, arg120_1, arg712_1, arg419_1, arg272_1, arg566_1, arg121_1, arg713_1, arg420_1, arg273_1, arg567_1, arg122_1, arg714_1, arg421_1, arg274_1, arg568_1, arg123_1, arg715_1, arg422_1, arg275_1, arg569_1, arg124_1, arg716_1, arg423_1, arg276_1, arg570_1, arg125_1, arg717_1, arg424_1, arg277_1, arg571_1, arg126_1, arg718_1, arg425_1, arg278_1, arg572_1, arg127_1, arg719_1, arg426_1, arg279_1, arg573_1, arg128_1, arg720_1, arg427_1, arg280_1, arg574_1, arg129_1, arg721_1, arg428_1, arg281_1, arg575_1, arg130_1, arg722_1, arg429_1, arg282_1, arg576_1, arg131_1, arg723_1, arg430_1, arg283_1, arg577_1, arg132_1, arg724_1, arg412_1, arg114_1, arg559_1, arg413_1, arg115_1, arg560_1, arg414_1, arg116_1, arg561_1, arg415_1, arg117_1, arg562_1, arg416_1, arg118_1, arg563_1, arg417_1, arg119_1, arg564_1, arg418_1, arg120_1, arg565_1, arg419_1, arg121_1, arg566_1, arg420_1, arg122_1, arg567_1, arg421_1, arg123_1, arg568_1, arg422_1, arg124_1, arg569_1, arg423_1, arg125_1, arg570_1, arg424_1, arg126_1, arg571_1, arg425_1, arg127_1, arg572_1, arg426_1, arg128_1, arg573_1, arg427_1, arg129_1, arg574_1, arg428_1, arg130_1, arg575_1, arg429_1, arg131_1, arg576_1, arg430_1, arg132_1, arg577_1, grid=(12114, 1, 1), stream=stream0) + # Source Nodes: [], Original ATen: [] + triton_for_fused_9.run(arg431_1, arg284_1, arg578_1, arg133_1, arg725_1, arg432_1, arg285_1, arg579_1, arg134_1, arg726_1, arg433_1, arg286_1, arg580_1, arg135_1, arg727_1, arg434_1, arg287_1, arg581_1, arg136_1, arg728_1, arg435_1, arg288_1, arg582_1, arg137_1, arg729_1, arg436_1, arg289_1, arg583_1, arg138_1, arg730_1, arg437_1, arg290_1, arg584_1, arg139_1, arg731_1, arg438_1, arg291_1, arg585_1, arg140_1, arg732_1, arg439_1, arg292_1, arg586_1, arg141_1, arg733_1, arg440_1, arg293_1, arg587_1, arg142_1, arg734_1, arg441_1, arg294_1, arg588_1, arg143_1, arg735_1, arg442_1, arg295_1, arg589_1, arg144_1, arg736_1, arg443_1, arg296_1, arg590_1, arg145_1, arg737_1, arg444_1, arg297_1, arg591_1, arg146_1, arg738_1, arg445_1, arg298_1, arg592_1, arg147_1, arg739_1, arg431_1, arg133_1, arg578_1, arg432_1, arg134_1, arg579_1, arg433_1, arg135_1, arg580_1, arg434_1, arg136_1, arg581_1, arg435_1, arg137_1, arg582_1, arg436_1, arg138_1, arg583_1, arg437_1, arg139_1, arg584_1, arg438_1, arg140_1, arg585_1, arg439_1, arg141_1, arg586_1, arg440_1, arg142_1, arg587_1, arg441_1, arg143_1, arg588_1, arg442_1, arg144_1, arg589_1, arg443_1, arg145_1, arg590_1, arg444_1, arg146_1, arg591_1, arg445_1, arg147_1, arg592_1, grid=(6927, 1, 1), stream=stream0) + del arg0_1 + del arg100_1 + del arg101_1 + del arg102_1 + del arg103_1 + del arg104_1 + del arg105_1 + del arg106_1 + del arg107_1 + del arg108_1 + del arg109_1 + del arg10_1 + del arg110_1 + del arg111_1 + del arg112_1 + del arg113_1 + del arg114_1 + del arg115_1 + del arg116_1 + del arg117_1 + del arg118_1 + del arg119_1 + del arg11_1 + del arg120_1 + del arg121_1 + del arg122_1 + del arg123_1 + del arg124_1 + del arg125_1 + del arg126_1 + del arg127_1 + del arg128_1 + del arg129_1 + del arg12_1 + del arg130_1 + del arg131_1 + del arg132_1 + del arg133_1 + del arg134_1 + del arg135_1 + del arg136_1 + del arg137_1 + del arg138_1 + del arg139_1 + del arg13_1 + del arg140_1 + del arg141_1 + del arg142_1 + del arg143_1 + del arg144_1 + del arg145_1 + del arg146_1 + del arg147_1 + del arg148_1 + del arg149_1 + del arg14_1 + del arg150_1 + del arg151_1 + del arg152_1 + del arg153_1 + del arg154_1 + del arg155_1 + del arg156_1 + del arg157_1 + del arg158_1 + del arg159_1 + del arg15_1 + 
del arg160_1 + del arg161_1 + del arg162_1 + del arg163_1 + del arg164_1 + del arg165_1 + del arg166_1 + del arg167_1 + del arg168_1 + del arg169_1 + del arg16_1 + del arg170_1 + del arg171_1 + del arg172_1 + del arg173_1 + del arg174_1 + del arg175_1 + del arg176_1 + del arg177_1 + del arg178_1 + del arg179_1 + del arg17_1 + del arg180_1 + del arg181_1 + del arg182_1 + del arg183_1 + del arg184_1 + del arg185_1 + del arg186_1 + del arg187_1 + del arg188_1 + del arg189_1 + del arg18_1 + del arg190_1 + del arg191_1 + del arg192_1 + del arg193_1 + del arg194_1 + del arg195_1 + del arg196_1 + del arg197_1 + del arg198_1 + del arg199_1 + del arg19_1 + del arg1_1 + del arg200_1 + del arg201_1 + del arg202_1 + del arg203_1 + del arg204_1 + del arg205_1 + del arg206_1 + del arg207_1 + del arg208_1 + del arg209_1 + del arg20_1 + del arg210_1 + del arg211_1 + del arg212_1 + del arg213_1 + del arg214_1 + del arg215_1 + del arg216_1 + del arg217_1 + del arg218_1 + del arg219_1 + del arg21_1 + del arg220_1 + del arg221_1 + del arg222_1 + del arg223_1 + del arg224_1 + del arg225_1 + del arg226_1 + del arg227_1 + del arg228_1 + del arg229_1 + del arg22_1 + del arg230_1 + del arg231_1 + del arg232_1 + del arg233_1 + del arg234_1 + del arg235_1 + del arg236_1 + del arg237_1 + del arg238_1 + del arg239_1 + del arg23_1 + del arg240_1 + del arg241_1 + del arg242_1 + del arg243_1 + del arg244_1 + del arg245_1 + del arg246_1 + del arg247_1 + del arg248_1 + del arg249_1 + del arg24_1 + del arg250_1 + del arg251_1 + del arg252_1 + del arg253_1 + del arg254_1 + del arg255_1 + del arg256_1 + del arg257_1 + del arg258_1 + del arg259_1 + del arg25_1 + del arg260_1 + del arg261_1 + del arg262_1 + del arg263_1 + del arg264_1 + del arg265_1 + del arg266_1 + del arg267_1 + del arg268_1 + del arg269_1 + del arg26_1 + del arg270_1 + del arg271_1 + del arg272_1 + del arg273_1 + del arg274_1 + del arg275_1 + del arg276_1 + del arg277_1 + del arg278_1 + del arg279_1 + del arg27_1 + del arg280_1 + del arg281_1 + del arg282_1 + del arg283_1 + del arg284_1 + del arg285_1 + del arg286_1 + del arg287_1 + del arg288_1 + del arg289_1 + del arg28_1 + del arg290_1 + del arg291_1 + del arg292_1 + del arg293_1 + del arg294_1 + del arg295_1 + del arg296_1 + del arg297_1 + del arg298_1 + del arg299_1 + del arg29_1 + del arg2_1 + del arg300_1 + del arg301_1 + del arg302_1 + del arg303_1 + del arg304_1 + del arg305_1 + del arg306_1 + del arg307_1 + del arg308_1 + del arg309_1 + del arg30_1 + del arg310_1 + del arg311_1 + del arg312_1 + del arg313_1 + del arg314_1 + del arg315_1 + del arg316_1 + del arg317_1 + del arg318_1 + del arg319_1 + del arg31_1 + del arg320_1 + del arg321_1 + del arg322_1 + del arg323_1 + del arg324_1 + del arg325_1 + del arg326_1 + del arg327_1 + del arg328_1 + del arg329_1 + del arg32_1 + del arg330_1 + del arg331_1 + del arg332_1 + del arg333_1 + del arg334_1 + del arg335_1 + del arg336_1 + del arg337_1 + del arg338_1 + del arg339_1 + del arg33_1 + del arg340_1 + del arg341_1 + del arg342_1 + del arg343_1 + del arg344_1 + del arg345_1 + del arg346_1 + del arg347_1 + del arg348_1 + del arg349_1 + del arg34_1 + del arg350_1 + del arg351_1 + del arg352_1 + del arg353_1 + del arg354_1 + del arg355_1 + del arg356_1 + del arg357_1 + del arg358_1 + del arg359_1 + del arg35_1 + del arg360_1 + del arg361_1 + del arg362_1 + del arg363_1 + del arg364_1 + del arg365_1 + del arg366_1 + del arg367_1 + del arg368_1 + del arg369_1 + del arg36_1 + del arg370_1 + del arg371_1 + del arg372_1 + del arg373_1 + del arg374_1 + del 
arg375_1
+        del arg376_1
+        del arg377_1
+        del arg378_1
+        del arg379_1
+        del arg37_1
+        del arg380_1
[... the remaining per-buffer del statements, arg381_1 through arg97_1 in lexicographic order ...]
+        del arg98_1
+        del arg99_1
+        del arg9_1
+    return ()
+
+
+def benchmark_compiled_module(times=10, repeat=10):
+    from torch._dynamo.testing import rand_strided
+    from torch._inductor.utils import print_performance
+    arg0_1 = rand_strided((50304, 768), (768, 1), device='cuda:0', dtype=torch.float32)
+    arg1_1 = rand_strided((1024, 768), (768, 1), device='cuda:0', dtype=torch.float32)
+    arg2_1 = rand_strided((768, ), (1, ), device='cuda:0', dtype=torch.float32)
+    arg3_1 = rand_strided((768, ), (1, ), device='cuda:0', dtype=torch.float32)
+    arg4_1 = rand_strided((2304, 768), (768, 1), device='cuda:0', dtype=torch.float32)
+    arg5_1 = rand_strided((2304, ), (1, ), device='cuda:0', dtype=torch.float32)
+    arg6_1 = rand_strided((768, 768), (768, 1), device='cuda:0', dtype=torch.float32)
+    arg7_1 = rand_strided((768, ), (1, ), device='cuda:0', dtype=torch.float32)
[... rand_strided initializers for arg8_1 through arg592_1, all device='cuda:0' float32 tensors, with per-block shapes cycling through (768, ), (2304, 768), (2304, ), (768, 768), (3072, 768), (3072, ), and (768, 3072), plus (50304, 768) tables at arg151_1, arg299_1, and arg446_1, (1024, 768) buffers at arg149_1, arg150_1, and arg152_1, and a scalar () at arg148_1 ...]
+    arg593_1 = rand_strided((), (), device='cuda:0', dtype=torch.float32)
[... identical scalar () initializers for arg594_1 through arg738_1 ...]
+    arg739_1 = rand_strided((), (), device='cuda:0', dtype=torch.float32)
+    fn = lambda: call([arg0_1, arg1_1, arg2_1, arg3_1, arg4_1, arg5_1, arg6_1, arg7_1, arg8_1, arg9_1, arg10_1, arg11_1, arg12_1, arg13_1, arg14_1, arg15_1, arg16_1, arg17_1, arg18_1, arg19_1, arg20_1, arg21_1, arg22_1, arg23_1, arg24_1, arg25_1, arg26_1, arg27_1, arg28_1, arg29_1, arg30_1, arg31_1, arg32_1, arg33_1, arg34_1, arg35_1, arg36_1, arg37_1, arg38_1, arg39_1, arg40_1, arg41_1, arg42_1, arg43_1, arg44_1, arg45_1, arg46_1, arg47_1, arg48_1, arg49_1, arg50_1, arg51_1, arg52_1, arg53_1, arg54_1, arg55_1, arg56_1, arg57_1, arg58_1, arg59_1, arg60_1, arg61_1, arg62_1, arg63_1, arg64_1, arg65_1, arg66_1, arg67_1, arg68_1, arg69_1, arg70_1, arg71_1, arg72_1, arg73_1, arg74_1, arg75_1, arg76_1, arg77_1, arg78_1, arg79_1, arg80_1, arg81_1, arg82_1, arg83_1, arg84_1, arg85_1, arg86_1, arg87_1, arg88_1, arg89_1, arg90_1, arg91_1, arg92_1, arg93_1, arg94_1, arg95_1, arg96_1, arg97_1, arg98_1, arg99_1, arg100_1, arg101_1, arg102_1, arg103_1, arg104_1, arg105_1, arg106_1, arg107_1, arg108_1, arg109_1, arg110_1, arg111_1, arg112_1, arg113_1, arg114_1, arg115_1, arg116_1, arg117_1, arg118_1, arg119_1, arg120_1, arg121_1, arg122_1, arg123_1, arg124_1, arg125_1, arg126_1, arg127_1, arg128_1, arg129_1, arg130_1, arg131_1, arg132_1, arg133_1, arg134_1, arg135_1, arg136_1, arg137_1, arg138_1, arg139_1, arg140_1, arg141_1, arg142_1, arg143_1, arg144_1, arg145_1, arg146_1, arg147_1, arg148_1, 
arg149_1, arg150_1, arg151_1, arg152_1, arg153_1, arg154_1, arg155_1, arg156_1, arg157_1, arg158_1, arg159_1, arg160_1, arg161_1, arg162_1, arg163_1, arg164_1, arg165_1, arg166_1, arg167_1, arg168_1, arg169_1, arg170_1, arg171_1, arg172_1, arg173_1, arg174_1, arg175_1, arg176_1, arg177_1, arg178_1, arg179_1, arg180_1, arg181_1, arg182_1, arg183_1, arg184_1, arg185_1, arg186_1, arg187_1, arg188_1, arg189_1, arg190_1, arg191_1, arg192_1, arg193_1, arg194_1, arg195_1, arg196_1, arg197_1, arg198_1, arg199_1, arg200_1, arg201_1, arg202_1, arg203_1, arg204_1, arg205_1, arg206_1, arg207_1, arg208_1, arg209_1, arg210_1, arg211_1, arg212_1, arg213_1, arg214_1, arg215_1, arg216_1, arg217_1, arg218_1, arg219_1, arg220_1, arg221_1, arg222_1, arg223_1, arg224_1, arg225_1, arg226_1, arg227_1, arg228_1, arg229_1, arg230_1, arg231_1, arg232_1, arg233_1, arg234_1, arg235_1, arg236_1, arg237_1, arg238_1, arg239_1, arg240_1, arg241_1, arg242_1, arg243_1, arg244_1, arg245_1, arg246_1, arg247_1, arg248_1, arg249_1, arg250_1, arg251_1, arg252_1, arg253_1, arg254_1, arg255_1, arg256_1, arg257_1, arg258_1, arg259_1, arg260_1, arg261_1, arg262_1, arg263_1, arg264_1, arg265_1, arg266_1, arg267_1, arg268_1, arg269_1, arg270_1, arg271_1, arg272_1, arg273_1, arg274_1, arg275_1, arg276_1, arg277_1, arg278_1, arg279_1, arg280_1, arg281_1, arg282_1, arg283_1, arg284_1, arg285_1, arg286_1, arg287_1, arg288_1, arg289_1, arg290_1, arg291_1, arg292_1, arg293_1, arg294_1, arg295_1, arg296_1, arg297_1, arg298_1, arg299_1, arg300_1, arg301_1, arg302_1, arg303_1, arg304_1, arg305_1, arg306_1, arg307_1, arg308_1, arg309_1, arg310_1, arg311_1, arg312_1, arg313_1, arg314_1, arg315_1, arg316_1, arg317_1, arg318_1, arg319_1, arg320_1, arg321_1, arg322_1, arg323_1, arg324_1, arg325_1, arg326_1, arg327_1, arg328_1, arg329_1, arg330_1, arg331_1, arg332_1, arg333_1, arg334_1, arg335_1, arg336_1, arg337_1, arg338_1, arg339_1, arg340_1, arg341_1, arg342_1, arg343_1, arg344_1, arg345_1, arg346_1, arg347_1, arg348_1, arg349_1, arg350_1, arg351_1, arg352_1, arg353_1, arg354_1, arg355_1, arg356_1, arg357_1, arg358_1, arg359_1, arg360_1, arg361_1, arg362_1, arg363_1, arg364_1, arg365_1, arg366_1, arg367_1, arg368_1, arg369_1, arg370_1, arg371_1, arg372_1, arg373_1, arg374_1, arg375_1, arg376_1, arg377_1, arg378_1, arg379_1, arg380_1, arg381_1, arg382_1, arg383_1, arg384_1, arg385_1, arg386_1, arg387_1, arg388_1, arg389_1, arg390_1, arg391_1, arg392_1, arg393_1, arg394_1, arg395_1, arg396_1, arg397_1, arg398_1, arg399_1, arg400_1, arg401_1, arg402_1, arg403_1, arg404_1, arg405_1, arg406_1, arg407_1, arg408_1, arg409_1, arg410_1, arg411_1, arg412_1, arg413_1, arg414_1, arg415_1, arg416_1, arg417_1, arg418_1, arg419_1, arg420_1, arg421_1, arg422_1, arg423_1, arg424_1, arg425_1, arg426_1, arg427_1, arg428_1, arg429_1, arg430_1, arg431_1, arg432_1, arg433_1, arg434_1, arg435_1, arg436_1, arg437_1, arg438_1, arg439_1, arg440_1, arg441_1, arg442_1, arg443_1, arg444_1, arg445_1, arg446_1, arg447_1, arg448_1, arg449_1, arg450_1, arg451_1, arg452_1, arg453_1, arg454_1, arg455_1, arg456_1, arg457_1, arg458_1, arg459_1, arg460_1, arg461_1, arg462_1, arg463_1, arg464_1, arg465_1, arg466_1, arg467_1, arg468_1, arg469_1, arg470_1, arg471_1, arg472_1, arg473_1, arg474_1, arg475_1, arg476_1, arg477_1, arg478_1, arg479_1, arg480_1, arg481_1, arg482_1, arg483_1, arg484_1, arg485_1, arg486_1, arg487_1, arg488_1, arg489_1, arg490_1, arg491_1, arg492_1, arg493_1, arg494_1, arg495_1, arg496_1, arg497_1, arg498_1, arg499_1, arg500_1, arg501_1, arg502_1, arg503_1, 
arg504_1, arg505_1, arg506_1, arg507_1, arg508_1, arg509_1, arg510_1, arg511_1, arg512_1, arg513_1, arg514_1, arg515_1, arg516_1, arg517_1, arg518_1, arg519_1, arg520_1, arg521_1, arg522_1, arg523_1, arg524_1, arg525_1, arg526_1, arg527_1, arg528_1, arg529_1, arg530_1, arg531_1, arg532_1, arg533_1, arg534_1, arg535_1, arg536_1, arg537_1, arg538_1, arg539_1, arg540_1, arg541_1, arg542_1, arg543_1, arg544_1, arg545_1, arg546_1, arg547_1, arg548_1, arg549_1, arg550_1, arg551_1, arg552_1, arg553_1, arg554_1, arg555_1, arg556_1, arg557_1, arg558_1, arg559_1, arg560_1, arg561_1, arg562_1, arg563_1, arg564_1, arg565_1, arg566_1, arg567_1, arg568_1, arg569_1, arg570_1, arg571_1, arg572_1, arg573_1, arg574_1, arg575_1, arg576_1, arg577_1, arg578_1, arg579_1, arg580_1, arg581_1, arg582_1, arg583_1, arg584_1, arg585_1, arg586_1, arg587_1, arg588_1, arg589_1, arg590_1, arg591_1, arg592_1, arg593_1, arg594_1, arg595_1, arg596_1, arg597_1, arg598_1, arg599_1, arg600_1, arg601_1, arg602_1, arg603_1, arg604_1, arg605_1, arg606_1, arg607_1, arg608_1, arg609_1, arg610_1, arg611_1, arg612_1, arg613_1, arg614_1, arg615_1, arg616_1, arg617_1, arg618_1, arg619_1, arg620_1, arg621_1, arg622_1, arg623_1, arg624_1, arg625_1, arg626_1, arg627_1, arg628_1, arg629_1, arg630_1, arg631_1, arg632_1, arg633_1, arg634_1, arg635_1, arg636_1, arg637_1, arg638_1, arg639_1, arg640_1, arg641_1, arg642_1, arg643_1, arg644_1, arg645_1, arg646_1, arg647_1, arg648_1, arg649_1, arg650_1, arg651_1, arg652_1, arg653_1, arg654_1, arg655_1, arg656_1, arg657_1, arg658_1, arg659_1, arg660_1, arg661_1, arg662_1, arg663_1, arg664_1, arg665_1, arg666_1, arg667_1, arg668_1, arg669_1, arg670_1, arg671_1, arg672_1, arg673_1, arg674_1, arg675_1, arg676_1, arg677_1, arg678_1, arg679_1, arg680_1, arg681_1, arg682_1, arg683_1, arg684_1, arg685_1, arg686_1, arg687_1, arg688_1, arg689_1, arg690_1, arg691_1, arg692_1, arg693_1, arg694_1, arg695_1, arg696_1, arg697_1, arg698_1, arg699_1, arg700_1, arg701_1, arg702_1, arg703_1, arg704_1, arg705_1, arg706_1, arg707_1, arg708_1, arg709_1, arg710_1, arg711_1, arg712_1, arg713_1, arg714_1, arg715_1, arg716_1, arg717_1, arg718_1, arg719_1, arg720_1, arg721_1, arg722_1, arg723_1, arg724_1, arg725_1, arg726_1, arg727_1, arg728_1, arg729_1, arg730_1, arg731_1, arg732_1, arg733_1, arg734_1, arg735_1, arg736_1, arg737_1, arg738_1, arg739_1]) + return print_performance(fn, times=times, repeat=repeat) + + + if __name__ == "__main__": + from torch._inductor.wrapper_benchmark import compiled_module_main + compiled_module_main('nanogpt', benchmark_compiled_module) + +V0806 13:56:22.230000 4107173 torch/_dynamo/utils.py:824] {"chromium_event": {}, "has_payload": "9712c78834c2d72b350fa84c50d70770"} + { + "name": "code_gen", + "ts": 1722977782230623.0, + "args": null, + "ph": "E", + "pid": 0 + } +V0806 13:56:22.230000 4107173 torch/_dynamo/utils.py:824] {"chromium_event": {}, "has_payload": "abaf421eb449f454f1dd958665f461eb"} + { + "name": "GraphLowering.compile_to_module", + "ts": 1722977782230854.5, + "args": null, + "ph": "E", + "pid": 0 + } +V0806 13:56:22.384000 4107173 torch/_dynamo/utils.py:838] {"chromium_event": {}, "has_payload": "5e958fee1956dad5abf3d91b2bc0b309"} + { + "name": "fx_graph_cache_miss", + "ts": 1722977770798463.8, + "args": { + "key": "f2hzi4mmzauwdbyib6zmykorraxjbqftyvglo6f4mz2b36wljiti", + "cache_state": "miss", + "components": [ + "[i5hietdxt6dlkcrwbpsvei6udef6z3eec54zo7cpjzbybmgvi7b] gm: ()\n\n\n\ndef forward(self, arg0_1, arg1_1, arg2_1, arg3_1, arg4_1, arg5_1, arg6_1, arg7_1, arg8_1, 
arg9_1, arg10_1, arg11_1, arg12_1, arg13_1, arg14_1, arg15_1, arg16_1, arg17_1, arg18_1, arg19_1, arg20_1, arg21_1, arg22_1, arg23_1, arg24_1, arg25_1, arg26_1, arg27_1, arg28_1, arg29_1, arg30_1, arg31_1, arg32_1, arg33_1, arg34_1, arg35_1, arg36_1, arg37_1, arg38_1, arg39_1, arg40_1, arg41_1, arg42_1, arg43_1, arg44_1, arg45_1, arg46_1, arg47_1, arg48_1, arg49_1, arg50_1, arg51_1, arg52_1, arg53_1, arg54_1, arg55_1, arg56_1, arg57_1, arg58_1, arg59_1, arg60_1, arg61_1, arg62_1, arg63_1, arg64_1, arg65_1, arg66_1, arg67_1, arg68_1, arg69_1, arg70_1, arg71_1, arg72_1, arg73_1, arg74_1, arg75_1, arg76_1, arg77_1, arg78_1, arg79_1, arg80_1, arg81_1, arg82_1, arg83_1, arg84_1, arg85_1, arg86_1, arg87_1, arg88_1, arg89_1, arg90_1, arg91_1, arg92_1, arg93_1, arg94_1, arg95_1, arg96_1, arg97_1, arg98_1, arg99_1, arg100_1, arg101_1, arg102_1, arg103_1, arg104_1, arg105_1, arg106_1, arg107_1, arg108_1, arg109_1, arg110_1, arg111_1, arg112_1, arg113_1, arg114_1, arg115_1, arg116_1, arg117_1, arg118_1, arg119_1, arg120_1, arg121_1, arg122_1, arg123_1, arg124_1, arg125_1, arg126_1, arg127_1, arg128_1, arg129_1, arg130_1, arg131_1, arg132_1, arg133_1, arg134_1, arg135_1, arg136_1, arg137_1, arg138_1, arg139_1, arg140_1, arg141_1, arg142_1, arg143_1, arg144_1, arg145_1, arg146_1, arg147_1, arg148_1, arg149_1, arg150_1, arg151_1, arg152_1, arg153_1, arg154_1, arg155_1, arg156_1, arg157_1, arg158_1, arg159_1, arg160_1, arg161_1, arg162_1, arg163_1, arg164_1, arg165_1, arg166_1, arg167_1, arg168_1, arg169_1, arg170_1, arg171_1, arg172_1, arg173_1, arg174_1, arg175_1, arg176_1, arg177_1, arg178_1, arg179_1, arg180_1, arg181_1, arg182_1, arg183_1, arg184_1, arg185_1, arg186_1, arg187_1, arg188_1, arg189_1, arg190_1, arg191_1, arg192_1, arg193_1, arg194_1, arg195_1, arg196_1, arg197_1, arg198_1, arg199_1, arg200_1, arg201_1, arg202_1, arg203_1, arg204_1, arg205_1, arg206_1, arg207_1, arg208_1, arg209_1, arg210_1, arg211_1, arg212_1, arg213_1, arg214_1, arg215_1, arg216_1, arg217_1, arg218_1, arg219_1, arg220_1, arg221_1, arg222_1, arg223_1, arg224_1, arg225_1, arg226_1, arg227_1, arg228_1, arg229_1, arg230_1, arg231_1, arg232_1, arg233_1, arg234_1, arg235_1, arg236_1, arg237_1, arg238_1, arg239_1, arg240_1, arg241_1, arg242_1, arg243_1, arg244_1, arg245_1, arg246_1, arg247_1, arg248_1, arg249_1, arg250_1, arg251_1, arg252_1, arg253_1, arg254_1, arg255_1, arg256_1, arg257_1, arg258_1, arg259_1, arg260_1, arg261_1, arg262_1, arg263_1, arg264_1, arg265_1, arg266_1, arg267_1, arg268_1, arg269_1, arg270_1, arg271_1, arg272_1, arg273_1, arg274_1, arg275_1, arg276_1, arg277_1, arg278_1, arg279_1, arg280_1, arg281_1, arg282_1, arg283_1, arg284_1, arg285_1, arg286_1, arg287_1, arg288_1, arg289_1, arg290_1, arg291_1, arg292_1, arg293_1, arg294_1, arg295_1, arg296_1, arg297_1, arg298_1, arg299_1, arg300_1, arg301_1, arg302_1, arg303_1, arg304_1, arg305_1, arg306_1, arg307_1, arg308_1, arg309_1, arg310_1, arg311_1, arg312_1, arg313_1, arg314_1, arg315_1, arg316_1, arg317_1, arg318_1, arg319_1, arg320_1, arg321_1, arg322_1, arg323_1, arg324_1, arg325_1, arg326_1, arg327_1, arg328_1, arg329_1, arg330_1, arg331_1, arg332_1, arg333_1, arg334_1, arg335_1, arg336_1, arg337_1, arg338_1, arg339_1, arg340_1, arg341_1, arg342_1, arg343_1, arg344_1, arg345_1, arg346_1, arg347_1, arg348_1, arg349_1, arg350_1, arg351_1, arg352_1, arg353_1, arg354_1, arg355_1, arg356_1, arg357_1, arg358_1, arg359_1, arg360_1, arg361_1, arg362_1, arg363_1, arg364_1, arg365_1, arg366_1, arg367_1, arg368_1, arg369_1, arg370_1, arg371_1, arg372_1, 
arg373_1, arg374_1, arg375_1, arg376_1, arg377_1, arg378_1, arg379_1, arg380_1, arg381_1, arg382_1, arg383_1, arg384_1, arg385_1, arg386_1, arg387_1, arg388_1, arg389_1, arg390_1, arg391_1, arg392_1, arg393_1, arg394_1, arg395_1, arg396_1, arg397_1, arg398_1, arg399_1, arg400_1, arg401_1, arg402_1, arg403_1, arg404_1, arg405_1, arg406_1, arg407_1, arg408_1, arg409_1, arg410_1, arg411_1, arg412_1, arg413_1, arg414_1, arg415_1, arg416_1, arg417_1, arg418_1, arg419_1, arg420_1, arg421_1, arg422_1, arg423_1, arg424_1, arg425_1, arg426_1, arg427_1, arg428_1, arg429_1, arg430_1, arg431_1, arg432_1, arg433_1, arg434_1, arg435_1, arg436_1, arg437_1, arg438_1, arg439_1, arg440_1, arg441_1, arg442_1, arg443_1, arg444_1, arg445_1, arg446_1, arg447_1, arg448_1, arg449_1, arg450_1, arg451_1, arg452_1, arg453_1, arg454_1, arg455_1, arg456_1, arg457_1, arg458_1, arg459_1, arg460_1, arg461_1, arg462_1, arg463_1, arg464_1, arg465_1, arg466_1, arg467_1, arg468_1, arg469_1, arg470_1, arg471_1, arg472_1, arg473_1, arg474_1, arg475_1, arg476_1, arg477_1, arg478_1, arg479_1, arg480_1, arg481_1, arg482_1, arg483_1, arg484_1, arg485_1, arg486_1, arg487_1, arg488_1, arg489_1, arg490_1, arg491_1, arg492_1, arg493_1, arg494_1, arg495_1, arg496_1, arg497_1, arg498_1, arg499_1, arg500_1, arg501_1, arg502_1, arg503_1, arg504_1, arg505_1, arg506_1, arg507_1, arg508_1, arg509_1, arg510_1, arg511_1, arg512_1, arg513_1, arg514_1, arg515_1, arg516_1, arg517_1, arg518_1, arg519_1, arg520_1, arg521_1, arg522_1, arg523_1, arg524_1, arg525_1, arg526_1, arg527_1, arg528_1, arg529_1, arg530_1, arg531_1, arg532_1, arg533_1, arg534_1, arg535_1, arg536_1, arg537_1, arg538_1, arg539_1, arg540_1, arg541_1, arg542_1, arg543_1, arg544_1, arg545_1, arg546_1, arg547_1, arg548_1, arg549_1, arg550_1, arg551_1, arg552_1, arg553_1, arg554_1, arg555_1, arg556_1, arg557_1, arg558_1, arg559_1, arg560_1, arg561_1, arg562_1, arg563_1, arg564_1, arg565_1, arg566_1, arg567_1, arg568_1, arg569_1, arg570_1, arg571_1, arg572_1, arg573_1, arg574_1, arg575_1, arg576_1, arg577_1, arg578_1, arg579_1, arg580_1, arg581_1, arg582_1, arg583_1, arg584_1, arg585_1, arg586_1, arg587_1, arg588_1, arg589_1, arg590_1, arg591_1, arg592_1, arg593_1, arg594_1, arg595_1, arg596_1, arg597_1, arg598_1, arg599_1, arg600_1, arg601_1, arg602_1, arg603_1, arg604_1, arg605_1, arg606_1, arg607_1, arg608_1, arg609_1, arg610_1, arg611_1, arg612_1, arg613_1, arg614_1, arg615_1, arg616_1, arg617_1, arg618_1, arg619_1, arg620_1, arg621_1, arg622_1, arg623_1, arg624_1, arg625_1, arg626_1, arg627_1, arg628_1, arg629_1, arg630_1, arg631_1, arg632_1, arg633_1, arg634_1, arg635_1, arg636_1, arg637_1, arg638_1, arg639_1, arg640_1, arg641_1, arg642_1, arg643_1, arg644_1, arg645_1, arg646_1, arg647_1, arg648_1, arg649_1, arg650_1, arg651_1, arg652_1, arg653_1, arg654_1, arg655_1, arg656_1, arg657_1, arg658_1, arg659_1, arg660_1, arg661_1, arg662_1, arg663_1, arg664_1, arg665_1, arg666_1, arg667_1, arg668_1, arg669_1, arg670_1, arg671_1, arg672_1, arg673_1, arg674_1, arg675_1, arg676_1, arg677_1, arg678_1, arg679_1, arg680_1, arg681_1, arg682_1, arg683_1, arg684_1, arg685_1, arg686_1, arg687_1, arg688_1, arg689_1, arg690_1, arg691_1, arg692_1, arg693_1, arg694_1, arg695_1, arg696_1, arg697_1, arg698_1, arg699_1, arg700_1, arg701_1, arg702_1, arg703_1, arg704_1, arg705_1, arg706_1, arg707_1, arg708_1, arg709_1, arg710_1, arg711_1, arg712_1, arg713_1, arg714_1, arg715_1, arg716_1, arg717_1, arg718_1, arg719_1, arg720_1, arg721_1, arg722_1, arg723_1, arg724_1, arg725_1, arg726_1, arg727_1, 
arg728_1, arg729_1, arg730_1, arg731_1, arg732_1, arg733_1, arg734_1, arg735_1, arg736_1, arg737_1, arg738_1, arg739_1):\n _foreach_add = torch.ops.aten._foreach_add.Scalar([arg593_1, arg148_1, arg594_1, arg595_1, arg596_1, arg597_1, arg598_1, arg599_1, arg600_1, arg601_1, arg602_1, arg603_1, arg604_1, arg605_1, arg606_1, arg607_1, arg608_1, arg609_1, arg610_1, arg611_1, arg612_1, arg613_1, arg614_1, arg615_1, arg616_1, arg617_1, arg618_1, arg619_1, arg620_1, arg621_1, arg622_1, arg623_1, arg624_1, arg625_1, arg626_1, arg627_1, arg628_1, arg629_1, arg630_1, arg631_1, arg632_1, arg633_1, arg634_1, arg635_1, arg636_1, arg637_1, arg638_1, arg639_1, arg640_1, arg641_1, arg642_1, arg643_1, arg644_1, arg645_1, arg646_1, arg647_1, arg648_1, arg649_1, arg650_1, arg651_1, arg652_1, arg653_1, arg654_1, arg655_1, arg656_1, arg657_1, arg658_1, arg659_1, arg660_1, arg661_1, arg662_1, arg663_1, arg664_1, arg665_1, arg666_1, arg667_1, arg668_1, arg669_1, arg670_1, arg671_1, arg672_1, arg673_1, arg674_1, arg675_1, arg676_1, arg677_1, arg678_1, arg679_1, arg680_1, arg681_1, arg682_1, arg683_1, arg684_1, arg685_1, arg686_1, arg687_1, arg688_1, arg689_1, arg690_1, arg691_1, arg692_1, arg693_1, arg694_1, arg695_1, arg696_1, arg697_1, arg698_1, arg699_1, arg700_1, arg701_1, arg702_1, arg703_1, arg704_1, arg705_1, arg706_1, arg707_1, arg708_1, arg709_1, arg710_1, arg711_1, arg712_1, arg713_1, arg714_1, arg715_1, arg716_1, arg717_1, arg718_1, arg719_1, arg720_1, arg721_1, arg722_1, arg723_1, arg724_1, arg725_1, arg726_1, arg727_1, arg728_1, arg729_1, arg730_1, arg731_1, arg732_1, arg733_1, arg734_1, arg735_1, arg736_1, arg737_1, arg738_1, arg739_1], 1)\n getitem = _foreach_add[0]\n getitem_1 = _foreach_add[1]\n getitem_2 = _foreach_add[2]\n getitem_3 = _foreach_add[3]\n getitem_4 = _foreach_add[4]\n getitem_5 = _foreach_add[5]\n getitem_6 = _foreach_add[6]\n getitem_7 = _foreach_add[7]\n getitem_8 = _foreach_add[8]\n getitem_9 = _foreach_add[9]\n getitem_10 = _foreach_add[10]\n getitem_11 = _foreach_add[11]\n getitem_12 = _foreach_add[12]\n getitem_13 = _foreach_add[13]\n getitem_14 = _foreach_add[14]\n getitem_15 = _foreach_add[15]\n getitem_16 = _foreach_add[16]\n getitem_17 = _foreach_add[17]\n getitem_18 = _foreach_add[18]\n getitem_19 = _foreach_add[19]\n getitem_20 = _foreach_add[20]\n getitem_21 = _foreach_add[21]\n getitem_22 = _foreach_add[22]\n getitem_23 = _foreach_add[23]\n getitem_24 = _foreach_add[24]\n getitem_25 = _foreach_add[25]\n getitem_26 = _foreach_add[26]\n getitem_27 = _foreach_add[27]\n getitem_28 = _foreach_add[28]\n getitem_29 = _foreach_add[29]\n getitem_30 = _foreach_add[30]\n getitem_31 = _foreach_add[31]\n getitem_32 = _foreach_add[32]\n getitem_33 = _foreach_add[33]\n getitem_34 = _foreach_add[34]\n getitem_35 = _foreach_add[35]\n getitem_36 = _foreach_add[36]\n getitem_37 = _foreach_add[37]\n getitem_38 = _foreach_add[38]\n getitem_39 = _foreach_add[39]\n getitem_40 = _foreach_add[40]\n getitem_41 = _foreach_add[41]\n getitem_42 = _foreach_add[42]\n getitem_43 = _foreach_add[43]\n getitem_44 = _foreach_add[44]\n getitem_45 = _foreach_add[45]\n getitem_46 = _foreach_add[46]\n getitem_47 = _foreach_add[47]\n getitem_48 = _foreach_add[48]\n getitem_49 = _foreach_add[49]\n getitem_50 = _foreach_add[50]\n getitem_51 = _foreach_add[51]\n getitem_52 = _foreach_add[52]\n getitem_53 = _foreach_add[53]\n getitem_54 = _foreach_add[54]\n getitem_55 = _foreach_add[55]\n getitem_56 = _foreach_add[56]\n getitem_57 = _foreach_add[57]\n getitem_58 = _foreach_add[58]\n getitem_59 = 
_foreach_add[59]\n getitem_60 = _foreach_add[60]\n getitem_61 = _foreach_add[61]\n getitem_62 = _foreach_add[62]\n getitem_63 = _foreach_add[63]\n getitem_64 = _foreach_add[64]\n getitem_65 = _foreach_add[65]\n getitem_66 = _foreach_add[66]\n getitem_67 = _foreach_add[67]\n getitem_68 = _foreach_add[68]\n getitem_69 = _foreach_add[69]\n getitem_70 = _foreach_add[70]\n getitem_71 = _foreach_add[71]\n getitem_72 = _foreach_add[72]\n getitem_73 = _foreach_add[73]\n getitem_74 = _foreach_add[74]\n getitem_75 = _foreach_add[75]\n getitem_76 = _foreach_add[76]\n getitem_77 = _foreach_add[77]\n getitem_78 = _foreach_add[78]\n getitem_79 = _foreach_add[79]\n getitem_80 = _foreach_add[80]\n getitem_81 = _foreach_add[81]\n getitem_82 = _foreach_add[82]\n getitem_83 = _foreach_add[83]\n getitem_84 = _foreach_add[84]\n getitem_85 = _foreach_add[85]\n getitem_86 = _foreach_add[86]\n getitem_87 = _foreach_add[87]\n getitem_88 = _foreach_add[88]\n getitem_89 = _foreach_add[89]\n getitem_90 = _foreach_add[90]\n getitem_91 = _foreach_add[91]\n getitem_92 = _foreach_add[92]\n getitem_93 = _foreach_add[93]\n getitem_94 = _foreach_add[94]\n getitem_95 = _foreach_add[95]\n getitem_96 = _foreach_add[96]\n getitem_97 = _foreach_add[97]\n getitem_98 = _foreach_add[98]\n getitem_99 = _foreach_add[99]\n getitem_100 = _foreach_add[100]\n getitem_101 = _foreach_add[101]\n getitem_102 = _foreach_add[102]\n getitem_103 = _foreach_add[103]\n getitem_104 = _foreach_add[104]\n getitem_105 = _foreach_add[105]\n getitem_106 = _foreach_add[106]\n getitem_107 = _foreach_add[107]\n getitem_108 = _foreach_add[108]\n getitem_109 = _foreach_add[109]\n getitem_110 = _foreach_add[110]\n getitem_111 = _foreach_add[111]\n getitem_112 = _foreach_add[112]\n getitem_113 = _foreach_add[113]\n getitem_114 = _foreach_add[114]\n getitem_115 = _foreach_add[115]\n getitem_116 = _foreach_add[116]\n getitem_117 = _foreach_add[117]\n getitem_118 = _foreach_add[118]\n getitem_119 = _foreach_add[119]\n getitem_120 = _foreach_add[120]\n getitem_121 = _foreach_add[121]\n getitem_122 = _foreach_add[122]\n getitem_123 = _foreach_add[123]\n getitem_124 = _foreach_add[124]\n getitem_125 = _foreach_add[125]\n getitem_126 = _foreach_add[126]\n getitem_127 = _foreach_add[127]\n getitem_128 = _foreach_add[128]\n getitem_129 = _foreach_add[129]\n getitem_130 = _foreach_add[130]\n getitem_131 = _foreach_add[131]\n getitem_132 = _foreach_add[132]\n getitem_133 = _foreach_add[133]\n getitem_134 = _foreach_add[134]\n getitem_135 = _foreach_add[135]\n getitem_136 = _foreach_add[136]\n getitem_137 = _foreach_add[137]\n getitem_138 = _foreach_add[138]\n getitem_139 = _foreach_add[139]\n getitem_140 = _foreach_add[140]\n getitem_141 = _foreach_add[141]\n getitem_142 = _foreach_add[142]\n getitem_143 = _foreach_add[143]\n getitem_144 = _foreach_add[144]\n getitem_145 = _foreach_add[145]\n getitem_146 = _foreach_add[146]\n getitem_147 = _foreach_add[147]; _foreach_add = None\n _foreach_sub = torch.ops.aten._foreach_sub.List([arg151_1, arg152_1, arg153_1, arg154_1, arg155_1, arg156_1, arg157_1, arg158_1, arg159_1, arg160_1, arg161_1, arg162_1, arg163_1, arg164_1, arg165_1, arg166_1, arg167_1, arg168_1, arg169_1, arg170_1, arg171_1, arg172_1, arg173_1, arg174_1, arg175_1, arg176_1, arg177_1, arg178_1, arg179_1, arg180_1, arg181_1, arg182_1, arg183_1, arg184_1, arg185_1, arg186_1, arg187_1, arg188_1, arg189_1, arg190_1, arg191_1, arg192_1, arg193_1, arg194_1, arg195_1, arg196_1, arg197_1, arg198_1, arg199_1, arg200_1, arg201_1, arg202_1, arg203_1, arg204_1, arg205_1, 
arg206_1, arg207_1, arg208_1, arg209_1, arg210_1, arg211_1, arg212_1, arg213_1, arg214_1, arg215_1, arg216_1, arg217_1, arg218_1, arg219_1, arg220_1, arg221_1, arg222_1, arg223_1, arg224_1, arg225_1, arg226_1, arg227_1, arg228_1, arg229_1, arg230_1, arg231_1, arg232_1, arg233_1, arg234_1, arg235_1, arg236_1, arg237_1, arg238_1, arg239_1, arg240_1, arg241_1, arg242_1, arg243_1, arg244_1, arg245_1, arg246_1, arg247_1, arg248_1, arg249_1, arg250_1, arg251_1, arg252_1, arg253_1, arg254_1, arg255_1, arg256_1, arg257_1, arg258_1, arg259_1, arg260_1, arg261_1, arg262_1, arg263_1, arg264_1, arg265_1, arg266_1, arg267_1, arg268_1, arg269_1, arg270_1, arg271_1, arg272_1, arg273_1, arg274_1, arg275_1, arg276_1, arg277_1, arg278_1, arg279_1, arg280_1, arg281_1, arg282_1, arg283_1, arg284_1, arg285_1, arg286_1, arg287_1, arg288_1, arg289_1, arg290_1, arg291_1, arg292_1, arg293_1, arg294_1, arg295_1, arg296_1, arg297_1, arg298_1], [arg299_1, arg149_1, arg300_1, arg301_1, arg302_1, arg303_1, arg304_1, arg305_1, arg306_1, arg307_1, arg308_1, arg309_1, arg310_1, arg311_1, arg312_1, arg313_1, arg314_1, arg315_1, arg316_1, arg317_1, arg318_1, arg319_1, arg320_1, arg321_1, arg322_1, arg323_1, arg324_1, arg325_1, arg326_1, arg327_1, arg328_1, arg329_1, arg330_1, arg331_1, arg332_1, arg333_1, arg334_1, arg335_1, arg336_1, arg337_1, arg338_1, arg339_1, arg340_1, arg341_1, arg342_1, arg343_1, arg344_1, arg345_1, arg346_1, arg347_1, arg348_1, arg349_1, arg350_1, arg351_1, arg352_1, arg353_1, arg354_1, arg355_1, arg356_1, arg357_1, arg358_1, arg359_1, arg360_1, arg361_1, arg362_1, arg363_1, arg364_1, arg365_1, arg366_1, arg367_1, arg368_1, arg369_1, arg370_1, arg371_1, arg372_1, arg373_1, arg374_1, arg375_1, arg376_1, arg377_1, arg378_1, arg379_1, arg380_1, arg381_1, arg382_1, arg383_1, arg384_1, arg385_1, arg386_1, arg387_1, arg388_1, arg389_1, arg390_1, arg391_1, arg392_1, arg393_1, arg394_1, arg395_1, arg396_1, arg397_1, arg398_1, arg399_1, arg400_1, arg401_1, arg402_1, arg403_1, arg404_1, arg405_1, arg406_1, arg407_1, arg408_1, arg409_1, arg410_1, arg411_1, arg412_1, arg413_1, arg414_1, arg415_1, arg416_1, arg417_1, arg418_1, arg419_1, arg420_1, arg421_1, arg422_1, arg423_1, arg424_1, arg425_1, arg426_1, arg427_1, arg428_1, arg429_1, arg430_1, arg431_1, arg432_1, arg433_1, arg434_1, arg435_1, arg436_1, arg437_1, arg438_1, arg439_1, arg440_1, arg441_1, arg442_1, arg443_1, arg444_1, arg445_1])\n getitem_148 = _foreach_sub[0]\n getitem_149 = _foreach_sub[1]\n getitem_150 = _foreach_sub[2]\n getitem_151 = _foreach_sub[3]\n getitem_152 = _foreach_sub[4]\n getitem_153 = _foreach_sub[5]\n getitem_154 = _foreach_sub[6]\n getitem_155 = _foreach_sub[7]\n getitem_156 = _foreach_sub[8]\n getitem_157 = _foreach_sub[9]\n getitem_158 = _foreach_sub[10]\n getitem_159 = _foreach_sub[11]\n getitem_160 = _foreach_sub[12]\n getitem_161 = _foreach_sub[13]\n getitem_162 = _foreach_sub[14]\n getitem_163 = _foreach_sub[15]\n getitem_164 = _foreach_sub[16]\n getitem_165 = _foreach_sub[17]\n getitem_166 = _foreach_sub[18]\n getitem_167 = _foreach_sub[19]\n getitem_168 = _foreach_sub[20]\n getitem_169 = _foreach_sub[21]\n getitem_170 = _foreach_sub[22]\n getitem_171 = _foreach_sub[23]\n getitem_172 = _foreach_sub[24]\n getitem_173 = _foreach_sub[25]\n getitem_174 = _foreach_sub[26]\n getitem_175 = _foreach_sub[27]\n getitem_176 = _foreach_sub[28]\n getitem_177 = _foreach_sub[29]\n getitem_178 = _foreach_sub[30]\n getitem_179 = _foreach_sub[31]\n getitem_180 = _foreach_sub[32]\n getitem_181 = _foreach_sub[33]\n getitem_182 = 
_foreach_sub[34]\n getitem_183 = _foreach_sub[35]\n getitem_184 = _foreach_sub[36]\n getitem_185 = _foreach_sub[37]\n getitem_186 = _foreach_sub[38]\n getitem_187 = _foreach_sub[39]\n getitem_188 = _foreach_sub[40]\n getitem_189 = _foreach_sub[41]\n getitem_190 = _foreach_sub[42]\n getitem_191 = _foreach_sub[43]\n getitem_192 = _foreach_sub[44]\n getitem_193 = _foreach_sub[45]\n getitem_194 = _foreach_sub[46]\n getitem_195 = _foreach_sub[47]\n getitem_196 = _foreach_sub[48]\n getitem_197 = _foreach_sub[49]\n getitem_198 = _foreach_sub[50]\n getitem_199 = _foreach_sub[51]\n getitem_200 = _foreach_sub[52]\n getitem_201 = _foreach_sub[53]\n getitem_202 = _foreach_sub[54]\n getitem_203 = _foreach_sub[55]\n getitem_204 = _foreach_sub[56]\n getitem_205 = _foreach_sub[57]\n getitem_206 = _foreach_sub[58]\n getitem_207 = _foreach_sub[59]\n getitem_208 = _foreach_sub[60]\n getitem_209 = _foreach_sub[61]\n getitem_210 = _foreach_sub[62]\n getitem_211 = _foreach_sub[63]\n getitem_212 = _foreach_sub[64]\n getitem_213 = _foreach_sub[65]\n getitem_214 = _foreach_sub[66]\n getitem_215 = _foreach_sub[67]\n getitem_216 = _foreach_sub[68]\n getitem_217 = _foreach_sub[69]\n getitem_218 = _foreach_sub[70]\n getitem_219 = _foreach_sub[71]\n getitem_220 = _foreach_sub[72]\n getitem_221 = _foreach_sub[73]\n getitem_222 = _foreach_sub[74]\n getitem_223 = _foreach_sub[75]\n getitem_224 = _foreach_sub[76]\n getitem_225 = _foreach_sub[77]\n getitem_226 = _foreach_sub[78]\n getitem_227 = _foreach_sub[79]\n getitem_228 = _foreach_sub[80]\n getitem_229 = _foreach_sub[81]\n getitem_230 = _foreach_sub[82]\n getitem_231 = _foreach_sub[83]\n getitem_232 = _foreach_sub[84]\n getitem_233 = _foreach_sub[85]\n getitem_234 = _foreach_sub[86]\n getitem_235 = _foreach_sub[87]\n getitem_236 = _foreach_sub[88]\n getitem_237 = _foreach_sub[89]\n getitem_238 = _foreach_sub[90]\n getitem_239 = _foreach_sub[91]\n getitem_240 = _foreach_sub[92]\n getitem_241 = _foreach_sub[93]\n getitem_242 = _foreach_sub[94]\n getitem_243 = _foreach_sub[95]\n getitem_244 = _foreach_sub[96]\n getitem_245 = _foreach_sub[97]\n getitem_246 = _foreach_sub[98]\n getitem_247 = _foreach_sub[99]\n getitem_248 = _foreach_sub[100]\n getitem_249 = _foreach_sub[101]\n getitem_250 = _foreach_sub[102]\n getitem_251 = _foreach_sub[103]\n getitem_252 = _foreach_sub[104]\n getitem_253 = _foreach_sub[105]\n getitem_254 = _foreach_sub[106]\n getitem_255 = _foreach_sub[107]\n getitem_256 = _foreach_sub[108]\n getitem_257 = _foreach_sub[109]\n getitem_258 = _foreach_sub[110]\n getitem_259 = _foreach_sub[111]\n getitem_260 = _foreach_sub[112]\n getitem_261 = _foreach_sub[113]\n getitem_262 = _foreach_sub[114]\n getitem_263 = _foreach_sub[115]\n getitem_264 = _foreach_sub[116]\n getitem_265 = _foreach_sub[117]\n getitem_266 = _foreach_sub[118]\n getitem_267 = _foreach_sub[119]\n getitem_268 = _foreach_sub[120]\n getitem_269 = _foreach_sub[121]\n getitem_270 = _foreach_sub[122]\n getitem_271 = _foreach_sub[123]\n getitem_272 = _foreach_sub[124]\n getitem_273 = _foreach_sub[125]\n getitem_274 = _foreach_sub[126]\n getitem_275 = _foreach_sub[127]\n getitem_276 = _foreach_sub[128]\n getitem_277 = _foreach_sub[129]\n getitem_278 = _foreach_sub[130]\n getitem_279 = _foreach_sub[131]\n getitem_280 = _foreach_sub[132]\n getitem_281 = _foreach_sub[133]\n getitem_282 = _foreach_sub[134]\n getitem_283 = _foreach_sub[135]\n getitem_284 = _foreach_sub[136]\n getitem_285 = _foreach_sub[137]\n getitem_286 = _foreach_sub[138]\n getitem_287 = _foreach_sub[139]\n getitem_288 = 
_foreach_sub[140]\n getitem_289 = _foreach_sub[141]\n getitem_290 = _foreach_sub[142]\n getitem_291 = _foreach_sub[143]\n getitem_292 = _foreach_sub[144]\n getitem_293 = _foreach_sub[145]\n getitem_294 = _foreach_sub[146]\n getitem_295 = _foreach_sub[147]; _foreach_sub = None\n _foreach_mul = torch.ops.aten._foreach_mul.Scalar([getitem_148, getitem_149, getitem_150, getitem_151, getitem_152, getitem_153, getitem_154, getitem_155, getitem_156, getitem_157, getitem_158, getitem_159, getitem_160, getitem_161, getitem_162, getitem_163, getitem_164, getitem_165, getitem_166, getitem_167, getitem_168, getitem_169, getitem_170, getitem_171, getitem_172, getitem_173, getitem_174, getitem_175, getitem_176, getitem_177, getitem_178, getitem_179, getitem_180, getitem_181, getitem_182, getitem_183, getitem_184, getitem_185, getitem_186, getitem_187, getitem_188, getitem_189, getitem_190, getitem_191, getitem_192, getitem_193, getitem_194, getitem_195, getitem_196, getitem_197, getitem_198, getitem_199, getitem_200, getitem_201, getitem_202, getitem_203, getitem_204, getitem_205, getitem_206, getitem_207, getitem_208, getitem_209, getitem_210, getitem_211, getitem_212, getitem_213, getitem_214, getitem_215, getitem_216, getitem_217, getitem_218, getitem_219, getitem_220, getitem_221, getitem_222, getitem_223, getitem_224, getitem_225, getitem_226, getitem_227, getitem_228, getitem_229, getitem_230, getitem_231, getitem_232, getitem_233, getitem_234, getitem_235, getitem_236, getitem_237, getitem_238, getitem_239, getitem_240, getitem_241, getitem_242, getitem_243, getitem_244, getitem_245, getitem_246, getitem_247, getitem_248, getitem_249, getitem_250, getitem_251, getitem_252, getitem_253, getitem_254, getitem_255, getitem_256, getitem_257, getitem_258, getitem_259, getitem_260, getitem_261, getitem_262, getitem_263, getitem_264, getitem_265, getitem_266, getitem_267, getitem_268, getitem_269, getitem_270, getitem_271, getitem_272, getitem_273, getitem_274, getitem_275, getitem_276, getitem_277, getitem_278, getitem_279, getitem_280, getitem_281, getitem_282, getitem_283, getitem_284, getitem_285, getitem_286, getitem_287, getitem_288, getitem_289, getitem_290, getitem_291, getitem_292, getitem_293, getitem_294, getitem_295], 0.09999999999999998); getitem_148 = getitem_149 = getitem_150 = getitem_151 = getitem_152 = getitem_153 = getitem_154 = getitem_155 = getitem_156 = getitem_157 = getitem_158 = getitem_159 = getitem_160 = getitem_161 = getitem_162 = getitem_163 = getitem_164 = getitem_165 = getitem_166 = getitem_167 = getitem_168 = getitem_169 = getitem_170 = getitem_171 = getitem_172 = getitem_173 = getitem_174 = getitem_175 = getitem_176 = getitem_177 = getitem_178 = getitem_179 = getitem_180 = getitem_181 = getitem_182 = getitem_183 = getitem_184 = getitem_185 = getitem_186 = getitem_187 = getitem_188 = getitem_189 = getitem_190 = getitem_191 = getitem_192 = getitem_193 = getitem_194 = getitem_195 = getitem_196 = getitem_197 = getitem_198 = getitem_199 = getitem_200 = getitem_201 = getitem_202 = getitem_203 = getitem_204 = getitem_205 = getitem_206 = getitem_207 = getitem_208 = getitem_209 = getitem_210 = getitem_211 = getitem_212 = getitem_213 = getitem_214 = getitem_215 = getitem_216 = getitem_217 = getitem_218 = getitem_219 = getitem_220 = getitem_221 = getitem_222 = getitem_223 = getitem_224 = getitem_225 = getitem_226 = getitem_227 = getitem_228 = getitem_229 = getitem_230 = getitem_231 = getitem_232 = getitem_233 = getitem_234 = getitem_235 = getitem_236 = getitem_237 = getitem_238 = 
getitem_239 = getitem_240 = getitem_241 = getitem_242 = getitem_243 = getitem_244 = getitem_245 = getitem_246 = getitem_247 = getitem_248 = getitem_249 = getitem_250 = getitem_251 = getitem_252 = getitem_253 = getitem_254 = getitem_255 = getitem_256 = getitem_257 = getitem_258 = getitem_259 = getitem_260 = getitem_261 = getitem_262 = getitem_263 = getitem_264 = getitem_265 = getitem_266 = getitem_267 = getitem_268 = getitem_269 = getitem_270 = getitem_271 = getitem_272 = getitem_273 = getitem_274 = getitem_275 = getitem_276 = getitem_277 = getitem_278 = getitem_279 = getitem_280 = getitem_281 = getitem_282 = getitem_283 = getitem_284 = getitem_285 = getitem_286 = getitem_287 = getitem_288 = getitem_289 = getitem_290 = getitem_291 = getitem_292 = getitem_293 = getitem_294 = getitem_295 = None\n getitem_296 = _foreach_mul[0]\n getitem_297 = _foreach_mul[1]\n getitem_298 = _foreach_mul[2]\n getitem_299 = _foreach_mul[3]\n getitem_300 = _foreach_mul[4]\n getitem_301 = _foreach_mul[5]\n getitem_302 = _foreach_mul[6]\n getitem_303 = _foreach_mul[7]\n getitem_304 = _foreach_mul[8]\n getitem_305 = _foreach_mul[9]\n getitem_306 = _foreach_mul[10]\n getitem_307 = _foreach_mul[11]\n getitem_308 = _foreach_mul[12]\n getitem_309 = _foreach_mul[13]\n getitem_310 = _foreach_mul[14]\n getitem_311 = _foreach_mul[15]\n getitem_312 = _foreach_mul[16]\n getitem_313 = _foreach_mul[17]\n getitem_314 = _foreach_mul[18]\n getitem_315 = _foreach_mul[19]\n getitem_316 = _foreach_mul[20]\n getitem_317 = _foreach_mul[21]\n getitem_318 = _foreach_mul[22]\n getitem_319 = _foreach_mul[23]\n getitem_320 = _foreach_mul[24]\n getitem_321 = _foreach_mul[25]\n getitem_322 = _foreach_mul[26]\n getitem_323 = _foreach_mul[27]\n getitem_324 = _foreach_mul[28]\n getitem_325 = _foreach_mul[29]\n getitem_326 = _foreach_mul[30]\n getitem_327 = _foreach_mul[31]\n getitem_328 = _foreach_mul[32]\n getitem_329 = _foreach_mul[33]\n getitem_330 = _foreach_mul[34]\n getitem_331 = _foreach_mul[35]\n getitem_332 = _foreach_mul[36]\n getitem_333 = _foreach_mul[37]\n getitem_334 = _foreach_mul[38]\n getitem_335 = _foreach_mul[39]\n getitem_336 = _foreach_mul[40]\n getitem_337 = _foreach_mul[41]\n getitem_338 = _foreach_mul[42]\n getitem_339 = _foreach_mul[43]\n getitem_340 = _foreach_mul[44]\n getitem_341 = _foreach_mul[45]\n getitem_342 = _foreach_mul[46]\n getitem_343 = _foreach_mul[47]\n getitem_344 = _foreach_mul[48]\n getitem_345 = _foreach_mul[49]\n getitem_346 = _foreach_mul[50]\n getitem_347 = _foreach_mul[51]\n getitem_348 = _foreach_mul[52]\n getitem_349 = _foreach_mul[53]\n getitem_350 = _foreach_mul[54]\n getitem_351 = _foreach_mul[55]\n getitem_352 = _foreach_mul[56]\n getitem_353 = _foreach_mul[57]\n getitem_354 = _foreach_mul[58]\n getitem_355 = _foreach_mul[59]\n getitem_356 = _foreach_mul[60]\n getitem_357 = _foreach_mul[61]\n getitem_358 = _foreach_mul[62]\n getitem_359 = _foreach_mul[63]\n getitem_360 = _foreach_mul[64]\n getitem_361 = _foreach_mul[65]\n getitem_362 = _foreach_mul[66]\n getitem_363 = _foreach_mul[67]\n getitem_364 = _foreach_mul[68]\n getitem_365 = _foreach_mul[69]\n getitem_366 = _foreach_mul[70]\n getitem_367 = _foreach_mul[71]\n getitem_368 = _foreach_mul[72]\n getitem_369 = _foreach_mul[73]\n getitem_370 = _foreach_mul[74]\n getitem_371 = _foreach_mul[75]\n getitem_372 = _foreach_mul[76]\n getitem_373 = _foreach_mul[77]\n getitem_374 = _foreach_mul[78]\n getitem_375 = _foreach_mul[79]\n getitem_376 = _foreach_mul[80]\n getitem_377 = _foreach_mul[81]\n getitem_378 = _foreach_mul[82]\n getitem_379 = 
_foreach_mul[83]\n getitem_380 = _foreach_mul[84]\n getitem_381 = _foreach_mul[85]\n getitem_382 = _foreach_mul[86]\n getitem_383 = _foreach_mul[87]\n getitem_384 = _foreach_mul[88]\n getitem_385 = _foreach_mul[89]\n getitem_386 = _foreach_mul[90]\n getitem_387 = _foreach_mul[91]\n getitem_388 = _foreach_mul[92]\n getitem_389 = _foreach_mul[93]\n getitem_390 = _foreach_mul[94]\n getitem_391 = _foreach_mul[95]\n getitem_392 = _foreach_mul[96]\n getitem_393 = _foreach_mul[97]\n getitem_394 = _foreach_mul[98]\n getitem_395 = _foreach_mul[99]\n getitem_396 = _foreach_mul[100]\n getitem_397 = _foreach_mul[101]\n getitem_398 = _foreach_mul[102]\n getitem_399 = _foreach_mul[103]\n getitem_400 = _foreach_mul[104]\n getitem_401 = _foreach_mul[105]\n getitem_402 = _foreach_mul[106]\n getitem_403 = _foreach_mul[107]\n getitem_404 = _foreach_mul[108]\n getitem_405 = _foreach_mul[109]\n getitem_406 = _foreach_mul[110]\n getitem_407 = _foreach_mul[111]\n getitem_408 = _foreach_mul[112]\n getitem_409 = _foreach_mul[113]\n getitem_410 = _foreach_mul[114]\n getitem_411 = _foreach_mul[115]\n getitem_412 = _foreach_mul[116]\n getitem_413 = _foreach_mul[117]\n getitem_414 = _foreach_mul[118]\n getitem_415 = _foreach_mul[119]\n getitem_416 = _foreach_mul[120]\n getitem_417 = _foreach_mul[121]\n getitem_418 = _foreach_mul[122]\n getitem_419 = _foreach_mul[123]\n getitem_420 = _foreach_mul[124]\n getitem_421 = _foreach_mul[125]\n getitem_422 = _foreach_mul[126]\n getitem_423 = _foreach_mul[127]\n getitem_424 = _foreach_mul[128]\n getitem_425 = _foreach_mul[129]\n getitem_426 = _foreach_mul[130]\n getitem_427 = _foreach_mul[131]\n getitem_428 = _foreach_mul[132]\n getitem_429 = _foreach_mul[133]\n getitem_430 = _foreach_mul[134]\n getitem_431 = _foreach_mul[135]\n getitem_432 = _foreach_mul[136]\n getitem_433 = _foreach_mul[137]\n getitem_434 = _foreach_mul[138]\n getitem_435 = _foreach_mul[139]\n getitem_436 = _foreach_mul[140]\n getitem_437 = _foreach_mul[141]\n getitem_438 = _foreach_mul[142]\n getitem_439 = _foreach_mul[143]\n getitem_440 = _foreach_mul[144]\n getitem_441 = _foreach_mul[145]\n getitem_442 = _foreach_mul[146]\n getitem_443 = _foreach_mul[147]; _foreach_mul = None\n _foreach_add_1 = torch.ops.aten._foreach_add.List([arg299_1, arg149_1, arg300_1, arg301_1, arg302_1, arg303_1, arg304_1, arg305_1, arg306_1, arg307_1, arg308_1, arg309_1, arg310_1, arg311_1, arg312_1, arg313_1, arg314_1, arg315_1, arg316_1, arg317_1, arg318_1, arg319_1, arg320_1, arg321_1, arg322_1, arg323_1, arg324_1, arg325_1, arg326_1, arg327_1, arg328_1, arg329_1, arg330_1, arg331_1, arg332_1, arg333_1, arg334_1, arg335_1, arg336_1, arg337_1, arg338_1, arg339_1, arg340_1, arg341_1, arg342_1, arg343_1, arg344_1, arg345_1, arg346_1, arg347_1, arg348_1, arg349_1, arg350_1, arg351_1, arg352_1, arg353_1, arg354_1, arg355_1, arg356_1, arg357_1, arg358_1, arg359_1, arg360_1, arg361_1, arg362_1, arg363_1, arg364_1, arg365_1, arg366_1, arg367_1, arg368_1, arg369_1, arg370_1, arg371_1, arg372_1, arg373_1, arg374_1, arg375_1, arg376_1, arg377_1, arg378_1, arg379_1, arg380_1, arg381_1, arg382_1, arg383_1, arg384_1, arg385_1, arg386_1, arg387_1, arg388_1, arg389_1, arg390_1, arg391_1, arg392_1, arg393_1, arg394_1, arg395_1, arg396_1, arg397_1, arg398_1, arg399_1, arg400_1, arg401_1, arg402_1, arg403_1, arg404_1, arg405_1, arg406_1, arg407_1, arg408_1, arg409_1, arg410_1, arg411_1, arg412_1, arg413_1, arg414_1, arg415_1, arg416_1, arg417_1, arg418_1, arg419_1, arg420_1, arg421_1, arg422_1, arg423_1, arg424_1, arg425_1, arg426_1, arg427_1, 
arg428_1, arg429_1, arg430_1, arg431_1, arg432_1, arg433_1, arg434_1, arg435_1, arg436_1, arg437_1, arg438_1, arg439_1, arg440_1, arg441_1, arg442_1, arg443_1, arg444_1, arg445_1], [getitem_296, getitem_297, getitem_298, getitem_299, getitem_300, getitem_301, getitem_302, getitem_303, getitem_304, getitem_305, getitem_306, getitem_307, getitem_308, getitem_309, getitem_310, getitem_311, getitem_312, getitem_313, getitem_314, getitem_315, getitem_316, getitem_317, getitem_318, getitem_319, getitem_320, getitem_321, getitem_322, getitem_323, getitem_324, getitem_325, getitem_326, getitem_327, getitem_328, getitem_329, getitem_330, getitem_331, getitem_332, getitem_333, getitem_334, getitem_335, getitem_336, getitem_337, getitem_338, getitem_339, getitem_340, getitem_341, getitem_342, getitem_343, getitem_344, getitem_345, getitem_346, getitem_347, getitem_348, getitem_349, getitem_350, getitem_351, getitem_352, getitem_353, getitem_354, getitem_355, getitem_356, getitem_357, getitem_358, getitem_359, getitem_360, getitem_361, getitem_362, getitem_363, getitem_364, getitem_365, getitem_366, getitem_367, getitem_368, getitem_369, getitem_370, getitem_371, getitem_372, getitem_373, getitem_374, getitem_375, getitem_376, getitem_377, getitem_378, getitem_379, getitem_380, getitem_381, getitem_382, getitem_383, getitem_384, getitem_385, getitem_386, getitem_387, getitem_388, getitem_389, getitem_390, getitem_391, getitem_392, getitem_393, getitem_394, getitem_395, getitem_396, getitem_397, getitem_398, getitem_399, getitem_400, getitem_401, getitem_402, getitem_403, getitem_404, getitem_405, getitem_406, getitem_407, getitem_408, getitem_409, getitem_410, getitem_411, getitem_412, getitem_413, getitem_414, getitem_415, getitem_416, getitem_417, getitem_418, getitem_419, getitem_420, getitem_421, getitem_422, getitem_423, getitem_424, getitem_425, getitem_426, getitem_427, getitem_428, getitem_429, getitem_430, getitem_431, getitem_432, getitem_433, getitem_434, getitem_435, getitem_436, getitem_437, getitem_438, getitem_439, getitem_440, getitem_441, getitem_442, getitem_443]); getitem_296 = getitem_297 = getitem_298 = getitem_299 = getitem_300 = getitem_301 = getitem_302 = getitem_303 = getitem_304 = getitem_305 = getitem_306 = getitem_307 = getitem_308 = getitem_309 = getitem_310 = getitem_311 = getitem_312 = getitem_313 = getitem_314 = getitem_315 = getitem_316 = getitem_317 = getitem_318 = getitem_319 = getitem_320 = getitem_321 = getitem_322 = getitem_323 = getitem_324 = getitem_325 = getitem_326 = getitem_327 = getitem_328 = getitem_329 = getitem_330 = getitem_331 = getitem_332 = getitem_333 = getitem_334 = getitem_335 = getitem_336 = getitem_337 = getitem_338 = getitem_339 = getitem_340 = getitem_341 = getitem_342 = getitem_343 = getitem_344 = getitem_345 = getitem_346 = getitem_347 = getitem_348 = getitem_349 = getitem_350 = getitem_351 = getitem_352 = getitem_353 = getitem_354 = getitem_355 = getitem_356 = getitem_357 = getitem_358 = getitem_359 = getitem_360 = getitem_361 = getitem_362 = getitem_363 = getitem_364 = getitem_365 = getitem_366 = getitem_367 = getitem_368 = getitem_369 = getitem_370 = getitem_371 = getitem_372 = getitem_373 = getitem_374 = getitem_375 = getitem_376 = getitem_377 = getitem_378 = getitem_379 = getitem_380 = getitem_381 = getitem_382 = getitem_383 = getitem_384 = getitem_385 = getitem_386 = getitem_387 = getitem_388 = getitem_389 = getitem_390 = getitem_391 = getitem_392 = getitem_393 = getitem_394 = getitem_395 = getitem_396 = getitem_397 = getitem_398 = 
getitem_399 = getitem_400 = getitem_401 = getitem_402 = getitem_403 = getitem_404 = getitem_405 = getitem_406 = getitem_407 = getitem_408 = getitem_409 = getitem_410 = getitem_411 = getitem_412 = getitem_413 = getitem_414 = getitem_415 = getitem_416 = getitem_417 = getitem_418 = getitem_419 = getitem_420 = getitem_421 = getitem_422 = getitem_423 = getitem_424 = getitem_425 = getitem_426 = getitem_427 = getitem_428 = getitem_429 = getitem_430 = getitem_431 = getitem_432 = getitem_433 = getitem_434 = getitem_435 = getitem_436 = getitem_437 = getitem_438 = getitem_439 = getitem_440 = getitem_441 = getitem_442 = getitem_443 = None\n getitem_444 = _foreach_add_1[0]\n getitem_445 = _foreach_add_1[1]\n getitem_446 = _foreach_add_1[2]\n getitem_447 = _foreach_add_1[3]\n getitem_448 = _foreach_add_1[4]\n getitem_449 = _foreach_add_1[5]\n getitem_450 = _foreach_add_1[6]\n getitem_451 = _foreach_add_1[7]\n getitem_452 = _foreach_add_1[8]\n getitem_453 = _foreach_add_1[9]\n getitem_454 = _foreach_add_1[10]\n getitem_455 = _foreach_add_1[11]\n getitem_456 = _foreach_add_1[12]\n getitem_457 = _foreach_add_1[13]\n getitem_458 = _foreach_add_1[14]\n getitem_459 = _foreach_add_1[15]\n getitem_460 = _foreach_add_1[16]\n getitem_461 = _foreach_add_1[17]\n getitem_462 = _foreach_add_1[18]\n getitem_463 = _foreach_add_1[19]\n getitem_464 = _foreach_add_1[20]\n getitem_465 = _foreach_add_1[21]\n getitem_466 = _foreach_add_1[22]\n getitem_467 = _foreach_add_1[23]\n getitem_468 = _foreach_add_1[24]\n getitem_469 = _foreach_add_1[25]\n getitem_470 = _foreach_add_1[26]\n getitem_471 = _foreach_add_1[27]\n getitem_472 = _foreach_add_1[28]\n getitem_473 = _foreach_add_1[29]\n getitem_474 = _foreach_add_1[30]\n getitem_475 = _foreach_add_1[31]\n getitem_476 = _foreach_add_1[32]\n getitem_477 = _foreach_add_1[33]\n getitem_478 = _foreach_add_1[34]\n getitem_479 = _foreach_add_1[35]\n getitem_480 = _foreach_add_1[36]\n getitem_481 = _foreach_add_1[37]\n getitem_482 = _foreach_add_1[38]\n getitem_483 = _foreach_add_1[39]\n getitem_484 = _foreach_add_1[40]\n getitem_485 = _foreach_add_1[41]\n getitem_486 = _foreach_add_1[42]\n getitem_487 = _foreach_add_1[43]\n getitem_488 = _foreach_add_1[44]\n getitem_489 = _foreach_add_1[45]\n getitem_490 = _foreach_add_1[46]\n getitem_491 = _foreach_add_1[47]\n getitem_492 = _foreach_add_1[48]\n getitem_493 = _foreach_add_1[49]\n getitem_494 = _foreach_add_1[50]\n getitem_495 = _foreach_add_1[51]\n getitem_496 = _foreach_add_1[52]\n getitem_497 = _foreach_add_1[53]\n getitem_498 = _foreach_add_1[54]\n getitem_499 = _foreach_add_1[55]\n getitem_500 = _foreach_add_1[56]\n getitem_501 = _foreach_add_1[57]\n getitem_502 = _foreach_add_1[58]\n getitem_503 = _foreach_add_1[59]\n getitem_504 = _foreach_add_1[60]\n getitem_505 = _foreach_add_1[61]\n getitem_506 = _foreach_add_1[62]\n getitem_507 = _foreach_add_1[63]\n getitem_508 = _foreach_add_1[64]\n getitem_509 = _foreach_add_1[65]\n getitem_510 = _foreach_add_1[66]\n getitem_511 = _foreach_add_1[67]\n getitem_512 = _foreach_add_1[68]\n getitem_513 = _foreach_add_1[69]\n getitem_514 = _foreach_add_1[70]\n getitem_515 = _foreach_add_1[71]\n getitem_516 = _foreach_add_1[72]\n getitem_517 = _foreach_add_1[73]\n getitem_518 = _foreach_add_1[74]\n getitem_519 = _foreach_add_1[75]\n getitem_520 = _foreach_add_1[76]\n getitem_521 = _foreach_add_1[77]\n getitem_522 = _foreach_add_1[78]\n getitem_523 = _foreach_add_1[79]\n getitem_524 = _foreach_add_1[80]\n getitem_525 = _foreach_add_1[81]\n getitem_526 = _foreach_add_1[82]\n getitem_527 = 
_foreach_add_1[83]\n getitem_528 = _foreach_add_1[84]\n getitem_529 = _foreach_add_1[85]\n getitem_530 = _foreach_add_1[86]\n getitem_531 = _foreach_add_1[87]\n getitem_532 = _foreach_add_1[88]\n getitem_533 = _foreach_add_1[89]\n getitem_534 = _foreach_add_1[90]\n getitem_535 = _foreach_add_1[91]\n getitem_536 = _foreach_add_1[92]\n getitem_537 = _foreach_add_1[93]\n getitem_538 = _foreach_add_1[94]\n getitem_539 = _foreach_add_1[95]\n getitem_540 = _foreach_add_1[96]\n getitem_541 = _foreach_add_1[97]\n getitem_542 = _foreach_add_1[98]\n getitem_543 = _foreach_add_1[99]\n getitem_544 = _foreach_add_1[100]\n getitem_545 = _foreach_add_1[101]\n getitem_546 = _foreach_add_1[102]\n getitem_547 = _foreach_add_1[103]\n getitem_548 = _foreach_add_1[104]\n getitem_549 = _foreach_add_1[105]\n getitem_550 = _foreach_add_1[106]\n getitem_551 = _foreach_add_1[107]\n getitem_552 = _foreach_add_1[108]\n getitem_553 = _foreach_add_1[109]\n getitem_554 = _foreach_add_1[110]\n getitem_555 = _foreach_add_1[111]\n getitem_556 = _foreach_add_1[112]\n getitem_557 = _foreach_add_1[113]\n getitem_558 = _foreach_add_1[114]\n getitem_559 = _foreach_add_1[115]\n getitem_560 = _foreach_add_1[116]\n getitem_561 = _foreach_add_1[117]\n getitem_562 = _foreach_add_1[118]\n getitem_563 = _foreach_add_1[119]\n getitem_564 = _foreach_add_1[120]\n getitem_565 = _foreach_add_1[121]\n getitem_566 = _foreach_add_1[122]\n getitem_567 = _foreach_add_1[123]\n getitem_568 = _foreach_add_1[124]\n getitem_569 = _foreach_add_1[125]\n getitem_570 = _foreach_add_1[126]\n getitem_571 = _foreach_add_1[127]\n getitem_572 = _foreach_add_1[128]\n getitem_573 = _foreach_add_1[129]\n getitem_574 = _foreach_add_1[130]\n getitem_575 = _foreach_add_1[131]\n getitem_576 = _foreach_add_1[132]\n getitem_577 = _foreach_add_1[133]\n getitem_578 = _foreach_add_1[134]\n getitem_579 = _foreach_add_1[135]\n getitem_580 = _foreach_add_1[136]\n getitem_581 = _foreach_add_1[137]\n getitem_582 = _foreach_add_1[138]\n getitem_583 = _foreach_add_1[139]\n getitem_584 = _foreach_add_1[140]\n getitem_585 = _foreach_add_1[141]\n getitem_586 = _foreach_add_1[142]\n getitem_587 = _foreach_add_1[143]\n getitem_588 = _foreach_add_1[144]\n getitem_589 = _foreach_add_1[145]\n getitem_590 = _foreach_add_1[146]\n getitem_591 = _foreach_add_1[147]; _foreach_add_1 = None\n _foreach_mul_1 = torch.ops.aten._foreach_mul.Scalar([arg446_1, arg150_1, arg447_1, arg448_1, arg449_1, arg450_1, arg451_1, arg452_1, arg453_1, arg454_1, arg455_1, arg456_1, arg457_1, arg458_1, arg459_1, arg460_1, arg461_1, arg462_1, arg463_1, arg464_1, arg465_1, arg466_1, arg467_1, arg468_1, arg469_1, arg470_1, arg471_1, arg472_1, arg473_1, arg474_1, arg475_1, arg476_1, arg477_1, arg478_1, arg479_1, arg480_1, arg481_1, arg482_1, arg483_1, arg484_1, arg485_1, arg486_1, arg487_1, arg488_1, arg489_1, arg490_1, arg491_1, arg492_1, arg493_1, arg494_1, arg495_1, arg496_1, arg497_1, arg498_1, arg499_1, arg500_1, arg501_1, arg502_1, arg503_1, arg504_1, arg505_1, arg506_1, arg507_1, arg508_1, arg509_1, arg510_1, arg511_1, arg512_1, arg513_1, arg514_1, arg515_1, arg516_1, arg517_1, arg518_1, arg519_1, arg520_1, arg521_1, arg522_1, arg523_1, arg524_1, arg525_1, arg526_1, arg527_1, arg528_1, arg529_1, arg530_1, arg531_1, arg532_1, arg533_1, arg534_1, arg535_1, arg536_1, arg537_1, arg538_1, arg539_1, arg540_1, arg541_1, arg542_1, arg543_1, arg544_1, arg545_1, arg546_1, arg547_1, arg548_1, arg549_1, arg550_1, arg551_1, arg552_1, arg553_1, arg554_1, arg555_1, arg556_1, arg557_1, arg558_1, arg559_1, arg560_1, arg561_1, 
arg562_1, arg563_1, arg564_1, arg565_1, arg566_1, arg567_1, arg568_1, arg569_1, arg570_1, arg571_1, arg572_1, arg573_1, arg574_1, arg575_1, arg576_1, arg577_1, arg578_1, arg579_1, arg580_1, arg581_1, arg582_1, arg583_1, arg584_1, arg585_1, arg586_1, arg587_1, arg588_1, arg589_1, arg590_1, arg591_1, arg592_1], 0.999)\n getitem_592 = _foreach_mul_1[0]\n getitem_593 = _foreach_mul_1[1]\n getitem_594 = _foreach_mul_1[2]\n getitem_595 = _foreach_mul_1[3]\n getitem_596 = _foreach_mul_1[4]\n getitem_597 = _foreach_mul_1[5]\n getitem_598 = _foreach_mul_1[6]\n getitem_599 = _foreach_mul_1[7]\n getitem_600 = _foreach_mul_1[8]\n getitem_601 = _foreach_mul_1[9]\n getitem_602 = _foreach_mul_1[10]\n getitem_603 = _foreach_mul_1[11]\n getitem_604 = _foreach_mul_1[12]\n getitem_605 = _foreach_mul_1[13]\n getitem_606 = _foreach_mul_1[14]\n getitem_607 = _foreach_mul_1[15]\n getitem_608 = _foreach_mul_1[16]\n getitem_609 = _foreach_mul_1[17]\n getitem_610 = _foreach_mul_1[18]\n getitem_611 = _foreach_mul_1[19]\n getitem_612 = _foreach_mul_1[20]\n getitem_613 = _foreach_mul_1[21]\n getitem_614 = _foreach_mul_1[22]\n getitem_615 = _foreach_mul_1[23]\n getitem_616 = _foreach_mul_1[24]\n getitem_617 = _foreach_mul_1[25]\n getitem_618 = _foreach_mul_1[26]\n getitem_619 = _foreach_mul_1[27]\n getitem_620 = _foreach_mul_1[28]\n getitem_621 = _foreach_mul_1[29]\n getitem_622 = _foreach_mul_1[30]\n getitem_623 = _foreach_mul_1[31]\n getitem_624 = _foreach_mul_1[32]\n getitem_625 = _foreach_mul_1[33]\n getitem_626 = _foreach_mul_1[34]\n getitem_627 = _foreach_mul_1[35]\n getitem_628 = _foreach_mul_1[36]\n getitem_629 = _foreach_mul_1[37]\n getitem_630 = _foreach_mul_1[38]\n getitem_631 = _foreach_mul_1[39]\n getitem_632 = _foreach_mul_1[40]\n getitem_633 = _foreach_mul_1[41]\n getitem_634 = _foreach_mul_1[42]\n getitem_635 = _foreach_mul_1[43]\n getitem_636 = _foreach_mul_1[44]\n getitem_637 = _foreach_mul_1[45]\n getitem_638 = _foreach_mul_1[46]\n getitem_639 = _foreach_mul_1[47]\n getitem_640 = _foreach_mul_1[48]\n getitem_641 = _foreach_mul_1[49]\n getitem_642 = _foreach_mul_1[50]\n getitem_643 = _foreach_mul_1[51]\n getitem_644 = _foreach_mul_1[52]\n getitem_645 = _foreach_mul_1[53]\n getitem_646 = _foreach_mul_1[54]\n getitem_647 = _foreach_mul_1[55]\n getitem_648 = _foreach_mul_1[56]\n getitem_649 = _foreach_mul_1[57]\n getitem_650 = _foreach_mul_1[58]\n getitem_651 = _foreach_mul_1[59]\n getitem_652 = _foreach_mul_1[60]\n getitem_653 = _foreach_mul_1[61]\n getitem_654 = _foreach_mul_1[62]\n getitem_655 = _foreach_mul_1[63]\n getitem_656 = _foreach_mul_1[64]\n getitem_657 = _foreach_mul_1[65]\n getitem_658 = _foreach_mul_1[66]\n getitem_659 = _foreach_mul_1[67]\n getitem_660 = _foreach_mul_1[68]\n getitem_661 = _foreach_mul_1[69]\n getitem_662 = _foreach_mul_1[70]\n getitem_663 = _foreach_mul_1[71]\n getitem_664 = _foreach_mul_1[72]\n getitem_665 = _foreach_mul_1[73]\n getitem_666 = _foreach_mul_1[74]\n getitem_667 = _foreach_mul_1[75]\n getitem_668 = _foreach_mul_1[76]\n getitem_669 = _foreach_mul_1[77]\n getitem_670 = _foreach_mul_1[78]\n getitem_671 = _foreach_mul_1[79]\n getitem_672 = _foreach_mul_1[80]\n getitem_673 = _foreach_mul_1[81]\n getitem_674 = _foreach_mul_1[82]\n getitem_675 = _foreach_mul_1[83]\n getitem_676 = _foreach_mul_1[84]\n getitem_677 = _foreach_mul_1[85]\n getitem_678 = _foreach_mul_1[86]\n getitem_679 = _foreach_mul_1[87]\n getitem_680 = _foreach_mul_1[88]\n getitem_681 = _foreach_mul_1[89]\n getitem_682 = _foreach_mul_1[90]\n getitem_683 = _foreach_mul_1[91]\n getitem_684 = 
_foreach_mul_1[92]\n getitem_685 = _foreach_mul_1[93]\n getitem_686 = _foreach_mul_1[94]\n getitem_687 = _foreach_mul_1[95]\n getitem_688 = _foreach_mul_1[96]\n getitem_689 = _foreach_mul_1[97]\n getitem_690 = _foreach_mul_1[98]\n getitem_691 = _foreach_mul_1[99]\n getitem_692 = _foreach_mul_1[100]\n getitem_693 = _foreach_mul_1[101]\n getitem_694 = _foreach_mul_1[102]\n getitem_695 = _foreach_mul_1[103]\n getitem_696 = _foreach_mul_1[104]\n getitem_697 = _foreach_mul_1[105]\n getitem_698 = _foreach_mul_1[106]\n getitem_699 = _foreach_mul_1[107]\n getitem_700 = _foreach_mul_1[108]\n getitem_701 = _foreach_mul_1[109]\n getitem_702 = _foreach_mul_1[110]\n getitem_703 = _foreach_mul_1[111]\n getitem_704 = _foreach_mul_1[112]\n getitem_705 = _foreach_mul_1[113]\n getitem_706 = _foreach_mul_1[114]\n getitem_707 = _foreach_mul_1[115]\n getitem_708 = _foreach_mul_1[116]\n getitem_709 = _foreach_mul_1[117]\n getitem_710 = _foreach_mul_1[118]\n getitem_711 = _foreach_mul_1[119]\n getitem_712 = _foreach_mul_1[120]\n getitem_713 = _foreach_mul_1[121]\n getitem_714 = _foreach_mul_1[122]\n getitem_715 = _foreach_mul_1[123]\n getitem_716 = _foreach_mul_1[124]\n getitem_717 = _foreach_mul_1[125]\n getitem_718 = _foreach_mul_1[126]\n getitem_719 = _foreach_mul_1[127]\n getitem_720 = _foreach_mul_1[128]\n getitem_721 = _foreach_mul_1[129]\n getitem_722 = _foreach_mul_1[130]\n getitem_723 = _foreach_mul_1[131]\n getitem_724 = _foreach_mul_1[132]\n getitem_725 = _foreach_mul_1[133]\n getitem_726 = _foreach_mul_1[134]\n getitem_727 = _foreach_mul_1[135]\n getitem_728 = _foreach_mul_1[136]\n getitem_729 = _foreach_mul_1[137]\n getitem_730 = _foreach_mul_1[138]\n getitem_731 = _foreach_mul_1[139]\n getitem_732 = _foreach_mul_1[140]\n getitem_733 = _foreach_mul_1[141]\n getitem_734 = _foreach_mul_1[142]\n getitem_735 = _foreach_mul_1[143]\n getitem_736 = _foreach_mul_1[144]\n getitem_737 = _foreach_mul_1[145]\n getitem_738 = _foreach_mul_1[146]\n getitem_739 = _foreach_mul_1[147]; _foreach_mul_1 = None\n _foreach_mul_2 = torch.ops.aten._foreach_mul.List([arg151_1, arg152_1, arg153_1, arg154_1, arg155_1, arg156_1, arg157_1, arg158_1, arg159_1, arg160_1, arg161_1, arg162_1, arg163_1, arg164_1, arg165_1, arg166_1, arg167_1, arg168_1, arg169_1, arg170_1, arg171_1, arg172_1, arg173_1, arg174_1, arg175_1, arg176_1, arg177_1, arg178_1, arg179_1, arg180_1, arg181_1, arg182_1, arg183_1, arg184_1, arg185_1, arg186_1, arg187_1, arg188_1, arg189_1, arg190_1, arg191_1, arg192_1, arg193_1, arg194_1, arg195_1, arg196_1, arg197_1, arg198_1, arg199_1, arg200_1, arg201_1, arg202_1, arg203_1, arg204_1, arg205_1, arg206_1, arg207_1, arg208_1, arg209_1, arg210_1, arg211_1, arg212_1, arg213_1, arg214_1, arg215_1, arg216_1, arg217_1, arg218_1, arg219_1, arg220_1, arg221_1, arg222_1, arg223_1, arg224_1, arg225_1, arg226_1, arg227_1, arg228_1, arg229_1, arg230_1, arg231_1, arg232_1, arg233_1, arg234_1, arg235_1, arg236_1, arg237_1, arg238_1, arg239_1, arg240_1, arg241_1, arg242_1, arg243_1, arg244_1, arg245_1, arg246_1, arg247_1, arg248_1, arg249_1, arg250_1, arg251_1, arg252_1, arg253_1, arg254_1, arg255_1, arg256_1, arg257_1, arg258_1, arg259_1, arg260_1, arg261_1, arg262_1, arg263_1, arg264_1, arg265_1, arg266_1, arg267_1, arg268_1, arg269_1, arg270_1, arg271_1, arg272_1, arg273_1, arg274_1, arg275_1, arg276_1, arg277_1, arg278_1, arg279_1, arg280_1, arg281_1, arg282_1, arg283_1, arg284_1, arg285_1, arg286_1, arg287_1, arg288_1, arg289_1, arg290_1, arg291_1, arg292_1, arg293_1, arg294_1, arg295_1, arg296_1, arg297_1, arg298_1], 
[arg151_1, arg152_1, arg153_1, arg154_1, arg155_1, arg156_1, arg157_1, arg158_1, arg159_1, arg160_1, arg161_1, arg162_1, arg163_1, arg164_1, arg165_1, arg166_1, arg167_1, arg168_1, arg169_1, arg170_1, arg171_1, arg172_1, arg173_1, arg174_1, arg175_1, arg176_1, arg177_1, arg178_1, arg179_1, arg180_1, arg181_1, arg182_1, arg183_1, arg184_1, arg185_1, arg186_1, arg187_1, arg188_1, arg189_1, arg190_1, arg191_1, arg192_1, arg193_1, arg194_1, arg195_1, arg196_1, arg197_1, arg198_1, arg199_1, arg200_1, arg201_1, arg202_1, arg203_1, arg204_1, arg205_1, arg206_1, arg207_1, arg208_1, arg209_1, arg210_1, arg211_1, arg212_1, arg213_1, arg214_1, arg215_1, arg216_1, arg217_1, arg218_1, arg219_1, arg220_1, arg221_1, arg222_1, arg223_1, arg224_1, arg225_1, arg226_1, arg227_1, arg228_1, arg229_1, arg230_1, arg231_1, arg232_1, arg233_1, arg234_1, arg235_1, arg236_1, arg237_1, arg238_1, arg239_1, arg240_1, arg241_1, arg242_1, arg243_1, arg244_1, arg245_1, arg246_1, arg247_1, arg248_1, arg249_1, arg250_1, arg251_1, arg252_1, arg253_1, arg254_1, arg255_1, arg256_1, arg257_1, arg258_1, arg259_1, arg260_1, arg261_1, arg262_1, arg263_1, arg264_1, arg265_1, arg266_1, arg267_1, arg268_1, arg269_1, arg270_1, arg271_1, arg272_1, arg273_1, arg274_1, arg275_1, arg276_1, arg277_1, arg278_1, arg279_1, arg280_1, arg281_1, arg282_1, arg283_1, arg284_1, arg285_1, arg286_1, arg287_1, arg288_1, arg289_1, arg290_1, arg291_1, arg292_1, arg293_1, arg294_1, arg295_1, arg296_1, arg297_1, arg298_1]); arg151_1 = arg152_1 = arg153_1 = arg154_1 = arg155_1 = arg156_1 = arg157_1 = arg158_1 = arg159_1 = arg160_1 = arg161_1 = arg162_1 = arg163_1 = arg164_1 = arg165_1 = arg166_1 = arg167_1 = arg168_1 = arg169_1 = arg170_1 = arg171_1 = arg172_1 = arg173_1 = arg174_1 = arg175_1 = arg176_1 = arg177_1 = arg178_1 = arg179_1 = arg180_1 = arg181_1 = arg182_1 = arg183_1 = arg184_1 = arg185_1 = arg186_1 = arg187_1 = arg188_1 = arg189_1 = arg190_1 = arg191_1 = arg192_1 = arg193_1 = arg194_1 = arg195_1 = arg196_1 = arg197_1 = arg198_1 = arg199_1 = arg200_1 = arg201_1 = arg202_1 = arg203_1 = arg204_1 = arg205_1 = arg206_1 = arg207_1 = arg208_1 = arg209_1 = arg210_1 = arg211_1 = arg212_1 = arg213_1 = arg214_1 = arg215_1 = arg216_1 = arg217_1 = arg218_1 = arg219_1 = arg220_1 = arg221_1 = arg222_1 = arg223_1 = arg224_1 = arg225_1 = arg226_1 = arg227_1 = arg228_1 = arg229_1 = arg230_1 = arg231_1 = arg232_1 = arg233_1 = arg234_1 = arg235_1 = arg236_1 = arg237_1 = arg238_1 = arg239_1 = arg240_1 = arg241_1 = arg242_1 = arg243_1 = arg244_1 = arg245_1 = arg246_1 = arg247_1 = arg248_1 = arg249_1 = arg250_1 = arg251_1 = arg252_1 = arg253_1 = arg254_1 = arg255_1 = arg256_1 = arg257_1 = arg258_1 = arg259_1 = arg260_1 = arg261_1 = arg262_1 = arg263_1 = arg264_1 = arg265_1 = arg266_1 = arg267_1 = arg268_1 = arg269_1 = arg270_1 = arg271_1 = arg272_1 = arg273_1 = arg274_1 = arg275_1 = arg276_1 = arg277_1 = arg278_1 = arg279_1 = arg280_1 = arg281_1 = arg282_1 = arg283_1 = arg284_1 = arg285_1 = arg286_1 = arg287_1 = arg288_1 = arg289_1 = arg290_1 = arg291_1 = arg292_1 = arg293_1 = arg294_1 = arg295_1 = arg296_1 = arg297_1 = arg298_1 = None\n getitem_740 = _foreach_mul_2[0]\n getitem_741 = _foreach_mul_2[1]\n getitem_742 = _foreach_mul_2[2]\n getitem_743 = _foreach_mul_2[3]\n getitem_744 = _foreach_mul_2[4]\n getitem_745 = _foreach_mul_2[5]\n getitem_746 = _foreach_mul_2[6]\n getitem_747 = _foreach_mul_2[7]\n getitem_748 = _foreach_mul_2[8]\n getitem_749 = _foreach_mul_2[9]\n getitem_750 = _foreach_mul_2[10]\n getitem_751 = _foreach_mul_2[11]\n getitem_752 = 
_foreach_mul_2[12]\n getitem_753 = _foreach_mul_2[13]\n getitem_754 = _foreach_mul_2[14]\n getitem_755 = _foreach_mul_2[15]\n getitem_756 = _foreach_mul_2[16]\n getitem_757 = _foreach_mul_2[17]\n getitem_758 = _foreach_mul_2[18]\n getitem_759 = _foreach_mul_2[19]\n getitem_760 = _foreach_mul_2[20]\n getitem_761 = _foreach_mul_2[21]\n getitem_762 = _foreach_mul_2[22]\n getitem_763 = _foreach_mul_2[23]\n getitem_764 = _foreach_mul_2[24]\n getitem_765 = _foreach_mul_2[25]\n getitem_766 = _foreach_mul_2[26]\n getitem_767 = _foreach_mul_2[27]\n getitem_768 = _foreach_mul_2[28]\n getitem_769 = _foreach_mul_2[29]\n getitem_770 = _foreach_mul_2[30]\n getitem_771 = _foreach_mul_2[31]\n getitem_772 = _foreach_mul_2[32]\n getitem_773 = _foreach_mul_2[33]\n getitem_774 = _foreach_mul_2[34]\n getitem_775 = _foreach_mul_2[35]\n getitem_776 = _foreach_mul_2[36]\n getitem_777 = _foreach_mul_2[37]\n getitem_778 = _foreach_mul_2[38]\n getitem_779 = _foreach_mul_2[39]\n getitem_780 = _foreach_mul_2[40]\n getitem_781 = _foreach_mul_2[41]\n getitem_782 = _foreach_mul_2[42]\n getitem_783 = _foreach_mul_2[43]\n getitem_784 = _foreach_mul_2[44]\n getitem_785 = _foreach_mul_2[45]\n getitem_786 = _foreach_mul_2[46]\n getitem_787 = _foreach_mul_2[47]\n getitem_788 = _foreach_mul_2[48]\n getitem_789 = _foreach_mul_2[49]\n getitem_790 = _foreach_mul_2[50]\n getitem_791 = _foreach_mul_2[51]\n getitem_792 = _foreach_mul_2[52]\n getitem_793 = _foreach_mul_2[53]\n getitem_794 = _foreach_mul_2[54]\n getitem_795 = _foreach_mul_2[55]\n getitem_796 = _foreach_mul_2[56]\n getitem_797 = _foreach_mul_2[57]\n getitem_798 = _foreach_mul_2[58]\n getitem_799 = _foreach_mul_2[59]\n getitem_800 = _foreach_mul_2[60]\n getitem_801 = _foreach_mul_2[61]\n getitem_802 = _foreach_mul_2[62]\n getitem_803 = _foreach_mul_2[63]\n getitem_804 = _foreach_mul_2[64]\n getitem_805 = _foreach_mul_2[65]\n getitem_806 = _foreach_mul_2[66]\n getitem_807 = _foreach_mul_2[67]\n getitem_808 = _foreach_mul_2[68]\n getitem_809 = _foreach_mul_2[69]\n getitem_810 = _foreach_mul_2[70]\n getitem_811 = _foreach_mul_2[71]\n getitem_812 = _foreach_mul_2[72]\n getitem_813 = _foreach_mul_2[73]\n getitem_814 = _foreach_mul_2[74]\n getitem_815 = _foreach_mul_2[75]\n getitem_816 = _foreach_mul_2[76]\n getitem_817 = _foreach_mul_2[77]\n getitem_818 = _foreach_mul_2[78]\n getitem_819 = _foreach_mul_2[79]\n getitem_820 = _foreach_mul_2[80]\n getitem_821 = _foreach_mul_2[81]\n getitem_822 = _foreach_mul_2[82]\n getitem_823 = _foreach_mul_2[83]\n getitem_824 = _foreach_mul_2[84]\n getitem_825 = _foreach_mul_2[85]\n getitem_826 = _foreach_mul_2[86]\n getitem_827 = _foreach_mul_2[87]\n getitem_828 = _foreach_mul_2[88]\n getitem_829 = _foreach_mul_2[89]\n getitem_830 = _foreach_mul_2[90]\n getitem_831 = _foreach_mul_2[91]\n getitem_832 = _foreach_mul_2[92]\n getitem_833 = _foreach_mul_2[93]\n getitem_834 = _foreach_mul_2[94]\n getitem_835 = _foreach_mul_2[95]\n getitem_836 = _foreach_mul_2[96]\n getitem_837 = _foreach_mul_2[97]\n getitem_838 = _foreach_mul_2[98]\n getitem_839 = _foreach_mul_2[99]\n getitem_840 = _foreach_mul_2[100]\n getitem_841 = _foreach_mul_2[101]\n getitem_842 = _foreach_mul_2[102]\n getitem_843 = _foreach_mul_2[103]\n getitem_844 = _foreach_mul_2[104]\n getitem_845 = _foreach_mul_2[105]\n getitem_846 = _foreach_mul_2[106]\n getitem_847 = _foreach_mul_2[107]\n getitem_848 = _foreach_mul_2[108]\n getitem_849 = _foreach_mul_2[109]\n getitem_850 = _foreach_mul_2[110]\n getitem_851 = _foreach_mul_2[111]\n getitem_852 = _foreach_mul_2[112]\n getitem_853 = 
_foreach_mul_2[113]\n getitem_854 = _foreach_mul_2[114]\n getitem_855 = _foreach_mul_2[115]\n getitem_856 = _foreach_mul_2[116]\n getitem_857 = _foreach_mul_2[117]\n getitem_858 = _foreach_mul_2[118]\n getitem_859 = _foreach_mul_2[119]\n getitem_860 = _foreach_mul_2[120]\n getitem_861 = _foreach_mul_2[121]\n getitem_862 = _foreach_mul_2[122]\n getitem_863 = _foreach_mul_2[123]\n getitem_864 = _foreach_mul_2[124]\n getitem_865 = _foreach_mul_2[125]\n getitem_866 = _foreach_mul_2[126]\n getitem_867 = _foreach_mul_2[127]\n getitem_868 = _foreach_mul_2[128]\n getitem_869 = _foreach_mul_2[129]\n getitem_870 = _foreach_mul_2[130]\n getitem_871 = _foreach_mul_2[131]\n getitem_872 = _foreach_mul_2[132]\n getitem_873 = _foreach_mul_2[133]\n getitem_874 = _foreach_mul_2[134]\n getitem_875 = _foreach_mul_2[135]\n getitem_876 = _foreach_mul_2[136]\n getitem_877 = _foreach_mul_2[137]\n getitem_878 = _foreach_mul_2[138]\n getitem_879 = _foreach_mul_2[139]\n getitem_880 = _foreach_mul_2[140]\n getitem_881 = _foreach_mul_2[141]\n getitem_882 = _foreach_mul_2[142]\n getitem_883 = _foreach_mul_2[143]\n getitem_884 = _foreach_mul_2[144]\n getitem_885 = _foreach_mul_2[145]\n getitem_886 = _foreach_mul_2[146]\n getitem_887 = _foreach_mul_2[147]; _foreach_mul_2 = None\n _foreach_add_2 = torch.ops.aten._foreach_add.List([getitem_592, getitem_593, getitem_594, getitem_595, getitem_596, getitem_597, getitem_598, getitem_599, getitem_600, getitem_601, getitem_602, getitem_603, getitem_604, getitem_605, getitem_606, getitem_607, getitem_608, getitem_609, getitem_610, getitem_611, getitem_612, getitem_613, getitem_614, getitem_615, getitem_616, getitem_617, getitem_618, getitem_619, getitem_620, getitem_621, getitem_622, getitem_623, getitem_624, getitem_625, getitem_626, getitem_627, getitem_628, getitem_629, getitem_630, getitem_631, getitem_632, getitem_633, getitem_634, getitem_635, getitem_636, getitem_637, getitem_638, getitem_639, getitem_640, getitem_641, getitem_642, getitem_643, getitem_644, getitem_645, getitem_646, getitem_647, getitem_648, getitem_649, getitem_650, getitem_651, getitem_652, getitem_653, getitem_654, getitem_655, getitem_656, getitem_657, getitem_658, getitem_659, getitem_660, getitem_661, getitem_662, getitem_663, getitem_664, getitem_665, getitem_666, getitem_667, getitem_668, getitem_669, getitem_670, getitem_671, getitem_672, getitem_673, getitem_674, getitem_675, getitem_676, getitem_677, getitem_678, getitem_679, getitem_680, getitem_681, getitem_682, getitem_683, getitem_684, getitem_685, getitem_686, getitem_687, getitem_688, getitem_689, getitem_690, getitem_691, getitem_692, getitem_693, getitem_694, getitem_695, getitem_696, getitem_697, getitem_698, getitem_699, getitem_700, getitem_701, getitem_702, getitem_703, getitem_704, getitem_705, getitem_706, getitem_707, getitem_708, getitem_709, getitem_710, getitem_711, getitem_712, getitem_713, getitem_714, getitem_715, getitem_716, getitem_717, getitem_718, getitem_719, getitem_720, getitem_721, getitem_722, getitem_723, getitem_724, getitem_725, getitem_726, getitem_727, getitem_728, getitem_729, getitem_730, getitem_731, getitem_732, getitem_733, getitem_734, getitem_735, getitem_736, getitem_737, getitem_738, getitem_739], [getitem_740, getitem_741, getitem_742, getitem_743, getitem_744, getitem_745, getitem_746, getitem_747, getitem_748, getitem_749, getitem_750, getitem_751, getitem_752, getitem_753, getitem_754, getitem_755, getitem_756, getitem_757, getitem_758, getitem_759, getitem_760, getitem_761, getitem_762, 
getitem_763, getitem_764, getitem_765, getitem_766, getitem_767, getitem_768, getitem_769, getitem_770, getitem_771, getitem_772, getitem_773, getitem_774, getitem_775, getitem_776, getitem_777, getitem_778, getitem_779, getitem_780, getitem_781, getitem_782, getitem_783, getitem_784, getitem_785, getitem_786, getitem_787, getitem_788, getitem_789, getitem_790, getitem_791, getitem_792, getitem_793, getitem_794, getitem_795, getitem_796, getitem_797, getitem_798, getitem_799, getitem_800, getitem_801, getitem_802, getitem_803, getitem_804, getitem_805, getitem_806, getitem_807, getitem_808, getitem_809, getitem_810, getitem_811, getitem_812, getitem_813, getitem_814, getitem_815, getitem_816, getitem_817, getitem_818, getitem_819, getitem_820, getitem_821, getitem_822, getitem_823, getitem_824, getitem_825, getitem_826, getitem_827, getitem_828, getitem_829, getitem_830, getitem_831, getitem_832, getitem_833, getitem_834, getitem_835, getitem_836, getitem_837, getitem_838, getitem_839, getitem_840, getitem_841, getitem_842, getitem_843, getitem_844, getitem_845, getitem_846, getitem_847, getitem_848, getitem_849, getitem_850, getitem_851, getitem_852, getitem_853, getitem_854, getitem_855, getitem_856, getitem_857, getitem_858, getitem_859, getitem_860, getitem_861, getitem_862, getitem_863, getitem_864, getitem_865, getitem_866, getitem_867, getitem_868, getitem_869, getitem_870, getitem_871, getitem_872, getitem_873, getitem_874, getitem_875, getitem_876, getitem_877, getitem_878, getitem_879, getitem_880, getitem_881, getitem_882, getitem_883, getitem_884, getitem_885, getitem_886, getitem_887], alpha = 0.0010000000000000009); getitem_592 = getitem_593 = getitem_594 = getitem_595 = getitem_596 = getitem_597 = getitem_598 = getitem_599 = getitem_600 = getitem_601 = getitem_602 = getitem_603 = getitem_604 = getitem_605 = getitem_606 = getitem_607 = getitem_608 = getitem_609 = getitem_610 = getitem_611 = getitem_612 = getitem_613 = getitem_614 = getitem_615 = getitem_616 = getitem_617 = getitem_618 = getitem_619 = getitem_620 = getitem_621 = getitem_622 = getitem_623 = getitem_624 = getitem_625 = getitem_626 = getitem_627 = getitem_628 = getitem_629 = getitem_630 = getitem_631 = getitem_632 = getitem_633 = getitem_634 = getitem_635 = getitem_636 = getitem_637 = getitem_638 = getitem_639 = getitem_640 = getitem_641 = getitem_642 = getitem_643 = getitem_644 = getitem_645 = getitem_646 = getitem_647 = getitem_648 = getitem_649 = getitem_650 = getitem_651 = getitem_652 = getitem_653 = getitem_654 = getitem_655 = getitem_656 = getitem_657 = getitem_658 = getitem_659 = getitem_660 = getitem_661 = getitem_662 = getitem_663 = getitem_664 = getitem_665 = getitem_666 = getitem_667 = getitem_668 = getitem_669 = getitem_670 = getitem_671 = getitem_672 = getitem_673 = getitem_674 = getitem_675 = getitem_676 = getitem_677 = getitem_678 = getitem_679 = getitem_680 = getitem_681 = getitem_682 = getitem_683 = getitem_684 = getitem_685 = getitem_686 = getitem_687 = getitem_688 = getitem_689 = getitem_690 = getitem_691 = getitem_692 = getitem_693 = getitem_694 = getitem_695 = getitem_696 = getitem_697 = getitem_698 = getitem_699 = getitem_700 = getitem_701 = getitem_702 = getitem_703 = getitem_704 = getitem_705 = getitem_706 = getitem_707 = getitem_708 = getitem_709 = getitem_710 = getitem_711 = getitem_712 = getitem_713 = getitem_714 = getitem_715 = getitem_716 = getitem_717 = getitem_718 = getitem_719 = getitem_720 = getitem_721 = getitem_722 = getitem_723 = getitem_724 = getitem_725 = getitem_726 = 
getitem_727 = getitem_728 = getitem_729 = getitem_730 = getitem_731 = getitem_732 = getitem_733 = getitem_734 = getitem_735 = getitem_736 = getitem_737 = getitem_738 = getitem_739 = getitem_740 = getitem_741 = getitem_742 = getitem_743 = getitem_744 = getitem_745 = getitem_746 = getitem_747 = getitem_748 = getitem_749 = getitem_750 = getitem_751 = getitem_752 = getitem_753 = getitem_754 = getitem_755 = getitem_756 = getitem_757 = getitem_758 = getitem_759 = getitem_760 = getitem_761 = getitem_762 = getitem_763 = getitem_764 = getitem_765 = getitem_766 = getitem_767 = getitem_768 = getitem_769 = getitem_770 = getitem_771 = getitem_772 = getitem_773 = getitem_774 = getitem_775 = getitem_776 = getitem_777 = getitem_778 = getitem_779 = getitem_780 = getitem_781 = getitem_782 = getitem_783 = getitem_784 = getitem_785 = getitem_786 = getitem_787 = getitem_788 = getitem_789 = getitem_790 = getitem_791 = getitem_792 = getitem_793 = getitem_794 = getitem_795 = getitem_796 = getitem_797 = getitem_798 = getitem_799 = getitem_800 = getitem_801 = getitem_802 = getitem_803 = getitem_804 = getitem_805 = getitem_806 = getitem_807 = getitem_808 = getitem_809 = getitem_810 = getitem_811 = getitem_812 = getitem_813 = getitem_814 = getitem_815 = getitem_816 = getitem_817 = getitem_818 = getitem_819 = getitem_820 = getitem_821 = getitem_822 = getitem_823 = getitem_824 = getitem_825 = getitem_826 = getitem_827 = getitem_828 = getitem_829 = getitem_830 = getitem_831 = getitem_832 = getitem_833 = getitem_834 = getitem_835 = getitem_836 = getitem_837 = getitem_838 = getitem_839 = getitem_840 = getitem_841 = getitem_842 = getitem_843 = getitem_844 = getitem_845 = getitem_846 = getitem_847 = getitem_848 = getitem_849 = getitem_850 = getitem_851 = getitem_852 = getitem_853 = getitem_854 = getitem_855 = getitem_856 = getitem_857 = getitem_858 = getitem_859 = getitem_860 = getitem_861 = getitem_862 = getitem_863 = getitem_864 = getitem_865 = getitem_866 = getitem_867 = getitem_868 = getitem_869 = getitem_870 = getitem_871 = getitem_872 = getitem_873 = getitem_874 = getitem_875 = getitem_876 = getitem_877 = getitem_878 = getitem_879 = getitem_880 = getitem_881 = getitem_882 = getitem_883 = getitem_884 = getitem_885 = getitem_886 = getitem_887 = None\n getitem_888 = _foreach_add_2[0]\n getitem_889 = _foreach_add_2[1]\n getitem_890 = _foreach_add_2[2]\n getitem_891 = _foreach_add_2[3]\n getitem_892 = _foreach_add_2[4]\n getitem_893 = _foreach_add_2[5]\n getitem_894 = _foreach_add_2[6]\n getitem_895 = _foreach_add_2[7]\n getitem_896 = _foreach_add_2[8]\n getitem_897 = _foreach_add_2[9]\n getitem_898 = _foreach_add_2[10]\n getitem_899 = _foreach_add_2[11]\n getitem_900 = _foreach_add_2[12]\n getitem_901 = _foreach_add_2[13]\n getitem_902 = _foreach_add_2[14]\n getitem_903 = _foreach_add_2[15]\n getitem_904 = _foreach_add_2[16]\n getitem_905 = _foreach_add_2[17]\n getitem_906 = _foreach_add_2[18]\n getitem_907 = _foreach_add_2[19]\n getitem_908 = _foreach_add_2[20]\n getitem_909 = _foreach_add_2[21]\n getitem_910 = _foreach_add_2[22]\n getitem_911 = _foreach_add_2[23]\n getitem_912 = _foreach_add_2[24]\n getitem_913 = _foreach_add_2[25]\n getitem_914 = _foreach_add_2[26]\n getitem_915 = _foreach_add_2[27]\n getitem_916 = _foreach_add_2[28]\n getitem_917 = _foreach_add_2[29]\n getitem_918 = _foreach_add_2[30]\n getitem_919 = _foreach_add_2[31]\n getitem_920 = _foreach_add_2[32]\n getitem_921 = _foreach_add_2[33]\n getitem_922 = _foreach_add_2[34]\n getitem_923 = _foreach_add_2[35]\n getitem_924 = _foreach_add_2[36]\n 
getitem_925 = _foreach_add_2[37]\n getitem_926 = _foreach_add_2[38]\n getitem_927 = _foreach_add_2[39]\n getitem_928 = _foreach_add_2[40]\n getitem_929 = _foreach_add_2[41]\n getitem_930 = _foreach_add_2[42]\n getitem_931 = _foreach_add_2[43]\n getitem_932 = _foreach_add_2[44]\n getitem_933 = _foreach_add_2[45]\n getitem_934 = _foreach_add_2[46]\n getitem_935 = _foreach_add_2[47]\n getitem_936 = _foreach_add_2[48]\n getitem_937 = _foreach_add_2[49]\n getitem_938 = _foreach_add_2[50]\n getitem_939 = _foreach_add_2[51]\n getitem_940 = _foreach_add_2[52]\n getitem_941 = _foreach_add_2[53]\n getitem_942 = _foreach_add_2[54]\n getitem_943 = _foreach_add_2[55]\n getitem_944 = _foreach_add_2[56]\n getitem_945 = _foreach_add_2[57]\n getitem_946 = _foreach_add_2[58]\n getitem_947 = _foreach_add_2[59]\n getitem_948 = _foreach_add_2[60]\n getitem_949 = _foreach_add_2[61]\n getitem_950 = _foreach_add_2[62]\n getitem_951 = _foreach_add_2[63]\n getitem_952 = _foreach_add_2[64]\n getitem_953 = _foreach_add_2[65]\n getitem_954 = _foreach_add_2[66]\n getitem_955 = _foreach_add_2[67]\n getitem_956 = _foreach_add_2[68]\n getitem_957 = _foreach_add_2[69]\n getitem_958 = _foreach_add_2[70]\n getitem_959 = _foreach_add_2[71]\n getitem_960 = _foreach_add_2[72]\n getitem_961 = _foreach_add_2[73]\n getitem_962 = _foreach_add_2[74]\n getitem_963 = _foreach_add_2[75]\n getitem_964 = _foreach_add_2[76]\n getitem_965 = _foreach_add_2[77]\n getitem_966 = _foreach_add_2[78]\n getitem_967 = _foreach_add_2[79]\n getitem_968 = _foreach_add_2[80]\n getitem_969 = _foreach_add_2[81]\n getitem_970 = _foreach_add_2[82]\n getitem_971 = _foreach_add_2[83]\n getitem_972 = _foreach_add_2[84]\n getitem_973 = _foreach_add_2[85]\n getitem_974 = _foreach_add_2[86]\n getitem_975 = _foreach_add_2[87]\n getitem_976 = _foreach_add_2[88]\n getitem_977 = _foreach_add_2[89]\n getitem_978 = _foreach_add_2[90]\n getitem_979 = _foreach_add_2[91]\n getitem_980 = _foreach_add_2[92]\n getitem_981 = _foreach_add_2[93]\n getitem_982 = _foreach_add_2[94]\n getitem_983 = _foreach_add_2[95]\n getitem_984 = _foreach_add_2[96]\n getitem_985 = _foreach_add_2[97]\n getitem_986 = _foreach_add_2[98]\n getitem_987 = _foreach_add_2[99]\n getitem_988 = _foreach_add_2[100]\n getitem_989 = _foreach_add_2[101]\n getitem_990 = _foreach_add_2[102]\n getitem_991 = _foreach_add_2[103]\n getitem_992 = _foreach_add_2[104]\n getitem_993 = _foreach_add_2[105]\n getitem_994 = _foreach_add_2[106]\n getitem_995 = _foreach_add_2[107]\n getitem_996 = _foreach_add_2[108]\n getitem_997 = _foreach_add_2[109]\n getitem_998 = _foreach_add_2[110]\n getitem_999 = _foreach_add_2[111]\n getitem_1000 = _foreach_add_2[112]\n getitem_1001 = _foreach_add_2[113]\n getitem_1002 = _foreach_add_2[114]\n getitem_1003 = _foreach_add_2[115]\n getitem_1004 = _foreach_add_2[116]\n getitem_1005 = _foreach_add_2[117]\n getitem_1006 = _foreach_add_2[118]\n getitem_1007 = _foreach_add_2[119]\n getitem_1008 = _foreach_add_2[120]\n getitem_1009 = _foreach_add_2[121]\n getitem_1010 = _foreach_add_2[122]\n getitem_1011 = _foreach_add_2[123]\n getitem_1012 = _foreach_add_2[124]\n getitem_1013 = _foreach_add_2[125]\n getitem_1014 = _foreach_add_2[126]\n getitem_1015 = _foreach_add_2[127]\n getitem_1016 = _foreach_add_2[128]\n getitem_1017 = _foreach_add_2[129]\n getitem_1018 = _foreach_add_2[130]\n getitem_1019 = _foreach_add_2[131]\n getitem_1020 = _foreach_add_2[132]\n getitem_1021 = _foreach_add_2[133]\n getitem_1022 = _foreach_add_2[134]\n getitem_1023 = _foreach_add_2[135]\n getitem_1024 = 
_foreach_add_2[136]\n getitem_1025 = _foreach_add_2[137]\n getitem_1026 = _foreach_add_2[138]\n getitem_1027 = _foreach_add_2[139]\n getitem_1028 = _foreach_add_2[140]\n getitem_1029 = _foreach_add_2[141]\n getitem_1030 = _foreach_add_2[142]\n getitem_1031 = _foreach_add_2[143]\n getitem_1032 = _foreach_add_2[144]\n getitem_1033 = _foreach_add_2[145]\n getitem_1034 = _foreach_add_2[146]\n getitem_1035 = _foreach_add_2[147]; _foreach_add_2 = None\n _foreach_pow = torch.ops.aten._foreach_pow.ScalarAndTensor(0.9, [getitem, getitem_1, getitem_2, getitem_3, getitem_4, getitem_5, getitem_6, getitem_7, getitem_8, getitem_9, getitem_10, getitem_11, getitem_12, getitem_13, getitem_14, getitem_15, getitem_16, getitem_17, getitem_18, getitem_19, getitem_20, getitem_21, getitem_22, getitem_23, getitem_24, getitem_25, getitem_26, getitem_27, getitem_28, getitem_29, getitem_30, getitem_31, getitem_32, getitem_33, getitem_34, getitem_35, getitem_36, getitem_37, getitem_38, getitem_39, getitem_40, getitem_41, getitem_42, getitem_43, getitem_44, getitem_45, getitem_46, getitem_47, getitem_48, getitem_49, getitem_50, getitem_51, getitem_52, getitem_53, getitem_54, getitem_55, getitem_56, getitem_57, getitem_58, getitem_59, getitem_60, getitem_61, getitem_62, getitem_63, getitem_64, getitem_65, getitem_66, getitem_67, getitem_68, getitem_69, getitem_70, getitem_71, getitem_72, getitem_73, getitem_74, getitem_75, getitem_76, getitem_77, getitem_78, getitem_79, getitem_80, getitem_81, getitem_82, getitem_83, getitem_84, getitem_85, getitem_86, getitem_87, getitem_88, getitem_89, getitem_90, getitem_91, getitem_92, getitem_93, getitem_94, getitem_95, getitem_96, getitem_97, getitem_98, getitem_99, getitem_100, getitem_101, getitem_102, getitem_103, getitem_104, getitem_105, getitem_106, getitem_107, getitem_108, getitem_109, getitem_110, getitem_111, getitem_112, getitem_113, getitem_114, getitem_115, getitem_116, getitem_117, getitem_118, getitem_119, getitem_120, getitem_121, getitem_122, getitem_123, getitem_124, getitem_125, getitem_126, getitem_127, getitem_128, getitem_129, getitem_130, getitem_131, getitem_132, getitem_133, getitem_134, getitem_135, getitem_136, getitem_137, getitem_138, getitem_139, getitem_140, getitem_141, getitem_142, getitem_143, getitem_144, getitem_145, getitem_146, getitem_147])\n getitem_1036 = _foreach_pow[0]\n getitem_1037 = _foreach_pow[1]\n getitem_1038 = _foreach_pow[2]\n getitem_1039 = _foreach_pow[3]\n getitem_1040 = _foreach_pow[4]\n getitem_1041 = _foreach_pow[5]\n getitem_1042 = _foreach_pow[6]\n getitem_1043 = _foreach_pow[7]\n getitem_1044 = _foreach_pow[8]\n getitem_1045 = _foreach_pow[9]\n getitem_1046 = _foreach_pow[10]\n getitem_1047 = _foreach_pow[11]\n getitem_1048 = _foreach_pow[12]\n getitem_1049 = _foreach_pow[13]\n getitem_1050 = _foreach_pow[14]\n getitem_1051 = _foreach_pow[15]\n getitem_1052 = _foreach_pow[16]\n getitem_1053 = _foreach_pow[17]\n getitem_1054 = _foreach_pow[18]\n getitem_1055 = _foreach_pow[19]\n getitem_1056 = _foreach_pow[20]\n getitem_1057 = _foreach_pow[21]\n getitem_1058 = _foreach_pow[22]\n getitem_1059 = _foreach_pow[23]\n getitem_1060 = _foreach_pow[24]\n getitem_1061 = _foreach_pow[25]\n getitem_1062 = _foreach_pow[26]\n getitem_1063 = _foreach_pow[27]\n getitem_1064 = _foreach_pow[28]\n getitem_1065 = _foreach_pow[29]\n getitem_1066 = _foreach_pow[30]\n getitem_1067 = _foreach_pow[31]\n getitem_1068 = _foreach_pow[32]\n getitem_1069 = _foreach_pow[33]\n getitem_1070 = _foreach_pow[34]\n getitem_1071 = _foreach_pow[35]\n 
getitem_1072 = _foreach_pow[36]\n getitem_1073 = _foreach_pow[37]\n getitem_1074 = _foreach_pow[38]\n getitem_1075 = _foreach_pow[39]\n getitem_1076 = _foreach_pow[40]\n getitem_1077 = _foreach_pow[41]\n getitem_1078 = _foreach_pow[42]\n getitem_1079 = _foreach_pow[43]\n getitem_1080 = _foreach_pow[44]\n getitem_1081 = _foreach_pow[45]\n getitem_1082 = _foreach_pow[46]\n getitem_1083 = _foreach_pow[47]\n getitem_1084 = _foreach_pow[48]\n getitem_1085 = _foreach_pow[49]\n getitem_1086 = _foreach_pow[50]\n getitem_1087 = _foreach_pow[51]\n getitem_1088 = _foreach_pow[52]\n getitem_1089 = _foreach_pow[53]\n getitem_1090 = _foreach_pow[54]\n getitem_1091 = _foreach_pow[55]\n getitem_1092 = _foreach_pow[56]\n getitem_1093 = _foreach_pow[57]\n getitem_1094 = _foreach_pow[58]\n getitem_1095 = _foreach_pow[59]\n getitem_1096 = _foreach_pow[60]\n getitem_1097 = _foreach_pow[61]\n getitem_1098 = _foreach_pow[62]\n getitem_1099 = _foreach_pow[63]\n getitem_1100 = _foreach_pow[64]\n getitem_1101 = _foreach_pow[65]\n getitem_1102 = _foreach_pow[66]\n getitem_1103 = _foreach_pow[67]\n getitem_1104 = _foreach_pow[68]\n getitem_1105 = _foreach_pow[69]\n getitem_1106 = _foreach_pow[70]\n getitem_1107 = _foreach_pow[71]\n getitem_1108 = _foreach_pow[72]\n getitem_1109 = _foreach_pow[73]\n getitem_1110 = _foreach_pow[74]\n getitem_1111 = _foreach_pow[75]\n getitem_1112 = _foreach_pow[76]\n getitem_1113 = _foreach_pow[77]\n getitem_1114 = _foreach_pow[78]\n getitem_1115 = _foreach_pow[79]\n getitem_1116 = _foreach_pow[80]\n getitem_1117 = _foreach_pow[81]\n getitem_1118 = _foreach_pow[82]\n getitem_1119 = _foreach_pow[83]\n getitem_1120 = _foreach_pow[84]\n getitem_1121 = _foreach_pow[85]\n getitem_1122 = _foreach_pow[86]\n getitem_1123 = _foreach_pow[87]\n getitem_1124 = _foreach_pow[88]\n getitem_1125 = _foreach_pow[89]\n getitem_1126 = _foreach_pow[90]\n getitem_1127 = _foreach_pow[91]\n getitem_1128 = _foreach_pow[92]\n getitem_1129 = _foreach_pow[93]\n getitem_1130 = _foreach_pow[94]\n getitem_1131 = _foreach_pow[95]\n getitem_1132 = _foreach_pow[96]\n getitem_1133 = _foreach_pow[97]\n getitem_1134 = _foreach_pow[98]\n getitem_1135 = _foreach_pow[99]\n getitem_1136 = _foreach_pow[100]\n getitem_1137 = _foreach_pow[101]\n getitem_1138 = _foreach_pow[102]\n getitem_1139 = _foreach_pow[103]\n getitem_1140 = _foreach_pow[104]\n getitem_1141 = _foreach_pow[105]\n getitem_1142 = _foreach_pow[106]\n getitem_1143 = _foreach_pow[107]\n getitem_1144 = _foreach_pow[108]\n getitem_1145 = _foreach_pow[109]\n getitem_1146 = _foreach_pow[110]\n getitem_1147 = _foreach_pow[111]\n getitem_1148 = _foreach_pow[112]\n getitem_1149 = _foreach_pow[113]\n getitem_1150 = _foreach_pow[114]\n getitem_1151 = _foreach_pow[115]\n getitem_1152 = _foreach_pow[116]\n getitem_1153 = _foreach_pow[117]\n getitem_1154 = _foreach_pow[118]\n getitem_1155 = _foreach_pow[119]\n getitem_1156 = _foreach_pow[120]\n getitem_1157 = _foreach_pow[121]\n getitem_1158 = _foreach_pow[122]\n getitem_1159 = _foreach_pow[123]\n getitem_1160 = _foreach_pow[124]\n getitem_1161 = _foreach_pow[125]\n getitem_1162 = _foreach_pow[126]\n getitem_1163 = _foreach_pow[127]\n getitem_1164 = _foreach_pow[128]\n getitem_1165 = _foreach_pow[129]\n getitem_1166 = _foreach_pow[130]\n getitem_1167 = _foreach_pow[131]\n getitem_1168 = _foreach_pow[132]\n getitem_1169 = _foreach_pow[133]\n getitem_1170 = _foreach_pow[134]\n getitem_1171 = _foreach_pow[135]\n getitem_1172 = _foreach_pow[136]\n getitem_1173 = _foreach_pow[137]\n getitem_1174 = _foreach_pow[138]\n getitem_1175 
= _foreach_pow[139]\n getitem_1176 = _foreach_pow[140]\n getitem_1177 = _foreach_pow[141]\n getitem_1178 = _foreach_pow[142]\n getitem_1179 = _foreach_pow[143]\n getitem_1180 = _foreach_pow[144]\n getitem_1181 = _foreach_pow[145]\n getitem_1182 = _foreach_pow[146]\n getitem_1183 = _foreach_pow[147]; _foreach_pow = None\n _foreach_pow_1 = torch.ops.aten._foreach_pow.ScalarAndTensor(0.999, [getitem, getitem_1, getitem_2, getitem_3, getitem_4, getitem_5, getitem_6, getitem_7, getitem_8, getitem_9, getitem_10, getitem_11, getitem_12, getitem_13, getitem_14, getitem_15, getitem_16, getitem_17, getitem_18, getitem_19, getitem_20, getitem_21, getitem_22, getitem_23, getitem_24, getitem_25, getitem_26, getitem_27, getitem_28, getitem_29, getitem_30, getitem_31, getitem_32, getitem_33, getitem_34, getitem_35, getitem_36, getitem_37, getitem_38, getitem_39, getitem_40, getitem_41, getitem_42, getitem_43, getitem_44, getitem_45, getitem_46, getitem_47, getitem_48, getitem_49, getitem_50, getitem_51, getitem_52, getitem_53, getitem_54, getitem_55, getitem_56, getitem_57, getitem_58, getitem_59, getitem_60, getitem_61, getitem_62, getitem_63, getitem_64, getitem_65, getitem_66, getitem_67, getitem_68, getitem_69, getitem_70, getitem_71, getitem_72, getitem_73, getitem_74, getitem_75, getitem_76, getitem_77, getitem_78, getitem_79, getitem_80, getitem_81, getitem_82, getitem_83, getitem_84, getitem_85, getitem_86, getitem_87, getitem_88, getitem_89, getitem_90, getitem_91, getitem_92, getitem_93, getitem_94, getitem_95, getitem_96, getitem_97, getitem_98, getitem_99, getitem_100, getitem_101, getitem_102, getitem_103, getitem_104, getitem_105, getitem_106, getitem_107, getitem_108, getitem_109, getitem_110, getitem_111, getitem_112, getitem_113, getitem_114, getitem_115, getitem_116, getitem_117, getitem_118, getitem_119, getitem_120, getitem_121, getitem_122, getitem_123, getitem_124, getitem_125, getitem_126, getitem_127, getitem_128, getitem_129, getitem_130, getitem_131, getitem_132, getitem_133, getitem_134, getitem_135, getitem_136, getitem_137, getitem_138, getitem_139, getitem_140, getitem_141, getitem_142, getitem_143, getitem_144, getitem_145, getitem_146, getitem_147])\n getitem_1184 = _foreach_pow_1[0]\n getitem_1185 = _foreach_pow_1[1]\n getitem_1186 = _foreach_pow_1[2]\n getitem_1187 = _foreach_pow_1[3]\n getitem_1188 = _foreach_pow_1[4]\n getitem_1189 = _foreach_pow_1[5]\n getitem_1190 = _foreach_pow_1[6]\n getitem_1191 = _foreach_pow_1[7]\n getitem_1192 = _foreach_pow_1[8]\n getitem_1193 = _foreach_pow_1[9]\n getitem_1194 = _foreach_pow_1[10]\n getitem_1195 = _foreach_pow_1[11]\n getitem_1196 = _foreach_pow_1[12]\n getitem_1197 = _foreach_pow_1[13]\n getitem_1198 = _foreach_pow_1[14]\n getitem_1199 = _foreach_pow_1[15]\n getitem_1200 = _foreach_pow_1[16]\n getitem_1201 = _foreach_pow_1[17]\n getitem_1202 = _foreach_pow_1[18]\n getitem_1203 = _foreach_pow_1[19]\n getitem_1204 = _foreach_pow_1[20]\n getitem_1205 = _foreach_pow_1[21]\n getitem_1206 = _foreach_pow_1[22]\n getitem_1207 = _foreach_pow_1[23]\n getitem_1208 = _foreach_pow_1[24]\n getitem_1209 = _foreach_pow_1[25]\n getitem_1210 = _foreach_pow_1[26]\n getitem_1211 = _foreach_pow_1[27]\n getitem_1212 = _foreach_pow_1[28]\n getitem_1213 = _foreach_pow_1[29]\n getitem_1214 = _foreach_pow_1[30]\n getitem_1215 = _foreach_pow_1[31]\n getitem_1216 = _foreach_pow_1[32]\n getitem_1217 = _foreach_pow_1[33]\n getitem_1218 = _foreach_pow_1[34]\n getitem_1219 = _foreach_pow_1[35]\n getitem_1220 = _foreach_pow_1[36]\n getitem_1221 = 
_foreach_pow_1[37]\n getitem_1222 = _foreach_pow_1[38]\n getitem_1223 = _foreach_pow_1[39]\n getitem_1224 = _foreach_pow_1[40]\n getitem_1225 = _foreach_pow_1[41]\n getitem_1226 = _foreach_pow_1[42]\n getitem_1227 = _foreach_pow_1[43]\n getitem_1228 = _foreach_pow_1[44]\n getitem_1229 = _foreach_pow_1[45]\n getitem_1230 = _foreach_pow_1[46]\n getitem_1231 = _foreach_pow_1[47]\n getitem_1232 = _foreach_pow_1[48]\n getitem_1233 = _foreach_pow_1[49]\n getitem_1234 = _foreach_pow_1[50]\n getitem_1235 = _foreach_pow_1[51]\n getitem_1236 = _foreach_pow_1[52]\n getitem_1237 = _foreach_pow_1[53]\n getitem_1238 = _foreach_pow_1[54]\n getitem_1239 = _foreach_pow_1[55]\n getitem_1240 = _foreach_pow_1[56]\n getitem_1241 = _foreach_pow_1[57]\n getitem_1242 = _foreach_pow_1[58]\n getitem_1243 = _foreach_pow_1[59]\n getitem_1244 = _foreach_pow_1[60]\n getitem_1245 = _foreach_pow_1[61]\n getitem_1246 = _foreach_pow_1[62]\n getitem_1247 = _foreach_pow_1[63]\n getitem_1248 = _foreach_pow_1[64]\n getitem_1249 = _foreach_pow_1[65]\n getitem_1250 = _foreach_pow_1[66]\n getitem_1251 = _foreach_pow_1[67]\n getitem_1252 = _foreach_pow_1[68]\n getitem_1253 = _foreach_pow_1[69]\n getitem_1254 = _foreach_pow_1[70]\n getitem_1255 = _foreach_pow_1[71]\n getitem_1256 = _foreach_pow_1[72]\n getitem_1257 = _foreach_pow_1[73]\n getitem_1258 = _foreach_pow_1[74]\n getitem_1259 = _foreach_pow_1[75]\n getitem_1260 = _foreach_pow_1[76]\n getitem_1261 = _foreach_pow_1[77]\n getitem_1262 = _foreach_pow_1[78]\n getitem_1263 = _foreach_pow_1[79]\n getitem_1264 = _foreach_pow_1[80]\n getitem_1265 = _foreach_pow_1[81]\n getitem_1266 = _foreach_pow_1[82]\n getitem_1267 = _foreach_pow_1[83]\n getitem_1268 = _foreach_pow_1[84]\n getitem_1269 = _foreach_pow_1[85]\n getitem_1270 = _foreach_pow_1[86]\n getitem_1271 = _foreach_pow_1[87]\n getitem_1272 = _foreach_pow_1[88]\n getitem_1273 = _foreach_pow_1[89]\n getitem_1274 = _foreach_pow_1[90]\n getitem_1275 = _foreach_pow_1[91]\n getitem_1276 = _foreach_pow_1[92]\n getitem_1277 = _foreach_pow_1[93]\n getitem_1278 = _foreach_pow_1[94]\n getitem_1279 = _foreach_pow_1[95]\n getitem_1280 = _foreach_pow_1[96]\n getitem_1281 = _foreach_pow_1[97]\n getitem_1282 = _foreach_pow_1[98]\n getitem_1283 = _foreach_pow_1[99]\n getitem_1284 = _foreach_pow_1[100]\n getitem_1285 = _foreach_pow_1[101]\n getitem_1286 = _foreach_pow_1[102]\n getitem_1287 = _foreach_pow_1[103]\n getitem_1288 = _foreach_pow_1[104]\n getitem_1289 = _foreach_pow_1[105]\n getitem_1290 = _foreach_pow_1[106]\n getitem_1291 = _foreach_pow_1[107]\n getitem_1292 = _foreach_pow_1[108]\n getitem_1293 = _foreach_pow_1[109]\n getitem_1294 = _foreach_pow_1[110]\n getitem_1295 = _foreach_pow_1[111]\n getitem_1296 = _foreach_pow_1[112]\n getitem_1297 = _foreach_pow_1[113]\n getitem_1298 = _foreach_pow_1[114]\n getitem_1299 = _foreach_pow_1[115]\n getitem_1300 = _foreach_pow_1[116]\n getitem_1301 = _foreach_pow_1[117]\n getitem_1302 = _foreach_pow_1[118]\n getitem_1303 = _foreach_pow_1[119]\n getitem_1304 = _foreach_pow_1[120]\n getitem_1305 = _foreach_pow_1[121]\n getitem_1306 = _foreach_pow_1[122]\n getitem_1307 = _foreach_pow_1[123]\n getitem_1308 = _foreach_pow_1[124]\n getitem_1309 = _foreach_pow_1[125]\n getitem_1310 = _foreach_pow_1[126]\n getitem_1311 = _foreach_pow_1[127]\n getitem_1312 = _foreach_pow_1[128]\n getitem_1313 = _foreach_pow_1[129]\n getitem_1314 = _foreach_pow_1[130]\n getitem_1315 = _foreach_pow_1[131]\n getitem_1316 = _foreach_pow_1[132]\n getitem_1317 = _foreach_pow_1[133]\n getitem_1318 = _foreach_pow_1[134]\n 
getitem_1319 = _foreach_pow_1[135]\n getitem_1320 = _foreach_pow_1[136]\n getitem_1321 = _foreach_pow_1[137]\n getitem_1322 = _foreach_pow_1[138]\n getitem_1323 = _foreach_pow_1[139]\n getitem_1324 = _foreach_pow_1[140]\n getitem_1325 = _foreach_pow_1[141]\n getitem_1326 = _foreach_pow_1[142]\n getitem_1327 = _foreach_pow_1[143]\n getitem_1328 = _foreach_pow_1[144]\n getitem_1329 = _foreach_pow_1[145]\n getitem_1330 = _foreach_pow_1[146]\n getitem_1331 = _foreach_pow_1[147]; _foreach_pow_1 = None\n _foreach_sub_1 = torch.ops.aten._foreach_sub.Scalar([getitem_1036, getitem_1037, getitem_1038, getitem_1039, getitem_1040, getitem_1041, getitem_1042, getitem_1043, getitem_1044, getitem_1045, getitem_1046, getitem_1047, getitem_1048, getitem_1049, getitem_1050, getitem_1051, getitem_1052, getitem_1053, getitem_1054, getitem_1055, getitem_1056, getitem_1057, getitem_1058, getitem_1059, getitem_1060, getitem_1061, getitem_1062, getitem_1063, getitem_1064, getitem_1065, getitem_1066, getitem_1067, getitem_1068, getitem_1069, getitem_1070, getitem_1071, getitem_1072, getitem_1073, getitem_1074, getitem_1075, getitem_1076, getitem_1077, getitem_1078, getitem_1079, getitem_1080, getitem_1081, getitem_1082, getitem_1083, getitem_1084, getitem_1085, getitem_1086, getitem_1087, getitem_1088, getitem_1089, getitem_1090, getitem_1091, getitem_1092, getitem_1093, getitem_1094, getitem_1095, getitem_1096, getitem_1097, getitem_1098, getitem_1099, getitem_1100, getitem_1101, getitem_1102, getitem_1103, getitem_1104, getitem_1105, getitem_1106, getitem_1107, getitem_1108, getitem_1109, getitem_1110, getitem_1111, getitem_1112, getitem_1113, getitem_1114, getitem_1115, getitem_1116, getitem_1117, getitem_1118, getitem_1119, getitem_1120, getitem_1121, getitem_1122, getitem_1123, getitem_1124, getitem_1125, getitem_1126, getitem_1127, getitem_1128, getitem_1129, getitem_1130, getitem_1131, getitem_1132, getitem_1133, getitem_1134, getitem_1135, getitem_1136, getitem_1137, getitem_1138, getitem_1139, getitem_1140, getitem_1141, getitem_1142, getitem_1143, getitem_1144, getitem_1145, getitem_1146, getitem_1147, getitem_1148, getitem_1149, getitem_1150, getitem_1151, getitem_1152, getitem_1153, getitem_1154, getitem_1155, getitem_1156, getitem_1157, getitem_1158, getitem_1159, getitem_1160, getitem_1161, getitem_1162, getitem_1163, getitem_1164, getitem_1165, getitem_1166, getitem_1167, getitem_1168, getitem_1169, getitem_1170, getitem_1171, getitem_1172, getitem_1173, getitem_1174, getitem_1175, getitem_1176, getitem_1177, getitem_1178, getitem_1179, getitem_1180, getitem_1181, getitem_1182, getitem_1183], 1); getitem_1036 = getitem_1037 = getitem_1038 = getitem_1039 = getitem_1040 = getitem_1041 = getitem_1042 = getitem_1043 = getitem_1044 = getitem_1045 = getitem_1046 = getitem_1047 = getitem_1048 = getitem_1049 = getitem_1050 = getitem_1051 = getitem_1052 = getitem_1053 = getitem_1054 = getitem_1055 = getitem_1056 = getitem_1057 = getitem_1058 = getitem_1059 = getitem_1060 = getitem_1061 = getitem_1062 = getitem_1063 = getitem_1064 = getitem_1065 = getitem_1066 = getitem_1067 = getitem_1068 = getitem_1069 = getitem_1070 = getitem_1071 = getitem_1072 = getitem_1073 = getitem_1074 = getitem_1075 = getitem_1076 = getitem_1077 = getitem_1078 = getitem_1079 = getitem_1080 = getitem_1081 = getitem_1082 = getitem_1083 = getitem_1084 = getitem_1085 = getitem_1086 = getitem_1087 = getitem_1088 = getitem_1089 = getitem_1090 = getitem_1091 = getitem_1092 = getitem_1093 = getitem_1094 = getitem_1095 = getitem_1096 = 
getitem_1097 = getitem_1098 = getitem_1099 = getitem_1100 = getitem_1101 = getitem_1102 = getitem_1103 = getitem_1104 = getitem_1105 = getitem_1106 = getitem_1107 = getitem_1108 = getitem_1109 = getitem_1110 = getitem_1111 = getitem_1112 = getitem_1113 = getitem_1114 = getitem_1115 = getitem_1116 = getitem_1117 = getitem_1118 = getitem_1119 = getitem_1120 = getitem_1121 = getitem_1122 = getitem_1123 = getitem_1124 = getitem_1125 = getitem_1126 = getitem_1127 = getitem_1128 = getitem_1129 = getitem_1130 = getitem_1131 = getitem_1132 = getitem_1133 = getitem_1134 = getitem_1135 = getitem_1136 = getitem_1137 = getitem_1138 = getitem_1139 = getitem_1140 = getitem_1141 = getitem_1142 = getitem_1143 = getitem_1144 = getitem_1145 = getitem_1146 = getitem_1147 = getitem_1148 = getitem_1149 = getitem_1150 = getitem_1151 = getitem_1152 = getitem_1153 = getitem_1154 = getitem_1155 = getitem_1156 = getitem_1157 = getitem_1158 = getitem_1159 = getitem_1160 = getitem_1161 = getitem_1162 = getitem_1163 = getitem_1164 = getitem_1165 = getitem_1166 = getitem_1167 = getitem_1168 = getitem_1169 = getitem_1170 = getitem_1171 = getitem_1172 = getitem_1173 = getitem_1174 = getitem_1175 = getitem_1176 = getitem_1177 = getitem_1178 = getitem_1179 = getitem_1180 = getitem_1181 = getitem_1182 = getitem_1183 = None\n getitem_1332 = _foreach_sub_1[0]\n getitem_1333 = _foreach_sub_1[1]\n getitem_1334 = _foreach_sub_1[2]\n getitem_1335 = _foreach_sub_1[3]\n getitem_1336 = _foreach_sub_1[4]\n getitem_1337 = _foreach_sub_1[5]\n getitem_1338 = _foreach_sub_1[6]\n getitem_1339 = _foreach_sub_1[7]\n getitem_1340 = _foreach_sub_1[8]\n getitem_1341 = _foreach_sub_1[9]\n getitem_1342 = _foreach_sub_1[10]\n getitem_1343 = _foreach_sub_1[11]\n getitem_1344 = _foreach_sub_1[12]\n getitem_1345 = _foreach_sub_1[13]\n getitem_1346 = _foreach_sub_1[14]\n getitem_1347 = _foreach_sub_1[15]\n getitem_1348 = _foreach_sub_1[16]\n getitem_1349 = _foreach_sub_1[17]\n getitem_1350 = _foreach_sub_1[18]\n getitem_1351 = _foreach_sub_1[19]\n getitem_1352 = _foreach_sub_1[20]\n getitem_1353 = _foreach_sub_1[21]\n getitem_1354 = _foreach_sub_1[22]\n getitem_1355 = _foreach_sub_1[23]\n getitem_1356 = _foreach_sub_1[24]\n getitem_1357 = _foreach_sub_1[25]\n getitem_1358 = _foreach_sub_1[26]\n getitem_1359 = _foreach_sub_1[27]\n getitem_1360 = _foreach_sub_1[28]\n getitem_1361 = _foreach_sub_1[29]\n getitem_1362 = _foreach_sub_1[30]\n getitem_1363 = _foreach_sub_1[31]\n getitem_1364 = _foreach_sub_1[32]\n getitem_1365 = _foreach_sub_1[33]\n getitem_1366 = _foreach_sub_1[34]\n getitem_1367 = _foreach_sub_1[35]\n getitem_1368 = _foreach_sub_1[36]\n getitem_1369 = _foreach_sub_1[37]\n getitem_1370 = _foreach_sub_1[38]\n getitem_1371 = _foreach_sub_1[39]\n getitem_1372 = _foreach_sub_1[40]\n getitem_1373 = _foreach_sub_1[41]\n getitem_1374 = _foreach_sub_1[42]\n getitem_1375 = _foreach_sub_1[43]\n getitem_1376 = _foreach_sub_1[44]\n getitem_1377 = _foreach_sub_1[45]\n getitem_1378 = _foreach_sub_1[46]\n getitem_1379 = _foreach_sub_1[47]\n getitem_1380 = _foreach_sub_1[48]\n getitem_1381 = _foreach_sub_1[49]\n getitem_1382 = _foreach_sub_1[50]\n getitem_1383 = _foreach_sub_1[51]\n getitem_1384 = _foreach_sub_1[52]\n getitem_1385 = _foreach_sub_1[53]\n getitem_1386 = _foreach_sub_1[54]\n getitem_1387 = _foreach_sub_1[55]\n getitem_1388 = _foreach_sub_1[56]\n getitem_1389 = _foreach_sub_1[57]\n getitem_1390 = _foreach_sub_1[58]\n getitem_1391 = _foreach_sub_1[59]\n getitem_1392 = _foreach_sub_1[60]\n getitem_1393 = _foreach_sub_1[61]\n getitem_1394 = 
_foreach_sub_1[62]\n getitem_1395 = _foreach_sub_1[63]\n getitem_1396 = _foreach_sub_1[64]\n getitem_1397 = _foreach_sub_1[65]\n getitem_1398 = _foreach_sub_1[66]\n getitem_1399 = _foreach_sub_1[67]\n getitem_1400 = _foreach_sub_1[68]\n getitem_1401 = _foreach_sub_1[69]\n getitem_1402 = _foreach_sub_1[70]\n getitem_1403 = _foreach_sub_1[71]\n getitem_1404 = _foreach_sub_1[72]\n getitem_1405 = _foreach_sub_1[73]\n getitem_1406 = _foreach_sub_1[74]\n getitem_1407 = _foreach_sub_1[75]\n getitem_1408 = _foreach_sub_1[76]\n getitem_1409 = _foreach_sub_1[77]\n getitem_1410 = _foreach_sub_1[78]\n getitem_1411 = _foreach_sub_1[79]\n getitem_1412 = _foreach_sub_1[80]\n getitem_1413 = _foreach_sub_1[81]\n getitem_1414 = _foreach_sub_1[82]\n getitem_1415 = _foreach_sub_1[83]\n getitem_1416 = _foreach_sub_1[84]\n getitem_1417 = _foreach_sub_1[85]\n getitem_1418 = _foreach_sub_1[86]\n getitem_1419 = _foreach_sub_1[87]\n getitem_1420 = _foreach_sub_1[88]\n getitem_1421 = _foreach_sub_1[89]\n getitem_1422 = _foreach_sub_1[90]\n getitem_1423 = _foreach_sub_1[91]\n getitem_1424 = _foreach_sub_1[92]\n getitem_1425 = _foreach_sub_1[93]\n getitem_1426 = _foreach_sub_1[94]\n getitem_1427 = _foreach_sub_1[95]\n getitem_1428 = _foreach_sub_1[96]\n getitem_1429 = _foreach_sub_1[97]\n getitem_1430 = _foreach_sub_1[98]\n getitem_1431 = _foreach_sub_1[99]\n getitem_1432 = _foreach_sub_1[100]\n getitem_1433 = _foreach_sub_1[101]\n getitem_1434 = _foreach_sub_1[102]\n getitem_1435 = _foreach_sub_1[103]\n getitem_1436 = _foreach_sub_1[104]\n getitem_1437 = _foreach_sub_1[105]\n getitem_1438 = _foreach_sub_1[106]\n getitem_1439 = _foreach_sub_1[107]\n getitem_1440 = _foreach_sub_1[108]\n getitem_1441 = _foreach_sub_1[109]\n getitem_1442 = _foreach_sub_1[110]\n getitem_1443 = _foreach_sub_1[111]\n getitem_1444 = _foreach_sub_1[112]\n getitem_1445 = _foreach_sub_1[113]\n getitem_1446 = _foreach_sub_1[114]\n getitem_1447 = _foreach_sub_1[115]\n getitem_1448 = _foreach_sub_1[116]\n getitem_1449 = _foreach_sub_1[117]\n getitem_1450 = _foreach_sub_1[118]\n getitem_1451 = _foreach_sub_1[119]\n getitem_1452 = _foreach_sub_1[120]\n getitem_1453 = _foreach_sub_1[121]\n getitem_1454 = _foreach_sub_1[122]\n getitem_1455 = _foreach_sub_1[123]\n getitem_1456 = _foreach_sub_1[124]\n getitem_1457 = _foreach_sub_1[125]\n getitem_1458 = _foreach_sub_1[126]\n getitem_1459 = _foreach_sub_1[127]\n getitem_1460 = _foreach_sub_1[128]\n getitem_1461 = _foreach_sub_1[129]\n getitem_1462 = _foreach_sub_1[130]\n getitem_1463 = _foreach_sub_1[131]\n getitem_1464 = _foreach_sub_1[132]\n getitem_1465 = _foreach_sub_1[133]\n getitem_1466 = _foreach_sub_1[134]\n getitem_1467 = _foreach_sub_1[135]\n getitem_1468 = _foreach_sub_1[136]\n getitem_1469 = _foreach_sub_1[137]\n getitem_1470 = _foreach_sub_1[138]\n getitem_1471 = _foreach_sub_1[139]\n getitem_1472 = _foreach_sub_1[140]\n getitem_1473 = _foreach_sub_1[141]\n getitem_1474 = _foreach_sub_1[142]\n getitem_1475 = _foreach_sub_1[143]\n getitem_1476 = _foreach_sub_1[144]\n getitem_1477 = _foreach_sub_1[145]\n getitem_1478 = _foreach_sub_1[146]\n getitem_1479 = _foreach_sub_1[147]; _foreach_sub_1 = None\n _foreach_sub_2 = torch.ops.aten._foreach_sub.Scalar([getitem_1184, getitem_1185, getitem_1186, getitem_1187, getitem_1188, getitem_1189, getitem_1190, getitem_1191, getitem_1192, getitem_1193, getitem_1194, getitem_1195, getitem_1196, getitem_1197, getitem_1198, getitem_1199, getitem_1200, getitem_1201, getitem_1202, getitem_1203, getitem_1204, getitem_1205, getitem_1206, getitem_1207, getitem_1208, 
getitem_1209, getitem_1210, getitem_1211, getitem_1212, getitem_1213, getitem_1214, getitem_1215, getitem_1216, getitem_1217, getitem_1218, getitem_1219, getitem_1220, getitem_1221, getitem_1222, getitem_1223, getitem_1224, getitem_1225, getitem_1226, getitem_1227, getitem_1228, getitem_1229, getitem_1230, getitem_1231, getitem_1232, getitem_1233, getitem_1234, getitem_1235, getitem_1236, getitem_1237, getitem_1238, getitem_1239, getitem_1240, getitem_1241, getitem_1242, getitem_1243, getitem_1244, getitem_1245, getitem_1246, getitem_1247, getitem_1248, getitem_1249, getitem_1250, getitem_1251, getitem_1252, getitem_1253, getitem_1254, getitem_1255, getitem_1256, getitem_1257, getitem_1258, getitem_1259, getitem_1260, getitem_1261, getitem_1262, getitem_1263, getitem_1264, getitem_1265, getitem_1266, getitem_1267, getitem_1268, getitem_1269, getitem_1270, getitem_1271, getitem_1272, getitem_1273, getitem_1274, getitem_1275, getitem_1276, getitem_1277, getitem_1278, getitem_1279, getitem_1280, getitem_1281, getitem_1282, getitem_1283, getitem_1284, getitem_1285, getitem_1286, getitem_1287, getitem_1288, getitem_1289, getitem_1290, getitem_1291, getitem_1292, getitem_1293, getitem_1294, getitem_1295, getitem_1296, getitem_1297, getitem_1298, getitem_1299, getitem_1300, getitem_1301, getitem_1302, getitem_1303, getitem_1304, getitem_1305, getitem_1306, getitem_1307, getitem_1308, getitem_1309, getitem_1310, getitem_1311, getitem_1312, getitem_1313, getitem_1314, getitem_1315, getitem_1316, getitem_1317, getitem_1318, getitem_1319, getitem_1320, getitem_1321, getitem_1322, getitem_1323, getitem_1324, getitem_1325, getitem_1326, getitem_1327, getitem_1328, getitem_1329, getitem_1330, getitem_1331], 1); getitem_1184 = getitem_1185 = getitem_1186 = getitem_1187 = getitem_1188 = getitem_1189 = getitem_1190 = getitem_1191 = getitem_1192 = getitem_1193 = getitem_1194 = getitem_1195 = getitem_1196 = getitem_1197 = getitem_1198 = getitem_1199 = getitem_1200 = getitem_1201 = getitem_1202 = getitem_1203 = getitem_1204 = getitem_1205 = getitem_1206 = getitem_1207 = getitem_1208 = getitem_1209 = getitem_1210 = getitem_1211 = getitem_1212 = getitem_1213 = getitem_1214 = getitem_1215 = getitem_1216 = getitem_1217 = getitem_1218 = getitem_1219 = getitem_1220 = getitem_1221 = getitem_1222 = getitem_1223 = getitem_1224 = getitem_1225 = getitem_1226 = getitem_1227 = getitem_1228 = getitem_1229 = getitem_1230 = getitem_1231 = getitem_1232 = getitem_1233 = getitem_1234 = getitem_1235 = getitem_1236 = getitem_1237 = getitem_1238 = getitem_1239 = getitem_1240 = getitem_1241 = getitem_1242 = getitem_1243 = getitem_1244 = getitem_1245 = getitem_1246 = getitem_1247 = getitem_1248 = getitem_1249 = getitem_1250 = getitem_1251 = getitem_1252 = getitem_1253 = getitem_1254 = getitem_1255 = getitem_1256 = getitem_1257 = getitem_1258 = getitem_1259 = getitem_1260 = getitem_1261 = getitem_1262 = getitem_1263 = getitem_1264 = getitem_1265 = getitem_1266 = getitem_1267 = getitem_1268 = getitem_1269 = getitem_1270 = getitem_1271 = getitem_1272 = getitem_1273 = getitem_1274 = getitem_1275 = getitem_1276 = getitem_1277 = getitem_1278 = getitem_1279 = getitem_1280 = getitem_1281 = getitem_1282 = getitem_1283 = getitem_1284 = getitem_1285 = getitem_1286 = getitem_1287 = getitem_1288 = getitem_1289 = getitem_1290 = getitem_1291 = getitem_1292 = getitem_1293 = getitem_1294 = getitem_1295 = getitem_1296 = getitem_1297 = getitem_1298 = getitem_1299 = getitem_1300 = getitem_1301 = getitem_1302 = getitem_1303 = getitem_1304 = getitem_1305 
= getitem_1306 = getitem_1307 = getitem_1308 = getitem_1309 = getitem_1310 = getitem_1311 = getitem_1312 = getitem_1313 = getitem_1314 = getitem_1315 = getitem_1316 = getitem_1317 = getitem_1318 = getitem_1319 = getitem_1320 = getitem_1321 = getitem_1322 = getitem_1323 = getitem_1324 = getitem_1325 = getitem_1326 = getitem_1327 = getitem_1328 = getitem_1329 = getitem_1330 = getitem_1331 = None\n getitem_1480 = _foreach_sub_2[0]\n getitem_1481 = _foreach_sub_2[1]\n getitem_1482 = _foreach_sub_2[2]\n getitem_1483 = _foreach_sub_2[3]\n getitem_1484 = _foreach_sub_2[4]\n getitem_1485 = _foreach_sub_2[5]\n getitem_1486 = _foreach_sub_2[6]\n getitem_1487 = _foreach_sub_2[7]\n getitem_1488 = _foreach_sub_2[8]\n getitem_1489 = _foreach_sub_2[9]\n getitem_1490 = _foreach_sub_2[10]\n getitem_1491 = _foreach_sub_2[11]\n getitem_1492 = _foreach_sub_2[12]\n getitem_1493 = _foreach_sub_2[13]\n getitem_1494 = _foreach_sub_2[14]\n getitem_1495 = _foreach_sub_2[15]\n getitem_1496 = _foreach_sub_2[16]\n getitem_1497 = _foreach_sub_2[17]\n getitem_1498 = _foreach_sub_2[18]\n getitem_1499 = _foreach_sub_2[19]\n getitem_1500 = _foreach_sub_2[20]\n getitem_1501 = _foreach_sub_2[21]\n getitem_1502 = _foreach_sub_2[22]\n getitem_1503 = _foreach_sub_2[23]\n getitem_1504 = _foreach_sub_2[24]\n getitem_1505 = _foreach_sub_2[25]\n getitem_1506 = _foreach_sub_2[26]\n getitem_1507 = _foreach_sub_2[27]\n getitem_1508 = _foreach_sub_2[28]\n getitem_1509 = _foreach_sub_2[29]\n getitem_1510 = _foreach_sub_2[30]\n getitem_1511 = _foreach_sub_2[31]\n getitem_1512 = _foreach_sub_2[32]\n getitem_1513 = _foreach_sub_2[33]\n getitem_1514 = _foreach_sub_2[34]\n getitem_1515 = _foreach_sub_2[35]\n getitem_1516 = _foreach_sub_2[36]\n getitem_1517 = _foreach_sub_2[37]\n getitem_1518 = _foreach_sub_2[38]\n getitem_1519 = _foreach_sub_2[39]\n getitem_1520 = _foreach_sub_2[40]\n getitem_1521 = _foreach_sub_2[41]\n getitem_1522 = _foreach_sub_2[42]\n getitem_1523 = _foreach_sub_2[43]\n getitem_1524 = _foreach_sub_2[44]\n getitem_1525 = _foreach_sub_2[45]\n getitem_1526 = _foreach_sub_2[46]\n getitem_1527 = _foreach_sub_2[47]\n getitem_1528 = _foreach_sub_2[48]\n getitem_1529 = _foreach_sub_2[49]\n getitem_1530 = _foreach_sub_2[50]\n getitem_1531 = _foreach_sub_2[51]\n getitem_1532 = _foreach_sub_2[52]\n getitem_1533 = _foreach_sub_2[53]\n getitem_1534 = _foreach_sub_2[54]\n getitem_1535 = _foreach_sub_2[55]\n getitem_1536 = _foreach_sub_2[56]\n getitem_1537 = _foreach_sub_2[57]\n getitem_1538 = _foreach_sub_2[58]\n getitem_1539 = _foreach_sub_2[59]\n getitem_1540 = _foreach_sub_2[60]\n getitem_1541 = _foreach_sub_2[61]\n getitem_1542 = _foreach_sub_2[62]\n getitem_1543 = _foreach_sub_2[63]\n getitem_1544 = _foreach_sub_2[64]\n getitem_1545 = _foreach_sub_2[65]\n getitem_1546 = _foreach_sub_2[66]\n getitem_1547 = _foreach_sub_2[67]\n getitem_1548 = _foreach_sub_2[68]\n getitem_1549 = _foreach_sub_2[69]\n getitem_1550 = _foreach_sub_2[70]\n getitem_1551 = _foreach_sub_2[71]\n getitem_1552 = _foreach_sub_2[72]\n getitem_1553 = _foreach_sub_2[73]\n getitem_1554 = _foreach_sub_2[74]\n getitem_1555 = _foreach_sub_2[75]\n getitem_1556 = _foreach_sub_2[76]\n getitem_1557 = _foreach_sub_2[77]\n getitem_1558 = _foreach_sub_2[78]\n getitem_1559 = _foreach_sub_2[79]\n getitem_1560 = _foreach_sub_2[80]\n getitem_1561 = _foreach_sub_2[81]\n getitem_1562 = _foreach_sub_2[82]\n getitem_1563 = _foreach_sub_2[83]\n getitem_1564 = _foreach_sub_2[84]\n getitem_1565 = _foreach_sub_2[85]\n getitem_1566 = _foreach_sub_2[86]\n getitem_1567 = 
_foreach_sub_2[87]\n getitem_1568 = _foreach_sub_2[88]\n getitem_1569 = _foreach_sub_2[89]\n getitem_1570 = _foreach_sub_2[90]\n getitem_1571 = _foreach_sub_2[91]\n getitem_1572 = _foreach_sub_2[92]\n getitem_1573 = _foreach_sub_2[93]\n getitem_1574 = _foreach_sub_2[94]\n getitem_1575 = _foreach_sub_2[95]\n getitem_1576 = _foreach_sub_2[96]\n getitem_1577 = _foreach_sub_2[97]\n getitem_1578 = _foreach_sub_2[98]\n getitem_1579 = _foreach_sub_2[99]\n getitem_1580 = _foreach_sub_2[100]\n getitem_1581 = _foreach_sub_2[101]\n getitem_1582 = _foreach_sub_2[102]\n getitem_1583 = _foreach_sub_2[103]\n getitem_1584 = _foreach_sub_2[104]\n getitem_1585 = _foreach_sub_2[105]\n getitem_1586 = _foreach_sub_2[106]\n getitem_1587 = _foreach_sub_2[107]\n getitem_1588 = _foreach_sub_2[108]\n getitem_1589 = _foreach_sub_2[109]\n getitem_1590 = _foreach_sub_2[110]\n getitem_1591 = _foreach_sub_2[111]\n getitem_1592 = _foreach_sub_2[112]\n getitem_1593 = _foreach_sub_2[113]\n getitem_1594 = _foreach_sub_2[114]\n getitem_1595 = _foreach_sub_2[115]\n getitem_1596 = _foreach_sub_2[116]\n getitem_1597 = _foreach_sub_2[117]\n getitem_1598 = _foreach_sub_2[118]\n getitem_1599 = _foreach_sub_2[119]\n getitem_1600 = _foreach_sub_2[120]\n getitem_1601 = _foreach_sub_2[121]\n getitem_1602 = _foreach_sub_2[122]\n getitem_1603 = _foreach_sub_2[123]\n getitem_1604 = _foreach_sub_2[124]\n getitem_1605 = _foreach_sub_2[125]\n getitem_1606 = _foreach_sub_2[126]\n getitem_1607 = _foreach_sub_2[127]\n getitem_1608 = _foreach_sub_2[128]\n getitem_1609 = _foreach_sub_2[129]\n getitem_1610 = _foreach_sub_2[130]\n getitem_1611 = _foreach_sub_2[131]\n getitem_1612 = _foreach_sub_2[132]\n getitem_1613 = _foreach_sub_2[133]\n getitem_1614 = _foreach_sub_2[134]\n getitem_1615 = _foreach_sub_2[135]\n getitem_1616 = _foreach_sub_2[136]\n getitem_1617 = _foreach_sub_2[137]\n getitem_1618 = _foreach_sub_2[138]\n getitem_1619 = _foreach_sub_2[139]\n getitem_1620 = _foreach_sub_2[140]\n getitem_1621 = _foreach_sub_2[141]\n getitem_1622 = _foreach_sub_2[142]\n getitem_1623 = _foreach_sub_2[143]\n getitem_1624 = _foreach_sub_2[144]\n getitem_1625 = _foreach_sub_2[145]\n getitem_1626 = _foreach_sub_2[146]\n getitem_1627 = _foreach_sub_2[147]; _foreach_sub_2 = None\n _foreach_neg = torch.ops.aten._foreach_neg.default([getitem_1480, getitem_1481, getitem_1482, getitem_1483, getitem_1484, getitem_1485, getitem_1486, getitem_1487, getitem_1488, getitem_1489, getitem_1490, getitem_1491, getitem_1492, getitem_1493, getitem_1494, getitem_1495, getitem_1496, getitem_1497, getitem_1498, getitem_1499, getitem_1500, getitem_1501, getitem_1502, getitem_1503, getitem_1504, getitem_1505, getitem_1506, getitem_1507, getitem_1508, getitem_1509, getitem_1510, getitem_1511, getitem_1512, getitem_1513, getitem_1514, getitem_1515, getitem_1516, getitem_1517, getitem_1518, getitem_1519, getitem_1520, getitem_1521, getitem_1522, getitem_1523, getitem_1524, getitem_1525, getitem_1526, getitem_1527, getitem_1528, getitem_1529, getitem_1530, getitem_1531, getitem_1532, getitem_1533, getitem_1534, getitem_1535, getitem_1536, getitem_1537, getitem_1538, getitem_1539, getitem_1540, getitem_1541, getitem_1542, getitem_1543, getitem_1544, getitem_1545, getitem_1546, getitem_1547, getitem_1548, getitem_1549, getitem_1550, getitem_1551, getitem_1552, getitem_1553, getitem_1554, getitem_1555, getitem_1556, getitem_1557, getitem_1558, getitem_1559, getitem_1560, getitem_1561, getitem_1562, getitem_1563, getitem_1564, getitem_1565, getitem_1566, getitem_1567, getitem_1568, 
getitem_1569, getitem_1570, getitem_1571, getitem_1572, getitem_1573, getitem_1574, getitem_1575, getitem_1576, getitem_1577, getitem_1578, getitem_1579, getitem_1580, getitem_1581, getitem_1582, getitem_1583, getitem_1584, getitem_1585, getitem_1586, getitem_1587, getitem_1588, getitem_1589, getitem_1590, getitem_1591, getitem_1592, getitem_1593, getitem_1594, getitem_1595, getitem_1596, getitem_1597, getitem_1598, getitem_1599, getitem_1600, getitem_1601, getitem_1602, getitem_1603, getitem_1604, getitem_1605, getitem_1606, getitem_1607, getitem_1608, getitem_1609, getitem_1610, getitem_1611, getitem_1612, getitem_1613, getitem_1614, getitem_1615, getitem_1616, getitem_1617, getitem_1618, getitem_1619, getitem_1620, getitem_1621, getitem_1622, getitem_1623, getitem_1624, getitem_1625, getitem_1626, getitem_1627]); getitem_1480 = getitem_1481 = getitem_1482 = getitem_1483 = getitem_1484 = getitem_1485 = getitem_1486 = getitem_1487 = getitem_1488 = getitem_1489 = getitem_1490 = getitem_1491 = getitem_1492 = getitem_1493 = getitem_1494 = getitem_1495 = getitem_1496 = getitem_1497 = getitem_1498 = getitem_1499 = getitem_1500 = getitem_1501 = getitem_1502 = getitem_1503 = getitem_1504 = getitem_1505 = getitem_1506 = getitem_1507 = getitem_1508 = getitem_1509 = getitem_1510 = getitem_1511 = getitem_1512 = getitem_1513 = getitem_1514 = getitem_1515 = getitem_1516 = getitem_1517 = getitem_1518 = getitem_1519 = getitem_1520 = getitem_1521 = getitem_1522 = getitem_1523 = getitem_1524 = getitem_1525 = getitem_1526 = getitem_1527 = getitem_1528 = getitem_1529 = getitem_1530 = getitem_1531 = getitem_1532 = getitem_1533 = getitem_1534 = getitem_1535 = getitem_1536 = getitem_1537 = getitem_1538 = getitem_1539 = getitem_1540 = getitem_1541 = getitem_1542 = getitem_1543 = getitem_1544 = getitem_1545 = getitem_1546 = getitem_1547 = getitem_1548 = getitem_1549 = getitem_1550 = getitem_1551 = getitem_1552 = getitem_1553 = getitem_1554 = getitem_1555 = getitem_1556 = getitem_1557 = getitem_1558 = getitem_1559 = getitem_1560 = getitem_1561 = getitem_1562 = getitem_1563 = getitem_1564 = getitem_1565 = getitem_1566 = getitem_1567 = getitem_1568 = getitem_1569 = getitem_1570 = getitem_1571 = getitem_1572 = getitem_1573 = getitem_1574 = getitem_1575 = getitem_1576 = getitem_1577 = getitem_1578 = getitem_1579 = getitem_1580 = getitem_1581 = getitem_1582 = getitem_1583 = getitem_1584 = getitem_1585 = getitem_1586 = getitem_1587 = getitem_1588 = getitem_1589 = getitem_1590 = getitem_1591 = getitem_1592 = getitem_1593 = getitem_1594 = getitem_1595 = getitem_1596 = getitem_1597 = getitem_1598 = getitem_1599 = getitem_1600 = getitem_1601 = getitem_1602 = getitem_1603 = getitem_1604 = getitem_1605 = getitem_1606 = getitem_1607 = getitem_1608 = getitem_1609 = getitem_1610 = getitem_1611 = getitem_1612 = getitem_1613 = getitem_1614 = getitem_1615 = getitem_1616 = getitem_1617 = getitem_1618 = getitem_1619 = getitem_1620 = getitem_1621 = getitem_1622 = getitem_1623 = getitem_1624 = getitem_1625 = getitem_1626 = getitem_1627 = None\n getitem_1628 = _foreach_neg[0]\n getitem_1629 = _foreach_neg[1]\n getitem_1630 = _foreach_neg[2]\n getitem_1631 = _foreach_neg[3]\n getitem_1632 = _foreach_neg[4]\n getitem_1633 = _foreach_neg[5]\n getitem_1634 = _foreach_neg[6]\n getitem_1635 = _foreach_neg[7]\n getitem_1636 = _foreach_neg[8]\n getitem_1637 = _foreach_neg[9]\n getitem_1638 = _foreach_neg[10]\n getitem_1639 = _foreach_neg[11]\n getitem_1640 = _foreach_neg[12]\n getitem_1641 = _foreach_neg[13]\n getitem_1642 = _foreach_neg[14]\n 
getitem_1643 = _foreach_neg[15]\n getitem_1644 = _foreach_neg[16]\n getitem_1645 = _foreach_neg[17]\n getitem_1646 = _foreach_neg[18]\n getitem_1647 = _foreach_neg[19]\n getitem_1648 = _foreach_neg[20]\n getitem_1649 = _foreach_neg[21]\n getitem_1650 = _foreach_neg[22]\n getitem_1651 = _foreach_neg[23]\n getitem_1652 = _foreach_neg[24]\n getitem_1653 = _foreach_neg[25]\n getitem_1654 = _foreach_neg[26]\n getitem_1655 = _foreach_neg[27]\n getitem_1656 = _foreach_neg[28]\n getitem_1657 = _foreach_neg[29]\n getitem_1658 = _foreach_neg[30]\n getitem_1659 = _foreach_neg[31]\n getitem_1660 = _foreach_neg[32]\n getitem_1661 = _foreach_neg[33]\n getitem_1662 = _foreach_neg[34]\n getitem_1663 = _foreach_neg[35]\n getitem_1664 = _foreach_neg[36]\n getitem_1665 = _foreach_neg[37]\n getitem_1666 = _foreach_neg[38]\n getitem_1667 = _foreach_neg[39]\n getitem_1668 = _foreach_neg[40]\n getitem_1669 = _foreach_neg[41]\n getitem_1670 = _foreach_neg[42]\n getitem_1671 = _foreach_neg[43]\n getitem_1672 = _foreach_neg[44]\n getitem_1673 = _foreach_neg[45]\n getitem_1674 = _foreach_neg[46]\n getitem_1675 = _foreach_neg[47]\n getitem_1676 = _foreach_neg[48]\n getitem_1677 = _foreach_neg[49]\n getitem_1678 = _foreach_neg[50]\n getitem_1679 = _foreach_neg[51]\n getitem_1680 = _foreach_neg[52]\n getitem_1681 = _foreach_neg[53]\n getitem_1682 = _foreach_neg[54]\n getitem_1683 = _foreach_neg[55]\n getitem_1684 = _foreach_neg[56]\n getitem_1685 = _foreach_neg[57]\n getitem_1686 = _foreach_neg[58]\n getitem_1687 = _foreach_neg[59]\n getitem_1688 = _foreach_neg[60]\n getitem_1689 = _foreach_neg[61]\n getitem_1690 = _foreach_neg[62]\n getitem_1691 = _foreach_neg[63]\n getitem_1692 = _foreach_neg[64]\n getitem_1693 = _foreach_neg[65]\n getitem_1694 = _foreach_neg[66]\n getitem_1695 = _foreach_neg[67]\n getitem_1696 = _foreach_neg[68]\n getitem_1697 = _foreach_neg[69]\n getitem_1698 = _foreach_neg[70]\n getitem_1699 = _foreach_neg[71]\n getitem_1700 = _foreach_neg[72]\n getitem_1701 = _foreach_neg[73]\n getitem_1702 = _foreach_neg[74]\n getitem_1703 = _foreach_neg[75]\n getitem_1704 = _foreach_neg[76]\n getitem_1705 = _foreach_neg[77]\n getitem_1706 = _foreach_neg[78]\n getitem_1707 = _foreach_neg[79]\n getitem_1708 = _foreach_neg[80]\n getitem_1709 = _foreach_neg[81]\n getitem_1710 = _foreach_neg[82]\n getitem_1711 = _foreach_neg[83]\n getitem_1712 = _foreach_neg[84]\n getitem_1713 = _foreach_neg[85]\n getitem_1714 = _foreach_neg[86]\n getitem_1715 = _foreach_neg[87]\n getitem_1716 = _foreach_neg[88]\n getitem_1717 = _foreach_neg[89]\n getitem_1718 = _foreach_neg[90]\n getitem_1719 = _foreach_neg[91]\n getitem_1720 = _foreach_neg[92]\n getitem_1721 = _foreach_neg[93]\n getitem_1722 = _foreach_neg[94]\n getitem_1723 = _foreach_neg[95]\n getitem_1724 = _foreach_neg[96]\n getitem_1725 = _foreach_neg[97]\n getitem_1726 = _foreach_neg[98]\n getitem_1727 = _foreach_neg[99]\n getitem_1728 = _foreach_neg[100]\n getitem_1729 = _foreach_neg[101]\n getitem_1730 = _foreach_neg[102]\n getitem_1731 = _foreach_neg[103]\n getitem_1732 = _foreach_neg[104]\n getitem_1733 = _foreach_neg[105]\n getitem_1734 = _foreach_neg[106]\n getitem_1735 = _foreach_neg[107]\n getitem_1736 = _foreach_neg[108]\n getitem_1737 = _foreach_neg[109]\n getitem_1738 = _foreach_neg[110]\n getitem_1739 = _foreach_neg[111]\n getitem_1740 = _foreach_neg[112]\n getitem_1741 = _foreach_neg[113]\n getitem_1742 = _foreach_neg[114]\n getitem_1743 = _foreach_neg[115]\n getitem_1744 = _foreach_neg[116]\n getitem_1745 = _foreach_neg[117]\n getitem_1746 = _foreach_neg[118]\n 
getitem_1747 = _foreach_neg[119]\n getitem_1748 = _foreach_neg[120]\n getitem_1749 = _foreach_neg[121]\n getitem_1750 = _foreach_neg[122]\n getitem_1751 = _foreach_neg[123]\n getitem_1752 = _foreach_neg[124]\n getitem_1753 = _foreach_neg[125]\n getitem_1754 = _foreach_neg[126]\n getitem_1755 = _foreach_neg[127]\n getitem_1756 = _foreach_neg[128]\n getitem_1757 = _foreach_neg[129]\n getitem_1758 = _foreach_neg[130]\n getitem_1759 = _foreach_neg[131]\n getitem_1760 = _foreach_neg[132]\n getitem_1761 = _foreach_neg[133]\n getitem_1762 = _foreach_neg[134]\n getitem_1763 = _foreach_neg[135]\n getitem_1764 = _foreach_neg[136]\n getitem_1765 = _foreach_neg[137]\n getitem_1766 = _foreach_neg[138]\n getitem_1767 = _foreach_neg[139]\n getitem_1768 = _foreach_neg[140]\n getitem_1769 = _foreach_neg[141]\n getitem_1770 = _foreach_neg[142]\n getitem_1771 = _foreach_neg[143]\n getitem_1772 = _foreach_neg[144]\n getitem_1773 = _foreach_neg[145]\n getitem_1774 = _foreach_neg[146]\n getitem_1775 = _foreach_neg[147]; _foreach_neg = None\n _foreach_div = torch.ops.aten._foreach_div.Scalar([getitem_1332, getitem_1333, getitem_1334, getitem_1335, getitem_1336, getitem_1337, getitem_1338, getitem_1339, getitem_1340, getitem_1341, getitem_1342, getitem_1343, getitem_1344, getitem_1345, getitem_1346, getitem_1347, getitem_1348, getitem_1349, getitem_1350, getitem_1351, getitem_1352, getitem_1353, getitem_1354, getitem_1355, getitem_1356, getitem_1357, getitem_1358, getitem_1359, getitem_1360, getitem_1361, getitem_1362, getitem_1363, getitem_1364, getitem_1365, getitem_1366, getitem_1367, getitem_1368, getitem_1369, getitem_1370, getitem_1371, getitem_1372, getitem_1373, getitem_1374, getitem_1375, getitem_1376, getitem_1377, getitem_1378, getitem_1379, getitem_1380, getitem_1381, getitem_1382, getitem_1383, getitem_1384, getitem_1385, getitem_1386, getitem_1387, getitem_1388, getitem_1389, getitem_1390, getitem_1391, getitem_1392, getitem_1393, getitem_1394, getitem_1395, getitem_1396, getitem_1397, getitem_1398, getitem_1399, getitem_1400, getitem_1401, getitem_1402, getitem_1403, getitem_1404, getitem_1405, getitem_1406, getitem_1407, getitem_1408, getitem_1409, getitem_1410, getitem_1411, getitem_1412, getitem_1413, getitem_1414, getitem_1415, getitem_1416, getitem_1417, getitem_1418, getitem_1419, getitem_1420, getitem_1421, getitem_1422, getitem_1423, getitem_1424, getitem_1425, getitem_1426, getitem_1427, getitem_1428, getitem_1429, getitem_1430, getitem_1431, getitem_1432, getitem_1433, getitem_1434, getitem_1435, getitem_1436, getitem_1437, getitem_1438, getitem_1439, getitem_1440, getitem_1441, getitem_1442, getitem_1443, getitem_1444, getitem_1445, getitem_1446, getitem_1447, getitem_1448, getitem_1449, getitem_1450, getitem_1451, getitem_1452, getitem_1453, getitem_1454, getitem_1455, getitem_1456, getitem_1457, getitem_1458, getitem_1459, getitem_1460, getitem_1461, getitem_1462, getitem_1463, getitem_1464, getitem_1465, getitem_1466, getitem_1467, getitem_1468, getitem_1469, getitem_1470, getitem_1471, getitem_1472, getitem_1473, getitem_1474, getitem_1475, getitem_1476, getitem_1477, getitem_1478, getitem_1479], 0.01); getitem_1332 = getitem_1333 = getitem_1334 = getitem_1335 = getitem_1336 = getitem_1337 = getitem_1338 = getitem_1339 = getitem_1340 = getitem_1341 = getitem_1342 = getitem_1343 = getitem_1344 = getitem_1345 = getitem_1346 = getitem_1347 = getitem_1348 = getitem_1349 = getitem_1350 = getitem_1351 = getitem_1352 = getitem_1353 = getitem_1354 = getitem_1355 = getitem_1356 = getitem_1357 
= getitem_1358 = getitem_1359 = getitem_1360 = getitem_1361 = getitem_1362 = getitem_1363 = getitem_1364 = getitem_1365 = getitem_1366 = getitem_1367 = getitem_1368 = getitem_1369 = getitem_1370 = getitem_1371 = getitem_1372 = getitem_1373 = getitem_1374 = getitem_1375 = getitem_1376 = getitem_1377 = getitem_1378 = getitem_1379 = getitem_1380 = getitem_1381 = getitem_1382 = getitem_1383 = getitem_1384 = getitem_1385 = getitem_1386 = getitem_1387 = getitem_1388 = getitem_1389 = getitem_1390 = getitem_1391 = getitem_1392 = getitem_1393 = getitem_1394 = getitem_1395 = getitem_1396 = getitem_1397 = getitem_1398 = getitem_1399 = getitem_1400 = getitem_1401 = getitem_1402 = getitem_1403 = getitem_1404 = getitem_1405 = getitem_1406 = getitem_1407 = getitem_1408 = getitem_1409 = getitem_1410 = getitem_1411 = getitem_1412 = getitem_1413 = getitem_1414 = getitem_1415 = getitem_1416 = getitem_1417 = getitem_1418 = getitem_1419 = getitem_1420 = getitem_1421 = getitem_1422 = getitem_1423 = getitem_1424 = getitem_1425 = getitem_1426 = getitem_1427 = getitem_1428 = getitem_1429 = getitem_1430 = getitem_1431 = getitem_1432 = getitem_1433 = getitem_1434 = getitem_1435 = getitem_1436 = getitem_1437 = getitem_1438 = getitem_1439 = getitem_1440 = getitem_1441 = getitem_1442 = getitem_1443 = getitem_1444 = getitem_1445 = getitem_1446 = getitem_1447 = getitem_1448 = getitem_1449 = getitem_1450 = getitem_1451 = getitem_1452 = getitem_1453 = getitem_1454 = getitem_1455 = getitem_1456 = getitem_1457 = getitem_1458 = getitem_1459 = getitem_1460 = getitem_1461 = getitem_1462 = getitem_1463 = getitem_1464 = getitem_1465 = getitem_1466 = getitem_1467 = getitem_1468 = getitem_1469 = getitem_1470 = getitem_1471 = getitem_1472 = getitem_1473 = getitem_1474 = getitem_1475 = getitem_1476 = getitem_1477 = getitem_1478 = getitem_1479 = None\n getitem_1776 = _foreach_div[0]\n getitem_1777 = _foreach_div[1]\n getitem_1778 = _foreach_div[2]\n getitem_1779 = _foreach_div[3]\n getitem_1780 = _foreach_div[4]\n getitem_1781 = _foreach_div[5]\n getitem_1782 = _foreach_div[6]\n getitem_1783 = _foreach_div[7]\n getitem_1784 = _foreach_div[8]\n getitem_1785 = _foreach_div[9]\n getitem_1786 = _foreach_div[10]\n getitem_1787 = _foreach_div[11]\n getitem_1788 = _foreach_div[12]\n getitem_1789 = _foreach_div[13]\n getitem_1790 = _foreach_div[14]\n getitem_1791 = _foreach_div[15]\n getitem_1792 = _foreach_div[16]\n getitem_1793 = _foreach_div[17]\n getitem_1794 = _foreach_div[18]\n getitem_1795 = _foreach_div[19]\n getitem_1796 = _foreach_div[20]\n getitem_1797 = _foreach_div[21]\n getitem_1798 = _foreach_div[22]\n getitem_1799 = _foreach_div[23]\n getitem_1800 = _foreach_div[24]\n getitem_1801 = _foreach_div[25]\n getitem_1802 = _foreach_div[26]\n getitem_1803 = _foreach_div[27]\n getitem_1804 = _foreach_div[28]\n getitem_1805 = _foreach_div[29]\n getitem_1806 = _foreach_div[30]\n getitem_1807 = _foreach_div[31]\n getitem_1808 = _foreach_div[32]\n getitem_1809 = _foreach_div[33]\n getitem_1810 = _foreach_div[34]\n getitem_1811 = _foreach_div[35]\n getitem_1812 = _foreach_div[36]\n getitem_1813 = _foreach_div[37]\n getitem_1814 = _foreach_div[38]\n getitem_1815 = _foreach_div[39]\n getitem_1816 = _foreach_div[40]\n getitem_1817 = _foreach_div[41]\n getitem_1818 = _foreach_div[42]\n getitem_1819 = _foreach_div[43]\n getitem_1820 = _foreach_div[44]\n getitem_1821 = _foreach_div[45]\n getitem_1822 = _foreach_div[46]\n getitem_1823 = _foreach_div[47]\n getitem_1824 = _foreach_div[48]\n getitem_1825 = _foreach_div[49]\n getitem_1826 = 
_foreach_div[50]\n getitem_1827 = _foreach_div[51]\n getitem_1828 = _foreach_div[52]\n getitem_1829 = _foreach_div[53]\n getitem_1830 = _foreach_div[54]\n getitem_1831 = _foreach_div[55]\n getitem_1832 = _foreach_div[56]\n getitem_1833 = _foreach_div[57]\n getitem_1834 = _foreach_div[58]\n getitem_1835 = _foreach_div[59]\n getitem_1836 = _foreach_div[60]\n getitem_1837 = _foreach_div[61]\n getitem_1838 = _foreach_div[62]\n getitem_1839 = _foreach_div[63]\n getitem_1840 = _foreach_div[64]\n getitem_1841 = _foreach_div[65]\n getitem_1842 = _foreach_div[66]\n getitem_1843 = _foreach_div[67]\n getitem_1844 = _foreach_div[68]\n getitem_1845 = _foreach_div[69]\n getitem_1846 = _foreach_div[70]\n getitem_1847 = _foreach_div[71]\n getitem_1848 = _foreach_div[72]\n getitem_1849 = _foreach_div[73]\n getitem_1850 = _foreach_div[74]\n getitem_1851 = _foreach_div[75]\n getitem_1852 = _foreach_div[76]\n getitem_1853 = _foreach_div[77]\n getitem_1854 = _foreach_div[78]\n getitem_1855 = _foreach_div[79]\n getitem_1856 = _foreach_div[80]\n getitem_1857 = _foreach_div[81]\n getitem_1858 = _foreach_div[82]\n getitem_1859 = _foreach_div[83]\n getitem_1860 = _foreach_div[84]\n getitem_1861 = _foreach_div[85]\n getitem_1862 = _foreach_div[86]\n getitem_1863 = _foreach_div[87]\n getitem_1864 = _foreach_div[88]\n getitem_1865 = _foreach_div[89]\n getitem_1866 = _foreach_div[90]\n getitem_1867 = _foreach_div[91]\n getitem_1868 = _foreach_div[92]\n getitem_1869 = _foreach_div[93]\n getitem_1870 = _foreach_div[94]\n getitem_1871 = _foreach_div[95]\n getitem_1872 = _foreach_div[96]\n getitem_1873 = _foreach_div[97]\n getitem_1874 = _foreach_div[98]\n getitem_1875 = _foreach_div[99]\n getitem_1876 = _foreach_div[100]\n getitem_1877 = _foreach_div[101]\n getitem_1878 = _foreach_div[102]\n getitem_1879 = _foreach_div[103]\n getitem_1880 = _foreach_div[104]\n getitem_1881 = _foreach_div[105]\n getitem_1882 = _foreach_div[106]\n getitem_1883 = _foreach_div[107]\n getitem_1884 = _foreach_div[108]\n getitem_1885 = _foreach_div[109]\n getitem_1886 = _foreach_div[110]\n getitem_1887 = _foreach_div[111]\n getitem_1888 = _foreach_div[112]\n getitem_1889 = _foreach_div[113]\n getitem_1890 = _foreach_div[114]\n getitem_1891 = _foreach_div[115]\n getitem_1892 = _foreach_div[116]\n getitem_1893 = _foreach_div[117]\n getitem_1894 = _foreach_div[118]\n getitem_1895 = _foreach_div[119]\n getitem_1896 = _foreach_div[120]\n getitem_1897 = _foreach_div[121]\n getitem_1898 = _foreach_div[122]\n getitem_1899 = _foreach_div[123]\n getitem_1900 = _foreach_div[124]\n getitem_1901 = _foreach_div[125]\n getitem_1902 = _foreach_div[126]\n getitem_1903 = _foreach_div[127]\n getitem_1904 = _foreach_div[128]\n getitem_1905 = _foreach_div[129]\n getitem_1906 = _foreach_div[130]\n getitem_1907 = _foreach_div[131]\n getitem_1908 = _foreach_div[132]\n getitem_1909 = _foreach_div[133]\n getitem_1910 = _foreach_div[134]\n getitem_1911 = _foreach_div[135]\n getitem_1912 = _foreach_div[136]\n getitem_1913 = _foreach_div[137]\n getitem_1914 = _foreach_div[138]\n getitem_1915 = _foreach_div[139]\n getitem_1916 = _foreach_div[140]\n getitem_1917 = _foreach_div[141]\n getitem_1918 = _foreach_div[142]\n getitem_1919 = _foreach_div[143]\n getitem_1920 = _foreach_div[144]\n getitem_1921 = _foreach_div[145]\n getitem_1922 = _foreach_div[146]\n getitem_1923 = _foreach_div[147]; _foreach_div = None\n _foreach_reciprocal = torch.ops.aten._foreach_reciprocal.default([getitem_1776, getitem_1777, getitem_1778, getitem_1779, getitem_1780, getitem_1781, getitem_1782, 
getitem_1783, getitem_1784, getitem_1785, getitem_1786, getitem_1787, getitem_1788, getitem_1789, getitem_1790, getitem_1791, getitem_1792, getitem_1793, getitem_1794, getitem_1795, getitem_1796, getitem_1797, getitem_1798, getitem_1799, getitem_1800, getitem_1801, getitem_1802, getitem_1803, getitem_1804, getitem_1805, getitem_1806, getitem_1807, getitem_1808, getitem_1809, getitem_1810, getitem_1811, getitem_1812, getitem_1813, getitem_1814, getitem_1815, getitem_1816, getitem_1817, getitem_1818, getitem_1819, getitem_1820, getitem_1821, getitem_1822, getitem_1823, getitem_1824, getitem_1825, getitem_1826, getitem_1827, getitem_1828, getitem_1829, getitem_1830, getitem_1831, getitem_1832, getitem_1833, getitem_1834, getitem_1835, getitem_1836, getitem_1837, getitem_1838, getitem_1839, getitem_1840, getitem_1841, getitem_1842, getitem_1843, getitem_1844, getitem_1845, getitem_1846, getitem_1847, getitem_1848, getitem_1849, getitem_1850, getitem_1851, getitem_1852, getitem_1853, getitem_1854, getitem_1855, getitem_1856, getitem_1857, getitem_1858, getitem_1859, getitem_1860, getitem_1861, getitem_1862, getitem_1863, getitem_1864, getitem_1865, getitem_1866, getitem_1867, getitem_1868, getitem_1869, getitem_1870, getitem_1871, getitem_1872, getitem_1873, getitem_1874, getitem_1875, getitem_1876, getitem_1877, getitem_1878, getitem_1879, getitem_1880, getitem_1881, getitem_1882, getitem_1883, getitem_1884, getitem_1885, getitem_1886, getitem_1887, getitem_1888, getitem_1889, getitem_1890, getitem_1891, getitem_1892, getitem_1893, getitem_1894, getitem_1895, getitem_1896, getitem_1897, getitem_1898, getitem_1899, getitem_1900, getitem_1901, getitem_1902, getitem_1903, getitem_1904, getitem_1905, getitem_1906, getitem_1907, getitem_1908, getitem_1909, getitem_1910, getitem_1911, getitem_1912, getitem_1913, getitem_1914, getitem_1915, getitem_1916, getitem_1917, getitem_1918, getitem_1919, getitem_1920, getitem_1921, getitem_1922, getitem_1923]); getitem_1776 = getitem_1777 = getitem_1778 = getitem_1779 = getitem_1780 = getitem_1781 = getitem_1782 = getitem_1783 = getitem_1784 = getitem_1785 = getitem_1786 = getitem_1787 = getitem_1788 = getitem_1789 = getitem_1790 = getitem_1791 = getitem_1792 = getitem_1793 = getitem_1794 = getitem_1795 = getitem_1796 = getitem_1797 = getitem_1798 = getitem_1799 = getitem_1800 = getitem_1801 = getitem_1802 = getitem_1803 = getitem_1804 = getitem_1805 = getitem_1806 = getitem_1807 = getitem_1808 = getitem_1809 = getitem_1810 = getitem_1811 = getitem_1812 = getitem_1813 = getitem_1814 = getitem_1815 = getitem_1816 = getitem_1817 = getitem_1818 = getitem_1819 = getitem_1820 = getitem_1821 = getitem_1822 = getitem_1823 = getitem_1824 = getitem_1825 = getitem_1826 = getitem_1827 = getitem_1828 = getitem_1829 = getitem_1830 = getitem_1831 = getitem_1832 = getitem_1833 = getitem_1834 = getitem_1835 = getitem_1836 = getitem_1837 = getitem_1838 = getitem_1839 = getitem_1840 = getitem_1841 = getitem_1842 = getitem_1843 = getitem_1844 = getitem_1845 = getitem_1846 = getitem_1847 = getitem_1848 = getitem_1849 = getitem_1850 = getitem_1851 = getitem_1852 = getitem_1853 = getitem_1854 = getitem_1855 = getitem_1856 = getitem_1857 = getitem_1858 = getitem_1859 = getitem_1860 = getitem_1861 = getitem_1862 = getitem_1863 = getitem_1864 = getitem_1865 = getitem_1866 = getitem_1867 = getitem_1868 = getitem_1869 = getitem_1870 = getitem_1871 = getitem_1872 = getitem_1873 = getitem_1874 = getitem_1875 = getitem_1876 = getitem_1877 = getitem_1878 = getitem_1879 = getitem_1880 = 
getitem_1881 = getitem_1882 = getitem_1883 = getitem_1884 = getitem_1885 = getitem_1886 = getitem_1887 = getitem_1888 = getitem_1889 = getitem_1890 = getitem_1891 = getitem_1892 = getitem_1893 = getitem_1894 = getitem_1895 = getitem_1896 = getitem_1897 = getitem_1898 = getitem_1899 = getitem_1900 = getitem_1901 = getitem_1902 = getitem_1903 = getitem_1904 = getitem_1905 = getitem_1906 = getitem_1907 = getitem_1908 = getitem_1909 = getitem_1910 = getitem_1911 = getitem_1912 = getitem_1913 = getitem_1914 = getitem_1915 = getitem_1916 = getitem_1917 = getitem_1918 = getitem_1919 = getitem_1920 = getitem_1921 = getitem_1922 = getitem_1923 = None\n getitem_1924 = _foreach_reciprocal[0]\n getitem_1925 = _foreach_reciprocal[1]\n getitem_1926 = _foreach_reciprocal[2]\n getitem_1927 = _foreach_reciprocal[3]\n getitem_1928 = _foreach_reciprocal[4]\n getitem_1929 = _foreach_reciprocal[5]\n getitem_1930 = _foreach_reciprocal[6]\n getitem_1931 = _foreach_reciprocal[7]\n getitem_1932 = _foreach_reciprocal[8]\n getitem_1933 = _foreach_reciprocal[9]\n getitem_1934 = _foreach_reciprocal[10]\n getitem_1935 = _foreach_reciprocal[11]\n getitem_1936 = _foreach_reciprocal[12]\n getitem_1937 = _foreach_reciprocal[13]\n getitem_1938 = _foreach_reciprocal[14]\n getitem_1939 = _foreach_reciprocal[15]\n getitem_1940 = _foreach_reciprocal[16]\n getitem_1941 = _foreach_reciprocal[17]\n getitem_1942 = _foreach_reciprocal[18]\n getitem_1943 = _foreach_reciprocal[19]\n getitem_1944 = _foreach_reciprocal[20]\n getitem_1945 = _foreach_reciprocal[21]\n getitem_1946 = _foreach_reciprocal[22]\n getitem_1947 = _foreach_reciprocal[23]\n getitem_1948 = _foreach_reciprocal[24]\n getitem_1949 = _foreach_reciprocal[25]\n getitem_1950 = _foreach_reciprocal[26]\n getitem_1951 = _foreach_reciprocal[27]\n getitem_1952 = _foreach_reciprocal[28]\n getitem_1953 = _foreach_reciprocal[29]\n getitem_1954 = _foreach_reciprocal[30]\n getitem_1955 = _foreach_reciprocal[31]\n getitem_1956 = _foreach_reciprocal[32]\n getitem_1957 = _foreach_reciprocal[33]\n getitem_1958 = _foreach_reciprocal[34]\n getitem_1959 = _foreach_reciprocal[35]\n getitem_1960 = _foreach_reciprocal[36]\n getitem_1961 = _foreach_reciprocal[37]\n getitem_1962 = _foreach_reciprocal[38]\n getitem_1963 = _foreach_reciprocal[39]\n getitem_1964 = _foreach_reciprocal[40]\n getitem_1965 = _foreach_reciprocal[41]\n getitem_1966 = _foreach_reciprocal[42]\n getitem_1967 = _foreach_reciprocal[43]\n getitem_1968 = _foreach_reciprocal[44]\n getitem_1969 = _foreach_reciprocal[45]\n getitem_1970 = _foreach_reciprocal[46]\n getitem_1971 = _foreach_reciprocal[47]\n getitem_1972 = _foreach_reciprocal[48]\n getitem_1973 = _foreach_reciprocal[49]\n getitem_1974 = _foreach_reciprocal[50]\n getitem_1975 = _foreach_reciprocal[51]\n getitem_1976 = _foreach_reciprocal[52]\n getitem_1977 = _foreach_reciprocal[53]\n getitem_1978 = _foreach_reciprocal[54]\n getitem_1979 = _foreach_reciprocal[55]\n getitem_1980 = _foreach_reciprocal[56]\n getitem_1981 = _foreach_reciprocal[57]\n getitem_1982 = _foreach_reciprocal[58]\n getitem_1983 = _foreach_reciprocal[59]\n getitem_1984 = _foreach_reciprocal[60]\n getitem_1985 = _foreach_reciprocal[61]\n getitem_1986 = _foreach_reciprocal[62]\n getitem_1987 = _foreach_reciprocal[63]\n getitem_1988 = _foreach_reciprocal[64]\n getitem_1989 = _foreach_reciprocal[65]\n getitem_1990 = _foreach_reciprocal[66]\n getitem_1991 = _foreach_reciprocal[67]\n getitem_1992 = _foreach_reciprocal[68]\n getitem_1993 = _foreach_reciprocal[69]\n getitem_1994 = _foreach_reciprocal[70]\n 
getitem_1995 = _foreach_reciprocal[71]\n getitem_1996 = _foreach_reciprocal[72]\n getitem_1997 = _foreach_reciprocal[73]\n getitem_1998 = _foreach_reciprocal[74]\n getitem_1999 = _foreach_reciprocal[75]\n getitem_2000 = _foreach_reciprocal[76]\n getitem_2001 = _foreach_reciprocal[77]\n getitem_2002 = _foreach_reciprocal[78]\n getitem_2003 = _foreach_reciprocal[79]\n getitem_2004 = _foreach_reciprocal[80]\n getitem_2005 = _foreach_reciprocal[81]\n getitem_2006 = _foreach_reciprocal[82]\n getitem_2007 = _foreach_reciprocal[83]\n getitem_2008 = _foreach_reciprocal[84]\n getitem_2009 = _foreach_reciprocal[85]\n getitem_2010 = _foreach_reciprocal[86]\n getitem_2011 = _foreach_reciprocal[87]\n getitem_2012 = _foreach_reciprocal[88]\n getitem_2013 = _foreach_reciprocal[89]\n getitem_2014 = _foreach_reciprocal[90]\n getitem_2015 = _foreach_reciprocal[91]\n getitem_2016 = _foreach_reciprocal[92]\n getitem_2017 = _foreach_reciprocal[93]\n getitem_2018 = _foreach_reciprocal[94]\n getitem_2019 = _foreach_reciprocal[95]\n getitem_2020 = _foreach_reciprocal[96]\n getitem_2021 = _foreach_reciprocal[97]\n getitem_2022 = _foreach_reciprocal[98]\n getitem_2023 = _foreach_reciprocal[99]\n getitem_2024 = _foreach_reciprocal[100]\n getitem_2025 = _foreach_reciprocal[101]\n getitem_2026 = _foreach_reciprocal[102]\n getitem_2027 = _foreach_reciprocal[103]\n getitem_2028 = _foreach_reciprocal[104]\n getitem_2029 = _foreach_reciprocal[105]\n getitem_2030 = _foreach_reciprocal[106]\n getitem_2031 = _foreach_reciprocal[107]\n getitem_2032 = _foreach_reciprocal[108]\n getitem_2033 = _foreach_reciprocal[109]\n getitem_2034 = _foreach_reciprocal[110]\n getitem_2035 = _foreach_reciprocal[111]\n getitem_2036 = _foreach_reciprocal[112]\n getitem_2037 = _foreach_reciprocal[113]\n getitem_2038 = _foreach_reciprocal[114]\n getitem_2039 = _foreach_reciprocal[115]\n getitem_2040 = _foreach_reciprocal[116]\n getitem_2041 = _foreach_reciprocal[117]\n getitem_2042 = _foreach_reciprocal[118]\n getitem_2043 = _foreach_reciprocal[119]\n getitem_2044 = _foreach_reciprocal[120]\n getitem_2045 = _foreach_reciprocal[121]\n getitem_2046 = _foreach_reciprocal[122]\n getitem_2047 = _foreach_reciprocal[123]\n getitem_2048 = _foreach_reciprocal[124]\n getitem_2049 = _foreach_reciprocal[125]\n getitem_2050 = _foreach_reciprocal[126]\n getitem_2051 = _foreach_reciprocal[127]\n getitem_2052 = _foreach_reciprocal[128]\n getitem_2053 = _foreach_reciprocal[129]\n getitem_2054 = _foreach_reciprocal[130]\n getitem_2055 = _foreach_reciprocal[131]\n getitem_2056 = _foreach_reciprocal[132]\n getitem_2057 = _foreach_reciprocal[133]\n getitem_2058 = _foreach_reciprocal[134]\n getitem_2059 = _foreach_reciprocal[135]\n getitem_2060 = _foreach_reciprocal[136]\n getitem_2061 = _foreach_reciprocal[137]\n getitem_2062 = _foreach_reciprocal[138]\n getitem_2063 = _foreach_reciprocal[139]\n getitem_2064 = _foreach_reciprocal[140]\n getitem_2065 = _foreach_reciprocal[141]\n getitem_2066 = _foreach_reciprocal[142]\n getitem_2067 = _foreach_reciprocal[143]\n getitem_2068 = _foreach_reciprocal[144]\n getitem_2069 = _foreach_reciprocal[145]\n getitem_2070 = _foreach_reciprocal[146]\n getitem_2071 = _foreach_reciprocal[147]; _foreach_reciprocal = None\n _foreach_sqrt = torch.ops.aten._foreach_sqrt.default([getitem_1628, getitem_1629, getitem_1630, getitem_1631, getitem_1632, getitem_1633, getitem_1634, getitem_1635, getitem_1636, getitem_1637, getitem_1638, getitem_1639, getitem_1640, getitem_1641, getitem_1642, getitem_1643, getitem_1644, getitem_1645, getitem_1646, 
getitem_1647, getitem_1648, getitem_1649, getitem_1650, getitem_1651, getitem_1652, getitem_1653, getitem_1654, getitem_1655, getitem_1656, getitem_1657, getitem_1658, getitem_1659, getitem_1660, getitem_1661, getitem_1662, getitem_1663, getitem_1664, getitem_1665, getitem_1666, getitem_1667, getitem_1668, getitem_1669, getitem_1670, getitem_1671, getitem_1672, getitem_1673, getitem_1674, getitem_1675, getitem_1676, getitem_1677, getitem_1678, getitem_1679, getitem_1680, getitem_1681, getitem_1682, getitem_1683, getitem_1684, getitem_1685, getitem_1686, getitem_1687, getitem_1688, getitem_1689, getitem_1690, getitem_1691, getitem_1692, getitem_1693, getitem_1694, getitem_1695, getitem_1696, getitem_1697, getitem_1698, getitem_1699, getitem_1700, getitem_1701, getitem_1702, getitem_1703, getitem_1704, getitem_1705, getitem_1706, getitem_1707, getitem_1708, getitem_1709, getitem_1710, getitem_1711, getitem_1712, getitem_1713, getitem_1714, getitem_1715, getitem_1716, getitem_1717, getitem_1718, getitem_1719, getitem_1720, getitem_1721, getitem_1722, getitem_1723, getitem_1724, getitem_1725, getitem_1726, getitem_1727, getitem_1728, getitem_1729, getitem_1730, getitem_1731, getitem_1732, getitem_1733, getitem_1734, getitem_1735, getitem_1736, getitem_1737, getitem_1738, getitem_1739, getitem_1740, getitem_1741, getitem_1742, getitem_1743, getitem_1744, getitem_1745, getitem_1746, getitem_1747, getitem_1748, getitem_1749, getitem_1750, getitem_1751, getitem_1752, getitem_1753, getitem_1754, getitem_1755, getitem_1756, getitem_1757, getitem_1758, getitem_1759, getitem_1760, getitem_1761, getitem_1762, getitem_1763, getitem_1764, getitem_1765, getitem_1766, getitem_1767, getitem_1768, getitem_1769, getitem_1770, getitem_1771, getitem_1772, getitem_1773, getitem_1774, getitem_1775]); getitem_1628 = getitem_1629 = getitem_1630 = getitem_1631 = getitem_1632 = getitem_1633 = getitem_1634 = getitem_1635 = getitem_1636 = getitem_1637 = getitem_1638 = getitem_1639 = getitem_1640 = getitem_1641 = getitem_1642 = getitem_1643 = getitem_1644 = getitem_1645 = getitem_1646 = getitem_1647 = getitem_1648 = getitem_1649 = getitem_1650 = getitem_1651 = getitem_1652 = getitem_1653 = getitem_1654 = getitem_1655 = getitem_1656 = getitem_1657 = getitem_1658 = getitem_1659 = getitem_1660 = getitem_1661 = getitem_1662 = getitem_1663 = getitem_1664 = getitem_1665 = getitem_1666 = getitem_1667 = getitem_1668 = getitem_1669 = getitem_1670 = getitem_1671 = getitem_1672 = getitem_1673 = getitem_1674 = getitem_1675 = getitem_1676 = getitem_1677 = getitem_1678 = getitem_1679 = getitem_1680 = getitem_1681 = getitem_1682 = getitem_1683 = getitem_1684 = getitem_1685 = getitem_1686 = getitem_1687 = getitem_1688 = getitem_1689 = getitem_1690 = getitem_1691 = getitem_1692 = getitem_1693 = getitem_1694 = getitem_1695 = getitem_1696 = getitem_1697 = getitem_1698 = getitem_1699 = getitem_1700 = getitem_1701 = getitem_1702 = getitem_1703 = getitem_1704 = getitem_1705 = getitem_1706 = getitem_1707 = getitem_1708 = getitem_1709 = getitem_1710 = getitem_1711 = getitem_1712 = getitem_1713 = getitem_1714 = getitem_1715 = getitem_1716 = getitem_1717 = getitem_1718 = getitem_1719 = getitem_1720 = getitem_1721 = getitem_1722 = getitem_1723 = getitem_1724 = getitem_1725 = getitem_1726 = getitem_1727 = getitem_1728 = getitem_1729 = getitem_1730 = getitem_1731 = getitem_1732 = getitem_1733 = getitem_1734 = getitem_1735 = getitem_1736 = getitem_1737 = getitem_1738 = getitem_1739 = getitem_1740 = getitem_1741 = getitem_1742 = getitem_1743 = 
getitem_1744 = getitem_1745 = getitem_1746 = getitem_1747 = getitem_1748 = getitem_1749 = getitem_1750 = getitem_1751 = getitem_1752 = getitem_1753 = getitem_1754 = getitem_1755 = getitem_1756 = getitem_1757 = getitem_1758 = getitem_1759 = getitem_1760 = getitem_1761 = getitem_1762 = getitem_1763 = getitem_1764 = getitem_1765 = getitem_1766 = getitem_1767 = getitem_1768 = getitem_1769 = getitem_1770 = getitem_1771 = getitem_1772 = getitem_1773 = getitem_1774 = getitem_1775 = None\n getitem_2072 = _foreach_sqrt[0]\n getitem_2073 = _foreach_sqrt[1]\n getitem_2074 = _foreach_sqrt[2]\n getitem_2075 = _foreach_sqrt[3]\n getitem_2076 = _foreach_sqrt[4]\n getitem_2077 = _foreach_sqrt[5]\n getitem_2078 = _foreach_sqrt[6]\n getitem_2079 = _foreach_sqrt[7]\n getitem_2080 = _foreach_sqrt[8]\n getitem_2081 = _foreach_sqrt[9]\n getitem_2082 = _foreach_sqrt[10]\n getitem_2083 = _foreach_sqrt[11]\n getitem_2084 = _foreach_sqrt[12]\n getitem_2085 = _foreach_sqrt[13]\n getitem_2086 = _foreach_sqrt[14]\n getitem_2087 = _foreach_sqrt[15]\n getitem_2088 = _foreach_sqrt[16]\n getitem_2089 = _foreach_sqrt[17]\n getitem_2090 = _foreach_sqrt[18]\n getitem_2091 = _foreach_sqrt[19]\n getitem_2092 = _foreach_sqrt[20]\n getitem_2093 = _foreach_sqrt[21]\n getitem_2094 = _foreach_sqrt[22]\n getitem_2095 = _foreach_sqrt[23]\n getitem_2096 = _foreach_sqrt[24]\n getitem_2097 = _foreach_sqrt[25]\n getitem_2098 = _foreach_sqrt[26]\n getitem_2099 = _foreach_sqrt[27]\n getitem_2100 = _foreach_sqrt[28]\n getitem_2101 = _foreach_sqrt[29]\n getitem_2102 = _foreach_sqrt[30]\n getitem_2103 = _foreach_sqrt[31]\n getitem_2104 = _foreach_sqrt[32]\n getitem_2105 = _foreach_sqrt[33]\n getitem_2106 = _foreach_sqrt[34]\n getitem_2107 = _foreach_sqrt[35]\n getitem_2108 = _foreach_sqrt[36]\n getitem_2109 = _foreach_sqrt[37]\n getitem_2110 = _foreach_sqrt[38]\n getitem_2111 = _foreach_sqrt[39]\n getitem_2112 = _foreach_sqrt[40]\n getitem_2113 = _foreach_sqrt[41]\n getitem_2114 = _foreach_sqrt[42]\n getitem_2115 = _foreach_sqrt[43]\n getitem_2116 = _foreach_sqrt[44]\n getitem_2117 = _foreach_sqrt[45]\n getitem_2118 = _foreach_sqrt[46]\n getitem_2119 = _foreach_sqrt[47]\n getitem_2120 = _foreach_sqrt[48]\n getitem_2121 = _foreach_sqrt[49]\n getitem_2122 = _foreach_sqrt[50]\n getitem_2123 = _foreach_sqrt[51]\n getitem_2124 = _foreach_sqrt[52]\n getitem_2125 = _foreach_sqrt[53]\n getitem_2126 = _foreach_sqrt[54]\n getitem_2127 = _foreach_sqrt[55]\n getitem_2128 = _foreach_sqrt[56]\n getitem_2129 = _foreach_sqrt[57]\n getitem_2130 = _foreach_sqrt[58]\n getitem_2131 = _foreach_sqrt[59]\n getitem_2132 = _foreach_sqrt[60]\n getitem_2133 = _foreach_sqrt[61]\n getitem_2134 = _foreach_sqrt[62]\n getitem_2135 = _foreach_sqrt[63]\n getitem_2136 = _foreach_sqrt[64]\n getitem_2137 = _foreach_sqrt[65]\n getitem_2138 = _foreach_sqrt[66]\n getitem_2139 = _foreach_sqrt[67]\n getitem_2140 = _foreach_sqrt[68]\n getitem_2141 = _foreach_sqrt[69]\n getitem_2142 = _foreach_sqrt[70]\n getitem_2143 = _foreach_sqrt[71]\n getitem_2144 = _foreach_sqrt[72]\n getitem_2145 = _foreach_sqrt[73]\n getitem_2146 = _foreach_sqrt[74]\n getitem_2147 = _foreach_sqrt[75]\n getitem_2148 = _foreach_sqrt[76]\n getitem_2149 = _foreach_sqrt[77]\n getitem_2150 = _foreach_sqrt[78]\n getitem_2151 = _foreach_sqrt[79]\n getitem_2152 = _foreach_sqrt[80]\n getitem_2153 = _foreach_sqrt[81]\n getitem_2154 = _foreach_sqrt[82]\n getitem_2155 = _foreach_sqrt[83]\n getitem_2156 = _foreach_sqrt[84]\n getitem_2157 = _foreach_sqrt[85]\n getitem_2158 = _foreach_sqrt[86]\n getitem_2159 = 
_foreach_sqrt[87]\n getitem_2160 = _foreach_sqrt[88]\n getitem_2161 = _foreach_sqrt[89]\n getitem_2162 = _foreach_sqrt[90]\n getitem_2163 = _foreach_sqrt[91]\n getitem_2164 = _foreach_sqrt[92]\n getitem_2165 = _foreach_sqrt[93]\n getitem_2166 = _foreach_sqrt[94]\n getitem_2167 = _foreach_sqrt[95]\n getitem_2168 = _foreach_sqrt[96]\n getitem_2169 = _foreach_sqrt[97]\n getitem_2170 = _foreach_sqrt[98]\n getitem_2171 = _foreach_sqrt[99]\n getitem_2172 = _foreach_sqrt[100]\n getitem_2173 = _foreach_sqrt[101]\n getitem_2174 = _foreach_sqrt[102]\n getitem_2175 = _foreach_sqrt[103]\n getitem_2176 = _foreach_sqrt[104]\n getitem_2177 = _foreach_sqrt[105]\n getitem_2178 = _foreach_sqrt[106]\n getitem_2179 = _foreach_sqrt[107]\n getitem_2180 = _foreach_sqrt[108]\n getitem_2181 = _foreach_sqrt[109]\n getitem_2182 = _foreach_sqrt[110]\n getitem_2183 = _foreach_sqrt[111]\n getitem_2184 = _foreach_sqrt[112]\n getitem_2185 = _foreach_sqrt[113]\n getitem_2186 = _foreach_sqrt[114]\n getitem_2187 = _foreach_sqrt[115]\n getitem_2188 = _foreach_sqrt[116]\n getitem_2189 = _foreach_sqrt[117]\n getitem_2190 = _foreach_sqrt[118]\n getitem_2191 = _foreach_sqrt[119]\n getitem_2192 = _foreach_sqrt[120]\n getitem_2193 = _foreach_sqrt[121]\n getitem_2194 = _foreach_sqrt[122]\n getitem_2195 = _foreach_sqrt[123]\n getitem_2196 = _foreach_sqrt[124]\n getitem_2197 = _foreach_sqrt[125]\n getitem_2198 = _foreach_sqrt[126]\n getitem_2199 = _foreach_sqrt[127]\n getitem_2200 = _foreach_sqrt[128]\n getitem_2201 = _foreach_sqrt[129]\n getitem_2202 = _foreach_sqrt[130]\n getitem_2203 = _foreach_sqrt[131]\n getitem_2204 = _foreach_sqrt[132]\n getitem_2205 = _foreach_sqrt[133]\n getitem_2206 = _foreach_sqrt[134]\n getitem_2207 = _foreach_sqrt[135]\n getitem_2208 = _foreach_sqrt[136]\n getitem_2209 = _foreach_sqrt[137]\n getitem_2210 = _foreach_sqrt[138]\n getitem_2211 = _foreach_sqrt[139]\n getitem_2212 = _foreach_sqrt[140]\n getitem_2213 = _foreach_sqrt[141]\n getitem_2214 = _foreach_sqrt[142]\n getitem_2215 = _foreach_sqrt[143]\n getitem_2216 = _foreach_sqrt[144]\n getitem_2217 = _foreach_sqrt[145]\n getitem_2218 = _foreach_sqrt[146]\n getitem_2219 = _foreach_sqrt[147]; _foreach_sqrt = None\n _foreach_sqrt_1 = torch.ops.aten._foreach_sqrt.default([getitem_888, getitem_889, getitem_890, getitem_891, getitem_892, getitem_893, getitem_894, getitem_895, getitem_896, getitem_897, getitem_898, getitem_899, getitem_900, getitem_901, getitem_902, getitem_903, getitem_904, getitem_905, getitem_906, getitem_907, getitem_908, getitem_909, getitem_910, getitem_911, getitem_912, getitem_913, getitem_914, getitem_915, getitem_916, getitem_917, getitem_918, getitem_919, getitem_920, getitem_921, getitem_922, getitem_923, getitem_924, getitem_925, getitem_926, getitem_927, getitem_928, getitem_929, getitem_930, getitem_931, getitem_932, getitem_933, getitem_934, getitem_935, getitem_936, getitem_937, getitem_938, getitem_939, getitem_940, getitem_941, getitem_942, getitem_943, getitem_944, getitem_945, getitem_946, getitem_947, getitem_948, getitem_949, getitem_950, getitem_951, getitem_952, getitem_953, getitem_954, getitem_955, getitem_956, getitem_957, getitem_958, getitem_959, getitem_960, getitem_961, getitem_962, getitem_963, getitem_964, getitem_965, getitem_966, getitem_967, getitem_968, getitem_969, getitem_970, getitem_971, getitem_972, getitem_973, getitem_974, getitem_975, getitem_976, getitem_977, getitem_978, getitem_979, getitem_980, getitem_981, getitem_982, getitem_983, getitem_984, getitem_985, getitem_986, getitem_987, 
getitem_988, getitem_989, getitem_990, getitem_991, getitem_992, getitem_993, getitem_994, getitem_995, getitem_996, getitem_997, getitem_998, getitem_999, getitem_1000, getitem_1001, getitem_1002, getitem_1003, getitem_1004, getitem_1005, getitem_1006, getitem_1007, getitem_1008, getitem_1009, getitem_1010, getitem_1011, getitem_1012, getitem_1013, getitem_1014, getitem_1015, getitem_1016, getitem_1017, getitem_1018, getitem_1019, getitem_1020, getitem_1021, getitem_1022, getitem_1023, getitem_1024, getitem_1025, getitem_1026, getitem_1027, getitem_1028, getitem_1029, getitem_1030, getitem_1031, getitem_1032, getitem_1033, getitem_1034, getitem_1035])\n getitem_2220 = _foreach_sqrt_1[0]\n getitem_2221 = _foreach_sqrt_1[1]\n getitem_2222 = _foreach_sqrt_1[2]\n getitem_2223 = _foreach_sqrt_1[3]\n getitem_2224 = _foreach_sqrt_1[4]\n getitem_2225 = _foreach_sqrt_1[5]\n getitem_2226 = _foreach_sqrt_1[6]\n getitem_2227 = _foreach_sqrt_1[7]\n getitem_2228 = _foreach_sqrt_1[8]\n getitem_2229 = _foreach_sqrt_1[9]\n getitem_2230 = _foreach_sqrt_1[10]\n getitem_2231 = _foreach_sqrt_1[11]\n getitem_2232 = _foreach_sqrt_1[12]\n getitem_2233 = _foreach_sqrt_1[13]\n getitem_2234 = _foreach_sqrt_1[14]\n getitem_2235 = _foreach_sqrt_1[15]\n getitem_2236 = _foreach_sqrt_1[16]\n getitem_2237 = _foreach_sqrt_1[17]\n getitem_2238 = _foreach_sqrt_1[18]\n getitem_2239 = _foreach_sqrt_1[19]\n getitem_2240 = _foreach_sqrt_1[20]\n getitem_2241 = _foreach_sqrt_1[21]\n getitem_2242 = _foreach_sqrt_1[22]\n getitem_2243 = _foreach_sqrt_1[23]\n getitem_2244 = _foreach_sqrt_1[24]\n getitem_2245 = _foreach_sqrt_1[25]\n getitem_2246 = _foreach_sqrt_1[26]\n getitem_2247 = _foreach_sqrt_1[27]\n getitem_2248 = _foreach_sqrt_1[28]\n getitem_2249 = _foreach_sqrt_1[29]\n getitem_2250 = _foreach_sqrt_1[30]\n getitem_2251 = _foreach_sqrt_1[31]\n getitem_2252 = _foreach_sqrt_1[32]\n getitem_2253 = _foreach_sqrt_1[33]\n getitem_2254 = _foreach_sqrt_1[34]\n getitem_2255 = _foreach_sqrt_1[35]\n getitem_2256 = _foreach_sqrt_1[36]\n getitem_2257 = _foreach_sqrt_1[37]\n getitem_2258 = _foreach_sqrt_1[38]\n getitem_2259 = _foreach_sqrt_1[39]\n getitem_2260 = _foreach_sqrt_1[40]\n getitem_2261 = _foreach_sqrt_1[41]\n getitem_2262 = _foreach_sqrt_1[42]\n getitem_2263 = _foreach_sqrt_1[43]\n getitem_2264 = _foreach_sqrt_1[44]\n getitem_2265 = _foreach_sqrt_1[45]\n getitem_2266 = _foreach_sqrt_1[46]\n getitem_2267 = _foreach_sqrt_1[47]\n getitem_2268 = _foreach_sqrt_1[48]\n getitem_2269 = _foreach_sqrt_1[49]\n getitem_2270 = _foreach_sqrt_1[50]\n getitem_2271 = _foreach_sqrt_1[51]\n getitem_2272 = _foreach_sqrt_1[52]\n getitem_2273 = _foreach_sqrt_1[53]\n getitem_2274 = _foreach_sqrt_1[54]\n getitem_2275 = _foreach_sqrt_1[55]\n getitem_2276 = _foreach_sqrt_1[56]\n getitem_2277 = _foreach_sqrt_1[57]\n getitem_2278 = _foreach_sqrt_1[58]\n getitem_2279 = _foreach_sqrt_1[59]\n getitem_2280 = _foreach_sqrt_1[60]\n getitem_2281 = _foreach_sqrt_1[61]\n getitem_2282 = _foreach_sqrt_1[62]\n getitem_2283 = _foreach_sqrt_1[63]\n getitem_2284 = _foreach_sqrt_1[64]\n getitem_2285 = _foreach_sqrt_1[65]\n getitem_2286 = _foreach_sqrt_1[66]\n getitem_2287 = _foreach_sqrt_1[67]\n getitem_2288 = _foreach_sqrt_1[68]\n getitem_2289 = _foreach_sqrt_1[69]\n getitem_2290 = _foreach_sqrt_1[70]\n getitem_2291 = _foreach_sqrt_1[71]\n getitem_2292 = _foreach_sqrt_1[72]\n getitem_2293 = _foreach_sqrt_1[73]\n getitem_2294 = _foreach_sqrt_1[74]\n getitem_2295 = _foreach_sqrt_1[75]\n getitem_2296 = _foreach_sqrt_1[76]\n getitem_2297 = _foreach_sqrt_1[77]\n getitem_2298 = 
_foreach_sqrt_1[78]\n getitem_2299 = _foreach_sqrt_1[79]\n getitem_2300 = _foreach_sqrt_1[80]\n getitem_2301 = _foreach_sqrt_1[81]\n getitem_2302 = _foreach_sqrt_1[82]\n getitem_2303 = _foreach_sqrt_1[83]\n getitem_2304 = _foreach_sqrt_1[84]\n getitem_2305 = _foreach_sqrt_1[85]\n getitem_2306 = _foreach_sqrt_1[86]\n getitem_2307 = _foreach_sqrt_1[87]\n getitem_2308 = _foreach_sqrt_1[88]\n getitem_2309 = _foreach_sqrt_1[89]\n getitem_2310 = _foreach_sqrt_1[90]\n getitem_2311 = _foreach_sqrt_1[91]\n getitem_2312 = _foreach_sqrt_1[92]\n getitem_2313 = _foreach_sqrt_1[93]\n getitem_2314 = _foreach_sqrt_1[94]\n getitem_2315 = _foreach_sqrt_1[95]\n getitem_2316 = _foreach_sqrt_1[96]\n getitem_2317 = _foreach_sqrt_1[97]\n getitem_2318 = _foreach_sqrt_1[98]\n getitem_2319 = _foreach_sqrt_1[99]\n getitem_2320 = _foreach_sqrt_1[100]\n getitem_2321 = _foreach_sqrt_1[101]\n getitem_2322 = _foreach_sqrt_1[102]\n getitem_2323 = _foreach_sqrt_1[103]\n getitem_2324 = _foreach_sqrt_1[104]\n getitem_2325 = _foreach_sqrt_1[105]\n getitem_2326 = _foreach_sqrt_1[106]\n getitem_2327 = _foreach_sqrt_1[107]\n getitem_2328 = _foreach_sqrt_1[108]\n getitem_2329 = _foreach_sqrt_1[109]\n getitem_2330 = _foreach_sqrt_1[110]\n getitem_2331 = _foreach_sqrt_1[111]\n getitem_2332 = _foreach_sqrt_1[112]\n getitem_2333 = _foreach_sqrt_1[113]\n getitem_2334 = _foreach_sqrt_1[114]\n getitem_2335 = _foreach_sqrt_1[115]\n getitem_2336 = _foreach_sqrt_1[116]\n getitem_2337 = _foreach_sqrt_1[117]\n getitem_2338 = _foreach_sqrt_1[118]\n getitem_2339 = _foreach_sqrt_1[119]\n getitem_2340 = _foreach_sqrt_1[120]\n getitem_2341 = _foreach_sqrt_1[121]\n getitem_2342 = _foreach_sqrt_1[122]\n getitem_2343 = _foreach_sqrt_1[123]\n getitem_2344 = _foreach_sqrt_1[124]\n getitem_2345 = _foreach_sqrt_1[125]\n getitem_2346 = _foreach_sqrt_1[126]\n getitem_2347 = _foreach_sqrt_1[127]\n getitem_2348 = _foreach_sqrt_1[128]\n getitem_2349 = _foreach_sqrt_1[129]\n getitem_2350 = _foreach_sqrt_1[130]\n getitem_2351 = _foreach_sqrt_1[131]\n getitem_2352 = _foreach_sqrt_1[132]\n getitem_2353 = _foreach_sqrt_1[133]\n getitem_2354 = _foreach_sqrt_1[134]\n getitem_2355 = _foreach_sqrt_1[135]\n getitem_2356 = _foreach_sqrt_1[136]\n getitem_2357 = _foreach_sqrt_1[137]\n getitem_2358 = _foreach_sqrt_1[138]\n getitem_2359 = _foreach_sqrt_1[139]\n getitem_2360 = _foreach_sqrt_1[140]\n getitem_2361 = _foreach_sqrt_1[141]\n getitem_2362 = _foreach_sqrt_1[142]\n getitem_2363 = _foreach_sqrt_1[143]\n getitem_2364 = _foreach_sqrt_1[144]\n getitem_2365 = _foreach_sqrt_1[145]\n getitem_2366 = _foreach_sqrt_1[146]\n getitem_2367 = _foreach_sqrt_1[147]; _foreach_sqrt_1 = None\n _foreach_div_1 = torch.ops.aten._foreach_div.List([getitem_2220, getitem_2221, getitem_2222, getitem_2223, getitem_2224, getitem_2225, getitem_2226, getitem_2227, getitem_2228, getitem_2229, getitem_2230, getitem_2231, getitem_2232, getitem_2233, getitem_2234, getitem_2235, getitem_2236, getitem_2237, getitem_2238, getitem_2239, getitem_2240, getitem_2241, getitem_2242, getitem_2243, getitem_2244, getitem_2245, getitem_2246, getitem_2247, getitem_2248, getitem_2249, getitem_2250, getitem_2251, getitem_2252, getitem_2253, getitem_2254, getitem_2255, getitem_2256, getitem_2257, getitem_2258, getitem_2259, getitem_2260, getitem_2261, getitem_2262, getitem_2263, getitem_2264, getitem_2265, getitem_2266, getitem_2267, getitem_2268, getitem_2269, getitem_2270, getitem_2271, getitem_2272, getitem_2273, getitem_2274, getitem_2275, getitem_2276, getitem_2277, getitem_2278, getitem_2279, getitem_2280, 
getitem_2281, getitem_2282, getitem_2283, getitem_2284, getitem_2285, getitem_2286, getitem_2287, getitem_2288, getitem_2289, getitem_2290, getitem_2291, getitem_2292, getitem_2293, getitem_2294, getitem_2295, getitem_2296, getitem_2297, getitem_2298, getitem_2299, getitem_2300, getitem_2301, getitem_2302, getitem_2303, getitem_2304, getitem_2305, getitem_2306, getitem_2307, getitem_2308, getitem_2309, getitem_2310, getitem_2311, getitem_2312, getitem_2313, getitem_2314, getitem_2315, getitem_2316, getitem_2317, getitem_2318, getitem_2319, getitem_2320, getitem_2321, getitem_2322, getitem_2323, getitem_2324, getitem_2325, getitem_2326, getitem_2327, getitem_2328, getitem_2329, getitem_2330, getitem_2331, getitem_2332, getitem_2333, getitem_2334, getitem_2335, getitem_2336, getitem_2337, getitem_2338, getitem_2339, getitem_2340, getitem_2341, getitem_2342, getitem_2343, getitem_2344, getitem_2345, getitem_2346, getitem_2347, getitem_2348, getitem_2349, getitem_2350, getitem_2351, getitem_2352, getitem_2353, getitem_2354, getitem_2355, getitem_2356, getitem_2357, getitem_2358, getitem_2359, getitem_2360, getitem_2361, getitem_2362, getitem_2363, getitem_2364, getitem_2365, getitem_2366, getitem_2367], [getitem_2072, getitem_2073, getitem_2074, getitem_2075, getitem_2076, getitem_2077, getitem_2078, getitem_2079, getitem_2080, getitem_2081, getitem_2082, getitem_2083, getitem_2084, getitem_2085, getitem_2086, getitem_2087, getitem_2088, getitem_2089, getitem_2090, getitem_2091, getitem_2092, getitem_2093, getitem_2094, getitem_2095, getitem_2096, getitem_2097, getitem_2098, getitem_2099, getitem_2100, getitem_2101, getitem_2102, getitem_2103, getitem_2104, getitem_2105, getitem_2106, getitem_2107, getitem_2108, getitem_2109, getitem_2110, getitem_2111, getitem_2112, getitem_2113, getitem_2114, getitem_2115, getitem_2116, getitem_2117, getitem_2118, getitem_2119, getitem_2120, getitem_2121, getitem_2122, getitem_2123, getitem_2124, getitem_2125, getitem_2126, getitem_2127, getitem_2128, getitem_2129, getitem_2130, getitem_2131, getitem_2132, getitem_2133, getitem_2134, getitem_2135, getitem_2136, getitem_2137, getitem_2138, getitem_2139, getitem_2140, getitem_2141, getitem_2142, getitem_2143, getitem_2144, getitem_2145, getitem_2146, getitem_2147, getitem_2148, getitem_2149, getitem_2150, getitem_2151, getitem_2152, getitem_2153, getitem_2154, getitem_2155, getitem_2156, getitem_2157, getitem_2158, getitem_2159, getitem_2160, getitem_2161, getitem_2162, getitem_2163, getitem_2164, getitem_2165, getitem_2166, getitem_2167, getitem_2168, getitem_2169, getitem_2170, getitem_2171, getitem_2172, getitem_2173, getitem_2174, getitem_2175, getitem_2176, getitem_2177, getitem_2178, getitem_2179, getitem_2180, getitem_2181, getitem_2182, getitem_2183, getitem_2184, getitem_2185, getitem_2186, getitem_2187, getitem_2188, getitem_2189, getitem_2190, getitem_2191, getitem_2192, getitem_2193, getitem_2194, getitem_2195, getitem_2196, getitem_2197, getitem_2198, getitem_2199, getitem_2200, getitem_2201, getitem_2202, getitem_2203, getitem_2204, getitem_2205, getitem_2206, getitem_2207, getitem_2208, getitem_2209, getitem_2210, getitem_2211, getitem_2212, getitem_2213, getitem_2214, getitem_2215, getitem_2216, getitem_2217, getitem_2218, getitem_2219]); getitem_2220 = getitem_2221 = getitem_2222 = getitem_2223 = getitem_2224 = getitem_2225 = getitem_2226 = getitem_2227 = getitem_2228 = getitem_2229 = getitem_2230 = getitem_2231 = getitem_2232 = getitem_2233 = getitem_2234 = getitem_2235 = getitem_2236 = 
getitem_2237 = getitem_2238 = getitem_2239 = getitem_2240 = getitem_2241 = getitem_2242 = getitem_2243 = getitem_2244 = getitem_2245 = getitem_2246 = getitem_2247 = getitem_2248 = getitem_2249 = getitem_2250 = getitem_2251 = getitem_2252 = getitem_2253 = getitem_2254 = getitem_2255 = getitem_2256 = getitem_2257 = getitem_2258 = getitem_2259 = getitem_2260 = getitem_2261 = getitem_2262 = getitem_2263 = getitem_2264 = getitem_2265 = getitem_2266 = getitem_2267 = getitem_2268 = getitem_2269 = getitem_2270 = getitem_2271 = getitem_2272 = getitem_2273 = getitem_2274 = getitem_2275 = getitem_2276 = getitem_2277 = getitem_2278 = getitem_2279 = getitem_2280 = getitem_2281 = getitem_2282 = getitem_2283 = getitem_2284 = getitem_2285 = getitem_2286 = getitem_2287 = getitem_2288 = getitem_2289 = getitem_2290 = getitem_2291 = getitem_2292 = getitem_2293 = getitem_2294 = getitem_2295 = getitem_2296 = getitem_2297 = getitem_2298 = getitem_2299 = getitem_2300 = getitem_2301 = getitem_2302 = getitem_2303 = getitem_2304 = getitem_2305 = getitem_2306 = getitem_2307 = getitem_2308 = getitem_2309 = getitem_2310 = getitem_2311 = getitem_2312 = getitem_2313 = getitem_2314 = getitem_2315 = getitem_2316 = getitem_2317 = getitem_2318 = getitem_2319 = getitem_2320 = getitem_2321 = getitem_2322 = getitem_2323 = getitem_2324 = getitem_2325 = getitem_2326 = getitem_2327 = getitem_2328 = getitem_2329 = getitem_2330 = getitem_2331 = getitem_2332 = getitem_2333 = getitem_2334 = getitem_2335 = getitem_2336 = getitem_2337 = getitem_2338 = getitem_2339 = getitem_2340 = getitem_2341 = getitem_2342 = getitem_2343 = getitem_2344 = getitem_2345 = getitem_2346 = getitem_2347 = getitem_2348 = getitem_2349 = getitem_2350 = getitem_2351 = getitem_2352 = getitem_2353 = getitem_2354 = getitem_2355 = getitem_2356 = getitem_2357 = getitem_2358 = getitem_2359 = getitem_2360 = getitem_2361 = getitem_2362 = getitem_2363 = getitem_2364 = getitem_2365 = getitem_2366 = getitem_2367 = getitem_2072 = getitem_2073 = getitem_2074 = getitem_2075 = getitem_2076 = getitem_2077 = getitem_2078 = getitem_2079 = getitem_2080 = getitem_2081 = getitem_2082 = getitem_2083 = getitem_2084 = getitem_2085 = getitem_2086 = getitem_2087 = getitem_2088 = getitem_2089 = getitem_2090 = getitem_2091 = getitem_2092 = getitem_2093 = getitem_2094 = getitem_2095 = getitem_2096 = getitem_2097 = getitem_2098 = getitem_2099 = getitem_2100 = getitem_2101 = getitem_2102 = getitem_2103 = getitem_2104 = getitem_2105 = getitem_2106 = getitem_2107 = getitem_2108 = getitem_2109 = getitem_2110 = getitem_2111 = getitem_2112 = getitem_2113 = getitem_2114 = getitem_2115 = getitem_2116 = getitem_2117 = getitem_2118 = getitem_2119 = getitem_2120 = getitem_2121 = getitem_2122 = getitem_2123 = getitem_2124 = getitem_2125 = getitem_2126 = getitem_2127 = getitem_2128 = getitem_2129 = getitem_2130 = getitem_2131 = getitem_2132 = getitem_2133 = getitem_2134 = getitem_2135 = getitem_2136 = getitem_2137 = getitem_2138 = getitem_2139 = getitem_2140 = getitem_2141 = getitem_2142 = getitem_2143 = getitem_2144 = getitem_2145 = getitem_2146 = getitem_2147 = getitem_2148 = getitem_2149 = getitem_2150 = getitem_2151 = getitem_2152 = getitem_2153 = getitem_2154 = getitem_2155 = getitem_2156 = getitem_2157 = getitem_2158 = getitem_2159 = getitem_2160 = getitem_2161 = getitem_2162 = getitem_2163 = getitem_2164 = getitem_2165 = getitem_2166 = getitem_2167 = getitem_2168 = getitem_2169 = getitem_2170 = getitem_2171 = getitem_2172 = getitem_2173 = getitem_2174 = getitem_2175 = getitem_2176 = getitem_2177 = 
getitem_2178 = getitem_2179 = getitem_2180 = getitem_2181 = getitem_2182 = getitem_2183 = getitem_2184 = getitem_2185 = getitem_2186 = getitem_2187 = getitem_2188 = getitem_2189 = getitem_2190 = getitem_2191 = getitem_2192 = getitem_2193 = getitem_2194 = getitem_2195 = getitem_2196 = getitem_2197 = getitem_2198 = getitem_2199 = getitem_2200 = getitem_2201 = getitem_2202 = getitem_2203 = getitem_2204 = getitem_2205 = getitem_2206 = getitem_2207 = getitem_2208 = getitem_2209 = getitem_2210 = getitem_2211 = getitem_2212 = getitem_2213 = getitem_2214 = getitem_2215 = getitem_2216 = getitem_2217 = getitem_2218 = getitem_2219 = None\n getitem_2368 = _foreach_div_1[0]\n getitem_2369 = _foreach_div_1[1]\n getitem_2370 = _foreach_div_1[2]\n getitem_2371 = _foreach_div_1[3]\n getitem_2372 = _foreach_div_1[4]\n getitem_2373 = _foreach_div_1[5]\n getitem_2374 = _foreach_div_1[6]\n getitem_2375 = _foreach_div_1[7]\n getitem_2376 = _foreach_div_1[8]\n getitem_2377 = _foreach_div_1[9]\n getitem_2378 = _foreach_div_1[10]\n getitem_2379 = _foreach_div_1[11]\n getitem_2380 = _foreach_div_1[12]\n getitem_2381 = _foreach_div_1[13]\n getitem_2382 = _foreach_div_1[14]\n getitem_2383 = _foreach_div_1[15]\n getitem_2384 = _foreach_div_1[16]\n getitem_2385 = _foreach_div_1[17]\n getitem_2386 = _foreach_div_1[18]\n getitem_2387 = _foreach_div_1[19]\n getitem_2388 = _foreach_div_1[20]\n getitem_2389 = _foreach_div_1[21]\n getitem_2390 = _foreach_div_1[22]\n getitem_2391 = _foreach_div_1[23]\n getitem_2392 = _foreach_div_1[24]\n getitem_2393 = _foreach_div_1[25]\n getitem_2394 = _foreach_div_1[26]\n getitem_2395 = _foreach_div_1[27]\n getitem_2396 = _foreach_div_1[28]\n getitem_2397 = _foreach_div_1[29]\n getitem_2398 = _foreach_div_1[30]\n getitem_2399 = _foreach_div_1[31]\n getitem_2400 = _foreach_div_1[32]\n getitem_2401 = _foreach_div_1[33]\n getitem_2402 = _foreach_div_1[34]\n getitem_2403 = _foreach_div_1[35]\n getitem_2404 = _foreach_div_1[36]\n getitem_2405 = _foreach_div_1[37]\n getitem_2406 = _foreach_div_1[38]\n getitem_2407 = _foreach_div_1[39]\n getitem_2408 = _foreach_div_1[40]\n getitem_2409 = _foreach_div_1[41]\n getitem_2410 = _foreach_div_1[42]\n getitem_2411 = _foreach_div_1[43]\n getitem_2412 = _foreach_div_1[44]\n getitem_2413 = _foreach_div_1[45]\n getitem_2414 = _foreach_div_1[46]\n getitem_2415 = _foreach_div_1[47]\n getitem_2416 = _foreach_div_1[48]\n getitem_2417 = _foreach_div_1[49]\n getitem_2418 = _foreach_div_1[50]\n getitem_2419 = _foreach_div_1[51]\n getitem_2420 = _foreach_div_1[52]\n getitem_2421 = _foreach_div_1[53]\n getitem_2422 = _foreach_div_1[54]\n getitem_2423 = _foreach_div_1[55]\n getitem_2424 = _foreach_div_1[56]\n getitem_2425 = _foreach_div_1[57]\n getitem_2426 = _foreach_div_1[58]\n getitem_2427 = _foreach_div_1[59]\n getitem_2428 = _foreach_div_1[60]\n getitem_2429 = _foreach_div_1[61]\n getitem_2430 = _foreach_div_1[62]\n getitem_2431 = _foreach_div_1[63]\n getitem_2432 = _foreach_div_1[64]\n getitem_2433 = _foreach_div_1[65]\n getitem_2434 = _foreach_div_1[66]\n getitem_2435 = _foreach_div_1[67]\n getitem_2436 = _foreach_div_1[68]\n getitem_2437 = _foreach_div_1[69]\n getitem_2438 = _foreach_div_1[70]\n getitem_2439 = _foreach_div_1[71]\n getitem_2440 = _foreach_div_1[72]\n getitem_2441 = _foreach_div_1[73]\n getitem_2442 = _foreach_div_1[74]\n getitem_2443 = _foreach_div_1[75]\n getitem_2444 = _foreach_div_1[76]\n getitem_2445 = _foreach_div_1[77]\n getitem_2446 = _foreach_div_1[78]\n getitem_2447 = _foreach_div_1[79]\n getitem_2448 = _foreach_div_1[80]\n 
getitem_2449 = _foreach_div_1[81]\n getitem_2450 = _foreach_div_1[82]\n getitem_2451 = _foreach_div_1[83]\n getitem_2452 = _foreach_div_1[84]\n getitem_2453 = _foreach_div_1[85]\n getitem_2454 = _foreach_div_1[86]\n getitem_2455 = _foreach_div_1[87]\n getitem_2456 = _foreach_div_1[88]\n getitem_2457 = _foreach_div_1[89]\n getitem_2458 = _foreach_div_1[90]\n getitem_2459 = _foreach_div_1[91]\n getitem_2460 = _foreach_div_1[92]\n getitem_2461 = _foreach_div_1[93]\n getitem_2462 = _foreach_div_1[94]\n getitem_2463 = _foreach_div_1[95]\n getitem_2464 = _foreach_div_1[96]\n getitem_2465 = _foreach_div_1[97]\n getitem_2466 = _foreach_div_1[98]\n getitem_2467 = _foreach_div_1[99]\n getitem_2468 = _foreach_div_1[100]\n getitem_2469 = _foreach_div_1[101]\n getitem_2470 = _foreach_div_1[102]\n getitem_2471 = _foreach_div_1[103]\n getitem_2472 = _foreach_div_1[104]\n getitem_2473 = _foreach_div_1[105]\n getitem_2474 = _foreach_div_1[106]\n getitem_2475 = _foreach_div_1[107]\n getitem_2476 = _foreach_div_1[108]\n getitem_2477 = _foreach_div_1[109]\n getitem_2478 = _foreach_div_1[110]\n getitem_2479 = _foreach_div_1[111]\n getitem_2480 = _foreach_div_1[112]\n getitem_2481 = _foreach_div_1[113]\n getitem_2482 = _foreach_div_1[114]\n getitem_2483 = _foreach_div_1[115]\n getitem_2484 = _foreach_div_1[116]\n getitem_2485 = _foreach_div_1[117]\n getitem_2486 = _foreach_div_1[118]\n getitem_2487 = _foreach_div_1[119]\n getitem_2488 = _foreach_div_1[120]\n getitem_2489 = _foreach_div_1[121]\n getitem_2490 = _foreach_div_1[122]\n getitem_2491 = _foreach_div_1[123]\n getitem_2492 = _foreach_div_1[124]\n getitem_2493 = _foreach_div_1[125]\n getitem_2494 = _foreach_div_1[126]\n getitem_2495 = _foreach_div_1[127]\n getitem_2496 = _foreach_div_1[128]\n getitem_2497 = _foreach_div_1[129]\n getitem_2498 = _foreach_div_1[130]\n getitem_2499 = _foreach_div_1[131]\n getitem_2500 = _foreach_div_1[132]\n getitem_2501 = _foreach_div_1[133]\n getitem_2502 = _foreach_div_1[134]\n getitem_2503 = _foreach_div_1[135]\n getitem_2504 = _foreach_div_1[136]\n getitem_2505 = _foreach_div_1[137]\n getitem_2506 = _foreach_div_1[138]\n getitem_2507 = _foreach_div_1[139]\n getitem_2508 = _foreach_div_1[140]\n getitem_2509 = _foreach_div_1[141]\n getitem_2510 = _foreach_div_1[142]\n getitem_2511 = _foreach_div_1[143]\n getitem_2512 = _foreach_div_1[144]\n getitem_2513 = _foreach_div_1[145]\n getitem_2514 = _foreach_div_1[146]\n getitem_2515 = _foreach_div_1[147]; _foreach_div_1 = None\n _foreach_add_3 = torch.ops.aten._foreach_add.Scalar([getitem_2368, getitem_2369, getitem_2370, getitem_2371, getitem_2372, getitem_2373, getitem_2374, getitem_2375, getitem_2376, getitem_2377, getitem_2378, getitem_2379, getitem_2380, getitem_2381, getitem_2382, getitem_2383, getitem_2384, getitem_2385, getitem_2386, getitem_2387, getitem_2388, getitem_2389, getitem_2390, getitem_2391, getitem_2392, getitem_2393, getitem_2394, getitem_2395, getitem_2396, getitem_2397, getitem_2398, getitem_2399, getitem_2400, getitem_2401, getitem_2402, getitem_2403, getitem_2404, getitem_2405, getitem_2406, getitem_2407, getitem_2408, getitem_2409, getitem_2410, getitem_2411, getitem_2412, getitem_2413, getitem_2414, getitem_2415, getitem_2416, getitem_2417, getitem_2418, getitem_2419, getitem_2420, getitem_2421, getitem_2422, getitem_2423, getitem_2424, getitem_2425, getitem_2426, getitem_2427, getitem_2428, getitem_2429, getitem_2430, getitem_2431, getitem_2432, getitem_2433, getitem_2434, getitem_2435, getitem_2436, getitem_2437, getitem_2438, getitem_2439, 
getitem_2440, getitem_2441, getitem_2442, getitem_2443, getitem_2444, getitem_2445, getitem_2446, getitem_2447, getitem_2448, getitem_2449, getitem_2450, getitem_2451, getitem_2452, getitem_2453, getitem_2454, getitem_2455, getitem_2456, getitem_2457, getitem_2458, getitem_2459, getitem_2460, getitem_2461, getitem_2462, getitem_2463, getitem_2464, getitem_2465, getitem_2466, getitem_2467, getitem_2468, getitem_2469, getitem_2470, getitem_2471, getitem_2472, getitem_2473, getitem_2474, getitem_2475, getitem_2476, getitem_2477, getitem_2478, getitem_2479, getitem_2480, getitem_2481, getitem_2482, getitem_2483, getitem_2484, getitem_2485, getitem_2486, getitem_2487, getitem_2488, getitem_2489, getitem_2490, getitem_2491, getitem_2492, getitem_2493, getitem_2494, getitem_2495, getitem_2496, getitem_2497, getitem_2498, getitem_2499, getitem_2500, getitem_2501, getitem_2502, getitem_2503, getitem_2504, getitem_2505, getitem_2506, getitem_2507, getitem_2508, getitem_2509, getitem_2510, getitem_2511, getitem_2512, getitem_2513, getitem_2514, getitem_2515], 1e-08); getitem_2368 = getitem_2369 = getitem_2370 = getitem_2371 = getitem_2372 = getitem_2373 = getitem_2374 = getitem_2375 = getitem_2376 = getitem_2377 = getitem_2378 = getitem_2379 = getitem_2380 = getitem_2381 = getitem_2382 = getitem_2383 = getitem_2384 = getitem_2385 = getitem_2386 = getitem_2387 = getitem_2388 = getitem_2389 = getitem_2390 = getitem_2391 = getitem_2392 = getitem_2393 = getitem_2394 = getitem_2395 = getitem_2396 = getitem_2397 = getitem_2398 = getitem_2399 = getitem_2400 = getitem_2401 = getitem_2402 = getitem_2403 = getitem_2404 = getitem_2405 = getitem_2406 = getitem_2407 = getitem_2408 = getitem_2409 = getitem_2410 = getitem_2411 = getitem_2412 = getitem_2413 = getitem_2414 = getitem_2415 = getitem_2416 = getitem_2417 = getitem_2418 = getitem_2419 = getitem_2420 = getitem_2421 = getitem_2422 = getitem_2423 = getitem_2424 = getitem_2425 = getitem_2426 = getitem_2427 = getitem_2428 = getitem_2429 = getitem_2430 = getitem_2431 = getitem_2432 = getitem_2433 = getitem_2434 = getitem_2435 = getitem_2436 = getitem_2437 = getitem_2438 = getitem_2439 = getitem_2440 = getitem_2441 = getitem_2442 = getitem_2443 = getitem_2444 = getitem_2445 = getitem_2446 = getitem_2447 = getitem_2448 = getitem_2449 = getitem_2450 = getitem_2451 = getitem_2452 = getitem_2453 = getitem_2454 = getitem_2455 = getitem_2456 = getitem_2457 = getitem_2458 = getitem_2459 = getitem_2460 = getitem_2461 = getitem_2462 = getitem_2463 = getitem_2464 = getitem_2465 = getitem_2466 = getitem_2467 = getitem_2468 = getitem_2469 = getitem_2470 = getitem_2471 = getitem_2472 = getitem_2473 = getitem_2474 = getitem_2475 = getitem_2476 = getitem_2477 = getitem_2478 = getitem_2479 = getitem_2480 = getitem_2481 = getitem_2482 = getitem_2483 = getitem_2484 = getitem_2485 = getitem_2486 = getitem_2487 = getitem_2488 = getitem_2489 = getitem_2490 = getitem_2491 = getitem_2492 = getitem_2493 = getitem_2494 = getitem_2495 = getitem_2496 = getitem_2497 = getitem_2498 = getitem_2499 = getitem_2500 = getitem_2501 = getitem_2502 = getitem_2503 = getitem_2504 = getitem_2505 = getitem_2506 = getitem_2507 = getitem_2508 = getitem_2509 = getitem_2510 = getitem_2511 = getitem_2512 = getitem_2513 = getitem_2514 = getitem_2515 = None\n getitem_2516 = _foreach_add_3[0]\n getitem_2517 = _foreach_add_3[1]\n getitem_2518 = _foreach_add_3[2]\n getitem_2519 = _foreach_add_3[3]\n getitem_2520 = _foreach_add_3[4]\n getitem_2521 = _foreach_add_3[5]\n getitem_2522 = _foreach_add_3[6]\n 
getitem_2523 = _foreach_add_3[7]\n getitem_2524 = _foreach_add_3[8]\n getitem_2525 = _foreach_add_3[9]\n getitem_2526 = _foreach_add_3[10]\n getitem_2527 = _foreach_add_3[11]\n getitem_2528 = _foreach_add_3[12]\n getitem_2529 = _foreach_add_3[13]\n getitem_2530 = _foreach_add_3[14]\n getitem_2531 = _foreach_add_3[15]\n getitem_2532 = _foreach_add_3[16]\n getitem_2533 = _foreach_add_3[17]\n getitem_2534 = _foreach_add_3[18]\n getitem_2535 = _foreach_add_3[19]\n getitem_2536 = _foreach_add_3[20]\n getitem_2537 = _foreach_add_3[21]\n getitem_2538 = _foreach_add_3[22]\n getitem_2539 = _foreach_add_3[23]\n getitem_2540 = _foreach_add_3[24]\n getitem_2541 = _foreach_add_3[25]\n getitem_2542 = _foreach_add_3[26]\n getitem_2543 = _foreach_add_3[27]\n getitem_2544 = _foreach_add_3[28]\n getitem_2545 = _foreach_add_3[29]\n getitem_2546 = _foreach_add_3[30]\n getitem_2547 = _foreach_add_3[31]\n getitem_2548 = _foreach_add_3[32]\n getitem_2549 = _foreach_add_3[33]\n getitem_2550 = _foreach_add_3[34]\n getitem_2551 = _foreach_add_3[35]\n getitem_2552 = _foreach_add_3[36]\n getitem_2553 = _foreach_add_3[37]\n getitem_2554 = _foreach_add_3[38]\n getitem_2555 = _foreach_add_3[39]\n getitem_2556 = _foreach_add_3[40]\n getitem_2557 = _foreach_add_3[41]\n getitem_2558 = _foreach_add_3[42]\n getitem_2559 = _foreach_add_3[43]\n getitem_2560 = _foreach_add_3[44]\n getitem_2561 = _foreach_add_3[45]\n getitem_2562 = _foreach_add_3[46]\n getitem_2563 = _foreach_add_3[47]\n getitem_2564 = _foreach_add_3[48]\n getitem_2565 = _foreach_add_3[49]\n getitem_2566 = _foreach_add_3[50]\n getitem_2567 = _foreach_add_3[51]\n getitem_2568 = _foreach_add_3[52]\n getitem_2569 = _foreach_add_3[53]\n getitem_2570 = _foreach_add_3[54]\n getitem_2571 = _foreach_add_3[55]\n getitem_2572 = _foreach_add_3[56]\n getitem_2573 = _foreach_add_3[57]\n getitem_2574 = _foreach_add_3[58]\n getitem_2575 = _foreach_add_3[59]\n getitem_2576 = _foreach_add_3[60]\n getitem_2577 = _foreach_add_3[61]\n getitem_2578 = _foreach_add_3[62]\n getitem_2579 = _foreach_add_3[63]\n getitem_2580 = _foreach_add_3[64]\n getitem_2581 = _foreach_add_3[65]\n getitem_2582 = _foreach_add_3[66]\n getitem_2583 = _foreach_add_3[67]\n getitem_2584 = _foreach_add_3[68]\n getitem_2585 = _foreach_add_3[69]\n getitem_2586 = _foreach_add_3[70]\n getitem_2587 = _foreach_add_3[71]\n getitem_2588 = _foreach_add_3[72]\n getitem_2589 = _foreach_add_3[73]\n getitem_2590 = _foreach_add_3[74]\n getitem_2591 = _foreach_add_3[75]\n getitem_2592 = _foreach_add_3[76]\n getitem_2593 = _foreach_add_3[77]\n getitem_2594 = _foreach_add_3[78]\n getitem_2595 = _foreach_add_3[79]\n getitem_2596 = _foreach_add_3[80]\n getitem_2597 = _foreach_add_3[81]\n getitem_2598 = _foreach_add_3[82]\n getitem_2599 = _foreach_add_3[83]\n getitem_2600 = _foreach_add_3[84]\n getitem_2601 = _foreach_add_3[85]\n getitem_2602 = _foreach_add_3[86]\n getitem_2603 = _foreach_add_3[87]\n getitem_2604 = _foreach_add_3[88]\n getitem_2605 = _foreach_add_3[89]\n getitem_2606 = _foreach_add_3[90]\n getitem_2607 = _foreach_add_3[91]\n getitem_2608 = _foreach_add_3[92]\n getitem_2609 = _foreach_add_3[93]\n getitem_2610 = _foreach_add_3[94]\n getitem_2611 = _foreach_add_3[95]\n getitem_2612 = _foreach_add_3[96]\n getitem_2613 = _foreach_add_3[97]\n getitem_2614 = _foreach_add_3[98]\n getitem_2615 = _foreach_add_3[99]\n getitem_2616 = _foreach_add_3[100]\n getitem_2617 = _foreach_add_3[101]\n getitem_2618 = _foreach_add_3[102]\n getitem_2619 = _foreach_add_3[103]\n getitem_2620 = _foreach_add_3[104]\n getitem_2621 = 
_foreach_add_3[105]\n getitem_2622 = _foreach_add_3[106]\n getitem_2623 = _foreach_add_3[107]\n getitem_2624 = _foreach_add_3[108]\n getitem_2625 = _foreach_add_3[109]\n getitem_2626 = _foreach_add_3[110]\n getitem_2627 = _foreach_add_3[111]\n getitem_2628 = _foreach_add_3[112]\n getitem_2629 = _foreach_add_3[113]\n getitem_2630 = _foreach_add_3[114]\n getitem_2631 = _foreach_add_3[115]\n getitem_2632 = _foreach_add_3[116]\n getitem_2633 = _foreach_add_3[117]\n getitem_2634 = _foreach_add_3[118]\n getitem_2635 = _foreach_add_3[119]\n getitem_2636 = _foreach_add_3[120]\n getitem_2637 = _foreach_add_3[121]\n getitem_2638 = _foreach_add_3[122]\n getitem_2639 = _foreach_add_3[123]\n getitem_2640 = _foreach_add_3[124]\n getitem_2641 = _foreach_add_3[125]\n getitem_2642 = _foreach_add_3[126]\n getitem_2643 = _foreach_add_3[127]\n getitem_2644 = _foreach_add_3[128]\n getitem_2645 = _foreach_add_3[129]\n getitem_2646 = _foreach_add_3[130]\n getitem_2647 = _foreach_add_3[131]\n getitem_2648 = _foreach_add_3[132]\n getitem_2649 = _foreach_add_3[133]\n getitem_2650 = _foreach_add_3[134]\n getitem_2651 = _foreach_add_3[135]\n getitem_2652 = _foreach_add_3[136]\n getitem_2653 = _foreach_add_3[137]\n getitem_2654 = _foreach_add_3[138]\n getitem_2655 = _foreach_add_3[139]\n getitem_2656 = _foreach_add_3[140]\n getitem_2657 = _foreach_add_3[141]\n getitem_2658 = _foreach_add_3[142]\n getitem_2659 = _foreach_add_3[143]\n getitem_2660 = _foreach_add_3[144]\n getitem_2661 = _foreach_add_3[145]\n getitem_2662 = _foreach_add_3[146]\n getitem_2663 = _foreach_add_3[147]; _foreach_add_3 = None\n _foreach_div_2 = torch.ops.aten._foreach_div.List([getitem_2516, getitem_2517, getitem_2518, getitem_2519, getitem_2520, getitem_2521, getitem_2522, getitem_2523, getitem_2524, getitem_2525, getitem_2526, getitem_2527, getitem_2528, getitem_2529, getitem_2530, getitem_2531, getitem_2532, getitem_2533, getitem_2534, getitem_2535, getitem_2536, getitem_2537, getitem_2538, getitem_2539, getitem_2540, getitem_2541, getitem_2542, getitem_2543, getitem_2544, getitem_2545, getitem_2546, getitem_2547, getitem_2548, getitem_2549, getitem_2550, getitem_2551, getitem_2552, getitem_2553, getitem_2554, getitem_2555, getitem_2556, getitem_2557, getitem_2558, getitem_2559, getitem_2560, getitem_2561, getitem_2562, getitem_2563, getitem_2564, getitem_2565, getitem_2566, getitem_2567, getitem_2568, getitem_2569, getitem_2570, getitem_2571, getitem_2572, getitem_2573, getitem_2574, getitem_2575, getitem_2576, getitem_2577, getitem_2578, getitem_2579, getitem_2580, getitem_2581, getitem_2582, getitem_2583, getitem_2584, getitem_2585, getitem_2586, getitem_2587, getitem_2588, getitem_2589, getitem_2590, getitem_2591, getitem_2592, getitem_2593, getitem_2594, getitem_2595, getitem_2596, getitem_2597, getitem_2598, getitem_2599, getitem_2600, getitem_2601, getitem_2602, getitem_2603, getitem_2604, getitem_2605, getitem_2606, getitem_2607, getitem_2608, getitem_2609, getitem_2610, getitem_2611, getitem_2612, getitem_2613, getitem_2614, getitem_2615, getitem_2616, getitem_2617, getitem_2618, getitem_2619, getitem_2620, getitem_2621, getitem_2622, getitem_2623, getitem_2624, getitem_2625, getitem_2626, getitem_2627, getitem_2628, getitem_2629, getitem_2630, getitem_2631, getitem_2632, getitem_2633, getitem_2634, getitem_2635, getitem_2636, getitem_2637, getitem_2638, getitem_2639, getitem_2640, getitem_2641, getitem_2642, getitem_2643, getitem_2644, getitem_2645, getitem_2646, getitem_2647, getitem_2648, getitem_2649, getitem_2650, getitem_2651, 
getitem_2652, getitem_2653, getitem_2654, getitem_2655, getitem_2656, getitem_2657, getitem_2658, getitem_2659, getitem_2660, getitem_2661, getitem_2662, getitem_2663], [getitem_1924, getitem_1925, getitem_1926, getitem_1927, getitem_1928, getitem_1929, getitem_1930, getitem_1931, getitem_1932, getitem_1933, getitem_1934, getitem_1935, getitem_1936, getitem_1937, getitem_1938, getitem_1939, getitem_1940, getitem_1941, getitem_1942, getitem_1943, getitem_1944, getitem_1945, getitem_1946, getitem_1947, getitem_1948, getitem_1949, getitem_1950, getitem_1951, getitem_1952, getitem_1953, getitem_1954, getitem_1955, getitem_1956, getitem_1957, getitem_1958, getitem_1959, getitem_1960, getitem_1961, getitem_1962, getitem_1963, getitem_1964, getitem_1965, getitem_1966, getitem_1967, getitem_1968, getitem_1969, getitem_1970, getitem_1971, getitem_1972, getitem_1973, getitem_1974, getitem_1975, getitem_1976, getitem_1977, getitem_1978, getitem_1979, getitem_1980, getitem_1981, getitem_1982, getitem_1983, getitem_1984, getitem_1985, getitem_1986, getitem_1987, getitem_1988, getitem_1989, getitem_1990, getitem_1991, getitem_1992, getitem_1993, getitem_1994, getitem_1995, getitem_1996, getitem_1997, getitem_1998, getitem_1999, getitem_2000, getitem_2001, getitem_2002, getitem_2003, getitem_2004, getitem_2005, getitem_2006, getitem_2007, getitem_2008, getitem_2009, getitem_2010, getitem_2011, getitem_2012, getitem_2013, getitem_2014, getitem_2015, getitem_2016, getitem_2017, getitem_2018, getitem_2019, getitem_2020, getitem_2021, getitem_2022, getitem_2023, getitem_2024, getitem_2025, getitem_2026, getitem_2027, getitem_2028, getitem_2029, getitem_2030, getitem_2031, getitem_2032, getitem_2033, getitem_2034, getitem_2035, getitem_2036, getitem_2037, getitem_2038, getitem_2039, getitem_2040, getitem_2041, getitem_2042, getitem_2043, getitem_2044, getitem_2045, getitem_2046, getitem_2047, getitem_2048, getitem_2049, getitem_2050, getitem_2051, getitem_2052, getitem_2053, getitem_2054, getitem_2055, getitem_2056, getitem_2057, getitem_2058, getitem_2059, getitem_2060, getitem_2061, getitem_2062, getitem_2063, getitem_2064, getitem_2065, getitem_2066, getitem_2067, getitem_2068, getitem_2069, getitem_2070, getitem_2071]); getitem_2516 = getitem_2517 = getitem_2518 = getitem_2519 = getitem_2520 = getitem_2521 = getitem_2522 = getitem_2523 = getitem_2524 = getitem_2525 = getitem_2526 = getitem_2527 = getitem_2528 = getitem_2529 = getitem_2530 = getitem_2531 = getitem_2532 = getitem_2533 = getitem_2534 = getitem_2535 = getitem_2536 = getitem_2537 = getitem_2538 = getitem_2539 = getitem_2540 = getitem_2541 = getitem_2542 = getitem_2543 = getitem_2544 = getitem_2545 = getitem_2546 = getitem_2547 = getitem_2548 = getitem_2549 = getitem_2550 = getitem_2551 = getitem_2552 = getitem_2553 = getitem_2554 = getitem_2555 = getitem_2556 = getitem_2557 = getitem_2558 = getitem_2559 = getitem_2560 = getitem_2561 = getitem_2562 = getitem_2563 = getitem_2564 = getitem_2565 = getitem_2566 = getitem_2567 = getitem_2568 = getitem_2569 = getitem_2570 = getitem_2571 = getitem_2572 = getitem_2573 = getitem_2574 = getitem_2575 = getitem_2576 = getitem_2577 = getitem_2578 = getitem_2579 = getitem_2580 = getitem_2581 = getitem_2582 = getitem_2583 = getitem_2584 = getitem_2585 = getitem_2586 = getitem_2587 = getitem_2588 = getitem_2589 = getitem_2590 = getitem_2591 = getitem_2592 = getitem_2593 = getitem_2594 = getitem_2595 = getitem_2596 = getitem_2597 = getitem_2598 = getitem_2599 = getitem_2600 = getitem_2601 = getitem_2602 = 
getitem_2603 = getitem_2604 = getitem_2605 = getitem_2606 = getitem_2607 = getitem_2608 = getitem_2609 = getitem_2610 = getitem_2611 = getitem_2612 = getitem_2613 = getitem_2614 = getitem_2615 = getitem_2616 = getitem_2617 = getitem_2618 = getitem_2619 = getitem_2620 = getitem_2621 = getitem_2622 = getitem_2623 = getitem_2624 = getitem_2625 = getitem_2626 = getitem_2627 = getitem_2628 = getitem_2629 = getitem_2630 = getitem_2631 = getitem_2632 = getitem_2633 = getitem_2634 = getitem_2635 = getitem_2636 = getitem_2637 = getitem_2638 = getitem_2639 = getitem_2640 = getitem_2641 = getitem_2642 = getitem_2643 = getitem_2644 = getitem_2645 = getitem_2646 = getitem_2647 = getitem_2648 = getitem_2649 = getitem_2650 = getitem_2651 = getitem_2652 = getitem_2653 = getitem_2654 = getitem_2655 = getitem_2656 = getitem_2657 = getitem_2658 = getitem_2659 = getitem_2660 = getitem_2661 = getitem_2662 = getitem_2663 = getitem_1924 = getitem_1925 = getitem_1926 = getitem_1927 = getitem_1928 = getitem_1929 = getitem_1930 = getitem_1931 = getitem_1932 = getitem_1933 = getitem_1934 = getitem_1935 = getitem_1936 = getitem_1937 = getitem_1938 = getitem_1939 = getitem_1940 = getitem_1941 = getitem_1942 = getitem_1943 = getitem_1944 = getitem_1945 = getitem_1946 = getitem_1947 = getitem_1948 = getitem_1949 = getitem_1950 = getitem_1951 = getitem_1952 = getitem_1953 = getitem_1954 = getitem_1955 = getitem_1956 = getitem_1957 = getitem_1958 = getitem_1959 = getitem_1960 = getitem_1961 = getitem_1962 = getitem_1963 = getitem_1964 = getitem_1965 = getitem_1966 = getitem_1967 = getitem_1968 = getitem_1969 = getitem_1970 = getitem_1971 = getitem_1972 = getitem_1973 = getitem_1974 = getitem_1975 = getitem_1976 = getitem_1977 = getitem_1978 = getitem_1979 = getitem_1980 = getitem_1981 = getitem_1982 = getitem_1983 = getitem_1984 = getitem_1985 = getitem_1986 = getitem_1987 = getitem_1988 = getitem_1989 = getitem_1990 = getitem_1991 = getitem_1992 = getitem_1993 = getitem_1994 = getitem_1995 = getitem_1996 = getitem_1997 = getitem_1998 = getitem_1999 = getitem_2000 = getitem_2001 = getitem_2002 = getitem_2003 = getitem_2004 = getitem_2005 = getitem_2006 = getitem_2007 = getitem_2008 = getitem_2009 = getitem_2010 = getitem_2011 = getitem_2012 = getitem_2013 = getitem_2014 = getitem_2015 = getitem_2016 = getitem_2017 = getitem_2018 = getitem_2019 = getitem_2020 = getitem_2021 = getitem_2022 = getitem_2023 = getitem_2024 = getitem_2025 = getitem_2026 = getitem_2027 = getitem_2028 = getitem_2029 = getitem_2030 = getitem_2031 = getitem_2032 = getitem_2033 = getitem_2034 = getitem_2035 = getitem_2036 = getitem_2037 = getitem_2038 = getitem_2039 = getitem_2040 = getitem_2041 = getitem_2042 = getitem_2043 = getitem_2044 = getitem_2045 = getitem_2046 = getitem_2047 = getitem_2048 = getitem_2049 = getitem_2050 = getitem_2051 = getitem_2052 = getitem_2053 = getitem_2054 = getitem_2055 = getitem_2056 = getitem_2057 = getitem_2058 = getitem_2059 = getitem_2060 = getitem_2061 = getitem_2062 = getitem_2063 = getitem_2064 = getitem_2065 = getitem_2066 = getitem_2067 = getitem_2068 = getitem_2069 = getitem_2070 = getitem_2071 = None\n getitem_2664 = _foreach_div_2[0]\n getitem_2665 = _foreach_div_2[1]\n getitem_2666 = _foreach_div_2[2]\n getitem_2667 = _foreach_div_2[3]\n getitem_2668 = _foreach_div_2[4]\n getitem_2669 = _foreach_div_2[5]\n getitem_2670 = _foreach_div_2[6]\n getitem_2671 = _foreach_div_2[7]\n getitem_2672 = _foreach_div_2[8]\n getitem_2673 = _foreach_div_2[9]\n getitem_2674 = _foreach_div_2[10]\n getitem_2675 = 
_foreach_div_2[11]\n getitem_2676 = _foreach_div_2[12]\n getitem_2677 = _foreach_div_2[13]\n getitem_2678 = _foreach_div_2[14]\n getitem_2679 = _foreach_div_2[15]\n getitem_2680 = _foreach_div_2[16]\n getitem_2681 = _foreach_div_2[17]\n getitem_2682 = _foreach_div_2[18]\n getitem_2683 = _foreach_div_2[19]\n getitem_2684 = _foreach_div_2[20]\n getitem_2685 = _foreach_div_2[21]\n getitem_2686 = _foreach_div_2[22]\n getitem_2687 = _foreach_div_2[23]\n getitem_2688 = _foreach_div_2[24]\n getitem_2689 = _foreach_div_2[25]\n getitem_2690 = _foreach_div_2[26]\n getitem_2691 = _foreach_div_2[27]\n getitem_2692 = _foreach_div_2[28]\n getitem_2693 = _foreach_div_2[29]\n getitem_2694 = _foreach_div_2[30]\n getitem_2695 = _foreach_div_2[31]\n getitem_2696 = _foreach_div_2[32]\n getitem_2697 = _foreach_div_2[33]\n getitem_2698 = _foreach_div_2[34]\n getitem_2699 = _foreach_div_2[35]\n getitem_2700 = _foreach_div_2[36]\n getitem_2701 = _foreach_div_2[37]\n getitem_2702 = _foreach_div_2[38]\n getitem_2703 = _foreach_div_2[39]\n getitem_2704 = _foreach_div_2[40]\n getitem_2705 = _foreach_div_2[41]\n getitem_2706 = _foreach_div_2[42]\n getitem_2707 = _foreach_div_2[43]\n getitem_2708 = _foreach_div_2[44]\n getitem_2709 = _foreach_div_2[45]\n getitem_2710 = _foreach_div_2[46]\n getitem_2711 = _foreach_div_2[47]\n getitem_2712 = _foreach_div_2[48]\n getitem_2713 = _foreach_div_2[49]\n getitem_2714 = _foreach_div_2[50]\n getitem_2715 = _foreach_div_2[51]\n getitem_2716 = _foreach_div_2[52]\n getitem_2717 = _foreach_div_2[53]\n getitem_2718 = _foreach_div_2[54]\n getitem_2719 = _foreach_div_2[55]\n getitem_2720 = _foreach_div_2[56]\n getitem_2721 = _foreach_div_2[57]\n getitem_2722 = _foreach_div_2[58]\n getitem_2723 = _foreach_div_2[59]\n getitem_2724 = _foreach_div_2[60]\n getitem_2725 = _foreach_div_2[61]\n getitem_2726 = _foreach_div_2[62]\n getitem_2727 = _foreach_div_2[63]\n getitem_2728 = _foreach_div_2[64]\n getitem_2729 = _foreach_div_2[65]\n getitem_2730 = _foreach_div_2[66]\n getitem_2731 = _foreach_div_2[67]\n getitem_2732 = _foreach_div_2[68]\n getitem_2733 = _foreach_div_2[69]\n getitem_2734 = _foreach_div_2[70]\n getitem_2735 = _foreach_div_2[71]\n getitem_2736 = _foreach_div_2[72]\n getitem_2737 = _foreach_div_2[73]\n getitem_2738 = _foreach_div_2[74]\n getitem_2739 = _foreach_div_2[75]\n getitem_2740 = _foreach_div_2[76]\n getitem_2741 = _foreach_div_2[77]\n getitem_2742 = _foreach_div_2[78]\n getitem_2743 = _foreach_div_2[79]\n getitem_2744 = _foreach_div_2[80]\n getitem_2745 = _foreach_div_2[81]\n getitem_2746 = _foreach_div_2[82]\n getitem_2747 = _foreach_div_2[83]\n getitem_2748 = _foreach_div_2[84]\n getitem_2749 = _foreach_div_2[85]\n getitem_2750 = _foreach_div_2[86]\n getitem_2751 = _foreach_div_2[87]\n getitem_2752 = _foreach_div_2[88]\n getitem_2753 = _foreach_div_2[89]\n getitem_2754 = _foreach_div_2[90]\n getitem_2755 = _foreach_div_2[91]\n getitem_2756 = _foreach_div_2[92]\n getitem_2757 = _foreach_div_2[93]\n getitem_2758 = _foreach_div_2[94]\n getitem_2759 = _foreach_div_2[95]\n getitem_2760 = _foreach_div_2[96]\n getitem_2761 = _foreach_div_2[97]\n getitem_2762 = _foreach_div_2[98]\n getitem_2763 = _foreach_div_2[99]\n getitem_2764 = _foreach_div_2[100]\n getitem_2765 = _foreach_div_2[101]\n getitem_2766 = _foreach_div_2[102]\n getitem_2767 = _foreach_div_2[103]\n getitem_2768 = _foreach_div_2[104]\n getitem_2769 = _foreach_div_2[105]\n getitem_2770 = _foreach_div_2[106]\n getitem_2771 = _foreach_div_2[107]\n getitem_2772 = _foreach_div_2[108]\n getitem_2773 = 
_foreach_div_2[109]\n getitem_2774 = _foreach_div_2[110]\n getitem_2775 = _foreach_div_2[111]\n getitem_2776 = _foreach_div_2[112]\n getitem_2777 = _foreach_div_2[113]\n getitem_2778 = _foreach_div_2[114]\n getitem_2779 = _foreach_div_2[115]\n getitem_2780 = _foreach_div_2[116]\n getitem_2781 = _foreach_div_2[117]\n getitem_2782 = _foreach_div_2[118]\n getitem_2783 = _foreach_div_2[119]\n getitem_2784 = _foreach_div_2[120]\n getitem_2785 = _foreach_div_2[121]\n getitem_2786 = _foreach_div_2[122]\n getitem_2787 = _foreach_div_2[123]\n getitem_2788 = _foreach_div_2[124]\n getitem_2789 = _foreach_div_2[125]\n getitem_2790 = _foreach_div_2[126]\n getitem_2791 = _foreach_div_2[127]\n getitem_2792 = _foreach_div_2[128]\n getitem_2793 = _foreach_div_2[129]\n getitem_2794 = _foreach_div_2[130]\n getitem_2795 = _foreach_div_2[131]\n getitem_2796 = _foreach_div_2[132]\n getitem_2797 = _foreach_div_2[133]\n getitem_2798 = _foreach_div_2[134]\n getitem_2799 = _foreach_div_2[135]\n getitem_2800 = _foreach_div_2[136]\n getitem_2801 = _foreach_div_2[137]\n getitem_2802 = _foreach_div_2[138]\n getitem_2803 = _foreach_div_2[139]\n getitem_2804 = _foreach_div_2[140]\n getitem_2805 = _foreach_div_2[141]\n getitem_2806 = _foreach_div_2[142]\n getitem_2807 = _foreach_div_2[143]\n getitem_2808 = _foreach_div_2[144]\n getitem_2809 = _foreach_div_2[145]\n getitem_2810 = _foreach_div_2[146]\n getitem_2811 = _foreach_div_2[147]; _foreach_div_2 = None\n _foreach_div_3 = torch.ops.aten._foreach_div.List([getitem_444, getitem_445, getitem_446, getitem_447, getitem_448, getitem_449, getitem_450, getitem_451, getitem_452, getitem_453, getitem_454, getitem_455, getitem_456, getitem_457, getitem_458, getitem_459, getitem_460, getitem_461, getitem_462, getitem_463, getitem_464, getitem_465, getitem_466, getitem_467, getitem_468, getitem_469, getitem_470, getitem_471, getitem_472, getitem_473, getitem_474, getitem_475, getitem_476, getitem_477, getitem_478, getitem_479, getitem_480, getitem_481, getitem_482, getitem_483, getitem_484, getitem_485, getitem_486, getitem_487, getitem_488, getitem_489, getitem_490, getitem_491, getitem_492, getitem_493, getitem_494, getitem_495, getitem_496, getitem_497, getitem_498, getitem_499, getitem_500, getitem_501, getitem_502, getitem_503, getitem_504, getitem_505, getitem_506, getitem_507, getitem_508, getitem_509, getitem_510, getitem_511, getitem_512, getitem_513, getitem_514, getitem_515, getitem_516, getitem_517, getitem_518, getitem_519, getitem_520, getitem_521, getitem_522, getitem_523, getitem_524, getitem_525, getitem_526, getitem_527, getitem_528, getitem_529, getitem_530, getitem_531, getitem_532, getitem_533, getitem_534, getitem_535, getitem_536, getitem_537, getitem_538, getitem_539, getitem_540, getitem_541, getitem_542, getitem_543, getitem_544, getitem_545, getitem_546, getitem_547, getitem_548, getitem_549, getitem_550, getitem_551, getitem_552, getitem_553, getitem_554, getitem_555, getitem_556, getitem_557, getitem_558, getitem_559, getitem_560, getitem_561, getitem_562, getitem_563, getitem_564, getitem_565, getitem_566, getitem_567, getitem_568, getitem_569, getitem_570, getitem_571, getitem_572, getitem_573, getitem_574, getitem_575, getitem_576, getitem_577, getitem_578, getitem_579, getitem_580, getitem_581, getitem_582, getitem_583, getitem_584, getitem_585, getitem_586, getitem_587, getitem_588, getitem_589, getitem_590, getitem_591], [getitem_2664, getitem_2665, getitem_2666, getitem_2667, getitem_2668, getitem_2669, getitem_2670, getitem_2671, getitem_2672, 
getitem_2673, getitem_2674, getitem_2675, getitem_2676, getitem_2677, getitem_2678, getitem_2679, getitem_2680, getitem_2681, getitem_2682, getitem_2683, getitem_2684, getitem_2685, getitem_2686, getitem_2687, getitem_2688, getitem_2689, getitem_2690, getitem_2691, getitem_2692, getitem_2693, getitem_2694, getitem_2695, getitem_2696, getitem_2697, getitem_2698, getitem_2699, getitem_2700, getitem_2701, getitem_2702, getitem_2703, getitem_2704, getitem_2705, getitem_2706, getitem_2707, getitem_2708, getitem_2709, getitem_2710, getitem_2711, getitem_2712, getitem_2713, getitem_2714, getitem_2715, getitem_2716, getitem_2717, getitem_2718, getitem_2719, getitem_2720, getitem_2721, getitem_2722, getitem_2723, getitem_2724, getitem_2725, getitem_2726, getitem_2727, getitem_2728, getitem_2729, getitem_2730, getitem_2731, getitem_2732, getitem_2733, getitem_2734, getitem_2735, getitem_2736, getitem_2737, getitem_2738, getitem_2739, getitem_2740, getitem_2741, getitem_2742, getitem_2743, getitem_2744, getitem_2745, getitem_2746, getitem_2747, getitem_2748, getitem_2749, getitem_2750, getitem_2751, getitem_2752, getitem_2753, getitem_2754, getitem_2755, getitem_2756, getitem_2757, getitem_2758, getitem_2759, getitem_2760, getitem_2761, getitem_2762, getitem_2763, getitem_2764, getitem_2765, getitem_2766, getitem_2767, getitem_2768, getitem_2769, getitem_2770, getitem_2771, getitem_2772, getitem_2773, getitem_2774, getitem_2775, getitem_2776, getitem_2777, getitem_2778, getitem_2779, getitem_2780, getitem_2781, getitem_2782, getitem_2783, getitem_2784, getitem_2785, getitem_2786, getitem_2787, getitem_2788, getitem_2789, getitem_2790, getitem_2791, getitem_2792, getitem_2793, getitem_2794, getitem_2795, getitem_2796, getitem_2797, getitem_2798, getitem_2799, getitem_2800, getitem_2801, getitem_2802, getitem_2803, getitem_2804, getitem_2805, getitem_2806, getitem_2807, getitem_2808, getitem_2809, getitem_2810, getitem_2811]); getitem_2664 = getitem_2665 = getitem_2666 = getitem_2667 = getitem_2668 = getitem_2669 = getitem_2670 = getitem_2671 = getitem_2672 = getitem_2673 = getitem_2674 = getitem_2675 = getitem_2676 = getitem_2677 = getitem_2678 = getitem_2679 = getitem_2680 = getitem_2681 = getitem_2682 = getitem_2683 = getitem_2684 = getitem_2685 = getitem_2686 = getitem_2687 = getitem_2688 = getitem_2689 = getitem_2690 = getitem_2691 = getitem_2692 = getitem_2693 = getitem_2694 = getitem_2695 = getitem_2696 = getitem_2697 = getitem_2698 = getitem_2699 = getitem_2700 = getitem_2701 = getitem_2702 = getitem_2703 = getitem_2704 = getitem_2705 = getitem_2706 = getitem_2707 = getitem_2708 = getitem_2709 = getitem_2710 = getitem_2711 = getitem_2712 = getitem_2713 = getitem_2714 = getitem_2715 = getitem_2716 = getitem_2717 = getitem_2718 = getitem_2719 = getitem_2720 = getitem_2721 = getitem_2722 = getitem_2723 = getitem_2724 = getitem_2725 = getitem_2726 = getitem_2727 = getitem_2728 = getitem_2729 = getitem_2730 = getitem_2731 = getitem_2732 = getitem_2733 = getitem_2734 = getitem_2735 = getitem_2736 = getitem_2737 = getitem_2738 = getitem_2739 = getitem_2740 = getitem_2741 = getitem_2742 = getitem_2743 = getitem_2744 = getitem_2745 = getitem_2746 = getitem_2747 = getitem_2748 = getitem_2749 = getitem_2750 = getitem_2751 = getitem_2752 = getitem_2753 = getitem_2754 = getitem_2755 = getitem_2756 = getitem_2757 = getitem_2758 = getitem_2759 = getitem_2760 = getitem_2761 = getitem_2762 = getitem_2763 = getitem_2764 = getitem_2765 = getitem_2766 = getitem_2767 = getitem_2768 = getitem_2769 = getitem_2770 = 
getitem_2771 = getitem_2772 = getitem_2773 = getitem_2774 = getitem_2775 = getitem_2776 = getitem_2777 = getitem_2778 = getitem_2779 = getitem_2780 = getitem_2781 = getitem_2782 = getitem_2783 = getitem_2784 = getitem_2785 = getitem_2786 = getitem_2787 = getitem_2788 = getitem_2789 = getitem_2790 = getitem_2791 = getitem_2792 = getitem_2793 = getitem_2794 = getitem_2795 = getitem_2796 = getitem_2797 = getitem_2798 = getitem_2799 = getitem_2800 = getitem_2801 = getitem_2802 = getitem_2803 = getitem_2804 = getitem_2805 = getitem_2806 = getitem_2807 = getitem_2808 = getitem_2809 = getitem_2810 = getitem_2811 = None\n getitem_2812 = _foreach_div_3[0]\n getitem_2813 = _foreach_div_3[1]\n getitem_2814 = _foreach_div_3[2]\n getitem_2815 = _foreach_div_3[3]\n getitem_2816 = _foreach_div_3[4]\n getitem_2817 = _foreach_div_3[5]\n getitem_2818 = _foreach_div_3[6]\n getitem_2819 = _foreach_div_3[7]\n getitem_2820 = _foreach_div_3[8]\n getitem_2821 = _foreach_div_3[9]\n getitem_2822 = _foreach_div_3[10]\n getitem_2823 = _foreach_div_3[11]\n getitem_2824 = _foreach_div_3[12]\n getitem_2825 = _foreach_div_3[13]\n getitem_2826 = _foreach_div_3[14]\n getitem_2827 = _foreach_div_3[15]\n getitem_2828 = _foreach_div_3[16]\n getitem_2829 = _foreach_div_3[17]\n getitem_2830 = _foreach_div_3[18]\n getitem_2831 = _foreach_div_3[19]\n getitem_2832 = _foreach_div_3[20]\n getitem_2833 = _foreach_div_3[21]\n getitem_2834 = _foreach_div_3[22]\n getitem_2835 = _foreach_div_3[23]\n getitem_2836 = _foreach_div_3[24]\n getitem_2837 = _foreach_div_3[25]\n getitem_2838 = _foreach_div_3[26]\n getitem_2839 = _foreach_div_3[27]\n getitem_2840 = _foreach_div_3[28]\n getitem_2841 = _foreach_div_3[29]\n getitem_2842 = _foreach_div_3[30]\n getitem_2843 = _foreach_div_3[31]\n getitem_2844 = _foreach_div_3[32]\n getitem_2845 = _foreach_div_3[33]\n getitem_2846 = _foreach_div_3[34]\n getitem_2847 = _foreach_div_3[35]\n getitem_2848 = _foreach_div_3[36]\n getitem_2849 = _foreach_div_3[37]\n getitem_2850 = _foreach_div_3[38]\n getitem_2851 = _foreach_div_3[39]\n getitem_2852 = _foreach_div_3[40]\n getitem_2853 = _foreach_div_3[41]\n getitem_2854 = _foreach_div_3[42]\n getitem_2855 = _foreach_div_3[43]\n getitem_2856 = _foreach_div_3[44]\n getitem_2857 = _foreach_div_3[45]\n getitem_2858 = _foreach_div_3[46]\n getitem_2859 = _foreach_div_3[47]\n getitem_2860 = _foreach_div_3[48]\n getitem_2861 = _foreach_div_3[49]\n getitem_2862 = _foreach_div_3[50]\n getitem_2863 = _foreach_div_3[51]\n getitem_2864 = _foreach_div_3[52]\n getitem_2865 = _foreach_div_3[53]\n getitem_2866 = _foreach_div_3[54]\n getitem_2867 = _foreach_div_3[55]\n getitem_2868 = _foreach_div_3[56]\n getitem_2869 = _foreach_div_3[57]\n getitem_2870 = _foreach_div_3[58]\n getitem_2871 = _foreach_div_3[59]\n getitem_2872 = _foreach_div_3[60]\n getitem_2873 = _foreach_div_3[61]\n getitem_2874 = _foreach_div_3[62]\n getitem_2875 = _foreach_div_3[63]\n getitem_2876 = _foreach_div_3[64]\n getitem_2877 = _foreach_div_3[65]\n getitem_2878 = _foreach_div_3[66]\n getitem_2879 = _foreach_div_3[67]\n getitem_2880 = _foreach_div_3[68]\n getitem_2881 = _foreach_div_3[69]\n getitem_2882 = _foreach_div_3[70]\n getitem_2883 = _foreach_div_3[71]\n getitem_2884 = _foreach_div_3[72]\n getitem_2885 = _foreach_div_3[73]\n getitem_2886 = _foreach_div_3[74]\n getitem_2887 = _foreach_div_3[75]\n getitem_2888 = _foreach_div_3[76]\n getitem_2889 = _foreach_div_3[77]\n getitem_2890 = _foreach_div_3[78]\n getitem_2891 = _foreach_div_3[79]\n getitem_2892 = _foreach_div_3[80]\n getitem_2893 = 
_foreach_div_3[81]\n getitem_2894 = _foreach_div_3[82]\n getitem_2895 = _foreach_div_3[83]\n getitem_2896 = _foreach_div_3[84]\n getitem_2897 = _foreach_div_3[85]\n getitem_2898 = _foreach_div_3[86]\n getitem_2899 = _foreach_div_3[87]\n getitem_2900 = _foreach_div_3[88]\n getitem_2901 = _foreach_div_3[89]\n getitem_2902 = _foreach_div_3[90]\n getitem_2903 = _foreach_div_3[91]\n getitem_2904 = _foreach_div_3[92]\n getitem_2905 = _foreach_div_3[93]\n getitem_2906 = _foreach_div_3[94]\n getitem_2907 = _foreach_div_3[95]\n getitem_2908 = _foreach_div_3[96]\n getitem_2909 = _foreach_div_3[97]\n getitem_2910 = _foreach_div_3[98]\n getitem_2911 = _foreach_div_3[99]\n getitem_2912 = _foreach_div_3[100]\n getitem_2913 = _foreach_div_3[101]\n getitem_2914 = _foreach_div_3[102]\n getitem_2915 = _foreach_div_3[103]\n getitem_2916 = _foreach_div_3[104]\n getitem_2917 = _foreach_div_3[105]\n getitem_2918 = _foreach_div_3[106]\n getitem_2919 = _foreach_div_3[107]\n getitem_2920 = _foreach_div_3[108]\n getitem_2921 = _foreach_div_3[109]\n getitem_2922 = _foreach_div_3[110]\n getitem_2923 = _foreach_div_3[111]\n getitem_2924 = _foreach_div_3[112]\n getitem_2925 = _foreach_div_3[113]\n getitem_2926 = _foreach_div_3[114]\n getitem_2927 = _foreach_div_3[115]\n getitem_2928 = _foreach_div_3[116]\n getitem_2929 = _foreach_div_3[117]\n getitem_2930 = _foreach_div_3[118]\n getitem_2931 = _foreach_div_3[119]\n getitem_2932 = _foreach_div_3[120]\n getitem_2933 = _foreach_div_3[121]\n getitem_2934 = _foreach_div_3[122]\n getitem_2935 = _foreach_div_3[123]\n getitem_2936 = _foreach_div_3[124]\n getitem_2937 = _foreach_div_3[125]\n getitem_2938 = _foreach_div_3[126]\n getitem_2939 = _foreach_div_3[127]\n getitem_2940 = _foreach_div_3[128]\n getitem_2941 = _foreach_div_3[129]\n getitem_2942 = _foreach_div_3[130]\n getitem_2943 = _foreach_div_3[131]\n getitem_2944 = _foreach_div_3[132]\n getitem_2945 = _foreach_div_3[133]\n getitem_2946 = _foreach_div_3[134]\n getitem_2947 = _foreach_div_3[135]\n getitem_2948 = _foreach_div_3[136]\n getitem_2949 = _foreach_div_3[137]\n getitem_2950 = _foreach_div_3[138]\n getitem_2951 = _foreach_div_3[139]\n getitem_2952 = _foreach_div_3[140]\n getitem_2953 = _foreach_div_3[141]\n getitem_2954 = _foreach_div_3[142]\n getitem_2955 = _foreach_div_3[143]\n getitem_2956 = _foreach_div_3[144]\n getitem_2957 = _foreach_div_3[145]\n getitem_2958 = _foreach_div_3[146]\n getitem_2959 = _foreach_div_3[147]; _foreach_div_3 = None\n _foreach_add_4 = torch.ops.aten._foreach_add.List([arg0_1, arg1_1, arg2_1, arg3_1, arg4_1, arg5_1, arg6_1, arg7_1, arg8_1, arg9_1, arg10_1, arg11_1, arg12_1, arg13_1, arg14_1, arg15_1, arg16_1, arg17_1, arg18_1, arg19_1, arg20_1, arg21_1, arg22_1, arg23_1, arg24_1, arg25_1, arg26_1, arg27_1, arg28_1, arg29_1, arg30_1, arg31_1, arg32_1, arg33_1, arg34_1, arg35_1, arg36_1, arg37_1, arg38_1, arg39_1, arg40_1, arg41_1, arg42_1, arg43_1, arg44_1, arg45_1, arg46_1, arg47_1, arg48_1, arg49_1, arg50_1, arg51_1, arg52_1, arg53_1, arg54_1, arg55_1, arg56_1, arg57_1, arg58_1, arg59_1, arg60_1, arg61_1, arg62_1, arg63_1, arg64_1, arg65_1, arg66_1, arg67_1, arg68_1, arg69_1, arg70_1, arg71_1, arg72_1, arg73_1, arg74_1, arg75_1, arg76_1, arg77_1, arg78_1, arg79_1, arg80_1, arg81_1, arg82_1, arg83_1, arg84_1, arg85_1, arg86_1, arg87_1, arg88_1, arg89_1, arg90_1, arg91_1, arg92_1, arg93_1, arg94_1, arg95_1, arg96_1, arg97_1, arg98_1, arg99_1, arg100_1, arg101_1, arg102_1, arg103_1, arg104_1, arg105_1, arg106_1, arg107_1, arg108_1, arg109_1, arg110_1, arg111_1, arg112_1, arg113_1, 
arg114_1, arg115_1, arg116_1, arg117_1, arg118_1, arg119_1, arg120_1, arg121_1, arg122_1, arg123_1, arg124_1, arg125_1, arg126_1, arg127_1, arg128_1, arg129_1, arg130_1, arg131_1, arg132_1, arg133_1, arg134_1, arg135_1, arg136_1, arg137_1, arg138_1, arg139_1, arg140_1, arg141_1, arg142_1, arg143_1, arg144_1, arg145_1, arg146_1, arg147_1], [getitem_2812, getitem_2813, getitem_2814, getitem_2815, getitem_2816, getitem_2817, getitem_2818, getitem_2819, getitem_2820, getitem_2821, getitem_2822, getitem_2823, getitem_2824, getitem_2825, getitem_2826, getitem_2827, getitem_2828, getitem_2829, getitem_2830, getitem_2831, getitem_2832, getitem_2833, getitem_2834, getitem_2835, getitem_2836, getitem_2837, getitem_2838, getitem_2839, getitem_2840, getitem_2841, getitem_2842, getitem_2843, getitem_2844, getitem_2845, getitem_2846, getitem_2847, getitem_2848, getitem_2849, getitem_2850, getitem_2851, getitem_2852, getitem_2853, getitem_2854, getitem_2855, getitem_2856, getitem_2857, getitem_2858, getitem_2859, getitem_2860, getitem_2861, getitem_2862, getitem_2863, getitem_2864, getitem_2865, getitem_2866, getitem_2867, getitem_2868, getitem_2869, getitem_2870, getitem_2871, getitem_2872, getitem_2873, getitem_2874, getitem_2875, getitem_2876, getitem_2877, getitem_2878, getitem_2879, getitem_2880, getitem_2881, getitem_2882, getitem_2883, getitem_2884, getitem_2885, getitem_2886, getitem_2887, getitem_2888, getitem_2889, getitem_2890, getitem_2891, getitem_2892, getitem_2893, getitem_2894, getitem_2895, getitem_2896, getitem_2897, getitem_2898, getitem_2899, getitem_2900, getitem_2901, getitem_2902, getitem_2903, getitem_2904, getitem_2905, getitem_2906, getitem_2907, getitem_2908, getitem_2909, getitem_2910, getitem_2911, getitem_2912, getitem_2913, getitem_2914, getitem_2915, getitem_2916, getitem_2917, getitem_2918, getitem_2919, getitem_2920, getitem_2921, getitem_2922, getitem_2923, getitem_2924, getitem_2925, getitem_2926, getitem_2927, getitem_2928, getitem_2929, getitem_2930, getitem_2931, getitem_2932, getitem_2933, getitem_2934, getitem_2935, getitem_2936, getitem_2937, getitem_2938, getitem_2939, getitem_2940, getitem_2941, getitem_2942, getitem_2943, getitem_2944, getitem_2945, getitem_2946, getitem_2947, getitem_2948, getitem_2949, getitem_2950, getitem_2951, getitem_2952, getitem_2953, getitem_2954, getitem_2955, getitem_2956, getitem_2957, getitem_2958, getitem_2959]); getitem_2812 = getitem_2813 = getitem_2814 = getitem_2815 = getitem_2816 = getitem_2817 = getitem_2818 = getitem_2819 = getitem_2820 = getitem_2821 = getitem_2822 = getitem_2823 = getitem_2824 = getitem_2825 = getitem_2826 = getitem_2827 = getitem_2828 = getitem_2829 = getitem_2830 = getitem_2831 = getitem_2832 = getitem_2833 = getitem_2834 = getitem_2835 = getitem_2836 = getitem_2837 = getitem_2838 = getitem_2839 = getitem_2840 = getitem_2841 = getitem_2842 = getitem_2843 = getitem_2844 = getitem_2845 = getitem_2846 = getitem_2847 = getitem_2848 = getitem_2849 = getitem_2850 = getitem_2851 = getitem_2852 = getitem_2853 = getitem_2854 = getitem_2855 = getitem_2856 = getitem_2857 = getitem_2858 = getitem_2859 = getitem_2860 = getitem_2861 = getitem_2862 = getitem_2863 = getitem_2864 = getitem_2865 = getitem_2866 = getitem_2867 = getitem_2868 = getitem_2869 = getitem_2870 = getitem_2871 = getitem_2872 = getitem_2873 = getitem_2874 = getitem_2875 = getitem_2876 = getitem_2877 = getitem_2878 = getitem_2879 = getitem_2880 = getitem_2881 = getitem_2882 = getitem_2883 = getitem_2884 = getitem_2885 = getitem_2886 = getitem_2887 
= getitem_2888 = getitem_2889 = getitem_2890 = getitem_2891 = getitem_2892 = getitem_2893 = getitem_2894 = getitem_2895 = getitem_2896 = getitem_2897 = getitem_2898 = getitem_2899 = getitem_2900 = getitem_2901 = getitem_2902 = getitem_2903 = getitem_2904 = getitem_2905 = getitem_2906 = getitem_2907 = getitem_2908 = getitem_2909 = getitem_2910 = getitem_2911 = getitem_2912 = getitem_2913 = getitem_2914 = getitem_2915 = getitem_2916 = getitem_2917 = getitem_2918 = getitem_2919 = getitem_2920 = getitem_2921 = getitem_2922 = getitem_2923 = getitem_2924 = getitem_2925 = getitem_2926 = getitem_2927 = getitem_2928 = getitem_2929 = getitem_2930 = getitem_2931 = getitem_2932 = getitem_2933 = getitem_2934 = getitem_2935 = getitem_2936 = getitem_2937 = getitem_2938 = getitem_2939 = getitem_2940 = getitem_2941 = getitem_2942 = getitem_2943 = getitem_2944 = getitem_2945 = getitem_2946 = getitem_2947 = getitem_2948 = getitem_2949 = getitem_2950 = getitem_2951 = getitem_2952 = getitem_2953 = getitem_2954 = getitem_2955 = getitem_2956 = getitem_2957 = getitem_2958 = getitem_2959 = None\n getitem_2960 = _foreach_add_4[0]\n getitem_2961 = _foreach_add_4[1]\n getitem_2962 = _foreach_add_4[2]\n getitem_2963 = _foreach_add_4[3]\n getitem_2964 = _foreach_add_4[4]\n getitem_2965 = _foreach_add_4[5]\n getitem_2966 = _foreach_add_4[6]\n getitem_2967 = _foreach_add_4[7]\n getitem_2968 = _foreach_add_4[8]\n getitem_2969 = _foreach_add_4[9]\n getitem_2970 = _foreach_add_4[10]\n getitem_2971 = _foreach_add_4[11]\n getitem_2972 = _foreach_add_4[12]\n getitem_2973 = _foreach_add_4[13]\n getitem_2974 = _foreach_add_4[14]\n getitem_2975 = _foreach_add_4[15]\n getitem_2976 = _foreach_add_4[16]\n getitem_2977 = _foreach_add_4[17]\n getitem_2978 = _foreach_add_4[18]\n getitem_2979 = _foreach_add_4[19]\n getitem_2980 = _foreach_add_4[20]\n getitem_2981 = _foreach_add_4[21]\n getitem_2982 = _foreach_add_4[22]\n getitem_2983 = _foreach_add_4[23]\n getitem_2984 = _foreach_add_4[24]\n getitem_2985 = _foreach_add_4[25]\n getitem_2986 = _foreach_add_4[26]\n getitem_2987 = _foreach_add_4[27]\n getitem_2988 = _foreach_add_4[28]\n getitem_2989 = _foreach_add_4[29]\n getitem_2990 = _foreach_add_4[30]\n getitem_2991 = _foreach_add_4[31]\n getitem_2992 = _foreach_add_4[32]\n getitem_2993 = _foreach_add_4[33]\n getitem_2994 = _foreach_add_4[34]\n getitem_2995 = _foreach_add_4[35]\n getitem_2996 = _foreach_add_4[36]\n getitem_2997 = _foreach_add_4[37]\n getitem_2998 = _foreach_add_4[38]\n getitem_2999 = _foreach_add_4[39]\n getitem_3000 = _foreach_add_4[40]\n getitem_3001 = _foreach_add_4[41]\n getitem_3002 = _foreach_add_4[42]\n getitem_3003 = _foreach_add_4[43]\n getitem_3004 = _foreach_add_4[44]\n getitem_3005 = _foreach_add_4[45]\n getitem_3006 = _foreach_add_4[46]\n getitem_3007 = _foreach_add_4[47]\n getitem_3008 = _foreach_add_4[48]\n getitem_3009 = _foreach_add_4[49]\n getitem_3010 = _foreach_add_4[50]\n getitem_3011 = _foreach_add_4[51]\n getitem_3012 = _foreach_add_4[52]\n getitem_3013 = _foreach_add_4[53]\n getitem_3014 = _foreach_add_4[54]\n getitem_3015 = _foreach_add_4[55]\n getitem_3016 = _foreach_add_4[56]\n getitem_3017 = _foreach_add_4[57]\n getitem_3018 = _foreach_add_4[58]\n getitem_3019 = _foreach_add_4[59]\n getitem_3020 = _foreach_add_4[60]\n getitem_3021 = _foreach_add_4[61]\n getitem_3022 = _foreach_add_4[62]\n getitem_3023 = _foreach_add_4[63]\n getitem_3024 = _foreach_add_4[64]\n getitem_3025 = _foreach_add_4[65]\n getitem_3026 = _foreach_add_4[66]\n getitem_3027 = _foreach_add_4[67]\n getitem_3028 = 
_foreach_add_4[68]\n getitem_3029 = _foreach_add_4[69]\n getitem_3030 = _foreach_add_4[70]\n getitem_3031 = _foreach_add_4[71]\n getitem_3032 = _foreach_add_4[72]\n getitem_3033 = _foreach_add_4[73]\n getitem_3034 = _foreach_add_4[74]\n getitem_3035 = _foreach_add_4[75]\n getitem_3036 = _foreach_add_4[76]\n getitem_3037 = _foreach_add_4[77]\n getitem_3038 = _foreach_add_4[78]\n getitem_3039 = _foreach_add_4[79]\n getitem_3040 = _foreach_add_4[80]\n getitem_3041 = _foreach_add_4[81]\n getitem_3042 = _foreach_add_4[82]\n getitem_3043 = _foreach_add_4[83]\n getitem_3044 = _foreach_add_4[84]\n getitem_3045 = _foreach_add_4[85]\n getitem_3046 = _foreach_add_4[86]\n getitem_3047 = _foreach_add_4[87]\n getitem_3048 = _foreach_add_4[88]\n getitem_3049 = _foreach_add_4[89]\n getitem_3050 = _foreach_add_4[90]\n getitem_3051 = _foreach_add_4[91]\n getitem_3052 = _foreach_add_4[92]\n getitem_3053 = _foreach_add_4[93]\n getitem_3054 = _foreach_add_4[94]\n getitem_3055 = _foreach_add_4[95]\n getitem_3056 = _foreach_add_4[96]\n getitem_3057 = _foreach_add_4[97]\n getitem_3058 = _foreach_add_4[98]\n getitem_3059 = _foreach_add_4[99]\n getitem_3060 = _foreach_add_4[100]\n getitem_3061 = _foreach_add_4[101]\n getitem_3062 = _foreach_add_4[102]\n getitem_3063 = _foreach_add_4[103]\n getitem_3064 = _foreach_add_4[104]\n getitem_3065 = _foreach_add_4[105]\n getitem_3066 = _foreach_add_4[106]\n getitem_3067 = _foreach_add_4[107]\n getitem_3068 = _foreach_add_4[108]\n getitem_3069 = _foreach_add_4[109]\n getitem_3070 = _foreach_add_4[110]\n getitem_3071 = _foreach_add_4[111]\n getitem_3072 = _foreach_add_4[112]\n getitem_3073 = _foreach_add_4[113]\n getitem_3074 = _foreach_add_4[114]\n getitem_3075 = _foreach_add_4[115]\n getitem_3076 = _foreach_add_4[116]\n getitem_3077 = _foreach_add_4[117]\n getitem_3078 = _foreach_add_4[118]\n getitem_3079 = _foreach_add_4[119]\n getitem_3080 = _foreach_add_4[120]\n getitem_3081 = _foreach_add_4[121]\n getitem_3082 = _foreach_add_4[122]\n getitem_3083 = _foreach_add_4[123]\n getitem_3084 = _foreach_add_4[124]\n getitem_3085 = _foreach_add_4[125]\n getitem_3086 = _foreach_add_4[126]\n getitem_3087 = _foreach_add_4[127]\n getitem_3088 = _foreach_add_4[128]\n getitem_3089 = _foreach_add_4[129]\n getitem_3090 = _foreach_add_4[130]\n getitem_3091 = _foreach_add_4[131]\n getitem_3092 = _foreach_add_4[132]\n getitem_3093 = _foreach_add_4[133]\n getitem_3094 = _foreach_add_4[134]\n getitem_3095 = _foreach_add_4[135]\n getitem_3096 = _foreach_add_4[136]\n getitem_3097 = _foreach_add_4[137]\n getitem_3098 = _foreach_add_4[138]\n getitem_3099 = _foreach_add_4[139]\n getitem_3100 = _foreach_add_4[140]\n getitem_3101 = _foreach_add_4[141]\n getitem_3102 = _foreach_add_4[142]\n getitem_3103 = _foreach_add_4[143]\n getitem_3104 = _foreach_add_4[144]\n getitem_3105 = _foreach_add_4[145]\n getitem_3106 = _foreach_add_4[146]\n getitem_3107 = _foreach_add_4[147]; _foreach_add_4 = None\n copy_ = torch.ops.aten.copy_.default(arg0_1, getitem_2960); arg0_1 = getitem_2960 = copy_ = None\n copy__1 = torch.ops.aten.copy_.default(arg1_1, getitem_2961); arg1_1 = getitem_2961 = copy__1 = None\n copy__2 = torch.ops.aten.copy_.default(arg2_1, getitem_2962); arg2_1 = getitem_2962 = copy__2 = None\n copy__3 = torch.ops.aten.copy_.default(arg3_1, getitem_2963); arg3_1 = getitem_2963 = copy__3 = None\n copy__4 = torch.ops.aten.copy_.default(arg4_1, getitem_2964); arg4_1 = getitem_2964 = copy__4 = None\n copy__5 = torch.ops.aten.copy_.default(arg5_1, getitem_2965); arg5_1 = getitem_2965 = copy__5 = None\n 
copy__6 = torch.ops.aten.copy_.default(arg6_1, getitem_2966); arg6_1 = getitem_2966 = copy__6 = None\n copy__7 = torch.ops.aten.copy_.default(arg7_1, getitem_2967); arg7_1 = getitem_2967 = copy__7 = None\n copy__8 = torch.ops.aten.copy_.default(arg8_1, getitem_2968); arg8_1 = getitem_2968 = copy__8 = None\n copy__9 = torch.ops.aten.copy_.default(arg9_1, getitem_2969); arg9_1 = getitem_2969 = copy__9 = None\n copy__10 = torch.ops.aten.copy_.default(arg10_1, getitem_2970); arg10_1 = getitem_2970 = copy__10 = None\n copy__11 = torch.ops.aten.copy_.default(arg11_1, getitem_2971); arg11_1 = getitem_2971 = copy__11 = None\n copy__12 = torch.ops.aten.copy_.default(arg12_1, getitem_2972); arg12_1 = getitem_2972 = copy__12 = None\n copy__13 = torch.ops.aten.copy_.default(arg13_1, getitem_2973); arg13_1 = getitem_2973 = copy__13 = None\n copy__14 = torch.ops.aten.copy_.default(arg14_1, getitem_2974); arg14_1 = getitem_2974 = copy__14 = None\n copy__15 = torch.ops.aten.copy_.default(arg15_1, getitem_2975); arg15_1 = getitem_2975 = copy__15 = None\n copy__16 = torch.ops.aten.copy_.default(arg16_1, getitem_2976); arg16_1 = getitem_2976 = copy__16 = None\n copy__17 = torch.ops.aten.copy_.default(arg17_1, getitem_2977); arg17_1 = getitem_2977 = copy__17 = None\n copy__18 = torch.ops.aten.copy_.default(arg18_1, getitem_2978); arg18_1 = getitem_2978 = copy__18 = None\n copy__19 = torch.ops.aten.copy_.default(arg19_1, getitem_2979); arg19_1 = getitem_2979 = copy__19 = None\n copy__20 = torch.ops.aten.copy_.default(arg20_1, getitem_2980); arg20_1 = getitem_2980 = copy__20 = None\n copy__21 = torch.ops.aten.copy_.default(arg21_1, getitem_2981); arg21_1 = getitem_2981 = copy__21 = None\n copy__22 = torch.ops.aten.copy_.default(arg22_1, getitem_2982); arg22_1 = getitem_2982 = copy__22 = None\n copy__23 = torch.ops.aten.copy_.default(arg23_1, getitem_2983); arg23_1 = getitem_2983 = copy__23 = None\n copy__24 = torch.ops.aten.copy_.default(arg24_1, getitem_2984); arg24_1 = getitem_2984 = copy__24 = None\n copy__25 = torch.ops.aten.copy_.default(arg25_1, getitem_2985); arg25_1 = getitem_2985 = copy__25 = None\n copy__26 = torch.ops.aten.copy_.default(arg26_1, getitem_2986); arg26_1 = getitem_2986 = copy__26 = None\n copy__27 = torch.ops.aten.copy_.default(arg27_1, getitem_2987); arg27_1 = getitem_2987 = copy__27 = None\n copy__28 = torch.ops.aten.copy_.default(arg28_1, getitem_2988); arg28_1 = getitem_2988 = copy__28 = None\n copy__29 = torch.ops.aten.copy_.default(arg29_1, getitem_2989); arg29_1 = getitem_2989 = copy__29 = None\n copy__30 = torch.ops.aten.copy_.default(arg30_1, getitem_2990); arg30_1 = getitem_2990 = copy__30 = None\n copy__31 = torch.ops.aten.copy_.default(arg31_1, getitem_2991); arg31_1 = getitem_2991 = copy__31 = None\n copy__32 = torch.ops.aten.copy_.default(arg32_1, getitem_2992); arg32_1 = getitem_2992 = copy__32 = None\n copy__33 = torch.ops.aten.copy_.default(arg33_1, getitem_2993); arg33_1 = getitem_2993 = copy__33 = None\n copy__34 = torch.ops.aten.copy_.default(arg34_1, getitem_2994); arg34_1 = getitem_2994 = copy__34 = None\n copy__35 = torch.ops.aten.copy_.default(arg35_1, getitem_2995); arg35_1 = getitem_2995 = copy__35 = None\n copy__36 = torch.ops.aten.copy_.default(arg36_1, getitem_2996); arg36_1 = getitem_2996 = copy__36 = None\n copy__37 = torch.ops.aten.copy_.default(arg37_1, getitem_2997); arg37_1 = getitem_2997 = copy__37 = None\n copy__38 = torch.ops.aten.copy_.default(arg38_1, getitem_2998); arg38_1 = getitem_2998 = copy__38 = None\n copy__39 = 
torch.ops.aten.copy_.default(arg39_1, getitem_2999); arg39_1 = getitem_2999 = copy__39 = None\n copy__40 = torch.ops.aten.copy_.default(arg40_1, getitem_3000); arg40_1 = getitem_3000 = copy__40 = None\n copy__41 = torch.ops.aten.copy_.default(arg41_1, getitem_3001); arg41_1 = getitem_3001 = copy__41 = None\n copy__42 = torch.ops.aten.copy_.default(arg42_1, getitem_3002); arg42_1 = getitem_3002 = copy__42 = None\n copy__43 = torch.ops.aten.copy_.default(arg43_1, getitem_3003); arg43_1 = getitem_3003 = copy__43 = None\n copy__44 = torch.ops.aten.copy_.default(arg44_1, getitem_3004); arg44_1 = getitem_3004 = copy__44 = None\n copy__45 = torch.ops.aten.copy_.default(arg45_1, getitem_3005); arg45_1 = getitem_3005 = copy__45 = None\n copy__46 = torch.ops.aten.copy_.default(arg46_1, getitem_3006); arg46_1 = getitem_3006 = copy__46 = None\n copy__47 = torch.ops.aten.copy_.default(arg47_1, getitem_3007); arg47_1 = getitem_3007 = copy__47 = None\n copy__48 = torch.ops.aten.copy_.default(arg48_1, getitem_3008); arg48_1 = getitem_3008 = copy__48 = None\n copy__49 = torch.ops.aten.copy_.default(arg49_1, getitem_3009); arg49_1 = getitem_3009 = copy__49 = None\n copy__50 = torch.ops.aten.copy_.default(arg50_1, getitem_3010); arg50_1 = getitem_3010 = copy__50 = None\n copy__51 = torch.ops.aten.copy_.default(arg51_1, getitem_3011); arg51_1 = getitem_3011 = copy__51 = None\n copy__52 = torch.ops.aten.copy_.default(arg52_1, getitem_3012); arg52_1 = getitem_3012 = copy__52 = None\n copy__53 = torch.ops.aten.copy_.default(arg53_1, getitem_3013); arg53_1 = getitem_3013 = copy__53 = None\n copy__54 = torch.ops.aten.copy_.default(arg54_1, getitem_3014); arg54_1 = getitem_3014 = copy__54 = None\n copy__55 = torch.ops.aten.copy_.default(arg55_1, getitem_3015); arg55_1 = getitem_3015 = copy__55 = None\n copy__56 = torch.ops.aten.copy_.default(arg56_1, getitem_3016); arg56_1 = getitem_3016 = copy__56 = None\n copy__57 = torch.ops.aten.copy_.default(arg57_1, getitem_3017); arg57_1 = getitem_3017 = copy__57 = None\n copy__58 = torch.ops.aten.copy_.default(arg58_1, getitem_3018); arg58_1 = getitem_3018 = copy__58 = None\n copy__59 = torch.ops.aten.copy_.default(arg59_1, getitem_3019); arg59_1 = getitem_3019 = copy__59 = None\n copy__60 = torch.ops.aten.copy_.default(arg60_1, getitem_3020); arg60_1 = getitem_3020 = copy__60 = None\n copy__61 = torch.ops.aten.copy_.default(arg61_1, getitem_3021); arg61_1 = getitem_3021 = copy__61 = None\n copy__62 = torch.ops.aten.copy_.default(arg62_1, getitem_3022); arg62_1 = getitem_3022 = copy__62 = None\n copy__63 = torch.ops.aten.copy_.default(arg63_1, getitem_3023); arg63_1 = getitem_3023 = copy__63 = None\n copy__64 = torch.ops.aten.copy_.default(arg64_1, getitem_3024); arg64_1 = getitem_3024 = copy__64 = None\n copy__65 = torch.ops.aten.copy_.default(arg65_1, getitem_3025); arg65_1 = getitem_3025 = copy__65 = None\n copy__66 = torch.ops.aten.copy_.default(arg66_1, getitem_3026); arg66_1 = getitem_3026 = copy__66 = None\n copy__67 = torch.ops.aten.copy_.default(arg67_1, getitem_3027); arg67_1 = getitem_3027 = copy__67 = None\n copy__68 = torch.ops.aten.copy_.default(arg68_1, getitem_3028); arg68_1 = getitem_3028 = copy__68 = None\n copy__69 = torch.ops.aten.copy_.default(arg69_1, getitem_3029); arg69_1 = getitem_3029 = copy__69 = None\n copy__70 = torch.ops.aten.copy_.default(arg70_1, getitem_3030); arg70_1 = getitem_3030 = copy__70 = None\n copy__71 = torch.ops.aten.copy_.default(arg71_1, getitem_3031); arg71_1 = getitem_3031 = copy__71 = None\n copy__72 = 
torch.ops.aten.copy_.default(arg72_1, getitem_3032); arg72_1 = getitem_3032 = copy__72 = None\n copy__73 = torch.ops.aten.copy_.default(arg73_1, getitem_3033); arg73_1 = getitem_3033 = copy__73 = None\n copy__74 = torch.ops.aten.copy_.default(arg74_1, getitem_3034); arg74_1 = getitem_3034 = copy__74 = None\n copy__75 = torch.ops.aten.copy_.default(arg75_1, getitem_3035); arg75_1 = getitem_3035 = copy__75 = None\n copy__76 = torch.ops.aten.copy_.default(arg76_1, getitem_3036); arg76_1 = getitem_3036 = copy__76 = None\n copy__77 = torch.ops.aten.copy_.default(arg77_1, getitem_3037); arg77_1 = getitem_3037 = copy__77 = None\n copy__78 = torch.ops.aten.copy_.default(arg78_1, getitem_3038); arg78_1 = getitem_3038 = copy__78 = None\n copy__79 = torch.ops.aten.copy_.default(arg79_1, getitem_3039); arg79_1 = getitem_3039 = copy__79 = None\n copy__80 = torch.ops.aten.copy_.default(arg80_1, getitem_3040); arg80_1 = getitem_3040 = copy__80 = None\n copy__81 = torch.ops.aten.copy_.default(arg81_1, getitem_3041); arg81_1 = getitem_3041 = copy__81 = None\n copy__82 = torch.ops.aten.copy_.default(arg82_1, getitem_3042); arg82_1 = getitem_3042 = copy__82 = None\n copy__83 = torch.ops.aten.copy_.default(arg83_1, getitem_3043); arg83_1 = getitem_3043 = copy__83 = None\n copy__84 = torch.ops.aten.copy_.default(arg84_1, getitem_3044); arg84_1 = getitem_3044 = copy__84 = None\n copy__85 = torch.ops.aten.copy_.default(arg85_1, getitem_3045); arg85_1 = getitem_3045 = copy__85 = None\n copy__86 = torch.ops.aten.copy_.default(arg86_1, getitem_3046); arg86_1 = getitem_3046 = copy__86 = None\n copy__87 = torch.ops.aten.copy_.default(arg87_1, getitem_3047); arg87_1 = getitem_3047 = copy__87 = None\n copy__88 = torch.ops.aten.copy_.default(arg88_1, getitem_3048); arg88_1 = getitem_3048 = copy__88 = None\n copy__89 = torch.ops.aten.copy_.default(arg89_1, getitem_3049); arg89_1 = getitem_3049 = copy__89 = None\n copy__90 = torch.ops.aten.copy_.default(arg90_1, getitem_3050); arg90_1 = getitem_3050 = copy__90 = None\n copy__91 = torch.ops.aten.copy_.default(arg91_1, getitem_3051); arg91_1 = getitem_3051 = copy__91 = None\n copy__92 = torch.ops.aten.copy_.default(arg92_1, getitem_3052); arg92_1 = getitem_3052 = copy__92 = None\n copy__93 = torch.ops.aten.copy_.default(arg93_1, getitem_3053); arg93_1 = getitem_3053 = copy__93 = None\n copy__94 = torch.ops.aten.copy_.default(arg94_1, getitem_3054); arg94_1 = getitem_3054 = copy__94 = None\n copy__95 = torch.ops.aten.copy_.default(arg95_1, getitem_3055); arg95_1 = getitem_3055 = copy__95 = None\n copy__96 = torch.ops.aten.copy_.default(arg96_1, getitem_3056); arg96_1 = getitem_3056 = copy__96 = None\n copy__97 = torch.ops.aten.copy_.default(arg97_1, getitem_3057); arg97_1 = getitem_3057 = copy__97 = None\n copy__98 = torch.ops.aten.copy_.default(arg98_1, getitem_3058); arg98_1 = getitem_3058 = copy__98 = None\n copy__99 = torch.ops.aten.copy_.default(arg99_1, getitem_3059); arg99_1 = getitem_3059 = copy__99 = None\n copy__100 = torch.ops.aten.copy_.default(arg100_1, getitem_3060); arg100_1 = getitem_3060 = copy__100 = None\n copy__101 = torch.ops.aten.copy_.default(arg101_1, getitem_3061); arg101_1 = getitem_3061 = copy__101 = None\n copy__102 = torch.ops.aten.copy_.default(arg102_1, getitem_3062); arg102_1 = getitem_3062 = copy__102 = None\n copy__103 = torch.ops.aten.copy_.default(arg103_1, getitem_3063); arg103_1 = getitem_3063 = copy__103 = None\n copy__104 = torch.ops.aten.copy_.default(arg104_1, getitem_3064); arg104_1 = getitem_3064 = copy__104 = None\n copy__105 = 
torch.ops.aten.copy_.default(arg105_1, getitem_3065); arg105_1 = getitem_3065 = copy__105 = None\n copy__106 = torch.ops.aten.copy_.default(arg106_1, getitem_3066); arg106_1 = getitem_3066 = copy__106 = None\n copy__107 = torch.ops.aten.copy_.default(arg107_1, getitem_3067); arg107_1 = getitem_3067 = copy__107 = None\n copy__108 = torch.ops.aten.copy_.default(arg108_1, getitem_3068); arg108_1 = getitem_3068 = copy__108 = None\n copy__109 = torch.ops.aten.copy_.default(arg109_1, getitem_3069); arg109_1 = getitem_3069 = copy__109 = None\n copy__110 = torch.ops.aten.copy_.default(arg110_1, getitem_3070); arg110_1 = getitem_3070 = copy__110 = None\n copy__111 = torch.ops.aten.copy_.default(arg111_1, getitem_3071); arg111_1 = getitem_3071 = copy__111 = None\n copy__112 = torch.ops.aten.copy_.default(arg112_1, getitem_3072); arg112_1 = getitem_3072 = copy__112 = None\n copy__113 = torch.ops.aten.copy_.default(arg113_1, getitem_3073); arg113_1 = getitem_3073 = copy__113 = None\n copy__114 = torch.ops.aten.copy_.default(arg114_1, getitem_3074); arg114_1 = getitem_3074 = copy__114 = None\n copy__115 = torch.ops.aten.copy_.default(arg115_1, getitem_3075); arg115_1 = getitem_3075 = copy__115 = None\n copy__116 = torch.ops.aten.copy_.default(arg116_1, getitem_3076); arg116_1 = getitem_3076 = copy__116 = None\n copy__117 = torch.ops.aten.copy_.default(arg117_1, getitem_3077); arg117_1 = getitem_3077 = copy__117 = None\n copy__118 = torch.ops.aten.copy_.default(arg118_1, getitem_3078); arg118_1 = getitem_3078 = copy__118 = None\n copy__119 = torch.ops.aten.copy_.default(arg119_1, getitem_3079); arg119_1 = getitem_3079 = copy__119 = None\n copy__120 = torch.ops.aten.copy_.default(arg120_1, getitem_3080); arg120_1 = getitem_3080 = copy__120 = None\n copy__121 = torch.ops.aten.copy_.default(arg121_1, getitem_3081); arg121_1 = getitem_3081 = copy__121 = None\n copy__122 = torch.ops.aten.copy_.default(arg122_1, getitem_3082); arg122_1 = getitem_3082 = copy__122 = None\n copy__123 = torch.ops.aten.copy_.default(arg123_1, getitem_3083); arg123_1 = getitem_3083 = copy__123 = None\n copy__124 = torch.ops.aten.copy_.default(arg124_1, getitem_3084); arg124_1 = getitem_3084 = copy__124 = None\n copy__125 = torch.ops.aten.copy_.default(arg125_1, getitem_3085); arg125_1 = getitem_3085 = copy__125 = None\n copy__126 = torch.ops.aten.copy_.default(arg126_1, getitem_3086); arg126_1 = getitem_3086 = copy__126 = None\n copy__127 = torch.ops.aten.copy_.default(arg127_1, getitem_3087); arg127_1 = getitem_3087 = copy__127 = None\n copy__128 = torch.ops.aten.copy_.default(arg128_1, getitem_3088); arg128_1 = getitem_3088 = copy__128 = None\n copy__129 = torch.ops.aten.copy_.default(arg129_1, getitem_3089); arg129_1 = getitem_3089 = copy__129 = None\n copy__130 = torch.ops.aten.copy_.default(arg130_1, getitem_3090); arg130_1 = getitem_3090 = copy__130 = None\n copy__131 = torch.ops.aten.copy_.default(arg131_1, getitem_3091); arg131_1 = getitem_3091 = copy__131 = None\n copy__132 = torch.ops.aten.copy_.default(arg132_1, getitem_3092); arg132_1 = getitem_3092 = copy__132 = None\n copy__133 = torch.ops.aten.copy_.default(arg133_1, getitem_3093); arg133_1 = getitem_3093 = copy__133 = None\n copy__134 = torch.ops.aten.copy_.default(arg134_1, getitem_3094); arg134_1 = getitem_3094 = copy__134 = None\n copy__135 = torch.ops.aten.copy_.default(arg135_1, getitem_3095); arg135_1 = getitem_3095 = copy__135 = None\n copy__136 = torch.ops.aten.copy_.default(arg136_1, getitem_3096); arg136_1 = getitem_3096 = copy__136 = None\n copy__137 = 
torch.ops.aten.copy_.default(arg137_1, getitem_3097); arg137_1 = getitem_3097 = copy__137 = None\n copy__138 = torch.ops.aten.copy_.default(arg138_1, getitem_3098); arg138_1 = getitem_3098 = copy__138 = None\n copy__139 = torch.ops.aten.copy_.default(arg139_1, getitem_3099); arg139_1 = getitem_3099 = copy__139 = None\n copy__140 = torch.ops.aten.copy_.default(arg140_1, getitem_3100); arg140_1 = getitem_3100 = copy__140 = None\n copy__141 = torch.ops.aten.copy_.default(arg141_1, getitem_3101); arg141_1 = getitem_3101 = copy__141 = None\n copy__142 = torch.ops.aten.copy_.default(arg142_1, getitem_3102); arg142_1 = getitem_3102 = copy__142 = None\n copy__143 = torch.ops.aten.copy_.default(arg143_1, getitem_3103); arg143_1 = getitem_3103 = copy__143 = None\n copy__144 = torch.ops.aten.copy_.default(arg144_1, getitem_3104); arg144_1 = getitem_3104 = copy__144 = None\n copy__145 = torch.ops.aten.copy_.default(arg145_1, getitem_3105); arg145_1 = getitem_3105 = copy__145 = None\n copy__146 = torch.ops.aten.copy_.default(arg146_1, getitem_3106); arg146_1 = getitem_3106 = copy__146 = None\n copy__147 = torch.ops.aten.copy_.default(arg147_1, getitem_3107); arg147_1 = getitem_3107 = copy__147 = None\n copy__148 = torch.ops.aten.copy_.default(arg148_1, getitem_1); arg148_1 = getitem_1 = copy__148 = None\n copy__149 = torch.ops.aten.copy_.default(arg149_1, getitem_445); arg149_1 = getitem_445 = copy__149 = None\n copy__150 = torch.ops.aten.copy_.default(arg150_1, getitem_889); arg150_1 = getitem_889 = copy__150 = None\n copy__151 = torch.ops.aten.copy_.default(arg299_1, getitem_444); arg299_1 = getitem_444 = copy__151 = None\n copy__152 = torch.ops.aten.copy_.default(arg300_1, getitem_446); arg300_1 = getitem_446 = copy__152 = None\n copy__153 = torch.ops.aten.copy_.default(arg301_1, getitem_447); arg301_1 = getitem_447 = copy__153 = None\n copy__154 = torch.ops.aten.copy_.default(arg302_1, getitem_448); arg302_1 = getitem_448 = copy__154 = None\n copy__155 = torch.ops.aten.copy_.default(arg303_1, getitem_449); arg303_1 = getitem_449 = copy__155 = None\n copy__156 = torch.ops.aten.copy_.default(arg304_1, getitem_450); arg304_1 = getitem_450 = copy__156 = None\n copy__157 = torch.ops.aten.copy_.default(arg305_1, getitem_451); arg305_1 = getitem_451 = copy__157 = None\n copy__158 = torch.ops.aten.copy_.default(arg306_1, getitem_452); arg306_1 = getitem_452 = copy__158 = None\n copy__159 = torch.ops.aten.copy_.default(arg307_1, getitem_453); arg307_1 = getitem_453 = copy__159 = None\n copy__160 = torch.ops.aten.copy_.default(arg308_1, getitem_454); arg308_1 = getitem_454 = copy__160 = None\n copy__161 = torch.ops.aten.copy_.default(arg309_1, getitem_455); arg309_1 = getitem_455 = copy__161 = None\n copy__162 = torch.ops.aten.copy_.default(arg310_1, getitem_456); arg310_1 = getitem_456 = copy__162 = None\n copy__163 = torch.ops.aten.copy_.default(arg311_1, getitem_457); arg311_1 = getitem_457 = copy__163 = None\n copy__164 = torch.ops.aten.copy_.default(arg312_1, getitem_458); arg312_1 = getitem_458 = copy__164 = None\n copy__165 = torch.ops.aten.copy_.default(arg313_1, getitem_459); arg313_1 = getitem_459 = copy__165 = None\n copy__166 = torch.ops.aten.copy_.default(arg314_1, getitem_460); arg314_1 = getitem_460 = copy__166 = None\n copy__167 = torch.ops.aten.copy_.default(arg315_1, getitem_461); arg315_1 = getitem_461 = copy__167 = None\n copy__168 = torch.ops.aten.copy_.default(arg316_1, getitem_462); arg316_1 = getitem_462 = copy__168 = None\n copy__169 = torch.ops.aten.copy_.default(arg317_1, 
getitem_463); arg317_1 = getitem_463 = copy__169 = None\n copy__170 = torch.ops.aten.copy_.default(arg318_1, getitem_464); arg318_1 = getitem_464 = copy__170 = None\n copy__171 = torch.ops.aten.copy_.default(arg319_1, getitem_465); arg319_1 = getitem_465 = copy__171 = None\n copy__172 = torch.ops.aten.copy_.default(arg320_1, getitem_466); arg320_1 = getitem_466 = copy__172 = None\n copy__173 = torch.ops.aten.copy_.default(arg321_1, getitem_467); arg321_1 = getitem_467 = copy__173 = None\n copy__174 = torch.ops.aten.copy_.default(arg322_1, getitem_468); arg322_1 = getitem_468 = copy__174 = None\n copy__175 = torch.ops.aten.copy_.default(arg323_1, getitem_469); arg323_1 = getitem_469 = copy__175 = None\n copy__176 = torch.ops.aten.copy_.default(arg324_1, getitem_470); arg324_1 = getitem_470 = copy__176 = None\n copy__177 = torch.ops.aten.copy_.default(arg325_1, getitem_471); arg325_1 = getitem_471 = copy__177 = None\n copy__178 = torch.ops.aten.copy_.default(arg326_1, getitem_472); arg326_1 = getitem_472 = copy__178 = None\n copy__179 = torch.ops.aten.copy_.default(arg327_1, getitem_473); arg327_1 = getitem_473 = copy__179 = None\n copy__180 = torch.ops.aten.copy_.default(arg328_1, getitem_474); arg328_1 = getitem_474 = copy__180 = None\n copy__181 = torch.ops.aten.copy_.default(arg329_1, getitem_475); arg329_1 = getitem_475 = copy__181 = None\n copy__182 = torch.ops.aten.copy_.default(arg330_1, getitem_476); arg330_1 = getitem_476 = copy__182 = None\n copy__183 = torch.ops.aten.copy_.default(arg331_1, getitem_477); arg331_1 = getitem_477 = copy__183 = None\n copy__184 = torch.ops.aten.copy_.default(arg332_1, getitem_478); arg332_1 = getitem_478 = copy__184 = None\n copy__185 = torch.ops.aten.copy_.default(arg333_1, getitem_479); arg333_1 = getitem_479 = copy__185 = None\n copy__186 = torch.ops.aten.copy_.default(arg334_1, getitem_480); arg334_1 = getitem_480 = copy__186 = None\n copy__187 = torch.ops.aten.copy_.default(arg335_1, getitem_481); arg335_1 = getitem_481 = copy__187 = None\n copy__188 = torch.ops.aten.copy_.default(arg336_1, getitem_482); arg336_1 = getitem_482 = copy__188 = None\n copy__189 = torch.ops.aten.copy_.default(arg337_1, getitem_483); arg337_1 = getitem_483 = copy__189 = None\n copy__190 = torch.ops.aten.copy_.default(arg338_1, getitem_484); arg338_1 = getitem_484 = copy__190 = None\n copy__191 = torch.ops.aten.copy_.default(arg339_1, getitem_485); arg339_1 = getitem_485 = copy__191 = None\n copy__192 = torch.ops.aten.copy_.default(arg340_1, getitem_486); arg340_1 = getitem_486 = copy__192 = None\n copy__193 = torch.ops.aten.copy_.default(arg341_1, getitem_487); arg341_1 = getitem_487 = copy__193 = None\n copy__194 = torch.ops.aten.copy_.default(arg342_1, getitem_488); arg342_1 = getitem_488 = copy__194 = None\n copy__195 = torch.ops.aten.copy_.default(arg343_1, getitem_489); arg343_1 = getitem_489 = copy__195 = None\n copy__196 = torch.ops.aten.copy_.default(arg344_1, getitem_490); arg344_1 = getitem_490 = copy__196 = None\n copy__197 = torch.ops.aten.copy_.default(arg345_1, getitem_491); arg345_1 = getitem_491 = copy__197 = None\n copy__198 = torch.ops.aten.copy_.default(arg346_1, getitem_492); arg346_1 = getitem_492 = copy__198 = None\n copy__199 = torch.ops.aten.copy_.default(arg347_1, getitem_493); arg347_1 = getitem_493 = copy__199 = None\n copy__200 = torch.ops.aten.copy_.default(arg348_1, getitem_494); arg348_1 = getitem_494 = copy__200 = None\n copy__201 = torch.ops.aten.copy_.default(arg349_1, getitem_495); arg349_1 = getitem_495 = copy__201 = None\n 
copy__202 = torch.ops.aten.copy_.default(arg350_1, getitem_496); arg350_1 = getitem_496 = copy__202 = None\n copy__203 = torch.ops.aten.copy_.default(arg351_1, getitem_497); arg351_1 = getitem_497 = copy__203 = None\n copy__204 = torch.ops.aten.copy_.default(arg352_1, getitem_498); arg352_1 = getitem_498 = copy__204 = None\n copy__205 = torch.ops.aten.copy_.default(arg353_1, getitem_499); arg353_1 = getitem_499 = copy__205 = None\n copy__206 = torch.ops.aten.copy_.default(arg354_1, getitem_500); arg354_1 = getitem_500 = copy__206 = None\n copy__207 = torch.ops.aten.copy_.default(arg355_1, getitem_501); arg355_1 = getitem_501 = copy__207 = None\n copy__208 = torch.ops.aten.copy_.default(arg356_1, getitem_502); arg356_1 = getitem_502 = copy__208 = None\n copy__209 = torch.ops.aten.copy_.default(arg357_1, getitem_503); arg357_1 = getitem_503 = copy__209 = None\n copy__210 = torch.ops.aten.copy_.default(arg358_1, getitem_504); arg358_1 = getitem_504 = copy__210 = None\n copy__211 = torch.ops.aten.copy_.default(arg359_1, getitem_505); arg359_1 = getitem_505 = copy__211 = None\n copy__212 = torch.ops.aten.copy_.default(arg360_1, getitem_506); arg360_1 = getitem_506 = copy__212 = None\n copy__213 = torch.ops.aten.copy_.default(arg361_1, getitem_507); arg361_1 = getitem_507 = copy__213 = None\n copy__214 = torch.ops.aten.copy_.default(arg362_1, getitem_508); arg362_1 = getitem_508 = copy__214 = None\n copy__215 = torch.ops.aten.copy_.default(arg363_1, getitem_509); arg363_1 = getitem_509 = copy__215 = None\n copy__216 = torch.ops.aten.copy_.default(arg364_1, getitem_510); arg364_1 = getitem_510 = copy__216 = None\n copy__217 = torch.ops.aten.copy_.default(arg365_1, getitem_511); arg365_1 = getitem_511 = copy__217 = None\n copy__218 = torch.ops.aten.copy_.default(arg366_1, getitem_512); arg366_1 = getitem_512 = copy__218 = None\n copy__219 = torch.ops.aten.copy_.default(arg367_1, getitem_513); arg367_1 = getitem_513 = copy__219 = None\n copy__220 = torch.ops.aten.copy_.default(arg368_1, getitem_514); arg368_1 = getitem_514 = copy__220 = None\n copy__221 = torch.ops.aten.copy_.default(arg369_1, getitem_515); arg369_1 = getitem_515 = copy__221 = None\n copy__222 = torch.ops.aten.copy_.default(arg370_1, getitem_516); arg370_1 = getitem_516 = copy__222 = None\n copy__223 = torch.ops.aten.copy_.default(arg371_1, getitem_517); arg371_1 = getitem_517 = copy__223 = None\n copy__224 = torch.ops.aten.copy_.default(arg372_1, getitem_518); arg372_1 = getitem_518 = copy__224 = None\n copy__225 = torch.ops.aten.copy_.default(arg373_1, getitem_519); arg373_1 = getitem_519 = copy__225 = None\n copy__226 = torch.ops.aten.copy_.default(arg374_1, getitem_520); arg374_1 = getitem_520 = copy__226 = None\n copy__227 = torch.ops.aten.copy_.default(arg375_1, getitem_521); arg375_1 = getitem_521 = copy__227 = None\n copy__228 = torch.ops.aten.copy_.default(arg376_1, getitem_522); arg376_1 = getitem_522 = copy__228 = None\n copy__229 = torch.ops.aten.copy_.default(arg377_1, getitem_523); arg377_1 = getitem_523 = copy__229 = None\n copy__230 = torch.ops.aten.copy_.default(arg378_1, getitem_524); arg378_1 = getitem_524 = copy__230 = None\n copy__231 = torch.ops.aten.copy_.default(arg379_1, getitem_525); arg379_1 = getitem_525 = copy__231 = None\n copy__232 = torch.ops.aten.copy_.default(arg380_1, getitem_526); arg380_1 = getitem_526 = copy__232 = None\n copy__233 = torch.ops.aten.copy_.default(arg381_1, getitem_527); arg381_1 = getitem_527 = copy__233 = None\n copy__234 = torch.ops.aten.copy_.default(arg382_1, getitem_528); 
arg382_1 = getitem_528 = copy__234 = None\n copy__235 = torch.ops.aten.copy_.default(arg383_1, getitem_529); arg383_1 = getitem_529 = copy__235 = None\n copy__236 = torch.ops.aten.copy_.default(arg384_1, getitem_530); arg384_1 = getitem_530 = copy__236 = None\n copy__237 = torch.ops.aten.copy_.default(arg385_1, getitem_531); arg385_1 = getitem_531 = copy__237 = None\n copy__238 = torch.ops.aten.copy_.default(arg386_1, getitem_532); arg386_1 = getitem_532 = copy__238 = None\n copy__239 = torch.ops.aten.copy_.default(arg387_1, getitem_533); arg387_1 = getitem_533 = copy__239 = None\n copy__240 = torch.ops.aten.copy_.default(arg388_1, getitem_534); arg388_1 = getitem_534 = copy__240 = None\n copy__241 = torch.ops.aten.copy_.default(arg389_1, getitem_535); arg389_1 = getitem_535 = copy__241 = None\n copy__242 = torch.ops.aten.copy_.default(arg390_1, getitem_536); arg390_1 = getitem_536 = copy__242 = None\n copy__243 = torch.ops.aten.copy_.default(arg391_1, getitem_537); arg391_1 = getitem_537 = copy__243 = None\n copy__244 = torch.ops.aten.copy_.default(arg392_1, getitem_538); arg392_1 = getitem_538 = copy__244 = None\n copy__245 = torch.ops.aten.copy_.default(arg393_1, getitem_539); arg393_1 = getitem_539 = copy__245 = None\n copy__246 = torch.ops.aten.copy_.default(arg394_1, getitem_540); arg394_1 = getitem_540 = copy__246 = None\n copy__247 = torch.ops.aten.copy_.default(arg395_1, getitem_541); arg395_1 = getitem_541 = copy__247 = None\n copy__248 = torch.ops.aten.copy_.default(arg396_1, getitem_542); arg396_1 = getitem_542 = copy__248 = None\n copy__249 = torch.ops.aten.copy_.default(arg397_1, getitem_543); arg397_1 = getitem_543 = copy__249 = None\n copy__250 = torch.ops.aten.copy_.default(arg398_1, getitem_544); arg398_1 = getitem_544 = copy__250 = None\n copy__251 = torch.ops.aten.copy_.default(arg399_1, getitem_545); arg399_1 = getitem_545 = copy__251 = None\n copy__252 = torch.ops.aten.copy_.default(arg400_1, getitem_546); arg400_1 = getitem_546 = copy__252 = None\n copy__253 = torch.ops.aten.copy_.default(arg401_1, getitem_547); arg401_1 = getitem_547 = copy__253 = None\n copy__254 = torch.ops.aten.copy_.default(arg402_1, getitem_548); arg402_1 = getitem_548 = copy__254 = None\n copy__255 = torch.ops.aten.copy_.default(arg403_1, getitem_549); arg403_1 = getitem_549 = copy__255 = None\n copy__256 = torch.ops.aten.copy_.default(arg404_1, getitem_550); arg404_1 = getitem_550 = copy__256 = None\n copy__257 = torch.ops.aten.copy_.default(arg405_1, getitem_551); arg405_1 = getitem_551 = copy__257 = None\n copy__258 = torch.ops.aten.copy_.default(arg406_1, getitem_552); arg406_1 = getitem_552 = copy__258 = None\n copy__259 = torch.ops.aten.copy_.default(arg407_1, getitem_553); arg407_1 = getitem_553 = copy__259 = None\n copy__260 = torch.ops.aten.copy_.default(arg408_1, getitem_554); arg408_1 = getitem_554 = copy__260 = None\n copy__261 = torch.ops.aten.copy_.default(arg409_1, getitem_555); arg409_1 = getitem_555 = copy__261 = None\n copy__262 = torch.ops.aten.copy_.default(arg410_1, getitem_556); arg410_1 = getitem_556 = copy__262 = None\n copy__263 = torch.ops.aten.copy_.default(arg411_1, getitem_557); arg411_1 = getitem_557 = copy__263 = None\n copy__264 = torch.ops.aten.copy_.default(arg412_1, getitem_558); arg412_1 = getitem_558 = copy__264 = None\n copy__265 = torch.ops.aten.copy_.default(arg413_1, getitem_559); arg413_1 = getitem_559 = copy__265 = None\n copy__266 = torch.ops.aten.copy_.default(arg414_1, getitem_560); arg414_1 = getitem_560 = copy__266 = None\n copy__267 = 
torch.ops.aten.copy_.default(arg415_1, getitem_561); arg415_1 = getitem_561 = copy__267 = None\n copy__268 = torch.ops.aten.copy_.default(arg416_1, getitem_562); arg416_1 = getitem_562 = copy__268 = None\n copy__269 = torch.ops.aten.copy_.default(arg417_1, getitem_563); arg417_1 = getitem_563 = copy__269 = None\n copy__270 = torch.ops.aten.copy_.default(arg418_1, getitem_564); arg418_1 = getitem_564 = copy__270 = None\n copy__271 = torch.ops.aten.copy_.default(arg419_1, getitem_565); arg419_1 = getitem_565 = copy__271 = None\n copy__272 = torch.ops.aten.copy_.default(arg420_1, getitem_566); arg420_1 = getitem_566 = copy__272 = None\n copy__273 = torch.ops.aten.copy_.default(arg421_1, getitem_567); arg421_1 = getitem_567 = copy__273 = None\n copy__274 = torch.ops.aten.copy_.default(arg422_1, getitem_568); arg422_1 = getitem_568 = copy__274 = None\n copy__275 = torch.ops.aten.copy_.default(arg423_1, getitem_569); arg423_1 = getitem_569 = copy__275 = None\n copy__276 = torch.ops.aten.copy_.default(arg424_1, getitem_570); arg424_1 = getitem_570 = copy__276 = None\n copy__277 = torch.ops.aten.copy_.default(arg425_1, getitem_571); arg425_1 = getitem_571 = copy__277 = None\n copy__278 = torch.ops.aten.copy_.default(arg426_1, getitem_572); arg426_1 = getitem_572 = copy__278 = None\n copy__279 = torch.ops.aten.copy_.default(arg427_1, getitem_573); arg427_1 = getitem_573 = copy__279 = None\n copy__280 = torch.ops.aten.copy_.default(arg428_1, getitem_574); arg428_1 = getitem_574 = copy__280 = None\n copy__281 = torch.ops.aten.copy_.default(arg429_1, getitem_575); arg429_1 = getitem_575 = copy__281 = None\n copy__282 = torch.ops.aten.copy_.default(arg430_1, getitem_576); arg430_1 = getitem_576 = copy__282 = None\n copy__283 = torch.ops.aten.copy_.default(arg431_1, getitem_577); arg431_1 = getitem_577 = copy__283 = None\n copy__284 = torch.ops.aten.copy_.default(arg432_1, getitem_578); arg432_1 = getitem_578 = copy__284 = None\n copy__285 = torch.ops.aten.copy_.default(arg433_1, getitem_579); arg433_1 = getitem_579 = copy__285 = None\n copy__286 = torch.ops.aten.copy_.default(arg434_1, getitem_580); arg434_1 = getitem_580 = copy__286 = None\n copy__287 = torch.ops.aten.copy_.default(arg435_1, getitem_581); arg435_1 = getitem_581 = copy__287 = None\n copy__288 = torch.ops.aten.copy_.default(arg436_1, getitem_582); arg436_1 = getitem_582 = copy__288 = None\n copy__289 = torch.ops.aten.copy_.default(arg437_1, getitem_583); arg437_1 = getitem_583 = copy__289 = None\n copy__290 = torch.ops.aten.copy_.default(arg438_1, getitem_584); arg438_1 = getitem_584 = copy__290 = None\n copy__291 = torch.ops.aten.copy_.default(arg439_1, getitem_585); arg439_1 = getitem_585 = copy__291 = None\n copy__292 = torch.ops.aten.copy_.default(arg440_1, getitem_586); arg440_1 = getitem_586 = copy__292 = None\n copy__293 = torch.ops.aten.copy_.default(arg441_1, getitem_587); arg441_1 = getitem_587 = copy__293 = None\n copy__294 = torch.ops.aten.copy_.default(arg442_1, getitem_588); arg442_1 = getitem_588 = copy__294 = None\n copy__295 = torch.ops.aten.copy_.default(arg443_1, getitem_589); arg443_1 = getitem_589 = copy__295 = None\n copy__296 = torch.ops.aten.copy_.default(arg444_1, getitem_590); arg444_1 = getitem_590 = copy__296 = None\n copy__297 = torch.ops.aten.copy_.default(arg445_1, getitem_591); arg445_1 = getitem_591 = copy__297 = None\n copy__298 = torch.ops.aten.copy_.default(arg446_1, getitem_888); arg446_1 = getitem_888 = copy__298 = None\n copy__299 = torch.ops.aten.copy_.default(arg447_1, getitem_890); arg447_1 = 
getitem_890 = copy__299 = None\n copy__300 = torch.ops.aten.copy_.default(arg448_1, getitem_891); arg448_1 = getitem_891 = copy__300 = None\n copy__301 = torch.ops.aten.copy_.default(arg449_1, getitem_892); arg449_1 = getitem_892 = copy__301 = None\n copy__302 = torch.ops.aten.copy_.default(arg450_1, getitem_893); arg450_1 = getitem_893 = copy__302 = None\n copy__303 = torch.ops.aten.copy_.default(arg451_1, getitem_894); arg451_1 = getitem_894 = copy__303 = None\n copy__304 = torch.ops.aten.copy_.default(arg452_1, getitem_895); arg452_1 = getitem_895 = copy__304 = None\n copy__305 = torch.ops.aten.copy_.default(arg453_1, getitem_896); arg453_1 = getitem_896 = copy__305 = None\n copy__306 = torch.ops.aten.copy_.default(arg454_1, getitem_897); arg454_1 = getitem_897 = copy__306 = None\n copy__307 = torch.ops.aten.copy_.default(arg455_1, getitem_898); arg455_1 = getitem_898 = copy__307 = None\n copy__308 = torch.ops.aten.copy_.default(arg456_1, getitem_899); arg456_1 = getitem_899 = copy__308 = None\n copy__309 = torch.ops.aten.copy_.default(arg457_1, getitem_900); arg457_1 = getitem_900 = copy__309 = None\n copy__310 = torch.ops.aten.copy_.default(arg458_1, getitem_901); arg458_1 = getitem_901 = copy__310 = None\n copy__311 = torch.ops.aten.copy_.default(arg459_1, getitem_902); arg459_1 = getitem_902 = copy__311 = None\n copy__312 = torch.ops.aten.copy_.default(arg460_1, getitem_903); arg460_1 = getitem_903 = copy__312 = None\n copy__313 = torch.ops.aten.copy_.default(arg461_1, getitem_904); arg461_1 = getitem_904 = copy__313 = None\n copy__314 = torch.ops.aten.copy_.default(arg462_1, getitem_905); arg462_1 = getitem_905 = copy__314 = None\n copy__315 = torch.ops.aten.copy_.default(arg463_1, getitem_906); arg463_1 = getitem_906 = copy__315 = None\n copy__316 = torch.ops.aten.copy_.default(arg464_1, getitem_907); arg464_1 = getitem_907 = copy__316 = None\n copy__317 = torch.ops.aten.copy_.default(arg465_1, getitem_908); arg465_1 = getitem_908 = copy__317 = None\n copy__318 = torch.ops.aten.copy_.default(arg466_1, getitem_909); arg466_1 = getitem_909 = copy__318 = None\n copy__319 = torch.ops.aten.copy_.default(arg467_1, getitem_910); arg467_1 = getitem_910 = copy__319 = None\n copy__320 = torch.ops.aten.copy_.default(arg468_1, getitem_911); arg468_1 = getitem_911 = copy__320 = None\n copy__321 = torch.ops.aten.copy_.default(arg469_1, getitem_912); arg469_1 = getitem_912 = copy__321 = None\n copy__322 = torch.ops.aten.copy_.default(arg470_1, getitem_913); arg470_1 = getitem_913 = copy__322 = None\n copy__323 = torch.ops.aten.copy_.default(arg471_1, getitem_914); arg471_1 = getitem_914 = copy__323 = None\n copy__324 = torch.ops.aten.copy_.default(arg472_1, getitem_915); arg472_1 = getitem_915 = copy__324 = None\n copy__325 = torch.ops.aten.copy_.default(arg473_1, getitem_916); arg473_1 = getitem_916 = copy__325 = None\n copy__326 = torch.ops.aten.copy_.default(arg474_1, getitem_917); arg474_1 = getitem_917 = copy__326 = None\n copy__327 = torch.ops.aten.copy_.default(arg475_1, getitem_918); arg475_1 = getitem_918 = copy__327 = None\n copy__328 = torch.ops.aten.copy_.default(arg476_1, getitem_919); arg476_1 = getitem_919 = copy__328 = None\n copy__329 = torch.ops.aten.copy_.default(arg477_1, getitem_920); arg477_1 = getitem_920 = copy__329 = None\n copy__330 = torch.ops.aten.copy_.default(arg478_1, getitem_921); arg478_1 = getitem_921 = copy__330 = None\n copy__331 = torch.ops.aten.copy_.default(arg479_1, getitem_922); arg479_1 = getitem_922 = copy__331 = None\n copy__332 = 
torch.ops.aten.copy_.default(arg480_1, getitem_923); arg480_1 = getitem_923 = copy__332 = None\n copy__333 = torch.ops.aten.copy_.default(arg481_1, getitem_924); arg481_1 = getitem_924 = copy__333 = None\n copy__334 = torch.ops.aten.copy_.default(arg482_1, getitem_925); arg482_1 = getitem_925 = copy__334 = None\n copy__335 = torch.ops.aten.copy_.default(arg483_1, getitem_926); arg483_1 = getitem_926 = copy__335 = None\n copy__336 = torch.ops.aten.copy_.default(arg484_1, getitem_927); arg484_1 = getitem_927 = copy__336 = None\n copy__337 = torch.ops.aten.copy_.default(arg485_1, getitem_928); arg485_1 = getitem_928 = copy__337 = None\n copy__338 = torch.ops.aten.copy_.default(arg486_1, getitem_929); arg486_1 = getitem_929 = copy__338 = None\n copy__339 = torch.ops.aten.copy_.default(arg487_1, getitem_930); arg487_1 = getitem_930 = copy__339 = None\n copy__340 = torch.ops.aten.copy_.default(arg488_1, getitem_931); arg488_1 = getitem_931 = copy__340 = None\n copy__341 = torch.ops.aten.copy_.default(arg489_1, getitem_932); arg489_1 = getitem_932 = copy__341 = None\n copy__342 = torch.ops.aten.copy_.default(arg490_1, getitem_933); arg490_1 = getitem_933 = copy__342 = None\n copy__343 = torch.ops.aten.copy_.default(arg491_1, getitem_934); arg491_1 = getitem_934 = copy__343 = None\n copy__344 = torch.ops.aten.copy_.default(arg492_1, getitem_935); arg492_1 = getitem_935 = copy__344 = None\n copy__345 = torch.ops.aten.copy_.default(arg493_1, getitem_936); arg493_1 = getitem_936 = copy__345 = None\n copy__346 = torch.ops.aten.copy_.default(arg494_1, getitem_937); arg494_1 = getitem_937 = copy__346 = None\n copy__347 = torch.ops.aten.copy_.default(arg495_1, getitem_938); arg495_1 = getitem_938 = copy__347 = None\n copy__348 = torch.ops.aten.copy_.default(arg496_1, getitem_939); arg496_1 = getitem_939 = copy__348 = None\n copy__349 = torch.ops.aten.copy_.default(arg497_1, getitem_940); arg497_1 = getitem_940 = copy__349 = None\n copy__350 = torch.ops.aten.copy_.default(arg498_1, getitem_941); arg498_1 = getitem_941 = copy__350 = None\n copy__351 = torch.ops.aten.copy_.default(arg499_1, getitem_942); arg499_1 = getitem_942 = copy__351 = None\n copy__352 = torch.ops.aten.copy_.default(arg500_1, getitem_943); arg500_1 = getitem_943 = copy__352 = None\n copy__353 = torch.ops.aten.copy_.default(arg501_1, getitem_944); arg501_1 = getitem_944 = copy__353 = None\n copy__354 = torch.ops.aten.copy_.default(arg502_1, getitem_945); arg502_1 = getitem_945 = copy__354 = None\n copy__355 = torch.ops.aten.copy_.default(arg503_1, getitem_946); arg503_1 = getitem_946 = copy__355 = None\n copy__356 = torch.ops.aten.copy_.default(arg504_1, getitem_947); arg504_1 = getitem_947 = copy__356 = None\n copy__357 = torch.ops.aten.copy_.default(arg505_1, getitem_948); arg505_1 = getitem_948 = copy__357 = None\n copy__358 = torch.ops.aten.copy_.default(arg506_1, getitem_949); arg506_1 = getitem_949 = copy__358 = None\n copy__359 = torch.ops.aten.copy_.default(arg507_1, getitem_950); arg507_1 = getitem_950 = copy__359 = None\n copy__360 = torch.ops.aten.copy_.default(arg508_1, getitem_951); arg508_1 = getitem_951 = copy__360 = None\n copy__361 = torch.ops.aten.copy_.default(arg509_1, getitem_952); arg509_1 = getitem_952 = copy__361 = None\n copy__362 = torch.ops.aten.copy_.default(arg510_1, getitem_953); arg510_1 = getitem_953 = copy__362 = None\n copy__363 = torch.ops.aten.copy_.default(arg511_1, getitem_954); arg511_1 = getitem_954 = copy__363 = None\n copy__364 = torch.ops.aten.copy_.default(arg512_1, getitem_955); arg512_1 = 
getitem_955 = copy__364 = None\n copy__365 = torch.ops.aten.copy_.default(arg513_1, getitem_956); arg513_1 = getitem_956 = copy__365 = None\n copy__366 = torch.ops.aten.copy_.default(arg514_1, getitem_957); arg514_1 = getitem_957 = copy__366 = None\n copy__367 = torch.ops.aten.copy_.default(arg515_1, getitem_958); arg515_1 = getitem_958 = copy__367 = None\n copy__368 = torch.ops.aten.copy_.default(arg516_1, getitem_959); arg516_1 = getitem_959 = copy__368 = None\n copy__369 = torch.ops.aten.copy_.default(arg517_1, getitem_960); arg517_1 = getitem_960 = copy__369 = None\n copy__370 = torch.ops.aten.copy_.default(arg518_1, getitem_961); arg518_1 = getitem_961 = copy__370 = None\n copy__371 = torch.ops.aten.copy_.default(arg519_1, getitem_962); arg519_1 = getitem_962 = copy__371 = None\n copy__372 = torch.ops.aten.copy_.default(arg520_1, getitem_963); arg520_1 = getitem_963 = copy__372 = None\n copy__373 = torch.ops.aten.copy_.default(arg521_1, getitem_964); arg521_1 = getitem_964 = copy__373 = None\n copy__374 = torch.ops.aten.copy_.default(arg522_1, getitem_965); arg522_1 = getitem_965 = copy__374 = None\n copy__375 = torch.ops.aten.copy_.default(arg523_1, getitem_966); arg523_1 = getitem_966 = copy__375 = None\n copy__376 = torch.ops.aten.copy_.default(arg524_1, getitem_967); arg524_1 = getitem_967 = copy__376 = None\n copy__377 = torch.ops.aten.copy_.default(arg525_1, getitem_968); arg525_1 = getitem_968 = copy__377 = None\n copy__378 = torch.ops.aten.copy_.default(arg526_1, getitem_969); arg526_1 = getitem_969 = copy__378 = None\n copy__379 = torch.ops.aten.copy_.default(arg527_1, getitem_970); arg527_1 = getitem_970 = copy__379 = None\n copy__380 = torch.ops.aten.copy_.default(arg528_1, getitem_971); arg528_1 = getitem_971 = copy__380 = None\n copy__381 = torch.ops.aten.copy_.default(arg529_1, getitem_972); arg529_1 = getitem_972 = copy__381 = None\n copy__382 = torch.ops.aten.copy_.default(arg530_1, getitem_973); arg530_1 = getitem_973 = copy__382 = None\n copy__383 = torch.ops.aten.copy_.default(arg531_1, getitem_974); arg531_1 = getitem_974 = copy__383 = None\n copy__384 = torch.ops.aten.copy_.default(arg532_1, getitem_975); arg532_1 = getitem_975 = copy__384 = None\n copy__385 = torch.ops.aten.copy_.default(arg533_1, getitem_976); arg533_1 = getitem_976 = copy__385 = None\n copy__386 = torch.ops.aten.copy_.default(arg534_1, getitem_977); arg534_1 = getitem_977 = copy__386 = None\n copy__387 = torch.ops.aten.copy_.default(arg535_1, getitem_978); arg535_1 = getitem_978 = copy__387 = None\n copy__388 = torch.ops.aten.copy_.default(arg536_1, getitem_979); arg536_1 = getitem_979 = copy__388 = None\n copy__389 = torch.ops.aten.copy_.default(arg537_1, getitem_980); arg537_1 = getitem_980 = copy__389 = None\n copy__390 = torch.ops.aten.copy_.default(arg538_1, getitem_981); arg538_1 = getitem_981 = copy__390 = None\n copy__391 = torch.ops.aten.copy_.default(arg539_1, getitem_982); arg539_1 = getitem_982 = copy__391 = None\n copy__392 = torch.ops.aten.copy_.default(arg540_1, getitem_983); arg540_1 = getitem_983 = copy__392 = None\n copy__393 = torch.ops.aten.copy_.default(arg541_1, getitem_984); arg541_1 = getitem_984 = copy__393 = None\n copy__394 = torch.ops.aten.copy_.default(arg542_1, getitem_985); arg542_1 = getitem_985 = copy__394 = None\n copy__395 = torch.ops.aten.copy_.default(arg543_1, getitem_986); arg543_1 = getitem_986 = copy__395 = None\n copy__396 = torch.ops.aten.copy_.default(arg544_1, getitem_987); arg544_1 = getitem_987 = copy__396 = None\n copy__397 = 
torch.ops.aten.copy_.default(arg545_1, getitem_988); arg545_1 = getitem_988 = copy__397 = None\n copy__398 = torch.ops.aten.copy_.default(arg546_1, getitem_989); arg546_1 = getitem_989 = copy__398 = None\n copy__399 = torch.ops.aten.copy_.default(arg547_1, getitem_990); arg547_1 = getitem_990 = copy__399 = None\n copy__400 = torch.ops.aten.copy_.default(arg548_1, getitem_991); arg548_1 = getitem_991 = copy__400 = None\n copy__401 = torch.ops.aten.copy_.default(arg549_1, getitem_992); arg549_1 = getitem_992 = copy__401 = None\n copy__402 = torch.ops.aten.copy_.default(arg550_1, getitem_993); arg550_1 = getitem_993 = copy__402 = None\n copy__403 = torch.ops.aten.copy_.default(arg551_1, getitem_994); arg551_1 = getitem_994 = copy__403 = None\n copy__404 = torch.ops.aten.copy_.default(arg552_1, getitem_995); arg552_1 = getitem_995 = copy__404 = None\n copy__405 = torch.ops.aten.copy_.default(arg553_1, getitem_996); arg553_1 = getitem_996 = copy__405 = None\n copy__406 = torch.ops.aten.copy_.default(arg554_1, getitem_997); arg554_1 = getitem_997 = copy__406 = None\n copy__407 = torch.ops.aten.copy_.default(arg555_1, getitem_998); arg555_1 = getitem_998 = copy__407 = None\n copy__408 = torch.ops.aten.copy_.default(arg556_1, getitem_999); arg556_1 = getitem_999 = copy__408 = None\n copy__409 = torch.ops.aten.copy_.default(arg557_1, getitem_1000); arg557_1 = getitem_1000 = copy__409 = None\n copy__410 = torch.ops.aten.copy_.default(arg558_1, getitem_1001); arg558_1 = getitem_1001 = copy__410 = None\n copy__411 = torch.ops.aten.copy_.default(arg559_1, getitem_1002); arg559_1 = getitem_1002 = copy__411 = None\n copy__412 = torch.ops.aten.copy_.default(arg560_1, getitem_1003); arg560_1 = getitem_1003 = copy__412 = None\n copy__413 = torch.ops.aten.copy_.default(arg561_1, getitem_1004); arg561_1 = getitem_1004 = copy__413 = None\n copy__414 = torch.ops.aten.copy_.default(arg562_1, getitem_1005); arg562_1 = getitem_1005 = copy__414 = None\n copy__415 = torch.ops.aten.copy_.default(arg563_1, getitem_1006); arg563_1 = getitem_1006 = copy__415 = None\n copy__416 = torch.ops.aten.copy_.default(arg564_1, getitem_1007); arg564_1 = getitem_1007 = copy__416 = None\n copy__417 = torch.ops.aten.copy_.default(arg565_1, getitem_1008); arg565_1 = getitem_1008 = copy__417 = None\n copy__418 = torch.ops.aten.copy_.default(arg566_1, getitem_1009); arg566_1 = getitem_1009 = copy__418 = None\n copy__419 = torch.ops.aten.copy_.default(arg567_1, getitem_1010); arg567_1 = getitem_1010 = copy__419 = None\n copy__420 = torch.ops.aten.copy_.default(arg568_1, getitem_1011); arg568_1 = getitem_1011 = copy__420 = None\n copy__421 = torch.ops.aten.copy_.default(arg569_1, getitem_1012); arg569_1 = getitem_1012 = copy__421 = None\n copy__422 = torch.ops.aten.copy_.default(arg570_1, getitem_1013); arg570_1 = getitem_1013 = copy__422 = None\n copy__423 = torch.ops.aten.copy_.default(arg571_1, getitem_1014); arg571_1 = getitem_1014 = copy__423 = None\n copy__424 = torch.ops.aten.copy_.default(arg572_1, getitem_1015); arg572_1 = getitem_1015 = copy__424 = None\n copy__425 = torch.ops.aten.copy_.default(arg573_1, getitem_1016); arg573_1 = getitem_1016 = copy__425 = None\n copy__426 = torch.ops.aten.copy_.default(arg574_1, getitem_1017); arg574_1 = getitem_1017 = copy__426 = None\n copy__427 = torch.ops.aten.copy_.default(arg575_1, getitem_1018); arg575_1 = getitem_1018 = copy__427 = None\n copy__428 = torch.ops.aten.copy_.default(arg576_1, getitem_1019); arg576_1 = getitem_1019 = copy__428 = None\n copy__429 = 
torch.ops.aten.copy_.default(arg577_1, getitem_1020); arg577_1 = getitem_1020 = copy__429 = None\n copy__430 = torch.ops.aten.copy_.default(arg578_1, getitem_1021); arg578_1 = getitem_1021 = copy__430 = None\n copy__431 = torch.ops.aten.copy_.default(arg579_1, getitem_1022); arg579_1 = getitem_1022 = copy__431 = None\n copy__432 = torch.ops.aten.copy_.default(arg580_1, getitem_1023); arg580_1 = getitem_1023 = copy__432 = None\n copy__433 = torch.ops.aten.copy_.default(arg581_1, getitem_1024); arg581_1 = getitem_1024 = copy__433 = None\n copy__434 = torch.ops.aten.copy_.default(arg582_1, getitem_1025); arg582_1 = getitem_1025 = copy__434 = None\n copy__435 = torch.ops.aten.copy_.default(arg583_1, getitem_1026); arg583_1 = getitem_1026 = copy__435 = None\n copy__436 = torch.ops.aten.copy_.default(arg584_1, getitem_1027); arg584_1 = getitem_1027 = copy__436 = None\n copy__437 = torch.ops.aten.copy_.default(arg585_1, getitem_1028); arg585_1 = getitem_1028 = copy__437 = None\n copy__438 = torch.ops.aten.copy_.default(arg586_1, getitem_1029); arg586_1 = getitem_1029 = copy__438 = None\n copy__439 = torch.ops.aten.copy_.default(arg587_1, getitem_1030); arg587_1 = getitem_1030 = copy__439 = None\n copy__440 = torch.ops.aten.copy_.default(arg588_1, getitem_1031); arg588_1 = getitem_1031 = copy__440 = None\n copy__441 = torch.ops.aten.copy_.default(arg589_1, getitem_1032); arg589_1 = getitem_1032 = copy__441 = None\n copy__442 = torch.ops.aten.copy_.default(arg590_1, getitem_1033); arg590_1 = getitem_1033 = copy__442 = None\n copy__443 = torch.ops.aten.copy_.default(arg591_1, getitem_1034); arg591_1 = getitem_1034 = copy__443 = None\n copy__444 = torch.ops.aten.copy_.default(arg592_1, getitem_1035); arg592_1 = getitem_1035 = copy__444 = None\n copy__445 = torch.ops.aten.copy_.default(arg593_1, getitem); arg593_1 = getitem = copy__445 = None\n copy__446 = torch.ops.aten.copy_.default(arg594_1, getitem_2); arg594_1 = getitem_2 = copy__446 = None\n copy__447 = torch.ops.aten.copy_.default(arg595_1, getitem_3); arg595_1 = getitem_3 = copy__447 = None\n copy__448 = torch.ops.aten.copy_.default(arg596_1, getitem_4); arg596_1 = getitem_4 = copy__448 = None\n copy__449 = torch.ops.aten.copy_.default(arg597_1, getitem_5); arg597_1 = getitem_5 = copy__449 = None\n copy__450 = torch.ops.aten.copy_.default(arg598_1, getitem_6); arg598_1 = getitem_6 = copy__450 = None\n copy__451 = torch.ops.aten.copy_.default(arg599_1, getitem_7); arg599_1 = getitem_7 = copy__451 = None\n copy__452 = torch.ops.aten.copy_.default(arg600_1, getitem_8); arg600_1 = getitem_8 = copy__452 = None\n copy__453 = torch.ops.aten.copy_.default(arg601_1, getitem_9); arg601_1 = getitem_9 = copy__453 = None\n copy__454 = torch.ops.aten.copy_.default(arg602_1, getitem_10); arg602_1 = getitem_10 = copy__454 = None\n copy__455 = torch.ops.aten.copy_.default(arg603_1, getitem_11); arg603_1 = getitem_11 = copy__455 = None\n copy__456 = torch.ops.aten.copy_.default(arg604_1, getitem_12); arg604_1 = getitem_12 = copy__456 = None\n copy__457 = torch.ops.aten.copy_.default(arg605_1, getitem_13); arg605_1 = getitem_13 = copy__457 = None\n copy__458 = torch.ops.aten.copy_.default(arg606_1, getitem_14); arg606_1 = getitem_14 = copy__458 = None\n copy__459 = torch.ops.aten.copy_.default(arg607_1, getitem_15); arg607_1 = getitem_15 = copy__459 = None\n copy__460 = torch.ops.aten.copy_.default(arg608_1, getitem_16); arg608_1 = getitem_16 = copy__460 = None\n copy__461 = torch.ops.aten.copy_.default(arg609_1, getitem_17); arg609_1 = getitem_17 = copy__461 = 
None\n copy__462 = torch.ops.aten.copy_.default(arg610_1, getitem_18); arg610_1 = getitem_18 = copy__462 = None\n copy__463 = torch.ops.aten.copy_.default(arg611_1, getitem_19); arg611_1 = getitem_19 = copy__463 = None\n copy__464 = torch.ops.aten.copy_.default(arg612_1, getitem_20); arg612_1 = getitem_20 = copy__464 = None\n copy__465 = torch.ops.aten.copy_.default(arg613_1, getitem_21); arg613_1 = getitem_21 = copy__465 = None\n copy__466 = torch.ops.aten.copy_.default(arg614_1, getitem_22); arg614_1 = getitem_22 = copy__466 = None\n copy__467 = torch.ops.aten.copy_.default(arg615_1, getitem_23); arg615_1 = getitem_23 = copy__467 = None\n copy__468 = torch.ops.aten.copy_.default(arg616_1, getitem_24); arg616_1 = getitem_24 = copy__468 = None\n copy__469 = torch.ops.aten.copy_.default(arg617_1, getitem_25); arg617_1 = getitem_25 = copy__469 = None\n copy__470 = torch.ops.aten.copy_.default(arg618_1, getitem_26); arg618_1 = getitem_26 = copy__470 = None\n copy__471 = torch.ops.aten.copy_.default(arg619_1, getitem_27); arg619_1 = getitem_27 = copy__471 = None\n copy__472 = torch.ops.aten.copy_.default(arg620_1, getitem_28); arg620_1 = getitem_28 = copy__472 = None\n copy__473 = torch.ops.aten.copy_.default(arg621_1, getitem_29); arg621_1 = getitem_29 = copy__473 = None\n copy__474 = torch.ops.aten.copy_.default(arg622_1, getitem_30); arg622_1 = getitem_30 = copy__474 = None\n copy__475 = torch.ops.aten.copy_.default(arg623_1, getitem_31); arg623_1 = getitem_31 = copy__475 = None\n copy__476 = torch.ops.aten.copy_.default(arg624_1, getitem_32); arg624_1 = getitem_32 = copy__476 = None\n copy__477 = torch.ops.aten.copy_.default(arg625_1, getitem_33); arg625_1 = getitem_33 = copy__477 = None\n copy__478 = torch.ops.aten.copy_.default(arg626_1, getitem_34); arg626_1 = getitem_34 = copy__478 = None\n copy__479 = torch.ops.aten.copy_.default(arg627_1, getitem_35); arg627_1 = getitem_35 = copy__479 = None\n copy__480 = torch.ops.aten.copy_.default(arg628_1, getitem_36); arg628_1 = getitem_36 = copy__480 = None\n copy__481 = torch.ops.aten.copy_.default(arg629_1, getitem_37); arg629_1 = getitem_37 = copy__481 = None\n copy__482 = torch.ops.aten.copy_.default(arg630_1, getitem_38); arg630_1 = getitem_38 = copy__482 = None\n copy__483 = torch.ops.aten.copy_.default(arg631_1, getitem_39); arg631_1 = getitem_39 = copy__483 = None\n copy__484 = torch.ops.aten.copy_.default(arg632_1, getitem_40); arg632_1 = getitem_40 = copy__484 = None\n copy__485 = torch.ops.aten.copy_.default(arg633_1, getitem_41); arg633_1 = getitem_41 = copy__485 = None\n copy__486 = torch.ops.aten.copy_.default(arg634_1, getitem_42); arg634_1 = getitem_42 = copy__486 = None\n copy__487 = torch.ops.aten.copy_.default(arg635_1, getitem_43); arg635_1 = getitem_43 = copy__487 = None\n copy__488 = torch.ops.aten.copy_.default(arg636_1, getitem_44); arg636_1 = getitem_44 = copy__488 = None\n copy__489 = torch.ops.aten.copy_.default(arg637_1, getitem_45); arg637_1 = getitem_45 = copy__489 = None\n copy__490 = torch.ops.aten.copy_.default(arg638_1, getitem_46); arg638_1 = getitem_46 = copy__490 = None\n copy__491 = torch.ops.aten.copy_.default(arg639_1, getitem_47); arg639_1 = getitem_47 = copy__491 = None\n copy__492 = torch.ops.aten.copy_.default(arg640_1, getitem_48); arg640_1 = getitem_48 = copy__492 = None\n copy__493 = torch.ops.aten.copy_.default(arg641_1, getitem_49); arg641_1 = getitem_49 = copy__493 = None\n copy__494 = torch.ops.aten.copy_.default(arg642_1, getitem_50); arg642_1 = getitem_50 = copy__494 = None\n copy__495 = 
torch.ops.aten.copy_.default(arg643_1, getitem_51); arg643_1 = getitem_51 = copy__495 = None\n copy__496 = torch.ops.aten.copy_.default(arg644_1, getitem_52); arg644_1 = getitem_52 = copy__496 = None\n copy__497 = torch.ops.aten.copy_.default(arg645_1, getitem_53); arg645_1 = getitem_53 = copy__497 = None\n copy__498 = torch.ops.aten.copy_.default(arg646_1, getitem_54); arg646_1 = getitem_54 = copy__498 = None\n copy__499 = torch.ops.aten.copy_.default(arg647_1, getitem_55); arg647_1 = getitem_55 = copy__499 = None\n copy__500 = torch.ops.aten.copy_.default(arg648_1, getitem_56); arg648_1 = getitem_56 = copy__500 = None\n copy__501 = torch.ops.aten.copy_.default(arg649_1, getitem_57); arg649_1 = getitem_57 = copy__501 = None\n copy__502 = torch.ops.aten.copy_.default(arg650_1, getitem_58); arg650_1 = getitem_58 = copy__502 = None\n copy__503 = torch.ops.aten.copy_.default(arg651_1, getitem_59); arg651_1 = getitem_59 = copy__503 = None\n copy__504 = torch.ops.aten.copy_.default(arg652_1, getitem_60); arg652_1 = getitem_60 = copy__504 = None\n copy__505 = torch.ops.aten.copy_.default(arg653_1, getitem_61); arg653_1 = getitem_61 = copy__505 = None\n copy__506 = torch.ops.aten.copy_.default(arg654_1, getitem_62); arg654_1 = getitem_62 = copy__506 = None\n copy__507 = torch.ops.aten.copy_.default(arg655_1, getitem_63); arg655_1 = getitem_63 = copy__507 = None\n copy__508 = torch.ops.aten.copy_.default(arg656_1, getitem_64); arg656_1 = getitem_64 = copy__508 = None\n copy__509 = torch.ops.aten.copy_.default(arg657_1, getitem_65); arg657_1 = getitem_65 = copy__509 = None\n copy__510 = torch.ops.aten.copy_.default(arg658_1, getitem_66); arg658_1 = getitem_66 = copy__510 = None\n copy__511 = torch.ops.aten.copy_.default(arg659_1, getitem_67); arg659_1 = getitem_67 = copy__511 = None\n copy__512 = torch.ops.aten.copy_.default(arg660_1, getitem_68); arg660_1 = getitem_68 = copy__512 = None\n copy__513 = torch.ops.aten.copy_.default(arg661_1, getitem_69); arg661_1 = getitem_69 = copy__513 = None\n copy__514 = torch.ops.aten.copy_.default(arg662_1, getitem_70); arg662_1 = getitem_70 = copy__514 = None\n copy__515 = torch.ops.aten.copy_.default(arg663_1, getitem_71); arg663_1 = getitem_71 = copy__515 = None\n copy__516 = torch.ops.aten.copy_.default(arg664_1, getitem_72); arg664_1 = getitem_72 = copy__516 = None\n copy__517 = torch.ops.aten.copy_.default(arg665_1, getitem_73); arg665_1 = getitem_73 = copy__517 = None\n copy__518 = torch.ops.aten.copy_.default(arg666_1, getitem_74); arg666_1 = getitem_74 = copy__518 = None\n copy__519 = torch.ops.aten.copy_.default(arg667_1, getitem_75); arg667_1 = getitem_75 = copy__519 = None\n copy__520 = torch.ops.aten.copy_.default(arg668_1, getitem_76); arg668_1 = getitem_76 = copy__520 = None\n copy__521 = torch.ops.aten.copy_.default(arg669_1, getitem_77); arg669_1 = getitem_77 = copy__521 = None\n copy__522 = torch.ops.aten.copy_.default(arg670_1, getitem_78); arg670_1 = getitem_78 = copy__522 = None\n copy__523 = torch.ops.aten.copy_.default(arg671_1, getitem_79); arg671_1 = getitem_79 = copy__523 = None\n copy__524 = torch.ops.aten.copy_.default(arg672_1, getitem_80); arg672_1 = getitem_80 = copy__524 = None\n copy__525 = torch.ops.aten.copy_.default(arg673_1, getitem_81); arg673_1 = getitem_81 = copy__525 = None\n copy__526 = torch.ops.aten.copy_.default(arg674_1, getitem_82); arg674_1 = getitem_82 = copy__526 = None\n copy__527 = torch.ops.aten.copy_.default(arg675_1, getitem_83); arg675_1 = getitem_83 = copy__527 = None\n copy__528 = 
torch.ops.aten.copy_.default(arg676_1, getitem_84); arg676_1 = getitem_84 = copy__528 = None\n copy__529 = torch.ops.aten.copy_.default(arg677_1, getitem_85); arg677_1 = getitem_85 = copy__529 = None\n copy__530 = torch.ops.aten.copy_.default(arg678_1, getitem_86); arg678_1 = getitem_86 = copy__530 = None\n copy__531 = torch.ops.aten.copy_.default(arg679_1, getitem_87); arg679_1 = getitem_87 = copy__531 = None\n copy__532 = torch.ops.aten.copy_.default(arg680_1, getitem_88); arg680_1 = getitem_88 = copy__532 = None\n copy__533 = torch.ops.aten.copy_.default(arg681_1, getitem_89); arg681_1 = getitem_89 = copy__533 = None\n copy__534 = torch.ops.aten.copy_.default(arg682_1, getitem_90); arg682_1 = getitem_90 = copy__534 = None\n copy__535 = torch.ops.aten.copy_.default(arg683_1, getitem_91); arg683_1 = getitem_91 = copy__535 = None\n copy__536 = torch.ops.aten.copy_.default(arg684_1, getitem_92); arg684_1 = getitem_92 = copy__536 = None\n copy__537 = torch.ops.aten.copy_.default(arg685_1, getitem_93); arg685_1 = getitem_93 = copy__537 = None\n copy__538 = torch.ops.aten.copy_.default(arg686_1, getitem_94); arg686_1 = getitem_94 = copy__538 = None\n copy__539 = torch.ops.aten.copy_.default(arg687_1, getitem_95); arg687_1 = getitem_95 = copy__539 = None\n copy__540 = torch.ops.aten.copy_.default(arg688_1, getitem_96); arg688_1 = getitem_96 = copy__540 = None\n copy__541 = torch.ops.aten.copy_.default(arg689_1, getitem_97); arg689_1 = getitem_97 = copy__541 = None\n copy__542 = torch.ops.aten.copy_.default(arg690_1, getitem_98); arg690_1 = getitem_98 = copy__542 = None\n copy__543 = torch.ops.aten.copy_.default(arg691_1, getitem_99); arg691_1 = getitem_99 = copy__543 = None\n copy__544 = torch.ops.aten.copy_.default(arg692_1, getitem_100); arg692_1 = getitem_100 = copy__544 = None\n copy__545 = torch.ops.aten.copy_.default(arg693_1, getitem_101); arg693_1 = getitem_101 = copy__545 = None\n copy__546 = torch.ops.aten.copy_.default(arg694_1, getitem_102); arg694_1 = getitem_102 = copy__546 = None\n copy__547 = torch.ops.aten.copy_.default(arg695_1, getitem_103); arg695_1 = getitem_103 = copy__547 = None\n copy__548 = torch.ops.aten.copy_.default(arg696_1, getitem_104); arg696_1 = getitem_104 = copy__548 = None\n copy__549 = torch.ops.aten.copy_.default(arg697_1, getitem_105); arg697_1 = getitem_105 = copy__549 = None\n copy__550 = torch.ops.aten.copy_.default(arg698_1, getitem_106); arg698_1 = getitem_106 = copy__550 = None\n copy__551 = torch.ops.aten.copy_.default(arg699_1, getitem_107); arg699_1 = getitem_107 = copy__551 = None\n copy__552 = torch.ops.aten.copy_.default(arg700_1, getitem_108); arg700_1 = getitem_108 = copy__552 = None\n copy__553 = torch.ops.aten.copy_.default(arg701_1, getitem_109); arg701_1 = getitem_109 = copy__553 = None\n copy__554 = torch.ops.aten.copy_.default(arg702_1, getitem_110); arg702_1 = getitem_110 = copy__554 = None\n copy__555 = torch.ops.aten.copy_.default(arg703_1, getitem_111); arg703_1 = getitem_111 = copy__555 = None\n copy__556 = torch.ops.aten.copy_.default(arg704_1, getitem_112); arg704_1 = getitem_112 = copy__556 = None\n copy__557 = torch.ops.aten.copy_.default(arg705_1, getitem_113); arg705_1 = getitem_113 = copy__557 = None\n copy__558 = torch.ops.aten.copy_.default(arg706_1, getitem_114); arg706_1 = getitem_114 = copy__558 = None\n copy__559 = torch.ops.aten.copy_.default(arg707_1, getitem_115); arg707_1 = getitem_115 = copy__559 = None\n copy__560 = torch.ops.aten.copy_.default(arg708_1, getitem_116); arg708_1 = getitem_116 = copy__560 = None\n 
copy__561 = torch.ops.aten.copy_.default(arg709_1, getitem_117); arg709_1 = getitem_117 = copy__561 = None\n copy__562 = torch.ops.aten.copy_.default(arg710_1, getitem_118); arg710_1 = getitem_118 = copy__562 = None\n copy__563 = torch.ops.aten.copy_.default(arg711_1, getitem_119); arg711_1 = getitem_119 = copy__563 = None\n copy__564 = torch.ops.aten.copy_.default(arg712_1, getitem_120); arg712_1 = getitem_120 = copy__564 = None\n copy__565 = torch.ops.aten.copy_.default(arg713_1, getitem_121); arg713_1 = getitem_121 = copy__565 = None\n copy__566 = torch.ops.aten.copy_.default(arg714_1, getitem_122); arg714_1 = getitem_122 = copy__566 = None\n copy__567 = torch.ops.aten.copy_.default(arg715_1, getitem_123); arg715_1 = getitem_123 = copy__567 = None\n copy__568 = torch.ops.aten.copy_.default(arg716_1, getitem_124); arg716_1 = getitem_124 = copy__568 = None\n copy__569 = torch.ops.aten.copy_.default(arg717_1, getitem_125); arg717_1 = getitem_125 = copy__569 = None\n copy__570 = torch.ops.aten.copy_.default(arg718_1, getitem_126); arg718_1 = getitem_126 = copy__570 = None\n copy__571 = torch.ops.aten.copy_.default(arg719_1, getitem_127); arg719_1 = getitem_127 = copy__571 = None\n copy__572 = torch.ops.aten.copy_.default(arg720_1, getitem_128); arg720_1 = getitem_128 = copy__572 = None\n copy__573 = torch.ops.aten.copy_.default(arg721_1, getitem_129); arg721_1 = getitem_129 = copy__573 = None\n copy__574 = torch.ops.aten.copy_.default(arg722_1, getitem_130); arg722_1 = getitem_130 = copy__574 = None\n copy__575 = torch.ops.aten.copy_.default(arg723_1, getitem_131); arg723_1 = getitem_131 = copy__575 = None\n copy__576 = torch.ops.aten.copy_.default(arg724_1, getitem_132); arg724_1 = getitem_132 = copy__576 = None\n copy__577 = torch.ops.aten.copy_.default(arg725_1, getitem_133); arg725_1 = getitem_133 = copy__577 = None\n copy__578 = torch.ops.aten.copy_.default(arg726_1, getitem_134); arg726_1 = getitem_134 = copy__578 = None\n copy__579 = torch.ops.aten.copy_.default(arg727_1, getitem_135); arg727_1 = getitem_135 = copy__579 = None\n copy__580 = torch.ops.aten.copy_.default(arg728_1, getitem_136); arg728_1 = getitem_136 = copy__580 = None\n copy__581 = torch.ops.aten.copy_.default(arg729_1, getitem_137); arg729_1 = getitem_137 = copy__581 = None\n copy__582 = torch.ops.aten.copy_.default(arg730_1, getitem_138); arg730_1 = getitem_138 = copy__582 = None\n copy__583 = torch.ops.aten.copy_.default(arg731_1, getitem_139); arg731_1 = getitem_139 = copy__583 = None\n copy__584 = torch.ops.aten.copy_.default(arg732_1, getitem_140); arg732_1 = getitem_140 = copy__584 = None\n copy__585 = torch.ops.aten.copy_.default(arg733_1, getitem_141); arg733_1 = getitem_141 = copy__585 = None\n copy__586 = torch.ops.aten.copy_.default(arg734_1, getitem_142); arg734_1 = getitem_142 = copy__586 = None\n copy__587 = torch.ops.aten.copy_.default(arg735_1, getitem_143); arg735_1 = getitem_143 = copy__587 = None\n copy__588 = torch.ops.aten.copy_.default(arg736_1, getitem_144); arg736_1 = getitem_144 = copy__588 = None\n copy__589 = torch.ops.aten.copy_.default(arg737_1, getitem_145); arg737_1 = getitem_145 = copy__589 = None\n copy__590 = torch.ops.aten.copy_.default(arg738_1, getitem_146); arg738_1 = getitem_146 = copy__590 = None\n copy__591 = torch.ops.aten.copy_.default(arg739_1, getitem_147); arg739_1 = getitem_147 = copy__591 = None\n return ()\n \n# To see more debug info, please use `graph_module.print_readable()`", + "[l36txgwatro2oumaemq3fn7fzd6c7rqt7jkya4whofwgykwhyac] example_inputs[0]: 
TensorMetadata(dtype=torch.float32, shape=torch.Size([50304, 768]), stride=(768, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=154533888, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[j2qdbepspnv5xu5aaa4vfybenn6kcipqnlg4axmhoebbdlhdfcg] example_inputs[1]: TensorMetadata(dtype=torch.float32, shape=torch.Size([1024, 768]), stride=(768, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=3145728, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[ymueslj5cs5k2l243gcdaohltqzpminou4x5gltmu2cgjgroyyd] example_inputs[2]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=3072, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[ymueslj5cs5k2l243gcdaohltqzpminou4x5gltmu2cgjgroyyd] example_inputs[3]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=3072, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[ku6bnu4u7osvly4jsmf2w2orf22tghfytwzq6ycj3otmnq6f3wn] example_inputs[4]: TensorMetadata(dtype=torch.float32, shape=torch.Size([2304, 768]), stride=(768, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=7077888, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[57h3bzk67jreuqiikhvgdtcn2mdkz7xuwhdnrjueg4dxxn6tfzm] example_inputs[5]: TensorMetadata(dtype=torch.float32, shape=torch.Size([2304]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=9216, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[a64xnvbb6q6gew5c3aovaeqivj3hstrmxkbra3eidivx42znnuv] example_inputs[6]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768, 768]), stride=(768, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=2359296, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[ymueslj5cs5k2l243gcdaohltqzpminou4x5gltmu2cgjgroyyd] example_inputs[7]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=3072, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", 
+ "[ymueslj5cs5k2l243gcdaohltqzpminou4x5gltmu2cgjgroyyd] example_inputs[8]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=3072, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[ymueslj5cs5k2l243gcdaohltqzpminou4x5gltmu2cgjgroyyd] example_inputs[9]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=3072, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[aafg74anlk7yqmlg6ucup4tmjwhskblh75z227dcfmwpfzw7dzm] example_inputs[10]: TensorMetadata(dtype=torch.float32, shape=torch.Size([3072, 768]), stride=(768, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=9437184, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[sefiqkkteehwsnilwpljkx7vdgondub2hwggoishzmx7o5etmjk] example_inputs[11]: TensorMetadata(dtype=torch.float32, shape=torch.Size([3072]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=12288, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[6v4azk5hvrkyq22k6mfa2l7ovaddatmim7mcvcpdku2ieirjd64] example_inputs[12]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768, 3072]), stride=(3072, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=9437184, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[ymueslj5cs5k2l243gcdaohltqzpminou4x5gltmu2cgjgroyyd] example_inputs[13]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=3072, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[ymueslj5cs5k2l243gcdaohltqzpminou4x5gltmu2cgjgroyyd] example_inputs[14]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=3072, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[ymueslj5cs5k2l243gcdaohltqzpminou4x5gltmu2cgjgroyyd] example_inputs[15]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=3072, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, 
is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)",
+ "[ku6bnu4u7osvly4jsmf2w2orf22tghfytwzq6ycj3otmnq6f3wn] example_inputs[16]: TensorMetadata(dtype=torch.float32, shape=torch.Size([2304, 768]), stride=(768, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=7077888, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)",
+ "[57h3bzk67jreuqiikhvgdtcn2mdkz7xuwhdnrjueg4dxxn6tfzm] example_inputs[17]: TensorMetadata(dtype=torch.float32, shape=torch.Size([2304]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=9216, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)",
+ "[a64xnvbb6q6gew5c3aovaeqivj3hstrmxkbra3eidivx42znnuv] example_inputs[18]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768, 768]), stride=(768, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=2359296, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)",
+ "[ymueslj5cs5k2l243gcdaohltqzpminou4x5gltmu2cgjgroyyd] example_inputs[19]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=3072, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)",
+ "[ymueslj5cs5k2l243gcdaohltqzpminou4x5gltmu2cgjgroyyd] example_inputs[20]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=3072, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)",
+ "[ymueslj5cs5k2l243gcdaohltqzpminou4x5gltmu2cgjgroyyd] example_inputs[21]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=3072, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)",
+ "[aafg74anlk7yqmlg6ucup4tmjwhskblh75z227dcfmwpfzw7dzm] example_inputs[22]: TensorMetadata(dtype=torch.float32, shape=torch.Size([3072, 768]), stride=(768, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=9437184, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)",
+ "[sefiqkkteehwsnilwpljkx7vdgondub2hwggoishzmx7o5etmjk] example_inputs[23]: TensorMetadata(dtype=torch.float32, shape=torch.Size([3072]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=12288, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)",
+ "[6v4azk5hvrkyq22k6mfa2l7ovaddatmim7mcvcpdku2ieirjd64] example_inputs[24]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768, 3072]), stride=(3072, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=9437184, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)",
+ "[ymueslj5cs5k2l243gcdaohltqzpminou4x5gltmu2cgjgroyyd] example_inputs[25]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=3072, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)",
+ "[ymueslj5cs5k2l243gcdaohltqzpminou4x5gltmu2cgjgroyyd] example_inputs[26]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=3072, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)",
+ "[ymueslj5cs5k2l243gcdaohltqzpminou4x5gltmu2cgjgroyyd] example_inputs[27]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=3072, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)",
+ "[ku6bnu4u7osvly4jsmf2w2orf22tghfytwzq6ycj3otmnq6f3wn] example_inputs[28]: TensorMetadata(dtype=torch.float32, shape=torch.Size([2304, 768]), stride=(768, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=7077888, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)",
+ "[57h3bzk67jreuqiikhvgdtcn2mdkz7xuwhdnrjueg4dxxn6tfzm] example_inputs[29]: TensorMetadata(dtype=torch.float32, shape=torch.Size([2304]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=9216, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)",
+ "[a64xnvbb6q6gew5c3aovaeqivj3hstrmxkbra3eidivx42znnuv] example_inputs[30]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768, 768]), stride=(768, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=2359296, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)",
+ "[ymueslj5cs5k2l243gcdaohltqzpminou4x5gltmu2cgjgroyyd] example_inputs[31]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=3072, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)",
+ "[ymueslj5cs5k2l243gcdaohltqzpminou4x5gltmu2cgjgroyyd] example_inputs[32]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=3072, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)",
+ "[ymueslj5cs5k2l243gcdaohltqzpminou4x5gltmu2cgjgroyyd] example_inputs[33]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=3072, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)",
+ "[aafg74anlk7yqmlg6ucup4tmjwhskblh75z227dcfmwpfzw7dzm] example_inputs[34]: TensorMetadata(dtype=torch.float32, shape=torch.Size([3072, 768]), stride=(768, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=9437184, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)",
+ "[sefiqkkteehwsnilwpljkx7vdgondub2hwggoishzmx7o5etmjk] example_inputs[35]: TensorMetadata(dtype=torch.float32, shape=torch.Size([3072]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=12288, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)",
+ "[6v4azk5hvrkyq22k6mfa2l7ovaddatmim7mcvcpdku2ieirjd64] example_inputs[36]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768, 3072]), stride=(3072, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=9437184, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)",
+ "[ymueslj5cs5k2l243gcdaohltqzpminou4x5gltmu2cgjgroyyd] example_inputs[37]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=3072, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)",
+ "[ymueslj5cs5k2l243gcdaohltqzpminou4x5gltmu2cgjgroyyd] example_inputs[38]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=3072, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)",
+ "[ymueslj5cs5k2l243gcdaohltqzpminou4x5gltmu2cgjgroyyd] example_inputs[39]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=3072, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)",
+ "[ku6bnu4u7osvly4jsmf2w2orf22tghfytwzq6ycj3otmnq6f3wn] example_inputs[40]: TensorMetadata(dtype=torch.float32, shape=torch.Size([2304, 768]), stride=(768, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=7077888, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)",
+ "[57h3bzk67jreuqiikhvgdtcn2mdkz7xuwhdnrjueg4dxxn6tfzm] example_inputs[41]: TensorMetadata(dtype=torch.float32, shape=torch.Size([2304]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=9216, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)",
+ "[a64xnvbb6q6gew5c3aovaeqivj3hstrmxkbra3eidivx42znnuv] example_inputs[42]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768, 768]), stride=(768, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=2359296, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)",
+ "[ymueslj5cs5k2l243gcdaohltqzpminou4x5gltmu2cgjgroyyd] example_inputs[43]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=3072, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)",
+ "[ymueslj5cs5k2l243gcdaohltqzpminou4x5gltmu2cgjgroyyd] example_inputs[44]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=3072, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)",
+ "[ymueslj5cs5k2l243gcdaohltqzpminou4x5gltmu2cgjgroyyd] example_inputs[45]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=3072, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)",
+ "[aafg74anlk7yqmlg6ucup4tmjwhskblh75z227dcfmwpfzw7dzm] example_inputs[46]: TensorMetadata(dtype=torch.float32, shape=torch.Size([3072, 768]), stride=(768, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=9437184, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)",
+ "[sefiqkkteehwsnilwpljkx7vdgondub2hwggoishzmx7o5etmjk] example_inputs[47]: TensorMetadata(dtype=torch.float32, shape=torch.Size([3072]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=12288, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)",
+ "[6v4azk5hvrkyq22k6mfa2l7ovaddatmim7mcvcpdku2ieirjd64] example_inputs[48]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768, 3072]), stride=(3072, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=9437184, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)",
+ "[ymueslj5cs5k2l243gcdaohltqzpminou4x5gltmu2cgjgroyyd] example_inputs[49]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=3072, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)",
+ "[ymueslj5cs5k2l243gcdaohltqzpminou4x5gltmu2cgjgroyyd] example_inputs[50]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=3072, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)",
+ "[ymueslj5cs5k2l243gcdaohltqzpminou4x5gltmu2cgjgroyyd] example_inputs[51]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=3072, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)",
+ "[ku6bnu4u7osvly4jsmf2w2orf22tghfytwzq6ycj3otmnq6f3wn] example_inputs[52]: TensorMetadata(dtype=torch.float32, shape=torch.Size([2304, 768]), stride=(768, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=7077888, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)",
+ "[57h3bzk67jreuqiikhvgdtcn2mdkz7xuwhdnrjueg4dxxn6tfzm] example_inputs[53]: TensorMetadata(dtype=torch.float32, shape=torch.Size([2304]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=9216, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)",
+ "[a64xnvbb6q6gew5c3aovaeqivj3hstrmxkbra3eidivx42znnuv] example_inputs[54]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768, 768]), stride=(768, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=2359296, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)",
+ "[ymueslj5cs5k2l243gcdaohltqzpminou4x5gltmu2cgjgroyyd] example_inputs[55]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=3072, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)",
+ "[ymueslj5cs5k2l243gcdaohltqzpminou4x5gltmu2cgjgroyyd] example_inputs[56]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=3072, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)",
+ "[ymueslj5cs5k2l243gcdaohltqzpminou4x5gltmu2cgjgroyyd] example_inputs[57]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=3072, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)",
+ "[aafg74anlk7yqmlg6ucup4tmjwhskblh75z227dcfmwpfzw7dzm] example_inputs[58]: TensorMetadata(dtype=torch.float32, shape=torch.Size([3072, 768]), stride=(768, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=9437184, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)",
+ "[sefiqkkteehwsnilwpljkx7vdgondub2hwggoishzmx7o5etmjk] example_inputs[59]: TensorMetadata(dtype=torch.float32, shape=torch.Size([3072]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=12288, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)",
+ "[6v4azk5hvrkyq22k6mfa2l7ovaddatmim7mcvcpdku2ieirjd64] example_inputs[60]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768, 3072]), stride=(3072, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=9437184, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)",
+ "[ymueslj5cs5k2l243gcdaohltqzpminou4x5gltmu2cgjgroyyd] example_inputs[61]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=3072, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)",
+ "[ymueslj5cs5k2l243gcdaohltqzpminou4x5gltmu2cgjgroyyd] example_inputs[62]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=3072, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)",
+ "[ymueslj5cs5k2l243gcdaohltqzpminou4x5gltmu2cgjgroyyd] example_inputs[63]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=3072, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)",
+ "[ku6bnu4u7osvly4jsmf2w2orf22tghfytwzq6ycj3otmnq6f3wn] example_inputs[64]: TensorMetadata(dtype=torch.float32, shape=torch.Size([2304, 768]), stride=(768, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=7077888, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)",
+ "[57h3bzk67jreuqiikhvgdtcn2mdkz7xuwhdnrjueg4dxxn6tfzm] example_inputs[65]: TensorMetadata(dtype=torch.float32, shape=torch.Size([2304]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=9216, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)",
+ "[a64xnvbb6q6gew5c3aovaeqivj3hstrmxkbra3eidivx42znnuv] example_inputs[66]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768, 768]), stride=(768, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=2359296, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)",
+ "[ymueslj5cs5k2l243gcdaohltqzpminou4x5gltmu2cgjgroyyd] example_inputs[67]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=3072, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)",
+ "[ymueslj5cs5k2l243gcdaohltqzpminou4x5gltmu2cgjgroyyd] example_inputs[68]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=3072, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)",
+ "[ymueslj5cs5k2l243gcdaohltqzpminou4x5gltmu2cgjgroyyd] example_inputs[69]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=3072, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)",
+ "[aafg74anlk7yqmlg6ucup4tmjwhskblh75z227dcfmwpfzw7dzm] example_inputs[70]: TensorMetadata(dtype=torch.float32, shape=torch.Size([3072, 768]), stride=(768, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=9437184, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)",
+ "[sefiqkkteehwsnilwpljkx7vdgondub2hwggoishzmx7o5etmjk] example_inputs[71]: TensorMetadata(dtype=torch.float32, shape=torch.Size([3072]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=12288, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)",
+ "[6v4azk5hvrkyq22k6mfa2l7ovaddatmim7mcvcpdku2ieirjd64] example_inputs[72]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768, 3072]), stride=(3072, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=9437184, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)",
+ "[ymueslj5cs5k2l243gcdaohltqzpminou4x5gltmu2cgjgroyyd] example_inputs[73]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=3072, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)",
+ "[ymueslj5cs5k2l243gcdaohltqzpminou4x5gltmu2cgjgroyyd] example_inputs[74]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=3072, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)",
+ "[ymueslj5cs5k2l243gcdaohltqzpminou4x5gltmu2cgjgroyyd] example_inputs[75]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=3072, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)",
+ "[ku6bnu4u7osvly4jsmf2w2orf22tghfytwzq6ycj3otmnq6f3wn] example_inputs[76]: TensorMetadata(dtype=torch.float32, shape=torch.Size([2304, 768]), stride=(768, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=7077888, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)",
+ "[57h3bzk67jreuqiikhvgdtcn2mdkz7xuwhdnrjueg4dxxn6tfzm] example_inputs[77]: TensorMetadata(dtype=torch.float32, shape=torch.Size([2304]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=9216, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)",
+ "[a64xnvbb6q6gew5c3aovaeqivj3hstrmxkbra3eidivx42znnuv] example_inputs[78]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768, 768]), stride=(768, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=2359296, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)",
+ "[ymueslj5cs5k2l243gcdaohltqzpminou4x5gltmu2cgjgroyyd] example_inputs[79]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=3072, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)",
+ "[ymueslj5cs5k2l243gcdaohltqzpminou4x5gltmu2cgjgroyyd] example_inputs[80]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=3072, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)",
+ "[ymueslj5cs5k2l243gcdaohltqzpminou4x5gltmu2cgjgroyyd] example_inputs[81]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=3072, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)",
+ "[aafg74anlk7yqmlg6ucup4tmjwhskblh75z227dcfmwpfzw7dzm] example_inputs[82]: TensorMetadata(dtype=torch.float32, shape=torch.Size([3072, 768]), stride=(768, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=9437184, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)",
+ "[sefiqkkteehwsnilwpljkx7vdgondub2hwggoishzmx7o5etmjk] example_inputs[83]: TensorMetadata(dtype=torch.float32, shape=torch.Size([3072]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=12288, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)",
+ "[6v4azk5hvrkyq22k6mfa2l7ovaddatmim7mcvcpdku2ieirjd64] example_inputs[84]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768, 3072]), stride=(3072, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=9437184, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)",
+ "[ymueslj5cs5k2l243gcdaohltqzpminou4x5gltmu2cgjgroyyd] example_inputs[85]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=3072, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)",
+ "[ymueslj5cs5k2l243gcdaohltqzpminou4x5gltmu2cgjgroyyd] example_inputs[86]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=3072, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)",
+ "[ymueslj5cs5k2l243gcdaohltqzpminou4x5gltmu2cgjgroyyd] example_inputs[87]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=3072, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)",
+ "[ku6bnu4u7osvly4jsmf2w2orf22tghfytwzq6ycj3otmnq6f3wn] example_inputs[88]: TensorMetadata(dtype=torch.float32, shape=torch.Size([2304, 768]), stride=(768, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=7077888, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)",
+ "[57h3bzk67jreuqiikhvgdtcn2mdkz7xuwhdnrjueg4dxxn6tfzm] example_inputs[89]: TensorMetadata(dtype=torch.float32, shape=torch.Size([2304]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=9216, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)",
+ "[a64xnvbb6q6gew5c3aovaeqivj3hstrmxkbra3eidivx42znnuv] example_inputs[90]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768, 768]), stride=(768, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=2359296, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)",
+ "[ymueslj5cs5k2l243gcdaohltqzpminou4x5gltmu2cgjgroyyd] example_inputs[91]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=3072, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)",
+ "[ymueslj5cs5k2l243gcdaohltqzpminou4x5gltmu2cgjgroyyd] example_inputs[92]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=3072, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)",
+ "[ymueslj5cs5k2l243gcdaohltqzpminou4x5gltmu2cgjgroyyd] example_inputs[93]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=3072, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)",
+ "[aafg74anlk7yqmlg6ucup4tmjwhskblh75z227dcfmwpfzw7dzm] example_inputs[94]: TensorMetadata(dtype=torch.float32, shape=torch.Size([3072, 768]), stride=(768, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=9437184, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)",
+ "[sefiqkkteehwsnilwpljkx7vdgondub2hwggoishzmx7o5etmjk] example_inputs[95]: TensorMetadata(dtype=torch.float32, shape=torch.Size([3072]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=12288, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)",
+ "[6v4azk5hvrkyq22k6mfa2l7ovaddatmim7mcvcpdku2ieirjd64] example_inputs[96]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768, 3072]), stride=(3072, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=9437184, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)",
+ "[ymueslj5cs5k2l243gcdaohltqzpminou4x5gltmu2cgjgroyyd] example_inputs[97]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=3072, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)",
+ "[ymueslj5cs5k2l243gcdaohltqzpminou4x5gltmu2cgjgroyyd] example_inputs[98]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=3072, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)",
+ "[ymueslj5cs5k2l243gcdaohltqzpminou4x5gltmu2cgjgroyyd] example_inputs[99]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=3072, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)",
+ "[ku6bnu4u7osvly4jsmf2w2orf22tghfytwzq6ycj3otmnq6f3wn] example_inputs[100]: TensorMetadata(dtype=torch.float32, shape=torch.Size([2304, 768]), stride=(768, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=7077888, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)",
+ "[57h3bzk67jreuqiikhvgdtcn2mdkz7xuwhdnrjueg4dxxn6tfzm] example_inputs[101]: TensorMetadata(dtype=torch.float32, shape=torch.Size([2304]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=9216, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)",
+ "[a64xnvbb6q6gew5c3aovaeqivj3hstrmxkbra3eidivx42znnuv] example_inputs[102]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768, 768]), stride=(768, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=2359296, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)",
+ "[ymueslj5cs5k2l243gcdaohltqzpminou4x5gltmu2cgjgroyyd] example_inputs[103]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=3072, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)",
+ "[ymueslj5cs5k2l243gcdaohltqzpminou4x5gltmu2cgjgroyyd] example_inputs[104]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=3072, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)",
+ "[ymueslj5cs5k2l243gcdaohltqzpminou4x5gltmu2cgjgroyyd] example_inputs[105]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=3072, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)",
+ "[aafg74anlk7yqmlg6ucup4tmjwhskblh75z227dcfmwpfzw7dzm] example_inputs[106]: TensorMetadata(dtype=torch.float32, shape=torch.Size([3072, 768]), stride=(768, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=9437184, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)",
+ "[sefiqkkteehwsnilwpljkx7vdgondub2hwggoishzmx7o5etmjk] example_inputs[107]: TensorMetadata(dtype=torch.float32, shape=torch.Size([3072]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=12288, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)",
+ "[6v4azk5hvrkyq22k6mfa2l7ovaddatmim7mcvcpdku2ieirjd64] example_inputs[108]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768, 3072]), stride=(3072, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=9437184, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)",
+ "[ymueslj5cs5k2l243gcdaohltqzpminou4x5gltmu2cgjgroyyd] example_inputs[109]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=3072, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)",
+ "[ymueslj5cs5k2l243gcdaohltqzpminou4x5gltmu2cgjgroyyd] example_inputs[110]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=3072, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)",
+ "[ymueslj5cs5k2l243gcdaohltqzpminou4x5gltmu2cgjgroyyd] example_inputs[111]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=3072, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)",
+ "[ku6bnu4u7osvly4jsmf2w2orf22tghfytwzq6ycj3otmnq6f3wn] example_inputs[112]: TensorMetadata(dtype=torch.float32, shape=torch.Size([2304, 768]), stride=(768, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=7077888, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)",
+ "[57h3bzk67jreuqiikhvgdtcn2mdkz7xuwhdnrjueg4dxxn6tfzm] example_inputs[113]: TensorMetadata(dtype=torch.float32, shape=torch.Size([2304]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=9216, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)",
+ "[a64xnvbb6q6gew5c3aovaeqivj3hstrmxkbra3eidivx42znnuv] example_inputs[114]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768, 768]), stride=(768, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=2359296, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)",
+ "[ymueslj5cs5k2l243gcdaohltqzpminou4x5gltmu2cgjgroyyd] example_inputs[115]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=3072, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)",
+ "[ymueslj5cs5k2l243gcdaohltqzpminou4x5gltmu2cgjgroyyd] example_inputs[116]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=3072, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)",
+ "[ymueslj5cs5k2l243gcdaohltqzpminou4x5gltmu2cgjgroyyd] example_inputs[117]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=3072, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)",
+ "[aafg74anlk7yqmlg6ucup4tmjwhskblh75z227dcfmwpfzw7dzm] example_inputs[118]: TensorMetadata(dtype=torch.float32, shape=torch.Size([3072, 768]), stride=(768, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=9437184, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)",
+ "[sefiqkkteehwsnilwpljkx7vdgondub2hwggoishzmx7o5etmjk] example_inputs[119]: TensorMetadata(dtype=torch.float32, shape=torch.Size([3072]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=12288, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)",
+ "[6v4azk5hvrkyq22k6mfa2l7ovaddatmim7mcvcpdku2ieirjd64] example_inputs[120]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768, 3072]), stride=(3072, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=9437184, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)",
+ "[ymueslj5cs5k2l243gcdaohltqzpminou4x5gltmu2cgjgroyyd] example_inputs[121]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=3072, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)",
+ "[ymueslj5cs5k2l243gcdaohltqzpminou4x5gltmu2cgjgroyyd] example_inputs[122]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=3072, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)",
+ "[ymueslj5cs5k2l243gcdaohltqzpminou4x5gltmu2cgjgroyyd] example_inputs[123]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=3072, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)",
+ "[ku6bnu4u7osvly4jsmf2w2orf22tghfytwzq6ycj3otmnq6f3wn] example_inputs[124]: TensorMetadata(dtype=torch.float32, shape=torch.Size([2304, 768]), stride=(768, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=7077888, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)",
+ "[57h3bzk67jreuqiikhvgdtcn2mdkz7xuwhdnrjueg4dxxn6tfzm] example_inputs[125]: TensorMetadata(dtype=torch.float32, shape=torch.Size([2304]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=9216, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)",
+ "[a64xnvbb6q6gew5c3aovaeqivj3hstrmxkbra3eidivx42znnuv] example_inputs[126]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768, 768]), stride=(768, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=2359296, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)",
+ "[ymueslj5cs5k2l243gcdaohltqzpminou4x5gltmu2cgjgroyyd] example_inputs[127]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=3072, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)",
+ "[ymueslj5cs5k2l243gcdaohltqzpminou4x5gltmu2cgjgroyyd] example_inputs[128]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=3072, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)",
+ "[ymueslj5cs5k2l243gcdaohltqzpminou4x5gltmu2cgjgroyyd] example_inputs[129]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=3072, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)",
+ "[aafg74anlk7yqmlg6ucup4tmjwhskblh75z227dcfmwpfzw7dzm] example_inputs[130]: TensorMetadata(dtype=torch.float32, shape=torch.Size([3072, 768]), stride=(768, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=9437184, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)",
+ "[sefiqkkteehwsnilwpljkx7vdgondub2hwggoishzmx7o5etmjk] example_inputs[131]: TensorMetadata(dtype=torch.float32, shape=torch.Size([3072]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=12288, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)",
+ "[6v4azk5hvrkyq22k6mfa2l7ovaddatmim7mcvcpdku2ieirjd64] example_inputs[132]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768, 3072]), stride=(3072, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=9437184, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)",
+ "[ymueslj5cs5k2l243gcdaohltqzpminou4x5gltmu2cgjgroyyd] example_inputs[133]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=3072, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)",
+ "[ymueslj5cs5k2l243gcdaohltqzpminou4x5gltmu2cgjgroyyd] example_inputs[134]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=3072, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)",
+ "[ymueslj5cs5k2l243gcdaohltqzpminou4x5gltmu2cgjgroyyd] example_inputs[135]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=3072, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)",
+ "[ku6bnu4u7osvly4jsmf2w2orf22tghfytwzq6ycj3otmnq6f3wn] example_inputs[136]: TensorMetadata(dtype=torch.float32, shape=torch.Size([2304, 768]), stride=(768, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=7077888, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)",
+ "[57h3bzk67jreuqiikhvgdtcn2mdkz7xuwhdnrjueg4dxxn6tfzm] example_inputs[137]: TensorMetadata(dtype=torch.float32, shape=torch.Size([2304]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=9216, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)",
+ "[a64xnvbb6q6gew5c3aovaeqivj3hstrmxkbra3eidivx42znnuv] example_inputs[138]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768, 768]), stride=(768, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=2359296, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)",
+ "[ymueslj5cs5k2l243gcdaohltqzpminou4x5gltmu2cgjgroyyd] example_inputs[139]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=3072, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)",
+ "[ymueslj5cs5k2l243gcdaohltqzpminou4x5gltmu2cgjgroyyd] example_inputs[140]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=3072, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)",
+ "[ymueslj5cs5k2l243gcdaohltqzpminou4x5gltmu2cgjgroyyd] example_inputs[141]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=3072, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)",
+ "[aafg74anlk7yqmlg6ucup4tmjwhskblh75z227dcfmwpfzw7dzm] example_inputs[142]: TensorMetadata(dtype=torch.float32, shape=torch.Size([3072, 768]), stride=(768, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=9437184, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)",
+ "[sefiqkkteehwsnilwpljkx7vdgondub2hwggoishzmx7o5etmjk] example_inputs[143]: TensorMetadata(dtype=torch.float32, shape=torch.Size([3072]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=12288, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)",
+ "[6v4azk5hvrkyq22k6mfa2l7ovaddatmim7mcvcpdku2ieirjd64] example_inputs[144]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768, 3072]), stride=(3072, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=9437184, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)",
+ "[ymueslj5cs5k2l243gcdaohltqzpminou4x5gltmu2cgjgroyyd] example_inputs[145]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=3072, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)",
+ "[ymueslj5cs5k2l243gcdaohltqzpminou4x5gltmu2cgjgroyyd] example_inputs[146]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=3072, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)",
+ "[ymueslj5cs5k2l243gcdaohltqzpminou4x5gltmu2cgjgroyyd] example_inputs[147]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=3072, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)",
+ "[iyogpwfek6zjggzelzpjeon4muycgzpe56o5gt3flsg5vojztet] example_inputs[148]: TensorMetadata(dtype=torch.float32, shape=torch.Size([]), stride=(), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=4, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)",
+ "[j2qdbepspnv5xu5aaa4vfybenn6kcipqnlg4axmhoebbdlhdfcg] example_inputs[149]: TensorMetadata(dtype=torch.float32, shape=torch.Size([1024, 768]), stride=(768, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=3145728, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)",
+ "[j2qdbepspnv5xu5aaa4vfybenn6kcipqnlg4axmhoebbdlhdfcg] example_inputs[150]: TensorMetadata(dtype=torch.float32, shape=torch.Size([1024, 768]), stride=(768, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=3145728, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)",
+ "[ucvyfteusmf3hkyqsnlp5ug5dh4kqnbxlr56s7pvkidgpyg2jx5] example_inputs[151]: TensorMetadata(dtype=torch.float32, shape=torch.Size([50304, 768]), stride=(768, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=None, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)",
+ "[6gu2nloxzcxrfltpbua7lqp7kkjihxd4w7afynotijqr3v7nhh5] example_inputs[152]: TensorMetadata(dtype=torch.float32, shape=torch.Size([1024, 768]), stride=(768, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=None, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)",
+ "[ln6xprjugjuaphagzxcpw3ly7ocdvczoqxk4ltvl3onfxvve4on] example_inputs[153]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=None, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)",
+ "[ln6xprjugjuaphagzxcpw3ly7ocdvczoqxk4ltvl3onfxvve4on] example_inputs[154]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=None, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)",
+ "[hrasck5lnskaq7gv4vyfi44p5n7y3tr36z7trckbwnuv3eyapmk] example_inputs[155]: TensorMetadata(dtype=torch.float32, shape=torch.Size([2304, 768]), stride=(768, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=None, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)",
+ "[6to4nwahrkxgsie75pfivv3ndqufs4lughzpqcdhir3tm3ezo3n] example_inputs[156]: TensorMetadata(dtype=torch.float32, shape=torch.Size([2304]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=None, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)",
+ "[2ltp76oexmn4iaroifj5ea33qciy4yxhn7j6tpopk3n2hntts7a] example_inputs[157]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768, 768]), stride=(768, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=None, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)",
+ "[ln6xprjugjuaphagzxcpw3ly7ocdvczoqxk4ltvl3onfxvve4on] example_inputs[158]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=None, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)",
+ "[ln6xprjugjuaphagzxcpw3ly7ocdvczoqxk4ltvl3onfxvve4on] example_inputs[159]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=None, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)",
+ "[ln6xprjugjuaphagzxcpw3ly7ocdvczoqxk4ltvl3onfxvve4on] example_inputs[160]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=None, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)",
+ "[ci5juyeynyazk6a6gqle7pthpejmypuopbxaghaccdz2d3kpsz4] example_inputs[161]: TensorMetadata(dtype=torch.float32, shape=torch.Size([3072, 768]), stride=(768, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=None, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)",
+ "[wzhy2ewb5m2s7i22j2cam7aomq3hpahzofgjnxkb6jug7de7qgw] example_inputs[162]: TensorMetadata(dtype=torch.float32, shape=torch.Size([3072]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=None, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)",
+ "[3tccso7i75mxvdsbabjcgf45nen6thm6hxdkqvudhrh5iu2kbs7] example_inputs[163]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768, 3072]), stride=(3072, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=None, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)",
+ "[ln6xprjugjuaphagzxcpw3ly7ocdvczoqxk4ltvl3onfxvve4on] example_inputs[164]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=None, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)",
+ "[ln6xprjugjuaphagzxcpw3ly7ocdvczoqxk4ltvl3onfxvve4on] example_inputs[165]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=None, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)",
+ "[ln6xprjugjuaphagzxcpw3ly7ocdvczoqxk4ltvl3onfxvve4on] example_inputs[166]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=None, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)",
+ "[hrasck5lnskaq7gv4vyfi44p5n7y3tr36z7trckbwnuv3eyapmk] example_inputs[167]: TensorMetadata(dtype=torch.float32, shape=torch.Size([2304, 768]), stride=(768, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=None, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)",
+ "[6to4nwahrkxgsie75pfivv3ndqufs4lughzpqcdhir3tm3ezo3n] example_inputs[168]: TensorMetadata(dtype=torch.float32, shape=torch.Size([2304]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=None, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)",
+ "[2ltp76oexmn4iaroifj5ea33qciy4yxhn7j6tpopk3n2hntts7a] example_inputs[169]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768, 768]), stride=(768, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=None, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)",
+ "[ln6xprjugjuaphagzxcpw3ly7ocdvczoqxk4ltvl3onfxvve4on] example_inputs[170]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=None, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)",
+ "[ln6xprjugjuaphagzxcpw3ly7ocdvczoqxk4ltvl3onfxvve4on] example_inputs[171]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=None, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)",
+ "[ln6xprjugjuaphagzxcpw3ly7ocdvczoqxk4ltvl3onfxvve4on] example_inputs[172]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=None, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)",
+ "[ci5juyeynyazk6a6gqle7pthpejmypuopbxaghaccdz2d3kpsz4] example_inputs[173]: TensorMetadata(dtype=torch.float32, shape=torch.Size([3072, 768]), stride=(768, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=None, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)",
+ "[wzhy2ewb5m2s7i22j2cam7aomq3hpahzofgjnxkb6jug7de7qgw] example_inputs[174]: TensorMetadata(dtype=torch.float32, shape=torch.Size([3072]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=None, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)",
+ "[3tccso7i75mxvdsbabjcgf45nen6thm6hxdkqvudhrh5iu2kbs7] example_inputs[175]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768, 3072]), stride=(3072, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=None, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)",
+ "[ln6xprjugjuaphagzxcpw3ly7ocdvczoqxk4ltvl3onfxvve4on] example_inputs[176]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=None, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)",
+ "[ln6xprjugjuaphagzxcpw3ly7ocdvczoqxk4ltvl3onfxvve4on] example_inputs[177]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=None, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)",
+ "[ln6xprjugjuaphagzxcpw3ly7ocdvczoqxk4ltvl3onfxvve4on] example_inputs[178]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=None, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)",
+ "[hrasck5lnskaq7gv4vyfi44p5n7y3tr36z7trckbwnuv3eyapmk] example_inputs[179]: TensorMetadata(dtype=torch.float32, shape=torch.Size([2304, 768]), stride=(768, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=None, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)",
+
"[6to4nwahrkxgsie75pfivv3ndqufs4lughzpqcdhir3tm3ezo3n] example_inputs[180]: TensorMetadata(dtype=torch.float32, shape=torch.Size([2304]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=None, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[2ltp76oexmn4iaroifj5ea33qciy4yxhn7j6tpopk3n2hntts7a] example_inputs[181]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768, 768]), stride=(768, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=None, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[ln6xprjugjuaphagzxcpw3ly7ocdvczoqxk4ltvl3onfxvve4on] example_inputs[182]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=None, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[ln6xprjugjuaphagzxcpw3ly7ocdvczoqxk4ltvl3onfxvve4on] example_inputs[183]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=None, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[ln6xprjugjuaphagzxcpw3ly7ocdvczoqxk4ltvl3onfxvve4on] example_inputs[184]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=None, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[ci5juyeynyazk6a6gqle7pthpejmypuopbxaghaccdz2d3kpsz4] example_inputs[185]: TensorMetadata(dtype=torch.float32, shape=torch.Size([3072, 768]), stride=(768, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=None, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[wzhy2ewb5m2s7i22j2cam7aomq3hpahzofgjnxkb6jug7de7qgw] example_inputs[186]: TensorMetadata(dtype=torch.float32, shape=torch.Size([3072]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=None, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[3tccso7i75mxvdsbabjcgf45nen6thm6hxdkqvudhrh5iu2kbs7] example_inputs[187]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768, 3072]), stride=(3072, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=None, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, 
is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[ln6xprjugjuaphagzxcpw3ly7ocdvczoqxk4ltvl3onfxvve4on] example_inputs[188]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=None, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[ln6xprjugjuaphagzxcpw3ly7ocdvczoqxk4ltvl3onfxvve4on] example_inputs[189]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=None, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[ln6xprjugjuaphagzxcpw3ly7ocdvczoqxk4ltvl3onfxvve4on] example_inputs[190]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=None, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[hrasck5lnskaq7gv4vyfi44p5n7y3tr36z7trckbwnuv3eyapmk] example_inputs[191]: TensorMetadata(dtype=torch.float32, shape=torch.Size([2304, 768]), stride=(768, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=None, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[6to4nwahrkxgsie75pfivv3ndqufs4lughzpqcdhir3tm3ezo3n] example_inputs[192]: TensorMetadata(dtype=torch.float32, shape=torch.Size([2304]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=None, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[2ltp76oexmn4iaroifj5ea33qciy4yxhn7j6tpopk3n2hntts7a] example_inputs[193]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768, 768]), stride=(768, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=None, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[ln6xprjugjuaphagzxcpw3ly7ocdvczoqxk4ltvl3onfxvve4on] example_inputs[194]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=None, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[ln6xprjugjuaphagzxcpw3ly7ocdvczoqxk4ltvl3onfxvve4on] example_inputs[195]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=None, requires_grad=False, 
is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[ln6xprjugjuaphagzxcpw3ly7ocdvczoqxk4ltvl3onfxvve4on] example_inputs[196]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=None, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[ci5juyeynyazk6a6gqle7pthpejmypuopbxaghaccdz2d3kpsz4] example_inputs[197]: TensorMetadata(dtype=torch.float32, shape=torch.Size([3072, 768]), stride=(768, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=None, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[wzhy2ewb5m2s7i22j2cam7aomq3hpahzofgjnxkb6jug7de7qgw] example_inputs[198]: TensorMetadata(dtype=torch.float32, shape=torch.Size([3072]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=None, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[3tccso7i75mxvdsbabjcgf45nen6thm6hxdkqvudhrh5iu2kbs7] example_inputs[199]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768, 3072]), stride=(3072, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=None, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[ln6xprjugjuaphagzxcpw3ly7ocdvczoqxk4ltvl3onfxvve4on] example_inputs[200]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=None, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[ln6xprjugjuaphagzxcpw3ly7ocdvczoqxk4ltvl3onfxvve4on] example_inputs[201]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=None, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[ln6xprjugjuaphagzxcpw3ly7ocdvczoqxk4ltvl3onfxvve4on] example_inputs[202]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=None, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[hrasck5lnskaq7gv4vyfi44p5n7y3tr36z7trckbwnuv3eyapmk] example_inputs[203]: TensorMetadata(dtype=torch.float32, shape=torch.Size([2304, 768]), stride=(768, 1), device=device(type='cuda', index=0), layout=torch.strided, 
memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=None, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[6to4nwahrkxgsie75pfivv3ndqufs4lughzpqcdhir3tm3ezo3n] example_inputs[204]: TensorMetadata(dtype=torch.float32, shape=torch.Size([2304]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=None, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[2ltp76oexmn4iaroifj5ea33qciy4yxhn7j6tpopk3n2hntts7a] example_inputs[205]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768, 768]), stride=(768, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=None, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[ln6xprjugjuaphagzxcpw3ly7ocdvczoqxk4ltvl3onfxvve4on] example_inputs[206]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=None, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[ln6xprjugjuaphagzxcpw3ly7ocdvczoqxk4ltvl3onfxvve4on] example_inputs[207]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=None, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[ln6xprjugjuaphagzxcpw3ly7ocdvczoqxk4ltvl3onfxvve4on] example_inputs[208]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=None, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[ci5juyeynyazk6a6gqle7pthpejmypuopbxaghaccdz2d3kpsz4] example_inputs[209]: TensorMetadata(dtype=torch.float32, shape=torch.Size([3072, 768]), stride=(768, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=None, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[wzhy2ewb5m2s7i22j2cam7aomq3hpahzofgjnxkb6jug7de7qgw] example_inputs[210]: TensorMetadata(dtype=torch.float32, shape=torch.Size([3072]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=None, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[3tccso7i75mxvdsbabjcgf45nen6thm6hxdkqvudhrh5iu2kbs7] example_inputs[211]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768, 3072]), 
stride=(3072, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=None, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[ln6xprjugjuaphagzxcpw3ly7ocdvczoqxk4ltvl3onfxvve4on] example_inputs[212]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=None, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[ln6xprjugjuaphagzxcpw3ly7ocdvczoqxk4ltvl3onfxvve4on] example_inputs[213]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=None, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[ln6xprjugjuaphagzxcpw3ly7ocdvczoqxk4ltvl3onfxvve4on] example_inputs[214]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=None, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[hrasck5lnskaq7gv4vyfi44p5n7y3tr36z7trckbwnuv3eyapmk] example_inputs[215]: TensorMetadata(dtype=torch.float32, shape=torch.Size([2304, 768]), stride=(768, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=None, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[6to4nwahrkxgsie75pfivv3ndqufs4lughzpqcdhir3tm3ezo3n] example_inputs[216]: TensorMetadata(dtype=torch.float32, shape=torch.Size([2304]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=None, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[2ltp76oexmn4iaroifj5ea33qciy4yxhn7j6tpopk3n2hntts7a] example_inputs[217]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768, 768]), stride=(768, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=None, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[ln6xprjugjuaphagzxcpw3ly7ocdvczoqxk4ltvl3onfxvve4on] example_inputs[218]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=None, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[ln6xprjugjuaphagzxcpw3ly7ocdvczoqxk4ltvl3onfxvve4on] 
example_inputs[219]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=None, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[ln6xprjugjuaphagzxcpw3ly7ocdvczoqxk4ltvl3onfxvve4on] example_inputs[220]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=None, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[ci5juyeynyazk6a6gqle7pthpejmypuopbxaghaccdz2d3kpsz4] example_inputs[221]: TensorMetadata(dtype=torch.float32, shape=torch.Size([3072, 768]), stride=(768, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=None, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[wzhy2ewb5m2s7i22j2cam7aomq3hpahzofgjnxkb6jug7de7qgw] example_inputs[222]: TensorMetadata(dtype=torch.float32, shape=torch.Size([3072]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=None, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[3tccso7i75mxvdsbabjcgf45nen6thm6hxdkqvudhrh5iu2kbs7] example_inputs[223]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768, 3072]), stride=(3072, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=None, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[ln6xprjugjuaphagzxcpw3ly7ocdvczoqxk4ltvl3onfxvve4on] example_inputs[224]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=None, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[ln6xprjugjuaphagzxcpw3ly7ocdvczoqxk4ltvl3onfxvve4on] example_inputs[225]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=None, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[ln6xprjugjuaphagzxcpw3ly7ocdvczoqxk4ltvl3onfxvve4on] example_inputs[226]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=None, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, 
sparse_dim=None)", + "[hrasck5lnskaq7gv4vyfi44p5n7y3tr36z7trckbwnuv3eyapmk] example_inputs[227]: TensorMetadata(dtype=torch.float32, shape=torch.Size([2304, 768]), stride=(768, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=None, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[6to4nwahrkxgsie75pfivv3ndqufs4lughzpqcdhir3tm3ezo3n] example_inputs[228]: TensorMetadata(dtype=torch.float32, shape=torch.Size([2304]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=None, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[2ltp76oexmn4iaroifj5ea33qciy4yxhn7j6tpopk3n2hntts7a] example_inputs[229]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768, 768]), stride=(768, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=None, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[ln6xprjugjuaphagzxcpw3ly7ocdvczoqxk4ltvl3onfxvve4on] example_inputs[230]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=None, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[ln6xprjugjuaphagzxcpw3ly7ocdvczoqxk4ltvl3onfxvve4on] example_inputs[231]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=None, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[ln6xprjugjuaphagzxcpw3ly7ocdvczoqxk4ltvl3onfxvve4on] example_inputs[232]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=None, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[ci5juyeynyazk6a6gqle7pthpejmypuopbxaghaccdz2d3kpsz4] example_inputs[233]: TensorMetadata(dtype=torch.float32, shape=torch.Size([3072, 768]), stride=(768, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=None, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[wzhy2ewb5m2s7i22j2cam7aomq3hpahzofgjnxkb6jug7de7qgw] example_inputs[234]: TensorMetadata(dtype=torch.float32, shape=torch.Size([3072]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=None, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, 
is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[3tccso7i75mxvdsbabjcgf45nen6thm6hxdkqvudhrh5iu2kbs7] example_inputs[235]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768, 3072]), stride=(3072, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=None, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[ln6xprjugjuaphagzxcpw3ly7ocdvczoqxk4ltvl3onfxvve4on] example_inputs[236]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=None, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[ln6xprjugjuaphagzxcpw3ly7ocdvczoqxk4ltvl3onfxvve4on] example_inputs[237]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=None, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[ln6xprjugjuaphagzxcpw3ly7ocdvczoqxk4ltvl3onfxvve4on] example_inputs[238]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=None, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[hrasck5lnskaq7gv4vyfi44p5n7y3tr36z7trckbwnuv3eyapmk] example_inputs[239]: TensorMetadata(dtype=torch.float32, shape=torch.Size([2304, 768]), stride=(768, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=None, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[6to4nwahrkxgsie75pfivv3ndqufs4lughzpqcdhir3tm3ezo3n] example_inputs[240]: TensorMetadata(dtype=torch.float32, shape=torch.Size([2304]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=None, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[2ltp76oexmn4iaroifj5ea33qciy4yxhn7j6tpopk3n2hntts7a] example_inputs[241]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768, 768]), stride=(768, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=None, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[ln6xprjugjuaphagzxcpw3ly7ocdvczoqxk4ltvl3onfxvve4on] example_inputs[242]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=None, 
requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[ln6xprjugjuaphagzxcpw3ly7ocdvczoqxk4ltvl3onfxvve4on] example_inputs[243]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=None, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[ln6xprjugjuaphagzxcpw3ly7ocdvczoqxk4ltvl3onfxvve4on] example_inputs[244]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=None, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[ci5juyeynyazk6a6gqle7pthpejmypuopbxaghaccdz2d3kpsz4] example_inputs[245]: TensorMetadata(dtype=torch.float32, shape=torch.Size([3072, 768]), stride=(768, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=None, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[wzhy2ewb5m2s7i22j2cam7aomq3hpahzofgjnxkb6jug7de7qgw] example_inputs[246]: TensorMetadata(dtype=torch.float32, shape=torch.Size([3072]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=None, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[3tccso7i75mxvdsbabjcgf45nen6thm6hxdkqvudhrh5iu2kbs7] example_inputs[247]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768, 3072]), stride=(3072, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=None, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[ln6xprjugjuaphagzxcpw3ly7ocdvczoqxk4ltvl3onfxvve4on] example_inputs[248]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=None, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[ln6xprjugjuaphagzxcpw3ly7ocdvczoqxk4ltvl3onfxvve4on] example_inputs[249]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=None, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[ln6xprjugjuaphagzxcpw3ly7ocdvczoqxk4ltvl3onfxvve4on] example_inputs[250]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, 
memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=None, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[hrasck5lnskaq7gv4vyfi44p5n7y3tr36z7trckbwnuv3eyapmk] example_inputs[251]: TensorMetadata(dtype=torch.float32, shape=torch.Size([2304, 768]), stride=(768, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=None, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[6to4nwahrkxgsie75pfivv3ndqufs4lughzpqcdhir3tm3ezo3n] example_inputs[252]: TensorMetadata(dtype=torch.float32, shape=torch.Size([2304]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=None, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[2ltp76oexmn4iaroifj5ea33qciy4yxhn7j6tpopk3n2hntts7a] example_inputs[253]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768, 768]), stride=(768, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=None, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[ln6xprjugjuaphagzxcpw3ly7ocdvczoqxk4ltvl3onfxvve4on] example_inputs[254]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=None, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[ln6xprjugjuaphagzxcpw3ly7ocdvczoqxk4ltvl3onfxvve4on] example_inputs[255]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=None, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[ln6xprjugjuaphagzxcpw3ly7ocdvczoqxk4ltvl3onfxvve4on] example_inputs[256]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=None, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[ci5juyeynyazk6a6gqle7pthpejmypuopbxaghaccdz2d3kpsz4] example_inputs[257]: TensorMetadata(dtype=torch.float32, shape=torch.Size([3072, 768]), stride=(768, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=None, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[wzhy2ewb5m2s7i22j2cam7aomq3hpahzofgjnxkb6jug7de7qgw] example_inputs[258]: TensorMetadata(dtype=torch.float32, shape=torch.Size([3072]), 
stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=None, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[3tccso7i75mxvdsbabjcgf45nen6thm6hxdkqvudhrh5iu2kbs7] example_inputs[259]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768, 3072]), stride=(3072, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=None, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[ln6xprjugjuaphagzxcpw3ly7ocdvczoqxk4ltvl3onfxvve4on] example_inputs[260]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=None, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[ln6xprjugjuaphagzxcpw3ly7ocdvczoqxk4ltvl3onfxvve4on] example_inputs[261]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=None, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[ln6xprjugjuaphagzxcpw3ly7ocdvczoqxk4ltvl3onfxvve4on] example_inputs[262]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=None, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[hrasck5lnskaq7gv4vyfi44p5n7y3tr36z7trckbwnuv3eyapmk] example_inputs[263]: TensorMetadata(dtype=torch.float32, shape=torch.Size([2304, 768]), stride=(768, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=None, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[6to4nwahrkxgsie75pfivv3ndqufs4lughzpqcdhir3tm3ezo3n] example_inputs[264]: TensorMetadata(dtype=torch.float32, shape=torch.Size([2304]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=None, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[2ltp76oexmn4iaroifj5ea33qciy4yxhn7j6tpopk3n2hntts7a] example_inputs[265]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768, 768]), stride=(768, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=None, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[ln6xprjugjuaphagzxcpw3ly7ocdvczoqxk4ltvl3onfxvve4on] 
example_inputs[266]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=None, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[ln6xprjugjuaphagzxcpw3ly7ocdvczoqxk4ltvl3onfxvve4on] example_inputs[267]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=None, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[ln6xprjugjuaphagzxcpw3ly7ocdvczoqxk4ltvl3onfxvve4on] example_inputs[268]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=None, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[ci5juyeynyazk6a6gqle7pthpejmypuopbxaghaccdz2d3kpsz4] example_inputs[269]: TensorMetadata(dtype=torch.float32, shape=torch.Size([3072, 768]), stride=(768, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=None, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[wzhy2ewb5m2s7i22j2cam7aomq3hpahzofgjnxkb6jug7de7qgw] example_inputs[270]: TensorMetadata(dtype=torch.float32, shape=torch.Size([3072]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=None, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[3tccso7i75mxvdsbabjcgf45nen6thm6hxdkqvudhrh5iu2kbs7] example_inputs[271]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768, 3072]), stride=(3072, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=None, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[ln6xprjugjuaphagzxcpw3ly7ocdvczoqxk4ltvl3onfxvve4on] example_inputs[272]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=None, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[ln6xprjugjuaphagzxcpw3ly7ocdvczoqxk4ltvl3onfxvve4on] example_inputs[273]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=None, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, 
sparse_dim=None)", + "[ln6xprjugjuaphagzxcpw3ly7ocdvczoqxk4ltvl3onfxvve4on] example_inputs[274]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=None, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[hrasck5lnskaq7gv4vyfi44p5n7y3tr36z7trckbwnuv3eyapmk] example_inputs[275]: TensorMetadata(dtype=torch.float32, shape=torch.Size([2304, 768]), stride=(768, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=None, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[6to4nwahrkxgsie75pfivv3ndqufs4lughzpqcdhir3tm3ezo3n] example_inputs[276]: TensorMetadata(dtype=torch.float32, shape=torch.Size([2304]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=None, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[2ltp76oexmn4iaroifj5ea33qciy4yxhn7j6tpopk3n2hntts7a] example_inputs[277]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768, 768]), stride=(768, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=None, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[ln6xprjugjuaphagzxcpw3ly7ocdvczoqxk4ltvl3onfxvve4on] example_inputs[278]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=None, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[ln6xprjugjuaphagzxcpw3ly7ocdvczoqxk4ltvl3onfxvve4on] example_inputs[279]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=None, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[ln6xprjugjuaphagzxcpw3ly7ocdvczoqxk4ltvl3onfxvve4on] example_inputs[280]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=None, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[ci5juyeynyazk6a6gqle7pthpejmypuopbxaghaccdz2d3kpsz4] example_inputs[281]: TensorMetadata(dtype=torch.float32, shape=torch.Size([3072, 768]), stride=(768, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=None, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, 
is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)",
+ "[wzhy2ewb5m2s7i22j2cam7aomq3hpahzofgjnxkb6jug7de7qgw] example_inputs[282]: TensorMetadata(dtype=torch.float32, shape=torch.Size([3072]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=None, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)",
+ "[3tccso7i75mxvdsbabjcgf45nen6thm6hxdkqvudhrh5iu2kbs7] example_inputs[283]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768, 3072]), stride=(3072, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=None, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)",
+ "[ln6xprjugjuaphagzxcpw3ly7ocdvczoqxk4ltvl3onfxvve4on] example_inputs[284]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=None, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)",
+ "[ln6xprjugjuaphagzxcpw3ly7ocdvczoqxk4ltvl3onfxvve4on] example_inputs[285]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=None, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)",
+ "[ln6xprjugjuaphagzxcpw3ly7ocdvczoqxk4ltvl3onfxvve4on] example_inputs[286]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=None, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)",
+ "[hrasck5lnskaq7gv4vyfi44p5n7y3tr36z7trckbwnuv3eyapmk] example_inputs[287]: TensorMetadata(dtype=torch.float32, shape=torch.Size([2304, 768]), stride=(768, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=None, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)",
+ "[6to4nwahrkxgsie75pfivv3ndqufs4lughzpqcdhir3tm3ezo3n] example_inputs[288]: TensorMetadata(dtype=torch.float32, shape=torch.Size([2304]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=None, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)",
+ "[2ltp76oexmn4iaroifj5ea33qciy4yxhn7j6tpopk3n2hntts7a] example_inputs[289]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768, 768]), stride=(768, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=None, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)",
+ "[ln6xprjugjuaphagzxcpw3ly7ocdvczoqxk4ltvl3onfxvve4on] example_inputs[290]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=None, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)",
+ "[ln6xprjugjuaphagzxcpw3ly7ocdvczoqxk4ltvl3onfxvve4on] example_inputs[291]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=None, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)",
+ "[ln6xprjugjuaphagzxcpw3ly7ocdvczoqxk4ltvl3onfxvve4on] example_inputs[292]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=None, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)",
+ "[ci5juyeynyazk6a6gqle7pthpejmypuopbxaghaccdz2d3kpsz4] example_inputs[293]: TensorMetadata(dtype=torch.float32, shape=torch.Size([3072, 768]), stride=(768, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=None, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)",
+ "[wzhy2ewb5m2s7i22j2cam7aomq3hpahzofgjnxkb6jug7de7qgw] example_inputs[294]: TensorMetadata(dtype=torch.float32, shape=torch.Size([3072]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=None, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)",
+ "[3tccso7i75mxvdsbabjcgf45nen6thm6hxdkqvudhrh5iu2kbs7] example_inputs[295]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768, 3072]), stride=(3072, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=None, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)",
+ "[ln6xprjugjuaphagzxcpw3ly7ocdvczoqxk4ltvl3onfxvve4on] example_inputs[296]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=None, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)",
+ "[ln6xprjugjuaphagzxcpw3ly7ocdvczoqxk4ltvl3onfxvve4on] example_inputs[297]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=None, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)",
+ "[ln6xprjugjuaphagzxcpw3ly7ocdvczoqxk4ltvl3onfxvve4on] example_inputs[298]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=None, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)",
+ "[l36txgwatro2oumaemq3fn7fzd6c7rqt7jkya4whofwgykwhyac] example_inputs[299]: TensorMetadata(dtype=torch.float32, shape=torch.Size([50304, 768]), stride=(768, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=154533888, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)",
+ "[ymueslj5cs5k2l243gcdaohltqzpminou4x5gltmu2cgjgroyyd] example_inputs[300]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=3072, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)",
+ "[ymueslj5cs5k2l243gcdaohltqzpminou4x5gltmu2cgjgroyyd] example_inputs[301]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=3072, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)",
+ "[ku6bnu4u7osvly4jsmf2w2orf22tghfytwzq6ycj3otmnq6f3wn] example_inputs[302]: TensorMetadata(dtype=torch.float32, shape=torch.Size([2304, 768]), stride=(768, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=7077888, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)",
+ "[57h3bzk67jreuqiikhvgdtcn2mdkz7xuwhdnrjueg4dxxn6tfzm] example_inputs[303]: TensorMetadata(dtype=torch.float32, shape=torch.Size([2304]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=9216, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)",
+ "[a64xnvbb6q6gew5c3aovaeqivj3hstrmxkbra3eidivx42znnuv] example_inputs[304]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768, 768]), stride=(768, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=2359296, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)",
+ "[ymueslj5cs5k2l243gcdaohltqzpminou4x5gltmu2cgjgroyyd] example_inputs[305]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=3072, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)",
+ "[ymueslj5cs5k2l243gcdaohltqzpminou4x5gltmu2cgjgroyyd] example_inputs[306]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=3072, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)",
+ "[ymueslj5cs5k2l243gcdaohltqzpminou4x5gltmu2cgjgroyyd] example_inputs[307]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=3072, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)",
+ "[aafg74anlk7yqmlg6ucup4tmjwhskblh75z227dcfmwpfzw7dzm] example_inputs[308]: TensorMetadata(dtype=torch.float32, shape=torch.Size([3072, 768]), stride=(768, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=9437184, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)",
+ "[sefiqkkteehwsnilwpljkx7vdgondub2hwggoishzmx7o5etmjk] example_inputs[309]: TensorMetadata(dtype=torch.float32, shape=torch.Size([3072]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=12288, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)",
+ "[6v4azk5hvrkyq22k6mfa2l7ovaddatmim7mcvcpdku2ieirjd64] example_inputs[310]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768, 3072]), stride=(3072, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=9437184, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)",
+ "[ymueslj5cs5k2l243gcdaohltqzpminou4x5gltmu2cgjgroyyd] example_inputs[311]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=3072, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)",
+ "[ymueslj5cs5k2l243gcdaohltqzpminou4x5gltmu2cgjgroyyd] example_inputs[312]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=3072, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)",
+ "[ymueslj5cs5k2l243gcdaohltqzpminou4x5gltmu2cgjgroyyd] example_inputs[313]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=3072, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)",
+ "[ku6bnu4u7osvly4jsmf2w2orf22tghfytwzq6ycj3otmnq6f3wn] example_inputs[314]: TensorMetadata(dtype=torch.float32, shape=torch.Size([2304, 768]), stride=(768, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=7077888, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)",
+ "[57h3bzk67jreuqiikhvgdtcn2mdkz7xuwhdnrjueg4dxxn6tfzm] example_inputs[315]: TensorMetadata(dtype=torch.float32, shape=torch.Size([2304]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=9216, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)",
+ "[a64xnvbb6q6gew5c3aovaeqivj3hstrmxkbra3eidivx42znnuv] example_inputs[316]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768, 768]), stride=(768, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=2359296, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)",
+ "[ymueslj5cs5k2l243gcdaohltqzpminou4x5gltmu2cgjgroyyd] example_inputs[317]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=3072, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)",
+ "[ymueslj5cs5k2l243gcdaohltqzpminou4x5gltmu2cgjgroyyd] example_inputs[318]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=3072, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)",
+ "[ymueslj5cs5k2l243gcdaohltqzpminou4x5gltmu2cgjgroyyd] example_inputs[319]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=3072, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)",
+ "[aafg74anlk7yqmlg6ucup4tmjwhskblh75z227dcfmwpfzw7dzm] example_inputs[320]: TensorMetadata(dtype=torch.float32, shape=torch.Size([3072, 768]), stride=(768, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=9437184, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)",
+ "[sefiqkkteehwsnilwpljkx7vdgondub2hwggoishzmx7o5etmjk] example_inputs[321]: TensorMetadata(dtype=torch.float32, shape=torch.Size([3072]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=12288, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)",
+ "[6v4azk5hvrkyq22k6mfa2l7ovaddatmim7mcvcpdku2ieirjd64] example_inputs[322]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768, 3072]), stride=(3072, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=9437184, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)",
+ "[ymueslj5cs5k2l243gcdaohltqzpminou4x5gltmu2cgjgroyyd] example_inputs[323]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=3072, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)",
+ "[ymueslj5cs5k2l243gcdaohltqzpminou4x5gltmu2cgjgroyyd] example_inputs[324]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=3072, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)",
+ "[ymueslj5cs5k2l243gcdaohltqzpminou4x5gltmu2cgjgroyyd] example_inputs[325]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=3072, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)",
+ "[ku6bnu4u7osvly4jsmf2w2orf22tghfytwzq6ycj3otmnq6f3wn] example_inputs[326]: TensorMetadata(dtype=torch.float32, shape=torch.Size([2304, 768]), stride=(768, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=7077888, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)",
+ "[57h3bzk67jreuqiikhvgdtcn2mdkz7xuwhdnrjueg4dxxn6tfzm] example_inputs[327]: TensorMetadata(dtype=torch.float32, shape=torch.Size([2304]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=9216, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)",
+ "[a64xnvbb6q6gew5c3aovaeqivj3hstrmxkbra3eidivx42znnuv] example_inputs[328]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768, 768]), stride=(768, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=2359296, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)",
+ "[ymueslj5cs5k2l243gcdaohltqzpminou4x5gltmu2cgjgroyyd] example_inputs[329]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=3072, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)",
+ "[ymueslj5cs5k2l243gcdaohltqzpminou4x5gltmu2cgjgroyyd] example_inputs[330]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=3072, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)",
+ "[ymueslj5cs5k2l243gcdaohltqzpminou4x5gltmu2cgjgroyyd] example_inputs[331]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=3072, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)",
+ "[aafg74anlk7yqmlg6ucup4tmjwhskblh75z227dcfmwpfzw7dzm] example_inputs[332]: TensorMetadata(dtype=torch.float32, shape=torch.Size([3072, 768]), stride=(768, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=9437184, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)",
+ "[sefiqkkteehwsnilwpljkx7vdgondub2hwggoishzmx7o5etmjk] example_inputs[333]: TensorMetadata(dtype=torch.float32, shape=torch.Size([3072]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=12288, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)",
+ "[6v4azk5hvrkyq22k6mfa2l7ovaddatmim7mcvcpdku2ieirjd64] example_inputs[334]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768, 3072]), stride=(3072, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=9437184, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)",
+ "[ymueslj5cs5k2l243gcdaohltqzpminou4x5gltmu2cgjgroyyd] example_inputs[335]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=3072, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)",
+ "[ymueslj5cs5k2l243gcdaohltqzpminou4x5gltmu2cgjgroyyd] example_inputs[336]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=3072, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)",
+ "[ymueslj5cs5k2l243gcdaohltqzpminou4x5gltmu2cgjgroyyd] example_inputs[337]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=3072, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)",
+ "[ku6bnu4u7osvly4jsmf2w2orf22tghfytwzq6ycj3otmnq6f3wn] example_inputs[338]: TensorMetadata(dtype=torch.float32, shape=torch.Size([2304, 768]), stride=(768, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=7077888, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)",
+ "[57h3bzk67jreuqiikhvgdtcn2mdkz7xuwhdnrjueg4dxxn6tfzm] example_inputs[339]: TensorMetadata(dtype=torch.float32, shape=torch.Size([2304]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=9216, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)",
+ "[a64xnvbb6q6gew5c3aovaeqivj3hstrmxkbra3eidivx42znnuv] example_inputs[340]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768, 768]), stride=(768, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=2359296, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)",
+ "[ymueslj5cs5k2l243gcdaohltqzpminou4x5gltmu2cgjgroyyd] example_inputs[341]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=3072, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)",
+ "[ymueslj5cs5k2l243gcdaohltqzpminou4x5gltmu2cgjgroyyd] example_inputs[342]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=3072, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)",
+ "[ymueslj5cs5k2l243gcdaohltqzpminou4x5gltmu2cgjgroyyd] example_inputs[343]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=3072, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)",
+ "[aafg74anlk7yqmlg6ucup4tmjwhskblh75z227dcfmwpfzw7dzm] example_inputs[344]: TensorMetadata(dtype=torch.float32, shape=torch.Size([3072, 768]), stride=(768, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=9437184, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)",
+ "[sefiqkkteehwsnilwpljkx7vdgondub2hwggoishzmx7o5etmjk] example_inputs[345]: TensorMetadata(dtype=torch.float32, shape=torch.Size([3072]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=12288, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)",
+ "[6v4azk5hvrkyq22k6mfa2l7ovaddatmim7mcvcpdku2ieirjd64] example_inputs[346]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768, 3072]), stride=(3072, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=9437184, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)",
+ "[ymueslj5cs5k2l243gcdaohltqzpminou4x5gltmu2cgjgroyyd] example_inputs[347]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=3072, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)",
+ "[ymueslj5cs5k2l243gcdaohltqzpminou4x5gltmu2cgjgroyyd] example_inputs[348]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=3072, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)",
+ "[ymueslj5cs5k2l243gcdaohltqzpminou4x5gltmu2cgjgroyyd] example_inputs[349]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=3072, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)",
+ "[ku6bnu4u7osvly4jsmf2w2orf22tghfytwzq6ycj3otmnq6f3wn] example_inputs[350]: TensorMetadata(dtype=torch.float32, shape=torch.Size([2304, 768]), stride=(768, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=7077888, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)",
+ "[57h3bzk67jreuqiikhvgdtcn2mdkz7xuwhdnrjueg4dxxn6tfzm] example_inputs[351]: TensorMetadata(dtype=torch.float32, shape=torch.Size([2304]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=9216, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)",
+ "[a64xnvbb6q6gew5c3aovaeqivj3hstrmxkbra3eidivx42znnuv] example_inputs[352]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768, 768]), stride=(768, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=2359296, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)",
+ "[ymueslj5cs5k2l243gcdaohltqzpminou4x5gltmu2cgjgroyyd] example_inputs[353]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=3072, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)",
+ "[ymueslj5cs5k2l243gcdaohltqzpminou4x5gltmu2cgjgroyyd] example_inputs[354]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=3072, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)",
+ "[ymueslj5cs5k2l243gcdaohltqzpminou4x5gltmu2cgjgroyyd] example_inputs[355]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=3072, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)",
+ "[aafg74anlk7yqmlg6ucup4tmjwhskblh75z227dcfmwpfzw7dzm] example_inputs[356]: TensorMetadata(dtype=torch.float32, shape=torch.Size([3072, 768]), stride=(768, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=9437184, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)",
+ "[sefiqkkteehwsnilwpljkx7vdgondub2hwggoishzmx7o5etmjk] example_inputs[357]: TensorMetadata(dtype=torch.float32, shape=torch.Size([3072]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=12288, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)",
+ "[6v4azk5hvrkyq22k6mfa2l7ovaddatmim7mcvcpdku2ieirjd64] example_inputs[358]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768, 3072]), stride=(3072, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=9437184, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)",
+ "[ymueslj5cs5k2l243gcdaohltqzpminou4x5gltmu2cgjgroyyd] example_inputs[359]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=3072, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)",
+ "[ymueslj5cs5k2l243gcdaohltqzpminou4x5gltmu2cgjgroyyd] example_inputs[360]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=3072, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)",
+ "[ymueslj5cs5k2l243gcdaohltqzpminou4x5gltmu2cgjgroyyd] example_inputs[361]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=3072, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)",
+ "[ku6bnu4u7osvly4jsmf2w2orf22tghfytwzq6ycj3otmnq6f3wn] example_inputs[362]: TensorMetadata(dtype=torch.float32, shape=torch.Size([2304, 768]), stride=(768, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=7077888, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)",
+ "[57h3bzk67jreuqiikhvgdtcn2mdkz7xuwhdnrjueg4dxxn6tfzm] example_inputs[363]: TensorMetadata(dtype=torch.float32, shape=torch.Size([2304]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=9216, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)",
+ "[a64xnvbb6q6gew5c3aovaeqivj3hstrmxkbra3eidivx42znnuv] example_inputs[364]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768, 768]), stride=(768, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=2359296, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)",
+ "[ymueslj5cs5k2l243gcdaohltqzpminou4x5gltmu2cgjgroyyd] example_inputs[365]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=3072, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)",
+ "[ymueslj5cs5k2l243gcdaohltqzpminou4x5gltmu2cgjgroyyd] example_inputs[366]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=3072, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)",
+ "[ymueslj5cs5k2l243gcdaohltqzpminou4x5gltmu2cgjgroyyd] example_inputs[367]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=3072, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)",
+ "[aafg74anlk7yqmlg6ucup4tmjwhskblh75z227dcfmwpfzw7dzm] example_inputs[368]: TensorMetadata(dtype=torch.float32, shape=torch.Size([3072, 768]), stride=(768, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=9437184, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)",
+ "[sefiqkkteehwsnilwpljkx7vdgondub2hwggoishzmx7o5etmjk] example_inputs[369]: TensorMetadata(dtype=torch.float32, shape=torch.Size([3072]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=12288, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)",
+ "[6v4azk5hvrkyq22k6mfa2l7ovaddatmim7mcvcpdku2ieirjd64] example_inputs[370]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768, 3072]), stride=(3072, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=9437184, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)",
+ "[ymueslj5cs5k2l243gcdaohltqzpminou4x5gltmu2cgjgroyyd] example_inputs[371]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=3072, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)",
+ "[ymueslj5cs5k2l243gcdaohltqzpminou4x5gltmu2cgjgroyyd] example_inputs[372]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=3072, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)",
+ "[ymueslj5cs5k2l243gcdaohltqzpminou4x5gltmu2cgjgroyyd] example_inputs[373]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=3072, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)",
+ "[ku6bnu4u7osvly4jsmf2w2orf22tghfytwzq6ycj3otmnq6f3wn] example_inputs[374]: TensorMetadata(dtype=torch.float32, shape=torch.Size([2304, 768]), stride=(768, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=7077888, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)",
+ "[57h3bzk67jreuqiikhvgdtcn2mdkz7xuwhdnrjueg4dxxn6tfzm] example_inputs[375]: TensorMetadata(dtype=torch.float32, shape=torch.Size([2304]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=9216, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)",
+ "[a64xnvbb6q6gew5c3aovaeqivj3hstrmxkbra3eidivx42znnuv] example_inputs[376]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768, 768]), stride=(768, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=2359296, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)",
+ "[ymueslj5cs5k2l243gcdaohltqzpminou4x5gltmu2cgjgroyyd] example_inputs[377]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=3072, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)",
+ "[ymueslj5cs5k2l243gcdaohltqzpminou4x5gltmu2cgjgroyyd] example_inputs[378]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=3072, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)",
+ "[ymueslj5cs5k2l243gcdaohltqzpminou4x5gltmu2cgjgroyyd] example_inputs[379]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=3072, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)",
+ "[aafg74anlk7yqmlg6ucup4tmjwhskblh75z227dcfmwpfzw7dzm] example_inputs[380]: TensorMetadata(dtype=torch.float32, shape=torch.Size([3072, 768]), stride=(768, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=9437184, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)",
+ "[sefiqkkteehwsnilwpljkx7vdgondub2hwggoishzmx7o5etmjk] example_inputs[381]: TensorMetadata(dtype=torch.float32, shape=torch.Size([3072]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=12288, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)",
+ "[6v4azk5hvrkyq22k6mfa2l7ovaddatmim7mcvcpdku2ieirjd64] example_inputs[382]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768, 3072]), stride=(3072, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=9437184, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)",
+ "[ymueslj5cs5k2l243gcdaohltqzpminou4x5gltmu2cgjgroyyd] example_inputs[383]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=3072, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)",
+ "[ymueslj5cs5k2l243gcdaohltqzpminou4x5gltmu2cgjgroyyd] example_inputs[384]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=3072, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)",
+ "[ymueslj5cs5k2l243gcdaohltqzpminou4x5gltmu2cgjgroyyd] example_inputs[385]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=3072, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)",
+ "[ku6bnu4u7osvly4jsmf2w2orf22tghfytwzq6ycj3otmnq6f3wn] example_inputs[386]: TensorMetadata(dtype=torch.float32, shape=torch.Size([2304, 768]), stride=(768, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=7077888, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)",
+ "[57h3bzk67jreuqiikhvgdtcn2mdkz7xuwhdnrjueg4dxxn6tfzm] example_inputs[387]: TensorMetadata(dtype=torch.float32, shape=torch.Size([2304]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=9216, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)",
+ "[a64xnvbb6q6gew5c3aovaeqivj3hstrmxkbra3eidivx42znnuv] example_inputs[388]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768, 768]), stride=(768, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=2359296, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)",
+ "[ymueslj5cs5k2l243gcdaohltqzpminou4x5gltmu2cgjgroyyd] example_inputs[389]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=3072, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)",
+ "[ymueslj5cs5k2l243gcdaohltqzpminou4x5gltmu2cgjgroyyd] example_inputs[390]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=3072, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)",
+ "[ymueslj5cs5k2l243gcdaohltqzpminou4x5gltmu2cgjgroyyd] example_inputs[391]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=3072, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)",
+ "[aafg74anlk7yqmlg6ucup4tmjwhskblh75z227dcfmwpfzw7dzm] example_inputs[392]: TensorMetadata(dtype=torch.float32, shape=torch.Size([3072, 768]), stride=(768, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=9437184, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)",
+ "[sefiqkkteehwsnilwpljkx7vdgondub2hwggoishzmx7o5etmjk] example_inputs[393]: TensorMetadata(dtype=torch.float32, shape=torch.Size([3072]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=12288, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)",
+ "[6v4azk5hvrkyq22k6mfa2l7ovaddatmim7mcvcpdku2ieirjd64] example_inputs[394]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768, 3072]), stride=(3072, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=9437184, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)",
+ "[ymueslj5cs5k2l243gcdaohltqzpminou4x5gltmu2cgjgroyyd] example_inputs[395]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=3072, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)",
+ "[ymueslj5cs5k2l243gcdaohltqzpminou4x5gltmu2cgjgroyyd] example_inputs[396]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=3072, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)",
+ "[ymueslj5cs5k2l243gcdaohltqzpminou4x5gltmu2cgjgroyyd] example_inputs[397]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=3072, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)",
+ "[ku6bnu4u7osvly4jsmf2w2orf22tghfytwzq6ycj3otmnq6f3wn] example_inputs[398]: TensorMetadata(dtype=torch.float32, shape=torch.Size([2304, 768]), stride=(768, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=7077888, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)",
+ "[57h3bzk67jreuqiikhvgdtcn2mdkz7xuwhdnrjueg4dxxn6tfzm] example_inputs[399]: TensorMetadata(dtype=torch.float32, shape=torch.Size([2304]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=9216, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)",
+ "[a64xnvbb6q6gew5c3aovaeqivj3hstrmxkbra3eidivx42znnuv] example_inputs[400]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768, 768]), stride=(768, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=2359296, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)",
+ "[ymueslj5cs5k2l243gcdaohltqzpminou4x5gltmu2cgjgroyyd] example_inputs[401]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=3072, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)",
+ "[ymueslj5cs5k2l243gcdaohltqzpminou4x5gltmu2cgjgroyyd] example_inputs[402]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=3072, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)",
+ "[ymueslj5cs5k2l243gcdaohltqzpminou4x5gltmu2cgjgroyyd] example_inputs[403]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=3072, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)",
+ "[aafg74anlk7yqmlg6ucup4tmjwhskblh75z227dcfmwpfzw7dzm] example_inputs[404]: TensorMetadata(dtype=torch.float32, shape=torch.Size([3072, 768]), stride=(768, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=9437184, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)",
+ "[sefiqkkteehwsnilwpljkx7vdgondub2hwggoishzmx7o5etmjk] example_inputs[405]: TensorMetadata(dtype=torch.float32, shape=torch.Size([3072]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=12288, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)",
+ "[6v4azk5hvrkyq22k6mfa2l7ovaddatmim7mcvcpdku2ieirjd64] example_inputs[406]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768, 3072]), stride=(3072, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=9437184, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)",
+ "[ymueslj5cs5k2l243gcdaohltqzpminou4x5gltmu2cgjgroyyd] example_inputs[407]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=3072, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)",
+ "[ymueslj5cs5k2l243gcdaohltqzpminou4x5gltmu2cgjgroyyd] example_inputs[408]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=3072, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)",
+ "[ymueslj5cs5k2l243gcdaohltqzpminou4x5gltmu2cgjgroyyd] example_inputs[409]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=3072, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)",
+ "[ku6bnu4u7osvly4jsmf2w2orf22tghfytwzq6ycj3otmnq6f3wn] example_inputs[410]: TensorMetadata(dtype=torch.float32, shape=torch.Size([2304, 768]), stride=(768, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=7077888, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)",
+ "[57h3bzk67jreuqiikhvgdtcn2mdkz7xuwhdnrjueg4dxxn6tfzm] example_inputs[411]: TensorMetadata(dtype=torch.float32, shape=torch.Size([2304]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=9216, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)",
+ "[a64xnvbb6q6gew5c3aovaeqivj3hstrmxkbra3eidivx42znnuv] example_inputs[412]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768, 768]), stride=(768, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=2359296, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)",
+ "[ymueslj5cs5k2l243gcdaohltqzpminou4x5gltmu2cgjgroyyd] example_inputs[413]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=3072, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)",
+ "[ymueslj5cs5k2l243gcdaohltqzpminou4x5gltmu2cgjgroyyd] example_inputs[414]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=3072, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)",
+ "[ymueslj5cs5k2l243gcdaohltqzpminou4x5gltmu2cgjgroyyd] example_inputs[415]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=3072, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)",
+ "[aafg74anlk7yqmlg6ucup4tmjwhskblh75z227dcfmwpfzw7dzm] example_inputs[416]: TensorMetadata(dtype=torch.float32, shape=torch.Size([3072, 768]), stride=(768, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=9437184, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)",
+ "[sefiqkkteehwsnilwpljkx7vdgondub2hwggoishzmx7o5etmjk] example_inputs[417]: TensorMetadata(dtype=torch.float32, shape=torch.Size([3072]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=12288, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)",
+ "[6v4azk5hvrkyq22k6mfa2l7ovaddatmim7mcvcpdku2ieirjd64] example_inputs[418]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768, 3072]), stride=(3072, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=9437184, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)",
+ "[ymueslj5cs5k2l243gcdaohltqzpminou4x5gltmu2cgjgroyyd] example_inputs[419]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=3072, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)",
+ "[ymueslj5cs5k2l243gcdaohltqzpminou4x5gltmu2cgjgroyyd] example_inputs[420]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=3072, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)",
+ "[ymueslj5cs5k2l243gcdaohltqzpminou4x5gltmu2cgjgroyyd] example_inputs[421]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=3072, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)",
+ "[ku6bnu4u7osvly4jsmf2w2orf22tghfytwzq6ycj3otmnq6f3wn] example_inputs[422]: TensorMetadata(dtype=torch.float32, shape=torch.Size([2304, 768]), stride=(768, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=7077888, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)",
+ "[57h3bzk67jreuqiikhvgdtcn2mdkz7xuwhdnrjueg4dxxn6tfzm] example_inputs[423]: TensorMetadata(dtype=torch.float32, shape=torch.Size([2304]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=9216, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)",
+ "[a64xnvbb6q6gew5c3aovaeqivj3hstrmxkbra3eidivx42znnuv] example_inputs[424]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768, 768]), stride=(768, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=2359296, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)",
+ "[ymueslj5cs5k2l243gcdaohltqzpminou4x5gltmu2cgjgroyyd] example_inputs[425]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=3072, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)",
+ "[ymueslj5cs5k2l243gcdaohltqzpminou4x5gltmu2cgjgroyyd] example_inputs[426]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=3072, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)",
+ "[ymueslj5cs5k2l243gcdaohltqzpminou4x5gltmu2cgjgroyyd] example_inputs[427]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=3072, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)",
+ "[aafg74anlk7yqmlg6ucup4tmjwhskblh75z227dcfmwpfzw7dzm] example_inputs[428]: TensorMetadata(dtype=torch.float32, shape=torch.Size([3072, 768]), stride=(768, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=9437184, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)",
+ "[sefiqkkteehwsnilwpljkx7vdgondub2hwggoishzmx7o5etmjk] example_inputs[429]: TensorMetadata(dtype=torch.float32, shape=torch.Size([3072]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=12288, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)",
+ "[6v4azk5hvrkyq22k6mfa2l7ovaddatmim7mcvcpdku2ieirjd64] example_inputs[430]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768, 3072]), stride=(3072, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=9437184, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)",
+ "[ymueslj5cs5k2l243gcdaohltqzpminou4x5gltmu2cgjgroyyd] example_inputs[431]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=3072, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)",
+ "[ymueslj5cs5k2l243gcdaohltqzpminou4x5gltmu2cgjgroyyd] example_inputs[432]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=3072, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)",
+ "[ymueslj5cs5k2l243gcdaohltqzpminou4x5gltmu2cgjgroyyd] example_inputs[433]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=3072, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)",
+ "[ku6bnu4u7osvly4jsmf2w2orf22tghfytwzq6ycj3otmnq6f3wn] example_inputs[434]: TensorMetadata(dtype=torch.float32, shape=torch.Size([2304, 768]), stride=(768, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=7077888, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)",
+ "[57h3bzk67jreuqiikhvgdtcn2mdkz7xuwhdnrjueg4dxxn6tfzm] example_inputs[435]: TensorMetadata(dtype=torch.float32, shape=torch.Size([2304]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=9216, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)",
+ "[a64xnvbb6q6gew5c3aovaeqivj3hstrmxkbra3eidivx42znnuv] example_inputs[436]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768, 768]), stride=(768, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=2359296, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)",
+ "[ymueslj5cs5k2l243gcdaohltqzpminou4x5gltmu2cgjgroyyd] example_inputs[437]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=3072, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)",
+ "[ymueslj5cs5k2l243gcdaohltqzpminou4x5gltmu2cgjgroyyd] example_inputs[438]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=3072, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)",
+ "[ymueslj5cs5k2l243gcdaohltqzpminou4x5gltmu2cgjgroyyd] example_inputs[439]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=3072, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)",
+ "[aafg74anlk7yqmlg6ucup4tmjwhskblh75z227dcfmwpfzw7dzm] example_inputs[440]: TensorMetadata(dtype=torch.float32, shape=torch.Size([3072, 768]), stride=(768, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=9437184, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)",
+ "[sefiqkkteehwsnilwpljkx7vdgondub2hwggoishzmx7o5etmjk] example_inputs[441]: TensorMetadata(dtype=torch.float32, shape=torch.Size([3072]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=12288, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)",
+ "[6v4azk5hvrkyq22k6mfa2l7ovaddatmim7mcvcpdku2ieirjd64] example_inputs[442]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768, 3072]), stride=(3072, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=9437184, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)",
+ "[ymueslj5cs5k2l243gcdaohltqzpminou4x5gltmu2cgjgroyyd] example_inputs[443]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=3072, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)",
+ "[ymueslj5cs5k2l243gcdaohltqzpminou4x5gltmu2cgjgroyyd] example_inputs[444]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=3072, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)",
+ "[ymueslj5cs5k2l243gcdaohltqzpminou4x5gltmu2cgjgroyyd] example_inputs[445]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=3072,
requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[l36txgwatro2oumaemq3fn7fzd6c7rqt7jkya4whofwgykwhyac] example_inputs[446]: TensorMetadata(dtype=torch.float32, shape=torch.Size([50304, 768]), stride=(768, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=154533888, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[ymueslj5cs5k2l243gcdaohltqzpminou4x5gltmu2cgjgroyyd] example_inputs[447]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=3072, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[ymueslj5cs5k2l243gcdaohltqzpminou4x5gltmu2cgjgroyyd] example_inputs[448]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=3072, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[ku6bnu4u7osvly4jsmf2w2orf22tghfytwzq6ycj3otmnq6f3wn] example_inputs[449]: TensorMetadata(dtype=torch.float32, shape=torch.Size([2304, 768]), stride=(768, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=7077888, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[57h3bzk67jreuqiikhvgdtcn2mdkz7xuwhdnrjueg4dxxn6tfzm] example_inputs[450]: TensorMetadata(dtype=torch.float32, shape=torch.Size([2304]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=9216, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[a64xnvbb6q6gew5c3aovaeqivj3hstrmxkbra3eidivx42znnuv] example_inputs[451]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768, 768]), stride=(768, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=2359296, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[ymueslj5cs5k2l243gcdaohltqzpminou4x5gltmu2cgjgroyyd] example_inputs[452]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=3072, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[ymueslj5cs5k2l243gcdaohltqzpminou4x5gltmu2cgjgroyyd] example_inputs[453]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), 
layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=3072, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[ymueslj5cs5k2l243gcdaohltqzpminou4x5gltmu2cgjgroyyd] example_inputs[454]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=3072, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[aafg74anlk7yqmlg6ucup4tmjwhskblh75z227dcfmwpfzw7dzm] example_inputs[455]: TensorMetadata(dtype=torch.float32, shape=torch.Size([3072, 768]), stride=(768, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=9437184, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[sefiqkkteehwsnilwpljkx7vdgondub2hwggoishzmx7o5etmjk] example_inputs[456]: TensorMetadata(dtype=torch.float32, shape=torch.Size([3072]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=12288, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[6v4azk5hvrkyq22k6mfa2l7ovaddatmim7mcvcpdku2ieirjd64] example_inputs[457]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768, 3072]), stride=(3072, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=9437184, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[ymueslj5cs5k2l243gcdaohltqzpminou4x5gltmu2cgjgroyyd] example_inputs[458]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=3072, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[ymueslj5cs5k2l243gcdaohltqzpminou4x5gltmu2cgjgroyyd] example_inputs[459]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=3072, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[ymueslj5cs5k2l243gcdaohltqzpminou4x5gltmu2cgjgroyyd] example_inputs[460]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=3072, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[ku6bnu4u7osvly4jsmf2w2orf22tghfytwzq6ycj3otmnq6f3wn] example_inputs[461]: TensorMetadata(dtype=torch.float32, 
shape=torch.Size([2304, 768]), stride=(768, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=7077888, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[57h3bzk67jreuqiikhvgdtcn2mdkz7xuwhdnrjueg4dxxn6tfzm] example_inputs[462]: TensorMetadata(dtype=torch.float32, shape=torch.Size([2304]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=9216, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[a64xnvbb6q6gew5c3aovaeqivj3hstrmxkbra3eidivx42znnuv] example_inputs[463]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768, 768]), stride=(768, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=2359296, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[ymueslj5cs5k2l243gcdaohltqzpminou4x5gltmu2cgjgroyyd] example_inputs[464]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=3072, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[ymueslj5cs5k2l243gcdaohltqzpminou4x5gltmu2cgjgroyyd] example_inputs[465]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=3072, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[ymueslj5cs5k2l243gcdaohltqzpminou4x5gltmu2cgjgroyyd] example_inputs[466]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=3072, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[aafg74anlk7yqmlg6ucup4tmjwhskblh75z227dcfmwpfzw7dzm] example_inputs[467]: TensorMetadata(dtype=torch.float32, shape=torch.Size([3072, 768]), stride=(768, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=9437184, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[sefiqkkteehwsnilwpljkx7vdgondub2hwggoishzmx7o5etmjk] example_inputs[468]: TensorMetadata(dtype=torch.float32, shape=torch.Size([3072]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=12288, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + 
"[6v4azk5hvrkyq22k6mfa2l7ovaddatmim7mcvcpdku2ieirjd64] example_inputs[469]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768, 3072]), stride=(3072, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=9437184, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[ymueslj5cs5k2l243gcdaohltqzpminou4x5gltmu2cgjgroyyd] example_inputs[470]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=3072, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[ymueslj5cs5k2l243gcdaohltqzpminou4x5gltmu2cgjgroyyd] example_inputs[471]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=3072, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[ymueslj5cs5k2l243gcdaohltqzpminou4x5gltmu2cgjgroyyd] example_inputs[472]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=3072, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[ku6bnu4u7osvly4jsmf2w2orf22tghfytwzq6ycj3otmnq6f3wn] example_inputs[473]: TensorMetadata(dtype=torch.float32, shape=torch.Size([2304, 768]), stride=(768, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=7077888, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[57h3bzk67jreuqiikhvgdtcn2mdkz7xuwhdnrjueg4dxxn6tfzm] example_inputs[474]: TensorMetadata(dtype=torch.float32, shape=torch.Size([2304]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=9216, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[a64xnvbb6q6gew5c3aovaeqivj3hstrmxkbra3eidivx42znnuv] example_inputs[475]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768, 768]), stride=(768, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=2359296, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[ymueslj5cs5k2l243gcdaohltqzpminou4x5gltmu2cgjgroyyd] example_inputs[476]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=3072, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, 
is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[ymueslj5cs5k2l243gcdaohltqzpminou4x5gltmu2cgjgroyyd] example_inputs[477]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=3072, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[ymueslj5cs5k2l243gcdaohltqzpminou4x5gltmu2cgjgroyyd] example_inputs[478]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=3072, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[aafg74anlk7yqmlg6ucup4tmjwhskblh75z227dcfmwpfzw7dzm] example_inputs[479]: TensorMetadata(dtype=torch.float32, shape=torch.Size([3072, 768]), stride=(768, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=9437184, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[sefiqkkteehwsnilwpljkx7vdgondub2hwggoishzmx7o5etmjk] example_inputs[480]: TensorMetadata(dtype=torch.float32, shape=torch.Size([3072]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=12288, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[6v4azk5hvrkyq22k6mfa2l7ovaddatmim7mcvcpdku2ieirjd64] example_inputs[481]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768, 3072]), stride=(3072, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=9437184, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[ymueslj5cs5k2l243gcdaohltqzpminou4x5gltmu2cgjgroyyd] example_inputs[482]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=3072, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[ymueslj5cs5k2l243gcdaohltqzpminou4x5gltmu2cgjgroyyd] example_inputs[483]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=3072, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[ymueslj5cs5k2l243gcdaohltqzpminou4x5gltmu2cgjgroyyd] example_inputs[484]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=3072, 
requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[ku6bnu4u7osvly4jsmf2w2orf22tghfytwzq6ycj3otmnq6f3wn] example_inputs[485]: TensorMetadata(dtype=torch.float32, shape=torch.Size([2304, 768]), stride=(768, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=7077888, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[57h3bzk67jreuqiikhvgdtcn2mdkz7xuwhdnrjueg4dxxn6tfzm] example_inputs[486]: TensorMetadata(dtype=torch.float32, shape=torch.Size([2304]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=9216, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[a64xnvbb6q6gew5c3aovaeqivj3hstrmxkbra3eidivx42znnuv] example_inputs[487]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768, 768]), stride=(768, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=2359296, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[ymueslj5cs5k2l243gcdaohltqzpminou4x5gltmu2cgjgroyyd] example_inputs[488]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=3072, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[ymueslj5cs5k2l243gcdaohltqzpminou4x5gltmu2cgjgroyyd] example_inputs[489]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=3072, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[ymueslj5cs5k2l243gcdaohltqzpminou4x5gltmu2cgjgroyyd] example_inputs[490]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=3072, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[aafg74anlk7yqmlg6ucup4tmjwhskblh75z227dcfmwpfzw7dzm] example_inputs[491]: TensorMetadata(dtype=torch.float32, shape=torch.Size([3072, 768]), stride=(768, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=9437184, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[sefiqkkteehwsnilwpljkx7vdgondub2hwggoishzmx7o5etmjk] example_inputs[492]: TensorMetadata(dtype=torch.float32, shape=torch.Size([3072]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, 
memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=12288, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[6v4azk5hvrkyq22k6mfa2l7ovaddatmim7mcvcpdku2ieirjd64] example_inputs[493]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768, 3072]), stride=(3072, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=9437184, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[ymueslj5cs5k2l243gcdaohltqzpminou4x5gltmu2cgjgroyyd] example_inputs[494]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=3072, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[ymueslj5cs5k2l243gcdaohltqzpminou4x5gltmu2cgjgroyyd] example_inputs[495]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=3072, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[ymueslj5cs5k2l243gcdaohltqzpminou4x5gltmu2cgjgroyyd] example_inputs[496]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=3072, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[ku6bnu4u7osvly4jsmf2w2orf22tghfytwzq6ycj3otmnq6f3wn] example_inputs[497]: TensorMetadata(dtype=torch.float32, shape=torch.Size([2304, 768]), stride=(768, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=7077888, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[57h3bzk67jreuqiikhvgdtcn2mdkz7xuwhdnrjueg4dxxn6tfzm] example_inputs[498]: TensorMetadata(dtype=torch.float32, shape=torch.Size([2304]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=9216, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[a64xnvbb6q6gew5c3aovaeqivj3hstrmxkbra3eidivx42znnuv] example_inputs[499]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768, 768]), stride=(768, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=2359296, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[ymueslj5cs5k2l243gcdaohltqzpminou4x5gltmu2cgjgroyyd] example_inputs[500]: TensorMetadata(dtype=torch.float32, 
shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=3072, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[ymueslj5cs5k2l243gcdaohltqzpminou4x5gltmu2cgjgroyyd] example_inputs[501]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=3072, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[ymueslj5cs5k2l243gcdaohltqzpminou4x5gltmu2cgjgroyyd] example_inputs[502]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=3072, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[aafg74anlk7yqmlg6ucup4tmjwhskblh75z227dcfmwpfzw7dzm] example_inputs[503]: TensorMetadata(dtype=torch.float32, shape=torch.Size([3072, 768]), stride=(768, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=9437184, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[sefiqkkteehwsnilwpljkx7vdgondub2hwggoishzmx7o5etmjk] example_inputs[504]: TensorMetadata(dtype=torch.float32, shape=torch.Size([3072]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=12288, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[6v4azk5hvrkyq22k6mfa2l7ovaddatmim7mcvcpdku2ieirjd64] example_inputs[505]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768, 3072]), stride=(3072, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=9437184, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[ymueslj5cs5k2l243gcdaohltqzpminou4x5gltmu2cgjgroyyd] example_inputs[506]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=3072, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[ymueslj5cs5k2l243gcdaohltqzpminou4x5gltmu2cgjgroyyd] example_inputs[507]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=3072, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + 
"[ymueslj5cs5k2l243gcdaohltqzpminou4x5gltmu2cgjgroyyd] example_inputs[508]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=3072, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[ku6bnu4u7osvly4jsmf2w2orf22tghfytwzq6ycj3otmnq6f3wn] example_inputs[509]: TensorMetadata(dtype=torch.float32, shape=torch.Size([2304, 768]), stride=(768, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=7077888, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[57h3bzk67jreuqiikhvgdtcn2mdkz7xuwhdnrjueg4dxxn6tfzm] example_inputs[510]: TensorMetadata(dtype=torch.float32, shape=torch.Size([2304]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=9216, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[a64xnvbb6q6gew5c3aovaeqivj3hstrmxkbra3eidivx42znnuv] example_inputs[511]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768, 768]), stride=(768, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=2359296, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[ymueslj5cs5k2l243gcdaohltqzpminou4x5gltmu2cgjgroyyd] example_inputs[512]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=3072, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[ymueslj5cs5k2l243gcdaohltqzpminou4x5gltmu2cgjgroyyd] example_inputs[513]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=3072, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[ymueslj5cs5k2l243gcdaohltqzpminou4x5gltmu2cgjgroyyd] example_inputs[514]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=3072, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[aafg74anlk7yqmlg6ucup4tmjwhskblh75z227dcfmwpfzw7dzm] example_inputs[515]: TensorMetadata(dtype=torch.float32, shape=torch.Size([3072, 768]), stride=(768, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=9437184, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, 
is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[sefiqkkteehwsnilwpljkx7vdgondub2hwggoishzmx7o5etmjk] example_inputs[516]: TensorMetadata(dtype=torch.float32, shape=torch.Size([3072]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=12288, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[6v4azk5hvrkyq22k6mfa2l7ovaddatmim7mcvcpdku2ieirjd64] example_inputs[517]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768, 3072]), stride=(3072, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=9437184, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[ymueslj5cs5k2l243gcdaohltqzpminou4x5gltmu2cgjgroyyd] example_inputs[518]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=3072, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[ymueslj5cs5k2l243gcdaohltqzpminou4x5gltmu2cgjgroyyd] example_inputs[519]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=3072, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[ymueslj5cs5k2l243gcdaohltqzpminou4x5gltmu2cgjgroyyd] example_inputs[520]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=3072, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[ku6bnu4u7osvly4jsmf2w2orf22tghfytwzq6ycj3otmnq6f3wn] example_inputs[521]: TensorMetadata(dtype=torch.float32, shape=torch.Size([2304, 768]), stride=(768, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=7077888, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[57h3bzk67jreuqiikhvgdtcn2mdkz7xuwhdnrjueg4dxxn6tfzm] example_inputs[522]: TensorMetadata(dtype=torch.float32, shape=torch.Size([2304]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=9216, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[a64xnvbb6q6gew5c3aovaeqivj3hstrmxkbra3eidivx42znnuv] example_inputs[523]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768, 768]), stride=(768, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, 
storage_bytes=2359296, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[ymueslj5cs5k2l243gcdaohltqzpminou4x5gltmu2cgjgroyyd] example_inputs[524]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=3072, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[ymueslj5cs5k2l243gcdaohltqzpminou4x5gltmu2cgjgroyyd] example_inputs[525]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=3072, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[ymueslj5cs5k2l243gcdaohltqzpminou4x5gltmu2cgjgroyyd] example_inputs[526]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=3072, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[aafg74anlk7yqmlg6ucup4tmjwhskblh75z227dcfmwpfzw7dzm] example_inputs[527]: TensorMetadata(dtype=torch.float32, shape=torch.Size([3072, 768]), stride=(768, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=9437184, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[sefiqkkteehwsnilwpljkx7vdgondub2hwggoishzmx7o5etmjk] example_inputs[528]: TensorMetadata(dtype=torch.float32, shape=torch.Size([3072]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=12288, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[6v4azk5hvrkyq22k6mfa2l7ovaddatmim7mcvcpdku2ieirjd64] example_inputs[529]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768, 3072]), stride=(3072, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=9437184, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[ymueslj5cs5k2l243gcdaohltqzpminou4x5gltmu2cgjgroyyd] example_inputs[530]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=3072, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[ymueslj5cs5k2l243gcdaohltqzpminou4x5gltmu2cgjgroyyd] example_inputs[531]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), 
layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=3072, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[ymueslj5cs5k2l243gcdaohltqzpminou4x5gltmu2cgjgroyyd] example_inputs[532]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=3072, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[ku6bnu4u7osvly4jsmf2w2orf22tghfytwzq6ycj3otmnq6f3wn] example_inputs[533]: TensorMetadata(dtype=torch.float32, shape=torch.Size([2304, 768]), stride=(768, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=7077888, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[57h3bzk67jreuqiikhvgdtcn2mdkz7xuwhdnrjueg4dxxn6tfzm] example_inputs[534]: TensorMetadata(dtype=torch.float32, shape=torch.Size([2304]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=9216, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[a64xnvbb6q6gew5c3aovaeqivj3hstrmxkbra3eidivx42znnuv] example_inputs[535]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768, 768]), stride=(768, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=2359296, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[ymueslj5cs5k2l243gcdaohltqzpminou4x5gltmu2cgjgroyyd] example_inputs[536]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=3072, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[ymueslj5cs5k2l243gcdaohltqzpminou4x5gltmu2cgjgroyyd] example_inputs[537]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=3072, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[ymueslj5cs5k2l243gcdaohltqzpminou4x5gltmu2cgjgroyyd] example_inputs[538]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=3072, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[aafg74anlk7yqmlg6ucup4tmjwhskblh75z227dcfmwpfzw7dzm] example_inputs[539]: TensorMetadata(dtype=torch.float32, 
shape=torch.Size([3072, 768]), stride=(768, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=9437184, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[sefiqkkteehwsnilwpljkx7vdgondub2hwggoishzmx7o5etmjk] example_inputs[540]: TensorMetadata(dtype=torch.float32, shape=torch.Size([3072]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=12288, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[6v4azk5hvrkyq22k6mfa2l7ovaddatmim7mcvcpdku2ieirjd64] example_inputs[541]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768, 3072]), stride=(3072, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=9437184, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[ymueslj5cs5k2l243gcdaohltqzpminou4x5gltmu2cgjgroyyd] example_inputs[542]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=3072, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[ymueslj5cs5k2l243gcdaohltqzpminou4x5gltmu2cgjgroyyd] example_inputs[543]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=3072, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[ymueslj5cs5k2l243gcdaohltqzpminou4x5gltmu2cgjgroyyd] example_inputs[544]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=3072, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[ku6bnu4u7osvly4jsmf2w2orf22tghfytwzq6ycj3otmnq6f3wn] example_inputs[545]: TensorMetadata(dtype=torch.float32, shape=torch.Size([2304, 768]), stride=(768, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=7077888, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[57h3bzk67jreuqiikhvgdtcn2mdkz7xuwhdnrjueg4dxxn6tfzm] example_inputs[546]: TensorMetadata(dtype=torch.float32, shape=torch.Size([2304]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=9216, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + 
"[a64xnvbb6q6gew5c3aovaeqivj3hstrmxkbra3eidivx42znnuv] example_inputs[547]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768, 768]), stride=(768, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=2359296, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[ymueslj5cs5k2l243gcdaohltqzpminou4x5gltmu2cgjgroyyd] example_inputs[548]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=3072, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[ymueslj5cs5k2l243gcdaohltqzpminou4x5gltmu2cgjgroyyd] example_inputs[549]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=3072, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[ymueslj5cs5k2l243gcdaohltqzpminou4x5gltmu2cgjgroyyd] example_inputs[550]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=3072, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[aafg74anlk7yqmlg6ucup4tmjwhskblh75z227dcfmwpfzw7dzm] example_inputs[551]: TensorMetadata(dtype=torch.float32, shape=torch.Size([3072, 768]), stride=(768, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=9437184, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[sefiqkkteehwsnilwpljkx7vdgondub2hwggoishzmx7o5etmjk] example_inputs[552]: TensorMetadata(dtype=torch.float32, shape=torch.Size([3072]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=12288, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[6v4azk5hvrkyq22k6mfa2l7ovaddatmim7mcvcpdku2ieirjd64] example_inputs[553]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768, 3072]), stride=(3072, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=9437184, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[ymueslj5cs5k2l243gcdaohltqzpminou4x5gltmu2cgjgroyyd] example_inputs[554]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=3072, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, 
is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[ymueslj5cs5k2l243gcdaohltqzpminou4x5gltmu2cgjgroyyd] example_inputs[555]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=3072, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[ymueslj5cs5k2l243gcdaohltqzpminou4x5gltmu2cgjgroyyd] example_inputs[556]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=3072, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[ku6bnu4u7osvly4jsmf2w2orf22tghfytwzq6ycj3otmnq6f3wn] example_inputs[557]: TensorMetadata(dtype=torch.float32, shape=torch.Size([2304, 768]), stride=(768, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=7077888, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[57h3bzk67jreuqiikhvgdtcn2mdkz7xuwhdnrjueg4dxxn6tfzm] example_inputs[558]: TensorMetadata(dtype=torch.float32, shape=torch.Size([2304]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=9216, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[a64xnvbb6q6gew5c3aovaeqivj3hstrmxkbra3eidivx42znnuv] example_inputs[559]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768, 768]), stride=(768, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=2359296, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[ymueslj5cs5k2l243gcdaohltqzpminou4x5gltmu2cgjgroyyd] example_inputs[560]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=3072, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[ymueslj5cs5k2l243gcdaohltqzpminou4x5gltmu2cgjgroyyd] example_inputs[561]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=3072, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[ymueslj5cs5k2l243gcdaohltqzpminou4x5gltmu2cgjgroyyd] example_inputs[562]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=3072, 
requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[aafg74anlk7yqmlg6ucup4tmjwhskblh75z227dcfmwpfzw7dzm] example_inputs[563]: TensorMetadata(dtype=torch.float32, shape=torch.Size([3072, 768]), stride=(768, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=9437184, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[sefiqkkteehwsnilwpljkx7vdgondub2hwggoishzmx7o5etmjk] example_inputs[564]: TensorMetadata(dtype=torch.float32, shape=torch.Size([3072]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=12288, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[6v4azk5hvrkyq22k6mfa2l7ovaddatmim7mcvcpdku2ieirjd64] example_inputs[565]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768, 3072]), stride=(3072, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=9437184, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[ymueslj5cs5k2l243gcdaohltqzpminou4x5gltmu2cgjgroyyd] example_inputs[566]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=3072, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[ymueslj5cs5k2l243gcdaohltqzpminou4x5gltmu2cgjgroyyd] example_inputs[567]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=3072, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[ymueslj5cs5k2l243gcdaohltqzpminou4x5gltmu2cgjgroyyd] example_inputs[568]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=3072, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[ku6bnu4u7osvly4jsmf2w2orf22tghfytwzq6ycj3otmnq6f3wn] example_inputs[569]: TensorMetadata(dtype=torch.float32, shape=torch.Size([2304, 768]), stride=(768, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=7077888, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[57h3bzk67jreuqiikhvgdtcn2mdkz7xuwhdnrjueg4dxxn6tfzm] example_inputs[570]: TensorMetadata(dtype=torch.float32, shape=torch.Size([2304]), stride=(1,), device=device(type='cuda', index=0), 
layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=9216, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[a64xnvbb6q6gew5c3aovaeqivj3hstrmxkbra3eidivx42znnuv] example_inputs[571]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768, 768]), stride=(768, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=2359296, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[ymueslj5cs5k2l243gcdaohltqzpminou4x5gltmu2cgjgroyyd] example_inputs[572]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=3072, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[ymueslj5cs5k2l243gcdaohltqzpminou4x5gltmu2cgjgroyyd] example_inputs[573]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=3072, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[ymueslj5cs5k2l243gcdaohltqzpminou4x5gltmu2cgjgroyyd] example_inputs[574]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=3072, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[aafg74anlk7yqmlg6ucup4tmjwhskblh75z227dcfmwpfzw7dzm] example_inputs[575]: TensorMetadata(dtype=torch.float32, shape=torch.Size([3072, 768]), stride=(768, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=9437184, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[sefiqkkteehwsnilwpljkx7vdgondub2hwggoishzmx7o5etmjk] example_inputs[576]: TensorMetadata(dtype=torch.float32, shape=torch.Size([3072]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=12288, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[6v4azk5hvrkyq22k6mfa2l7ovaddatmim7mcvcpdku2ieirjd64] example_inputs[577]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768, 3072]), stride=(3072, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=9437184, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[ymueslj5cs5k2l243gcdaohltqzpminou4x5gltmu2cgjgroyyd] example_inputs[578]: 
TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=3072, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[ymueslj5cs5k2l243gcdaohltqzpminou4x5gltmu2cgjgroyyd] example_inputs[579]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=3072, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[ymueslj5cs5k2l243gcdaohltqzpminou4x5gltmu2cgjgroyyd] example_inputs[580]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=3072, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[ku6bnu4u7osvly4jsmf2w2orf22tghfytwzq6ycj3otmnq6f3wn] example_inputs[581]: TensorMetadata(dtype=torch.float32, shape=torch.Size([2304, 768]), stride=(768, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=7077888, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[57h3bzk67jreuqiikhvgdtcn2mdkz7xuwhdnrjueg4dxxn6tfzm] example_inputs[582]: TensorMetadata(dtype=torch.float32, shape=torch.Size([2304]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=9216, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[a64xnvbb6q6gew5c3aovaeqivj3hstrmxkbra3eidivx42znnuv] example_inputs[583]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768, 768]), stride=(768, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=2359296, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[ymueslj5cs5k2l243gcdaohltqzpminou4x5gltmu2cgjgroyyd] example_inputs[584]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=3072, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[ymueslj5cs5k2l243gcdaohltqzpminou4x5gltmu2cgjgroyyd] example_inputs[585]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=3072, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + 
"[ymueslj5cs5k2l243gcdaohltqzpminou4x5gltmu2cgjgroyyd] example_inputs[586]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=3072, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[aafg74anlk7yqmlg6ucup4tmjwhskblh75z227dcfmwpfzw7dzm] example_inputs[587]: TensorMetadata(dtype=torch.float32, shape=torch.Size([3072, 768]), stride=(768, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=9437184, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[sefiqkkteehwsnilwpljkx7vdgondub2hwggoishzmx7o5etmjk] example_inputs[588]: TensorMetadata(dtype=torch.float32, shape=torch.Size([3072]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=12288, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[6v4azk5hvrkyq22k6mfa2l7ovaddatmim7mcvcpdku2ieirjd64] example_inputs[589]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768, 3072]), stride=(3072, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=9437184, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[ymueslj5cs5k2l243gcdaohltqzpminou4x5gltmu2cgjgroyyd] example_inputs[590]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=3072, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[ymueslj5cs5k2l243gcdaohltqzpminou4x5gltmu2cgjgroyyd] example_inputs[591]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=3072, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[ymueslj5cs5k2l243gcdaohltqzpminou4x5gltmu2cgjgroyyd] example_inputs[592]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=3072, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[iyogpwfek6zjggzelzpjeon4muycgzpe56o5gt3flsg5vojztet] example_inputs[593]: TensorMetadata(dtype=torch.float32, shape=torch.Size([]), stride=(), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=4, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, 
is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[iyogpwfek6zjggzelzpjeon4muycgzpe56o5gt3flsg5vojztet] example_inputs[594]: TensorMetadata(dtype=torch.float32, shape=torch.Size([]), stride=(), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=4, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[iyogpwfek6zjggzelzpjeon4muycgzpe56o5gt3flsg5vojztet] example_inputs[595]: TensorMetadata(dtype=torch.float32, shape=torch.Size([]), stride=(), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=4, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[iyogpwfek6zjggzelzpjeon4muycgzpe56o5gt3flsg5vojztet] example_inputs[596]: TensorMetadata(dtype=torch.float32, shape=torch.Size([]), stride=(), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=4, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[iyogpwfek6zjggzelzpjeon4muycgzpe56o5gt3flsg5vojztet] example_inputs[597]: TensorMetadata(dtype=torch.float32, shape=torch.Size([]), stride=(), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=4, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[iyogpwfek6zjggzelzpjeon4muycgzpe56o5gt3flsg5vojztet] example_inputs[598]: TensorMetadata(dtype=torch.float32, shape=torch.Size([]), stride=(), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=4, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[iyogpwfek6zjggzelzpjeon4muycgzpe56o5gt3flsg5vojztet] example_inputs[599]: TensorMetadata(dtype=torch.float32, shape=torch.Size([]), stride=(), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=4, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[iyogpwfek6zjggzelzpjeon4muycgzpe56o5gt3flsg5vojztet] example_inputs[600]: TensorMetadata(dtype=torch.float32, shape=torch.Size([]), stride=(), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=4, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[iyogpwfek6zjggzelzpjeon4muycgzpe56o5gt3flsg5vojztet] example_inputs[601]: TensorMetadata(dtype=torch.float32, shape=torch.Size([]), stride=(), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=4, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, 
is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[iyogpwfek6zjggzelzpjeon4muycgzpe56o5gt3flsg5vojztet] example_inputs[602]: TensorMetadata(dtype=torch.float32, shape=torch.Size([]), stride=(), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=4, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[iyogpwfek6zjggzelzpjeon4muycgzpe56o5gt3flsg5vojztet] example_inputs[603]: TensorMetadata(dtype=torch.float32, shape=torch.Size([]), stride=(), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=4, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[iyogpwfek6zjggzelzpjeon4muycgzpe56o5gt3flsg5vojztet] example_inputs[604]: TensorMetadata(dtype=torch.float32, shape=torch.Size([]), stride=(), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=4, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[iyogpwfek6zjggzelzpjeon4muycgzpe56o5gt3flsg5vojztet] example_inputs[605]: TensorMetadata(dtype=torch.float32, shape=torch.Size([]), stride=(), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=4, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[iyogpwfek6zjggzelzpjeon4muycgzpe56o5gt3flsg5vojztet] example_inputs[606]: TensorMetadata(dtype=torch.float32, shape=torch.Size([]), stride=(), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=4, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[iyogpwfek6zjggzelzpjeon4muycgzpe56o5gt3flsg5vojztet] example_inputs[607]: TensorMetadata(dtype=torch.float32, shape=torch.Size([]), stride=(), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=4, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[iyogpwfek6zjggzelzpjeon4muycgzpe56o5gt3flsg5vojztet] example_inputs[608]: TensorMetadata(dtype=torch.float32, shape=torch.Size([]), stride=(), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=4, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[iyogpwfek6zjggzelzpjeon4muycgzpe56o5gt3flsg5vojztet] example_inputs[609]: TensorMetadata(dtype=torch.float32, shape=torch.Size([]), stride=(), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=4, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, 
dense_dim=None, sparse_dim=None)", + "[iyogpwfek6zjggzelzpjeon4muycgzpe56o5gt3flsg5vojztet] example_inputs[610]: TensorMetadata(dtype=torch.float32, shape=torch.Size([]), stride=(), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=4, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[iyogpwfek6zjggzelzpjeon4muycgzpe56o5gt3flsg5vojztet] example_inputs[611]: TensorMetadata(dtype=torch.float32, shape=torch.Size([]), stride=(), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=4, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[iyogpwfek6zjggzelzpjeon4muycgzpe56o5gt3flsg5vojztet] example_inputs[612]: TensorMetadata(dtype=torch.float32, shape=torch.Size([]), stride=(), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=4, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[iyogpwfek6zjggzelzpjeon4muycgzpe56o5gt3flsg5vojztet] example_inputs[613]: TensorMetadata(dtype=torch.float32, shape=torch.Size([]), stride=(), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=4, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[iyogpwfek6zjggzelzpjeon4muycgzpe56o5gt3flsg5vojztet] example_inputs[614]: TensorMetadata(dtype=torch.float32, shape=torch.Size([]), stride=(), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=4, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[iyogpwfek6zjggzelzpjeon4muycgzpe56o5gt3flsg5vojztet] example_inputs[615]: TensorMetadata(dtype=torch.float32, shape=torch.Size([]), stride=(), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=4, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[iyogpwfek6zjggzelzpjeon4muycgzpe56o5gt3flsg5vojztet] example_inputs[616]: TensorMetadata(dtype=torch.float32, shape=torch.Size([]), stride=(), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=4, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[iyogpwfek6zjggzelzpjeon4muycgzpe56o5gt3flsg5vojztet] example_inputs[617]: TensorMetadata(dtype=torch.float32, shape=torch.Size([]), stride=(), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=4, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, 
sparse_dim=None)", + "[iyogpwfek6zjggzelzpjeon4muycgzpe56o5gt3flsg5vojztet] example_inputs[618]: TensorMetadata(dtype=torch.float32, shape=torch.Size([]), stride=(), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=4, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[iyogpwfek6zjggzelzpjeon4muycgzpe56o5gt3flsg5vojztet] example_inputs[619]: TensorMetadata(dtype=torch.float32, shape=torch.Size([]), stride=(), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=4, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[iyogpwfek6zjggzelzpjeon4muycgzpe56o5gt3flsg5vojztet] example_inputs[620]: TensorMetadata(dtype=torch.float32, shape=torch.Size([]), stride=(), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=4, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[iyogpwfek6zjggzelzpjeon4muycgzpe56o5gt3flsg5vojztet] example_inputs[621]: TensorMetadata(dtype=torch.float32, shape=torch.Size([]), stride=(), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=4, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[iyogpwfek6zjggzelzpjeon4muycgzpe56o5gt3flsg5vojztet] example_inputs[622]: TensorMetadata(dtype=torch.float32, shape=torch.Size([]), stride=(), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=4, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[iyogpwfek6zjggzelzpjeon4muycgzpe56o5gt3flsg5vojztet] example_inputs[623]: TensorMetadata(dtype=torch.float32, shape=torch.Size([]), stride=(), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=4, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[iyogpwfek6zjggzelzpjeon4muycgzpe56o5gt3flsg5vojztet] example_inputs[624]: TensorMetadata(dtype=torch.float32, shape=torch.Size([]), stride=(), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=4, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[iyogpwfek6zjggzelzpjeon4muycgzpe56o5gt3flsg5vojztet] example_inputs[625]: TensorMetadata(dtype=torch.float32, shape=torch.Size([]), stride=(), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=4, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + 
"[iyogpwfek6zjggzelzpjeon4muycgzpe56o5gt3flsg5vojztet] example_inputs[626]: TensorMetadata(dtype=torch.float32, shape=torch.Size([]), stride=(), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=4, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[iyogpwfek6zjggzelzpjeon4muycgzpe56o5gt3flsg5vojztet] example_inputs[627]: TensorMetadata(dtype=torch.float32, shape=torch.Size([]), stride=(), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=4, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[iyogpwfek6zjggzelzpjeon4muycgzpe56o5gt3flsg5vojztet] example_inputs[628]: TensorMetadata(dtype=torch.float32, shape=torch.Size([]), stride=(), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=4, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[iyogpwfek6zjggzelzpjeon4muycgzpe56o5gt3flsg5vojztet] example_inputs[629]: TensorMetadata(dtype=torch.float32, shape=torch.Size([]), stride=(), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=4, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[iyogpwfek6zjggzelzpjeon4muycgzpe56o5gt3flsg5vojztet] example_inputs[630]: TensorMetadata(dtype=torch.float32, shape=torch.Size([]), stride=(), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=4, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[iyogpwfek6zjggzelzpjeon4muycgzpe56o5gt3flsg5vojztet] example_inputs[631]: TensorMetadata(dtype=torch.float32, shape=torch.Size([]), stride=(), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=4, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[iyogpwfek6zjggzelzpjeon4muycgzpe56o5gt3flsg5vojztet] example_inputs[632]: TensorMetadata(dtype=torch.float32, shape=torch.Size([]), stride=(), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=4, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[iyogpwfek6zjggzelzpjeon4muycgzpe56o5gt3flsg5vojztet] example_inputs[633]: TensorMetadata(dtype=torch.float32, shape=torch.Size([]), stride=(), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=4, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + 
"[iyogpwfek6zjggzelzpjeon4muycgzpe56o5gt3flsg5vojztet] example_inputs[634]: TensorMetadata(dtype=torch.float32, shape=torch.Size([]), stride=(), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=4, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[iyogpwfek6zjggzelzpjeon4muycgzpe56o5gt3flsg5vojztet] example_inputs[635]: TensorMetadata(dtype=torch.float32, shape=torch.Size([]), stride=(), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=4, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[iyogpwfek6zjggzelzpjeon4muycgzpe56o5gt3flsg5vojztet] example_inputs[636]: TensorMetadata(dtype=torch.float32, shape=torch.Size([]), stride=(), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=4, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[iyogpwfek6zjggzelzpjeon4muycgzpe56o5gt3flsg5vojztet] example_inputs[637]: TensorMetadata(dtype=torch.float32, shape=torch.Size([]), stride=(), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=4, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[iyogpwfek6zjggzelzpjeon4muycgzpe56o5gt3flsg5vojztet] example_inputs[638]: TensorMetadata(dtype=torch.float32, shape=torch.Size([]), stride=(), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=4, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[iyogpwfek6zjggzelzpjeon4muycgzpe56o5gt3flsg5vojztet] example_inputs[639]: TensorMetadata(dtype=torch.float32, shape=torch.Size([]), stride=(), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=4, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[iyogpwfek6zjggzelzpjeon4muycgzpe56o5gt3flsg5vojztet] example_inputs[640]: TensorMetadata(dtype=torch.float32, shape=torch.Size([]), stride=(), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=4, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[iyogpwfek6zjggzelzpjeon4muycgzpe56o5gt3flsg5vojztet] example_inputs[641]: TensorMetadata(dtype=torch.float32, shape=torch.Size([]), stride=(), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=4, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + 
"[iyogpwfek6zjggzelzpjeon4muycgzpe56o5gt3flsg5vojztet] example_inputs[642]: TensorMetadata(dtype=torch.float32, shape=torch.Size([]), stride=(), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=4, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[iyogpwfek6zjggzelzpjeon4muycgzpe56o5gt3flsg5vojztet] example_inputs[643]: TensorMetadata(dtype=torch.float32, shape=torch.Size([]), stride=(), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=4, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[iyogpwfek6zjggzelzpjeon4muycgzpe56o5gt3flsg5vojztet] example_inputs[644]: TensorMetadata(dtype=torch.float32, shape=torch.Size([]), stride=(), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=4, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[iyogpwfek6zjggzelzpjeon4muycgzpe56o5gt3flsg5vojztet] example_inputs[645]: TensorMetadata(dtype=torch.float32, shape=torch.Size([]), stride=(), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=4, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[iyogpwfek6zjggzelzpjeon4muycgzpe56o5gt3flsg5vojztet] example_inputs[646]: TensorMetadata(dtype=torch.float32, shape=torch.Size([]), stride=(), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=4, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[iyogpwfek6zjggzelzpjeon4muycgzpe56o5gt3flsg5vojztet] example_inputs[647]: TensorMetadata(dtype=torch.float32, shape=torch.Size([]), stride=(), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=4, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[iyogpwfek6zjggzelzpjeon4muycgzpe56o5gt3flsg5vojztet] example_inputs[648]: TensorMetadata(dtype=torch.float32, shape=torch.Size([]), stride=(), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=4, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[iyogpwfek6zjggzelzpjeon4muycgzpe56o5gt3flsg5vojztet] example_inputs[649]: TensorMetadata(dtype=torch.float32, shape=torch.Size([]), stride=(), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=4, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + 
"[iyogpwfek6zjggzelzpjeon4muycgzpe56o5gt3flsg5vojztet] example_inputs[650]: TensorMetadata(dtype=torch.float32, shape=torch.Size([]), stride=(), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=4, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[iyogpwfek6zjggzelzpjeon4muycgzpe56o5gt3flsg5vojztet] example_inputs[651]: TensorMetadata(dtype=torch.float32, shape=torch.Size([]), stride=(), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=4, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[iyogpwfek6zjggzelzpjeon4muycgzpe56o5gt3flsg5vojztet] example_inputs[652]: TensorMetadata(dtype=torch.float32, shape=torch.Size([]), stride=(), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=4, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[iyogpwfek6zjggzelzpjeon4muycgzpe56o5gt3flsg5vojztet] example_inputs[653]: TensorMetadata(dtype=torch.float32, shape=torch.Size([]), stride=(), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=4, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[iyogpwfek6zjggzelzpjeon4muycgzpe56o5gt3flsg5vojztet] example_inputs[654]: TensorMetadata(dtype=torch.float32, shape=torch.Size([]), stride=(), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=4, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[iyogpwfek6zjggzelzpjeon4muycgzpe56o5gt3flsg5vojztet] example_inputs[655]: TensorMetadata(dtype=torch.float32, shape=torch.Size([]), stride=(), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=4, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[iyogpwfek6zjggzelzpjeon4muycgzpe56o5gt3flsg5vojztet] example_inputs[656]: TensorMetadata(dtype=torch.float32, shape=torch.Size([]), stride=(), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=4, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[iyogpwfek6zjggzelzpjeon4muycgzpe56o5gt3flsg5vojztet] example_inputs[657]: TensorMetadata(dtype=torch.float32, shape=torch.Size([]), stride=(), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=4, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + 
"[iyogpwfek6zjggzelzpjeon4muycgzpe56o5gt3flsg5vojztet] example_inputs[658]: TensorMetadata(dtype=torch.float32, shape=torch.Size([]), stride=(), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=4, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[iyogpwfek6zjggzelzpjeon4muycgzpe56o5gt3flsg5vojztet] example_inputs[659]: TensorMetadata(dtype=torch.float32, shape=torch.Size([]), stride=(), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=4, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[iyogpwfek6zjggzelzpjeon4muycgzpe56o5gt3flsg5vojztet] example_inputs[660]: TensorMetadata(dtype=torch.float32, shape=torch.Size([]), stride=(), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=4, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[iyogpwfek6zjggzelzpjeon4muycgzpe56o5gt3flsg5vojztet] example_inputs[661]: TensorMetadata(dtype=torch.float32, shape=torch.Size([]), stride=(), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=4, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[iyogpwfek6zjggzelzpjeon4muycgzpe56o5gt3flsg5vojztet] example_inputs[662]: TensorMetadata(dtype=torch.float32, shape=torch.Size([]), stride=(), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=4, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[iyogpwfek6zjggzelzpjeon4muycgzpe56o5gt3flsg5vojztet] example_inputs[663]: TensorMetadata(dtype=torch.float32, shape=torch.Size([]), stride=(), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=4, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[iyogpwfek6zjggzelzpjeon4muycgzpe56o5gt3flsg5vojztet] example_inputs[664]: TensorMetadata(dtype=torch.float32, shape=torch.Size([]), stride=(), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=4, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[iyogpwfek6zjggzelzpjeon4muycgzpe56o5gt3flsg5vojztet] example_inputs[665]: TensorMetadata(dtype=torch.float32, shape=torch.Size([]), stride=(), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=4, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + 
"[iyogpwfek6zjggzelzpjeon4muycgzpe56o5gt3flsg5vojztet] example_inputs[666]: TensorMetadata(dtype=torch.float32, shape=torch.Size([]), stride=(), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=4, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[iyogpwfek6zjggzelzpjeon4muycgzpe56o5gt3flsg5vojztet] example_inputs[667]: TensorMetadata(dtype=torch.float32, shape=torch.Size([]), stride=(), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=4, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[iyogpwfek6zjggzelzpjeon4muycgzpe56o5gt3flsg5vojztet] example_inputs[668]: TensorMetadata(dtype=torch.float32, shape=torch.Size([]), stride=(), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=4, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[iyogpwfek6zjggzelzpjeon4muycgzpe56o5gt3flsg5vojztet] example_inputs[669]: TensorMetadata(dtype=torch.float32, shape=torch.Size([]), stride=(), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=4, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[iyogpwfek6zjggzelzpjeon4muycgzpe56o5gt3flsg5vojztet] example_inputs[670]: TensorMetadata(dtype=torch.float32, shape=torch.Size([]), stride=(), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=4, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[iyogpwfek6zjggzelzpjeon4muycgzpe56o5gt3flsg5vojztet] example_inputs[671]: TensorMetadata(dtype=torch.float32, shape=torch.Size([]), stride=(), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=4, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[iyogpwfek6zjggzelzpjeon4muycgzpe56o5gt3flsg5vojztet] example_inputs[672]: TensorMetadata(dtype=torch.float32, shape=torch.Size([]), stride=(), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=4, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[iyogpwfek6zjggzelzpjeon4muycgzpe56o5gt3flsg5vojztet] example_inputs[673]: TensorMetadata(dtype=torch.float32, shape=torch.Size([]), stride=(), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=4, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + 
"[iyogpwfek6zjggzelzpjeon4muycgzpe56o5gt3flsg5vojztet] example_inputs[674]: TensorMetadata(dtype=torch.float32, shape=torch.Size([]), stride=(), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=4, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[iyogpwfek6zjggzelzpjeon4muycgzpe56o5gt3flsg5vojztet] example_inputs[675]: TensorMetadata(dtype=torch.float32, shape=torch.Size([]), stride=(), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=4, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[iyogpwfek6zjggzelzpjeon4muycgzpe56o5gt3flsg5vojztet] example_inputs[676]: TensorMetadata(dtype=torch.float32, shape=torch.Size([]), stride=(), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=4, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[iyogpwfek6zjggzelzpjeon4muycgzpe56o5gt3flsg5vojztet] example_inputs[677]: TensorMetadata(dtype=torch.float32, shape=torch.Size([]), stride=(), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=4, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[iyogpwfek6zjggzelzpjeon4muycgzpe56o5gt3flsg5vojztet] example_inputs[678]: TensorMetadata(dtype=torch.float32, shape=torch.Size([]), stride=(), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=4, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[iyogpwfek6zjggzelzpjeon4muycgzpe56o5gt3flsg5vojztet] example_inputs[679]: TensorMetadata(dtype=torch.float32, shape=torch.Size([]), stride=(), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=4, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[iyogpwfek6zjggzelzpjeon4muycgzpe56o5gt3flsg5vojztet] example_inputs[680]: TensorMetadata(dtype=torch.float32, shape=torch.Size([]), stride=(), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=4, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[iyogpwfek6zjggzelzpjeon4muycgzpe56o5gt3flsg5vojztet] example_inputs[681]: TensorMetadata(dtype=torch.float32, shape=torch.Size([]), stride=(), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=4, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + 
"[iyogpwfek6zjggzelzpjeon4muycgzpe56o5gt3flsg5vojztet] example_inputs[682]: TensorMetadata(dtype=torch.float32, shape=torch.Size([]), stride=(), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=4, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[iyogpwfek6zjggzelzpjeon4muycgzpe56o5gt3flsg5vojztet] example_inputs[683]: TensorMetadata(dtype=torch.float32, shape=torch.Size([]), stride=(), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=4, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[iyogpwfek6zjggzelzpjeon4muycgzpe56o5gt3flsg5vojztet] example_inputs[684]: TensorMetadata(dtype=torch.float32, shape=torch.Size([]), stride=(), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=4, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[iyogpwfek6zjggzelzpjeon4muycgzpe56o5gt3flsg5vojztet] example_inputs[685]: TensorMetadata(dtype=torch.float32, shape=torch.Size([]), stride=(), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=4, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[iyogpwfek6zjggzelzpjeon4muycgzpe56o5gt3flsg5vojztet] example_inputs[686]: TensorMetadata(dtype=torch.float32, shape=torch.Size([]), stride=(), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=4, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[iyogpwfek6zjggzelzpjeon4muycgzpe56o5gt3flsg5vojztet] example_inputs[687]: TensorMetadata(dtype=torch.float32, shape=torch.Size([]), stride=(), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=4, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[iyogpwfek6zjggzelzpjeon4muycgzpe56o5gt3flsg5vojztet] example_inputs[688]: TensorMetadata(dtype=torch.float32, shape=torch.Size([]), stride=(), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=4, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[iyogpwfek6zjggzelzpjeon4muycgzpe56o5gt3flsg5vojztet] example_inputs[689]: TensorMetadata(dtype=torch.float32, shape=torch.Size([]), stride=(), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=4, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + 
"[iyogpwfek6zjggzelzpjeon4muycgzpe56o5gt3flsg5vojztet] example_inputs[690]: TensorMetadata(dtype=torch.float32, shape=torch.Size([]), stride=(), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=4, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[iyogpwfek6zjggzelzpjeon4muycgzpe56o5gt3flsg5vojztet] example_inputs[691]: TensorMetadata(dtype=torch.float32, shape=torch.Size([]), stride=(), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=4, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[iyogpwfek6zjggzelzpjeon4muycgzpe56o5gt3flsg5vojztet] example_inputs[692]: TensorMetadata(dtype=torch.float32, shape=torch.Size([]), stride=(), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=4, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[iyogpwfek6zjggzelzpjeon4muycgzpe56o5gt3flsg5vojztet] example_inputs[693]: TensorMetadata(dtype=torch.float32, shape=torch.Size([]), stride=(), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=4, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[iyogpwfek6zjggzelzpjeon4muycgzpe56o5gt3flsg5vojztet] example_inputs[694]: TensorMetadata(dtype=torch.float32, shape=torch.Size([]), stride=(), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=4, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[iyogpwfek6zjggzelzpjeon4muycgzpe56o5gt3flsg5vojztet] example_inputs[695]: TensorMetadata(dtype=torch.float32, shape=torch.Size([]), stride=(), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=4, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[iyogpwfek6zjggzelzpjeon4muycgzpe56o5gt3flsg5vojztet] example_inputs[696]: TensorMetadata(dtype=torch.float32, shape=torch.Size([]), stride=(), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=4, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[iyogpwfek6zjggzelzpjeon4muycgzpe56o5gt3flsg5vojztet] example_inputs[697]: TensorMetadata(dtype=torch.float32, shape=torch.Size([]), stride=(), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=4, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + 
"[iyogpwfek6zjggzelzpjeon4muycgzpe56o5gt3flsg5vojztet] example_inputs[698]: TensorMetadata(dtype=torch.float32, shape=torch.Size([]), stride=(), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=4, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[iyogpwfek6zjggzelzpjeon4muycgzpe56o5gt3flsg5vojztet] example_inputs[699]: TensorMetadata(dtype=torch.float32, shape=torch.Size([]), stride=(), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=4, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[iyogpwfek6zjggzelzpjeon4muycgzpe56o5gt3flsg5vojztet] example_inputs[700]: TensorMetadata(dtype=torch.float32, shape=torch.Size([]), stride=(), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=4, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[iyogpwfek6zjggzelzpjeon4muycgzpe56o5gt3flsg5vojztet] example_inputs[701]: TensorMetadata(dtype=torch.float32, shape=torch.Size([]), stride=(), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=4, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[iyogpwfek6zjggzelzpjeon4muycgzpe56o5gt3flsg5vojztet] example_inputs[702]: TensorMetadata(dtype=torch.float32, shape=torch.Size([]), stride=(), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=4, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[iyogpwfek6zjggzelzpjeon4muycgzpe56o5gt3flsg5vojztet] example_inputs[703]: TensorMetadata(dtype=torch.float32, shape=torch.Size([]), stride=(), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=4, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[iyogpwfek6zjggzelzpjeon4muycgzpe56o5gt3flsg5vojztet] example_inputs[704]: TensorMetadata(dtype=torch.float32, shape=torch.Size([]), stride=(), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=4, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[iyogpwfek6zjggzelzpjeon4muycgzpe56o5gt3flsg5vojztet] example_inputs[705]: TensorMetadata(dtype=torch.float32, shape=torch.Size([]), stride=(), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=4, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + 
"[iyogpwfek6zjggzelzpjeon4muycgzpe56o5gt3flsg5vojztet] example_inputs[706]: TensorMetadata(dtype=torch.float32, shape=torch.Size([]), stride=(), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=4, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[iyogpwfek6zjggzelzpjeon4muycgzpe56o5gt3flsg5vojztet] example_inputs[707]: TensorMetadata(dtype=torch.float32, shape=torch.Size([]), stride=(), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=4, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[iyogpwfek6zjggzelzpjeon4muycgzpe56o5gt3flsg5vojztet] example_inputs[708]: TensorMetadata(dtype=torch.float32, shape=torch.Size([]), stride=(), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=4, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[iyogpwfek6zjggzelzpjeon4muycgzpe56o5gt3flsg5vojztet] example_inputs[709]: TensorMetadata(dtype=torch.float32, shape=torch.Size([]), stride=(), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=4, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[iyogpwfek6zjggzelzpjeon4muycgzpe56o5gt3flsg5vojztet] example_inputs[710]: TensorMetadata(dtype=torch.float32, shape=torch.Size([]), stride=(), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=4, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[iyogpwfek6zjggzelzpjeon4muycgzpe56o5gt3flsg5vojztet] example_inputs[711]: TensorMetadata(dtype=torch.float32, shape=torch.Size([]), stride=(), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=4, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[iyogpwfek6zjggzelzpjeon4muycgzpe56o5gt3flsg5vojztet] example_inputs[712]: TensorMetadata(dtype=torch.float32, shape=torch.Size([]), stride=(), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=4, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[iyogpwfek6zjggzelzpjeon4muycgzpe56o5gt3flsg5vojztet] example_inputs[713]: TensorMetadata(dtype=torch.float32, shape=torch.Size([]), stride=(), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=4, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + 
"[iyogpwfek6zjggzelzpjeon4muycgzpe56o5gt3flsg5vojztet] example_inputs[714]: TensorMetadata(dtype=torch.float32, shape=torch.Size([]), stride=(), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=4, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[iyogpwfek6zjggzelzpjeon4muycgzpe56o5gt3flsg5vojztet] example_inputs[715]: TensorMetadata(dtype=torch.float32, shape=torch.Size([]), stride=(), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=4, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[iyogpwfek6zjggzelzpjeon4muycgzpe56o5gt3flsg5vojztet] example_inputs[716]: TensorMetadata(dtype=torch.float32, shape=torch.Size([]), stride=(), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=4, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[iyogpwfek6zjggzelzpjeon4muycgzpe56o5gt3flsg5vojztet] example_inputs[717]: TensorMetadata(dtype=torch.float32, shape=torch.Size([]), stride=(), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=4, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[iyogpwfek6zjggzelzpjeon4muycgzpe56o5gt3flsg5vojztet] example_inputs[718]: TensorMetadata(dtype=torch.float32, shape=torch.Size([]), stride=(), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=4, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[iyogpwfek6zjggzelzpjeon4muycgzpe56o5gt3flsg5vojztet] example_inputs[719]: TensorMetadata(dtype=torch.float32, shape=torch.Size([]), stride=(), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=4, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[iyogpwfek6zjggzelzpjeon4muycgzpe56o5gt3flsg5vojztet] example_inputs[720]: TensorMetadata(dtype=torch.float32, shape=torch.Size([]), stride=(), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=4, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[iyogpwfek6zjggzelzpjeon4muycgzpe56o5gt3flsg5vojztet] example_inputs[721]: TensorMetadata(dtype=torch.float32, shape=torch.Size([]), stride=(), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=4, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + 
"[iyogpwfek6zjggzelzpjeon4muycgzpe56o5gt3flsg5vojztet] example_inputs[722]: TensorMetadata(dtype=torch.float32, shape=torch.Size([]), stride=(), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=4, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[iyogpwfek6zjggzelzpjeon4muycgzpe56o5gt3flsg5vojztet] example_inputs[723]: TensorMetadata(dtype=torch.float32, shape=torch.Size([]), stride=(), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=4, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[iyogpwfek6zjggzelzpjeon4muycgzpe56o5gt3flsg5vojztet] example_inputs[724]: TensorMetadata(dtype=torch.float32, shape=torch.Size([]), stride=(), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=4, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[iyogpwfek6zjggzelzpjeon4muycgzpe56o5gt3flsg5vojztet] example_inputs[725]: TensorMetadata(dtype=torch.float32, shape=torch.Size([]), stride=(), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=4, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[iyogpwfek6zjggzelzpjeon4muycgzpe56o5gt3flsg5vojztet] example_inputs[726]: TensorMetadata(dtype=torch.float32, shape=torch.Size([]), stride=(), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=4, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[iyogpwfek6zjggzelzpjeon4muycgzpe56o5gt3flsg5vojztet] example_inputs[727]: TensorMetadata(dtype=torch.float32, shape=torch.Size([]), stride=(), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=4, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[iyogpwfek6zjggzelzpjeon4muycgzpe56o5gt3flsg5vojztet] example_inputs[728]: TensorMetadata(dtype=torch.float32, shape=torch.Size([]), stride=(), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=4, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[iyogpwfek6zjggzelzpjeon4muycgzpe56o5gt3flsg5vojztet] example_inputs[729]: TensorMetadata(dtype=torch.float32, shape=torch.Size([]), stride=(), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=4, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + 
"[iyogpwfek6zjggzelzpjeon4muycgzpe56o5gt3flsg5vojztet] example_inputs[730]: TensorMetadata(dtype=torch.float32, shape=torch.Size([]), stride=(), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=4, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[iyogpwfek6zjggzelzpjeon4muycgzpe56o5gt3flsg5vojztet] example_inputs[731]: TensorMetadata(dtype=torch.float32, shape=torch.Size([]), stride=(), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=4, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[iyogpwfek6zjggzelzpjeon4muycgzpe56o5gt3flsg5vojztet] example_inputs[732]: TensorMetadata(dtype=torch.float32, shape=torch.Size([]), stride=(), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=4, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[iyogpwfek6zjggzelzpjeon4muycgzpe56o5gt3flsg5vojztet] example_inputs[733]: TensorMetadata(dtype=torch.float32, shape=torch.Size([]), stride=(), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=4, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[iyogpwfek6zjggzelzpjeon4muycgzpe56o5gt3flsg5vojztet] example_inputs[734]: TensorMetadata(dtype=torch.float32, shape=torch.Size([]), stride=(), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=4, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[iyogpwfek6zjggzelzpjeon4muycgzpe56o5gt3flsg5vojztet] example_inputs[735]: TensorMetadata(dtype=torch.float32, shape=torch.Size([]), stride=(), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=4, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[iyogpwfek6zjggzelzpjeon4muycgzpe56o5gt3flsg5vojztet] example_inputs[736]: TensorMetadata(dtype=torch.float32, shape=torch.Size([]), stride=(), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=4, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[iyogpwfek6zjggzelzpjeon4muycgzpe56o5gt3flsg5vojztet] example_inputs[737]: TensorMetadata(dtype=torch.float32, shape=torch.Size([]), stride=(), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=4, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + 
"[iyogpwfek6zjggzelzpjeon4muycgzpe56o5gt3flsg5vojztet] example_inputs[738]: TensorMetadata(dtype=torch.float32, shape=torch.Size([]), stride=(), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=4, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[iyogpwfek6zjggzelzpjeon4muycgzpe56o5gt3flsg5vojztet] example_inputs[739]: TensorMetadata(dtype=torch.float32, shape=torch.Size([]), stride=(), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=4, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] fx_kwargs[aot_mode]: False", + "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] fx_kwargs[cpp_wrapper]: False", + "[moqyx73pd52qgfefpqfwjkxjzf4hj25dyzowq4prptxxsmfw2vg] fx_kwargs[cudagraphs]: BoxedBool(value=True)", + "[tquy2we2efmowuj4wuqzcfcfdcrkzkzmwdae6hprj7fa64jpusq] fx_kwargs[extern_node_serializer]: None", + "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] fx_kwargs[is_backward]: False", + "[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] fx_kwargs[is_inference]: True", + "[tquy2we2efmowuj4wuqzcfcfdcrkzkzmwdae6hprj7fa64jpusq] fx_kwargs[layout_opt]: None", + "[t755of6lmsc7np3j6spka2x5yvicie732qv4wx6uu67rphf6elu] fx_kwargs[static_input_idxs]: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76, 77, 78, 79, 80, 81, 82, 83, 84, 85, 86, 87, 88, 89, 90, 91, 92, 93, 94, 95, 96, 97, 98, 99, 100, 101, 102, 103, 104, 105, 106, 107, 108, 109, 110, 111, 112, 113, 114, 115, 116, 117, 118, 119, 120, 121, 122, 123, 124, 125, 126, 127, 128, 129, 130, 131, 132, 133, 134, 135, 136, 137, 138, 139, 140, 141, 142, 143, 144, 145, 146, 147, 148, 149, 150, 299, 300, 301, 302, 303, 304, 305, 306, 307, 308, 309, 310, 311, 312, 313, 314, 315, 316, 317, 318, 319, 320, 321, 322, 323, 324, 325, 326, 327, 328, 329, 330, 331, 332, 333, 334, 335, 336, 337, 338, 339, 340, 341, 342, 343, 344, 345, 346, 347, 348, 349, 350, 351, 352, 353, 354, 355, 356, 357, 358, 359, 360, 361, 362, 363, 364, 365, 366, 367, 368, 369, 370, 371, 372, 373, 374, 375, 376, 377, 378, 379, 380, 381, 382, 383, 384, 385, 386, 387, 388, 389, 390, 391, 392, 393, 394, 395, 396, 397, 398, 399, 400, 401, 402, 403, 404, 405, 406, 407, 408, 409, 410, 411, 412, 413, 414, 415, 416, 417, 418, 419, 420, 421, 422, 423, 424, 425, 426, 427, 428, 429, 430, 431, 432, 433, 434, 435, 436, 437, 438, 439, 440, 441, 442, 443, 444, 445, 446, 447, 448, 449, 450, 451, 452, 453, 454, 455, 456, 457, 458, 459, 460, 461, 462, 463, 464, 465, 466, 467, 468, 469, 470, 471, 472, 473, 474, 475, 476, 477, 478, 479, 480, 481, 482, 483, 484, 485, 486, 487, 488, 489, 490, 491, 492, 493, 494, 495, 496, 497, 498, 499, 500, 501, 502, 503, 504, 505, 506, 507, 508, 509, 510, 511, 512, 513, 514, 515, 516, 517, 518, 519, 520, 521, 522, 523, 524, 525, 526, 527, 528, 529, 530, 531, 532, 533, 534, 535, 536, 537, 538, 539, 540, 541, 542, 543, 544, 545, 546, 547, 548, 549, 550, 551, 552, 553, 554, 555, 556, 557, 
558, 559, 560, 561, 562, 563, 564, 565, 566, 567, 568, 569, 570, 571, 572, 573, 574, 575, 576, 577, 578, 579, 580, 581, 582, 583, 584, 585, 586, 587, 588, 589, 590, 591, 592, 593, 594, 595, 596, 597, 598, 599, 600, 601, 602, 603, 604, 605, 606, 607, 608, 609, 610, 611, 612, 613, 614, 615, 616, 617, 618, 619, 620, 621, 622, 623, 624, 625, 626, 627, 628, 629, 630, 631, 632, 633, 634, 635, 636, 637, 638, 639, 640, 641, 642, 643, 644, 645, 646, 647, 648, 649, 650, 651, 652, 653, 654, 655, 656, 657, 658, 659, 660, 661, 662, 663, 664, 665, 666, 667, 668, 669, 670, 671, 672, 673, 674, 675, 676, 677, 678, 679, 680, 681, 682, 683, 684, 685, 686, 687, 688, 689, 690, 691, 692, 693, 694, 695, 696, 697, 698, 699, 700, 701, 702, 703, 704, 705, 706, 707, 708, 709, 710, 711, 712, 713, 714, 715, 716, 717, 718, 719, 720, 721, 722, 723, 724, 725, 726, 727, 728, 729, 730, 731, 732, 733, 734, 735, 736, 737, 738, 739]", + "[4bryyl4ahh5whyg3zwqebpwmjnx6w77nqgqbdjlowju6lkqtn7w] fx_kwargs[user_visible_outputs]: {}", + "[5gxbt6glr3gti63xp7cch6ofdqfxvag7hsiwxbut4if4xrx6d4g] inputs_to_check[0]: 151", + "[iaarrh4a5kr5bv73kkmrhkca4ysumu2vh65kdzworlx74re2dpo] inputs_to_check[1]: 152", + "[qmhmrekvpai4tc7rejm43nkaq3mgt2cy6w6mzkdg2pdzt4xbwx2] inputs_to_check[2]: 153", + "[qrczcfquzsvwccgmqugssyaib555w6hfdt4shqzvb6brfm3i5el] inputs_to_check[3]: 154", + "[6osxch7or66drdp4fy276u75unoezfbp32qtojzfami7nbek767] inputs_to_check[4]: 155", + "[o3fk4nlt4btpkqljmyxroatirrdhqjr4d264i2tywc6raomfjsw] inputs_to_check[5]: 156", + "[33b7c6n5un2rbt5kdgtbsh6c64len6cjzmios66nja6gia4ojcx] inputs_to_check[6]: 157", + "[ccishwh2vlgdi4q6qdu3en4gukgptvbrqyx5rofx72wz3bicnzy] inputs_to_check[7]: 158", + "[6pn6oydkkil5wvbpu5uvdffsyymbzhxx3t2skamg4wp5vtb3n5k] inputs_to_check[8]: 159", + "[alex6ca6gpzizomfu3wq3xj36jnymygy7hiroowxhaypic6tskv] inputs_to_check[9]: 160", + "[bivqezf4ymabhorni5gd4fe3urab3kvepwenq5gmvosf2pavdd5] inputs_to_check[10]: 161", + "[asq5khhnfffkvmnnmgeoqsye4l64y7kkvyk3snk2tyrgf2qb5mi] inputs_to_check[11]: 162", + "[wz2lj7dbnoawsawyjhobr6oa52jgjntn3o3lmrtkb4bmzfapwhc] inputs_to_check[12]: 163", + "[ngbs2fex7zmmncfaogkkrqimpfgq4wjlkqmf3tcyeswwe3hg3od] inputs_to_check[13]: 164", + "[fssplvrotxdu6guo4defun7h45ns624vjjonloaz7etd7ggxjg4] inputs_to_check[14]: 165", + "[sc6cvy6nxzmjuelrp2whvlhran2f4f6elzbuslbei3dfnaq6qkv] inputs_to_check[15]: 166", + "[p44ecp3xwjohf6mop4nzmt5wxi7uzcii63xm5kryejmipxkfjev] inputs_to_check[16]: 167", + "[xngi5shtw7jcqe2utazf35f7dj5ypzfuduqyypjo6jvv32knjf3] inputs_to_check[17]: 168", + "[niz4gr5x3ya3ukuhf4c7rnhhrswyan3kbzclc6g52u3kvurqmf4] inputs_to_check[18]: 169", + "[bfsiwqbrjfxsnzozsohdqmqjqwa5itj3abqdtdwxf7vxxw2t7iq] inputs_to_check[19]: 170", + "[ditguzdbhtwz6l4chqi3z6cziyi2pct43zhzf5zaragfhqf5akd] inputs_to_check[20]: 171", + "[e3h4w2vfojacabco57r6aj43dgqrhfx545yrdu7qzxkvjyf52t5] inputs_to_check[21]: 172", + "[mfhhdultow3ovihpso6dtsdpntivwjcpfismlaqwsjc6iainhtu] inputs_to_check[22]: 173", + "[kpyorpxsozia3z5wqrbnqnnff6bma7xllpbedgwp4gp76wwwegc] inputs_to_check[23]: 174", + "[lvgyjizlzse4yknfjmvl2uqg4ffygwpuddjgunbxcbjm5g5d5fw] inputs_to_check[24]: 175", + "[qgbusewwprhncdk5ahq4fytqvx7fhrecfetchddqudoq7lkyx3b] inputs_to_check[25]: 176", + "[hmzhjnkpczbvqnshjcfp45u74kngsbufwspp5fa5e4zidpcwwcs] inputs_to_check[26]: 177", + "[oqg53nqin266dynz7o2z27qo3hcxucd5yc3or2656uoqs6quan2] inputs_to_check[27]: 178", + "[k6swhf5ty2zmij2rxhvvpe7jwdxx6llqawhiqixvpvjgc6dhkl6] inputs_to_check[28]: 179", + "[a5s6scmr5zbjnm5a6isxptkuttpfh2uv2g7tza7ci6z366qvhph] inputs_to_check[29]: 180", + 
"[ma4hmi2zhz2v5pv6nw7owyw3z3lswrc5nveayyb3j5iastoc7zl] inputs_to_check[30]: 181", + "[4nwllnnjxh7swihocoe7pvsijtlpzpz6yjwckaahmgpaeimozt7] inputs_to_check[31]: 182", + "[wzkita6x3wixk2x6zsfnpzfqrfaezroxu5xnw7rq7ox7vqgu476] inputs_to_check[32]: 183", + "[xrjqiropfaxwzdwzzhyx6e6c7hjnkppzronjqgpzo6hbpi6yr4g] inputs_to_check[33]: 184", + "[urai455awfpx3bjmqadnlbnbklixi676brusjoruximpnjo76n2] inputs_to_check[34]: 185", + "[eermw7oa3whtf4qdg6cogh7s3xez3tfiest32uplhnwmagpouh6] inputs_to_check[35]: 186", + "[64xsnjmjbzfylxidk5yl6hsvuzswofeqbq3zuv7a6bkdopfeapn] inputs_to_check[36]: 187", + "[6kuevh47g3elelxoo5ac7cmo3r2fh2ygbhs6qyljspkz4y6r7r7] inputs_to_check[37]: 188", + "[ihnc7ngbkirbzwxoyjfhpwki2ewnnpkuzxlegp6fmw6fykdxxj3] inputs_to_check[38]: 189", + "[g3ay2xbjws2ov73c4lkobfibuq4wxwxe75uogzdg7crgtzlagn5] inputs_to_check[39]: 190", + "[6lydlqaer6b3qvlthv7uluevii5gvxgissp4oodsoye43zyvm7f] inputs_to_check[40]: 191", + "[mrm72xpjwecc4eczy6w3ndrca6qgx4ssucludsfllsadesjz7pl] inputs_to_check[41]: 192", + "[7cz46kewyqtcfh7adjmk5i2ljoq5v44ofijq4gmlca7gjy55c6r] inputs_to_check[42]: 193", + "[2ubcxo5fpwyipcg26qkwk7dfk4ci2edpvwxh4fhvgvstq4cf3ke] inputs_to_check[43]: 194", + "[idxzwtb6yotm5u6qhotbqxbuytqogl4lyuzcp4f4rpiekingxrj] inputs_to_check[44]: 195", + "[sxompataxg2kpp6lvmimnzadenerjisuicfkfuwm5exoinhfbsg] inputs_to_check[45]: 196", + "[5pmk7sv27s2bi54s3kwyduqs4ly256qfb2hfrlqfamtcsbur3iv] inputs_to_check[46]: 197", + "[pujj5ix4dbdajeweoew7fe743v6v6wscq7k7pjsqiqopewlh6s4] inputs_to_check[47]: 198", + "[tfk4gvmeljn6oc7yzg7ablm5slfgj5iwldvib27lgy3acro6g77] inputs_to_check[48]: 199", + "[3yqkxangefsazunaw2ibltnnexjixpvosdxyq7kipwrmhng4d66] inputs_to_check[49]: 200", + "[dek4vtwl3t4tioy2oedefor7hqzq7doc3fj4wwdmgrfpt773mvr] inputs_to_check[50]: 201", + "[eyzompn7rqbpbwprodhvszb4fjs3fubclamjylwqsna5imftnou] inputs_to_check[51]: 202", + "[cna2jzzfijl2grhnqpag2peenci7zfourhgcdzidromdrqdyvwm] inputs_to_check[52]: 203", + "[m5mnhtreky3cpmvgnfmbkri3pmhs22tu3kahhkdxv2q67t7rtxk] inputs_to_check[53]: 204", + "[pf3yxn6pwjw3apolzviv77ube4xeqq2n2lgwcduyjvzgiyxg45s] inputs_to_check[54]: 205", + "[zlqnl55vmxcplhlix7khtasmq2gdecqd7jpore57pll2by4u67y] inputs_to_check[55]: 206", + "[6u3htmimfebyyyavsbzctid7bqe3p2vzitaht7rhqdc6l653asq] inputs_to_check[56]: 207", + "[vnxrp3cswdykkkdcda2rykgrj6p7mbsgoq2euf3nhebgbrbdnah] inputs_to_check[57]: 208", + "[xkuyzvn72atoye7xvdr7nkkl6r43muegtld7i23uic3gez4op3w] inputs_to_check[58]: 209", + "[mc4hinl3b4abbhnnd6kjw3mpbdnhcszhce746aznurtp7rckvqe] inputs_to_check[59]: 210", + "[ofm6prxr6hqz2u5z3oywwp5635di572xearfxgeqikq7ir5zyer] inputs_to_check[60]: 211", + "[ib352syzxfoxetnwcwmr562kq6zxh3ba6k6ozr7vmgirzngz445] inputs_to_check[61]: 212", + "[z7vzn4qm5gv2ec4zm5oa552msg56z4an6jyi43vpqrh6rcwtww5] inputs_to_check[62]: 213", + "[6ijaweudkgtayjajytjpkgptbxqygprffq2iv65twukqg2ks24j] inputs_to_check[63]: 214", + "[zr4ja2xbcw5fkklyjkk4dqkrixbqthrmy4gx6wolog3g6twxagb] inputs_to_check[64]: 215", + "[f7rs6g77lmqs6rceoayty7ukws77rxwoi2litnshxyvn2l4qcrs] inputs_to_check[65]: 216", + "[iq4ks6jqbobe4ub7eedwgwhm6u7dujn4pdk6xtzlsutw35gihbh] inputs_to_check[66]: 217", + "[c4tbwksvxlxtiymirqu6houav6ecq5pr3zf4phc5ksuu6ccao5t] inputs_to_check[67]: 218", + "[i3gn36xaneuwkxpjtyp3iiaeudseihrk47h5len33wjeyzt6ez5] inputs_to_check[68]: 219", + "[2hfliifbgstom3wq5au6yesetrbhjmazl3j2z7o4wvztaigyd7p] inputs_to_check[69]: 220", + "[jsavgbo66sdqebklxk3p76jjgnvror75kkpwbfrr2grkuudknxt] inputs_to_check[70]: 221", + "[nl6eg6mm4g66lxwzopu5webjhh4pq52imycgdnbzw5sdafpfzuh] inputs_to_check[71]: 222", + 
"[uulsqibga6yzqtpej2uf4km34ygpbdt4gya2hth26yioguk4rlq] inputs_to_check[72]: 223", + "[ptaz43zzcc5wqnllxp3fe7pvbo75xmcjlabttblccy46hygeozy] inputs_to_check[73]: 224", + "[wlmb2nn3nq5s77bhgmozz6k5xgxru6empq7wgpphhcgprnpnc7o] inputs_to_check[74]: 225", + "[ltksjik3ctg6uqqzzgdjyvza6wpx757hanbg2zoy3qa4k3a4vpx] inputs_to_check[75]: 226", + "[7u7ifib46vahqccsmg654f3uzmjbluuqepiqql3s5ozwtzqz7pp] inputs_to_check[76]: 227", + "[osag4eozwslt5b7yuzth3lgba32eotdkqtr2kw4ey4i43bgvvrs] inputs_to_check[77]: 228", + "[zwdkykxjgt356ykebzld3rzgsfc5zlfk5st4we3ykzkaba3oqex] inputs_to_check[78]: 229", + "[iungcnzcibs3necrx3njdt2ckikflhexkoicbep7tvcvtj5ly5d] inputs_to_check[79]: 230", + "[bwmghobfcwh2lrdjqskkhe6u3vox2fbz53b65rgrzlmndirmzbm] inputs_to_check[80]: 231", + "[qhj52t7zdp3oargrkm4bg6aao35lsfeuhsm5pgevylkqroeb4lc] inputs_to_check[81]: 232", + "[bz4ayxadi54u4x7rrse46x6v7bfpgk5qy4scu6kg76fjuxlpy6u] inputs_to_check[82]: 233", + "[gw4sqgphdvlxdqktwxjso65pxpymqslnhgr6l4eyswl5izdixw6] inputs_to_check[83]: 234", + "[ewdr2tnhafkagyyp46wn5led3h754p4nzttu3w54uhxrqux3vvn] inputs_to_check[84]: 235", + "[4e7a3dp2ygblswjy2t4s5ytwvfgeppo6v5xajwfebrzqvxl5c3z] inputs_to_check[85]: 236", + "[dnrgqiivi7fu47qt3k5ea43mmzonrx62hvlvj7p4zs2whj47y7d] inputs_to_check[86]: 237", + "[pyoye6lh4ebpuzincukqsblo7sz2ok35q5n23bykkflupvr6b3x] inputs_to_check[87]: 238", + "[zzav4ck6zf5ii2aadgk45satnvsrcelh7lwasful7siezs7emg2] inputs_to_check[88]: 239", + "[jqli3sayeay6jxebdo6gg7uiifocbslkvp3hv7kgpgetcctr6kk] inputs_to_check[89]: 240", + "[xfscvbmf5xe24dzns4sojux47uhkj3rx5escbpdnh74elnkufk4] inputs_to_check[90]: 241", + "[h22xhecdg6loiwavuwtlmkjqgxj23bujpayu7r7cnuwow22zy2e] inputs_to_check[91]: 242", + "[ffeweldbvparqtiiuqrj2k57nqdefczqg2x5n36j3cgfubpzl2r] inputs_to_check[92]: 243", + "[ccpnfy7cohyrspuggdpbc7wpz4ul2fjni56cul6rgpztmmgrmbz] inputs_to_check[93]: 244", + "[wdxh2vlxp5oip6lbdhdc5bur247q4qljosyhafat4nqrygspikw] inputs_to_check[94]: 245", + "[imw7rafsd3kl6dhscu76cm37kz7cnlrn4cz4y77hnvu6epvrakh] inputs_to_check[95]: 246", + "[s7byzhoacpzsci2aqrvogeb7f5f7egbzqw5uujgvswtjutgh33t] inputs_to_check[96]: 247", + "[usr2ovytp5xi5pvkziuuu7bpg7zfa44xckrw7mx42ad2sixrfjw] inputs_to_check[97]: 248", + "[spsub4jhn3s5o5rdysoeyv5mqczcmmn3t3hmuflioyvd4im4pfe] inputs_to_check[98]: 249", + "[7c6i6h6bfell5u33q6rcv25lpgmk4jah3uhjjx6bjevvjnshoim] inputs_to_check[99]: 250", + "[ah4sag2igwizdxkml6voaf342455hrlpr6cesdepe5njv6zahlq] inputs_to_check[100]: 251", + "[5svbus4u4wap6a3z767wrjlymc7g7qft4ugaae65e5t7tvvigpv] inputs_to_check[101]: 252", + "[6dwei2ltmufaindqw57by4jqhptur5xijexpjttzbqiw5xq7ufb] inputs_to_check[102]: 253", + "[r557yxhzgnvuaqxq2y2aisgxqacm3cndl3efydbr3l5u7t6vaao] inputs_to_check[103]: 254", + "[ouw6rzfmq6mznqvkp4ouhr3fzo3ljmtrqmyrm3pitapfmmfcq67] inputs_to_check[104]: 255", + "[fv6slhtedtydps5s5u2etitscliblzcidyitqf7krsv4e23fzk6] inputs_to_check[105]: 256", + "[bmucc7bpdzrvbf5petlmybacuupvmc7agiupfu54h73xhqupvaf] inputs_to_check[106]: 257", + "[26uxlcwvg3bcy54iimat7oht3bxo7jvlwbqdppzk5zwkoxyv7cb] inputs_to_check[107]: 258", + "[6ficbgfpvxoz32hthootgscev5fx7mrus4lxsrrnugdtmomziuz] inputs_to_check[108]: 259", + "[4tsez6gljkkxdflgsfn46kmxnkkmjfm3foz2peoj2utbxjdodv2] inputs_to_check[109]: 260", + "[677wque2vebz2rd32qmi2owsgvljzdjthgg7zhc67nb7s6z46xw] inputs_to_check[110]: 261", + "[m7xjumsvlwcemhzme2dqw2usksuuan5qqnmqjaetwxiar4s4j6c] inputs_to_check[111]: 262", + "[paeekzgtlydskvlpjv3e3j2zbrofnrddnfj3b6ronaopjyxkseo] inputs_to_check[112]: 263", + "[rwjfz3vqhvl3rixynv6iipvly4la2yaqouw2ax5qrgr43ekinaz] inputs_to_check[113]: 264", + 
"[3rcgr2jikn5dxbda5qrx57cu7lqsoqexxqcfzwsazgqojrjzsua] inputs_to_check[114]: 265", + "[uqxikezud5mf2aqkp7seqvyze7hpn2o5c2yez6uydmlvjn2eh5v] inputs_to_check[115]: 266", + "[wjcwolwbnxwp5mof62qvgpjq3ar5rzt6kkdytt6wev4udsfqe6x] inputs_to_check[116]: 267", + "[veogf6bsf6ikphzxxhjgpzn5jqx3g55t4aoyurw7pm76yiy7zt2] inputs_to_check[117]: 268", + "[vjdrwtc6q6dw6qossmadxe3ct3fx342xxf5ebrhb42tbtrjmtoj] inputs_to_check[118]: 269", + "[fzi2ibchn6t3srl4xj73y4mywe6m4to7ns3ffl2tsei46mebm5l] inputs_to_check[119]: 270", + "[54mteumeehrhr42ajb2mmomf5sssoeewc5jl6nqlnib5ohxb3qd] inputs_to_check[120]: 271", + "[yk2ib3hhazfcmbecutfzw356dveofwjm5aokurcon6brky4fv62] inputs_to_check[121]: 272", + "[qmgadyz6iluhzeytokynhwx3fiybdqurux3poi3kx7xdkpp24io] inputs_to_check[122]: 273", + "[32k7zywxkomlofwth2mo5yip7d4f63vdnnwqucsjvfiviwhzqwj] inputs_to_check[123]: 274", + "[ex5ejsi3yiu26ymqgxsfzxfongdoyfsvtn6wbfmjmqan26mywvv] inputs_to_check[124]: 275", + "[3r3pihjgebqvjx6yoh4q36k6bmloajxb2wp7mpovqfmbgvtihjx] inputs_to_check[125]: 276", + "[jcfhgvbhktjahasdkcg4j22c4iu5wgbqo2by6mvnhkizuyl7adt] inputs_to_check[126]: 277", + "[qysgpar3mwuhkwfmkdvwppvqn2rc3wkswoy4l5242blg3s6nomq] inputs_to_check[127]: 278", + "[sj5qpjzwfnrnrslghhva6z7fypbpicxjbczgdsscdg7ty6dloke] inputs_to_check[128]: 279", + "[dh6oykkvzpw4hh2l2kyq3n3oiaawqasgyps2bki6ouaqwr5o4c5] inputs_to_check[129]: 280", + "[bqqqhzw6zzkrdgeg4wed3ge2u7wrxxweyb7ikuugm2lg5bw2low] inputs_to_check[130]: 281", + "[lpgee42ktycd2ec7bvvfmts5czoojvy5rglm2fz4boqbzvem3mz] inputs_to_check[131]: 282", + "[xxcd2riuuqmc632el2www5z43brah2hzj66qz5c2bl4txi6tphi] inputs_to_check[132]: 283", + "[lzq356tk2daemd3eejrqwmxfuprmzobz2v54vhsfmppeq35midf] inputs_to_check[133]: 284", + "[jeqk32o5ugk777bosvm26wli4suonie2j7xeyvcnflm6sh2su4f] inputs_to_check[134]: 285", + "[3nrlzlfgqdttgmpwe6ae4donvgjkzv5xalpsx6dkyop7d5e7owg] inputs_to_check[135]: 286", + "[2cfsjfkfvrnfsi2dtyhpmzmogddssh6uxfsq3ydka2snuhaqy4s] inputs_to_check[136]: 287", + "[d27xrkheycncdo3uzfumqtruedyl7pv2ur7to3lkeg7cjfaawja] inputs_to_check[137]: 288", + "[o65ulls4ibkqdqeuckzqgselabavcbzln6kizmseggtkrra6k4k] inputs_to_check[138]: 289", + "[e5avxq5la5yhcl3jslzu2qsr4tcolx35t2ujwwjr7lsqbhx2gk3] inputs_to_check[139]: 290", + "[cbjwd3zv52u3h7bomxvmf6ynkx4wmtm6bqzzwkzlmyr2ict4kfn] inputs_to_check[140]: 291", + "[fpvvfys36hfg7uwq5l6ekyjnvf3tjpbf4d5cxo4webm7epzhpvt] inputs_to_check[141]: 292", + "[bhuauv3brrxmr45r7yueymn76n3bwlyfrcrqtsbuok4ipqa5d2q] inputs_to_check[142]: 293", + "[n5u6kpqzxtau4hisgec3wulumses6yh323wd6fnttpwm42i3j7x] inputs_to_check[143]: 294", + "[rtapjyb4o2hwk4hyf4ep7oeikdbv4zq2ni4dilcfjnjo4sgwzvz] inputs_to_check[144]: 295", + "[6aadk5hp6aqszjgpca65txkgn7cp4wttn7o6q4uv5br7qu4ubxq] inputs_to_check[145]: 296", + "[ptxf4kphvduiofe7xbem4isrkenfdki3oegb53qdm2jzkbn26ed] inputs_to_check[146]: 297", + "[5pxadwvblqbojkxsf7lbkowi52nvhflb3rx456ro3uostqlb7ky] inputs_to_check[147]: 298", + "[du4vyrfyozrfxcf6kk6ma7oqwatapifazeelfsawmsiu6gjdtxp] deterministic_algorithms_settings: (False, False, True)", + "[7as26aeta7rzhgm2mxh4el36kupf55fr27327kzc2fsdiy3nexy] cuda_matmul_settings: (True, True, True)", + "[7sgsc55kfjzwis4uo5qss3whnvhbsher24yq6kx6s5kpsrkxea6] torch_version: ", + "[poglqjwowp4gnkmehjby2lvdjrwuo5tbxa2gayd6smgasl2hgsd] system_info[device]: {'name': 'NVIDIA H100'}", + "[sp37civic5nfnonkkbzih3xfntpo3gcmphcbajuw565ur3hduzs] system_info[version]: {'triton': 
'3.0.0+45fff310c8d412cc506d0c1c7cf83e442e39f90ca1a9dbd4346ecd342591748e57a184b0a3-f7c9bcb1b9106f4f2459ef1b70df75315076b76babe2a4ba9e0b3b5a40697757-d412cc506d0c1c7cf83e442e39f90ca1a9dbd4346ecd342591748e57a184b0a3-23d635e690d670bf61798e1259674b78c0ed5ba222ab6a455f329f27a758fc2d-e3b0c44298fc1c149afbf4c8996fb92427ae41e4649b934ca495991b7852b855-c9cccccc1377e5b8c152e8449fa73a2b3660a899bebac0581c99e4e368a61cde-e82522398a0a43b3c7db9c7fa26ab086260c0981e59942e1df54ddc669d7b78c-cf28658fa328f7f283ec4e6ccc6c48d7c2a8ddbdf5134d3eb35c9b38ce4ace44-b9d80690b3109c2aaf5ece450d62e93b37eb6ab38552089794b3bb36e36a22b3-36130a37af1b19a0dec569aa08d30b00c74c8f02b6b632999d86dea169146792-572e6cbc0b9ae0089330aa5d115d10cfc16bd45a69e3ca362c902b9c04b5280b-412a0e8de51f28c2de98b3343804b2949ede2cb593052a0eb0ba3e6a9a1812d0', 'cuda': '12.0'}", + "[zapn5tv6636hi2hdgg2j7wqiwxjbrqbqa4vrgrnuli5v34salwn] system_info[hash]: ee3e5029961cf964c22a83f0ca3586fcab4a36649a1478e6293b5382433b23d3", + "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[TYPE_CHECKING]: False", + "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[abi_compatible]: False", + "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[aggressive_fusion]: False", + "[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[allow_buffer_reuse]: True", + "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[allow_stack_allocation]: False", + "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[always_keep_tensor_constants]: False", + "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[aot_inductor.debug_compile]: False", + "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[aot_inductor.debug_dump_consts_bin]: False", + "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[aot_inductor.force_mmap_weights]: False", + "[v3hzzlv4tjgvp3pyhmzagjd25orl6n7nynoa7svlhhwk73b7u3c] inductor_config[aot_inductor.output_path]: ", + "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[aot_inductor.package]: False", + "[v3hzzlv4tjgvp3pyhmzagjd25orl6n7nynoa7svlhhwk73b7u3c] inductor_config[aot_inductor.serialized_in_spec]: ", + "[v3hzzlv4tjgvp3pyhmzagjd25orl6n7nynoa7svlhhwk73b7u3c] inductor_config[aot_inductor.serialized_out_spec]: ", + "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[aot_inductor.use_runtime_constant_folding]: False", + "[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[assert_indirect_indexing]: True", + "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[assume_aligned_inputs]: False", + "[v3hzzlv4tjgvp3pyhmzagjd25orl6n7nynoa7svlhhwk73b7u3c] inductor_config[autoheuristic_collect]: ", + "[jvchmi66fvqzlemhr5fcqorz5trfdtdalzfagtj2aolmimwqhdq] inductor_config[autoheuristic_log_path]: DEFAULT", + "[jwbrgxes7vjqumngs5hyj6gn5nytv2whnppnzngvaagfmawhkkd] inductor_config[autoheuristic_use]: mixed_mm", + "[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[autotune_fallback_to_aten]: True", + "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[autotune_in_subproc]: False", + "[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[autotune_local_cache]: True", + "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[autotune_multi_device]: False", + "[tquy2we2efmowuj4wuqzcfcfdcrkzkzmwdae6hprj7fa64jpusq] inductor_config[autotune_remote_cache]: None", + 
"[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[b2b_gemm_pass]: False", + "[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[batch_fusion]: True", + "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[benchmark_combo_kernel]: False", + "[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[benchmark_epilogue_fusion]: True", + "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[benchmark_fusion]: False", + "[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[benchmark_harness]: True", + "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[benchmark_kernel]: False", + "[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[bw_outputs_user_visible]: True", + "[b4ha3ravs3qv237q65hpfqegbnoww7tf2ahcbu2i7xo6te5spqs] inductor_config[c_shim_version]: 2", + "[aghvyrrgwvxijco2pk5wzc3cgmmthrbmgxitiibxuuscxdwrjd3] inductor_config[combo_kernel_allow_mixed_sizes]: 1", + "[aghvyrrgwvxijco2pk5wzc3cgmmthrbmgxitiibxuuscxdwrjd3] inductor_config[combo_kernels_autotune]: 1", + "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[comment_origin]: False", + "[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[comprehensive_padding]: True", + "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[compute_all_bounds]: False", + "[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[constant_and_index_propagation]: True", + "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[conv_1x1_as_mm]: False", + "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[coordinate_descent_check_all_directions]: False", + "[aghvyrrgwvxijco2pk5wzc3cgmmthrbmgxitiibxuuscxdwrjd3] inductor_config[coordinate_descent_search_radius]: 1", + "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[coordinate_descent_tuning]: False", + "[c7zj4qytmety6keurs3hsh5wn7foxp3dqx4kym2ucszzcb2ngrf] inductor_config[cpp.cxx]: (None, 'g++')", + "[yrty22bseefglnysuoec4ji7j2rnaggdj3g33zzj7avogwfmgdw] inductor_config[cpp.descriptive_names]: original_aten", + "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[cpp.dynamic_threads]: False", + "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[cpp.enable_floating_point_contract_flag]: False", + "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[cpp.enable_kernel_profile]: False", + "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[cpp.enable_unsafe_math_opt_flag]: False", + "[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[cpp.fallback_scatter_reduce_sum]: True", + "[aghvyrrgwvxijco2pk5wzc3cgmmthrbmgxitiibxuuscxdwrjd3] inductor_config[cpp.gemm_max_k_slices]: 1", + "[tquy2we2efmowuj4wuqzcfcfdcrkzkzmwdae6hprj7fa64jpusq] inductor_config[cpp.inject_log1p_bug_TESTING_ONLY]: None", + "[tquy2we2efmowuj4wuqzcfcfdcrkzkzmwdae6hprj7fa64jpusq] inductor_config[cpp.inject_relu_bug_TESTING_ONLY]: None", + "[ebt2ncs4f5y7dn7btzi76mnouepvzad474tmp5iju4wiuumjl4s] inductor_config[cpp.max_horizontal_fusion_size]: 16", + "[g7rrnbg5yonzux3cfj5ovre5lob3ayda7qcfpxjvtwmiz4uicii] inductor_config[cpp.min_chunk_size]: 4096", + "[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[cpp.no_redundant_loops]: True", + "[tquy2we2efmowuj4wuqzcfcfdcrkzkzmwdae6hprj7fa64jpusq] inductor_config[cpp.simdlen]: None", + "[sz3im5ogc6asp7g4uqocnovype63tkdexzfrniv6hn2oank3biu] 
inductor_config[cpp.threads]: -1", + "[tquy2we2efmowuj4wuqzcfcfdcrkzkzmwdae6hprj7fa64jpusq] inductor_config[cpp.vec_isa_ok]: None", + "[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[cpp.weight_prepack]: True", + "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[cpp_wrapper]: False", + "[bsvfcwwoczx2rlkdz2eta6doujsymyihmi46hhwk6clrrvwcb6m] inductor_config[cpu_backend]: cpp", + "[tquy2we2efmowuj4wuqzcfcfdcrkzkzmwdae6hprj7fa64jpusq] inductor_config[cuda.arch]: None", + "[tvyftmtdmezlejo2xllu7awzv4pzc4vm4fub4b3gpl5jptjkosi] inductor_config[cuda.compile_opt_level]: -O1", + "[tquy2we2efmowuj4wuqzcfcfdcrkzkzmwdae6hprj7fa64jpusq] inductor_config[cuda.cuda_cxx]: None", + "[aghvyrrgwvxijco2pk5wzc3cgmmthrbmgxitiibxuuscxdwrjd3] inductor_config[cuda.cutlass_backend_min_gemm_size]: 1", + "[tquy2we2efmowuj4wuqzcfcfdcrkzkzmwdae6hprj7fa64jpusq] inductor_config[cuda.cutlass_max_profiling_configs]: None", + "[tquy2we2efmowuj4wuqzcfcfdcrkzkzmwdae6hprj7fa64jpusq] inductor_config[cuda.cutlass_op_allowlist_regex]: None", + "[lwkz5chtpji756gurqw4foijfi7zfgljtnn5nmnvdi2skpt4mgh] inductor_config[cuda.cutlass_op_denylist_regex]: pingpong", + "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[cuda.enable_cuda_lto]: False", + "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[cuda.enable_debug_info]: False", + "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[cuda.enable_ptxas_info]: False", + "[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[cuda.generate_test_runner]: True", + "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[cuda.use_fast_math]: False", + "[tquy2we2efmowuj4wuqzcfcfdcrkzkzmwdae6hprj7fa64jpusq] inductor_config[cuda.version]: None", + "[caw4ly2z672k6kjfahoxwpajp5idhhtrpgf3ma2clylcp7c7aid] inductor_config[cuda_backend]: triton", + "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[dce]: False", + "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[debug]: False", + "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[debug_fusion]: False", + "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[debug_index_asserts]: False", + "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[debug_ir_traceback]: False", + "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[decompose_mem_bound_mm]: False", + "[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[developer_warnings]: True", + "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[disable_cpp_codegen]: False", + "[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[disable_progress]: True", + "[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[dynamic_scale_rblock]: True", + "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[efficient_conv_bn_eval_fx_passes]: False", + "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[emulate_precision_casts]: False", + "[v3hzzlv4tjgvp3pyhmzagjd25orl6n7nynoa7svlhhwk73b7u3c] inductor_config[enabled_metric_tables]: ", + "[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[epilogue_fusion]: True", + "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[epilogue_fusion_first]: False", + "[lxxtoqhcoepwfokeiibd575gnxo3uzwiv4hmpomlwkpzqz3qzsh] inductor_config[estimate_op_runtime]: default", + 
"[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[fallback_random]: False", + "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[force_disable_caches]: False", + "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[force_fuse_int_mm_with_mul]: False", + "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[force_layout_optimization]: False", + "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[force_same_precision]: False", + "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[force_shape_pad]: False", + "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[freezing]: False", + "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[freezing_discard_parameters]: False", + "[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[fx_graph_cache]: True", + "[tquy2we2efmowuj4wuqzcfcfdcrkzkzmwdae6hprj7fa64jpusq] inductor_config[fx_graph_remote_cache]: None", + "[zwmmbkdkarexuhbigurz5lfnhx64tht7fznecjkrvznh6rzivbv] inductor_config[fx_passes_numeric_check]: {'pre_grad': False, 'precision': 0.0001, 'num_iterations': 1, 'requires_optimizer': True}", + "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[generate_intermediate_hooks]: False", + "[tquy2we2efmowuj4wuqzcfcfdcrkzkzmwdae6hprj7fa64jpusq] inductor_config[global_cache_dir]: None", + "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[group_fusion]: False", + "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[halide.asserts]: False", + "[ljhgflgihidopsfsdcbqynv27nceykby3nutyd5jlcpq7n6e7l4] inductor_config[halide.cpu_target]: host", + "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[halide.debug]: False", + "[wx7vmsmrdpk5ue2txlywp3lj3faqmdjphs5fgg2ehzsyno7uovg] inductor_config[halide.gpu_target]: host-cuda", + "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[halide.scan_kernels]: False", + "[k5ogk6345jvklsnu7g2njqstiz2g6pm5wmqpgg3kasrmuqwjvl6] inductor_config[halide.scheduler_cpu]: Adams2019", + "[svgytlua5wcyeia7wq7e6zgh5tsueikrnzchmdmouvmkpfsc2zq] inductor_config[halide.scheduler_cuda]: Anderson2021", + "[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[implicit_fallbacks]: True", + "[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[inplace_buffers]: True", + "[5fxczt3ciyxitdhizb7sfsgn7fhpczcqsngttnt5ot2wyctk7co] inductor_config[inter_node_bw]: 25", + "[yezuzjtg4h3jjur4jwtwiehbyixa7eonq4tqsqmwqve2lvvmrem] inductor_config[intra_node_bw]: 300", + "[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[is_nightly_or_source]: True", + "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[is_predispatch]: False", + "[tquy2we2efmowuj4wuqzcfcfdcrkzkzmwdae6hprj7fa64jpusq] inductor_config[joint_custom_post_pass]: None", + "[tquy2we2efmowuj4wuqzcfcfdcrkzkzmwdae6hprj7fa64jpusq] inductor_config[joint_custom_pre_pass]: None", + "[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[joint_graph_constant_folding]: True", + "[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[keep_output_stride]: True", + "[j6c55jha5r2sdys2rwq7uqhtleea5dgjcye7nicfgft36v7xfvp] inductor_config[kernel_name_max_ops]: 10", + "[4p2fdjlvxrcw7c7fvzm5huhtqxnro4kvkx56f7p5zyrxqkwooov] inductor_config[layout_opt_default]: 1", + "[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[layout_optimization]: 
True", + "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[max_autotune]: False", + "[uqlsbif4zxd75vt522p52txyuguieipi2lwz5g5awt56lccqk7s] inductor_config[max_autotune_conv_backends]: ATEN,TRITON", + "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[max_autotune_gemm]: False", + "[2y7luesktjrque3nr7qtxnum2mkbeegzdrsvkm3rvdlhqboajhx] inductor_config[max_autotune_gemm_backends]: ATEN,TRITON,CPP", + "[jvchmi66fvqzlemhr5fcqorz5trfdtdalzfagtj2aolmimwqhdq] inductor_config[max_autotune_gemm_search_space]: DEFAULT", + "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[max_autotune_pointwise]: False", + "[bh33ranllcgilhgmgr3qvygzxjm6isq5iexnfm3zx6fnr2zwlp2] inductor_config[max_autotune_subproc_graceful_timeout_seconds]: 1.0", + "[iglov24t7x5ruci344aer2tm6nqshi4veuw4wxlssxtu46cx76m] inductor_config[max_autotune_subproc_result_timeout_seconds]: 60.0", + "[pwoh5aypf4fxbntdvwt67rppxorqos6xr3w7qzeun6kblbfg2ga] inductor_config[max_autotune_subproc_terminate_timeout_seconds]: 2.0", + "[aghvyrrgwvxijco2pk5wzc3cgmmthrbmgxitiibxuuscxdwrjd3] inductor_config[max_epilogue_benchmarked_choices]: 1", + "[jykiys6ynafs3zdylwa5ggq6j655mxeh42d6mtdi22gffkrmiac] inductor_config[max_fusion_size]: 64", + "[yttmfmxblgcbsvbokguzowcorrcxz5uunxtcvsbe6nijgcx45he] inductor_config[max_pointwise_cat_inputs]: 8", + "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[memory_planning]: False", + "[x75won4jmsgeb63pcvwr2y4eteyzzdhmf5rv6xhjppie4hx2yu5] inductor_config[memory_pool]: intermediates", + "[v2td5s4lnsvyxvaevy4chx6kc5h3mm2axazbgwimqule5zrzao7] inductor_config[mixed_mm_choice]: heuristic", + "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[nan_asserts]: False", + "[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[optimize_scatter_upon_const_tensor]: True", + "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[pad_channels_last]: False", + "[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[pattern_matcher]: True", + "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[permute_fusion]: False", + "[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[pick_loop_orders]: True", + "[tquy2we2efmowuj4wuqzcfcfdcrkzkzmwdae6hprj7fa64jpusq] inductor_config[post_grad_custom_post_pass]: None", + "[tquy2we2efmowuj4wuqzcfcfdcrkzkzmwdae6hprj7fa64jpusq] inductor_config[post_grad_custom_pre_pass]: None", + "[4bryyl4ahh5whyg3zwqebpwmjnx6w77nqgqbdjlowju6lkqtn7w] inductor_config[post_grad_fusion_options]: {}", + "[tquy2we2efmowuj4wuqzcfcfdcrkzkzmwdae6hprj7fa64jpusq] inductor_config[pre_grad_custom_pass]: None", + "[gtkv35cxmtt6tr556buxi277a67g25mjojnv32dc4bjvc7bwscw] inductor_config[pre_grad_fusion_options]: {'batch_linear': {}, 'batch_linear_lhs': {}, 'batch_layernorm': {}, 'batch_tanh': {}, 'batch_relu': {}, 'batch_sigmoid': {}}", + "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[profile_bandwidth]: False", + "[tquy2we2efmowuj4wuqzcfcfdcrkzkzmwdae6hprj7fa64jpusq] inductor_config[profile_bandwidth_output]: None", + "[v3hzzlv4tjgvp3pyhmzagjd25orl6n7nynoa7svlhhwk73b7u3c] inductor_config[profile_bandwidth_regex]: ", + "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[profiler_mark_wrapper_call]: False", + "[yttmfmxblgcbsvbokguzowcorrcxz5uunxtcvsbe6nijgcx45he] inductor_config[realize_acc_reads_threshold]: 8", + "[rr5m5hsocoyodldz7vcvaizdwvm2rt34evmqdxvng7wz3tufvo6] inductor_config[realize_opcount_threshold]: 30", + 
"[lkkae3meylaixfif4thncru4hjqeaislawjoghffrbwuscaagei] inductor_config[realize_reads_threshold]: 4", + "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[reorder_for_compute_comm_overlap]: False", + "[ssupi7bu3rrhdpg2jyegzncu3kg3nnhklyliqvutaxgs7y7k3dx] inductor_config[reorder_for_compute_comm_overlap_passes]: ['reorder_compute_for_overlap', 'sink_waits', 'raise_comms']", + "[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[reorder_for_locality]: True", + "[h25wqx6vliw4j5rtzzbv6latydxyei3deyg6v7wzvnzryfktuki] inductor_config[rocm.arch]: []", + "[tquy2we2efmowuj4wuqzcfcfdcrkzkzmwdae6hprj7fa64jpusq] inductor_config[rocm.ck_dir]: None", + "[oartxnko2l7d67tzwwm2otcumaut3n4wwcfgz3o377hmcveu5ft] inductor_config[rocm.ck_supported_arch]: ['gfx90a', 'gfx940', 'gfx941', 'gfx942']", + "[klfqjprnpfhcdurgvuikvc4rpd5ynkpk77toousr5h3u5roty6p] inductor_config[rocm.compile_opt_level]: -O2", + "[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[rocm.flush_denormals]: True", + "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[rocm.is_debug]: False", + "[tquy2we2efmowuj4wuqzcfcfdcrkzkzmwdae6hprj7fa64jpusq] inductor_config[rocm.n_max_profiling_configs]: None", + "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[rocm.print_kernel_resource_usage]: False", + "[tquy2we2efmowuj4wuqzcfcfdcrkzkzmwdae6hprj7fa64jpusq] inductor_config[rocm.rocm_home]: None", + "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[rocm.save_temps]: False", + "[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[rocm.use_fast_math]: True", + "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[rocm.use_preselected_instances]: False", + "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[save_args]: False", + "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[search_autotune_cache]: False", + "[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[shape_padding]: True", + "[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[size_asserts]: True", + "[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[split_cat_fx_passes]: True", + "[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[split_reductions]: True", + "[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[static_weight_shapes]: True", + "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[triton.autotune_at_compile_time]: False", + "[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[triton.autotune_cublasLt]: True", + "[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[triton.autotune_pointwise]: True", + "[tuax46wac7rfv2trf5gcps6vleo3cq44lbnrdxtprvo3ljjaddj] inductor_config[triton.cudagraph_dynamic_shape_warn_limit]: 50", + "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[triton.cudagraph_skip_dynamic_graphs]: False", + "[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[triton.cudagraph_support_input_mutation]: True", + "[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[triton.cudagraph_trees]: True", + "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[triton.cudagraph_trees_history_recording]: False", + "[ljdqgtysl3vdf7j6attlz5gmjg2ncihnveojfyubosplmkrjgra] inductor_config[triton.cudagraph_unexpected_rerecord_limit]: 128", + 
"[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[triton.cudagraphs]: True", + "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[triton.debug_sync_graph]: False", + "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[triton.debug_sync_kernel]: False", + "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[triton.dense_indexing]: False", + "[yrty22bseefglnysuoec4ji7j2rnaggdj3g33zzj7avogwfmgdw] inductor_config[triton.descriptive_names]: original_aten", + "[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[triton.divisible_by_16]: True", + "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[triton.fast_path_cudagraph_asserts]: False", + "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[triton.force_cudagraph_sync]: False", + "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[triton.force_cudagraphs_warmup]: False", + "[tquy2we2efmowuj4wuqzcfcfdcrkzkzmwdae6hprj7fa64jpusq] inductor_config[triton.inject_relu_bug_TESTING_ONLY]: None", + "[pr5nr4a7dthirgd2ljo3d2xakc63ywxugusu6mkmr6gmpeliyib] inductor_config[triton.max_tiles]: 2", + "[fv6slhtedtydps5s5u2etitscliblzcidyitqf7krsv4e23fzk6] inductor_config[triton.min_split_scan_rblock]: 256", + "[vrl5ktomgtzox5xucd3np6vug3vyj6hwwzahqijuwpmamlv7ohi] inductor_config[triton.multi_kernel]: 0", + "[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[triton.persistent_reductions]: True", + "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[triton.skip_cudagraph_warmup]: False", + "[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[triton.slow_path_cudagraph_asserts]: True", + "[ebt2ncs4f5y7dn7btzi76mnouepvzad474tmp5iju4wiuumjl4s] inductor_config[triton.spill_threshold]: 16", + "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[triton.store_cubin]: False", + "[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[triton.tiling_prevents_pointwise_fusion]: True", + "[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[triton.tiling_prevents_reduction_fusion]: True", + "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[triton.unique_kernel_names]: False", + "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[triton.use_block_ptr]: False", + "[wft6ljqsfr3x4m7fa5zuyb7cwknky4irrxz4bjr6uzr2yiopxqj] inductor_config[unbacked_symint_fallback]: 8192", + "[yttmfmxblgcbsvbokguzowcorrcxz5uunxtcvsbe6nijgcx45he] inductor_config[unroll_reductions_threshold]: 8", + "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[unsafe_ignore_unsupported_triton_autotune_args]: False", + "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[use_minimal_arrayref_interface]: False", + "[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[use_mixed_mm]: True", + "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[verbose_progress]: False", + "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[warn_mix_layout]: False", + "[hofygoznqmna6yvgsc6itdddi4hxftssgegh6wquixg2yng3a3z] inductor_config[worker_start_method]: subprocess" + ] + }, + "ph": "i", + "pid": 0, + "s": "p" + } +V0806 13:56:22.387000 4107173 torch/_inductor/codecache.py:1326] {"artifact": {"name": "fx_graph_cache_hash", "encoding": "json"}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0, "has_payload": "f04a88277d7f7865bde907ed3d2b2c2e"} + 
{"key": "f2hzi4mmzauwdbyib6zmykorraxjbqftyvglo6f4mz2b36wljiti", "cache_state": "miss", "components": ["[i5hietdxt6dlkcrwbpsvei6udef6z3eec54zo7cpjzbybmgvi7b] gm: ()\n\n\n\ndef forward(self, arg0_1, arg1_1, arg2_1, arg3_1, arg4_1, arg5_1, arg6_1, arg7_1, arg8_1, arg9_1, arg10_1, arg11_1, arg12_1, arg13_1, arg14_1, arg15_1, arg16_1, arg17_1, arg18_1, arg19_1, arg20_1, arg21_1, arg22_1, arg23_1, arg24_1, arg25_1, arg26_1, arg27_1, arg28_1, arg29_1, arg30_1, arg31_1, arg32_1, arg33_1, arg34_1, arg35_1, arg36_1, arg37_1, arg38_1, arg39_1, arg40_1, arg41_1, arg42_1, arg43_1, arg44_1, arg45_1, arg46_1, arg47_1, arg48_1, arg49_1, arg50_1, arg51_1, arg52_1, arg53_1, arg54_1, arg55_1, arg56_1, arg57_1, arg58_1, arg59_1, arg60_1, arg61_1, arg62_1, arg63_1, arg64_1, arg65_1, arg66_1, arg67_1, arg68_1, arg69_1, arg70_1, arg71_1, arg72_1, arg73_1, arg74_1, arg75_1, arg76_1, arg77_1, arg78_1, arg79_1, arg80_1, arg81_1, arg82_1, arg83_1, arg84_1, arg85_1, arg86_1, arg87_1, arg88_1, arg89_1, arg90_1, arg91_1, arg92_1, arg93_1, arg94_1, arg95_1, arg96_1, arg97_1, arg98_1, arg99_1, arg100_1, arg101_1, arg102_1, arg103_1, arg104_1, arg105_1, arg106_1, arg107_1, arg108_1, arg109_1, arg110_1, arg111_1, arg112_1, arg113_1, arg114_1, arg115_1, arg116_1, arg117_1, arg118_1, arg119_1, arg120_1, arg121_1, arg122_1, arg123_1, arg124_1, arg125_1, arg126_1, arg127_1, arg128_1, arg129_1, arg130_1, arg131_1, arg132_1, arg133_1, arg134_1, arg135_1, arg136_1, arg137_1, arg138_1, arg139_1, arg140_1, arg141_1, arg142_1, arg143_1, arg144_1, arg145_1, arg146_1, arg147_1, arg148_1, arg149_1, arg150_1, arg151_1, arg152_1, arg153_1, arg154_1, arg155_1, arg156_1, arg157_1, arg158_1, arg159_1, arg160_1, arg161_1, arg162_1, arg163_1, arg164_1, arg165_1, arg166_1, arg167_1, arg168_1, arg169_1, arg170_1, arg171_1, arg172_1, arg173_1, arg174_1, arg175_1, arg176_1, arg177_1, arg178_1, arg179_1, arg180_1, arg181_1, arg182_1, arg183_1, arg184_1, arg185_1, arg186_1, arg187_1, arg188_1, arg189_1, arg190_1, arg191_1, arg192_1, arg193_1, arg194_1, arg195_1, arg196_1, arg197_1, arg198_1, arg199_1, arg200_1, arg201_1, arg202_1, arg203_1, arg204_1, arg205_1, arg206_1, arg207_1, arg208_1, arg209_1, arg210_1, arg211_1, arg212_1, arg213_1, arg214_1, arg215_1, arg216_1, arg217_1, arg218_1, arg219_1, arg220_1, arg221_1, arg222_1, arg223_1, arg224_1, arg225_1, arg226_1, arg227_1, arg228_1, arg229_1, arg230_1, arg231_1, arg232_1, arg233_1, arg234_1, arg235_1, arg236_1, arg237_1, arg238_1, arg239_1, arg240_1, arg241_1, arg242_1, arg243_1, arg244_1, arg245_1, arg246_1, arg247_1, arg248_1, arg249_1, arg250_1, arg251_1, arg252_1, arg253_1, arg254_1, arg255_1, arg256_1, arg257_1, arg258_1, arg259_1, arg260_1, arg261_1, arg262_1, arg263_1, arg264_1, arg265_1, arg266_1, arg267_1, arg268_1, arg269_1, arg270_1, arg271_1, arg272_1, arg273_1, arg274_1, arg275_1, arg276_1, arg277_1, arg278_1, arg279_1, arg280_1, arg281_1, arg282_1, arg283_1, arg284_1, arg285_1, arg286_1, arg287_1, arg288_1, arg289_1, arg290_1, arg291_1, arg292_1, arg293_1, arg294_1, arg295_1, arg296_1, arg297_1, arg298_1, arg299_1, arg300_1, arg301_1, arg302_1, arg303_1, arg304_1, arg305_1, arg306_1, arg307_1, arg308_1, arg309_1, arg310_1, arg311_1, arg312_1, arg313_1, arg314_1, arg315_1, arg316_1, arg317_1, arg318_1, arg319_1, arg320_1, arg321_1, arg322_1, arg323_1, arg324_1, arg325_1, arg326_1, arg327_1, arg328_1, arg329_1, arg330_1, arg331_1, arg332_1, arg333_1, arg334_1, arg335_1, arg336_1, arg337_1, arg338_1, arg339_1, arg340_1, arg341_1, arg342_1, arg343_1, arg344_1, arg345_1, arg346_1, 
arg347_1, arg348_1, arg349_1, arg350_1, arg351_1, arg352_1, arg353_1, arg354_1, arg355_1, arg356_1, arg357_1, arg358_1, arg359_1, arg360_1, arg361_1, arg362_1, arg363_1, arg364_1, arg365_1, arg366_1, arg367_1, arg368_1, arg369_1, arg370_1, arg371_1, arg372_1, arg373_1, arg374_1, arg375_1, arg376_1, arg377_1, arg378_1, arg379_1, arg380_1, arg381_1, arg382_1, arg383_1, arg384_1, arg385_1, arg386_1, arg387_1, arg388_1, arg389_1, arg390_1, arg391_1, arg392_1, arg393_1, arg394_1, arg395_1, arg396_1, arg397_1, arg398_1, arg399_1, arg400_1, arg401_1, arg402_1, arg403_1, arg404_1, arg405_1, arg406_1, arg407_1, arg408_1, arg409_1, arg410_1, arg411_1, arg412_1, arg413_1, arg414_1, arg415_1, arg416_1, arg417_1, arg418_1, arg419_1, arg420_1, arg421_1, arg422_1, arg423_1, arg424_1, arg425_1, arg426_1, arg427_1, arg428_1, arg429_1, arg430_1, arg431_1, arg432_1, arg433_1, arg434_1, arg435_1, arg436_1, arg437_1, arg438_1, arg439_1, arg440_1, arg441_1, arg442_1, arg443_1, arg444_1, arg445_1, arg446_1, arg447_1, arg448_1, arg449_1, arg450_1, arg451_1, arg452_1, arg453_1, arg454_1, arg455_1, arg456_1, arg457_1, arg458_1, arg459_1, arg460_1, arg461_1, arg462_1, arg463_1, arg464_1, arg465_1, arg466_1, arg467_1, arg468_1, arg469_1, arg470_1, arg471_1, arg472_1, arg473_1, arg474_1, arg475_1, arg476_1, arg477_1, arg478_1, arg479_1, arg480_1, arg481_1, arg482_1, arg483_1, arg484_1, arg485_1, arg486_1, arg487_1, arg488_1, arg489_1, arg490_1, arg491_1, arg492_1, arg493_1, arg494_1, arg495_1, arg496_1, arg497_1, arg498_1, arg499_1, arg500_1, arg501_1, arg502_1, arg503_1, arg504_1, arg505_1, arg506_1, arg507_1, arg508_1, arg509_1, arg510_1, arg511_1, arg512_1, arg513_1, arg514_1, arg515_1, arg516_1, arg517_1, arg518_1, arg519_1, arg520_1, arg521_1, arg522_1, arg523_1, arg524_1, arg525_1, arg526_1, arg527_1, arg528_1, arg529_1, arg530_1, arg531_1, arg532_1, arg533_1, arg534_1, arg535_1, arg536_1, arg537_1, arg538_1, arg539_1, arg540_1, arg541_1, arg542_1, arg543_1, arg544_1, arg545_1, arg546_1, arg547_1, arg548_1, arg549_1, arg550_1, arg551_1, arg552_1, arg553_1, arg554_1, arg555_1, arg556_1, arg557_1, arg558_1, arg559_1, arg560_1, arg561_1, arg562_1, arg563_1, arg564_1, arg565_1, arg566_1, arg567_1, arg568_1, arg569_1, arg570_1, arg571_1, arg572_1, arg573_1, arg574_1, arg575_1, arg576_1, arg577_1, arg578_1, arg579_1, arg580_1, arg581_1, arg582_1, arg583_1, arg584_1, arg585_1, arg586_1, arg587_1, arg588_1, arg589_1, arg590_1, arg591_1, arg592_1, arg593_1, arg594_1, arg595_1, arg596_1, arg597_1, arg598_1, arg599_1, arg600_1, arg601_1, arg602_1, arg603_1, arg604_1, arg605_1, arg606_1, arg607_1, arg608_1, arg609_1, arg610_1, arg611_1, arg612_1, arg613_1, arg614_1, arg615_1, arg616_1, arg617_1, arg618_1, arg619_1, arg620_1, arg621_1, arg622_1, arg623_1, arg624_1, arg625_1, arg626_1, arg627_1, arg628_1, arg629_1, arg630_1, arg631_1, arg632_1, arg633_1, arg634_1, arg635_1, arg636_1, arg637_1, arg638_1, arg639_1, arg640_1, arg641_1, arg642_1, arg643_1, arg644_1, arg645_1, arg646_1, arg647_1, arg648_1, arg649_1, arg650_1, arg651_1, arg652_1, arg653_1, arg654_1, arg655_1, arg656_1, arg657_1, arg658_1, arg659_1, arg660_1, arg661_1, arg662_1, arg663_1, arg664_1, arg665_1, arg666_1, arg667_1, arg668_1, arg669_1, arg670_1, arg671_1, arg672_1, arg673_1, arg674_1, arg675_1, arg676_1, arg677_1, arg678_1, arg679_1, arg680_1, arg681_1, arg682_1, arg683_1, arg684_1, arg685_1, arg686_1, arg687_1, arg688_1, arg689_1, arg690_1, arg691_1, arg692_1, arg693_1, arg694_1, arg695_1, arg696_1, arg697_1, arg698_1, arg699_1, arg700_1, arg701_1, 
arg702_1, arg703_1, arg704_1, arg705_1, arg706_1, arg707_1, arg708_1, arg709_1, arg710_1, arg711_1, arg712_1, arg713_1, arg714_1, arg715_1, arg716_1, arg717_1, arg718_1, arg719_1, arg720_1, arg721_1, arg722_1, arg723_1, arg724_1, arg725_1, arg726_1, arg727_1, arg728_1, arg729_1, arg730_1, arg731_1, arg732_1, arg733_1, arg734_1, arg735_1, arg736_1, arg737_1, arg738_1, arg739_1):\n _foreach_add = torch.ops.aten._foreach_add.Scalar([arg593_1, arg148_1, arg594_1, arg595_1, arg596_1, arg597_1, arg598_1, arg599_1, arg600_1, arg601_1, arg602_1, arg603_1, arg604_1, arg605_1, arg606_1, arg607_1, arg608_1, arg609_1, arg610_1, arg611_1, arg612_1, arg613_1, arg614_1, arg615_1, arg616_1, arg617_1, arg618_1, arg619_1, arg620_1, arg621_1, arg622_1, arg623_1, arg624_1, arg625_1, arg626_1, arg627_1, arg628_1, arg629_1, arg630_1, arg631_1, arg632_1, arg633_1, arg634_1, arg635_1, arg636_1, arg637_1, arg638_1, arg639_1, arg640_1, arg641_1, arg642_1, arg643_1, arg644_1, arg645_1, arg646_1, arg647_1, arg648_1, arg649_1, arg650_1, arg651_1, arg652_1, arg653_1, arg654_1, arg655_1, arg656_1, arg657_1, arg658_1, arg659_1, arg660_1, arg661_1, arg662_1, arg663_1, arg664_1, arg665_1, arg666_1, arg667_1, arg668_1, arg669_1, arg670_1, arg671_1, arg672_1, arg673_1, arg674_1, arg675_1, arg676_1, arg677_1, arg678_1, arg679_1, arg680_1, arg681_1, arg682_1, arg683_1, arg684_1, arg685_1, arg686_1, arg687_1, arg688_1, arg689_1, arg690_1, arg691_1, arg692_1, arg693_1, arg694_1, arg695_1, arg696_1, arg697_1, arg698_1, arg699_1, arg700_1, arg701_1, arg702_1, arg703_1, arg704_1, arg705_1, arg706_1, arg707_1, arg708_1, arg709_1, arg710_1, arg711_1, arg712_1, arg713_1, arg714_1, arg715_1, arg716_1, arg717_1, arg718_1, arg719_1, arg720_1, arg721_1, arg722_1, arg723_1, arg724_1, arg725_1, arg726_1, arg727_1, arg728_1, arg729_1, arg730_1, arg731_1, arg732_1, arg733_1, arg734_1, arg735_1, arg736_1, arg737_1, arg738_1, arg739_1], 1)\n getitem = _foreach_add[0]\n getitem_1 = _foreach_add[1]\n getitem_2 = _foreach_add[2]\n getitem_3 = _foreach_add[3]\n getitem_4 = _foreach_add[4]\n getitem_5 = _foreach_add[5]\n getitem_6 = _foreach_add[6]\n getitem_7 = _foreach_add[7]\n getitem_8 = _foreach_add[8]\n getitem_9 = _foreach_add[9]\n getitem_10 = _foreach_add[10]\n getitem_11 = _foreach_add[11]\n getitem_12 = _foreach_add[12]\n getitem_13 = _foreach_add[13]\n getitem_14 = _foreach_add[14]\n getitem_15 = _foreach_add[15]\n getitem_16 = _foreach_add[16]\n getitem_17 = _foreach_add[17]\n getitem_18 = _foreach_add[18]\n getitem_19 = _foreach_add[19]\n getitem_20 = _foreach_add[20]\n getitem_21 = _foreach_add[21]\n getitem_22 = _foreach_add[22]\n getitem_23 = _foreach_add[23]\n getitem_24 = _foreach_add[24]\n getitem_25 = _foreach_add[25]\n getitem_26 = _foreach_add[26]\n getitem_27 = _foreach_add[27]\n getitem_28 = _foreach_add[28]\n getitem_29 = _foreach_add[29]\n getitem_30 = _foreach_add[30]\n getitem_31 = _foreach_add[31]\n getitem_32 = _foreach_add[32]\n getitem_33 = _foreach_add[33]\n getitem_34 = _foreach_add[34]\n getitem_35 = _foreach_add[35]\n getitem_36 = _foreach_add[36]\n getitem_37 = _foreach_add[37]\n getitem_38 = _foreach_add[38]\n getitem_39 = _foreach_add[39]\n getitem_40 = _foreach_add[40]\n getitem_41 = _foreach_add[41]\n getitem_42 = _foreach_add[42]\n getitem_43 = _foreach_add[43]\n getitem_44 = _foreach_add[44]\n getitem_45 = _foreach_add[45]\n getitem_46 = _foreach_add[46]\n getitem_47 = _foreach_add[47]\n getitem_48 = _foreach_add[48]\n getitem_49 = _foreach_add[49]\n getitem_50 = _foreach_add[50]\n getitem_51 = 
_foreach_add[51]\n getitem_52 = _foreach_add[52]\n getitem_53 = _foreach_add[53]\n getitem_54 = _foreach_add[54]\n getitem_55 = _foreach_add[55]\n getitem_56 = _foreach_add[56]\n getitem_57 = _foreach_add[57]\n getitem_58 = _foreach_add[58]\n getitem_59 = _foreach_add[59]\n getitem_60 = _foreach_add[60]\n getitem_61 = _foreach_add[61]\n getitem_62 = _foreach_add[62]\n getitem_63 = _foreach_add[63]\n getitem_64 = _foreach_add[64]\n getitem_65 = _foreach_add[65]\n getitem_66 = _foreach_add[66]\n getitem_67 = _foreach_add[67]\n getitem_68 = _foreach_add[68]\n getitem_69 = _foreach_add[69]\n getitem_70 = _foreach_add[70]\n getitem_71 = _foreach_add[71]\n getitem_72 = _foreach_add[72]\n getitem_73 = _foreach_add[73]\n getitem_74 = _foreach_add[74]\n getitem_75 = _foreach_add[75]\n getitem_76 = _foreach_add[76]\n getitem_77 = _foreach_add[77]\n getitem_78 = _foreach_add[78]\n getitem_79 = _foreach_add[79]\n getitem_80 = _foreach_add[80]\n getitem_81 = _foreach_add[81]\n getitem_82 = _foreach_add[82]\n getitem_83 = _foreach_add[83]\n getitem_84 = _foreach_add[84]\n getitem_85 = _foreach_add[85]\n getitem_86 = _foreach_add[86]\n getitem_87 = _foreach_add[87]\n getitem_88 = _foreach_add[88]\n getitem_89 = _foreach_add[89]\n getitem_90 = _foreach_add[90]\n getitem_91 = _foreach_add[91]\n getitem_92 = _foreach_add[92]\n getitem_93 = _foreach_add[93]\n getitem_94 = _foreach_add[94]\n getitem_95 = _foreach_add[95]\n getitem_96 = _foreach_add[96]\n getitem_97 = _foreach_add[97]\n getitem_98 = _foreach_add[98]\n getitem_99 = _foreach_add[99]\n getitem_100 = _foreach_add[100]\n getitem_101 = _foreach_add[101]\n getitem_102 = _foreach_add[102]\n getitem_103 = _foreach_add[103]\n getitem_104 = _foreach_add[104]\n getitem_105 = _foreach_add[105]\n getitem_106 = _foreach_add[106]\n getitem_107 = _foreach_add[107]\n getitem_108 = _foreach_add[108]\n getitem_109 = _foreach_add[109]\n getitem_110 = _foreach_add[110]\n getitem_111 = _foreach_add[111]\n getitem_112 = _foreach_add[112]\n getitem_113 = _foreach_add[113]\n getitem_114 = _foreach_add[114]\n getitem_115 = _foreach_add[115]\n getitem_116 = _foreach_add[116]\n getitem_117 = _foreach_add[117]\n getitem_118 = _foreach_add[118]\n getitem_119 = _foreach_add[119]\n getitem_120 = _foreach_add[120]\n getitem_121 = _foreach_add[121]\n getitem_122 = _foreach_add[122]\n getitem_123 = _foreach_add[123]\n getitem_124 = _foreach_add[124]\n getitem_125 = _foreach_add[125]\n getitem_126 = _foreach_add[126]\n getitem_127 = _foreach_add[127]\n getitem_128 = _foreach_add[128]\n getitem_129 = _foreach_add[129]\n getitem_130 = _foreach_add[130]\n getitem_131 = _foreach_add[131]\n getitem_132 = _foreach_add[132]\n getitem_133 = _foreach_add[133]\n getitem_134 = _foreach_add[134]\n getitem_135 = _foreach_add[135]\n getitem_136 = _foreach_add[136]\n getitem_137 = _foreach_add[137]\n getitem_138 = _foreach_add[138]\n getitem_139 = _foreach_add[139]\n getitem_140 = _foreach_add[140]\n getitem_141 = _foreach_add[141]\n getitem_142 = _foreach_add[142]\n getitem_143 = _foreach_add[143]\n getitem_144 = _foreach_add[144]\n getitem_145 = _foreach_add[145]\n getitem_146 = _foreach_add[146]\n getitem_147 = _foreach_add[147]; _foreach_add = None\n _foreach_sub = torch.ops.aten._foreach_sub.List([arg151_1, arg152_1, arg153_1, arg154_1, arg155_1, arg156_1, arg157_1, arg158_1, arg159_1, arg160_1, arg161_1, arg162_1, arg163_1, arg164_1, arg165_1, arg166_1, arg167_1, arg168_1, arg169_1, arg170_1, arg171_1, arg172_1, arg173_1, arg174_1, arg175_1, arg176_1, arg177_1, arg178_1, arg179_1, 
arg180_1, arg181_1, arg182_1, arg183_1, arg184_1, arg185_1, arg186_1, arg187_1, arg188_1, arg189_1, arg190_1, arg191_1, arg192_1, arg193_1, arg194_1, arg195_1, arg196_1, arg197_1, arg198_1, arg199_1, arg200_1, arg201_1, arg202_1, arg203_1, arg204_1, arg205_1, arg206_1, arg207_1, arg208_1, arg209_1, arg210_1, arg211_1, arg212_1, arg213_1, arg214_1, arg215_1, arg216_1, arg217_1, arg218_1, arg219_1, arg220_1, arg221_1, arg222_1, arg223_1, arg224_1, arg225_1, arg226_1, arg227_1, arg228_1, arg229_1, arg230_1, arg231_1, arg232_1, arg233_1, arg234_1, arg235_1, arg236_1, arg237_1, arg238_1, arg239_1, arg240_1, arg241_1, arg242_1, arg243_1, arg244_1, arg245_1, arg246_1, arg247_1, arg248_1, arg249_1, arg250_1, arg251_1, arg252_1, arg253_1, arg254_1, arg255_1, arg256_1, arg257_1, arg258_1, arg259_1, arg260_1, arg261_1, arg262_1, arg263_1, arg264_1, arg265_1, arg266_1, arg267_1, arg268_1, arg269_1, arg270_1, arg271_1, arg272_1, arg273_1, arg274_1, arg275_1, arg276_1, arg277_1, arg278_1, arg279_1, arg280_1, arg281_1, arg282_1, arg283_1, arg284_1, arg285_1, arg286_1, arg287_1, arg288_1, arg289_1, arg290_1, arg291_1, arg292_1, arg293_1, arg294_1, arg295_1, arg296_1, arg297_1, arg298_1], [arg299_1, arg149_1, arg300_1, arg301_1, arg302_1, arg303_1, arg304_1, arg305_1, arg306_1, arg307_1, arg308_1, arg309_1, arg310_1, arg311_1, arg312_1, arg313_1, arg314_1, arg315_1, arg316_1, arg317_1, arg318_1, arg319_1, arg320_1, arg321_1, arg322_1, arg323_1, arg324_1, arg325_1, arg326_1, arg327_1, arg328_1, arg329_1, arg330_1, arg331_1, arg332_1, arg333_1, arg334_1, arg335_1, arg336_1, arg337_1, arg338_1, arg339_1, arg340_1, arg341_1, arg342_1, arg343_1, arg344_1, arg345_1, arg346_1, arg347_1, arg348_1, arg349_1, arg350_1, arg351_1, arg352_1, arg353_1, arg354_1, arg355_1, arg356_1, arg357_1, arg358_1, arg359_1, arg360_1, arg361_1, arg362_1, arg363_1, arg364_1, arg365_1, arg366_1, arg367_1, arg368_1, arg369_1, arg370_1, arg371_1, arg372_1, arg373_1, arg374_1, arg375_1, arg376_1, arg377_1, arg378_1, arg379_1, arg380_1, arg381_1, arg382_1, arg383_1, arg384_1, arg385_1, arg386_1, arg387_1, arg388_1, arg389_1, arg390_1, arg391_1, arg392_1, arg393_1, arg394_1, arg395_1, arg396_1, arg397_1, arg398_1, arg399_1, arg400_1, arg401_1, arg402_1, arg403_1, arg404_1, arg405_1, arg406_1, arg407_1, arg408_1, arg409_1, arg410_1, arg411_1, arg412_1, arg413_1, arg414_1, arg415_1, arg416_1, arg417_1, arg418_1, arg419_1, arg420_1, arg421_1, arg422_1, arg423_1, arg424_1, arg425_1, arg426_1, arg427_1, arg428_1, arg429_1, arg430_1, arg431_1, arg432_1, arg433_1, arg434_1, arg435_1, arg436_1, arg437_1, arg438_1, arg439_1, arg440_1, arg441_1, arg442_1, arg443_1, arg444_1, arg445_1])\n getitem_148 = _foreach_sub[0]\n getitem_149 = _foreach_sub[1]\n getitem_150 = _foreach_sub[2]\n getitem_151 = _foreach_sub[3]\n getitem_152 = _foreach_sub[4]\n getitem_153 = _foreach_sub[5]\n getitem_154 = _foreach_sub[6]\n getitem_155 = _foreach_sub[7]\n getitem_156 = _foreach_sub[8]\n getitem_157 = _foreach_sub[9]\n getitem_158 = _foreach_sub[10]\n getitem_159 = _foreach_sub[11]\n getitem_160 = _foreach_sub[12]\n getitem_161 = _foreach_sub[13]\n getitem_162 = _foreach_sub[14]\n getitem_163 = _foreach_sub[15]\n getitem_164 = _foreach_sub[16]\n getitem_165 = _foreach_sub[17]\n getitem_166 = _foreach_sub[18]\n getitem_167 = _foreach_sub[19]\n getitem_168 = _foreach_sub[20]\n getitem_169 = _foreach_sub[21]\n getitem_170 = _foreach_sub[22]\n getitem_171 = _foreach_sub[23]\n getitem_172 = _foreach_sub[24]\n getitem_173 = _foreach_sub[25]\n getitem_174 = 
_foreach_sub[26]\n getitem_175 = _foreach_sub[27]\n getitem_176 = _foreach_sub[28]\n getitem_177 = _foreach_sub[29]\n getitem_178 = _foreach_sub[30]\n getitem_179 = _foreach_sub[31]\n getitem_180 = _foreach_sub[32]\n getitem_181 = _foreach_sub[33]\n getitem_182 = _foreach_sub[34]\n getitem_183 = _foreach_sub[35]\n getitem_184 = _foreach_sub[36]\n getitem_185 = _foreach_sub[37]\n getitem_186 = _foreach_sub[38]\n getitem_187 = _foreach_sub[39]\n getitem_188 = _foreach_sub[40]\n getitem_189 = _foreach_sub[41]\n getitem_190 = _foreach_sub[42]\n getitem_191 = _foreach_sub[43]\n getitem_192 = _foreach_sub[44]\n getitem_193 = _foreach_sub[45]\n getitem_194 = _foreach_sub[46]\n getitem_195 = _foreach_sub[47]\n getitem_196 = _foreach_sub[48]\n getitem_197 = _foreach_sub[49]\n getitem_198 = _foreach_sub[50]\n getitem_199 = _foreach_sub[51]\n getitem_200 = _foreach_sub[52]\n getitem_201 = _foreach_sub[53]\n getitem_202 = _foreach_sub[54]\n getitem_203 = _foreach_sub[55]\n getitem_204 = _foreach_sub[56]\n getitem_205 = _foreach_sub[57]\n getitem_206 = _foreach_sub[58]\n getitem_207 = _foreach_sub[59]\n getitem_208 = _foreach_sub[60]\n getitem_209 = _foreach_sub[61]\n getitem_210 = _foreach_sub[62]\n getitem_211 = _foreach_sub[63]\n getitem_212 = _foreach_sub[64]\n getitem_213 = _foreach_sub[65]\n getitem_214 = _foreach_sub[66]\n getitem_215 = _foreach_sub[67]\n getitem_216 = _foreach_sub[68]\n getitem_217 = _foreach_sub[69]\n getitem_218 = _foreach_sub[70]\n getitem_219 = _foreach_sub[71]\n getitem_220 = _foreach_sub[72]\n getitem_221 = _foreach_sub[73]\n getitem_222 = _foreach_sub[74]\n getitem_223 = _foreach_sub[75]\n getitem_224 = _foreach_sub[76]\n getitem_225 = _foreach_sub[77]\n getitem_226 = _foreach_sub[78]\n getitem_227 = _foreach_sub[79]\n getitem_228 = _foreach_sub[80]\n getitem_229 = _foreach_sub[81]\n getitem_230 = _foreach_sub[82]\n getitem_231 = _foreach_sub[83]\n getitem_232 = _foreach_sub[84]\n getitem_233 = _foreach_sub[85]\n getitem_234 = _foreach_sub[86]\n getitem_235 = _foreach_sub[87]\n getitem_236 = _foreach_sub[88]\n getitem_237 = _foreach_sub[89]\n getitem_238 = _foreach_sub[90]\n getitem_239 = _foreach_sub[91]\n getitem_240 = _foreach_sub[92]\n getitem_241 = _foreach_sub[93]\n getitem_242 = _foreach_sub[94]\n getitem_243 = _foreach_sub[95]\n getitem_244 = _foreach_sub[96]\n getitem_245 = _foreach_sub[97]\n getitem_246 = _foreach_sub[98]\n getitem_247 = _foreach_sub[99]\n getitem_248 = _foreach_sub[100]\n getitem_249 = _foreach_sub[101]\n getitem_250 = _foreach_sub[102]\n getitem_251 = _foreach_sub[103]\n getitem_252 = _foreach_sub[104]\n getitem_253 = _foreach_sub[105]\n getitem_254 = _foreach_sub[106]\n getitem_255 = _foreach_sub[107]\n getitem_256 = _foreach_sub[108]\n getitem_257 = _foreach_sub[109]\n getitem_258 = _foreach_sub[110]\n getitem_259 = _foreach_sub[111]\n getitem_260 = _foreach_sub[112]\n getitem_261 = _foreach_sub[113]\n getitem_262 = _foreach_sub[114]\n getitem_263 = _foreach_sub[115]\n getitem_264 = _foreach_sub[116]\n getitem_265 = _foreach_sub[117]\n getitem_266 = _foreach_sub[118]\n getitem_267 = _foreach_sub[119]\n getitem_268 = _foreach_sub[120]\n getitem_269 = _foreach_sub[121]\n getitem_270 = _foreach_sub[122]\n getitem_271 = _foreach_sub[123]\n getitem_272 = _foreach_sub[124]\n getitem_273 = _foreach_sub[125]\n getitem_274 = _foreach_sub[126]\n getitem_275 = _foreach_sub[127]\n getitem_276 = _foreach_sub[128]\n getitem_277 = _foreach_sub[129]\n getitem_278 = _foreach_sub[130]\n getitem_279 = _foreach_sub[131]\n getitem_280 = _foreach_sub[132]\n 
getitem_281 = _foreach_sub[133]\n getitem_282 = _foreach_sub[134]\n getitem_283 = _foreach_sub[135]\n getitem_284 = _foreach_sub[136]\n getitem_285 = _foreach_sub[137]\n getitem_286 = _foreach_sub[138]\n getitem_287 = _foreach_sub[139]\n getitem_288 = _foreach_sub[140]\n getitem_289 = _foreach_sub[141]\n getitem_290 = _foreach_sub[142]\n getitem_291 = _foreach_sub[143]\n getitem_292 = _foreach_sub[144]\n getitem_293 = _foreach_sub[145]\n getitem_294 = _foreach_sub[146]\n getitem_295 = _foreach_sub[147]; _foreach_sub = None\n _foreach_mul = torch.ops.aten._foreach_mul.Scalar([getitem_148, getitem_149, getitem_150, getitem_151, getitem_152, getitem_153, getitem_154, getitem_155, getitem_156, getitem_157, getitem_158, getitem_159, getitem_160, getitem_161, getitem_162, getitem_163, getitem_164, getitem_165, getitem_166, getitem_167, getitem_168, getitem_169, getitem_170, getitem_171, getitem_172, getitem_173, getitem_174, getitem_175, getitem_176, getitem_177, getitem_178, getitem_179, getitem_180, getitem_181, getitem_182, getitem_183, getitem_184, getitem_185, getitem_186, getitem_187, getitem_188, getitem_189, getitem_190, getitem_191, getitem_192, getitem_193, getitem_194, getitem_195, getitem_196, getitem_197, getitem_198, getitem_199, getitem_200, getitem_201, getitem_202, getitem_203, getitem_204, getitem_205, getitem_206, getitem_207, getitem_208, getitem_209, getitem_210, getitem_211, getitem_212, getitem_213, getitem_214, getitem_215, getitem_216, getitem_217, getitem_218, getitem_219, getitem_220, getitem_221, getitem_222, getitem_223, getitem_224, getitem_225, getitem_226, getitem_227, getitem_228, getitem_229, getitem_230, getitem_231, getitem_232, getitem_233, getitem_234, getitem_235, getitem_236, getitem_237, getitem_238, getitem_239, getitem_240, getitem_241, getitem_242, getitem_243, getitem_244, getitem_245, getitem_246, getitem_247, getitem_248, getitem_249, getitem_250, getitem_251, getitem_252, getitem_253, getitem_254, getitem_255, getitem_256, getitem_257, getitem_258, getitem_259, getitem_260, getitem_261, getitem_262, getitem_263, getitem_264, getitem_265, getitem_266, getitem_267, getitem_268, getitem_269, getitem_270, getitem_271, getitem_272, getitem_273, getitem_274, getitem_275, getitem_276, getitem_277, getitem_278, getitem_279, getitem_280, getitem_281, getitem_282, getitem_283, getitem_284, getitem_285, getitem_286, getitem_287, getitem_288, getitem_289, getitem_290, getitem_291, getitem_292, getitem_293, getitem_294, getitem_295], 0.09999999999999998); getitem_148 = getitem_149 = getitem_150 = getitem_151 = getitem_152 = getitem_153 = getitem_154 = getitem_155 = getitem_156 = getitem_157 = getitem_158 = getitem_159 = getitem_160 = getitem_161 = getitem_162 = getitem_163 = getitem_164 = getitem_165 = getitem_166 = getitem_167 = getitem_168 = getitem_169 = getitem_170 = getitem_171 = getitem_172 = getitem_173 = getitem_174 = getitem_175 = getitem_176 = getitem_177 = getitem_178 = getitem_179 = getitem_180 = getitem_181 = getitem_182 = getitem_183 = getitem_184 = getitem_185 = getitem_186 = getitem_187 = getitem_188 = getitem_189 = getitem_190 = getitem_191 = getitem_192 = getitem_193 = getitem_194 = getitem_195 = getitem_196 = getitem_197 = getitem_198 = getitem_199 = getitem_200 = getitem_201 = getitem_202 = getitem_203 = getitem_204 = getitem_205 = getitem_206 = getitem_207 = getitem_208 = getitem_209 = getitem_210 = getitem_211 = getitem_212 = getitem_213 = getitem_214 = getitem_215 = getitem_216 = getitem_217 = getitem_218 = getitem_219 = getitem_220 = 
getitem_221 = getitem_222 = getitem_223 = getitem_224 = getitem_225 = getitem_226 = getitem_227 = getitem_228 = getitem_229 = getitem_230 = getitem_231 = getitem_232 = getitem_233 = getitem_234 = getitem_235 = getitem_236 = getitem_237 = getitem_238 = getitem_239 = getitem_240 = getitem_241 = getitem_242 = getitem_243 = getitem_244 = getitem_245 = getitem_246 = getitem_247 = getitem_248 = getitem_249 = getitem_250 = getitem_251 = getitem_252 = getitem_253 = getitem_254 = getitem_255 = getitem_256 = getitem_257 = getitem_258 = getitem_259 = getitem_260 = getitem_261 = getitem_262 = getitem_263 = getitem_264 = getitem_265 = getitem_266 = getitem_267 = getitem_268 = getitem_269 = getitem_270 = getitem_271 = getitem_272 = getitem_273 = getitem_274 = getitem_275 = getitem_276 = getitem_277 = getitem_278 = getitem_279 = getitem_280 = getitem_281 = getitem_282 = getitem_283 = getitem_284 = getitem_285 = getitem_286 = getitem_287 = getitem_288 = getitem_289 = getitem_290 = getitem_291 = getitem_292 = getitem_293 = getitem_294 = getitem_295 = None\n getitem_296 = _foreach_mul[0]\n getitem_297 = _foreach_mul[1]\n getitem_298 = _foreach_mul[2]\n getitem_299 = _foreach_mul[3]\n getitem_300 = _foreach_mul[4]\n getitem_301 = _foreach_mul[5]\n getitem_302 = _foreach_mul[6]\n getitem_303 = _foreach_mul[7]\n getitem_304 = _foreach_mul[8]\n getitem_305 = _foreach_mul[9]\n getitem_306 = _foreach_mul[10]\n getitem_307 = _foreach_mul[11]\n getitem_308 = _foreach_mul[12]\n getitem_309 = _foreach_mul[13]\n getitem_310 = _foreach_mul[14]\n getitem_311 = _foreach_mul[15]\n getitem_312 = _foreach_mul[16]\n getitem_313 = _foreach_mul[17]\n getitem_314 = _foreach_mul[18]\n getitem_315 = _foreach_mul[19]\n getitem_316 = _foreach_mul[20]\n getitem_317 = _foreach_mul[21]\n getitem_318 = _foreach_mul[22]\n getitem_319 = _foreach_mul[23]\n getitem_320 = _foreach_mul[24]\n getitem_321 = _foreach_mul[25]\n getitem_322 = _foreach_mul[26]\n getitem_323 = _foreach_mul[27]\n getitem_324 = _foreach_mul[28]\n getitem_325 = _foreach_mul[29]\n getitem_326 = _foreach_mul[30]\n getitem_327 = _foreach_mul[31]\n getitem_328 = _foreach_mul[32]\n getitem_329 = _foreach_mul[33]\n getitem_330 = _foreach_mul[34]\n getitem_331 = _foreach_mul[35]\n getitem_332 = _foreach_mul[36]\n getitem_333 = _foreach_mul[37]\n getitem_334 = _foreach_mul[38]\n getitem_335 = _foreach_mul[39]\n getitem_336 = _foreach_mul[40]\n getitem_337 = _foreach_mul[41]\n getitem_338 = _foreach_mul[42]\n getitem_339 = _foreach_mul[43]\n getitem_340 = _foreach_mul[44]\n getitem_341 = _foreach_mul[45]\n getitem_342 = _foreach_mul[46]\n getitem_343 = _foreach_mul[47]\n getitem_344 = _foreach_mul[48]\n getitem_345 = _foreach_mul[49]\n getitem_346 = _foreach_mul[50]\n getitem_347 = _foreach_mul[51]\n getitem_348 = _foreach_mul[52]\n getitem_349 = _foreach_mul[53]\n getitem_350 = _foreach_mul[54]\n getitem_351 = _foreach_mul[55]\n getitem_352 = _foreach_mul[56]\n getitem_353 = _foreach_mul[57]\n getitem_354 = _foreach_mul[58]\n getitem_355 = _foreach_mul[59]\n getitem_356 = _foreach_mul[60]\n getitem_357 = _foreach_mul[61]\n getitem_358 = _foreach_mul[62]\n getitem_359 = _foreach_mul[63]\n getitem_360 = _foreach_mul[64]\n getitem_361 = _foreach_mul[65]\n getitem_362 = _foreach_mul[66]\n getitem_363 = _foreach_mul[67]\n getitem_364 = _foreach_mul[68]\n getitem_365 = _foreach_mul[69]\n getitem_366 = _foreach_mul[70]\n getitem_367 = _foreach_mul[71]\n getitem_368 = _foreach_mul[72]\n getitem_369 = _foreach_mul[73]\n getitem_370 = _foreach_mul[74]\n getitem_371 = _foreach_mul[75]\n 
getitem_372 = _foreach_mul[76]\n getitem_373 = _foreach_mul[77]\n getitem_374 = _foreach_mul[78]\n getitem_375 = _foreach_mul[79]\n getitem_376 = _foreach_mul[80]\n getitem_377 = _foreach_mul[81]\n getitem_378 = _foreach_mul[82]\n getitem_379 = _foreach_mul[83]\n getitem_380 = _foreach_mul[84]\n getitem_381 = _foreach_mul[85]\n getitem_382 = _foreach_mul[86]\n getitem_383 = _foreach_mul[87]\n getitem_384 = _foreach_mul[88]\n getitem_385 = _foreach_mul[89]\n getitem_386 = _foreach_mul[90]\n getitem_387 = _foreach_mul[91]\n getitem_388 = _foreach_mul[92]\n getitem_389 = _foreach_mul[93]\n getitem_390 = _foreach_mul[94]\n getitem_391 = _foreach_mul[95]\n getitem_392 = _foreach_mul[96]\n getitem_393 = _foreach_mul[97]\n getitem_394 = _foreach_mul[98]\n getitem_395 = _foreach_mul[99]\n getitem_396 = _foreach_mul[100]\n getitem_397 = _foreach_mul[101]\n getitem_398 = _foreach_mul[102]\n getitem_399 = _foreach_mul[103]\n getitem_400 = _foreach_mul[104]\n getitem_401 = _foreach_mul[105]\n getitem_402 = _foreach_mul[106]\n getitem_403 = _foreach_mul[107]\n getitem_404 = _foreach_mul[108]\n getitem_405 = _foreach_mul[109]\n getitem_406 = _foreach_mul[110]\n getitem_407 = _foreach_mul[111]\n getitem_408 = _foreach_mul[112]\n getitem_409 = _foreach_mul[113]\n getitem_410 = _foreach_mul[114]\n getitem_411 = _foreach_mul[115]\n getitem_412 = _foreach_mul[116]\n getitem_413 = _foreach_mul[117]\n getitem_414 = _foreach_mul[118]\n getitem_415 = _foreach_mul[119]\n getitem_416 = _foreach_mul[120]\n getitem_417 = _foreach_mul[121]\n getitem_418 = _foreach_mul[122]\n getitem_419 = _foreach_mul[123]\n getitem_420 = _foreach_mul[124]\n getitem_421 = _foreach_mul[125]\n getitem_422 = _foreach_mul[126]\n getitem_423 = _foreach_mul[127]\n getitem_424 = _foreach_mul[128]\n getitem_425 = _foreach_mul[129]\n getitem_426 = _foreach_mul[130]\n getitem_427 = _foreach_mul[131]\n getitem_428 = _foreach_mul[132]\n getitem_429 = _foreach_mul[133]\n getitem_430 = _foreach_mul[134]\n getitem_431 = _foreach_mul[135]\n getitem_432 = _foreach_mul[136]\n getitem_433 = _foreach_mul[137]\n getitem_434 = _foreach_mul[138]\n getitem_435 = _foreach_mul[139]\n getitem_436 = _foreach_mul[140]\n getitem_437 = _foreach_mul[141]\n getitem_438 = _foreach_mul[142]\n getitem_439 = _foreach_mul[143]\n getitem_440 = _foreach_mul[144]\n getitem_441 = _foreach_mul[145]\n getitem_442 = _foreach_mul[146]\n getitem_443 = _foreach_mul[147]; _foreach_mul = None\n _foreach_add_1 = torch.ops.aten._foreach_add.List([arg299_1, arg149_1, arg300_1, arg301_1, arg302_1, arg303_1, arg304_1, arg305_1, arg306_1, arg307_1, arg308_1, arg309_1, arg310_1, arg311_1, arg312_1, arg313_1, arg314_1, arg315_1, arg316_1, arg317_1, arg318_1, arg319_1, arg320_1, arg321_1, arg322_1, arg323_1, arg324_1, arg325_1, arg326_1, arg327_1, arg328_1, arg329_1, arg330_1, arg331_1, arg332_1, arg333_1, arg334_1, arg335_1, arg336_1, arg337_1, arg338_1, arg339_1, arg340_1, arg341_1, arg342_1, arg343_1, arg344_1, arg345_1, arg346_1, arg347_1, arg348_1, arg349_1, arg350_1, arg351_1, arg352_1, arg353_1, arg354_1, arg355_1, arg356_1, arg357_1, arg358_1, arg359_1, arg360_1, arg361_1, arg362_1, arg363_1, arg364_1, arg365_1, arg366_1, arg367_1, arg368_1, arg369_1, arg370_1, arg371_1, arg372_1, arg373_1, arg374_1, arg375_1, arg376_1, arg377_1, arg378_1, arg379_1, arg380_1, arg381_1, arg382_1, arg383_1, arg384_1, arg385_1, arg386_1, arg387_1, arg388_1, arg389_1, arg390_1, arg391_1, arg392_1, arg393_1, arg394_1, arg395_1, arg396_1, arg397_1, arg398_1, arg399_1, arg400_1, arg401_1, arg402_1, 
arg403_1, arg404_1, arg405_1, arg406_1, arg407_1, arg408_1, arg409_1, arg410_1, arg411_1, arg412_1, arg413_1, arg414_1, arg415_1, arg416_1, arg417_1, arg418_1, arg419_1, arg420_1, arg421_1, arg422_1, arg423_1, arg424_1, arg425_1, arg426_1, arg427_1, arg428_1, arg429_1, arg430_1, arg431_1, arg432_1, arg433_1, arg434_1, arg435_1, arg436_1, arg437_1, arg438_1, arg439_1, arg440_1, arg441_1, arg442_1, arg443_1, arg444_1, arg445_1], [getitem_296, getitem_297, getitem_298, getitem_299, getitem_300, getitem_301, getitem_302, getitem_303, getitem_304, getitem_305, getitem_306, getitem_307, getitem_308, getitem_309, getitem_310, getitem_311, getitem_312, getitem_313, getitem_314, getitem_315, getitem_316, getitem_317, getitem_318, getitem_319, getitem_320, getitem_321, getitem_322, getitem_323, getitem_324, getitem_325, getitem_326, getitem_327, getitem_328, getitem_329, getitem_330, getitem_331, getitem_332, getitem_333, getitem_334, getitem_335, getitem_336, getitem_337, getitem_338, getitem_339, getitem_340, getitem_341, getitem_342, getitem_343, getitem_344, getitem_345, getitem_346, getitem_347, getitem_348, getitem_349, getitem_350, getitem_351, getitem_352, getitem_353, getitem_354, getitem_355, getitem_356, getitem_357, getitem_358, getitem_359, getitem_360, getitem_361, getitem_362, getitem_363, getitem_364, getitem_365, getitem_366, getitem_367, getitem_368, getitem_369, getitem_370, getitem_371, getitem_372, getitem_373, getitem_374, getitem_375, getitem_376, getitem_377, getitem_378, getitem_379, getitem_380, getitem_381, getitem_382, getitem_383, getitem_384, getitem_385, getitem_386, getitem_387, getitem_388, getitem_389, getitem_390, getitem_391, getitem_392, getitem_393, getitem_394, getitem_395, getitem_396, getitem_397, getitem_398, getitem_399, getitem_400, getitem_401, getitem_402, getitem_403, getitem_404, getitem_405, getitem_406, getitem_407, getitem_408, getitem_409, getitem_410, getitem_411, getitem_412, getitem_413, getitem_414, getitem_415, getitem_416, getitem_417, getitem_418, getitem_419, getitem_420, getitem_421, getitem_422, getitem_423, getitem_424, getitem_425, getitem_426, getitem_427, getitem_428, getitem_429, getitem_430, getitem_431, getitem_432, getitem_433, getitem_434, getitem_435, getitem_436, getitem_437, getitem_438, getitem_439, getitem_440, getitem_441, getitem_442, getitem_443]); getitem_296 = getitem_297 = getitem_298 = getitem_299 = getitem_300 = getitem_301 = getitem_302 = getitem_303 = getitem_304 = getitem_305 = getitem_306 = getitem_307 = getitem_308 = getitem_309 = getitem_310 = getitem_311 = getitem_312 = getitem_313 = getitem_314 = getitem_315 = getitem_316 = getitem_317 = getitem_318 = getitem_319 = getitem_320 = getitem_321 = getitem_322 = getitem_323 = getitem_324 = getitem_325 = getitem_326 = getitem_327 = getitem_328 = getitem_329 = getitem_330 = getitem_331 = getitem_332 = getitem_333 = getitem_334 = getitem_335 = getitem_336 = getitem_337 = getitem_338 = getitem_339 = getitem_340 = getitem_341 = getitem_342 = getitem_343 = getitem_344 = getitem_345 = getitem_346 = getitem_347 = getitem_348 = getitem_349 = getitem_350 = getitem_351 = getitem_352 = getitem_353 = getitem_354 = getitem_355 = getitem_356 = getitem_357 = getitem_358 = getitem_359 = getitem_360 = getitem_361 = getitem_362 = getitem_363 = getitem_364 = getitem_365 = getitem_366 = getitem_367 = getitem_368 = getitem_369 = getitem_370 = getitem_371 = getitem_372 = getitem_373 = getitem_374 = getitem_375 = getitem_376 = getitem_377 = getitem_378 = getitem_379 = getitem_380 = 
getitem_381 = getitem_382 = getitem_383 = getitem_384 = getitem_385 = getitem_386 = getitem_387 = getitem_388 = getitem_389 = getitem_390 = getitem_391 = getitem_392 = getitem_393 = getitem_394 = getitem_395 = getitem_396 = getitem_397 = getitem_398 = getitem_399 = getitem_400 = getitem_401 = getitem_402 = getitem_403 = getitem_404 = getitem_405 = getitem_406 = getitem_407 = getitem_408 = getitem_409 = getitem_410 = getitem_411 = getitem_412 = getitem_413 = getitem_414 = getitem_415 = getitem_416 = getitem_417 = getitem_418 = getitem_419 = getitem_420 = getitem_421 = getitem_422 = getitem_423 = getitem_424 = getitem_425 = getitem_426 = getitem_427 = getitem_428 = getitem_429 = getitem_430 = getitem_431 = getitem_432 = getitem_433 = getitem_434 = getitem_435 = getitem_436 = getitem_437 = getitem_438 = getitem_439 = getitem_440 = getitem_441 = getitem_442 = getitem_443 = None\n getitem_444 = _foreach_add_1[0]\n getitem_445 = _foreach_add_1[1]\n getitem_446 = _foreach_add_1[2]\n getitem_447 = _foreach_add_1[3]\n getitem_448 = _foreach_add_1[4]\n getitem_449 = _foreach_add_1[5]\n getitem_450 = _foreach_add_1[6]\n getitem_451 = _foreach_add_1[7]\n getitem_452 = _foreach_add_1[8]\n getitem_453 = _foreach_add_1[9]\n getitem_454 = _foreach_add_1[10]\n getitem_455 = _foreach_add_1[11]\n getitem_456 = _foreach_add_1[12]\n getitem_457 = _foreach_add_1[13]\n getitem_458 = _foreach_add_1[14]\n getitem_459 = _foreach_add_1[15]\n getitem_460 = _foreach_add_1[16]\n getitem_461 = _foreach_add_1[17]\n getitem_462 = _foreach_add_1[18]\n getitem_463 = _foreach_add_1[19]\n getitem_464 = _foreach_add_1[20]\n getitem_465 = _foreach_add_1[21]\n getitem_466 = _foreach_add_1[22]\n getitem_467 = _foreach_add_1[23]\n getitem_468 = _foreach_add_1[24]\n getitem_469 = _foreach_add_1[25]\n getitem_470 = _foreach_add_1[26]\n getitem_471 = _foreach_add_1[27]\n getitem_472 = _foreach_add_1[28]\n getitem_473 = _foreach_add_1[29]\n getitem_474 = _foreach_add_1[30]\n getitem_475 = _foreach_add_1[31]\n getitem_476 = _foreach_add_1[32]\n getitem_477 = _foreach_add_1[33]\n getitem_478 = _foreach_add_1[34]\n getitem_479 = _foreach_add_1[35]\n getitem_480 = _foreach_add_1[36]\n getitem_481 = _foreach_add_1[37]\n getitem_482 = _foreach_add_1[38]\n getitem_483 = _foreach_add_1[39]\n getitem_484 = _foreach_add_1[40]\n getitem_485 = _foreach_add_1[41]\n getitem_486 = _foreach_add_1[42]\n getitem_487 = _foreach_add_1[43]\n getitem_488 = _foreach_add_1[44]\n getitem_489 = _foreach_add_1[45]\n getitem_490 = _foreach_add_1[46]\n getitem_491 = _foreach_add_1[47]\n getitem_492 = _foreach_add_1[48]\n getitem_493 = _foreach_add_1[49]\n getitem_494 = _foreach_add_1[50]\n getitem_495 = _foreach_add_1[51]\n getitem_496 = _foreach_add_1[52]\n getitem_497 = _foreach_add_1[53]\n getitem_498 = _foreach_add_1[54]\n getitem_499 = _foreach_add_1[55]\n getitem_500 = _foreach_add_1[56]\n getitem_501 = _foreach_add_1[57]\n getitem_502 = _foreach_add_1[58]\n getitem_503 = _foreach_add_1[59]\n getitem_504 = _foreach_add_1[60]\n getitem_505 = _foreach_add_1[61]\n getitem_506 = _foreach_add_1[62]\n getitem_507 = _foreach_add_1[63]\n getitem_508 = _foreach_add_1[64]\n getitem_509 = _foreach_add_1[65]\n getitem_510 = _foreach_add_1[66]\n getitem_511 = _foreach_add_1[67]\n getitem_512 = _foreach_add_1[68]\n getitem_513 = _foreach_add_1[69]\n getitem_514 = _foreach_add_1[70]\n getitem_515 = _foreach_add_1[71]\n getitem_516 = _foreach_add_1[72]\n getitem_517 = _foreach_add_1[73]\n getitem_518 = _foreach_add_1[74]\n getitem_519 = _foreach_add_1[75]\n getitem_520 = 
_foreach_add_1[76]\n getitem_521 = _foreach_add_1[77]\n getitem_522 = _foreach_add_1[78]\n getitem_523 = _foreach_add_1[79]\n getitem_524 = _foreach_add_1[80]\n getitem_525 = _foreach_add_1[81]\n getitem_526 = _foreach_add_1[82]\n getitem_527 = _foreach_add_1[83]\n getitem_528 = _foreach_add_1[84]\n getitem_529 = _foreach_add_1[85]\n getitem_530 = _foreach_add_1[86]\n getitem_531 = _foreach_add_1[87]\n getitem_532 = _foreach_add_1[88]\n getitem_533 = _foreach_add_1[89]\n getitem_534 = _foreach_add_1[90]\n getitem_535 = _foreach_add_1[91]\n getitem_536 = _foreach_add_1[92]\n getitem_537 = _foreach_add_1[93]\n getitem_538 = _foreach_add_1[94]\n getitem_539 = _foreach_add_1[95]\n getitem_540 = _foreach_add_1[96]\n getitem_541 = _foreach_add_1[97]\n getitem_542 = _foreach_add_1[98]\n getitem_543 = _foreach_add_1[99]\n getitem_544 = _foreach_add_1[100]\n getitem_545 = _foreach_add_1[101]\n getitem_546 = _foreach_add_1[102]\n getitem_547 = _foreach_add_1[103]\n getitem_548 = _foreach_add_1[104]\n getitem_549 = _foreach_add_1[105]\n getitem_550 = _foreach_add_1[106]\n getitem_551 = _foreach_add_1[107]\n getitem_552 = _foreach_add_1[108]\n getitem_553 = _foreach_add_1[109]\n getitem_554 = _foreach_add_1[110]\n getitem_555 = _foreach_add_1[111]\n getitem_556 = _foreach_add_1[112]\n getitem_557 = _foreach_add_1[113]\n getitem_558 = _foreach_add_1[114]\n getitem_559 = _foreach_add_1[115]\n getitem_560 = _foreach_add_1[116]\n getitem_561 = _foreach_add_1[117]\n getitem_562 = _foreach_add_1[118]\n getitem_563 = _foreach_add_1[119]\n getitem_564 = _foreach_add_1[120]\n getitem_565 = _foreach_add_1[121]\n getitem_566 = _foreach_add_1[122]\n getitem_567 = _foreach_add_1[123]\n getitem_568 = _foreach_add_1[124]\n getitem_569 = _foreach_add_1[125]\n getitem_570 = _foreach_add_1[126]\n getitem_571 = _foreach_add_1[127]\n getitem_572 = _foreach_add_1[128]\n getitem_573 = _foreach_add_1[129]\n getitem_574 = _foreach_add_1[130]\n getitem_575 = _foreach_add_1[131]\n getitem_576 = _foreach_add_1[132]\n getitem_577 = _foreach_add_1[133]\n getitem_578 = _foreach_add_1[134]\n getitem_579 = _foreach_add_1[135]\n getitem_580 = _foreach_add_1[136]\n getitem_581 = _foreach_add_1[137]\n getitem_582 = _foreach_add_1[138]\n getitem_583 = _foreach_add_1[139]\n getitem_584 = _foreach_add_1[140]\n getitem_585 = _foreach_add_1[141]\n getitem_586 = _foreach_add_1[142]\n getitem_587 = _foreach_add_1[143]\n getitem_588 = _foreach_add_1[144]\n getitem_589 = _foreach_add_1[145]\n getitem_590 = _foreach_add_1[146]\n getitem_591 = _foreach_add_1[147]; _foreach_add_1 = None\n _foreach_mul_1 = torch.ops.aten._foreach_mul.Scalar([arg446_1, arg150_1, arg447_1, arg448_1, arg449_1, arg450_1, arg451_1, arg452_1, arg453_1, arg454_1, arg455_1, arg456_1, arg457_1, arg458_1, arg459_1, arg460_1, arg461_1, arg462_1, arg463_1, arg464_1, arg465_1, arg466_1, arg467_1, arg468_1, arg469_1, arg470_1, arg471_1, arg472_1, arg473_1, arg474_1, arg475_1, arg476_1, arg477_1, arg478_1, arg479_1, arg480_1, arg481_1, arg482_1, arg483_1, arg484_1, arg485_1, arg486_1, arg487_1, arg488_1, arg489_1, arg490_1, arg491_1, arg492_1, arg493_1, arg494_1, arg495_1, arg496_1, arg497_1, arg498_1, arg499_1, arg500_1, arg501_1, arg502_1, arg503_1, arg504_1, arg505_1, arg506_1, arg507_1, arg508_1, arg509_1, arg510_1, arg511_1, arg512_1, arg513_1, arg514_1, arg515_1, arg516_1, arg517_1, arg518_1, arg519_1, arg520_1, arg521_1, arg522_1, arg523_1, arg524_1, arg525_1, arg526_1, arg527_1, arg528_1, arg529_1, arg530_1, arg531_1, arg532_1, arg533_1, arg534_1, arg535_1, arg536_1, 
arg537_1, arg538_1, arg539_1, arg540_1, arg541_1, arg542_1, arg543_1, arg544_1, arg545_1, arg546_1, arg547_1, arg548_1, arg549_1, arg550_1, arg551_1, arg552_1, arg553_1, arg554_1, arg555_1, arg556_1, arg557_1, arg558_1, arg559_1, arg560_1, arg561_1, arg562_1, arg563_1, arg564_1, arg565_1, arg566_1, arg567_1, arg568_1, arg569_1, arg570_1, arg571_1, arg572_1, arg573_1, arg574_1, arg575_1, arg576_1, arg577_1, arg578_1, arg579_1, arg580_1, arg581_1, arg582_1, arg583_1, arg584_1, arg585_1, arg586_1, arg587_1, arg588_1, arg589_1, arg590_1, arg591_1, arg592_1], 0.999)\n getitem_592 = _foreach_mul_1[0]\n getitem_593 = _foreach_mul_1[1]\n getitem_594 = _foreach_mul_1[2]\n getitem_595 = _foreach_mul_1[3]\n getitem_596 = _foreach_mul_1[4]\n getitem_597 = _foreach_mul_1[5]\n getitem_598 = _foreach_mul_1[6]\n getitem_599 = _foreach_mul_1[7]\n getitem_600 = _foreach_mul_1[8]\n getitem_601 = _foreach_mul_1[9]\n getitem_602 = _foreach_mul_1[10]\n getitem_603 = _foreach_mul_1[11]\n getitem_604 = _foreach_mul_1[12]\n getitem_605 = _foreach_mul_1[13]\n getitem_606 = _foreach_mul_1[14]\n getitem_607 = _foreach_mul_1[15]\n getitem_608 = _foreach_mul_1[16]\n getitem_609 = _foreach_mul_1[17]\n getitem_610 = _foreach_mul_1[18]\n getitem_611 = _foreach_mul_1[19]\n getitem_612 = _foreach_mul_1[20]\n getitem_613 = _foreach_mul_1[21]\n getitem_614 = _foreach_mul_1[22]\n getitem_615 = _foreach_mul_1[23]\n getitem_616 = _foreach_mul_1[24]\n getitem_617 = _foreach_mul_1[25]\n getitem_618 = _foreach_mul_1[26]\n getitem_619 = _foreach_mul_1[27]\n getitem_620 = _foreach_mul_1[28]\n getitem_621 = _foreach_mul_1[29]\n getitem_622 = _foreach_mul_1[30]\n getitem_623 = _foreach_mul_1[31]\n getitem_624 = _foreach_mul_1[32]\n getitem_625 = _foreach_mul_1[33]\n getitem_626 = _foreach_mul_1[34]\n getitem_627 = _foreach_mul_1[35]\n getitem_628 = _foreach_mul_1[36]\n getitem_629 = _foreach_mul_1[37]\n getitem_630 = _foreach_mul_1[38]\n getitem_631 = _foreach_mul_1[39]\n getitem_632 = _foreach_mul_1[40]\n getitem_633 = _foreach_mul_1[41]\n getitem_634 = _foreach_mul_1[42]\n getitem_635 = _foreach_mul_1[43]\n getitem_636 = _foreach_mul_1[44]\n getitem_637 = _foreach_mul_1[45]\n getitem_638 = _foreach_mul_1[46]\n getitem_639 = _foreach_mul_1[47]\n getitem_640 = _foreach_mul_1[48]\n getitem_641 = _foreach_mul_1[49]\n getitem_642 = _foreach_mul_1[50]\n getitem_643 = _foreach_mul_1[51]\n getitem_644 = _foreach_mul_1[52]\n getitem_645 = _foreach_mul_1[53]\n getitem_646 = _foreach_mul_1[54]\n getitem_647 = _foreach_mul_1[55]\n getitem_648 = _foreach_mul_1[56]\n getitem_649 = _foreach_mul_1[57]\n getitem_650 = _foreach_mul_1[58]\n getitem_651 = _foreach_mul_1[59]\n getitem_652 = _foreach_mul_1[60]\n getitem_653 = _foreach_mul_1[61]\n getitem_654 = _foreach_mul_1[62]\n getitem_655 = _foreach_mul_1[63]\n getitem_656 = _foreach_mul_1[64]\n getitem_657 = _foreach_mul_1[65]\n getitem_658 = _foreach_mul_1[66]\n getitem_659 = _foreach_mul_1[67]\n getitem_660 = _foreach_mul_1[68]\n getitem_661 = _foreach_mul_1[69]\n getitem_662 = _foreach_mul_1[70]\n getitem_663 = _foreach_mul_1[71]\n getitem_664 = _foreach_mul_1[72]\n getitem_665 = _foreach_mul_1[73]\n getitem_666 = _foreach_mul_1[74]\n getitem_667 = _foreach_mul_1[75]\n getitem_668 = _foreach_mul_1[76]\n getitem_669 = _foreach_mul_1[77]\n getitem_670 = _foreach_mul_1[78]\n getitem_671 = _foreach_mul_1[79]\n getitem_672 = _foreach_mul_1[80]\n getitem_673 = _foreach_mul_1[81]\n getitem_674 = _foreach_mul_1[82]\n getitem_675 = _foreach_mul_1[83]\n getitem_676 = _foreach_mul_1[84]\n getitem_677 = 
_foreach_mul_1[85]\n getitem_678 = _foreach_mul_1[86]\n getitem_679 = _foreach_mul_1[87]\n getitem_680 = _foreach_mul_1[88]\n getitem_681 = _foreach_mul_1[89]\n getitem_682 = _foreach_mul_1[90]\n getitem_683 = _foreach_mul_1[91]\n getitem_684 = _foreach_mul_1[92]\n getitem_685 = _foreach_mul_1[93]\n getitem_686 = _foreach_mul_1[94]\n getitem_687 = _foreach_mul_1[95]\n getitem_688 = _foreach_mul_1[96]\n getitem_689 = _foreach_mul_1[97]\n getitem_690 = _foreach_mul_1[98]\n getitem_691 = _foreach_mul_1[99]\n getitem_692 = _foreach_mul_1[100]\n getitem_693 = _foreach_mul_1[101]\n getitem_694 = _foreach_mul_1[102]\n getitem_695 = _foreach_mul_1[103]\n getitem_696 = _foreach_mul_1[104]\n getitem_697 = _foreach_mul_1[105]\n getitem_698 = _foreach_mul_1[106]\n getitem_699 = _foreach_mul_1[107]\n getitem_700 = _foreach_mul_1[108]\n getitem_701 = _foreach_mul_1[109]\n getitem_702 = _foreach_mul_1[110]\n getitem_703 = _foreach_mul_1[111]\n getitem_704 = _foreach_mul_1[112]\n getitem_705 = _foreach_mul_1[113]\n getitem_706 = _foreach_mul_1[114]\n getitem_707 = _foreach_mul_1[115]\n getitem_708 = _foreach_mul_1[116]\n getitem_709 = _foreach_mul_1[117]\n getitem_710 = _foreach_mul_1[118]\n getitem_711 = _foreach_mul_1[119]\n getitem_712 = _foreach_mul_1[120]\n getitem_713 = _foreach_mul_1[121]\n getitem_714 = _foreach_mul_1[122]\n getitem_715 = _foreach_mul_1[123]\n getitem_716 = _foreach_mul_1[124]\n getitem_717 = _foreach_mul_1[125]\n getitem_718 = _foreach_mul_1[126]\n getitem_719 = _foreach_mul_1[127]\n getitem_720 = _foreach_mul_1[128]\n getitem_721 = _foreach_mul_1[129]\n getitem_722 = _foreach_mul_1[130]\n getitem_723 = _foreach_mul_1[131]\n getitem_724 = _foreach_mul_1[132]\n getitem_725 = _foreach_mul_1[133]\n getitem_726 = _foreach_mul_1[134]\n getitem_727 = _foreach_mul_1[135]\n getitem_728 = _foreach_mul_1[136]\n getitem_729 = _foreach_mul_1[137]\n getitem_730 = _foreach_mul_1[138]\n getitem_731 = _foreach_mul_1[139]\n getitem_732 = _foreach_mul_1[140]\n getitem_733 = _foreach_mul_1[141]\n getitem_734 = _foreach_mul_1[142]\n getitem_735 = _foreach_mul_1[143]\n getitem_736 = _foreach_mul_1[144]\n getitem_737 = _foreach_mul_1[145]\n getitem_738 = _foreach_mul_1[146]\n getitem_739 = _foreach_mul_1[147]; _foreach_mul_1 = None\n _foreach_mul_2 = torch.ops.aten._foreach_mul.List([arg151_1, arg152_1, arg153_1, arg154_1, arg155_1, arg156_1, arg157_1, arg158_1, arg159_1, arg160_1, arg161_1, arg162_1, arg163_1, arg164_1, arg165_1, arg166_1, arg167_1, arg168_1, arg169_1, arg170_1, arg171_1, arg172_1, arg173_1, arg174_1, arg175_1, arg176_1, arg177_1, arg178_1, arg179_1, arg180_1, arg181_1, arg182_1, arg183_1, arg184_1, arg185_1, arg186_1, arg187_1, arg188_1, arg189_1, arg190_1, arg191_1, arg192_1, arg193_1, arg194_1, arg195_1, arg196_1, arg197_1, arg198_1, arg199_1, arg200_1, arg201_1, arg202_1, arg203_1, arg204_1, arg205_1, arg206_1, arg207_1, arg208_1, arg209_1, arg210_1, arg211_1, arg212_1, arg213_1, arg214_1, arg215_1, arg216_1, arg217_1, arg218_1, arg219_1, arg220_1, arg221_1, arg222_1, arg223_1, arg224_1, arg225_1, arg226_1, arg227_1, arg228_1, arg229_1, arg230_1, arg231_1, arg232_1, arg233_1, arg234_1, arg235_1, arg236_1, arg237_1, arg238_1, arg239_1, arg240_1, arg241_1, arg242_1, arg243_1, arg244_1, arg245_1, arg246_1, arg247_1, arg248_1, arg249_1, arg250_1, arg251_1, arg252_1, arg253_1, arg254_1, arg255_1, arg256_1, arg257_1, arg258_1, arg259_1, arg260_1, arg261_1, arg262_1, arg263_1, arg264_1, arg265_1, arg266_1, arg267_1, arg268_1, arg269_1, arg270_1, arg271_1, arg272_1, arg273_1, arg274_1, 
arg275_1, arg276_1, arg277_1, arg278_1, arg279_1, arg280_1, arg281_1, arg282_1, arg283_1, arg284_1, arg285_1, arg286_1, arg287_1, arg288_1, arg289_1, arg290_1, arg291_1, arg292_1, arg293_1, arg294_1, arg295_1, arg296_1, arg297_1, arg298_1], [arg151_1, arg152_1, arg153_1, arg154_1, arg155_1, arg156_1, arg157_1, arg158_1, arg159_1, arg160_1, arg161_1, arg162_1, arg163_1, arg164_1, arg165_1, arg166_1, arg167_1, arg168_1, arg169_1, arg170_1, arg171_1, arg172_1, arg173_1, arg174_1, arg175_1, arg176_1, arg177_1, arg178_1, arg179_1, arg180_1, arg181_1, arg182_1, arg183_1, arg184_1, arg185_1, arg186_1, arg187_1, arg188_1, arg189_1, arg190_1, arg191_1, arg192_1, arg193_1, arg194_1, arg195_1, arg196_1, arg197_1, arg198_1, arg199_1, arg200_1, arg201_1, arg202_1, arg203_1, arg204_1, arg205_1, arg206_1, arg207_1, arg208_1, arg209_1, arg210_1, arg211_1, arg212_1, arg213_1, arg214_1, arg215_1, arg216_1, arg217_1, arg218_1, arg219_1, arg220_1, arg221_1, arg222_1, arg223_1, arg224_1, arg225_1, arg226_1, arg227_1, arg228_1, arg229_1, arg230_1, arg231_1, arg232_1, arg233_1, arg234_1, arg235_1, arg236_1, arg237_1, arg238_1, arg239_1, arg240_1, arg241_1, arg242_1, arg243_1, arg244_1, arg245_1, arg246_1, arg247_1, arg248_1, arg249_1, arg250_1, arg251_1, arg252_1, arg253_1, arg254_1, arg255_1, arg256_1, arg257_1, arg258_1, arg259_1, arg260_1, arg261_1, arg262_1, arg263_1, arg264_1, arg265_1, arg266_1, arg267_1, arg268_1, arg269_1, arg270_1, arg271_1, arg272_1, arg273_1, arg274_1, arg275_1, arg276_1, arg277_1, arg278_1, arg279_1, arg280_1, arg281_1, arg282_1, arg283_1, arg284_1, arg285_1, arg286_1, arg287_1, arg288_1, arg289_1, arg290_1, arg291_1, arg292_1, arg293_1, arg294_1, arg295_1, arg296_1, arg297_1, arg298_1]); arg151_1 = arg152_1 = arg153_1 = arg154_1 = arg155_1 = arg156_1 = arg157_1 = arg158_1 = arg159_1 = arg160_1 = arg161_1 = arg162_1 = arg163_1 = arg164_1 = arg165_1 = arg166_1 = arg167_1 = arg168_1 = arg169_1 = arg170_1 = arg171_1 = arg172_1 = arg173_1 = arg174_1 = arg175_1 = arg176_1 = arg177_1 = arg178_1 = arg179_1 = arg180_1 = arg181_1 = arg182_1 = arg183_1 = arg184_1 = arg185_1 = arg186_1 = arg187_1 = arg188_1 = arg189_1 = arg190_1 = arg191_1 = arg192_1 = arg193_1 = arg194_1 = arg195_1 = arg196_1 = arg197_1 = arg198_1 = arg199_1 = arg200_1 = arg201_1 = arg202_1 = arg203_1 = arg204_1 = arg205_1 = arg206_1 = arg207_1 = arg208_1 = arg209_1 = arg210_1 = arg211_1 = arg212_1 = arg213_1 = arg214_1 = arg215_1 = arg216_1 = arg217_1 = arg218_1 = arg219_1 = arg220_1 = arg221_1 = arg222_1 = arg223_1 = arg224_1 = arg225_1 = arg226_1 = arg227_1 = arg228_1 = arg229_1 = arg230_1 = arg231_1 = arg232_1 = arg233_1 = arg234_1 = arg235_1 = arg236_1 = arg237_1 = arg238_1 = arg239_1 = arg240_1 = arg241_1 = arg242_1 = arg243_1 = arg244_1 = arg245_1 = arg246_1 = arg247_1 = arg248_1 = arg249_1 = arg250_1 = arg251_1 = arg252_1 = arg253_1 = arg254_1 = arg255_1 = arg256_1 = arg257_1 = arg258_1 = arg259_1 = arg260_1 = arg261_1 = arg262_1 = arg263_1 = arg264_1 = arg265_1 = arg266_1 = arg267_1 = arg268_1 = arg269_1 = arg270_1 = arg271_1 = arg272_1 = arg273_1 = arg274_1 = arg275_1 = arg276_1 = arg277_1 = arg278_1 = arg279_1 = arg280_1 = arg281_1 = arg282_1 = arg283_1 = arg284_1 = arg285_1 = arg286_1 = arg287_1 = arg288_1 = arg289_1 = arg290_1 = arg291_1 = arg292_1 = arg293_1 = arg294_1 = arg295_1 = arg296_1 = arg297_1 = arg298_1 = None\n getitem_740 = _foreach_mul_2[0]\n getitem_741 = _foreach_mul_2[1]\n getitem_742 = _foreach_mul_2[2]\n getitem_743 = _foreach_mul_2[3]\n getitem_744 = _foreach_mul_2[4]\n getitem_745 = 
_foreach_mul_2[5]\n getitem_746 = _foreach_mul_2[6]\n getitem_747 = _foreach_mul_2[7]\n getitem_748 = _foreach_mul_2[8]\n getitem_749 = _foreach_mul_2[9]\n getitem_750 = _foreach_mul_2[10]\n getitem_751 = _foreach_mul_2[11]\n getitem_752 = _foreach_mul_2[12]\n getitem_753 = _foreach_mul_2[13]\n getitem_754 = _foreach_mul_2[14]\n getitem_755 = _foreach_mul_2[15]\n getitem_756 = _foreach_mul_2[16]\n getitem_757 = _foreach_mul_2[17]\n getitem_758 = _foreach_mul_2[18]\n getitem_759 = _foreach_mul_2[19]\n getitem_760 = _foreach_mul_2[20]\n getitem_761 = _foreach_mul_2[21]\n getitem_762 = _foreach_mul_2[22]\n getitem_763 = _foreach_mul_2[23]\n getitem_764 = _foreach_mul_2[24]\n getitem_765 = _foreach_mul_2[25]\n getitem_766 = _foreach_mul_2[26]\n getitem_767 = _foreach_mul_2[27]\n getitem_768 = _foreach_mul_2[28]\n getitem_769 = _foreach_mul_2[29]\n getitem_770 = _foreach_mul_2[30]\n getitem_771 = _foreach_mul_2[31]\n getitem_772 = _foreach_mul_2[32]\n getitem_773 = _foreach_mul_2[33]\n getitem_774 = _foreach_mul_2[34]\n getitem_775 = _foreach_mul_2[35]\n getitem_776 = _foreach_mul_2[36]\n getitem_777 = _foreach_mul_2[37]\n getitem_778 = _foreach_mul_2[38]\n getitem_779 = _foreach_mul_2[39]\n getitem_780 = _foreach_mul_2[40]\n getitem_781 = _foreach_mul_2[41]\n getitem_782 = _foreach_mul_2[42]\n getitem_783 = _foreach_mul_2[43]\n getitem_784 = _foreach_mul_2[44]\n getitem_785 = _foreach_mul_2[45]\n getitem_786 = _foreach_mul_2[46]\n getitem_787 = _foreach_mul_2[47]\n getitem_788 = _foreach_mul_2[48]\n getitem_789 = _foreach_mul_2[49]\n getitem_790 = _foreach_mul_2[50]\n getitem_791 = _foreach_mul_2[51]\n getitem_792 = _foreach_mul_2[52]\n getitem_793 = _foreach_mul_2[53]\n getitem_794 = _foreach_mul_2[54]\n getitem_795 = _foreach_mul_2[55]\n getitem_796 = _foreach_mul_2[56]\n getitem_797 = _foreach_mul_2[57]\n getitem_798 = _foreach_mul_2[58]\n getitem_799 = _foreach_mul_2[59]\n getitem_800 = _foreach_mul_2[60]\n getitem_801 = _foreach_mul_2[61]\n getitem_802 = _foreach_mul_2[62]\n getitem_803 = _foreach_mul_2[63]\n getitem_804 = _foreach_mul_2[64]\n getitem_805 = _foreach_mul_2[65]\n getitem_806 = _foreach_mul_2[66]\n getitem_807 = _foreach_mul_2[67]\n getitem_808 = _foreach_mul_2[68]\n getitem_809 = _foreach_mul_2[69]\n getitem_810 = _foreach_mul_2[70]\n getitem_811 = _foreach_mul_2[71]\n getitem_812 = _foreach_mul_2[72]\n getitem_813 = _foreach_mul_2[73]\n getitem_814 = _foreach_mul_2[74]\n getitem_815 = _foreach_mul_2[75]\n getitem_816 = _foreach_mul_2[76]\n getitem_817 = _foreach_mul_2[77]\n getitem_818 = _foreach_mul_2[78]\n getitem_819 = _foreach_mul_2[79]\n getitem_820 = _foreach_mul_2[80]\n getitem_821 = _foreach_mul_2[81]\n getitem_822 = _foreach_mul_2[82]\n getitem_823 = _foreach_mul_2[83]\n getitem_824 = _foreach_mul_2[84]\n getitem_825 = _foreach_mul_2[85]\n getitem_826 = _foreach_mul_2[86]\n getitem_827 = _foreach_mul_2[87]\n getitem_828 = _foreach_mul_2[88]\n getitem_829 = _foreach_mul_2[89]\n getitem_830 = _foreach_mul_2[90]\n getitem_831 = _foreach_mul_2[91]\n getitem_832 = _foreach_mul_2[92]\n getitem_833 = _foreach_mul_2[93]\n getitem_834 = _foreach_mul_2[94]\n getitem_835 = _foreach_mul_2[95]\n getitem_836 = _foreach_mul_2[96]\n getitem_837 = _foreach_mul_2[97]\n getitem_838 = _foreach_mul_2[98]\n getitem_839 = _foreach_mul_2[99]\n getitem_840 = _foreach_mul_2[100]\n getitem_841 = _foreach_mul_2[101]\n getitem_842 = _foreach_mul_2[102]\n getitem_843 = _foreach_mul_2[103]\n getitem_844 = _foreach_mul_2[104]\n getitem_845 = _foreach_mul_2[105]\n getitem_846 = 
_foreach_mul_2[106]\n getitem_847 = _foreach_mul_2[107]\n getitem_848 = _foreach_mul_2[108]\n getitem_849 = _foreach_mul_2[109]\n getitem_850 = _foreach_mul_2[110]\n getitem_851 = _foreach_mul_2[111]\n getitem_852 = _foreach_mul_2[112]\n getitem_853 = _foreach_mul_2[113]\n getitem_854 = _foreach_mul_2[114]\n getitem_855 = _foreach_mul_2[115]\n getitem_856 = _foreach_mul_2[116]\n getitem_857 = _foreach_mul_2[117]\n getitem_858 = _foreach_mul_2[118]\n getitem_859 = _foreach_mul_2[119]\n getitem_860 = _foreach_mul_2[120]\n getitem_861 = _foreach_mul_2[121]\n getitem_862 = _foreach_mul_2[122]\n getitem_863 = _foreach_mul_2[123]\n getitem_864 = _foreach_mul_2[124]\n getitem_865 = _foreach_mul_2[125]\n getitem_866 = _foreach_mul_2[126]\n getitem_867 = _foreach_mul_2[127]\n getitem_868 = _foreach_mul_2[128]\n getitem_869 = _foreach_mul_2[129]\n getitem_870 = _foreach_mul_2[130]\n getitem_871 = _foreach_mul_2[131]\n getitem_872 = _foreach_mul_2[132]\n getitem_873 = _foreach_mul_2[133]\n getitem_874 = _foreach_mul_2[134]\n getitem_875 = _foreach_mul_2[135]\n getitem_876 = _foreach_mul_2[136]\n getitem_877 = _foreach_mul_2[137]\n getitem_878 = _foreach_mul_2[138]\n getitem_879 = _foreach_mul_2[139]\n getitem_880 = _foreach_mul_2[140]\n getitem_881 = _foreach_mul_2[141]\n getitem_882 = _foreach_mul_2[142]\n getitem_883 = _foreach_mul_2[143]\n getitem_884 = _foreach_mul_2[144]\n getitem_885 = _foreach_mul_2[145]\n getitem_886 = _foreach_mul_2[146]\n getitem_887 = _foreach_mul_2[147]; _foreach_mul_2 = None\n _foreach_add_2 = torch.ops.aten._foreach_add.List([getitem_592, getitem_593, getitem_594, getitem_595, getitem_596, getitem_597, getitem_598, getitem_599, getitem_600, getitem_601, getitem_602, getitem_603, getitem_604, getitem_605, getitem_606, getitem_607, getitem_608, getitem_609, getitem_610, getitem_611, getitem_612, getitem_613, getitem_614, getitem_615, getitem_616, getitem_617, getitem_618, getitem_619, getitem_620, getitem_621, getitem_622, getitem_623, getitem_624, getitem_625, getitem_626, getitem_627, getitem_628, getitem_629, getitem_630, getitem_631, getitem_632, getitem_633, getitem_634, getitem_635, getitem_636, getitem_637, getitem_638, getitem_639, getitem_640, getitem_641, getitem_642, getitem_643, getitem_644, getitem_645, getitem_646, getitem_647, getitem_648, getitem_649, getitem_650, getitem_651, getitem_652, getitem_653, getitem_654, getitem_655, getitem_656, getitem_657, getitem_658, getitem_659, getitem_660, getitem_661, getitem_662, getitem_663, getitem_664, getitem_665, getitem_666, getitem_667, getitem_668, getitem_669, getitem_670, getitem_671, getitem_672, getitem_673, getitem_674, getitem_675, getitem_676, getitem_677, getitem_678, getitem_679, getitem_680, getitem_681, getitem_682, getitem_683, getitem_684, getitem_685, getitem_686, getitem_687, getitem_688, getitem_689, getitem_690, getitem_691, getitem_692, getitem_693, getitem_694, getitem_695, getitem_696, getitem_697, getitem_698, getitem_699, getitem_700, getitem_701, getitem_702, getitem_703, getitem_704, getitem_705, getitem_706, getitem_707, getitem_708, getitem_709, getitem_710, getitem_711, getitem_712, getitem_713, getitem_714, getitem_715, getitem_716, getitem_717, getitem_718, getitem_719, getitem_720, getitem_721, getitem_722, getitem_723, getitem_724, getitem_725, getitem_726, getitem_727, getitem_728, getitem_729, getitem_730, getitem_731, getitem_732, getitem_733, getitem_734, getitem_735, getitem_736, getitem_737, getitem_738, getitem_739], [getitem_740, getitem_741, getitem_742, getitem_743, 
getitem_744, getitem_745, getitem_746, getitem_747, getitem_748, getitem_749, getitem_750, getitem_751, getitem_752, getitem_753, getitem_754, getitem_755, getitem_756, getitem_757, getitem_758, getitem_759, getitem_760, getitem_761, getitem_762, getitem_763, getitem_764, getitem_765, getitem_766, getitem_767, getitem_768, getitem_769, getitem_770, getitem_771, getitem_772, getitem_773, getitem_774, getitem_775, getitem_776, getitem_777, getitem_778, getitem_779, getitem_780, getitem_781, getitem_782, getitem_783, getitem_784, getitem_785, getitem_786, getitem_787, getitem_788, getitem_789, getitem_790, getitem_791, getitem_792, getitem_793, getitem_794, getitem_795, getitem_796, getitem_797, getitem_798, getitem_799, getitem_800, getitem_801, getitem_802, getitem_803, getitem_804, getitem_805, getitem_806, getitem_807, getitem_808, getitem_809, getitem_810, getitem_811, getitem_812, getitem_813, getitem_814, getitem_815, getitem_816, getitem_817, getitem_818, getitem_819, getitem_820, getitem_821, getitem_822, getitem_823, getitem_824, getitem_825, getitem_826, getitem_827, getitem_828, getitem_829, getitem_830, getitem_831, getitem_832, getitem_833, getitem_834, getitem_835, getitem_836, getitem_837, getitem_838, getitem_839, getitem_840, getitem_841, getitem_842, getitem_843, getitem_844, getitem_845, getitem_846, getitem_847, getitem_848, getitem_849, getitem_850, getitem_851, getitem_852, getitem_853, getitem_854, getitem_855, getitem_856, getitem_857, getitem_858, getitem_859, getitem_860, getitem_861, getitem_862, getitem_863, getitem_864, getitem_865, getitem_866, getitem_867, getitem_868, getitem_869, getitem_870, getitem_871, getitem_872, getitem_873, getitem_874, getitem_875, getitem_876, getitem_877, getitem_878, getitem_879, getitem_880, getitem_881, getitem_882, getitem_883, getitem_884, getitem_885, getitem_886, getitem_887], alpha = 0.0010000000000000009); getitem_592 = getitem_593 = getitem_594 = getitem_595 = getitem_596 = getitem_597 = getitem_598 = getitem_599 = getitem_600 = getitem_601 = getitem_602 = getitem_603 = getitem_604 = getitem_605 = getitem_606 = getitem_607 = getitem_608 = getitem_609 = getitem_610 = getitem_611 = getitem_612 = getitem_613 = getitem_614 = getitem_615 = getitem_616 = getitem_617 = getitem_618 = getitem_619 = getitem_620 = getitem_621 = getitem_622 = getitem_623 = getitem_624 = getitem_625 = getitem_626 = getitem_627 = getitem_628 = getitem_629 = getitem_630 = getitem_631 = getitem_632 = getitem_633 = getitem_634 = getitem_635 = getitem_636 = getitem_637 = getitem_638 = getitem_639 = getitem_640 = getitem_641 = getitem_642 = getitem_643 = getitem_644 = getitem_645 = getitem_646 = getitem_647 = getitem_648 = getitem_649 = getitem_650 = getitem_651 = getitem_652 = getitem_653 = getitem_654 = getitem_655 = getitem_656 = getitem_657 = getitem_658 = getitem_659 = getitem_660 = getitem_661 = getitem_662 = getitem_663 = getitem_664 = getitem_665 = getitem_666 = getitem_667 = getitem_668 = getitem_669 = getitem_670 = getitem_671 = getitem_672 = getitem_673 = getitem_674 = getitem_675 = getitem_676 = getitem_677 = getitem_678 = getitem_679 = getitem_680 = getitem_681 = getitem_682 = getitem_683 = getitem_684 = getitem_685 = getitem_686 = getitem_687 = getitem_688 = getitem_689 = getitem_690 = getitem_691 = getitem_692 = getitem_693 = getitem_694 = getitem_695 = getitem_696 = getitem_697 = getitem_698 = getitem_699 = getitem_700 = getitem_701 = getitem_702 = getitem_703 = getitem_704 = getitem_705 = getitem_706 = getitem_707 = getitem_708 = getitem_709 
= getitem_710 = getitem_711 = getitem_712 = getitem_713 = getitem_714 = getitem_715 = getitem_716 = getitem_717 = getitem_718 = getitem_719 = getitem_720 = getitem_721 = getitem_722 = getitem_723 = getitem_724 = getitem_725 = getitem_726 = getitem_727 = getitem_728 = getitem_729 = getitem_730 = getitem_731 = getitem_732 = getitem_733 = getitem_734 = getitem_735 = getitem_736 = getitem_737 = getitem_738 = getitem_739 = getitem_740 = getitem_741 = getitem_742 = getitem_743 = getitem_744 = getitem_745 = getitem_746 = getitem_747 = getitem_748 = getitem_749 = getitem_750 = getitem_751 = getitem_752 = getitem_753 = getitem_754 = getitem_755 = getitem_756 = getitem_757 = getitem_758 = getitem_759 = getitem_760 = getitem_761 = getitem_762 = getitem_763 = getitem_764 = getitem_765 = getitem_766 = getitem_767 = getitem_768 = getitem_769 = getitem_770 = getitem_771 = getitem_772 = getitem_773 = getitem_774 = getitem_775 = getitem_776 = getitem_777 = getitem_778 = getitem_779 = getitem_780 = getitem_781 = getitem_782 = getitem_783 = getitem_784 = getitem_785 = getitem_786 = getitem_787 = getitem_788 = getitem_789 = getitem_790 = getitem_791 = getitem_792 = getitem_793 = getitem_794 = getitem_795 = getitem_796 = getitem_797 = getitem_798 = getitem_799 = getitem_800 = getitem_801 = getitem_802 = getitem_803 = getitem_804 = getitem_805 = getitem_806 = getitem_807 = getitem_808 = getitem_809 = getitem_810 = getitem_811 = getitem_812 = getitem_813 = getitem_814 = getitem_815 = getitem_816 = getitem_817 = getitem_818 = getitem_819 = getitem_820 = getitem_821 = getitem_822 = getitem_823 = getitem_824 = getitem_825 = getitem_826 = getitem_827 = getitem_828 = getitem_829 = getitem_830 = getitem_831 = getitem_832 = getitem_833 = getitem_834 = getitem_835 = getitem_836 = getitem_837 = getitem_838 = getitem_839 = getitem_840 = getitem_841 = getitem_842 = getitem_843 = getitem_844 = getitem_845 = getitem_846 = getitem_847 = getitem_848 = getitem_849 = getitem_850 = getitem_851 = getitem_852 = getitem_853 = getitem_854 = getitem_855 = getitem_856 = getitem_857 = getitem_858 = getitem_859 = getitem_860 = getitem_861 = getitem_862 = getitem_863 = getitem_864 = getitem_865 = getitem_866 = getitem_867 = getitem_868 = getitem_869 = getitem_870 = getitem_871 = getitem_872 = getitem_873 = getitem_874 = getitem_875 = getitem_876 = getitem_877 = getitem_878 = getitem_879 = getitem_880 = getitem_881 = getitem_882 = getitem_883 = getitem_884 = getitem_885 = getitem_886 = getitem_887 = None\n getitem_888 = _foreach_add_2[0]\n getitem_889 = _foreach_add_2[1]\n getitem_890 = _foreach_add_2[2]\n getitem_891 = _foreach_add_2[3]\n getitem_892 = _foreach_add_2[4]\n getitem_893 = _foreach_add_2[5]\n getitem_894 = _foreach_add_2[6]\n getitem_895 = _foreach_add_2[7]\n getitem_896 = _foreach_add_2[8]\n getitem_897 = _foreach_add_2[9]\n getitem_898 = _foreach_add_2[10]\n getitem_899 = _foreach_add_2[11]\n getitem_900 = _foreach_add_2[12]\n getitem_901 = _foreach_add_2[13]\n getitem_902 = _foreach_add_2[14]\n getitem_903 = _foreach_add_2[15]\n getitem_904 = _foreach_add_2[16]\n getitem_905 = _foreach_add_2[17]\n getitem_906 = _foreach_add_2[18]\n getitem_907 = _foreach_add_2[19]\n getitem_908 = _foreach_add_2[20]\n getitem_909 = _foreach_add_2[21]\n getitem_910 = _foreach_add_2[22]\n getitem_911 = _foreach_add_2[23]\n getitem_912 = _foreach_add_2[24]\n getitem_913 = _foreach_add_2[25]\n getitem_914 = _foreach_add_2[26]\n getitem_915 = _foreach_add_2[27]\n getitem_916 = _foreach_add_2[28]\n getitem_917 = _foreach_add_2[29]\n getitem_918 = 
_foreach_add_2[30]\n getitem_919 = _foreach_add_2[31]\n getitem_920 = _foreach_add_2[32]\n getitem_921 = _foreach_add_2[33]\n getitem_922 = _foreach_add_2[34]\n getitem_923 = _foreach_add_2[35]\n getitem_924 = _foreach_add_2[36]\n getitem_925 = _foreach_add_2[37]\n getitem_926 = _foreach_add_2[38]\n getitem_927 = _foreach_add_2[39]\n getitem_928 = _foreach_add_2[40]\n getitem_929 = _foreach_add_2[41]\n getitem_930 = _foreach_add_2[42]\n getitem_931 = _foreach_add_2[43]\n getitem_932 = _foreach_add_2[44]\n getitem_933 = _foreach_add_2[45]\n getitem_934 = _foreach_add_2[46]\n getitem_935 = _foreach_add_2[47]\n getitem_936 = _foreach_add_2[48]\n getitem_937 = _foreach_add_2[49]\n getitem_938 = _foreach_add_2[50]\n getitem_939 = _foreach_add_2[51]\n getitem_940 = _foreach_add_2[52]\n getitem_941 = _foreach_add_2[53]\n getitem_942 = _foreach_add_2[54]\n getitem_943 = _foreach_add_2[55]\n getitem_944 = _foreach_add_2[56]\n getitem_945 = _foreach_add_2[57]\n getitem_946 = _foreach_add_2[58]\n getitem_947 = _foreach_add_2[59]\n getitem_948 = _foreach_add_2[60]\n getitem_949 = _foreach_add_2[61]\n getitem_950 = _foreach_add_2[62]\n getitem_951 = _foreach_add_2[63]\n getitem_952 = _foreach_add_2[64]\n getitem_953 = _foreach_add_2[65]\n getitem_954 = _foreach_add_2[66]\n getitem_955 = _foreach_add_2[67]\n getitem_956 = _foreach_add_2[68]\n getitem_957 = _foreach_add_2[69]\n getitem_958 = _foreach_add_2[70]\n getitem_959 = _foreach_add_2[71]\n getitem_960 = _foreach_add_2[72]\n getitem_961 = _foreach_add_2[73]\n getitem_962 = _foreach_add_2[74]\n getitem_963 = _foreach_add_2[75]\n getitem_964 = _foreach_add_2[76]\n getitem_965 = _foreach_add_2[77]\n getitem_966 = _foreach_add_2[78]\n getitem_967 = _foreach_add_2[79]\n getitem_968 = _foreach_add_2[80]\n getitem_969 = _foreach_add_2[81]\n getitem_970 = _foreach_add_2[82]\n getitem_971 = _foreach_add_2[83]\n getitem_972 = _foreach_add_2[84]\n getitem_973 = _foreach_add_2[85]\n getitem_974 = _foreach_add_2[86]\n getitem_975 = _foreach_add_2[87]\n getitem_976 = _foreach_add_2[88]\n getitem_977 = _foreach_add_2[89]\n getitem_978 = _foreach_add_2[90]\n getitem_979 = _foreach_add_2[91]\n getitem_980 = _foreach_add_2[92]\n getitem_981 = _foreach_add_2[93]\n getitem_982 = _foreach_add_2[94]\n getitem_983 = _foreach_add_2[95]\n getitem_984 = _foreach_add_2[96]\n getitem_985 = _foreach_add_2[97]\n getitem_986 = _foreach_add_2[98]\n getitem_987 = _foreach_add_2[99]\n getitem_988 = _foreach_add_2[100]\n getitem_989 = _foreach_add_2[101]\n getitem_990 = _foreach_add_2[102]\n getitem_991 = _foreach_add_2[103]\n getitem_992 = _foreach_add_2[104]\n getitem_993 = _foreach_add_2[105]\n getitem_994 = _foreach_add_2[106]\n getitem_995 = _foreach_add_2[107]\n getitem_996 = _foreach_add_2[108]\n getitem_997 = _foreach_add_2[109]\n getitem_998 = _foreach_add_2[110]\n getitem_999 = _foreach_add_2[111]\n getitem_1000 = _foreach_add_2[112]\n getitem_1001 = _foreach_add_2[113]\n getitem_1002 = _foreach_add_2[114]\n getitem_1003 = _foreach_add_2[115]\n getitem_1004 = _foreach_add_2[116]\n getitem_1005 = _foreach_add_2[117]\n getitem_1006 = _foreach_add_2[118]\n getitem_1007 = _foreach_add_2[119]\n getitem_1008 = _foreach_add_2[120]\n getitem_1009 = _foreach_add_2[121]\n getitem_1010 = _foreach_add_2[122]\n getitem_1011 = _foreach_add_2[123]\n getitem_1012 = _foreach_add_2[124]\n getitem_1013 = _foreach_add_2[125]\n getitem_1014 = _foreach_add_2[126]\n getitem_1015 = _foreach_add_2[127]\n getitem_1016 = _foreach_add_2[128]\n getitem_1017 = _foreach_add_2[129]\n getitem_1018 = 
_foreach_add_2[130]\n getitem_1019 = _foreach_add_2[131]\n getitem_1020 = _foreach_add_2[132]\n getitem_1021 = _foreach_add_2[133]\n getitem_1022 = _foreach_add_2[134]\n getitem_1023 = _foreach_add_2[135]\n getitem_1024 = _foreach_add_2[136]\n getitem_1025 = _foreach_add_2[137]\n getitem_1026 = _foreach_add_2[138]\n getitem_1027 = _foreach_add_2[139]\n getitem_1028 = _foreach_add_2[140]\n getitem_1029 = _foreach_add_2[141]\n getitem_1030 = _foreach_add_2[142]\n getitem_1031 = _foreach_add_2[143]\n getitem_1032 = _foreach_add_2[144]\n getitem_1033 = _foreach_add_2[145]\n getitem_1034 = _foreach_add_2[146]\n getitem_1035 = _foreach_add_2[147]; _foreach_add_2 = None\n _foreach_pow = torch.ops.aten._foreach_pow.ScalarAndTensor(0.9, [getitem, getitem_1, getitem_2, getitem_3, getitem_4, getitem_5, getitem_6, getitem_7, getitem_8, getitem_9, getitem_10, getitem_11, getitem_12, getitem_13, getitem_14, getitem_15, getitem_16, getitem_17, getitem_18, getitem_19, getitem_20, getitem_21, getitem_22, getitem_23, getitem_24, getitem_25, getitem_26, getitem_27, getitem_28, getitem_29, getitem_30, getitem_31, getitem_32, getitem_33, getitem_34, getitem_35, getitem_36, getitem_37, getitem_38, getitem_39, getitem_40, getitem_41, getitem_42, getitem_43, getitem_44, getitem_45, getitem_46, getitem_47, getitem_48, getitem_49, getitem_50, getitem_51, getitem_52, getitem_53, getitem_54, getitem_55, getitem_56, getitem_57, getitem_58, getitem_59, getitem_60, getitem_61, getitem_62, getitem_63, getitem_64, getitem_65, getitem_66, getitem_67, getitem_68, getitem_69, getitem_70, getitem_71, getitem_72, getitem_73, getitem_74, getitem_75, getitem_76, getitem_77, getitem_78, getitem_79, getitem_80, getitem_81, getitem_82, getitem_83, getitem_84, getitem_85, getitem_86, getitem_87, getitem_88, getitem_89, getitem_90, getitem_91, getitem_92, getitem_93, getitem_94, getitem_95, getitem_96, getitem_97, getitem_98, getitem_99, getitem_100, getitem_101, getitem_102, getitem_103, getitem_104, getitem_105, getitem_106, getitem_107, getitem_108, getitem_109, getitem_110, getitem_111, getitem_112, getitem_113, getitem_114, getitem_115, getitem_116, getitem_117, getitem_118, getitem_119, getitem_120, getitem_121, getitem_122, getitem_123, getitem_124, getitem_125, getitem_126, getitem_127, getitem_128, getitem_129, getitem_130, getitem_131, getitem_132, getitem_133, getitem_134, getitem_135, getitem_136, getitem_137, getitem_138, getitem_139, getitem_140, getitem_141, getitem_142, getitem_143, getitem_144, getitem_145, getitem_146, getitem_147])\n getitem_1036 = _foreach_pow[0]\n getitem_1037 = _foreach_pow[1]\n getitem_1038 = _foreach_pow[2]\n getitem_1039 = _foreach_pow[3]\n getitem_1040 = _foreach_pow[4]\n getitem_1041 = _foreach_pow[5]\n getitem_1042 = _foreach_pow[6]\n getitem_1043 = _foreach_pow[7]\n getitem_1044 = _foreach_pow[8]\n getitem_1045 = _foreach_pow[9]\n getitem_1046 = _foreach_pow[10]\n getitem_1047 = _foreach_pow[11]\n getitem_1048 = _foreach_pow[12]\n getitem_1049 = _foreach_pow[13]\n getitem_1050 = _foreach_pow[14]\n getitem_1051 = _foreach_pow[15]\n getitem_1052 = _foreach_pow[16]\n getitem_1053 = _foreach_pow[17]\n getitem_1054 = _foreach_pow[18]\n getitem_1055 = _foreach_pow[19]\n getitem_1056 = _foreach_pow[20]\n getitem_1057 = _foreach_pow[21]\n getitem_1058 = _foreach_pow[22]\n getitem_1059 = _foreach_pow[23]\n getitem_1060 = _foreach_pow[24]\n getitem_1061 = _foreach_pow[25]\n getitem_1062 = _foreach_pow[26]\n getitem_1063 = _foreach_pow[27]\n getitem_1064 = _foreach_pow[28]\n getitem_1065 = 
_foreach_pow[29]\n getitem_1066 = _foreach_pow[30]\n getitem_1067 = _foreach_pow[31]\n getitem_1068 = _foreach_pow[32]\n getitem_1069 = _foreach_pow[33]\n getitem_1070 = _foreach_pow[34]\n getitem_1071 = _foreach_pow[35]\n getitem_1072 = _foreach_pow[36]\n getitem_1073 = _foreach_pow[37]\n getitem_1074 = _foreach_pow[38]\n getitem_1075 = _foreach_pow[39]\n getitem_1076 = _foreach_pow[40]\n getitem_1077 = _foreach_pow[41]\n getitem_1078 = _foreach_pow[42]\n getitem_1079 = _foreach_pow[43]\n getitem_1080 = _foreach_pow[44]\n getitem_1081 = _foreach_pow[45]\n getitem_1082 = _foreach_pow[46]\n getitem_1083 = _foreach_pow[47]\n getitem_1084 = _foreach_pow[48]\n getitem_1085 = _foreach_pow[49]\n getitem_1086 = _foreach_pow[50]\n getitem_1087 = _foreach_pow[51]\n getitem_1088 = _foreach_pow[52]\n getitem_1089 = _foreach_pow[53]\n getitem_1090 = _foreach_pow[54]\n getitem_1091 = _foreach_pow[55]\n getitem_1092 = _foreach_pow[56]\n getitem_1093 = _foreach_pow[57]\n getitem_1094 = _foreach_pow[58]\n getitem_1095 = _foreach_pow[59]\n getitem_1096 = _foreach_pow[60]\n getitem_1097 = _foreach_pow[61]\n getitem_1098 = _foreach_pow[62]\n getitem_1099 = _foreach_pow[63]\n getitem_1100 = _foreach_pow[64]\n getitem_1101 = _foreach_pow[65]\n getitem_1102 = _foreach_pow[66]\n getitem_1103 = _foreach_pow[67]\n getitem_1104 = _foreach_pow[68]\n getitem_1105 = _foreach_pow[69]\n getitem_1106 = _foreach_pow[70]\n getitem_1107 = _foreach_pow[71]\n getitem_1108 = _foreach_pow[72]\n getitem_1109 = _foreach_pow[73]\n getitem_1110 = _foreach_pow[74]\n getitem_1111 = _foreach_pow[75]\n getitem_1112 = _foreach_pow[76]\n getitem_1113 = _foreach_pow[77]\n getitem_1114 = _foreach_pow[78]\n getitem_1115 = _foreach_pow[79]\n getitem_1116 = _foreach_pow[80]\n getitem_1117 = _foreach_pow[81]\n getitem_1118 = _foreach_pow[82]\n getitem_1119 = _foreach_pow[83]\n getitem_1120 = _foreach_pow[84]\n getitem_1121 = _foreach_pow[85]\n getitem_1122 = _foreach_pow[86]\n getitem_1123 = _foreach_pow[87]\n getitem_1124 = _foreach_pow[88]\n getitem_1125 = _foreach_pow[89]\n getitem_1126 = _foreach_pow[90]\n getitem_1127 = _foreach_pow[91]\n getitem_1128 = _foreach_pow[92]\n getitem_1129 = _foreach_pow[93]\n getitem_1130 = _foreach_pow[94]\n getitem_1131 = _foreach_pow[95]\n getitem_1132 = _foreach_pow[96]\n getitem_1133 = _foreach_pow[97]\n getitem_1134 = _foreach_pow[98]\n getitem_1135 = _foreach_pow[99]\n getitem_1136 = _foreach_pow[100]\n getitem_1137 = _foreach_pow[101]\n getitem_1138 = _foreach_pow[102]\n getitem_1139 = _foreach_pow[103]\n getitem_1140 = _foreach_pow[104]\n getitem_1141 = _foreach_pow[105]\n getitem_1142 = _foreach_pow[106]\n getitem_1143 = _foreach_pow[107]\n getitem_1144 = _foreach_pow[108]\n getitem_1145 = _foreach_pow[109]\n getitem_1146 = _foreach_pow[110]\n getitem_1147 = _foreach_pow[111]\n getitem_1148 = _foreach_pow[112]\n getitem_1149 = _foreach_pow[113]\n getitem_1150 = _foreach_pow[114]\n getitem_1151 = _foreach_pow[115]\n getitem_1152 = _foreach_pow[116]\n getitem_1153 = _foreach_pow[117]\n getitem_1154 = _foreach_pow[118]\n getitem_1155 = _foreach_pow[119]\n getitem_1156 = _foreach_pow[120]\n getitem_1157 = _foreach_pow[121]\n getitem_1158 = _foreach_pow[122]\n getitem_1159 = _foreach_pow[123]\n getitem_1160 = _foreach_pow[124]\n getitem_1161 = _foreach_pow[125]\n getitem_1162 = _foreach_pow[126]\n getitem_1163 = _foreach_pow[127]\n getitem_1164 = _foreach_pow[128]\n getitem_1165 = _foreach_pow[129]\n getitem_1166 = _foreach_pow[130]\n getitem_1167 = _foreach_pow[131]\n getitem_1168 = _foreach_pow[132]\n 
getitem_1169 = _foreach_pow[133]\n getitem_1170 = _foreach_pow[134]\n getitem_1171 = _foreach_pow[135]\n getitem_1172 = _foreach_pow[136]\n getitem_1173 = _foreach_pow[137]\n getitem_1174 = _foreach_pow[138]\n getitem_1175 = _foreach_pow[139]\n getitem_1176 = _foreach_pow[140]\n getitem_1177 = _foreach_pow[141]\n getitem_1178 = _foreach_pow[142]\n getitem_1179 = _foreach_pow[143]\n getitem_1180 = _foreach_pow[144]\n getitem_1181 = _foreach_pow[145]\n getitem_1182 = _foreach_pow[146]\n getitem_1183 = _foreach_pow[147]; _foreach_pow = None\n _foreach_pow_1 = torch.ops.aten._foreach_pow.ScalarAndTensor(0.999, [getitem, getitem_1, getitem_2, getitem_3, getitem_4, getitem_5, getitem_6, getitem_7, getitem_8, getitem_9, getitem_10, getitem_11, getitem_12, getitem_13, getitem_14, getitem_15, getitem_16, getitem_17, getitem_18, getitem_19, getitem_20, getitem_21, getitem_22, getitem_23, getitem_24, getitem_25, getitem_26, getitem_27, getitem_28, getitem_29, getitem_30, getitem_31, getitem_32, getitem_33, getitem_34, getitem_35, getitem_36, getitem_37, getitem_38, getitem_39, getitem_40, getitem_41, getitem_42, getitem_43, getitem_44, getitem_45, getitem_46, getitem_47, getitem_48, getitem_49, getitem_50, getitem_51, getitem_52, getitem_53, getitem_54, getitem_55, getitem_56, getitem_57, getitem_58, getitem_59, getitem_60, getitem_61, getitem_62, getitem_63, getitem_64, getitem_65, getitem_66, getitem_67, getitem_68, getitem_69, getitem_70, getitem_71, getitem_72, getitem_73, getitem_74, getitem_75, getitem_76, getitem_77, getitem_78, getitem_79, getitem_80, getitem_81, getitem_82, getitem_83, getitem_84, getitem_85, getitem_86, getitem_87, getitem_88, getitem_89, getitem_90, getitem_91, getitem_92, getitem_93, getitem_94, getitem_95, getitem_96, getitem_97, getitem_98, getitem_99, getitem_100, getitem_101, getitem_102, getitem_103, getitem_104, getitem_105, getitem_106, getitem_107, getitem_108, getitem_109, getitem_110, getitem_111, getitem_112, getitem_113, getitem_114, getitem_115, getitem_116, getitem_117, getitem_118, getitem_119, getitem_120, getitem_121, getitem_122, getitem_123, getitem_124, getitem_125, getitem_126, getitem_127, getitem_128, getitem_129, getitem_130, getitem_131, getitem_132, getitem_133, getitem_134, getitem_135, getitem_136, getitem_137, getitem_138, getitem_139, getitem_140, getitem_141, getitem_142, getitem_143, getitem_144, getitem_145, getitem_146, getitem_147])\n getitem_1184 = _foreach_pow_1[0]\n getitem_1185 = _foreach_pow_1[1]\n getitem_1186 = _foreach_pow_1[2]\n getitem_1187 = _foreach_pow_1[3]\n getitem_1188 = _foreach_pow_1[4]\n getitem_1189 = _foreach_pow_1[5]\n getitem_1190 = _foreach_pow_1[6]\n getitem_1191 = _foreach_pow_1[7]\n getitem_1192 = _foreach_pow_1[8]\n getitem_1193 = _foreach_pow_1[9]\n getitem_1194 = _foreach_pow_1[10]\n getitem_1195 = _foreach_pow_1[11]\n getitem_1196 = _foreach_pow_1[12]\n getitem_1197 = _foreach_pow_1[13]\n getitem_1198 = _foreach_pow_1[14]\n getitem_1199 = _foreach_pow_1[15]\n getitem_1200 = _foreach_pow_1[16]\n getitem_1201 = _foreach_pow_1[17]\n getitem_1202 = _foreach_pow_1[18]\n getitem_1203 = _foreach_pow_1[19]\n getitem_1204 = _foreach_pow_1[20]\n getitem_1205 = _foreach_pow_1[21]\n getitem_1206 = _foreach_pow_1[22]\n getitem_1207 = _foreach_pow_1[23]\n getitem_1208 = _foreach_pow_1[24]\n getitem_1209 = _foreach_pow_1[25]\n getitem_1210 = _foreach_pow_1[26]\n getitem_1211 = _foreach_pow_1[27]\n getitem_1212 = _foreach_pow_1[28]\n getitem_1213 = _foreach_pow_1[29]\n getitem_1214 = _foreach_pow_1[30]\n getitem_1215 = 
_foreach_pow_1[31]\n getitem_1216 = _foreach_pow_1[32]\n getitem_1217 = _foreach_pow_1[33]\n getitem_1218 = _foreach_pow_1[34]\n getitem_1219 = _foreach_pow_1[35]\n getitem_1220 = _foreach_pow_1[36]\n getitem_1221 = _foreach_pow_1[37]\n getitem_1222 = _foreach_pow_1[38]\n getitem_1223 = _foreach_pow_1[39]\n getitem_1224 = _foreach_pow_1[40]\n getitem_1225 = _foreach_pow_1[41]\n getitem_1226 = _foreach_pow_1[42]\n getitem_1227 = _foreach_pow_1[43]\n getitem_1228 = _foreach_pow_1[44]\n getitem_1229 = _foreach_pow_1[45]\n getitem_1230 = _foreach_pow_1[46]\n getitem_1231 = _foreach_pow_1[47]\n getitem_1232 = _foreach_pow_1[48]\n getitem_1233 = _foreach_pow_1[49]\n getitem_1234 = _foreach_pow_1[50]\n getitem_1235 = _foreach_pow_1[51]\n getitem_1236 = _foreach_pow_1[52]\n getitem_1237 = _foreach_pow_1[53]\n getitem_1238 = _foreach_pow_1[54]\n getitem_1239 = _foreach_pow_1[55]\n getitem_1240 = _foreach_pow_1[56]\n getitem_1241 = _foreach_pow_1[57]\n getitem_1242 = _foreach_pow_1[58]\n getitem_1243 = _foreach_pow_1[59]\n getitem_1244 = _foreach_pow_1[60]\n getitem_1245 = _foreach_pow_1[61]\n getitem_1246 = _foreach_pow_1[62]\n getitem_1247 = _foreach_pow_1[63]\n getitem_1248 = _foreach_pow_1[64]\n getitem_1249 = _foreach_pow_1[65]\n getitem_1250 = _foreach_pow_1[66]\n getitem_1251 = _foreach_pow_1[67]\n getitem_1252 = _foreach_pow_1[68]\n getitem_1253 = _foreach_pow_1[69]\n getitem_1254 = _foreach_pow_1[70]\n getitem_1255 = _foreach_pow_1[71]\n getitem_1256 = _foreach_pow_1[72]\n getitem_1257 = _foreach_pow_1[73]\n getitem_1258 = _foreach_pow_1[74]\n getitem_1259 = _foreach_pow_1[75]\n getitem_1260 = _foreach_pow_1[76]\n getitem_1261 = _foreach_pow_1[77]\n getitem_1262 = _foreach_pow_1[78]\n getitem_1263 = _foreach_pow_1[79]\n getitem_1264 = _foreach_pow_1[80]\n getitem_1265 = _foreach_pow_1[81]\n getitem_1266 = _foreach_pow_1[82]\n getitem_1267 = _foreach_pow_1[83]\n getitem_1268 = _foreach_pow_1[84]\n getitem_1269 = _foreach_pow_1[85]\n getitem_1270 = _foreach_pow_1[86]\n getitem_1271 = _foreach_pow_1[87]\n getitem_1272 = _foreach_pow_1[88]\n getitem_1273 = _foreach_pow_1[89]\n getitem_1274 = _foreach_pow_1[90]\n getitem_1275 = _foreach_pow_1[91]\n getitem_1276 = _foreach_pow_1[92]\n getitem_1277 = _foreach_pow_1[93]\n getitem_1278 = _foreach_pow_1[94]\n getitem_1279 = _foreach_pow_1[95]\n getitem_1280 = _foreach_pow_1[96]\n getitem_1281 = _foreach_pow_1[97]\n getitem_1282 = _foreach_pow_1[98]\n getitem_1283 = _foreach_pow_1[99]\n getitem_1284 = _foreach_pow_1[100]\n getitem_1285 = _foreach_pow_1[101]\n getitem_1286 = _foreach_pow_1[102]\n getitem_1287 = _foreach_pow_1[103]\n getitem_1288 = _foreach_pow_1[104]\n getitem_1289 = _foreach_pow_1[105]\n getitem_1290 = _foreach_pow_1[106]\n getitem_1291 = _foreach_pow_1[107]\n getitem_1292 = _foreach_pow_1[108]\n getitem_1293 = _foreach_pow_1[109]\n getitem_1294 = _foreach_pow_1[110]\n getitem_1295 = _foreach_pow_1[111]\n getitem_1296 = _foreach_pow_1[112]\n getitem_1297 = _foreach_pow_1[113]\n getitem_1298 = _foreach_pow_1[114]\n getitem_1299 = _foreach_pow_1[115]\n getitem_1300 = _foreach_pow_1[116]\n getitem_1301 = _foreach_pow_1[117]\n getitem_1302 = _foreach_pow_1[118]\n getitem_1303 = _foreach_pow_1[119]\n getitem_1304 = _foreach_pow_1[120]\n getitem_1305 = _foreach_pow_1[121]\n getitem_1306 = _foreach_pow_1[122]\n getitem_1307 = _foreach_pow_1[123]\n getitem_1308 = _foreach_pow_1[124]\n getitem_1309 = _foreach_pow_1[125]\n getitem_1310 = _foreach_pow_1[126]\n getitem_1311 = _foreach_pow_1[127]\n getitem_1312 = _foreach_pow_1[128]\n getitem_1313 
= _foreach_pow_1[129]\n getitem_1314 = _foreach_pow_1[130]\n getitem_1315 = _foreach_pow_1[131]\n getitem_1316 = _foreach_pow_1[132]\n getitem_1317 = _foreach_pow_1[133]\n getitem_1318 = _foreach_pow_1[134]\n getitem_1319 = _foreach_pow_1[135]\n getitem_1320 = _foreach_pow_1[136]\n getitem_1321 = _foreach_pow_1[137]\n getitem_1322 = _foreach_pow_1[138]\n getitem_1323 = _foreach_pow_1[139]\n getitem_1324 = _foreach_pow_1[140]\n getitem_1325 = _foreach_pow_1[141]\n getitem_1326 = _foreach_pow_1[142]\n getitem_1327 = _foreach_pow_1[143]\n getitem_1328 = _foreach_pow_1[144]\n getitem_1329 = _foreach_pow_1[145]\n getitem_1330 = _foreach_pow_1[146]\n getitem_1331 = _foreach_pow_1[147]; _foreach_pow_1 = None\n _foreach_sub_1 = torch.ops.aten._foreach_sub.Scalar([getitem_1036, getitem_1037, getitem_1038, getitem_1039, getitem_1040, getitem_1041, getitem_1042, getitem_1043, getitem_1044, getitem_1045, getitem_1046, getitem_1047, getitem_1048, getitem_1049, getitem_1050, getitem_1051, getitem_1052, getitem_1053, getitem_1054, getitem_1055, getitem_1056, getitem_1057, getitem_1058, getitem_1059, getitem_1060, getitem_1061, getitem_1062, getitem_1063, getitem_1064, getitem_1065, getitem_1066, getitem_1067, getitem_1068, getitem_1069, getitem_1070, getitem_1071, getitem_1072, getitem_1073, getitem_1074, getitem_1075, getitem_1076, getitem_1077, getitem_1078, getitem_1079, getitem_1080, getitem_1081, getitem_1082, getitem_1083, getitem_1084, getitem_1085, getitem_1086, getitem_1087, getitem_1088, getitem_1089, getitem_1090, getitem_1091, getitem_1092, getitem_1093, getitem_1094, getitem_1095, getitem_1096, getitem_1097, getitem_1098, getitem_1099, getitem_1100, getitem_1101, getitem_1102, getitem_1103, getitem_1104, getitem_1105, getitem_1106, getitem_1107, getitem_1108, getitem_1109, getitem_1110, getitem_1111, getitem_1112, getitem_1113, getitem_1114, getitem_1115, getitem_1116, getitem_1117, getitem_1118, getitem_1119, getitem_1120, getitem_1121, getitem_1122, getitem_1123, getitem_1124, getitem_1125, getitem_1126, getitem_1127, getitem_1128, getitem_1129, getitem_1130, getitem_1131, getitem_1132, getitem_1133, getitem_1134, getitem_1135, getitem_1136, getitem_1137, getitem_1138, getitem_1139, getitem_1140, getitem_1141, getitem_1142, getitem_1143, getitem_1144, getitem_1145, getitem_1146, getitem_1147, getitem_1148, getitem_1149, getitem_1150, getitem_1151, getitem_1152, getitem_1153, getitem_1154, getitem_1155, getitem_1156, getitem_1157, getitem_1158, getitem_1159, getitem_1160, getitem_1161, getitem_1162, getitem_1163, getitem_1164, getitem_1165, getitem_1166, getitem_1167, getitem_1168, getitem_1169, getitem_1170, getitem_1171, getitem_1172, getitem_1173, getitem_1174, getitem_1175, getitem_1176, getitem_1177, getitem_1178, getitem_1179, getitem_1180, getitem_1181, getitem_1182, getitem_1183], 1); getitem_1036 = getitem_1037 = getitem_1038 = getitem_1039 = getitem_1040 = getitem_1041 = getitem_1042 = getitem_1043 = getitem_1044 = getitem_1045 = getitem_1046 = getitem_1047 = getitem_1048 = getitem_1049 = getitem_1050 = getitem_1051 = getitem_1052 = getitem_1053 = getitem_1054 = getitem_1055 = getitem_1056 = getitem_1057 = getitem_1058 = getitem_1059 = getitem_1060 = getitem_1061 = getitem_1062 = getitem_1063 = getitem_1064 = getitem_1065 = getitem_1066 = getitem_1067 = getitem_1068 = getitem_1069 = getitem_1070 = getitem_1071 = getitem_1072 = getitem_1073 = getitem_1074 = getitem_1075 = getitem_1076 = getitem_1077 = getitem_1078 = getitem_1079 = getitem_1080 = getitem_1081 = getitem_1082 = 
getitem_1083 = getitem_1084 = getitem_1085 = getitem_1086 = getitem_1087 = getitem_1088 = getitem_1089 = getitem_1090 = getitem_1091 = getitem_1092 = getitem_1093 = getitem_1094 = getitem_1095 = getitem_1096 = getitem_1097 = getitem_1098 = getitem_1099 = getitem_1100 = getitem_1101 = getitem_1102 = getitem_1103 = getitem_1104 = getitem_1105 = getitem_1106 = getitem_1107 = getitem_1108 = getitem_1109 = getitem_1110 = getitem_1111 = getitem_1112 = getitem_1113 = getitem_1114 = getitem_1115 = getitem_1116 = getitem_1117 = getitem_1118 = getitem_1119 = getitem_1120 = getitem_1121 = getitem_1122 = getitem_1123 = getitem_1124 = getitem_1125 = getitem_1126 = getitem_1127 = getitem_1128 = getitem_1129 = getitem_1130 = getitem_1131 = getitem_1132 = getitem_1133 = getitem_1134 = getitem_1135 = getitem_1136 = getitem_1137 = getitem_1138 = getitem_1139 = getitem_1140 = getitem_1141 = getitem_1142 = getitem_1143 = getitem_1144 = getitem_1145 = getitem_1146 = getitem_1147 = getitem_1148 = getitem_1149 = getitem_1150 = getitem_1151 = getitem_1152 = getitem_1153 = getitem_1154 = getitem_1155 = getitem_1156 = getitem_1157 = getitem_1158 = getitem_1159 = getitem_1160 = getitem_1161 = getitem_1162 = getitem_1163 = getitem_1164 = getitem_1165 = getitem_1166 = getitem_1167 = getitem_1168 = getitem_1169 = getitem_1170 = getitem_1171 = getitem_1172 = getitem_1173 = getitem_1174 = getitem_1175 = getitem_1176 = getitem_1177 = getitem_1178 = getitem_1179 = getitem_1180 = getitem_1181 = getitem_1182 = getitem_1183 = None\n getitem_1332 = _foreach_sub_1[0]\n getitem_1333 = _foreach_sub_1[1]\n getitem_1334 = _foreach_sub_1[2]\n getitem_1335 = _foreach_sub_1[3]\n getitem_1336 = _foreach_sub_1[4]\n getitem_1337 = _foreach_sub_1[5]\n getitem_1338 = _foreach_sub_1[6]\n getitem_1339 = _foreach_sub_1[7]\n getitem_1340 = _foreach_sub_1[8]\n getitem_1341 = _foreach_sub_1[9]\n getitem_1342 = _foreach_sub_1[10]\n getitem_1343 = _foreach_sub_1[11]\n getitem_1344 = _foreach_sub_1[12]\n getitem_1345 = _foreach_sub_1[13]\n getitem_1346 = _foreach_sub_1[14]\n getitem_1347 = _foreach_sub_1[15]\n getitem_1348 = _foreach_sub_1[16]\n getitem_1349 = _foreach_sub_1[17]\n getitem_1350 = _foreach_sub_1[18]\n getitem_1351 = _foreach_sub_1[19]\n getitem_1352 = _foreach_sub_1[20]\n getitem_1353 = _foreach_sub_1[21]\n getitem_1354 = _foreach_sub_1[22]\n getitem_1355 = _foreach_sub_1[23]\n getitem_1356 = _foreach_sub_1[24]\n getitem_1357 = _foreach_sub_1[25]\n getitem_1358 = _foreach_sub_1[26]\n getitem_1359 = _foreach_sub_1[27]\n getitem_1360 = _foreach_sub_1[28]\n getitem_1361 = _foreach_sub_1[29]\n getitem_1362 = _foreach_sub_1[30]\n getitem_1363 = _foreach_sub_1[31]\n getitem_1364 = _foreach_sub_1[32]\n getitem_1365 = _foreach_sub_1[33]\n getitem_1366 = _foreach_sub_1[34]\n getitem_1367 = _foreach_sub_1[35]\n getitem_1368 = _foreach_sub_1[36]\n getitem_1369 = _foreach_sub_1[37]\n getitem_1370 = _foreach_sub_1[38]\n getitem_1371 = _foreach_sub_1[39]\n getitem_1372 = _foreach_sub_1[40]\n getitem_1373 = _foreach_sub_1[41]\n getitem_1374 = _foreach_sub_1[42]\n getitem_1375 = _foreach_sub_1[43]\n getitem_1376 = _foreach_sub_1[44]\n getitem_1377 = _foreach_sub_1[45]\n getitem_1378 = _foreach_sub_1[46]\n getitem_1379 = _foreach_sub_1[47]\n getitem_1380 = _foreach_sub_1[48]\n getitem_1381 = _foreach_sub_1[49]\n getitem_1382 = _foreach_sub_1[50]\n getitem_1383 = _foreach_sub_1[51]\n getitem_1384 = _foreach_sub_1[52]\n getitem_1385 = _foreach_sub_1[53]\n getitem_1386 = _foreach_sub_1[54]\n getitem_1387 = _foreach_sub_1[55]\n getitem_1388 = 
_foreach_sub_1[56]\n getitem_1389 = _foreach_sub_1[57]\n getitem_1390 = _foreach_sub_1[58]\n getitem_1391 = _foreach_sub_1[59]\n getitem_1392 = _foreach_sub_1[60]\n getitem_1393 = _foreach_sub_1[61]\n getitem_1394 = _foreach_sub_1[62]\n getitem_1395 = _foreach_sub_1[63]\n getitem_1396 = _foreach_sub_1[64]\n getitem_1397 = _foreach_sub_1[65]\n getitem_1398 = _foreach_sub_1[66]\n getitem_1399 = _foreach_sub_1[67]\n getitem_1400 = _foreach_sub_1[68]\n getitem_1401 = _foreach_sub_1[69]\n getitem_1402 = _foreach_sub_1[70]\n getitem_1403 = _foreach_sub_1[71]\n getitem_1404 = _foreach_sub_1[72]\n getitem_1405 = _foreach_sub_1[73]\n getitem_1406 = _foreach_sub_1[74]\n getitem_1407 = _foreach_sub_1[75]\n getitem_1408 = _foreach_sub_1[76]\n getitem_1409 = _foreach_sub_1[77]\n getitem_1410 = _foreach_sub_1[78]\n getitem_1411 = _foreach_sub_1[79]\n getitem_1412 = _foreach_sub_1[80]\n getitem_1413 = _foreach_sub_1[81]\n getitem_1414 = _foreach_sub_1[82]\n getitem_1415 = _foreach_sub_1[83]\n getitem_1416 = _foreach_sub_1[84]\n getitem_1417 = _foreach_sub_1[85]\n getitem_1418 = _foreach_sub_1[86]\n getitem_1419 = _foreach_sub_1[87]\n getitem_1420 = _foreach_sub_1[88]\n getitem_1421 = _foreach_sub_1[89]\n getitem_1422 = _foreach_sub_1[90]\n getitem_1423 = _foreach_sub_1[91]\n getitem_1424 = _foreach_sub_1[92]\n getitem_1425 = _foreach_sub_1[93]\n getitem_1426 = _foreach_sub_1[94]\n getitem_1427 = _foreach_sub_1[95]\n getitem_1428 = _foreach_sub_1[96]\n getitem_1429 = _foreach_sub_1[97]\n getitem_1430 = _foreach_sub_1[98]\n getitem_1431 = _foreach_sub_1[99]\n getitem_1432 = _foreach_sub_1[100]\n getitem_1433 = _foreach_sub_1[101]\n getitem_1434 = _foreach_sub_1[102]\n getitem_1435 = _foreach_sub_1[103]\n getitem_1436 = _foreach_sub_1[104]\n getitem_1437 = _foreach_sub_1[105]\n getitem_1438 = _foreach_sub_1[106]\n getitem_1439 = _foreach_sub_1[107]\n getitem_1440 = _foreach_sub_1[108]\n getitem_1441 = _foreach_sub_1[109]\n getitem_1442 = _foreach_sub_1[110]\n getitem_1443 = _foreach_sub_1[111]\n getitem_1444 = _foreach_sub_1[112]\n getitem_1445 = _foreach_sub_1[113]\n getitem_1446 = _foreach_sub_1[114]\n getitem_1447 = _foreach_sub_1[115]\n getitem_1448 = _foreach_sub_1[116]\n getitem_1449 = _foreach_sub_1[117]\n getitem_1450 = _foreach_sub_1[118]\n getitem_1451 = _foreach_sub_1[119]\n getitem_1452 = _foreach_sub_1[120]\n getitem_1453 = _foreach_sub_1[121]\n getitem_1454 = _foreach_sub_1[122]\n getitem_1455 = _foreach_sub_1[123]\n getitem_1456 = _foreach_sub_1[124]\n getitem_1457 = _foreach_sub_1[125]\n getitem_1458 = _foreach_sub_1[126]\n getitem_1459 = _foreach_sub_1[127]\n getitem_1460 = _foreach_sub_1[128]\n getitem_1461 = _foreach_sub_1[129]\n getitem_1462 = _foreach_sub_1[130]\n getitem_1463 = _foreach_sub_1[131]\n getitem_1464 = _foreach_sub_1[132]\n getitem_1465 = _foreach_sub_1[133]\n getitem_1466 = _foreach_sub_1[134]\n getitem_1467 = _foreach_sub_1[135]\n getitem_1468 = _foreach_sub_1[136]\n getitem_1469 = _foreach_sub_1[137]\n getitem_1470 = _foreach_sub_1[138]\n getitem_1471 = _foreach_sub_1[139]\n getitem_1472 = _foreach_sub_1[140]\n getitem_1473 = _foreach_sub_1[141]\n getitem_1474 = _foreach_sub_1[142]\n getitem_1475 = _foreach_sub_1[143]\n getitem_1476 = _foreach_sub_1[144]\n getitem_1477 = _foreach_sub_1[145]\n getitem_1478 = _foreach_sub_1[146]\n getitem_1479 = _foreach_sub_1[147]; _foreach_sub_1 = None\n _foreach_sub_2 = torch.ops.aten._foreach_sub.Scalar([getitem_1184, getitem_1185, getitem_1186, getitem_1187, getitem_1188, getitem_1189, getitem_1190, getitem_1191, getitem_1192, 
getitem_1193, getitem_1194, getitem_1195, getitem_1196, getitem_1197, getitem_1198, getitem_1199, getitem_1200, getitem_1201, getitem_1202, getitem_1203, getitem_1204, getitem_1205, getitem_1206, getitem_1207, getitem_1208, getitem_1209, getitem_1210, getitem_1211, getitem_1212, getitem_1213, getitem_1214, getitem_1215, getitem_1216, getitem_1217, getitem_1218, getitem_1219, getitem_1220, getitem_1221, getitem_1222, getitem_1223, getitem_1224, getitem_1225, getitem_1226, getitem_1227, getitem_1228, getitem_1229, getitem_1230, getitem_1231, getitem_1232, getitem_1233, getitem_1234, getitem_1235, getitem_1236, getitem_1237, getitem_1238, getitem_1239, getitem_1240, getitem_1241, getitem_1242, getitem_1243, getitem_1244, getitem_1245, getitem_1246, getitem_1247, getitem_1248, getitem_1249, getitem_1250, getitem_1251, getitem_1252, getitem_1253, getitem_1254, getitem_1255, getitem_1256, getitem_1257, getitem_1258, getitem_1259, getitem_1260, getitem_1261, getitem_1262, getitem_1263, getitem_1264, getitem_1265, getitem_1266, getitem_1267, getitem_1268, getitem_1269, getitem_1270, getitem_1271, getitem_1272, getitem_1273, getitem_1274, getitem_1275, getitem_1276, getitem_1277, getitem_1278, getitem_1279, getitem_1280, getitem_1281, getitem_1282, getitem_1283, getitem_1284, getitem_1285, getitem_1286, getitem_1287, getitem_1288, getitem_1289, getitem_1290, getitem_1291, getitem_1292, getitem_1293, getitem_1294, getitem_1295, getitem_1296, getitem_1297, getitem_1298, getitem_1299, getitem_1300, getitem_1301, getitem_1302, getitem_1303, getitem_1304, getitem_1305, getitem_1306, getitem_1307, getitem_1308, getitem_1309, getitem_1310, getitem_1311, getitem_1312, getitem_1313, getitem_1314, getitem_1315, getitem_1316, getitem_1317, getitem_1318, getitem_1319, getitem_1320, getitem_1321, getitem_1322, getitem_1323, getitem_1324, getitem_1325, getitem_1326, getitem_1327, getitem_1328, getitem_1329, getitem_1330, getitem_1331], 1); getitem_1184 = getitem_1185 = getitem_1186 = getitem_1187 = getitem_1188 = getitem_1189 = getitem_1190 = getitem_1191 = getitem_1192 = getitem_1193 = getitem_1194 = getitem_1195 = getitem_1196 = getitem_1197 = getitem_1198 = getitem_1199 = getitem_1200 = getitem_1201 = getitem_1202 = getitem_1203 = getitem_1204 = getitem_1205 = getitem_1206 = getitem_1207 = getitem_1208 = getitem_1209 = getitem_1210 = getitem_1211 = getitem_1212 = getitem_1213 = getitem_1214 = getitem_1215 = getitem_1216 = getitem_1217 = getitem_1218 = getitem_1219 = getitem_1220 = getitem_1221 = getitem_1222 = getitem_1223 = getitem_1224 = getitem_1225 = getitem_1226 = getitem_1227 = getitem_1228 = getitem_1229 = getitem_1230 = getitem_1231 = getitem_1232 = getitem_1233 = getitem_1234 = getitem_1235 = getitem_1236 = getitem_1237 = getitem_1238 = getitem_1239 = getitem_1240 = getitem_1241 = getitem_1242 = getitem_1243 = getitem_1244 = getitem_1245 = getitem_1246 = getitem_1247 = getitem_1248 = getitem_1249 = getitem_1250 = getitem_1251 = getitem_1252 = getitem_1253 = getitem_1254 = getitem_1255 = getitem_1256 = getitem_1257 = getitem_1258 = getitem_1259 = getitem_1260 = getitem_1261 = getitem_1262 = getitem_1263 = getitem_1264 = getitem_1265 = getitem_1266 = getitem_1267 = getitem_1268 = getitem_1269 = getitem_1270 = getitem_1271 = getitem_1272 = getitem_1273 = getitem_1274 = getitem_1275 = getitem_1276 = getitem_1277 = getitem_1278 = getitem_1279 = getitem_1280 = getitem_1281 = getitem_1282 = getitem_1283 = getitem_1284 = getitem_1285 = getitem_1286 = getitem_1287 = getitem_1288 = getitem_1289 = getitem_1290 
= getitem_1291 = getitem_1292 = getitem_1293 = getitem_1294 = getitem_1295 = getitem_1296 = getitem_1297 = getitem_1298 = getitem_1299 = getitem_1300 = getitem_1301 = getitem_1302 = getitem_1303 = getitem_1304 = getitem_1305 = getitem_1306 = getitem_1307 = getitem_1308 = getitem_1309 = getitem_1310 = getitem_1311 = getitem_1312 = getitem_1313 = getitem_1314 = getitem_1315 = getitem_1316 = getitem_1317 = getitem_1318 = getitem_1319 = getitem_1320 = getitem_1321 = getitem_1322 = getitem_1323 = getitem_1324 = getitem_1325 = getitem_1326 = getitem_1327 = getitem_1328 = getitem_1329 = getitem_1330 = getitem_1331 = None\n getitem_1480 = _foreach_sub_2[0]\n getitem_1481 = _foreach_sub_2[1]\n getitem_1482 = _foreach_sub_2[2]\n getitem_1483 = _foreach_sub_2[3]\n getitem_1484 = _foreach_sub_2[4]\n getitem_1485 = _foreach_sub_2[5]\n getitem_1486 = _foreach_sub_2[6]\n getitem_1487 = _foreach_sub_2[7]\n getitem_1488 = _foreach_sub_2[8]\n getitem_1489 = _foreach_sub_2[9]\n getitem_1490 = _foreach_sub_2[10]\n getitem_1491 = _foreach_sub_2[11]\n getitem_1492 = _foreach_sub_2[12]\n getitem_1493 = _foreach_sub_2[13]\n getitem_1494 = _foreach_sub_2[14]\n getitem_1495 = _foreach_sub_2[15]\n getitem_1496 = _foreach_sub_2[16]\n getitem_1497 = _foreach_sub_2[17]\n getitem_1498 = _foreach_sub_2[18]\n getitem_1499 = _foreach_sub_2[19]\n getitem_1500 = _foreach_sub_2[20]\n getitem_1501 = _foreach_sub_2[21]\n getitem_1502 = _foreach_sub_2[22]\n getitem_1503 = _foreach_sub_2[23]\n getitem_1504 = _foreach_sub_2[24]\n getitem_1505 = _foreach_sub_2[25]\n getitem_1506 = _foreach_sub_2[26]\n getitem_1507 = _foreach_sub_2[27]\n getitem_1508 = _foreach_sub_2[28]\n getitem_1509 = _foreach_sub_2[29]\n getitem_1510 = _foreach_sub_2[30]\n getitem_1511 = _foreach_sub_2[31]\n getitem_1512 = _foreach_sub_2[32]\n getitem_1513 = _foreach_sub_2[33]\n getitem_1514 = _foreach_sub_2[34]\n getitem_1515 = _foreach_sub_2[35]\n getitem_1516 = _foreach_sub_2[36]\n getitem_1517 = _foreach_sub_2[37]\n getitem_1518 = _foreach_sub_2[38]\n getitem_1519 = _foreach_sub_2[39]\n getitem_1520 = _foreach_sub_2[40]\n getitem_1521 = _foreach_sub_2[41]\n getitem_1522 = _foreach_sub_2[42]\n getitem_1523 = _foreach_sub_2[43]\n getitem_1524 = _foreach_sub_2[44]\n getitem_1525 = _foreach_sub_2[45]\n getitem_1526 = _foreach_sub_2[46]\n getitem_1527 = _foreach_sub_2[47]\n getitem_1528 = _foreach_sub_2[48]\n getitem_1529 = _foreach_sub_2[49]\n getitem_1530 = _foreach_sub_2[50]\n getitem_1531 = _foreach_sub_2[51]\n getitem_1532 = _foreach_sub_2[52]\n getitem_1533 = _foreach_sub_2[53]\n getitem_1534 = _foreach_sub_2[54]\n getitem_1535 = _foreach_sub_2[55]\n getitem_1536 = _foreach_sub_2[56]\n getitem_1537 = _foreach_sub_2[57]\n getitem_1538 = _foreach_sub_2[58]\n getitem_1539 = _foreach_sub_2[59]\n getitem_1540 = _foreach_sub_2[60]\n getitem_1541 = _foreach_sub_2[61]\n getitem_1542 = _foreach_sub_2[62]\n getitem_1543 = _foreach_sub_2[63]\n getitem_1544 = _foreach_sub_2[64]\n getitem_1545 = _foreach_sub_2[65]\n getitem_1546 = _foreach_sub_2[66]\n getitem_1547 = _foreach_sub_2[67]\n getitem_1548 = _foreach_sub_2[68]\n getitem_1549 = _foreach_sub_2[69]\n getitem_1550 = _foreach_sub_2[70]\n getitem_1551 = _foreach_sub_2[71]\n getitem_1552 = _foreach_sub_2[72]\n getitem_1553 = _foreach_sub_2[73]\n getitem_1554 = _foreach_sub_2[74]\n getitem_1555 = _foreach_sub_2[75]\n getitem_1556 = _foreach_sub_2[76]\n getitem_1557 = _foreach_sub_2[77]\n getitem_1558 = _foreach_sub_2[78]\n getitem_1559 = _foreach_sub_2[79]\n getitem_1560 = _foreach_sub_2[80]\n getitem_1561 = 
_foreach_sub_2[81]\n getitem_1562 = _foreach_sub_2[82]\n getitem_1563 = _foreach_sub_2[83]\n getitem_1564 = _foreach_sub_2[84]\n getitem_1565 = _foreach_sub_2[85]\n getitem_1566 = _foreach_sub_2[86]\n getitem_1567 = _foreach_sub_2[87]\n getitem_1568 = _foreach_sub_2[88]\n getitem_1569 = _foreach_sub_2[89]\n getitem_1570 = _foreach_sub_2[90]\n getitem_1571 = _foreach_sub_2[91]\n getitem_1572 = _foreach_sub_2[92]\n getitem_1573 = _foreach_sub_2[93]\n getitem_1574 = _foreach_sub_2[94]\n getitem_1575 = _foreach_sub_2[95]\n getitem_1576 = _foreach_sub_2[96]\n getitem_1577 = _foreach_sub_2[97]\n getitem_1578 = _foreach_sub_2[98]\n getitem_1579 = _foreach_sub_2[99]\n getitem_1580 = _foreach_sub_2[100]\n getitem_1581 = _foreach_sub_2[101]\n getitem_1582 = _foreach_sub_2[102]\n getitem_1583 = _foreach_sub_2[103]\n getitem_1584 = _foreach_sub_2[104]\n getitem_1585 = _foreach_sub_2[105]\n getitem_1586 = _foreach_sub_2[106]\n getitem_1587 = _foreach_sub_2[107]\n getitem_1588 = _foreach_sub_2[108]\n getitem_1589 = _foreach_sub_2[109]\n getitem_1590 = _foreach_sub_2[110]\n getitem_1591 = _foreach_sub_2[111]\n getitem_1592 = _foreach_sub_2[112]\n getitem_1593 = _foreach_sub_2[113]\n getitem_1594 = _foreach_sub_2[114]\n getitem_1595 = _foreach_sub_2[115]\n getitem_1596 = _foreach_sub_2[116]\n getitem_1597 = _foreach_sub_2[117]\n getitem_1598 = _foreach_sub_2[118]\n getitem_1599 = _foreach_sub_2[119]\n getitem_1600 = _foreach_sub_2[120]\n getitem_1601 = _foreach_sub_2[121]\n getitem_1602 = _foreach_sub_2[122]\n getitem_1603 = _foreach_sub_2[123]\n getitem_1604 = _foreach_sub_2[124]\n getitem_1605 = _foreach_sub_2[125]\n getitem_1606 = _foreach_sub_2[126]\n getitem_1607 = _foreach_sub_2[127]\n getitem_1608 = _foreach_sub_2[128]\n getitem_1609 = _foreach_sub_2[129]\n getitem_1610 = _foreach_sub_2[130]\n getitem_1611 = _foreach_sub_2[131]\n getitem_1612 = _foreach_sub_2[132]\n getitem_1613 = _foreach_sub_2[133]\n getitem_1614 = _foreach_sub_2[134]\n getitem_1615 = _foreach_sub_2[135]\n getitem_1616 = _foreach_sub_2[136]\n getitem_1617 = _foreach_sub_2[137]\n getitem_1618 = _foreach_sub_2[138]\n getitem_1619 = _foreach_sub_2[139]\n getitem_1620 = _foreach_sub_2[140]\n getitem_1621 = _foreach_sub_2[141]\n getitem_1622 = _foreach_sub_2[142]\n getitem_1623 = _foreach_sub_2[143]\n getitem_1624 = _foreach_sub_2[144]\n getitem_1625 = _foreach_sub_2[145]\n getitem_1626 = _foreach_sub_2[146]\n getitem_1627 = _foreach_sub_2[147]; _foreach_sub_2 = None\n _foreach_neg = torch.ops.aten._foreach_neg.default([getitem_1480, getitem_1481, getitem_1482, getitem_1483, getitem_1484, getitem_1485, getitem_1486, getitem_1487, getitem_1488, getitem_1489, getitem_1490, getitem_1491, getitem_1492, getitem_1493, getitem_1494, getitem_1495, getitem_1496, getitem_1497, getitem_1498, getitem_1499, getitem_1500, getitem_1501, getitem_1502, getitem_1503, getitem_1504, getitem_1505, getitem_1506, getitem_1507, getitem_1508, getitem_1509, getitem_1510, getitem_1511, getitem_1512, getitem_1513, getitem_1514, getitem_1515, getitem_1516, getitem_1517, getitem_1518, getitem_1519, getitem_1520, getitem_1521, getitem_1522, getitem_1523, getitem_1524, getitem_1525, getitem_1526, getitem_1527, getitem_1528, getitem_1529, getitem_1530, getitem_1531, getitem_1532, getitem_1533, getitem_1534, getitem_1535, getitem_1536, getitem_1537, getitem_1538, getitem_1539, getitem_1540, getitem_1541, getitem_1542, getitem_1543, getitem_1544, getitem_1545, getitem_1546, getitem_1547, getitem_1548, getitem_1549, getitem_1550, getitem_1551, getitem_1552, 
getitem_1553, getitem_1554, getitem_1555, getitem_1556, getitem_1557, getitem_1558, getitem_1559, getitem_1560, getitem_1561, getitem_1562, getitem_1563, getitem_1564, getitem_1565, getitem_1566, getitem_1567, getitem_1568, getitem_1569, getitem_1570, getitem_1571, getitem_1572, getitem_1573, getitem_1574, getitem_1575, getitem_1576, getitem_1577, getitem_1578, getitem_1579, getitem_1580, getitem_1581, getitem_1582, getitem_1583, getitem_1584, getitem_1585, getitem_1586, getitem_1587, getitem_1588, getitem_1589, getitem_1590, getitem_1591, getitem_1592, getitem_1593, getitem_1594, getitem_1595, getitem_1596, getitem_1597, getitem_1598, getitem_1599, getitem_1600, getitem_1601, getitem_1602, getitem_1603, getitem_1604, getitem_1605, getitem_1606, getitem_1607, getitem_1608, getitem_1609, getitem_1610, getitem_1611, getitem_1612, getitem_1613, getitem_1614, getitem_1615, getitem_1616, getitem_1617, getitem_1618, getitem_1619, getitem_1620, getitem_1621, getitem_1622, getitem_1623, getitem_1624, getitem_1625, getitem_1626, getitem_1627]); getitem_1480 = getitem_1481 = getitem_1482 = getitem_1483 = getitem_1484 = getitem_1485 = getitem_1486 = getitem_1487 = getitem_1488 = getitem_1489 = getitem_1490 = getitem_1491 = getitem_1492 = getitem_1493 = getitem_1494 = getitem_1495 = getitem_1496 = getitem_1497 = getitem_1498 = getitem_1499 = getitem_1500 = getitem_1501 = getitem_1502 = getitem_1503 = getitem_1504 = getitem_1505 = getitem_1506 = getitem_1507 = getitem_1508 = getitem_1509 = getitem_1510 = getitem_1511 = getitem_1512 = getitem_1513 = getitem_1514 = getitem_1515 = getitem_1516 = getitem_1517 = getitem_1518 = getitem_1519 = getitem_1520 = getitem_1521 = getitem_1522 = getitem_1523 = getitem_1524 = getitem_1525 = getitem_1526 = getitem_1527 = getitem_1528 = getitem_1529 = getitem_1530 = getitem_1531 = getitem_1532 = getitem_1533 = getitem_1534 = getitem_1535 = getitem_1536 = getitem_1537 = getitem_1538 = getitem_1539 = getitem_1540 = getitem_1541 = getitem_1542 = getitem_1543 = getitem_1544 = getitem_1545 = getitem_1546 = getitem_1547 = getitem_1548 = getitem_1549 = getitem_1550 = getitem_1551 = getitem_1552 = getitem_1553 = getitem_1554 = getitem_1555 = getitem_1556 = getitem_1557 = getitem_1558 = getitem_1559 = getitem_1560 = getitem_1561 = getitem_1562 = getitem_1563 = getitem_1564 = getitem_1565 = getitem_1566 = getitem_1567 = getitem_1568 = getitem_1569 = getitem_1570 = getitem_1571 = getitem_1572 = getitem_1573 = getitem_1574 = getitem_1575 = getitem_1576 = getitem_1577 = getitem_1578 = getitem_1579 = getitem_1580 = getitem_1581 = getitem_1582 = getitem_1583 = getitem_1584 = getitem_1585 = getitem_1586 = getitem_1587 = getitem_1588 = getitem_1589 = getitem_1590 = getitem_1591 = getitem_1592 = getitem_1593 = getitem_1594 = getitem_1595 = getitem_1596 = getitem_1597 = getitem_1598 = getitem_1599 = getitem_1600 = getitem_1601 = getitem_1602 = getitem_1603 = getitem_1604 = getitem_1605 = getitem_1606 = getitem_1607 = getitem_1608 = getitem_1609 = getitem_1610 = getitem_1611 = getitem_1612 = getitem_1613 = getitem_1614 = getitem_1615 = getitem_1616 = getitem_1617 = getitem_1618 = getitem_1619 = getitem_1620 = getitem_1621 = getitem_1622 = getitem_1623 = getitem_1624 = getitem_1625 = getitem_1626 = getitem_1627 = None\n getitem_1628 = _foreach_neg[0]\n getitem_1629 = _foreach_neg[1]\n getitem_1630 = _foreach_neg[2]\n getitem_1631 = _foreach_neg[3]\n getitem_1632 = _foreach_neg[4]\n getitem_1633 = _foreach_neg[5]\n getitem_1634 = _foreach_neg[6]\n getitem_1635 = _foreach_neg[7]\n 
getitem_1636 = _foreach_neg[8]\n getitem_1637 = _foreach_neg[9]\n getitem_1638 = _foreach_neg[10]\n getitem_1639 = _foreach_neg[11]\n getitem_1640 = _foreach_neg[12]\n getitem_1641 = _foreach_neg[13]\n getitem_1642 = _foreach_neg[14]\n getitem_1643 = _foreach_neg[15]\n getitem_1644 = _foreach_neg[16]\n getitem_1645 = _foreach_neg[17]\n getitem_1646 = _foreach_neg[18]\n getitem_1647 = _foreach_neg[19]\n getitem_1648 = _foreach_neg[20]\n getitem_1649 = _foreach_neg[21]\n getitem_1650 = _foreach_neg[22]\n getitem_1651 = _foreach_neg[23]\n getitem_1652 = _foreach_neg[24]\n getitem_1653 = _foreach_neg[25]\n getitem_1654 = _foreach_neg[26]\n getitem_1655 = _foreach_neg[27]\n getitem_1656 = _foreach_neg[28]\n getitem_1657 = _foreach_neg[29]\n getitem_1658 = _foreach_neg[30]\n getitem_1659 = _foreach_neg[31]\n getitem_1660 = _foreach_neg[32]\n getitem_1661 = _foreach_neg[33]\n getitem_1662 = _foreach_neg[34]\n getitem_1663 = _foreach_neg[35]\n getitem_1664 = _foreach_neg[36]\n getitem_1665 = _foreach_neg[37]\n getitem_1666 = _foreach_neg[38]\n getitem_1667 = _foreach_neg[39]\n getitem_1668 = _foreach_neg[40]\n getitem_1669 = _foreach_neg[41]\n getitem_1670 = _foreach_neg[42]\n getitem_1671 = _foreach_neg[43]\n getitem_1672 = _foreach_neg[44]\n getitem_1673 = _foreach_neg[45]\n getitem_1674 = _foreach_neg[46]\n getitem_1675 = _foreach_neg[47]\n getitem_1676 = _foreach_neg[48]\n getitem_1677 = _foreach_neg[49]\n getitem_1678 = _foreach_neg[50]\n getitem_1679 = _foreach_neg[51]\n getitem_1680 = _foreach_neg[52]\n getitem_1681 = _foreach_neg[53]\n getitem_1682 = _foreach_neg[54]\n getitem_1683 = _foreach_neg[55]\n getitem_1684 = _foreach_neg[56]\n getitem_1685 = _foreach_neg[57]\n getitem_1686 = _foreach_neg[58]\n getitem_1687 = _foreach_neg[59]\n getitem_1688 = _foreach_neg[60]\n getitem_1689 = _foreach_neg[61]\n getitem_1690 = _foreach_neg[62]\n getitem_1691 = _foreach_neg[63]\n getitem_1692 = _foreach_neg[64]\n getitem_1693 = _foreach_neg[65]\n getitem_1694 = _foreach_neg[66]\n getitem_1695 = _foreach_neg[67]\n getitem_1696 = _foreach_neg[68]\n getitem_1697 = _foreach_neg[69]\n getitem_1698 = _foreach_neg[70]\n getitem_1699 = _foreach_neg[71]\n getitem_1700 = _foreach_neg[72]\n getitem_1701 = _foreach_neg[73]\n getitem_1702 = _foreach_neg[74]\n getitem_1703 = _foreach_neg[75]\n getitem_1704 = _foreach_neg[76]\n getitem_1705 = _foreach_neg[77]\n getitem_1706 = _foreach_neg[78]\n getitem_1707 = _foreach_neg[79]\n getitem_1708 = _foreach_neg[80]\n getitem_1709 = _foreach_neg[81]\n getitem_1710 = _foreach_neg[82]\n getitem_1711 = _foreach_neg[83]\n getitem_1712 = _foreach_neg[84]\n getitem_1713 = _foreach_neg[85]\n getitem_1714 = _foreach_neg[86]\n getitem_1715 = _foreach_neg[87]\n getitem_1716 = _foreach_neg[88]\n getitem_1717 = _foreach_neg[89]\n getitem_1718 = _foreach_neg[90]\n getitem_1719 = _foreach_neg[91]\n getitem_1720 = _foreach_neg[92]\n getitem_1721 = _foreach_neg[93]\n getitem_1722 = _foreach_neg[94]\n getitem_1723 = _foreach_neg[95]\n getitem_1724 = _foreach_neg[96]\n getitem_1725 = _foreach_neg[97]\n getitem_1726 = _foreach_neg[98]\n getitem_1727 = _foreach_neg[99]\n getitem_1728 = _foreach_neg[100]\n getitem_1729 = _foreach_neg[101]\n getitem_1730 = _foreach_neg[102]\n getitem_1731 = _foreach_neg[103]\n getitem_1732 = _foreach_neg[104]\n getitem_1733 = _foreach_neg[105]\n getitem_1734 = _foreach_neg[106]\n getitem_1735 = _foreach_neg[107]\n getitem_1736 = _foreach_neg[108]\n getitem_1737 = _foreach_neg[109]\n getitem_1738 = _foreach_neg[110]\n getitem_1739 = _foreach_neg[111]\n 
getitem_1740 = _foreach_neg[112]\n getitem_1741 = _foreach_neg[113]\n getitem_1742 = _foreach_neg[114]\n getitem_1743 = _foreach_neg[115]\n getitem_1744 = _foreach_neg[116]\n getitem_1745 = _foreach_neg[117]\n getitem_1746 = _foreach_neg[118]\n getitem_1747 = _foreach_neg[119]\n getitem_1748 = _foreach_neg[120]\n getitem_1749 = _foreach_neg[121]\n getitem_1750 = _foreach_neg[122]\n getitem_1751 = _foreach_neg[123]\n getitem_1752 = _foreach_neg[124]\n getitem_1753 = _foreach_neg[125]\n getitem_1754 = _foreach_neg[126]\n getitem_1755 = _foreach_neg[127]\n getitem_1756 = _foreach_neg[128]\n getitem_1757 = _foreach_neg[129]\n getitem_1758 = _foreach_neg[130]\n getitem_1759 = _foreach_neg[131]\n getitem_1760 = _foreach_neg[132]\n getitem_1761 = _foreach_neg[133]\n getitem_1762 = _foreach_neg[134]\n getitem_1763 = _foreach_neg[135]\n getitem_1764 = _foreach_neg[136]\n getitem_1765 = _foreach_neg[137]\n getitem_1766 = _foreach_neg[138]\n getitem_1767 = _foreach_neg[139]\n getitem_1768 = _foreach_neg[140]\n getitem_1769 = _foreach_neg[141]\n getitem_1770 = _foreach_neg[142]\n getitem_1771 = _foreach_neg[143]\n getitem_1772 = _foreach_neg[144]\n getitem_1773 = _foreach_neg[145]\n getitem_1774 = _foreach_neg[146]\n getitem_1775 = _foreach_neg[147]; _foreach_neg = None\n _foreach_div = torch.ops.aten._foreach_div.Scalar([getitem_1332, getitem_1333, getitem_1334, getitem_1335, getitem_1336, getitem_1337, getitem_1338, getitem_1339, getitem_1340, getitem_1341, getitem_1342, getitem_1343, getitem_1344, getitem_1345, getitem_1346, getitem_1347, getitem_1348, getitem_1349, getitem_1350, getitem_1351, getitem_1352, getitem_1353, getitem_1354, getitem_1355, getitem_1356, getitem_1357, getitem_1358, getitem_1359, getitem_1360, getitem_1361, getitem_1362, getitem_1363, getitem_1364, getitem_1365, getitem_1366, getitem_1367, getitem_1368, getitem_1369, getitem_1370, getitem_1371, getitem_1372, getitem_1373, getitem_1374, getitem_1375, getitem_1376, getitem_1377, getitem_1378, getitem_1379, getitem_1380, getitem_1381, getitem_1382, getitem_1383, getitem_1384, getitem_1385, getitem_1386, getitem_1387, getitem_1388, getitem_1389, getitem_1390, getitem_1391, getitem_1392, getitem_1393, getitem_1394, getitem_1395, getitem_1396, getitem_1397, getitem_1398, getitem_1399, getitem_1400, getitem_1401, getitem_1402, getitem_1403, getitem_1404, getitem_1405, getitem_1406, getitem_1407, getitem_1408, getitem_1409, getitem_1410, getitem_1411, getitem_1412, getitem_1413, getitem_1414, getitem_1415, getitem_1416, getitem_1417, getitem_1418, getitem_1419, getitem_1420, getitem_1421, getitem_1422, getitem_1423, getitem_1424, getitem_1425, getitem_1426, getitem_1427, getitem_1428, getitem_1429, getitem_1430, getitem_1431, getitem_1432, getitem_1433, getitem_1434, getitem_1435, getitem_1436, getitem_1437, getitem_1438, getitem_1439, getitem_1440, getitem_1441, getitem_1442, getitem_1443, getitem_1444, getitem_1445, getitem_1446, getitem_1447, getitem_1448, getitem_1449, getitem_1450, getitem_1451, getitem_1452, getitem_1453, getitem_1454, getitem_1455, getitem_1456, getitem_1457, getitem_1458, getitem_1459, getitem_1460, getitem_1461, getitem_1462, getitem_1463, getitem_1464, getitem_1465, getitem_1466, getitem_1467, getitem_1468, getitem_1469, getitem_1470, getitem_1471, getitem_1472, getitem_1473, getitem_1474, getitem_1475, getitem_1476, getitem_1477, getitem_1478, getitem_1479], 0.01); getitem_1332 = getitem_1333 = getitem_1334 = getitem_1335 = getitem_1336 = getitem_1337 = getitem_1338 = getitem_1339 = getitem_1340 = 
getitem_1341 = getitem_1342 = getitem_1343 = getitem_1344 = getitem_1345 = getitem_1346 = getitem_1347 = getitem_1348 = getitem_1349 = getitem_1350 = getitem_1351 = getitem_1352 = getitem_1353 = getitem_1354 = getitem_1355 = getitem_1356 = getitem_1357 = getitem_1358 = getitem_1359 = getitem_1360 = getitem_1361 = getitem_1362 = getitem_1363 = getitem_1364 = getitem_1365 = getitem_1366 = getitem_1367 = getitem_1368 = getitem_1369 = getitem_1370 = getitem_1371 = getitem_1372 = getitem_1373 = getitem_1374 = getitem_1375 = getitem_1376 = getitem_1377 = getitem_1378 = getitem_1379 = getitem_1380 = getitem_1381 = getitem_1382 = getitem_1383 = getitem_1384 = getitem_1385 = getitem_1386 = getitem_1387 = getitem_1388 = getitem_1389 = getitem_1390 = getitem_1391 = getitem_1392 = getitem_1393 = getitem_1394 = getitem_1395 = getitem_1396 = getitem_1397 = getitem_1398 = getitem_1399 = getitem_1400 = getitem_1401 = getitem_1402 = getitem_1403 = getitem_1404 = getitem_1405 = getitem_1406 = getitem_1407 = getitem_1408 = getitem_1409 = getitem_1410 = getitem_1411 = getitem_1412 = getitem_1413 = getitem_1414 = getitem_1415 = getitem_1416 = getitem_1417 = getitem_1418 = getitem_1419 = getitem_1420 = getitem_1421 = getitem_1422 = getitem_1423 = getitem_1424 = getitem_1425 = getitem_1426 = getitem_1427 = getitem_1428 = getitem_1429 = getitem_1430 = getitem_1431 = getitem_1432 = getitem_1433 = getitem_1434 = getitem_1435 = getitem_1436 = getitem_1437 = getitem_1438 = getitem_1439 = getitem_1440 = getitem_1441 = getitem_1442 = getitem_1443 = getitem_1444 = getitem_1445 = getitem_1446 = getitem_1447 = getitem_1448 = getitem_1449 = getitem_1450 = getitem_1451 = getitem_1452 = getitem_1453 = getitem_1454 = getitem_1455 = getitem_1456 = getitem_1457 = getitem_1458 = getitem_1459 = getitem_1460 = getitem_1461 = getitem_1462 = getitem_1463 = getitem_1464 = getitem_1465 = getitem_1466 = getitem_1467 = getitem_1468 = getitem_1469 = getitem_1470 = getitem_1471 = getitem_1472 = getitem_1473 = getitem_1474 = getitem_1475 = getitem_1476 = getitem_1477 = getitem_1478 = getitem_1479 = None\n getitem_1776 = _foreach_div[0]\n getitem_1777 = _foreach_div[1]\n getitem_1778 = _foreach_div[2]\n getitem_1779 = _foreach_div[3]\n getitem_1780 = _foreach_div[4]\n getitem_1781 = _foreach_div[5]\n getitem_1782 = _foreach_div[6]\n getitem_1783 = _foreach_div[7]\n getitem_1784 = _foreach_div[8]\n getitem_1785 = _foreach_div[9]\n getitem_1786 = _foreach_div[10]\n getitem_1787 = _foreach_div[11]\n getitem_1788 = _foreach_div[12]\n getitem_1789 = _foreach_div[13]\n getitem_1790 = _foreach_div[14]\n getitem_1791 = _foreach_div[15]\n getitem_1792 = _foreach_div[16]\n getitem_1793 = _foreach_div[17]\n getitem_1794 = _foreach_div[18]\n getitem_1795 = _foreach_div[19]\n getitem_1796 = _foreach_div[20]\n getitem_1797 = _foreach_div[21]\n getitem_1798 = _foreach_div[22]\n getitem_1799 = _foreach_div[23]\n getitem_1800 = _foreach_div[24]\n getitem_1801 = _foreach_div[25]\n getitem_1802 = _foreach_div[26]\n getitem_1803 = _foreach_div[27]\n getitem_1804 = _foreach_div[28]\n getitem_1805 = _foreach_div[29]\n getitem_1806 = _foreach_div[30]\n getitem_1807 = _foreach_div[31]\n getitem_1808 = _foreach_div[32]\n getitem_1809 = _foreach_div[33]\n getitem_1810 = _foreach_div[34]\n getitem_1811 = _foreach_div[35]\n getitem_1812 = _foreach_div[36]\n getitem_1813 = _foreach_div[37]\n getitem_1814 = _foreach_div[38]\n getitem_1815 = _foreach_div[39]\n getitem_1816 = _foreach_div[40]\n getitem_1817 = _foreach_div[41]\n getitem_1818 = _foreach_div[42]\n 
getitem_1819 = _foreach_div[43]\n getitem_1820 = _foreach_div[44]\n getitem_1821 = _foreach_div[45]\n getitem_1822 = _foreach_div[46]\n getitem_1823 = _foreach_div[47]\n getitem_1824 = _foreach_div[48]\n getitem_1825 = _foreach_div[49]\n getitem_1826 = _foreach_div[50]\n getitem_1827 = _foreach_div[51]\n getitem_1828 = _foreach_div[52]\n getitem_1829 = _foreach_div[53]\n getitem_1830 = _foreach_div[54]\n getitem_1831 = _foreach_div[55]\n getitem_1832 = _foreach_div[56]\n getitem_1833 = _foreach_div[57]\n getitem_1834 = _foreach_div[58]\n getitem_1835 = _foreach_div[59]\n getitem_1836 = _foreach_div[60]\n getitem_1837 = _foreach_div[61]\n getitem_1838 = _foreach_div[62]\n getitem_1839 = _foreach_div[63]\n getitem_1840 = _foreach_div[64]\n getitem_1841 = _foreach_div[65]\n getitem_1842 = _foreach_div[66]\n getitem_1843 = _foreach_div[67]\n getitem_1844 = _foreach_div[68]\n getitem_1845 = _foreach_div[69]\n getitem_1846 = _foreach_div[70]\n getitem_1847 = _foreach_div[71]\n getitem_1848 = _foreach_div[72]\n getitem_1849 = _foreach_div[73]\n getitem_1850 = _foreach_div[74]\n getitem_1851 = _foreach_div[75]\n getitem_1852 = _foreach_div[76]\n getitem_1853 = _foreach_div[77]\n getitem_1854 = _foreach_div[78]\n getitem_1855 = _foreach_div[79]\n getitem_1856 = _foreach_div[80]\n getitem_1857 = _foreach_div[81]\n getitem_1858 = _foreach_div[82]\n getitem_1859 = _foreach_div[83]\n getitem_1860 = _foreach_div[84]\n getitem_1861 = _foreach_div[85]\n getitem_1862 = _foreach_div[86]\n getitem_1863 = _foreach_div[87]\n getitem_1864 = _foreach_div[88]\n getitem_1865 = _foreach_div[89]\n getitem_1866 = _foreach_div[90]\n getitem_1867 = _foreach_div[91]\n getitem_1868 = _foreach_div[92]\n getitem_1869 = _foreach_div[93]\n getitem_1870 = _foreach_div[94]\n getitem_1871 = _foreach_div[95]\n getitem_1872 = _foreach_div[96]\n getitem_1873 = _foreach_div[97]\n getitem_1874 = _foreach_div[98]\n getitem_1875 = _foreach_div[99]\n getitem_1876 = _foreach_div[100]\n getitem_1877 = _foreach_div[101]\n getitem_1878 = _foreach_div[102]\n getitem_1879 = _foreach_div[103]\n getitem_1880 = _foreach_div[104]\n getitem_1881 = _foreach_div[105]\n getitem_1882 = _foreach_div[106]\n getitem_1883 = _foreach_div[107]\n getitem_1884 = _foreach_div[108]\n getitem_1885 = _foreach_div[109]\n getitem_1886 = _foreach_div[110]\n getitem_1887 = _foreach_div[111]\n getitem_1888 = _foreach_div[112]\n getitem_1889 = _foreach_div[113]\n getitem_1890 = _foreach_div[114]\n getitem_1891 = _foreach_div[115]\n getitem_1892 = _foreach_div[116]\n getitem_1893 = _foreach_div[117]\n getitem_1894 = _foreach_div[118]\n getitem_1895 = _foreach_div[119]\n getitem_1896 = _foreach_div[120]\n getitem_1897 = _foreach_div[121]\n getitem_1898 = _foreach_div[122]\n getitem_1899 = _foreach_div[123]\n getitem_1900 = _foreach_div[124]\n getitem_1901 = _foreach_div[125]\n getitem_1902 = _foreach_div[126]\n getitem_1903 = _foreach_div[127]\n getitem_1904 = _foreach_div[128]\n getitem_1905 = _foreach_div[129]\n getitem_1906 = _foreach_div[130]\n getitem_1907 = _foreach_div[131]\n getitem_1908 = _foreach_div[132]\n getitem_1909 = _foreach_div[133]\n getitem_1910 = _foreach_div[134]\n getitem_1911 = _foreach_div[135]\n getitem_1912 = _foreach_div[136]\n getitem_1913 = _foreach_div[137]\n getitem_1914 = _foreach_div[138]\n getitem_1915 = _foreach_div[139]\n getitem_1916 = _foreach_div[140]\n getitem_1917 = _foreach_div[141]\n getitem_1918 = _foreach_div[142]\n getitem_1919 = _foreach_div[143]\n getitem_1920 = _foreach_div[144]\n getitem_1921 = _foreach_div[145]\n 
getitem_1922 = _foreach_div[146]\n getitem_1923 = _foreach_div[147]; _foreach_div = None\n _foreach_reciprocal = torch.ops.aten._foreach_reciprocal.default([getitem_1776, getitem_1777, getitem_1778, getitem_1779, getitem_1780, getitem_1781, getitem_1782, getitem_1783, getitem_1784, getitem_1785, getitem_1786, getitem_1787, getitem_1788, getitem_1789, getitem_1790, getitem_1791, getitem_1792, getitem_1793, getitem_1794, getitem_1795, getitem_1796, getitem_1797, getitem_1798, getitem_1799, getitem_1800, getitem_1801, getitem_1802, getitem_1803, getitem_1804, getitem_1805, getitem_1806, getitem_1807, getitem_1808, getitem_1809, getitem_1810, getitem_1811, getitem_1812, getitem_1813, getitem_1814, getitem_1815, getitem_1816, getitem_1817, getitem_1818, getitem_1819, getitem_1820, getitem_1821, getitem_1822, getitem_1823, getitem_1824, getitem_1825, getitem_1826, getitem_1827, getitem_1828, getitem_1829, getitem_1830, getitem_1831, getitem_1832, getitem_1833, getitem_1834, getitem_1835, getitem_1836, getitem_1837, getitem_1838, getitem_1839, getitem_1840, getitem_1841, getitem_1842, getitem_1843, getitem_1844, getitem_1845, getitem_1846, getitem_1847, getitem_1848, getitem_1849, getitem_1850, getitem_1851, getitem_1852, getitem_1853, getitem_1854, getitem_1855, getitem_1856, getitem_1857, getitem_1858, getitem_1859, getitem_1860, getitem_1861, getitem_1862, getitem_1863, getitem_1864, getitem_1865, getitem_1866, getitem_1867, getitem_1868, getitem_1869, getitem_1870, getitem_1871, getitem_1872, getitem_1873, getitem_1874, getitem_1875, getitem_1876, getitem_1877, getitem_1878, getitem_1879, getitem_1880, getitem_1881, getitem_1882, getitem_1883, getitem_1884, getitem_1885, getitem_1886, getitem_1887, getitem_1888, getitem_1889, getitem_1890, getitem_1891, getitem_1892, getitem_1893, getitem_1894, getitem_1895, getitem_1896, getitem_1897, getitem_1898, getitem_1899, getitem_1900, getitem_1901, getitem_1902, getitem_1903, getitem_1904, getitem_1905, getitem_1906, getitem_1907, getitem_1908, getitem_1909, getitem_1910, getitem_1911, getitem_1912, getitem_1913, getitem_1914, getitem_1915, getitem_1916, getitem_1917, getitem_1918, getitem_1919, getitem_1920, getitem_1921, getitem_1922, getitem_1923]); getitem_1776 = getitem_1777 = getitem_1778 = getitem_1779 = getitem_1780 = getitem_1781 = getitem_1782 = getitem_1783 = getitem_1784 = getitem_1785 = getitem_1786 = getitem_1787 = getitem_1788 = getitem_1789 = getitem_1790 = getitem_1791 = getitem_1792 = getitem_1793 = getitem_1794 = getitem_1795 = getitem_1796 = getitem_1797 = getitem_1798 = getitem_1799 = getitem_1800 = getitem_1801 = getitem_1802 = getitem_1803 = getitem_1804 = getitem_1805 = getitem_1806 = getitem_1807 = getitem_1808 = getitem_1809 = getitem_1810 = getitem_1811 = getitem_1812 = getitem_1813 = getitem_1814 = getitem_1815 = getitem_1816 = getitem_1817 = getitem_1818 = getitem_1819 = getitem_1820 = getitem_1821 = getitem_1822 = getitem_1823 = getitem_1824 = getitem_1825 = getitem_1826 = getitem_1827 = getitem_1828 = getitem_1829 = getitem_1830 = getitem_1831 = getitem_1832 = getitem_1833 = getitem_1834 = getitem_1835 = getitem_1836 = getitem_1837 = getitem_1838 = getitem_1839 = getitem_1840 = getitem_1841 = getitem_1842 = getitem_1843 = getitem_1844 = getitem_1845 = getitem_1846 = getitem_1847 = getitem_1848 = getitem_1849 = getitem_1850 = getitem_1851 = getitem_1852 = getitem_1853 = getitem_1854 = getitem_1855 = getitem_1856 = getitem_1857 = getitem_1858 = getitem_1859 = getitem_1860 = getitem_1861 = getitem_1862 = getitem_1863 = 
getitem_1864 = getitem_1865 = getitem_1866 = getitem_1867 = getitem_1868 = getitem_1869 = getitem_1870 = getitem_1871 = getitem_1872 = getitem_1873 = getitem_1874 = getitem_1875 = getitem_1876 = getitem_1877 = getitem_1878 = getitem_1879 = getitem_1880 = getitem_1881 = getitem_1882 = getitem_1883 = getitem_1884 = getitem_1885 = getitem_1886 = getitem_1887 = getitem_1888 = getitem_1889 = getitem_1890 = getitem_1891 = getitem_1892 = getitem_1893 = getitem_1894 = getitem_1895 = getitem_1896 = getitem_1897 = getitem_1898 = getitem_1899 = getitem_1900 = getitem_1901 = getitem_1902 = getitem_1903 = getitem_1904 = getitem_1905 = getitem_1906 = getitem_1907 = getitem_1908 = getitem_1909 = getitem_1910 = getitem_1911 = getitem_1912 = getitem_1913 = getitem_1914 = getitem_1915 = getitem_1916 = getitem_1917 = getitem_1918 = getitem_1919 = getitem_1920 = getitem_1921 = getitem_1922 = getitem_1923 = None\n getitem_1924 = _foreach_reciprocal[0]\n getitem_1925 = _foreach_reciprocal[1]\n getitem_1926 = _foreach_reciprocal[2]\n getitem_1927 = _foreach_reciprocal[3]\n getitem_1928 = _foreach_reciprocal[4]\n getitem_1929 = _foreach_reciprocal[5]\n getitem_1930 = _foreach_reciprocal[6]\n getitem_1931 = _foreach_reciprocal[7]\n getitem_1932 = _foreach_reciprocal[8]\n getitem_1933 = _foreach_reciprocal[9]\n getitem_1934 = _foreach_reciprocal[10]\n getitem_1935 = _foreach_reciprocal[11]\n getitem_1936 = _foreach_reciprocal[12]\n getitem_1937 = _foreach_reciprocal[13]\n getitem_1938 = _foreach_reciprocal[14]\n getitem_1939 = _foreach_reciprocal[15]\n getitem_1940 = _foreach_reciprocal[16]\n getitem_1941 = _foreach_reciprocal[17]\n getitem_1942 = _foreach_reciprocal[18]\n getitem_1943 = _foreach_reciprocal[19]\n getitem_1944 = _foreach_reciprocal[20]\n getitem_1945 = _foreach_reciprocal[21]\n getitem_1946 = _foreach_reciprocal[22]\n getitem_1947 = _foreach_reciprocal[23]\n getitem_1948 = _foreach_reciprocal[24]\n getitem_1949 = _foreach_reciprocal[25]\n getitem_1950 = _foreach_reciprocal[26]\n getitem_1951 = _foreach_reciprocal[27]\n getitem_1952 = _foreach_reciprocal[28]\n getitem_1953 = _foreach_reciprocal[29]\n getitem_1954 = _foreach_reciprocal[30]\n getitem_1955 = _foreach_reciprocal[31]\n getitem_1956 = _foreach_reciprocal[32]\n getitem_1957 = _foreach_reciprocal[33]\n getitem_1958 = _foreach_reciprocal[34]\n getitem_1959 = _foreach_reciprocal[35]\n getitem_1960 = _foreach_reciprocal[36]\n getitem_1961 = _foreach_reciprocal[37]\n getitem_1962 = _foreach_reciprocal[38]\n getitem_1963 = _foreach_reciprocal[39]\n getitem_1964 = _foreach_reciprocal[40]\n getitem_1965 = _foreach_reciprocal[41]\n getitem_1966 = _foreach_reciprocal[42]\n getitem_1967 = _foreach_reciprocal[43]\n getitem_1968 = _foreach_reciprocal[44]\n getitem_1969 = _foreach_reciprocal[45]\n getitem_1970 = _foreach_reciprocal[46]\n getitem_1971 = _foreach_reciprocal[47]\n getitem_1972 = _foreach_reciprocal[48]\n getitem_1973 = _foreach_reciprocal[49]\n getitem_1974 = _foreach_reciprocal[50]\n getitem_1975 = _foreach_reciprocal[51]\n getitem_1976 = _foreach_reciprocal[52]\n getitem_1977 = _foreach_reciprocal[53]\n getitem_1978 = _foreach_reciprocal[54]\n getitem_1979 = _foreach_reciprocal[55]\n getitem_1980 = _foreach_reciprocal[56]\n getitem_1981 = _foreach_reciprocal[57]\n getitem_1982 = _foreach_reciprocal[58]\n getitem_1983 = _foreach_reciprocal[59]\n getitem_1984 = _foreach_reciprocal[60]\n getitem_1985 = _foreach_reciprocal[61]\n getitem_1986 = _foreach_reciprocal[62]\n getitem_1987 = _foreach_reciprocal[63]\n getitem_1988 = 
_foreach_reciprocal[64]\n getitem_1989 = _foreach_reciprocal[65]\n getitem_1990 = _foreach_reciprocal[66]\n getitem_1991 = _foreach_reciprocal[67]\n getitem_1992 = _foreach_reciprocal[68]\n getitem_1993 = _foreach_reciprocal[69]\n getitem_1994 = _foreach_reciprocal[70]\n getitem_1995 = _foreach_reciprocal[71]\n getitem_1996 = _foreach_reciprocal[72]\n getitem_1997 = _foreach_reciprocal[73]\n getitem_1998 = _foreach_reciprocal[74]\n getitem_1999 = _foreach_reciprocal[75]\n getitem_2000 = _foreach_reciprocal[76]\n getitem_2001 = _foreach_reciprocal[77]\n getitem_2002 = _foreach_reciprocal[78]\n getitem_2003 = _foreach_reciprocal[79]\n getitem_2004 = _foreach_reciprocal[80]\n getitem_2005 = _foreach_reciprocal[81]\n getitem_2006 = _foreach_reciprocal[82]\n getitem_2007 = _foreach_reciprocal[83]\n getitem_2008 = _foreach_reciprocal[84]\n getitem_2009 = _foreach_reciprocal[85]\n getitem_2010 = _foreach_reciprocal[86]\n getitem_2011 = _foreach_reciprocal[87]\n getitem_2012 = _foreach_reciprocal[88]\n getitem_2013 = _foreach_reciprocal[89]\n getitem_2014 = _foreach_reciprocal[90]\n getitem_2015 = _foreach_reciprocal[91]\n getitem_2016 = _foreach_reciprocal[92]\n getitem_2017 = _foreach_reciprocal[93]\n getitem_2018 = _foreach_reciprocal[94]\n getitem_2019 = _foreach_reciprocal[95]\n getitem_2020 = _foreach_reciprocal[96]\n getitem_2021 = _foreach_reciprocal[97]\n getitem_2022 = _foreach_reciprocal[98]\n getitem_2023 = _foreach_reciprocal[99]\n getitem_2024 = _foreach_reciprocal[100]\n getitem_2025 = _foreach_reciprocal[101]\n getitem_2026 = _foreach_reciprocal[102]\n getitem_2027 = _foreach_reciprocal[103]\n getitem_2028 = _foreach_reciprocal[104]\n getitem_2029 = _foreach_reciprocal[105]\n getitem_2030 = _foreach_reciprocal[106]\n getitem_2031 = _foreach_reciprocal[107]\n getitem_2032 = _foreach_reciprocal[108]\n getitem_2033 = _foreach_reciprocal[109]\n getitem_2034 = _foreach_reciprocal[110]\n getitem_2035 = _foreach_reciprocal[111]\n getitem_2036 = _foreach_reciprocal[112]\n getitem_2037 = _foreach_reciprocal[113]\n getitem_2038 = _foreach_reciprocal[114]\n getitem_2039 = _foreach_reciprocal[115]\n getitem_2040 = _foreach_reciprocal[116]\n getitem_2041 = _foreach_reciprocal[117]\n getitem_2042 = _foreach_reciprocal[118]\n getitem_2043 = _foreach_reciprocal[119]\n getitem_2044 = _foreach_reciprocal[120]\n getitem_2045 = _foreach_reciprocal[121]\n getitem_2046 = _foreach_reciprocal[122]\n getitem_2047 = _foreach_reciprocal[123]\n getitem_2048 = _foreach_reciprocal[124]\n getitem_2049 = _foreach_reciprocal[125]\n getitem_2050 = _foreach_reciprocal[126]\n getitem_2051 = _foreach_reciprocal[127]\n getitem_2052 = _foreach_reciprocal[128]\n getitem_2053 = _foreach_reciprocal[129]\n getitem_2054 = _foreach_reciprocal[130]\n getitem_2055 = _foreach_reciprocal[131]\n getitem_2056 = _foreach_reciprocal[132]\n getitem_2057 = _foreach_reciprocal[133]\n getitem_2058 = _foreach_reciprocal[134]\n getitem_2059 = _foreach_reciprocal[135]\n getitem_2060 = _foreach_reciprocal[136]\n getitem_2061 = _foreach_reciprocal[137]\n getitem_2062 = _foreach_reciprocal[138]\n getitem_2063 = _foreach_reciprocal[139]\n getitem_2064 = _foreach_reciprocal[140]\n getitem_2065 = _foreach_reciprocal[141]\n getitem_2066 = _foreach_reciprocal[142]\n getitem_2067 = _foreach_reciprocal[143]\n getitem_2068 = _foreach_reciprocal[144]\n getitem_2069 = _foreach_reciprocal[145]\n getitem_2070 = _foreach_reciprocal[146]\n getitem_2071 = _foreach_reciprocal[147]; _foreach_reciprocal = None\n _foreach_sqrt = 
torch.ops.aten._foreach_sqrt.default([getitem_1628, getitem_1629, getitem_1630, getitem_1631, getitem_1632, getitem_1633, getitem_1634, getitem_1635, getitem_1636, getitem_1637, getitem_1638, getitem_1639, getitem_1640, getitem_1641, getitem_1642, getitem_1643, getitem_1644, getitem_1645, getitem_1646, getitem_1647, getitem_1648, getitem_1649, getitem_1650, getitem_1651, getitem_1652, getitem_1653, getitem_1654, getitem_1655, getitem_1656, getitem_1657, getitem_1658, getitem_1659, getitem_1660, getitem_1661, getitem_1662, getitem_1663, getitem_1664, getitem_1665, getitem_1666, getitem_1667, getitem_1668, getitem_1669, getitem_1670, getitem_1671, getitem_1672, getitem_1673, getitem_1674, getitem_1675, getitem_1676, getitem_1677, getitem_1678, getitem_1679, getitem_1680, getitem_1681, getitem_1682, getitem_1683, getitem_1684, getitem_1685, getitem_1686, getitem_1687, getitem_1688, getitem_1689, getitem_1690, getitem_1691, getitem_1692, getitem_1693, getitem_1694, getitem_1695, getitem_1696, getitem_1697, getitem_1698, getitem_1699, getitem_1700, getitem_1701, getitem_1702, getitem_1703, getitem_1704, getitem_1705, getitem_1706, getitem_1707, getitem_1708, getitem_1709, getitem_1710, getitem_1711, getitem_1712, getitem_1713, getitem_1714, getitem_1715, getitem_1716, getitem_1717, getitem_1718, getitem_1719, getitem_1720, getitem_1721, getitem_1722, getitem_1723, getitem_1724, getitem_1725, getitem_1726, getitem_1727, getitem_1728, getitem_1729, getitem_1730, getitem_1731, getitem_1732, getitem_1733, getitem_1734, getitem_1735, getitem_1736, getitem_1737, getitem_1738, getitem_1739, getitem_1740, getitem_1741, getitem_1742, getitem_1743, getitem_1744, getitem_1745, getitem_1746, getitem_1747, getitem_1748, getitem_1749, getitem_1750, getitem_1751, getitem_1752, getitem_1753, getitem_1754, getitem_1755, getitem_1756, getitem_1757, getitem_1758, getitem_1759, getitem_1760, getitem_1761, getitem_1762, getitem_1763, getitem_1764, getitem_1765, getitem_1766, getitem_1767, getitem_1768, getitem_1769, getitem_1770, getitem_1771, getitem_1772, getitem_1773, getitem_1774, getitem_1775]); getitem_1628 = getitem_1629 = getitem_1630 = getitem_1631 = getitem_1632 = getitem_1633 = getitem_1634 = getitem_1635 = getitem_1636 = getitem_1637 = getitem_1638 = getitem_1639 = getitem_1640 = getitem_1641 = getitem_1642 = getitem_1643 = getitem_1644 = getitem_1645 = getitem_1646 = getitem_1647 = getitem_1648 = getitem_1649 = getitem_1650 = getitem_1651 = getitem_1652 = getitem_1653 = getitem_1654 = getitem_1655 = getitem_1656 = getitem_1657 = getitem_1658 = getitem_1659 = getitem_1660 = getitem_1661 = getitem_1662 = getitem_1663 = getitem_1664 = getitem_1665 = getitem_1666 = getitem_1667 = getitem_1668 = getitem_1669 = getitem_1670 = getitem_1671 = getitem_1672 = getitem_1673 = getitem_1674 = getitem_1675 = getitem_1676 = getitem_1677 = getitem_1678 = getitem_1679 = getitem_1680 = getitem_1681 = getitem_1682 = getitem_1683 = getitem_1684 = getitem_1685 = getitem_1686 = getitem_1687 = getitem_1688 = getitem_1689 = getitem_1690 = getitem_1691 = getitem_1692 = getitem_1693 = getitem_1694 = getitem_1695 = getitem_1696 = getitem_1697 = getitem_1698 = getitem_1699 = getitem_1700 = getitem_1701 = getitem_1702 = getitem_1703 = getitem_1704 = getitem_1705 = getitem_1706 = getitem_1707 = getitem_1708 = getitem_1709 = getitem_1710 = getitem_1711 = getitem_1712 = getitem_1713 = getitem_1714 = getitem_1715 = getitem_1716 = getitem_1717 = getitem_1718 = getitem_1719 = getitem_1720 = getitem_1721 = getitem_1722 = getitem_1723 = 
getitem_1724 = getitem_1725 = getitem_1726 = getitem_1727 = getitem_1728 = getitem_1729 = getitem_1730 = getitem_1731 = getitem_1732 = getitem_1733 = getitem_1734 = getitem_1735 = getitem_1736 = getitem_1737 = getitem_1738 = getitem_1739 = getitem_1740 = getitem_1741 = getitem_1742 = getitem_1743 = getitem_1744 = getitem_1745 = getitem_1746 = getitem_1747 = getitem_1748 = getitem_1749 = getitem_1750 = getitem_1751 = getitem_1752 = getitem_1753 = getitem_1754 = getitem_1755 = getitem_1756 = getitem_1757 = getitem_1758 = getitem_1759 = getitem_1760 = getitem_1761 = getitem_1762 = getitem_1763 = getitem_1764 = getitem_1765 = getitem_1766 = getitem_1767 = getitem_1768 = getitem_1769 = getitem_1770 = getitem_1771 = getitem_1772 = getitem_1773 = getitem_1774 = getitem_1775 = None\n getitem_2072 = _foreach_sqrt[0]\n getitem_2073 = _foreach_sqrt[1]\n getitem_2074 = _foreach_sqrt[2]\n getitem_2075 = _foreach_sqrt[3]\n getitem_2076 = _foreach_sqrt[4]\n getitem_2077 = _foreach_sqrt[5]\n getitem_2078 = _foreach_sqrt[6]\n getitem_2079 = _foreach_sqrt[7]\n getitem_2080 = _foreach_sqrt[8]\n getitem_2081 = _foreach_sqrt[9]\n getitem_2082 = _foreach_sqrt[10]\n getitem_2083 = _foreach_sqrt[11]\n getitem_2084 = _foreach_sqrt[12]\n getitem_2085 = _foreach_sqrt[13]\n getitem_2086 = _foreach_sqrt[14]\n getitem_2087 = _foreach_sqrt[15]\n getitem_2088 = _foreach_sqrt[16]\n getitem_2089 = _foreach_sqrt[17]\n getitem_2090 = _foreach_sqrt[18]\n getitem_2091 = _foreach_sqrt[19]\n getitem_2092 = _foreach_sqrt[20]\n getitem_2093 = _foreach_sqrt[21]\n getitem_2094 = _foreach_sqrt[22]\n getitem_2095 = _foreach_sqrt[23]\n getitem_2096 = _foreach_sqrt[24]\n getitem_2097 = _foreach_sqrt[25]\n getitem_2098 = _foreach_sqrt[26]\n getitem_2099 = _foreach_sqrt[27]\n getitem_2100 = _foreach_sqrt[28]\n getitem_2101 = _foreach_sqrt[29]\n getitem_2102 = _foreach_sqrt[30]\n getitem_2103 = _foreach_sqrt[31]\n getitem_2104 = _foreach_sqrt[32]\n getitem_2105 = _foreach_sqrt[33]\n getitem_2106 = _foreach_sqrt[34]\n getitem_2107 = _foreach_sqrt[35]\n getitem_2108 = _foreach_sqrt[36]\n getitem_2109 = _foreach_sqrt[37]\n getitem_2110 = _foreach_sqrt[38]\n getitem_2111 = _foreach_sqrt[39]\n getitem_2112 = _foreach_sqrt[40]\n getitem_2113 = _foreach_sqrt[41]\n getitem_2114 = _foreach_sqrt[42]\n getitem_2115 = _foreach_sqrt[43]\n getitem_2116 = _foreach_sqrt[44]\n getitem_2117 = _foreach_sqrt[45]\n getitem_2118 = _foreach_sqrt[46]\n getitem_2119 = _foreach_sqrt[47]\n getitem_2120 = _foreach_sqrt[48]\n getitem_2121 = _foreach_sqrt[49]\n getitem_2122 = _foreach_sqrt[50]\n getitem_2123 = _foreach_sqrt[51]\n getitem_2124 = _foreach_sqrt[52]\n getitem_2125 = _foreach_sqrt[53]\n getitem_2126 = _foreach_sqrt[54]\n getitem_2127 = _foreach_sqrt[55]\n getitem_2128 = _foreach_sqrt[56]\n getitem_2129 = _foreach_sqrt[57]\n getitem_2130 = _foreach_sqrt[58]\n getitem_2131 = _foreach_sqrt[59]\n getitem_2132 = _foreach_sqrt[60]\n getitem_2133 = _foreach_sqrt[61]\n getitem_2134 = _foreach_sqrt[62]\n getitem_2135 = _foreach_sqrt[63]\n getitem_2136 = _foreach_sqrt[64]\n getitem_2137 = _foreach_sqrt[65]\n getitem_2138 = _foreach_sqrt[66]\n getitem_2139 = _foreach_sqrt[67]\n getitem_2140 = _foreach_sqrt[68]\n getitem_2141 = _foreach_sqrt[69]\n getitem_2142 = _foreach_sqrt[70]\n getitem_2143 = _foreach_sqrt[71]\n getitem_2144 = _foreach_sqrt[72]\n getitem_2145 = _foreach_sqrt[73]\n getitem_2146 = _foreach_sqrt[74]\n getitem_2147 = _foreach_sqrt[75]\n getitem_2148 = _foreach_sqrt[76]\n getitem_2149 = _foreach_sqrt[77]\n getitem_2150 = _foreach_sqrt[78]\n getitem_2151 
= _foreach_sqrt[79]\n getitem_2152 = _foreach_sqrt[80]\n getitem_2153 = _foreach_sqrt[81]\n getitem_2154 = _foreach_sqrt[82]\n getitem_2155 = _foreach_sqrt[83]\n getitem_2156 = _foreach_sqrt[84]\n getitem_2157 = _foreach_sqrt[85]\n getitem_2158 = _foreach_sqrt[86]\n getitem_2159 = _foreach_sqrt[87]\n getitem_2160 = _foreach_sqrt[88]\n getitem_2161 = _foreach_sqrt[89]\n getitem_2162 = _foreach_sqrt[90]\n getitem_2163 = _foreach_sqrt[91]\n getitem_2164 = _foreach_sqrt[92]\n getitem_2165 = _foreach_sqrt[93]\n getitem_2166 = _foreach_sqrt[94]\n getitem_2167 = _foreach_sqrt[95]\n getitem_2168 = _foreach_sqrt[96]\n getitem_2169 = _foreach_sqrt[97]\n getitem_2170 = _foreach_sqrt[98]\n getitem_2171 = _foreach_sqrt[99]\n getitem_2172 = _foreach_sqrt[100]\n getitem_2173 = _foreach_sqrt[101]\n getitem_2174 = _foreach_sqrt[102]\n getitem_2175 = _foreach_sqrt[103]\n getitem_2176 = _foreach_sqrt[104]\n getitem_2177 = _foreach_sqrt[105]\n getitem_2178 = _foreach_sqrt[106]\n getitem_2179 = _foreach_sqrt[107]\n getitem_2180 = _foreach_sqrt[108]\n getitem_2181 = _foreach_sqrt[109]\n getitem_2182 = _foreach_sqrt[110]\n getitem_2183 = _foreach_sqrt[111]\n getitem_2184 = _foreach_sqrt[112]\n getitem_2185 = _foreach_sqrt[113]\n getitem_2186 = _foreach_sqrt[114]\n getitem_2187 = _foreach_sqrt[115]\n getitem_2188 = _foreach_sqrt[116]\n getitem_2189 = _foreach_sqrt[117]\n getitem_2190 = _foreach_sqrt[118]\n getitem_2191 = _foreach_sqrt[119]\n getitem_2192 = _foreach_sqrt[120]\n getitem_2193 = _foreach_sqrt[121]\n getitem_2194 = _foreach_sqrt[122]\n getitem_2195 = _foreach_sqrt[123]\n getitem_2196 = _foreach_sqrt[124]\n getitem_2197 = _foreach_sqrt[125]\n getitem_2198 = _foreach_sqrt[126]\n getitem_2199 = _foreach_sqrt[127]\n getitem_2200 = _foreach_sqrt[128]\n getitem_2201 = _foreach_sqrt[129]\n getitem_2202 = _foreach_sqrt[130]\n getitem_2203 = _foreach_sqrt[131]\n getitem_2204 = _foreach_sqrt[132]\n getitem_2205 = _foreach_sqrt[133]\n getitem_2206 = _foreach_sqrt[134]\n getitem_2207 = _foreach_sqrt[135]\n getitem_2208 = _foreach_sqrt[136]\n getitem_2209 = _foreach_sqrt[137]\n getitem_2210 = _foreach_sqrt[138]\n getitem_2211 = _foreach_sqrt[139]\n getitem_2212 = _foreach_sqrt[140]\n getitem_2213 = _foreach_sqrt[141]\n getitem_2214 = _foreach_sqrt[142]\n getitem_2215 = _foreach_sqrt[143]\n getitem_2216 = _foreach_sqrt[144]\n getitem_2217 = _foreach_sqrt[145]\n getitem_2218 = _foreach_sqrt[146]\n getitem_2219 = _foreach_sqrt[147]; _foreach_sqrt = None\n _foreach_sqrt_1 = torch.ops.aten._foreach_sqrt.default([getitem_888, getitem_889, getitem_890, getitem_891, getitem_892, getitem_893, getitem_894, getitem_895, getitem_896, getitem_897, getitem_898, getitem_899, getitem_900, getitem_901, getitem_902, getitem_903, getitem_904, getitem_905, getitem_906, getitem_907, getitem_908, getitem_909, getitem_910, getitem_911, getitem_912, getitem_913, getitem_914, getitem_915, getitem_916, getitem_917, getitem_918, getitem_919, getitem_920, getitem_921, getitem_922, getitem_923, getitem_924, getitem_925, getitem_926, getitem_927, getitem_928, getitem_929, getitem_930, getitem_931, getitem_932, getitem_933, getitem_934, getitem_935, getitem_936, getitem_937, getitem_938, getitem_939, getitem_940, getitem_941, getitem_942, getitem_943, getitem_944, getitem_945, getitem_946, getitem_947, getitem_948, getitem_949, getitem_950, getitem_951, getitem_952, getitem_953, getitem_954, getitem_955, getitem_956, getitem_957, getitem_958, getitem_959, getitem_960, getitem_961, getitem_962, getitem_963, getitem_964, getitem_965, getitem_966, 
getitem_967, getitem_968, getitem_969, getitem_970, getitem_971, getitem_972, getitem_973, getitem_974, getitem_975, getitem_976, getitem_977, getitem_978, getitem_979, getitem_980, getitem_981, getitem_982, getitem_983, getitem_984, getitem_985, getitem_986, getitem_987, getitem_988, getitem_989, getitem_990, getitem_991, getitem_992, getitem_993, getitem_994, getitem_995, getitem_996, getitem_997, getitem_998, getitem_999, getitem_1000, getitem_1001, getitem_1002, getitem_1003, getitem_1004, getitem_1005, getitem_1006, getitem_1007, getitem_1008, getitem_1009, getitem_1010, getitem_1011, getitem_1012, getitem_1013, getitem_1014, getitem_1015, getitem_1016, getitem_1017, getitem_1018, getitem_1019, getitem_1020, getitem_1021, getitem_1022, getitem_1023, getitem_1024, getitem_1025, getitem_1026, getitem_1027, getitem_1028, getitem_1029, getitem_1030, getitem_1031, getitem_1032, getitem_1033, getitem_1034, getitem_1035])\n getitem_2220 = _foreach_sqrt_1[0]\n getitem_2221 = _foreach_sqrt_1[1]\n getitem_2222 = _foreach_sqrt_1[2]\n getitem_2223 = _foreach_sqrt_1[3]\n getitem_2224 = _foreach_sqrt_1[4]\n getitem_2225 = _foreach_sqrt_1[5]\n getitem_2226 = _foreach_sqrt_1[6]\n getitem_2227 = _foreach_sqrt_1[7]\n getitem_2228 = _foreach_sqrt_1[8]\n getitem_2229 = _foreach_sqrt_1[9]\n getitem_2230 = _foreach_sqrt_1[10]\n getitem_2231 = _foreach_sqrt_1[11]\n getitem_2232 = _foreach_sqrt_1[12]\n getitem_2233 = _foreach_sqrt_1[13]\n getitem_2234 = _foreach_sqrt_1[14]\n getitem_2235 = _foreach_sqrt_1[15]\n getitem_2236 = _foreach_sqrt_1[16]\n getitem_2237 = _foreach_sqrt_1[17]\n getitem_2238 = _foreach_sqrt_1[18]\n getitem_2239 = _foreach_sqrt_1[19]\n getitem_2240 = _foreach_sqrt_1[20]\n getitem_2241 = _foreach_sqrt_1[21]\n getitem_2242 = _foreach_sqrt_1[22]\n getitem_2243 = _foreach_sqrt_1[23]\n getitem_2244 = _foreach_sqrt_1[24]\n getitem_2245 = _foreach_sqrt_1[25]\n getitem_2246 = _foreach_sqrt_1[26]\n getitem_2247 = _foreach_sqrt_1[27]\n getitem_2248 = _foreach_sqrt_1[28]\n getitem_2249 = _foreach_sqrt_1[29]\n getitem_2250 = _foreach_sqrt_1[30]\n getitem_2251 = _foreach_sqrt_1[31]\n getitem_2252 = _foreach_sqrt_1[32]\n getitem_2253 = _foreach_sqrt_1[33]\n getitem_2254 = _foreach_sqrt_1[34]\n getitem_2255 = _foreach_sqrt_1[35]\n getitem_2256 = _foreach_sqrt_1[36]\n getitem_2257 = _foreach_sqrt_1[37]\n getitem_2258 = _foreach_sqrt_1[38]\n getitem_2259 = _foreach_sqrt_1[39]\n getitem_2260 = _foreach_sqrt_1[40]\n getitem_2261 = _foreach_sqrt_1[41]\n getitem_2262 = _foreach_sqrt_1[42]\n getitem_2263 = _foreach_sqrt_1[43]\n getitem_2264 = _foreach_sqrt_1[44]\n getitem_2265 = _foreach_sqrt_1[45]\n getitem_2266 = _foreach_sqrt_1[46]\n getitem_2267 = _foreach_sqrt_1[47]\n getitem_2268 = _foreach_sqrt_1[48]\n getitem_2269 = _foreach_sqrt_1[49]\n getitem_2270 = _foreach_sqrt_1[50]\n getitem_2271 = _foreach_sqrt_1[51]\n getitem_2272 = _foreach_sqrt_1[52]\n getitem_2273 = _foreach_sqrt_1[53]\n getitem_2274 = _foreach_sqrt_1[54]\n getitem_2275 = _foreach_sqrt_1[55]\n getitem_2276 = _foreach_sqrt_1[56]\n getitem_2277 = _foreach_sqrt_1[57]\n getitem_2278 = _foreach_sqrt_1[58]\n getitem_2279 = _foreach_sqrt_1[59]\n getitem_2280 = _foreach_sqrt_1[60]\n getitem_2281 = _foreach_sqrt_1[61]\n getitem_2282 = _foreach_sqrt_1[62]\n getitem_2283 = _foreach_sqrt_1[63]\n getitem_2284 = _foreach_sqrt_1[64]\n getitem_2285 = _foreach_sqrt_1[65]\n getitem_2286 = _foreach_sqrt_1[66]\n getitem_2287 = _foreach_sqrt_1[67]\n getitem_2288 = _foreach_sqrt_1[68]\n getitem_2289 = _foreach_sqrt_1[69]\n getitem_2290 = _foreach_sqrt_1[70]\n 
getitem_2291 = _foreach_sqrt_1[71]\n getitem_2292 = _foreach_sqrt_1[72]\n getitem_2293 = _foreach_sqrt_1[73]\n getitem_2294 = _foreach_sqrt_1[74]\n getitem_2295 = _foreach_sqrt_1[75]\n getitem_2296 = _foreach_sqrt_1[76]\n getitem_2297 = _foreach_sqrt_1[77]\n getitem_2298 = _foreach_sqrt_1[78]\n getitem_2299 = _foreach_sqrt_1[79]\n getitem_2300 = _foreach_sqrt_1[80]\n getitem_2301 = _foreach_sqrt_1[81]\n getitem_2302 = _foreach_sqrt_1[82]\n getitem_2303 = _foreach_sqrt_1[83]\n getitem_2304 = _foreach_sqrt_1[84]\n getitem_2305 = _foreach_sqrt_1[85]\n getitem_2306 = _foreach_sqrt_1[86]\n getitem_2307 = _foreach_sqrt_1[87]\n getitem_2308 = _foreach_sqrt_1[88]\n getitem_2309 = _foreach_sqrt_1[89]\n getitem_2310 = _foreach_sqrt_1[90]\n getitem_2311 = _foreach_sqrt_1[91]\n getitem_2312 = _foreach_sqrt_1[92]\n getitem_2313 = _foreach_sqrt_1[93]\n getitem_2314 = _foreach_sqrt_1[94]\n getitem_2315 = _foreach_sqrt_1[95]\n getitem_2316 = _foreach_sqrt_1[96]\n getitem_2317 = _foreach_sqrt_1[97]\n getitem_2318 = _foreach_sqrt_1[98]\n getitem_2319 = _foreach_sqrt_1[99]\n getitem_2320 = _foreach_sqrt_1[100]\n getitem_2321 = _foreach_sqrt_1[101]\n getitem_2322 = _foreach_sqrt_1[102]\n getitem_2323 = _foreach_sqrt_1[103]\n getitem_2324 = _foreach_sqrt_1[104]\n getitem_2325 = _foreach_sqrt_1[105]\n getitem_2326 = _foreach_sqrt_1[106]\n getitem_2327 = _foreach_sqrt_1[107]\n getitem_2328 = _foreach_sqrt_1[108]\n getitem_2329 = _foreach_sqrt_1[109]\n getitem_2330 = _foreach_sqrt_1[110]\n getitem_2331 = _foreach_sqrt_1[111]\n getitem_2332 = _foreach_sqrt_1[112]\n getitem_2333 = _foreach_sqrt_1[113]\n getitem_2334 = _foreach_sqrt_1[114]\n getitem_2335 = _foreach_sqrt_1[115]\n getitem_2336 = _foreach_sqrt_1[116]\n getitem_2337 = _foreach_sqrt_1[117]\n getitem_2338 = _foreach_sqrt_1[118]\n getitem_2339 = _foreach_sqrt_1[119]\n getitem_2340 = _foreach_sqrt_1[120]\n getitem_2341 = _foreach_sqrt_1[121]\n getitem_2342 = _foreach_sqrt_1[122]\n getitem_2343 = _foreach_sqrt_1[123]\n getitem_2344 = _foreach_sqrt_1[124]\n getitem_2345 = _foreach_sqrt_1[125]\n getitem_2346 = _foreach_sqrt_1[126]\n getitem_2347 = _foreach_sqrt_1[127]\n getitem_2348 = _foreach_sqrt_1[128]\n getitem_2349 = _foreach_sqrt_1[129]\n getitem_2350 = _foreach_sqrt_1[130]\n getitem_2351 = _foreach_sqrt_1[131]\n getitem_2352 = _foreach_sqrt_1[132]\n getitem_2353 = _foreach_sqrt_1[133]\n getitem_2354 = _foreach_sqrt_1[134]\n getitem_2355 = _foreach_sqrt_1[135]\n getitem_2356 = _foreach_sqrt_1[136]\n getitem_2357 = _foreach_sqrt_1[137]\n getitem_2358 = _foreach_sqrt_1[138]\n getitem_2359 = _foreach_sqrt_1[139]\n getitem_2360 = _foreach_sqrt_1[140]\n getitem_2361 = _foreach_sqrt_1[141]\n getitem_2362 = _foreach_sqrt_1[142]\n getitem_2363 = _foreach_sqrt_1[143]\n getitem_2364 = _foreach_sqrt_1[144]\n getitem_2365 = _foreach_sqrt_1[145]\n getitem_2366 = _foreach_sqrt_1[146]\n getitem_2367 = _foreach_sqrt_1[147]; _foreach_sqrt_1 = None\n _foreach_div_1 = torch.ops.aten._foreach_div.List([getitem_2220, getitem_2221, getitem_2222, getitem_2223, getitem_2224, getitem_2225, getitem_2226, getitem_2227, getitem_2228, getitem_2229, getitem_2230, getitem_2231, getitem_2232, getitem_2233, getitem_2234, getitem_2235, getitem_2236, getitem_2237, getitem_2238, getitem_2239, getitem_2240, getitem_2241, getitem_2242, getitem_2243, getitem_2244, getitem_2245, getitem_2246, getitem_2247, getitem_2248, getitem_2249, getitem_2250, getitem_2251, getitem_2252, getitem_2253, getitem_2254, getitem_2255, getitem_2256, getitem_2257, getitem_2258, getitem_2259, getitem_2260, 
getitem_2261, getitem_2262, getitem_2263, getitem_2264, getitem_2265, getitem_2266, getitem_2267, getitem_2268, getitem_2269, getitem_2270, getitem_2271, getitem_2272, getitem_2273, getitem_2274, getitem_2275, getitem_2276, getitem_2277, getitem_2278, getitem_2279, getitem_2280, getitem_2281, getitem_2282, getitem_2283, getitem_2284, getitem_2285, getitem_2286, getitem_2287, getitem_2288, getitem_2289, getitem_2290, getitem_2291, getitem_2292, getitem_2293, getitem_2294, getitem_2295, getitem_2296, getitem_2297, getitem_2298, getitem_2299, getitem_2300, getitem_2301, getitem_2302, getitem_2303, getitem_2304, getitem_2305, getitem_2306, getitem_2307, getitem_2308, getitem_2309, getitem_2310, getitem_2311, getitem_2312, getitem_2313, getitem_2314, getitem_2315, getitem_2316, getitem_2317, getitem_2318, getitem_2319, getitem_2320, getitem_2321, getitem_2322, getitem_2323, getitem_2324, getitem_2325, getitem_2326, getitem_2327, getitem_2328, getitem_2329, getitem_2330, getitem_2331, getitem_2332, getitem_2333, getitem_2334, getitem_2335, getitem_2336, getitem_2337, getitem_2338, getitem_2339, getitem_2340, getitem_2341, getitem_2342, getitem_2343, getitem_2344, getitem_2345, getitem_2346, getitem_2347, getitem_2348, getitem_2349, getitem_2350, getitem_2351, getitem_2352, getitem_2353, getitem_2354, getitem_2355, getitem_2356, getitem_2357, getitem_2358, getitem_2359, getitem_2360, getitem_2361, getitem_2362, getitem_2363, getitem_2364, getitem_2365, getitem_2366, getitem_2367], [getitem_2072, getitem_2073, getitem_2074, getitem_2075, getitem_2076, getitem_2077, getitem_2078, getitem_2079, getitem_2080, getitem_2081, getitem_2082, getitem_2083, getitem_2084, getitem_2085, getitem_2086, getitem_2087, getitem_2088, getitem_2089, getitem_2090, getitem_2091, getitem_2092, getitem_2093, getitem_2094, getitem_2095, getitem_2096, getitem_2097, getitem_2098, getitem_2099, getitem_2100, getitem_2101, getitem_2102, getitem_2103, getitem_2104, getitem_2105, getitem_2106, getitem_2107, getitem_2108, getitem_2109, getitem_2110, getitem_2111, getitem_2112, getitem_2113, getitem_2114, getitem_2115, getitem_2116, getitem_2117, getitem_2118, getitem_2119, getitem_2120, getitem_2121, getitem_2122, getitem_2123, getitem_2124, getitem_2125, getitem_2126, getitem_2127, getitem_2128, getitem_2129, getitem_2130, getitem_2131, getitem_2132, getitem_2133, getitem_2134, getitem_2135, getitem_2136, getitem_2137, getitem_2138, getitem_2139, getitem_2140, getitem_2141, getitem_2142, getitem_2143, getitem_2144, getitem_2145, getitem_2146, getitem_2147, getitem_2148, getitem_2149, getitem_2150, getitem_2151, getitem_2152, getitem_2153, getitem_2154, getitem_2155, getitem_2156, getitem_2157, getitem_2158, getitem_2159, getitem_2160, getitem_2161, getitem_2162, getitem_2163, getitem_2164, getitem_2165, getitem_2166, getitem_2167, getitem_2168, getitem_2169, getitem_2170, getitem_2171, getitem_2172, getitem_2173, getitem_2174, getitem_2175, getitem_2176, getitem_2177, getitem_2178, getitem_2179, getitem_2180, getitem_2181, getitem_2182, getitem_2183, getitem_2184, getitem_2185, getitem_2186, getitem_2187, getitem_2188, getitem_2189, getitem_2190, getitem_2191, getitem_2192, getitem_2193, getitem_2194, getitem_2195, getitem_2196, getitem_2197, getitem_2198, getitem_2199, getitem_2200, getitem_2201, getitem_2202, getitem_2203, getitem_2204, getitem_2205, getitem_2206, getitem_2207, getitem_2208, getitem_2209, getitem_2210, getitem_2211, getitem_2212, getitem_2213, getitem_2214, getitem_2215, getitem_2216, getitem_2217, 
getitem_2218, getitem_2219]); getitem_2220 = getitem_2221 = getitem_2222 = getitem_2223 = getitem_2224 = getitem_2225 = getitem_2226 = getitem_2227 = getitem_2228 = getitem_2229 = getitem_2230 = getitem_2231 = getitem_2232 = getitem_2233 = getitem_2234 = getitem_2235 = getitem_2236 = getitem_2237 = getitem_2238 = getitem_2239 = getitem_2240 = getitem_2241 = getitem_2242 = getitem_2243 = getitem_2244 = getitem_2245 = getitem_2246 = getitem_2247 = getitem_2248 = getitem_2249 = getitem_2250 = getitem_2251 = getitem_2252 = getitem_2253 = getitem_2254 = getitem_2255 = getitem_2256 = getitem_2257 = getitem_2258 = getitem_2259 = getitem_2260 = getitem_2261 = getitem_2262 = getitem_2263 = getitem_2264 = getitem_2265 = getitem_2266 = getitem_2267 = getitem_2268 = getitem_2269 = getitem_2270 = getitem_2271 = getitem_2272 = getitem_2273 = getitem_2274 = getitem_2275 = getitem_2276 = getitem_2277 = getitem_2278 = getitem_2279 = getitem_2280 = getitem_2281 = getitem_2282 = getitem_2283 = getitem_2284 = getitem_2285 = getitem_2286 = getitem_2287 = getitem_2288 = getitem_2289 = getitem_2290 = getitem_2291 = getitem_2292 = getitem_2293 = getitem_2294 = getitem_2295 = getitem_2296 = getitem_2297 = getitem_2298 = getitem_2299 = getitem_2300 = getitem_2301 = getitem_2302 = getitem_2303 = getitem_2304 = getitem_2305 = getitem_2306 = getitem_2307 = getitem_2308 = getitem_2309 = getitem_2310 = getitem_2311 = getitem_2312 = getitem_2313 = getitem_2314 = getitem_2315 = getitem_2316 = getitem_2317 = getitem_2318 = getitem_2319 = getitem_2320 = getitem_2321 = getitem_2322 = getitem_2323 = getitem_2324 = getitem_2325 = getitem_2326 = getitem_2327 = getitem_2328 = getitem_2329 = getitem_2330 = getitem_2331 = getitem_2332 = getitem_2333 = getitem_2334 = getitem_2335 = getitem_2336 = getitem_2337 = getitem_2338 = getitem_2339 = getitem_2340 = getitem_2341 = getitem_2342 = getitem_2343 = getitem_2344 = getitem_2345 = getitem_2346 = getitem_2347 = getitem_2348 = getitem_2349 = getitem_2350 = getitem_2351 = getitem_2352 = getitem_2353 = getitem_2354 = getitem_2355 = getitem_2356 = getitem_2357 = getitem_2358 = getitem_2359 = getitem_2360 = getitem_2361 = getitem_2362 = getitem_2363 = getitem_2364 = getitem_2365 = getitem_2366 = getitem_2367 = getitem_2072 = getitem_2073 = getitem_2074 = getitem_2075 = getitem_2076 = getitem_2077 = getitem_2078 = getitem_2079 = getitem_2080 = getitem_2081 = getitem_2082 = getitem_2083 = getitem_2084 = getitem_2085 = getitem_2086 = getitem_2087 = getitem_2088 = getitem_2089 = getitem_2090 = getitem_2091 = getitem_2092 = getitem_2093 = getitem_2094 = getitem_2095 = getitem_2096 = getitem_2097 = getitem_2098 = getitem_2099 = getitem_2100 = getitem_2101 = getitem_2102 = getitem_2103 = getitem_2104 = getitem_2105 = getitem_2106 = getitem_2107 = getitem_2108 = getitem_2109 = getitem_2110 = getitem_2111 = getitem_2112 = getitem_2113 = getitem_2114 = getitem_2115 = getitem_2116 = getitem_2117 = getitem_2118 = getitem_2119 = getitem_2120 = getitem_2121 = getitem_2122 = getitem_2123 = getitem_2124 = getitem_2125 = getitem_2126 = getitem_2127 = getitem_2128 = getitem_2129 = getitem_2130 = getitem_2131 = getitem_2132 = getitem_2133 = getitem_2134 = getitem_2135 = getitem_2136 = getitem_2137 = getitem_2138 = getitem_2139 = getitem_2140 = getitem_2141 = getitem_2142 = getitem_2143 = getitem_2144 = getitem_2145 = getitem_2146 = getitem_2147 = getitem_2148 = getitem_2149 = getitem_2150 = getitem_2151 = getitem_2152 = getitem_2153 = getitem_2154 = getitem_2155 = getitem_2156 = getitem_2157 = getitem_2158 = 
getitem_2159 = getitem_2160 = getitem_2161 = getitem_2162 = getitem_2163 = getitem_2164 = getitem_2165 = getitem_2166 = getitem_2167 = getitem_2168 = getitem_2169 = getitem_2170 = getitem_2171 = getitem_2172 = getitem_2173 = getitem_2174 = getitem_2175 = getitem_2176 = getitem_2177 = getitem_2178 = getitem_2179 = getitem_2180 = getitem_2181 = getitem_2182 = getitem_2183 = getitem_2184 = getitem_2185 = getitem_2186 = getitem_2187 = getitem_2188 = getitem_2189 = getitem_2190 = getitem_2191 = getitem_2192 = getitem_2193 = getitem_2194 = getitem_2195 = getitem_2196 = getitem_2197 = getitem_2198 = getitem_2199 = getitem_2200 = getitem_2201 = getitem_2202 = getitem_2203 = getitem_2204 = getitem_2205 = getitem_2206 = getitem_2207 = getitem_2208 = getitem_2209 = getitem_2210 = getitem_2211 = getitem_2212 = getitem_2213 = getitem_2214 = getitem_2215 = getitem_2216 = getitem_2217 = getitem_2218 = getitem_2219 = None\n getitem_2368 = _foreach_div_1[0]\n getitem_2369 = _foreach_div_1[1]\n getitem_2370 = _foreach_div_1[2]\n getitem_2371 = _foreach_div_1[3]\n getitem_2372 = _foreach_div_1[4]\n getitem_2373 = _foreach_div_1[5]\n getitem_2374 = _foreach_div_1[6]\n getitem_2375 = _foreach_div_1[7]\n getitem_2376 = _foreach_div_1[8]\n getitem_2377 = _foreach_div_1[9]\n getitem_2378 = _foreach_div_1[10]\n getitem_2379 = _foreach_div_1[11]\n getitem_2380 = _foreach_div_1[12]\n getitem_2381 = _foreach_div_1[13]\n getitem_2382 = _foreach_div_1[14]\n getitem_2383 = _foreach_div_1[15]\n getitem_2384 = _foreach_div_1[16]\n getitem_2385 = _foreach_div_1[17]\n getitem_2386 = _foreach_div_1[18]\n getitem_2387 = _foreach_div_1[19]\n getitem_2388 = _foreach_div_1[20]\n getitem_2389 = _foreach_div_1[21]\n getitem_2390 = _foreach_div_1[22]\n getitem_2391 = _foreach_div_1[23]\n getitem_2392 = _foreach_div_1[24]\n getitem_2393 = _foreach_div_1[25]\n getitem_2394 = _foreach_div_1[26]\n getitem_2395 = _foreach_div_1[27]\n getitem_2396 = _foreach_div_1[28]\n getitem_2397 = _foreach_div_1[29]\n getitem_2398 = _foreach_div_1[30]\n getitem_2399 = _foreach_div_1[31]\n getitem_2400 = _foreach_div_1[32]\n getitem_2401 = _foreach_div_1[33]\n getitem_2402 = _foreach_div_1[34]\n getitem_2403 = _foreach_div_1[35]\n getitem_2404 = _foreach_div_1[36]\n getitem_2405 = _foreach_div_1[37]\n getitem_2406 = _foreach_div_1[38]\n getitem_2407 = _foreach_div_1[39]\n getitem_2408 = _foreach_div_1[40]\n getitem_2409 = _foreach_div_1[41]\n getitem_2410 = _foreach_div_1[42]\n getitem_2411 = _foreach_div_1[43]\n getitem_2412 = _foreach_div_1[44]\n getitem_2413 = _foreach_div_1[45]\n getitem_2414 = _foreach_div_1[46]\n getitem_2415 = _foreach_div_1[47]\n getitem_2416 = _foreach_div_1[48]\n getitem_2417 = _foreach_div_1[49]\n getitem_2418 = _foreach_div_1[50]\n getitem_2419 = _foreach_div_1[51]\n getitem_2420 = _foreach_div_1[52]\n getitem_2421 = _foreach_div_1[53]\n getitem_2422 = _foreach_div_1[54]\n getitem_2423 = _foreach_div_1[55]\n getitem_2424 = _foreach_div_1[56]\n getitem_2425 = _foreach_div_1[57]\n getitem_2426 = _foreach_div_1[58]\n getitem_2427 = _foreach_div_1[59]\n getitem_2428 = _foreach_div_1[60]\n getitem_2429 = _foreach_div_1[61]\n getitem_2430 = _foreach_div_1[62]\n getitem_2431 = _foreach_div_1[63]\n getitem_2432 = _foreach_div_1[64]\n getitem_2433 = _foreach_div_1[65]\n getitem_2434 = _foreach_div_1[66]\n getitem_2435 = _foreach_div_1[67]\n getitem_2436 = _foreach_div_1[68]\n getitem_2437 = _foreach_div_1[69]\n getitem_2438 = _foreach_div_1[70]\n getitem_2439 = _foreach_div_1[71]\n getitem_2440 = _foreach_div_1[72]\n getitem_2441 = 
_foreach_div_1[73]\n getitem_2442 = _foreach_div_1[74]\n getitem_2443 = _foreach_div_1[75]\n getitem_2444 = _foreach_div_1[76]\n getitem_2445 = _foreach_div_1[77]\n getitem_2446 = _foreach_div_1[78]\n getitem_2447 = _foreach_div_1[79]\n getitem_2448 = _foreach_div_1[80]\n getitem_2449 = _foreach_div_1[81]\n getitem_2450 = _foreach_div_1[82]\n getitem_2451 = _foreach_div_1[83]\n getitem_2452 = _foreach_div_1[84]\n getitem_2453 = _foreach_div_1[85]\n getitem_2454 = _foreach_div_1[86]\n getitem_2455 = _foreach_div_1[87]\n getitem_2456 = _foreach_div_1[88]\n getitem_2457 = _foreach_div_1[89]\n getitem_2458 = _foreach_div_1[90]\n getitem_2459 = _foreach_div_1[91]\n getitem_2460 = _foreach_div_1[92]\n getitem_2461 = _foreach_div_1[93]\n getitem_2462 = _foreach_div_1[94]\n getitem_2463 = _foreach_div_1[95]\n getitem_2464 = _foreach_div_1[96]\n getitem_2465 = _foreach_div_1[97]\n getitem_2466 = _foreach_div_1[98]\n getitem_2467 = _foreach_div_1[99]\n getitem_2468 = _foreach_div_1[100]\n getitem_2469 = _foreach_div_1[101]\n getitem_2470 = _foreach_div_1[102]\n getitem_2471 = _foreach_div_1[103]\n getitem_2472 = _foreach_div_1[104]\n getitem_2473 = _foreach_div_1[105]\n getitem_2474 = _foreach_div_1[106]\n getitem_2475 = _foreach_div_1[107]\n getitem_2476 = _foreach_div_1[108]\n getitem_2477 = _foreach_div_1[109]\n getitem_2478 = _foreach_div_1[110]\n getitem_2479 = _foreach_div_1[111]\n getitem_2480 = _foreach_div_1[112]\n getitem_2481 = _foreach_div_1[113]\n getitem_2482 = _foreach_div_1[114]\n getitem_2483 = _foreach_div_1[115]\n getitem_2484 = _foreach_div_1[116]\n getitem_2485 = _foreach_div_1[117]\n getitem_2486 = _foreach_div_1[118]\n getitem_2487 = _foreach_div_1[119]\n getitem_2488 = _foreach_div_1[120]\n getitem_2489 = _foreach_div_1[121]\n getitem_2490 = _foreach_div_1[122]\n getitem_2491 = _foreach_div_1[123]\n getitem_2492 = _foreach_div_1[124]\n getitem_2493 = _foreach_div_1[125]\n getitem_2494 = _foreach_div_1[126]\n getitem_2495 = _foreach_div_1[127]\n getitem_2496 = _foreach_div_1[128]\n getitem_2497 = _foreach_div_1[129]\n getitem_2498 = _foreach_div_1[130]\n getitem_2499 = _foreach_div_1[131]\n getitem_2500 = _foreach_div_1[132]\n getitem_2501 = _foreach_div_1[133]\n getitem_2502 = _foreach_div_1[134]\n getitem_2503 = _foreach_div_1[135]\n getitem_2504 = _foreach_div_1[136]\n getitem_2505 = _foreach_div_1[137]\n getitem_2506 = _foreach_div_1[138]\n getitem_2507 = _foreach_div_1[139]\n getitem_2508 = _foreach_div_1[140]\n getitem_2509 = _foreach_div_1[141]\n getitem_2510 = _foreach_div_1[142]\n getitem_2511 = _foreach_div_1[143]\n getitem_2512 = _foreach_div_1[144]\n getitem_2513 = _foreach_div_1[145]\n getitem_2514 = _foreach_div_1[146]\n getitem_2515 = _foreach_div_1[147]; _foreach_div_1 = None\n _foreach_add_3 = torch.ops.aten._foreach_add.Scalar([getitem_2368, getitem_2369, getitem_2370, getitem_2371, getitem_2372, getitem_2373, getitem_2374, getitem_2375, getitem_2376, getitem_2377, getitem_2378, getitem_2379, getitem_2380, getitem_2381, getitem_2382, getitem_2383, getitem_2384, getitem_2385, getitem_2386, getitem_2387, getitem_2388, getitem_2389, getitem_2390, getitem_2391, getitem_2392, getitem_2393, getitem_2394, getitem_2395, getitem_2396, getitem_2397, getitem_2398, getitem_2399, getitem_2400, getitem_2401, getitem_2402, getitem_2403, getitem_2404, getitem_2405, getitem_2406, getitem_2407, getitem_2408, getitem_2409, getitem_2410, getitem_2411, getitem_2412, getitem_2413, getitem_2414, getitem_2415, getitem_2416, getitem_2417, getitem_2418, getitem_2419, getitem_2420, 
getitem_2421, getitem_2422, getitem_2423, getitem_2424, getitem_2425, getitem_2426, getitem_2427, getitem_2428, getitem_2429, getitem_2430, getitem_2431, getitem_2432, getitem_2433, getitem_2434, getitem_2435, getitem_2436, getitem_2437, getitem_2438, getitem_2439, getitem_2440, getitem_2441, getitem_2442, getitem_2443, getitem_2444, getitem_2445, getitem_2446, getitem_2447, getitem_2448, getitem_2449, getitem_2450, getitem_2451, getitem_2452, getitem_2453, getitem_2454, getitem_2455, getitem_2456, getitem_2457, getitem_2458, getitem_2459, getitem_2460, getitem_2461, getitem_2462, getitem_2463, getitem_2464, getitem_2465, getitem_2466, getitem_2467, getitem_2468, getitem_2469, getitem_2470, getitem_2471, getitem_2472, getitem_2473, getitem_2474, getitem_2475, getitem_2476, getitem_2477, getitem_2478, getitem_2479, getitem_2480, getitem_2481, getitem_2482, getitem_2483, getitem_2484, getitem_2485, getitem_2486, getitem_2487, getitem_2488, getitem_2489, getitem_2490, getitem_2491, getitem_2492, getitem_2493, getitem_2494, getitem_2495, getitem_2496, getitem_2497, getitem_2498, getitem_2499, getitem_2500, getitem_2501, getitem_2502, getitem_2503, getitem_2504, getitem_2505, getitem_2506, getitem_2507, getitem_2508, getitem_2509, getitem_2510, getitem_2511, getitem_2512, getitem_2513, getitem_2514, getitem_2515], 1e-08); getitem_2368 = getitem_2369 = getitem_2370 = getitem_2371 = getitem_2372 = getitem_2373 = getitem_2374 = getitem_2375 = getitem_2376 = getitem_2377 = getitem_2378 = getitem_2379 = getitem_2380 = getitem_2381 = getitem_2382 = getitem_2383 = getitem_2384 = getitem_2385 = getitem_2386 = getitem_2387 = getitem_2388 = getitem_2389 = getitem_2390 = getitem_2391 = getitem_2392 = getitem_2393 = getitem_2394 = getitem_2395 = getitem_2396 = getitem_2397 = getitem_2398 = getitem_2399 = getitem_2400 = getitem_2401 = getitem_2402 = getitem_2403 = getitem_2404 = getitem_2405 = getitem_2406 = getitem_2407 = getitem_2408 = getitem_2409 = getitem_2410 = getitem_2411 = getitem_2412 = getitem_2413 = getitem_2414 = getitem_2415 = getitem_2416 = getitem_2417 = getitem_2418 = getitem_2419 = getitem_2420 = getitem_2421 = getitem_2422 = getitem_2423 = getitem_2424 = getitem_2425 = getitem_2426 = getitem_2427 = getitem_2428 = getitem_2429 = getitem_2430 = getitem_2431 = getitem_2432 = getitem_2433 = getitem_2434 = getitem_2435 = getitem_2436 = getitem_2437 = getitem_2438 = getitem_2439 = getitem_2440 = getitem_2441 = getitem_2442 = getitem_2443 = getitem_2444 = getitem_2445 = getitem_2446 = getitem_2447 = getitem_2448 = getitem_2449 = getitem_2450 = getitem_2451 = getitem_2452 = getitem_2453 = getitem_2454 = getitem_2455 = getitem_2456 = getitem_2457 = getitem_2458 = getitem_2459 = getitem_2460 = getitem_2461 = getitem_2462 = getitem_2463 = getitem_2464 = getitem_2465 = getitem_2466 = getitem_2467 = getitem_2468 = getitem_2469 = getitem_2470 = getitem_2471 = getitem_2472 = getitem_2473 = getitem_2474 = getitem_2475 = getitem_2476 = getitem_2477 = getitem_2478 = getitem_2479 = getitem_2480 = getitem_2481 = getitem_2482 = getitem_2483 = getitem_2484 = getitem_2485 = getitem_2486 = getitem_2487 = getitem_2488 = getitem_2489 = getitem_2490 = getitem_2491 = getitem_2492 = getitem_2493 = getitem_2494 = getitem_2495 = getitem_2496 = getitem_2497 = getitem_2498 = getitem_2499 = getitem_2500 = getitem_2501 = getitem_2502 = getitem_2503 = getitem_2504 = getitem_2505 = getitem_2506 = getitem_2507 = getitem_2508 = getitem_2509 = getitem_2510 = getitem_2511 = getitem_2512 = getitem_2513 = getitem_2514 = 
getitem_2515 = None\n getitem_2516 = _foreach_add_3[0]\n getitem_2517 = _foreach_add_3[1]\n getitem_2518 = _foreach_add_3[2]\n getitem_2519 = _foreach_add_3[3]\n getitem_2520 = _foreach_add_3[4]\n getitem_2521 = _foreach_add_3[5]\n getitem_2522 = _foreach_add_3[6]\n getitem_2523 = _foreach_add_3[7]\n getitem_2524 = _foreach_add_3[8]\n getitem_2525 = _foreach_add_3[9]\n getitem_2526 = _foreach_add_3[10]\n getitem_2527 = _foreach_add_3[11]\n getitem_2528 = _foreach_add_3[12]\n getitem_2529 = _foreach_add_3[13]\n getitem_2530 = _foreach_add_3[14]\n getitem_2531 = _foreach_add_3[15]\n getitem_2532 = _foreach_add_3[16]\n getitem_2533 = _foreach_add_3[17]\n getitem_2534 = _foreach_add_3[18]\n getitem_2535 = _foreach_add_3[19]\n getitem_2536 = _foreach_add_3[20]\n getitem_2537 = _foreach_add_3[21]\n getitem_2538 = _foreach_add_3[22]\n getitem_2539 = _foreach_add_3[23]\n getitem_2540 = _foreach_add_3[24]\n getitem_2541 = _foreach_add_3[25]\n getitem_2542 = _foreach_add_3[26]\n getitem_2543 = _foreach_add_3[27]\n getitem_2544 = _foreach_add_3[28]\n getitem_2545 = _foreach_add_3[29]\n getitem_2546 = _foreach_add_3[30]\n getitem_2547 = _foreach_add_3[31]\n getitem_2548 = _foreach_add_3[32]\n getitem_2549 = _foreach_add_3[33]\n getitem_2550 = _foreach_add_3[34]\n getitem_2551 = _foreach_add_3[35]\n getitem_2552 = _foreach_add_3[36]\n getitem_2553 = _foreach_add_3[37]\n getitem_2554 = _foreach_add_3[38]\n getitem_2555 = _foreach_add_3[39]\n getitem_2556 = _foreach_add_3[40]\n getitem_2557 = _foreach_add_3[41]\n getitem_2558 = _foreach_add_3[42]\n getitem_2559 = _foreach_add_3[43]\n getitem_2560 = _foreach_add_3[44]\n getitem_2561 = _foreach_add_3[45]\n getitem_2562 = _foreach_add_3[46]\n getitem_2563 = _foreach_add_3[47]\n getitem_2564 = _foreach_add_3[48]\n getitem_2565 = _foreach_add_3[49]\n getitem_2566 = _foreach_add_3[50]\n getitem_2567 = _foreach_add_3[51]\n getitem_2568 = _foreach_add_3[52]\n getitem_2569 = _foreach_add_3[53]\n getitem_2570 = _foreach_add_3[54]\n getitem_2571 = _foreach_add_3[55]\n getitem_2572 = _foreach_add_3[56]\n getitem_2573 = _foreach_add_3[57]\n getitem_2574 = _foreach_add_3[58]\n getitem_2575 = _foreach_add_3[59]\n getitem_2576 = _foreach_add_3[60]\n getitem_2577 = _foreach_add_3[61]\n getitem_2578 = _foreach_add_3[62]\n getitem_2579 = _foreach_add_3[63]\n getitem_2580 = _foreach_add_3[64]\n getitem_2581 = _foreach_add_3[65]\n getitem_2582 = _foreach_add_3[66]\n getitem_2583 = _foreach_add_3[67]\n getitem_2584 = _foreach_add_3[68]\n getitem_2585 = _foreach_add_3[69]\n getitem_2586 = _foreach_add_3[70]\n getitem_2587 = _foreach_add_3[71]\n getitem_2588 = _foreach_add_3[72]\n getitem_2589 = _foreach_add_3[73]\n getitem_2590 = _foreach_add_3[74]\n getitem_2591 = _foreach_add_3[75]\n getitem_2592 = _foreach_add_3[76]\n getitem_2593 = _foreach_add_3[77]\n getitem_2594 = _foreach_add_3[78]\n getitem_2595 = _foreach_add_3[79]\n getitem_2596 = _foreach_add_3[80]\n getitem_2597 = _foreach_add_3[81]\n getitem_2598 = _foreach_add_3[82]\n getitem_2599 = _foreach_add_3[83]\n getitem_2600 = _foreach_add_3[84]\n getitem_2601 = _foreach_add_3[85]\n getitem_2602 = _foreach_add_3[86]\n getitem_2603 = _foreach_add_3[87]\n getitem_2604 = _foreach_add_3[88]\n getitem_2605 = _foreach_add_3[89]\n getitem_2606 = _foreach_add_3[90]\n getitem_2607 = _foreach_add_3[91]\n getitem_2608 = _foreach_add_3[92]\n getitem_2609 = _foreach_add_3[93]\n getitem_2610 = _foreach_add_3[94]\n getitem_2611 = _foreach_add_3[95]\n getitem_2612 = _foreach_add_3[96]\n getitem_2613 = _foreach_add_3[97]\n getitem_2614 = 
_foreach_add_3[98]\n getitem_2615 = _foreach_add_3[99]\n getitem_2616 = _foreach_add_3[100]\n getitem_2617 = _foreach_add_3[101]\n getitem_2618 = _foreach_add_3[102]\n getitem_2619 = _foreach_add_3[103]\n getitem_2620 = _foreach_add_3[104]\n getitem_2621 = _foreach_add_3[105]\n getitem_2622 = _foreach_add_3[106]\n getitem_2623 = _foreach_add_3[107]\n getitem_2624 = _foreach_add_3[108]\n getitem_2625 = _foreach_add_3[109]\n getitem_2626 = _foreach_add_3[110]\n getitem_2627 = _foreach_add_3[111]\n getitem_2628 = _foreach_add_3[112]\n getitem_2629 = _foreach_add_3[113]\n getitem_2630 = _foreach_add_3[114]\n getitem_2631 = _foreach_add_3[115]\n getitem_2632 = _foreach_add_3[116]\n getitem_2633 = _foreach_add_3[117]\n getitem_2634 = _foreach_add_3[118]\n getitem_2635 = _foreach_add_3[119]\n getitem_2636 = _foreach_add_3[120]\n getitem_2637 = _foreach_add_3[121]\n getitem_2638 = _foreach_add_3[122]\n getitem_2639 = _foreach_add_3[123]\n getitem_2640 = _foreach_add_3[124]\n getitem_2641 = _foreach_add_3[125]\n getitem_2642 = _foreach_add_3[126]\n getitem_2643 = _foreach_add_3[127]\n getitem_2644 = _foreach_add_3[128]\n getitem_2645 = _foreach_add_3[129]\n getitem_2646 = _foreach_add_3[130]\n getitem_2647 = _foreach_add_3[131]\n getitem_2648 = _foreach_add_3[132]\n getitem_2649 = _foreach_add_3[133]\n getitem_2650 = _foreach_add_3[134]\n getitem_2651 = _foreach_add_3[135]\n getitem_2652 = _foreach_add_3[136]\n getitem_2653 = _foreach_add_3[137]\n getitem_2654 = _foreach_add_3[138]\n getitem_2655 = _foreach_add_3[139]\n getitem_2656 = _foreach_add_3[140]\n getitem_2657 = _foreach_add_3[141]\n getitem_2658 = _foreach_add_3[142]\n getitem_2659 = _foreach_add_3[143]\n getitem_2660 = _foreach_add_3[144]\n getitem_2661 = _foreach_add_3[145]\n getitem_2662 = _foreach_add_3[146]\n getitem_2663 = _foreach_add_3[147]; _foreach_add_3 = None\n _foreach_div_2 = torch.ops.aten._foreach_div.List([getitem_2516, getitem_2517, getitem_2518, getitem_2519, getitem_2520, getitem_2521, getitem_2522, getitem_2523, getitem_2524, getitem_2525, getitem_2526, getitem_2527, getitem_2528, getitem_2529, getitem_2530, getitem_2531, getitem_2532, getitem_2533, getitem_2534, getitem_2535, getitem_2536, getitem_2537, getitem_2538, getitem_2539, getitem_2540, getitem_2541, getitem_2542, getitem_2543, getitem_2544, getitem_2545, getitem_2546, getitem_2547, getitem_2548, getitem_2549, getitem_2550, getitem_2551, getitem_2552, getitem_2553, getitem_2554, getitem_2555, getitem_2556, getitem_2557, getitem_2558, getitem_2559, getitem_2560, getitem_2561, getitem_2562, getitem_2563, getitem_2564, getitem_2565, getitem_2566, getitem_2567, getitem_2568, getitem_2569, getitem_2570, getitem_2571, getitem_2572, getitem_2573, getitem_2574, getitem_2575, getitem_2576, getitem_2577, getitem_2578, getitem_2579, getitem_2580, getitem_2581, getitem_2582, getitem_2583, getitem_2584, getitem_2585, getitem_2586, getitem_2587, getitem_2588, getitem_2589, getitem_2590, getitem_2591, getitem_2592, getitem_2593, getitem_2594, getitem_2595, getitem_2596, getitem_2597, getitem_2598, getitem_2599, getitem_2600, getitem_2601, getitem_2602, getitem_2603, getitem_2604, getitem_2605, getitem_2606, getitem_2607, getitem_2608, getitem_2609, getitem_2610, getitem_2611, getitem_2612, getitem_2613, getitem_2614, getitem_2615, getitem_2616, getitem_2617, getitem_2618, getitem_2619, getitem_2620, getitem_2621, getitem_2622, getitem_2623, getitem_2624, getitem_2625, getitem_2626, getitem_2627, getitem_2628, getitem_2629, getitem_2630, getitem_2631, getitem_2632, 
getitem_2633, getitem_2634, getitem_2635, getitem_2636, getitem_2637, getitem_2638, getitem_2639, getitem_2640, getitem_2641, getitem_2642, getitem_2643, getitem_2644, getitem_2645, getitem_2646, getitem_2647, getitem_2648, getitem_2649, getitem_2650, getitem_2651, getitem_2652, getitem_2653, getitem_2654, getitem_2655, getitem_2656, getitem_2657, getitem_2658, getitem_2659, getitem_2660, getitem_2661, getitem_2662, getitem_2663], [getitem_1924, getitem_1925, getitem_1926, getitem_1927, getitem_1928, getitem_1929, getitem_1930, getitem_1931, getitem_1932, getitem_1933, getitem_1934, getitem_1935, getitem_1936, getitem_1937, getitem_1938, getitem_1939, getitem_1940, getitem_1941, getitem_1942, getitem_1943, getitem_1944, getitem_1945, getitem_1946, getitem_1947, getitem_1948, getitem_1949, getitem_1950, getitem_1951, getitem_1952, getitem_1953, getitem_1954, getitem_1955, getitem_1956, getitem_1957, getitem_1958, getitem_1959, getitem_1960, getitem_1961, getitem_1962, getitem_1963, getitem_1964, getitem_1965, getitem_1966, getitem_1967, getitem_1968, getitem_1969, getitem_1970, getitem_1971, getitem_1972, getitem_1973, getitem_1974, getitem_1975, getitem_1976, getitem_1977, getitem_1978, getitem_1979, getitem_1980, getitem_1981, getitem_1982, getitem_1983, getitem_1984, getitem_1985, getitem_1986, getitem_1987, getitem_1988, getitem_1989, getitem_1990, getitem_1991, getitem_1992, getitem_1993, getitem_1994, getitem_1995, getitem_1996, getitem_1997, getitem_1998, getitem_1999, getitem_2000, getitem_2001, getitem_2002, getitem_2003, getitem_2004, getitem_2005, getitem_2006, getitem_2007, getitem_2008, getitem_2009, getitem_2010, getitem_2011, getitem_2012, getitem_2013, getitem_2014, getitem_2015, getitem_2016, getitem_2017, getitem_2018, getitem_2019, getitem_2020, getitem_2021, getitem_2022, getitem_2023, getitem_2024, getitem_2025, getitem_2026, getitem_2027, getitem_2028, getitem_2029, getitem_2030, getitem_2031, getitem_2032, getitem_2033, getitem_2034, getitem_2035, getitem_2036, getitem_2037, getitem_2038, getitem_2039, getitem_2040, getitem_2041, getitem_2042, getitem_2043, getitem_2044, getitem_2045, getitem_2046, getitem_2047, getitem_2048, getitem_2049, getitem_2050, getitem_2051, getitem_2052, getitem_2053, getitem_2054, getitem_2055, getitem_2056, getitem_2057, getitem_2058, getitem_2059, getitem_2060, getitem_2061, getitem_2062, getitem_2063, getitem_2064, getitem_2065, getitem_2066, getitem_2067, getitem_2068, getitem_2069, getitem_2070, getitem_2071]); getitem_2516 = getitem_2517 = getitem_2518 = getitem_2519 = getitem_2520 = getitem_2521 = getitem_2522 = getitem_2523 = getitem_2524 = getitem_2525 = getitem_2526 = getitem_2527 = getitem_2528 = getitem_2529 = getitem_2530 = getitem_2531 = getitem_2532 = getitem_2533 = getitem_2534 = getitem_2535 = getitem_2536 = getitem_2537 = getitem_2538 = getitem_2539 = getitem_2540 = getitem_2541 = getitem_2542 = getitem_2543 = getitem_2544 = getitem_2545 = getitem_2546 = getitem_2547 = getitem_2548 = getitem_2549 = getitem_2550 = getitem_2551 = getitem_2552 = getitem_2553 = getitem_2554 = getitem_2555 = getitem_2556 = getitem_2557 = getitem_2558 = getitem_2559 = getitem_2560 = getitem_2561 = getitem_2562 = getitem_2563 = getitem_2564 = getitem_2565 = getitem_2566 = getitem_2567 = getitem_2568 = getitem_2569 = getitem_2570 = getitem_2571 = getitem_2572 = getitem_2573 = getitem_2574 = getitem_2575 = getitem_2576 = getitem_2577 = getitem_2578 = getitem_2579 = getitem_2580 = getitem_2581 = getitem_2582 = getitem_2583 = getitem_2584 = 
getitem_2585 = getitem_2586 = getitem_2587 = getitem_2588 = getitem_2589 = getitem_2590 = getitem_2591 = getitem_2592 = getitem_2593 = getitem_2594 = getitem_2595 = getitem_2596 = getitem_2597 = getitem_2598 = getitem_2599 = getitem_2600 = getitem_2601 = getitem_2602 = getitem_2603 = getitem_2604 = getitem_2605 = getitem_2606 = getitem_2607 = getitem_2608 = getitem_2609 = getitem_2610 = getitem_2611 = getitem_2612 = getitem_2613 = getitem_2614 = getitem_2615 = getitem_2616 = getitem_2617 = getitem_2618 = getitem_2619 = getitem_2620 = getitem_2621 = getitem_2622 = getitem_2623 = getitem_2624 = getitem_2625 = getitem_2626 = getitem_2627 = getitem_2628 = getitem_2629 = getitem_2630 = getitem_2631 = getitem_2632 = getitem_2633 = getitem_2634 = getitem_2635 = getitem_2636 = getitem_2637 = getitem_2638 = getitem_2639 = getitem_2640 = getitem_2641 = getitem_2642 = getitem_2643 = getitem_2644 = getitem_2645 = getitem_2646 = getitem_2647 = getitem_2648 = getitem_2649 = getitem_2650 = getitem_2651 = getitem_2652 = getitem_2653 = getitem_2654 = getitem_2655 = getitem_2656 = getitem_2657 = getitem_2658 = getitem_2659 = getitem_2660 = getitem_2661 = getitem_2662 = getitem_2663 = getitem_1924 = getitem_1925 = getitem_1926 = getitem_1927 = getitem_1928 = getitem_1929 = getitem_1930 = getitem_1931 = getitem_1932 = getitem_1933 = getitem_1934 = getitem_1935 = getitem_1936 = getitem_1937 = getitem_1938 = getitem_1939 = getitem_1940 = getitem_1941 = getitem_1942 = getitem_1943 = getitem_1944 = getitem_1945 = getitem_1946 = getitem_1947 = getitem_1948 = getitem_1949 = getitem_1950 = getitem_1951 = getitem_1952 = getitem_1953 = getitem_1954 = getitem_1955 = getitem_1956 = getitem_1957 = getitem_1958 = getitem_1959 = getitem_1960 = getitem_1961 = getitem_1962 = getitem_1963 = getitem_1964 = getitem_1965 = getitem_1966 = getitem_1967 = getitem_1968 = getitem_1969 = getitem_1970 = getitem_1971 = getitem_1972 = getitem_1973 = getitem_1974 = getitem_1975 = getitem_1976 = getitem_1977 = getitem_1978 = getitem_1979 = getitem_1980 = getitem_1981 = getitem_1982 = getitem_1983 = getitem_1984 = getitem_1985 = getitem_1986 = getitem_1987 = getitem_1988 = getitem_1989 = getitem_1990 = getitem_1991 = getitem_1992 = getitem_1993 = getitem_1994 = getitem_1995 = getitem_1996 = getitem_1997 = getitem_1998 = getitem_1999 = getitem_2000 = getitem_2001 = getitem_2002 = getitem_2003 = getitem_2004 = getitem_2005 = getitem_2006 = getitem_2007 = getitem_2008 = getitem_2009 = getitem_2010 = getitem_2011 = getitem_2012 = getitem_2013 = getitem_2014 = getitem_2015 = getitem_2016 = getitem_2017 = getitem_2018 = getitem_2019 = getitem_2020 = getitem_2021 = getitem_2022 = getitem_2023 = getitem_2024 = getitem_2025 = getitem_2026 = getitem_2027 = getitem_2028 = getitem_2029 = getitem_2030 = getitem_2031 = getitem_2032 = getitem_2033 = getitem_2034 = getitem_2035 = getitem_2036 = getitem_2037 = getitem_2038 = getitem_2039 = getitem_2040 = getitem_2041 = getitem_2042 = getitem_2043 = getitem_2044 = getitem_2045 = getitem_2046 = getitem_2047 = getitem_2048 = getitem_2049 = getitem_2050 = getitem_2051 = getitem_2052 = getitem_2053 = getitem_2054 = getitem_2055 = getitem_2056 = getitem_2057 = getitem_2058 = getitem_2059 = getitem_2060 = getitem_2061 = getitem_2062 = getitem_2063 = getitem_2064 = getitem_2065 = getitem_2066 = getitem_2067 = getitem_2068 = getitem_2069 = getitem_2070 = getitem_2071 = None\n getitem_2664 = _foreach_div_2[0]\n getitem_2665 = _foreach_div_2[1]\n getitem_2666 = _foreach_div_2[2]\n getitem_2667 = _foreach_div_2[3]\n 
getitem_2668 = _foreach_div_2[4]\n getitem_2669 = _foreach_div_2[5]\n getitem_2670 = _foreach_div_2[6]\n getitem_2671 = _foreach_div_2[7]\n getitem_2672 = _foreach_div_2[8]\n getitem_2673 = _foreach_div_2[9]\n getitem_2674 = _foreach_div_2[10]\n getitem_2675 = _foreach_div_2[11]\n getitem_2676 = _foreach_div_2[12]\n getitem_2677 = _foreach_div_2[13]\n getitem_2678 = _foreach_div_2[14]\n getitem_2679 = _foreach_div_2[15]\n getitem_2680 = _foreach_div_2[16]\n getitem_2681 = _foreach_div_2[17]\n getitem_2682 = _foreach_div_2[18]\n getitem_2683 = _foreach_div_2[19]\n getitem_2684 = _foreach_div_2[20]\n getitem_2685 = _foreach_div_2[21]\n getitem_2686 = _foreach_div_2[22]\n getitem_2687 = _foreach_div_2[23]\n getitem_2688 = _foreach_div_2[24]\n getitem_2689 = _foreach_div_2[25]\n getitem_2690 = _foreach_div_2[26]\n getitem_2691 = _foreach_div_2[27]\n getitem_2692 = _foreach_div_2[28]\n getitem_2693 = _foreach_div_2[29]\n getitem_2694 = _foreach_div_2[30]\n getitem_2695 = _foreach_div_2[31]\n getitem_2696 = _foreach_div_2[32]\n getitem_2697 = _foreach_div_2[33]\n getitem_2698 = _foreach_div_2[34]\n getitem_2699 = _foreach_div_2[35]\n getitem_2700 = _foreach_div_2[36]\n getitem_2701 = _foreach_div_2[37]\n getitem_2702 = _foreach_div_2[38]\n getitem_2703 = _foreach_div_2[39]\n getitem_2704 = _foreach_div_2[40]\n getitem_2705 = _foreach_div_2[41]\n getitem_2706 = _foreach_div_2[42]\n getitem_2707 = _foreach_div_2[43]\n getitem_2708 = _foreach_div_2[44]\n getitem_2709 = _foreach_div_2[45]\n getitem_2710 = _foreach_div_2[46]\n getitem_2711 = _foreach_div_2[47]\n getitem_2712 = _foreach_div_2[48]\n getitem_2713 = _foreach_div_2[49]\n getitem_2714 = _foreach_div_2[50]\n getitem_2715 = _foreach_div_2[51]\n getitem_2716 = _foreach_div_2[52]\n getitem_2717 = _foreach_div_2[53]\n getitem_2718 = _foreach_div_2[54]\n getitem_2719 = _foreach_div_2[55]\n getitem_2720 = _foreach_div_2[56]\n getitem_2721 = _foreach_div_2[57]\n getitem_2722 = _foreach_div_2[58]\n getitem_2723 = _foreach_div_2[59]\n getitem_2724 = _foreach_div_2[60]\n getitem_2725 = _foreach_div_2[61]\n getitem_2726 = _foreach_div_2[62]\n getitem_2727 = _foreach_div_2[63]\n getitem_2728 = _foreach_div_2[64]\n getitem_2729 = _foreach_div_2[65]\n getitem_2730 = _foreach_div_2[66]\n getitem_2731 = _foreach_div_2[67]\n getitem_2732 = _foreach_div_2[68]\n getitem_2733 = _foreach_div_2[69]\n getitem_2734 = _foreach_div_2[70]\n getitem_2735 = _foreach_div_2[71]\n getitem_2736 = _foreach_div_2[72]\n getitem_2737 = _foreach_div_2[73]\n getitem_2738 = _foreach_div_2[74]\n getitem_2739 = _foreach_div_2[75]\n getitem_2740 = _foreach_div_2[76]\n getitem_2741 = _foreach_div_2[77]\n getitem_2742 = _foreach_div_2[78]\n getitem_2743 = _foreach_div_2[79]\n getitem_2744 = _foreach_div_2[80]\n getitem_2745 = _foreach_div_2[81]\n getitem_2746 = _foreach_div_2[82]\n getitem_2747 = _foreach_div_2[83]\n getitem_2748 = _foreach_div_2[84]\n getitem_2749 = _foreach_div_2[85]\n getitem_2750 = _foreach_div_2[86]\n getitem_2751 = _foreach_div_2[87]\n getitem_2752 = _foreach_div_2[88]\n getitem_2753 = _foreach_div_2[89]\n getitem_2754 = _foreach_div_2[90]\n getitem_2755 = _foreach_div_2[91]\n getitem_2756 = _foreach_div_2[92]\n getitem_2757 = _foreach_div_2[93]\n getitem_2758 = _foreach_div_2[94]\n getitem_2759 = _foreach_div_2[95]\n getitem_2760 = _foreach_div_2[96]\n getitem_2761 = _foreach_div_2[97]\n getitem_2762 = _foreach_div_2[98]\n getitem_2763 = _foreach_div_2[99]\n getitem_2764 = _foreach_div_2[100]\n getitem_2765 = _foreach_div_2[101]\n getitem_2766 = 
_foreach_div_2[102]\n getitem_2767 = _foreach_div_2[103]\n getitem_2768 = _foreach_div_2[104]\n getitem_2769 = _foreach_div_2[105]\n getitem_2770 = _foreach_div_2[106]\n getitem_2771 = _foreach_div_2[107]\n getitem_2772 = _foreach_div_2[108]\n getitem_2773 = _foreach_div_2[109]\n getitem_2774 = _foreach_div_2[110]\n getitem_2775 = _foreach_div_2[111]\n getitem_2776 = _foreach_div_2[112]\n getitem_2777 = _foreach_div_2[113]\n getitem_2778 = _foreach_div_2[114]\n getitem_2779 = _foreach_div_2[115]\n getitem_2780 = _foreach_div_2[116]\n getitem_2781 = _foreach_div_2[117]\n getitem_2782 = _foreach_div_2[118]\n getitem_2783 = _foreach_div_2[119]\n getitem_2784 = _foreach_div_2[120]\n getitem_2785 = _foreach_div_2[121]\n getitem_2786 = _foreach_div_2[122]\n getitem_2787 = _foreach_div_2[123]\n getitem_2788 = _foreach_div_2[124]\n getitem_2789 = _foreach_div_2[125]\n getitem_2790 = _foreach_div_2[126]\n getitem_2791 = _foreach_div_2[127]\n getitem_2792 = _foreach_div_2[128]\n getitem_2793 = _foreach_div_2[129]\n getitem_2794 = _foreach_div_2[130]\n getitem_2795 = _foreach_div_2[131]\n getitem_2796 = _foreach_div_2[132]\n getitem_2797 = _foreach_div_2[133]\n getitem_2798 = _foreach_div_2[134]\n getitem_2799 = _foreach_div_2[135]\n getitem_2800 = _foreach_div_2[136]\n getitem_2801 = _foreach_div_2[137]\n getitem_2802 = _foreach_div_2[138]\n getitem_2803 = _foreach_div_2[139]\n getitem_2804 = _foreach_div_2[140]\n getitem_2805 = _foreach_div_2[141]\n getitem_2806 = _foreach_div_2[142]\n getitem_2807 = _foreach_div_2[143]\n getitem_2808 = _foreach_div_2[144]\n getitem_2809 = _foreach_div_2[145]\n getitem_2810 = _foreach_div_2[146]\n getitem_2811 = _foreach_div_2[147]; _foreach_div_2 = None\n _foreach_div_3 = torch.ops.aten._foreach_div.List([getitem_444, getitem_445, getitem_446, getitem_447, getitem_448, getitem_449, getitem_450, getitem_451, getitem_452, getitem_453, getitem_454, getitem_455, getitem_456, getitem_457, getitem_458, getitem_459, getitem_460, getitem_461, getitem_462, getitem_463, getitem_464, getitem_465, getitem_466, getitem_467, getitem_468, getitem_469, getitem_470, getitem_471, getitem_472, getitem_473, getitem_474, getitem_475, getitem_476, getitem_477, getitem_478, getitem_479, getitem_480, getitem_481, getitem_482, getitem_483, getitem_484, getitem_485, getitem_486, getitem_487, getitem_488, getitem_489, getitem_490, getitem_491, getitem_492, getitem_493, getitem_494, getitem_495, getitem_496, getitem_497, getitem_498, getitem_499, getitem_500, getitem_501, getitem_502, getitem_503, getitem_504, getitem_505, getitem_506, getitem_507, getitem_508, getitem_509, getitem_510, getitem_511, getitem_512, getitem_513, getitem_514, getitem_515, getitem_516, getitem_517, getitem_518, getitem_519, getitem_520, getitem_521, getitem_522, getitem_523, getitem_524, getitem_525, getitem_526, getitem_527, getitem_528, getitem_529, getitem_530, getitem_531, getitem_532, getitem_533, getitem_534, getitem_535, getitem_536, getitem_537, getitem_538, getitem_539, getitem_540, getitem_541, getitem_542, getitem_543, getitem_544, getitem_545, getitem_546, getitem_547, getitem_548, getitem_549, getitem_550, getitem_551, getitem_552, getitem_553, getitem_554, getitem_555, getitem_556, getitem_557, getitem_558, getitem_559, getitem_560, getitem_561, getitem_562, getitem_563, getitem_564, getitem_565, getitem_566, getitem_567, getitem_568, getitem_569, getitem_570, getitem_571, getitem_572, getitem_573, getitem_574, getitem_575, getitem_576, getitem_577, getitem_578, getitem_579, getitem_580, getitem_581, 
getitem_582, getitem_583, getitem_584, getitem_585, getitem_586, getitem_587, getitem_588, getitem_589, getitem_590, getitem_591], [getitem_2664, getitem_2665, getitem_2666, getitem_2667, getitem_2668, getitem_2669, getitem_2670, getitem_2671, getitem_2672, getitem_2673, getitem_2674, getitem_2675, getitem_2676, getitem_2677, getitem_2678, getitem_2679, getitem_2680, getitem_2681, getitem_2682, getitem_2683, getitem_2684, getitem_2685, getitem_2686, getitem_2687, getitem_2688, getitem_2689, getitem_2690, getitem_2691, getitem_2692, getitem_2693, getitem_2694, getitem_2695, getitem_2696, getitem_2697, getitem_2698, getitem_2699, getitem_2700, getitem_2701, getitem_2702, getitem_2703, getitem_2704, getitem_2705, getitem_2706, getitem_2707, getitem_2708, getitem_2709, getitem_2710, getitem_2711, getitem_2712, getitem_2713, getitem_2714, getitem_2715, getitem_2716, getitem_2717, getitem_2718, getitem_2719, getitem_2720, getitem_2721, getitem_2722, getitem_2723, getitem_2724, getitem_2725, getitem_2726, getitem_2727, getitem_2728, getitem_2729, getitem_2730, getitem_2731, getitem_2732, getitem_2733, getitem_2734, getitem_2735, getitem_2736, getitem_2737, getitem_2738, getitem_2739, getitem_2740, getitem_2741, getitem_2742, getitem_2743, getitem_2744, getitem_2745, getitem_2746, getitem_2747, getitem_2748, getitem_2749, getitem_2750, getitem_2751, getitem_2752, getitem_2753, getitem_2754, getitem_2755, getitem_2756, getitem_2757, getitem_2758, getitem_2759, getitem_2760, getitem_2761, getitem_2762, getitem_2763, getitem_2764, getitem_2765, getitem_2766, getitem_2767, getitem_2768, getitem_2769, getitem_2770, getitem_2771, getitem_2772, getitem_2773, getitem_2774, getitem_2775, getitem_2776, getitem_2777, getitem_2778, getitem_2779, getitem_2780, getitem_2781, getitem_2782, getitem_2783, getitem_2784, getitem_2785, getitem_2786, getitem_2787, getitem_2788, getitem_2789, getitem_2790, getitem_2791, getitem_2792, getitem_2793, getitem_2794, getitem_2795, getitem_2796, getitem_2797, getitem_2798, getitem_2799, getitem_2800, getitem_2801, getitem_2802, getitem_2803, getitem_2804, getitem_2805, getitem_2806, getitem_2807, getitem_2808, getitem_2809, getitem_2810, getitem_2811]); getitem_2664 = getitem_2665 = getitem_2666 = getitem_2667 = getitem_2668 = getitem_2669 = getitem_2670 = getitem_2671 = getitem_2672 = getitem_2673 = getitem_2674 = getitem_2675 = getitem_2676 = getitem_2677 = getitem_2678 = getitem_2679 = getitem_2680 = getitem_2681 = getitem_2682 = getitem_2683 = getitem_2684 = getitem_2685 = getitem_2686 = getitem_2687 = getitem_2688 = getitem_2689 = getitem_2690 = getitem_2691 = getitem_2692 = getitem_2693 = getitem_2694 = getitem_2695 = getitem_2696 = getitem_2697 = getitem_2698 = getitem_2699 = getitem_2700 = getitem_2701 = getitem_2702 = getitem_2703 = getitem_2704 = getitem_2705 = getitem_2706 = getitem_2707 = getitem_2708 = getitem_2709 = getitem_2710 = getitem_2711 = getitem_2712 = getitem_2713 = getitem_2714 = getitem_2715 = getitem_2716 = getitem_2717 = getitem_2718 = getitem_2719 = getitem_2720 = getitem_2721 = getitem_2722 = getitem_2723 = getitem_2724 = getitem_2725 = getitem_2726 = getitem_2727 = getitem_2728 = getitem_2729 = getitem_2730 = getitem_2731 = getitem_2732 = getitem_2733 = getitem_2734 = getitem_2735 = getitem_2736 = getitem_2737 = getitem_2738 = getitem_2739 = getitem_2740 = getitem_2741 = getitem_2742 = getitem_2743 = getitem_2744 = getitem_2745 = getitem_2746 = getitem_2747 = getitem_2748 = getitem_2749 = getitem_2750 = getitem_2751 = getitem_2752 = getitem_2753 
= getitem_2754 = getitem_2755 = getitem_2756 = getitem_2757 = getitem_2758 = getitem_2759 = getitem_2760 = getitem_2761 = getitem_2762 = getitem_2763 = getitem_2764 = getitem_2765 = getitem_2766 = getitem_2767 = getitem_2768 = getitem_2769 = getitem_2770 = getitem_2771 = getitem_2772 = getitem_2773 = getitem_2774 = getitem_2775 = getitem_2776 = getitem_2777 = getitem_2778 = getitem_2779 = getitem_2780 = getitem_2781 = getitem_2782 = getitem_2783 = getitem_2784 = getitem_2785 = getitem_2786 = getitem_2787 = getitem_2788 = getitem_2789 = getitem_2790 = getitem_2791 = getitem_2792 = getitem_2793 = getitem_2794 = getitem_2795 = getitem_2796 = getitem_2797 = getitem_2798 = getitem_2799 = getitem_2800 = getitem_2801 = getitem_2802 = getitem_2803 = getitem_2804 = getitem_2805 = getitem_2806 = getitem_2807 = getitem_2808 = getitem_2809 = getitem_2810 = getitem_2811 = None\n getitem_2812 = _foreach_div_3[0]\n getitem_2813 = _foreach_div_3[1]\n getitem_2814 = _foreach_div_3[2]\n getitem_2815 = _foreach_div_3[3]\n getitem_2816 = _foreach_div_3[4]\n getitem_2817 = _foreach_div_3[5]\n getitem_2818 = _foreach_div_3[6]\n getitem_2819 = _foreach_div_3[7]\n getitem_2820 = _foreach_div_3[8]\n getitem_2821 = _foreach_div_3[9]\n getitem_2822 = _foreach_div_3[10]\n getitem_2823 = _foreach_div_3[11]\n getitem_2824 = _foreach_div_3[12]\n getitem_2825 = _foreach_div_3[13]\n getitem_2826 = _foreach_div_3[14]\n getitem_2827 = _foreach_div_3[15]\n getitem_2828 = _foreach_div_3[16]\n getitem_2829 = _foreach_div_3[17]\n getitem_2830 = _foreach_div_3[18]\n getitem_2831 = _foreach_div_3[19]\n getitem_2832 = _foreach_div_3[20]\n getitem_2833 = _foreach_div_3[21]\n getitem_2834 = _foreach_div_3[22]\n getitem_2835 = _foreach_div_3[23]\n getitem_2836 = _foreach_div_3[24]\n getitem_2837 = _foreach_div_3[25]\n getitem_2838 = _foreach_div_3[26]\n getitem_2839 = _foreach_div_3[27]\n getitem_2840 = _foreach_div_3[28]\n getitem_2841 = _foreach_div_3[29]\n getitem_2842 = _foreach_div_3[30]\n getitem_2843 = _foreach_div_3[31]\n getitem_2844 = _foreach_div_3[32]\n getitem_2845 = _foreach_div_3[33]\n getitem_2846 = _foreach_div_3[34]\n getitem_2847 = _foreach_div_3[35]\n getitem_2848 = _foreach_div_3[36]\n getitem_2849 = _foreach_div_3[37]\n getitem_2850 = _foreach_div_3[38]\n getitem_2851 = _foreach_div_3[39]\n getitem_2852 = _foreach_div_3[40]\n getitem_2853 = _foreach_div_3[41]\n getitem_2854 = _foreach_div_3[42]\n getitem_2855 = _foreach_div_3[43]\n getitem_2856 = _foreach_div_3[44]\n getitem_2857 = _foreach_div_3[45]\n getitem_2858 = _foreach_div_3[46]\n getitem_2859 = _foreach_div_3[47]\n getitem_2860 = _foreach_div_3[48]\n getitem_2861 = _foreach_div_3[49]\n getitem_2862 = _foreach_div_3[50]\n getitem_2863 = _foreach_div_3[51]\n getitem_2864 = _foreach_div_3[52]\n getitem_2865 = _foreach_div_3[53]\n getitem_2866 = _foreach_div_3[54]\n getitem_2867 = _foreach_div_3[55]\n getitem_2868 = _foreach_div_3[56]\n getitem_2869 = _foreach_div_3[57]\n getitem_2870 = _foreach_div_3[58]\n getitem_2871 = _foreach_div_3[59]\n getitem_2872 = _foreach_div_3[60]\n getitem_2873 = _foreach_div_3[61]\n getitem_2874 = _foreach_div_3[62]\n getitem_2875 = _foreach_div_3[63]\n getitem_2876 = _foreach_div_3[64]\n getitem_2877 = _foreach_div_3[65]\n getitem_2878 = _foreach_div_3[66]\n getitem_2879 = _foreach_div_3[67]\n getitem_2880 = _foreach_div_3[68]\n getitem_2881 = _foreach_div_3[69]\n getitem_2882 = _foreach_div_3[70]\n getitem_2883 = _foreach_div_3[71]\n getitem_2884 = _foreach_div_3[72]\n getitem_2885 = _foreach_div_3[73]\n getitem_2886 = 
_foreach_div_3[74]\n getitem_2887 = _foreach_div_3[75]\n getitem_2888 = _foreach_div_3[76]\n getitem_2889 = _foreach_div_3[77]\n getitem_2890 = _foreach_div_3[78]\n getitem_2891 = _foreach_div_3[79]\n getitem_2892 = _foreach_div_3[80]\n getitem_2893 = _foreach_div_3[81]\n getitem_2894 = _foreach_div_3[82]\n getitem_2895 = _foreach_div_3[83]\n getitem_2896 = _foreach_div_3[84]\n getitem_2897 = _foreach_div_3[85]\n getitem_2898 = _foreach_div_3[86]\n getitem_2899 = _foreach_div_3[87]\n getitem_2900 = _foreach_div_3[88]\n getitem_2901 = _foreach_div_3[89]\n getitem_2902 = _foreach_div_3[90]\n getitem_2903 = _foreach_div_3[91]\n getitem_2904 = _foreach_div_3[92]\n getitem_2905 = _foreach_div_3[93]\n getitem_2906 = _foreach_div_3[94]\n getitem_2907 = _foreach_div_3[95]\n getitem_2908 = _foreach_div_3[96]\n getitem_2909 = _foreach_div_3[97]\n getitem_2910 = _foreach_div_3[98]\n getitem_2911 = _foreach_div_3[99]\n getitem_2912 = _foreach_div_3[100]\n getitem_2913 = _foreach_div_3[101]\n getitem_2914 = _foreach_div_3[102]\n getitem_2915 = _foreach_div_3[103]\n getitem_2916 = _foreach_div_3[104]\n getitem_2917 = _foreach_div_3[105]\n getitem_2918 = _foreach_div_3[106]\n getitem_2919 = _foreach_div_3[107]\n getitem_2920 = _foreach_div_3[108]\n getitem_2921 = _foreach_div_3[109]\n getitem_2922 = _foreach_div_3[110]\n getitem_2923 = _foreach_div_3[111]\n getitem_2924 = _foreach_div_3[112]\n getitem_2925 = _foreach_div_3[113]\n getitem_2926 = _foreach_div_3[114]\n getitem_2927 = _foreach_div_3[115]\n getitem_2928 = _foreach_div_3[116]\n getitem_2929 = _foreach_div_3[117]\n getitem_2930 = _foreach_div_3[118]\n getitem_2931 = _foreach_div_3[119]\n getitem_2932 = _foreach_div_3[120]\n getitem_2933 = _foreach_div_3[121]\n getitem_2934 = _foreach_div_3[122]\n getitem_2935 = _foreach_div_3[123]\n getitem_2936 = _foreach_div_3[124]\n getitem_2937 = _foreach_div_3[125]\n getitem_2938 = _foreach_div_3[126]\n getitem_2939 = _foreach_div_3[127]\n getitem_2940 = _foreach_div_3[128]\n getitem_2941 = _foreach_div_3[129]\n getitem_2942 = _foreach_div_3[130]\n getitem_2943 = _foreach_div_3[131]\n getitem_2944 = _foreach_div_3[132]\n getitem_2945 = _foreach_div_3[133]\n getitem_2946 = _foreach_div_3[134]\n getitem_2947 = _foreach_div_3[135]\n getitem_2948 = _foreach_div_3[136]\n getitem_2949 = _foreach_div_3[137]\n getitem_2950 = _foreach_div_3[138]\n getitem_2951 = _foreach_div_3[139]\n getitem_2952 = _foreach_div_3[140]\n getitem_2953 = _foreach_div_3[141]\n getitem_2954 = _foreach_div_3[142]\n getitem_2955 = _foreach_div_3[143]\n getitem_2956 = _foreach_div_3[144]\n getitem_2957 = _foreach_div_3[145]\n getitem_2958 = _foreach_div_3[146]\n getitem_2959 = _foreach_div_3[147]; _foreach_div_3 = None\n _foreach_add_4 = torch.ops.aten._foreach_add.List([arg0_1, arg1_1, arg2_1, arg3_1, arg4_1, arg5_1, arg6_1, arg7_1, arg8_1, arg9_1, arg10_1, arg11_1, arg12_1, arg13_1, arg14_1, arg15_1, arg16_1, arg17_1, arg18_1, arg19_1, arg20_1, arg21_1, arg22_1, arg23_1, arg24_1, arg25_1, arg26_1, arg27_1, arg28_1, arg29_1, arg30_1, arg31_1, arg32_1, arg33_1, arg34_1, arg35_1, arg36_1, arg37_1, arg38_1, arg39_1, arg40_1, arg41_1, arg42_1, arg43_1, arg44_1, arg45_1, arg46_1, arg47_1, arg48_1, arg49_1, arg50_1, arg51_1, arg52_1, arg53_1, arg54_1, arg55_1, arg56_1, arg57_1, arg58_1, arg59_1, arg60_1, arg61_1, arg62_1, arg63_1, arg64_1, arg65_1, arg66_1, arg67_1, arg68_1, arg69_1, arg70_1, arg71_1, arg72_1, arg73_1, arg74_1, arg75_1, arg76_1, arg77_1, arg78_1, arg79_1, arg80_1, arg81_1, arg82_1, arg83_1, arg84_1, arg85_1, arg86_1, arg87_1, 
arg88_1, arg89_1, arg90_1, arg91_1, arg92_1, arg93_1, arg94_1, arg95_1, arg96_1, arg97_1, arg98_1, arg99_1, arg100_1, arg101_1, arg102_1, arg103_1, arg104_1, arg105_1, arg106_1, arg107_1, arg108_1, arg109_1, arg110_1, arg111_1, arg112_1, arg113_1, arg114_1, arg115_1, arg116_1, arg117_1, arg118_1, arg119_1, arg120_1, arg121_1, arg122_1, arg123_1, arg124_1, arg125_1, arg126_1, arg127_1, arg128_1, arg129_1, arg130_1, arg131_1, arg132_1, arg133_1, arg134_1, arg135_1, arg136_1, arg137_1, arg138_1, arg139_1, arg140_1, arg141_1, arg142_1, arg143_1, arg144_1, arg145_1, arg146_1, arg147_1], [getitem_2812, getitem_2813, getitem_2814, getitem_2815, getitem_2816, getitem_2817, getitem_2818, getitem_2819, getitem_2820, getitem_2821, getitem_2822, getitem_2823, getitem_2824, getitem_2825, getitem_2826, getitem_2827, getitem_2828, getitem_2829, getitem_2830, getitem_2831, getitem_2832, getitem_2833, getitem_2834, getitem_2835, getitem_2836, getitem_2837, getitem_2838, getitem_2839, getitem_2840, getitem_2841, getitem_2842, getitem_2843, getitem_2844, getitem_2845, getitem_2846, getitem_2847, getitem_2848, getitem_2849, getitem_2850, getitem_2851, getitem_2852, getitem_2853, getitem_2854, getitem_2855, getitem_2856, getitem_2857, getitem_2858, getitem_2859, getitem_2860, getitem_2861, getitem_2862, getitem_2863, getitem_2864, getitem_2865, getitem_2866, getitem_2867, getitem_2868, getitem_2869, getitem_2870, getitem_2871, getitem_2872, getitem_2873, getitem_2874, getitem_2875, getitem_2876, getitem_2877, getitem_2878, getitem_2879, getitem_2880, getitem_2881, getitem_2882, getitem_2883, getitem_2884, getitem_2885, getitem_2886, getitem_2887, getitem_2888, getitem_2889, getitem_2890, getitem_2891, getitem_2892, getitem_2893, getitem_2894, getitem_2895, getitem_2896, getitem_2897, getitem_2898, getitem_2899, getitem_2900, getitem_2901, getitem_2902, getitem_2903, getitem_2904, getitem_2905, getitem_2906, getitem_2907, getitem_2908, getitem_2909, getitem_2910, getitem_2911, getitem_2912, getitem_2913, getitem_2914, getitem_2915, getitem_2916, getitem_2917, getitem_2918, getitem_2919, getitem_2920, getitem_2921, getitem_2922, getitem_2923, getitem_2924, getitem_2925, getitem_2926, getitem_2927, getitem_2928, getitem_2929, getitem_2930, getitem_2931, getitem_2932, getitem_2933, getitem_2934, getitem_2935, getitem_2936, getitem_2937, getitem_2938, getitem_2939, getitem_2940, getitem_2941, getitem_2942, getitem_2943, getitem_2944, getitem_2945, getitem_2946, getitem_2947, getitem_2948, getitem_2949, getitem_2950, getitem_2951, getitem_2952, getitem_2953, getitem_2954, getitem_2955, getitem_2956, getitem_2957, getitem_2958, getitem_2959]); getitem_2812 = getitem_2813 = getitem_2814 = getitem_2815 = getitem_2816 = getitem_2817 = getitem_2818 = getitem_2819 = getitem_2820 = getitem_2821 = getitem_2822 = getitem_2823 = getitem_2824 = getitem_2825 = getitem_2826 = getitem_2827 = getitem_2828 = getitem_2829 = getitem_2830 = getitem_2831 = getitem_2832 = getitem_2833 = getitem_2834 = getitem_2835 = getitem_2836 = getitem_2837 = getitem_2838 = getitem_2839 = getitem_2840 = getitem_2841 = getitem_2842 = getitem_2843 = getitem_2844 = getitem_2845 = getitem_2846 = getitem_2847 = getitem_2848 = getitem_2849 = getitem_2850 = getitem_2851 = getitem_2852 = getitem_2853 = getitem_2854 = getitem_2855 = getitem_2856 = getitem_2857 = getitem_2858 = getitem_2859 = getitem_2860 = getitem_2861 = getitem_2862 = getitem_2863 = getitem_2864 = getitem_2865 = getitem_2866 = getitem_2867 = getitem_2868 = getitem_2869 = getitem_2870 = 
getitem_2871 = getitem_2872 = getitem_2873 = getitem_2874 = getitem_2875 = getitem_2876 = getitem_2877 = getitem_2878 = getitem_2879 = getitem_2880 = getitem_2881 = getitem_2882 = getitem_2883 = getitem_2884 = getitem_2885 = getitem_2886 = getitem_2887 = getitem_2888 = getitem_2889 = getitem_2890 = getitem_2891 = getitem_2892 = getitem_2893 = getitem_2894 = getitem_2895 = getitem_2896 = getitem_2897 = getitem_2898 = getitem_2899 = getitem_2900 = getitem_2901 = getitem_2902 = getitem_2903 = getitem_2904 = getitem_2905 = getitem_2906 = getitem_2907 = getitem_2908 = getitem_2909 = getitem_2910 = getitem_2911 = getitem_2912 = getitem_2913 = getitem_2914 = getitem_2915 = getitem_2916 = getitem_2917 = getitem_2918 = getitem_2919 = getitem_2920 = getitem_2921 = getitem_2922 = getitem_2923 = getitem_2924 = getitem_2925 = getitem_2926 = getitem_2927 = getitem_2928 = getitem_2929 = getitem_2930 = getitem_2931 = getitem_2932 = getitem_2933 = getitem_2934 = getitem_2935 = getitem_2936 = getitem_2937 = getitem_2938 = getitem_2939 = getitem_2940 = getitem_2941 = getitem_2942 = getitem_2943 = getitem_2944 = getitem_2945 = getitem_2946 = getitem_2947 = getitem_2948 = getitem_2949 = getitem_2950 = getitem_2951 = getitem_2952 = getitem_2953 = getitem_2954 = getitem_2955 = getitem_2956 = getitem_2957 = getitem_2958 = getitem_2959 = None\n getitem_2960 = _foreach_add_4[0]\n getitem_2961 = _foreach_add_4[1]\n getitem_2962 = _foreach_add_4[2]\n getitem_2963 = _foreach_add_4[3]\n getitem_2964 = _foreach_add_4[4]\n getitem_2965 = _foreach_add_4[5]\n getitem_2966 = _foreach_add_4[6]\n getitem_2967 = _foreach_add_4[7]\n getitem_2968 = _foreach_add_4[8]\n getitem_2969 = _foreach_add_4[9]\n getitem_2970 = _foreach_add_4[10]\n getitem_2971 = _foreach_add_4[11]\n getitem_2972 = _foreach_add_4[12]\n getitem_2973 = _foreach_add_4[13]\n getitem_2974 = _foreach_add_4[14]\n getitem_2975 = _foreach_add_4[15]\n getitem_2976 = _foreach_add_4[16]\n getitem_2977 = _foreach_add_4[17]\n getitem_2978 = _foreach_add_4[18]\n getitem_2979 = _foreach_add_4[19]\n getitem_2980 = _foreach_add_4[20]\n getitem_2981 = _foreach_add_4[21]\n getitem_2982 = _foreach_add_4[22]\n getitem_2983 = _foreach_add_4[23]\n getitem_2984 = _foreach_add_4[24]\n getitem_2985 = _foreach_add_4[25]\n getitem_2986 = _foreach_add_4[26]\n getitem_2987 = _foreach_add_4[27]\n getitem_2988 = _foreach_add_4[28]\n getitem_2989 = _foreach_add_4[29]\n getitem_2990 = _foreach_add_4[30]\n getitem_2991 = _foreach_add_4[31]\n getitem_2992 = _foreach_add_4[32]\n getitem_2993 = _foreach_add_4[33]\n getitem_2994 = _foreach_add_4[34]\n getitem_2995 = _foreach_add_4[35]\n getitem_2996 = _foreach_add_4[36]\n getitem_2997 = _foreach_add_4[37]\n getitem_2998 = _foreach_add_4[38]\n getitem_2999 = _foreach_add_4[39]\n getitem_3000 = _foreach_add_4[40]\n getitem_3001 = _foreach_add_4[41]\n getitem_3002 = _foreach_add_4[42]\n getitem_3003 = _foreach_add_4[43]\n getitem_3004 = _foreach_add_4[44]\n getitem_3005 = _foreach_add_4[45]\n getitem_3006 = _foreach_add_4[46]\n getitem_3007 = _foreach_add_4[47]\n getitem_3008 = _foreach_add_4[48]\n getitem_3009 = _foreach_add_4[49]\n getitem_3010 = _foreach_add_4[50]\n getitem_3011 = _foreach_add_4[51]\n getitem_3012 = _foreach_add_4[52]\n getitem_3013 = _foreach_add_4[53]\n getitem_3014 = _foreach_add_4[54]\n getitem_3015 = _foreach_add_4[55]\n getitem_3016 = _foreach_add_4[56]\n getitem_3017 = _foreach_add_4[57]\n getitem_3018 = _foreach_add_4[58]\n getitem_3019 = _foreach_add_4[59]\n getitem_3020 = _foreach_add_4[60]\n getitem_3021 = 
_foreach_add_4[61]\n getitem_3022 = _foreach_add_4[62]\n getitem_3023 = _foreach_add_4[63]\n getitem_3024 = _foreach_add_4[64]\n getitem_3025 = _foreach_add_4[65]\n getitem_3026 = _foreach_add_4[66]\n getitem_3027 = _foreach_add_4[67]\n getitem_3028 = _foreach_add_4[68]\n getitem_3029 = _foreach_add_4[69]\n getitem_3030 = _foreach_add_4[70]\n getitem_3031 = _foreach_add_4[71]\n getitem_3032 = _foreach_add_4[72]\n getitem_3033 = _foreach_add_4[73]\n getitem_3034 = _foreach_add_4[74]\n getitem_3035 = _foreach_add_4[75]\n getitem_3036 = _foreach_add_4[76]\n getitem_3037 = _foreach_add_4[77]\n getitem_3038 = _foreach_add_4[78]\n getitem_3039 = _foreach_add_4[79]\n getitem_3040 = _foreach_add_4[80]\n getitem_3041 = _foreach_add_4[81]\n getitem_3042 = _foreach_add_4[82]\n getitem_3043 = _foreach_add_4[83]\n getitem_3044 = _foreach_add_4[84]\n getitem_3045 = _foreach_add_4[85]\n getitem_3046 = _foreach_add_4[86]\n getitem_3047 = _foreach_add_4[87]\n getitem_3048 = _foreach_add_4[88]\n getitem_3049 = _foreach_add_4[89]\n getitem_3050 = _foreach_add_4[90]\n getitem_3051 = _foreach_add_4[91]\n getitem_3052 = _foreach_add_4[92]\n getitem_3053 = _foreach_add_4[93]\n getitem_3054 = _foreach_add_4[94]\n getitem_3055 = _foreach_add_4[95]\n getitem_3056 = _foreach_add_4[96]\n getitem_3057 = _foreach_add_4[97]\n getitem_3058 = _foreach_add_4[98]\n getitem_3059 = _foreach_add_4[99]\n getitem_3060 = _foreach_add_4[100]\n getitem_3061 = _foreach_add_4[101]\n getitem_3062 = _foreach_add_4[102]\n getitem_3063 = _foreach_add_4[103]\n getitem_3064 = _foreach_add_4[104]\n getitem_3065 = _foreach_add_4[105]\n getitem_3066 = _foreach_add_4[106]\n getitem_3067 = _foreach_add_4[107]\n getitem_3068 = _foreach_add_4[108]\n getitem_3069 = _foreach_add_4[109]\n getitem_3070 = _foreach_add_4[110]\n getitem_3071 = _foreach_add_4[111]\n getitem_3072 = _foreach_add_4[112]\n getitem_3073 = _foreach_add_4[113]\n getitem_3074 = _foreach_add_4[114]\n getitem_3075 = _foreach_add_4[115]\n getitem_3076 = _foreach_add_4[116]\n getitem_3077 = _foreach_add_4[117]\n getitem_3078 = _foreach_add_4[118]\n getitem_3079 = _foreach_add_4[119]\n getitem_3080 = _foreach_add_4[120]\n getitem_3081 = _foreach_add_4[121]\n getitem_3082 = _foreach_add_4[122]\n getitem_3083 = _foreach_add_4[123]\n getitem_3084 = _foreach_add_4[124]\n getitem_3085 = _foreach_add_4[125]\n getitem_3086 = _foreach_add_4[126]\n getitem_3087 = _foreach_add_4[127]\n getitem_3088 = _foreach_add_4[128]\n getitem_3089 = _foreach_add_4[129]\n getitem_3090 = _foreach_add_4[130]\n getitem_3091 = _foreach_add_4[131]\n getitem_3092 = _foreach_add_4[132]\n getitem_3093 = _foreach_add_4[133]\n getitem_3094 = _foreach_add_4[134]\n getitem_3095 = _foreach_add_4[135]\n getitem_3096 = _foreach_add_4[136]\n getitem_3097 = _foreach_add_4[137]\n getitem_3098 = _foreach_add_4[138]\n getitem_3099 = _foreach_add_4[139]\n getitem_3100 = _foreach_add_4[140]\n getitem_3101 = _foreach_add_4[141]\n getitem_3102 = _foreach_add_4[142]\n getitem_3103 = _foreach_add_4[143]\n getitem_3104 = _foreach_add_4[144]\n getitem_3105 = _foreach_add_4[145]\n getitem_3106 = _foreach_add_4[146]\n getitem_3107 = _foreach_add_4[147]; _foreach_add_4 = None\n copy_ = torch.ops.aten.copy_.default(arg0_1, getitem_2960); arg0_1 = getitem_2960 = copy_ = None\n copy__1 = torch.ops.aten.copy_.default(arg1_1, getitem_2961); arg1_1 = getitem_2961 = copy__1 = None\n copy__2 = torch.ops.aten.copy_.default(arg2_1, getitem_2962); arg2_1 = getitem_2962 = copy__2 = None\n copy__3 = torch.ops.aten.copy_.default(arg3_1, getitem_2963); 
arg3_1 = getitem_2963 = copy__3 = None\n copy__4 = torch.ops.aten.copy_.default(arg4_1, getitem_2964); arg4_1 = getitem_2964 = copy__4 = None\n copy__5 = torch.ops.aten.copy_.default(arg5_1, getitem_2965); arg5_1 = getitem_2965 = copy__5 = None\n copy__6 = torch.ops.aten.copy_.default(arg6_1, getitem_2966); arg6_1 = getitem_2966 = copy__6 = None\n copy__7 = torch.ops.aten.copy_.default(arg7_1, getitem_2967); arg7_1 = getitem_2967 = copy__7 = None\n copy__8 = torch.ops.aten.copy_.default(arg8_1, getitem_2968); arg8_1 = getitem_2968 = copy__8 = None\n copy__9 = torch.ops.aten.copy_.default(arg9_1, getitem_2969); arg9_1 = getitem_2969 = copy__9 = None\n copy__10 = torch.ops.aten.copy_.default(arg10_1, getitem_2970); arg10_1 = getitem_2970 = copy__10 = None\n copy__11 = torch.ops.aten.copy_.default(arg11_1, getitem_2971); arg11_1 = getitem_2971 = copy__11 = None\n copy__12 = torch.ops.aten.copy_.default(arg12_1, getitem_2972); arg12_1 = getitem_2972 = copy__12 = None\n copy__13 = torch.ops.aten.copy_.default(arg13_1, getitem_2973); arg13_1 = getitem_2973 = copy__13 = None\n copy__14 = torch.ops.aten.copy_.default(arg14_1, getitem_2974); arg14_1 = getitem_2974 = copy__14 = None\n copy__15 = torch.ops.aten.copy_.default(arg15_1, getitem_2975); arg15_1 = getitem_2975 = copy__15 = None\n copy__16 = torch.ops.aten.copy_.default(arg16_1, getitem_2976); arg16_1 = getitem_2976 = copy__16 = None\n copy__17 = torch.ops.aten.copy_.default(arg17_1, getitem_2977); arg17_1 = getitem_2977 = copy__17 = None\n copy__18 = torch.ops.aten.copy_.default(arg18_1, getitem_2978); arg18_1 = getitem_2978 = copy__18 = None\n copy__19 = torch.ops.aten.copy_.default(arg19_1, getitem_2979); arg19_1 = getitem_2979 = copy__19 = None\n copy__20 = torch.ops.aten.copy_.default(arg20_1, getitem_2980); arg20_1 = getitem_2980 = copy__20 = None\n copy__21 = torch.ops.aten.copy_.default(arg21_1, getitem_2981); arg21_1 = getitem_2981 = copy__21 = None\n copy__22 = torch.ops.aten.copy_.default(arg22_1, getitem_2982); arg22_1 = getitem_2982 = copy__22 = None\n copy__23 = torch.ops.aten.copy_.default(arg23_1, getitem_2983); arg23_1 = getitem_2983 = copy__23 = None\n copy__24 = torch.ops.aten.copy_.default(arg24_1, getitem_2984); arg24_1 = getitem_2984 = copy__24 = None\n copy__25 = torch.ops.aten.copy_.default(arg25_1, getitem_2985); arg25_1 = getitem_2985 = copy__25 = None\n copy__26 = torch.ops.aten.copy_.default(arg26_1, getitem_2986); arg26_1 = getitem_2986 = copy__26 = None\n copy__27 = torch.ops.aten.copy_.default(arg27_1, getitem_2987); arg27_1 = getitem_2987 = copy__27 = None\n copy__28 = torch.ops.aten.copy_.default(arg28_1, getitem_2988); arg28_1 = getitem_2988 = copy__28 = None\n copy__29 = torch.ops.aten.copy_.default(arg29_1, getitem_2989); arg29_1 = getitem_2989 = copy__29 = None\n copy__30 = torch.ops.aten.copy_.default(arg30_1, getitem_2990); arg30_1 = getitem_2990 = copy__30 = None\n copy__31 = torch.ops.aten.copy_.default(arg31_1, getitem_2991); arg31_1 = getitem_2991 = copy__31 = None\n copy__32 = torch.ops.aten.copy_.default(arg32_1, getitem_2992); arg32_1 = getitem_2992 = copy__32 = None\n copy__33 = torch.ops.aten.copy_.default(arg33_1, getitem_2993); arg33_1 = getitem_2993 = copy__33 = None\n copy__34 = torch.ops.aten.copy_.default(arg34_1, getitem_2994); arg34_1 = getitem_2994 = copy__34 = None\n copy__35 = torch.ops.aten.copy_.default(arg35_1, getitem_2995); arg35_1 = getitem_2995 = copy__35 = None\n copy__36 = torch.ops.aten.copy_.default(arg36_1, getitem_2996); arg36_1 = getitem_2996 = copy__36 = None\n 
copy__37 = torch.ops.aten.copy_.default(arg37_1, getitem_2997); arg37_1 = getitem_2997 = copy__37 = None\n copy__38 = torch.ops.aten.copy_.default(arg38_1, getitem_2998); arg38_1 = getitem_2998 = copy__38 = None\n copy__39 = torch.ops.aten.copy_.default(arg39_1, getitem_2999); arg39_1 = getitem_2999 = copy__39 = None\n copy__40 = torch.ops.aten.copy_.default(arg40_1, getitem_3000); arg40_1 = getitem_3000 = copy__40 = None\n copy__41 = torch.ops.aten.copy_.default(arg41_1, getitem_3001); arg41_1 = getitem_3001 = copy__41 = None\n copy__42 = torch.ops.aten.copy_.default(arg42_1, getitem_3002); arg42_1 = getitem_3002 = copy__42 = None\n copy__43 = torch.ops.aten.copy_.default(arg43_1, getitem_3003); arg43_1 = getitem_3003 = copy__43 = None\n copy__44 = torch.ops.aten.copy_.default(arg44_1, getitem_3004); arg44_1 = getitem_3004 = copy__44 = None\n copy__45 = torch.ops.aten.copy_.default(arg45_1, getitem_3005); arg45_1 = getitem_3005 = copy__45 = None\n copy__46 = torch.ops.aten.copy_.default(arg46_1, getitem_3006); arg46_1 = getitem_3006 = copy__46 = None\n copy__47 = torch.ops.aten.copy_.default(arg47_1, getitem_3007); arg47_1 = getitem_3007 = copy__47 = None\n copy__48 = torch.ops.aten.copy_.default(arg48_1, getitem_3008); arg48_1 = getitem_3008 = copy__48 = None\n copy__49 = torch.ops.aten.copy_.default(arg49_1, getitem_3009); arg49_1 = getitem_3009 = copy__49 = None\n copy__50 = torch.ops.aten.copy_.default(arg50_1, getitem_3010); arg50_1 = getitem_3010 = copy__50 = None\n copy__51 = torch.ops.aten.copy_.default(arg51_1, getitem_3011); arg51_1 = getitem_3011 = copy__51 = None\n copy__52 = torch.ops.aten.copy_.default(arg52_1, getitem_3012); arg52_1 = getitem_3012 = copy__52 = None\n copy__53 = torch.ops.aten.copy_.default(arg53_1, getitem_3013); arg53_1 = getitem_3013 = copy__53 = None\n copy__54 = torch.ops.aten.copy_.default(arg54_1, getitem_3014); arg54_1 = getitem_3014 = copy__54 = None\n copy__55 = torch.ops.aten.copy_.default(arg55_1, getitem_3015); arg55_1 = getitem_3015 = copy__55 = None\n copy__56 = torch.ops.aten.copy_.default(arg56_1, getitem_3016); arg56_1 = getitem_3016 = copy__56 = None\n copy__57 = torch.ops.aten.copy_.default(arg57_1, getitem_3017); arg57_1 = getitem_3017 = copy__57 = None\n copy__58 = torch.ops.aten.copy_.default(arg58_1, getitem_3018); arg58_1 = getitem_3018 = copy__58 = None\n copy__59 = torch.ops.aten.copy_.default(arg59_1, getitem_3019); arg59_1 = getitem_3019 = copy__59 = None\n copy__60 = torch.ops.aten.copy_.default(arg60_1, getitem_3020); arg60_1 = getitem_3020 = copy__60 = None\n copy__61 = torch.ops.aten.copy_.default(arg61_1, getitem_3021); arg61_1 = getitem_3021 = copy__61 = None\n copy__62 = torch.ops.aten.copy_.default(arg62_1, getitem_3022); arg62_1 = getitem_3022 = copy__62 = None\n copy__63 = torch.ops.aten.copy_.default(arg63_1, getitem_3023); arg63_1 = getitem_3023 = copy__63 = None\n copy__64 = torch.ops.aten.copy_.default(arg64_1, getitem_3024); arg64_1 = getitem_3024 = copy__64 = None\n copy__65 = torch.ops.aten.copy_.default(arg65_1, getitem_3025); arg65_1 = getitem_3025 = copy__65 = None\n copy__66 = torch.ops.aten.copy_.default(arg66_1, getitem_3026); arg66_1 = getitem_3026 = copy__66 = None\n copy__67 = torch.ops.aten.copy_.default(arg67_1, getitem_3027); arg67_1 = getitem_3027 = copy__67 = None\n copy__68 = torch.ops.aten.copy_.default(arg68_1, getitem_3028); arg68_1 = getitem_3028 = copy__68 = None\n copy__69 = torch.ops.aten.copy_.default(arg69_1, getitem_3029); arg69_1 = getitem_3029 = copy__69 = None\n copy__70 = 
torch.ops.aten.copy_.default(arg70_1, getitem_3030); arg70_1 = getitem_3030 = copy__70 = None\n copy__71 = torch.ops.aten.copy_.default(arg71_1, getitem_3031); arg71_1 = getitem_3031 = copy__71 = None\n copy__72 = torch.ops.aten.copy_.default(arg72_1, getitem_3032); arg72_1 = getitem_3032 = copy__72 = None\n copy__73 = torch.ops.aten.copy_.default(arg73_1, getitem_3033); arg73_1 = getitem_3033 = copy__73 = None\n copy__74 = torch.ops.aten.copy_.default(arg74_1, getitem_3034); arg74_1 = getitem_3034 = copy__74 = None\n copy__75 = torch.ops.aten.copy_.default(arg75_1, getitem_3035); arg75_1 = getitem_3035 = copy__75 = None\n copy__76 = torch.ops.aten.copy_.default(arg76_1, getitem_3036); arg76_1 = getitem_3036 = copy__76 = None\n copy__77 = torch.ops.aten.copy_.default(arg77_1, getitem_3037); arg77_1 = getitem_3037 = copy__77 = None\n copy__78 = torch.ops.aten.copy_.default(arg78_1, getitem_3038); arg78_1 = getitem_3038 = copy__78 = None\n copy__79 = torch.ops.aten.copy_.default(arg79_1, getitem_3039); arg79_1 = getitem_3039 = copy__79 = None\n copy__80 = torch.ops.aten.copy_.default(arg80_1, getitem_3040); arg80_1 = getitem_3040 = copy__80 = None\n copy__81 = torch.ops.aten.copy_.default(arg81_1, getitem_3041); arg81_1 = getitem_3041 = copy__81 = None\n copy__82 = torch.ops.aten.copy_.default(arg82_1, getitem_3042); arg82_1 = getitem_3042 = copy__82 = None\n copy__83 = torch.ops.aten.copy_.default(arg83_1, getitem_3043); arg83_1 = getitem_3043 = copy__83 = None\n copy__84 = torch.ops.aten.copy_.default(arg84_1, getitem_3044); arg84_1 = getitem_3044 = copy__84 = None\n copy__85 = torch.ops.aten.copy_.default(arg85_1, getitem_3045); arg85_1 = getitem_3045 = copy__85 = None\n copy__86 = torch.ops.aten.copy_.default(arg86_1, getitem_3046); arg86_1 = getitem_3046 = copy__86 = None\n copy__87 = torch.ops.aten.copy_.default(arg87_1, getitem_3047); arg87_1 = getitem_3047 = copy__87 = None\n copy__88 = torch.ops.aten.copy_.default(arg88_1, getitem_3048); arg88_1 = getitem_3048 = copy__88 = None\n copy__89 = torch.ops.aten.copy_.default(arg89_1, getitem_3049); arg89_1 = getitem_3049 = copy__89 = None\n copy__90 = torch.ops.aten.copy_.default(arg90_1, getitem_3050); arg90_1 = getitem_3050 = copy__90 = None\n copy__91 = torch.ops.aten.copy_.default(arg91_1, getitem_3051); arg91_1 = getitem_3051 = copy__91 = None\n copy__92 = torch.ops.aten.copy_.default(arg92_1, getitem_3052); arg92_1 = getitem_3052 = copy__92 = None\n copy__93 = torch.ops.aten.copy_.default(arg93_1, getitem_3053); arg93_1 = getitem_3053 = copy__93 = None\n copy__94 = torch.ops.aten.copy_.default(arg94_1, getitem_3054); arg94_1 = getitem_3054 = copy__94 = None\n copy__95 = torch.ops.aten.copy_.default(arg95_1, getitem_3055); arg95_1 = getitem_3055 = copy__95 = None\n copy__96 = torch.ops.aten.copy_.default(arg96_1, getitem_3056); arg96_1 = getitem_3056 = copy__96 = None\n copy__97 = torch.ops.aten.copy_.default(arg97_1, getitem_3057); arg97_1 = getitem_3057 = copy__97 = None\n copy__98 = torch.ops.aten.copy_.default(arg98_1, getitem_3058); arg98_1 = getitem_3058 = copy__98 = None\n copy__99 = torch.ops.aten.copy_.default(arg99_1, getitem_3059); arg99_1 = getitem_3059 = copy__99 = None\n copy__100 = torch.ops.aten.copy_.default(arg100_1, getitem_3060); arg100_1 = getitem_3060 = copy__100 = None\n copy__101 = torch.ops.aten.copy_.default(arg101_1, getitem_3061); arg101_1 = getitem_3061 = copy__101 = None\n copy__102 = torch.ops.aten.copy_.default(arg102_1, getitem_3062); arg102_1 = getitem_3062 = copy__102 = None\n copy__103 = 
torch.ops.aten.copy_.default(arg103_1, getitem_3063); arg103_1 = getitem_3063 = copy__103 = None\n copy__104 = torch.ops.aten.copy_.default(arg104_1, getitem_3064); arg104_1 = getitem_3064 = copy__104 = None\n copy__105 = torch.ops.aten.copy_.default(arg105_1, getitem_3065); arg105_1 = getitem_3065 = copy__105 = None\n copy__106 = torch.ops.aten.copy_.default(arg106_1, getitem_3066); arg106_1 = getitem_3066 = copy__106 = None\n copy__107 = torch.ops.aten.copy_.default(arg107_1, getitem_3067); arg107_1 = getitem_3067 = copy__107 = None\n copy__108 = torch.ops.aten.copy_.default(arg108_1, getitem_3068); arg108_1 = getitem_3068 = copy__108 = None\n copy__109 = torch.ops.aten.copy_.default(arg109_1, getitem_3069); arg109_1 = getitem_3069 = copy__109 = None\n copy__110 = torch.ops.aten.copy_.default(arg110_1, getitem_3070); arg110_1 = getitem_3070 = copy__110 = None\n copy__111 = torch.ops.aten.copy_.default(arg111_1, getitem_3071); arg111_1 = getitem_3071 = copy__111 = None\n copy__112 = torch.ops.aten.copy_.default(arg112_1, getitem_3072); arg112_1 = getitem_3072 = copy__112 = None\n copy__113 = torch.ops.aten.copy_.default(arg113_1, getitem_3073); arg113_1 = getitem_3073 = copy__113 = None\n copy__114 = torch.ops.aten.copy_.default(arg114_1, getitem_3074); arg114_1 = getitem_3074 = copy__114 = None\n copy__115 = torch.ops.aten.copy_.default(arg115_1, getitem_3075); arg115_1 = getitem_3075 = copy__115 = None\n copy__116 = torch.ops.aten.copy_.default(arg116_1, getitem_3076); arg116_1 = getitem_3076 = copy__116 = None\n copy__117 = torch.ops.aten.copy_.default(arg117_1, getitem_3077); arg117_1 = getitem_3077 = copy__117 = None\n copy__118 = torch.ops.aten.copy_.default(arg118_1, getitem_3078); arg118_1 = getitem_3078 = copy__118 = None\n copy__119 = torch.ops.aten.copy_.default(arg119_1, getitem_3079); arg119_1 = getitem_3079 = copy__119 = None\n copy__120 = torch.ops.aten.copy_.default(arg120_1, getitem_3080); arg120_1 = getitem_3080 = copy__120 = None\n copy__121 = torch.ops.aten.copy_.default(arg121_1, getitem_3081); arg121_1 = getitem_3081 = copy__121 = None\n copy__122 = torch.ops.aten.copy_.default(arg122_1, getitem_3082); arg122_1 = getitem_3082 = copy__122 = None\n copy__123 = torch.ops.aten.copy_.default(arg123_1, getitem_3083); arg123_1 = getitem_3083 = copy__123 = None\n copy__124 = torch.ops.aten.copy_.default(arg124_1, getitem_3084); arg124_1 = getitem_3084 = copy__124 = None\n copy__125 = torch.ops.aten.copy_.default(arg125_1, getitem_3085); arg125_1 = getitem_3085 = copy__125 = None\n copy__126 = torch.ops.aten.copy_.default(arg126_1, getitem_3086); arg126_1 = getitem_3086 = copy__126 = None\n copy__127 = torch.ops.aten.copy_.default(arg127_1, getitem_3087); arg127_1 = getitem_3087 = copy__127 = None\n copy__128 = torch.ops.aten.copy_.default(arg128_1, getitem_3088); arg128_1 = getitem_3088 = copy__128 = None\n copy__129 = torch.ops.aten.copy_.default(arg129_1, getitem_3089); arg129_1 = getitem_3089 = copy__129 = None\n copy__130 = torch.ops.aten.copy_.default(arg130_1, getitem_3090); arg130_1 = getitem_3090 = copy__130 = None\n copy__131 = torch.ops.aten.copy_.default(arg131_1, getitem_3091); arg131_1 = getitem_3091 = copy__131 = None\n copy__132 = torch.ops.aten.copy_.default(arg132_1, getitem_3092); arg132_1 = getitem_3092 = copy__132 = None\n copy__133 = torch.ops.aten.copy_.default(arg133_1, getitem_3093); arg133_1 = getitem_3093 = copy__133 = None\n copy__134 = torch.ops.aten.copy_.default(arg134_1, getitem_3094); arg134_1 = getitem_3094 = copy__134 = None\n copy__135 = 
torch.ops.aten.copy_.default(arg135_1, getitem_3095); arg135_1 = getitem_3095 = copy__135 = None\n copy__136 = torch.ops.aten.copy_.default(arg136_1, getitem_3096); arg136_1 = getitem_3096 = copy__136 = None\n copy__137 = torch.ops.aten.copy_.default(arg137_1, getitem_3097); arg137_1 = getitem_3097 = copy__137 = None\n copy__138 = torch.ops.aten.copy_.default(arg138_1, getitem_3098); arg138_1 = getitem_3098 = copy__138 = None\n copy__139 = torch.ops.aten.copy_.default(arg139_1, getitem_3099); arg139_1 = getitem_3099 = copy__139 = None\n copy__140 = torch.ops.aten.copy_.default(arg140_1, getitem_3100); arg140_1 = getitem_3100 = copy__140 = None\n copy__141 = torch.ops.aten.copy_.default(arg141_1, getitem_3101); arg141_1 = getitem_3101 = copy__141 = None\n copy__142 = torch.ops.aten.copy_.default(arg142_1, getitem_3102); arg142_1 = getitem_3102 = copy__142 = None\n copy__143 = torch.ops.aten.copy_.default(arg143_1, getitem_3103); arg143_1 = getitem_3103 = copy__143 = None\n copy__144 = torch.ops.aten.copy_.default(arg144_1, getitem_3104); arg144_1 = getitem_3104 = copy__144 = None\n copy__145 = torch.ops.aten.copy_.default(arg145_1, getitem_3105); arg145_1 = getitem_3105 = copy__145 = None\n copy__146 = torch.ops.aten.copy_.default(arg146_1, getitem_3106); arg146_1 = getitem_3106 = copy__146 = None\n copy__147 = torch.ops.aten.copy_.default(arg147_1, getitem_3107); arg147_1 = getitem_3107 = copy__147 = None\n copy__148 = torch.ops.aten.copy_.default(arg148_1, getitem_1); arg148_1 = getitem_1 = copy__148 = None\n copy__149 = torch.ops.aten.copy_.default(arg149_1, getitem_445); arg149_1 = getitem_445 = copy__149 = None\n copy__150 = torch.ops.aten.copy_.default(arg150_1, getitem_889); arg150_1 = getitem_889 = copy__150 = None\n copy__151 = torch.ops.aten.copy_.default(arg299_1, getitem_444); arg299_1 = getitem_444 = copy__151 = None\n copy__152 = torch.ops.aten.copy_.default(arg300_1, getitem_446); arg300_1 = getitem_446 = copy__152 = None\n copy__153 = torch.ops.aten.copy_.default(arg301_1, getitem_447); arg301_1 = getitem_447 = copy__153 = None\n copy__154 = torch.ops.aten.copy_.default(arg302_1, getitem_448); arg302_1 = getitem_448 = copy__154 = None\n copy__155 = torch.ops.aten.copy_.default(arg303_1, getitem_449); arg303_1 = getitem_449 = copy__155 = None\n copy__156 = torch.ops.aten.copy_.default(arg304_1, getitem_450); arg304_1 = getitem_450 = copy__156 = None\n copy__157 = torch.ops.aten.copy_.default(arg305_1, getitem_451); arg305_1 = getitem_451 = copy__157 = None\n copy__158 = torch.ops.aten.copy_.default(arg306_1, getitem_452); arg306_1 = getitem_452 = copy__158 = None\n copy__159 = torch.ops.aten.copy_.default(arg307_1, getitem_453); arg307_1 = getitem_453 = copy__159 = None\n copy__160 = torch.ops.aten.copy_.default(arg308_1, getitem_454); arg308_1 = getitem_454 = copy__160 = None\n copy__161 = torch.ops.aten.copy_.default(arg309_1, getitem_455); arg309_1 = getitem_455 = copy__161 = None\n copy__162 = torch.ops.aten.copy_.default(arg310_1, getitem_456); arg310_1 = getitem_456 = copy__162 = None\n copy__163 = torch.ops.aten.copy_.default(arg311_1, getitem_457); arg311_1 = getitem_457 = copy__163 = None\n copy__164 = torch.ops.aten.copy_.default(arg312_1, getitem_458); arg312_1 = getitem_458 = copy__164 = None\n copy__165 = torch.ops.aten.copy_.default(arg313_1, getitem_459); arg313_1 = getitem_459 = copy__165 = None\n copy__166 = torch.ops.aten.copy_.default(arg314_1, getitem_460); arg314_1 = getitem_460 = copy__166 = None\n copy__167 = torch.ops.aten.copy_.default(arg315_1, 
getitem_461); arg315_1 = getitem_461 = copy__167 = None\n copy__168 = torch.ops.aten.copy_.default(arg316_1, getitem_462); arg316_1 = getitem_462 = copy__168 = None\n copy__169 = torch.ops.aten.copy_.default(arg317_1, getitem_463); arg317_1 = getitem_463 = copy__169 = None\n copy__170 = torch.ops.aten.copy_.default(arg318_1, getitem_464); arg318_1 = getitem_464 = copy__170 = None\n copy__171 = torch.ops.aten.copy_.default(arg319_1, getitem_465); arg319_1 = getitem_465 = copy__171 = None\n copy__172 = torch.ops.aten.copy_.default(arg320_1, getitem_466); arg320_1 = getitem_466 = copy__172 = None\n copy__173 = torch.ops.aten.copy_.default(arg321_1, getitem_467); arg321_1 = getitem_467 = copy__173 = None\n copy__174 = torch.ops.aten.copy_.default(arg322_1, getitem_468); arg322_1 = getitem_468 = copy__174 = None\n copy__175 = torch.ops.aten.copy_.default(arg323_1, getitem_469); arg323_1 = getitem_469 = copy__175 = None\n copy__176 = torch.ops.aten.copy_.default(arg324_1, getitem_470); arg324_1 = getitem_470 = copy__176 = None\n copy__177 = torch.ops.aten.copy_.default(arg325_1, getitem_471); arg325_1 = getitem_471 = copy__177 = None\n copy__178 = torch.ops.aten.copy_.default(arg326_1, getitem_472); arg326_1 = getitem_472 = copy__178 = None\n copy__179 = torch.ops.aten.copy_.default(arg327_1, getitem_473); arg327_1 = getitem_473 = copy__179 = None\n copy__180 = torch.ops.aten.copy_.default(arg328_1, getitem_474); arg328_1 = getitem_474 = copy__180 = None\n copy__181 = torch.ops.aten.copy_.default(arg329_1, getitem_475); arg329_1 = getitem_475 = copy__181 = None\n copy__182 = torch.ops.aten.copy_.default(arg330_1, getitem_476); arg330_1 = getitem_476 = copy__182 = None\n copy__183 = torch.ops.aten.copy_.default(arg331_1, getitem_477); arg331_1 = getitem_477 = copy__183 = None\n copy__184 = torch.ops.aten.copy_.default(arg332_1, getitem_478); arg332_1 = getitem_478 = copy__184 = None\n copy__185 = torch.ops.aten.copy_.default(arg333_1, getitem_479); arg333_1 = getitem_479 = copy__185 = None\n copy__186 = torch.ops.aten.copy_.default(arg334_1, getitem_480); arg334_1 = getitem_480 = copy__186 = None\n copy__187 = torch.ops.aten.copy_.default(arg335_1, getitem_481); arg335_1 = getitem_481 = copy__187 = None\n copy__188 = torch.ops.aten.copy_.default(arg336_1, getitem_482); arg336_1 = getitem_482 = copy__188 = None\n copy__189 = torch.ops.aten.copy_.default(arg337_1, getitem_483); arg337_1 = getitem_483 = copy__189 = None\n copy__190 = torch.ops.aten.copy_.default(arg338_1, getitem_484); arg338_1 = getitem_484 = copy__190 = None\n copy__191 = torch.ops.aten.copy_.default(arg339_1, getitem_485); arg339_1 = getitem_485 = copy__191 = None\n copy__192 = torch.ops.aten.copy_.default(arg340_1, getitem_486); arg340_1 = getitem_486 = copy__192 = None\n copy__193 = torch.ops.aten.copy_.default(arg341_1, getitem_487); arg341_1 = getitem_487 = copy__193 = None\n copy__194 = torch.ops.aten.copy_.default(arg342_1, getitem_488); arg342_1 = getitem_488 = copy__194 = None\n copy__195 = torch.ops.aten.copy_.default(arg343_1, getitem_489); arg343_1 = getitem_489 = copy__195 = None\n copy__196 = torch.ops.aten.copy_.default(arg344_1, getitem_490); arg344_1 = getitem_490 = copy__196 = None\n copy__197 = torch.ops.aten.copy_.default(arg345_1, getitem_491); arg345_1 = getitem_491 = copy__197 = None\n copy__198 = torch.ops.aten.copy_.default(arg346_1, getitem_492); arg346_1 = getitem_492 = copy__198 = None\n copy__199 = torch.ops.aten.copy_.default(arg347_1, getitem_493); arg347_1 = getitem_493 = copy__199 = None\n 
copy__200 = torch.ops.aten.copy_.default(arg348_1, getitem_494); arg348_1 = getitem_494 = copy__200 = None\n copy__201 = torch.ops.aten.copy_.default(arg349_1, getitem_495); arg349_1 = getitem_495 = copy__201 = None\n copy__202 = torch.ops.aten.copy_.default(arg350_1, getitem_496); arg350_1 = getitem_496 = copy__202 = None\n copy__203 = torch.ops.aten.copy_.default(arg351_1, getitem_497); arg351_1 = getitem_497 = copy__203 = None\n copy__204 = torch.ops.aten.copy_.default(arg352_1, getitem_498); arg352_1 = getitem_498 = copy__204 = None\n copy__205 = torch.ops.aten.copy_.default(arg353_1, getitem_499); arg353_1 = getitem_499 = copy__205 = None\n copy__206 = torch.ops.aten.copy_.default(arg354_1, getitem_500); arg354_1 = getitem_500 = copy__206 = None\n copy__207 = torch.ops.aten.copy_.default(arg355_1, getitem_501); arg355_1 = getitem_501 = copy__207 = None\n copy__208 = torch.ops.aten.copy_.default(arg356_1, getitem_502); arg356_1 = getitem_502 = copy__208 = None\n copy__209 = torch.ops.aten.copy_.default(arg357_1, getitem_503); arg357_1 = getitem_503 = copy__209 = None\n copy__210 = torch.ops.aten.copy_.default(arg358_1, getitem_504); arg358_1 = getitem_504 = copy__210 = None\n copy__211 = torch.ops.aten.copy_.default(arg359_1, getitem_505); arg359_1 = getitem_505 = copy__211 = None\n copy__212 = torch.ops.aten.copy_.default(arg360_1, getitem_506); arg360_1 = getitem_506 = copy__212 = None\n copy__213 = torch.ops.aten.copy_.default(arg361_1, getitem_507); arg361_1 = getitem_507 = copy__213 = None\n copy__214 = torch.ops.aten.copy_.default(arg362_1, getitem_508); arg362_1 = getitem_508 = copy__214 = None\n copy__215 = torch.ops.aten.copy_.default(arg363_1, getitem_509); arg363_1 = getitem_509 = copy__215 = None\n copy__216 = torch.ops.aten.copy_.default(arg364_1, getitem_510); arg364_1 = getitem_510 = copy__216 = None\n copy__217 = torch.ops.aten.copy_.default(arg365_1, getitem_511); arg365_1 = getitem_511 = copy__217 = None\n copy__218 = torch.ops.aten.copy_.default(arg366_1, getitem_512); arg366_1 = getitem_512 = copy__218 = None\n copy__219 = torch.ops.aten.copy_.default(arg367_1, getitem_513); arg367_1 = getitem_513 = copy__219 = None\n copy__220 = torch.ops.aten.copy_.default(arg368_1, getitem_514); arg368_1 = getitem_514 = copy__220 = None\n copy__221 = torch.ops.aten.copy_.default(arg369_1, getitem_515); arg369_1 = getitem_515 = copy__221 = None\n copy__222 = torch.ops.aten.copy_.default(arg370_1, getitem_516); arg370_1 = getitem_516 = copy__222 = None\n copy__223 = torch.ops.aten.copy_.default(arg371_1, getitem_517); arg371_1 = getitem_517 = copy__223 = None\n copy__224 = torch.ops.aten.copy_.default(arg372_1, getitem_518); arg372_1 = getitem_518 = copy__224 = None\n copy__225 = torch.ops.aten.copy_.default(arg373_1, getitem_519); arg373_1 = getitem_519 = copy__225 = None\n copy__226 = torch.ops.aten.copy_.default(arg374_1, getitem_520); arg374_1 = getitem_520 = copy__226 = None\n copy__227 = torch.ops.aten.copy_.default(arg375_1, getitem_521); arg375_1 = getitem_521 = copy__227 = None\n copy__228 = torch.ops.aten.copy_.default(arg376_1, getitem_522); arg376_1 = getitem_522 = copy__228 = None\n copy__229 = torch.ops.aten.copy_.default(arg377_1, getitem_523); arg377_1 = getitem_523 = copy__229 = None\n copy__230 = torch.ops.aten.copy_.default(arg378_1, getitem_524); arg378_1 = getitem_524 = copy__230 = None\n copy__231 = torch.ops.aten.copy_.default(arg379_1, getitem_525); arg379_1 = getitem_525 = copy__231 = None\n copy__232 = torch.ops.aten.copy_.default(arg380_1, getitem_526); 
arg380_1 = getitem_526 = copy__232 = None\n copy__233 = torch.ops.aten.copy_.default(arg381_1, getitem_527); arg381_1 = getitem_527 = copy__233 = None\n copy__234 = torch.ops.aten.copy_.default(arg382_1, getitem_528); arg382_1 = getitem_528 = copy__234 = None\n copy__235 = torch.ops.aten.copy_.default(arg383_1, getitem_529); arg383_1 = getitem_529 = copy__235 = None\n copy__236 = torch.ops.aten.copy_.default(arg384_1, getitem_530); arg384_1 = getitem_530 = copy__236 = None\n copy__237 = torch.ops.aten.copy_.default(arg385_1, getitem_531); arg385_1 = getitem_531 = copy__237 = None\n copy__238 = torch.ops.aten.copy_.default(arg386_1, getitem_532); arg386_1 = getitem_532 = copy__238 = None\n copy__239 = torch.ops.aten.copy_.default(arg387_1, getitem_533); arg387_1 = getitem_533 = copy__239 = None\n copy__240 = torch.ops.aten.copy_.default(arg388_1, getitem_534); arg388_1 = getitem_534 = copy__240 = None\n copy__241 = torch.ops.aten.copy_.default(arg389_1, getitem_535); arg389_1 = getitem_535 = copy__241 = None\n copy__242 = torch.ops.aten.copy_.default(arg390_1, getitem_536); arg390_1 = getitem_536 = copy__242 = None\n copy__243 = torch.ops.aten.copy_.default(arg391_1, getitem_537); arg391_1 = getitem_537 = copy__243 = None\n copy__244 = torch.ops.aten.copy_.default(arg392_1, getitem_538); arg392_1 = getitem_538 = copy__244 = None\n copy__245 = torch.ops.aten.copy_.default(arg393_1, getitem_539); arg393_1 = getitem_539 = copy__245 = None\n copy__246 = torch.ops.aten.copy_.default(arg394_1, getitem_540); arg394_1 = getitem_540 = copy__246 = None\n copy__247 = torch.ops.aten.copy_.default(arg395_1, getitem_541); arg395_1 = getitem_541 = copy__247 = None\n copy__248 = torch.ops.aten.copy_.default(arg396_1, getitem_542); arg396_1 = getitem_542 = copy__248 = None\n copy__249 = torch.ops.aten.copy_.default(arg397_1, getitem_543); arg397_1 = getitem_543 = copy__249 = None\n copy__250 = torch.ops.aten.copy_.default(arg398_1, getitem_544); arg398_1 = getitem_544 = copy__250 = None\n copy__251 = torch.ops.aten.copy_.default(arg399_1, getitem_545); arg399_1 = getitem_545 = copy__251 = None\n copy__252 = torch.ops.aten.copy_.default(arg400_1, getitem_546); arg400_1 = getitem_546 = copy__252 = None\n copy__253 = torch.ops.aten.copy_.default(arg401_1, getitem_547); arg401_1 = getitem_547 = copy__253 = None\n copy__254 = torch.ops.aten.copy_.default(arg402_1, getitem_548); arg402_1 = getitem_548 = copy__254 = None\n copy__255 = torch.ops.aten.copy_.default(arg403_1, getitem_549); arg403_1 = getitem_549 = copy__255 = None\n copy__256 = torch.ops.aten.copy_.default(arg404_1, getitem_550); arg404_1 = getitem_550 = copy__256 = None\n copy__257 = torch.ops.aten.copy_.default(arg405_1, getitem_551); arg405_1 = getitem_551 = copy__257 = None\n copy__258 = torch.ops.aten.copy_.default(arg406_1, getitem_552); arg406_1 = getitem_552 = copy__258 = None\n copy__259 = torch.ops.aten.copy_.default(arg407_1, getitem_553); arg407_1 = getitem_553 = copy__259 = None\n copy__260 = torch.ops.aten.copy_.default(arg408_1, getitem_554); arg408_1 = getitem_554 = copy__260 = None\n copy__261 = torch.ops.aten.copy_.default(arg409_1, getitem_555); arg409_1 = getitem_555 = copy__261 = None\n copy__262 = torch.ops.aten.copy_.default(arg410_1, getitem_556); arg410_1 = getitem_556 = copy__262 = None\n copy__263 = torch.ops.aten.copy_.default(arg411_1, getitem_557); arg411_1 = getitem_557 = copy__263 = None\n copy__264 = torch.ops.aten.copy_.default(arg412_1, getitem_558); arg412_1 = getitem_558 = copy__264 = None\n copy__265 = 
torch.ops.aten.copy_.default(arg413_1, getitem_559); arg413_1 = getitem_559 = copy__265 = None\n copy__266 = torch.ops.aten.copy_.default(arg414_1, getitem_560); arg414_1 = getitem_560 = copy__266 = None\n copy__267 = torch.ops.aten.copy_.default(arg415_1, getitem_561); arg415_1 = getitem_561 = copy__267 = None\n copy__268 = torch.ops.aten.copy_.default(arg416_1, getitem_562); arg416_1 = getitem_562 = copy__268 = None\n copy__269 = torch.ops.aten.copy_.default(arg417_1, getitem_563); arg417_1 = getitem_563 = copy__269 = None\n copy__270 = torch.ops.aten.copy_.default(arg418_1, getitem_564); arg418_1 = getitem_564 = copy__270 = None\n copy__271 = torch.ops.aten.copy_.default(arg419_1, getitem_565); arg419_1 = getitem_565 = copy__271 = None\n copy__272 = torch.ops.aten.copy_.default(arg420_1, getitem_566); arg420_1 = getitem_566 = copy__272 = None\n copy__273 = torch.ops.aten.copy_.default(arg421_1, getitem_567); arg421_1 = getitem_567 = copy__273 = None\n copy__274 = torch.ops.aten.copy_.default(arg422_1, getitem_568); arg422_1 = getitem_568 = copy__274 = None\n copy__275 = torch.ops.aten.copy_.default(arg423_1, getitem_569); arg423_1 = getitem_569 = copy__275 = None\n copy__276 = torch.ops.aten.copy_.default(arg424_1, getitem_570); arg424_1 = getitem_570 = copy__276 = None\n copy__277 = torch.ops.aten.copy_.default(arg425_1, getitem_571); arg425_1 = getitem_571 = copy__277 = None\n copy__278 = torch.ops.aten.copy_.default(arg426_1, getitem_572); arg426_1 = getitem_572 = copy__278 = None\n copy__279 = torch.ops.aten.copy_.default(arg427_1, getitem_573); arg427_1 = getitem_573 = copy__279 = None\n copy__280 = torch.ops.aten.copy_.default(arg428_1, getitem_574); arg428_1 = getitem_574 = copy__280 = None\n copy__281 = torch.ops.aten.copy_.default(arg429_1, getitem_575); arg429_1 = getitem_575 = copy__281 = None\n copy__282 = torch.ops.aten.copy_.default(arg430_1, getitem_576); arg430_1 = getitem_576 = copy__282 = None\n copy__283 = torch.ops.aten.copy_.default(arg431_1, getitem_577); arg431_1 = getitem_577 = copy__283 = None\n copy__284 = torch.ops.aten.copy_.default(arg432_1, getitem_578); arg432_1 = getitem_578 = copy__284 = None\n copy__285 = torch.ops.aten.copy_.default(arg433_1, getitem_579); arg433_1 = getitem_579 = copy__285 = None\n copy__286 = torch.ops.aten.copy_.default(arg434_1, getitem_580); arg434_1 = getitem_580 = copy__286 = None\n copy__287 = torch.ops.aten.copy_.default(arg435_1, getitem_581); arg435_1 = getitem_581 = copy__287 = None\n copy__288 = torch.ops.aten.copy_.default(arg436_1, getitem_582); arg436_1 = getitem_582 = copy__288 = None\n copy__289 = torch.ops.aten.copy_.default(arg437_1, getitem_583); arg437_1 = getitem_583 = copy__289 = None\n copy__290 = torch.ops.aten.copy_.default(arg438_1, getitem_584); arg438_1 = getitem_584 = copy__290 = None\n copy__291 = torch.ops.aten.copy_.default(arg439_1, getitem_585); arg439_1 = getitem_585 = copy__291 = None\n copy__292 = torch.ops.aten.copy_.default(arg440_1, getitem_586); arg440_1 = getitem_586 = copy__292 = None\n copy__293 = torch.ops.aten.copy_.default(arg441_1, getitem_587); arg441_1 = getitem_587 = copy__293 = None\n copy__294 = torch.ops.aten.copy_.default(arg442_1, getitem_588); arg442_1 = getitem_588 = copy__294 = None\n copy__295 = torch.ops.aten.copy_.default(arg443_1, getitem_589); arg443_1 = getitem_589 = copy__295 = None\n copy__296 = torch.ops.aten.copy_.default(arg444_1, getitem_590); arg444_1 = getitem_590 = copy__296 = None\n copy__297 = torch.ops.aten.copy_.default(arg445_1, getitem_591); arg445_1 = 
getitem_591 = copy__297 = None\n copy__298 = torch.ops.aten.copy_.default(arg446_1, getitem_888); arg446_1 = getitem_888 = copy__298 = None\n copy__299 = torch.ops.aten.copy_.default(arg447_1, getitem_890); arg447_1 = getitem_890 = copy__299 = None\n copy__300 = torch.ops.aten.copy_.default(arg448_1, getitem_891); arg448_1 = getitem_891 = copy__300 = None\n copy__301 = torch.ops.aten.copy_.default(arg449_1, getitem_892); arg449_1 = getitem_892 = copy__301 = None\n copy__302 = torch.ops.aten.copy_.default(arg450_1, getitem_893); arg450_1 = getitem_893 = copy__302 = None\n copy__303 = torch.ops.aten.copy_.default(arg451_1, getitem_894); arg451_1 = getitem_894 = copy__303 = None\n copy__304 = torch.ops.aten.copy_.default(arg452_1, getitem_895); arg452_1 = getitem_895 = copy__304 = None\n copy__305 = torch.ops.aten.copy_.default(arg453_1, getitem_896); arg453_1 = getitem_896 = copy__305 = None\n copy__306 = torch.ops.aten.copy_.default(arg454_1, getitem_897); arg454_1 = getitem_897 = copy__306 = None\n copy__307 = torch.ops.aten.copy_.default(arg455_1, getitem_898); arg455_1 = getitem_898 = copy__307 = None\n copy__308 = torch.ops.aten.copy_.default(arg456_1, getitem_899); arg456_1 = getitem_899 = copy__308 = None\n copy__309 = torch.ops.aten.copy_.default(arg457_1, getitem_900); arg457_1 = getitem_900 = copy__309 = None\n copy__310 = torch.ops.aten.copy_.default(arg458_1, getitem_901); arg458_1 = getitem_901 = copy__310 = None\n copy__311 = torch.ops.aten.copy_.default(arg459_1, getitem_902); arg459_1 = getitem_902 = copy__311 = None\n copy__312 = torch.ops.aten.copy_.default(arg460_1, getitem_903); arg460_1 = getitem_903 = copy__312 = None\n copy__313 = torch.ops.aten.copy_.default(arg461_1, getitem_904); arg461_1 = getitem_904 = copy__313 = None\n copy__314 = torch.ops.aten.copy_.default(arg462_1, getitem_905); arg462_1 = getitem_905 = copy__314 = None\n copy__315 = torch.ops.aten.copy_.default(arg463_1, getitem_906); arg463_1 = getitem_906 = copy__315 = None\n copy__316 = torch.ops.aten.copy_.default(arg464_1, getitem_907); arg464_1 = getitem_907 = copy__316 = None\n copy__317 = torch.ops.aten.copy_.default(arg465_1, getitem_908); arg465_1 = getitem_908 = copy__317 = None\n copy__318 = torch.ops.aten.copy_.default(arg466_1, getitem_909); arg466_1 = getitem_909 = copy__318 = None\n copy__319 = torch.ops.aten.copy_.default(arg467_1, getitem_910); arg467_1 = getitem_910 = copy__319 = None\n copy__320 = torch.ops.aten.copy_.default(arg468_1, getitem_911); arg468_1 = getitem_911 = copy__320 = None\n copy__321 = torch.ops.aten.copy_.default(arg469_1, getitem_912); arg469_1 = getitem_912 = copy__321 = None\n copy__322 = torch.ops.aten.copy_.default(arg470_1, getitem_913); arg470_1 = getitem_913 = copy__322 = None\n copy__323 = torch.ops.aten.copy_.default(arg471_1, getitem_914); arg471_1 = getitem_914 = copy__323 = None\n copy__324 = torch.ops.aten.copy_.default(arg472_1, getitem_915); arg472_1 = getitem_915 = copy__324 = None\n copy__325 = torch.ops.aten.copy_.default(arg473_1, getitem_916); arg473_1 = getitem_916 = copy__325 = None\n copy__326 = torch.ops.aten.copy_.default(arg474_1, getitem_917); arg474_1 = getitem_917 = copy__326 = None\n copy__327 = torch.ops.aten.copy_.default(arg475_1, getitem_918); arg475_1 = getitem_918 = copy__327 = None\n copy__328 = torch.ops.aten.copy_.default(arg476_1, getitem_919); arg476_1 = getitem_919 = copy__328 = None\n copy__329 = torch.ops.aten.copy_.default(arg477_1, getitem_920); arg477_1 = getitem_920 = copy__329 = None\n copy__330 = 
torch.ops.aten.copy_.default(arg478_1, getitem_921); arg478_1 = getitem_921 = copy__330 = None\n copy__331 = torch.ops.aten.copy_.default(arg479_1, getitem_922); arg479_1 = getitem_922 = copy__331 = None\n copy__332 = torch.ops.aten.copy_.default(arg480_1, getitem_923); arg480_1 = getitem_923 = copy__332 = None\n copy__333 = torch.ops.aten.copy_.default(arg481_1, getitem_924); arg481_1 = getitem_924 = copy__333 = None\n copy__334 = torch.ops.aten.copy_.default(arg482_1, getitem_925); arg482_1 = getitem_925 = copy__334 = None\n copy__335 = torch.ops.aten.copy_.default(arg483_1, getitem_926); arg483_1 = getitem_926 = copy__335 = None\n copy__336 = torch.ops.aten.copy_.default(arg484_1, getitem_927); arg484_1 = getitem_927 = copy__336 = None\n copy__337 = torch.ops.aten.copy_.default(arg485_1, getitem_928); arg485_1 = getitem_928 = copy__337 = None\n copy__338 = torch.ops.aten.copy_.default(arg486_1, getitem_929); arg486_1 = getitem_929 = copy__338 = None\n copy__339 = torch.ops.aten.copy_.default(arg487_1, getitem_930); arg487_1 = getitem_930 = copy__339 = None\n copy__340 = torch.ops.aten.copy_.default(arg488_1, getitem_931); arg488_1 = getitem_931 = copy__340 = None\n copy__341 = torch.ops.aten.copy_.default(arg489_1, getitem_932); arg489_1 = getitem_932 = copy__341 = None\n copy__342 = torch.ops.aten.copy_.default(arg490_1, getitem_933); arg490_1 = getitem_933 = copy__342 = None\n copy__343 = torch.ops.aten.copy_.default(arg491_1, getitem_934); arg491_1 = getitem_934 = copy__343 = None\n copy__344 = torch.ops.aten.copy_.default(arg492_1, getitem_935); arg492_1 = getitem_935 = copy__344 = None\n copy__345 = torch.ops.aten.copy_.default(arg493_1, getitem_936); arg493_1 = getitem_936 = copy__345 = None\n copy__346 = torch.ops.aten.copy_.default(arg494_1, getitem_937); arg494_1 = getitem_937 = copy__346 = None\n copy__347 = torch.ops.aten.copy_.default(arg495_1, getitem_938); arg495_1 = getitem_938 = copy__347 = None\n copy__348 = torch.ops.aten.copy_.default(arg496_1, getitem_939); arg496_1 = getitem_939 = copy__348 = None\n copy__349 = torch.ops.aten.copy_.default(arg497_1, getitem_940); arg497_1 = getitem_940 = copy__349 = None\n copy__350 = torch.ops.aten.copy_.default(arg498_1, getitem_941); arg498_1 = getitem_941 = copy__350 = None\n copy__351 = torch.ops.aten.copy_.default(arg499_1, getitem_942); arg499_1 = getitem_942 = copy__351 = None\n copy__352 = torch.ops.aten.copy_.default(arg500_1, getitem_943); arg500_1 = getitem_943 = copy__352 = None\n copy__353 = torch.ops.aten.copy_.default(arg501_1, getitem_944); arg501_1 = getitem_944 = copy__353 = None\n copy__354 = torch.ops.aten.copy_.default(arg502_1, getitem_945); arg502_1 = getitem_945 = copy__354 = None\n copy__355 = torch.ops.aten.copy_.default(arg503_1, getitem_946); arg503_1 = getitem_946 = copy__355 = None\n copy__356 = torch.ops.aten.copy_.default(arg504_1, getitem_947); arg504_1 = getitem_947 = copy__356 = None\n copy__357 = torch.ops.aten.copy_.default(arg505_1, getitem_948); arg505_1 = getitem_948 = copy__357 = None\n copy__358 = torch.ops.aten.copy_.default(arg506_1, getitem_949); arg506_1 = getitem_949 = copy__358 = None\n copy__359 = torch.ops.aten.copy_.default(arg507_1, getitem_950); arg507_1 = getitem_950 = copy__359 = None\n copy__360 = torch.ops.aten.copy_.default(arg508_1, getitem_951); arg508_1 = getitem_951 = copy__360 = None\n copy__361 = torch.ops.aten.copy_.default(arg509_1, getitem_952); arg509_1 = getitem_952 = copy__361 = None\n copy__362 = torch.ops.aten.copy_.default(arg510_1, getitem_953); arg510_1 = 
getitem_953 = copy__362 = None\n copy__363 = torch.ops.aten.copy_.default(arg511_1, getitem_954); arg511_1 = getitem_954 = copy__363 = None\n copy__364 = torch.ops.aten.copy_.default(arg512_1, getitem_955); arg512_1 = getitem_955 = copy__364 = None\n copy__365 = torch.ops.aten.copy_.default(arg513_1, getitem_956); arg513_1 = getitem_956 = copy__365 = None\n copy__366 = torch.ops.aten.copy_.default(arg514_1, getitem_957); arg514_1 = getitem_957 = copy__366 = None\n copy__367 = torch.ops.aten.copy_.default(arg515_1, getitem_958); arg515_1 = getitem_958 = copy__367 = None\n copy__368 = torch.ops.aten.copy_.default(arg516_1, getitem_959); arg516_1 = getitem_959 = copy__368 = None\n copy__369 = torch.ops.aten.copy_.default(arg517_1, getitem_960); arg517_1 = getitem_960 = copy__369 = None\n copy__370 = torch.ops.aten.copy_.default(arg518_1, getitem_961); arg518_1 = getitem_961 = copy__370 = None\n copy__371 = torch.ops.aten.copy_.default(arg519_1, getitem_962); arg519_1 = getitem_962 = copy__371 = None\n copy__372 = torch.ops.aten.copy_.default(arg520_1, getitem_963); arg520_1 = getitem_963 = copy__372 = None\n copy__373 = torch.ops.aten.copy_.default(arg521_1, getitem_964); arg521_1 = getitem_964 = copy__373 = None\n copy__374 = torch.ops.aten.copy_.default(arg522_1, getitem_965); arg522_1 = getitem_965 = copy__374 = None\n copy__375 = torch.ops.aten.copy_.default(arg523_1, getitem_966); arg523_1 = getitem_966 = copy__375 = None\n copy__376 = torch.ops.aten.copy_.default(arg524_1, getitem_967); arg524_1 = getitem_967 = copy__376 = None\n copy__377 = torch.ops.aten.copy_.default(arg525_1, getitem_968); arg525_1 = getitem_968 = copy__377 = None\n copy__378 = torch.ops.aten.copy_.default(arg526_1, getitem_969); arg526_1 = getitem_969 = copy__378 = None\n copy__379 = torch.ops.aten.copy_.default(arg527_1, getitem_970); arg527_1 = getitem_970 = copy__379 = None\n copy__380 = torch.ops.aten.copy_.default(arg528_1, getitem_971); arg528_1 = getitem_971 = copy__380 = None\n copy__381 = torch.ops.aten.copy_.default(arg529_1, getitem_972); arg529_1 = getitem_972 = copy__381 = None\n copy__382 = torch.ops.aten.copy_.default(arg530_1, getitem_973); arg530_1 = getitem_973 = copy__382 = None\n copy__383 = torch.ops.aten.copy_.default(arg531_1, getitem_974); arg531_1 = getitem_974 = copy__383 = None\n copy__384 = torch.ops.aten.copy_.default(arg532_1, getitem_975); arg532_1 = getitem_975 = copy__384 = None\n copy__385 = torch.ops.aten.copy_.default(arg533_1, getitem_976); arg533_1 = getitem_976 = copy__385 = None\n copy__386 = torch.ops.aten.copy_.default(arg534_1, getitem_977); arg534_1 = getitem_977 = copy__386 = None\n copy__387 = torch.ops.aten.copy_.default(arg535_1, getitem_978); arg535_1 = getitem_978 = copy__387 = None\n copy__388 = torch.ops.aten.copy_.default(arg536_1, getitem_979); arg536_1 = getitem_979 = copy__388 = None\n copy__389 = torch.ops.aten.copy_.default(arg537_1, getitem_980); arg537_1 = getitem_980 = copy__389 = None\n copy__390 = torch.ops.aten.copy_.default(arg538_1, getitem_981); arg538_1 = getitem_981 = copy__390 = None\n copy__391 = torch.ops.aten.copy_.default(arg539_1, getitem_982); arg539_1 = getitem_982 = copy__391 = None\n copy__392 = torch.ops.aten.copy_.default(arg540_1, getitem_983); arg540_1 = getitem_983 = copy__392 = None\n copy__393 = torch.ops.aten.copy_.default(arg541_1, getitem_984); arg541_1 = getitem_984 = copy__393 = None\n copy__394 = torch.ops.aten.copy_.default(arg542_1, getitem_985); arg542_1 = getitem_985 = copy__394 = None\n copy__395 = 
torch.ops.aten.copy_.default(arg543_1, getitem_986); arg543_1 = getitem_986 = copy__395 = None\n copy__396 = torch.ops.aten.copy_.default(arg544_1, getitem_987); arg544_1 = getitem_987 = copy__396 = None\n copy__397 = torch.ops.aten.copy_.default(arg545_1, getitem_988); arg545_1 = getitem_988 = copy__397 = None\n copy__398 = torch.ops.aten.copy_.default(arg546_1, getitem_989); arg546_1 = getitem_989 = copy__398 = None\n copy__399 = torch.ops.aten.copy_.default(arg547_1, getitem_990); arg547_1 = getitem_990 = copy__399 = None\n copy__400 = torch.ops.aten.copy_.default(arg548_1, getitem_991); arg548_1 = getitem_991 = copy__400 = None\n copy__401 = torch.ops.aten.copy_.default(arg549_1, getitem_992); arg549_1 = getitem_992 = copy__401 = None\n copy__402 = torch.ops.aten.copy_.default(arg550_1, getitem_993); arg550_1 = getitem_993 = copy__402 = None\n copy__403 = torch.ops.aten.copy_.default(arg551_1, getitem_994); arg551_1 = getitem_994 = copy__403 = None\n copy__404 = torch.ops.aten.copy_.default(arg552_1, getitem_995); arg552_1 = getitem_995 = copy__404 = None\n copy__405 = torch.ops.aten.copy_.default(arg553_1, getitem_996); arg553_1 = getitem_996 = copy__405 = None\n copy__406 = torch.ops.aten.copy_.default(arg554_1, getitem_997); arg554_1 = getitem_997 = copy__406 = None\n copy__407 = torch.ops.aten.copy_.default(arg555_1, getitem_998); arg555_1 = getitem_998 = copy__407 = None\n copy__408 = torch.ops.aten.copy_.default(arg556_1, getitem_999); arg556_1 = getitem_999 = copy__408 = None\n copy__409 = torch.ops.aten.copy_.default(arg557_1, getitem_1000); arg557_1 = getitem_1000 = copy__409 = None\n copy__410 = torch.ops.aten.copy_.default(arg558_1, getitem_1001); arg558_1 = getitem_1001 = copy__410 = None\n copy__411 = torch.ops.aten.copy_.default(arg559_1, getitem_1002); arg559_1 = getitem_1002 = copy__411 = None\n copy__412 = torch.ops.aten.copy_.default(arg560_1, getitem_1003); arg560_1 = getitem_1003 = copy__412 = None\n copy__413 = torch.ops.aten.copy_.default(arg561_1, getitem_1004); arg561_1 = getitem_1004 = copy__413 = None\n copy__414 = torch.ops.aten.copy_.default(arg562_1, getitem_1005); arg562_1 = getitem_1005 = copy__414 = None\n copy__415 = torch.ops.aten.copy_.default(arg563_1, getitem_1006); arg563_1 = getitem_1006 = copy__415 = None\n copy__416 = torch.ops.aten.copy_.default(arg564_1, getitem_1007); arg564_1 = getitem_1007 = copy__416 = None\n copy__417 = torch.ops.aten.copy_.default(arg565_1, getitem_1008); arg565_1 = getitem_1008 = copy__417 = None\n copy__418 = torch.ops.aten.copy_.default(arg566_1, getitem_1009); arg566_1 = getitem_1009 = copy__418 = None\n copy__419 = torch.ops.aten.copy_.default(arg567_1, getitem_1010); arg567_1 = getitem_1010 = copy__419 = None\n copy__420 = torch.ops.aten.copy_.default(arg568_1, getitem_1011); arg568_1 = getitem_1011 = copy__420 = None\n copy__421 = torch.ops.aten.copy_.default(arg569_1, getitem_1012); arg569_1 = getitem_1012 = copy__421 = None\n copy__422 = torch.ops.aten.copy_.default(arg570_1, getitem_1013); arg570_1 = getitem_1013 = copy__422 = None\n copy__423 = torch.ops.aten.copy_.default(arg571_1, getitem_1014); arg571_1 = getitem_1014 = copy__423 = None\n copy__424 = torch.ops.aten.copy_.default(arg572_1, getitem_1015); arg572_1 = getitem_1015 = copy__424 = None\n copy__425 = torch.ops.aten.copy_.default(arg573_1, getitem_1016); arg573_1 = getitem_1016 = copy__425 = None\n copy__426 = torch.ops.aten.copy_.default(arg574_1, getitem_1017); arg574_1 = getitem_1017 = copy__426 = None\n copy__427 = 
torch.ops.aten.copy_.default(arg575_1, getitem_1018); arg575_1 = getitem_1018 = copy__427 = None\n copy__428 = torch.ops.aten.copy_.default(arg576_1, getitem_1019); arg576_1 = getitem_1019 = copy__428 = None\n copy__429 = torch.ops.aten.copy_.default(arg577_1, getitem_1020); arg577_1 = getitem_1020 = copy__429 = None\n copy__430 = torch.ops.aten.copy_.default(arg578_1, getitem_1021); arg578_1 = getitem_1021 = copy__430 = None\n copy__431 = torch.ops.aten.copy_.default(arg579_1, getitem_1022); arg579_1 = getitem_1022 = copy__431 = None\n copy__432 = torch.ops.aten.copy_.default(arg580_1, getitem_1023); arg580_1 = getitem_1023 = copy__432 = None\n copy__433 = torch.ops.aten.copy_.default(arg581_1, getitem_1024); arg581_1 = getitem_1024 = copy__433 = None\n copy__434 = torch.ops.aten.copy_.default(arg582_1, getitem_1025); arg582_1 = getitem_1025 = copy__434 = None\n copy__435 = torch.ops.aten.copy_.default(arg583_1, getitem_1026); arg583_1 = getitem_1026 = copy__435 = None\n copy__436 = torch.ops.aten.copy_.default(arg584_1, getitem_1027); arg584_1 = getitem_1027 = copy__436 = None\n copy__437 = torch.ops.aten.copy_.default(arg585_1, getitem_1028); arg585_1 = getitem_1028 = copy__437 = None\n copy__438 = torch.ops.aten.copy_.default(arg586_1, getitem_1029); arg586_1 = getitem_1029 = copy__438 = None\n copy__439 = torch.ops.aten.copy_.default(arg587_1, getitem_1030); arg587_1 = getitem_1030 = copy__439 = None\n copy__440 = torch.ops.aten.copy_.default(arg588_1, getitem_1031); arg588_1 = getitem_1031 = copy__440 = None\n copy__441 = torch.ops.aten.copy_.default(arg589_1, getitem_1032); arg589_1 = getitem_1032 = copy__441 = None\n copy__442 = torch.ops.aten.copy_.default(arg590_1, getitem_1033); arg590_1 = getitem_1033 = copy__442 = None\n copy__443 = torch.ops.aten.copy_.default(arg591_1, getitem_1034); arg591_1 = getitem_1034 = copy__443 = None\n copy__444 = torch.ops.aten.copy_.default(arg592_1, getitem_1035); arg592_1 = getitem_1035 = copy__444 = None\n copy__445 = torch.ops.aten.copy_.default(arg593_1, getitem); arg593_1 = getitem = copy__445 = None\n copy__446 = torch.ops.aten.copy_.default(arg594_1, getitem_2); arg594_1 = getitem_2 = copy__446 = None\n copy__447 = torch.ops.aten.copy_.default(arg595_1, getitem_3); arg595_1 = getitem_3 = copy__447 = None\n copy__448 = torch.ops.aten.copy_.default(arg596_1, getitem_4); arg596_1 = getitem_4 = copy__448 = None\n copy__449 = torch.ops.aten.copy_.default(arg597_1, getitem_5); arg597_1 = getitem_5 = copy__449 = None\n copy__450 = torch.ops.aten.copy_.default(arg598_1, getitem_6); arg598_1 = getitem_6 = copy__450 = None\n copy__451 = torch.ops.aten.copy_.default(arg599_1, getitem_7); arg599_1 = getitem_7 = copy__451 = None\n copy__452 = torch.ops.aten.copy_.default(arg600_1, getitem_8); arg600_1 = getitem_8 = copy__452 = None\n copy__453 = torch.ops.aten.copy_.default(arg601_1, getitem_9); arg601_1 = getitem_9 = copy__453 = None\n copy__454 = torch.ops.aten.copy_.default(arg602_1, getitem_10); arg602_1 = getitem_10 = copy__454 = None\n copy__455 = torch.ops.aten.copy_.default(arg603_1, getitem_11); arg603_1 = getitem_11 = copy__455 = None\n copy__456 = torch.ops.aten.copy_.default(arg604_1, getitem_12); arg604_1 = getitem_12 = copy__456 = None\n copy__457 = torch.ops.aten.copy_.default(arg605_1, getitem_13); arg605_1 = getitem_13 = copy__457 = None\n copy__458 = torch.ops.aten.copy_.default(arg606_1, getitem_14); arg606_1 = getitem_14 = copy__458 = None\n copy__459 = torch.ops.aten.copy_.default(arg607_1, getitem_15); arg607_1 = getitem_15 = 
copy__459 = None\n copy__460 = torch.ops.aten.copy_.default(arg608_1, getitem_16); arg608_1 = getitem_16 = copy__460 = None\n copy__461 = torch.ops.aten.copy_.default(arg609_1, getitem_17); arg609_1 = getitem_17 = copy__461 = None\n copy__462 = torch.ops.aten.copy_.default(arg610_1, getitem_18); arg610_1 = getitem_18 = copy__462 = None\n copy__463 = torch.ops.aten.copy_.default(arg611_1, getitem_19); arg611_1 = getitem_19 = copy__463 = None\n copy__464 = torch.ops.aten.copy_.default(arg612_1, getitem_20); arg612_1 = getitem_20 = copy__464 = None\n copy__465 = torch.ops.aten.copy_.default(arg613_1, getitem_21); arg613_1 = getitem_21 = copy__465 = None\n copy__466 = torch.ops.aten.copy_.default(arg614_1, getitem_22); arg614_1 = getitem_22 = copy__466 = None\n copy__467 = torch.ops.aten.copy_.default(arg615_1, getitem_23); arg615_1 = getitem_23 = copy__467 = None\n copy__468 = torch.ops.aten.copy_.default(arg616_1, getitem_24); arg616_1 = getitem_24 = copy__468 = None\n copy__469 = torch.ops.aten.copy_.default(arg617_1, getitem_25); arg617_1 = getitem_25 = copy__469 = None\n copy__470 = torch.ops.aten.copy_.default(arg618_1, getitem_26); arg618_1 = getitem_26 = copy__470 = None\n copy__471 = torch.ops.aten.copy_.default(arg619_1, getitem_27); arg619_1 = getitem_27 = copy__471 = None\n copy__472 = torch.ops.aten.copy_.default(arg620_1, getitem_28); arg620_1 = getitem_28 = copy__472 = None\n copy__473 = torch.ops.aten.copy_.default(arg621_1, getitem_29); arg621_1 = getitem_29 = copy__473 = None\n copy__474 = torch.ops.aten.copy_.default(arg622_1, getitem_30); arg622_1 = getitem_30 = copy__474 = None\n copy__475 = torch.ops.aten.copy_.default(arg623_1, getitem_31); arg623_1 = getitem_31 = copy__475 = None\n copy__476 = torch.ops.aten.copy_.default(arg624_1, getitem_32); arg624_1 = getitem_32 = copy__476 = None\n copy__477 = torch.ops.aten.copy_.default(arg625_1, getitem_33); arg625_1 = getitem_33 = copy__477 = None\n copy__478 = torch.ops.aten.copy_.default(arg626_1, getitem_34); arg626_1 = getitem_34 = copy__478 = None\n copy__479 = torch.ops.aten.copy_.default(arg627_1, getitem_35); arg627_1 = getitem_35 = copy__479 = None\n copy__480 = torch.ops.aten.copy_.default(arg628_1, getitem_36); arg628_1 = getitem_36 = copy__480 = None\n copy__481 = torch.ops.aten.copy_.default(arg629_1, getitem_37); arg629_1 = getitem_37 = copy__481 = None\n copy__482 = torch.ops.aten.copy_.default(arg630_1, getitem_38); arg630_1 = getitem_38 = copy__482 = None\n copy__483 = torch.ops.aten.copy_.default(arg631_1, getitem_39); arg631_1 = getitem_39 = copy__483 = None\n copy__484 = torch.ops.aten.copy_.default(arg632_1, getitem_40); arg632_1 = getitem_40 = copy__484 = None\n copy__485 = torch.ops.aten.copy_.default(arg633_1, getitem_41); arg633_1 = getitem_41 = copy__485 = None\n copy__486 = torch.ops.aten.copy_.default(arg634_1, getitem_42); arg634_1 = getitem_42 = copy__486 = None\n copy__487 = torch.ops.aten.copy_.default(arg635_1, getitem_43); arg635_1 = getitem_43 = copy__487 = None\n copy__488 = torch.ops.aten.copy_.default(arg636_1, getitem_44); arg636_1 = getitem_44 = copy__488 = None\n copy__489 = torch.ops.aten.copy_.default(arg637_1, getitem_45); arg637_1 = getitem_45 = copy__489 = None\n copy__490 = torch.ops.aten.copy_.default(arg638_1, getitem_46); arg638_1 = getitem_46 = copy__490 = None\n copy__491 = torch.ops.aten.copy_.default(arg639_1, getitem_47); arg639_1 = getitem_47 = copy__491 = None\n copy__492 = torch.ops.aten.copy_.default(arg640_1, getitem_48); arg640_1 = getitem_48 = copy__492 = None\n 
copy__493 = torch.ops.aten.copy_.default(arg641_1, getitem_49); arg641_1 = getitem_49 = copy__493 = None\n copy__494 = torch.ops.aten.copy_.default(arg642_1, getitem_50); arg642_1 = getitem_50 = copy__494 = None\n copy__495 = torch.ops.aten.copy_.default(arg643_1, getitem_51); arg643_1 = getitem_51 = copy__495 = None\n copy__496 = torch.ops.aten.copy_.default(arg644_1, getitem_52); arg644_1 = getitem_52 = copy__496 = None\n copy__497 = torch.ops.aten.copy_.default(arg645_1, getitem_53); arg645_1 = getitem_53 = copy__497 = None\n copy__498 = torch.ops.aten.copy_.default(arg646_1, getitem_54); arg646_1 = getitem_54 = copy__498 = None\n copy__499 = torch.ops.aten.copy_.default(arg647_1, getitem_55); arg647_1 = getitem_55 = copy__499 = None\n copy__500 = torch.ops.aten.copy_.default(arg648_1, getitem_56); arg648_1 = getitem_56 = copy__500 = None\n copy__501 = torch.ops.aten.copy_.default(arg649_1, getitem_57); arg649_1 = getitem_57 = copy__501 = None\n copy__502 = torch.ops.aten.copy_.default(arg650_1, getitem_58); arg650_1 = getitem_58 = copy__502 = None\n copy__503 = torch.ops.aten.copy_.default(arg651_1, getitem_59); arg651_1 = getitem_59 = copy__503 = None\n copy__504 = torch.ops.aten.copy_.default(arg652_1, getitem_60); arg652_1 = getitem_60 = copy__504 = None\n copy__505 = torch.ops.aten.copy_.default(arg653_1, getitem_61); arg653_1 = getitem_61 = copy__505 = None\n copy__506 = torch.ops.aten.copy_.default(arg654_1, getitem_62); arg654_1 = getitem_62 = copy__506 = None\n copy__507 = torch.ops.aten.copy_.default(arg655_1, getitem_63); arg655_1 = getitem_63 = copy__507 = None\n copy__508 = torch.ops.aten.copy_.default(arg656_1, getitem_64); arg656_1 = getitem_64 = copy__508 = None\n copy__509 = torch.ops.aten.copy_.default(arg657_1, getitem_65); arg657_1 = getitem_65 = copy__509 = None\n copy__510 = torch.ops.aten.copy_.default(arg658_1, getitem_66); arg658_1 = getitem_66 = copy__510 = None\n copy__511 = torch.ops.aten.copy_.default(arg659_1, getitem_67); arg659_1 = getitem_67 = copy__511 = None\n copy__512 = torch.ops.aten.copy_.default(arg660_1, getitem_68); arg660_1 = getitem_68 = copy__512 = None\n copy__513 = torch.ops.aten.copy_.default(arg661_1, getitem_69); arg661_1 = getitem_69 = copy__513 = None\n copy__514 = torch.ops.aten.copy_.default(arg662_1, getitem_70); arg662_1 = getitem_70 = copy__514 = None\n copy__515 = torch.ops.aten.copy_.default(arg663_1, getitem_71); arg663_1 = getitem_71 = copy__515 = None\n copy__516 = torch.ops.aten.copy_.default(arg664_1, getitem_72); arg664_1 = getitem_72 = copy__516 = None\n copy__517 = torch.ops.aten.copy_.default(arg665_1, getitem_73); arg665_1 = getitem_73 = copy__517 = None\n copy__518 = torch.ops.aten.copy_.default(arg666_1, getitem_74); arg666_1 = getitem_74 = copy__518 = None\n copy__519 = torch.ops.aten.copy_.default(arg667_1, getitem_75); arg667_1 = getitem_75 = copy__519 = None\n copy__520 = torch.ops.aten.copy_.default(arg668_1, getitem_76); arg668_1 = getitem_76 = copy__520 = None\n copy__521 = torch.ops.aten.copy_.default(arg669_1, getitem_77); arg669_1 = getitem_77 = copy__521 = None\n copy__522 = torch.ops.aten.copy_.default(arg670_1, getitem_78); arg670_1 = getitem_78 = copy__522 = None\n copy__523 = torch.ops.aten.copy_.default(arg671_1, getitem_79); arg671_1 = getitem_79 = copy__523 = None\n copy__524 = torch.ops.aten.copy_.default(arg672_1, getitem_80); arg672_1 = getitem_80 = copy__524 = None\n copy__525 = torch.ops.aten.copy_.default(arg673_1, getitem_81); arg673_1 = getitem_81 = copy__525 = None\n copy__526 = 
torch.ops.aten.copy_.default(arg674_1, getitem_82); arg674_1 = getitem_82 = copy__526 = None\n copy__527 = torch.ops.aten.copy_.default(arg675_1, getitem_83); arg675_1 = getitem_83 = copy__527 = None\n copy__528 = torch.ops.aten.copy_.default(arg676_1, getitem_84); arg676_1 = getitem_84 = copy__528 = None\n copy__529 = torch.ops.aten.copy_.default(arg677_1, getitem_85); arg677_1 = getitem_85 = copy__529 = None\n copy__530 = torch.ops.aten.copy_.default(arg678_1, getitem_86); arg678_1 = getitem_86 = copy__530 = None\n copy__531 = torch.ops.aten.copy_.default(arg679_1, getitem_87); arg679_1 = getitem_87 = copy__531 = None\n copy__532 = torch.ops.aten.copy_.default(arg680_1, getitem_88); arg680_1 = getitem_88 = copy__532 = None\n copy__533 = torch.ops.aten.copy_.default(arg681_1, getitem_89); arg681_1 = getitem_89 = copy__533 = None\n copy__534 = torch.ops.aten.copy_.default(arg682_1, getitem_90); arg682_1 = getitem_90 = copy__534 = None\n copy__535 = torch.ops.aten.copy_.default(arg683_1, getitem_91); arg683_1 = getitem_91 = copy__535 = None\n copy__536 = torch.ops.aten.copy_.default(arg684_1, getitem_92); arg684_1 = getitem_92 = copy__536 = None\n copy__537 = torch.ops.aten.copy_.default(arg685_1, getitem_93); arg685_1 = getitem_93 = copy__537 = None\n copy__538 = torch.ops.aten.copy_.default(arg686_1, getitem_94); arg686_1 = getitem_94 = copy__538 = None\n copy__539 = torch.ops.aten.copy_.default(arg687_1, getitem_95); arg687_1 = getitem_95 = copy__539 = None\n copy__540 = torch.ops.aten.copy_.default(arg688_1, getitem_96); arg688_1 = getitem_96 = copy__540 = None\n copy__541 = torch.ops.aten.copy_.default(arg689_1, getitem_97); arg689_1 = getitem_97 = copy__541 = None\n copy__542 = torch.ops.aten.copy_.default(arg690_1, getitem_98); arg690_1 = getitem_98 = copy__542 = None\n copy__543 = torch.ops.aten.copy_.default(arg691_1, getitem_99); arg691_1 = getitem_99 = copy__543 = None\n copy__544 = torch.ops.aten.copy_.default(arg692_1, getitem_100); arg692_1 = getitem_100 = copy__544 = None\n copy__545 = torch.ops.aten.copy_.default(arg693_1, getitem_101); arg693_1 = getitem_101 = copy__545 = None\n copy__546 = torch.ops.aten.copy_.default(arg694_1, getitem_102); arg694_1 = getitem_102 = copy__546 = None\n copy__547 = torch.ops.aten.copy_.default(arg695_1, getitem_103); arg695_1 = getitem_103 = copy__547 = None\n copy__548 = torch.ops.aten.copy_.default(arg696_1, getitem_104); arg696_1 = getitem_104 = copy__548 = None\n copy__549 = torch.ops.aten.copy_.default(arg697_1, getitem_105); arg697_1 = getitem_105 = copy__549 = None\n copy__550 = torch.ops.aten.copy_.default(arg698_1, getitem_106); arg698_1 = getitem_106 = copy__550 = None\n copy__551 = torch.ops.aten.copy_.default(arg699_1, getitem_107); arg699_1 = getitem_107 = copy__551 = None\n copy__552 = torch.ops.aten.copy_.default(arg700_1, getitem_108); arg700_1 = getitem_108 = copy__552 = None\n copy__553 = torch.ops.aten.copy_.default(arg701_1, getitem_109); arg701_1 = getitem_109 = copy__553 = None\n copy__554 = torch.ops.aten.copy_.default(arg702_1, getitem_110); arg702_1 = getitem_110 = copy__554 = None\n copy__555 = torch.ops.aten.copy_.default(arg703_1, getitem_111); arg703_1 = getitem_111 = copy__555 = None\n copy__556 = torch.ops.aten.copy_.default(arg704_1, getitem_112); arg704_1 = getitem_112 = copy__556 = None\n copy__557 = torch.ops.aten.copy_.default(arg705_1, getitem_113); arg705_1 = getitem_113 = copy__557 = None\n copy__558 = torch.ops.aten.copy_.default(arg706_1, getitem_114); arg706_1 = getitem_114 = copy__558 = None\n 
copy__559 = torch.ops.aten.copy_.default(arg707_1, getitem_115); arg707_1 = getitem_115 = copy__559 = None\n copy__560 = torch.ops.aten.copy_.default(arg708_1, getitem_116); arg708_1 = getitem_116 = copy__560 = None\n copy__561 = torch.ops.aten.copy_.default(arg709_1, getitem_117); arg709_1 = getitem_117 = copy__561 = None\n copy__562 = torch.ops.aten.copy_.default(arg710_1, getitem_118); arg710_1 = getitem_118 = copy__562 = None\n copy__563 = torch.ops.aten.copy_.default(arg711_1, getitem_119); arg711_1 = getitem_119 = copy__563 = None\n copy__564 = torch.ops.aten.copy_.default(arg712_1, getitem_120); arg712_1 = getitem_120 = copy__564 = None\n copy__565 = torch.ops.aten.copy_.default(arg713_1, getitem_121); arg713_1 = getitem_121 = copy__565 = None\n copy__566 = torch.ops.aten.copy_.default(arg714_1, getitem_122); arg714_1 = getitem_122 = copy__566 = None\n copy__567 = torch.ops.aten.copy_.default(arg715_1, getitem_123); arg715_1 = getitem_123 = copy__567 = None\n copy__568 = torch.ops.aten.copy_.default(arg716_1, getitem_124); arg716_1 = getitem_124 = copy__568 = None\n copy__569 = torch.ops.aten.copy_.default(arg717_1, getitem_125); arg717_1 = getitem_125 = copy__569 = None\n copy__570 = torch.ops.aten.copy_.default(arg718_1, getitem_126); arg718_1 = getitem_126 = copy__570 = None\n copy__571 = torch.ops.aten.copy_.default(arg719_1, getitem_127); arg719_1 = getitem_127 = copy__571 = None\n copy__572 = torch.ops.aten.copy_.default(arg720_1, getitem_128); arg720_1 = getitem_128 = copy__572 = None\n copy__573 = torch.ops.aten.copy_.default(arg721_1, getitem_129); arg721_1 = getitem_129 = copy__573 = None\n copy__574 = torch.ops.aten.copy_.default(arg722_1, getitem_130); arg722_1 = getitem_130 = copy__574 = None\n copy__575 = torch.ops.aten.copy_.default(arg723_1, getitem_131); arg723_1 = getitem_131 = copy__575 = None\n copy__576 = torch.ops.aten.copy_.default(arg724_1, getitem_132); arg724_1 = getitem_132 = copy__576 = None\n copy__577 = torch.ops.aten.copy_.default(arg725_1, getitem_133); arg725_1 = getitem_133 = copy__577 = None\n copy__578 = torch.ops.aten.copy_.default(arg726_1, getitem_134); arg726_1 = getitem_134 = copy__578 = None\n copy__579 = torch.ops.aten.copy_.default(arg727_1, getitem_135); arg727_1 = getitem_135 = copy__579 = None\n copy__580 = torch.ops.aten.copy_.default(arg728_1, getitem_136); arg728_1 = getitem_136 = copy__580 = None\n copy__581 = torch.ops.aten.copy_.default(arg729_1, getitem_137); arg729_1 = getitem_137 = copy__581 = None\n copy__582 = torch.ops.aten.copy_.default(arg730_1, getitem_138); arg730_1 = getitem_138 = copy__582 = None\n copy__583 = torch.ops.aten.copy_.default(arg731_1, getitem_139); arg731_1 = getitem_139 = copy__583 = None\n copy__584 = torch.ops.aten.copy_.default(arg732_1, getitem_140); arg732_1 = getitem_140 = copy__584 = None\n copy__585 = torch.ops.aten.copy_.default(arg733_1, getitem_141); arg733_1 = getitem_141 = copy__585 = None\n copy__586 = torch.ops.aten.copy_.default(arg734_1, getitem_142); arg734_1 = getitem_142 = copy__586 = None\n copy__587 = torch.ops.aten.copy_.default(arg735_1, getitem_143); arg735_1 = getitem_143 = copy__587 = None\n copy__588 = torch.ops.aten.copy_.default(arg736_1, getitem_144); arg736_1 = getitem_144 = copy__588 = None\n copy__589 = torch.ops.aten.copy_.default(arg737_1, getitem_145); arg737_1 = getitem_145 = copy__589 = None\n copy__590 = torch.ops.aten.copy_.default(arg738_1, getitem_146); arg738_1 = getitem_146 = copy__590 = None\n copy__591 = torch.ops.aten.copy_.default(arg739_1, getitem_147); 
arg739_1 = getitem_147 = copy__591 = None\n return ()\n \n# To see more debug info, please use `graph_module.print_readable()`", "[l36txgwatro2oumaemq3fn7fzd6c7rqt7jkya4whofwgykwhyac] example_inputs[0]: TensorMetadata(dtype=torch.float32, shape=torch.Size([50304, 768]), stride=(768, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=154533888, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[j2qdbepspnv5xu5aaa4vfybenn6kcipqnlg4axmhoebbdlhdfcg] example_inputs[1]: TensorMetadata(dtype=torch.float32, shape=torch.Size([1024, 768]), stride=(768, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=3145728, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[ymueslj5cs5k2l243gcdaohltqzpminou4x5gltmu2cgjgroyyd] example_inputs[2]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=3072, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[ymueslj5cs5k2l243gcdaohltqzpminou4x5gltmu2cgjgroyyd] example_inputs[3]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=3072, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[ku6bnu4u7osvly4jsmf2w2orf22tghfytwzq6ycj3otmnq6f3wn] example_inputs[4]: TensorMetadata(dtype=torch.float32, shape=torch.Size([2304, 768]), stride=(768, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=7077888, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[57h3bzk67jreuqiikhvgdtcn2mdkz7xuwhdnrjueg4dxxn6tfzm] example_inputs[5]: TensorMetadata(dtype=torch.float32, shape=torch.Size([2304]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=9216, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[a64xnvbb6q6gew5c3aovaeqivj3hstrmxkbra3eidivx42znnuv] example_inputs[6]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768, 768]), stride=(768, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=2359296, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[ymueslj5cs5k2l243gcdaohltqzpminou4x5gltmu2cgjgroyyd] example_inputs[7]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, 
storage_offset=0, storage_bytes=3072, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[ymueslj5cs5k2l243gcdaohltqzpminou4x5gltmu2cgjgroyyd] example_inputs[8]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=3072, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[ymueslj5cs5k2l243gcdaohltqzpminou4x5gltmu2cgjgroyyd] example_inputs[9]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=3072, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[aafg74anlk7yqmlg6ucup4tmjwhskblh75z227dcfmwpfzw7dzm] example_inputs[10]: TensorMetadata(dtype=torch.float32, shape=torch.Size([3072, 768]), stride=(768, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=9437184, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[sefiqkkteehwsnilwpljkx7vdgondub2hwggoishzmx7o5etmjk] example_inputs[11]: TensorMetadata(dtype=torch.float32, shape=torch.Size([3072]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=12288, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[6v4azk5hvrkyq22k6mfa2l7ovaddatmim7mcvcpdku2ieirjd64] example_inputs[12]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768, 3072]), stride=(3072, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=9437184, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[ymueslj5cs5k2l243gcdaohltqzpminou4x5gltmu2cgjgroyyd] example_inputs[13]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=3072, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[ymueslj5cs5k2l243gcdaohltqzpminou4x5gltmu2cgjgroyyd] example_inputs[14]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=3072, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[ymueslj5cs5k2l243gcdaohltqzpminou4x5gltmu2cgjgroyyd] example_inputs[15]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), 
layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=3072, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[ku6bnu4u7osvly4jsmf2w2orf22tghfytwzq6ycj3otmnq6f3wn] example_inputs[16]: TensorMetadata(dtype=torch.float32, shape=torch.Size([2304, 768]), stride=(768, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=7077888, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[57h3bzk67jreuqiikhvgdtcn2mdkz7xuwhdnrjueg4dxxn6tfzm] example_inputs[17]: TensorMetadata(dtype=torch.float32, shape=torch.Size([2304]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=9216, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[a64xnvbb6q6gew5c3aovaeqivj3hstrmxkbra3eidivx42znnuv] example_inputs[18]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768, 768]), stride=(768, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=2359296, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[ymueslj5cs5k2l243gcdaohltqzpminou4x5gltmu2cgjgroyyd] example_inputs[19]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=3072, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[ymueslj5cs5k2l243gcdaohltqzpminou4x5gltmu2cgjgroyyd] example_inputs[20]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=3072, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[ymueslj5cs5k2l243gcdaohltqzpminou4x5gltmu2cgjgroyyd] example_inputs[21]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=3072, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[aafg74anlk7yqmlg6ucup4tmjwhskblh75z227dcfmwpfzw7dzm] example_inputs[22]: TensorMetadata(dtype=torch.float32, shape=torch.Size([3072, 768]), stride=(768, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=9437184, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[sefiqkkteehwsnilwpljkx7vdgondub2hwggoishzmx7o5etmjk] example_inputs[23]: TensorMetadata(dtype=torch.float32, 
shape=torch.Size([3072]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=12288, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[6v4azk5hvrkyq22k6mfa2l7ovaddatmim7mcvcpdku2ieirjd64] example_inputs[24]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768, 3072]), stride=(3072, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=9437184, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[ymueslj5cs5k2l243gcdaohltqzpminou4x5gltmu2cgjgroyyd] example_inputs[25]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=3072, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[ymueslj5cs5k2l243gcdaohltqzpminou4x5gltmu2cgjgroyyd] example_inputs[26]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=3072, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[ymueslj5cs5k2l243gcdaohltqzpminou4x5gltmu2cgjgroyyd] example_inputs[27]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=3072, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[ku6bnu4u7osvly4jsmf2w2orf22tghfytwzq6ycj3otmnq6f3wn] example_inputs[28]: TensorMetadata(dtype=torch.float32, shape=torch.Size([2304, 768]), stride=(768, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=7077888, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[57h3bzk67jreuqiikhvgdtcn2mdkz7xuwhdnrjueg4dxxn6tfzm] example_inputs[29]: TensorMetadata(dtype=torch.float32, shape=torch.Size([2304]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=9216, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[a64xnvbb6q6gew5c3aovaeqivj3hstrmxkbra3eidivx42znnuv] example_inputs[30]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768, 768]), stride=(768, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=2359296, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[ymueslj5cs5k2l243gcdaohltqzpminou4x5gltmu2cgjgroyyd] 
example_inputs[31]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=3072, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[ymueslj5cs5k2l243gcdaohltqzpminou4x5gltmu2cgjgroyyd] example_inputs[32]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=3072, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[ymueslj5cs5k2l243gcdaohltqzpminou4x5gltmu2cgjgroyyd] example_inputs[33]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=3072, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[aafg74anlk7yqmlg6ucup4tmjwhskblh75z227dcfmwpfzw7dzm] example_inputs[34]: TensorMetadata(dtype=torch.float32, shape=torch.Size([3072, 768]), stride=(768, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=9437184, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[sefiqkkteehwsnilwpljkx7vdgondub2hwggoishzmx7o5etmjk] example_inputs[35]: TensorMetadata(dtype=torch.float32, shape=torch.Size([3072]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=12288, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[6v4azk5hvrkyq22k6mfa2l7ovaddatmim7mcvcpdku2ieirjd64] example_inputs[36]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768, 3072]), stride=(3072, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=9437184, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[ymueslj5cs5k2l243gcdaohltqzpminou4x5gltmu2cgjgroyyd] example_inputs[37]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=3072, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[ymueslj5cs5k2l243gcdaohltqzpminou4x5gltmu2cgjgroyyd] example_inputs[38]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=3072, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", 
"[ymueslj5cs5k2l243gcdaohltqzpminou4x5gltmu2cgjgroyyd] example_inputs[39]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=3072, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[ku6bnu4u7osvly4jsmf2w2orf22tghfytwzq6ycj3otmnq6f3wn] example_inputs[40]: TensorMetadata(dtype=torch.float32, shape=torch.Size([2304, 768]), stride=(768, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=7077888, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[57h3bzk67jreuqiikhvgdtcn2mdkz7xuwhdnrjueg4dxxn6tfzm] example_inputs[41]: TensorMetadata(dtype=torch.float32, shape=torch.Size([2304]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=9216, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[a64xnvbb6q6gew5c3aovaeqivj3hstrmxkbra3eidivx42znnuv] example_inputs[42]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768, 768]), stride=(768, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=2359296, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[ymueslj5cs5k2l243gcdaohltqzpminou4x5gltmu2cgjgroyyd] example_inputs[43]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=3072, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[ymueslj5cs5k2l243gcdaohltqzpminou4x5gltmu2cgjgroyyd] example_inputs[44]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=3072, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[ymueslj5cs5k2l243gcdaohltqzpminou4x5gltmu2cgjgroyyd] example_inputs[45]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=3072, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[aafg74anlk7yqmlg6ucup4tmjwhskblh75z227dcfmwpfzw7dzm] example_inputs[46]: TensorMetadata(dtype=torch.float32, shape=torch.Size([3072, 768]), stride=(768, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=9437184, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, 
is_coalesced=None, dense_dim=None, sparse_dim=None)", "[sefiqkkteehwsnilwpljkx7vdgondub2hwggoishzmx7o5etmjk] example_inputs[47]: TensorMetadata(dtype=torch.float32, shape=torch.Size([3072]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=12288, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[6v4azk5hvrkyq22k6mfa2l7ovaddatmim7mcvcpdku2ieirjd64] example_inputs[48]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768, 3072]), stride=(3072, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=9437184, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[ymueslj5cs5k2l243gcdaohltqzpminou4x5gltmu2cgjgroyyd] example_inputs[49]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=3072, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[ymueslj5cs5k2l243gcdaohltqzpminou4x5gltmu2cgjgroyyd] example_inputs[50]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=3072, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[ymueslj5cs5k2l243gcdaohltqzpminou4x5gltmu2cgjgroyyd] example_inputs[51]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=3072, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[ku6bnu4u7osvly4jsmf2w2orf22tghfytwzq6ycj3otmnq6f3wn] example_inputs[52]: TensorMetadata(dtype=torch.float32, shape=torch.Size([2304, 768]), stride=(768, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=7077888, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[57h3bzk67jreuqiikhvgdtcn2mdkz7xuwhdnrjueg4dxxn6tfzm] example_inputs[53]: TensorMetadata(dtype=torch.float32, shape=torch.Size([2304]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=9216, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[a64xnvbb6q6gew5c3aovaeqivj3hstrmxkbra3eidivx42znnuv] example_inputs[54]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768, 768]), stride=(768, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=2359296, requires_grad=False, is_quantized=False, 
is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[ymueslj5cs5k2l243gcdaohltqzpminou4x5gltmu2cgjgroyyd] example_inputs[55]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=3072, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[ymueslj5cs5k2l243gcdaohltqzpminou4x5gltmu2cgjgroyyd] example_inputs[56]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=3072, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[ymueslj5cs5k2l243gcdaohltqzpminou4x5gltmu2cgjgroyyd] example_inputs[57]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=3072, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[aafg74anlk7yqmlg6ucup4tmjwhskblh75z227dcfmwpfzw7dzm] example_inputs[58]: TensorMetadata(dtype=torch.float32, shape=torch.Size([3072, 768]), stride=(768, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=9437184, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[sefiqkkteehwsnilwpljkx7vdgondub2hwggoishzmx7o5etmjk] example_inputs[59]: TensorMetadata(dtype=torch.float32, shape=torch.Size([3072]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=12288, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[6v4azk5hvrkyq22k6mfa2l7ovaddatmim7mcvcpdku2ieirjd64] example_inputs[60]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768, 3072]), stride=(3072, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=9437184, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[ymueslj5cs5k2l243gcdaohltqzpminou4x5gltmu2cgjgroyyd] example_inputs[61]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=3072, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[ymueslj5cs5k2l243gcdaohltqzpminou4x5gltmu2cgjgroyyd] example_inputs[62]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, 
storage_bytes=3072, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[ymueslj5cs5k2l243gcdaohltqzpminou4x5gltmu2cgjgroyyd] example_inputs[63]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=3072, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[ku6bnu4u7osvly4jsmf2w2orf22tghfytwzq6ycj3otmnq6f3wn] example_inputs[64]: TensorMetadata(dtype=torch.float32, shape=torch.Size([2304, 768]), stride=(768, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=7077888, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[57h3bzk67jreuqiikhvgdtcn2mdkz7xuwhdnrjueg4dxxn6tfzm] example_inputs[65]: TensorMetadata(dtype=torch.float32, shape=torch.Size([2304]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=9216, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[a64xnvbb6q6gew5c3aovaeqivj3hstrmxkbra3eidivx42znnuv] example_inputs[66]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768, 768]), stride=(768, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=2359296, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[ymueslj5cs5k2l243gcdaohltqzpminou4x5gltmu2cgjgroyyd] example_inputs[67]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=3072, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[ymueslj5cs5k2l243gcdaohltqzpminou4x5gltmu2cgjgroyyd] example_inputs[68]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=3072, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[ymueslj5cs5k2l243gcdaohltqzpminou4x5gltmu2cgjgroyyd] example_inputs[69]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=3072, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[aafg74anlk7yqmlg6ucup4tmjwhskblh75z227dcfmwpfzw7dzm] example_inputs[70]: TensorMetadata(dtype=torch.float32, shape=torch.Size([3072, 768]), stride=(768, 1), device=device(type='cuda', index=0), layout=torch.strided, 
memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=9437184, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[sefiqkkteehwsnilwpljkx7vdgondub2hwggoishzmx7o5etmjk] example_inputs[71]: TensorMetadata(dtype=torch.float32, shape=torch.Size([3072]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=12288, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[6v4azk5hvrkyq22k6mfa2l7ovaddatmim7mcvcpdku2ieirjd64] example_inputs[72]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768, 3072]), stride=(3072, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=9437184, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[ymueslj5cs5k2l243gcdaohltqzpminou4x5gltmu2cgjgroyyd] example_inputs[73]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=3072, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[ymueslj5cs5k2l243gcdaohltqzpminou4x5gltmu2cgjgroyyd] example_inputs[74]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=3072, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[ymueslj5cs5k2l243gcdaohltqzpminou4x5gltmu2cgjgroyyd] example_inputs[75]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=3072, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[ku6bnu4u7osvly4jsmf2w2orf22tghfytwzq6ycj3otmnq6f3wn] example_inputs[76]: TensorMetadata(dtype=torch.float32, shape=torch.Size([2304, 768]), stride=(768, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=7077888, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[57h3bzk67jreuqiikhvgdtcn2mdkz7xuwhdnrjueg4dxxn6tfzm] example_inputs[77]: TensorMetadata(dtype=torch.float32, shape=torch.Size([2304]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=9216, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[a64xnvbb6q6gew5c3aovaeqivj3hstrmxkbra3eidivx42znnuv] example_inputs[78]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768, 768]), stride=(768, 1), 
device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=2359296, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[ymueslj5cs5k2l243gcdaohltqzpminou4x5gltmu2cgjgroyyd] example_inputs[79]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=3072, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[ymueslj5cs5k2l243gcdaohltqzpminou4x5gltmu2cgjgroyyd] example_inputs[80]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=3072, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[ymueslj5cs5k2l243gcdaohltqzpminou4x5gltmu2cgjgroyyd] example_inputs[81]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=3072, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[aafg74anlk7yqmlg6ucup4tmjwhskblh75z227dcfmwpfzw7dzm] example_inputs[82]: TensorMetadata(dtype=torch.float32, shape=torch.Size([3072, 768]), stride=(768, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=9437184, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[sefiqkkteehwsnilwpljkx7vdgondub2hwggoishzmx7o5etmjk] example_inputs[83]: TensorMetadata(dtype=torch.float32, shape=torch.Size([3072]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=12288, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[6v4azk5hvrkyq22k6mfa2l7ovaddatmim7mcvcpdku2ieirjd64] example_inputs[84]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768, 3072]), stride=(3072, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=9437184, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[ymueslj5cs5k2l243gcdaohltqzpminou4x5gltmu2cgjgroyyd] example_inputs[85]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=3072, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[ymueslj5cs5k2l243gcdaohltqzpminou4x5gltmu2cgjgroyyd] example_inputs[86]: 
TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=3072, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[ymueslj5cs5k2l243gcdaohltqzpminou4x5gltmu2cgjgroyyd] example_inputs[87]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=3072, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[ku6bnu4u7osvly4jsmf2w2orf22tghfytwzq6ycj3otmnq6f3wn] example_inputs[88]: TensorMetadata(dtype=torch.float32, shape=torch.Size([2304, 768]), stride=(768, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=7077888, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[57h3bzk67jreuqiikhvgdtcn2mdkz7xuwhdnrjueg4dxxn6tfzm] example_inputs[89]: TensorMetadata(dtype=torch.float32, shape=torch.Size([2304]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=9216, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[a64xnvbb6q6gew5c3aovaeqivj3hstrmxkbra3eidivx42znnuv] example_inputs[90]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768, 768]), stride=(768, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=2359296, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[ymueslj5cs5k2l243gcdaohltqzpminou4x5gltmu2cgjgroyyd] example_inputs[91]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=3072, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[ymueslj5cs5k2l243gcdaohltqzpminou4x5gltmu2cgjgroyyd] example_inputs[92]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=3072, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[ymueslj5cs5k2l243gcdaohltqzpminou4x5gltmu2cgjgroyyd] example_inputs[93]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=3072, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", 
"[aafg74anlk7yqmlg6ucup4tmjwhskblh75z227dcfmwpfzw7dzm] example_inputs[94]: TensorMetadata(dtype=torch.float32, shape=torch.Size([3072, 768]), stride=(768, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=9437184, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[sefiqkkteehwsnilwpljkx7vdgondub2hwggoishzmx7o5etmjk] example_inputs[95]: TensorMetadata(dtype=torch.float32, shape=torch.Size([3072]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=12288, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[6v4azk5hvrkyq22k6mfa2l7ovaddatmim7mcvcpdku2ieirjd64] example_inputs[96]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768, 3072]), stride=(3072, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=9437184, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[ymueslj5cs5k2l243gcdaohltqzpminou4x5gltmu2cgjgroyyd] example_inputs[97]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=3072, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[ymueslj5cs5k2l243gcdaohltqzpminou4x5gltmu2cgjgroyyd] example_inputs[98]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=3072, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[ymueslj5cs5k2l243gcdaohltqzpminou4x5gltmu2cgjgroyyd] example_inputs[99]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=3072, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[ku6bnu4u7osvly4jsmf2w2orf22tghfytwzq6ycj3otmnq6f3wn] example_inputs[100]: TensorMetadata(dtype=torch.float32, shape=torch.Size([2304, 768]), stride=(768, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=7077888, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[57h3bzk67jreuqiikhvgdtcn2mdkz7xuwhdnrjueg4dxxn6tfzm] example_inputs[101]: TensorMetadata(dtype=torch.float32, shape=torch.Size([2304]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=9216, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, 
is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[a64xnvbb6q6gew5c3aovaeqivj3hstrmxkbra3eidivx42znnuv] example_inputs[102]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768, 768]), stride=(768, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=2359296, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[ymueslj5cs5k2l243gcdaohltqzpminou4x5gltmu2cgjgroyyd] example_inputs[103]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=3072, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[ymueslj5cs5k2l243gcdaohltqzpminou4x5gltmu2cgjgroyyd] example_inputs[104]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=3072, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[ymueslj5cs5k2l243gcdaohltqzpminou4x5gltmu2cgjgroyyd] example_inputs[105]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=3072, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[aafg74anlk7yqmlg6ucup4tmjwhskblh75z227dcfmwpfzw7dzm] example_inputs[106]: TensorMetadata(dtype=torch.float32, shape=torch.Size([3072, 768]), stride=(768, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=9437184, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[sefiqkkteehwsnilwpljkx7vdgondub2hwggoishzmx7o5etmjk] example_inputs[107]: TensorMetadata(dtype=torch.float32, shape=torch.Size([3072]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=12288, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[6v4azk5hvrkyq22k6mfa2l7ovaddatmim7mcvcpdku2ieirjd64] example_inputs[108]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768, 3072]), stride=(3072, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=9437184, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[ymueslj5cs5k2l243gcdaohltqzpminou4x5gltmu2cgjgroyyd] example_inputs[109]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=3072, requires_grad=False, 
is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[ymueslj5cs5k2l243gcdaohltqzpminou4x5gltmu2cgjgroyyd] example_inputs[110]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=3072, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[ymueslj5cs5k2l243gcdaohltqzpminou4x5gltmu2cgjgroyyd] example_inputs[111]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=3072, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[ku6bnu4u7osvly4jsmf2w2orf22tghfytwzq6ycj3otmnq6f3wn] example_inputs[112]: TensorMetadata(dtype=torch.float32, shape=torch.Size([2304, 768]), stride=(768, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=7077888, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[57h3bzk67jreuqiikhvgdtcn2mdkz7xuwhdnrjueg4dxxn6tfzm] example_inputs[113]: TensorMetadata(dtype=torch.float32, shape=torch.Size([2304]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=9216, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[a64xnvbb6q6gew5c3aovaeqivj3hstrmxkbra3eidivx42znnuv] example_inputs[114]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768, 768]), stride=(768, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=2359296, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[ymueslj5cs5k2l243gcdaohltqzpminou4x5gltmu2cgjgroyyd] example_inputs[115]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=3072, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[ymueslj5cs5k2l243gcdaohltqzpminou4x5gltmu2cgjgroyyd] example_inputs[116]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=3072, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[ymueslj5cs5k2l243gcdaohltqzpminou4x5gltmu2cgjgroyyd] example_inputs[117]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, 
storage_offset=0, storage_bytes=3072, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[aafg74anlk7yqmlg6ucup4tmjwhskblh75z227dcfmwpfzw7dzm] example_inputs[118]: TensorMetadata(dtype=torch.float32, shape=torch.Size([3072, 768]), stride=(768, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=9437184, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[sefiqkkteehwsnilwpljkx7vdgondub2hwggoishzmx7o5etmjk] example_inputs[119]: TensorMetadata(dtype=torch.float32, shape=torch.Size([3072]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=12288, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[6v4azk5hvrkyq22k6mfa2l7ovaddatmim7mcvcpdku2ieirjd64] example_inputs[120]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768, 3072]), stride=(3072, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=9437184, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[ymueslj5cs5k2l243gcdaohltqzpminou4x5gltmu2cgjgroyyd] example_inputs[121]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=3072, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[ymueslj5cs5k2l243gcdaohltqzpminou4x5gltmu2cgjgroyyd] example_inputs[122]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=3072, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[ymueslj5cs5k2l243gcdaohltqzpminou4x5gltmu2cgjgroyyd] example_inputs[123]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=3072, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[ku6bnu4u7osvly4jsmf2w2orf22tghfytwzq6ycj3otmnq6f3wn] example_inputs[124]: TensorMetadata(dtype=torch.float32, shape=torch.Size([2304, 768]), stride=(768, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=7077888, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[57h3bzk67jreuqiikhvgdtcn2mdkz7xuwhdnrjueg4dxxn6tfzm] example_inputs[125]: TensorMetadata(dtype=torch.float32, shape=torch.Size([2304]), stride=(1,), device=device(type='cuda', 
index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=9216, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[a64xnvbb6q6gew5c3aovaeqivj3hstrmxkbra3eidivx42znnuv] example_inputs[126]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768, 768]), stride=(768, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=2359296, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[ymueslj5cs5k2l243gcdaohltqzpminou4x5gltmu2cgjgroyyd] example_inputs[127]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=3072, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[ymueslj5cs5k2l243gcdaohltqzpminou4x5gltmu2cgjgroyyd] example_inputs[128]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=3072, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[ymueslj5cs5k2l243gcdaohltqzpminou4x5gltmu2cgjgroyyd] example_inputs[129]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=3072, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[aafg74anlk7yqmlg6ucup4tmjwhskblh75z227dcfmwpfzw7dzm] example_inputs[130]: TensorMetadata(dtype=torch.float32, shape=torch.Size([3072, 768]), stride=(768, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=9437184, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[sefiqkkteehwsnilwpljkx7vdgondub2hwggoishzmx7o5etmjk] example_inputs[131]: TensorMetadata(dtype=torch.float32, shape=torch.Size([3072]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=12288, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[6v4azk5hvrkyq22k6mfa2l7ovaddatmim7mcvcpdku2ieirjd64] example_inputs[132]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768, 3072]), stride=(3072, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=9437184, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[ymueslj5cs5k2l243gcdaohltqzpminou4x5gltmu2cgjgroyyd] example_inputs[133]: TensorMetadata(dtype=torch.float32, 
shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=3072, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[ymueslj5cs5k2l243gcdaohltqzpminou4x5gltmu2cgjgroyyd] example_inputs[134]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=3072, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[ymueslj5cs5k2l243gcdaohltqzpminou4x5gltmu2cgjgroyyd] example_inputs[135]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=3072, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[ku6bnu4u7osvly4jsmf2w2orf22tghfytwzq6ycj3otmnq6f3wn] example_inputs[136]: TensorMetadata(dtype=torch.float32, shape=torch.Size([2304, 768]), stride=(768, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=7077888, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[57h3bzk67jreuqiikhvgdtcn2mdkz7xuwhdnrjueg4dxxn6tfzm] example_inputs[137]: TensorMetadata(dtype=torch.float32, shape=torch.Size([2304]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=9216, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[a64xnvbb6q6gew5c3aovaeqivj3hstrmxkbra3eidivx42znnuv] example_inputs[138]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768, 768]), stride=(768, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=2359296, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[ymueslj5cs5k2l243gcdaohltqzpminou4x5gltmu2cgjgroyyd] example_inputs[139]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=3072, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[ymueslj5cs5k2l243gcdaohltqzpminou4x5gltmu2cgjgroyyd] example_inputs[140]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=3072, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[ymueslj5cs5k2l243gcdaohltqzpminou4x5gltmu2cgjgroyyd] 
example_inputs[141]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=3072, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[aafg74anlk7yqmlg6ucup4tmjwhskblh75z227dcfmwpfzw7dzm] example_inputs[142]: TensorMetadata(dtype=torch.float32, shape=torch.Size([3072, 768]), stride=(768, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=9437184, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[sefiqkkteehwsnilwpljkx7vdgondub2hwggoishzmx7o5etmjk] example_inputs[143]: TensorMetadata(dtype=torch.float32, shape=torch.Size([3072]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=12288, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[6v4azk5hvrkyq22k6mfa2l7ovaddatmim7mcvcpdku2ieirjd64] example_inputs[144]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768, 3072]), stride=(3072, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=9437184, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[ymueslj5cs5k2l243gcdaohltqzpminou4x5gltmu2cgjgroyyd] example_inputs[145]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=3072, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[ymueslj5cs5k2l243gcdaohltqzpminou4x5gltmu2cgjgroyyd] example_inputs[146]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=3072, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[ymueslj5cs5k2l243gcdaohltqzpminou4x5gltmu2cgjgroyyd] example_inputs[147]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=3072, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[iyogpwfek6zjggzelzpjeon4muycgzpe56o5gt3flsg5vojztet] example_inputs[148]: TensorMetadata(dtype=torch.float32, shape=torch.Size([]), stride=(), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=4, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", 
"[j2qdbepspnv5xu5aaa4vfybenn6kcipqnlg4axmhoebbdlhdfcg] example_inputs[149]: TensorMetadata(dtype=torch.float32, shape=torch.Size([1024, 768]), stride=(768, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=3145728, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[j2qdbepspnv5xu5aaa4vfybenn6kcipqnlg4axmhoebbdlhdfcg] example_inputs[150]: TensorMetadata(dtype=torch.float32, shape=torch.Size([1024, 768]), stride=(768, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=3145728, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[ucvyfteusmf3hkyqsnlp5ug5dh4kqnbxlr56s7pvkidgpyg2jx5] example_inputs[151]: TensorMetadata(dtype=torch.float32, shape=torch.Size([50304, 768]), stride=(768, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=None, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[6gu2nloxzcxrfltpbua7lqp7kkjihxd4w7afynotijqr3v7nhh5] example_inputs[152]: TensorMetadata(dtype=torch.float32, shape=torch.Size([1024, 768]), stride=(768, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=None, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[ln6xprjugjuaphagzxcpw3ly7ocdvczoqxk4ltvl3onfxvve4on] example_inputs[153]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=None, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[ln6xprjugjuaphagzxcpw3ly7ocdvczoqxk4ltvl3onfxvve4on] example_inputs[154]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=None, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[hrasck5lnskaq7gv4vyfi44p5n7y3tr36z7trckbwnuv3eyapmk] example_inputs[155]: TensorMetadata(dtype=torch.float32, shape=torch.Size([2304, 768]), stride=(768, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=None, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[6to4nwahrkxgsie75pfivv3ndqufs4lughzpqcdhir3tm3ezo3n] example_inputs[156]: TensorMetadata(dtype=torch.float32, shape=torch.Size([2304]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=None, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, 
is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[2ltp76oexmn4iaroifj5ea33qciy4yxhn7j6tpopk3n2hntts7a] example_inputs[157]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768, 768]), stride=(768, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=None, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[ln6xprjugjuaphagzxcpw3ly7ocdvczoqxk4ltvl3onfxvve4on] example_inputs[158]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=None, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[ln6xprjugjuaphagzxcpw3ly7ocdvczoqxk4ltvl3onfxvve4on] example_inputs[159]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=None, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[ln6xprjugjuaphagzxcpw3ly7ocdvczoqxk4ltvl3onfxvve4on] example_inputs[160]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=None, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[ci5juyeynyazk6a6gqle7pthpejmypuopbxaghaccdz2d3kpsz4] example_inputs[161]: TensorMetadata(dtype=torch.float32, shape=torch.Size([3072, 768]), stride=(768, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=None, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[wzhy2ewb5m2s7i22j2cam7aomq3hpahzofgjnxkb6jug7de7qgw] example_inputs[162]: TensorMetadata(dtype=torch.float32, shape=torch.Size([3072]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=None, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[3tccso7i75mxvdsbabjcgf45nen6thm6hxdkqvudhrh5iu2kbs7] example_inputs[163]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768, 3072]), stride=(3072, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=None, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[ln6xprjugjuaphagzxcpw3ly7ocdvczoqxk4ltvl3onfxvve4on] example_inputs[164]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=None, 
requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[ln6xprjugjuaphagzxcpw3ly7ocdvczoqxk4ltvl3onfxvve4on] example_inputs[165]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=None, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[ln6xprjugjuaphagzxcpw3ly7ocdvczoqxk4ltvl3onfxvve4on] example_inputs[166]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=None, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[hrasck5lnskaq7gv4vyfi44p5n7y3tr36z7trckbwnuv3eyapmk] example_inputs[167]: TensorMetadata(dtype=torch.float32, shape=torch.Size([2304, 768]), stride=(768, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=None, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[6to4nwahrkxgsie75pfivv3ndqufs4lughzpqcdhir3tm3ezo3n] example_inputs[168]: TensorMetadata(dtype=torch.float32, shape=torch.Size([2304]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=None, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[2ltp76oexmn4iaroifj5ea33qciy4yxhn7j6tpopk3n2hntts7a] example_inputs[169]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768, 768]), stride=(768, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=None, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[ln6xprjugjuaphagzxcpw3ly7ocdvczoqxk4ltvl3onfxvve4on] example_inputs[170]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=None, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[ln6xprjugjuaphagzxcpw3ly7ocdvczoqxk4ltvl3onfxvve4on] example_inputs[171]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=None, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[ln6xprjugjuaphagzxcpw3ly7ocdvczoqxk4ltvl3onfxvve4on] example_inputs[172]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, 
memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=None, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[ci5juyeynyazk6a6gqle7pthpejmypuopbxaghaccdz2d3kpsz4] example_inputs[173]: TensorMetadata(dtype=torch.float32, shape=torch.Size([3072, 768]), stride=(768, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=None, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[wzhy2ewb5m2s7i22j2cam7aomq3hpahzofgjnxkb6jug7de7qgw] example_inputs[174]: TensorMetadata(dtype=torch.float32, shape=torch.Size([3072]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=None, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[3tccso7i75mxvdsbabjcgf45nen6thm6hxdkqvudhrh5iu2kbs7] example_inputs[175]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768, 3072]), stride=(3072, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=None, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[ln6xprjugjuaphagzxcpw3ly7ocdvczoqxk4ltvl3onfxvve4on] example_inputs[176]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=None, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[ln6xprjugjuaphagzxcpw3ly7ocdvczoqxk4ltvl3onfxvve4on] example_inputs[177]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=None, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[ln6xprjugjuaphagzxcpw3ly7ocdvczoqxk4ltvl3onfxvve4on] example_inputs[178]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=None, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[hrasck5lnskaq7gv4vyfi44p5n7y3tr36z7trckbwnuv3eyapmk] example_inputs[179]: TensorMetadata(dtype=torch.float32, shape=torch.Size([2304, 768]), stride=(768, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=None, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[6to4nwahrkxgsie75pfivv3ndqufs4lughzpqcdhir3tm3ezo3n] example_inputs[180]: TensorMetadata(dtype=torch.float32, shape=torch.Size([2304]), stride=(1,), 
device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=None, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[2ltp76oexmn4iaroifj5ea33qciy4yxhn7j6tpopk3n2hntts7a] example_inputs[181]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768, 768]), stride=(768, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=None, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[ln6xprjugjuaphagzxcpw3ly7ocdvczoqxk4ltvl3onfxvve4on] example_inputs[182]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=None, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[ln6xprjugjuaphagzxcpw3ly7ocdvczoqxk4ltvl3onfxvve4on] example_inputs[183]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=None, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[ln6xprjugjuaphagzxcpw3ly7ocdvczoqxk4ltvl3onfxvve4on] example_inputs[184]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=None, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[ci5juyeynyazk6a6gqle7pthpejmypuopbxaghaccdz2d3kpsz4] example_inputs[185]: TensorMetadata(dtype=torch.float32, shape=torch.Size([3072, 768]), stride=(768, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=None, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[wzhy2ewb5m2s7i22j2cam7aomq3hpahzofgjnxkb6jug7de7qgw] example_inputs[186]: TensorMetadata(dtype=torch.float32, shape=torch.Size([3072]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=None, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[3tccso7i75mxvdsbabjcgf45nen6thm6hxdkqvudhrh5iu2kbs7] example_inputs[187]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768, 3072]), stride=(3072, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=None, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[ln6xprjugjuaphagzxcpw3ly7ocdvczoqxk4ltvl3onfxvve4on] example_inputs[188]: 
TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=None, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[ln6xprjugjuaphagzxcpw3ly7ocdvczoqxk4ltvl3onfxvve4on] example_inputs[189]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=None, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[ln6xprjugjuaphagzxcpw3ly7ocdvczoqxk4ltvl3onfxvve4on] example_inputs[190]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=None, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[hrasck5lnskaq7gv4vyfi44p5n7y3tr36z7trckbwnuv3eyapmk] example_inputs[191]: TensorMetadata(dtype=torch.float32, shape=torch.Size([2304, 768]), stride=(768, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=None, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[6to4nwahrkxgsie75pfivv3ndqufs4lughzpqcdhir3tm3ezo3n] example_inputs[192]: TensorMetadata(dtype=torch.float32, shape=torch.Size([2304]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=None, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[2ltp76oexmn4iaroifj5ea33qciy4yxhn7j6tpopk3n2hntts7a] example_inputs[193]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768, 768]), stride=(768, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=None, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[ln6xprjugjuaphagzxcpw3ly7ocdvczoqxk4ltvl3onfxvve4on] example_inputs[194]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=None, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[ln6xprjugjuaphagzxcpw3ly7ocdvczoqxk4ltvl3onfxvve4on] example_inputs[195]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=None, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", 
"[ln6xprjugjuaphagzxcpw3ly7ocdvczoqxk4ltvl3onfxvve4on] example_inputs[196]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=None, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[ci5juyeynyazk6a6gqle7pthpejmypuopbxaghaccdz2d3kpsz4] example_inputs[197]: TensorMetadata(dtype=torch.float32, shape=torch.Size([3072, 768]), stride=(768, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=None, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[wzhy2ewb5m2s7i22j2cam7aomq3hpahzofgjnxkb6jug7de7qgw] example_inputs[198]: TensorMetadata(dtype=torch.float32, shape=torch.Size([3072]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=None, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[3tccso7i75mxvdsbabjcgf45nen6thm6hxdkqvudhrh5iu2kbs7] example_inputs[199]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768, 3072]), stride=(3072, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=None, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[ln6xprjugjuaphagzxcpw3ly7ocdvczoqxk4ltvl3onfxvve4on] example_inputs[200]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=None, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[ln6xprjugjuaphagzxcpw3ly7ocdvczoqxk4ltvl3onfxvve4on] example_inputs[201]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=None, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[ln6xprjugjuaphagzxcpw3ly7ocdvczoqxk4ltvl3onfxvve4on] example_inputs[202]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=None, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[hrasck5lnskaq7gv4vyfi44p5n7y3tr36z7trckbwnuv3eyapmk] example_inputs[203]: TensorMetadata(dtype=torch.float32, shape=torch.Size([2304, 768]), stride=(768, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=None, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, 
is_coalesced=None, dense_dim=None, sparse_dim=None)", "[6to4nwahrkxgsie75pfivv3ndqufs4lughzpqcdhir3tm3ezo3n] example_inputs[204]: TensorMetadata(dtype=torch.float32, shape=torch.Size([2304]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=None, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[2ltp76oexmn4iaroifj5ea33qciy4yxhn7j6tpopk3n2hntts7a] example_inputs[205]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768, 768]), stride=(768, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=None, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[ln6xprjugjuaphagzxcpw3ly7ocdvczoqxk4ltvl3onfxvve4on] example_inputs[206]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=None, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[ln6xprjugjuaphagzxcpw3ly7ocdvczoqxk4ltvl3onfxvve4on] example_inputs[207]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=None, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[ln6xprjugjuaphagzxcpw3ly7ocdvczoqxk4ltvl3onfxvve4on] example_inputs[208]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=None, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[ci5juyeynyazk6a6gqle7pthpejmypuopbxaghaccdz2d3kpsz4] example_inputs[209]: TensorMetadata(dtype=torch.float32, shape=torch.Size([3072, 768]), stride=(768, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=None, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[wzhy2ewb5m2s7i22j2cam7aomq3hpahzofgjnxkb6jug7de7qgw] example_inputs[210]: TensorMetadata(dtype=torch.float32, shape=torch.Size([3072]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=None, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[3tccso7i75mxvdsbabjcgf45nen6thm6hxdkqvudhrh5iu2kbs7] example_inputs[211]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768, 3072]), stride=(3072, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=None, requires_grad=False, is_quantized=False, 
is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[ln6xprjugjuaphagzxcpw3ly7ocdvczoqxk4ltvl3onfxvve4on] example_inputs[212]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=None, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[ln6xprjugjuaphagzxcpw3ly7ocdvczoqxk4ltvl3onfxvve4on] example_inputs[213]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=None, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[ln6xprjugjuaphagzxcpw3ly7ocdvczoqxk4ltvl3onfxvve4on] example_inputs[214]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=None, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[hrasck5lnskaq7gv4vyfi44p5n7y3tr36z7trckbwnuv3eyapmk] example_inputs[215]: TensorMetadata(dtype=torch.float32, shape=torch.Size([2304, 768]), stride=(768, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=None, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[6to4nwahrkxgsie75pfivv3ndqufs4lughzpqcdhir3tm3ezo3n] example_inputs[216]: TensorMetadata(dtype=torch.float32, shape=torch.Size([2304]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=None, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[2ltp76oexmn4iaroifj5ea33qciy4yxhn7j6tpopk3n2hntts7a] example_inputs[217]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768, 768]), stride=(768, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=None, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[ln6xprjugjuaphagzxcpw3ly7ocdvczoqxk4ltvl3onfxvve4on] example_inputs[218]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=None, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[ln6xprjugjuaphagzxcpw3ly7ocdvczoqxk4ltvl3onfxvve4on] example_inputs[219]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=None, 
requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[ln6xprjugjuaphagzxcpw3ly7ocdvczoqxk4ltvl3onfxvve4on] example_inputs[220]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=None, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[ci5juyeynyazk6a6gqle7pthpejmypuopbxaghaccdz2d3kpsz4] example_inputs[221]: TensorMetadata(dtype=torch.float32, shape=torch.Size([3072, 768]), stride=(768, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=None, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[wzhy2ewb5m2s7i22j2cam7aomq3hpahzofgjnxkb6jug7de7qgw] example_inputs[222]: TensorMetadata(dtype=torch.float32, shape=torch.Size([3072]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=None, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[3tccso7i75mxvdsbabjcgf45nen6thm6hxdkqvudhrh5iu2kbs7] example_inputs[223]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768, 3072]), stride=(3072, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=None, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[ln6xprjugjuaphagzxcpw3ly7ocdvczoqxk4ltvl3onfxvve4on] example_inputs[224]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=None, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[ln6xprjugjuaphagzxcpw3ly7ocdvczoqxk4ltvl3onfxvve4on] example_inputs[225]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=None, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[ln6xprjugjuaphagzxcpw3ly7ocdvczoqxk4ltvl3onfxvve4on] example_inputs[226]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=None, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[hrasck5lnskaq7gv4vyfi44p5n7y3tr36z7trckbwnuv3eyapmk] example_inputs[227]: TensorMetadata(dtype=torch.float32, shape=torch.Size([2304, 768]), stride=(768, 1), device=device(type='cuda', index=0), layout=torch.strided, 
memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=None, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[6to4nwahrkxgsie75pfivv3ndqufs4lughzpqcdhir3tm3ezo3n] example_inputs[228]: TensorMetadata(dtype=torch.float32, shape=torch.Size([2304]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=None, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[2ltp76oexmn4iaroifj5ea33qciy4yxhn7j6tpopk3n2hntts7a] example_inputs[229]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768, 768]), stride=(768, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=None, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[ln6xprjugjuaphagzxcpw3ly7ocdvczoqxk4ltvl3onfxvve4on] example_inputs[230]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=None, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[ln6xprjugjuaphagzxcpw3ly7ocdvczoqxk4ltvl3onfxvve4on] example_inputs[231]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=None, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[ln6xprjugjuaphagzxcpw3ly7ocdvczoqxk4ltvl3onfxvve4on] example_inputs[232]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=None, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[ci5juyeynyazk6a6gqle7pthpejmypuopbxaghaccdz2d3kpsz4] example_inputs[233]: TensorMetadata(dtype=torch.float32, shape=torch.Size([3072, 768]), stride=(768, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=None, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[wzhy2ewb5m2s7i22j2cam7aomq3hpahzofgjnxkb6jug7de7qgw] example_inputs[234]: TensorMetadata(dtype=torch.float32, shape=torch.Size([3072]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=None, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[3tccso7i75mxvdsbabjcgf45nen6thm6hxdkqvudhrh5iu2kbs7] example_inputs[235]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768, 3072]), stride=(3072, 1), 
device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=None, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[ln6xprjugjuaphagzxcpw3ly7ocdvczoqxk4ltvl3onfxvve4on] example_inputs[236]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=None, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[ln6xprjugjuaphagzxcpw3ly7ocdvczoqxk4ltvl3onfxvve4on] example_inputs[237]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=None, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[ln6xprjugjuaphagzxcpw3ly7ocdvczoqxk4ltvl3onfxvve4on] example_inputs[238]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=None, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[hrasck5lnskaq7gv4vyfi44p5n7y3tr36z7trckbwnuv3eyapmk] example_inputs[239]: TensorMetadata(dtype=torch.float32, shape=torch.Size([2304, 768]), stride=(768, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=None, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[6to4nwahrkxgsie75pfivv3ndqufs4lughzpqcdhir3tm3ezo3n] example_inputs[240]: TensorMetadata(dtype=torch.float32, shape=torch.Size([2304]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=None, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[2ltp76oexmn4iaroifj5ea33qciy4yxhn7j6tpopk3n2hntts7a] example_inputs[241]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768, 768]), stride=(768, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=None, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[ln6xprjugjuaphagzxcpw3ly7ocdvczoqxk4ltvl3onfxvve4on] example_inputs[242]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=None, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[ln6xprjugjuaphagzxcpw3ly7ocdvczoqxk4ltvl3onfxvve4on] example_inputs[243]: 
TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=None, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[ln6xprjugjuaphagzxcpw3ly7ocdvczoqxk4ltvl3onfxvve4on] example_inputs[244]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=None, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[ci5juyeynyazk6a6gqle7pthpejmypuopbxaghaccdz2d3kpsz4] example_inputs[245]: TensorMetadata(dtype=torch.float32, shape=torch.Size([3072, 768]), stride=(768, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=None, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[wzhy2ewb5m2s7i22j2cam7aomq3hpahzofgjnxkb6jug7de7qgw] example_inputs[246]: TensorMetadata(dtype=torch.float32, shape=torch.Size([3072]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=None, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[3tccso7i75mxvdsbabjcgf45nen6thm6hxdkqvudhrh5iu2kbs7] example_inputs[247]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768, 3072]), stride=(3072, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=None, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[ln6xprjugjuaphagzxcpw3ly7ocdvczoqxk4ltvl3onfxvve4on] example_inputs[248]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=None, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[ln6xprjugjuaphagzxcpw3ly7ocdvczoqxk4ltvl3onfxvve4on] example_inputs[249]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=None, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[ln6xprjugjuaphagzxcpw3ly7ocdvczoqxk4ltvl3onfxvve4on] example_inputs[250]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=None, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", 
"[hrasck5lnskaq7gv4vyfi44p5n7y3tr36z7trckbwnuv3eyapmk] example_inputs[251]: TensorMetadata(dtype=torch.float32, shape=torch.Size([2304, 768]), stride=(768, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=None, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[6to4nwahrkxgsie75pfivv3ndqufs4lughzpqcdhir3tm3ezo3n] example_inputs[252]: TensorMetadata(dtype=torch.float32, shape=torch.Size([2304]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=None, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[2ltp76oexmn4iaroifj5ea33qciy4yxhn7j6tpopk3n2hntts7a] example_inputs[253]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768, 768]), stride=(768, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=None, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[ln6xprjugjuaphagzxcpw3ly7ocdvczoqxk4ltvl3onfxvve4on] example_inputs[254]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=None, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[ln6xprjugjuaphagzxcpw3ly7ocdvczoqxk4ltvl3onfxvve4on] example_inputs[255]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=None, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[ln6xprjugjuaphagzxcpw3ly7ocdvczoqxk4ltvl3onfxvve4on] example_inputs[256]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=None, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[ci5juyeynyazk6a6gqle7pthpejmypuopbxaghaccdz2d3kpsz4] example_inputs[257]: TensorMetadata(dtype=torch.float32, shape=torch.Size([3072, 768]), stride=(768, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=None, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[wzhy2ewb5m2s7i22j2cam7aomq3hpahzofgjnxkb6jug7de7qgw] example_inputs[258]: TensorMetadata(dtype=torch.float32, shape=torch.Size([3072]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=None, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, 
is_coalesced=None, dense_dim=None, sparse_dim=None)", "[3tccso7i75mxvdsbabjcgf45nen6thm6hxdkqvudhrh5iu2kbs7] example_inputs[259]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768, 3072]), stride=(3072, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=None, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[ln6xprjugjuaphagzxcpw3ly7ocdvczoqxk4ltvl3onfxvve4on] example_inputs[260]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=None, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[ln6xprjugjuaphagzxcpw3ly7ocdvczoqxk4ltvl3onfxvve4on] example_inputs[261]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=None, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[ln6xprjugjuaphagzxcpw3ly7ocdvczoqxk4ltvl3onfxvve4on] example_inputs[262]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=None, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[hrasck5lnskaq7gv4vyfi44p5n7y3tr36z7trckbwnuv3eyapmk] example_inputs[263]: TensorMetadata(dtype=torch.float32, shape=torch.Size([2304, 768]), stride=(768, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=None, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[6to4nwahrkxgsie75pfivv3ndqufs4lughzpqcdhir3tm3ezo3n] example_inputs[264]: TensorMetadata(dtype=torch.float32, shape=torch.Size([2304]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=None, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[2ltp76oexmn4iaroifj5ea33qciy4yxhn7j6tpopk3n2hntts7a] example_inputs[265]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768, 768]), stride=(768, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=None, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[ln6xprjugjuaphagzxcpw3ly7ocdvczoqxk4ltvl3onfxvve4on] example_inputs[266]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=None, requires_grad=False, is_quantized=False, 
is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[ln6xprjugjuaphagzxcpw3ly7ocdvczoqxk4ltvl3onfxvve4on] example_inputs[267]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=None, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[ln6xprjugjuaphagzxcpw3ly7ocdvczoqxk4ltvl3onfxvve4on] example_inputs[268]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=None, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[ci5juyeynyazk6a6gqle7pthpejmypuopbxaghaccdz2d3kpsz4] example_inputs[269]: TensorMetadata(dtype=torch.float32, shape=torch.Size([3072, 768]), stride=(768, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=None, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[wzhy2ewb5m2s7i22j2cam7aomq3hpahzofgjnxkb6jug7de7qgw] example_inputs[270]: TensorMetadata(dtype=torch.float32, shape=torch.Size([3072]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=None, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[3tccso7i75mxvdsbabjcgf45nen6thm6hxdkqvudhrh5iu2kbs7] example_inputs[271]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768, 3072]), stride=(3072, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=None, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[ln6xprjugjuaphagzxcpw3ly7ocdvczoqxk4ltvl3onfxvve4on] example_inputs[272]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=None, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[ln6xprjugjuaphagzxcpw3ly7ocdvczoqxk4ltvl3onfxvve4on] example_inputs[273]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=None, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[ln6xprjugjuaphagzxcpw3ly7ocdvczoqxk4ltvl3onfxvve4on] example_inputs[274]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, 
storage_bytes=None, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[hrasck5lnskaq7gv4vyfi44p5n7y3tr36z7trckbwnuv3eyapmk] example_inputs[275]: TensorMetadata(dtype=torch.float32, shape=torch.Size([2304, 768]), stride=(768, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=None, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[6to4nwahrkxgsie75pfivv3ndqufs4lughzpqcdhir3tm3ezo3n] example_inputs[276]: TensorMetadata(dtype=torch.float32, shape=torch.Size([2304]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=None, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[2ltp76oexmn4iaroifj5ea33qciy4yxhn7j6tpopk3n2hntts7a] example_inputs[277]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768, 768]), stride=(768, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=None, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[ln6xprjugjuaphagzxcpw3ly7ocdvczoqxk4ltvl3onfxvve4on] example_inputs[278]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=None, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[ln6xprjugjuaphagzxcpw3ly7ocdvczoqxk4ltvl3onfxvve4on] example_inputs[279]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=None, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[ln6xprjugjuaphagzxcpw3ly7ocdvczoqxk4ltvl3onfxvve4on] example_inputs[280]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=None, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[ci5juyeynyazk6a6gqle7pthpejmypuopbxaghaccdz2d3kpsz4] example_inputs[281]: TensorMetadata(dtype=torch.float32, shape=torch.Size([3072, 768]), stride=(768, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=None, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[wzhy2ewb5m2s7i22j2cam7aomq3hpahzofgjnxkb6jug7de7qgw] example_inputs[282]: TensorMetadata(dtype=torch.float32, shape=torch.Size([3072]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, 
memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=None, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[3tccso7i75mxvdsbabjcgf45nen6thm6hxdkqvudhrh5iu2kbs7] example_inputs[283]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768, 3072]), stride=(3072, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=None, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[ln6xprjugjuaphagzxcpw3ly7ocdvczoqxk4ltvl3onfxvve4on] example_inputs[284]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=None, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[ln6xprjugjuaphagzxcpw3ly7ocdvczoqxk4ltvl3onfxvve4on] example_inputs[285]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=None, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[ln6xprjugjuaphagzxcpw3ly7ocdvczoqxk4ltvl3onfxvve4on] example_inputs[286]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=None, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[hrasck5lnskaq7gv4vyfi44p5n7y3tr36z7trckbwnuv3eyapmk] example_inputs[287]: TensorMetadata(dtype=torch.float32, shape=torch.Size([2304, 768]), stride=(768, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=None, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[6to4nwahrkxgsie75pfivv3ndqufs4lughzpqcdhir3tm3ezo3n] example_inputs[288]: TensorMetadata(dtype=torch.float32, shape=torch.Size([2304]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=None, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[2ltp76oexmn4iaroifj5ea33qciy4yxhn7j6tpopk3n2hntts7a] example_inputs[289]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768, 768]), stride=(768, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=None, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[ln6xprjugjuaphagzxcpw3ly7ocdvczoqxk4ltvl3onfxvve4on] example_inputs[290]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), stride=(1,), 
device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=None, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[ln6xprjugjuaphagzxcpw3ly7ocdvczoqxk4ltvl3onfxvve4on] example_inputs[291]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=None, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[ln6xprjugjuaphagzxcpw3ly7ocdvczoqxk4ltvl3onfxvve4on] example_inputs[292]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=None, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[ci5juyeynyazk6a6gqle7pthpejmypuopbxaghaccdz2d3kpsz4] example_inputs[293]: TensorMetadata(dtype=torch.float32, shape=torch.Size([3072, 768]), stride=(768, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=None, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[wzhy2ewb5m2s7i22j2cam7aomq3hpahzofgjnxkb6jug7de7qgw] example_inputs[294]: TensorMetadata(dtype=torch.float32, shape=torch.Size([3072]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=None, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[3tccso7i75mxvdsbabjcgf45nen6thm6hxdkqvudhrh5iu2kbs7] example_inputs[295]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768, 3072]), stride=(3072, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=None, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[ln6xprjugjuaphagzxcpw3ly7ocdvczoqxk4ltvl3onfxvve4on] example_inputs[296]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=None, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[ln6xprjugjuaphagzxcpw3ly7ocdvczoqxk4ltvl3onfxvve4on] example_inputs[297]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=None, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[ln6xprjugjuaphagzxcpw3ly7ocdvczoqxk4ltvl3onfxvve4on] example_inputs[298]: 
TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=None, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[l36txgwatro2oumaemq3fn7fzd6c7rqt7jkya4whofwgykwhyac] example_inputs[299]: TensorMetadata(dtype=torch.float32, shape=torch.Size([50304, 768]), stride=(768, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=154533888, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[ymueslj5cs5k2l243gcdaohltqzpminou4x5gltmu2cgjgroyyd] example_inputs[300]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=3072, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[ymueslj5cs5k2l243gcdaohltqzpminou4x5gltmu2cgjgroyyd] example_inputs[301]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=3072, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[ku6bnu4u7osvly4jsmf2w2orf22tghfytwzq6ycj3otmnq6f3wn] example_inputs[302]: TensorMetadata(dtype=torch.float32, shape=torch.Size([2304, 768]), stride=(768, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=7077888, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[57h3bzk67jreuqiikhvgdtcn2mdkz7xuwhdnrjueg4dxxn6tfzm] example_inputs[303]: TensorMetadata(dtype=torch.float32, shape=torch.Size([2304]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=9216, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[a64xnvbb6q6gew5c3aovaeqivj3hstrmxkbra3eidivx42znnuv] example_inputs[304]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768, 768]), stride=(768, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=2359296, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[ymueslj5cs5k2l243gcdaohltqzpminou4x5gltmu2cgjgroyyd] example_inputs[305]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=3072, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", 
"[ymueslj5cs5k2l243gcdaohltqzpminou4x5gltmu2cgjgroyyd] example_inputs[306]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=3072, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[ymueslj5cs5k2l243gcdaohltqzpminou4x5gltmu2cgjgroyyd] example_inputs[307]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=3072, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[aafg74anlk7yqmlg6ucup4tmjwhskblh75z227dcfmwpfzw7dzm] example_inputs[308]: TensorMetadata(dtype=torch.float32, shape=torch.Size([3072, 768]), stride=(768, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=9437184, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[sefiqkkteehwsnilwpljkx7vdgondub2hwggoishzmx7o5etmjk] example_inputs[309]: TensorMetadata(dtype=torch.float32, shape=torch.Size([3072]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=12288, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[6v4azk5hvrkyq22k6mfa2l7ovaddatmim7mcvcpdku2ieirjd64] example_inputs[310]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768, 3072]), stride=(3072, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=9437184, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[ymueslj5cs5k2l243gcdaohltqzpminou4x5gltmu2cgjgroyyd] example_inputs[311]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=3072, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[ymueslj5cs5k2l243gcdaohltqzpminou4x5gltmu2cgjgroyyd] example_inputs[312]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=3072, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[ymueslj5cs5k2l243gcdaohltqzpminou4x5gltmu2cgjgroyyd] example_inputs[313]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=3072, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, 
is_coalesced=None, dense_dim=None, sparse_dim=None)", "[ku6bnu4u7osvly4jsmf2w2orf22tghfytwzq6ycj3otmnq6f3wn] example_inputs[314]: TensorMetadata(dtype=torch.float32, shape=torch.Size([2304, 768]), stride=(768, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=7077888, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[57h3bzk67jreuqiikhvgdtcn2mdkz7xuwhdnrjueg4dxxn6tfzm] example_inputs[315]: TensorMetadata(dtype=torch.float32, shape=torch.Size([2304]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=9216, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[a64xnvbb6q6gew5c3aovaeqivj3hstrmxkbra3eidivx42znnuv] example_inputs[316]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768, 768]), stride=(768, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=2359296, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[ymueslj5cs5k2l243gcdaohltqzpminou4x5gltmu2cgjgroyyd] example_inputs[317]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=3072, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[ymueslj5cs5k2l243gcdaohltqzpminou4x5gltmu2cgjgroyyd] example_inputs[318]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=3072, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[ymueslj5cs5k2l243gcdaohltqzpminou4x5gltmu2cgjgroyyd] example_inputs[319]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=3072, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[aafg74anlk7yqmlg6ucup4tmjwhskblh75z227dcfmwpfzw7dzm] example_inputs[320]: TensorMetadata(dtype=torch.float32, shape=torch.Size([3072, 768]), stride=(768, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=9437184, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[sefiqkkteehwsnilwpljkx7vdgondub2hwggoishzmx7o5etmjk] example_inputs[321]: TensorMetadata(dtype=torch.float32, shape=torch.Size([3072]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=12288, requires_grad=False, is_quantized=False, 
is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[6v4azk5hvrkyq22k6mfa2l7ovaddatmim7mcvcpdku2ieirjd64] example_inputs[322]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768, 3072]), stride=(3072, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=9437184, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[ymueslj5cs5k2l243gcdaohltqzpminou4x5gltmu2cgjgroyyd] example_inputs[323]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=3072, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[ymueslj5cs5k2l243gcdaohltqzpminou4x5gltmu2cgjgroyyd] example_inputs[324]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=3072, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[ymueslj5cs5k2l243gcdaohltqzpminou4x5gltmu2cgjgroyyd] example_inputs[325]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=3072, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[ku6bnu4u7osvly4jsmf2w2orf22tghfytwzq6ycj3otmnq6f3wn] example_inputs[326]: TensorMetadata(dtype=torch.float32, shape=torch.Size([2304, 768]), stride=(768, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=7077888, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[57h3bzk67jreuqiikhvgdtcn2mdkz7xuwhdnrjueg4dxxn6tfzm] example_inputs[327]: TensorMetadata(dtype=torch.float32, shape=torch.Size([2304]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=9216, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[a64xnvbb6q6gew5c3aovaeqivj3hstrmxkbra3eidivx42znnuv] example_inputs[328]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768, 768]), stride=(768, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=2359296, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[ymueslj5cs5k2l243gcdaohltqzpminou4x5gltmu2cgjgroyyd] example_inputs[329]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, 
storage_bytes=3072, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[ymueslj5cs5k2l243gcdaohltqzpminou4x5gltmu2cgjgroyyd] example_inputs[330]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=3072, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[ymueslj5cs5k2l243gcdaohltqzpminou4x5gltmu2cgjgroyyd] example_inputs[331]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=3072, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[aafg74anlk7yqmlg6ucup4tmjwhskblh75z227dcfmwpfzw7dzm] example_inputs[332]: TensorMetadata(dtype=torch.float32, shape=torch.Size([3072, 768]), stride=(768, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=9437184, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[sefiqkkteehwsnilwpljkx7vdgondub2hwggoishzmx7o5etmjk] example_inputs[333]: TensorMetadata(dtype=torch.float32, shape=torch.Size([3072]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=12288, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[6v4azk5hvrkyq22k6mfa2l7ovaddatmim7mcvcpdku2ieirjd64] example_inputs[334]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768, 3072]), stride=(3072, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=9437184, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[ymueslj5cs5k2l243gcdaohltqzpminou4x5gltmu2cgjgroyyd] example_inputs[335]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=3072, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[ymueslj5cs5k2l243gcdaohltqzpminou4x5gltmu2cgjgroyyd] example_inputs[336]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=3072, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[ymueslj5cs5k2l243gcdaohltqzpminou4x5gltmu2cgjgroyyd] example_inputs[337]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, 
memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=3072, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[ku6bnu4u7osvly4jsmf2w2orf22tghfytwzq6ycj3otmnq6f3wn] example_inputs[338]: TensorMetadata(dtype=torch.float32, shape=torch.Size([2304, 768]), stride=(768, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=7077888, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[57h3bzk67jreuqiikhvgdtcn2mdkz7xuwhdnrjueg4dxxn6tfzm] example_inputs[339]: TensorMetadata(dtype=torch.float32, shape=torch.Size([2304]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=9216, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[a64xnvbb6q6gew5c3aovaeqivj3hstrmxkbra3eidivx42znnuv] example_inputs[340]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768, 768]), stride=(768, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=2359296, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[ymueslj5cs5k2l243gcdaohltqzpminou4x5gltmu2cgjgroyyd] example_inputs[341]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=3072, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[ymueslj5cs5k2l243gcdaohltqzpminou4x5gltmu2cgjgroyyd] example_inputs[342]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=3072, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[ymueslj5cs5k2l243gcdaohltqzpminou4x5gltmu2cgjgroyyd] example_inputs[343]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=3072, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[aafg74anlk7yqmlg6ucup4tmjwhskblh75z227dcfmwpfzw7dzm] example_inputs[344]: TensorMetadata(dtype=torch.float32, shape=torch.Size([3072, 768]), stride=(768, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=9437184, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[sefiqkkteehwsnilwpljkx7vdgondub2hwggoishzmx7o5etmjk] example_inputs[345]: TensorMetadata(dtype=torch.float32, shape=torch.Size([3072]), 
stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=12288, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[6v4azk5hvrkyq22k6mfa2l7ovaddatmim7mcvcpdku2ieirjd64] example_inputs[346]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768, 3072]), stride=(3072, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=9437184, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[ymueslj5cs5k2l243gcdaohltqzpminou4x5gltmu2cgjgroyyd] example_inputs[347]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=3072, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[ymueslj5cs5k2l243gcdaohltqzpminou4x5gltmu2cgjgroyyd] example_inputs[348]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=3072, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[ymueslj5cs5k2l243gcdaohltqzpminou4x5gltmu2cgjgroyyd] example_inputs[349]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=3072, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[ku6bnu4u7osvly4jsmf2w2orf22tghfytwzq6ycj3otmnq6f3wn] example_inputs[350]: TensorMetadata(dtype=torch.float32, shape=torch.Size([2304, 768]), stride=(768, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=7077888, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[57h3bzk67jreuqiikhvgdtcn2mdkz7xuwhdnrjueg4dxxn6tfzm] example_inputs[351]: TensorMetadata(dtype=torch.float32, shape=torch.Size([2304]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=9216, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[a64xnvbb6q6gew5c3aovaeqivj3hstrmxkbra3eidivx42znnuv] example_inputs[352]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768, 768]), stride=(768, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=2359296, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[ymueslj5cs5k2l243gcdaohltqzpminou4x5gltmu2cgjgroyyd] 
example_inputs[353]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=3072, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[ymueslj5cs5k2l243gcdaohltqzpminou4x5gltmu2cgjgroyyd] example_inputs[354]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=3072, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[ymueslj5cs5k2l243gcdaohltqzpminou4x5gltmu2cgjgroyyd] example_inputs[355]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=3072, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[aafg74anlk7yqmlg6ucup4tmjwhskblh75z227dcfmwpfzw7dzm] example_inputs[356]: TensorMetadata(dtype=torch.float32, shape=torch.Size([3072, 768]), stride=(768, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=9437184, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[sefiqkkteehwsnilwpljkx7vdgondub2hwggoishzmx7o5etmjk] example_inputs[357]: TensorMetadata(dtype=torch.float32, shape=torch.Size([3072]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=12288, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[6v4azk5hvrkyq22k6mfa2l7ovaddatmim7mcvcpdku2ieirjd64] example_inputs[358]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768, 3072]), stride=(3072, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=9437184, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[ymueslj5cs5k2l243gcdaohltqzpminou4x5gltmu2cgjgroyyd] example_inputs[359]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=3072, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[ymueslj5cs5k2l243gcdaohltqzpminou4x5gltmu2cgjgroyyd] example_inputs[360]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=3072, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", 
"[ymueslj5cs5k2l243gcdaohltqzpminou4x5gltmu2cgjgroyyd] example_inputs[361]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=3072, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[ku6bnu4u7osvly4jsmf2w2orf22tghfytwzq6ycj3otmnq6f3wn] example_inputs[362]: TensorMetadata(dtype=torch.float32, shape=torch.Size([2304, 768]), stride=(768, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=7077888, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[57h3bzk67jreuqiikhvgdtcn2mdkz7xuwhdnrjueg4dxxn6tfzm] example_inputs[363]: TensorMetadata(dtype=torch.float32, shape=torch.Size([2304]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=9216, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[a64xnvbb6q6gew5c3aovaeqivj3hstrmxkbra3eidivx42znnuv] example_inputs[364]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768, 768]), stride=(768, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=2359296, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[ymueslj5cs5k2l243gcdaohltqzpminou4x5gltmu2cgjgroyyd] example_inputs[365]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=3072, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[ymueslj5cs5k2l243gcdaohltqzpminou4x5gltmu2cgjgroyyd] example_inputs[366]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=3072, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[ymueslj5cs5k2l243gcdaohltqzpminou4x5gltmu2cgjgroyyd] example_inputs[367]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=3072, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[aafg74anlk7yqmlg6ucup4tmjwhskblh75z227dcfmwpfzw7dzm] example_inputs[368]: TensorMetadata(dtype=torch.float32, shape=torch.Size([3072, 768]), stride=(768, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=9437184, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, 
is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[sefiqkkteehwsnilwpljkx7vdgondub2hwggoishzmx7o5etmjk] example_inputs[369]: TensorMetadata(dtype=torch.float32, shape=torch.Size([3072]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=12288, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[6v4azk5hvrkyq22k6mfa2l7ovaddatmim7mcvcpdku2ieirjd64] example_inputs[370]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768, 3072]), stride=(3072, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=9437184, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[ymueslj5cs5k2l243gcdaohltqzpminou4x5gltmu2cgjgroyyd] example_inputs[371]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=3072, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[ymueslj5cs5k2l243gcdaohltqzpminou4x5gltmu2cgjgroyyd] example_inputs[372]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=3072, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[ymueslj5cs5k2l243gcdaohltqzpminou4x5gltmu2cgjgroyyd] example_inputs[373]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=3072, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[ku6bnu4u7osvly4jsmf2w2orf22tghfytwzq6ycj3otmnq6f3wn] example_inputs[374]: TensorMetadata(dtype=torch.float32, shape=torch.Size([2304, 768]), stride=(768, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=7077888, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[57h3bzk67jreuqiikhvgdtcn2mdkz7xuwhdnrjueg4dxxn6tfzm] example_inputs[375]: TensorMetadata(dtype=torch.float32, shape=torch.Size([2304]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=9216, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[a64xnvbb6q6gew5c3aovaeqivj3hstrmxkbra3eidivx42znnuv] example_inputs[376]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768, 768]), stride=(768, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=2359296, requires_grad=False, 
is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[ymueslj5cs5k2l243gcdaohltqzpminou4x5gltmu2cgjgroyyd] example_inputs[377]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=3072, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[ymueslj5cs5k2l243gcdaohltqzpminou4x5gltmu2cgjgroyyd] example_inputs[378]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=3072, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[ymueslj5cs5k2l243gcdaohltqzpminou4x5gltmu2cgjgroyyd] example_inputs[379]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=3072, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[aafg74anlk7yqmlg6ucup4tmjwhskblh75z227dcfmwpfzw7dzm] example_inputs[380]: TensorMetadata(dtype=torch.float32, shape=torch.Size([3072, 768]), stride=(768, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=9437184, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[sefiqkkteehwsnilwpljkx7vdgondub2hwggoishzmx7o5etmjk] example_inputs[381]: TensorMetadata(dtype=torch.float32, shape=torch.Size([3072]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=12288, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[6v4azk5hvrkyq22k6mfa2l7ovaddatmim7mcvcpdku2ieirjd64] example_inputs[382]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768, 3072]), stride=(3072, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=9437184, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[ymueslj5cs5k2l243gcdaohltqzpminou4x5gltmu2cgjgroyyd] example_inputs[383]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=3072, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[ymueslj5cs5k2l243gcdaohltqzpminou4x5gltmu2cgjgroyyd] example_inputs[384]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, 
storage_offset=0, storage_bytes=3072, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[ymueslj5cs5k2l243gcdaohltqzpminou4x5gltmu2cgjgroyyd] example_inputs[385]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=3072, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[ku6bnu4u7osvly4jsmf2w2orf22tghfytwzq6ycj3otmnq6f3wn] example_inputs[386]: TensorMetadata(dtype=torch.float32, shape=torch.Size([2304, 768]), stride=(768, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=7077888, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[57h3bzk67jreuqiikhvgdtcn2mdkz7xuwhdnrjueg4dxxn6tfzm] example_inputs[387]: TensorMetadata(dtype=torch.float32, shape=torch.Size([2304]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=9216, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[a64xnvbb6q6gew5c3aovaeqivj3hstrmxkbra3eidivx42znnuv] example_inputs[388]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768, 768]), stride=(768, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=2359296, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[ymueslj5cs5k2l243gcdaohltqzpminou4x5gltmu2cgjgroyyd] example_inputs[389]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=3072, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[ymueslj5cs5k2l243gcdaohltqzpminou4x5gltmu2cgjgroyyd] example_inputs[390]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=3072, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[ymueslj5cs5k2l243gcdaohltqzpminou4x5gltmu2cgjgroyyd] example_inputs[391]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=3072, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[aafg74anlk7yqmlg6ucup4tmjwhskblh75z227dcfmwpfzw7dzm] example_inputs[392]: TensorMetadata(dtype=torch.float32, shape=torch.Size([3072, 768]), stride=(768, 1), device=device(type='cuda', index=0), 
layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=9437184, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[sefiqkkteehwsnilwpljkx7vdgondub2hwggoishzmx7o5etmjk] example_inputs[393]: TensorMetadata(dtype=torch.float32, shape=torch.Size([3072]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=12288, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[6v4azk5hvrkyq22k6mfa2l7ovaddatmim7mcvcpdku2ieirjd64] example_inputs[394]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768, 3072]), stride=(3072, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=9437184, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[ymueslj5cs5k2l243gcdaohltqzpminou4x5gltmu2cgjgroyyd] example_inputs[395]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=3072, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[ymueslj5cs5k2l243gcdaohltqzpminou4x5gltmu2cgjgroyyd] example_inputs[396]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=3072, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[ymueslj5cs5k2l243gcdaohltqzpminou4x5gltmu2cgjgroyyd] example_inputs[397]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=3072, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[ku6bnu4u7osvly4jsmf2w2orf22tghfytwzq6ycj3otmnq6f3wn] example_inputs[398]: TensorMetadata(dtype=torch.float32, shape=torch.Size([2304, 768]), stride=(768, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=7077888, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[57h3bzk67jreuqiikhvgdtcn2mdkz7xuwhdnrjueg4dxxn6tfzm] example_inputs[399]: TensorMetadata(dtype=torch.float32, shape=torch.Size([2304]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=9216, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[a64xnvbb6q6gew5c3aovaeqivj3hstrmxkbra3eidivx42znnuv] example_inputs[400]: TensorMetadata(dtype=torch.float32, 
shape=torch.Size([768, 768]), stride=(768, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=2359296, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[ymueslj5cs5k2l243gcdaohltqzpminou4x5gltmu2cgjgroyyd] example_inputs[401]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=3072, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[ymueslj5cs5k2l243gcdaohltqzpminou4x5gltmu2cgjgroyyd] example_inputs[402]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=3072, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[ymueslj5cs5k2l243gcdaohltqzpminou4x5gltmu2cgjgroyyd] example_inputs[403]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=3072, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[aafg74anlk7yqmlg6ucup4tmjwhskblh75z227dcfmwpfzw7dzm] example_inputs[404]: TensorMetadata(dtype=torch.float32, shape=torch.Size([3072, 768]), stride=(768, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=9437184, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[sefiqkkteehwsnilwpljkx7vdgondub2hwggoishzmx7o5etmjk] example_inputs[405]: TensorMetadata(dtype=torch.float32, shape=torch.Size([3072]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=12288, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[6v4azk5hvrkyq22k6mfa2l7ovaddatmim7mcvcpdku2ieirjd64] example_inputs[406]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768, 3072]), stride=(3072, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=9437184, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[ymueslj5cs5k2l243gcdaohltqzpminou4x5gltmu2cgjgroyyd] example_inputs[407]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=3072, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", 
"[ymueslj5cs5k2l243gcdaohltqzpminou4x5gltmu2cgjgroyyd] example_inputs[408]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=3072, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[ymueslj5cs5k2l243gcdaohltqzpminou4x5gltmu2cgjgroyyd] example_inputs[409]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=3072, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[ku6bnu4u7osvly4jsmf2w2orf22tghfytwzq6ycj3otmnq6f3wn] example_inputs[410]: TensorMetadata(dtype=torch.float32, shape=torch.Size([2304, 768]), stride=(768, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=7077888, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[57h3bzk67jreuqiikhvgdtcn2mdkz7xuwhdnrjueg4dxxn6tfzm] example_inputs[411]: TensorMetadata(dtype=torch.float32, shape=torch.Size([2304]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=9216, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[a64xnvbb6q6gew5c3aovaeqivj3hstrmxkbra3eidivx42znnuv] example_inputs[412]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768, 768]), stride=(768, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=2359296, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[ymueslj5cs5k2l243gcdaohltqzpminou4x5gltmu2cgjgroyyd] example_inputs[413]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=3072, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[ymueslj5cs5k2l243gcdaohltqzpminou4x5gltmu2cgjgroyyd] example_inputs[414]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=3072, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[ymueslj5cs5k2l243gcdaohltqzpminou4x5gltmu2cgjgroyyd] example_inputs[415]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=3072, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, 
is_coalesced=None, dense_dim=None, sparse_dim=None)", "[aafg74anlk7yqmlg6ucup4tmjwhskblh75z227dcfmwpfzw7dzm] example_inputs[416]: TensorMetadata(dtype=torch.float32, shape=torch.Size([3072, 768]), stride=(768, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=9437184, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[sefiqkkteehwsnilwpljkx7vdgondub2hwggoishzmx7o5etmjk] example_inputs[417]: TensorMetadata(dtype=torch.float32, shape=torch.Size([3072]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=12288, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[6v4azk5hvrkyq22k6mfa2l7ovaddatmim7mcvcpdku2ieirjd64] example_inputs[418]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768, 3072]), stride=(3072, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=9437184, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[ymueslj5cs5k2l243gcdaohltqzpminou4x5gltmu2cgjgroyyd] example_inputs[419]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=3072, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[ymueslj5cs5k2l243gcdaohltqzpminou4x5gltmu2cgjgroyyd] example_inputs[420]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=3072, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[ymueslj5cs5k2l243gcdaohltqzpminou4x5gltmu2cgjgroyyd] example_inputs[421]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=3072, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[ku6bnu4u7osvly4jsmf2w2orf22tghfytwzq6ycj3otmnq6f3wn] example_inputs[422]: TensorMetadata(dtype=torch.float32, shape=torch.Size([2304, 768]), stride=(768, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=7077888, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[57h3bzk67jreuqiikhvgdtcn2mdkz7xuwhdnrjueg4dxxn6tfzm] example_inputs[423]: TensorMetadata(dtype=torch.float32, shape=torch.Size([2304]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=9216, requires_grad=False, is_quantized=False, 
is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[a64xnvbb6q6gew5c3aovaeqivj3hstrmxkbra3eidivx42znnuv] example_inputs[424]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768, 768]), stride=(768, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=2359296, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[ymueslj5cs5k2l243gcdaohltqzpminou4x5gltmu2cgjgroyyd] example_inputs[425]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=3072, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[ymueslj5cs5k2l243gcdaohltqzpminou4x5gltmu2cgjgroyyd] example_inputs[426]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=3072, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[ymueslj5cs5k2l243gcdaohltqzpminou4x5gltmu2cgjgroyyd] example_inputs[427]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=3072, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[aafg74anlk7yqmlg6ucup4tmjwhskblh75z227dcfmwpfzw7dzm] example_inputs[428]: TensorMetadata(dtype=torch.float32, shape=torch.Size([3072, 768]), stride=(768, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=9437184, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[sefiqkkteehwsnilwpljkx7vdgondub2hwggoishzmx7o5etmjk] example_inputs[429]: TensorMetadata(dtype=torch.float32, shape=torch.Size([3072]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=12288, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[6v4azk5hvrkyq22k6mfa2l7ovaddatmim7mcvcpdku2ieirjd64] example_inputs[430]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768, 3072]), stride=(3072, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=9437184, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[ymueslj5cs5k2l243gcdaohltqzpminou4x5gltmu2cgjgroyyd] example_inputs[431]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, 
storage_offset=0, storage_bytes=3072, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[ymueslj5cs5k2l243gcdaohltqzpminou4x5gltmu2cgjgroyyd] example_inputs[432]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=3072, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[ymueslj5cs5k2l243gcdaohltqzpminou4x5gltmu2cgjgroyyd] example_inputs[433]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=3072, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[ku6bnu4u7osvly4jsmf2w2orf22tghfytwzq6ycj3otmnq6f3wn] example_inputs[434]: TensorMetadata(dtype=torch.float32, shape=torch.Size([2304, 768]), stride=(768, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=7077888, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[57h3bzk67jreuqiikhvgdtcn2mdkz7xuwhdnrjueg4dxxn6tfzm] example_inputs[435]: TensorMetadata(dtype=torch.float32, shape=torch.Size([2304]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=9216, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[a64xnvbb6q6gew5c3aovaeqivj3hstrmxkbra3eidivx42znnuv] example_inputs[436]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768, 768]), stride=(768, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=2359296, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[ymueslj5cs5k2l243gcdaohltqzpminou4x5gltmu2cgjgroyyd] example_inputs[437]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=3072, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[ymueslj5cs5k2l243gcdaohltqzpminou4x5gltmu2cgjgroyyd] example_inputs[438]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=3072, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[ymueslj5cs5k2l243gcdaohltqzpminou4x5gltmu2cgjgroyyd] example_inputs[439]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), 
layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=3072, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[aafg74anlk7yqmlg6ucup4tmjwhskblh75z227dcfmwpfzw7dzm] example_inputs[440]: TensorMetadata(dtype=torch.float32, shape=torch.Size([3072, 768]), stride=(768, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=9437184, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[sefiqkkteehwsnilwpljkx7vdgondub2hwggoishzmx7o5etmjk] example_inputs[441]: TensorMetadata(dtype=torch.float32, shape=torch.Size([3072]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=12288, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[6v4azk5hvrkyq22k6mfa2l7ovaddatmim7mcvcpdku2ieirjd64] example_inputs[442]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768, 3072]), stride=(3072, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=9437184, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[ymueslj5cs5k2l243gcdaohltqzpminou4x5gltmu2cgjgroyyd] example_inputs[443]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=3072, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[ymueslj5cs5k2l243gcdaohltqzpminou4x5gltmu2cgjgroyyd] example_inputs[444]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=3072, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[ymueslj5cs5k2l243gcdaohltqzpminou4x5gltmu2cgjgroyyd] example_inputs[445]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=3072, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[l36txgwatro2oumaemq3fn7fzd6c7rqt7jkya4whofwgykwhyac] example_inputs[446]: TensorMetadata(dtype=torch.float32, shape=torch.Size([50304, 768]), stride=(768, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=154533888, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[ymueslj5cs5k2l243gcdaohltqzpminou4x5gltmu2cgjgroyyd] example_inputs[447]: TensorMetadata(dtype=torch.float32, 
shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=3072, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[ymueslj5cs5k2l243gcdaohltqzpminou4x5gltmu2cgjgroyyd] example_inputs[448]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=3072, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[ku6bnu4u7osvly4jsmf2w2orf22tghfytwzq6ycj3otmnq6f3wn] example_inputs[449]: TensorMetadata(dtype=torch.float32, shape=torch.Size([2304, 768]), stride=(768, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=7077888, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[57h3bzk67jreuqiikhvgdtcn2mdkz7xuwhdnrjueg4dxxn6tfzm] example_inputs[450]: TensorMetadata(dtype=torch.float32, shape=torch.Size([2304]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=9216, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[a64xnvbb6q6gew5c3aovaeqivj3hstrmxkbra3eidivx42znnuv] example_inputs[451]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768, 768]), stride=(768, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=2359296, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[ymueslj5cs5k2l243gcdaohltqzpminou4x5gltmu2cgjgroyyd] example_inputs[452]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=3072, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[ymueslj5cs5k2l243gcdaohltqzpminou4x5gltmu2cgjgroyyd] example_inputs[453]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=3072, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[ymueslj5cs5k2l243gcdaohltqzpminou4x5gltmu2cgjgroyyd] example_inputs[454]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=3072, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[aafg74anlk7yqmlg6ucup4tmjwhskblh75z227dcfmwpfzw7dzm] 
example_inputs[455]: TensorMetadata(dtype=torch.float32, shape=torch.Size([3072, 768]), stride=(768, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=9437184, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[sefiqkkteehwsnilwpljkx7vdgondub2hwggoishzmx7o5etmjk] example_inputs[456]: TensorMetadata(dtype=torch.float32, shape=torch.Size([3072]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=12288, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[6v4azk5hvrkyq22k6mfa2l7ovaddatmim7mcvcpdku2ieirjd64] example_inputs[457]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768, 3072]), stride=(3072, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=9437184, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[ymueslj5cs5k2l243gcdaohltqzpminou4x5gltmu2cgjgroyyd] example_inputs[458]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=3072, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[ymueslj5cs5k2l243gcdaohltqzpminou4x5gltmu2cgjgroyyd] example_inputs[459]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=3072, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[ymueslj5cs5k2l243gcdaohltqzpminou4x5gltmu2cgjgroyyd] example_inputs[460]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=3072, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[ku6bnu4u7osvly4jsmf2w2orf22tghfytwzq6ycj3otmnq6f3wn] example_inputs[461]: TensorMetadata(dtype=torch.float32, shape=torch.Size([2304, 768]), stride=(768, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=7077888, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[57h3bzk67jreuqiikhvgdtcn2mdkz7xuwhdnrjueg4dxxn6tfzm] example_inputs[462]: TensorMetadata(dtype=torch.float32, shape=torch.Size([2304]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=9216, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, 
sparse_dim=None)", "[a64xnvbb6q6gew5c3aovaeqivj3hstrmxkbra3eidivx42znnuv] example_inputs[463]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768, 768]), stride=(768, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=2359296, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[ymueslj5cs5k2l243gcdaohltqzpminou4x5gltmu2cgjgroyyd] example_inputs[464]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=3072, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[ymueslj5cs5k2l243gcdaohltqzpminou4x5gltmu2cgjgroyyd] example_inputs[465]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=3072, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[ymueslj5cs5k2l243gcdaohltqzpminou4x5gltmu2cgjgroyyd] example_inputs[466]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=3072, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[aafg74anlk7yqmlg6ucup4tmjwhskblh75z227dcfmwpfzw7dzm] example_inputs[467]: TensorMetadata(dtype=torch.float32, shape=torch.Size([3072, 768]), stride=(768, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=9437184, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[sefiqkkteehwsnilwpljkx7vdgondub2hwggoishzmx7o5etmjk] example_inputs[468]: TensorMetadata(dtype=torch.float32, shape=torch.Size([3072]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=12288, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[6v4azk5hvrkyq22k6mfa2l7ovaddatmim7mcvcpdku2ieirjd64] example_inputs[469]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768, 3072]), stride=(3072, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=9437184, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[ymueslj5cs5k2l243gcdaohltqzpminou4x5gltmu2cgjgroyyd] example_inputs[470]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=3072, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, 
is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[ymueslj5cs5k2l243gcdaohltqzpminou4x5gltmu2cgjgroyyd] example_inputs[471]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=3072, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[ymueslj5cs5k2l243gcdaohltqzpminou4x5gltmu2cgjgroyyd] example_inputs[472]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=3072, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[ku6bnu4u7osvly4jsmf2w2orf22tghfytwzq6ycj3otmnq6f3wn] example_inputs[473]: TensorMetadata(dtype=torch.float32, shape=torch.Size([2304, 768]), stride=(768, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=7077888, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[57h3bzk67jreuqiikhvgdtcn2mdkz7xuwhdnrjueg4dxxn6tfzm] example_inputs[474]: TensorMetadata(dtype=torch.float32, shape=torch.Size([2304]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=9216, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[a64xnvbb6q6gew5c3aovaeqivj3hstrmxkbra3eidivx42znnuv] example_inputs[475]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768, 768]), stride=(768, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=2359296, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[ymueslj5cs5k2l243gcdaohltqzpminou4x5gltmu2cgjgroyyd] example_inputs[476]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=3072, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[ymueslj5cs5k2l243gcdaohltqzpminou4x5gltmu2cgjgroyyd] example_inputs[477]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=3072, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[ymueslj5cs5k2l243gcdaohltqzpminou4x5gltmu2cgjgroyyd] example_inputs[478]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=3072, requires_grad=False, 
is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[aafg74anlk7yqmlg6ucup4tmjwhskblh75z227dcfmwpfzw7dzm] example_inputs[479]: TensorMetadata(dtype=torch.float32, shape=torch.Size([3072, 768]), stride=(768, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=9437184, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[sefiqkkteehwsnilwpljkx7vdgondub2hwggoishzmx7o5etmjk] example_inputs[480]: TensorMetadata(dtype=torch.float32, shape=torch.Size([3072]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=12288, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[6v4azk5hvrkyq22k6mfa2l7ovaddatmim7mcvcpdku2ieirjd64] example_inputs[481]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768, 3072]), stride=(3072, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=9437184, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[ymueslj5cs5k2l243gcdaohltqzpminou4x5gltmu2cgjgroyyd] example_inputs[482]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=3072, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[ymueslj5cs5k2l243gcdaohltqzpminou4x5gltmu2cgjgroyyd] example_inputs[483]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=3072, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[ymueslj5cs5k2l243gcdaohltqzpminou4x5gltmu2cgjgroyyd] example_inputs[484]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=3072, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[ku6bnu4u7osvly4jsmf2w2orf22tghfytwzq6ycj3otmnq6f3wn] example_inputs[485]: TensorMetadata(dtype=torch.float32, shape=torch.Size([2304, 768]), stride=(768, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=7077888, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[57h3bzk67jreuqiikhvgdtcn2mdkz7xuwhdnrjueg4dxxn6tfzm] example_inputs[486]: TensorMetadata(dtype=torch.float32, shape=torch.Size([2304]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, 
memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=9216, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[a64xnvbb6q6gew5c3aovaeqivj3hstrmxkbra3eidivx42znnuv] example_inputs[487]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768, 768]), stride=(768, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=2359296, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[ymueslj5cs5k2l243gcdaohltqzpminou4x5gltmu2cgjgroyyd] example_inputs[488]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=3072, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[ymueslj5cs5k2l243gcdaohltqzpminou4x5gltmu2cgjgroyyd] example_inputs[489]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=3072, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[ymueslj5cs5k2l243gcdaohltqzpminou4x5gltmu2cgjgroyyd] example_inputs[490]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=3072, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[aafg74anlk7yqmlg6ucup4tmjwhskblh75z227dcfmwpfzw7dzm] example_inputs[491]: TensorMetadata(dtype=torch.float32, shape=torch.Size([3072, 768]), stride=(768, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=9437184, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[sefiqkkteehwsnilwpljkx7vdgondub2hwggoishzmx7o5etmjk] example_inputs[492]: TensorMetadata(dtype=torch.float32, shape=torch.Size([3072]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=12288, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[6v4azk5hvrkyq22k6mfa2l7ovaddatmim7mcvcpdku2ieirjd64] example_inputs[493]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768, 3072]), stride=(3072, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=9437184, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[ymueslj5cs5k2l243gcdaohltqzpminou4x5gltmu2cgjgroyyd] example_inputs[494]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), 
stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=3072, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[ymueslj5cs5k2l243gcdaohltqzpminou4x5gltmu2cgjgroyyd] example_inputs[495]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=3072, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[ymueslj5cs5k2l243gcdaohltqzpminou4x5gltmu2cgjgroyyd] example_inputs[496]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=3072, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[ku6bnu4u7osvly4jsmf2w2orf22tghfytwzq6ycj3otmnq6f3wn] example_inputs[497]: TensorMetadata(dtype=torch.float32, shape=torch.Size([2304, 768]), stride=(768, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=7077888, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[57h3bzk67jreuqiikhvgdtcn2mdkz7xuwhdnrjueg4dxxn6tfzm] example_inputs[498]: TensorMetadata(dtype=torch.float32, shape=torch.Size([2304]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=9216, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[a64xnvbb6q6gew5c3aovaeqivj3hstrmxkbra3eidivx42znnuv] example_inputs[499]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768, 768]), stride=(768, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=2359296, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[ymueslj5cs5k2l243gcdaohltqzpminou4x5gltmu2cgjgroyyd] example_inputs[500]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=3072, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[ymueslj5cs5k2l243gcdaohltqzpminou4x5gltmu2cgjgroyyd] example_inputs[501]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=3072, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[ymueslj5cs5k2l243gcdaohltqzpminou4x5gltmu2cgjgroyyd] example_inputs[502]: 
TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=3072, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[aafg74anlk7yqmlg6ucup4tmjwhskblh75z227dcfmwpfzw7dzm] example_inputs[503]: TensorMetadata(dtype=torch.float32, shape=torch.Size([3072, 768]), stride=(768, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=9437184, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[sefiqkkteehwsnilwpljkx7vdgondub2hwggoishzmx7o5etmjk] example_inputs[504]: TensorMetadata(dtype=torch.float32, shape=torch.Size([3072]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=12288, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[6v4azk5hvrkyq22k6mfa2l7ovaddatmim7mcvcpdku2ieirjd64] example_inputs[505]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768, 3072]), stride=(3072, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=9437184, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[ymueslj5cs5k2l243gcdaohltqzpminou4x5gltmu2cgjgroyyd] example_inputs[506]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=3072, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[ymueslj5cs5k2l243gcdaohltqzpminou4x5gltmu2cgjgroyyd] example_inputs[507]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=3072, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[ymueslj5cs5k2l243gcdaohltqzpminou4x5gltmu2cgjgroyyd] example_inputs[508]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=3072, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[ku6bnu4u7osvly4jsmf2w2orf22tghfytwzq6ycj3otmnq6f3wn] example_inputs[509]: TensorMetadata(dtype=torch.float32, shape=torch.Size([2304, 768]), stride=(768, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=7077888, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", 
"[57h3bzk67jreuqiikhvgdtcn2mdkz7xuwhdnrjueg4dxxn6tfzm] example_inputs[510]: TensorMetadata(dtype=torch.float32, shape=torch.Size([2304]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=9216, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[a64xnvbb6q6gew5c3aovaeqivj3hstrmxkbra3eidivx42znnuv] example_inputs[511]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768, 768]), stride=(768, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=2359296, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[ymueslj5cs5k2l243gcdaohltqzpminou4x5gltmu2cgjgroyyd] example_inputs[512]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=3072, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[ymueslj5cs5k2l243gcdaohltqzpminou4x5gltmu2cgjgroyyd] example_inputs[513]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=3072, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[ymueslj5cs5k2l243gcdaohltqzpminou4x5gltmu2cgjgroyyd] example_inputs[514]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=3072, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[aafg74anlk7yqmlg6ucup4tmjwhskblh75z227dcfmwpfzw7dzm] example_inputs[515]: TensorMetadata(dtype=torch.float32, shape=torch.Size([3072, 768]), stride=(768, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=9437184, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[sefiqkkteehwsnilwpljkx7vdgondub2hwggoishzmx7o5etmjk] example_inputs[516]: TensorMetadata(dtype=torch.float32, shape=torch.Size([3072]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=12288, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[6v4azk5hvrkyq22k6mfa2l7ovaddatmim7mcvcpdku2ieirjd64] example_inputs[517]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768, 3072]), stride=(3072, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=9437184, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, 
is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[ymueslj5cs5k2l243gcdaohltqzpminou4x5gltmu2cgjgroyyd] example_inputs[518]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=3072, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[ymueslj5cs5k2l243gcdaohltqzpminou4x5gltmu2cgjgroyyd] example_inputs[519]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=3072, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[ymueslj5cs5k2l243gcdaohltqzpminou4x5gltmu2cgjgroyyd] example_inputs[520]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=3072, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[ku6bnu4u7osvly4jsmf2w2orf22tghfytwzq6ycj3otmnq6f3wn] example_inputs[521]: TensorMetadata(dtype=torch.float32, shape=torch.Size([2304, 768]), stride=(768, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=7077888, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[57h3bzk67jreuqiikhvgdtcn2mdkz7xuwhdnrjueg4dxxn6tfzm] example_inputs[522]: TensorMetadata(dtype=torch.float32, shape=torch.Size([2304]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=9216, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[a64xnvbb6q6gew5c3aovaeqivj3hstrmxkbra3eidivx42znnuv] example_inputs[523]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768, 768]), stride=(768, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=2359296, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[ymueslj5cs5k2l243gcdaohltqzpminou4x5gltmu2cgjgroyyd] example_inputs[524]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=3072, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[ymueslj5cs5k2l243gcdaohltqzpminou4x5gltmu2cgjgroyyd] example_inputs[525]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=3072, requires_grad=False, is_quantized=False, 
is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[ymueslj5cs5k2l243gcdaohltqzpminou4x5gltmu2cgjgroyyd] example_inputs[526]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=3072, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[aafg74anlk7yqmlg6ucup4tmjwhskblh75z227dcfmwpfzw7dzm] example_inputs[527]: TensorMetadata(dtype=torch.float32, shape=torch.Size([3072, 768]), stride=(768, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=9437184, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[sefiqkkteehwsnilwpljkx7vdgondub2hwggoishzmx7o5etmjk] example_inputs[528]: TensorMetadata(dtype=torch.float32, shape=torch.Size([3072]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=12288, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[6v4azk5hvrkyq22k6mfa2l7ovaddatmim7mcvcpdku2ieirjd64] example_inputs[529]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768, 3072]), stride=(3072, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=9437184, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[ymueslj5cs5k2l243gcdaohltqzpminou4x5gltmu2cgjgroyyd] example_inputs[530]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=3072, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[ymueslj5cs5k2l243gcdaohltqzpminou4x5gltmu2cgjgroyyd] example_inputs[531]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=3072, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[ymueslj5cs5k2l243gcdaohltqzpminou4x5gltmu2cgjgroyyd] example_inputs[532]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=3072, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[ku6bnu4u7osvly4jsmf2w2orf22tghfytwzq6ycj3otmnq6f3wn] example_inputs[533]: TensorMetadata(dtype=torch.float32, shape=torch.Size([2304, 768]), stride=(768, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, 
storage_bytes=7077888, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[57h3bzk67jreuqiikhvgdtcn2mdkz7xuwhdnrjueg4dxxn6tfzm] example_inputs[534]: TensorMetadata(dtype=torch.float32, shape=torch.Size([2304]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=9216, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[a64xnvbb6q6gew5c3aovaeqivj3hstrmxkbra3eidivx42znnuv] example_inputs[535]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768, 768]), stride=(768, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=2359296, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[ymueslj5cs5k2l243gcdaohltqzpminou4x5gltmu2cgjgroyyd] example_inputs[536]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=3072, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[ymueslj5cs5k2l243gcdaohltqzpminou4x5gltmu2cgjgroyyd] example_inputs[537]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=3072, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[ymueslj5cs5k2l243gcdaohltqzpminou4x5gltmu2cgjgroyyd] example_inputs[538]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=3072, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[aafg74anlk7yqmlg6ucup4tmjwhskblh75z227dcfmwpfzw7dzm] example_inputs[539]: TensorMetadata(dtype=torch.float32, shape=torch.Size([3072, 768]), stride=(768, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=9437184, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[sefiqkkteehwsnilwpljkx7vdgondub2hwggoishzmx7o5etmjk] example_inputs[540]: TensorMetadata(dtype=torch.float32, shape=torch.Size([3072]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=12288, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[6v4azk5hvrkyq22k6mfa2l7ovaddatmim7mcvcpdku2ieirjd64] example_inputs[541]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768, 3072]), stride=(3072, 1), device=device(type='cuda', index=0), 
layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=9437184, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[ymueslj5cs5k2l243gcdaohltqzpminou4x5gltmu2cgjgroyyd] example_inputs[542]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=3072, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[ymueslj5cs5k2l243gcdaohltqzpminou4x5gltmu2cgjgroyyd] example_inputs[543]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=3072, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[ymueslj5cs5k2l243gcdaohltqzpminou4x5gltmu2cgjgroyyd] example_inputs[544]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=3072, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[ku6bnu4u7osvly4jsmf2w2orf22tghfytwzq6ycj3otmnq6f3wn] example_inputs[545]: TensorMetadata(dtype=torch.float32, shape=torch.Size([2304, 768]), stride=(768, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=7077888, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[57h3bzk67jreuqiikhvgdtcn2mdkz7xuwhdnrjueg4dxxn6tfzm] example_inputs[546]: TensorMetadata(dtype=torch.float32, shape=torch.Size([2304]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=9216, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[a64xnvbb6q6gew5c3aovaeqivj3hstrmxkbra3eidivx42znnuv] example_inputs[547]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768, 768]), stride=(768, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=2359296, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[ymueslj5cs5k2l243gcdaohltqzpminou4x5gltmu2cgjgroyyd] example_inputs[548]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=3072, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[ymueslj5cs5k2l243gcdaohltqzpminou4x5gltmu2cgjgroyyd] example_inputs[549]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), 
stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=3072, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[ymueslj5cs5k2l243gcdaohltqzpminou4x5gltmu2cgjgroyyd] example_inputs[550]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=3072, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[aafg74anlk7yqmlg6ucup4tmjwhskblh75z227dcfmwpfzw7dzm] example_inputs[551]: TensorMetadata(dtype=torch.float32, shape=torch.Size([3072, 768]), stride=(768, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=9437184, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[sefiqkkteehwsnilwpljkx7vdgondub2hwggoishzmx7o5etmjk] example_inputs[552]: TensorMetadata(dtype=torch.float32, shape=torch.Size([3072]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=12288, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[6v4azk5hvrkyq22k6mfa2l7ovaddatmim7mcvcpdku2ieirjd64] example_inputs[553]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768, 3072]), stride=(3072, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=9437184, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[ymueslj5cs5k2l243gcdaohltqzpminou4x5gltmu2cgjgroyyd] example_inputs[554]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=3072, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[ymueslj5cs5k2l243gcdaohltqzpminou4x5gltmu2cgjgroyyd] example_inputs[555]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=3072, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[ymueslj5cs5k2l243gcdaohltqzpminou4x5gltmu2cgjgroyyd] example_inputs[556]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=3072, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[ku6bnu4u7osvly4jsmf2w2orf22tghfytwzq6ycj3otmnq6f3wn] example_inputs[557]: 
TensorMetadata(dtype=torch.float32, shape=torch.Size([2304, 768]), stride=(768, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=7077888, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[57h3bzk67jreuqiikhvgdtcn2mdkz7xuwhdnrjueg4dxxn6tfzm] example_inputs[558]: TensorMetadata(dtype=torch.float32, shape=torch.Size([2304]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=9216, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[a64xnvbb6q6gew5c3aovaeqivj3hstrmxkbra3eidivx42znnuv] example_inputs[559]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768, 768]), stride=(768, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=2359296, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[ymueslj5cs5k2l243gcdaohltqzpminou4x5gltmu2cgjgroyyd] example_inputs[560]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=3072, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[ymueslj5cs5k2l243gcdaohltqzpminou4x5gltmu2cgjgroyyd] example_inputs[561]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=3072, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[ymueslj5cs5k2l243gcdaohltqzpminou4x5gltmu2cgjgroyyd] example_inputs[562]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=3072, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[aafg74anlk7yqmlg6ucup4tmjwhskblh75z227dcfmwpfzw7dzm] example_inputs[563]: TensorMetadata(dtype=torch.float32, shape=torch.Size([3072, 768]), stride=(768, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=9437184, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[sefiqkkteehwsnilwpljkx7vdgondub2hwggoishzmx7o5etmjk] example_inputs[564]: TensorMetadata(dtype=torch.float32, shape=torch.Size([3072]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=12288, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", 
"[6v4azk5hvrkyq22k6mfa2l7ovaddatmim7mcvcpdku2ieirjd64] example_inputs[565]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768, 3072]), stride=(3072, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=9437184, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[ymueslj5cs5k2l243gcdaohltqzpminou4x5gltmu2cgjgroyyd] example_inputs[566]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=3072, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[ymueslj5cs5k2l243gcdaohltqzpminou4x5gltmu2cgjgroyyd] example_inputs[567]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=3072, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[ymueslj5cs5k2l243gcdaohltqzpminou4x5gltmu2cgjgroyyd] example_inputs[568]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=3072, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[ku6bnu4u7osvly4jsmf2w2orf22tghfytwzq6ycj3otmnq6f3wn] example_inputs[569]: TensorMetadata(dtype=torch.float32, shape=torch.Size([2304, 768]), stride=(768, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=7077888, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[57h3bzk67jreuqiikhvgdtcn2mdkz7xuwhdnrjueg4dxxn6tfzm] example_inputs[570]: TensorMetadata(dtype=torch.float32, shape=torch.Size([2304]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=9216, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[a64xnvbb6q6gew5c3aovaeqivj3hstrmxkbra3eidivx42znnuv] example_inputs[571]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768, 768]), stride=(768, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=2359296, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[ymueslj5cs5k2l243gcdaohltqzpminou4x5gltmu2cgjgroyyd] example_inputs[572]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=3072, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, 
is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[ymueslj5cs5k2l243gcdaohltqzpminou4x5gltmu2cgjgroyyd] example_inputs[573]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=3072, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[ymueslj5cs5k2l243gcdaohltqzpminou4x5gltmu2cgjgroyyd] example_inputs[574]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=3072, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[aafg74anlk7yqmlg6ucup4tmjwhskblh75z227dcfmwpfzw7dzm] example_inputs[575]: TensorMetadata(dtype=torch.float32, shape=torch.Size([3072, 768]), stride=(768, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=9437184, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[sefiqkkteehwsnilwpljkx7vdgondub2hwggoishzmx7o5etmjk] example_inputs[576]: TensorMetadata(dtype=torch.float32, shape=torch.Size([3072]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=12288, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[6v4azk5hvrkyq22k6mfa2l7ovaddatmim7mcvcpdku2ieirjd64] example_inputs[577]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768, 3072]), stride=(3072, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=9437184, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[ymueslj5cs5k2l243gcdaohltqzpminou4x5gltmu2cgjgroyyd] example_inputs[578]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=3072, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[ymueslj5cs5k2l243gcdaohltqzpminou4x5gltmu2cgjgroyyd] example_inputs[579]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=3072, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[ymueslj5cs5k2l243gcdaohltqzpminou4x5gltmu2cgjgroyyd] example_inputs[580]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=3072, requires_grad=False, 
is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[ku6bnu4u7osvly4jsmf2w2orf22tghfytwzq6ycj3otmnq6f3wn] example_inputs[581]: TensorMetadata(dtype=torch.float32, shape=torch.Size([2304, 768]), stride=(768, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=7077888, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[57h3bzk67jreuqiikhvgdtcn2mdkz7xuwhdnrjueg4dxxn6tfzm] example_inputs[582]: TensorMetadata(dtype=torch.float32, shape=torch.Size([2304]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=9216, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[a64xnvbb6q6gew5c3aovaeqivj3hstrmxkbra3eidivx42znnuv] example_inputs[583]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768, 768]), stride=(768, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=2359296, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[ymueslj5cs5k2l243gcdaohltqzpminou4x5gltmu2cgjgroyyd] example_inputs[584]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=3072, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[ymueslj5cs5k2l243gcdaohltqzpminou4x5gltmu2cgjgroyyd] example_inputs[585]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=3072, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[ymueslj5cs5k2l243gcdaohltqzpminou4x5gltmu2cgjgroyyd] example_inputs[586]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=3072, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[aafg74anlk7yqmlg6ucup4tmjwhskblh75z227dcfmwpfzw7dzm] example_inputs[587]: TensorMetadata(dtype=torch.float32, shape=torch.Size([3072, 768]), stride=(768, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=9437184, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[sefiqkkteehwsnilwpljkx7vdgondub2hwggoishzmx7o5etmjk] example_inputs[588]: TensorMetadata(dtype=torch.float32, shape=torch.Size([3072]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, 
memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=12288, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[6v4azk5hvrkyq22k6mfa2l7ovaddatmim7mcvcpdku2ieirjd64] example_inputs[589]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768, 3072]), stride=(3072, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=9437184, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[ymueslj5cs5k2l243gcdaohltqzpminou4x5gltmu2cgjgroyyd] example_inputs[590]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=3072, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[ymueslj5cs5k2l243gcdaohltqzpminou4x5gltmu2cgjgroyyd] example_inputs[591]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=3072, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[ymueslj5cs5k2l243gcdaohltqzpminou4x5gltmu2cgjgroyyd] example_inputs[592]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=3072, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[iyogpwfek6zjggzelzpjeon4muycgzpe56o5gt3flsg5vojztet] example_inputs[593]: TensorMetadata(dtype=torch.float32, shape=torch.Size([]), stride=(), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=4, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[iyogpwfek6zjggzelzpjeon4muycgzpe56o5gt3flsg5vojztet] example_inputs[594]: TensorMetadata(dtype=torch.float32, shape=torch.Size([]), stride=(), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=4, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[iyogpwfek6zjggzelzpjeon4muycgzpe56o5gt3flsg5vojztet] example_inputs[595]: TensorMetadata(dtype=torch.float32, shape=torch.Size([]), stride=(), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=4, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[iyogpwfek6zjggzelzpjeon4muycgzpe56o5gt3flsg5vojztet] example_inputs[596]: TensorMetadata(dtype=torch.float32, shape=torch.Size([]), stride=(), device=device(type='cuda', index=0), 
layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=4, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[iyogpwfek6zjggzelzpjeon4muycgzpe56o5gt3flsg5vojztet] example_inputs[597]: TensorMetadata(dtype=torch.float32, shape=torch.Size([]), stride=(), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=4, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[iyogpwfek6zjggzelzpjeon4muycgzpe56o5gt3flsg5vojztet] example_inputs[598]: TensorMetadata(dtype=torch.float32, shape=torch.Size([]), stride=(), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=4, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[iyogpwfek6zjggzelzpjeon4muycgzpe56o5gt3flsg5vojztet] example_inputs[599]: TensorMetadata(dtype=torch.float32, shape=torch.Size([]), stride=(), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=4, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[iyogpwfek6zjggzelzpjeon4muycgzpe56o5gt3flsg5vojztet] example_inputs[600]: TensorMetadata(dtype=torch.float32, shape=torch.Size([]), stride=(), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=4, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[iyogpwfek6zjggzelzpjeon4muycgzpe56o5gt3flsg5vojztet] example_inputs[601]: TensorMetadata(dtype=torch.float32, shape=torch.Size([]), stride=(), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=4, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[iyogpwfek6zjggzelzpjeon4muycgzpe56o5gt3flsg5vojztet] example_inputs[602]: TensorMetadata(dtype=torch.float32, shape=torch.Size([]), stride=(), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=4, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[iyogpwfek6zjggzelzpjeon4muycgzpe56o5gt3flsg5vojztet] example_inputs[603]: TensorMetadata(dtype=torch.float32, shape=torch.Size([]), stride=(), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=4, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[iyogpwfek6zjggzelzpjeon4muycgzpe56o5gt3flsg5vojztet] example_inputs[604]: TensorMetadata(dtype=torch.float32, shape=torch.Size([]), stride=(), device=device(type='cuda', index=0), layout=torch.strided, 
memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=4, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[iyogpwfek6zjggzelzpjeon4muycgzpe56o5gt3flsg5vojztet] example_inputs[605]: TensorMetadata(dtype=torch.float32, shape=torch.Size([]), stride=(), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=4, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[iyogpwfek6zjggzelzpjeon4muycgzpe56o5gt3flsg5vojztet] example_inputs[606]: TensorMetadata(dtype=torch.float32, shape=torch.Size([]), stride=(), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=4, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[iyogpwfek6zjggzelzpjeon4muycgzpe56o5gt3flsg5vojztet] example_inputs[607]: TensorMetadata(dtype=torch.float32, shape=torch.Size([]), stride=(), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=4, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[iyogpwfek6zjggzelzpjeon4muycgzpe56o5gt3flsg5vojztet] example_inputs[608]: TensorMetadata(dtype=torch.float32, shape=torch.Size([]), stride=(), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=4, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[iyogpwfek6zjggzelzpjeon4muycgzpe56o5gt3flsg5vojztet] example_inputs[609]: TensorMetadata(dtype=torch.float32, shape=torch.Size([]), stride=(), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=4, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[iyogpwfek6zjggzelzpjeon4muycgzpe56o5gt3flsg5vojztet] example_inputs[610]: TensorMetadata(dtype=torch.float32, shape=torch.Size([]), stride=(), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=4, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[iyogpwfek6zjggzelzpjeon4muycgzpe56o5gt3flsg5vojztet] example_inputs[611]: TensorMetadata(dtype=torch.float32, shape=torch.Size([]), stride=(), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=4, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[iyogpwfek6zjggzelzpjeon4muycgzpe56o5gt3flsg5vojztet] example_inputs[612]: TensorMetadata(dtype=torch.float32, shape=torch.Size([]), stride=(), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, 
storage_offset=0, storage_bytes=4, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[iyogpwfek6zjggzelzpjeon4muycgzpe56o5gt3flsg5vojztet] example_inputs[613]: TensorMetadata(dtype=torch.float32, shape=torch.Size([]), stride=(), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=4, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[iyogpwfek6zjggzelzpjeon4muycgzpe56o5gt3flsg5vojztet] example_inputs[614]: TensorMetadata(dtype=torch.float32, shape=torch.Size([]), stride=(), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=4, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[iyogpwfek6zjggzelzpjeon4muycgzpe56o5gt3flsg5vojztet] example_inputs[615]: TensorMetadata(dtype=torch.float32, shape=torch.Size([]), stride=(), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=4, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[iyogpwfek6zjggzelzpjeon4muycgzpe56o5gt3flsg5vojztet] example_inputs[616]: TensorMetadata(dtype=torch.float32, shape=torch.Size([]), stride=(), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=4, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[iyogpwfek6zjggzelzpjeon4muycgzpe56o5gt3flsg5vojztet] example_inputs[617]: TensorMetadata(dtype=torch.float32, shape=torch.Size([]), stride=(), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=4, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[iyogpwfek6zjggzelzpjeon4muycgzpe56o5gt3flsg5vojztet] example_inputs[618]: TensorMetadata(dtype=torch.float32, shape=torch.Size([]), stride=(), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=4, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[iyogpwfek6zjggzelzpjeon4muycgzpe56o5gt3flsg5vojztet] example_inputs[619]: TensorMetadata(dtype=torch.float32, shape=torch.Size([]), stride=(), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=4, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[iyogpwfek6zjggzelzpjeon4muycgzpe56o5gt3flsg5vojztet] example_inputs[620]: TensorMetadata(dtype=torch.float32, shape=torch.Size([]), stride=(), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=4, 
requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[iyogpwfek6zjggzelzpjeon4muycgzpe56o5gt3flsg5vojztet] example_inputs[621]: TensorMetadata(dtype=torch.float32, shape=torch.Size([]), stride=(), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=4, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[iyogpwfek6zjggzelzpjeon4muycgzpe56o5gt3flsg5vojztet] example_inputs[622]: TensorMetadata(dtype=torch.float32, shape=torch.Size([]), stride=(), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=4, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[iyogpwfek6zjggzelzpjeon4muycgzpe56o5gt3flsg5vojztet] example_inputs[623]: TensorMetadata(dtype=torch.float32, shape=torch.Size([]), stride=(), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=4, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[iyogpwfek6zjggzelzpjeon4muycgzpe56o5gt3flsg5vojztet] example_inputs[624]: TensorMetadata(dtype=torch.float32, shape=torch.Size([]), stride=(), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=4, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[iyogpwfek6zjggzelzpjeon4muycgzpe56o5gt3flsg5vojztet] example_inputs[625]: TensorMetadata(dtype=torch.float32, shape=torch.Size([]), stride=(), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=4, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[iyogpwfek6zjggzelzpjeon4muycgzpe56o5gt3flsg5vojztet] example_inputs[626]: TensorMetadata(dtype=torch.float32, shape=torch.Size([]), stride=(), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=4, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[iyogpwfek6zjggzelzpjeon4muycgzpe56o5gt3flsg5vojztet] example_inputs[627]: TensorMetadata(dtype=torch.float32, shape=torch.Size([]), stride=(), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=4, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[iyogpwfek6zjggzelzpjeon4muycgzpe56o5gt3flsg5vojztet] example_inputs[628]: TensorMetadata(dtype=torch.float32, shape=torch.Size([]), stride=(), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=4, requires_grad=False, is_quantized=False, 
is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[iyogpwfek6zjggzelzpjeon4muycgzpe56o5gt3flsg5vojztet] example_inputs[629]: TensorMetadata(dtype=torch.float32, shape=torch.Size([]), stride=(), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=4, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[iyogpwfek6zjggzelzpjeon4muycgzpe56o5gt3flsg5vojztet] example_inputs[630]: TensorMetadata(dtype=torch.float32, shape=torch.Size([]), stride=(), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=4, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[iyogpwfek6zjggzelzpjeon4muycgzpe56o5gt3flsg5vojztet] example_inputs[631]: TensorMetadata(dtype=torch.float32, shape=torch.Size([]), stride=(), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=4, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[iyogpwfek6zjggzelzpjeon4muycgzpe56o5gt3flsg5vojztet] example_inputs[632]: TensorMetadata(dtype=torch.float32, shape=torch.Size([]), stride=(), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=4, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[iyogpwfek6zjggzelzpjeon4muycgzpe56o5gt3flsg5vojztet] example_inputs[633]: TensorMetadata(dtype=torch.float32, shape=torch.Size([]), stride=(), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=4, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[iyogpwfek6zjggzelzpjeon4muycgzpe56o5gt3flsg5vojztet] example_inputs[634]: TensorMetadata(dtype=torch.float32, shape=torch.Size([]), stride=(), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=4, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[iyogpwfek6zjggzelzpjeon4muycgzpe56o5gt3flsg5vojztet] example_inputs[635]: TensorMetadata(dtype=torch.float32, shape=torch.Size([]), stride=(), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=4, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[iyogpwfek6zjggzelzpjeon4muycgzpe56o5gt3flsg5vojztet] example_inputs[636]: TensorMetadata(dtype=torch.float32, shape=torch.Size([]), stride=(), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=4, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, 
is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[iyogpwfek6zjggzelzpjeon4muycgzpe56o5gt3flsg5vojztet] example_inputs[637]: TensorMetadata(dtype=torch.float32, shape=torch.Size([]), stride=(), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=4, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[iyogpwfek6zjggzelzpjeon4muycgzpe56o5gt3flsg5vojztet] example_inputs[638]: TensorMetadata(dtype=torch.float32, shape=torch.Size([]), stride=(), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=4, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[iyogpwfek6zjggzelzpjeon4muycgzpe56o5gt3flsg5vojztet] example_inputs[639]: TensorMetadata(dtype=torch.float32, shape=torch.Size([]), stride=(), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=4, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[iyogpwfek6zjggzelzpjeon4muycgzpe56o5gt3flsg5vojztet] example_inputs[640]: TensorMetadata(dtype=torch.float32, shape=torch.Size([]), stride=(), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=4, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[iyogpwfek6zjggzelzpjeon4muycgzpe56o5gt3flsg5vojztet] example_inputs[641]: TensorMetadata(dtype=torch.float32, shape=torch.Size([]), stride=(), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=4, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[iyogpwfek6zjggzelzpjeon4muycgzpe56o5gt3flsg5vojztet] example_inputs[642]: TensorMetadata(dtype=torch.float32, shape=torch.Size([]), stride=(), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=4, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[iyogpwfek6zjggzelzpjeon4muycgzpe56o5gt3flsg5vojztet] example_inputs[643]: TensorMetadata(dtype=torch.float32, shape=torch.Size([]), stride=(), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=4, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[iyogpwfek6zjggzelzpjeon4muycgzpe56o5gt3flsg5vojztet] example_inputs[644]: TensorMetadata(dtype=torch.float32, shape=torch.Size([]), stride=(), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=4, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, 
is_coalesced=None, dense_dim=None, sparse_dim=None)", "[iyogpwfek6zjggzelzpjeon4muycgzpe56o5gt3flsg5vojztet] example_inputs[645]: TensorMetadata(dtype=torch.float32, shape=torch.Size([]), stride=(), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=4, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[iyogpwfek6zjggzelzpjeon4muycgzpe56o5gt3flsg5vojztet] example_inputs[646]: TensorMetadata(dtype=torch.float32, shape=torch.Size([]), stride=(), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=4, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[iyogpwfek6zjggzelzpjeon4muycgzpe56o5gt3flsg5vojztet] example_inputs[647]: TensorMetadata(dtype=torch.float32, shape=torch.Size([]), stride=(), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=4, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[iyogpwfek6zjggzelzpjeon4muycgzpe56o5gt3flsg5vojztet] example_inputs[648]: TensorMetadata(dtype=torch.float32, shape=torch.Size([]), stride=(), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=4, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[iyogpwfek6zjggzelzpjeon4muycgzpe56o5gt3flsg5vojztet] example_inputs[649]: TensorMetadata(dtype=torch.float32, shape=torch.Size([]), stride=(), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=4, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[iyogpwfek6zjggzelzpjeon4muycgzpe56o5gt3flsg5vojztet] example_inputs[650]: TensorMetadata(dtype=torch.float32, shape=torch.Size([]), stride=(), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=4, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[iyogpwfek6zjggzelzpjeon4muycgzpe56o5gt3flsg5vojztet] example_inputs[651]: TensorMetadata(dtype=torch.float32, shape=torch.Size([]), stride=(), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=4, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[iyogpwfek6zjggzelzpjeon4muycgzpe56o5gt3flsg5vojztet] example_inputs[652]: TensorMetadata(dtype=torch.float32, shape=torch.Size([]), stride=(), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=4, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, 
sparse_dim=None)", "[iyogpwfek6zjggzelzpjeon4muycgzpe56o5gt3flsg5vojztet] example_inputs[653]: TensorMetadata(dtype=torch.float32, shape=torch.Size([]), stride=(), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=4, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[iyogpwfek6zjggzelzpjeon4muycgzpe56o5gt3flsg5vojztet] example_inputs[654]: TensorMetadata(dtype=torch.float32, shape=torch.Size([]), stride=(), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=4, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[iyogpwfek6zjggzelzpjeon4muycgzpe56o5gt3flsg5vojztet] example_inputs[655]: TensorMetadata(dtype=torch.float32, shape=torch.Size([]), stride=(), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=4, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[iyogpwfek6zjggzelzpjeon4muycgzpe56o5gt3flsg5vojztet] example_inputs[656]: TensorMetadata(dtype=torch.float32, shape=torch.Size([]), stride=(), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=4, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[iyogpwfek6zjggzelzpjeon4muycgzpe56o5gt3flsg5vojztet] example_inputs[657]: TensorMetadata(dtype=torch.float32, shape=torch.Size([]), stride=(), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=4, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[iyogpwfek6zjggzelzpjeon4muycgzpe56o5gt3flsg5vojztet] example_inputs[658]: TensorMetadata(dtype=torch.float32, shape=torch.Size([]), stride=(), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=4, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[iyogpwfek6zjggzelzpjeon4muycgzpe56o5gt3flsg5vojztet] example_inputs[659]: TensorMetadata(dtype=torch.float32, shape=torch.Size([]), stride=(), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=4, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[iyogpwfek6zjggzelzpjeon4muycgzpe56o5gt3flsg5vojztet] example_inputs[660]: TensorMetadata(dtype=torch.float32, shape=torch.Size([]), stride=(), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=4, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", 
"[iyogpwfek6zjggzelzpjeon4muycgzpe56o5gt3flsg5vojztet] example_inputs[661]: TensorMetadata(dtype=torch.float32, shape=torch.Size([]), stride=(), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=4, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[iyogpwfek6zjggzelzpjeon4muycgzpe56o5gt3flsg5vojztet] example_inputs[662]: TensorMetadata(dtype=torch.float32, shape=torch.Size([]), stride=(), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=4, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[iyogpwfek6zjggzelzpjeon4muycgzpe56o5gt3flsg5vojztet] example_inputs[663]: TensorMetadata(dtype=torch.float32, shape=torch.Size([]), stride=(), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=4, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[iyogpwfek6zjggzelzpjeon4muycgzpe56o5gt3flsg5vojztet] example_inputs[664]: TensorMetadata(dtype=torch.float32, shape=torch.Size([]), stride=(), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=4, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[iyogpwfek6zjggzelzpjeon4muycgzpe56o5gt3flsg5vojztet] example_inputs[665]: TensorMetadata(dtype=torch.float32, shape=torch.Size([]), stride=(), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=4, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[iyogpwfek6zjggzelzpjeon4muycgzpe56o5gt3flsg5vojztet] example_inputs[666]: TensorMetadata(dtype=torch.float32, shape=torch.Size([]), stride=(), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=4, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[iyogpwfek6zjggzelzpjeon4muycgzpe56o5gt3flsg5vojztet] example_inputs[667]: TensorMetadata(dtype=torch.float32, shape=torch.Size([]), stride=(), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=4, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[iyogpwfek6zjggzelzpjeon4muycgzpe56o5gt3flsg5vojztet] example_inputs[668]: TensorMetadata(dtype=torch.float32, shape=torch.Size([]), stride=(), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=4, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", 
"[iyogpwfek6zjggzelzpjeon4muycgzpe56o5gt3flsg5vojztet] example_inputs[669]: TensorMetadata(dtype=torch.float32, shape=torch.Size([]), stride=(), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=4, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[iyogpwfek6zjggzelzpjeon4muycgzpe56o5gt3flsg5vojztet] example_inputs[670]: TensorMetadata(dtype=torch.float32, shape=torch.Size([]), stride=(), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=4, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[iyogpwfek6zjggzelzpjeon4muycgzpe56o5gt3flsg5vojztet] example_inputs[671]: TensorMetadata(dtype=torch.float32, shape=torch.Size([]), stride=(), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=4, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[iyogpwfek6zjggzelzpjeon4muycgzpe56o5gt3flsg5vojztet] example_inputs[672]: TensorMetadata(dtype=torch.float32, shape=torch.Size([]), stride=(), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=4, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[iyogpwfek6zjggzelzpjeon4muycgzpe56o5gt3flsg5vojztet] example_inputs[673]: TensorMetadata(dtype=torch.float32, shape=torch.Size([]), stride=(), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=4, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[iyogpwfek6zjggzelzpjeon4muycgzpe56o5gt3flsg5vojztet] example_inputs[674]: TensorMetadata(dtype=torch.float32, shape=torch.Size([]), stride=(), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=4, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[iyogpwfek6zjggzelzpjeon4muycgzpe56o5gt3flsg5vojztet] example_inputs[675]: TensorMetadata(dtype=torch.float32, shape=torch.Size([]), stride=(), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=4, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[iyogpwfek6zjggzelzpjeon4muycgzpe56o5gt3flsg5vojztet] example_inputs[676]: TensorMetadata(dtype=torch.float32, shape=torch.Size([]), stride=(), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=4, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", 
"[iyogpwfek6zjggzelzpjeon4muycgzpe56o5gt3flsg5vojztet] example_inputs[677]: TensorMetadata(dtype=torch.float32, shape=torch.Size([]), stride=(), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=4, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[iyogpwfek6zjggzelzpjeon4muycgzpe56o5gt3flsg5vojztet] example_inputs[678]: TensorMetadata(dtype=torch.float32, shape=torch.Size([]), stride=(), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=4, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[iyogpwfek6zjggzelzpjeon4muycgzpe56o5gt3flsg5vojztet] example_inputs[679]: TensorMetadata(dtype=torch.float32, shape=torch.Size([]), stride=(), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=4, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[iyogpwfek6zjggzelzpjeon4muycgzpe56o5gt3flsg5vojztet] example_inputs[680]: TensorMetadata(dtype=torch.float32, shape=torch.Size([]), stride=(), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=4, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[iyogpwfek6zjggzelzpjeon4muycgzpe56o5gt3flsg5vojztet] example_inputs[681]: TensorMetadata(dtype=torch.float32, shape=torch.Size([]), stride=(), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=4, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[iyogpwfek6zjggzelzpjeon4muycgzpe56o5gt3flsg5vojztet] example_inputs[682]: TensorMetadata(dtype=torch.float32, shape=torch.Size([]), stride=(), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=4, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[iyogpwfek6zjggzelzpjeon4muycgzpe56o5gt3flsg5vojztet] example_inputs[683]: TensorMetadata(dtype=torch.float32, shape=torch.Size([]), stride=(), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=4, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[iyogpwfek6zjggzelzpjeon4muycgzpe56o5gt3flsg5vojztet] example_inputs[684]: TensorMetadata(dtype=torch.float32, shape=torch.Size([]), stride=(), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=4, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", 
"[iyogpwfek6zjggzelzpjeon4muycgzpe56o5gt3flsg5vojztet] example_inputs[685]: TensorMetadata(dtype=torch.float32, shape=torch.Size([]), stride=(), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=4, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[iyogpwfek6zjggzelzpjeon4muycgzpe56o5gt3flsg5vojztet] example_inputs[686]: TensorMetadata(dtype=torch.float32, shape=torch.Size([]), stride=(), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=4, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[iyogpwfek6zjggzelzpjeon4muycgzpe56o5gt3flsg5vojztet] example_inputs[687]: TensorMetadata(dtype=torch.float32, shape=torch.Size([]), stride=(), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=4, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[iyogpwfek6zjggzelzpjeon4muycgzpe56o5gt3flsg5vojztet] example_inputs[688]: TensorMetadata(dtype=torch.float32, shape=torch.Size([]), stride=(), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=4, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[iyogpwfek6zjggzelzpjeon4muycgzpe56o5gt3flsg5vojztet] example_inputs[689]: TensorMetadata(dtype=torch.float32, shape=torch.Size([]), stride=(), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=4, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[iyogpwfek6zjggzelzpjeon4muycgzpe56o5gt3flsg5vojztet] example_inputs[690]: TensorMetadata(dtype=torch.float32, shape=torch.Size([]), stride=(), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=4, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[iyogpwfek6zjggzelzpjeon4muycgzpe56o5gt3flsg5vojztet] example_inputs[691]: TensorMetadata(dtype=torch.float32, shape=torch.Size([]), stride=(), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=4, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[iyogpwfek6zjggzelzpjeon4muycgzpe56o5gt3flsg5vojztet] example_inputs[692]: TensorMetadata(dtype=torch.float32, shape=torch.Size([]), stride=(), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=4, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", 
"[iyogpwfek6zjggzelzpjeon4muycgzpe56o5gt3flsg5vojztet] example_inputs[693]: TensorMetadata(dtype=torch.float32, shape=torch.Size([]), stride=(), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=4, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[iyogpwfek6zjggzelzpjeon4muycgzpe56o5gt3flsg5vojztet] example_inputs[694]: TensorMetadata(dtype=torch.float32, shape=torch.Size([]), stride=(), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=4, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[iyogpwfek6zjggzelzpjeon4muycgzpe56o5gt3flsg5vojztet] example_inputs[695]: TensorMetadata(dtype=torch.float32, shape=torch.Size([]), stride=(), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=4, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[iyogpwfek6zjggzelzpjeon4muycgzpe56o5gt3flsg5vojztet] example_inputs[696]: TensorMetadata(dtype=torch.float32, shape=torch.Size([]), stride=(), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=4, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[iyogpwfek6zjggzelzpjeon4muycgzpe56o5gt3flsg5vojztet] example_inputs[697]: TensorMetadata(dtype=torch.float32, shape=torch.Size([]), stride=(), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=4, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[iyogpwfek6zjggzelzpjeon4muycgzpe56o5gt3flsg5vojztet] example_inputs[698]: TensorMetadata(dtype=torch.float32, shape=torch.Size([]), stride=(), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=4, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[iyogpwfek6zjggzelzpjeon4muycgzpe56o5gt3flsg5vojztet] example_inputs[699]: TensorMetadata(dtype=torch.float32, shape=torch.Size([]), stride=(), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=4, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[iyogpwfek6zjggzelzpjeon4muycgzpe56o5gt3flsg5vojztet] example_inputs[700]: TensorMetadata(dtype=torch.float32, shape=torch.Size([]), stride=(), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=4, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", 
"[iyogpwfek6zjggzelzpjeon4muycgzpe56o5gt3flsg5vojztet] example_inputs[701]: TensorMetadata(dtype=torch.float32, shape=torch.Size([]), stride=(), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=4, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[iyogpwfek6zjggzelzpjeon4muycgzpe56o5gt3flsg5vojztet] example_inputs[702]: TensorMetadata(dtype=torch.float32, shape=torch.Size([]), stride=(), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=4, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[iyogpwfek6zjggzelzpjeon4muycgzpe56o5gt3flsg5vojztet] example_inputs[703]: TensorMetadata(dtype=torch.float32, shape=torch.Size([]), stride=(), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=4, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[iyogpwfek6zjggzelzpjeon4muycgzpe56o5gt3flsg5vojztet] example_inputs[704]: TensorMetadata(dtype=torch.float32, shape=torch.Size([]), stride=(), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=4, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[iyogpwfek6zjggzelzpjeon4muycgzpe56o5gt3flsg5vojztet] example_inputs[705]: TensorMetadata(dtype=torch.float32, shape=torch.Size([]), stride=(), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=4, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[iyogpwfek6zjggzelzpjeon4muycgzpe56o5gt3flsg5vojztet] example_inputs[706]: TensorMetadata(dtype=torch.float32, shape=torch.Size([]), stride=(), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=4, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[iyogpwfek6zjggzelzpjeon4muycgzpe56o5gt3flsg5vojztet] example_inputs[707]: TensorMetadata(dtype=torch.float32, shape=torch.Size([]), stride=(), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=4, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[iyogpwfek6zjggzelzpjeon4muycgzpe56o5gt3flsg5vojztet] example_inputs[708]: TensorMetadata(dtype=torch.float32, shape=torch.Size([]), stride=(), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=4, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", 
"[iyogpwfek6zjggzelzpjeon4muycgzpe56o5gt3flsg5vojztet] example_inputs[709]: TensorMetadata(dtype=torch.float32, shape=torch.Size([]), stride=(), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=4, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[iyogpwfek6zjggzelzpjeon4muycgzpe56o5gt3flsg5vojztet] example_inputs[710]: TensorMetadata(dtype=torch.float32, shape=torch.Size([]), stride=(), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=4, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[iyogpwfek6zjggzelzpjeon4muycgzpe56o5gt3flsg5vojztet] example_inputs[711]: TensorMetadata(dtype=torch.float32, shape=torch.Size([]), stride=(), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=4, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[iyogpwfek6zjggzelzpjeon4muycgzpe56o5gt3flsg5vojztet] example_inputs[712]: TensorMetadata(dtype=torch.float32, shape=torch.Size([]), stride=(), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=4, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[iyogpwfek6zjggzelzpjeon4muycgzpe56o5gt3flsg5vojztet] example_inputs[713]: TensorMetadata(dtype=torch.float32, shape=torch.Size([]), stride=(), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=4, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[iyogpwfek6zjggzelzpjeon4muycgzpe56o5gt3flsg5vojztet] example_inputs[714]: TensorMetadata(dtype=torch.float32, shape=torch.Size([]), stride=(), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=4, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[iyogpwfek6zjggzelzpjeon4muycgzpe56o5gt3flsg5vojztet] example_inputs[715]: TensorMetadata(dtype=torch.float32, shape=torch.Size([]), stride=(), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=4, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[iyogpwfek6zjggzelzpjeon4muycgzpe56o5gt3flsg5vojztet] example_inputs[716]: TensorMetadata(dtype=torch.float32, shape=torch.Size([]), stride=(), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=4, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", 
"[iyogpwfek6zjggzelzpjeon4muycgzpe56o5gt3flsg5vojztet] example_inputs[717]: TensorMetadata(dtype=torch.float32, shape=torch.Size([]), stride=(), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=4, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[iyogpwfek6zjggzelzpjeon4muycgzpe56o5gt3flsg5vojztet] example_inputs[718]: TensorMetadata(dtype=torch.float32, shape=torch.Size([]), stride=(), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=4, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[iyogpwfek6zjggzelzpjeon4muycgzpe56o5gt3flsg5vojztet] example_inputs[719]: TensorMetadata(dtype=torch.float32, shape=torch.Size([]), stride=(), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=4, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[iyogpwfek6zjggzelzpjeon4muycgzpe56o5gt3flsg5vojztet] example_inputs[720]: TensorMetadata(dtype=torch.float32, shape=torch.Size([]), stride=(), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=4, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[iyogpwfek6zjggzelzpjeon4muycgzpe56o5gt3flsg5vojztet] example_inputs[721]: TensorMetadata(dtype=torch.float32, shape=torch.Size([]), stride=(), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=4, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[iyogpwfek6zjggzelzpjeon4muycgzpe56o5gt3flsg5vojztet] example_inputs[722]: TensorMetadata(dtype=torch.float32, shape=torch.Size([]), stride=(), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=4, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[iyogpwfek6zjggzelzpjeon4muycgzpe56o5gt3flsg5vojztet] example_inputs[723]: TensorMetadata(dtype=torch.float32, shape=torch.Size([]), stride=(), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=4, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[iyogpwfek6zjggzelzpjeon4muycgzpe56o5gt3flsg5vojztet] example_inputs[724]: TensorMetadata(dtype=torch.float32, shape=torch.Size([]), stride=(), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=4, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", 
"[iyogpwfek6zjggzelzpjeon4muycgzpe56o5gt3flsg5vojztet] example_inputs[725]: TensorMetadata(dtype=torch.float32, shape=torch.Size([]), stride=(), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=4, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[iyogpwfek6zjggzelzpjeon4muycgzpe56o5gt3flsg5vojztet] example_inputs[726]: TensorMetadata(dtype=torch.float32, shape=torch.Size([]), stride=(), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=4, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[iyogpwfek6zjggzelzpjeon4muycgzpe56o5gt3flsg5vojztet] example_inputs[727]: TensorMetadata(dtype=torch.float32, shape=torch.Size([]), stride=(), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=4, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[iyogpwfek6zjggzelzpjeon4muycgzpe56o5gt3flsg5vojztet] example_inputs[728]: TensorMetadata(dtype=torch.float32, shape=torch.Size([]), stride=(), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=4, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[iyogpwfek6zjggzelzpjeon4muycgzpe56o5gt3flsg5vojztet] example_inputs[729]: TensorMetadata(dtype=torch.float32, shape=torch.Size([]), stride=(), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=4, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[iyogpwfek6zjggzelzpjeon4muycgzpe56o5gt3flsg5vojztet] example_inputs[730]: TensorMetadata(dtype=torch.float32, shape=torch.Size([]), stride=(), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=4, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[iyogpwfek6zjggzelzpjeon4muycgzpe56o5gt3flsg5vojztet] example_inputs[731]: TensorMetadata(dtype=torch.float32, shape=torch.Size([]), stride=(), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=4, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[iyogpwfek6zjggzelzpjeon4muycgzpe56o5gt3flsg5vojztet] example_inputs[732]: TensorMetadata(dtype=torch.float32, shape=torch.Size([]), stride=(), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=4, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", 
"[iyogpwfek6zjggzelzpjeon4muycgzpe56o5gt3flsg5vojztet] example_inputs[733]: TensorMetadata(dtype=torch.float32, shape=torch.Size([]), stride=(), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=4, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[iyogpwfek6zjggzelzpjeon4muycgzpe56o5gt3flsg5vojztet] example_inputs[734]: TensorMetadata(dtype=torch.float32, shape=torch.Size([]), stride=(), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=4, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[iyogpwfek6zjggzelzpjeon4muycgzpe56o5gt3flsg5vojztet] example_inputs[735]: TensorMetadata(dtype=torch.float32, shape=torch.Size([]), stride=(), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=4, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[iyogpwfek6zjggzelzpjeon4muycgzpe56o5gt3flsg5vojztet] example_inputs[736]: TensorMetadata(dtype=torch.float32, shape=torch.Size([]), stride=(), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=4, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[iyogpwfek6zjggzelzpjeon4muycgzpe56o5gt3flsg5vojztet] example_inputs[737]: TensorMetadata(dtype=torch.float32, shape=torch.Size([]), stride=(), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=4, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[iyogpwfek6zjggzelzpjeon4muycgzpe56o5gt3flsg5vojztet] example_inputs[738]: TensorMetadata(dtype=torch.float32, shape=torch.Size([]), stride=(), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=4, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[iyogpwfek6zjggzelzpjeon4muycgzpe56o5gt3flsg5vojztet] example_inputs[739]: TensorMetadata(dtype=torch.float32, shape=torch.Size([]), stride=(), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=4, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] fx_kwargs[aot_mode]: False", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] fx_kwargs[cpp_wrapper]: False", "[moqyx73pd52qgfefpqfwjkxjzf4hj25dyzowq4prptxxsmfw2vg] fx_kwargs[cudagraphs]: BoxedBool(value=True)", "[tquy2we2efmowuj4wuqzcfcfdcrkzkzmwdae6hprj7fa64jpusq] fx_kwargs[extern_node_serializer]: None", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] fx_kwargs[is_backward]: False", 
"[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] fx_kwargs[is_inference]: True", "[tquy2we2efmowuj4wuqzcfcfdcrkzkzmwdae6hprj7fa64jpusq] fx_kwargs[layout_opt]: None", "[t755of6lmsc7np3j6spka2x5yvicie732qv4wx6uu67rphf6elu] fx_kwargs[static_input_idxs]: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76, 77, 78, 79, 80, 81, 82, 83, 84, 85, 86, 87, 88, 89, 90, 91, 92, 93, 94, 95, 96, 97, 98, 99, 100, 101, 102, 103, 104, 105, 106, 107, 108, 109, 110, 111, 112, 113, 114, 115, 116, 117, 118, 119, 120, 121, 122, 123, 124, 125, 126, 127, 128, 129, 130, 131, 132, 133, 134, 135, 136, 137, 138, 139, 140, 141, 142, 143, 144, 145, 146, 147, 148, 149, 150, 299, 300, 301, 302, 303, 304, 305, 306, 307, 308, 309, 310, 311, 312, 313, 314, 315, 316, 317, 318, 319, 320, 321, 322, 323, 324, 325, 326, 327, 328, 329, 330, 331, 332, 333, 334, 335, 336, 337, 338, 339, 340, 341, 342, 343, 344, 345, 346, 347, 348, 349, 350, 351, 352, 353, 354, 355, 356, 357, 358, 359, 360, 361, 362, 363, 364, 365, 366, 367, 368, 369, 370, 371, 372, 373, 374, 375, 376, 377, 378, 379, 380, 381, 382, 383, 384, 385, 386, 387, 388, 389, 390, 391, 392, 393, 394, 395, 396, 397, 398, 399, 400, 401, 402, 403, 404, 405, 406, 407, 408, 409, 410, 411, 412, 413, 414, 415, 416, 417, 418, 419, 420, 421, 422, 423, 424, 425, 426, 427, 428, 429, 430, 431, 432, 433, 434, 435, 436, 437, 438, 439, 440, 441, 442, 443, 444, 445, 446, 447, 448, 449, 450, 451, 452, 453, 454, 455, 456, 457, 458, 459, 460, 461, 462, 463, 464, 465, 466, 467, 468, 469, 470, 471, 472, 473, 474, 475, 476, 477, 478, 479, 480, 481, 482, 483, 484, 485, 486, 487, 488, 489, 490, 491, 492, 493, 494, 495, 496, 497, 498, 499, 500, 501, 502, 503, 504, 505, 506, 507, 508, 509, 510, 511, 512, 513, 514, 515, 516, 517, 518, 519, 520, 521, 522, 523, 524, 525, 526, 527, 528, 529, 530, 531, 532, 533, 534, 535, 536, 537, 538, 539, 540, 541, 542, 543, 544, 545, 546, 547, 548, 549, 550, 551, 552, 553, 554, 555, 556, 557, 558, 559, 560, 561, 562, 563, 564, 565, 566, 567, 568, 569, 570, 571, 572, 573, 574, 575, 576, 577, 578, 579, 580, 581, 582, 583, 584, 585, 586, 587, 588, 589, 590, 591, 592, 593, 594, 595, 596, 597, 598, 599, 600, 601, 602, 603, 604, 605, 606, 607, 608, 609, 610, 611, 612, 613, 614, 615, 616, 617, 618, 619, 620, 621, 622, 623, 624, 625, 626, 627, 628, 629, 630, 631, 632, 633, 634, 635, 636, 637, 638, 639, 640, 641, 642, 643, 644, 645, 646, 647, 648, 649, 650, 651, 652, 653, 654, 655, 656, 657, 658, 659, 660, 661, 662, 663, 664, 665, 666, 667, 668, 669, 670, 671, 672, 673, 674, 675, 676, 677, 678, 679, 680, 681, 682, 683, 684, 685, 686, 687, 688, 689, 690, 691, 692, 693, 694, 695, 696, 697, 698, 699, 700, 701, 702, 703, 704, 705, 706, 707, 708, 709, 710, 711, 712, 713, 714, 715, 716, 717, 718, 719, 720, 721, 722, 723, 724, 725, 726, 727, 728, 729, 730, 731, 732, 733, 734, 735, 736, 737, 738, 739]", "[4bryyl4ahh5whyg3zwqebpwmjnx6w77nqgqbdjlowju6lkqtn7w] fx_kwargs[user_visible_outputs]: {}", "[5gxbt6glr3gti63xp7cch6ofdqfxvag7hsiwxbut4if4xrx6d4g] inputs_to_check[0]: 151", "[iaarrh4a5kr5bv73kkmrhkca4ysumu2vh65kdzworlx74re2dpo] inputs_to_check[1]: 152", "[qmhmrekvpai4tc7rejm43nkaq3mgt2cy6w6mzkdg2pdzt4xbwx2] inputs_to_check[2]: 153", "[qrczcfquzsvwccgmqugssyaib555w6hfdt4shqzvb6brfm3i5el] inputs_to_check[3]: 154", 
"[6osxch7or66drdp4fy276u75unoezfbp32qtojzfami7nbek767] inputs_to_check[4]: 155", "[o3fk4nlt4btpkqljmyxroatirrdhqjr4d264i2tywc6raomfjsw] inputs_to_check[5]: 156", "[33b7c6n5un2rbt5kdgtbsh6c64len6cjzmios66nja6gia4ojcx] inputs_to_check[6]: 157", "[ccishwh2vlgdi4q6qdu3en4gukgptvbrqyx5rofx72wz3bicnzy] inputs_to_check[7]: 158", "[6pn6oydkkil5wvbpu5uvdffsyymbzhxx3t2skamg4wp5vtb3n5k] inputs_to_check[8]: 159", "[alex6ca6gpzizomfu3wq3xj36jnymygy7hiroowxhaypic6tskv] inputs_to_check[9]: 160", "[bivqezf4ymabhorni5gd4fe3urab3kvepwenq5gmvosf2pavdd5] inputs_to_check[10]: 161", "[asq5khhnfffkvmnnmgeoqsye4l64y7kkvyk3snk2tyrgf2qb5mi] inputs_to_check[11]: 162", "[wz2lj7dbnoawsawyjhobr6oa52jgjntn3o3lmrtkb4bmzfapwhc] inputs_to_check[12]: 163", "[ngbs2fex7zmmncfaogkkrqimpfgq4wjlkqmf3tcyeswwe3hg3od] inputs_to_check[13]: 164", "[fssplvrotxdu6guo4defun7h45ns624vjjonloaz7etd7ggxjg4] inputs_to_check[14]: 165", "[sc6cvy6nxzmjuelrp2whvlhran2f4f6elzbuslbei3dfnaq6qkv] inputs_to_check[15]: 166", "[p44ecp3xwjohf6mop4nzmt5wxi7uzcii63xm5kryejmipxkfjev] inputs_to_check[16]: 167", "[xngi5shtw7jcqe2utazf35f7dj5ypzfuduqyypjo6jvv32knjf3] inputs_to_check[17]: 168", "[niz4gr5x3ya3ukuhf4c7rnhhrswyan3kbzclc6g52u3kvurqmf4] inputs_to_check[18]: 169", "[bfsiwqbrjfxsnzozsohdqmqjqwa5itj3abqdtdwxf7vxxw2t7iq] inputs_to_check[19]: 170", "[ditguzdbhtwz6l4chqi3z6cziyi2pct43zhzf5zaragfhqf5akd] inputs_to_check[20]: 171", "[e3h4w2vfojacabco57r6aj43dgqrhfx545yrdu7qzxkvjyf52t5] inputs_to_check[21]: 172", "[mfhhdultow3ovihpso6dtsdpntivwjcpfismlaqwsjc6iainhtu] inputs_to_check[22]: 173", "[kpyorpxsozia3z5wqrbnqnnff6bma7xllpbedgwp4gp76wwwegc] inputs_to_check[23]: 174", "[lvgyjizlzse4yknfjmvl2uqg4ffygwpuddjgunbxcbjm5g5d5fw] inputs_to_check[24]: 175", "[qgbusewwprhncdk5ahq4fytqvx7fhrecfetchddqudoq7lkyx3b] inputs_to_check[25]: 176", "[hmzhjnkpczbvqnshjcfp45u74kngsbufwspp5fa5e4zidpcwwcs] inputs_to_check[26]: 177", "[oqg53nqin266dynz7o2z27qo3hcxucd5yc3or2656uoqs6quan2] inputs_to_check[27]: 178", "[k6swhf5ty2zmij2rxhvvpe7jwdxx6llqawhiqixvpvjgc6dhkl6] inputs_to_check[28]: 179", "[a5s6scmr5zbjnm5a6isxptkuttpfh2uv2g7tza7ci6z366qvhph] inputs_to_check[29]: 180", "[ma4hmi2zhz2v5pv6nw7owyw3z3lswrc5nveayyb3j5iastoc7zl] inputs_to_check[30]: 181", "[4nwllnnjxh7swihocoe7pvsijtlpzpz6yjwckaahmgpaeimozt7] inputs_to_check[31]: 182", "[wzkita6x3wixk2x6zsfnpzfqrfaezroxu5xnw7rq7ox7vqgu476] inputs_to_check[32]: 183", "[xrjqiropfaxwzdwzzhyx6e6c7hjnkppzronjqgpzo6hbpi6yr4g] inputs_to_check[33]: 184", "[urai455awfpx3bjmqadnlbnbklixi676brusjoruximpnjo76n2] inputs_to_check[34]: 185", "[eermw7oa3whtf4qdg6cogh7s3xez3tfiest32uplhnwmagpouh6] inputs_to_check[35]: 186", "[64xsnjmjbzfylxidk5yl6hsvuzswofeqbq3zuv7a6bkdopfeapn] inputs_to_check[36]: 187", "[6kuevh47g3elelxoo5ac7cmo3r2fh2ygbhs6qyljspkz4y6r7r7] inputs_to_check[37]: 188", "[ihnc7ngbkirbzwxoyjfhpwki2ewnnpkuzxlegp6fmw6fykdxxj3] inputs_to_check[38]: 189", "[g3ay2xbjws2ov73c4lkobfibuq4wxwxe75uogzdg7crgtzlagn5] inputs_to_check[39]: 190", "[6lydlqaer6b3qvlthv7uluevii5gvxgissp4oodsoye43zyvm7f] inputs_to_check[40]: 191", "[mrm72xpjwecc4eczy6w3ndrca6qgx4ssucludsfllsadesjz7pl] inputs_to_check[41]: 192", "[7cz46kewyqtcfh7adjmk5i2ljoq5v44ofijq4gmlca7gjy55c6r] inputs_to_check[42]: 193", "[2ubcxo5fpwyipcg26qkwk7dfk4ci2edpvwxh4fhvgvstq4cf3ke] inputs_to_check[43]: 194", "[idxzwtb6yotm5u6qhotbqxbuytqogl4lyuzcp4f4rpiekingxrj] inputs_to_check[44]: 195", "[sxompataxg2kpp6lvmimnzadenerjisuicfkfuwm5exoinhfbsg] inputs_to_check[45]: 196", "[5pmk7sv27s2bi54s3kwyduqs4ly256qfb2hfrlqfamtcsbur3iv] inputs_to_check[46]: 197", 
"[pujj5ix4dbdajeweoew7fe743v6v6wscq7k7pjsqiqopewlh6s4] inputs_to_check[47]: 198", "[tfk4gvmeljn6oc7yzg7ablm5slfgj5iwldvib27lgy3acro6g77] inputs_to_check[48]: 199", "[3yqkxangefsazunaw2ibltnnexjixpvosdxyq7kipwrmhng4d66] inputs_to_check[49]: 200", "[dek4vtwl3t4tioy2oedefor7hqzq7doc3fj4wwdmgrfpt773mvr] inputs_to_check[50]: 201", "[eyzompn7rqbpbwprodhvszb4fjs3fubclamjylwqsna5imftnou] inputs_to_check[51]: 202", "[cna2jzzfijl2grhnqpag2peenci7zfourhgcdzidromdrqdyvwm] inputs_to_check[52]: 203", "[m5mnhtreky3cpmvgnfmbkri3pmhs22tu3kahhkdxv2q67t7rtxk] inputs_to_check[53]: 204", "[pf3yxn6pwjw3apolzviv77ube4xeqq2n2lgwcduyjvzgiyxg45s] inputs_to_check[54]: 205", "[zlqnl55vmxcplhlix7khtasmq2gdecqd7jpore57pll2by4u67y] inputs_to_check[55]: 206", "[6u3htmimfebyyyavsbzctid7bqe3p2vzitaht7rhqdc6l653asq] inputs_to_check[56]: 207", "[vnxrp3cswdykkkdcda2rykgrj6p7mbsgoq2euf3nhebgbrbdnah] inputs_to_check[57]: 208", "[xkuyzvn72atoye7xvdr7nkkl6r43muegtld7i23uic3gez4op3w] inputs_to_check[58]: 209", "[mc4hinl3b4abbhnnd6kjw3mpbdnhcszhce746aznurtp7rckvqe] inputs_to_check[59]: 210", "[ofm6prxr6hqz2u5z3oywwp5635di572xearfxgeqikq7ir5zyer] inputs_to_check[60]: 211", "[ib352syzxfoxetnwcwmr562kq6zxh3ba6k6ozr7vmgirzngz445] inputs_to_check[61]: 212", "[z7vzn4qm5gv2ec4zm5oa552msg56z4an6jyi43vpqrh6rcwtww5] inputs_to_check[62]: 213", "[6ijaweudkgtayjajytjpkgptbxqygprffq2iv65twukqg2ks24j] inputs_to_check[63]: 214", "[zr4ja2xbcw5fkklyjkk4dqkrixbqthrmy4gx6wolog3g6twxagb] inputs_to_check[64]: 215", "[f7rs6g77lmqs6rceoayty7ukws77rxwoi2litnshxyvn2l4qcrs] inputs_to_check[65]: 216", "[iq4ks6jqbobe4ub7eedwgwhm6u7dujn4pdk6xtzlsutw35gihbh] inputs_to_check[66]: 217", "[c4tbwksvxlxtiymirqu6houav6ecq5pr3zf4phc5ksuu6ccao5t] inputs_to_check[67]: 218", "[i3gn36xaneuwkxpjtyp3iiaeudseihrk47h5len33wjeyzt6ez5] inputs_to_check[68]: 219", "[2hfliifbgstom3wq5au6yesetrbhjmazl3j2z7o4wvztaigyd7p] inputs_to_check[69]: 220", "[jsavgbo66sdqebklxk3p76jjgnvror75kkpwbfrr2grkuudknxt] inputs_to_check[70]: 221", "[nl6eg6mm4g66lxwzopu5webjhh4pq52imycgdnbzw5sdafpfzuh] inputs_to_check[71]: 222", "[uulsqibga6yzqtpej2uf4km34ygpbdt4gya2hth26yioguk4rlq] inputs_to_check[72]: 223", "[ptaz43zzcc5wqnllxp3fe7pvbo75xmcjlabttblccy46hygeozy] inputs_to_check[73]: 224", "[wlmb2nn3nq5s77bhgmozz6k5xgxru6empq7wgpphhcgprnpnc7o] inputs_to_check[74]: 225", "[ltksjik3ctg6uqqzzgdjyvza6wpx757hanbg2zoy3qa4k3a4vpx] inputs_to_check[75]: 226", "[7u7ifib46vahqccsmg654f3uzmjbluuqepiqql3s5ozwtzqz7pp] inputs_to_check[76]: 227", "[osag4eozwslt5b7yuzth3lgba32eotdkqtr2kw4ey4i43bgvvrs] inputs_to_check[77]: 228", "[zwdkykxjgt356ykebzld3rzgsfc5zlfk5st4we3ykzkaba3oqex] inputs_to_check[78]: 229", "[iungcnzcibs3necrx3njdt2ckikflhexkoicbep7tvcvtj5ly5d] inputs_to_check[79]: 230", "[bwmghobfcwh2lrdjqskkhe6u3vox2fbz53b65rgrzlmndirmzbm] inputs_to_check[80]: 231", "[qhj52t7zdp3oargrkm4bg6aao35lsfeuhsm5pgevylkqroeb4lc] inputs_to_check[81]: 232", "[bz4ayxadi54u4x7rrse46x6v7bfpgk5qy4scu6kg76fjuxlpy6u] inputs_to_check[82]: 233", "[gw4sqgphdvlxdqktwxjso65pxpymqslnhgr6l4eyswl5izdixw6] inputs_to_check[83]: 234", "[ewdr2tnhafkagyyp46wn5led3h754p4nzttu3w54uhxrqux3vvn] inputs_to_check[84]: 235", "[4e7a3dp2ygblswjy2t4s5ytwvfgeppo6v5xajwfebrzqvxl5c3z] inputs_to_check[85]: 236", "[dnrgqiivi7fu47qt3k5ea43mmzonrx62hvlvj7p4zs2whj47y7d] inputs_to_check[86]: 237", "[pyoye6lh4ebpuzincukqsblo7sz2ok35q5n23bykkflupvr6b3x] inputs_to_check[87]: 238", "[zzav4ck6zf5ii2aadgk45satnvsrcelh7lwasful7siezs7emg2] inputs_to_check[88]: 239", "[jqli3sayeay6jxebdo6gg7uiifocbslkvp3hv7kgpgetcctr6kk] inputs_to_check[89]: 240", 
"[xfscvbmf5xe24dzns4sojux47uhkj3rx5escbpdnh74elnkufk4] inputs_to_check[90]: 241", "[h22xhecdg6loiwavuwtlmkjqgxj23bujpayu7r7cnuwow22zy2e] inputs_to_check[91]: 242", "[ffeweldbvparqtiiuqrj2k57nqdefczqg2x5n36j3cgfubpzl2r] inputs_to_check[92]: 243", "[ccpnfy7cohyrspuggdpbc7wpz4ul2fjni56cul6rgpztmmgrmbz] inputs_to_check[93]: 244", "[wdxh2vlxp5oip6lbdhdc5bur247q4qljosyhafat4nqrygspikw] inputs_to_check[94]: 245", "[imw7rafsd3kl6dhscu76cm37kz7cnlrn4cz4y77hnvu6epvrakh] inputs_to_check[95]: 246", "[s7byzhoacpzsci2aqrvogeb7f5f7egbzqw5uujgvswtjutgh33t] inputs_to_check[96]: 247", "[usr2ovytp5xi5pvkziuuu7bpg7zfa44xckrw7mx42ad2sixrfjw] inputs_to_check[97]: 248", "[spsub4jhn3s5o5rdysoeyv5mqczcmmn3t3hmuflioyvd4im4pfe] inputs_to_check[98]: 249", "[7c6i6h6bfell5u33q6rcv25lpgmk4jah3uhjjx6bjevvjnshoim] inputs_to_check[99]: 250", "[ah4sag2igwizdxkml6voaf342455hrlpr6cesdepe5njv6zahlq] inputs_to_check[100]: 251", "[5svbus4u4wap6a3z767wrjlymc7g7qft4ugaae65e5t7tvvigpv] inputs_to_check[101]: 252", "[6dwei2ltmufaindqw57by4jqhptur5xijexpjttzbqiw5xq7ufb] inputs_to_check[102]: 253", "[r557yxhzgnvuaqxq2y2aisgxqacm3cndl3efydbr3l5u7t6vaao] inputs_to_check[103]: 254", "[ouw6rzfmq6mznqvkp4ouhr3fzo3ljmtrqmyrm3pitapfmmfcq67] inputs_to_check[104]: 255", "[fv6slhtedtydps5s5u2etitscliblzcidyitqf7krsv4e23fzk6] inputs_to_check[105]: 256", "[bmucc7bpdzrvbf5petlmybacuupvmc7agiupfu54h73xhqupvaf] inputs_to_check[106]: 257", "[26uxlcwvg3bcy54iimat7oht3bxo7jvlwbqdppzk5zwkoxyv7cb] inputs_to_check[107]: 258", "[6ficbgfpvxoz32hthootgscev5fx7mrus4lxsrrnugdtmomziuz] inputs_to_check[108]: 259", "[4tsez6gljkkxdflgsfn46kmxnkkmjfm3foz2peoj2utbxjdodv2] inputs_to_check[109]: 260", "[677wque2vebz2rd32qmi2owsgvljzdjthgg7zhc67nb7s6z46xw] inputs_to_check[110]: 261", "[m7xjumsvlwcemhzme2dqw2usksuuan5qqnmqjaetwxiar4s4j6c] inputs_to_check[111]: 262", "[paeekzgtlydskvlpjv3e3j2zbrofnrddnfj3b6ronaopjyxkseo] inputs_to_check[112]: 263", "[rwjfz3vqhvl3rixynv6iipvly4la2yaqouw2ax5qrgr43ekinaz] inputs_to_check[113]: 264", "[3rcgr2jikn5dxbda5qrx57cu7lqsoqexxqcfzwsazgqojrjzsua] inputs_to_check[114]: 265", "[uqxikezud5mf2aqkp7seqvyze7hpn2o5c2yez6uydmlvjn2eh5v] inputs_to_check[115]: 266", "[wjcwolwbnxwp5mof62qvgpjq3ar5rzt6kkdytt6wev4udsfqe6x] inputs_to_check[116]: 267", "[veogf6bsf6ikphzxxhjgpzn5jqx3g55t4aoyurw7pm76yiy7zt2] inputs_to_check[117]: 268", "[vjdrwtc6q6dw6qossmadxe3ct3fx342xxf5ebrhb42tbtrjmtoj] inputs_to_check[118]: 269", "[fzi2ibchn6t3srl4xj73y4mywe6m4to7ns3ffl2tsei46mebm5l] inputs_to_check[119]: 270", "[54mteumeehrhr42ajb2mmomf5sssoeewc5jl6nqlnib5ohxb3qd] inputs_to_check[120]: 271", "[yk2ib3hhazfcmbecutfzw356dveofwjm5aokurcon6brky4fv62] inputs_to_check[121]: 272", "[qmgadyz6iluhzeytokynhwx3fiybdqurux3poi3kx7xdkpp24io] inputs_to_check[122]: 273", "[32k7zywxkomlofwth2mo5yip7d4f63vdnnwqucsjvfiviwhzqwj] inputs_to_check[123]: 274", "[ex5ejsi3yiu26ymqgxsfzxfongdoyfsvtn6wbfmjmqan26mywvv] inputs_to_check[124]: 275", "[3r3pihjgebqvjx6yoh4q36k6bmloajxb2wp7mpovqfmbgvtihjx] inputs_to_check[125]: 276", "[jcfhgvbhktjahasdkcg4j22c4iu5wgbqo2by6mvnhkizuyl7adt] inputs_to_check[126]: 277", "[qysgpar3mwuhkwfmkdvwppvqn2rc3wkswoy4l5242blg3s6nomq] inputs_to_check[127]: 278", "[sj5qpjzwfnrnrslghhva6z7fypbpicxjbczgdsscdg7ty6dloke] inputs_to_check[128]: 279", "[dh6oykkvzpw4hh2l2kyq3n3oiaawqasgyps2bki6ouaqwr5o4c5] inputs_to_check[129]: 280", "[bqqqhzw6zzkrdgeg4wed3ge2u7wrxxweyb7ikuugm2lg5bw2low] inputs_to_check[130]: 281", "[lpgee42ktycd2ec7bvvfmts5czoojvy5rglm2fz4boqbzvem3mz] inputs_to_check[131]: 282", "[xxcd2riuuqmc632el2www5z43brah2hzj66qz5c2bl4txi6tphi] inputs_to_check[132]: 
283", "[lzq356tk2daemd3eejrqwmxfuprmzobz2v54vhsfmppeq35midf] inputs_to_check[133]: 284", "[jeqk32o5ugk777bosvm26wli4suonie2j7xeyvcnflm6sh2su4f] inputs_to_check[134]: 285", "[3nrlzlfgqdttgmpwe6ae4donvgjkzv5xalpsx6dkyop7d5e7owg] inputs_to_check[135]: 286", "[2cfsjfkfvrnfsi2dtyhpmzmogddssh6uxfsq3ydka2snuhaqy4s] inputs_to_check[136]: 287", "[d27xrkheycncdo3uzfumqtruedyl7pv2ur7to3lkeg7cjfaawja] inputs_to_check[137]: 288", "[o65ulls4ibkqdqeuckzqgselabavcbzln6kizmseggtkrra6k4k] inputs_to_check[138]: 289", "[e5avxq5la5yhcl3jslzu2qsr4tcolx35t2ujwwjr7lsqbhx2gk3] inputs_to_check[139]: 290", "[cbjwd3zv52u3h7bomxvmf6ynkx4wmtm6bqzzwkzlmyr2ict4kfn] inputs_to_check[140]: 291", "[fpvvfys36hfg7uwq5l6ekyjnvf3tjpbf4d5cxo4webm7epzhpvt] inputs_to_check[141]: 292", "[bhuauv3brrxmr45r7yueymn76n3bwlyfrcrqtsbuok4ipqa5d2q] inputs_to_check[142]: 293", "[n5u6kpqzxtau4hisgec3wulumses6yh323wd6fnttpwm42i3j7x] inputs_to_check[143]: 294", "[rtapjyb4o2hwk4hyf4ep7oeikdbv4zq2ni4dilcfjnjo4sgwzvz] inputs_to_check[144]: 295", "[6aadk5hp6aqszjgpca65txkgn7cp4wttn7o6q4uv5br7qu4ubxq] inputs_to_check[145]: 296", "[ptxf4kphvduiofe7xbem4isrkenfdki3oegb53qdm2jzkbn26ed] inputs_to_check[146]: 297", "[5pxadwvblqbojkxsf7lbkowi52nvhflb3rx456ro3uostqlb7ky] inputs_to_check[147]: 298", "[du4vyrfyozrfxcf6kk6ma7oqwatapifazeelfsawmsiu6gjdtxp] deterministic_algorithms_settings: (False, False, True)", "[7as26aeta7rzhgm2mxh4el36kupf55fr27327kzc2fsdiy3nexy] cuda_matmul_settings: (True, True, True)", "[7sgsc55kfjzwis4uo5qss3whnvhbsher24yq6kx6s5kpsrkxea6] torch_version: ", "[poglqjwowp4gnkmehjby2lvdjrwuo5tbxa2gayd6smgasl2hgsd] system_info[device]: {'name': 'NVIDIA H100'}", "[sp37civic5nfnonkkbzih3xfntpo3gcmphcbajuw565ur3hduzs] system_info[version]: {'triton': '3.0.0+45fff310c8d412cc506d0c1c7cf83e442e39f90ca1a9dbd4346ecd342591748e57a184b0a3-f7c9bcb1b9106f4f2459ef1b70df75315076b76babe2a4ba9e0b3b5a40697757-d412cc506d0c1c7cf83e442e39f90ca1a9dbd4346ecd342591748e57a184b0a3-23d635e690d670bf61798e1259674b78c0ed5ba222ab6a455f329f27a758fc2d-e3b0c44298fc1c149afbf4c8996fb92427ae41e4649b934ca495991b7852b855-c9cccccc1377e5b8c152e8449fa73a2b3660a899bebac0581c99e4e368a61cde-e82522398a0a43b3c7db9c7fa26ab086260c0981e59942e1df54ddc669d7b78c-cf28658fa328f7f283ec4e6ccc6c48d7c2a8ddbdf5134d3eb35c9b38ce4ace44-b9d80690b3109c2aaf5ece450d62e93b37eb6ab38552089794b3bb36e36a22b3-36130a37af1b19a0dec569aa08d30b00c74c8f02b6b632999d86dea169146792-572e6cbc0b9ae0089330aa5d115d10cfc16bd45a69e3ca362c902b9c04b5280b-412a0e8de51f28c2de98b3343804b2949ede2cb593052a0eb0ba3e6a9a1812d0', 'cuda': '12.0'}", "[zapn5tv6636hi2hdgg2j7wqiwxjbrqbqa4vrgrnuli5v34salwn] system_info[hash]: ee3e5029961cf964c22a83f0ca3586fcab4a36649a1478e6293b5382433b23d3", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[TYPE_CHECKING]: False", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[abi_compatible]: False", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[aggressive_fusion]: False", "[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[allow_buffer_reuse]: True", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[allow_stack_allocation]: False", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[always_keep_tensor_constants]: False", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[aot_inductor.debug_compile]: False", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[aot_inductor.debug_dump_consts_bin]: False", 
"[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[aot_inductor.force_mmap_weights]: False", "[v3hzzlv4tjgvp3pyhmzagjd25orl6n7nynoa7svlhhwk73b7u3c] inductor_config[aot_inductor.output_path]: ", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[aot_inductor.package]: False", "[v3hzzlv4tjgvp3pyhmzagjd25orl6n7nynoa7svlhhwk73b7u3c] inductor_config[aot_inductor.serialized_in_spec]: ", "[v3hzzlv4tjgvp3pyhmzagjd25orl6n7nynoa7svlhhwk73b7u3c] inductor_config[aot_inductor.serialized_out_spec]: ", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[aot_inductor.use_runtime_constant_folding]: False", "[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[assert_indirect_indexing]: True", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[assume_aligned_inputs]: False", "[v3hzzlv4tjgvp3pyhmzagjd25orl6n7nynoa7svlhhwk73b7u3c] inductor_config[autoheuristic_collect]: ", "[jvchmi66fvqzlemhr5fcqorz5trfdtdalzfagtj2aolmimwqhdq] inductor_config[autoheuristic_log_path]: DEFAULT", "[jwbrgxes7vjqumngs5hyj6gn5nytv2whnppnzngvaagfmawhkkd] inductor_config[autoheuristic_use]: mixed_mm", "[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[autotune_fallback_to_aten]: True", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[autotune_in_subproc]: False", "[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[autotune_local_cache]: True", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[autotune_multi_device]: False", "[tquy2we2efmowuj4wuqzcfcfdcrkzkzmwdae6hprj7fa64jpusq] inductor_config[autotune_remote_cache]: None", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[b2b_gemm_pass]: False", "[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[batch_fusion]: True", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[benchmark_combo_kernel]: False", "[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[benchmark_epilogue_fusion]: True", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[benchmark_fusion]: False", "[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[benchmark_harness]: True", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[benchmark_kernel]: False", "[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[bw_outputs_user_visible]: True", "[b4ha3ravs3qv237q65hpfqegbnoww7tf2ahcbu2i7xo6te5spqs] inductor_config[c_shim_version]: 2", "[aghvyrrgwvxijco2pk5wzc3cgmmthrbmgxitiibxuuscxdwrjd3] inductor_config[combo_kernel_allow_mixed_sizes]: 1", "[aghvyrrgwvxijco2pk5wzc3cgmmthrbmgxitiibxuuscxdwrjd3] inductor_config[combo_kernels_autotune]: 1", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[comment_origin]: False", "[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[comprehensive_padding]: True", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[compute_all_bounds]: False", "[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[constant_and_index_propagation]: True", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[conv_1x1_as_mm]: False", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[coordinate_descent_check_all_directions]: False", "[aghvyrrgwvxijco2pk5wzc3cgmmthrbmgxitiibxuuscxdwrjd3] inductor_config[coordinate_descent_search_radius]: 1", 
"[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[coordinate_descent_tuning]: False", "[c7zj4qytmety6keurs3hsh5wn7foxp3dqx4kym2ucszzcb2ngrf] inductor_config[cpp.cxx]: (None, 'g++')", "[yrty22bseefglnysuoec4ji7j2rnaggdj3g33zzj7avogwfmgdw] inductor_config[cpp.descriptive_names]: original_aten", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[cpp.dynamic_threads]: False", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[cpp.enable_floating_point_contract_flag]: False", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[cpp.enable_kernel_profile]: False", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[cpp.enable_unsafe_math_opt_flag]: False", "[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[cpp.fallback_scatter_reduce_sum]: True", "[aghvyrrgwvxijco2pk5wzc3cgmmthrbmgxitiibxuuscxdwrjd3] inductor_config[cpp.gemm_max_k_slices]: 1", "[tquy2we2efmowuj4wuqzcfcfdcrkzkzmwdae6hprj7fa64jpusq] inductor_config[cpp.inject_log1p_bug_TESTING_ONLY]: None", "[tquy2we2efmowuj4wuqzcfcfdcrkzkzmwdae6hprj7fa64jpusq] inductor_config[cpp.inject_relu_bug_TESTING_ONLY]: None", "[ebt2ncs4f5y7dn7btzi76mnouepvzad474tmp5iju4wiuumjl4s] inductor_config[cpp.max_horizontal_fusion_size]: 16", "[g7rrnbg5yonzux3cfj5ovre5lob3ayda7qcfpxjvtwmiz4uicii] inductor_config[cpp.min_chunk_size]: 4096", "[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[cpp.no_redundant_loops]: True", "[tquy2we2efmowuj4wuqzcfcfdcrkzkzmwdae6hprj7fa64jpusq] inductor_config[cpp.simdlen]: None", "[sz3im5ogc6asp7g4uqocnovype63tkdexzfrniv6hn2oank3biu] inductor_config[cpp.threads]: -1", "[tquy2we2efmowuj4wuqzcfcfdcrkzkzmwdae6hprj7fa64jpusq] inductor_config[cpp.vec_isa_ok]: None", "[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[cpp.weight_prepack]: True", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[cpp_wrapper]: False", "[bsvfcwwoczx2rlkdz2eta6doujsymyihmi46hhwk6clrrvwcb6m] inductor_config[cpu_backend]: cpp", "[tquy2we2efmowuj4wuqzcfcfdcrkzkzmwdae6hprj7fa64jpusq] inductor_config[cuda.arch]: None", "[tvyftmtdmezlejo2xllu7awzv4pzc4vm4fub4b3gpl5jptjkosi] inductor_config[cuda.compile_opt_level]: -O1", "[tquy2we2efmowuj4wuqzcfcfdcrkzkzmwdae6hprj7fa64jpusq] inductor_config[cuda.cuda_cxx]: None", "[aghvyrrgwvxijco2pk5wzc3cgmmthrbmgxitiibxuuscxdwrjd3] inductor_config[cuda.cutlass_backend_min_gemm_size]: 1", "[tquy2we2efmowuj4wuqzcfcfdcrkzkzmwdae6hprj7fa64jpusq] inductor_config[cuda.cutlass_max_profiling_configs]: None", "[tquy2we2efmowuj4wuqzcfcfdcrkzkzmwdae6hprj7fa64jpusq] inductor_config[cuda.cutlass_op_allowlist_regex]: None", "[lwkz5chtpji756gurqw4foijfi7zfgljtnn5nmnvdi2skpt4mgh] inductor_config[cuda.cutlass_op_denylist_regex]: pingpong", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[cuda.enable_cuda_lto]: False", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[cuda.enable_debug_info]: False", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[cuda.enable_ptxas_info]: False", "[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[cuda.generate_test_runner]: True", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[cuda.use_fast_math]: False", "[tquy2we2efmowuj4wuqzcfcfdcrkzkzmwdae6hprj7fa64jpusq] inductor_config[cuda.version]: None", "[caw4ly2z672k6kjfahoxwpajp5idhhtrpgf3ma2clylcp7c7aid] inductor_config[cuda_backend]: triton", 
"[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[dce]: False", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[debug]: False", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[debug_fusion]: False", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[debug_index_asserts]: False", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[debug_ir_traceback]: False", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[decompose_mem_bound_mm]: False", "[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[developer_warnings]: True", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[disable_cpp_codegen]: False", "[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[disable_progress]: True", "[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[dynamic_scale_rblock]: True", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[efficient_conv_bn_eval_fx_passes]: False", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[emulate_precision_casts]: False", "[v3hzzlv4tjgvp3pyhmzagjd25orl6n7nynoa7svlhhwk73b7u3c] inductor_config[enabled_metric_tables]: ", "[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[epilogue_fusion]: True", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[epilogue_fusion_first]: False", "[lxxtoqhcoepwfokeiibd575gnxo3uzwiv4hmpomlwkpzqz3qzsh] inductor_config[estimate_op_runtime]: default", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[fallback_random]: False", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[force_disable_caches]: False", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[force_fuse_int_mm_with_mul]: False", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[force_layout_optimization]: False", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[force_same_precision]: False", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[force_shape_pad]: False", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[freezing]: False", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[freezing_discard_parameters]: False", "[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[fx_graph_cache]: True", "[tquy2we2efmowuj4wuqzcfcfdcrkzkzmwdae6hprj7fa64jpusq] inductor_config[fx_graph_remote_cache]: None", "[zwmmbkdkarexuhbigurz5lfnhx64tht7fznecjkrvznh6rzivbv] inductor_config[fx_passes_numeric_check]: {'pre_grad': False, 'precision': 0.0001, 'num_iterations': 1, 'requires_optimizer': True}", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[generate_intermediate_hooks]: False", "[tquy2we2efmowuj4wuqzcfcfdcrkzkzmwdae6hprj7fa64jpusq] inductor_config[global_cache_dir]: None", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[group_fusion]: False", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[halide.asserts]: False", "[ljhgflgihidopsfsdcbqynv27nceykby3nutyd5jlcpq7n6e7l4] inductor_config[halide.cpu_target]: host", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[halide.debug]: False", "[wx7vmsmrdpk5ue2txlywp3lj3faqmdjphs5fgg2ehzsyno7uovg] inductor_config[halide.gpu_target]: host-cuda", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] 
inductor_config[halide.scan_kernels]: False", "[k5ogk6345jvklsnu7g2njqstiz2g6pm5wmqpgg3kasrmuqwjvl6] inductor_config[halide.scheduler_cpu]: Adams2019", "[svgytlua5wcyeia7wq7e6zgh5tsueikrnzchmdmouvmkpfsc2zq] inductor_config[halide.scheduler_cuda]: Anderson2021", "[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[implicit_fallbacks]: True", "[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[inplace_buffers]: True", "[5fxczt3ciyxitdhizb7sfsgn7fhpczcqsngttnt5ot2wyctk7co] inductor_config[inter_node_bw]: 25", "[yezuzjtg4h3jjur4jwtwiehbyixa7eonq4tqsqmwqve2lvvmrem] inductor_config[intra_node_bw]: 300", "[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[is_nightly_or_source]: True", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[is_predispatch]: False", "[tquy2we2efmowuj4wuqzcfcfdcrkzkzmwdae6hprj7fa64jpusq] inductor_config[joint_custom_post_pass]: None", "[tquy2we2efmowuj4wuqzcfcfdcrkzkzmwdae6hprj7fa64jpusq] inductor_config[joint_custom_pre_pass]: None", "[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[joint_graph_constant_folding]: True", "[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[keep_output_stride]: True", "[j6c55jha5r2sdys2rwq7uqhtleea5dgjcye7nicfgft36v7xfvp] inductor_config[kernel_name_max_ops]: 10", "[4p2fdjlvxrcw7c7fvzm5huhtqxnro4kvkx56f7p5zyrxqkwooov] inductor_config[layout_opt_default]: 1", "[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[layout_optimization]: True", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[max_autotune]: False", "[uqlsbif4zxd75vt522p52txyuguieipi2lwz5g5awt56lccqk7s] inductor_config[max_autotune_conv_backends]: ATEN,TRITON", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[max_autotune_gemm]: False", "[2y7luesktjrque3nr7qtxnum2mkbeegzdrsvkm3rvdlhqboajhx] inductor_config[max_autotune_gemm_backends]: ATEN,TRITON,CPP", "[jvchmi66fvqzlemhr5fcqorz5trfdtdalzfagtj2aolmimwqhdq] inductor_config[max_autotune_gemm_search_space]: DEFAULT", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[max_autotune_pointwise]: False", "[bh33ranllcgilhgmgr3qvygzxjm6isq5iexnfm3zx6fnr2zwlp2] inductor_config[max_autotune_subproc_graceful_timeout_seconds]: 1.0", "[iglov24t7x5ruci344aer2tm6nqshi4veuw4wxlssxtu46cx76m] inductor_config[max_autotune_subproc_result_timeout_seconds]: 60.0", "[pwoh5aypf4fxbntdvwt67rppxorqos6xr3w7qzeun6kblbfg2ga] inductor_config[max_autotune_subproc_terminate_timeout_seconds]: 2.0", "[aghvyrrgwvxijco2pk5wzc3cgmmthrbmgxitiibxuuscxdwrjd3] inductor_config[max_epilogue_benchmarked_choices]: 1", "[jykiys6ynafs3zdylwa5ggq6j655mxeh42d6mtdi22gffkrmiac] inductor_config[max_fusion_size]: 64", "[yttmfmxblgcbsvbokguzowcorrcxz5uunxtcvsbe6nijgcx45he] inductor_config[max_pointwise_cat_inputs]: 8", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[memory_planning]: False", "[x75won4jmsgeb63pcvwr2y4eteyzzdhmf5rv6xhjppie4hx2yu5] inductor_config[memory_pool]: intermediates", "[v2td5s4lnsvyxvaevy4chx6kc5h3mm2axazbgwimqule5zrzao7] inductor_config[mixed_mm_choice]: heuristic", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[nan_asserts]: False", "[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[optimize_scatter_upon_const_tensor]: True", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[pad_channels_last]: False", "[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] 
inductor_config[pattern_matcher]: True", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[permute_fusion]: False", "[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[pick_loop_orders]: True", "[tquy2we2efmowuj4wuqzcfcfdcrkzkzmwdae6hprj7fa64jpusq] inductor_config[post_grad_custom_post_pass]: None", "[tquy2we2efmowuj4wuqzcfcfdcrkzkzmwdae6hprj7fa64jpusq] inductor_config[post_grad_custom_pre_pass]: None", "[4bryyl4ahh5whyg3zwqebpwmjnx6w77nqgqbdjlowju6lkqtn7w] inductor_config[post_grad_fusion_options]: {}", "[tquy2we2efmowuj4wuqzcfcfdcrkzkzmwdae6hprj7fa64jpusq] inductor_config[pre_grad_custom_pass]: None", "[gtkv35cxmtt6tr556buxi277a67g25mjojnv32dc4bjvc7bwscw] inductor_config[pre_grad_fusion_options]: {'batch_linear': {}, 'batch_linear_lhs': {}, 'batch_layernorm': {}, 'batch_tanh': {}, 'batch_relu': {}, 'batch_sigmoid': {}}", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[profile_bandwidth]: False", "[tquy2we2efmowuj4wuqzcfcfdcrkzkzmwdae6hprj7fa64jpusq] inductor_config[profile_bandwidth_output]: None", "[v3hzzlv4tjgvp3pyhmzagjd25orl6n7nynoa7svlhhwk73b7u3c] inductor_config[profile_bandwidth_regex]: ", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[profiler_mark_wrapper_call]: False", "[yttmfmxblgcbsvbokguzowcorrcxz5uunxtcvsbe6nijgcx45he] inductor_config[realize_acc_reads_threshold]: 8", "[rr5m5hsocoyodldz7vcvaizdwvm2rt34evmqdxvng7wz3tufvo6] inductor_config[realize_opcount_threshold]: 30", "[lkkae3meylaixfif4thncru4hjqeaislawjoghffrbwuscaagei] inductor_config[realize_reads_threshold]: 4", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[reorder_for_compute_comm_overlap]: False", "[ssupi7bu3rrhdpg2jyegzncu3kg3nnhklyliqvutaxgs7y7k3dx] inductor_config[reorder_for_compute_comm_overlap_passes]: ['reorder_compute_for_overlap', 'sink_waits', 'raise_comms']", "[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[reorder_for_locality]: True", "[h25wqx6vliw4j5rtzzbv6latydxyei3deyg6v7wzvnzryfktuki] inductor_config[rocm.arch]: []", "[tquy2we2efmowuj4wuqzcfcfdcrkzkzmwdae6hprj7fa64jpusq] inductor_config[rocm.ck_dir]: None", "[oartxnko2l7d67tzwwm2otcumaut3n4wwcfgz3o377hmcveu5ft] inductor_config[rocm.ck_supported_arch]: ['gfx90a', 'gfx940', 'gfx941', 'gfx942']", "[klfqjprnpfhcdurgvuikvc4rpd5ynkpk77toousr5h3u5roty6p] inductor_config[rocm.compile_opt_level]: -O2", "[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[rocm.flush_denormals]: True", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[rocm.is_debug]: False", "[tquy2we2efmowuj4wuqzcfcfdcrkzkzmwdae6hprj7fa64jpusq] inductor_config[rocm.n_max_profiling_configs]: None", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[rocm.print_kernel_resource_usage]: False", "[tquy2we2efmowuj4wuqzcfcfdcrkzkzmwdae6hprj7fa64jpusq] inductor_config[rocm.rocm_home]: None", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[rocm.save_temps]: False", "[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[rocm.use_fast_math]: True", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[rocm.use_preselected_instances]: False", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[save_args]: False", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[search_autotune_cache]: False", "[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[shape_padding]: True", 
"[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[size_asserts]: True", "[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[split_cat_fx_passes]: True", "[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[split_reductions]: True", "[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[static_weight_shapes]: True", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[triton.autotune_at_compile_time]: False", "[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[triton.autotune_cublasLt]: True", "[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[triton.autotune_pointwise]: True", "[tuax46wac7rfv2trf5gcps6vleo3cq44lbnrdxtprvo3ljjaddj] inductor_config[triton.cudagraph_dynamic_shape_warn_limit]: 50", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[triton.cudagraph_skip_dynamic_graphs]: False", "[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[triton.cudagraph_support_input_mutation]: True", "[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[triton.cudagraph_trees]: True", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[triton.cudagraph_trees_history_recording]: False", "[ljdqgtysl3vdf7j6attlz5gmjg2ncihnveojfyubosplmkrjgra] inductor_config[triton.cudagraph_unexpected_rerecord_limit]: 128", "[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[triton.cudagraphs]: True", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[triton.debug_sync_graph]: False", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[triton.debug_sync_kernel]: False", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[triton.dense_indexing]: False", "[yrty22bseefglnysuoec4ji7j2rnaggdj3g33zzj7avogwfmgdw] inductor_config[triton.descriptive_names]: original_aten", "[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[triton.divisible_by_16]: True", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[triton.fast_path_cudagraph_asserts]: False", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[triton.force_cudagraph_sync]: False", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[triton.force_cudagraphs_warmup]: False", "[tquy2we2efmowuj4wuqzcfcfdcrkzkzmwdae6hprj7fa64jpusq] inductor_config[triton.inject_relu_bug_TESTING_ONLY]: None", "[pr5nr4a7dthirgd2ljo3d2xakc63ywxugusu6mkmr6gmpeliyib] inductor_config[triton.max_tiles]: 2", "[fv6slhtedtydps5s5u2etitscliblzcidyitqf7krsv4e23fzk6] inductor_config[triton.min_split_scan_rblock]: 256", "[vrl5ktomgtzox5xucd3np6vug3vyj6hwwzahqijuwpmamlv7ohi] inductor_config[triton.multi_kernel]: 0", "[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[triton.persistent_reductions]: True", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[triton.skip_cudagraph_warmup]: False", "[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[triton.slow_path_cudagraph_asserts]: True", "[ebt2ncs4f5y7dn7btzi76mnouepvzad474tmp5iju4wiuumjl4s] inductor_config[triton.spill_threshold]: 16", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[triton.store_cubin]: False", "[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[triton.tiling_prevents_pointwise_fusion]: True", "[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] 
inductor_config[triton.tiling_prevents_reduction_fusion]: True", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[triton.unique_kernel_names]: False", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[triton.use_block_ptr]: False", "[wft6ljqsfr3x4m7fa5zuyb7cwknky4irrxz4bjr6uzr2yiopxqj] inductor_config[unbacked_symint_fallback]: 8192", "[yttmfmxblgcbsvbokguzowcorrcxz5uunxtcvsbe6nijgcx45he] inductor_config[unroll_reductions_threshold]: 8", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[unsafe_ignore_unsupported_triton_autotune_args]: False", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[use_minimal_arrayref_interface]: False", "[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[use_mixed_mm]: True", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[verbose_progress]: False", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[warn_mix_layout]: False", "[hofygoznqmna6yvgsc6itdddi4hxftssgegh6wquixg2yng3a3z] inductor_config[worker_start_method]: subprocess"]} +V0806 13:56:22.388000 4107173 torch/_dynamo/utils.py:824] {"chromium_event": {}, "has_payload": "6c36bbd6a016d6ab1b6561b47ca221d0"} + { + "name": "inductor_compile", + "ts": 1722977782388950.8, + "args": null, + "ph": "E", + "pid": 0 + } +V0806 13:56:22.389000 4107173 torch/_dynamo/utils.py:824] {"chromium_event": {}, "has_payload": "5a6ab3e94cda31c941d856ec9820e2cf"} + { + "name": "compile_fx_inner", + "ts": 1722977782389036.2, + "args": null, + "ph": "E", + "pid": 0 + } +V0806 13:56:22.389000 4107173 torch/_dynamo/utils.py:824] {"chromium_event": {}, "has_payload": "06725ec53706eedc69b933b807c07e00"} + { + "name": "compile_fx..fw_compiler_base", + "ts": 1722977782389184.2, + "args": null, + "ph": "E", + "pid": 0 + } +V0806 13:56:22.392000 4107173 torch/_dynamo/utils.py:824] {"chromium_event": {}, "has_payload": "adb5528271cf584b9f24addb2c176a5b"} + { + "name": "create_aot_dispatcher_function", + "ts": 1722977782392701.8, + "args": null, + "ph": "E", + "pid": 0 + } +V0806 13:56:22.393000 4107173 torch/_dynamo/utils.py:824] {"chromium_event": {}, "has_payload": "4a34a3f7c77ee09c5d0a426eab9bc264"} + { + "name": "backend_compile", + "ts": 1722977782393001.8, + "args": null, + "ph": "E", + "pid": 0 + } +V0806 13:56:22.393000 4107173 torch/_dynamo/utils.py:824] {"chromium_event": {}, "has_payload": "de5fc7b04fee8c81075888ee727b085a"} + { + "name": "OutputGraph.call_user_compiler", + "ts": 1722977782393078.2, + "args": null, + "ph": "E", + "pid": 0 + } +V0806 13:56:22.655000 4107173 torch/_dynamo/guards.py:2195] {"dynamo_cpp_guards_str": {}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0, "has_payload": "3f414ff92a2c0fbe9a3d2f5d48d6cd73"} + + TREE_GUARD_MANAGER: + +- RootGuardManager + | +- DEFAULT_DEVICE: utils_device.CURRENT_DEVICE == None # _dynamo/output_graph.py:462 in init_ambient_guards + | +- GLOBAL_STATE: ___check_global_state() + | +- GuardManager: source=L['self'], accessed_by=DictGetItemGuardAccessor(self) + | | +- ID_MATCH: ___check_obj_id(L['self'], 140561654732528) + | | +- NO_HASATTR: not hasattr(L['self'], 'found_inf') + | | +- NO_HASATTR: not hasattr(L['self'], 'grad_scale') + | | +- DictGuardManager: source=L['self'].state, accessed_by=GetAttrGuardAccessor(state) + | | | +- KeyValueManager pair at index=0 + | | | | +- ValueManager: GuardManager: source=L['self'].state[list(L['self'].state.keys())[0]] + | | | | | +- GuardManager: 
source=L['self'].state[list(L['self'].state.keys())[0]]['step'], accessed_by=DictGetItemGuardAccessor(step) + | | | | | | +- ID_MATCH: ___check_obj_id(L['self'].state[list(L['self'].state.keys())[0]]['step'], 140561608014112) + | | | | | | +- TENSOR_MATCH: check_tensor(L['self'].state[list(L['self'].state.keys())[0]]['step'], Tensor, DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), torch.float32, device=0, requires_grad=False, size=[], stride=[]) + | | | | | | +- NO_HASATTR: hasattr(L['self'].state[list(L['self'].state.keys())[0]]['step'], '_dynamo_dynamic_indices') == False + | | | | | | +- NO_TENSOR_ALIASING: check_no_aliasing(L['self'].param_groups[0]['params'][0], L['self'].param_groups[0]['params'][1], L['self'].param_groups[0]['params'][2], L['self'].param_groups[0]['params'][3], L['self'].param_groups[0]['params'][4], L['self'].param_groups[0]['params'][5], L['self'].param_groups[0]['params'][6], L['self'].param_groups[0]['params'][7], L['self'].param_groups[0]['params'][8], L['self'].param_groups[0]['params'][9], L['self'].param_groups[0]['params'][10], L['self'].param_groups[0]['params'][11], L['self'].param_groups[0]['params'][12], L['self'].param_groups[0]['params'][13], L['self'].param_groups[0]['params'][14], L['self'].param_groups[0]['params'][15], L['self'].param_groups[0]['params'][16], L['self'].param_groups[0]['params'][17], L['self'].param_groups[0]['params'][18], L['self'].param_groups[0]['params'][19], L['self'].param_groups[0]['params'][20], L['self'].param_groups[0]['params'][21], L['self'].param_groups[0]['params'][22], L['self'].param_groups[0]['params'][23], L['self'].param_groups[0]['params'][24], L['self'].param_groups[0]['params'][25], L['self'].param_groups[0]['params'][26], L['self'].param_groups[0]['params'][27], L['self'].param_groups[0]['params'][28], L['self'].param_groups[0]['params'][29], L['self'].param_groups[0]['params'][30], L['self'].param_groups[0]['params'][31], L['self'].param_groups[0]['params'][32], L['self'].param_groups[0]['params'][33], L['self'].param_groups[0]['params'][34], L['self'].param_groups[0]['params'][35], L['self'].param_groups[0]['params'][36], L['self'].param_groups[0]['params'][37], L['self'].param_groups[0]['params'][38], L['self'].param_groups[0]['params'][39], L['self'].param_groups[0]['params'][40], L['self'].param_groups[0]['params'][41], L['self'].param_groups[0]['params'][42], L['self'].param_groups[0]['params'][43], L['self'].param_groups[0]['params'][44], L['self'].param_groups[0]['params'][45], L['self'].param_groups[0]['params'][46], L['self'].param_groups[0]['params'][47], L['self'].param_groups[0]['params'][48], L['self'].param_groups[0]['params'][49], L['self'].param_groups[0]['params'][50], L['self'].param_groups[0]['params'][51], L['self'].param_groups[0]['params'][52], L['self'].param_groups[0]['params'][53], L['self'].param_groups[0]['params'][54], L['self'].param_groups[0]['params'][55], L['self'].param_groups[0]['params'][56], L['self'].param_groups[0]['params'][57], L['self'].param_groups[0]['params'][58], L['self'].param_groups[0]['params'][59], L['self'].param_groups[0]['params'][60], L['self'].param_groups[0]['params'][61], L['self'].param_groups[0]['params'][62], L['self'].param_groups[0]['params'][63], L['self'].param_groups[0]['params'][64], L['self'].param_groups[0]['params'][65], L['self'].param_groups[0]['params'][66], L['self'].param_groups[0]['params'][67], L['self'].param_groups[0]['params'][68], L['self'].param_groups[0]['params'][69], 
L['self'].param_groups[0]['params'][70], L['self'].param_groups[0]['params'][71], L['self'].param_groups[0]['params'][72], L['self'].param_groups[0]['params'][73], L['self'].param_groups[0]['params'][74], L['self'].param_groups[0]['params'][75], L['self'].param_groups[0]['params'][76], L['self'].param_groups[0]['params'][77], L['self'].param_groups[0]['params'][78], L['self'].param_groups[0]['params'][79], L['self'].param_groups[0]['params'][80], L['self'].param_groups[0]['params'][81], L['self'].param_groups[0]['params'][82], L['self'].param_groups[0]['params'][83], L['self'].param_groups[0]['params'][84], L['self'].param_groups[0]['params'][85], L['self'].param_groups[0]['params'][86], L['self'].param_groups[0]['params'][87], L['self'].param_groups[0]['params'][88], L['self'].param_groups[0]['params'][89], L['self'].param_groups[0]['params'][90], L['self'].param_groups[0]['params'][91], L['self'].param_groups[0]['params'][92], L['self'].param_groups[0]['params'][93], L['self'].param_groups[0]['params'][94], L['self'].param_groups[0]['params'][95], L['self'].param_groups[0]['params'][96], L['self'].param_groups[0]['params'][97], L['self'].param_groups[0]['params'][98], L['self'].param_groups[0]['params'][99], L['self'].param_groups[0]['params'][100], L['self'].param_groups[0]['params'][101], L['self'].param_groups[0]['params'][102], L['self'].param_groups[0]['params'][103], L['self'].param_groups[0]['params'][104], L['self'].param_groups[0]['params'][105], L['self'].param_groups[0]['params'][106], L['self'].param_groups[0]['params'][107], L['self'].param_groups[0]['params'][108], L['self'].param_groups[0]['params'][109], L['self'].param_groups[0]['params'][110], L['self'].param_groups[0]['params'][111], L['self'].param_groups[0]['params'][112], L['self'].param_groups[0]['params'][113], L['self'].param_groups[0]['params'][114], L['self'].param_groups[0]['params'][115], L['self'].param_groups[0]['params'][116], L['self'].param_groups[0]['params'][117], L['self'].param_groups[0]['params'][118], L['self'].param_groups[0]['params'][119], L['self'].param_groups[0]['params'][120], L['self'].param_groups[0]['params'][121], L['self'].param_groups[0]['params'][122], L['self'].param_groups[0]['params'][123], L['self'].param_groups[0]['params'][124], L['self'].param_groups[0]['params'][125], L['self'].param_groups[0]['params'][126], L['self'].param_groups[0]['params'][127], L['self'].param_groups[0]['params'][128], L['self'].param_groups[0]['params'][129], L['self'].param_groups[0]['params'][130], L['self'].param_groups[0]['params'][131], L['self'].param_groups[0]['params'][132], L['self'].param_groups[0]['params'][133], L['self'].param_groups[0]['params'][134], L['self'].param_groups[0]['params'][135], L['self'].param_groups[0]['params'][136], L['self'].param_groups[0]['params'][137], L['self'].param_groups[0]['params'][138], L['self'].param_groups[0]['params'][139], L['self'].param_groups[0]['params'][140], L['self'].param_groups[0]['params'][141], L['self'].param_groups[0]['params'][142], L['self'].param_groups[0]['params'][143], L['self'].param_groups[0]['params'][144], L['self'].param_groups[0]['params'][145], L['self'].param_groups[0]['params'][146], L['self'].param_groups[0]['params'][147], L['self'].state[list(L['self'].state.keys())[0]]['step'], L['self'].state[list(L['self'].state.keys())[1]]['step'], L['self'].state[list(L['self'].state.keys())[2]]['step'], L['self'].state[list(L['self'].state.keys())[3]]['step'], L['self'].state[list(L['self'].state.keys())[4]]['step'], 
L['self'].state[list(L['self'].state.keys())[5]]['step'], L['self'].state[list(L['self'].state.keys())[6]]['step'], L['self'].state[list(L['self'].state.keys())[7]]['step'], L['self'].state[list(L['self'].state.keys())[8]]['step'], L['self'].state[list(L['self'].state.keys())[9]]['step'], L['self'].state[list(L['self'].state.keys())[10]]['step'], L['self'].state[list(L['self'].state.keys())[11]]['step'], L['self'].state[list(L['self'].state.keys())[12]]['step'], L['self'].state[list(L['self'].state.keys())[13]]['step'], L['self'].state[list(L['self'].state.keys())[14]]['step'], L['self'].state[list(L['self'].state.keys())[15]]['step'], L['self'].state[list(L['self'].state.keys())[16]]['step'], L['self'].state[list(L['self'].state.keys())[17]]['step'], L['self'].state[list(L['self'].state.keys())[18]]['step'], L['self'].state[list(L['self'].state.keys())[19]]['step'], L['self'].state[list(L['self'].state.keys())[20]]['step'], L['self'].state[list(L['self'].state.keys())[21]]['step'], L['self'].state[list(L['self'].state.keys())[22]]['step'], L['self'].state[list(L['self'].state.keys())[23]]['step'], L['self'].state[list(L['self'].state.keys())[24]]['step'], L['self'].state[list(L['self'].state.keys())[25]]['step'], L['self'].state[list(L['self'].state.keys())[26]]['step'], L['self'].state[list(L['self'].state.keys())[27]]['step'], L['self'].state[list(L['self'].state.keys())[28]]['step'], L['self'].state[list(L['self'].state.keys())[29]]['step'], L['self'].state[list(L['self'].state.keys())[30]]['step'], L['self'].state[list(L['self'].state.keys())[31]]['step'], L['self'].state[list(L['self'].state.keys())[32]]['step'], L['self'].state[list(L['self'].state.keys())[33]]['step'], L['self'].state[list(L['self'].state.keys())[34]]['step'], L['self'].state[list(L['self'].state.keys())[35]]['step'], L['self'].state[list(L['self'].state.keys())[36]]['step'], L['self'].state[list(L['self'].state.keys())[37]]['step'], L['self'].state[list(L['self'].state.keys())[38]]['step'], L['self'].state[list(L['self'].state.keys())[39]]['step'], L['self'].state[list(L['self'].state.keys())[40]]['step'], L['self'].state[list(L['self'].state.keys())[41]]['step'], L['self'].state[list(L['self'].state.keys())[42]]['step'], L['self'].state[list(L['self'].state.keys())[43]]['step'], L['self'].state[list(L['self'].state.keys())[44]]['step'], L['self'].state[list(L['self'].state.keys())[45]]['step'], L['self'].state[list(L['self'].state.keys())[46]]['step'], L['self'].state[list(L['self'].state.keys())[47]]['step'], L['self'].state[list(L['self'].state.keys())[48]]['step'], L['self'].state[list(L['self'].state.keys())[49]]['step'], L['self'].state[list(L['self'].state.keys())[50]]['step'], L['self'].state[list(L['self'].state.keys())[51]]['step'], L['self'].state[list(L['self'].state.keys())[52]]['step'], L['self'].state[list(L['self'].state.keys())[53]]['step'], L['self'].state[list(L['self'].state.keys())[54]]['step'], L['self'].state[list(L['self'].state.keys())[55]]['step'], L['self'].state[list(L['self'].state.keys())[56]]['step'], L['self'].state[list(L['self'].state.keys())[57]]['step'], L['self'].state[list(L['self'].state.keys())[58]]['step'], L['self'].state[list(L['self'].state.keys())[59]]['step'], L['self'].state[list(L['self'].state.keys())[60]]['step'], L['self'].state[list(L['self'].state.keys())[61]]['step'], L['self'].state[list(L['self'].state.keys())[62]]['step'], L['self'].state[list(L['self'].state.keys())[63]]['step'], L['self'].state[list(L['self'].state.keys())[64]]['step'], 
L['self'].state[list(L['self'].state.keys())[65]]['step'], L['self'].state[list(L['self'].state.keys())[66]]['step'], L['self'].state[list(L['self'].state.keys())[67]]['step'], L['self'].state[list(L['self'].state.keys())[68]]['step'], L['self'].state[list(L['self'].state.keys())[69]]['step'], L['self'].state[list(L['self'].state.keys())[70]]['step'], L['self'].state[list(L['self'].state.keys())[71]]['step'], L['self'].state[list(L['self'].state.keys())[72]]['step'], L['self'].state[list(L['self'].state.keys())[73]]['step'], L['self'].state[list(L['self'].state.keys())[74]]['step'], L['self'].state[list(L['self'].state.keys())[75]]['step'], L['self'].state[list(L['self'].state.keys())[76]]['step'], L['self'].state[list(L['self'].state.keys())[77]]['step'], L['self'].state[list(L['self'].state.keys())[78]]['step'], L['self'].state[list(L['self'].state.keys())[79]]['step'], L['self'].state[list(L['self'].state.keys())[80]]['step'], L['self'].state[list(L['self'].state.keys())[81]]['step'], L['self'].state[list(L['self'].state.keys())[82]]['step'], L['self'].state[list(L['self'].state.keys())[83]]['step'], L['self'].state[list(L['self'].state.keys())[84]]['step'], L['self'].state[list(L['self'].state.keys())[85]]['step'], L['self'].state[list(L['self'].state.keys())[86]]['step'], L['self'].state[list(L['self'].state.keys())[87]]['step'], L['self'].state[list(L['self'].state.keys())[88]]['step'], L['self'].state[list(L['self'].state.keys())[89]]['step'], L['self'].state[list(L['self'].state.keys())[90]]['step'], L['self'].state[list(L['self'].state.keys())[91]]['step'], L['self'].state[list(L['self'].state.keys())[92]]['step'], L['self'].state[list(L['self'].state.keys())[93]]['step'], L['self'].state[list(L['self'].state.keys())[94]]['step'], L['self'].state[list(L['self'].state.keys())[95]]['step'], L['self'].state[list(L['self'].state.keys())[96]]['step'], L['self'].state[list(L['self'].state.keys())[97]]['step'], L['self'].state[list(L['self'].state.keys())[98]]['step'], L['self'].state[list(L['self'].state.keys())[99]]['step'], L['self'].state[list(L['self'].state.keys())[100]]['step'], L['self'].state[list(L['self'].state.keys())[101]]['step'], L['self'].state[list(L['self'].state.keys())[102]]['step'], L['self'].state[list(L['self'].state.keys())[103]]['step'], L['self'].state[list(L['self'].state.keys())[104]]['step'], L['self'].state[list(L['self'].state.keys())[105]]['step'], L['self'].state[list(L['self'].state.keys())[106]]['step'], L['self'].state[list(L['self'].state.keys())[107]]['step'], L['self'].state[list(L['self'].state.keys())[108]]['step'], L['self'].state[list(L['self'].state.keys())[109]]['step'], L['self'].state[list(L['self'].state.keys())[110]]['step'], L['self'].state[list(L['self'].state.keys())[111]]['step'], L['self'].state[list(L['self'].state.keys())[112]]['step'], L['self'].state[list(L['self'].state.keys())[113]]['step'], L['self'].state[list(L['self'].state.keys())[114]]['step'], L['self'].state[list(L['self'].state.keys())[115]]['step'], L['self'].state[list(L['self'].state.keys())[116]]['step'], L['self'].state[list(L['self'].state.keys())[117]]['step'], L['self'].state[list(L['self'].state.keys())[118]]['step'], L['self'].state[list(L['self'].state.keys())[119]]['step'], L['self'].state[list(L['self'].state.keys())[120]]['step'], L['self'].state[list(L['self'].state.keys())[121]]['step'], L['self'].state[list(L['self'].state.keys())[122]]['step'], L['self'].state[list(L['self'].state.keys())[123]]['step'], 
L['self'].state[list(L['self'].state.keys())[124]]['step'], L['self'].state[list(L['self'].state.keys())[125]]['step'], L['self'].state[list(L['self'].state.keys())[126]]['step'], L['self'].state[list(L['self'].state.keys())[127]]['step'], L['self'].state[list(L['self'].state.keys())[128]]['step'], L['self'].state[list(L['self'].state.keys())[129]]['step'], L['self'].state[list(L['self'].state.keys())[130]]['step'], L['self'].state[list(L['self'].state.keys())[131]]['step'], L['self'].state[list(L['self'].state.keys())[132]]['step'], L['self'].state[list(L['self'].state.keys())[133]]['step'], L['self'].state[list(L['self'].state.keys())[134]]['step'], L['self'].state[list(L['self'].state.keys())[135]]['step'], L['self'].state[list(L['self'].state.keys())[136]]['step'], L['self'].state[list(L['self'].state.keys())[137]]['step'], L['self'].state[list(L['self'].state.keys())[138]]['step'], L['self'].state[list(L['self'].state.keys())[139]]['step'], L['self'].state[list(L['self'].state.keys())[140]]['step'], L['self'].state[list(L['self'].state.keys())[141]]['step'], L['self'].state[list(L['self'].state.keys())[142]]['step'], L['self'].state[list(L['self'].state.keys())[143]]['step'], L['self'].state[list(L['self'].state.keys())[144]]['step'], L['self'].state[list(L['self'].state.keys())[145]]['step'], L['self'].state[list(L['self'].state.keys())[146]]['step'], L['self'].state[list(L['self'].state.keys())[147]]['step'], L['self'].state[list(L['self'].state.keys())[0]]['exp_avg'], L['self'].state[list(L['self'].state.keys())[1]]['exp_avg'], L['self'].state[list(L['self'].state.keys())[2]]['exp_avg'], L['self'].state[list(L['self'].state.keys())[3]]['exp_avg'], L['self'].state[list(L['self'].state.keys())[4]]['exp_avg'], L['self'].state[list(L['self'].state.keys())[5]]['exp_avg'], L['self'].state[list(L['self'].state.keys())[6]]['exp_avg'], L['self'].state[list(L['self'].state.keys())[7]]['exp_avg'], L['self'].state[list(L['self'].state.keys())[8]]['exp_avg'], L['self'].state[list(L['self'].state.keys())[9]]['exp_avg'], L['self'].state[list(L['self'].state.keys())[10]]['exp_avg'], L['self'].state[list(L['self'].state.keys())[11]]['exp_avg'], L['self'].state[list(L['self'].state.keys())[12]]['exp_avg'], L['self'].state[list(L['self'].state.keys())[13]]['exp_avg'], L['self'].state[list(L['self'].state.keys())[14]]['exp_avg'], L['self'].state[list(L['self'].state.keys())[15]]['exp_avg'], L['self'].state[list(L['self'].state.keys())[16]]['exp_avg'], L['self'].state[list(L['self'].state.keys())[17]]['exp_avg'], L['self'].state[list(L['self'].state.keys())[18]]['exp_avg'], L['self'].state[list(L['self'].state.keys())[19]]['exp_avg'], L['self'].state[list(L['self'].state.keys())[20]]['exp_avg'], L['self'].state[list(L['self'].state.keys())[21]]['exp_avg'], L['self'].state[list(L['self'].state.keys())[22]]['exp_avg'], L['self'].state[list(L['self'].state.keys())[23]]['exp_avg'], L['self'].state[list(L['self'].state.keys())[24]]['exp_avg'], L['self'].state[list(L['self'].state.keys())[25]]['exp_avg'], L['self'].state[list(L['self'].state.keys())[26]]['exp_avg'], L['self'].state[list(L['self'].state.keys())[27]]['exp_avg'], L['self'].state[list(L['self'].state.keys())[28]]['exp_avg'], L['self'].state[list(L['self'].state.keys())[29]]['exp_avg'], L['self'].state[list(L['self'].state.keys())[30]]['exp_avg'], L['self'].state[list(L['self'].state.keys())[31]]['exp_avg'], L['self'].state[list(L['self'].state.keys())[32]]['exp_avg'], L['self'].state[list(L['self'].state.keys())[33]]['exp_avg'], 
L['self'].state[list(L['self'].state.keys())[34]]['exp_avg'], L['self'].state[list(L['self'].state.keys())[35]]['exp_avg'], L['self'].state[list(L['self'].state.keys())[36]]['exp_avg'], L['self'].state[list(L['self'].state.keys())[37]]['exp_avg'], L['self'].state[list(L['self'].state.keys())[38]]['exp_avg'], L['self'].state[list(L['self'].state.keys())[39]]['exp_avg'], L['self'].state[list(L['self'].state.keys())[40]]['exp_avg'], L['self'].state[list(L['self'].state.keys())[41]]['exp_avg'], L['self'].state[list(L['self'].state.keys())[42]]['exp_avg'], L['self'].state[list(L['self'].state.keys())[43]]['exp_avg'], L['self'].state[list(L['self'].state.keys())[44]]['exp_avg'], L['self'].state[list(L['self'].state.keys())[45]]['exp_avg'], L['self'].state[list(L['self'].state.keys())[46]]['exp_avg'], L['self'].state[list(L['self'].state.keys())[47]]['exp_avg'], L['self'].state[list(L['self'].state.keys())[48]]['exp_avg'], L['self'].state[list(L['self'].state.keys())[49]]['exp_avg'], L['self'].state[list(L['self'].state.keys())[50]]['exp_avg'], L['self'].state[list(L['self'].state.keys())[51]]['exp_avg'], L['self'].state[list(L['self'].state.keys())[52]]['exp_avg'], L['self'].state[list(L['self'].state.keys())[53]]['exp_avg'], L['self'].state[list(L['self'].state.keys())[54]]['exp_avg'], L['self'].state[list(L['self'].state.keys())[55]]['exp_avg'], L['self'].state[list(L['self'].state.keys())[56]]['exp_avg'], L['self'].state[list(L['self'].state.keys())[57]]['exp_avg'], L['self'].state[list(L['self'].state.keys())[58]]['exp_avg'], L['self'].state[list(L['self'].state.keys())[59]]['exp_avg'], L['self'].state[list(L['self'].state.keys())[60]]['exp_avg'], L['self'].state[list(L['self'].state.keys())[61]]['exp_avg'], L['self'].state[list(L['self'].state.keys())[62]]['exp_avg'], L['self'].state[list(L['self'].state.keys())[63]]['exp_avg'], L['self'].state[list(L['self'].state.keys())[64]]['exp_avg'], L['self'].state[list(L['self'].state.keys())[65]]['exp_avg'], L['self'].state[list(L['self'].state.keys())[66]]['exp_avg'], L['self'].state[list(L['self'].state.keys())[67]]['exp_avg'], L['self'].state[list(L['self'].state.keys())[68]]['exp_avg'], L['self'].state[list(L['self'].state.keys())[69]]['exp_avg'], L['self'].state[list(L['self'].state.keys())[70]]['exp_avg'], L['self'].state[list(L['self'].state.keys())[71]]['exp_avg'], L['self'].state[list(L['self'].state.keys())[72]]['exp_avg'], L['self'].state[list(L['self'].state.keys())[73]]['exp_avg'], L['self'].state[list(L['self'].state.keys())[74]]['exp_avg'], L['self'].state[list(L['self'].state.keys())[75]]['exp_avg'], L['self'].state[list(L['self'].state.keys())[76]]['exp_avg'], L['self'].state[list(L['self'].state.keys())[77]]['exp_avg'], L['self'].state[list(L['self'].state.keys())[78]]['exp_avg'], L['self'].state[list(L['self'].state.keys())[79]]['exp_avg'], L['self'].state[list(L['self'].state.keys())[80]]['exp_avg'], L['self'].state[list(L['self'].state.keys())[81]]['exp_avg'], L['self'].state[list(L['self'].state.keys())[82]]['exp_avg'], L['self'].state[list(L['self'].state.keys())[83]]['exp_avg'], L['self'].state[list(L['self'].state.keys())[84]]['exp_avg'], L['self'].state[list(L['self'].state.keys())[85]]['exp_avg'], L['self'].state[list(L['self'].state.keys())[86]]['exp_avg'], L['self'].state[list(L['self'].state.keys())[87]]['exp_avg'], L['self'].state[list(L['self'].state.keys())[88]]['exp_avg'], L['self'].state[list(L['self'].state.keys())[89]]['exp_avg'], L['self'].state[list(L['self'].state.keys())[90]]['exp_avg'], 
L['self'].state[list(L['self'].state.keys())[91]]['exp_avg'], L['self'].state[list(L['self'].state.keys())[92]]['exp_avg'], L['self'].state[list(L['self'].state.keys())[93]]['exp_avg'], L['self'].state[list(L['self'].state.keys())[94]]['exp_avg'], L['self'].state[list(L['self'].state.keys())[95]]['exp_avg'], L['self'].state[list(L['self'].state.keys())[96]]['exp_avg'], L['self'].state[list(L['self'].state.keys())[97]]['exp_avg'], L['self'].state[list(L['self'].state.keys())[98]]['exp_avg'], L['self'].state[list(L['self'].state.keys())[99]]['exp_avg'], L['self'].state[list(L['self'].state.keys())[100]]['exp_avg'], L['self'].state[list(L['self'].state.keys())[101]]['exp_avg'], L['self'].state[list(L['self'].state.keys())[102]]['exp_avg'], L['self'].state[list(L['self'].state.keys())[103]]['exp_avg'], L['self'].state[list(L['self'].state.keys())[104]]['exp_avg'], L['self'].state[list(L['self'].state.keys())[105]]['exp_avg'], L['self'].state[list(L['self'].state.keys())[106]]['exp_avg'], L['self'].state[list(L['self'].state.keys())[107]]['exp_avg'], L['self'].state[list(L['self'].state.keys())[108]]['exp_avg'], L['self'].state[list(L['self'].state.keys())[109]]['exp_avg'], L['self'].state[list(L['self'].state.keys())[110]]['exp_avg'], L['self'].state[list(L['self'].state.keys())[111]]['exp_avg'], L['self'].state[list(L['self'].state.keys())[112]]['exp_avg'], L['self'].state[list(L['self'].state.keys())[113]]['exp_avg'], L['self'].state[list(L['self'].state.keys())[114]]['exp_avg'], L['self'].state[list(L['self'].state.keys())[115]]['exp_avg'], L['self'].state[list(L['self'].state.keys())[116]]['exp_avg'], L['self'].state[list(L['self'].state.keys())[117]]['exp_avg'], L['self'].state[list(L['self'].state.keys())[118]]['exp_avg'], L['self'].state[list(L['self'].state.keys())[119]]['exp_avg'], L['self'].state[list(L['self'].state.keys())[120]]['exp_avg'], L['self'].state[list(L['self'].state.keys())[121]]['exp_avg'], L['self'].state[list(L['self'].state.keys())[122]]['exp_avg'], L['self'].state[list(L['self'].state.keys())[123]]['exp_avg'], L['self'].state[list(L['self'].state.keys())[124]]['exp_avg'], L['self'].state[list(L['self'].state.keys())[125]]['exp_avg'], L['self'].state[list(L['self'].state.keys())[126]]['exp_avg'], L['self'].state[list(L['self'].state.keys())[127]]['exp_avg'], L['self'].state[list(L['self'].state.keys())[128]]['exp_avg'], L['self'].state[list(L['self'].state.keys())[129]]['exp_avg'], L['self'].state[list(L['self'].state.keys())[130]]['exp_avg'], L['self'].state[list(L['self'].state.keys())[131]]['exp_avg'], L['self'].state[list(L['self'].state.keys())[132]]['exp_avg'], L['self'].state[list(L['self'].state.keys())[133]]['exp_avg'], L['self'].state[list(L['self'].state.keys())[134]]['exp_avg'], L['self'].state[list(L['self'].state.keys())[135]]['exp_avg'], L['self'].state[list(L['self'].state.keys())[136]]['exp_avg'], L['self'].state[list(L['self'].state.keys())[137]]['exp_avg'], L['self'].state[list(L['self'].state.keys())[138]]['exp_avg'], L['self'].state[list(L['self'].state.keys())[139]]['exp_avg'], L['self'].state[list(L['self'].state.keys())[140]]['exp_avg'], L['self'].state[list(L['self'].state.keys())[141]]['exp_avg'], L['self'].state[list(L['self'].state.keys())[142]]['exp_avg'], L['self'].state[list(L['self'].state.keys())[143]]['exp_avg'], L['self'].state[list(L['self'].state.keys())[144]]['exp_avg'], L['self'].state[list(L['self'].state.keys())[145]]['exp_avg'], L['self'].state[list(L['self'].state.keys())[146]]['exp_avg'], 
L['self'].state[list(L['self'].state.keys())[147]]['exp_avg'], L['self'].state[list(L['self'].state.keys())[0]]['exp_avg_sq'], L['self'].state[list(L['self'].state.keys())[1]]['exp_avg_sq'], L['self'].state[list(L['self'].state.keys())[2]]['exp_avg_sq'], L['self'].state[list(L['self'].state.keys())[3]]['exp_avg_sq'], L['self'].state[list(L['self'].state.keys())[4]]['exp_avg_sq'], L['self'].state[list(L['self'].state.keys())[5]]['exp_avg_sq'], L['self'].state[list(L['self'].state.keys())[6]]['exp_avg_sq'], L['self'].state[list(L['self'].state.keys())[7]]['exp_avg_sq'], L['self'].state[list(L['self'].state.keys())[8]]['exp_avg_sq'], L['self'].state[list(L['self'].state.keys())[9]]['exp_avg_sq'], L['self'].state[list(L['self'].state.keys())[10]]['exp_avg_sq'], L['self'].state[list(L['self'].state.keys())[11]]['exp_avg_sq'], L['self'].state[list(L['self'].state.keys())[12]]['exp_avg_sq'], L['self'].state[list(L['self'].state.keys())[13]]['exp_avg_sq'], L['self'].state[list(L['self'].state.keys())[14]]['exp_avg_sq'], L['self'].state[list(L['self'].state.keys())[15]]['exp_avg_sq'], L['self'].state[list(L['self'].state.keys())[16]]['exp_avg_sq'], L['self'].state[list(L['self'].state.keys())[17]]['exp_avg_sq'], L['self'].state[list(L['self'].state.keys())[18]]['exp_avg_sq'], L['self'].state[list(L['self'].state.keys())[19]]['exp_avg_sq'], L['self'].state[list(L['self'].state.keys())[20]]['exp_avg_sq'], L['self'].state[list(L['self'].state.keys())[21]]['exp_avg_sq'], L['self'].state[list(L['self'].state.keys())[22]]['exp_avg_sq'], L['self'].state[list(L['self'].state.keys())[23]]['exp_avg_sq'], L['self'].state[list(L['self'].state.keys())[24]]['exp_avg_sq'], L['self'].state[list(L['self'].state.keys())[25]]['exp_avg_sq'], L['self'].state[list(L['self'].state.keys())[26]]['exp_avg_sq'], L['self'].state[list(L['self'].state.keys())[27]]['exp_avg_sq'], L['self'].state[list(L['self'].state.keys())[28]]['exp_avg_sq'], L['self'].state[list(L['self'].state.keys())[29]]['exp_avg_sq'], L['self'].state[list(L['self'].state.keys())[30]]['exp_avg_sq'], L['self'].state[list(L['self'].state.keys())[31]]['exp_avg_sq'], L['self'].state[list(L['self'].state.keys())[32]]['exp_avg_sq'], L['self'].state[list(L['self'].state.keys())[33]]['exp_avg_sq'], L['self'].state[list(L['self'].state.keys())[34]]['exp_avg_sq'], L['self'].state[list(L['self'].state.keys())[35]]['exp_avg_sq'], L['self'].state[list(L['self'].state.keys())[36]]['exp_avg_sq'], L['self'].state[list(L['self'].state.keys())[37]]['exp_avg_sq'], L['self'].state[list(L['self'].state.keys())[38]]['exp_avg_sq'], L['self'].state[list(L['self'].state.keys())[39]]['exp_avg_sq'], L['self'].state[list(L['self'].state.keys())[40]]['exp_avg_sq'], L['self'].state[list(L['self'].state.keys())[41]]['exp_avg_sq'], L['self'].state[list(L['self'].state.keys())[42]]['exp_avg_sq'], L['self'].state[list(L['self'].state.keys())[43]]['exp_avg_sq'], L['self'].state[list(L['self'].state.keys())[44]]['exp_avg_sq'], L['self'].state[list(L['self'].state.keys())[45]]['exp_avg_sq'], L['self'].state[list(L['self'].state.keys())[46]]['exp_avg_sq'], L['self'].state[list(L['self'].state.keys())[47]]['exp_avg_sq'], L['self'].state[list(L['self'].state.keys())[48]]['exp_avg_sq'], L['self'].state[list(L['self'].state.keys())[49]]['exp_avg_sq'], L['self'].state[list(L['self'].state.keys())[50]]['exp_avg_sq'], L['self'].state[list(L['self'].state.keys())[51]]['exp_avg_sq'], L['self'].state[list(L['self'].state.keys())[52]]['exp_avg_sq'], 
L['self'].state[list(L['self'].state.keys())[53]]['exp_avg_sq'], L['self'].state[list(L['self'].state.keys())[54]]['exp_avg_sq'], L['self'].state[list(L['self'].state.keys())[55]]['exp_avg_sq'], L['self'].state[list(L['self'].state.keys())[56]]['exp_avg_sq'], L['self'].state[list(L['self'].state.keys())[57]]['exp_avg_sq'], L['self'].state[list(L['self'].state.keys())[58]]['exp_avg_sq'], L['self'].state[list(L['self'].state.keys())[59]]['exp_avg_sq'], L['self'].state[list(L['self'].state.keys())[60]]['exp_avg_sq'], L['self'].state[list(L['self'].state.keys())[61]]['exp_avg_sq'], L['self'].state[list(L['self'].state.keys())[62]]['exp_avg_sq'], L['self'].state[list(L['self'].state.keys())[63]]['exp_avg_sq'], L['self'].state[list(L['self'].state.keys())[64]]['exp_avg_sq'], L['self'].state[list(L['self'].state.keys())[65]]['exp_avg_sq'], L['self'].state[list(L['self'].state.keys())[66]]['exp_avg_sq'], L['self'].state[list(L['self'].state.keys())[67]]['exp_avg_sq'], L['self'].state[list(L['self'].state.keys())[68]]['exp_avg_sq'], L['self'].state[list(L['self'].state.keys())[69]]['exp_avg_sq'], L['self'].state[list(L['self'].state.keys())[70]]['exp_avg_sq'], L['self'].state[list(L['self'].state.keys())[71]]['exp_avg_sq'], L['self'].state[list(L['self'].state.keys())[72]]['exp_avg_sq'], L['self'].state[list(L['self'].state.keys())[73]]['exp_avg_sq'], L['self'].state[list(L['self'].state.keys())[74]]['exp_avg_sq'], L['self'].state[list(L['self'].state.keys())[75]]['exp_avg_sq'], L['self'].state[list(L['self'].state.keys())[76]]['exp_avg_sq'], L['self'].state[list(L['self'].state.keys())[77]]['exp_avg_sq'], L['self'].state[list(L['self'].state.keys())[78]]['exp_avg_sq'], L['self'].state[list(L['self'].state.keys())[79]]['exp_avg_sq'], L['self'].state[list(L['self'].state.keys())[80]]['exp_avg_sq'], L['self'].state[list(L['self'].state.keys())[81]]['exp_avg_sq'], L['self'].state[list(L['self'].state.keys())[82]]['exp_avg_sq'], L['self'].state[list(L['self'].state.keys())[83]]['exp_avg_sq'], L['self'].state[list(L['self'].state.keys())[84]]['exp_avg_sq'], L['self'].state[list(L['self'].state.keys())[85]]['exp_avg_sq'], L['self'].state[list(L['self'].state.keys())[86]]['exp_avg_sq'], L['self'].state[list(L['self'].state.keys())[87]]['exp_avg_sq'], L['self'].state[list(L['self'].state.keys())[88]]['exp_avg_sq'], L['self'].state[list(L['self'].state.keys())[89]]['exp_avg_sq'], L['self'].state[list(L['self'].state.keys())[90]]['exp_avg_sq'], L['self'].state[list(L['self'].state.keys())[91]]['exp_avg_sq'], L['self'].state[list(L['self'].state.keys())[92]]['exp_avg_sq'], L['self'].state[list(L['self'].state.keys())[93]]['exp_avg_sq'], L['self'].state[list(L['self'].state.keys())[94]]['exp_avg_sq'], L['self'].state[list(L['self'].state.keys())[95]]['exp_avg_sq'], L['self'].state[list(L['self'].state.keys())[96]]['exp_avg_sq'], L['self'].state[list(L['self'].state.keys())[97]]['exp_avg_sq'], L['self'].state[list(L['self'].state.keys())[98]]['exp_avg_sq'], L['self'].state[list(L['self'].state.keys())[99]]['exp_avg_sq'], L['self'].state[list(L['self'].state.keys())[100]]['exp_avg_sq'], L['self'].state[list(L['self'].state.keys())[101]]['exp_avg_sq'], L['self'].state[list(L['self'].state.keys())[102]]['exp_avg_sq'], L['self'].state[list(L['self'].state.keys())[103]]['exp_avg_sq'], L['self'].state[list(L['self'].state.keys())[104]]['exp_avg_sq'], L['self'].state[list(L['self'].state.keys())[105]]['exp_avg_sq'], L['self'].state[list(L['self'].state.keys())[106]]['exp_avg_sq'], 
L['self'].state[list(L['self'].state.keys())[107]]['exp_avg_sq'], L['self'].state[list(L['self'].state.keys())[108]]['exp_avg_sq'], L['self'].state[list(L['self'].state.keys())[109]]['exp_avg_sq'], L['self'].state[list(L['self'].state.keys())[110]]['exp_avg_sq'], L['self'].state[list(L['self'].state.keys())[111]]['exp_avg_sq'], L['self'].state[list(L['self'].state.keys())[112]]['exp_avg_sq'], L['self'].state[list(L['self'].state.keys())[113]]['exp_avg_sq'], L['self'].state[list(L['self'].state.keys())[114]]['exp_avg_sq'], L['self'].state[list(L['self'].state.keys())[115]]['exp_avg_sq'], L['self'].state[list(L['self'].state.keys())[116]]['exp_avg_sq'], L['self'].state[list(L['self'].state.keys())[117]]['exp_avg_sq'], L['self'].state[list(L['self'].state.keys())[118]]['exp_avg_sq'], L['self'].state[list(L['self'].state.keys())[119]]['exp_avg_sq'], L['self'].state[list(L['self'].state.keys())[120]]['exp_avg_sq'], L['self'].state[list(L['self'].state.keys())[121]]['exp_avg_sq'], L['self'].state[list(L['self'].state.keys())[122]]['exp_avg_sq'], L['self'].state[list(L['self'].state.keys())[123]]['exp_avg_sq'], L['self'].state[list(L['self'].state.keys())[124]]['exp_avg_sq'], L['self'].state[list(L['self'].state.keys())[125]]['exp_avg_sq'], L['self'].state[list(L['self'].state.keys())[126]]['exp_avg_sq'], L['self'].state[list(L['self'].state.keys())[127]]['exp_avg_sq'], L['self'].state[list(L['self'].state.keys())[128]]['exp_avg_sq'], L['self'].state[list(L['self'].state.keys())[129]]['exp_avg_sq'], L['self'].state[list(L['self'].state.keys())[130]]['exp_avg_sq'], L['self'].state[list(L['self'].state.keys())[131]]['exp_avg_sq'], L['self'].state[list(L['self'].state.keys())[132]]['exp_avg_sq'], L['self'].state[list(L['self'].state.keys())[133]]['exp_avg_sq'], L['self'].state[list(L['self'].state.keys())[134]]['exp_avg_sq'], L['self'].state[list(L['self'].state.keys())[135]]['exp_avg_sq'], L['self'].state[list(L['self'].state.keys())[136]]['exp_avg_sq'], L['self'].state[list(L['self'].state.keys())[137]]['exp_avg_sq'], L['self'].state[list(L['self'].state.keys())[138]]['exp_avg_sq'], L['self'].state[list(L['self'].state.keys())[139]]['exp_avg_sq'], L['self'].state[list(L['self'].state.keys())[140]]['exp_avg_sq'], L['self'].state[list(L['self'].state.keys())[141]]['exp_avg_sq'], L['self'].state[list(L['self'].state.keys())[142]]['exp_avg_sq'], L['self'].state[list(L['self'].state.keys())[143]]['exp_avg_sq'], L['self'].state[list(L['self'].state.keys())[144]]['exp_avg_sq'], L['self'].state[list(L['self'].state.keys())[145]]['exp_avg_sq'], L['self'].state[list(L['self'].state.keys())[146]]['exp_avg_sq'], L['self'].state[list(L['self'].state.keys())[147]]['exp_avg_sq']) + | | | | | +- GuardManager: source=L['self'].state[list(L['self'].state.keys())[0]]['exp_avg'], accessed_by=DictGetItemGuardAccessor(exp_avg) + | | | | | | +- ID_MATCH: ___check_obj_id(L['self'].state[list(L['self'].state.keys())[0]]['exp_avg'], 140561608014592) + | | | | | | +- TENSOR_MATCH: check_tensor(L['self'].state[list(L['self'].state.keys())[0]]['exp_avg'], Tensor, DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), torch.float32, device=0, requires_grad=False, size=[50304, 768], stride=[768, 1]) + | | | | | | +- NO_HASATTR: hasattr(L['self'].state[list(L['self'].state.keys())[0]]['exp_avg'], '_dynamo_dynamic_indices') == False + | | | | | | +- NO_TENSOR_ALIASING + | | | | | +- GuardManager: source=L['self'].state[list(L['self'].state.keys())[0]]['exp_avg_sq'], accessed_by=DictGetItemGuardAccessor(exp_avg_sq) + 
| | | | | | +- ID_MATCH: ___check_obj_id(L['self'].state[list(L['self'].state.keys())[0]]['exp_avg_sq'], 140561608013952) + | | | | | | +- TENSOR_MATCH: check_tensor(L['self'].state[list(L['self'].state.keys())[0]]['exp_avg_sq'], Tensor, DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), torch.float32, device=0, requires_grad=False, size=[50304, 768], stride=[768, 1]) + | | | | | | +- NO_HASATTR: hasattr(L['self'].state[list(L['self'].state.keys())[0]]['exp_avg_sq'], '_dynamo_dynamic_indices') == False + | | | | | | +- NO_TENSOR_ALIASING + | | | +- KeyValueManager pair at index=1 + | | | | +- ValueManager: GuardManager: source=L['self'].state[list(L['self'].state.keys())[1]] + | | | | | +- DICT_LENGTH: len(L['self'].state[list(L['self'].state.keys())[1]]) == 3 + | | | | | +- GuardManager: source=L['self'].state[list(L['self'].state.keys())[1]]['step'], accessed_by=DictGetItemGuardAccessor(step) + | | | | | | +- ID_MATCH: ___check_obj_id(L['self'].state[list(L['self'].state.keys())[1]]['step'], 140561608013792) + | | | | | | +- TENSOR_MATCH: check_tensor(L['self'].state[list(L['self'].state.keys())[1]]['step'], Tensor, DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), torch.float32, device=0, requires_grad=False, size=[], stride=[]) + | | | | | | +- NO_HASATTR: hasattr(L['self'].state[list(L['self'].state.keys())[1]]['step'], '_dynamo_dynamic_indices') == False + | | | | | | +- NO_TENSOR_ALIASING + | | | | | +- GuardManager: source=L['self'].state[list(L['self'].state.keys())[1]]['exp_avg'], accessed_by=DictGetItemGuardAccessor(exp_avg) + | | | | | | +- ID_MATCH: ___check_obj_id(L['self'].state[list(L['self'].state.keys())[1]]['exp_avg'], 140561608013392) + | | | | | | +- TENSOR_MATCH: check_tensor(L['self'].state[list(L['self'].state.keys())[1]]['exp_avg'], Tensor, DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), torch.float32, device=0, requires_grad=False, size=[1024, 768], stride=[768, 1]) + | | | | | | +- NO_HASATTR: hasattr(L['self'].state[list(L['self'].state.keys())[1]]['exp_avg'], '_dynamo_dynamic_indices') == False + | | | | | | +- NO_TENSOR_ALIASING + | | | | | +- GuardManager: source=L['self'].state[list(L['self'].state.keys())[1]]['exp_avg_sq'], accessed_by=DictGetItemGuardAccessor(exp_avg_sq) + | | | | | | +- ID_MATCH: ___check_obj_id(L['self'].state[list(L['self'].state.keys())[1]]['exp_avg_sq'], 140561608013632) + | | | | | | +- TENSOR_MATCH: check_tensor(L['self'].state[list(L['self'].state.keys())[1]]['exp_avg_sq'], Tensor, DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), torch.float32, device=0, requires_grad=False, size=[1024, 768], stride=[768, 1]) + | | | | | | +- NO_HASATTR: hasattr(L['self'].state[list(L['self'].state.keys())[1]]['exp_avg_sq'], '_dynamo_dynamic_indices') == False + | | | | | | +- NO_TENSOR_ALIASING + | | | +- KeyValueManager pair at index=2 + | | | | +- ValueManager: GuardManager: source=L['self'].state[list(L['self'].state.keys())[2]] + | | | | | +- GuardManager: source=L['self'].state[list(L['self'].state.keys())[2]]['step'], accessed_by=DictGetItemGuardAccessor(step) + | | | | | | +- ID_MATCH: ___check_obj_id(L['self'].state[list(L['self'].state.keys())[2]]['step'], 140561608013712) + | | | | | | +- TENSOR_MATCH: check_tensor(L['self'].state[list(L['self'].state.keys())[2]]['step'], Tensor, DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), torch.float32, device=0, requires_grad=False, size=[], stride=[]) + | | | | | | +- NO_HASATTR: 
hasattr(L['self'].state[list(L['self'].state.keys())[2]]['step'], '_dynamo_dynamic_indices') == False + | | | | | | +- NO_TENSOR_ALIASING + | | | | | +- GuardManager: source=L['self'].state[list(L['self'].state.keys())[2]]['exp_avg'], accessed_by=DictGetItemGuardAccessor(exp_avg) + | | | | | | +- ID_MATCH: ___check_obj_id(L['self'].state[list(L['self'].state.keys())[2]]['exp_avg'], 140561608013472) + | | | | | | +- TENSOR_MATCH: check_tensor(L['self'].state[list(L['self'].state.keys())[2]]['exp_avg'], Tensor, DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), torch.float32, device=0, requires_grad=False, size=[768], stride=[1]) + | | | | | | +- NO_HASATTR: hasattr(L['self'].state[list(L['self'].state.keys())[2]]['exp_avg'], '_dynamo_dynamic_indices') == False + | | | | | | +- NO_TENSOR_ALIASING + | | | | | +- GuardManager: source=L['self'].state[list(L['self'].state.keys())[2]]['exp_avg_sq'], accessed_by=DictGetItemGuardAccessor(exp_avg_sq) + | | | | | | +- ID_MATCH: ___check_obj_id(L['self'].state[list(L['self'].state.keys())[2]]['exp_avg_sq'], 140561608013072) + | | | | | | +- TENSOR_MATCH: check_tensor(L['self'].state[list(L['self'].state.keys())[2]]['exp_avg_sq'], Tensor, DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), torch.float32, device=0, requires_grad=False, size=[768], stride=[1]) + | | | | | | +- NO_HASATTR: hasattr(L['self'].state[list(L['self'].state.keys())[2]]['exp_avg_sq'], '_dynamo_dynamic_indices') == False + | | | | | | +- NO_TENSOR_ALIASING + | | | +- KeyValueManager pair at index=3 + | | | | +- ValueManager: GuardManager: source=L['self'].state[list(L['self'].state.keys())[3]] + | | | | | +- GuardManager: source=L['self'].state[list(L['self'].state.keys())[3]]['step'], accessed_by=DictGetItemGuardAccessor(step) + | | | | | | +- ID_MATCH: ___check_obj_id(L['self'].state[list(L['self'].state.keys())[3]]['step'], 140561608012992) + | | | | | | +- TENSOR_MATCH: check_tensor(L['self'].state[list(L['self'].state.keys())[3]]['step'], Tensor, DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), torch.float32, device=0, requires_grad=False, size=[], stride=[]) + | | | | | | +- NO_HASATTR: hasattr(L['self'].state[list(L['self'].state.keys())[3]]['step'], '_dynamo_dynamic_indices') == False + | | | | | | +- NO_TENSOR_ALIASING + | | | | | +- GuardManager: source=L['self'].state[list(L['self'].state.keys())[3]]['exp_avg'], accessed_by=DictGetItemGuardAccessor(exp_avg) + | | | | | | +- ID_MATCH: ___check_obj_id(L['self'].state[list(L['self'].state.keys())[3]]['exp_avg'], 140561608013312) + | | | | | | +- TENSOR_MATCH: check_tensor(L['self'].state[list(L['self'].state.keys())[3]]['exp_avg'], Tensor, DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), torch.float32, device=0, requires_grad=False, size=[768], stride=[1]) + | | | | | | +- NO_HASATTR: hasattr(L['self'].state[list(L['self'].state.keys())[3]]['exp_avg'], '_dynamo_dynamic_indices') == False + | | | | | | +- NO_TENSOR_ALIASING + | | | | | +- GuardManager: source=L['self'].state[list(L['self'].state.keys())[3]]['exp_avg_sq'], accessed_by=DictGetItemGuardAccessor(exp_avg_sq) + | | | | | | +- ID_MATCH: ___check_obj_id(L['self'].state[list(L['self'].state.keys())[3]]['exp_avg_sq'], 140561608013232) + | | | | | | +- TENSOR_MATCH: check_tensor(L['self'].state[list(L['self'].state.keys())[3]]['exp_avg_sq'], Tensor, DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), torch.float32, device=0, requires_grad=False, size=[768], stride=[1]) + | | | | | | 
+- NO_HASATTR: hasattr(L['self'].state[list(L['self'].state.keys())[3]]['exp_avg_sq'], '_dynamo_dynamic_indices') == False + | | | | | | +- NO_TENSOR_ALIASING + | | | +- KeyValueManager pair at index=4 + | | | | +- ValueManager: GuardManager: source=L['self'].state[list(L['self'].state.keys())[4]] + | | | | | +- GuardManager: source=L['self'].state[list(L['self'].state.keys())[4]]['step'], accessed_by=DictGetItemGuardAccessor(step) + | | | | | | +- ID_MATCH: ___check_obj_id(L['self'].state[list(L['self'].state.keys())[4]]['step'], 140561608012832) + | | | | | | +- TENSOR_MATCH: check_tensor(L['self'].state[list(L['self'].state.keys())[4]]['step'], Tensor, DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), torch.float32, device=0, requires_grad=False, size=[], stride=[]) + | | | | | | +- NO_HASATTR: hasattr(L['self'].state[list(L['self'].state.keys())[4]]['step'], '_dynamo_dynamic_indices') == False + | | | | | | +- NO_TENSOR_ALIASING + | | | | | +- GuardManager: source=L['self'].state[list(L['self'].state.keys())[4]]['exp_avg'], accessed_by=DictGetItemGuardAccessor(exp_avg) + | | | | | | +- ID_MATCH: ___check_obj_id(L['self'].state[list(L['self'].state.keys())[4]]['exp_avg'], 140561608012432) + | | | | | | +- TENSOR_MATCH: check_tensor(L['self'].state[list(L['self'].state.keys())[4]]['exp_avg'], Tensor, DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), torch.float32, device=0, requires_grad=False, size=[2304, 768], stride=[768, 1]) + | | | | | | +- NO_HASATTR: hasattr(L['self'].state[list(L['self'].state.keys())[4]]['exp_avg'], '_dynamo_dynamic_indices') == False + | | | | | | +- NO_TENSOR_ALIASING + | | | | | +- GuardManager: source=L['self'].state[list(L['self'].state.keys())[4]]['exp_avg_sq'], accessed_by=DictGetItemGuardAccessor(exp_avg_sq) + | | | | | | +- ID_MATCH: ___check_obj_id(L['self'].state[list(L['self'].state.keys())[4]]['exp_avg_sq'], 140561608012672) + | | | | | | +- TENSOR_MATCH: check_tensor(L['self'].state[list(L['self'].state.keys())[4]]['exp_avg_sq'], Tensor, DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), torch.float32, device=0, requires_grad=False, size=[2304, 768], stride=[768, 1]) + | | | | | | +- NO_HASATTR: hasattr(L['self'].state[list(L['self'].state.keys())[4]]['exp_avg_sq'], '_dynamo_dynamic_indices') == False + | | | | | | +- NO_TENSOR_ALIASING + | | | +- KeyValueManager pair at index=5 + | | | | +- ValueManager: GuardManager: source=L['self'].state[list(L['self'].state.keys())[5]] + | | | | | +- GuardManager: source=L['self'].state[list(L['self'].state.keys())[5]]['step'], accessed_by=DictGetItemGuardAccessor(step) + | | | | | | +- ID_MATCH: ___check_obj_id(L['self'].state[list(L['self'].state.keys())[5]]['step'], 140561608012752) + | | | | | | +- TENSOR_MATCH: check_tensor(L['self'].state[list(L['self'].state.keys())[5]]['step'], Tensor, DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), torch.float32, device=0, requires_grad=False, size=[], stride=[]) + | | | | | | +- NO_HASATTR: hasattr(L['self'].state[list(L['self'].state.keys())[5]]['step'], '_dynamo_dynamic_indices') == False + | | | | | | +- NO_TENSOR_ALIASING + | | | | | +- GuardManager: source=L['self'].state[list(L['self'].state.keys())[5]]['exp_avg'], accessed_by=DictGetItemGuardAccessor(exp_avg) + | | | | | | +- ID_MATCH: ___check_obj_id(L['self'].state[list(L['self'].state.keys())[5]]['exp_avg'], 140561608012512) + | | | | | | +- TENSOR_MATCH: check_tensor(L['self'].state[list(L['self'].state.keys())[5]]['exp_avg'], Tensor, 
DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), torch.float32, device=0, requires_grad=False, size=[2304], stride=[1]) + | | | | | | +- NO_HASATTR: hasattr(L['self'].state[list(L['self'].state.keys())[5]]['exp_avg'], '_dynamo_dynamic_indices') == False + | | | | | | +- NO_TENSOR_ALIASING + | | | | | +- GuardManager: source=L['self'].state[list(L['self'].state.keys())[5]]['exp_avg_sq'], accessed_by=DictGetItemGuardAccessor(exp_avg_sq) + | | | | | | +- ID_MATCH: ___check_obj_id(L['self'].state[list(L['self'].state.keys())[5]]['exp_avg_sq'], 140561608012112) + | | | | | | +- TENSOR_MATCH: check_tensor(L['self'].state[list(L['self'].state.keys())[5]]['exp_avg_sq'], Tensor, DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), torch.float32, device=0, requires_grad=False, size=[2304], stride=[1]) + | | | | | | +- NO_HASATTR: hasattr(L['self'].state[list(L['self'].state.keys())[5]]['exp_avg_sq'], '_dynamo_dynamic_indices') == False + | | | | | | +- NO_TENSOR_ALIASING + | | | +- KeyValueManager pair at index=6 + | | | | +- ValueManager: GuardManager: source=L['self'].state[list(L['self'].state.keys())[6]] + | | | | | +- GuardManager: source=L['self'].state[list(L['self'].state.keys())[6]]['step'], accessed_by=DictGetItemGuardAccessor(step) + | | | | | | +- ID_MATCH: ___check_obj_id(L['self'].state[list(L['self'].state.keys())[6]]['step'], 140561608012032) + | | | | | | +- TENSOR_MATCH: check_tensor(L['self'].state[list(L['self'].state.keys())[6]]['step'], Tensor, DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), torch.float32, device=0, requires_grad=False, size=[], stride=[]) + | | | | | | +- NO_HASATTR: hasattr(L['self'].state[list(L['self'].state.keys())[6]]['step'], '_dynamo_dynamic_indices') == False + | | | | | | +- NO_TENSOR_ALIASING + | | | | | +- GuardManager: source=L['self'].state[list(L['self'].state.keys())[6]]['exp_avg'], accessed_by=DictGetItemGuardAccessor(exp_avg) + | | | | | | +- ID_MATCH: ___check_obj_id(L['self'].state[list(L['self'].state.keys())[6]]['exp_avg'], 140561608012352) + | | | | | | +- TENSOR_MATCH: check_tensor(L['self'].state[list(L['self'].state.keys())[6]]['exp_avg'], Tensor, DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), torch.float32, device=0, requires_grad=False, size=[768, 768], stride=[768, 1]) + | | | | | | +- NO_HASATTR: hasattr(L['self'].state[list(L['self'].state.keys())[6]]['exp_avg'], '_dynamo_dynamic_indices') == False + | | | | | | +- NO_TENSOR_ALIASING + | | | | | +- GuardManager: source=L['self'].state[list(L['self'].state.keys())[6]]['exp_avg_sq'], accessed_by=DictGetItemGuardAccessor(exp_avg_sq) + | | | | | | +- ID_MATCH: ___check_obj_id(L['self'].state[list(L['self'].state.keys())[6]]['exp_avg_sq'], 140561608012272) + | | | | | | +- TENSOR_MATCH: check_tensor(L['self'].state[list(L['self'].state.keys())[6]]['exp_avg_sq'], Tensor, DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), torch.float32, device=0, requires_grad=False, size=[768, 768], stride=[768, 1]) + | | | | | | +- NO_HASATTR: hasattr(L['self'].state[list(L['self'].state.keys())[6]]['exp_avg_sq'], '_dynamo_dynamic_indices') == False + | | | | | | +- NO_TENSOR_ALIASING + | | | +- KeyValueManager pair at index=7 + | | | | +- ValueManager: GuardManager: source=L['self'].state[list(L['self'].state.keys())[7]] + | | | | | +- GuardManager: source=L['self'].state[list(L['self'].state.keys())[7]]['step'], accessed_by=DictGetItemGuardAccessor(step) + | | | | | | +- ID_MATCH: 
___check_obj_id(L['self'].state[list(L['self'].state.keys())[7]]['step'], 140561608011872) + | | | | | | +- TENSOR_MATCH: check_tensor(L['self'].state[list(L['self'].state.keys())[7]]['step'], Tensor, DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), torch.float32, device=0, requires_grad=False, size=[], stride=[]) + | | | | | | +- NO_HASATTR: hasattr(L['self'].state[list(L['self'].state.keys())[7]]['step'], '_dynamo_dynamic_indices') == False + | | | | | | +- NO_TENSOR_ALIASING + | | | | | +- GuardManager: source=L['self'].state[list(L['self'].state.keys())[7]]['exp_avg'], accessed_by=DictGetItemGuardAccessor(exp_avg) + | | | | | | +- ID_MATCH: ___check_obj_id(L['self'].state[list(L['self'].state.keys())[7]]['exp_avg'], 140561608011472) + | | | | | | +- TENSOR_MATCH: check_tensor(L['self'].state[list(L['self'].state.keys())[7]]['exp_avg'], Tensor, DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), torch.float32, device=0, requires_grad=False, size=[768], stride=[1]) + | | | | | | +- NO_HASATTR: hasattr(L['self'].state[list(L['self'].state.keys())[7]]['exp_avg'], '_dynamo_dynamic_indices') == False + | | | | | | +- NO_TENSOR_ALIASING + | | | | | +- GuardManager: source=L['self'].state[list(L['self'].state.keys())[7]]['exp_avg_sq'], accessed_by=DictGetItemGuardAccessor(exp_avg_sq) + | | | | | | +- ID_MATCH: ___check_obj_id(L['self'].state[list(L['self'].state.keys())[7]]['exp_avg_sq'], 140561608011712) + | | | | | | +- TENSOR_MATCH: check_tensor(L['self'].state[list(L['self'].state.keys())[7]]['exp_avg_sq'], Tensor, DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), torch.float32, device=0, requires_grad=False, size=[768], stride=[1]) + | | | | | | +- NO_HASATTR: hasattr(L['self'].state[list(L['self'].state.keys())[7]]['exp_avg_sq'], '_dynamo_dynamic_indices') == False + | | | | | | +- NO_TENSOR_ALIASING + | | | +- KeyValueManager pair at index=8 + | | | | +- ValueManager: GuardManager: source=L['self'].state[list(L['self'].state.keys())[8]] + | | | | | +- GuardManager: source=L['self'].state[list(L['self'].state.keys())[8]]['step'], accessed_by=DictGetItemGuardAccessor(step) + | | | | | | +- ID_MATCH: ___check_obj_id(L['self'].state[list(L['self'].state.keys())[8]]['step'], 140561608011792) + | | | | | | +- TENSOR_MATCH: check_tensor(L['self'].state[list(L['self'].state.keys())[8]]['step'], Tensor, DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), torch.float32, device=0, requires_grad=False, size=[], stride=[]) + | | | | | | +- NO_HASATTR: hasattr(L['self'].state[list(L['self'].state.keys())[8]]['step'], '_dynamo_dynamic_indices') == False + | | | | | | +- NO_TENSOR_ALIASING + | | | | | +- GuardManager: source=L['self'].state[list(L['self'].state.keys())[8]]['exp_avg'], accessed_by=DictGetItemGuardAccessor(exp_avg) + | | | | | | +- ID_MATCH: ___check_obj_id(L['self'].state[list(L['self'].state.keys())[8]]['exp_avg'], 140561608011552) + | | | | | | +- TENSOR_MATCH: check_tensor(L['self'].state[list(L['self'].state.keys())[8]]['exp_avg'], Tensor, DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), torch.float32, device=0, requires_grad=False, size=[768], stride=[1]) + | | | | | | +- NO_HASATTR: hasattr(L['self'].state[list(L['self'].state.keys())[8]]['exp_avg'], '_dynamo_dynamic_indices') == False + | | | | | | +- NO_TENSOR_ALIASING + | | | | | +- GuardManager: source=L['self'].state[list(L['self'].state.keys())[8]]['exp_avg_sq'], accessed_by=DictGetItemGuardAccessor(exp_avg_sq) + | | | | | | +- ID_MATCH: 
___check_obj_id(L['self'].state[list(L['self'].state.keys())[8]]['exp_avg_sq'], 140561608010672) + | | | | | | +- TENSOR_MATCH: check_tensor(L['self'].state[list(L['self'].state.keys())[8]]['exp_avg_sq'], Tensor, DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), torch.float32, device=0, requires_grad=False, size=[768], stride=[1]) + | | | | | | +- NO_HASATTR: hasattr(L['self'].state[list(L['self'].state.keys())[8]]['exp_avg_sq'], '_dynamo_dynamic_indices') == False + | | | | | | +- NO_TENSOR_ALIASING + | | | +- KeyValueManager pair at index=9 + | | | | +- ValueManager: GuardManager: source=L['self'].state[list(L['self'].state.keys())[9]] + | | | | | +- GuardManager: source=L['self'].state[list(L['self'].state.keys())[9]]['step'], accessed_by=DictGetItemGuardAccessor(step) + | | | | | | +- ID_MATCH: ___check_obj_id(L['self'].state[list(L['self'].state.keys())[9]]['step'], 140561608010032) + | | | | | | +- TENSOR_MATCH: check_tensor(L['self'].state[list(L['self'].state.keys())[9]]['step'], Tensor, DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), torch.float32, device=0, requires_grad=False, size=[], stride=[]) + | | | | | | +- NO_HASATTR: hasattr(L['self'].state[list(L['self'].state.keys())[9]]['step'], '_dynamo_dynamic_indices') == False + | | | | | | +- NO_TENSOR_ALIASING + | | | | | +- GuardManager: source=L['self'].state[list(L['self'].state.keys())[9]]['exp_avg'], accessed_by=DictGetItemGuardAccessor(exp_avg) + | | | | | | +- ID_MATCH: ___check_obj_id(L['self'].state[list(L['self'].state.keys())[9]]['exp_avg'], 140561608011392) + | | | | | | +- TENSOR_MATCH: check_tensor(L['self'].state[list(L['self'].state.keys())[9]]['exp_avg'], Tensor, DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), torch.float32, device=0, requires_grad=False, size=[768], stride=[1]) + | | | | | | +- NO_HASATTR: hasattr(L['self'].state[list(L['self'].state.keys())[9]]['exp_avg'], '_dynamo_dynamic_indices') == False + | | | | | | +- NO_TENSOR_ALIASING + | | | | | +- GuardManager: source=L['self'].state[list(L['self'].state.keys())[9]]['exp_avg_sq'], accessed_by=DictGetItemGuardAccessor(exp_avg_sq) + | | | | | | +- ID_MATCH: ___check_obj_id(L['self'].state[list(L['self'].state.keys())[9]]['exp_avg_sq'], 140561608011312) + | | | | | | +- TENSOR_MATCH: check_tensor(L['self'].state[list(L['self'].state.keys())[9]]['exp_avg_sq'], Tensor, DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), torch.float32, device=0, requires_grad=False, size=[768], stride=[1]) + | | | | | | +- NO_HASATTR: hasattr(L['self'].state[list(L['self'].state.keys())[9]]['exp_avg_sq'], '_dynamo_dynamic_indices') == False + | | | | | | +- NO_TENSOR_ALIASING + | | | +- KeyValueManager pair at index=10 + | | | | +- ValueManager: GuardManager: source=L['self'].state[list(L['self'].state.keys())[10]] + | | | | | +- GuardManager: source=L['self'].state[list(L['self'].state.keys())[10]]['step'], accessed_by=DictGetItemGuardAccessor(step) + | | | | | | +- ID_MATCH: ___check_obj_id(L['self'].state[list(L['self'].state.keys())[10]]['step'], 140561608004752) + | | | | | | +- TENSOR_MATCH: check_tensor(L['self'].state[list(L['self'].state.keys())[10]]['step'], Tensor, DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), torch.float32, device=0, requires_grad=False, size=[], stride=[]) + | | | | | | +- NO_HASATTR: hasattr(L['self'].state[list(L['self'].state.keys())[10]]['step'], '_dynamo_dynamic_indices') == False + | | | | | | +- NO_TENSOR_ALIASING + | | | | | +- 
GuardManager: source=L['self'].state[list(L['self'].state.keys())[10]]['exp_avg'], accessed_by=DictGetItemGuardAccessor(exp_avg) + | | | | | | +- ID_MATCH: ___check_obj_id(L['self'].state[list(L['self'].state.keys())[10]]['exp_avg'], 140561608005312) + | | | | | | +- TENSOR_MATCH: check_tensor(L['self'].state[list(L['self'].state.keys())[10]]['exp_avg'], Tensor, DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), torch.float32, device=0, requires_grad=False, size=[3072, 768], stride=[768, 1]) + | | | | | | +- NO_HASATTR: hasattr(L['self'].state[list(L['self'].state.keys())[10]]['exp_avg'], '_dynamo_dynamic_indices') == False + | | | | | | +- NO_TENSOR_ALIASING + | | | | | +- GuardManager: source=L['self'].state[list(L['self'].state.keys())[10]]['exp_avg_sq'], accessed_by=DictGetItemGuardAccessor(exp_avg_sq) + | | | | | | +- ID_MATCH: ___check_obj_id(L['self'].state[list(L['self'].state.keys())[10]]['exp_avg_sq'], 140561608004512) + | | | | | | +- TENSOR_MATCH: check_tensor(L['self'].state[list(L['self'].state.keys())[10]]['exp_avg_sq'], Tensor, DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), torch.float32, device=0, requires_grad=False, size=[3072, 768], stride=[768, 1]) + | | | | | | +- NO_HASATTR: hasattr(L['self'].state[list(L['self'].state.keys())[10]]['exp_avg_sq'], '_dynamo_dynamic_indices') == False + | | | | | | +- NO_TENSOR_ALIASING + | | | +- KeyValueManager pair at index=11 + | | | | +- ValueManager: GuardManager: source=L['self'].state[list(L['self'].state.keys())[11]] + | | | | | +- GuardManager: source=L['self'].state[list(L['self'].state.keys())[11]]['step'], accessed_by=DictGetItemGuardAccessor(step) + | | | | | | +- ID_MATCH: ___check_obj_id(L['self'].state[list(L['self'].state.keys())[11]]['step'], 140561608004912) + | | | | | | +- TENSOR_MATCH: check_tensor(L['self'].state[list(L['self'].state.keys())[11]]['step'], Tensor, DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), torch.float32, device=0, requires_grad=False, size=[], stride=[]) + | | | | | | +- NO_HASATTR: hasattr(L['self'].state[list(L['self'].state.keys())[11]]['step'], '_dynamo_dynamic_indices') == False + | | | | | | +- NO_TENSOR_ALIASING + | | | | | +- GuardManager: source=L['self'].state[list(L['self'].state.keys())[11]]['exp_avg'], accessed_by=DictGetItemGuardAccessor(exp_avg) + | | | | | | +- ID_MATCH: ___check_obj_id(L['self'].state[list(L['self'].state.keys())[11]]['exp_avg'], 140561608005232) + | | | | | | +- TENSOR_MATCH: check_tensor(L['self'].state[list(L['self'].state.keys())[11]]['exp_avg'], Tensor, DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), torch.float32, device=0, requires_grad=False, size=[3072], stride=[1]) + | | | | | | +- NO_HASATTR: hasattr(L['self'].state[list(L['self'].state.keys())[11]]['exp_avg'], '_dynamo_dynamic_indices') == False + | | | | | | +- NO_TENSOR_ALIASING + | | | | | +- GuardManager: source=L['self'].state[list(L['self'].state.keys())[11]]['exp_avg_sq'], accessed_by=DictGetItemGuardAccessor(exp_avg_sq) + | | | | | | +- ID_MATCH: ___check_obj_id(L['self'].state[list(L['self'].state.keys())[11]]['exp_avg_sq'], 140561608005392) + | | | | | | +- TENSOR_MATCH: check_tensor(L['self'].state[list(L['self'].state.keys())[11]]['exp_avg_sq'], Tensor, DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), torch.float32, device=0, requires_grad=False, size=[3072], stride=[1]) + | | | | | | +- NO_HASATTR: hasattr(L['self'].state[list(L['self'].state.keys())[11]]['exp_avg_sq'], 
'_dynamo_dynamic_indices') == False + | | | | | | +- NO_TENSOR_ALIASING + | | | +- KeyValueManager pair at index=12 + | | | | +- ValueManager: GuardManager: source=L['self'].state[list(L['self'].state.keys())[12]] + | | | | | +- GuardManager: source=L['self'].state[list(L['self'].state.keys())[12]]['step'], accessed_by=DictGetItemGuardAccessor(step) + | | | | | | +- ID_MATCH: ___check_obj_id(L['self'].state[list(L['self'].state.keys())[12]]['step'], 140561608005712) + | | | | | | +- TENSOR_MATCH: check_tensor(L['self'].state[list(L['self'].state.keys())[12]]['step'], Tensor, DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), torch.float32, device=0, requires_grad=False, size=[], stride=[]) + | | | | | | +- NO_HASATTR: hasattr(L['self'].state[list(L['self'].state.keys())[12]]['step'], '_dynamo_dynamic_indices') == False + | | | | | | +- NO_TENSOR_ALIASING + | | | | | +- GuardManager: source=L['self'].state[list(L['self'].state.keys())[12]]['exp_avg'], accessed_by=DictGetItemGuardAccessor(exp_avg) + | | | | | | +- ID_MATCH: ___check_obj_id(L['self'].state[list(L['self'].state.keys())[12]]['exp_avg'], 140561608004592) + | | | | | | +- TENSOR_MATCH: check_tensor(L['self'].state[list(L['self'].state.keys())[12]]['exp_avg'], Tensor, DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), torch.float32, device=0, requires_grad=False, size=[768, 3072], stride=[3072, 1]) + | | | | | | +- NO_HASATTR: hasattr(L['self'].state[list(L['self'].state.keys())[12]]['exp_avg'], '_dynamo_dynamic_indices') == False + | | | | | | +- NO_TENSOR_ALIASING + | | | | | +- GuardManager: source=L['self'].state[list(L['self'].state.keys())[12]]['exp_avg_sq'], accessed_by=DictGetItemGuardAccessor(exp_avg_sq) + | | | | | | +- ID_MATCH: ___check_obj_id(L['self'].state[list(L['self'].state.keys())[12]]['exp_avg_sq'], 140561608005072) + | | | | | | +- TENSOR_MATCH: check_tensor(L['self'].state[list(L['self'].state.keys())[12]]['exp_avg_sq'], Tensor, DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), torch.float32, device=0, requires_grad=False, size=[768, 3072], stride=[3072, 1]) + | | | | | | +- NO_HASATTR: hasattr(L['self'].state[list(L['self'].state.keys())[12]]['exp_avg_sq'], '_dynamo_dynamic_indices') == False + | | | | | | +- NO_TENSOR_ALIASING + | | | +- KeyValueManager pair at index=13 + | | | | +- ValueManager: GuardManager: source=L['self'].state[list(L['self'].state.keys())[13]] + | | | | | +- GuardManager: source=L['self'].state[list(L['self'].state.keys())[13]]['step'], accessed_by=DictGetItemGuardAccessor(step) + | | | | | | +- ID_MATCH: ___check_obj_id(L['self'].state[list(L['self'].state.keys())[13]]['step'], 140561608005872) + | | | | | | +- TENSOR_MATCH: check_tensor(L['self'].state[list(L['self'].state.keys())[13]]['step'], Tensor, DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), torch.float32, device=0, requires_grad=False, size=[], stride=[]) + | | | | | | +- NO_HASATTR: hasattr(L['self'].state[list(L['self'].state.keys())[13]]['step'], '_dynamo_dynamic_indices') == False + | | | | | | +- NO_TENSOR_ALIASING + | | | | | +- GuardManager: source=L['self'].state[list(L['self'].state.keys())[13]]['exp_avg'], accessed_by=DictGetItemGuardAccessor(exp_avg) + | | | | | | +- ID_MATCH: ___check_obj_id(L['self'].state[list(L['self'].state.keys())[13]]['exp_avg'], 140561608006272) + | | | | | | +- TENSOR_MATCH: check_tensor(L['self'].state[list(L['self'].state.keys())[13]]['exp_avg'], Tensor, DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, 
AutogradCUDA), torch.float32, device=0, requires_grad=False, size=[768], stride=[1]) + | | | | | | +- NO_HASATTR: hasattr(L['self'].state[list(L['self'].state.keys())[13]]['exp_avg'], '_dynamo_dynamic_indices') == False + | | | | | | +- NO_TENSOR_ALIASING + | | | | | +- GuardManager: source=L['self'].state[list(L['self'].state.keys())[13]]['exp_avg_sq'], accessed_by=DictGetItemGuardAccessor(exp_avg_sq) + | | | | | | +- ID_MATCH: ___check_obj_id(L['self'].state[list(L['self'].state.keys())[13]]['exp_avg_sq'], 140561608005632) + | | | | | | +- TENSOR_MATCH: check_tensor(L['self'].state[list(L['self'].state.keys())[13]]['exp_avg_sq'], Tensor, DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), torch.float32, device=0, requires_grad=False, size=[768], stride=[1]) + | | | | | | +- NO_HASATTR: hasattr(L['self'].state[list(L['self'].state.keys())[13]]['exp_avg_sq'], '_dynamo_dynamic_indices') == False + | | | | | | +- NO_TENSOR_ALIASING + | | | +- KeyValueManager pair at index=14 + | | | | +- ValueManager: GuardManager: source=L['self'].state[list(L['self'].state.keys())[14]] + | | | | | +- GuardManager: source=L['self'].state[list(L['self'].state.keys())[14]]['step'], accessed_by=DictGetItemGuardAccessor(step) + | | | | | | +- ID_MATCH: ___check_obj_id(L['self'].state[list(L['self'].state.keys())[14]]['step'], 140561608004992) + | | | | | | +- TENSOR_MATCH: check_tensor(L['self'].state[list(L['self'].state.keys())[14]]['step'], Tensor, DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), torch.float32, device=0, requires_grad=False, size=[], stride=[]) + | | | | | | +- NO_HASATTR: hasattr(L['self'].state[list(L['self'].state.keys())[14]]['step'], '_dynamo_dynamic_indices') == False + | | | | | | +- NO_TENSOR_ALIASING + | | | | | +- GuardManager: source=L['self'].state[list(L['self'].state.keys())[14]]['exp_avg'], accessed_by=DictGetItemGuardAccessor(exp_avg) + | | | | | | +- ID_MATCH: ___check_obj_id(L['self'].state[list(L['self'].state.keys())[14]]['exp_avg'], 140561608005952) + | | | | | | +- TENSOR_MATCH: check_tensor(L['self'].state[list(L['self'].state.keys())[14]]['exp_avg'], Tensor, DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), torch.float32, device=0, requires_grad=False, size=[768], stride=[1]) + | | | | | | +- NO_HASATTR: hasattr(L['self'].state[list(L['self'].state.keys())[14]]['exp_avg'], '_dynamo_dynamic_indices') == False + | | | | | | +- NO_TENSOR_ALIASING + | | | | | +- GuardManager: source=L['self'].state[list(L['self'].state.keys())[14]]['exp_avg_sq'], accessed_by=DictGetItemGuardAccessor(exp_avg_sq) + | | | | | | +- ID_MATCH: ___check_obj_id(L['self'].state[list(L['self'].state.keys())[14]]['exp_avg_sq'], 140561608006192) + | | | | | | +- TENSOR_MATCH: check_tensor(L['self'].state[list(L['self'].state.keys())[14]]['exp_avg_sq'], Tensor, DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), torch.float32, device=0, requires_grad=False, size=[768], stride=[1]) + | | | | | | +- NO_HASATTR: hasattr(L['self'].state[list(L['self'].state.keys())[14]]['exp_avg_sq'], '_dynamo_dynamic_indices') == False + | | | | | | +- NO_TENSOR_ALIASING + | | | +- KeyValueManager pair at index=15 + | | | | +- ValueManager: GuardManager: source=L['self'].state[list(L['self'].state.keys())[15]] + | | | | | +- GuardManager: source=L['self'].state[list(L['self'].state.keys())[15]]['step'], accessed_by=DictGetItemGuardAccessor(step) + | | | | | | +- ID_MATCH: ___check_obj_id(L['self'].state[list(L['self'].state.keys())[15]]['step'], 
140561608006592) + | | | | | | +- TENSOR_MATCH: check_tensor(L['self'].state[list(L['self'].state.keys())[15]]['step'], Tensor, DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), torch.float32, device=0, requires_grad=False, size=[], stride=[]) + | | | | | | +- NO_HASATTR: hasattr(L['self'].state[list(L['self'].state.keys())[15]]['step'], '_dynamo_dynamic_indices') == False + | | | | | | +- NO_TENSOR_ALIASING + | | | | | +- GuardManager: source=L['self'].state[list(L['self'].state.keys())[15]]['exp_avg'], accessed_by=DictGetItemGuardAccessor(exp_avg) + | | | | | | +- ID_MATCH: ___check_obj_id(L['self'].state[list(L['self'].state.keys())[15]]['exp_avg'], 140561608006352) + | | | | | | +- TENSOR_MATCH: check_tensor(L['self'].state[list(L['self'].state.keys())[15]]['exp_avg'], Tensor, DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), torch.float32, device=0, requires_grad=False, size=[768], stride=[1]) + | | | | | | +- NO_HASATTR: hasattr(L['self'].state[list(L['self'].state.keys())[15]]['exp_avg'], '_dynamo_dynamic_indices') == False + | | | | | | +- NO_TENSOR_ALIASING + | | | | | +- GuardManager: source=L['self'].state[list(L['self'].state.keys())[15]]['exp_avg_sq'], accessed_by=DictGetItemGuardAccessor(exp_avg_sq) + | | | | | | +- ID_MATCH: ___check_obj_id(L['self'].state[list(L['self'].state.keys())[15]]['exp_avg_sq'], 140561608006432) + | | | | | | +- TENSOR_MATCH: check_tensor(L['self'].state[list(L['self'].state.keys())[15]]['exp_avg_sq'], Tensor, DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), torch.float32, device=0, requires_grad=False, size=[768], stride=[1]) + | | | | | | +- NO_HASATTR: hasattr(L['self'].state[list(L['self'].state.keys())[15]]['exp_avg_sq'], '_dynamo_dynamic_indices') == False + | | | | | | +- NO_TENSOR_ALIASING + | | | +- KeyValueManager pair at index=16 + | | | | +- ValueManager: GuardManager: source=L['self'].state[list(L['self'].state.keys())[16]] + | | | | | +- GuardManager: source=L['self'].state[list(L['self'].state.keys())[16]]['step'], accessed_by=DictGetItemGuardAccessor(step) + | | | | | | +- ID_MATCH: ___check_obj_id(L['self'].state[list(L['self'].state.keys())[16]]['step'], 140561608006832) + | | | | | | +- TENSOR_MATCH: check_tensor(L['self'].state[list(L['self'].state.keys())[16]]['step'], Tensor, DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), torch.float32, device=0, requires_grad=False, size=[], stride=[]) + | | | | | | +- NO_HASATTR: hasattr(L['self'].state[list(L['self'].state.keys())[16]]['step'], '_dynamo_dynamic_indices') == False + | | | | | | +- NO_TENSOR_ALIASING + | | | | | +- GuardManager: source=L['self'].state[list(L['self'].state.keys())[16]]['exp_avg'], accessed_by=DictGetItemGuardAccessor(exp_avg) + | | | | | | +- ID_MATCH: ___check_obj_id(L['self'].state[list(L['self'].state.keys())[16]]['exp_avg'], 140561608007152) + | | | | | | +- TENSOR_MATCH: check_tensor(L['self'].state[list(L['self'].state.keys())[16]]['exp_avg'], Tensor, DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), torch.float32, device=0, requires_grad=False, size=[2304, 768], stride=[768, 1]) + | | | | | | +- NO_HASATTR: hasattr(L['self'].state[list(L['self'].state.keys())[16]]['exp_avg'], '_dynamo_dynamic_indices') == False + | | | | | | +- NO_TENSOR_ALIASING + | | | | | +- GuardManager: source=L['self'].state[list(L['self'].state.keys())[16]]['exp_avg_sq'], accessed_by=DictGetItemGuardAccessor(exp_avg_sq) + | | | | | | +- ID_MATCH: 
___check_obj_id(L['self'].state[list(L['self'].state.keys())[16]]['exp_avg_sq'], 140561608006992) + | | | | | | +- TENSOR_MATCH: check_tensor(L['self'].state[list(L['self'].state.keys())[16]]['exp_avg_sq'], Tensor, DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), torch.float32, device=0, requires_grad=False, size=[2304, 768], stride=[768, 1]) + | | | | | | +- NO_HASATTR: hasattr(L['self'].state[list(L['self'].state.keys())[16]]['exp_avg_sq'], '_dynamo_dynamic_indices') == False + | | | | | | +- NO_TENSOR_ALIASING + | | | +- KeyValueManager pair at index=17 + | | | | +- ValueManager: GuardManager: source=L['self'].state[list(L['self'].state.keys())[17]] + | | | | | +- GuardManager: source=L['self'].state[list(L['self'].state.keys())[17]]['step'], accessed_by=DictGetItemGuardAccessor(step) + | | | | | | +- ID_MATCH: ___check_obj_id(L['self'].state[list(L['self'].state.keys())[17]]['step'], 140561608006912) + | | | | | | +- TENSOR_MATCH: check_tensor(L['self'].state[list(L['self'].state.keys())[17]]['step'], Tensor, DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), torch.float32, device=0, requires_grad=False, size=[], stride=[]) + | | | | | | +- NO_HASATTR: hasattr(L['self'].state[list(L['self'].state.keys())[17]]['step'], '_dynamo_dynamic_indices') == False + | | | | | | +- NO_TENSOR_ALIASING + | | | | | +- GuardManager: source=L['self'].state[list(L['self'].state.keys())[17]]['exp_avg'], accessed_by=DictGetItemGuardAccessor(exp_avg) + | | | | | | +- ID_MATCH: ___check_obj_id(L['self'].state[list(L['self'].state.keys())[17]]['exp_avg'], 140561608006752) + | | | | | | +- TENSOR_MATCH: check_tensor(L['self'].state[list(L['self'].state.keys())[17]]['exp_avg'], Tensor, DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), torch.float32, device=0, requires_grad=False, size=[2304], stride=[1]) + | | | | | | +- NO_HASATTR: hasattr(L['self'].state[list(L['self'].state.keys())[17]]['exp_avg'], '_dynamo_dynamic_indices') == False + | | | | | | +- NO_TENSOR_ALIASING + | | | | | +- GuardManager: source=L['self'].state[list(L['self'].state.keys())[17]]['exp_avg_sq'], accessed_by=DictGetItemGuardAccessor(exp_avg_sq) + | | | | | | +- ID_MATCH: ___check_obj_id(L['self'].state[list(L['self'].state.keys())[17]]['exp_avg_sq'], 140561608007552) + | | | | | | +- TENSOR_MATCH: check_tensor(L['self'].state[list(L['self'].state.keys())[17]]['exp_avg_sq'], Tensor, DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), torch.float32, device=0, requires_grad=False, size=[2304], stride=[1]) + | | | | | | +- NO_HASATTR: hasattr(L['self'].state[list(L['self'].state.keys())[17]]['exp_avg_sq'], '_dynamo_dynamic_indices') == False + | | | | | | +- NO_TENSOR_ALIASING + | | | +- KeyValueManager pair at index=18 + | | | | +- ValueManager: GuardManager: source=L['self'].state[list(L['self'].state.keys())[18]] + | | | | | +- GuardManager: source=L['self'].state[list(L['self'].state.keys())[18]]['step'], accessed_by=DictGetItemGuardAccessor(step) + | | | | | | +- ID_MATCH: ___check_obj_id(L['self'].state[list(L['self'].state.keys())[18]]['step'], 140561608006672) + | | | | | | +- TENSOR_MATCH: check_tensor(L['self'].state[list(L['self'].state.keys())[18]]['step'], Tensor, DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), torch.float32, device=0, requires_grad=False, size=[], stride=[]) + | | | | | | +- NO_HASATTR: hasattr(L['self'].state[list(L['self'].state.keys())[18]]['step'], '_dynamo_dynamic_indices') == False + | | | | | | +- 
NO_TENSOR_ALIASING + | | | | | +- GuardManager: source=L['self'].state[list(L['self'].state.keys())[18]]['exp_avg'], accessed_by=DictGetItemGuardAccessor(exp_avg) + | | | | | | +- ID_MATCH: ___check_obj_id(L['self'].state[list(L['self'].state.keys())[18]]['exp_avg'], 140561608007072) + | | | | | | +- TENSOR_MATCH: check_tensor(L['self'].state[list(L['self'].state.keys())[18]]['exp_avg'], Tensor, DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), torch.float32, device=0, requires_grad=False, size=[768, 768], stride=[768, 1]) + | | | | | | +- NO_HASATTR: hasattr(L['self'].state[list(L['self'].state.keys())[18]]['exp_avg'], '_dynamo_dynamic_indices') == False + | | | | | | +- NO_TENSOR_ALIASING + | | | | | +- GuardManager: source=L['self'].state[list(L['self'].state.keys())[18]]['exp_avg_sq'], accessed_by=DictGetItemGuardAccessor(exp_avg_sq) + | | | | | | +- ID_MATCH: ___check_obj_id(L['self'].state[list(L['self'].state.keys())[18]]['exp_avg_sq'], 140561608007392) + | | | | | | +- TENSOR_MATCH: check_tensor(L['self'].state[list(L['self'].state.keys())[18]]['exp_avg_sq'], Tensor, DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), torch.float32, device=0, requires_grad=False, size=[768, 768], stride=[768, 1]) + | | | | | | +- NO_HASATTR: hasattr(L['self'].state[list(L['self'].state.keys())[18]]['exp_avg_sq'], '_dynamo_dynamic_indices') == False + | | | | | | +- NO_TENSOR_ALIASING + | | | +- KeyValueManager pair at index=19 + | | | | +- ValueManager: GuardManager: source=L['self'].state[list(L['self'].state.keys())[19]] + | | | | | +- GuardManager: source=L['self'].state[list(L['self'].state.keys())[19]]['step'], accessed_by=DictGetItemGuardAccessor(step) + | | | | | | +- ID_MATCH: ___check_obj_id(L['self'].state[list(L['self'].state.keys())[19]]['step'], 140561608007712) + | | | | | | +- TENSOR_MATCH: check_tensor(L['self'].state[list(L['self'].state.keys())[19]]['step'], Tensor, DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), torch.float32, device=0, requires_grad=False, size=[], stride=[]) + | | | | | | +- NO_HASATTR: hasattr(L['self'].state[list(L['self'].state.keys())[19]]['step'], '_dynamo_dynamic_indices') == False + | | | | | | +- NO_TENSOR_ALIASING + | | | | | +- GuardManager: source=L['self'].state[list(L['self'].state.keys())[19]]['exp_avg'], accessed_by=DictGetItemGuardAccessor(exp_avg) + | | | | | | +- ID_MATCH: ___check_obj_id(L['self'].state[list(L['self'].state.keys())[19]]['exp_avg'], 140561608007232) + | | | | | | +- TENSOR_MATCH: check_tensor(L['self'].state[list(L['self'].state.keys())[19]]['exp_avg'], Tensor, DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), torch.float32, device=0, requires_grad=False, size=[768], stride=[1]) + | | | | | | +- NO_HASATTR: hasattr(L['self'].state[list(L['self'].state.keys())[19]]['exp_avg'], '_dynamo_dynamic_indices') == False + | | | | | | +- NO_TENSOR_ALIASING + | | | | | +- GuardManager: source=L['self'].state[list(L['self'].state.keys())[19]]['exp_avg_sq'], accessed_by=DictGetItemGuardAccessor(exp_avg_sq) + | | | | | | +- ID_MATCH: ___check_obj_id(L['self'].state[list(L['self'].state.keys())[19]]['exp_avg_sq'], 140561608007952) + | | | | | | +- TENSOR_MATCH: check_tensor(L['self'].state[list(L['self'].state.keys())[19]]['exp_avg_sq'], Tensor, DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), torch.float32, device=0, requires_grad=False, size=[768], stride=[1]) + | | | | | | +- NO_HASATTR: 
hasattr(L['self'].state[list(L['self'].state.keys())[19]]['exp_avg_sq'], '_dynamo_dynamic_indices') == False + | | | | | | +- NO_TENSOR_ALIASING + | | | +- KeyValueManager pair at index=20 + | | | | +- ValueManager: GuardManager: source=L['self'].state[list(L['self'].state.keys())[20]] + | | | | | +- GuardManager: source=L['self'].state[list(L['self'].state.keys())[20]]['step'], accessed_by=DictGetItemGuardAccessor(step) + | | | | | | +- ID_MATCH: ___check_obj_id(L['self'].state[list(L['self'].state.keys())[20]]['step'], 140561608007632) + | | | | | | +- TENSOR_MATCH: check_tensor(L['self'].state[list(L['self'].state.keys())[20]]['step'], Tensor, DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), torch.float32, device=0, requires_grad=False, size=[], stride=[]) + | | | | | | +- NO_HASATTR: hasattr(L['self'].state[list(L['self'].state.keys())[20]]['step'], '_dynamo_dynamic_indices') == False + | | | | | | +- NO_TENSOR_ALIASING + | | | | | +- GuardManager: source=L['self'].state[list(L['self'].state.keys())[20]]['exp_avg'], accessed_by=DictGetItemGuardAccessor(exp_avg) + | | | | | | +- ID_MATCH: ___check_obj_id(L['self'].state[list(L['self'].state.keys())[20]]['exp_avg'], 140561608008112) + | | | | | | +- TENSOR_MATCH: check_tensor(L['self'].state[list(L['self'].state.keys())[20]]['exp_avg'], Tensor, DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), torch.float32, device=0, requires_grad=False, size=[768], stride=[1]) + | | | | | | +- NO_HASATTR: hasattr(L['self'].state[list(L['self'].state.keys())[20]]['exp_avg'], '_dynamo_dynamic_indices') == False + | | | | | | +- NO_TENSOR_ALIASING + | | | | | +- GuardManager: source=L['self'].state[list(L['self'].state.keys())[20]]['exp_avg_sq'], accessed_by=DictGetItemGuardAccessor(exp_avg_sq) + | | | | | | +- ID_MATCH: ___check_obj_id(L['self'].state[list(L['self'].state.keys())[20]]['exp_avg_sq'], 140561608008512) + | | | | | | +- TENSOR_MATCH: check_tensor(L['self'].state[list(L['self'].state.keys())[20]]['exp_avg_sq'], Tensor, DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), torch.float32, device=0, requires_grad=False, size=[768], stride=[1]) + | | | | | | +- NO_HASATTR: hasattr(L['self'].state[list(L['self'].state.keys())[20]]['exp_avg_sq'], '_dynamo_dynamic_indices') == False + | | | | | | +- NO_TENSOR_ALIASING + | | | +- KeyValueManager pair at index=21 + | | | | +- ValueManager: GuardManager: source=L['self'].state[list(L['self'].state.keys())[21]] + | | | | | +- GuardManager: source=L['self'].state[list(L['self'].state.keys())[21]]['step'], accessed_by=DictGetItemGuardAccessor(step) + | | | | | | +- ID_MATCH: ___check_obj_id(L['self'].state[list(L['self'].state.keys())[21]]['step'], 140561608008592) + | | | | | | +- TENSOR_MATCH: check_tensor(L['self'].state[list(L['self'].state.keys())[21]]['step'], Tensor, DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), torch.float32, device=0, requires_grad=False, size=[], stride=[]) + | | | | | | +- NO_HASATTR: hasattr(L['self'].state[list(L['self'].state.keys())[21]]['step'], '_dynamo_dynamic_indices') == False + | | | | | | +- NO_TENSOR_ALIASING + | | | | | +- GuardManager: source=L['self'].state[list(L['self'].state.keys())[21]]['exp_avg'], accessed_by=DictGetItemGuardAccessor(exp_avg) + | | | | | | +- ID_MATCH: ___check_obj_id(L['self'].state[list(L['self'].state.keys())[21]]['exp_avg'], 140561608007872) + | | | | | | +- TENSOR_MATCH: check_tensor(L['self'].state[list(L['self'].state.keys())[21]]['exp_avg'], Tensor, 
DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), torch.float32, device=0, requires_grad=False, size=[768], stride=[1]) + | | | | | | +- NO_HASATTR: hasattr(L['self'].state[list(L['self'].state.keys())[21]]['exp_avg'], '_dynamo_dynamic_indices') == False + | | | | | | +- NO_TENSOR_ALIASING + | | | | | +- GuardManager: source=L['self'].state[list(L['self'].state.keys())[21]]['exp_avg_sq'], accessed_by=DictGetItemGuardAccessor(exp_avg_sq) + | | | | | | +- ID_MATCH: ___check_obj_id(L['self'].state[list(L['self'].state.keys())[21]]['exp_avg_sq'], 140561608008272) + | | | | | | +- TENSOR_MATCH: check_tensor(L['self'].state[list(L['self'].state.keys())[21]]['exp_avg_sq'], Tensor, DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), torch.float32, device=0, requires_grad=False, size=[768], stride=[1]) + | | | | | | +- NO_HASATTR: hasattr(L['self'].state[list(L['self'].state.keys())[21]]['exp_avg_sq'], '_dynamo_dynamic_indices') == False + | | | | | | +- NO_TENSOR_ALIASING + | | | +- KeyValueManager pair at index=22 + | | | | +- ValueManager: GuardManager: source=L['self'].state[list(L['self'].state.keys())[22]] + | | | | | +- GuardManager: source=L['self'].state[list(L['self'].state.keys())[22]]['step'], accessed_by=DictGetItemGuardAccessor(step) + | | | | | | +- ID_MATCH: ___check_obj_id(L['self'].state[list(L['self'].state.keys())[22]]['step'], 140561608007792) + | | | | | | +- TENSOR_MATCH: check_tensor(L['self'].state[list(L['self'].state.keys())[22]]['step'], Tensor, DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), torch.float32, device=0, requires_grad=False, size=[], stride=[]) + | | | | | | +- NO_HASATTR: hasattr(L['self'].state[list(L['self'].state.keys())[22]]['step'], '_dynamo_dynamic_indices') == False + | | | | | | +- NO_TENSOR_ALIASING + | | | | | +- GuardManager: source=L['self'].state[list(L['self'].state.keys())[22]]['exp_avg'], accessed_by=DictGetItemGuardAccessor(exp_avg) + | | | | | | +- ID_MATCH: ___check_obj_id(L['self'].state[list(L['self'].state.keys())[22]]['exp_avg'], 140561608009152) + | | | | | | +- TENSOR_MATCH: check_tensor(L['self'].state[list(L['self'].state.keys())[22]]['exp_avg'], Tensor, DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), torch.float32, device=0, requires_grad=False, size=[3072, 768], stride=[768, 1]) + | | | | | | +- NO_HASATTR: hasattr(L['self'].state[list(L['self'].state.keys())[22]]['exp_avg'], '_dynamo_dynamic_indices') == False + | | | | | | +- NO_TENSOR_ALIASING + | | | | | +- GuardManager: source=L['self'].state[list(L['self'].state.keys())[22]]['exp_avg_sq'], accessed_by=DictGetItemGuardAccessor(exp_avg_sq) + | | | | | | +- ID_MATCH: ___check_obj_id(L['self'].state[list(L['self'].state.keys())[22]]['exp_avg_sq'], 140561608008832) + | | | | | | +- TENSOR_MATCH: check_tensor(L['self'].state[list(L['self'].state.keys())[22]]['exp_avg_sq'], Tensor, DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), torch.float32, device=0, requires_grad=False, size=[3072, 768], stride=[768, 1]) + | | | | | | +- NO_HASATTR: hasattr(L['self'].state[list(L['self'].state.keys())[22]]['exp_avg_sq'], '_dynamo_dynamic_indices') == False + | | | | | | +- NO_TENSOR_ALIASING + | | | +- KeyValueManager pair at index=23 + | | | | +- ValueManager: GuardManager: source=L['self'].state[list(L['self'].state.keys())[23]] + | | | | | +- GuardManager: source=L['self'].state[list(L['self'].state.keys())[23]]['step'], accessed_by=DictGetItemGuardAccessor(step) + | | | | | | +- ID_MATCH: 
___check_obj_id(L['self'].state[list(L['self'].state.keys())[23]]['step'], 140561608008432) + | | | | | | +- TENSOR_MATCH: check_tensor(L['self'].state[list(L['self'].state.keys())[23]]['step'], Tensor, DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), torch.float32, device=0, requires_grad=False, size=[], stride=[]) + | | | | | | +- NO_HASATTR: hasattr(L['self'].state[list(L['self'].state.keys())[23]]['step'], '_dynamo_dynamic_indices') == False + | | | | | | +- NO_TENSOR_ALIASING + | | | | | +- GuardManager: source=L['self'].state[list(L['self'].state.keys())[23]]['exp_avg'], accessed_by=DictGetItemGuardAccessor(exp_avg) + | | | | | | +- ID_MATCH: ___check_obj_id(L['self'].state[list(L['self'].state.keys())[23]]['exp_avg'], 140561608009072) + | | | | | | +- TENSOR_MATCH: check_tensor(L['self'].state[list(L['self'].state.keys())[23]]['exp_avg'], Tensor, DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), torch.float32, device=0, requires_grad=False, size=[3072], stride=[1]) + | | | | | | +- NO_HASATTR: hasattr(L['self'].state[list(L['self'].state.keys())[23]]['exp_avg'], '_dynamo_dynamic_indices') == False + | | | | | | +- NO_TENSOR_ALIASING + | | | | | +- GuardManager: source=L['self'].state[list(L['self'].state.keys())[23]]['exp_avg_sq'], accessed_by=DictGetItemGuardAccessor(exp_avg_sq) + | | | | | | +- ID_MATCH: ___check_obj_id(L['self'].state[list(L['self'].state.keys())[23]]['exp_avg_sq'], 140561608009392) + | | | | | | +- TENSOR_MATCH: check_tensor(L['self'].state[list(L['self'].state.keys())[23]]['exp_avg_sq'], Tensor, DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), torch.float32, device=0, requires_grad=False, size=[3072], stride=[1]) + | | | | | | +- NO_HASATTR: hasattr(L['self'].state[list(L['self'].state.keys())[23]]['exp_avg_sq'], '_dynamo_dynamic_indices') == False + | | | | | | +- NO_TENSOR_ALIASING + | | | +- KeyValueManager pair at index=24 + | | | | +- ValueManager: GuardManager: source=L['self'].state[list(L['self'].state.keys())[24]] + | | | | | +- GuardManager: source=L['self'].state[list(L['self'].state.keys())[24]]['step'], accessed_by=DictGetItemGuardAccessor(step) + | | | | | | +- ID_MATCH: ___check_obj_id(L['self'].state[list(L['self'].state.keys())[24]]['step'], 140561608009312) + | | | | | | +- TENSOR_MATCH: check_tensor(L['self'].state[list(L['self'].state.keys())[24]]['step'], Tensor, DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), torch.float32, device=0, requires_grad=False, size=[], stride=[]) + | | | | | | +- NO_HASATTR: hasattr(L['self'].state[list(L['self'].state.keys())[24]]['step'], '_dynamo_dynamic_indices') == False + | | | | | | +- NO_TENSOR_ALIASING + | | | | | +- GuardManager: source=L['self'].state[list(L['self'].state.keys())[24]]['exp_avg'], accessed_by=DictGetItemGuardAccessor(exp_avg) + | | | | | | +- ID_MATCH: ___check_obj_id(L['self'].state[list(L['self'].state.keys())[24]]['exp_avg'], 140561608009232) + | | | | | | +- TENSOR_MATCH: check_tensor(L['self'].state[list(L['self'].state.keys())[24]]['exp_avg'], Tensor, DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), torch.float32, device=0, requires_grad=False, size=[768, 3072], stride=[3072, 1]) + | | | | | | +- NO_HASATTR: hasattr(L['self'].state[list(L['self'].state.keys())[24]]['exp_avg'], '_dynamo_dynamic_indices') == False + | | | | | | +- NO_TENSOR_ALIASING + | | | | | +- GuardManager: source=L['self'].state[list(L['self'].state.keys())[24]]['exp_avg_sq'], 
accessed_by=DictGetItemGuardAccessor(exp_avg_sq) + | | | | | | +- ID_MATCH: ___check_obj_id(L['self'].state[list(L['self'].state.keys())[24]]['exp_avg_sq'], 140561608008352) + | | | | | | +- TENSOR_MATCH: check_tensor(L['self'].state[list(L['self'].state.keys())[24]]['exp_avg_sq'], Tensor, DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), torch.float32, device=0, requires_grad=False, size=[768, 3072], stride=[3072, 1]) + | | | | | | +- NO_HASATTR: hasattr(L['self'].state[list(L['self'].state.keys())[24]]['exp_avg_sq'], '_dynamo_dynamic_indices') == False + | | | | | | +- NO_TENSOR_ALIASING + | | | +- KeyValueManager pair at index=25 + | | | | +- ValueManager: GuardManager: source=L['self'].state[list(L['self'].state.keys())[25]] + | | | | | +- GuardManager: source=L['self'].state[list(L['self'].state.keys())[25]]['step'], accessed_by=DictGetItemGuardAccessor(step) + | | | | | | +- ID_MATCH: ___check_obj_id(L['self'].state[list(L['self'].state.keys())[25]]['step'], 140561608009712) + | | | | | | +- TENSOR_MATCH: check_tensor(L['self'].state[list(L['self'].state.keys())[25]]['step'], Tensor, DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), torch.float32, device=0, requires_grad=False, size=[], stride=[]) + | | | | | | +- NO_HASATTR: hasattr(L['self'].state[list(L['self'].state.keys())[25]]['step'], '_dynamo_dynamic_indices') == False + | | | | | | +- NO_TENSOR_ALIASING + | | | | | +- GuardManager: source=L['self'].state[list(L['self'].state.keys())[25]]['exp_avg'], accessed_by=DictGetItemGuardAccessor(exp_avg) + | | | | | | +- ID_MATCH: ___check_obj_id(L['self'].state[list(L['self'].state.keys())[25]]['exp_avg'], 140561608009872) + | | | | | | +- TENSOR_MATCH: check_tensor(L['self'].state[list(L['self'].state.keys())[25]]['exp_avg'], Tensor, DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), torch.float32, device=0, requires_grad=False, size=[768], stride=[1]) + | | | | | | +- NO_HASATTR: hasattr(L['self'].state[list(L['self'].state.keys())[25]]['exp_avg'], '_dynamo_dynamic_indices') == False + | | | | | | +- NO_TENSOR_ALIASING + | | | | | +- GuardManager: source=L['self'].state[list(L['self'].state.keys())[25]]['exp_avg_sq'], accessed_by=DictGetItemGuardAccessor(exp_avg_sq) + | | | | | | +- ID_MATCH: ___check_obj_id(L['self'].state[list(L['self'].state.keys())[25]]['exp_avg_sq'], 140561608008912) + | | | | | | +- TENSOR_MATCH: check_tensor(L['self'].state[list(L['self'].state.keys())[25]]['exp_avg_sq'], Tensor, DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), torch.float32, device=0, requires_grad=False, size=[768], stride=[1]) + | | | | | | +- NO_HASATTR: hasattr(L['self'].state[list(L['self'].state.keys())[25]]['exp_avg_sq'], '_dynamo_dynamic_indices') == False + | | | | | | +- NO_TENSOR_ALIASING + | | | +- KeyValueManager pair at index=26 + | | | | +- ValueManager: GuardManager: source=L['self'].state[list(L['self'].state.keys())[26]] + | | | | | +- GuardManager: source=L['self'].state[list(L['self'].state.keys())[26]]['step'], accessed_by=DictGetItemGuardAccessor(step) + | | | | | | +- ID_MATCH: ___check_obj_id(L['self'].state[list(L['self'].state.keys())[26]]['step'], 140561608009792) + | | | | | | +- TENSOR_MATCH: check_tensor(L['self'].state[list(L['self'].state.keys())[26]]['step'], Tensor, DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), torch.float32, device=0, requires_grad=False, size=[], stride=[]) + | | | | | | +- NO_HASATTR: 
hasattr(L['self'].state[list(L['self'].state.keys())[26]]['step'], '_dynamo_dynamic_indices') == False + | | | | | | +- NO_TENSOR_ALIASING + | | | | | +- GuardManager: source=L['self'].state[list(L['self'].state.keys())[26]]['exp_avg'], accessed_by=DictGetItemGuardAccessor(exp_avg) + | | | | | | +- ID_MATCH: ___check_obj_id(L['self'].state[list(L['self'].state.keys())[26]]['exp_avg'], 140561608009952) + | | | | | | +- TENSOR_MATCH: check_tensor(L['self'].state[list(L['self'].state.keys())[26]]['exp_avg'], Tensor, DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), torch.float32, device=0, requires_grad=False, size=[768], stride=[1]) + | | | | | | +- NO_HASATTR: hasattr(L['self'].state[list(L['self'].state.keys())[26]]['exp_avg'], '_dynamo_dynamic_indices') == False + | | | | | | +- NO_TENSOR_ALIASING + | | | | | +- GuardManager: source=L['self'].state[list(L['self'].state.keys())[26]]['exp_avg_sq'], accessed_by=DictGetItemGuardAccessor(exp_avg_sq) + | | | | | | +- ID_MATCH: ___check_obj_id(L['self'].state[list(L['self'].state.keys())[26]]['exp_avg_sq'], 140561608010112) + | | | | | | +- TENSOR_MATCH: check_tensor(L['self'].state[list(L['self'].state.keys())[26]]['exp_avg_sq'], Tensor, DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), torch.float32, device=0, requires_grad=False, size=[768], stride=[1]) + | | | | | | +- NO_HASATTR: hasattr(L['self'].state[list(L['self'].state.keys())[26]]['exp_avg_sq'], '_dynamo_dynamic_indices') == False + | | | | | | +- NO_TENSOR_ALIASING + | | | +- KeyValueManager pair at index=27 + | | | | +- ValueManager: GuardManager: source=L['self'].state[list(L['self'].state.keys())[27]] + | | | | | +- GuardManager: source=L['self'].state[list(L['self'].state.keys())[27]]['step'], accessed_by=DictGetItemGuardAccessor(step) + | | | | | | +- ID_MATCH: ___check_obj_id(L['self'].state[list(L['self'].state.keys())[27]]['step'], 140561608010432) + | | | | | | +- TENSOR_MATCH: check_tensor(L['self'].state[list(L['self'].state.keys())[27]]['step'], Tensor, DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), torch.float32, device=0, requires_grad=False, size=[], stride=[]) + | | | | | | +- NO_HASATTR: hasattr(L['self'].state[list(L['self'].state.keys())[27]]['step'], '_dynamo_dynamic_indices') == False + | | | | | | +- NO_TENSOR_ALIASING + | | | | | +- GuardManager: source=L['self'].state[list(L['self'].state.keys())[27]]['exp_avg'], accessed_by=DictGetItemGuardAccessor(exp_avg) + | | | | | | +- ID_MATCH: ___check_obj_id(L['self'].state[list(L['self'].state.keys())[27]]['exp_avg'], 140561608010192) + | | | | | | +- TENSOR_MATCH: check_tensor(L['self'].state[list(L['self'].state.keys())[27]]['exp_avg'], Tensor, DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), torch.float32, device=0, requires_grad=False, size=[768], stride=[1]) + | | | | | | +- NO_HASATTR: hasattr(L['self'].state[list(L['self'].state.keys())[27]]['exp_avg'], '_dynamo_dynamic_indices') == False + | | | | | | +- NO_TENSOR_ALIASING + | | | | | +- GuardManager: source=L['self'].state[list(L['self'].state.keys())[27]]['exp_avg_sq'], accessed_by=DictGetItemGuardAccessor(exp_avg_sq) + | | | | | | +- ID_MATCH: ___check_obj_id(L['self'].state[list(L['self'].state.keys())[27]]['exp_avg_sq'], 140561608010272) + | | | | | | +- TENSOR_MATCH: check_tensor(L['self'].state[list(L['self'].state.keys())[27]]['exp_avg_sq'], Tensor, DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), torch.float32, device=0, requires_grad=False, size=[768], 
stride=[1]) + | | | | | | +- NO_HASATTR: hasattr(L['self'].state[list(L['self'].state.keys())[27]]['exp_avg_sq'], '_dynamo_dynamic_indices') == False + | | | | | | +- NO_TENSOR_ALIASING + | | | +- KeyValueManager pair at index=28 + | | | | +- ValueManager: GuardManager: source=L['self'].state[list(L['self'].state.keys())[28]] + | | | | | +- GuardManager: source=L['self'].state[list(L['self'].state.keys())[28]]['step'], accessed_by=DictGetItemGuardAccessor(step) + | | | | | | +- ID_MATCH: ___check_obj_id(L['self'].state[list(L['self'].state.keys())[28]]['step'], 140561608010592) + | | | | | | +- TENSOR_MATCH: check_tensor(L['self'].state[list(L['self'].state.keys())[28]]['step'], Tensor, DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), torch.float32, device=0, requires_grad=False, size=[], stride=[]) + | | | | | | +- NO_HASATTR: hasattr(L['self'].state[list(L['self'].state.keys())[28]]['step'], '_dynamo_dynamic_indices') == False + | | | | | | +- NO_TENSOR_ALIASING + | | | | | +- GuardManager: source=L['self'].state[list(L['self'].state.keys())[28]]['exp_avg'], accessed_by=DictGetItemGuardAccessor(exp_avg) + | | | | | | +- ID_MATCH: ___check_obj_id(L['self'].state[list(L['self'].state.keys())[28]]['exp_avg'], 140561608011072) + | | | | | | +- TENSOR_MATCH: check_tensor(L['self'].state[list(L['self'].state.keys())[28]]['exp_avg'], Tensor, DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), torch.float32, device=0, requires_grad=False, size=[2304, 768], stride=[768, 1]) + | | | | | | +- NO_HASATTR: hasattr(L['self'].state[list(L['self'].state.keys())[28]]['exp_avg'], '_dynamo_dynamic_indices') == False + | | | | | | +- NO_TENSOR_ALIASING + | | | | | +- GuardManager: source=L['self'].state[list(L['self'].state.keys())[28]]['exp_avg_sq'], accessed_by=DictGetItemGuardAccessor(exp_avg_sq) + | | | | | | +- ID_MATCH: ___check_obj_id(L['self'].state[list(L['self'].state.keys())[28]]['exp_avg_sq'], 140561608010752) + | | | | | | +- TENSOR_MATCH: check_tensor(L['self'].state[list(L['self'].state.keys())[28]]['exp_avg_sq'], Tensor, DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), torch.float32, device=0, requires_grad=False, size=[2304, 768], stride=[768, 1]) + | | | | | | +- NO_HASATTR: hasattr(L['self'].state[list(L['self'].state.keys())[28]]['exp_avg_sq'], '_dynamo_dynamic_indices') == False + | | | | | | +- NO_TENSOR_ALIASING + | | | +- KeyValueManager pair at index=29 + | | | | +- ValueManager: GuardManager: source=L['self'].state[list(L['self'].state.keys())[29]] + | | | | | +- GuardManager: source=L['self'].state[list(L['self'].state.keys())[29]]['step'], accessed_by=DictGetItemGuardAccessor(step) + | | | | | | +- ID_MATCH: ___check_obj_id(L['self'].state[list(L['self'].state.keys())[29]]['step'], 140561608004672) + | | | | | | +- TENSOR_MATCH: check_tensor(L['self'].state[list(L['self'].state.keys())[29]]['step'], Tensor, DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), torch.float32, device=0, requires_grad=False, size=[], stride=[]) + | | | | | | +- NO_HASATTR: hasattr(L['self'].state[list(L['self'].state.keys())[29]]['step'], '_dynamo_dynamic_indices') == False + | | | | | | +- NO_TENSOR_ALIASING + | | | | | +- GuardManager: source=L['self'].state[list(L['self'].state.keys())[29]]['exp_avg'], accessed_by=DictGetItemGuardAccessor(exp_avg) + | | | | | | +- ID_MATCH: ___check_obj_id(L['self'].state[list(L['self'].state.keys())[29]]['exp_avg'], 140561608010912) + | | | | | | +- TENSOR_MATCH: 
check_tensor(L['self'].state[list(L['self'].state.keys())[29]]['exp_avg'], Tensor, DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), torch.float32, device=0, requires_grad=False, size=[2304], stride=[1]) + | | | | | | +- NO_HASATTR: hasattr(L['self'].state[list(L['self'].state.keys())[29]]['exp_avg'], '_dynamo_dynamic_indices') == False + | | | | | | +- NO_TENSOR_ALIASING + | | | | | +- GuardManager: source=L['self'].state[list(L['self'].state.keys())[29]]['exp_avg_sq'], accessed_by=DictGetItemGuardAccessor(exp_avg_sq) + | | | | | | +- ID_MATCH: ___check_obj_id(L['self'].state[list(L['self'].state.keys())[29]]['exp_avg_sq'], 140561608009472) + | | | | | | +- TENSOR_MATCH: check_tensor(L['self'].state[list(L['self'].state.keys())[29]]['exp_avg_sq'], Tensor, DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), torch.float32, device=0, requires_grad=False, size=[2304], stride=[1]) + | | | | | | +- NO_HASATTR: hasattr(L['self'].state[list(L['self'].state.keys())[29]]['exp_avg_sq'], '_dynamo_dynamic_indices') == False + | | | | | | +- NO_TENSOR_ALIASING + | | | +- KeyValueManager pair at index=30 + | | | | +- ValueManager: GuardManager: source=L['self'].state[list(L['self'].state.keys())[30]] + | | | | | +- GuardManager: source=L['self'].state[list(L['self'].state.keys())[30]]['step'], accessed_by=DictGetItemGuardAccessor(step) + | | | | | | +- ID_MATCH: ___check_obj_id(L['self'].state[list(L['self'].state.keys())[30]]['step'], 140562023536560) + | | | | | | +- TENSOR_MATCH: check_tensor(L['self'].state[list(L['self'].state.keys())[30]]['step'], Tensor, DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), torch.float32, device=0, requires_grad=False, size=[], stride=[]) + | | | | | | +- NO_HASATTR: hasattr(L['self'].state[list(L['self'].state.keys())[30]]['step'], '_dynamo_dynamic_indices') == False + | | | | | | +- NO_TENSOR_ALIASING + | | | | | +- GuardManager: source=L['self'].state[list(L['self'].state.keys())[30]]['exp_avg'], accessed_by=DictGetItemGuardAccessor(exp_avg) + | | | | | | +- ID_MATCH: ___check_obj_id(L['self'].state[list(L['self'].state.keys())[30]]['exp_avg'], 140561757337888) + | | | | | | +- TENSOR_MATCH: check_tensor(L['self'].state[list(L['self'].state.keys())[30]]['exp_avg'], Tensor, DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), torch.float32, device=0, requires_grad=False, size=[768, 768], stride=[768, 1]) + | | | | | | +- NO_HASATTR: hasattr(L['self'].state[list(L['self'].state.keys())[30]]['exp_avg'], '_dynamo_dynamic_indices') == False + | | | | | | +- NO_TENSOR_ALIASING + | | | | | +- GuardManager: source=L['self'].state[list(L['self'].state.keys())[30]]['exp_avg_sq'], accessed_by=DictGetItemGuardAccessor(exp_avg_sq) + | | | | | | +- ID_MATCH: ___check_obj_id(L['self'].state[list(L['self'].state.keys())[30]]['exp_avg_sq'], 140561654532512) + | | | | | | +- TENSOR_MATCH: check_tensor(L['self'].state[list(L['self'].state.keys())[30]]['exp_avg_sq'], Tensor, DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), torch.float32, device=0, requires_grad=False, size=[768, 768], stride=[768, 1]) + | | | | | | +- NO_HASATTR: hasattr(L['self'].state[list(L['self'].state.keys())[30]]['exp_avg_sq'], '_dynamo_dynamic_indices') == False + | | | | | | +- NO_TENSOR_ALIASING + | | | +- KeyValueManager pair at index=31 + | | | | +- ValueManager: GuardManager: source=L['self'].state[list(L['self'].state.keys())[31]] + | | | | | +- GuardManager: 
source=L['self'].state[list(L['self'].state.keys())[31]]['step'], accessed_by=DictGetItemGuardAccessor(step) + | | | | | | +- ID_MATCH: ___check_obj_id(L['self'].state[list(L['self'].state.keys())[31]]['step'], 140561618656672) + | | | | | | +- TENSOR_MATCH: check_tensor(L['self'].state[list(L['self'].state.keys())[31]]['step'], Tensor, DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), torch.float32, device=0, requires_grad=False, size=[], stride=[]) + | | | | | | +- NO_HASATTR: hasattr(L['self'].state[list(L['self'].state.keys())[31]]['step'], '_dynamo_dynamic_indices') == False + | | | | | | +- NO_TENSOR_ALIASING + | | | | | +- GuardManager: source=L['self'].state[list(L['self'].state.keys())[31]]['exp_avg'], accessed_by=DictGetItemGuardAccessor(exp_avg) + | | | | | | +- ID_MATCH: ___check_obj_id(L['self'].state[list(L['self'].state.keys())[31]]['exp_avg'], 140561654098144) + | | | | | | +- TENSOR_MATCH: check_tensor(L['self'].state[list(L['self'].state.keys())[31]]['exp_avg'], Tensor, DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), torch.float32, device=0, requires_grad=False, size=[768], stride=[1]) + | | | | | | +- NO_HASATTR: hasattr(L['self'].state[list(L['self'].state.keys())[31]]['exp_avg'], '_dynamo_dynamic_indices') == False + | | | | | | +- NO_TENSOR_ALIASING + | | | | | +- GuardManager: source=L['self'].state[list(L['self'].state.keys())[31]]['exp_avg_sq'], accessed_by=DictGetItemGuardAccessor(exp_avg_sq) + | | | | | | +- ID_MATCH: ___check_obj_id(L['self'].state[list(L['self'].state.keys())[31]]['exp_avg_sq'], 140561654096624) + | | | | | | +- TENSOR_MATCH: check_tensor(L['self'].state[list(L['self'].state.keys())[31]]['exp_avg_sq'], Tensor, DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), torch.float32, device=0, requires_grad=False, size=[768], stride=[1]) + | | | | | | +- NO_HASATTR: hasattr(L['self'].state[list(L['self'].state.keys())[31]]['exp_avg_sq'], '_dynamo_dynamic_indices') == False + | | | | | | +- NO_TENSOR_ALIASING + | | | +- KeyValueManager pair at index=32 + | | | | +- ValueManager: GuardManager: source=L['self'].state[list(L['self'].state.keys())[32]] + | | | | | +- GuardManager: source=L['self'].state[list(L['self'].state.keys())[32]]['step'], accessed_by=DictGetItemGuardAccessor(step) + | | | | | | +- ID_MATCH: ___check_obj_id(L['self'].state[list(L['self'].state.keys())[32]]['step'], 140561654099184) + | | | | | | +- TENSOR_MATCH: check_tensor(L['self'].state[list(L['self'].state.keys())[32]]['step'], Tensor, DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), torch.float32, device=0, requires_grad=False, size=[], stride=[]) + | | | | | | +- NO_HASATTR: hasattr(L['self'].state[list(L['self'].state.keys())[32]]['step'], '_dynamo_dynamic_indices') == False + | | | | | | +- NO_TENSOR_ALIASING + | | | | | +- GuardManager: source=L['self'].state[list(L['self'].state.keys())[32]]['exp_avg'], accessed_by=DictGetItemGuardAccessor(exp_avg) + | | | | | | +- ID_MATCH: ___check_obj_id(L['self'].state[list(L['self'].state.keys())[32]]['exp_avg'], 140561654098384) + | | | | | | +- TENSOR_MATCH: check_tensor(L['self'].state[list(L['self'].state.keys())[32]]['exp_avg'], Tensor, DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), torch.float32, device=0, requires_grad=False, size=[768], stride=[1]) + | | | | | | +- NO_HASATTR: hasattr(L['self'].state[list(L['self'].state.keys())[32]]['exp_avg'], '_dynamo_dynamic_indices') == False + | | | | | | +- NO_TENSOR_ALIASING + | | | | | +- 
GuardManager: source=L['self'].state[list(L['self'].state.keys())[32]]['exp_avg_sq'], accessed_by=DictGetItemGuardAccessor(exp_avg_sq) + | | | | | | +- ID_MATCH: ___check_obj_id(L['self'].state[list(L['self'].state.keys())[32]]['exp_avg_sq'], 140561654097904) + | | | | | | +- TENSOR_MATCH: check_tensor(L['self'].state[list(L['self'].state.keys())[32]]['exp_avg_sq'], Tensor, DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), torch.float32, device=0, requires_grad=False, size=[768], stride=[1]) + | | | | | | +- NO_HASATTR: hasattr(L['self'].state[list(L['self'].state.keys())[32]]['exp_avg_sq'], '_dynamo_dynamic_indices') == False + | | | | | | +- NO_TENSOR_ALIASING + | | | +- KeyValueManager pair at index=33 + | | | | +- ValueManager: GuardManager: source=L['self'].state[list(L['self'].state.keys())[33]] + | | | | | +- GuardManager: source=L['self'].state[list(L['self'].state.keys())[33]]['step'], accessed_by=DictGetItemGuardAccessor(step) + | | | | | | +- ID_MATCH: ___check_obj_id(L['self'].state[list(L['self'].state.keys())[33]]['step'], 140561654097424) + | | | | | | +- TENSOR_MATCH: check_tensor(L['self'].state[list(L['self'].state.keys())[33]]['step'], Tensor, DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), torch.float32, device=0, requires_grad=False, size=[], stride=[]) + | | | | | | +- NO_HASATTR: hasattr(L['self'].state[list(L['self'].state.keys())[33]]['step'], '_dynamo_dynamic_indices') == False + | | | | | | +- NO_TENSOR_ALIASING + | | | | | +- GuardManager: source=L['self'].state[list(L['self'].state.keys())[33]]['exp_avg'], accessed_by=DictGetItemGuardAccessor(exp_avg) + | | | | | | +- ID_MATCH: ___check_obj_id(L['self'].state[list(L['self'].state.keys())[33]]['exp_avg'], 140561654098304) + | | | | | | +- TENSOR_MATCH: check_tensor(L['self'].state[list(L['self'].state.keys())[33]]['exp_avg'], Tensor, DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), torch.float32, device=0, requires_grad=False, size=[768], stride=[1]) + | | | | | | +- NO_HASATTR: hasattr(L['self'].state[list(L['self'].state.keys())[33]]['exp_avg'], '_dynamo_dynamic_indices') == False + | | | | | | +- NO_TENSOR_ALIASING + | | | | | +- GuardManager: source=L['self'].state[list(L['self'].state.keys())[33]]['exp_avg_sq'], accessed_by=DictGetItemGuardAccessor(exp_avg_sq) + | | | | | | +- ID_MATCH: ___check_obj_id(L['self'].state[list(L['self'].state.keys())[33]]['exp_avg_sq'], 140561608484688) + | | | | | | +- TENSOR_MATCH: check_tensor(L['self'].state[list(L['self'].state.keys())[33]]['exp_avg_sq'], Tensor, DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), torch.float32, device=0, requires_grad=False, size=[768], stride=[1]) + | | | | | | +- NO_HASATTR: hasattr(L['self'].state[list(L['self'].state.keys())[33]]['exp_avg_sq'], '_dynamo_dynamic_indices') == False + | | | | | | +- NO_TENSOR_ALIASING + | | | +- KeyValueManager pair at index=34 + | | | | +- ValueManager: GuardManager: source=L['self'].state[list(L['self'].state.keys())[34]] + | | | | | +- GuardManager: source=L['self'].state[list(L['self'].state.keys())[34]]['step'], accessed_by=DictGetItemGuardAccessor(step) + | | | | | | +- ID_MATCH: ___check_obj_id(L['self'].state[list(L['self'].state.keys())[34]]['step'], 140561608484368) + | | | | | | +- TENSOR_MATCH: check_tensor(L['self'].state[list(L['self'].state.keys())[34]]['step'], Tensor, DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), torch.float32, device=0, requires_grad=False, size=[], stride=[]) + | | | | | | +- 
NO_HASATTR: hasattr(L['self'].state[list(L['self'].state.keys())[34]]['step'], '_dynamo_dynamic_indices') == False + | | | | | | +- NO_TENSOR_ALIASING + | | | | | +- GuardManager: source=L['self'].state[list(L['self'].state.keys())[34]]['exp_avg'], accessed_by=DictGetItemGuardAccessor(exp_avg) + | | | | | | +- ID_MATCH: ___check_obj_id(L['self'].state[list(L['self'].state.keys())[34]]['exp_avg'], 140561608484608) + | | | | | | +- TENSOR_MATCH: check_tensor(L['self'].state[list(L['self'].state.keys())[34]]['exp_avg'], Tensor, DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), torch.float32, device=0, requires_grad=False, size=[3072, 768], stride=[768, 1]) + | | | | | | +- NO_HASATTR: hasattr(L['self'].state[list(L['self'].state.keys())[34]]['exp_avg'], '_dynamo_dynamic_indices') == False + | | | | | | +- NO_TENSOR_ALIASING + | | | | | +- GuardManager: source=L['self'].state[list(L['self'].state.keys())[34]]['exp_avg_sq'], accessed_by=DictGetItemGuardAccessor(exp_avg_sq) + | | | | | | +- ID_MATCH: ___check_obj_id(L['self'].state[list(L['self'].state.keys())[34]]['exp_avg_sq'], 140561608483488) + | | | | | | +- TENSOR_MATCH: check_tensor(L['self'].state[list(L['self'].state.keys())[34]]['exp_avg_sq'], Tensor, DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), torch.float32, device=0, requires_grad=False, size=[3072, 768], stride=[768, 1]) + | | | | | | +- NO_HASATTR: hasattr(L['self'].state[list(L['self'].state.keys())[34]]['exp_avg_sq'], '_dynamo_dynamic_indices') == False + | | | | | | +- NO_TENSOR_ALIASING + | | | +- KeyValueManager pair at index=35 + | | | | +- ValueManager: GuardManager: source=L['self'].state[list(L['self'].state.keys())[35]] + | | | | | +- GuardManager: source=L['self'].state[list(L['self'].state.keys())[35]]['step'], accessed_by=DictGetItemGuardAccessor(step) + | | | | | | +- ID_MATCH: ___check_obj_id(L['self'].state[list(L['self'].state.keys())[35]]['step'], 140561608483888) + | | | | | | +- TENSOR_MATCH: check_tensor(L['self'].state[list(L['self'].state.keys())[35]]['step'], Tensor, DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), torch.float32, device=0, requires_grad=False, size=[], stride=[]) + | | | | | | +- NO_HASATTR: hasattr(L['self'].state[list(L['self'].state.keys())[35]]['step'], '_dynamo_dynamic_indices') == False + | | | | | | +- NO_TENSOR_ALIASING + | | | | | +- GuardManager: source=L['self'].state[list(L['self'].state.keys())[35]]['exp_avg'], accessed_by=DictGetItemGuardAccessor(exp_avg) + | | | | | | +- ID_MATCH: ___check_obj_id(L['self'].state[list(L['self'].state.keys())[35]]['exp_avg'], 140561608484528) + | | | | | | +- TENSOR_MATCH: check_tensor(L['self'].state[list(L['self'].state.keys())[35]]['exp_avg'], Tensor, DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), torch.float32, device=0, requires_grad=False, size=[3072], stride=[1]) + | | | | | | +- NO_HASATTR: hasattr(L['self'].state[list(L['self'].state.keys())[35]]['exp_avg'], '_dynamo_dynamic_indices') == False + | | | | | | +- NO_TENSOR_ALIASING + | | | | | +- GuardManager: source=L['self'].state[list(L['self'].state.keys())[35]]['exp_avg_sq'], accessed_by=DictGetItemGuardAccessor(exp_avg_sq) + | | | | | | +- ID_MATCH: ___check_obj_id(L['self'].state[list(L['self'].state.keys())[35]]['exp_avg_sq'], 140561608483808) + | | | | | | +- TENSOR_MATCH: check_tensor(L['self'].state[list(L['self'].state.keys())[35]]['exp_avg_sq'], Tensor, DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), torch.float32, device=0, 
requires_grad=False, size=[3072], stride=[1]) + | | | | | | +- NO_HASATTR: hasattr(L['self'].state[list(L['self'].state.keys())[35]]['exp_avg_sq'], '_dynamo_dynamic_indices') == False + | | | | | | +- NO_TENSOR_ALIASING + | | | +- KeyValueManager pair at index=36 + | | | | +- ValueManager: GuardManager: source=L['self'].state[list(L['self'].state.keys())[36]] + | | | | | +- GuardManager: source=L['self'].state[list(L['self'].state.keys())[36]]['step'], accessed_by=DictGetItemGuardAccessor(step) + | | | | | | +- ID_MATCH: ___check_obj_id(L['self'].state[list(L['self'].state.keys())[36]]['step'], 140561608483328) + | | | | | | +- TENSOR_MATCH: check_tensor(L['self'].state[list(L['self'].state.keys())[36]]['step'], Tensor, DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), torch.float32, device=0, requires_grad=False, size=[], stride=[]) + | | | | | | +- NO_HASATTR: hasattr(L['self'].state[list(L['self'].state.keys())[36]]['step'], '_dynamo_dynamic_indices') == False + | | | | | | +- NO_TENSOR_ALIASING + | | | | | +- GuardManager: source=L['self'].state[list(L['self'].state.keys())[36]]['exp_avg'], accessed_by=DictGetItemGuardAccessor(exp_avg) + | | | | | | +- ID_MATCH: ___check_obj_id(L['self'].state[list(L['self'].state.keys())[36]]['exp_avg'], 140561608483648) + | | | | | | +- TENSOR_MATCH: check_tensor(L['self'].state[list(L['self'].state.keys())[36]]['exp_avg'], Tensor, DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), torch.float32, device=0, requires_grad=False, size=[768, 3072], stride=[3072, 1]) + | | | | | | +- NO_HASATTR: hasattr(L['self'].state[list(L['self'].state.keys())[36]]['exp_avg'], '_dynamo_dynamic_indices') == False + | | | | | | +- NO_TENSOR_ALIASING + | | | | | +- GuardManager: source=L['self'].state[list(L['self'].state.keys())[36]]['exp_avg_sq'], accessed_by=DictGetItemGuardAccessor(exp_avg_sq) + | | | | | | +- ID_MATCH: ___check_obj_id(L['self'].state[list(L['self'].state.keys())[36]]['exp_avg_sq'], 140561608483168) + | | | | | | +- TENSOR_MATCH: check_tensor(L['self'].state[list(L['self'].state.keys())[36]]['exp_avg_sq'], Tensor, DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), torch.float32, device=0, requires_grad=False, size=[768, 3072], stride=[3072, 1]) + | | | | | | +- NO_HASATTR: hasattr(L['self'].state[list(L['self'].state.keys())[36]]['exp_avg_sq'], '_dynamo_dynamic_indices') == False + | | | | | | +- NO_TENSOR_ALIASING + | | | +- KeyValueManager pair at index=37 + | | | | +- ValueManager: GuardManager: source=L['self'].state[list(L['self'].state.keys())[37]] + | | | | | +- GuardManager: source=L['self'].state[list(L['self'].state.keys())[37]]['step'], accessed_by=DictGetItemGuardAccessor(step) + | | | | | | +- ID_MATCH: ___check_obj_id(L['self'].state[list(L['self'].state.keys())[37]]['step'], 140561608482848) + | | | | | | +- TENSOR_MATCH: check_tensor(L['self'].state[list(L['self'].state.keys())[37]]['step'], Tensor, DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), torch.float32, device=0, requires_grad=False, size=[], stride=[]) + | | | | | | +- NO_HASATTR: hasattr(L['self'].state[list(L['self'].state.keys())[37]]['step'], '_dynamo_dynamic_indices') == False + | | | | | | +- NO_TENSOR_ALIASING + | | | | | +- GuardManager: source=L['self'].state[list(L['self'].state.keys())[37]]['exp_avg'], accessed_by=DictGetItemGuardAccessor(exp_avg) + | | | | | | +- ID_MATCH: ___check_obj_id(L['self'].state[list(L['self'].state.keys())[37]]['exp_avg'], 140561608482528) + | | | | | | +- 
TENSOR_MATCH: check_tensor(L['self'].state[list(L['self'].state.keys())[37]]['exp_avg'], Tensor, DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), torch.float32, device=0, requires_grad=False, size=[768], stride=[1]) + | | | | | | +- NO_HASATTR: hasattr(L['self'].state[list(L['self'].state.keys())[37]]['exp_avg'], '_dynamo_dynamic_indices') == False + | | | | | | +- NO_TENSOR_ALIASING + | | | | | +- GuardManager: source=L['self'].state[list(L['self'].state.keys())[37]]['exp_avg_sq'], accessed_by=DictGetItemGuardAccessor(exp_avg_sq) + | | | | | | +- ID_MATCH: ___check_obj_id(L['self'].state[list(L['self'].state.keys())[37]]['exp_avg_sq'], 140561608481968) + | | | | | | +- TENSOR_MATCH: check_tensor(L['self'].state[list(L['self'].state.keys())[37]]['exp_avg_sq'], Tensor, DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), torch.float32, device=0, requires_grad=False, size=[768], stride=[1]) + | | | | | | +- NO_HASATTR: hasattr(L['self'].state[list(L['self'].state.keys())[37]]['exp_avg_sq'], '_dynamo_dynamic_indices') == False + | | | | | | +- NO_TENSOR_ALIASING + | | | +- KeyValueManager pair at index=38 + | | | | +- ValueManager: GuardManager: source=L['self'].state[list(L['self'].state.keys())[38]] + | | | | | +- GuardManager: source=L['self'].state[list(L['self'].state.keys())[38]]['step'], accessed_by=DictGetItemGuardAccessor(step) + | | | | | | +- ID_MATCH: ___check_obj_id(L['self'].state[list(L['self'].state.keys())[38]]['step'], 140561608482288) + | | | | | | +- TENSOR_MATCH: check_tensor(L['self'].state[list(L['self'].state.keys())[38]]['step'], Tensor, DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), torch.float32, device=0, requires_grad=False, size=[], stride=[]) + | | | | | | +- NO_HASATTR: hasattr(L['self'].state[list(L['self'].state.keys())[38]]['step'], '_dynamo_dynamic_indices') == False + | | | | | | +- NO_TENSOR_ALIASING + | | | | | +- GuardManager: source=L['self'].state[list(L['self'].state.keys())[38]]['exp_avg'], accessed_by=DictGetItemGuardAccessor(exp_avg) + | | | | | | +- ID_MATCH: ___check_obj_id(L['self'].state[list(L['self'].state.keys())[38]]['exp_avg'], 140561608482208) + | | | | | | +- TENSOR_MATCH: check_tensor(L['self'].state[list(L['self'].state.keys())[38]]['exp_avg'], Tensor, DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), torch.float32, device=0, requires_grad=False, size=[768], stride=[1]) + | | | | | | +- NO_HASATTR: hasattr(L['self'].state[list(L['self'].state.keys())[38]]['exp_avg'], '_dynamo_dynamic_indices') == False + | | | | | | +- NO_TENSOR_ALIASING + | | | | | +- GuardManager: source=L['self'].state[list(L['self'].state.keys())[38]]['exp_avg_sq'], accessed_by=DictGetItemGuardAccessor(exp_avg_sq) + | | | | | | +- ID_MATCH: ___check_obj_id(L['self'].state[list(L['self'].state.keys())[38]]['exp_avg_sq'], 140561608482128) + | | | | | | +- TENSOR_MATCH: check_tensor(L['self'].state[list(L['self'].state.keys())[38]]['exp_avg_sq'], Tensor, DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), torch.float32, device=0, requires_grad=False, size=[768], stride=[1]) + | | | | | | +- NO_HASATTR: hasattr(L['self'].state[list(L['self'].state.keys())[38]]['exp_avg_sq'], '_dynamo_dynamic_indices') == False + | | | | | | +- NO_TENSOR_ALIASING + | | | +- KeyValueManager pair at index=39 + | | | | +- ValueManager: GuardManager: source=L['self'].state[list(L['self'].state.keys())[39]] + | | | | | +- GuardManager: source=L['self'].state[list(L['self'].state.keys())[39]]['step'], 
accessed_by=DictGetItemGuardAccessor(step) + | | | | | | +- ID_MATCH: ___check_obj_id(L['self'].state[list(L['self'].state.keys())[39]]['step'], 140561608481648) + | | | | | | +- TENSOR_MATCH: check_tensor(L['self'].state[list(L['self'].state.keys())[39]]['step'], Tensor, DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), torch.float32, device=0, requires_grad=False, size=[], stride=[]) + | | | | | | +- NO_HASATTR: hasattr(L['self'].state[list(L['self'].state.keys())[39]]['step'], '_dynamo_dynamic_indices') == False + | | | | | | +- NO_TENSOR_ALIASING + | | | | | +- GuardManager: source=L['self'].state[list(L['self'].state.keys())[39]]['exp_avg'], accessed_by=DictGetItemGuardAccessor(exp_avg) + | | | | | | +- ID_MATCH: ___check_obj_id(L['self'].state[list(L['self'].state.keys())[39]]['exp_avg'], 140561608481568) + | | | | | | +- TENSOR_MATCH: check_tensor(L['self'].state[list(L['self'].state.keys())[39]]['exp_avg'], Tensor, DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), torch.float32, device=0, requires_grad=False, size=[768], stride=[1]) + | | | | | | +- NO_HASATTR: hasattr(L['self'].state[list(L['self'].state.keys())[39]]['exp_avg'], '_dynamo_dynamic_indices') == False + | | | | | | +- NO_TENSOR_ALIASING + | | | | | +- GuardManager: source=L['self'].state[list(L['self'].state.keys())[39]]['exp_avg_sq'], accessed_by=DictGetItemGuardAccessor(exp_avg_sq) + | | | | | | +- ID_MATCH: ___check_obj_id(L['self'].state[list(L['self'].state.keys())[39]]['exp_avg_sq'], 140561608481408) + | | | | | | +- TENSOR_MATCH: check_tensor(L['self'].state[list(L['self'].state.keys())[39]]['exp_avg_sq'], Tensor, DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), torch.float32, device=0, requires_grad=False, size=[768], stride=[1]) + | | | | | | +- NO_HASATTR: hasattr(L['self'].state[list(L['self'].state.keys())[39]]['exp_avg_sq'], '_dynamo_dynamic_indices') == False + | | | | | | +- NO_TENSOR_ALIASING + | | | +- KeyValueManager pair at index=40 + | | | | +- ValueManager: GuardManager: source=L['self'].state[list(L['self'].state.keys())[40]] + | | | | | +- GuardManager: source=L['self'].state[list(L['self'].state.keys())[40]]['step'], accessed_by=DictGetItemGuardAccessor(step) + | | | | | | +- ID_MATCH: ___check_obj_id(L['self'].state[list(L['self'].state.keys())[40]]['step'], 140561608481168) + | | | | | | +- TENSOR_MATCH: check_tensor(L['self'].state[list(L['self'].state.keys())[40]]['step'], Tensor, DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), torch.float32, device=0, requires_grad=False, size=[], stride=[]) + | | | | | | +- NO_HASATTR: hasattr(L['self'].state[list(L['self'].state.keys())[40]]['step'], '_dynamo_dynamic_indices') == False + | | | | | | +- NO_TENSOR_ALIASING + | | | | | +- GuardManager: source=L['self'].state[list(L['self'].state.keys())[40]]['exp_avg'], accessed_by=DictGetItemGuardAccessor(exp_avg) + | | | | | | +- ID_MATCH: ___check_obj_id(L['self'].state[list(L['self'].state.keys())[40]]['exp_avg'], 140561608480768) + | | | | | | +- TENSOR_MATCH: check_tensor(L['self'].state[list(L['self'].state.keys())[40]]['exp_avg'], Tensor, DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), torch.float32, device=0, requires_grad=False, size=[2304, 768], stride=[768, 1]) + | | | | | | +- NO_HASATTR: hasattr(L['self'].state[list(L['self'].state.keys())[40]]['exp_avg'], '_dynamo_dynamic_indices') == False + | | | | | | +- NO_TENSOR_ALIASING + | | | | | +- GuardManager: 
source=L['self'].state[list(L['self'].state.keys())[40]]['exp_avg_sq'], accessed_by=DictGetItemGuardAccessor(exp_avg_sq)
+ | | | | | | +- ID_MATCH: ___check_obj_id(L['self'].state[list(L['self'].state.keys())[40]]['exp_avg_sq'], 140561608480208)
+ | | | | | | +- TENSOR_MATCH: check_tensor(L['self'].state[list(L['self'].state.keys())[40]]['exp_avg_sq'], Tensor, DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), torch.float32, device=0, requires_grad=False, size=[2304, 768], stride=[768, 1])
+ | | | | | | +- NO_HASATTR: hasattr(L['self'].state[list(L['self'].state.keys())[40]]['exp_avg_sq'], '_dynamo_dynamic_indices') == False
+ | | | | | | +- NO_TENSOR_ALIASING
+ | | | +- KeyValueManager pair at index=41
+ | | | | +- ValueManager: GuardManager: source=L['self'].state[list(L['self'].state.keys())[41]]
+ | | | | | +- GuardManager: source=L['self'].state[list(L['self'].state.keys())[41]]['step'], accessed_by=DictGetItemGuardAccessor(step)
+ | | | | | | +- ID_MATCH: ___check_obj_id(L['self'].state[list(L['self'].state.keys())[41]]['step'], 140561608480528)
+ | | | | | | +- TENSOR_MATCH: check_tensor(L['self'].state[list(L['self'].state.keys())[41]]['step'], Tensor, DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), torch.float32, device=0, requires_grad=False, size=[], stride=[])
+ | | | | | | +- NO_HASATTR: hasattr(L['self'].state[list(L['self'].state.keys())[41]]['step'], '_dynamo_dynamic_indices') == False
+ | | | | | | +- NO_TENSOR_ALIASING
+ | | | | | +- GuardManager: source=L['self'].state[list(L['self'].state.keys())[41]]['exp_avg'], accessed_by=DictGetItemGuardAccessor(exp_avg)
+ | | | | | | +- ID_MATCH: ___check_obj_id(L['self'].state[list(L['self'].state.keys())[41]]['exp_avg'], 140561608480448)
+ | | | | | | +- TENSOR_MATCH: check_tensor(L['self'].state[list(L['self'].state.keys())[41]]['exp_avg'], Tensor, DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), torch.float32, device=0, requires_grad=False, size=[2304], stride=[1])
+ | | | | | | +- NO_HASATTR: hasattr(L['self'].state[list(L['self'].state.keys())[41]]['exp_avg'], '_dynamo_dynamic_indices') == False
+ | | | | | | +- NO_TENSOR_ALIASING
+ | | | | | +- GuardManager: source=L['self'].state[list(L['self'].state.keys())[41]]['exp_avg_sq'], accessed_by=DictGetItemGuardAccessor(exp_avg_sq)
+ | | | | | | +- ID_MATCH: ___check_obj_id(L['self'].state[list(L['self'].state.keys())[41]]['exp_avg_sq'], 140561608480368)
+ | | | | | | +- TENSOR_MATCH: check_tensor(L['self'].state[list(L['self'].state.keys())[41]]['exp_avg_sq'], Tensor, DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), torch.float32, device=0, requires_grad=False, size=[2304], stride=[1])
+ | | | | | | +- NO_HASATTR: hasattr(L['self'].state[list(L['self'].state.keys())[41]]['exp_avg_sq'], '_dynamo_dynamic_indices') == False
+ | | | | | | +- NO_TENSOR_ALIASING
+ | | | +- KeyValueManager pair at index=42
+ | | | | +- ValueManager: GuardManager: source=L['self'].state[list(L['self'].state.keys())[42]]
+ | | | | | +- GuardManager: source=L['self'].state[list(L['self'].state.keys())[42]]['step'], accessed_by=DictGetItemGuardAccessor(step)
+ | | | | | | +- ID_MATCH: ___check_obj_id(L['self'].state[list(L['self'].state.keys())[42]]['step'], 140561608479328)
+ | | | | | | +- TENSOR_MATCH: check_tensor(L['self'].state[list(L['self'].state.keys())[42]]['step'], Tensor, DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), torch.float32, device=0, requires_grad=False, size=[], stride=[])
+ | | | | | | +- NO_HASATTR: hasattr(L['self'].state[list(L['self'].state.keys())[42]]['step'], '_dynamo_dynamic_indices') == False
+ | | | | | | +- NO_TENSOR_ALIASING
+ | | | | | +- GuardManager: source=L['self'].state[list(L['self'].state.keys())[42]]['exp_avg'], accessed_by=DictGetItemGuardAccessor(exp_avg)
+ | | | | | | +- ID_MATCH: ___check_obj_id(L['self'].state[list(L['self'].state.keys())[42]]['exp_avg'], 140561608479888)
+ | | | | | | +- TENSOR_MATCH: check_tensor(L['self'].state[list(L['self'].state.keys())[42]]['exp_avg'], Tensor, DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), torch.float32, device=0, requires_grad=False, size=[768, 768], stride=[768, 1])
+ | | | | | | +- NO_HASATTR: hasattr(L['self'].state[list(L['self'].state.keys())[42]]['exp_avg'], '_dynamo_dynamic_indices') == False
+ | | | | | | +- NO_TENSOR_ALIASING
+ | | | | | +- GuardManager: source=L['self'].state[list(L['self'].state.keys())[42]]['exp_avg_sq'], accessed_by=DictGetItemGuardAccessor(exp_avg_sq)
+ | | | | | | +- ID_MATCH: ___check_obj_id(L['self'].state[list(L['self'].state.keys())[42]]['exp_avg_sq'], 140561608479728)
+ | | | | | | +- TENSOR_MATCH: check_tensor(L['self'].state[list(L['self'].state.keys())[42]]['exp_avg_sq'], Tensor, DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), torch.float32, device=0, requires_grad=False, size=[768, 768], stride=[768, 1])
+ | | | | | | +- NO_HASATTR: hasattr(L['self'].state[list(L['self'].state.keys())[42]]['exp_avg_sq'], '_dynamo_dynamic_indices') == False
+ | | | | | | +- NO_TENSOR_ALIASING
+ | | | +- KeyValueManager pair at index=43
+ | | | | +- ValueManager: GuardManager: source=L['self'].state[list(L['self'].state.keys())[43]]
+ | | | | | +- GuardManager: source=L['self'].state[list(L['self'].state.keys())[43]]['step'], accessed_by=DictGetItemGuardAccessor(step)
+ | | | | | | +- ID_MATCH: ___check_obj_id(L['self'].state[list(L['self'].state.keys())[43]]['step'], 140561608479488)
+ | | | | | | +- TENSOR_MATCH: check_tensor(L['self'].state[list(L['self'].state.keys())[43]]['step'], Tensor, DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), torch.float32, device=0, requires_grad=False, size=[], stride=[])
+ | | | | | | +- NO_HASATTR: hasattr(L['self'].state[list(L['self'].state.keys())[43]]['step'], '_dynamo_dynamic_indices') == False
+ | | | | | | +- NO_TENSOR_ALIASING
+ | | | | | +- GuardManager: source=L['self'].state[list(L['self'].state.keys())[43]]['exp_avg'], accessed_by=DictGetItemGuardAccessor(exp_avg)
+ | | | | | | +- ID_MATCH: ___check_obj_id(L['self'].state[list(L['self'].state.keys())[43]]['exp_avg'], 140561608479088)
+ | | | | | | +- TENSOR_MATCH: check_tensor(L['self'].state[list(L['self'].state.keys())[43]]['exp_avg'], Tensor, DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), torch.float32, device=0, requires_grad=False, size=[768], stride=[1])
+ | | | | | | +- NO_HASATTR: hasattr(L['self'].state[list(L['self'].state.keys())[43]]['exp_avg'], '_dynamo_dynamic_indices') == False
+ | | | | | | +- NO_TENSOR_ALIASING
+ | | | | | +- GuardManager: source=L['self'].state[list(L['self'].state.keys())[43]]['exp_avg_sq'], accessed_by=DictGetItemGuardAccessor(exp_avg_sq)
+ | | | | | | +- ID_MATCH: ___check_obj_id(L['self'].state[list(L['self'].state.keys())[43]]['exp_avg_sq'], 140561608478528)
+ | | | | | | +- TENSOR_MATCH: check_tensor(L['self'].state[list(L['self'].state.keys())[43]]['exp_avg_sq'], Tensor, DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), torch.float32, device=0, requires_grad=False, size=[768], stride=[1])
+ | | | | | | +- NO_HASATTR: hasattr(L['self'].state[list(L['self'].state.keys())[43]]['exp_avg_sq'], '_dynamo_dynamic_indices') == False
+ | | | | | | +- NO_TENSOR_ALIASING
+ | | | +- KeyValueManager pair at index=44
+ | | | | +- ValueManager: GuardManager: source=L['self'].state[list(L['self'].state.keys())[44]]
+ | | | | | +- GuardManager: source=L['self'].state[list(L['self'].state.keys())[44]]['step'], accessed_by=DictGetItemGuardAccessor(step)
+ | | | | | | +- ID_MATCH: ___check_obj_id(L['self'].state[list(L['self'].state.keys())[44]]['step'], 140561608478848)
+ | | | | | | +- TENSOR_MATCH: check_tensor(L['self'].state[list(L['self'].state.keys())[44]]['step'], Tensor, DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), torch.float32, device=0, requires_grad=False, size=[], stride=[])
+ | | | | | | +- NO_HASATTR: hasattr(L['self'].state[list(L['self'].state.keys())[44]]['step'], '_dynamo_dynamic_indices') == False
+ | | | | | | +- NO_TENSOR_ALIASING
+ | | | | | +- GuardManager: source=L['self'].state[list(L['self'].state.keys())[44]]['exp_avg'], accessed_by=DictGetItemGuardAccessor(exp_avg)
+ | | | | | | +- ID_MATCH: ___check_obj_id(L['self'].state[list(L['self'].state.keys())[44]]['exp_avg'], 140561608478768)
+ | | | | | | +- TENSOR_MATCH: check_tensor(L['self'].state[list(L['self'].state.keys())[44]]['exp_avg'], Tensor, DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), torch.float32, device=0, requires_grad=False, size=[768], stride=[1])
+ | | | | | | +- NO_HASATTR: hasattr(L['self'].state[list(L['self'].state.keys())[44]]['exp_avg'], '_dynamo_dynamic_indices') == False
+ | | | | | | +- NO_TENSOR_ALIASING
+ | | | | | +- GuardManager: source=L['self'].state[list(L['self'].state.keys())[44]]['exp_avg_sq'], accessed_by=DictGetItemGuardAccessor(exp_avg_sq)
+ | | | | | | +- ID_MATCH: ___check_obj_id(L['self'].state[list(L['self'].state.keys())[44]]['exp_avg_sq'], 140561608478688)
+ | | | | | | +- TENSOR_MATCH: check_tensor(L['self'].state[list(L['self'].state.keys())[44]]['exp_avg_sq'], Tensor, DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), torch.float32, device=0, requires_grad=False, size=[768], stride=[1])
+ | | | | | | +- NO_HASATTR: hasattr(L['self'].state[list(L['self'].state.keys())[44]]['exp_avg_sq'], '_dynamo_dynamic_indices') == False
+ | | | | | | +- NO_TENSOR_ALIASING
+ | | | +- KeyValueManager pair at index=45
+ | | | | +- ValueManager: GuardManager: source=L['self'].state[list(L['self'].state.keys())[45]]
+ | | | | | +- GuardManager: source=L['self'].state[list(L['self'].state.keys())[45]]['step'], accessed_by=DictGetItemGuardAccessor(step)
+ | | | | | | +- ID_MATCH: ___check_obj_id(L['self'].state[list(L['self'].state.keys())[45]]['step'], 140561608477648)
+ | | | | | | +- TENSOR_MATCH: check_tensor(L['self'].state[list(L['self'].state.keys())[45]]['step'], Tensor, DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), torch.float32, device=0, requires_grad=False, size=[], stride=[])
+ | | | | | | +- NO_HASATTR: hasattr(L['self'].state[list(L['self'].state.keys())[45]]['step'], '_dynamo_dynamic_indices') == False
+ | | | | | | +- NO_TENSOR_ALIASING
+ | | | | | +- GuardManager: source=L['self'].state[list(L['self'].state.keys())[45]]['exp_avg'], accessed_by=DictGetItemGuardAccessor(exp_avg)
+ | | | | | | +- ID_MATCH: ___check_obj_id(L['self'].state[list(L['self'].state.keys())[45]]['exp_avg'], 140561608478208)
+ | | | | | | +- TENSOR_MATCH: check_tensor(L['self'].state[list(L['self'].state.keys())[45]]['exp_avg'], Tensor, DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), torch.float32, device=0, requires_grad=False, size=[768], stride=[1])
+ | | | | | | +- NO_HASATTR: hasattr(L['self'].state[list(L['self'].state.keys())[45]]['exp_avg'], '_dynamo_dynamic_indices') == False
+ | | | | | | +- NO_TENSOR_ALIASING
+ | | | | | +- GuardManager: source=L['self'].state[list(L['self'].state.keys())[45]]['exp_avg_sq'], accessed_by=DictGetItemGuardAccessor(exp_avg_sq)
+ | | | | | | +- ID_MATCH: ___check_obj_id(L['self'].state[list(L['self'].state.keys())[45]]['exp_avg_sq'], 140561608478048)
+ | | | | | | +- TENSOR_MATCH: check_tensor(L['self'].state[list(L['self'].state.keys())[45]]['exp_avg_sq'], Tensor, DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), torch.float32, device=0, requires_grad=False, size=[768], stride=[1])
+ | | | | | | +- NO_HASATTR: hasattr(L['self'].state[list(L['self'].state.keys())[45]]['exp_avg_sq'], '_dynamo_dynamic_indices') == False
+ | | | | | | +- NO_TENSOR_ALIASING
+ | | | +- KeyValueManager pair at index=46
+ | | | | +- ValueManager: GuardManager: source=L['self'].state[list(L['self'].state.keys())[46]]
+ | | | | | +- GuardManager: source=L['self'].state[list(L['self'].state.keys())[46]]['step'], accessed_by=DictGetItemGuardAccessor(step)
+ | | | | | | +- ID_MATCH: ___check_obj_id(L['self'].state[list(L['self'].state.keys())[46]]['step'], 140561608477808)
+ | | | | | | +- TENSOR_MATCH: check_tensor(L['self'].state[list(L['self'].state.keys())[46]]['step'], Tensor, DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), torch.float32, device=0, requires_grad=False, size=[], stride=[])
+ | | | | | | +- NO_HASATTR: hasattr(L['self'].state[list(L['self'].state.keys())[46]]['step'], '_dynamo_dynamic_indices') == False
+ | | | | | | +- NO_TENSOR_ALIASING
+ | | | | | +- GuardManager: source=L['self'].state[list(L['self'].state.keys())[46]]['exp_avg'], accessed_by=DictGetItemGuardAccessor(exp_avg)
+ | | | | | | +- ID_MATCH: ___check_obj_id(L['self'].state[list(L['self'].state.keys())[46]]['exp_avg'], 140561608477408)
+ | | | | | | +- TENSOR_MATCH: check_tensor(L['self'].state[list(L['self'].state.keys())[46]]['exp_avg'], Tensor, DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), torch.float32, device=0, requires_grad=False, size=[3072, 768], stride=[768, 1])
+ | | | | | | +- NO_HASATTR: hasattr(L['self'].state[list(L['self'].state.keys())[46]]['exp_avg'], '_dynamo_dynamic_indices') == False
+ | | | | | | +- NO_TENSOR_ALIASING
+ | | | | | +- GuardManager: source=L['self'].state[list(L['self'].state.keys())[46]]['exp_avg_sq'], accessed_by=DictGetItemGuardAccessor(exp_avg_sq)
+ | | | | | | +- ID_MATCH: ___check_obj_id(L['self'].state[list(L['self'].state.keys())[46]]['exp_avg_sq'], 140561608476848)
+ | | | | | | +- TENSOR_MATCH: check_tensor(L['self'].state[list(L['self'].state.keys())[46]]['exp_avg_sq'], Tensor, DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), torch.float32, device=0, requires_grad=False, size=[3072, 768], stride=[768, 1])
+ | | | | | | +- NO_HASATTR: hasattr(L['self'].state[list(L['self'].state.keys())[46]]['exp_avg_sq'], '_dynamo_dynamic_indices') == False
+ | | | | | | +- NO_TENSOR_ALIASING
+ | | | +- KeyValueManager pair at index=47
+ | | | | +- ValueManager: GuardManager: source=L['self'].state[list(L['self'].state.keys())[47]]
+ | | | | | +- GuardManager: source=L['self'].state[list(L['self'].state.keys())[47]]['step'], accessed_by=DictGetItemGuardAccessor(step)
+ | | | | | | +- ID_MATCH: ___check_obj_id(L['self'].state[list(L['self'].state.keys())[47]]['step'], 140561608477168)
+ | | | | | | +- TENSOR_MATCH: check_tensor(L['self'].state[list(L['self'].state.keys())[47]]['step'], Tensor, DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), torch.float32, device=0, requires_grad=False, size=[], stride=[])
+ | | | | | | +- NO_HASATTR: hasattr(L['self'].state[list(L['self'].state.keys())[47]]['step'], '_dynamo_dynamic_indices') == False
+ | | | | | | +- NO_TENSOR_ALIASING
+ | | | | | +- GuardManager: source=L['self'].state[list(L['self'].state.keys())[47]]['exp_avg'], accessed_by=DictGetItemGuardAccessor(exp_avg)
+ | | | | | | +- ID_MATCH: ___check_obj_id(L['self'].state[list(L['self'].state.keys())[47]]['exp_avg'], 140561608477088)
+ | | | | | | +- TENSOR_MATCH: check_tensor(L['self'].state[list(L['self'].state.keys())[47]]['exp_avg'], Tensor, DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), torch.float32, device=0, requires_grad=False, size=[3072], stride=[1])
+ | | | | | | +- NO_HASATTR: hasattr(L['self'].state[list(L['self'].state.keys())[47]]['exp_avg'], '_dynamo_dynamic_indices') == False
+ | | | | | | +- NO_TENSOR_ALIASING
+ | | | | | +- GuardManager: source=L['self'].state[list(L['self'].state.keys())[47]]['exp_avg_sq'], accessed_by=DictGetItemGuardAccessor(exp_avg_sq)
+ | | | | | | +- ID_MATCH: ___check_obj_id(L['self'].state[list(L['self'].state.keys())[47]]['exp_avg_sq'], 140561608477008)
+ | | | | | | +- TENSOR_MATCH: check_tensor(L['self'].state[list(L['self'].state.keys())[47]]['exp_avg_sq'], Tensor, DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), torch.float32, device=0, requires_grad=False, size=[3072], stride=[1])
+ | | | | | | +- NO_HASATTR: hasattr(L['self'].state[list(L['self'].state.keys())[47]]['exp_avg_sq'], '_dynamo_dynamic_indices') == False
+ | | | | | | +- NO_TENSOR_ALIASING
+ | | | +- KeyValueManager pair at index=48
+ | | | | +- ValueManager: GuardManager: source=L['self'].state[list(L['self'].state.keys())[48]]
+ | | | | | +- GuardManager: source=L['self'].state[list(L['self'].state.keys())[48]]['step'], accessed_by=DictGetItemGuardAccessor(step)
+ | | | | | | +- ID_MATCH: ___check_obj_id(L['self'].state[list(L['self'].state.keys())[48]]['step'], 140561608475968)
+ | | | | | | +- TENSOR_MATCH: check_tensor(L['self'].state[list(L['self'].state.keys())[48]]['step'], Tensor, DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), torch.float32, device=0, requires_grad=False, size=[], stride=[])
+ | | | | | | +- NO_HASATTR: hasattr(L['self'].state[list(L['self'].state.keys())[48]]['step'], '_dynamo_dynamic_indices') == False
+ | | | | | | +- NO_TENSOR_ALIASING
+ | | | | | +- GuardManager: source=L['self'].state[list(L['self'].state.keys())[48]]['exp_avg'], accessed_by=DictGetItemGuardAccessor(exp_avg)
+ | | | | | | +- ID_MATCH: ___check_obj_id(L['self'].state[list(L['self'].state.keys())[48]]['exp_avg'], 140561608476528)
+ | | | | | | +- TENSOR_MATCH: check_tensor(L['self'].state[list(L['self'].state.keys())[48]]['exp_avg'], Tensor, DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), torch.float32, device=0, requires_grad=False, size=[768, 3072], stride=[3072, 1])
+ | | | | | | +- NO_HASATTR: hasattr(L['self'].state[list(L['self'].state.keys())[48]]['exp_avg'], '_dynamo_dynamic_indices') == False
+ | | | | | | +- NO_TENSOR_ALIASING
+ | | | | | +- GuardManager: source=L['self'].state[list(L['self'].state.keys())[48]]['exp_avg_sq'], accessed_by=DictGetItemGuardAccessor(exp_avg_sq)
+ | | | | | | +- ID_MATCH: ___check_obj_id(L['self'].state[list(L['self'].state.keys())[48]]['exp_avg_sq'], 140561608476368)
+ | | | | | | +- TENSOR_MATCH: check_tensor(L['self'].state[list(L['self'].state.keys())[48]]['exp_avg_sq'], Tensor, DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), torch.float32, device=0, requires_grad=False, size=[768, 3072], stride=[3072, 1])
+ | | | | | | +- NO_HASATTR: hasattr(L['self'].state[list(L['self'].state.keys())[48]]['exp_avg_sq'], '_dynamo_dynamic_indices') == False
+ | | | | | | +- NO_TENSOR_ALIASING
+ | | | +- KeyValueManager pair at index=49
+ | | | | +- ValueManager: GuardManager: source=L['self'].state[list(L['self'].state.keys())[49]]
+ | | | | | +- GuardManager: source=L['self'].state[list(L['self'].state.keys())[49]]['step'], accessed_by=DictGetItemGuardAccessor(step)
+ | | | | | | +- ID_MATCH: ___check_obj_id(L['self'].state[list(L['self'].state.keys())[49]]['step'], 140561608476128)
+ | | | | | | +- TENSOR_MATCH: check_tensor(L['self'].state[list(L['self'].state.keys())[49]]['step'], Tensor, DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), torch.float32, device=0, requires_grad=False, size=[], stride=[])
+ | | | | | | +- NO_HASATTR: hasattr(L['self'].state[list(L['self'].state.keys())[49]]['step'], '_dynamo_dynamic_indices') == False
+ | | | | | | +- NO_TENSOR_ALIASING
+ | | | | | +- GuardManager: source=L['self'].state[list(L['self'].state.keys())[49]]['exp_avg'], accessed_by=DictGetItemGuardAccessor(exp_avg)
+ | | | | | | +- ID_MATCH: ___check_obj_id(L['self'].state[list(L['self'].state.keys())[49]]['exp_avg'], 140561608475808)
+ | | | | | | +- TENSOR_MATCH: check_tensor(L['self'].state[list(L['self'].state.keys())[49]]['exp_avg'], Tensor, DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), torch.float32, device=0, requires_grad=False, size=[768], stride=[1])
+ | | | | | | +- NO_HASATTR: hasattr(L['self'].state[list(L['self'].state.keys())[49]]['exp_avg'], '_dynamo_dynamic_indices') == False
+ | | | | | | +- NO_TENSOR_ALIASING
+ | | | | | +- GuardManager: source=L['self'].state[list(L['self'].state.keys())[49]]['exp_avg_sq'], accessed_by=DictGetItemGuardAccessor(exp_avg_sq)
+ | | | | | | +- ID_MATCH: ___check_obj_id(L['self'].state[list(L['self'].state.keys())[49]]['exp_avg_sq'], 140561608484768)
+ | | | | | | +- TENSOR_MATCH: check_tensor(L['self'].state[list(L['self'].state.keys())[49]]['exp_avg_sq'], Tensor, DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), torch.float32, device=0, requires_grad=False, size=[768], stride=[1])
+ | | | | | | +- NO_HASATTR: hasattr(L['self'].state[list(L['self'].state.keys())[49]]['exp_avg_sq'], '_dynamo_dynamic_indices') == False
+ | | | | | | +- NO_TENSOR_ALIASING
+ | | | +- KeyValueManager pair at index=50
+ | | | | +- ValueManager: GuardManager: source=L['self'].state[list(L['self'].state.keys())[50]]
+ | | | | | +- GuardManager: source=L['self'].state[list(L['self'].state.keys())[50]]['step'], accessed_by=DictGetItemGuardAccessor(step)
+ | | | | | | +- ID_MATCH: ___check_obj_id(L['self'].state[list(L['self'].state.keys())[50]]['step'], 140561608484128)
+ | | | | | | +- TENSOR_MATCH: check_tensor(L['self'].state[list(L['self'].state.keys())[50]]['step'], Tensor, DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), torch.float32, device=0, requires_grad=False, size=[], stride=[])
+ | | | | | | +- NO_HASATTR: hasattr(L['self'].state[list(L['self'].state.keys())[50]]['step'], '_dynamo_dynamic_indices') == False
+ | | | | | | +- NO_TENSOR_ALIASING
+ | | | | | +- GuardManager: source=L['self'].state[list(L['self'].state.keys())[50]]['exp_avg'], accessed_by=DictGetItemGuardAccessor(exp_avg)
+ | | | | | | +- ID_MATCH: ___check_obj_id(L['self'].state[list(L['self'].state.keys())[50]]['exp_avg'], 140561608483408)
+ | | | | | | +- TENSOR_MATCH: check_tensor(L['self'].state[list(L['self'].state.keys())[50]]['exp_avg'], Tensor, DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), torch.float32, device=0, requires_grad=False, size=[768], stride=[1])
+ | | | | | | +- NO_HASATTR: hasattr(L['self'].state[list(L['self'].state.keys())[50]]['exp_avg'], '_dynamo_dynamic_indices') == False
+ | | | | | | +- NO_TENSOR_ALIASING
+ | | | | | +- GuardManager: source=L['self'].state[list(L['self'].state.keys())[50]]['exp_avg_sq'], accessed_by=DictGetItemGuardAccessor(exp_avg_sq)
+ | | | | | | +- ID_MATCH: ___check_obj_id(L['self'].state[list(L['self'].state.keys())[50]]['exp_avg_sq'], 140561608482608)
+ | | | | | | +- TENSOR_MATCH: check_tensor(L['self'].state[list(L['self'].state.keys())[50]]['exp_avg_sq'], Tensor, DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), torch.float32, device=0, requires_grad=False, size=[768], stride=[1])
+ | | | | | | +- NO_HASATTR: hasattr(L['self'].state[list(L['self'].state.keys())[50]]['exp_avg_sq'], '_dynamo_dynamic_indices') == False
+ | | | | | | +- NO_TENSOR_ALIASING
+ | | | +- KeyValueManager pair at index=51
+ | | | | +- ValueManager: GuardManager: source=L['self'].state[list(L['self'].state.keys())[51]]
+ | | | | | +- GuardManager: source=L['self'].state[list(L['self'].state.keys())[51]]['step'], accessed_by=DictGetItemGuardAccessor(step)
+ | | | | | | +- ID_MATCH: ___check_obj_id(L['self'].state[list(L['self'].state.keys())[51]]['step'], 140561608480848)
+ | | | | | | +- TENSOR_MATCH: check_tensor(L['self'].state[list(L['self'].state.keys())[51]]['step'], Tensor, DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), torch.float32, device=0, requires_grad=False, size=[], stride=[])
+ | | | | | | +- NO_HASATTR: hasattr(L['self'].state[list(L['self'].state.keys())[51]]['step'], '_dynamo_dynamic_indices') == False
+ | | | | | | +- NO_TENSOR_ALIASING
+ | | | | | +- GuardManager: source=L['self'].state[list(L['self'].state.keys())[51]]['exp_avg'], accessed_by=DictGetItemGuardAccessor(exp_avg)
+ | | | | | | +- ID_MATCH: ___check_obj_id(L['self'].state[list(L['self'].state.keys())[51]]['exp_avg'], 140561608480128)
+ | | | | | | +- TENSOR_MATCH: check_tensor(L['self'].state[list(L['self'].state.keys())[51]]['exp_avg'], Tensor, DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), torch.float32, device=0, requires_grad=False, size=[768], stride=[1])
+ | | | | | | +- NO_HASATTR: hasattr(L['self'].state[list(L['self'].state.keys())[51]]['exp_avg'], '_dynamo_dynamic_indices') == False
+ | | | | | | +- NO_TENSOR_ALIASING
+ | | | | | +- GuardManager: source=L['self'].state[list(L['self'].state.keys())[51]]['exp_avg_sq'], accessed_by=DictGetItemGuardAccessor(exp_avg_sq)
+ | | | | | | +- ID_MATCH: ___check_obj_id(L['self'].state[list(L['self'].state.keys())[51]]['exp_avg_sq'], 140561608479168)
+ | | | | | | +- TENSOR_MATCH: check_tensor(L['self'].state[list(L['self'].state.keys())[51]]['exp_avg_sq'], Tensor, DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), torch.float32, device=0, requires_grad=False, size=[768], stride=[1])
+ | | | | | | +- NO_HASATTR: hasattr(L['self'].state[list(L['self'].state.keys())[51]]['exp_avg_sq'], '_dynamo_dynamic_indices') == False
+ | | | | | | +- NO_TENSOR_ALIASING
+ | | | +- KeyValueManager pair at index=52
+ | | | | +- ValueManager: GuardManager: source=L['self'].state[list(L['self'].state.keys())[52]]
+ | | | | | +- GuardManager: source=L['self'].state[list(L['self'].state.keys())[52]]['step'], accessed_by=DictGetItemGuardAccessor(step)
+ | | | | | | +- ID_MATCH: ___check_obj_id(L['self'].state[list(L['self'].state.keys())[52]]['step'], 140561608477488)
+ | | | | | | +- TENSOR_MATCH: check_tensor(L['self'].state[list(L['self'].state.keys())[52]]['step'], Tensor, DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), torch.float32, device=0, requires_grad=False, size=[], stride=[])
+ | | | | | | +- NO_HASATTR: hasattr(L['self'].state[list(L['self'].state.keys())[52]]['step'], '_dynamo_dynamic_indices') == False
+ | | | | | | +- NO_TENSOR_ALIASING
+ | | | | | +- GuardManager: source=L['self'].state[list(L['self'].state.keys())[52]]['exp_avg'], accessed_by=DictGetItemGuardAccessor(exp_avg)
+ | | | | | | +- ID_MATCH: ___check_obj_id(L['self'].state[list(L['self'].state.keys())[52]]['exp_avg'], 140561608476768)
+ | | | | | | +- TENSOR_MATCH: check_tensor(L['self'].state[list(L['self'].state.keys())[52]]['exp_avg'], Tensor, DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), torch.float32, device=0, requires_grad=False, size=[2304, 768], stride=[768, 1])
+ | | | | | | +- NO_HASATTR: hasattr(L['self'].state[list(L['self'].state.keys())[52]]['exp_avg'], '_dynamo_dynamic_indices') == False
+ | | | | | | +- NO_TENSOR_ALIASING
+ | | | | | +- GuardManager: source=L['self'].state[list(L['self'].state.keys())[52]]['exp_avg_sq'], accessed_by=DictGetItemGuardAccessor(exp_avg_sq)
+ | | | | | | +- ID_MATCH: ___check_obj_id(L['self'].state[list(L['self'].state.keys())[52]]['exp_avg_sq'], 140561608475728)
+ | | | | | | +- TENSOR_MATCH: check_tensor(L['self'].state[list(L['self'].state.keys())[52]]['exp_avg_sq'], Tensor, DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), torch.float32, device=0, requires_grad=False, size=[2304, 768], stride=[768, 1])
+ | | | | | | +- NO_HASATTR: hasattr(L['self'].state[list(L['self'].state.keys())[52]]['exp_avg_sq'], '_dynamo_dynamic_indices') == False
+ | | | | | | +- NO_TENSOR_ALIASING
+ | | | +- KeyValueManager pair at index=53
+ | | | | +- ValueManager: GuardManager: source=L['self'].state[list(L['self'].state.keys())[53]]
+ | | | | | +- GuardManager: source=L['self'].state[list(L['self'].state.keys())[53]]['step'], accessed_by=DictGetItemGuardAccessor(step)
+ | | | | | | +- ID_MATCH: ___check_obj_id(L['self'].state[list(L['self'].state.keys())[53]]['step'], 140561608484448)
+ | | | | | | +- TENSOR_MATCH: check_tensor(L['self'].state[list(L['self'].state.keys())[53]]['step'], Tensor, DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), torch.float32, device=0, requires_grad=False, size=[], stride=[])
+ | | | | | | +- NO_HASATTR: hasattr(L['self'].state[list(L['self'].state.keys())[53]]['step'], '_dynamo_dynamic_indices') == False
+ | | | | | | +- NO_TENSOR_ALIASING
+ | | | | | +- GuardManager: source=L['self'].state[list(L['self'].state.keys())[53]]['exp_avg'], accessed_by=DictGetItemGuardAccessor(exp_avg)
+ | | | | | | +- ID_MATCH: ___check_obj_id(L['self'].state[list(L['self'].state.keys())[53]]['exp_avg'], 140561608483968)
+ | | | | | | +- TENSOR_MATCH: check_tensor(L['self'].state[list(L['self'].state.keys())[53]]['exp_avg'], Tensor, DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), torch.float32, device=0, requires_grad=False, size=[2304], stride=[1])
+ | | | | | | +- NO_HASATTR: hasattr(L['self'].state[list(L['self'].state.keys())[53]]['exp_avg'], '_dynamo_dynamic_indices') == False
+ | | | | | | +- NO_TENSOR_ALIASING
+ | | | | | +- GuardManager: source=L['self'].state[list(L['self'].state.keys())[53]]['exp_avg_sq'], accessed_by=DictGetItemGuardAccessor(exp_avg_sq)
+ | | | | | | +- ID_MATCH: ___check_obj_id(L['self'].state[list(L['self'].state.keys())[53]]['exp_avg_sq'], 140561608483568)
+ | | | | | | +- TENSOR_MATCH: check_tensor(L['self'].state[list(L['self'].state.keys())[53]]['exp_avg_sq'], Tensor, DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), torch.float32, device=0, requires_grad=False, size=[2304], stride=[1])
+ | | | | | | +- NO_HASATTR: hasattr(L['self'].state[list(L['self'].state.keys())[53]]['exp_avg_sq'], '_dynamo_dynamic_indices') == False
+ | | | | | | +- NO_TENSOR_ALIASING
+ | | | +- KeyValueManager pair at index=54
+ | | | | +- ValueManager: GuardManager: source=L['self'].state[list(L['self'].state.keys())[54]]
+ | | | | | +- GuardManager: source=L['self'].state[list(L['self'].state.keys())[54]]['step'], accessed_by=DictGetItemGuardAccessor(step)
+ | | | | | | +- ID_MATCH: ___check_obj_id(L['self'].state[list(L['self'].state.keys())[54]]['step'], 140561608480928)
+ | | | | | | +- TENSOR_MATCH: check_tensor(L['self'].state[list(L['self'].state.keys())[54]]['step'], Tensor, DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), torch.float32, device=0, requires_grad=False, size=[], stride=[])
+ | | | | | | +- NO_HASATTR: hasattr(L['self'].state[list(L['self'].state.keys())[54]]['step'], '_dynamo_dynamic_indices') == False
+ | | | | | | +- NO_TENSOR_ALIASING
+ | | | | | +- GuardManager: source=L['self'].state[list(L['self'].state.keys())[54]]['exp_avg'], accessed_by=DictGetItemGuardAccessor(exp_avg)
+ | | | | | | +- ID_MATCH: ___check_obj_id(L['self'].state[list(L['self'].state.keys())[54]]['exp_avg'], 140561608482928)
+ | | | | | | +- TENSOR_MATCH: check_tensor(L['self'].state[list(L['self'].state.keys())[54]]['exp_avg'], Tensor, DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), torch.float32, device=0, requires_grad=False, size=[768, 768], stride=[768, 1])
+ | | | | | | +- NO_HASATTR: hasattr(L['self'].state[list(L['self'].state.keys())[54]]['exp_avg'], '_dynamo_dynamic_indices') == False
+ | | | | | | +- NO_TENSOR_ALIASING
+ | | | | | +- GuardManager: source=L['self'].state[list(L['self'].state.keys())[54]]['exp_avg_sq'], accessed_by=DictGetItemGuardAccessor(exp_avg_sq)
+ | | | | | | +- ID_MATCH: ___check_obj_id(L['self'].state[list(L['self'].state.keys())[54]]['exp_avg_sq'], 140561608482448)
+ | | | | | | +- TENSOR_MATCH: check_tensor(L['self'].state[list(L['self'].state.keys())[54]]['exp_avg_sq'], Tensor, DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), torch.float32, device=0, requires_grad=False, size=[768, 768], stride=[768, 1])
+ | | | | | | +- NO_HASATTR: hasattr(L['self'].state[list(L['self'].state.keys())[54]]['exp_avg_sq'], '_dynamo_dynamic_indices') == False
+ | | | | | | +- NO_TENSOR_ALIASING
+ | | | +- KeyValueManager pair at index=55
+ | | | | +- ValueManager: GuardManager: source=L['self'].state[list(L['self'].state.keys())[55]]
+ | | | | | +- GuardManager: source=L['self'].state[list(L['self'].state.keys())[55]]['step'], accessed_by=DictGetItemGuardAccessor(step)
+ | | | | | | +- ID_MATCH: ___check_obj_id(L['self'].state[list(L['self'].state.keys())[55]]['step'], 140561608481728)
+ | | | | | | +- TENSOR_MATCH: check_tensor(L['self'].state[list(L['self'].state.keys())[55]]['step'], Tensor, DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), torch.float32, device=0, requires_grad=False, size=[], stride=[])
+ | | | | | | +- NO_HASATTR: hasattr(L['self'].state[list(L['self'].state.keys())[55]]['step'], '_dynamo_dynamic_indices') == False
+ | | | | | | +- NO_TENSOR_ALIASING
+ | | | | | +- GuardManager: source=L['self'].state[list(L['self'].state.keys())[55]]['exp_avg'], accessed_by=DictGetItemGuardAccessor(exp_avg)
+ | | | | | | +- ID_MATCH: ___check_obj_id(L['self'].state[list(L['self'].state.keys())[55]]['exp_avg'], 140561608481008)
+ | | | | | | +- TENSOR_MATCH: check_tensor(L['self'].state[list(L['self'].state.keys())[55]]['exp_avg'], Tensor, DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), torch.float32, device=0, requires_grad=False, size=[768], stride=[1])
+ | | | | | | +- NO_HASATTR: hasattr(L['self'].state[list(L['self'].state.keys())[55]]['exp_avg'], '_dynamo_dynamic_indices') == False
+ | | | | | | +- NO_TENSOR_ALIASING
+ | | | | | +- GuardManager: source=L['self'].state[list(L['self'].state.keys())[55]]['exp_avg_sq'], accessed_by=DictGetItemGuardAccessor(exp_avg_sq)
+ | | | | | | +- ID_MATCH: ___check_obj_id(L['self'].state[list(L['self'].state.keys())[55]]['exp_avg_sq'], 140561608481488)
+ | | | | | | +- TENSOR_MATCH: check_tensor(L['self'].state[list(L['self'].state.keys())[55]]['exp_avg_sq'], Tensor, DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), torch.float32, device=0, requires_grad=False, size=[768], stride=[1])
+ | | | | | | +- NO_HASATTR: hasattr(L['self'].state[list(L['self'].state.keys())[55]]['exp_avg_sq'], '_dynamo_dynamic_indices') == False
+ | | | | | | +- NO_TENSOR_ALIASING
+ | | | +- KeyValueManager pair at index=56
+ | | | | +- ValueManager: GuardManager: source=L['self'].state[list(L['self'].state.keys())[56]]
+ | | | | | +- GuardManager: source=L['self'].state[list(L['self'].state.keys())[56]]['step'], accessed_by=DictGetItemGuardAccessor(step)
+ | | | | | | +- ID_MATCH: ___check_obj_id(L['self'].state[list(L['self'].state.keys())[56]]['step'], 140561608481088)
+ | | | | | | +- TENSOR_MATCH: check_tensor(L['self'].state[list(L['self'].state.keys())[56]]['step'], Tensor, DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), torch.float32, device=0, requires_grad=False, size=[], stride=[])
+ | | | | | | +- NO_HASATTR: hasattr(L['self'].state[list(L['self'].state.keys())[56]]['step'], '_dynamo_dynamic_indices') == False
+ | | | | | | +- NO_TENSOR_ALIASING
+ | | | | | +- GuardManager: source=L['self'].state[list(L['self'].state.keys())[56]]['exp_avg'], accessed_by=DictGetItemGuardAccessor(exp_avg)
+ | | | | | | +- ID_MATCH: ___check_obj_id(L['self'].state[list(L['self'].state.keys())[56]]['exp_avg'], 140561608480688)
+ | | | | | | +- TENSOR_MATCH: check_tensor(L['self'].state[list(L['self'].state.keys())[56]]['exp_avg'], Tensor, DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), torch.float32, device=0, requires_grad=False, size=[768], stride=[1])
+ | | | | | | +- NO_HASATTR: hasattr(L['self'].state[list(L['self'].state.keys())[56]]['exp_avg'], '_dynamo_dynamic_indices') == False
+ | | | | | | +- NO_TENSOR_ALIASING
+ | | | | | +- GuardManager: source=L['self'].state[list(L['self'].state.keys())[56]]['exp_avg_sq'], accessed_by=DictGetItemGuardAccessor(exp_avg_sq)
+ | | | | | | +- ID_MATCH: ___check_obj_id(L['self'].state[list(L['self'].state.keys())[56]]['exp_avg_sq'], 140561608480288)
+ | | | | | | +- TENSOR_MATCH: check_tensor(L['self'].state[list(L['self'].state.keys())[56]]['exp_avg_sq'], Tensor, DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), torch.float32, device=0, requires_grad=False, size=[768], stride=[1])
+ | | | | | | +- NO_HASATTR: hasattr(L['self'].state[list(L['self'].state.keys())[56]]['exp_avg_sq'], '_dynamo_dynamic_indices') == False
+ | | | | | | +- NO_TENSOR_ALIASING
+ | | | +- KeyValueManager pair at index=57
+ | | | | +- ValueManager: GuardManager: source=L['self'].state[list(L['self'].state.keys())[57]]
+ | | | | | +- GuardManager: source=L['self'].state[list(L['self'].state.keys())[57]]['step'], accessed_by=DictGetItemGuardAccessor(step)
+ | | | | | | +- ID_MATCH: ___check_obj_id(L['self'].state[list(L['self'].state.keys())[57]]['step'], 140561608479248)
+ | | | | | | +- TENSOR_MATCH: check_tensor(L['self'].state[list(L['self'].state.keys())[57]]['step'], Tensor, DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), torch.float32, device=0, requires_grad=False, size=[], stride=[])
+ | | | | | | +- NO_HASATTR: hasattr(L['self'].state[list(L['self'].state.keys())[57]]['step'], '_dynamo_dynamic_indices') == False
+ | | | | | | +- NO_TENSOR_ALIASING
+ | | | | | +- GuardManager: source=L['self'].state[list(L['self'].state.keys())[57]]['exp_avg'], accessed_by=DictGetItemGuardAccessor(exp_avg)
+ | | | | | | +- ID_MATCH: ___check_obj_id(L['self'].state[list(L['self'].state.keys())[57]]['exp_avg'], 140561608479808)
+ | | | | | | +- TENSOR_MATCH: check_tensor(L['self'].state[list(L['self'].state.keys())[57]]['exp_avg'], Tensor, DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), torch.float32, device=0, requires_grad=False, size=[768], stride=[1])
+ | | | | | | +- NO_HASATTR: hasattr(L['self'].state[list(L['self'].state.keys())[57]]['exp_avg'], '_dynamo_dynamic_indices') == False
+ | | | | | | +- NO_TENSOR_ALIASING
+ | | | | | +- GuardManager: source=L['self'].state[list(L['self'].state.keys())[57]]['exp_avg_sq'], accessed_by=DictGetItemGuardAccessor(exp_avg_sq)
+ | | | | | | +- ID_MATCH: ___check_obj_id(L['self'].state[list(L['self'].state.keys())[57]]['exp_avg_sq'], 140561608479568)
+ | | | | | | +- TENSOR_MATCH: check_tensor(L['self'].state[list(L['self'].state.keys())[57]]['exp_avg_sq'], Tensor, DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), torch.float32, device=0, requires_grad=False, size=[768], stride=[1])
+ | | | | | | +- NO_HASATTR: hasattr(L['self'].state[list(L['self'].state.keys())[57]]['exp_avg_sq'], '_dynamo_dynamic_indices') == False
+ | | | | | | +- NO_TENSOR_ALIASING
+ | | | +- KeyValueManager pair at index=58
+ | | | | +- ValueManager: GuardManager: source=L['self'].state[list(L['self'].state.keys())[58]]
+ | | | | | +- GuardManager: source=L['self'].state[list(L['self'].state.keys())[58]]['step'], accessed_by=DictGetItemGuardAccessor(step)
+ | | | | | | +- ID_MATCH: ___check_obj_id(L['self'].state[list(L['self'].state.keys())[58]]['step'], 140561608479008)
+ | | | | | | +- TENSOR_MATCH: check_tensor(L['self'].state[list(L['self'].state.keys())[58]]['step'], Tensor, DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), torch.float32, device=0, requires_grad=False, size=[], stride=[])
+ | | | | | | +- NO_HASATTR: hasattr(L['self'].state[list(L['self'].state.keys())[58]]['step'], '_dynamo_dynamic_indices') == False
+ | | | | | | +- NO_TENSOR_ALIASING
+ | | | | | +- GuardManager: source=L['self'].state[list(L['self'].state.keys())[58]]['exp_avg'], accessed_by=DictGetItemGuardAccessor(exp_avg)
+ | | | | | | +- ID_MATCH: ___check_obj_id(L['self'].state[list(L['self'].state.keys())[58]]['exp_avg'], 140561608478608)
+ | | | | | | +- TENSOR_MATCH: check_tensor(L['self'].state[list(L['self'].state.keys())[58]]['exp_avg'], Tensor, DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), torch.float32, device=0, requires_grad=False, size=[3072, 768], stride=[768, 1])
+ | | | | | | +- NO_HASATTR: hasattr(L['self'].state[list(L['self'].state.keys())[58]]['exp_avg'], '_dynamo_dynamic_indices') == False
+ | | | | | | +- NO_TENSOR_ALIASING
+ | | | | | +- GuardManager: source=L['self'].state[list(L['self'].state.keys())[58]]['exp_avg_sq'], accessed_by=DictGetItemGuardAccessor(exp_avg_sq)
+ | | | | | | +- ID_MATCH: ___check_obj_id(L['self'].state[list(L['self'].state.keys())[58]]['exp_avg_sq'], 140561608478288)
+ | | | | | | +- TENSOR_MATCH: check_tensor(L['self'].state[list(L['self'].state.keys())[58]]['exp_avg_sq'], Tensor, DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), torch.float32, device=0, requires_grad=False, size=[3072, 768], stride=[768, 1])
+ | | | | | | +- NO_HASATTR: hasattr(L['self'].state[list(L['self'].state.keys())[58]]['exp_avg_sq'], '_dynamo_dynamic_indices') == False
+ | | | | | | +- NO_TENSOR_ALIASING
+ | | | +- KeyValueManager pair at index=59
+ | | | | +- ValueManager: GuardManager: source=L['self'].state[list(L['self'].state.keys())[59]]
+ | | | | | +- GuardManager: source=L['self'].state[list(L['self'].state.keys())[59]]['step'], accessed_by=DictGetItemGuardAccessor(step)
+ | | | | | | +- ID_MATCH: ___check_obj_id(L['self'].state[list(L['self'].state.keys())[59]]['step'], 140561608478128)
+ | | | | | | +- TENSOR_MATCH: check_tensor(L['self'].state[list(L['self'].state.keys())[59]]['step'], Tensor, DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), torch.float32, device=0, requires_grad=False, size=[], stride=[])
+ | | | | | | +- NO_HASATTR: hasattr(L['self'].state[list(L['self'].state.keys())[59]]['step'], '_dynamo_dynamic_indices') == False
+ | | | | | | +- NO_TENSOR_ALIASING
+ | | | | | +- GuardManager: source=L['self'].state[list(L['self'].state.keys())[59]]['exp_avg'], accessed_by=DictGetItemGuardAccessor(exp_avg)
+ | | | | | | +- ID_MATCH: ___check_obj_id(L['self'].state[list(L['self'].state.keys())[59]]['exp_avg'], 140561608477888)
+ | | | | | | +- TENSOR_MATCH: check_tensor(L['self'].state[list(L['self'].state.keys())[59]]['exp_avg'], Tensor, DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), torch.float32, device=0, requires_grad=False, size=[3072], stride=[1])
+ | | | | | | +- NO_HASATTR: hasattr(L['self'].state[list(L['self'].state.keys())[59]]['exp_avg'], '_dynamo_dynamic_indices') == False
+ | | | | | | +- NO_TENSOR_ALIASING
+ | | | | | +- GuardManager: source=L['self'].state[list(L['self'].state.keys())[59]]['exp_avg_sq'], accessed_by=DictGetItemGuardAccessor(exp_avg_sq)
+ | | | | | | +- ID_MATCH: ___check_obj_id(L['self'].state[list(L['self'].state.keys())[59]]['exp_avg_sq'], 140561608477728)
+ | | | | | | +- TENSOR_MATCH: check_tensor(L['self'].state[list(L['self'].state.keys())[59]]['exp_avg_sq'], Tensor, DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), torch.float32, device=0, requires_grad=False, size=[3072], stride=[1])
+ | | | | | | +- NO_HASATTR: hasattr(L['self'].state[list(L['self'].state.keys())[59]]['exp_avg_sq'], '_dynamo_dynamic_indices') == False
+ | | | | | | +- NO_TENSOR_ALIASING
+ | | | +- KeyValueManager pair at index=60
+ | | | | +- ValueManager: GuardManager: source=L['self'].state[list(L['self'].state.keys())[60]]
+ | | | | | +- GuardManager: source=L['self'].state[list(L['self'].state.keys())[60]]['step'], accessed_by=DictGetItemGuardAccessor(step)
+ | | | | | | +- ID_MATCH: ___check_obj_id(L['self'].state[list(L['self'].state.keys())[60]]['step'], 140561608476928)
+ | | | | | | +- TENSOR_MATCH: check_tensor(L['self'].state[list(L['self'].state.keys())[60]]['step'], Tensor, DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), torch.float32, device=0, requires_grad=False, size=[], stride=[])
+ | | | | | | +- NO_HASATTR: hasattr(L['self'].state[list(L['self'].state.keys())[60]]['step'], '_dynamo_dynamic_indices') == False
+ | | | | | | +- NO_TENSOR_ALIASING
+ | | | | | +- GuardManager: source=L['self'].state[list(L['self'].state.keys())[60]]['exp_avg'], accessed_by=DictGetItemGuardAccessor(exp_avg)
+ | | | | | | +- ID_MATCH: ___check_obj_id(L['self'].state[list(L['self'].state.keys())[60]]['exp_avg'], 140561608476608)
+ | | | | | | +- TENSOR_MATCH: check_tensor(L['self'].state[list(L['self'].state.keys())[60]]['exp_avg'], Tensor, DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), torch.float32, device=0, requires_grad=False, size=[768, 3072], stride=[3072, 1])
+ | | | | | | +- NO_HASATTR: hasattr(L['self'].state[list(L['self'].state.keys())[60]]['exp_avg'], '_dynamo_dynamic_indices') == False
+ | | | | | | +- NO_TENSOR_ALIASING
+ | | | | | +- GuardManager: source=L['self'].state[list(L['self'].state.keys())[60]]['exp_avg_sq'], accessed_by=DictGetItemGuardAccessor(exp_avg_sq)
+ | | | | | | +- ID_MATCH: ___check_obj_id(L['self'].state[list(L['self'].state.keys())[60]]['exp_avg_sq'], 140561608475888)
+ | | | | | | +- TENSOR_MATCH: check_tensor(L['self'].state[list(L['self'].state.keys())[60]]['exp_avg_sq'], Tensor, DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), torch.float32, device=0, requires_grad=False, size=[768, 3072], stride=[3072, 1])
+ | | | | | | +- NO_HASATTR: hasattr(L['self'].state[list(L['self'].state.keys())[60]]['exp_avg_sq'], '_dynamo_dynamic_indices') == False
+ | | | | | | +- NO_TENSOR_ALIASING
+ | | | +- KeyValueManager pair at index=61
+ | | | | +- ValueManager: GuardManager: source=L['self'].state[list(L['self'].state.keys())[61]]
+ | | | | | +- GuardManager: source=L['self'].state[list(L['self'].state.keys())[61]]['step'], accessed_by=DictGetItemGuardAccessor(step)
+ | | | | | | +- ID_MATCH: ___check_obj_id(L['self'].state[list(L['self'].state.keys())[61]]['step'], 140561608476208)
+ | | | | | | +- TENSOR_MATCH: check_tensor(L['self'].state[list(L['self'].state.keys())[61]]['step'], Tensor, DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), torch.float32, device=0, requires_grad=False, size=[], stride=[])
+ | | | | | | +- NO_HASATTR: hasattr(L['self'].state[list(L['self'].state.keys())[61]]['step'], '_dynamo_dynamic_indices') == False
+ | | | | | | +- NO_TENSOR_ALIASING
+ | | | | | +- GuardManager: source=L['self'].state[list(L['self'].state.keys())[61]]['exp_avg'], accessed_by=DictGetItemGuardAccessor(exp_avg)
+ | | | | | | +- ID_MATCH: ___check_obj_id(L['self'].state[list(L['self'].state.keys())[61]]['exp_avg'], 140561608476048)
+ | | | | | | +- TENSOR_MATCH: check_tensor(L['self'].state[list(L['self'].state.keys())[61]]['exp_avg'], Tensor, DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), torch.float32, device=0, requires_grad=False, size=[768], stride=[1])
+ | | | | | | +- NO_HASATTR: hasattr(L['self'].state[list(L['self'].state.keys())[61]]['exp_avg'], '_dynamo_dynamic_indices') == False
+ | | | | | | +- NO_TENSOR_ALIASING
+ | | | | | +- GuardManager: source=L['self'].state[list(L['self'].state.keys())[61]]['exp_avg_sq'], accessed_by=DictGetItemGuardAccessor(exp_avg_sq)
+ | | | | | | +- ID_MATCH: ___check_obj_id(L['self'].state[list(L['self'].state.keys())[61]]['exp_avg_sq'], 140561608485008)
+ | | | | | | +- TENSOR_MATCH: check_tensor(L['self'].state[list(L['self'].state.keys())[61]]['exp_avg_sq'], Tensor, DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), torch.float32, device=0, requires_grad=False, size=[768], stride=[1])
+ | | | | | | +- NO_HASATTR: hasattr(L['self'].state[list(L['self'].state.keys())[61]]['exp_avg_sq'], '_dynamo_dynamic_indices') == False
+ | | | | | | +- NO_TENSOR_ALIASING
+ | | | +- KeyValueManager pair at index=62
+ | | | | +- ValueManager: GuardManager: source=L['self'].state[list(L['self'].state.keys())[62]]
+ | | | | | +- GuardManager: source=L['self'].state[list(L['self'].state.keys())[62]]['step'], accessed_by=DictGetItemGuardAccessor(step)
+ | | | | | | +- ID_MATCH: ___check_obj_id(L['self'].state[list(L['self'].state.keys())[62]]['step'], 140561608485088)
+ | | | | | | +- TENSOR_MATCH: check_tensor(L['self'].state[list(L['self'].state.keys())[62]]['step'], Tensor, DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), torch.float32, device=0, requires_grad=False, size=[], stride=[])
+ | | | | | | +- NO_HASATTR: hasattr(L['self'].state[list(L['self'].state.keys())[62]]['step'], '_dynamo_dynamic_indices') == False
+ | | | | | | +- NO_TENSOR_ALIASING
+ | | | | | +- GuardManager: source=L['self'].state[list(L['self'].state.keys())[62]]['exp_avg'], accessed_by=DictGetItemGuardAccessor(exp_avg)
+ | | | | | | +- ID_MATCH: ___check_obj_id(L['self'].state[list(L['self'].state.keys())[62]]['exp_avg'], 140561608485168)
+ | | | | | | +- TENSOR_MATCH: check_tensor(L['self'].state[list(L['self'].state.keys())[62]]['exp_avg'], Tensor, DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), torch.float32, device=0, requires_grad=False, size=[768], stride=[1])
+ | | | | | | +- NO_HASATTR: hasattr(L['self'].state[list(L['self'].state.keys())[62]]['exp_avg'], '_dynamo_dynamic_indices') == False
+ | | | | | | +- NO_TENSOR_ALIASING
+ | | | | | +- GuardManager: source=L['self'].state[list(L['self'].state.keys())[62]]['exp_avg_sq'], accessed_by=DictGetItemGuardAccessor(exp_avg_sq)
+ | | | | | | +- ID_MATCH: ___check_obj_id(L['self'].state[list(L['self'].state.keys())[62]]['exp_avg_sq'], 140561608484928)
+ | | | | | | +- TENSOR_MATCH: check_tensor(L['self'].state[list(L['self'].state.keys())[62]]['exp_avg_sq'], Tensor, DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), torch.float32, device=0, requires_grad=False, size=[768], stride=[1])
+ | | | | | | +- NO_HASATTR: hasattr(L['self'].state[list(L['self'].state.keys())[62]]['exp_avg_sq'], '_dynamo_dynamic_indices') == False
+ | | | | | | +- NO_TENSOR_ALIASING
+ | | | +- KeyValueManager pair at index=63
+ | | | | +- ValueManager: GuardManager: source=L['self'].state[list(L['self'].state.keys())[63]]
+ | | | | | +- GuardManager: source=L['self'].state[list(L['self'].state.keys())[63]]['step'], accessed_by=DictGetItemGuardAccessor(step)
+ | | | | | | +- ID_MATCH: ___check_obj_id(L['self'].state[list(L['self'].state.keys())[63]]['step'], 140561608485328)
+ | | | | | | +- TENSOR_MATCH: check_tensor(L['self'].state[list(L['self'].state.keys())[63]]['step'], Tensor, DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), torch.float32, device=0, requires_grad=False, size=[], stride=[])
+ | | | | | | +- NO_HASATTR: hasattr(L['self'].state[list(L['self'].state.keys())[63]]['step'], '_dynamo_dynamic_indices') == False
+ | | | | | | +- NO_TENSOR_ALIASING
+ | | | | | +- GuardManager: source=L['self'].state[list(L['self'].state.keys())[63]]['exp_avg'], accessed_by=DictGetItemGuardAccessor(exp_avg)
+ | | | | | | +- ID_MATCH: ___check_obj_id(L['self'].state[list(L['self'].state.keys())[63]]['exp_avg'], 140561608485408)
+ | | | | | | +- TENSOR_MATCH: check_tensor(L['self'].state[list(L['self'].state.keys())[63]]['exp_avg'], Tensor, DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), torch.float32, device=0, requires_grad=False, size=[768], stride=[1])
+ | | | | | | +- NO_HASATTR: hasattr(L['self'].state[list(L['self'].state.keys())[63]]['exp_avg'], '_dynamo_dynamic_indices') == False
+ | | | | | | +- NO_TENSOR_ALIASING
+ | | | | | +- GuardManager: source=L['self'].state[list(L['self'].state.keys())[63]]['exp_avg_sq'], accessed_by=DictGetItemGuardAccessor(exp_avg_sq)
+ | | | | | | +- ID_MATCH: ___check_obj_id(L['self'].state[list(L['self'].state.keys())[63]]['exp_avg_sq'], 140561608485488)
+ | | | | | | +- TENSOR_MATCH: check_tensor(L['self'].state[list(L['self'].state.keys())[63]]['exp_avg_sq'], Tensor, DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), torch.float32, device=0, requires_grad=False, size=[768], stride=[1])
+ | | | | | | +- NO_HASATTR: hasattr(L['self'].state[list(L['self'].state.keys())[63]]['exp_avg_sq'], '_dynamo_dynamic_indices') == False
+ | | | | | | +- NO_TENSOR_ALIASING
+ | | | +- KeyValueManager pair at index=64
+ | | | | +- ValueManager: GuardManager: source=L['self'].state[list(L['self'].state.keys())[64]]
+ | | | | | +- GuardManager: source=L['self'].state[list(L['self'].state.keys())[64]]['step'], accessed_by=DictGetItemGuardAccessor(step)
+ | | | | | | +- ID_MATCH: ___check_obj_id(L['self'].state[list(L['self'].state.keys())[64]]['step'], 140561608485648)
+ | | | | | | +- TENSOR_MATCH: check_tensor(L['self'].state[list(L['self'].state.keys())[64]]['step'], Tensor, DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), torch.float32, device=0, requires_grad=False, size=[], stride=[])
+ | | | | | | +- NO_HASATTR: hasattr(L['self'].state[list(L['self'].state.keys())[64]]['step'], '_dynamo_dynamic_indices') == False
+ | | | | | | +- NO_TENSOR_ALIASING
+ | | | | | +- GuardManager: source=L['self'].state[list(L['self'].state.keys())[64]]['exp_avg'], accessed_by=DictGetItemGuardAccessor(exp_avg)
+ | | | | | | +- ID_MATCH: ___check_obj_id(L['self'].state[list(L['self'].state.keys())[64]]['exp_avg'], 140561608485728)
+ | | | | | | +- TENSOR_MATCH: check_tensor(L['self'].state[list(L['self'].state.keys())[64]]['exp_avg'], Tensor, DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), torch.float32, device=0, requires_grad=False, size=[2304, 768], stride=[768, 1])
+ | | | | | | +- NO_HASATTR: hasattr(L['self'].state[list(L['self'].state.keys())[64]]['exp_avg'], '_dynamo_dynamic_indices') == False
+ | | | | | | +- NO_TENSOR_ALIASING
+ | | | | | +- GuardManager: source=L['self'].state[list(L['self'].state.keys())[64]]['exp_avg_sq'], accessed_by=DictGetItemGuardAccessor(exp_avg_sq)
+ | | | | | | +- ID_MATCH: ___check_obj_id(L['self'].state[list(L['self'].state.keys())[64]]['exp_avg_sq'], 140561608485808)
+ | | | | | | +- TENSOR_MATCH: check_tensor(L['self'].state[list(L['self'].state.keys())[64]]['exp_avg_sq'], Tensor, DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), torch.float32, device=0, requires_grad=False, size=[2304, 768], stride=[768, 1])
+ | | | | | | +- NO_HASATTR: hasattr(L['self'].state[list(L['self'].state.keys())[64]]['exp_avg_sq'], '_dynamo_dynamic_indices') == False
+ | | | | | | +- NO_TENSOR_ALIASING
+ | | | +- KeyValueManager pair at index=65
+ | | | | +- ValueManager: GuardManager: source=L['self'].state[list(L['self'].state.keys())[65]]
+ | | | | | +- GuardManager: source=L['self'].state[list(L['self'].state.keys())[65]]['step'], accessed_by=DictGetItemGuardAccessor(step)
+ | | | | | | +- ID_MATCH: ___check_obj_id(L['self'].state[list(L['self'].state.keys())[65]]['step'], 140561608485968)
+ | | | | | | +- TENSOR_MATCH: check_tensor(L['self'].state[list(L['self'].state.keys())[65]]['step'], Tensor, DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), torch.float32, device=0, requires_grad=False, size=[], stride=[])
+ | | | | | | +- NO_HASATTR: hasattr(L['self'].state[list(L['self'].state.keys())[65]]['step'], '_dynamo_dynamic_indices') == False
+ | | | | | | +- NO_TENSOR_ALIASING
+ | | | | | +- GuardManager: source=L['self'].state[list(L['self'].state.keys())[65]]['exp_avg'], accessed_by=DictGetItemGuardAccessor(exp_avg)
+ | | | | | | +- ID_MATCH: ___check_obj_id(L['self'].state[list(L['self'].state.keys())[65]]['exp_avg'], 140561608486048)
+ | | | | | | +- TENSOR_MATCH: check_tensor(L['self'].state[list(L['self'].state.keys())[65]]['exp_avg'], Tensor, DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), torch.float32, device=0, requires_grad=False, size=[2304], stride=[1])
+ | | | | | | +- NO_HASATTR: hasattr(L['self'].state[list(L['self'].state.keys())[65]]['exp_avg'], '_dynamo_dynamic_indices') == False
+ | | | | | | +- NO_TENSOR_ALIASING
+ | | | | | +- GuardManager: source=L['self'].state[list(L['self'].state.keys())[65]]['exp_avg_sq'], accessed_by=DictGetItemGuardAccessor(exp_avg_sq)
+ | | | | | | +- ID_MATCH: ___check_obj_id(L['self'].state[list(L['self'].state.keys())[65]]['exp_avg_sq'], 140561608486128)
+ | | | | | | +- TENSOR_MATCH: check_tensor(L['self'].state[list(L['self'].state.keys())[65]]['exp_avg_sq'], Tensor, DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), torch.float32, device=0, requires_grad=False, size=[2304], stride=[1])
+ | | | | | | +- NO_HASATTR: hasattr(L['self'].state[list(L['self'].state.keys())[65]]['exp_avg_sq'], '_dynamo_dynamic_indices') == False
+ | | | | | | +- NO_TENSOR_ALIASING
+ | | | +- KeyValueManager pair at index=66
+ | | | | +- ValueManager: GuardManager: source=L['self'].state[list(L['self'].state.keys())[66]]
+ | | | | | +- GuardManager: source=L['self'].state[list(L['self'].state.keys())[66]]['step'], accessed_by=DictGetItemGuardAccessor(step)
+ | | | | | | +- ID_MATCH: ___check_obj_id(L['self'].state[list(L['self'].state.keys())[66]]['step'], 140561608486288)
+ | | | | | | +- TENSOR_MATCH: check_tensor(L['self'].state[list(L['self'].state.keys())[66]]['step'], Tensor, DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), torch.float32, device=0, requires_grad=False, size=[], stride=[])
+ | | | | | | +- NO_HASATTR: hasattr(L['self'].state[list(L['self'].state.keys())[66]]['step'], '_dynamo_dynamic_indices') == False
+ | | | | | | +- NO_TENSOR_ALIASING
+ | | | | | +- GuardManager: source=L['self'].state[list(L['self'].state.keys())[66]]['exp_avg'], accessed_by=DictGetItemGuardAccessor(exp_avg)
+ | | | | | | +- ID_MATCH: ___check_obj_id(L['self'].state[list(L['self'].state.keys())[66]]['exp_avg'], 140561608486368)
+ | | | | | | +- TENSOR_MATCH: check_tensor(L['self'].state[list(L['self'].state.keys())[66]]['exp_avg'], Tensor, DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), torch.float32, device=0, requires_grad=False, size=[768, 768], stride=[768, 1])
+ | | | | | | +- NO_HASATTR: hasattr(L['self'].state[list(L['self'].state.keys())[66]]['exp_avg'], '_dynamo_dynamic_indices') == False
+ | | | | | | +- NO_TENSOR_ALIASING
+ | | | | | +- GuardManager: source=L['self'].state[list(L['self'].state.keys())[66]]['exp_avg_sq'], accessed_by=DictGetItemGuardAccessor(exp_avg_sq)
+ | | | | | | +- ID_MATCH: ___check_obj_id(L['self'].state[list(L['self'].state.keys())[66]]['exp_avg_sq'], 140561608486448)
+ | | | | | | +- TENSOR_MATCH: check_tensor(L['self'].state[list(L['self'].state.keys())[66]]['exp_avg_sq'], Tensor, DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), torch.float32, device=0, requires_grad=False, size=[768, 768], stride=[768, 1])
+ | | | | | | +- NO_HASATTR: hasattr(L['self'].state[list(L['self'].state.keys())[66]]['exp_avg_sq'], '_dynamo_dynamic_indices') == False
+ | | | | | | +- NO_TENSOR_ALIASING
+ | | | +- KeyValueManager pair at index=67
+ | | | | +- ValueManager: GuardManager: source=L['self'].state[list(L['self'].state.keys())[67]]
+ | | | | | +- GuardManager: source=L['self'].state[list(L['self'].state.keys())[67]]['step'], accessed_by=DictGetItemGuardAccessor(step)
+ | | | | | | +- ID_MATCH: ___check_obj_id(L['self'].state[list(L['self'].state.keys())[67]]['step'], 140561608486608)
+ | | | | | | +- TENSOR_MATCH: check_tensor(L['self'].state[list(L['self'].state.keys())[67]]['step'], Tensor, DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), torch.float32, device=0, requires_grad=False, size=[], stride=[])
+ | | | | | | +- NO_HASATTR: hasattr(L['self'].state[list(L['self'].state.keys())[67]]['step'], '_dynamo_dynamic_indices') == False
+ | | | | | | +- NO_TENSOR_ALIASING
+ | | | | | +- GuardManager: source=L['self'].state[list(L['self'].state.keys())[67]]['exp_avg'], accessed_by=DictGetItemGuardAccessor(exp_avg)
+ | | | | | | +- ID_MATCH: ___check_obj_id(L['self'].state[list(L['self'].state.keys())[67]]['exp_avg'], 140561608486688)
+ | | | | | | +- TENSOR_MATCH: check_tensor(L['self'].state[list(L['self'].state.keys())[67]]['exp_avg'], Tensor, DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), torch.float32, device=0, requires_grad=False, size=[768], stride=[1])
+ | | | | | | +- NO_HASATTR: hasattr(L['self'].state[list(L['self'].state.keys())[67]]['exp_avg'], '_dynamo_dynamic_indices') == False
+ | | | | | | +- NO_TENSOR_ALIASING
+ | | | | | +- GuardManager: source=L['self'].state[list(L['self'].state.keys())[67]]['exp_avg_sq'], accessed_by=DictGetItemGuardAccessor(exp_avg_sq)
+ | | | | | | +- ID_MATCH: ___check_obj_id(L['self'].state[list(L['self'].state.keys())[67]]['exp_avg_sq'], 140561608486768)
+ | | | | | | +- TENSOR_MATCH: check_tensor(L['self'].state[list(L['self'].state.keys())[67]]['exp_avg_sq'], Tensor, DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), torch.float32, device=0, requires_grad=False, size=[768], stride=[1])
+ | | | | | | +- NO_HASATTR: hasattr(L['self'].state[list(L['self'].state.keys())[67]]['exp_avg_sq'], '_dynamo_dynamic_indices') == False
+ | | | | | | +- NO_TENSOR_ALIASING
+ | | | +- KeyValueManager pair at index=68
+ | | | | +- ValueManager: GuardManager: source=L['self'].state[list(L['self'].state.keys())[68]]
+ | | | | | +- GuardManager: source=L['self'].state[list(L['self'].state.keys())[68]]['step'], accessed_by=DictGetItemGuardAccessor(step)
+ | | | | | | +- ID_MATCH: ___check_obj_id(L['self'].state[list(L['self'].state.keys())[68]]['step'], 140561608486928)
+ | | | | | | +- TENSOR_MATCH: check_tensor(L['self'].state[list(L['self'].state.keys())[68]]['step'], Tensor, DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), torch.float32, device=0, requires_grad=False, size=[], stride=[])
+ | | | | | | +- NO_HASATTR: hasattr(L['self'].state[list(L['self'].state.keys())[68]]['step'], '_dynamo_dynamic_indices') == False
+ | | | | | | +- NO_TENSOR_ALIASING
+ | | | | | +- GuardManager: source=L['self'].state[list(L['self'].state.keys())[68]]['exp_avg'], accessed_by=DictGetItemGuardAccessor(exp_avg)
+ | | | | | | +- ID_MATCH: ___check_obj_id(L['self'].state[list(L['self'].state.keys())[68]]['exp_avg'], 140561608487008)
+ | | | | | | +- TENSOR_MATCH: check_tensor(L['self'].state[list(L['self'].state.keys())[68]]['exp_avg'], Tensor, DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), torch.float32, device=0, requires_grad=False, size=[768], stride=[1])
+ | | | | | | +- NO_HASATTR: hasattr(L['self'].state[list(L['self'].state.keys())[68]]['exp_avg'], '_dynamo_dynamic_indices') == False
+ | | | | | | +- NO_TENSOR_ALIASING
+ | | | | | +- GuardManager: source=L['self'].state[list(L['self'].state.keys())[68]]['exp_avg_sq'], accessed_by=DictGetItemGuardAccessor(exp_avg_sq)
+ | | | | | | +- ID_MATCH: ___check_obj_id(L['self'].state[list(L['self'].state.keys())[68]]['exp_avg_sq'], 140561608487088)
+ | | | | | | +- TENSOR_MATCH: check_tensor(L['self'].state[list(L['self'].state.keys())[68]]['exp_avg_sq'], Tensor, DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), torch.float32, device=0, requires_grad=False, size=[768], stride=[1])
+ | | | | | | +- NO_HASATTR: hasattr(L['self'].state[list(L['self'].state.keys())[68]]['exp_avg_sq'], '_dynamo_dynamic_indices') == False
+ | | | | | | +- NO_TENSOR_ALIASING
+ | | | +- KeyValueManager pair at index=69
+ | | | | +- ValueManager: GuardManager: source=L['self'].state[list(L['self'].state.keys())[69]]
+ | | | | | +- GuardManager: source=L['self'].state[list(L['self'].state.keys())[69]]['step'], accessed_by=DictGetItemGuardAccessor(step)
+ | | | | | | +- ID_MATCH: ___check_obj_id(L['self'].state[list(L['self'].state.keys())[69]]['step'], 140561608487248)
+ | | | | | | +- TENSOR_MATCH: check_tensor(L['self'].state[list(L['self'].state.keys())[69]]['step'], Tensor, DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), torch.float32, device=0, requires_grad=False, size=[], stride=[])
+ | | | | | | +- NO_HASATTR: hasattr(L['self'].state[list(L['self'].state.keys())[69]]['step'], '_dynamo_dynamic_indices') == False
+ | | | | | | +- NO_TENSOR_ALIASING
+ | | | | | +- GuardManager: source=L['self'].state[list(L['self'].state.keys())[69]]['exp_avg'], accessed_by=DictGetItemGuardAccessor(exp_avg)
+ | | | | | | +- ID_MATCH: ___check_obj_id(L['self'].state[list(L['self'].state.keys())[69]]['exp_avg'], 140561608487328)
+ | | | | | | +- TENSOR_MATCH: check_tensor(L['self'].state[list(L['self'].state.keys())[69]]['exp_avg'], Tensor, DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), torch.float32, device=0, requires_grad=False, size=[768], stride=[1])
+ | | | | | | +- NO_HASATTR: hasattr(L['self'].state[list(L['self'].state.keys())[69]]['exp_avg'], '_dynamo_dynamic_indices') == False
+ | | | | | | +- NO_TENSOR_ALIASING
+ | | | | | +- GuardManager: source=L['self'].state[list(L['self'].state.keys())[69]]['exp_avg_sq'], accessed_by=DictGetItemGuardAccessor(exp_avg_sq)
+ | | | | | | +- ID_MATCH: ___check_obj_id(L['self'].state[list(L['self'].state.keys())[69]]['exp_avg_sq'], 140561608487408)
+ | | | | | | +- TENSOR_MATCH: check_tensor(L['self'].state[list(L['self'].state.keys())[69]]['exp_avg_sq'], Tensor, DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), torch.float32, device=0, requires_grad=False, size=[768], stride=[1])
+ | | | | | | +- NO_HASATTR: hasattr(L['self'].state[list(L['self'].state.keys())[69]]['exp_avg_sq'], '_dynamo_dynamic_indices') == False
+ | | | | | | +- NO_TENSOR_ALIASING
+ | | | +- KeyValueManager pair at index=70
+ | | | | +- ValueManager: GuardManager: source=L['self'].state[list(L['self'].state.keys())[70]]
+ | | | | | +- GuardManager: source=L['self'].state[list(L['self'].state.keys())[70]]['step'], accessed_by=DictGetItemGuardAccessor(step)
+ | | | | | | +- ID_MATCH: ___check_obj_id(L['self'].state[list(L['self'].state.keys())[70]]['step'], 140561608487568)
+ | | | | | | +- TENSOR_MATCH: check_tensor(L['self'].state[list(L['self'].state.keys())[70]]['step'], Tensor, DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), torch.float32, device=0, requires_grad=False, size=[], stride=[])
+ | | | | | | +- NO_HASATTR: hasattr(L['self'].state[list(L['self'].state.keys())[70]]['step'], '_dynamo_dynamic_indices') == False
+ | | | | | | +- NO_TENSOR_ALIASING
+ | | | | | +- GuardManager: source=L['self'].state[list(L['self'].state.keys())[70]]['exp_avg'], accessed_by=DictGetItemGuardAccessor(exp_avg)
+ | | | | | | +- ID_MATCH: ___check_obj_id(L['self'].state[list(L['self'].state.keys())[70]]['exp_avg'], 140561608487648)
+ | | | | | | +- TENSOR_MATCH: check_tensor(L['self'].state[list(L['self'].state.keys())[70]]['exp_avg'], Tensor, DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), torch.float32, device=0, requires_grad=False, size=[3072, 768], stride=[768, 1])
+ | | | | | | +- NO_HASATTR: hasattr(L['self'].state[list(L['self'].state.keys())[70]]['exp_avg'], '_dynamo_dynamic_indices') == False
+ | | | | | | +- NO_TENSOR_ALIASING
+ | | | | | +- GuardManager: source=L['self'].state[list(L['self'].state.keys())[70]]['exp_avg_sq'], accessed_by=DictGetItemGuardAccessor(exp_avg_sq)
+ | | | | | | +- ID_MATCH: ___check_obj_id(L['self'].state[list(L['self'].state.keys())[70]]['exp_avg_sq'], 140561608487728)
+ | | | | | | +- TENSOR_MATCH: check_tensor(L['self'].state[list(L['self'].state.keys())[70]]['exp_avg_sq'], Tensor, DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), torch.float32, device=0, requires_grad=False, size=[3072, 768], stride=[768, 1])
+ | | | | | | +- NO_HASATTR: hasattr(L['self'].state[list(L['self'].state.keys())[70]]['exp_avg_sq'], '_dynamo_dynamic_indices') == False
+ | | | | | | +- NO_TENSOR_ALIASING
+ | | | +- KeyValueManager pair at index=71
+ | | | | +- ValueManager: GuardManager: source=L['self'].state[list(L['self'].state.keys())[71]]
+ | | | | | +- GuardManager: source=L['self'].state[list(L['self'].state.keys())[71]]['step'], accessed_by=DictGetItemGuardAccessor(step)
+ | | | | | | +- ID_MATCH: ___check_obj_id(L['self'].state[list(L['self'].state.keys())[71]]['step'], 140561608487888)
+ | | | | | | +- TENSOR_MATCH: check_tensor(L['self'].state[list(L['self'].state.keys())[71]]['step'], Tensor, DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), torch.float32, device=0, requires_grad=False, size=[], stride=[])
+ | | | | | | +- NO_HASATTR: hasattr(L['self'].state[list(L['self'].state.keys())[71]]['step'], '_dynamo_dynamic_indices') == False
+ | | | | | | +- NO_TENSOR_ALIASING
+ | | | | | +- GuardManager: source=L['self'].state[list(L['self'].state.keys())[71]]['exp_avg'], accessed_by=DictGetItemGuardAccessor(exp_avg)
+ | | | | | | +- ID_MATCH: ___check_obj_id(L['self'].state[list(L['self'].state.keys())[71]]['exp_avg'], 140561608487968)
+ | | | | | | +- TENSOR_MATCH: check_tensor(L['self'].state[list(L['self'].state.keys())[71]]['exp_avg'], Tensor, DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), torch.float32, device=0, requires_grad=False, size=[3072], stride=[1])
+ | | | | | | +- NO_HASATTR: hasattr(L['self'].state[list(L['self'].state.keys())[71]]['exp_avg'], '_dynamo_dynamic_indices') == False
+ | | | | | | +- NO_TENSOR_ALIASING
+ | | | | | +- GuardManager: source=L['self'].state[list(L['self'].state.keys())[71]]['exp_avg_sq'], accessed_by=DictGetItemGuardAccessor(exp_avg_sq)
+ | | | | | | +- ID_MATCH: ___check_obj_id(L['self'].state[list(L['self'].state.keys())[71]]['exp_avg_sq'], 140561608488048)
+ | | | | | | +- TENSOR_MATCH: check_tensor(L['self'].state[list(L['self'].state.keys())[71]]['exp_avg_sq'], Tensor, DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), torch.float32, device=0, requires_grad=False, size=[3072], stride=[1])
+ | | | | | | +- NO_HASATTR: hasattr(L['self'].state[list(L['self'].state.keys())[71]]['exp_avg_sq'], '_dynamo_dynamic_indices') == False
+ | | | | | | +- NO_TENSOR_ALIASING
+ | | | +- KeyValueManager pair at index=72
+ | | | | +- ValueManager: GuardManager: source=L['self'].state[list(L['self'].state.keys())[72]]
+ | | | | | +- GuardManager: source=L['self'].state[list(L['self'].state.keys())[72]]['step'], accessed_by=DictGetItemGuardAccessor(step)
+ | | | | | | +- ID_MATCH: ___check_obj_id(L['self'].state[list(L['self'].state.keys())[72]]['step'], 140561608488208)
+ | | | | | | +- TENSOR_MATCH: check_tensor(L['self'].state[list(L['self'].state.keys())[72]]['step'], Tensor, DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), torch.float32, device=0, requires_grad=False, size=[], stride=[])
+ | | | | | | +- NO_HASATTR: hasattr(L['self'].state[list(L['self'].state.keys())[72]]['step'], '_dynamo_dynamic_indices') == False
+ | | | | | | +- NO_TENSOR_ALIASING
+ | | | | | +- GuardManager: source=L['self'].state[list(L['self'].state.keys())[72]]['exp_avg'], accessed_by=DictGetItemGuardAccessor(exp_avg)
+ | | | | | | +- ID_MATCH: ___check_obj_id(L['self'].state[list(L['self'].state.keys())[72]]['exp_avg'], 140561608488288)
+ | | | | | | +- TENSOR_MATCH: check_tensor(L['self'].state[list(L['self'].state.keys())[72]]['exp_avg'], Tensor, DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), torch.float32, device=0, requires_grad=False, size=[768, 3072], stride=[3072, 1])
+ | | | | | | +- NO_HASATTR: hasattr(L['self'].state[list(L['self'].state.keys())[72]]['exp_avg'], '_dynamo_dynamic_indices') == False
+ | | | | | | +- NO_TENSOR_ALIASING
+ | |
| | | +- GuardManager: source=L['self'].state[list(L['self'].state.keys())[72]]['exp_avg_sq'], accessed_by=DictGetItemGuardAccessor(exp_avg_sq) + | | | | | | +- ID_MATCH: ___check_obj_id(L['self'].state[list(L['self'].state.keys())[72]]['exp_avg_sq'], 140561608488368) + | | | | | | +- TENSOR_MATCH: check_tensor(L['self'].state[list(L['self'].state.keys())[72]]['exp_avg_sq'], Tensor, DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), torch.float32, device=0, requires_grad=False, size=[768, 3072], stride=[3072, 1]) + | | | | | | +- NO_HASATTR: hasattr(L['self'].state[list(L['self'].state.keys())[72]]['exp_avg_sq'], '_dynamo_dynamic_indices') == False + | | | | | | +- NO_TENSOR_ALIASING + | | | +- KeyValueManager pair at index=73 + | | | | +- ValueManager: GuardManager: source=L['self'].state[list(L['self'].state.keys())[73]] + | | | | | +- GuardManager: source=L['self'].state[list(L['self'].state.keys())[73]]['step'], accessed_by=DictGetItemGuardAccessor(step) + | | | | | | +- ID_MATCH: ___check_obj_id(L['self'].state[list(L['self'].state.keys())[73]]['step'], 140561608488528) + | | | | | | +- TENSOR_MATCH: check_tensor(L['self'].state[list(L['self'].state.keys())[73]]['step'], Tensor, DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), torch.float32, device=0, requires_grad=False, size=[], stride=[]) + | | | | | | +- NO_HASATTR: hasattr(L['self'].state[list(L['self'].state.keys())[73]]['step'], '_dynamo_dynamic_indices') == False + | | | | | | +- NO_TENSOR_ALIASING + | | | | | +- GuardManager: source=L['self'].state[list(L['self'].state.keys())[73]]['exp_avg'], accessed_by=DictGetItemGuardAccessor(exp_avg) + | | | | | | +- ID_MATCH: ___check_obj_id(L['self'].state[list(L['self'].state.keys())[73]]['exp_avg'], 140561608488608) + | | | | | | +- TENSOR_MATCH: check_tensor(L['self'].state[list(L['self'].state.keys())[73]]['exp_avg'], Tensor, DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), torch.float32, device=0, requires_grad=False, size=[768], stride=[1]) + | | | | | | +- NO_HASATTR: hasattr(L['self'].state[list(L['self'].state.keys())[73]]['exp_avg'], '_dynamo_dynamic_indices') == False + | | | | | | +- NO_TENSOR_ALIASING + | | | | | +- GuardManager: source=L['self'].state[list(L['self'].state.keys())[73]]['exp_avg_sq'], accessed_by=DictGetItemGuardAccessor(exp_avg_sq) + | | | | | | +- ID_MATCH: ___check_obj_id(L['self'].state[list(L['self'].state.keys())[73]]['exp_avg_sq'], 140561608488688) + | | | | | | +- TENSOR_MATCH: check_tensor(L['self'].state[list(L['self'].state.keys())[73]]['exp_avg_sq'], Tensor, DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), torch.float32, device=0, requires_grad=False, size=[768], stride=[1]) + | | | | | | +- NO_HASATTR: hasattr(L['self'].state[list(L['self'].state.keys())[73]]['exp_avg_sq'], '_dynamo_dynamic_indices') == False + | | | | | | +- NO_TENSOR_ALIASING + | | | +- KeyValueManager pair at index=74 + | | | | +- ValueManager: GuardManager: source=L['self'].state[list(L['self'].state.keys())[74]] + | | | | | +- GuardManager: source=L['self'].state[list(L['self'].state.keys())[74]]['step'], accessed_by=DictGetItemGuardAccessor(step) + | | | | | | +- ID_MATCH: ___check_obj_id(L['self'].state[list(L['self'].state.keys())[74]]['step'], 140561608488848) + | | | | | | +- TENSOR_MATCH: check_tensor(L['self'].state[list(L['self'].state.keys())[74]]['step'], Tensor, DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), torch.float32, device=0, requires_grad=False, size=[], 
stride=[]) + | | | | | | +- NO_HASATTR: hasattr(L['self'].state[list(L['self'].state.keys())[74]]['step'], '_dynamo_dynamic_indices') == False + | | | | | | +- NO_TENSOR_ALIASING + | | | | | +- GuardManager: source=L['self'].state[list(L['self'].state.keys())[74]]['exp_avg'], accessed_by=DictGetItemGuardAccessor(exp_avg) + | | | | | | +- ID_MATCH: ___check_obj_id(L['self'].state[list(L['self'].state.keys())[74]]['exp_avg'], 140561608488928) + | | | | | | +- TENSOR_MATCH: check_tensor(L['self'].state[list(L['self'].state.keys())[74]]['exp_avg'], Tensor, DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), torch.float32, device=0, requires_grad=False, size=[768], stride=[1]) + | | | | | | +- NO_HASATTR: hasattr(L['self'].state[list(L['self'].state.keys())[74]]['exp_avg'], '_dynamo_dynamic_indices') == False + | | | | | | +- NO_TENSOR_ALIASING + | | | | | +- GuardManager: source=L['self'].state[list(L['self'].state.keys())[74]]['exp_avg_sq'], accessed_by=DictGetItemGuardAccessor(exp_avg_sq) + | | | | | | +- ID_MATCH: ___check_obj_id(L['self'].state[list(L['self'].state.keys())[74]]['exp_avg_sq'], 140561608489008) + | | | | | | +- TENSOR_MATCH: check_tensor(L['self'].state[list(L['self'].state.keys())[74]]['exp_avg_sq'], Tensor, DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), torch.float32, device=0, requires_grad=False, size=[768], stride=[1]) + | | | | | | +- NO_HASATTR: hasattr(L['self'].state[list(L['self'].state.keys())[74]]['exp_avg_sq'], '_dynamo_dynamic_indices') == False + | | | | | | +- NO_TENSOR_ALIASING + | | | +- KeyValueManager pair at index=75 + | | | | +- ValueManager: GuardManager: source=L['self'].state[list(L['self'].state.keys())[75]] + | | | | | +- GuardManager: source=L['self'].state[list(L['self'].state.keys())[75]]['step'], accessed_by=DictGetItemGuardAccessor(step) + | | | | | | +- ID_MATCH: ___check_obj_id(L['self'].state[list(L['self'].state.keys())[75]]['step'], 140561608489168) + | | | | | | +- TENSOR_MATCH: check_tensor(L['self'].state[list(L['self'].state.keys())[75]]['step'], Tensor, DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), torch.float32, device=0, requires_grad=False, size=[], stride=[]) + | | | | | | +- NO_HASATTR: hasattr(L['self'].state[list(L['self'].state.keys())[75]]['step'], '_dynamo_dynamic_indices') == False + | | | | | | +- NO_TENSOR_ALIASING + | | | | | +- GuardManager: source=L['self'].state[list(L['self'].state.keys())[75]]['exp_avg'], accessed_by=DictGetItemGuardAccessor(exp_avg) + | | | | | | +- ID_MATCH: ___check_obj_id(L['self'].state[list(L['self'].state.keys())[75]]['exp_avg'], 140561608489248) + | | | | | | +- TENSOR_MATCH: check_tensor(L['self'].state[list(L['self'].state.keys())[75]]['exp_avg'], Tensor, DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), torch.float32, device=0, requires_grad=False, size=[768], stride=[1]) + | | | | | | +- NO_HASATTR: hasattr(L['self'].state[list(L['self'].state.keys())[75]]['exp_avg'], '_dynamo_dynamic_indices') == False + | | | | | | +- NO_TENSOR_ALIASING + | | | | | +- GuardManager: source=L['self'].state[list(L['self'].state.keys())[75]]['exp_avg_sq'], accessed_by=DictGetItemGuardAccessor(exp_avg_sq) + | | | | | | +- ID_MATCH: ___check_obj_id(L['self'].state[list(L['self'].state.keys())[75]]['exp_avg_sq'], 140561608489328) + | | | | | | +- TENSOR_MATCH: check_tensor(L['self'].state[list(L['self'].state.keys())[75]]['exp_avg_sq'], Tensor, DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), torch.float32, 
device=0, requires_grad=False, size=[768], stride=[1]) + | | | | | | +- NO_HASATTR: hasattr(L['self'].state[list(L['self'].state.keys())[75]]['exp_avg_sq'], '_dynamo_dynamic_indices') == False + | | | | | | +- NO_TENSOR_ALIASING + | | | +- KeyValueManager pair at index=76 + | | | | +- ValueManager: GuardManager: source=L['self'].state[list(L['self'].state.keys())[76]] + | | | | | +- GuardManager: source=L['self'].state[list(L['self'].state.keys())[76]]['step'], accessed_by=DictGetItemGuardAccessor(step) + | | | | | | +- ID_MATCH: ___check_obj_id(L['self'].state[list(L['self'].state.keys())[76]]['step'], 140561608489488) + | | | | | | +- TENSOR_MATCH: check_tensor(L['self'].state[list(L['self'].state.keys())[76]]['step'], Tensor, DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), torch.float32, device=0, requires_grad=False, size=[], stride=[]) + | | | | | | +- NO_HASATTR: hasattr(L['self'].state[list(L['self'].state.keys())[76]]['step'], '_dynamo_dynamic_indices') == False + | | | | | | +- NO_TENSOR_ALIASING + | | | | | +- GuardManager: source=L['self'].state[list(L['self'].state.keys())[76]]['exp_avg'], accessed_by=DictGetItemGuardAccessor(exp_avg) + | | | | | | +- ID_MATCH: ___check_obj_id(L['self'].state[list(L['self'].state.keys())[76]]['exp_avg'], 140561608489568) + | | | | | | +- TENSOR_MATCH: check_tensor(L['self'].state[list(L['self'].state.keys())[76]]['exp_avg'], Tensor, DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), torch.float32, device=0, requires_grad=False, size=[2304, 768], stride=[768, 1]) + | | | | | | +- NO_HASATTR: hasattr(L['self'].state[list(L['self'].state.keys())[76]]['exp_avg'], '_dynamo_dynamic_indices') == False + | | | | | | +- NO_TENSOR_ALIASING + | | | | | +- GuardManager: source=L['self'].state[list(L['self'].state.keys())[76]]['exp_avg_sq'], accessed_by=DictGetItemGuardAccessor(exp_avg_sq) + | | | | | | +- ID_MATCH: ___check_obj_id(L['self'].state[list(L['self'].state.keys())[76]]['exp_avg_sq'], 140561608489648) + | | | | | | +- TENSOR_MATCH: check_tensor(L['self'].state[list(L['self'].state.keys())[76]]['exp_avg_sq'], Tensor, DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), torch.float32, device=0, requires_grad=False, size=[2304, 768], stride=[768, 1]) + | | | | | | +- NO_HASATTR: hasattr(L['self'].state[list(L['self'].state.keys())[76]]['exp_avg_sq'], '_dynamo_dynamic_indices') == False + | | | | | | +- NO_TENSOR_ALIASING + | | | +- KeyValueManager pair at index=77 + | | | | +- ValueManager: GuardManager: source=L['self'].state[list(L['self'].state.keys())[77]] + | | | | | +- GuardManager: source=L['self'].state[list(L['self'].state.keys())[77]]['step'], accessed_by=DictGetItemGuardAccessor(step) + | | | | | | +- ID_MATCH: ___check_obj_id(L['self'].state[list(L['self'].state.keys())[77]]['step'], 140561608489808) + | | | | | | +- TENSOR_MATCH: check_tensor(L['self'].state[list(L['self'].state.keys())[77]]['step'], Tensor, DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), torch.float32, device=0, requires_grad=False, size=[], stride=[]) + | | | | | | +- NO_HASATTR: hasattr(L['self'].state[list(L['self'].state.keys())[77]]['step'], '_dynamo_dynamic_indices') == False + | | | | | | +- NO_TENSOR_ALIASING + | | | | | +- GuardManager: source=L['self'].state[list(L['self'].state.keys())[77]]['exp_avg'], accessed_by=DictGetItemGuardAccessor(exp_avg) + | | | | | | +- ID_MATCH: ___check_obj_id(L['self'].state[list(L['self'].state.keys())[77]]['exp_avg'], 140561608489888) + | | | | | | +- 
TENSOR_MATCH: check_tensor(L['self'].state[list(L['self'].state.keys())[77]]['exp_avg'], Tensor, DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), torch.float32, device=0, requires_grad=False, size=[2304], stride=[1]) + | | | | | | +- NO_HASATTR: hasattr(L['self'].state[list(L['self'].state.keys())[77]]['exp_avg'], '_dynamo_dynamic_indices') == False + | | | | | | +- NO_TENSOR_ALIASING + | | | | | +- GuardManager: source=L['self'].state[list(L['self'].state.keys())[77]]['exp_avg_sq'], accessed_by=DictGetItemGuardAccessor(exp_avg_sq) + | | | | | | +- ID_MATCH: ___check_obj_id(L['self'].state[list(L['self'].state.keys())[77]]['exp_avg_sq'], 140561608489968) + | | | | | | +- TENSOR_MATCH: check_tensor(L['self'].state[list(L['self'].state.keys())[77]]['exp_avg_sq'], Tensor, DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), torch.float32, device=0, requires_grad=False, size=[2304], stride=[1]) + | | | | | | +- NO_HASATTR: hasattr(L['self'].state[list(L['self'].state.keys())[77]]['exp_avg_sq'], '_dynamo_dynamic_indices') == False + | | | | | | +- NO_TENSOR_ALIASING + | | | +- KeyValueManager pair at index=78 + | | | | +- ValueManager: GuardManager: source=L['self'].state[list(L['self'].state.keys())[78]] + | | | | | +- GuardManager: source=L['self'].state[list(L['self'].state.keys())[78]]['step'], accessed_by=DictGetItemGuardAccessor(step) + | | | | | | +- ID_MATCH: ___check_obj_id(L['self'].state[list(L['self'].state.keys())[78]]['step'], 140561608490128) + | | | | | | +- TENSOR_MATCH: check_tensor(L['self'].state[list(L['self'].state.keys())[78]]['step'], Tensor, DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), torch.float32, device=0, requires_grad=False, size=[], stride=[]) + | | | | | | +- NO_HASATTR: hasattr(L['self'].state[list(L['self'].state.keys())[78]]['step'], '_dynamo_dynamic_indices') == False + | | | | | | +- NO_TENSOR_ALIASING + | | | | | +- GuardManager: source=L['self'].state[list(L['self'].state.keys())[78]]['exp_avg'], accessed_by=DictGetItemGuardAccessor(exp_avg) + | | | | | | +- ID_MATCH: ___check_obj_id(L['self'].state[list(L['self'].state.keys())[78]]['exp_avg'], 140561608490208) + | | | | | | +- TENSOR_MATCH: check_tensor(L['self'].state[list(L['self'].state.keys())[78]]['exp_avg'], Tensor, DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), torch.float32, device=0, requires_grad=False, size=[768, 768], stride=[768, 1]) + | | | | | | +- NO_HASATTR: hasattr(L['self'].state[list(L['self'].state.keys())[78]]['exp_avg'], '_dynamo_dynamic_indices') == False + | | | | | | +- NO_TENSOR_ALIASING + | | | | | +- GuardManager: source=L['self'].state[list(L['self'].state.keys())[78]]['exp_avg_sq'], accessed_by=DictGetItemGuardAccessor(exp_avg_sq) + | | | | | | +- ID_MATCH: ___check_obj_id(L['self'].state[list(L['self'].state.keys())[78]]['exp_avg_sq'], 140561608490288) + | | | | | | +- TENSOR_MATCH: check_tensor(L['self'].state[list(L['self'].state.keys())[78]]['exp_avg_sq'], Tensor, DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), torch.float32, device=0, requires_grad=False, size=[768, 768], stride=[768, 1]) + | | | | | | +- NO_HASATTR: hasattr(L['self'].state[list(L['self'].state.keys())[78]]['exp_avg_sq'], '_dynamo_dynamic_indices') == False + | | | | | | +- NO_TENSOR_ALIASING + | | | +- KeyValueManager pair at index=79 + | | | | +- ValueManager: GuardManager: source=L['self'].state[list(L['self'].state.keys())[79]] + | | | | | +- GuardManager: 
source=L['self'].state[list(L['self'].state.keys())[79]]['step'], accessed_by=DictGetItemGuardAccessor(step) + | | | | | | +- ID_MATCH: ___check_obj_id(L['self'].state[list(L['self'].state.keys())[79]]['step'], 140561608490448) + | | | | | | +- TENSOR_MATCH: check_tensor(L['self'].state[list(L['self'].state.keys())[79]]['step'], Tensor, DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), torch.float32, device=0, requires_grad=False, size=[], stride=[]) + | | | | | | +- NO_HASATTR: hasattr(L['self'].state[list(L['self'].state.keys())[79]]['step'], '_dynamo_dynamic_indices') == False + | | | | | | +- NO_TENSOR_ALIASING + | | | | | +- GuardManager: source=L['self'].state[list(L['self'].state.keys())[79]]['exp_avg'], accessed_by=DictGetItemGuardAccessor(exp_avg) + | | | | | | +- ID_MATCH: ___check_obj_id(L['self'].state[list(L['self'].state.keys())[79]]['exp_avg'], 140561608490528) + | | | | | | +- TENSOR_MATCH: check_tensor(L['self'].state[list(L['self'].state.keys())[79]]['exp_avg'], Tensor, DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), torch.float32, device=0, requires_grad=False, size=[768], stride=[1]) + | | | | | | +- NO_HASATTR: hasattr(L['self'].state[list(L['self'].state.keys())[79]]['exp_avg'], '_dynamo_dynamic_indices') == False + | | | | | | +- NO_TENSOR_ALIASING + | | | | | +- GuardManager: source=L['self'].state[list(L['self'].state.keys())[79]]['exp_avg_sq'], accessed_by=DictGetItemGuardAccessor(exp_avg_sq) + | | | | | | +- ID_MATCH: ___check_obj_id(L['self'].state[list(L['self'].state.keys())[79]]['exp_avg_sq'], 140561608490608) + | | | | | | +- TENSOR_MATCH: check_tensor(L['self'].state[list(L['self'].state.keys())[79]]['exp_avg_sq'], Tensor, DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), torch.float32, device=0, requires_grad=False, size=[768], stride=[1]) + | | | | | | +- NO_HASATTR: hasattr(L['self'].state[list(L['self'].state.keys())[79]]['exp_avg_sq'], '_dynamo_dynamic_indices') == False + | | | | | | +- NO_TENSOR_ALIASING + | | | +- KeyValueManager pair at index=80 + | | | | +- ValueManager: GuardManager: source=L['self'].state[list(L['self'].state.keys())[80]] + | | | | | +- GuardManager: source=L['self'].state[list(L['self'].state.keys())[80]]['step'], accessed_by=DictGetItemGuardAccessor(step) + | | | | | | +- ID_MATCH: ___check_obj_id(L['self'].state[list(L['self'].state.keys())[80]]['step'], 140561608490768) + | | | | | | +- TENSOR_MATCH: check_tensor(L['self'].state[list(L['self'].state.keys())[80]]['step'], Tensor, DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), torch.float32, device=0, requires_grad=False, size=[], stride=[]) + | | | | | | +- NO_HASATTR: hasattr(L['self'].state[list(L['self'].state.keys())[80]]['step'], '_dynamo_dynamic_indices') == False + | | | | | | +- NO_TENSOR_ALIASING + | | | | | +- GuardManager: source=L['self'].state[list(L['self'].state.keys())[80]]['exp_avg'], accessed_by=DictGetItemGuardAccessor(exp_avg) + | | | | | | +- ID_MATCH: ___check_obj_id(L['self'].state[list(L['self'].state.keys())[80]]['exp_avg'], 140561608490848) + | | | | | | +- TENSOR_MATCH: check_tensor(L['self'].state[list(L['self'].state.keys())[80]]['exp_avg'], Tensor, DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), torch.float32, device=0, requires_grad=False, size=[768], stride=[1]) + | | | | | | +- NO_HASATTR: hasattr(L['self'].state[list(L['self'].state.keys())[80]]['exp_avg'], '_dynamo_dynamic_indices') == False + | | | | | | +- NO_TENSOR_ALIASING + | | | | | +- 
GuardManager: source=L['self'].state[list(L['self'].state.keys())[80]]['exp_avg_sq'], accessed_by=DictGetItemGuardAccessor(exp_avg_sq) + | | | | | | +- ID_MATCH: ___check_obj_id(L['self'].state[list(L['self'].state.keys())[80]]['exp_avg_sq'], 140561608490928) + | | | | | | +- TENSOR_MATCH: check_tensor(L['self'].state[list(L['self'].state.keys())[80]]['exp_avg_sq'], Tensor, DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), torch.float32, device=0, requires_grad=False, size=[768], stride=[1]) + | | | | | | +- NO_HASATTR: hasattr(L['self'].state[list(L['self'].state.keys())[80]]['exp_avg_sq'], '_dynamo_dynamic_indices') == False + | | | | | | +- NO_TENSOR_ALIASING + | | | +- KeyValueManager pair at index=81 + | | | | +- ValueManager: GuardManager: source=L['self'].state[list(L['self'].state.keys())[81]] + | | | | | +- GuardManager: source=L['self'].state[list(L['self'].state.keys())[81]]['step'], accessed_by=DictGetItemGuardAccessor(step) + | | | | | | +- ID_MATCH: ___check_obj_id(L['self'].state[list(L['self'].state.keys())[81]]['step'], 140561608261776) + | | | | | | +- TENSOR_MATCH: check_tensor(L['self'].state[list(L['self'].state.keys())[81]]['step'], Tensor, DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), torch.float32, device=0, requires_grad=False, size=[], stride=[]) + | | | | | | +- NO_HASATTR: hasattr(L['self'].state[list(L['self'].state.keys())[81]]['step'], '_dynamo_dynamic_indices') == False + | | | | | | +- NO_TENSOR_ALIASING + | | | | | +- GuardManager: source=L['self'].state[list(L['self'].state.keys())[81]]['exp_avg'], accessed_by=DictGetItemGuardAccessor(exp_avg) + | | | | | | +- ID_MATCH: ___check_obj_id(L['self'].state[list(L['self'].state.keys())[81]]['exp_avg'], 140561608261856) + | | | | | | +- TENSOR_MATCH: check_tensor(L['self'].state[list(L['self'].state.keys())[81]]['exp_avg'], Tensor, DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), torch.float32, device=0, requires_grad=False, size=[768], stride=[1]) + | | | | | | +- NO_HASATTR: hasattr(L['self'].state[list(L['self'].state.keys())[81]]['exp_avg'], '_dynamo_dynamic_indices') == False + | | | | | | +- NO_TENSOR_ALIASING + | | | | | +- GuardManager: source=L['self'].state[list(L['self'].state.keys())[81]]['exp_avg_sq'], accessed_by=DictGetItemGuardAccessor(exp_avg_sq) + | | | | | | +- ID_MATCH: ___check_obj_id(L['self'].state[list(L['self'].state.keys())[81]]['exp_avg_sq'], 140561608261936) + | | | | | | +- TENSOR_MATCH: check_tensor(L['self'].state[list(L['self'].state.keys())[81]]['exp_avg_sq'], Tensor, DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), torch.float32, device=0, requires_grad=False, size=[768], stride=[1]) + | | | | | | +- NO_HASATTR: hasattr(L['self'].state[list(L['self'].state.keys())[81]]['exp_avg_sq'], '_dynamo_dynamic_indices') == False + | | | | | | +- NO_TENSOR_ALIASING + | | | +- KeyValueManager pair at index=82 + | | | | +- ValueManager: GuardManager: source=L['self'].state[list(L['self'].state.keys())[82]] + | | | | | +- GuardManager: source=L['self'].state[list(L['self'].state.keys())[82]]['step'], accessed_by=DictGetItemGuardAccessor(step) + | | | | | | +- ID_MATCH: ___check_obj_id(L['self'].state[list(L['self'].state.keys())[82]]['step'], 140561608262096) + | | | | | | +- TENSOR_MATCH: check_tensor(L['self'].state[list(L['self'].state.keys())[82]]['step'], Tensor, DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), torch.float32, device=0, requires_grad=False, size=[], stride=[]) + | | | | | | +- 
NO_HASATTR: hasattr(L['self'].state[list(L['self'].state.keys())[82]]['step'], '_dynamo_dynamic_indices') == False + | | | | | | +- NO_TENSOR_ALIASING + | | | | | +- GuardManager: source=L['self'].state[list(L['self'].state.keys())[82]]['exp_avg'], accessed_by=DictGetItemGuardAccessor(exp_avg) + | | | | | | +- ID_MATCH: ___check_obj_id(L['self'].state[list(L['self'].state.keys())[82]]['exp_avg'], 140561608262176) + | | | | | | +- TENSOR_MATCH: check_tensor(L['self'].state[list(L['self'].state.keys())[82]]['exp_avg'], Tensor, DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), torch.float32, device=0, requires_grad=False, size=[3072, 768], stride=[768, 1]) + | | | | | | +- NO_HASATTR: hasattr(L['self'].state[list(L['self'].state.keys())[82]]['exp_avg'], '_dynamo_dynamic_indices') == False + | | | | | | +- NO_TENSOR_ALIASING + | | | | | +- GuardManager: source=L['self'].state[list(L['self'].state.keys())[82]]['exp_avg_sq'], accessed_by=DictGetItemGuardAccessor(exp_avg_sq) + | | | | | | +- ID_MATCH: ___check_obj_id(L['self'].state[list(L['self'].state.keys())[82]]['exp_avg_sq'], 140561608262256) + | | | | | | +- TENSOR_MATCH: check_tensor(L['self'].state[list(L['self'].state.keys())[82]]['exp_avg_sq'], Tensor, DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), torch.float32, device=0, requires_grad=False, size=[3072, 768], stride=[768, 1]) + | | | | | | +- NO_HASATTR: hasattr(L['self'].state[list(L['self'].state.keys())[82]]['exp_avg_sq'], '_dynamo_dynamic_indices') == False + | | | | | | +- NO_TENSOR_ALIASING + | | | +- KeyValueManager pair at index=83 + | | | | +- ValueManager: GuardManager: source=L['self'].state[list(L['self'].state.keys())[83]] + | | | | | +- GuardManager: source=L['self'].state[list(L['self'].state.keys())[83]]['step'], accessed_by=DictGetItemGuardAccessor(step) + | | | | | | +- ID_MATCH: ___check_obj_id(L['self'].state[list(L['self'].state.keys())[83]]['step'], 140561608262416) + | | | | | | +- TENSOR_MATCH: check_tensor(L['self'].state[list(L['self'].state.keys())[83]]['step'], Tensor, DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), torch.float32, device=0, requires_grad=False, size=[], stride=[]) + | | | | | | +- NO_HASATTR: hasattr(L['self'].state[list(L['self'].state.keys())[83]]['step'], '_dynamo_dynamic_indices') == False + | | | | | | +- NO_TENSOR_ALIASING + | | | | | +- GuardManager: source=L['self'].state[list(L['self'].state.keys())[83]]['exp_avg'], accessed_by=DictGetItemGuardAccessor(exp_avg) + | | | | | | +- ID_MATCH: ___check_obj_id(L['self'].state[list(L['self'].state.keys())[83]]['exp_avg'], 140561608262496) + | | | | | | +- TENSOR_MATCH: check_tensor(L['self'].state[list(L['self'].state.keys())[83]]['exp_avg'], Tensor, DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), torch.float32, device=0, requires_grad=False, size=[3072], stride=[1]) + | | | | | | +- NO_HASATTR: hasattr(L['self'].state[list(L['self'].state.keys())[83]]['exp_avg'], '_dynamo_dynamic_indices') == False + | | | | | | +- NO_TENSOR_ALIASING + | | | | | +- GuardManager: source=L['self'].state[list(L['self'].state.keys())[83]]['exp_avg_sq'], accessed_by=DictGetItemGuardAccessor(exp_avg_sq) + | | | | | | +- ID_MATCH: ___check_obj_id(L['self'].state[list(L['self'].state.keys())[83]]['exp_avg_sq'], 140561608262576) + | | | | | | +- TENSOR_MATCH: check_tensor(L['self'].state[list(L['self'].state.keys())[83]]['exp_avg_sq'], Tensor, DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), torch.float32, device=0, 
requires_grad=False, size=[3072], stride=[1]) + | | | | | | +- NO_HASATTR: hasattr(L['self'].state[list(L['self'].state.keys())[83]]['exp_avg_sq'], '_dynamo_dynamic_indices') == False + | | | | | | +- NO_TENSOR_ALIASING + | | | +- KeyValueManager pair at index=84 + | | | | +- ValueManager: GuardManager: source=L['self'].state[list(L['self'].state.keys())[84]] + | | | | | +- GuardManager: source=L['self'].state[list(L['self'].state.keys())[84]]['step'], accessed_by=DictGetItemGuardAccessor(step) + | | | | | | +- ID_MATCH: ___check_obj_id(L['self'].state[list(L['self'].state.keys())[84]]['step'], 140561608262736) + | | | | | | +- TENSOR_MATCH: check_tensor(L['self'].state[list(L['self'].state.keys())[84]]['step'], Tensor, DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), torch.float32, device=0, requires_grad=False, size=[], stride=[]) + | | | | | | +- NO_HASATTR: hasattr(L['self'].state[list(L['self'].state.keys())[84]]['step'], '_dynamo_dynamic_indices') == False + | | | | | | +- NO_TENSOR_ALIASING + | | | | | +- GuardManager: source=L['self'].state[list(L['self'].state.keys())[84]]['exp_avg'], accessed_by=DictGetItemGuardAccessor(exp_avg) + | | | | | | +- ID_MATCH: ___check_obj_id(L['self'].state[list(L['self'].state.keys())[84]]['exp_avg'], 140561608262816) + | | | | | | +- TENSOR_MATCH: check_tensor(L['self'].state[list(L['self'].state.keys())[84]]['exp_avg'], Tensor, DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), torch.float32, device=0, requires_grad=False, size=[768, 3072], stride=[3072, 1]) + | | | | | | +- NO_HASATTR: hasattr(L['self'].state[list(L['self'].state.keys())[84]]['exp_avg'], '_dynamo_dynamic_indices') == False + | | | | | | +- NO_TENSOR_ALIASING + | | | | | +- GuardManager: source=L['self'].state[list(L['self'].state.keys())[84]]['exp_avg_sq'], accessed_by=DictGetItemGuardAccessor(exp_avg_sq) + | | | | | | +- ID_MATCH: ___check_obj_id(L['self'].state[list(L['self'].state.keys())[84]]['exp_avg_sq'], 140561608262896) + | | | | | | +- TENSOR_MATCH: check_tensor(L['self'].state[list(L['self'].state.keys())[84]]['exp_avg_sq'], Tensor, DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), torch.float32, device=0, requires_grad=False, size=[768, 3072], stride=[3072, 1]) + | | | | | | +- NO_HASATTR: hasattr(L['self'].state[list(L['self'].state.keys())[84]]['exp_avg_sq'], '_dynamo_dynamic_indices') == False + | | | | | | +- NO_TENSOR_ALIASING + | | | +- KeyValueManager pair at index=85 + | | | | +- ValueManager: GuardManager: source=L['self'].state[list(L['self'].state.keys())[85]] + | | | | | +- GuardManager: source=L['self'].state[list(L['self'].state.keys())[85]]['step'], accessed_by=DictGetItemGuardAccessor(step) + | | | | | | +- ID_MATCH: ___check_obj_id(L['self'].state[list(L['self'].state.keys())[85]]['step'], 140561608263056) + | | | | | | +- TENSOR_MATCH: check_tensor(L['self'].state[list(L['self'].state.keys())[85]]['step'], Tensor, DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), torch.float32, device=0, requires_grad=False, size=[], stride=[]) + | | | | | | +- NO_HASATTR: hasattr(L['self'].state[list(L['self'].state.keys())[85]]['step'], '_dynamo_dynamic_indices') == False + | | | | | | +- NO_TENSOR_ALIASING + | | | | | +- GuardManager: source=L['self'].state[list(L['self'].state.keys())[85]]['exp_avg'], accessed_by=DictGetItemGuardAccessor(exp_avg) + | | | | | | +- ID_MATCH: ___check_obj_id(L['self'].state[list(L['self'].state.keys())[85]]['exp_avg'], 140561608263136) + | | | | | | +- 
TENSOR_MATCH: check_tensor(L['self'].state[list(L['self'].state.keys())[85]]['exp_avg'], Tensor, DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), torch.float32, device=0, requires_grad=False, size=[768], stride=[1]) + | | | | | | +- NO_HASATTR: hasattr(L['self'].state[list(L['self'].state.keys())[85]]['exp_avg'], '_dynamo_dynamic_indices') == False + | | | | | | +- NO_TENSOR_ALIASING + | | | | | +- GuardManager: source=L['self'].state[list(L['self'].state.keys())[85]]['exp_avg_sq'], accessed_by=DictGetItemGuardAccessor(exp_avg_sq) + | | | | | | +- ID_MATCH: ___check_obj_id(L['self'].state[list(L['self'].state.keys())[85]]['exp_avg_sq'], 140561608263216) + | | | | | | +- TENSOR_MATCH: check_tensor(L['self'].state[list(L['self'].state.keys())[85]]['exp_avg_sq'], Tensor, DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), torch.float32, device=0, requires_grad=False, size=[768], stride=[1]) + | | | | | | +- NO_HASATTR: hasattr(L['self'].state[list(L['self'].state.keys())[85]]['exp_avg_sq'], '_dynamo_dynamic_indices') == False + | | | | | | +- NO_TENSOR_ALIASING + | | | +- KeyValueManager pair at index=86 + | | | | +- ValueManager: GuardManager: source=L['self'].state[list(L['self'].state.keys())[86]] + | | | | | +- GuardManager: source=L['self'].state[list(L['self'].state.keys())[86]]['step'], accessed_by=DictGetItemGuardAccessor(step) + | | | | | | +- ID_MATCH: ___check_obj_id(L['self'].state[list(L['self'].state.keys())[86]]['step'], 140561608263376) + | | | | | | +- TENSOR_MATCH: check_tensor(L['self'].state[list(L['self'].state.keys())[86]]['step'], Tensor, DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), torch.float32, device=0, requires_grad=False, size=[], stride=[]) + | | | | | | +- NO_HASATTR: hasattr(L['self'].state[list(L['self'].state.keys())[86]]['step'], '_dynamo_dynamic_indices') == False + | | | | | | +- NO_TENSOR_ALIASING + | | | | | +- GuardManager: source=L['self'].state[list(L['self'].state.keys())[86]]['exp_avg'], accessed_by=DictGetItemGuardAccessor(exp_avg) + | | | | | | +- ID_MATCH: ___check_obj_id(L['self'].state[list(L['self'].state.keys())[86]]['exp_avg'], 140561608263456) + | | | | | | +- TENSOR_MATCH: check_tensor(L['self'].state[list(L['self'].state.keys())[86]]['exp_avg'], Tensor, DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), torch.float32, device=0, requires_grad=False, size=[768], stride=[1]) + | | | | | | +- NO_HASATTR: hasattr(L['self'].state[list(L['self'].state.keys())[86]]['exp_avg'], '_dynamo_dynamic_indices') == False + | | | | | | +- NO_TENSOR_ALIASING + | | | | | +- GuardManager: source=L['self'].state[list(L['self'].state.keys())[86]]['exp_avg_sq'], accessed_by=DictGetItemGuardAccessor(exp_avg_sq) + | | | | | | +- ID_MATCH: ___check_obj_id(L['self'].state[list(L['self'].state.keys())[86]]['exp_avg_sq'], 140561608263536) + | | | | | | +- TENSOR_MATCH: check_tensor(L['self'].state[list(L['self'].state.keys())[86]]['exp_avg_sq'], Tensor, DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), torch.float32, device=0, requires_grad=False, size=[768], stride=[1]) + | | | | | | +- NO_HASATTR: hasattr(L['self'].state[list(L['self'].state.keys())[86]]['exp_avg_sq'], '_dynamo_dynamic_indices') == False + | | | | | | +- NO_TENSOR_ALIASING + | | | +- KeyValueManager pair at index=87 + | | | | +- ValueManager: GuardManager: source=L['self'].state[list(L['self'].state.keys())[87]] + | | | | | +- GuardManager: source=L['self'].state[list(L['self'].state.keys())[87]]['step'], 
accessed_by=DictGetItemGuardAccessor(step) + | | | | | | +- ID_MATCH: ___check_obj_id(L['self'].state[list(L['self'].state.keys())[87]]['step'], 140561608263696) + | | | | | | +- TENSOR_MATCH: check_tensor(L['self'].state[list(L['self'].state.keys())[87]]['step'], Tensor, DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), torch.float32, device=0, requires_grad=False, size=[], stride=[]) + | | | | | | +- NO_HASATTR: hasattr(L['self'].state[list(L['self'].state.keys())[87]]['step'], '_dynamo_dynamic_indices') == False + | | | | | | +- NO_TENSOR_ALIASING + | | | | | +- GuardManager: source=L['self'].state[list(L['self'].state.keys())[87]]['exp_avg'], accessed_by=DictGetItemGuardAccessor(exp_avg) + | | | | | | +- ID_MATCH: ___check_obj_id(L['self'].state[list(L['self'].state.keys())[87]]['exp_avg'], 140561608263776) + | | | | | | +- TENSOR_MATCH: check_tensor(L['self'].state[list(L['self'].state.keys())[87]]['exp_avg'], Tensor, DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), torch.float32, device=0, requires_grad=False, size=[768], stride=[1]) + | | | | | | +- NO_HASATTR: hasattr(L['self'].state[list(L['self'].state.keys())[87]]['exp_avg'], '_dynamo_dynamic_indices') == False + | | | | | | +- NO_TENSOR_ALIASING + | | | | | +- GuardManager: source=L['self'].state[list(L['self'].state.keys())[87]]['exp_avg_sq'], accessed_by=DictGetItemGuardAccessor(exp_avg_sq) + | | | | | | +- ID_MATCH: ___check_obj_id(L['self'].state[list(L['self'].state.keys())[87]]['exp_avg_sq'], 140561608263856) + | | | | | | +- TENSOR_MATCH: check_tensor(L['self'].state[list(L['self'].state.keys())[87]]['exp_avg_sq'], Tensor, DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), torch.float32, device=0, requires_grad=False, size=[768], stride=[1]) + | | | | | | +- NO_HASATTR: hasattr(L['self'].state[list(L['self'].state.keys())[87]]['exp_avg_sq'], '_dynamo_dynamic_indices') == False + | | | | | | +- NO_TENSOR_ALIASING + | | | +- KeyValueManager pair at index=88 + | | | | +- ValueManager: GuardManager: source=L['self'].state[list(L['self'].state.keys())[88]] + | | | | | +- GuardManager: source=L['self'].state[list(L['self'].state.keys())[88]]['step'], accessed_by=DictGetItemGuardAccessor(step) + | | | | | | +- ID_MATCH: ___check_obj_id(L['self'].state[list(L['self'].state.keys())[88]]['step'], 140561608264016) + | | | | | | +- TENSOR_MATCH: check_tensor(L['self'].state[list(L['self'].state.keys())[88]]['step'], Tensor, DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), torch.float32, device=0, requires_grad=False, size=[], stride=[]) + | | | | | | +- NO_HASATTR: hasattr(L['self'].state[list(L['self'].state.keys())[88]]['step'], '_dynamo_dynamic_indices') == False + | | | | | | +- NO_TENSOR_ALIASING + | | | | | +- GuardManager: source=L['self'].state[list(L['self'].state.keys())[88]]['exp_avg'], accessed_by=DictGetItemGuardAccessor(exp_avg) + | | | | | | +- ID_MATCH: ___check_obj_id(L['self'].state[list(L['self'].state.keys())[88]]['exp_avg'], 140561608264096) + | | | | | | +- TENSOR_MATCH: check_tensor(L['self'].state[list(L['self'].state.keys())[88]]['exp_avg'], Tensor, DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), torch.float32, device=0, requires_grad=False, size=[2304, 768], stride=[768, 1]) + | | | | | | +- NO_HASATTR: hasattr(L['self'].state[list(L['self'].state.keys())[88]]['exp_avg'], '_dynamo_dynamic_indices') == False + | | | | | | +- NO_TENSOR_ALIASING + | | | | | +- GuardManager: 
source=L['self'].state[list(L['self'].state.keys())[88]]['exp_avg_sq'], accessed_by=DictGetItemGuardAccessor(exp_avg_sq) + | | | | | | +- ID_MATCH: ___check_obj_id(L['self'].state[list(L['self'].state.keys())[88]]['exp_avg_sq'], 140561608264176) + | | | | | | +- TENSOR_MATCH: check_tensor(L['self'].state[list(L['self'].state.keys())[88]]['exp_avg_sq'], Tensor, DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), torch.float32, device=0, requires_grad=False, size=[2304, 768], stride=[768, 1]) + | | | | | | +- NO_HASATTR: hasattr(L['self'].state[list(L['self'].state.keys())[88]]['exp_avg_sq'], '_dynamo_dynamic_indices') == False + | | | | | | +- NO_TENSOR_ALIASING + | | | +- KeyValueManager pair at index=89 + | | | | +- ValueManager: GuardManager: source=L['self'].state[list(L['self'].state.keys())[89]] + | | | | | +- GuardManager: source=L['self'].state[list(L['self'].state.keys())[89]]['step'], accessed_by=DictGetItemGuardAccessor(step) + | | | | | | +- ID_MATCH: ___check_obj_id(L['self'].state[list(L['self'].state.keys())[89]]['step'], 140561608264336) + | | | | | | +- TENSOR_MATCH: check_tensor(L['self'].state[list(L['self'].state.keys())[89]]['step'], Tensor, DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), torch.float32, device=0, requires_grad=False, size=[], stride=[]) + | | | | | | +- NO_HASATTR: hasattr(L['self'].state[list(L['self'].state.keys())[89]]['step'], '_dynamo_dynamic_indices') == False + | | | | | | +- NO_TENSOR_ALIASING + | | | | | +- GuardManager: source=L['self'].state[list(L['self'].state.keys())[89]]['exp_avg'], accessed_by=DictGetItemGuardAccessor(exp_avg) + | | | | | | +- ID_MATCH: ___check_obj_id(L['self'].state[list(L['self'].state.keys())[89]]['exp_avg'], 140561608264416) + | | | | | | +- TENSOR_MATCH: check_tensor(L['self'].state[list(L['self'].state.keys())[89]]['exp_avg'], Tensor, DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), torch.float32, device=0, requires_grad=False, size=[2304], stride=[1]) + | | | | | | +- NO_HASATTR: hasattr(L['self'].state[list(L['self'].state.keys())[89]]['exp_avg'], '_dynamo_dynamic_indices') == False + | | | | | | +- NO_TENSOR_ALIASING + | | | | | +- GuardManager: source=L['self'].state[list(L['self'].state.keys())[89]]['exp_avg_sq'], accessed_by=DictGetItemGuardAccessor(exp_avg_sq) + | | | | | | +- ID_MATCH: ___check_obj_id(L['self'].state[list(L['self'].state.keys())[89]]['exp_avg_sq'], 140561608264496) + | | | | | | +- TENSOR_MATCH: check_tensor(L['self'].state[list(L['self'].state.keys())[89]]['exp_avg_sq'], Tensor, DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), torch.float32, device=0, requires_grad=False, size=[2304], stride=[1]) + | | | | | | +- NO_HASATTR: hasattr(L['self'].state[list(L['self'].state.keys())[89]]['exp_avg_sq'], '_dynamo_dynamic_indices') == False + | | | | | | +- NO_TENSOR_ALIASING + | | | +- KeyValueManager pair at index=90 + | | | | +- ValueManager: GuardManager: source=L['self'].state[list(L['self'].state.keys())[90]] + | | | | | +- GuardManager: source=L['self'].state[list(L['self'].state.keys())[90]]['step'], accessed_by=DictGetItemGuardAccessor(step) + | | | | | | +- ID_MATCH: ___check_obj_id(L['self'].state[list(L['self'].state.keys())[90]]['step'], 140561608264656) + | | | | | | +- TENSOR_MATCH: check_tensor(L['self'].state[list(L['self'].state.keys())[90]]['step'], Tensor, DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), torch.float32, device=0, requires_grad=False, size=[], stride=[]) + | | | | | | +- 
NO_HASATTR: hasattr(L['self'].state[list(L['self'].state.keys())[90]]['step'], '_dynamo_dynamic_indices') == False + | | | | | | +- NO_TENSOR_ALIASING + | | | | | +- GuardManager: source=L['self'].state[list(L['self'].state.keys())[90]]['exp_avg'], accessed_by=DictGetItemGuardAccessor(exp_avg) + | | | | | | +- ID_MATCH: ___check_obj_id(L['self'].state[list(L['self'].state.keys())[90]]['exp_avg'], 140561608264736) + | | | | | | +- TENSOR_MATCH: check_tensor(L['self'].state[list(L['self'].state.keys())[90]]['exp_avg'], Tensor, DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), torch.float32, device=0, requires_grad=False, size=[768, 768], stride=[768, 1]) + | | | | | | +- NO_HASATTR: hasattr(L['self'].state[list(L['self'].state.keys())[90]]['exp_avg'], '_dynamo_dynamic_indices') == False + | | | | | | +- NO_TENSOR_ALIASING + | | | | | +- GuardManager: source=L['self'].state[list(L['self'].state.keys())[90]]['exp_avg_sq'], accessed_by=DictGetItemGuardAccessor(exp_avg_sq) + | | | | | | +- ID_MATCH: ___check_obj_id(L['self'].state[list(L['self'].state.keys())[90]]['exp_avg_sq'], 140561608264816) + | | | | | | +- TENSOR_MATCH: check_tensor(L['self'].state[list(L['self'].state.keys())[90]]['exp_avg_sq'], Tensor, DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), torch.float32, device=0, requires_grad=False, size=[768, 768], stride=[768, 1]) + | | | | | | +- NO_HASATTR: hasattr(L['self'].state[list(L['self'].state.keys())[90]]['exp_avg_sq'], '_dynamo_dynamic_indices') == False + | | | | | | +- NO_TENSOR_ALIASING + | | | +- KeyValueManager pair at index=91 + | | | | +- ValueManager: GuardManager: source=L['self'].state[list(L['self'].state.keys())[91]] + | | | | | +- GuardManager: source=L['self'].state[list(L['self'].state.keys())[91]]['step'], accessed_by=DictGetItemGuardAccessor(step) + | | | | | | +- ID_MATCH: ___check_obj_id(L['self'].state[list(L['self'].state.keys())[91]]['step'], 140561608264976) + | | | | | | +- TENSOR_MATCH: check_tensor(L['self'].state[list(L['self'].state.keys())[91]]['step'], Tensor, DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), torch.float32, device=0, requires_grad=False, size=[], stride=[]) + | | | | | | +- NO_HASATTR: hasattr(L['self'].state[list(L['self'].state.keys())[91]]['step'], '_dynamo_dynamic_indices') == False + | | | | | | +- NO_TENSOR_ALIASING + | | | | | +- GuardManager: source=L['self'].state[list(L['self'].state.keys())[91]]['exp_avg'], accessed_by=DictGetItemGuardAccessor(exp_avg) + | | | | | | +- ID_MATCH: ___check_obj_id(L['self'].state[list(L['self'].state.keys())[91]]['exp_avg'], 140561608265056) + | | | | | | +- TENSOR_MATCH: check_tensor(L['self'].state[list(L['self'].state.keys())[91]]['exp_avg'], Tensor, DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), torch.float32, device=0, requires_grad=False, size=[768], stride=[1]) + | | | | | | +- NO_HASATTR: hasattr(L['self'].state[list(L['self'].state.keys())[91]]['exp_avg'], '_dynamo_dynamic_indices') == False + | | | | | | +- NO_TENSOR_ALIASING + | | | | | +- GuardManager: source=L['self'].state[list(L['self'].state.keys())[91]]['exp_avg_sq'], accessed_by=DictGetItemGuardAccessor(exp_avg_sq) + | | | | | | +- ID_MATCH: ___check_obj_id(L['self'].state[list(L['self'].state.keys())[91]]['exp_avg_sq'], 140561608265136) + | | | | | | +- TENSOR_MATCH: check_tensor(L['self'].state[list(L['self'].state.keys())[91]]['exp_avg_sq'], Tensor, DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), torch.float32, device=0, 
requires_grad=False, size=[768], stride=[1]) + | | | | | | +- NO_HASATTR: hasattr(L['self'].state[list(L['self'].state.keys())[91]]['exp_avg_sq'], '_dynamo_dynamic_indices') == False + | | | | | | +- NO_TENSOR_ALIASING + | | | +- KeyValueManager pair at index=92 + | | | | +- ValueManager: GuardManager: source=L['self'].state[list(L['self'].state.keys())[92]] + | | | | | +- GuardManager: source=L['self'].state[list(L['self'].state.keys())[92]]['step'], accessed_by=DictGetItemGuardAccessor(step) + | | | | | | +- ID_MATCH: ___check_obj_id(L['self'].state[list(L['self'].state.keys())[92]]['step'], 140561608265296) + | | | | | | +- TENSOR_MATCH: check_tensor(L['self'].state[list(L['self'].state.keys())[92]]['step'], Tensor, DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), torch.float32, device=0, requires_grad=False, size=[], stride=[]) + | | | | | | +- NO_HASATTR: hasattr(L['self'].state[list(L['self'].state.keys())[92]]['step'], '_dynamo_dynamic_indices') == False + | | | | | | +- NO_TENSOR_ALIASING + | | | | | +- GuardManager: source=L['self'].state[list(L['self'].state.keys())[92]]['exp_avg'], accessed_by=DictGetItemGuardAccessor(exp_avg) + | | | | | | +- ID_MATCH: ___check_obj_id(L['self'].state[list(L['self'].state.keys())[92]]['exp_avg'], 140561608265376) + | | | | | | +- TENSOR_MATCH: check_tensor(L['self'].state[list(L['self'].state.keys())[92]]['exp_avg'], Tensor, DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), torch.float32, device=0, requires_grad=False, size=[768], stride=[1]) + | | | | | | +- NO_HASATTR: hasattr(L['self'].state[list(L['self'].state.keys())[92]]['exp_avg'], '_dynamo_dynamic_indices') == False + | | | | | | +- NO_TENSOR_ALIASING + | | | | | +- GuardManager: source=L['self'].state[list(L['self'].state.keys())[92]]['exp_avg_sq'], accessed_by=DictGetItemGuardAccessor(exp_avg_sq) + | | | | | | +- ID_MATCH: ___check_obj_id(L['self'].state[list(L['self'].state.keys())[92]]['exp_avg_sq'], 140561608265456) + | | | | | | +- TENSOR_MATCH: check_tensor(L['self'].state[list(L['self'].state.keys())[92]]['exp_avg_sq'], Tensor, DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), torch.float32, device=0, requires_grad=False, size=[768], stride=[1]) + | | | | | | +- NO_HASATTR: hasattr(L['self'].state[list(L['self'].state.keys())[92]]['exp_avg_sq'], '_dynamo_dynamic_indices') == False + | | | | | | +- NO_TENSOR_ALIASING + | | | +- KeyValueManager pair at index=93 + | | | | +- ValueManager: GuardManager: source=L['self'].state[list(L['self'].state.keys())[93]] + | | | | | +- GuardManager: source=L['self'].state[list(L['self'].state.keys())[93]]['step'], accessed_by=DictGetItemGuardAccessor(step) + | | | | | | +- ID_MATCH: ___check_obj_id(L['self'].state[list(L['self'].state.keys())[93]]['step'], 140561608265616) + | | | | | | +- TENSOR_MATCH: check_tensor(L['self'].state[list(L['self'].state.keys())[93]]['step'], Tensor, DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), torch.float32, device=0, requires_grad=False, size=[], stride=[]) + | | | | | | +- NO_HASATTR: hasattr(L['self'].state[list(L['self'].state.keys())[93]]['step'], '_dynamo_dynamic_indices') == False + | | | | | | +- NO_TENSOR_ALIASING + | | | | | +- GuardManager: source=L['self'].state[list(L['self'].state.keys())[93]]['exp_avg'], accessed_by=DictGetItemGuardAccessor(exp_avg) + | | | | | | +- ID_MATCH: ___check_obj_id(L['self'].state[list(L['self'].state.keys())[93]]['exp_avg'], 140561608265696) + | | | | | | +- TENSOR_MATCH: 
check_tensor(L['self'].state[list(L['self'].state.keys())[93]]['exp_avg'], Tensor, DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), torch.float32, device=0, requires_grad=False, size=[768], stride=[1]) + | | | | | | +- NO_HASATTR: hasattr(L['self'].state[list(L['self'].state.keys())[93]]['exp_avg'], '_dynamo_dynamic_indices') == False + | | | | | | +- NO_TENSOR_ALIASING + | | | | | +- GuardManager: source=L['self'].state[list(L['self'].state.keys())[93]]['exp_avg_sq'], accessed_by=DictGetItemGuardAccessor(exp_avg_sq) + | | | | | | +- ID_MATCH: ___check_obj_id(L['self'].state[list(L['self'].state.keys())[93]]['exp_avg_sq'], 140561608265776) + | | | | | | +- TENSOR_MATCH: check_tensor(L['self'].state[list(L['self'].state.keys())[93]]['exp_avg_sq'], Tensor, DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), torch.float32, device=0, requires_grad=False, size=[768], stride=[1]) + | | | | | | +- NO_HASATTR: hasattr(L['self'].state[list(L['self'].state.keys())[93]]['exp_avg_sq'], '_dynamo_dynamic_indices') == False + | | | | | | +- NO_TENSOR_ALIASING + | | | +- KeyValueManager pair at index=94 + | | | | +- ValueManager: GuardManager: source=L['self'].state[list(L['self'].state.keys())[94]] + | | | | | +- GuardManager: source=L['self'].state[list(L['self'].state.keys())[94]]['step'], accessed_by=DictGetItemGuardAccessor(step) + | | | | | | +- ID_MATCH: ___check_obj_id(L['self'].state[list(L['self'].state.keys())[94]]['step'], 140561608265936) + | | | | | | +- TENSOR_MATCH: check_tensor(L['self'].state[list(L['self'].state.keys())[94]]['step'], Tensor, DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), torch.float32, device=0, requires_grad=False, size=[], stride=[]) + | | | | | | +- NO_HASATTR: hasattr(L['self'].state[list(L['self'].state.keys())[94]]['step'], '_dynamo_dynamic_indices') == False + | | | | | | +- NO_TENSOR_ALIASING + | | | | | +- GuardManager: source=L['self'].state[list(L['self'].state.keys())[94]]['exp_avg'], accessed_by=DictGetItemGuardAccessor(exp_avg) + | | | | | | +- ID_MATCH: ___check_obj_id(L['self'].state[list(L['self'].state.keys())[94]]['exp_avg'], 140561608266016) + | | | | | | +- TENSOR_MATCH: check_tensor(L['self'].state[list(L['self'].state.keys())[94]]['exp_avg'], Tensor, DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), torch.float32, device=0, requires_grad=False, size=[3072, 768], stride=[768, 1]) + | | | | | | +- NO_HASATTR: hasattr(L['self'].state[list(L['self'].state.keys())[94]]['exp_avg'], '_dynamo_dynamic_indices') == False + | | | | | | +- NO_TENSOR_ALIASING + | | | | | +- GuardManager: source=L['self'].state[list(L['self'].state.keys())[94]]['exp_avg_sq'], accessed_by=DictGetItemGuardAccessor(exp_avg_sq) + | | | | | | +- ID_MATCH: ___check_obj_id(L['self'].state[list(L['self'].state.keys())[94]]['exp_avg_sq'], 140561608266096) + | | | | | | +- TENSOR_MATCH: check_tensor(L['self'].state[list(L['self'].state.keys())[94]]['exp_avg_sq'], Tensor, DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), torch.float32, device=0, requires_grad=False, size=[3072, 768], stride=[768, 1]) + | | | | | | +- NO_HASATTR: hasattr(L['self'].state[list(L['self'].state.keys())[94]]['exp_avg_sq'], '_dynamo_dynamic_indices') == False + | | | | | | +- NO_TENSOR_ALIASING + | | | +- KeyValueManager pair at index=95 + | | | | +- ValueManager: GuardManager: source=L['self'].state[list(L['self'].state.keys())[95]] + | | | | | +- GuardManager: 
source=L['self'].state[list(L['self'].state.keys())[95]]['step'], accessed_by=DictGetItemGuardAccessor(step) + | | | | | | +- ID_MATCH: ___check_obj_id(L['self'].state[list(L['self'].state.keys())[95]]['step'], 140561608266256) + | | | | | | +- TENSOR_MATCH: check_tensor(L['self'].state[list(L['self'].state.keys())[95]]['step'], Tensor, DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), torch.float32, device=0, requires_grad=False, size=[], stride=[]) + | | | | | | +- NO_HASATTR: hasattr(L['self'].state[list(L['self'].state.keys())[95]]['step'], '_dynamo_dynamic_indices') == False + | | | | | | +- NO_TENSOR_ALIASING + | | | | | +- GuardManager: source=L['self'].state[list(L['self'].state.keys())[95]]['exp_avg'], accessed_by=DictGetItemGuardAccessor(exp_avg) + | | | | | | +- ID_MATCH: ___check_obj_id(L['self'].state[list(L['self'].state.keys())[95]]['exp_avg'], 140561608266336) + | | | | | | +- TENSOR_MATCH: check_tensor(L['self'].state[list(L['self'].state.keys())[95]]['exp_avg'], Tensor, DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), torch.float32, device=0, requires_grad=False, size=[3072], stride=[1]) + | | | | | | +- NO_HASATTR: hasattr(L['self'].state[list(L['self'].state.keys())[95]]['exp_avg'], '_dynamo_dynamic_indices') == False + | | | | | | +- NO_TENSOR_ALIASING + | | | | | +- GuardManager: source=L['self'].state[list(L['self'].state.keys())[95]]['exp_avg_sq'], accessed_by=DictGetItemGuardAccessor(exp_avg_sq) + | | | | | | +- ID_MATCH: ___check_obj_id(L['self'].state[list(L['self'].state.keys())[95]]['exp_avg_sq'], 140561608266416) + | | | | | | +- TENSOR_MATCH: check_tensor(L['self'].state[list(L['self'].state.keys())[95]]['exp_avg_sq'], Tensor, DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), torch.float32, device=0, requires_grad=False, size=[3072], stride=[1]) + | | | | | | +- NO_HASATTR: hasattr(L['self'].state[list(L['self'].state.keys())[95]]['exp_avg_sq'], '_dynamo_dynamic_indices') == False + | | | | | | +- NO_TENSOR_ALIASING + | | | +- KeyValueManager pair at index=96 + | | | | +- ValueManager: GuardManager: source=L['self'].state[list(L['self'].state.keys())[96]] + | | | | | +- GuardManager: source=L['self'].state[list(L['self'].state.keys())[96]]['step'], accessed_by=DictGetItemGuardAccessor(step) + | | | | | | +- ID_MATCH: ___check_obj_id(L['self'].state[list(L['self'].state.keys())[96]]['step'], 140561608266576) + | | | | | | +- TENSOR_MATCH: check_tensor(L['self'].state[list(L['self'].state.keys())[96]]['step'], Tensor, DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), torch.float32, device=0, requires_grad=False, size=[], stride=[]) + | | | | | | +- NO_HASATTR: hasattr(L['self'].state[list(L['self'].state.keys())[96]]['step'], '_dynamo_dynamic_indices') == False + | | | | | | +- NO_TENSOR_ALIASING + | | | | | +- GuardManager: source=L['self'].state[list(L['self'].state.keys())[96]]['exp_avg'], accessed_by=DictGetItemGuardAccessor(exp_avg) + | | | | | | +- ID_MATCH: ___check_obj_id(L['self'].state[list(L['self'].state.keys())[96]]['exp_avg'], 140561608266656) + | | | | | | +- TENSOR_MATCH: check_tensor(L['self'].state[list(L['self'].state.keys())[96]]['exp_avg'], Tensor, DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), torch.float32, device=0, requires_grad=False, size=[768, 3072], stride=[3072, 1]) + | | | | | | +- NO_HASATTR: hasattr(L['self'].state[list(L['self'].state.keys())[96]]['exp_avg'], '_dynamo_dynamic_indices') == False + | | | | | | +- NO_TENSOR_ALIASING + | | 
| | | +- GuardManager: source=L['self'].state[list(L['self'].state.keys())[96]]['exp_avg_sq'], accessed_by=DictGetItemGuardAccessor(exp_avg_sq) + | | | | | | +- ID_MATCH: ___check_obj_id(L['self'].state[list(L['self'].state.keys())[96]]['exp_avg_sq'], 140561608266736) + | | | | | | +- TENSOR_MATCH: check_tensor(L['self'].state[list(L['self'].state.keys())[96]]['exp_avg_sq'], Tensor, DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), torch.float32, device=0, requires_grad=False, size=[768, 3072], stride=[3072, 1]) + | | | | | | +- NO_HASATTR: hasattr(L['self'].state[list(L['self'].state.keys())[96]]['exp_avg_sq'], '_dynamo_dynamic_indices') == False + | | | | | | +- NO_TENSOR_ALIASING + | | | +- KeyValueManager pair at index=97 + | | | | +- ValueManager: GuardManager: source=L['self'].state[list(L['self'].state.keys())[97]] + | | | | | +- GuardManager: source=L['self'].state[list(L['self'].state.keys())[97]]['step'], accessed_by=DictGetItemGuardAccessor(step) + | | | | | | +- ID_MATCH: ___check_obj_id(L['self'].state[list(L['self'].state.keys())[97]]['step'], 140561608266896) + | | | | | | +- TENSOR_MATCH: check_tensor(L['self'].state[list(L['self'].state.keys())[97]]['step'], Tensor, DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), torch.float32, device=0, requires_grad=False, size=[], stride=[]) + | | | | | | +- NO_HASATTR: hasattr(L['self'].state[list(L['self'].state.keys())[97]]['step'], '_dynamo_dynamic_indices') == False + | | | | | | +- NO_TENSOR_ALIASING + | | | | | +- GuardManager: source=L['self'].state[list(L['self'].state.keys())[97]]['exp_avg'], accessed_by=DictGetItemGuardAccessor(exp_avg) + | | | | | | +- ID_MATCH: ___check_obj_id(L['self'].state[list(L['self'].state.keys())[97]]['exp_avg'], 140561608266976) + | | | | | | +- TENSOR_MATCH: check_tensor(L['self'].state[list(L['self'].state.keys())[97]]['exp_avg'], Tensor, DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), torch.float32, device=0, requires_grad=False, size=[768], stride=[1]) + | | | | | | +- NO_HASATTR: hasattr(L['self'].state[list(L['self'].state.keys())[97]]['exp_avg'], '_dynamo_dynamic_indices') == False + | | | | | | +- NO_TENSOR_ALIASING + | | | | | +- GuardManager: source=L['self'].state[list(L['self'].state.keys())[97]]['exp_avg_sq'], accessed_by=DictGetItemGuardAccessor(exp_avg_sq) + | | | | | | +- ID_MATCH: ___check_obj_id(L['self'].state[list(L['self'].state.keys())[97]]['exp_avg_sq'], 140561608267056) + | | | | | | +- TENSOR_MATCH: check_tensor(L['self'].state[list(L['self'].state.keys())[97]]['exp_avg_sq'], Tensor, DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), torch.float32, device=0, requires_grad=False, size=[768], stride=[1]) + | | | | | | +- NO_HASATTR: hasattr(L['self'].state[list(L['self'].state.keys())[97]]['exp_avg_sq'], '_dynamo_dynamic_indices') == False + | | | | | | +- NO_TENSOR_ALIASING + | | | +- KeyValueManager pair at index=98 + | | | | +- ValueManager: GuardManager: source=L['self'].state[list(L['self'].state.keys())[98]] + | | | | | +- GuardManager: source=L['self'].state[list(L['self'].state.keys())[98]]['step'], accessed_by=DictGetItemGuardAccessor(step) + | | | | | | +- ID_MATCH: ___check_obj_id(L['self'].state[list(L['self'].state.keys())[98]]['step'], 140561608267216) + | | | | | | +- TENSOR_MATCH: check_tensor(L['self'].state[list(L['self'].state.keys())[98]]['step'], Tensor, DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), torch.float32, device=0, requires_grad=False, size=[], 
stride=[]) + | | | | | | +- NO_HASATTR: hasattr(L['self'].state[list(L['self'].state.keys())[98]]['step'], '_dynamo_dynamic_indices') == False + | | | | | | +- NO_TENSOR_ALIASING + | | | | | +- GuardManager: source=L['self'].state[list(L['self'].state.keys())[98]]['exp_avg'], accessed_by=DictGetItemGuardAccessor(exp_avg) + | | | | | | +- ID_MATCH: ___check_obj_id(L['self'].state[list(L['self'].state.keys())[98]]['exp_avg'], 140561608267296) + | | | | | | +- TENSOR_MATCH: check_tensor(L['self'].state[list(L['self'].state.keys())[98]]['exp_avg'], Tensor, DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), torch.float32, device=0, requires_grad=False, size=[768], stride=[1]) + | | | | | | +- NO_HASATTR: hasattr(L['self'].state[list(L['self'].state.keys())[98]]['exp_avg'], '_dynamo_dynamic_indices') == False + | | | | | | +- NO_TENSOR_ALIASING + | | | | | +- GuardManager: source=L['self'].state[list(L['self'].state.keys())[98]]['exp_avg_sq'], accessed_by=DictGetItemGuardAccessor(exp_avg_sq) + | | | | | | +- ID_MATCH: ___check_obj_id(L['self'].state[list(L['self'].state.keys())[98]]['exp_avg_sq'], 140561608267376) + | | | | | | +- TENSOR_MATCH: check_tensor(L['self'].state[list(L['self'].state.keys())[98]]['exp_avg_sq'], Tensor, DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), torch.float32, device=0, requires_grad=False, size=[768], stride=[1]) + | | | | | | +- NO_HASATTR: hasattr(L['self'].state[list(L['self'].state.keys())[98]]['exp_avg_sq'], '_dynamo_dynamic_indices') == False + | | | | | | +- NO_TENSOR_ALIASING + | | | +- KeyValueManager pair at index=99 + | | | | +- ValueManager: GuardManager: source=L['self'].state[list(L['self'].state.keys())[99]] + | | | | | +- GuardManager: source=L['self'].state[list(L['self'].state.keys())[99]]['step'], accessed_by=DictGetItemGuardAccessor(step) + | | | | | | +- ID_MATCH: ___check_obj_id(L['self'].state[list(L['self'].state.keys())[99]]['step'], 140561608267536) + | | | | | | +- TENSOR_MATCH: check_tensor(L['self'].state[list(L['self'].state.keys())[99]]['step'], Tensor, DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), torch.float32, device=0, requires_grad=False, size=[], stride=[]) + | | | | | | +- NO_HASATTR: hasattr(L['self'].state[list(L['self'].state.keys())[99]]['step'], '_dynamo_dynamic_indices') == False + | | | | | | +- NO_TENSOR_ALIASING + | | | | | +- GuardManager: source=L['self'].state[list(L['self'].state.keys())[99]]['exp_avg'], accessed_by=DictGetItemGuardAccessor(exp_avg) + | | | | | | +- ID_MATCH: ___check_obj_id(L['self'].state[list(L['self'].state.keys())[99]]['exp_avg'], 140561608267616) + | | | | | | +- TENSOR_MATCH: check_tensor(L['self'].state[list(L['self'].state.keys())[99]]['exp_avg'], Tensor, DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), torch.float32, device=0, requires_grad=False, size=[768], stride=[1]) + | | | | | | +- NO_HASATTR: hasattr(L['self'].state[list(L['self'].state.keys())[99]]['exp_avg'], '_dynamo_dynamic_indices') == False + | | | | | | +- NO_TENSOR_ALIASING + | | | | | +- GuardManager: source=L['self'].state[list(L['self'].state.keys())[99]]['exp_avg_sq'], accessed_by=DictGetItemGuardAccessor(exp_avg_sq) + | | | | | | +- ID_MATCH: ___check_obj_id(L['self'].state[list(L['self'].state.keys())[99]]['exp_avg_sq'], 140561608267696) + | | | | | | +- TENSOR_MATCH: check_tensor(L['self'].state[list(L['self'].state.keys())[99]]['exp_avg_sq'], Tensor, DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), torch.float32, 
device=0, requires_grad=False, size=[768], stride=[1]) + | | | | | | +- NO_HASATTR: hasattr(L['self'].state[list(L['self'].state.keys())[99]]['exp_avg_sq'], '_dynamo_dynamic_indices') == False + | | | | | | +- NO_TENSOR_ALIASING + | | | +- KeyValueManager pair at index=100 + | | | | +- ValueManager: GuardManager: source=L['self'].state[list(L['self'].state.keys())[100]] + | | | | | +- GuardManager: source=L['self'].state[list(L['self'].state.keys())[100]]['step'], accessed_by=DictGetItemGuardAccessor(step) + | | | | | | +- ID_MATCH: ___check_obj_id(L['self'].state[list(L['self'].state.keys())[100]]['step'], 140561608267856) + | | | | | | +- TENSOR_MATCH: check_tensor(L['self'].state[list(L['self'].state.keys())[100]]['step'], Tensor, DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), torch.float32, device=0, requires_grad=False, size=[], stride=[]) + | | | | | | +- NO_HASATTR: hasattr(L['self'].state[list(L['self'].state.keys())[100]]['step'], '_dynamo_dynamic_indices') == False + | | | | | | +- NO_TENSOR_ALIASING + | | | | | +- GuardManager: source=L['self'].state[list(L['self'].state.keys())[100]]['exp_avg'], accessed_by=DictGetItemGuardAccessor(exp_avg) + | | | | | | +- ID_MATCH: ___check_obj_id(L['self'].state[list(L['self'].state.keys())[100]]['exp_avg'], 140561608267936) + | | | | | | +- TENSOR_MATCH: check_tensor(L['self'].state[list(L['self'].state.keys())[100]]['exp_avg'], Tensor, DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), torch.float32, device=0, requires_grad=False, size=[2304, 768], stride=[768, 1]) + | | | | | | +- NO_HASATTR: hasattr(L['self'].state[list(L['self'].state.keys())[100]]['exp_avg'], '_dynamo_dynamic_indices') == False + | | | | | | +- NO_TENSOR_ALIASING + | | | | | +- GuardManager: source=L['self'].state[list(L['self'].state.keys())[100]]['exp_avg_sq'], accessed_by=DictGetItemGuardAccessor(exp_avg_sq) + | | | | | | +- ID_MATCH: ___check_obj_id(L['self'].state[list(L['self'].state.keys())[100]]['exp_avg_sq'], 140561608268016) + | | | | | | +- TENSOR_MATCH: check_tensor(L['self'].state[list(L['self'].state.keys())[100]]['exp_avg_sq'], Tensor, DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), torch.float32, device=0, requires_grad=False, size=[2304, 768], stride=[768, 1]) + | | | | | | +- NO_HASATTR: hasattr(L['self'].state[list(L['self'].state.keys())[100]]['exp_avg_sq'], '_dynamo_dynamic_indices') == False + | | | | | | +- NO_TENSOR_ALIASING + | | | +- KeyValueManager pair at index=101 + | | | | +- ValueManager: GuardManager: source=L['self'].state[list(L['self'].state.keys())[101]] + | | | | | +- GuardManager: source=L['self'].state[list(L['self'].state.keys())[101]]['step'], accessed_by=DictGetItemGuardAccessor(step) + | | | | | | +- ID_MATCH: ___check_obj_id(L['self'].state[list(L['self'].state.keys())[101]]['step'], 140561608268176) + | | | | | | +- TENSOR_MATCH: check_tensor(L['self'].state[list(L['self'].state.keys())[101]]['step'], Tensor, DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), torch.float32, device=0, requires_grad=False, size=[], stride=[]) + | | | | | | +- NO_HASATTR: hasattr(L['self'].state[list(L['self'].state.keys())[101]]['step'], '_dynamo_dynamic_indices') == False + | | | | | | +- NO_TENSOR_ALIASING + | | | | | +- GuardManager: source=L['self'].state[list(L['self'].state.keys())[101]]['exp_avg'], accessed_by=DictGetItemGuardAccessor(exp_avg) + | | | | | | +- ID_MATCH: ___check_obj_id(L['self'].state[list(L['self'].state.keys())[101]]['exp_avg'], 
140561608268256) + | | | | | | +- TENSOR_MATCH: check_tensor(L['self'].state[list(L['self'].state.keys())[101]]['exp_avg'], Tensor, DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), torch.float32, device=0, requires_grad=False, size=[2304], stride=[1]) + | | | | | | +- NO_HASATTR: hasattr(L['self'].state[list(L['self'].state.keys())[101]]['exp_avg'], '_dynamo_dynamic_indices') == False + | | | | | | +- NO_TENSOR_ALIASING + | | | | | +- GuardManager: source=L['self'].state[list(L['self'].state.keys())[101]]['exp_avg_sq'], accessed_by=DictGetItemGuardAccessor(exp_avg_sq) + | | | | | | +- ID_MATCH: ___check_obj_id(L['self'].state[list(L['self'].state.keys())[101]]['exp_avg_sq'], 140561608268336) + | | | | | | +- TENSOR_MATCH: check_tensor(L['self'].state[list(L['self'].state.keys())[101]]['exp_avg_sq'], Tensor, DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), torch.float32, device=0, requires_grad=False, size=[2304], stride=[1]) + | | | | | | +- NO_HASATTR: hasattr(L['self'].state[list(L['self'].state.keys())[101]]['exp_avg_sq'], '_dynamo_dynamic_indices') == False + | | | | | | +- NO_TENSOR_ALIASING + | | | +- KeyValueManager pair at index=102 + | | | | +- ValueManager: GuardManager: source=L['self'].state[list(L['self'].state.keys())[102]] + | | | | | +- GuardManager: source=L['self'].state[list(L['self'].state.keys())[102]]['step'], accessed_by=DictGetItemGuardAccessor(step) + | | | | | | +- ID_MATCH: ___check_obj_id(L['self'].state[list(L['self'].state.keys())[102]]['step'], 140561608268496) + | | | | | | +- TENSOR_MATCH: check_tensor(L['self'].state[list(L['self'].state.keys())[102]]['step'], Tensor, DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), torch.float32, device=0, requires_grad=False, size=[], stride=[]) + | | | | | | +- NO_HASATTR: hasattr(L['self'].state[list(L['self'].state.keys())[102]]['step'], '_dynamo_dynamic_indices') == False + | | | | | | +- NO_TENSOR_ALIASING + | | | | | +- GuardManager: source=L['self'].state[list(L['self'].state.keys())[102]]['exp_avg'], accessed_by=DictGetItemGuardAccessor(exp_avg) + | | | | | | +- ID_MATCH: ___check_obj_id(L['self'].state[list(L['self'].state.keys())[102]]['exp_avg'], 140561608268576) + | | | | | | +- TENSOR_MATCH: check_tensor(L['self'].state[list(L['self'].state.keys())[102]]['exp_avg'], Tensor, DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), torch.float32, device=0, requires_grad=False, size=[768, 768], stride=[768, 1]) + | | | | | | +- NO_HASATTR: hasattr(L['self'].state[list(L['self'].state.keys())[102]]['exp_avg'], '_dynamo_dynamic_indices') == False + | | | | | | +- NO_TENSOR_ALIASING + | | | | | +- GuardManager: source=L['self'].state[list(L['self'].state.keys())[102]]['exp_avg_sq'], accessed_by=DictGetItemGuardAccessor(exp_avg_sq) + | | | | | | +- ID_MATCH: ___check_obj_id(L['self'].state[list(L['self'].state.keys())[102]]['exp_avg_sq'], 140561608268656) + | | | | | | +- TENSOR_MATCH: check_tensor(L['self'].state[list(L['self'].state.keys())[102]]['exp_avg_sq'], Tensor, DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), torch.float32, device=0, requires_grad=False, size=[768, 768], stride=[768, 1]) + | | | | | | +- NO_HASATTR: hasattr(L['self'].state[list(L['self'].state.keys())[102]]['exp_avg_sq'], '_dynamo_dynamic_indices') == False + | | | | | | +- NO_TENSOR_ALIASING + | | | +- KeyValueManager pair at index=103 + | | | | +- ValueManager: GuardManager: source=L['self'].state[list(L['self'].state.keys())[103]] + | | | | | +- 
GuardManager: source=L['self'].state[list(L['self'].state.keys())[103]]['step'], accessed_by=DictGetItemGuardAccessor(step) + | | | | | | +- ID_MATCH: ___check_obj_id(L['self'].state[list(L['self'].state.keys())[103]]['step'], 140561608268816) + | | | | | | +- TENSOR_MATCH: check_tensor(L['self'].state[list(L['self'].state.keys())[103]]['step'], Tensor, DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), torch.float32, device=0, requires_grad=False, size=[], stride=[]) + | | | | | | +- NO_HASATTR: hasattr(L['self'].state[list(L['self'].state.keys())[103]]['step'], '_dynamo_dynamic_indices') == False + | | | | | | +- NO_TENSOR_ALIASING + | | | | | +- GuardManager: source=L['self'].state[list(L['self'].state.keys())[103]]['exp_avg'], accessed_by=DictGetItemGuardAccessor(exp_avg) + | | | | | | +- ID_MATCH: ___check_obj_id(L['self'].state[list(L['self'].state.keys())[103]]['exp_avg'], 140561608268896) + | | | | | | +- TENSOR_MATCH: check_tensor(L['self'].state[list(L['self'].state.keys())[103]]['exp_avg'], Tensor, DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), torch.float32, device=0, requires_grad=False, size=[768], stride=[1]) + | | | | | | +- NO_HASATTR: hasattr(L['self'].state[list(L['self'].state.keys())[103]]['exp_avg'], '_dynamo_dynamic_indices') == False + | | | | | | +- NO_TENSOR_ALIASING + | | | | | +- GuardManager: source=L['self'].state[list(L['self'].state.keys())[103]]['exp_avg_sq'], accessed_by=DictGetItemGuardAccessor(exp_avg_sq) + | | | | | | +- ID_MATCH: ___check_obj_id(L['self'].state[list(L['self'].state.keys())[103]]['exp_avg_sq'], 140561608268976) + | | | | | | +- TENSOR_MATCH: check_tensor(L['self'].state[list(L['self'].state.keys())[103]]['exp_avg_sq'], Tensor, DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), torch.float32, device=0, requires_grad=False, size=[768], stride=[1]) + | | | | | | +- NO_HASATTR: hasattr(L['self'].state[list(L['self'].state.keys())[103]]['exp_avg_sq'], '_dynamo_dynamic_indices') == False + | | | | | | +- NO_TENSOR_ALIASING + | | | +- KeyValueManager pair at index=104 + | | | | +- ValueManager: GuardManager: source=L['self'].state[list(L['self'].state.keys())[104]] + | | | | | +- GuardManager: source=L['self'].state[list(L['self'].state.keys())[104]]['step'], accessed_by=DictGetItemGuardAccessor(step) + | | | | | | +- ID_MATCH: ___check_obj_id(L['self'].state[list(L['self'].state.keys())[104]]['step'], 140561608269136) + | | | | | | +- TENSOR_MATCH: check_tensor(L['self'].state[list(L['self'].state.keys())[104]]['step'], Tensor, DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), torch.float32, device=0, requires_grad=False, size=[], stride=[]) + | | | | | | +- NO_HASATTR: hasattr(L['self'].state[list(L['self'].state.keys())[104]]['step'], '_dynamo_dynamic_indices') == False + | | | | | | +- NO_TENSOR_ALIASING + | | | | | +- GuardManager: source=L['self'].state[list(L['self'].state.keys())[104]]['exp_avg'], accessed_by=DictGetItemGuardAccessor(exp_avg) + | | | | | | +- ID_MATCH: ___check_obj_id(L['self'].state[list(L['self'].state.keys())[104]]['exp_avg'], 140561608269216) + | | | | | | +- TENSOR_MATCH: check_tensor(L['self'].state[list(L['self'].state.keys())[104]]['exp_avg'], Tensor, DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), torch.float32, device=0, requires_grad=False, size=[768], stride=[1]) + | | | | | | +- NO_HASATTR: hasattr(L['self'].state[list(L['self'].state.keys())[104]]['exp_avg'], '_dynamo_dynamic_indices') == False + | | | | | | +- 
NO_TENSOR_ALIASING + | | | | | +- GuardManager: source=L['self'].state[list(L['self'].state.keys())[104]]['exp_avg_sq'], accessed_by=DictGetItemGuardAccessor(exp_avg_sq) + | | | | | | +- ID_MATCH: ___check_obj_id(L['self'].state[list(L['self'].state.keys())[104]]['exp_avg_sq'], 140561608269296) + | | | | | | +- TENSOR_MATCH: check_tensor(L['self'].state[list(L['self'].state.keys())[104]]['exp_avg_sq'], Tensor, DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), torch.float32, device=0, requires_grad=False, size=[768], stride=[1]) + | | | | | | +- NO_HASATTR: hasattr(L['self'].state[list(L['self'].state.keys())[104]]['exp_avg_sq'], '_dynamo_dynamic_indices') == False + | | | | | | +- NO_TENSOR_ALIASING + | | | +- KeyValueManager pair at index=105 + | | | | +- ValueManager: GuardManager: source=L['self'].state[list(L['self'].state.keys())[105]] + | | | | | +- GuardManager: source=L['self'].state[list(L['self'].state.keys())[105]]['step'], accessed_by=DictGetItemGuardAccessor(step) + | | | | | | +- ID_MATCH: ___check_obj_id(L['self'].state[list(L['self'].state.keys())[105]]['step'], 140561608269456) + | | | | | | +- TENSOR_MATCH: check_tensor(L['self'].state[list(L['self'].state.keys())[105]]['step'], Tensor, DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), torch.float32, device=0, requires_grad=False, size=[], stride=[]) + | | | | | | +- NO_HASATTR: hasattr(L['self'].state[list(L['self'].state.keys())[105]]['step'], '_dynamo_dynamic_indices') == False + | | | | | | +- NO_TENSOR_ALIASING + | | | | | +- GuardManager: source=L['self'].state[list(L['self'].state.keys())[105]]['exp_avg'], accessed_by=DictGetItemGuardAccessor(exp_avg) + | | | | | | +- ID_MATCH: ___check_obj_id(L['self'].state[list(L['self'].state.keys())[105]]['exp_avg'], 140561608269536) + | | | | | | +- TENSOR_MATCH: check_tensor(L['self'].state[list(L['self'].state.keys())[105]]['exp_avg'], Tensor, DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), torch.float32, device=0, requires_grad=False, size=[768], stride=[1]) + | | | | | | +- NO_HASATTR: hasattr(L['self'].state[list(L['self'].state.keys())[105]]['exp_avg'], '_dynamo_dynamic_indices') == False + | | | | | | +- NO_TENSOR_ALIASING + | | | | | +- GuardManager: source=L['self'].state[list(L['self'].state.keys())[105]]['exp_avg_sq'], accessed_by=DictGetItemGuardAccessor(exp_avg_sq) + | | | | | | +- ID_MATCH: ___check_obj_id(L['self'].state[list(L['self'].state.keys())[105]]['exp_avg_sq'], 140561608269616) + | | | | | | +- TENSOR_MATCH: check_tensor(L['self'].state[list(L['self'].state.keys())[105]]['exp_avg_sq'], Tensor, DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), torch.float32, device=0, requires_grad=False, size=[768], stride=[1]) + | | | | | | +- NO_HASATTR: hasattr(L['self'].state[list(L['self'].state.keys())[105]]['exp_avg_sq'], '_dynamo_dynamic_indices') == False + | | | | | | +- NO_TENSOR_ALIASING + | | | +- KeyValueManager pair at index=106 + | | | | +- ValueManager: GuardManager: source=L['self'].state[list(L['self'].state.keys())[106]] + | | | | | +- GuardManager: source=L['self'].state[list(L['self'].state.keys())[106]]['step'], accessed_by=DictGetItemGuardAccessor(step) + | | | | | | +- ID_MATCH: ___check_obj_id(L['self'].state[list(L['self'].state.keys())[106]]['step'], 140561608269776) + | | | | | | +- TENSOR_MATCH: check_tensor(L['self'].state[list(L['self'].state.keys())[106]]['step'], Tensor, DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), torch.float32, device=0, 
requires_grad=False, size=[], stride=[]) + | | | | | | +- NO_HASATTR: hasattr(L['self'].state[list(L['self'].state.keys())[106]]['step'], '_dynamo_dynamic_indices') == False + | | | | | | +- NO_TENSOR_ALIASING + | | | | | +- GuardManager: source=L['self'].state[list(L['self'].state.keys())[106]]['exp_avg'], accessed_by=DictGetItemGuardAccessor(exp_avg) + | | | | | | +- ID_MATCH: ___check_obj_id(L['self'].state[list(L['self'].state.keys())[106]]['exp_avg'], 140561608269856) + | | | | | | +- TENSOR_MATCH: check_tensor(L['self'].state[list(L['self'].state.keys())[106]]['exp_avg'], Tensor, DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), torch.float32, device=0, requires_grad=False, size=[3072, 768], stride=[768, 1]) + | | | | | | +- NO_HASATTR: hasattr(L['self'].state[list(L['self'].state.keys())[106]]['exp_avg'], '_dynamo_dynamic_indices') == False + | | | | | | +- NO_TENSOR_ALIASING + | | | | | +- GuardManager: source=L['self'].state[list(L['self'].state.keys())[106]]['exp_avg_sq'], accessed_by=DictGetItemGuardAccessor(exp_avg_sq) + | | | | | | +- ID_MATCH: ___check_obj_id(L['self'].state[list(L['self'].state.keys())[106]]['exp_avg_sq'], 140561608269936) + | | | | | | +- TENSOR_MATCH: check_tensor(L['self'].state[list(L['self'].state.keys())[106]]['exp_avg_sq'], Tensor, DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), torch.float32, device=0, requires_grad=False, size=[3072, 768], stride=[768, 1]) + | | | | | | +- NO_HASATTR: hasattr(L['self'].state[list(L['self'].state.keys())[106]]['exp_avg_sq'], '_dynamo_dynamic_indices') == False + | | | | | | +- NO_TENSOR_ALIASING + | | | +- KeyValueManager pair at index=107 + | | | | +- ValueManager: GuardManager: source=L['self'].state[list(L['self'].state.keys())[107]] + | | | | | +- GuardManager: source=L['self'].state[list(L['self'].state.keys())[107]]['step'], accessed_by=DictGetItemGuardAccessor(step) + | | | | | | +- ID_MATCH: ___check_obj_id(L['self'].state[list(L['self'].state.keys())[107]]['step'], 140561608270096) + | | | | | | +- TENSOR_MATCH: check_tensor(L['self'].state[list(L['self'].state.keys())[107]]['step'], Tensor, DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), torch.float32, device=0, requires_grad=False, size=[], stride=[]) + | | | | | | +- NO_HASATTR: hasattr(L['self'].state[list(L['self'].state.keys())[107]]['step'], '_dynamo_dynamic_indices') == False + | | | | | | +- NO_TENSOR_ALIASING + | | | | | +- GuardManager: source=L['self'].state[list(L['self'].state.keys())[107]]['exp_avg'], accessed_by=DictGetItemGuardAccessor(exp_avg) + | | | | | | +- ID_MATCH: ___check_obj_id(L['self'].state[list(L['self'].state.keys())[107]]['exp_avg'], 140561608270176) + | | | | | | +- TENSOR_MATCH: check_tensor(L['self'].state[list(L['self'].state.keys())[107]]['exp_avg'], Tensor, DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), torch.float32, device=0, requires_grad=False, size=[3072], stride=[1]) + | | | | | | +- NO_HASATTR: hasattr(L['self'].state[list(L['self'].state.keys())[107]]['exp_avg'], '_dynamo_dynamic_indices') == False + | | | | | | +- NO_TENSOR_ALIASING + | | | | | +- GuardManager: source=L['self'].state[list(L['self'].state.keys())[107]]['exp_avg_sq'], accessed_by=DictGetItemGuardAccessor(exp_avg_sq) + | | | | | | +- ID_MATCH: ___check_obj_id(L['self'].state[list(L['self'].state.keys())[107]]['exp_avg_sq'], 140561608270256) + | | | | | | +- TENSOR_MATCH: check_tensor(L['self'].state[list(L['self'].state.keys())[107]]['exp_avg_sq'], Tensor, 
DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), torch.float32, device=0, requires_grad=False, size=[3072], stride=[1]) + | | | | | | +- NO_HASATTR: hasattr(L['self'].state[list(L['self'].state.keys())[107]]['exp_avg_sq'], '_dynamo_dynamic_indices') == False + | | | | | | +- NO_TENSOR_ALIASING + | | | +- KeyValueManager pair at index=108 + | | | | +- ValueManager: GuardManager: source=L['self'].state[list(L['self'].state.keys())[108]] + | | | | | +- GuardManager: source=L['self'].state[list(L['self'].state.keys())[108]]['step'], accessed_by=DictGetItemGuardAccessor(step) + | | | | | | +- ID_MATCH: ___check_obj_id(L['self'].state[list(L['self'].state.keys())[108]]['step'], 140561608270416) + | | | | | | +- TENSOR_MATCH: check_tensor(L['self'].state[list(L['self'].state.keys())[108]]['step'], Tensor, DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), torch.float32, device=0, requires_grad=False, size=[], stride=[]) + | | | | | | +- NO_HASATTR: hasattr(L['self'].state[list(L['self'].state.keys())[108]]['step'], '_dynamo_dynamic_indices') == False + | | | | | | +- NO_TENSOR_ALIASING + | | | | | +- GuardManager: source=L['self'].state[list(L['self'].state.keys())[108]]['exp_avg'], accessed_by=DictGetItemGuardAccessor(exp_avg) + | | | | | | +- ID_MATCH: ___check_obj_id(L['self'].state[list(L['self'].state.keys())[108]]['exp_avg'], 140561608270496) + | | | | | | +- TENSOR_MATCH: check_tensor(L['self'].state[list(L['self'].state.keys())[108]]['exp_avg'], Tensor, DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), torch.float32, device=0, requires_grad=False, size=[768, 3072], stride=[3072, 1]) + | | | | | | +- NO_HASATTR: hasattr(L['self'].state[list(L['self'].state.keys())[108]]['exp_avg'], '_dynamo_dynamic_indices') == False + | | | | | | +- NO_TENSOR_ALIASING + | | | | | +- GuardManager: source=L['self'].state[list(L['self'].state.keys())[108]]['exp_avg_sq'], accessed_by=DictGetItemGuardAccessor(exp_avg_sq) + | | | | | | +- ID_MATCH: ___check_obj_id(L['self'].state[list(L['self'].state.keys())[108]]['exp_avg_sq'], 140561608270576) + | | | | | | +- TENSOR_MATCH: check_tensor(L['self'].state[list(L['self'].state.keys())[108]]['exp_avg_sq'], Tensor, DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), torch.float32, device=0, requires_grad=False, size=[768, 3072], stride=[3072, 1]) + | | | | | | +- NO_HASATTR: hasattr(L['self'].state[list(L['self'].state.keys())[108]]['exp_avg_sq'], '_dynamo_dynamic_indices') == False + | | | | | | +- NO_TENSOR_ALIASING + | | | +- KeyValueManager pair at index=109 + | | | | +- ValueManager: GuardManager: source=L['self'].state[list(L['self'].state.keys())[109]] + | | | | | +- GuardManager: source=L['self'].state[list(L['self'].state.keys())[109]]['step'], accessed_by=DictGetItemGuardAccessor(step) + | | | | | | +- ID_MATCH: ___check_obj_id(L['self'].state[list(L['self'].state.keys())[109]]['step'], 140561608270736) + | | | | | | +- TENSOR_MATCH: check_tensor(L['self'].state[list(L['self'].state.keys())[109]]['step'], Tensor, DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), torch.float32, device=0, requires_grad=False, size=[], stride=[]) + | | | | | | +- NO_HASATTR: hasattr(L['self'].state[list(L['self'].state.keys())[109]]['step'], '_dynamo_dynamic_indices') == False + | | | | | | +- NO_TENSOR_ALIASING + | | | | | +- GuardManager: source=L['self'].state[list(L['self'].state.keys())[109]]['exp_avg'], accessed_by=DictGetItemGuardAccessor(exp_avg) + | | | | | | +- ID_MATCH: 
___check_obj_id(L['self'].state[list(L['self'].state.keys())[109]]['exp_avg'], 140561608270816) + | | | | | | +- TENSOR_MATCH: check_tensor(L['self'].state[list(L['self'].state.keys())[109]]['exp_avg'], Tensor, DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), torch.float32, device=0, requires_grad=False, size=[768], stride=[1]) + | | | | | | +- NO_HASATTR: hasattr(L['self'].state[list(L['self'].state.keys())[109]]['exp_avg'], '_dynamo_dynamic_indices') == False + | | | | | | +- NO_TENSOR_ALIASING + | | | | | +- GuardManager: source=L['self'].state[list(L['self'].state.keys())[109]]['exp_avg_sq'], accessed_by=DictGetItemGuardAccessor(exp_avg_sq) + | | | | | | +- ID_MATCH: ___check_obj_id(L['self'].state[list(L['self'].state.keys())[109]]['exp_avg_sq'], 140561608270896) + | | | | | | +- TENSOR_MATCH: check_tensor(L['self'].state[list(L['self'].state.keys())[109]]['exp_avg_sq'], Tensor, DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), torch.float32, device=0, requires_grad=False, size=[768], stride=[1]) + | | | | | | +- NO_HASATTR: hasattr(L['self'].state[list(L['self'].state.keys())[109]]['exp_avg_sq'], '_dynamo_dynamic_indices') == False + | | | | | | +- NO_TENSOR_ALIASING + | | | +- KeyValueManager pair at index=110 + | | | | +- ValueManager: GuardManager: source=L['self'].state[list(L['self'].state.keys())[110]] + | | | | | +- GuardManager: source=L['self'].state[list(L['self'].state.keys())[110]]['step'], accessed_by=DictGetItemGuardAccessor(step) + | | | | | | +- ID_MATCH: ___check_obj_id(L['self'].state[list(L['self'].state.keys())[110]]['step'], 140561608271056) + | | | | | | +- TENSOR_MATCH: check_tensor(L['self'].state[list(L['self'].state.keys())[110]]['step'], Tensor, DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), torch.float32, device=0, requires_grad=False, size=[], stride=[]) + | | | | | | +- NO_HASATTR: hasattr(L['self'].state[list(L['self'].state.keys())[110]]['step'], '_dynamo_dynamic_indices') == False + | | | | | | +- NO_TENSOR_ALIASING + | | | | | +- GuardManager: source=L['self'].state[list(L['self'].state.keys())[110]]['exp_avg'], accessed_by=DictGetItemGuardAccessor(exp_avg) + | | | | | | +- ID_MATCH: ___check_obj_id(L['self'].state[list(L['self'].state.keys())[110]]['exp_avg'], 140561608271136) + | | | | | | +- TENSOR_MATCH: check_tensor(L['self'].state[list(L['self'].state.keys())[110]]['exp_avg'], Tensor, DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), torch.float32, device=0, requires_grad=False, size=[768], stride=[1]) + | | | | | | +- NO_HASATTR: hasattr(L['self'].state[list(L['self'].state.keys())[110]]['exp_avg'], '_dynamo_dynamic_indices') == False + | | | | | | +- NO_TENSOR_ALIASING + | | | | | +- GuardManager: source=L['self'].state[list(L['self'].state.keys())[110]]['exp_avg_sq'], accessed_by=DictGetItemGuardAccessor(exp_avg_sq) + | | | | | | +- ID_MATCH: ___check_obj_id(L['self'].state[list(L['self'].state.keys())[110]]['exp_avg_sq'], 140561608271216) + | | | | | | +- TENSOR_MATCH: check_tensor(L['self'].state[list(L['self'].state.keys())[110]]['exp_avg_sq'], Tensor, DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), torch.float32, device=0, requires_grad=False, size=[768], stride=[1]) + | | | | | | +- NO_HASATTR: hasattr(L['self'].state[list(L['self'].state.keys())[110]]['exp_avg_sq'], '_dynamo_dynamic_indices') == False + | | | | | | +- NO_TENSOR_ALIASING + | | | +- KeyValueManager pair at index=111 + | | | | +- ValueManager: GuardManager: 
source=L['self'].state[list(L['self'].state.keys())[111]] + | | | | | +- GuardManager: source=L['self'].state[list(L['self'].state.keys())[111]]['step'], accessed_by=DictGetItemGuardAccessor(step) + | | | | | | +- ID_MATCH: ___check_obj_id(L['self'].state[list(L['self'].state.keys())[111]]['step'], 140561608271376) + | | | | | | +- TENSOR_MATCH: check_tensor(L['self'].state[list(L['self'].state.keys())[111]]['step'], Tensor, DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), torch.float32, device=0, requires_grad=False, size=[], stride=[]) + | | | | | | +- NO_HASATTR: hasattr(L['self'].state[list(L['self'].state.keys())[111]]['step'], '_dynamo_dynamic_indices') == False + | | | | | | +- NO_TENSOR_ALIASING + | | | | | +- GuardManager: source=L['self'].state[list(L['self'].state.keys())[111]]['exp_avg'], accessed_by=DictGetItemGuardAccessor(exp_avg) + | | | | | | +- ID_MATCH: ___check_obj_id(L['self'].state[list(L['self'].state.keys())[111]]['exp_avg'], 140561608271456) + | | | | | | +- TENSOR_MATCH: check_tensor(L['self'].state[list(L['self'].state.keys())[111]]['exp_avg'], Tensor, DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), torch.float32, device=0, requires_grad=False, size=[768], stride=[1]) + | | | | | | +- NO_HASATTR: hasattr(L['self'].state[list(L['self'].state.keys())[111]]['exp_avg'], '_dynamo_dynamic_indices') == False + | | | | | | +- NO_TENSOR_ALIASING + | | | | | +- GuardManager: source=L['self'].state[list(L['self'].state.keys())[111]]['exp_avg_sq'], accessed_by=DictGetItemGuardAccessor(exp_avg_sq) + | | | | | | +- ID_MATCH: ___check_obj_id(L['self'].state[list(L['self'].state.keys())[111]]['exp_avg_sq'], 140561608271536) + | | | | | | +- TENSOR_MATCH: check_tensor(L['self'].state[list(L['self'].state.keys())[111]]['exp_avg_sq'], Tensor, DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), torch.float32, device=0, requires_grad=False, size=[768], stride=[1]) + | | | | | | +- NO_HASATTR: hasattr(L['self'].state[list(L['self'].state.keys())[111]]['exp_avg_sq'], '_dynamo_dynamic_indices') == False + | | | | | | +- NO_TENSOR_ALIASING + | | | +- KeyValueManager pair at index=112 + | | | | +- ValueManager: GuardManager: source=L['self'].state[list(L['self'].state.keys())[112]] + | | | | | +- GuardManager: source=L['self'].state[list(L['self'].state.keys())[112]]['step'], accessed_by=DictGetItemGuardAccessor(step) + | | | | | | +- ID_MATCH: ___check_obj_id(L['self'].state[list(L['self'].state.keys())[112]]['step'], 140561608271696) + | | | | | | +- TENSOR_MATCH: check_tensor(L['self'].state[list(L['self'].state.keys())[112]]['step'], Tensor, DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), torch.float32, device=0, requires_grad=False, size=[], stride=[]) + | | | | | | +- NO_HASATTR: hasattr(L['self'].state[list(L['self'].state.keys())[112]]['step'], '_dynamo_dynamic_indices') == False + | | | | | | +- NO_TENSOR_ALIASING + | | | | | +- GuardManager: source=L['self'].state[list(L['self'].state.keys())[112]]['exp_avg'], accessed_by=DictGetItemGuardAccessor(exp_avg) + | | | | | | +- ID_MATCH: ___check_obj_id(L['self'].state[list(L['self'].state.keys())[112]]['exp_avg'], 140561608271776) + | | | | | | +- TENSOR_MATCH: check_tensor(L['self'].state[list(L['self'].state.keys())[112]]['exp_avg'], Tensor, DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), torch.float32, device=0, requires_grad=False, size=[2304, 768], stride=[768, 1]) + | | | | | | +- NO_HASATTR: 
hasattr(L['self'].state[list(L['self'].state.keys())[112]]['exp_avg'], '_dynamo_dynamic_indices') == False + | | | | | | +- NO_TENSOR_ALIASING + | | | | | +- GuardManager: source=L['self'].state[list(L['self'].state.keys())[112]]['exp_avg_sq'], accessed_by=DictGetItemGuardAccessor(exp_avg_sq) + | | | | | | +- ID_MATCH: ___check_obj_id(L['self'].state[list(L['self'].state.keys())[112]]['exp_avg_sq'], 140561608271856) + | | | | | | +- TENSOR_MATCH: check_tensor(L['self'].state[list(L['self'].state.keys())[112]]['exp_avg_sq'], Tensor, DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), torch.float32, device=0, requires_grad=False, size=[2304, 768], stride=[768, 1]) + | | | | | | +- NO_HASATTR: hasattr(L['self'].state[list(L['self'].state.keys())[112]]['exp_avg_sq'], '_dynamo_dynamic_indices') == False + | | | | | | +- NO_TENSOR_ALIASING + | | | +- KeyValueManager pair at index=113 + | | | | +- ValueManager: GuardManager: source=L['self'].state[list(L['self'].state.keys())[113]] + | | | | | +- GuardManager: source=L['self'].state[list(L['self'].state.keys())[113]]['step'], accessed_by=DictGetItemGuardAccessor(step) + | | | | | | +- ID_MATCH: ___check_obj_id(L['self'].state[list(L['self'].state.keys())[113]]['step'], 140561608272016) + | | | | | | +- TENSOR_MATCH: check_tensor(L['self'].state[list(L['self'].state.keys())[113]]['step'], Tensor, DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), torch.float32, device=0, requires_grad=False, size=[], stride=[]) + | | | | | | +- NO_HASATTR: hasattr(L['self'].state[list(L['self'].state.keys())[113]]['step'], '_dynamo_dynamic_indices') == False + | | | | | | +- NO_TENSOR_ALIASING + | | | | | +- GuardManager: source=L['self'].state[list(L['self'].state.keys())[113]]['exp_avg'], accessed_by=DictGetItemGuardAccessor(exp_avg) + | | | | | | +- ID_MATCH: ___check_obj_id(L['self'].state[list(L['self'].state.keys())[113]]['exp_avg'], 140561608272096) + | | | | | | +- TENSOR_MATCH: check_tensor(L['self'].state[list(L['self'].state.keys())[113]]['exp_avg'], Tensor, DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), torch.float32, device=0, requires_grad=False, size=[2304], stride=[1]) + | | | | | | +- NO_HASATTR: hasattr(L['self'].state[list(L['self'].state.keys())[113]]['exp_avg'], '_dynamo_dynamic_indices') == False + | | | | | | +- NO_TENSOR_ALIASING + | | | | | +- GuardManager: source=L['self'].state[list(L['self'].state.keys())[113]]['exp_avg_sq'], accessed_by=DictGetItemGuardAccessor(exp_avg_sq) + | | | | | | +- ID_MATCH: ___check_obj_id(L['self'].state[list(L['self'].state.keys())[113]]['exp_avg_sq'], 140561608272176) + | | | | | | +- TENSOR_MATCH: check_tensor(L['self'].state[list(L['self'].state.keys())[113]]['exp_avg_sq'], Tensor, DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), torch.float32, device=0, requires_grad=False, size=[2304], stride=[1]) + | | | | | | +- NO_HASATTR: hasattr(L['self'].state[list(L['self'].state.keys())[113]]['exp_avg_sq'], '_dynamo_dynamic_indices') == False + | | | | | | +- NO_TENSOR_ALIASING + | | | +- KeyValueManager pair at index=114 + | | | | +- ValueManager: GuardManager: source=L['self'].state[list(L['self'].state.keys())[114]] + | | | | | +- GuardManager: source=L['self'].state[list(L['self'].state.keys())[114]]['step'], accessed_by=DictGetItemGuardAccessor(step) + | | | | | | +- ID_MATCH: ___check_obj_id(L['self'].state[list(L['self'].state.keys())[114]]['step'], 140561608272336) + | | | | | | +- TENSOR_MATCH: 
check_tensor(L['self'].state[list(L['self'].state.keys())[114]]['step'], Tensor, DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), torch.float32, device=0, requires_grad=False, size=[], stride=[]) + | | | | | | +- NO_HASATTR: hasattr(L['self'].state[list(L['self'].state.keys())[114]]['step'], '_dynamo_dynamic_indices') == False + | | | | | | +- NO_TENSOR_ALIASING + | | | | | +- GuardManager: source=L['self'].state[list(L['self'].state.keys())[114]]['exp_avg'], accessed_by=DictGetItemGuardAccessor(exp_avg) + | | | | | | +- ID_MATCH: ___check_obj_id(L['self'].state[list(L['self'].state.keys())[114]]['exp_avg'], 140561608272416) + | | | | | | +- TENSOR_MATCH: check_tensor(L['self'].state[list(L['self'].state.keys())[114]]['exp_avg'], Tensor, DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), torch.float32, device=0, requires_grad=False, size=[768, 768], stride=[768, 1]) + | | | | | | +- NO_HASATTR: hasattr(L['self'].state[list(L['self'].state.keys())[114]]['exp_avg'], '_dynamo_dynamic_indices') == False + | | | | | | +- NO_TENSOR_ALIASING + | | | | | +- GuardManager: source=L['self'].state[list(L['self'].state.keys())[114]]['exp_avg_sq'], accessed_by=DictGetItemGuardAccessor(exp_avg_sq) + | | | | | | +- ID_MATCH: ___check_obj_id(L['self'].state[list(L['self'].state.keys())[114]]['exp_avg_sq'], 140561608272496) + | | | | | | +- TENSOR_MATCH: check_tensor(L['self'].state[list(L['self'].state.keys())[114]]['exp_avg_sq'], Tensor, DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), torch.float32, device=0, requires_grad=False, size=[768, 768], stride=[768, 1]) + | | | | | | +- NO_HASATTR: hasattr(L['self'].state[list(L['self'].state.keys())[114]]['exp_avg_sq'], '_dynamo_dynamic_indices') == False + | | | | | | +- NO_TENSOR_ALIASING + | | | +- KeyValueManager pair at index=115 + | | | | +- ValueManager: GuardManager: source=L['self'].state[list(L['self'].state.keys())[115]] + | | | | | +- GuardManager: source=L['self'].state[list(L['self'].state.keys())[115]]['step'], accessed_by=DictGetItemGuardAccessor(step) + | | | | | | +- ID_MATCH: ___check_obj_id(L['self'].state[list(L['self'].state.keys())[115]]['step'], 140561608272656) + | | | | | | +- TENSOR_MATCH: check_tensor(L['self'].state[list(L['self'].state.keys())[115]]['step'], Tensor, DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), torch.float32, device=0, requires_grad=False, size=[], stride=[]) + | | | | | | +- NO_HASATTR: hasattr(L['self'].state[list(L['self'].state.keys())[115]]['step'], '_dynamo_dynamic_indices') == False + | | | | | | +- NO_TENSOR_ALIASING + | | | | | +- GuardManager: source=L['self'].state[list(L['self'].state.keys())[115]]['exp_avg'], accessed_by=DictGetItemGuardAccessor(exp_avg) + | | | | | | +- ID_MATCH: ___check_obj_id(L['self'].state[list(L['self'].state.keys())[115]]['exp_avg'], 140561608272736) + | | | | | | +- TENSOR_MATCH: check_tensor(L['self'].state[list(L['self'].state.keys())[115]]['exp_avg'], Tensor, DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), torch.float32, device=0, requires_grad=False, size=[768], stride=[1]) + | | | | | | +- NO_HASATTR: hasattr(L['self'].state[list(L['self'].state.keys())[115]]['exp_avg'], '_dynamo_dynamic_indices') == False + | | | | | | +- NO_TENSOR_ALIASING + | | | | | +- GuardManager: source=L['self'].state[list(L['self'].state.keys())[115]]['exp_avg_sq'], accessed_by=DictGetItemGuardAccessor(exp_avg_sq) + | | | | | | +- ID_MATCH: 
___check_obj_id(L['self'].state[list(L['self'].state.keys())[115]]['exp_avg_sq'], 140561608272816) + | | | | | | +- TENSOR_MATCH: check_tensor(L['self'].state[list(L['self'].state.keys())[115]]['exp_avg_sq'], Tensor, DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), torch.float32, device=0, requires_grad=False, size=[768], stride=[1]) + | | | | | | +- NO_HASATTR: hasattr(L['self'].state[list(L['self'].state.keys())[115]]['exp_avg_sq'], '_dynamo_dynamic_indices') == False + | | | | | | +- NO_TENSOR_ALIASING + | | | +- KeyValueManager pair at index=116 + | | | | +- ValueManager: GuardManager: source=L['self'].state[list(L['self'].state.keys())[116]] + | | | | | +- GuardManager: source=L['self'].state[list(L['self'].state.keys())[116]]['step'], accessed_by=DictGetItemGuardAccessor(step) + | | | | | | +- ID_MATCH: ___check_obj_id(L['self'].state[list(L['self'].state.keys())[116]]['step'], 140561608272976) + | | | | | | +- TENSOR_MATCH: check_tensor(L['self'].state[list(L['self'].state.keys())[116]]['step'], Tensor, DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), torch.float32, device=0, requires_grad=False, size=[], stride=[]) + | | | | | | +- NO_HASATTR: hasattr(L['self'].state[list(L['self'].state.keys())[116]]['step'], '_dynamo_dynamic_indices') == False + | | | | | | +- NO_TENSOR_ALIASING + | | | | | +- GuardManager: source=L['self'].state[list(L['self'].state.keys())[116]]['exp_avg'], accessed_by=DictGetItemGuardAccessor(exp_avg) + | | | | | | +- ID_MATCH: ___check_obj_id(L['self'].state[list(L['self'].state.keys())[116]]['exp_avg'], 140561608273056) + | | | | | | +- TENSOR_MATCH: check_tensor(L['self'].state[list(L['self'].state.keys())[116]]['exp_avg'], Tensor, DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), torch.float32, device=0, requires_grad=False, size=[768], stride=[1]) + | | | | | | +- NO_HASATTR: hasattr(L['self'].state[list(L['self'].state.keys())[116]]['exp_avg'], '_dynamo_dynamic_indices') == False + | | | | | | +- NO_TENSOR_ALIASING + | | | | | +- GuardManager: source=L['self'].state[list(L['self'].state.keys())[116]]['exp_avg_sq'], accessed_by=DictGetItemGuardAccessor(exp_avg_sq) + | | | | | | +- ID_MATCH: ___check_obj_id(L['self'].state[list(L['self'].state.keys())[116]]['exp_avg_sq'], 140561608273136) + | | | | | | +- TENSOR_MATCH: check_tensor(L['self'].state[list(L['self'].state.keys())[116]]['exp_avg_sq'], Tensor, DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), torch.float32, device=0, requires_grad=False, size=[768], stride=[1]) + | | | | | | +- NO_HASATTR: hasattr(L['self'].state[list(L['self'].state.keys())[116]]['exp_avg_sq'], '_dynamo_dynamic_indices') == False + | | | | | | +- NO_TENSOR_ALIASING + | | | +- KeyValueManager pair at index=117 + | | | | +- ValueManager: GuardManager: source=L['self'].state[list(L['self'].state.keys())[117]] + | | | | | +- GuardManager: source=L['self'].state[list(L['self'].state.keys())[117]]['step'], accessed_by=DictGetItemGuardAccessor(step) + | | | | | | +- ID_MATCH: ___check_obj_id(L['self'].state[list(L['self'].state.keys())[117]]['step'], 140561608273296) + | | | | | | +- TENSOR_MATCH: check_tensor(L['self'].state[list(L['self'].state.keys())[117]]['step'], Tensor, DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), torch.float32, device=0, requires_grad=False, size=[], stride=[]) + | | | | | | +- NO_HASATTR: hasattr(L['self'].state[list(L['self'].state.keys())[117]]['step'], '_dynamo_dynamic_indices') == False + | | | | | | +- 
NO_TENSOR_ALIASING + | | | | | +- GuardManager: source=L['self'].state[list(L['self'].state.keys())[117]]['exp_avg'], accessed_by=DictGetItemGuardAccessor(exp_avg) + | | | | | | +- ID_MATCH: ___check_obj_id(L['self'].state[list(L['self'].state.keys())[117]]['exp_avg'], 140561608273376) + | | | | | | +- TENSOR_MATCH: check_tensor(L['self'].state[list(L['self'].state.keys())[117]]['exp_avg'], Tensor, DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), torch.float32, device=0, requires_grad=False, size=[768], stride=[1]) + | | | | | | +- NO_HASATTR: hasattr(L['self'].state[list(L['self'].state.keys())[117]]['exp_avg'], '_dynamo_dynamic_indices') == False + | | | | | | +- NO_TENSOR_ALIASING + | | | | | +- GuardManager: source=L['self'].state[list(L['self'].state.keys())[117]]['exp_avg_sq'], accessed_by=DictGetItemGuardAccessor(exp_avg_sq) + | | | | | | +- ID_MATCH: ___check_obj_id(L['self'].state[list(L['self'].state.keys())[117]]['exp_avg_sq'], 140561608273456) + | | | | | | +- TENSOR_MATCH: check_tensor(L['self'].state[list(L['self'].state.keys())[117]]['exp_avg_sq'], Tensor, DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), torch.float32, device=0, requires_grad=False, size=[768], stride=[1]) + | | | | | | +- NO_HASATTR: hasattr(L['self'].state[list(L['self'].state.keys())[117]]['exp_avg_sq'], '_dynamo_dynamic_indices') == False + | | | | | | +- NO_TENSOR_ALIASING + | | | +- KeyValueManager pair at index=118 + | | | | +- ValueManager: GuardManager: source=L['self'].state[list(L['self'].state.keys())[118]] + | | | | | +- GuardManager: source=L['self'].state[list(L['self'].state.keys())[118]]['step'], accessed_by=DictGetItemGuardAccessor(step) + | | | | | | +- ID_MATCH: ___check_obj_id(L['self'].state[list(L['self'].state.keys())[118]]['step'], 140561608273616) + | | | | | | +- TENSOR_MATCH: check_tensor(L['self'].state[list(L['self'].state.keys())[118]]['step'], Tensor, DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), torch.float32, device=0, requires_grad=False, size=[], stride=[]) + | | | | | | +- NO_HASATTR: hasattr(L['self'].state[list(L['self'].state.keys())[118]]['step'], '_dynamo_dynamic_indices') == False + | | | | | | +- NO_TENSOR_ALIASING + | | | | | +- GuardManager: source=L['self'].state[list(L['self'].state.keys())[118]]['exp_avg'], accessed_by=DictGetItemGuardAccessor(exp_avg) + | | | | | | +- ID_MATCH: ___check_obj_id(L['self'].state[list(L['self'].state.keys())[118]]['exp_avg'], 140561608273696) + | | | | | | +- TENSOR_MATCH: check_tensor(L['self'].state[list(L['self'].state.keys())[118]]['exp_avg'], Tensor, DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), torch.float32, device=0, requires_grad=False, size=[3072, 768], stride=[768, 1]) + | | | | | | +- NO_HASATTR: hasattr(L['self'].state[list(L['self'].state.keys())[118]]['exp_avg'], '_dynamo_dynamic_indices') == False + | | | | | | +- NO_TENSOR_ALIASING + | | | | | +- GuardManager: source=L['self'].state[list(L['self'].state.keys())[118]]['exp_avg_sq'], accessed_by=DictGetItemGuardAccessor(exp_avg_sq) + | | | | | | +- ID_MATCH: ___check_obj_id(L['self'].state[list(L['self'].state.keys())[118]]['exp_avg_sq'], 140561608273776) + | | | | | | +- TENSOR_MATCH: check_tensor(L['self'].state[list(L['self'].state.keys())[118]]['exp_avg_sq'], Tensor, DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), torch.float32, device=0, requires_grad=False, size=[3072, 768], stride=[768, 1]) + | | | | | | +- NO_HASATTR: 
hasattr(L['self'].state[list(L['self'].state.keys())[118]]['exp_avg_sq'], '_dynamo_dynamic_indices') == False + | | | | | | +- NO_TENSOR_ALIASING + | | | +- KeyValueManager pair at index=119 + | | | | +- ValueManager: GuardManager: source=L['self'].state[list(L['self'].state.keys())[119]] + | | | | | +- GuardManager: source=L['self'].state[list(L['self'].state.keys())[119]]['step'], accessed_by=DictGetItemGuardAccessor(step) + | | | | | | +- ID_MATCH: ___check_obj_id(L['self'].state[list(L['self'].state.keys())[119]]['step'], 140561608273936) + | | | | | | +- TENSOR_MATCH: check_tensor(L['self'].state[list(L['self'].state.keys())[119]]['step'], Tensor, DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), torch.float32, device=0, requires_grad=False, size=[], stride=[]) + | | | | | | +- NO_HASATTR: hasattr(L['self'].state[list(L['self'].state.keys())[119]]['step'], '_dynamo_dynamic_indices') == False + | | | | | | +- NO_TENSOR_ALIASING + | | | | | +- GuardManager: source=L['self'].state[list(L['self'].state.keys())[119]]['exp_avg'], accessed_by=DictGetItemGuardAccessor(exp_avg) + | | | | | | +- ID_MATCH: ___check_obj_id(L['self'].state[list(L['self'].state.keys())[119]]['exp_avg'], 140561608274016) + | | | | | | +- TENSOR_MATCH: check_tensor(L['self'].state[list(L['self'].state.keys())[119]]['exp_avg'], Tensor, DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), torch.float32, device=0, requires_grad=False, size=[3072], stride=[1]) + | | | | | | +- NO_HASATTR: hasattr(L['self'].state[list(L['self'].state.keys())[119]]['exp_avg'], '_dynamo_dynamic_indices') == False + | | | | | | +- NO_TENSOR_ALIASING + | | | | | +- GuardManager: source=L['self'].state[list(L['self'].state.keys())[119]]['exp_avg_sq'], accessed_by=DictGetItemGuardAccessor(exp_avg_sq) + | | | | | | +- ID_MATCH: ___check_obj_id(L['self'].state[list(L['self'].state.keys())[119]]['exp_avg_sq'], 140561608274096) + | | | | | | +- TENSOR_MATCH: check_tensor(L['self'].state[list(L['self'].state.keys())[119]]['exp_avg_sq'], Tensor, DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), torch.float32, device=0, requires_grad=False, size=[3072], stride=[1]) + | | | | | | +- NO_HASATTR: hasattr(L['self'].state[list(L['self'].state.keys())[119]]['exp_avg_sq'], '_dynamo_dynamic_indices') == False + | | | | | | +- NO_TENSOR_ALIASING + | | | +- KeyValueManager pair at index=120 + | | | | +- ValueManager: GuardManager: source=L['self'].state[list(L['self'].state.keys())[120]] + | | | | | +- GuardManager: source=L['self'].state[list(L['self'].state.keys())[120]]['step'], accessed_by=DictGetItemGuardAccessor(step) + | | | | | | +- ID_MATCH: ___check_obj_id(L['self'].state[list(L['self'].state.keys())[120]]['step'], 140561608274256) + | | | | | | +- TENSOR_MATCH: check_tensor(L['self'].state[list(L['self'].state.keys())[120]]['step'], Tensor, DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), torch.float32, device=0, requires_grad=False, size=[], stride=[]) + | | | | | | +- NO_HASATTR: hasattr(L['self'].state[list(L['self'].state.keys())[120]]['step'], '_dynamo_dynamic_indices') == False + | | | | | | +- NO_TENSOR_ALIASING + | | | | | +- GuardManager: source=L['self'].state[list(L['self'].state.keys())[120]]['exp_avg'], accessed_by=DictGetItemGuardAccessor(exp_avg) + | | | | | | +- ID_MATCH: ___check_obj_id(L['self'].state[list(L['self'].state.keys())[120]]['exp_avg'], 140561608274336) + | | | | | | +- TENSOR_MATCH: 
check_tensor(L['self'].state[list(L['self'].state.keys())[120]]['exp_avg'], Tensor, DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), torch.float32, device=0, requires_grad=False, size=[768, 3072], stride=[3072, 1]) + | | | | | | +- NO_HASATTR: hasattr(L['self'].state[list(L['self'].state.keys())[120]]['exp_avg'], '_dynamo_dynamic_indices') == False + | | | | | | +- NO_TENSOR_ALIASING + | | | | | +- GuardManager: source=L['self'].state[list(L['self'].state.keys())[120]]['exp_avg_sq'], accessed_by=DictGetItemGuardAccessor(exp_avg_sq) + | | | | | | +- ID_MATCH: ___check_obj_id(L['self'].state[list(L['self'].state.keys())[120]]['exp_avg_sq'], 140561608274416) + | | | | | | +- TENSOR_MATCH: check_tensor(L['self'].state[list(L['self'].state.keys())[120]]['exp_avg_sq'], Tensor, DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), torch.float32, device=0, requires_grad=False, size=[768, 3072], stride=[3072, 1]) + | | | | | | +- NO_HASATTR: hasattr(L['self'].state[list(L['self'].state.keys())[120]]['exp_avg_sq'], '_dynamo_dynamic_indices') == False + | | | | | | +- NO_TENSOR_ALIASING + | | | +- KeyValueManager pair at index=121 + | | | | +- ValueManager: GuardManager: source=L['self'].state[list(L['self'].state.keys())[121]] + | | | | | +- GuardManager: source=L['self'].state[list(L['self'].state.keys())[121]]['step'], accessed_by=DictGetItemGuardAccessor(step) + | | | | | | +- ID_MATCH: ___check_obj_id(L['self'].state[list(L['self'].state.keys())[121]]['step'], 140561608274576) + | | | | | | +- TENSOR_MATCH: check_tensor(L['self'].state[list(L['self'].state.keys())[121]]['step'], Tensor, DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), torch.float32, device=0, requires_grad=False, size=[], stride=[]) + | | | | | | +- NO_HASATTR: hasattr(L['self'].state[list(L['self'].state.keys())[121]]['step'], '_dynamo_dynamic_indices') == False + | | | | | | +- NO_TENSOR_ALIASING + | | | | | +- GuardManager: source=L['self'].state[list(L['self'].state.keys())[121]]['exp_avg'], accessed_by=DictGetItemGuardAccessor(exp_avg) + | | | | | | +- ID_MATCH: ___check_obj_id(L['self'].state[list(L['self'].state.keys())[121]]['exp_avg'], 140561608274656) + | | | | | | +- TENSOR_MATCH: check_tensor(L['self'].state[list(L['self'].state.keys())[121]]['exp_avg'], Tensor, DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), torch.float32, device=0, requires_grad=False, size=[768], stride=[1]) + | | | | | | +- NO_HASATTR: hasattr(L['self'].state[list(L['self'].state.keys())[121]]['exp_avg'], '_dynamo_dynamic_indices') == False + | | | | | | +- NO_TENSOR_ALIASING + | | | | | +- GuardManager: source=L['self'].state[list(L['self'].state.keys())[121]]['exp_avg_sq'], accessed_by=DictGetItemGuardAccessor(exp_avg_sq) + | | | | | | +- ID_MATCH: ___check_obj_id(L['self'].state[list(L['self'].state.keys())[121]]['exp_avg_sq'], 140561608274736) + | | | | | | +- TENSOR_MATCH: check_tensor(L['self'].state[list(L['self'].state.keys())[121]]['exp_avg_sq'], Tensor, DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), torch.float32, device=0, requires_grad=False, size=[768], stride=[1]) + | | | | | | +- NO_HASATTR: hasattr(L['self'].state[list(L['self'].state.keys())[121]]['exp_avg_sq'], '_dynamo_dynamic_indices') == False + | | | | | | +- NO_TENSOR_ALIASING + | | | +- KeyValueManager pair at index=122 + | | | | +- ValueManager: GuardManager: source=L['self'].state[list(L['self'].state.keys())[122]] + | | | | | +- GuardManager: 
source=L['self'].state[list(L['self'].state.keys())[122]]['step'], accessed_by=DictGetItemGuardAccessor(step) + | | | | | | +- ID_MATCH: ___check_obj_id(L['self'].state[list(L['self'].state.keys())[122]]['step'], 140561608274896) + | | | | | | +- TENSOR_MATCH: check_tensor(L['self'].state[list(L['self'].state.keys())[122]]['step'], Tensor, DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), torch.float32, device=0, requires_grad=False, size=[], stride=[]) + | | | | | | +- NO_HASATTR: hasattr(L['self'].state[list(L['self'].state.keys())[122]]['step'], '_dynamo_dynamic_indices') == False + | | | | | | +- NO_TENSOR_ALIASING + | | | | | +- GuardManager: source=L['self'].state[list(L['self'].state.keys())[122]]['exp_avg'], accessed_by=DictGetItemGuardAccessor(exp_avg) + | | | | | | +- ID_MATCH: ___check_obj_id(L['self'].state[list(L['self'].state.keys())[122]]['exp_avg'], 140561608274976) + | | | | | | +- TENSOR_MATCH: check_tensor(L['self'].state[list(L['self'].state.keys())[122]]['exp_avg'], Tensor, DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), torch.float32, device=0, requires_grad=False, size=[768], stride=[1]) + | | | | | | +- NO_HASATTR: hasattr(L['self'].state[list(L['self'].state.keys())[122]]['exp_avg'], '_dynamo_dynamic_indices') == False + | | | | | | +- NO_TENSOR_ALIASING + | | | | | +- GuardManager: source=L['self'].state[list(L['self'].state.keys())[122]]['exp_avg_sq'], accessed_by=DictGetItemGuardAccessor(exp_avg_sq) + | | | | | | +- ID_MATCH: ___check_obj_id(L['self'].state[list(L['self'].state.keys())[122]]['exp_avg_sq'], 140561608275056) + | | | | | | +- TENSOR_MATCH: check_tensor(L['self'].state[list(L['self'].state.keys())[122]]['exp_avg_sq'], Tensor, DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), torch.float32, device=0, requires_grad=False, size=[768], stride=[1]) + | | | | | | +- NO_HASATTR: hasattr(L['self'].state[list(L['self'].state.keys())[122]]['exp_avg_sq'], '_dynamo_dynamic_indices') == False + | | | | | | +- NO_TENSOR_ALIASING + | | | +- KeyValueManager pair at index=123 + | | | | +- ValueManager: GuardManager: source=L['self'].state[list(L['self'].state.keys())[123]] + | | | | | +- GuardManager: source=L['self'].state[list(L['self'].state.keys())[123]]['step'], accessed_by=DictGetItemGuardAccessor(step) + | | | | | | +- ID_MATCH: ___check_obj_id(L['self'].state[list(L['self'].state.keys())[123]]['step'], 140561608275216) + | | | | | | +- TENSOR_MATCH: check_tensor(L['self'].state[list(L['self'].state.keys())[123]]['step'], Tensor, DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), torch.float32, device=0, requires_grad=False, size=[], stride=[]) + | | | | | | +- NO_HASATTR: hasattr(L['self'].state[list(L['self'].state.keys())[123]]['step'], '_dynamo_dynamic_indices') == False + | | | | | | +- NO_TENSOR_ALIASING + | | | | | +- GuardManager: source=L['self'].state[list(L['self'].state.keys())[123]]['exp_avg'], accessed_by=DictGetItemGuardAccessor(exp_avg) + | | | | | | +- ID_MATCH: ___check_obj_id(L['self'].state[list(L['self'].state.keys())[123]]['exp_avg'], 140561608275296) + | | | | | | +- TENSOR_MATCH: check_tensor(L['self'].state[list(L['self'].state.keys())[123]]['exp_avg'], Tensor, DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), torch.float32, device=0, requires_grad=False, size=[768], stride=[1]) + | | | | | | +- NO_HASATTR: hasattr(L['self'].state[list(L['self'].state.keys())[123]]['exp_avg'], '_dynamo_dynamic_indices') == False + | | | | | | +- 
NO_TENSOR_ALIASING + | | | | | +- GuardManager: source=L['self'].state[list(L['self'].state.keys())[123]]['exp_avg_sq'], accessed_by=DictGetItemGuardAccessor(exp_avg_sq) + | | | | | | +- ID_MATCH: ___check_obj_id(L['self'].state[list(L['self'].state.keys())[123]]['exp_avg_sq'], 140561608275376) + | | | | | | +- TENSOR_MATCH: check_tensor(L['self'].state[list(L['self'].state.keys())[123]]['exp_avg_sq'], Tensor, DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), torch.float32, device=0, requires_grad=False, size=[768], stride=[1]) + | | | | | | +- NO_HASATTR: hasattr(L['self'].state[list(L['self'].state.keys())[123]]['exp_avg_sq'], '_dynamo_dynamic_indices') == False + | | | | | | +- NO_TENSOR_ALIASING + | | | +- KeyValueManager pair at index=124 + | | | | +- ValueManager: GuardManager: source=L['self'].state[list(L['self'].state.keys())[124]] + | | | | | +- GuardManager: source=L['self'].state[list(L['self'].state.keys())[124]]['step'], accessed_by=DictGetItemGuardAccessor(step) + | | | | | | +- ID_MATCH: ___check_obj_id(L['self'].state[list(L['self'].state.keys())[124]]['step'], 140561608275536) + | | | | | | +- TENSOR_MATCH: check_tensor(L['self'].state[list(L['self'].state.keys())[124]]['step'], Tensor, DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), torch.float32, device=0, requires_grad=False, size=[], stride=[]) + | | | | | | +- NO_HASATTR: hasattr(L['self'].state[list(L['self'].state.keys())[124]]['step'], '_dynamo_dynamic_indices') == False + | | | | | | +- NO_TENSOR_ALIASING + | | | | | +- GuardManager: source=L['self'].state[list(L['self'].state.keys())[124]]['exp_avg'], accessed_by=DictGetItemGuardAccessor(exp_avg) + | | | | | | +- ID_MATCH: ___check_obj_id(L['self'].state[list(L['self'].state.keys())[124]]['exp_avg'], 140561608275616) + | | | | | | +- TENSOR_MATCH: check_tensor(L['self'].state[list(L['self'].state.keys())[124]]['exp_avg'], Tensor, DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), torch.float32, device=0, requires_grad=False, size=[2304, 768], stride=[768, 1]) + | | | | | | +- NO_HASATTR: hasattr(L['self'].state[list(L['self'].state.keys())[124]]['exp_avg'], '_dynamo_dynamic_indices') == False + | | | | | | +- NO_TENSOR_ALIASING + | | | | | +- GuardManager: source=L['self'].state[list(L['self'].state.keys())[124]]['exp_avg_sq'], accessed_by=DictGetItemGuardAccessor(exp_avg_sq) + | | | | | | +- ID_MATCH: ___check_obj_id(L['self'].state[list(L['self'].state.keys())[124]]['exp_avg_sq'], 140561608275696) + | | | | | | +- TENSOR_MATCH: check_tensor(L['self'].state[list(L['self'].state.keys())[124]]['exp_avg_sq'], Tensor, DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), torch.float32, device=0, requires_grad=False, size=[2304, 768], stride=[768, 1]) + | | | | | | +- NO_HASATTR: hasattr(L['self'].state[list(L['self'].state.keys())[124]]['exp_avg_sq'], '_dynamo_dynamic_indices') == False + | | | | | | +- NO_TENSOR_ALIASING + | | | +- KeyValueManager pair at index=125 + | | | | +- ValueManager: GuardManager: source=L['self'].state[list(L['self'].state.keys())[125]] + | | | | | +- GuardManager: source=L['self'].state[list(L['self'].state.keys())[125]]['step'], accessed_by=DictGetItemGuardAccessor(step) + | | | | | | +- ID_MATCH: ___check_obj_id(L['self'].state[list(L['self'].state.keys())[125]]['step'], 140561608275856) + | | | | | | +- TENSOR_MATCH: check_tensor(L['self'].state[list(L['self'].state.keys())[125]]['step'], Tensor, DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), 
torch.float32, device=0, requires_grad=False, size=[], stride=[]) + | | | | | | +- NO_HASATTR: hasattr(L['self'].state[list(L['self'].state.keys())[125]]['step'], '_dynamo_dynamic_indices') == False + | | | | | | +- NO_TENSOR_ALIASING + | | | | | +- GuardManager: source=L['self'].state[list(L['self'].state.keys())[125]]['exp_avg'], accessed_by=DictGetItemGuardAccessor(exp_avg) + | | | | | | +- ID_MATCH: ___check_obj_id(L['self'].state[list(L['self'].state.keys())[125]]['exp_avg'], 140561608275936) + | | | | | | +- TENSOR_MATCH: check_tensor(L['self'].state[list(L['self'].state.keys())[125]]['exp_avg'], Tensor, DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), torch.float32, device=0, requires_grad=False, size=[2304], stride=[1]) + | | | | | | +- NO_HASATTR: hasattr(L['self'].state[list(L['self'].state.keys())[125]]['exp_avg'], '_dynamo_dynamic_indices') == False + | | | | | | +- NO_TENSOR_ALIASING + | | | | | +- GuardManager: source=L['self'].state[list(L['self'].state.keys())[125]]['exp_avg_sq'], accessed_by=DictGetItemGuardAccessor(exp_avg_sq) + | | | | | | +- ID_MATCH: ___check_obj_id(L['self'].state[list(L['self'].state.keys())[125]]['exp_avg_sq'], 140561608276016) + | | | | | | +- TENSOR_MATCH: check_tensor(L['self'].state[list(L['self'].state.keys())[125]]['exp_avg_sq'], Tensor, DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), torch.float32, device=0, requires_grad=False, size=[2304], stride=[1]) + | | | | | | +- NO_HASATTR: hasattr(L['self'].state[list(L['self'].state.keys())[125]]['exp_avg_sq'], '_dynamo_dynamic_indices') == False + | | | | | | +- NO_TENSOR_ALIASING + | | | +- KeyValueManager pair at index=126 + | | | | +- ValueManager: GuardManager: source=L['self'].state[list(L['self'].state.keys())[126]] + | | | | | +- GuardManager: source=L['self'].state[list(L['self'].state.keys())[126]]['step'], accessed_by=DictGetItemGuardAccessor(step) + | | | | | | +- ID_MATCH: ___check_obj_id(L['self'].state[list(L['self'].state.keys())[126]]['step'], 140561608276176) + | | | | | | +- TENSOR_MATCH: check_tensor(L['self'].state[list(L['self'].state.keys())[126]]['step'], Tensor, DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), torch.float32, device=0, requires_grad=False, size=[], stride=[]) + | | | | | | +- NO_HASATTR: hasattr(L['self'].state[list(L['self'].state.keys())[126]]['step'], '_dynamo_dynamic_indices') == False + | | | | | | +- NO_TENSOR_ALIASING + | | | | | +- GuardManager: source=L['self'].state[list(L['self'].state.keys())[126]]['exp_avg'], accessed_by=DictGetItemGuardAccessor(exp_avg) + | | | | | | +- ID_MATCH: ___check_obj_id(L['self'].state[list(L['self'].state.keys())[126]]['exp_avg'], 140561608276256) + | | | | | | +- TENSOR_MATCH: check_tensor(L['self'].state[list(L['self'].state.keys())[126]]['exp_avg'], Tensor, DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), torch.float32, device=0, requires_grad=False, size=[768, 768], stride=[768, 1]) + | | | | | | +- NO_HASATTR: hasattr(L['self'].state[list(L['self'].state.keys())[126]]['exp_avg'], '_dynamo_dynamic_indices') == False + | | | | | | +- NO_TENSOR_ALIASING + | | | | | +- GuardManager: source=L['self'].state[list(L['self'].state.keys())[126]]['exp_avg_sq'], accessed_by=DictGetItemGuardAccessor(exp_avg_sq) + | | | | | | +- ID_MATCH: ___check_obj_id(L['self'].state[list(L['self'].state.keys())[126]]['exp_avg_sq'], 140561608276336) + | | | | | | +- TENSOR_MATCH: check_tensor(L['self'].state[list(L['self'].state.keys())[126]]['exp_avg_sq'], Tensor, 
DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), torch.float32, device=0, requires_grad=False, size=[768, 768], stride=[768, 1]) + | | | | | | +- NO_HASATTR: hasattr(L['self'].state[list(L['self'].state.keys())[126]]['exp_avg_sq'], '_dynamo_dynamic_indices') == False + | | | | | | +- NO_TENSOR_ALIASING + | | | +- KeyValueManager pair at index=127 + | | | | +- ValueManager: GuardManager: source=L['self'].state[list(L['self'].state.keys())[127]] + | | | | | +- GuardManager: source=L['self'].state[list(L['self'].state.keys())[127]]['step'], accessed_by=DictGetItemGuardAccessor(step) + | | | | | | +- ID_MATCH: ___check_obj_id(L['self'].state[list(L['self'].state.keys())[127]]['step'], 140561608276496) + | | | | | | +- TENSOR_MATCH: check_tensor(L['self'].state[list(L['self'].state.keys())[127]]['step'], Tensor, DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), torch.float32, device=0, requires_grad=False, size=[], stride=[]) + | | | | | | +- NO_HASATTR: hasattr(L['self'].state[list(L['self'].state.keys())[127]]['step'], '_dynamo_dynamic_indices') == False + | | | | | | +- NO_TENSOR_ALIASING + | | | | | +- GuardManager: source=L['self'].state[list(L['self'].state.keys())[127]]['exp_avg'], accessed_by=DictGetItemGuardAccessor(exp_avg) + | | | | | | +- ID_MATCH: ___check_obj_id(L['self'].state[list(L['self'].state.keys())[127]]['exp_avg'], 140561608276576) + | | | | | | +- TENSOR_MATCH: check_tensor(L['self'].state[list(L['self'].state.keys())[127]]['exp_avg'], Tensor, DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), torch.float32, device=0, requires_grad=False, size=[768], stride=[1]) + | | | | | | +- NO_HASATTR: hasattr(L['self'].state[list(L['self'].state.keys())[127]]['exp_avg'], '_dynamo_dynamic_indices') == False + | | | | | | +- NO_TENSOR_ALIASING + | | | | | +- GuardManager: source=L['self'].state[list(L['self'].state.keys())[127]]['exp_avg_sq'], accessed_by=DictGetItemGuardAccessor(exp_avg_sq) + | | | | | | +- ID_MATCH: ___check_obj_id(L['self'].state[list(L['self'].state.keys())[127]]['exp_avg_sq'], 140561608276656) + | | | | | | +- TENSOR_MATCH: check_tensor(L['self'].state[list(L['self'].state.keys())[127]]['exp_avg_sq'], Tensor, DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), torch.float32, device=0, requires_grad=False, size=[768], stride=[1]) + | | | | | | +- NO_HASATTR: hasattr(L['self'].state[list(L['self'].state.keys())[127]]['exp_avg_sq'], '_dynamo_dynamic_indices') == False + | | | | | | +- NO_TENSOR_ALIASING + | | | +- KeyValueManager pair at index=128 + | | | | +- ValueManager: GuardManager: source=L['self'].state[list(L['self'].state.keys())[128]] + | | | | | +- GuardManager: source=L['self'].state[list(L['self'].state.keys())[128]]['step'], accessed_by=DictGetItemGuardAccessor(step) + | | | | | | +- ID_MATCH: ___check_obj_id(L['self'].state[list(L['self'].state.keys())[128]]['step'], 140561608276816) + | | | | | | +- TENSOR_MATCH: check_tensor(L['self'].state[list(L['self'].state.keys())[128]]['step'], Tensor, DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), torch.float32, device=0, requires_grad=False, size=[], stride=[]) + | | | | | | +- NO_HASATTR: hasattr(L['self'].state[list(L['self'].state.keys())[128]]['step'], '_dynamo_dynamic_indices') == False + | | | | | | +- NO_TENSOR_ALIASING + | | | | | +- GuardManager: source=L['self'].state[list(L['self'].state.keys())[128]]['exp_avg'], accessed_by=DictGetItemGuardAccessor(exp_avg) + | | | | | | +- ID_MATCH: 
___check_obj_id(L['self'].state[list(L['self'].state.keys())[128]]['exp_avg'], 140561608276896) + | | | | | | +- TENSOR_MATCH: check_tensor(L['self'].state[list(L['self'].state.keys())[128]]['exp_avg'], Tensor, DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), torch.float32, device=0, requires_grad=False, size=[768], stride=[1]) + | | | | | | +- NO_HASATTR: hasattr(L['self'].state[list(L['self'].state.keys())[128]]['exp_avg'], '_dynamo_dynamic_indices') == False + | | | | | | +- NO_TENSOR_ALIASING + | | | | | +- GuardManager: source=L['self'].state[list(L['self'].state.keys())[128]]['exp_avg_sq'], accessed_by=DictGetItemGuardAccessor(exp_avg_sq) + | | | | | | +- ID_MATCH: ___check_obj_id(L['self'].state[list(L['self'].state.keys())[128]]['exp_avg_sq'], 140561608276976) + | | | | | | +- TENSOR_MATCH: check_tensor(L['self'].state[list(L['self'].state.keys())[128]]['exp_avg_sq'], Tensor, DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), torch.float32, device=0, requires_grad=False, size=[768], stride=[1]) + | | | | | | +- NO_HASATTR: hasattr(L['self'].state[list(L['self'].state.keys())[128]]['exp_avg_sq'], '_dynamo_dynamic_indices') == False + | | | | | | +- NO_TENSOR_ALIASING + | | | +- KeyValueManager pair at index=129 + | | | | +- ValueManager: GuardManager: source=L['self'].state[list(L['self'].state.keys())[129]] + | | | | | +- GuardManager: source=L['self'].state[list(L['self'].state.keys())[129]]['step'], accessed_by=DictGetItemGuardAccessor(step) + | | | | | | +- ID_MATCH: ___check_obj_id(L['self'].state[list(L['self'].state.keys())[129]]['step'], 140561608277136) + | | | | | | +- TENSOR_MATCH: check_tensor(L['self'].state[list(L['self'].state.keys())[129]]['step'], Tensor, DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), torch.float32, device=0, requires_grad=False, size=[], stride=[]) + | | | | | | +- NO_HASATTR: hasattr(L['self'].state[list(L['self'].state.keys())[129]]['step'], '_dynamo_dynamic_indices') == False + | | | | | | +- NO_TENSOR_ALIASING + | | | | | +- GuardManager: source=L['self'].state[list(L['self'].state.keys())[129]]['exp_avg'], accessed_by=DictGetItemGuardAccessor(exp_avg) + | | | | | | +- ID_MATCH: ___check_obj_id(L['self'].state[list(L['self'].state.keys())[129]]['exp_avg'], 140561608277216) + | | | | | | +- TENSOR_MATCH: check_tensor(L['self'].state[list(L['self'].state.keys())[129]]['exp_avg'], Tensor, DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), torch.float32, device=0, requires_grad=False, size=[768], stride=[1]) + | | | | | | +- NO_HASATTR: hasattr(L['self'].state[list(L['self'].state.keys())[129]]['exp_avg'], '_dynamo_dynamic_indices') == False + | | | | | | +- NO_TENSOR_ALIASING + | | | | | +- GuardManager: source=L['self'].state[list(L['self'].state.keys())[129]]['exp_avg_sq'], accessed_by=DictGetItemGuardAccessor(exp_avg_sq) + | | | | | | +- ID_MATCH: ___check_obj_id(L['self'].state[list(L['self'].state.keys())[129]]['exp_avg_sq'], 140561608277296) + | | | | | | +- TENSOR_MATCH: check_tensor(L['self'].state[list(L['self'].state.keys())[129]]['exp_avg_sq'], Tensor, DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), torch.float32, device=0, requires_grad=False, size=[768], stride=[1]) + | | | | | | +- NO_HASATTR: hasattr(L['self'].state[list(L['self'].state.keys())[129]]['exp_avg_sq'], '_dynamo_dynamic_indices') == False + | | | | | | +- NO_TENSOR_ALIASING + | | | +- KeyValueManager pair at index=130 + | | | | +- ValueManager: GuardManager: 
source=L['self'].state[list(L['self'].state.keys())[130]] + | | | | | +- GuardManager: source=L['self'].state[list(L['self'].state.keys())[130]]['step'], accessed_by=DictGetItemGuardAccessor(step) + | | | | | | +- ID_MATCH: ___check_obj_id(L['self'].state[list(L['self'].state.keys())[130]]['step'], 140561608277456) + | | | | | | +- TENSOR_MATCH: check_tensor(L['self'].state[list(L['self'].state.keys())[130]]['step'], Tensor, DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), torch.float32, device=0, requires_grad=False, size=[], stride=[]) + | | | | | | +- NO_HASATTR: hasattr(L['self'].state[list(L['self'].state.keys())[130]]['step'], '_dynamo_dynamic_indices') == False + | | | | | | +- NO_TENSOR_ALIASING + | | | | | +- GuardManager: source=L['self'].state[list(L['self'].state.keys())[130]]['exp_avg'], accessed_by=DictGetItemGuardAccessor(exp_avg) + | | | | | | +- ID_MATCH: ___check_obj_id(L['self'].state[list(L['self'].state.keys())[130]]['exp_avg'], 140561608277536) + | | | | | | +- TENSOR_MATCH: check_tensor(L['self'].state[list(L['self'].state.keys())[130]]['exp_avg'], Tensor, DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), torch.float32, device=0, requires_grad=False, size=[3072, 768], stride=[768, 1]) + | | | | | | +- NO_HASATTR: hasattr(L['self'].state[list(L['self'].state.keys())[130]]['exp_avg'], '_dynamo_dynamic_indices') == False + | | | | | | +- NO_TENSOR_ALIASING + | | | | | +- GuardManager: source=L['self'].state[list(L['self'].state.keys())[130]]['exp_avg_sq'], accessed_by=DictGetItemGuardAccessor(exp_avg_sq) + | | | | | | +- ID_MATCH: ___check_obj_id(L['self'].state[list(L['self'].state.keys())[130]]['exp_avg_sq'], 140561608277616) + | | | | | | +- TENSOR_MATCH: check_tensor(L['self'].state[list(L['self'].state.keys())[130]]['exp_avg_sq'], Tensor, DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), torch.float32, device=0, requires_grad=False, size=[3072, 768], stride=[768, 1]) + | | | | | | +- NO_HASATTR: hasattr(L['self'].state[list(L['self'].state.keys())[130]]['exp_avg_sq'], '_dynamo_dynamic_indices') == False + | | | | | | +- NO_TENSOR_ALIASING + | | | +- KeyValueManager pair at index=131 + | | | | +- ValueManager: GuardManager: source=L['self'].state[list(L['self'].state.keys())[131]] + | | | | | +- GuardManager: source=L['self'].state[list(L['self'].state.keys())[131]]['step'], accessed_by=DictGetItemGuardAccessor(step) + | | | | | | +- ID_MATCH: ___check_obj_id(L['self'].state[list(L['self'].state.keys())[131]]['step'], 140561608277776) + | | | | | | +- TENSOR_MATCH: check_tensor(L['self'].state[list(L['self'].state.keys())[131]]['step'], Tensor, DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), torch.float32, device=0, requires_grad=False, size=[], stride=[]) + | | | | | | +- NO_HASATTR: hasattr(L['self'].state[list(L['self'].state.keys())[131]]['step'], '_dynamo_dynamic_indices') == False + | | | | | | +- NO_TENSOR_ALIASING + | | | | | +- GuardManager: source=L['self'].state[list(L['self'].state.keys())[131]]['exp_avg'], accessed_by=DictGetItemGuardAccessor(exp_avg) + | | | | | | +- ID_MATCH: ___check_obj_id(L['self'].state[list(L['self'].state.keys())[131]]['exp_avg'], 140561608277856) + | | | | | | +- TENSOR_MATCH: check_tensor(L['self'].state[list(L['self'].state.keys())[131]]['exp_avg'], Tensor, DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), torch.float32, device=0, requires_grad=False, size=[3072], stride=[1]) + | | | | | | +- NO_HASATTR: 
hasattr(L['self'].state[list(L['self'].state.keys())[131]]['exp_avg'], '_dynamo_dynamic_indices') == False + | | | | | | +- NO_TENSOR_ALIASING + | | | | | +- GuardManager: source=L['self'].state[list(L['self'].state.keys())[131]]['exp_avg_sq'], accessed_by=DictGetItemGuardAccessor(exp_avg_sq) + | | | | | | +- ID_MATCH: ___check_obj_id(L['self'].state[list(L['self'].state.keys())[131]]['exp_avg_sq'], 140561608277936) + | | | | | | +- TENSOR_MATCH: check_tensor(L['self'].state[list(L['self'].state.keys())[131]]['exp_avg_sq'], Tensor, DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), torch.float32, device=0, requires_grad=False, size=[3072], stride=[1]) + | | | | | | +- NO_HASATTR: hasattr(L['self'].state[list(L['self'].state.keys())[131]]['exp_avg_sq'], '_dynamo_dynamic_indices') == False + | | | | | | +- NO_TENSOR_ALIASING + | | | +- KeyValueManager pair at index=132 + | | | | +- ValueManager: GuardManager: source=L['self'].state[list(L['self'].state.keys())[132]] + | | | | | +- GuardManager: source=L['self'].state[list(L['self'].state.keys())[132]]['step'], accessed_by=DictGetItemGuardAccessor(step) + | | | | | | +- ID_MATCH: ___check_obj_id(L['self'].state[list(L['self'].state.keys())[132]]['step'], 140561608507536) + | | | | | | +- TENSOR_MATCH: check_tensor(L['self'].state[list(L['self'].state.keys())[132]]['step'], Tensor, DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), torch.float32, device=0, requires_grad=False, size=[], stride=[]) + | | | | | | +- NO_HASATTR: hasattr(L['self'].state[list(L['self'].state.keys())[132]]['step'], '_dynamo_dynamic_indices') == False + | | | | | | +- NO_TENSOR_ALIASING + | | | | | +- GuardManager: source=L['self'].state[list(L['self'].state.keys())[132]]['exp_avg'], accessed_by=DictGetItemGuardAccessor(exp_avg) + | | | | | | +- ID_MATCH: ___check_obj_id(L['self'].state[list(L['self'].state.keys())[132]]['exp_avg'], 140561608507616) + | | | | | | +- TENSOR_MATCH: check_tensor(L['self'].state[list(L['self'].state.keys())[132]]['exp_avg'], Tensor, DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), torch.float32, device=0, requires_grad=False, size=[768, 3072], stride=[3072, 1]) + | | | | | | +- NO_HASATTR: hasattr(L['self'].state[list(L['self'].state.keys())[132]]['exp_avg'], '_dynamo_dynamic_indices') == False + | | | | | | +- NO_TENSOR_ALIASING + | | | | | +- GuardManager: source=L['self'].state[list(L['self'].state.keys())[132]]['exp_avg_sq'], accessed_by=DictGetItemGuardAccessor(exp_avg_sq) + | | | | | | +- ID_MATCH: ___check_obj_id(L['self'].state[list(L['self'].state.keys())[132]]['exp_avg_sq'], 140561608507696) + | | | | | | +- TENSOR_MATCH: check_tensor(L['self'].state[list(L['self'].state.keys())[132]]['exp_avg_sq'], Tensor, DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), torch.float32, device=0, requires_grad=False, size=[768, 3072], stride=[3072, 1]) + | | | | | | +- NO_HASATTR: hasattr(L['self'].state[list(L['self'].state.keys())[132]]['exp_avg_sq'], '_dynamo_dynamic_indices') == False + | | | | | | +- NO_TENSOR_ALIASING + | | | +- KeyValueManager pair at index=133 + | | | | +- ValueManager: GuardManager: source=L['self'].state[list(L['self'].state.keys())[133]] + | | | | | +- GuardManager: source=L['self'].state[list(L['self'].state.keys())[133]]['step'], accessed_by=DictGetItemGuardAccessor(step) + | | | | | | +- ID_MATCH: ___check_obj_id(L['self'].state[list(L['self'].state.keys())[133]]['step'], 140561608507856) + | | | | | | +- TENSOR_MATCH: 
check_tensor(L['self'].state[list(L['self'].state.keys())[133]]['step'], Tensor, DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), torch.float32, device=0, requires_grad=False, size=[], stride=[]) + | | | | | | +- NO_HASATTR: hasattr(L['self'].state[list(L['self'].state.keys())[133]]['step'], '_dynamo_dynamic_indices') == False + | | | | | | +- NO_TENSOR_ALIASING + | | | | | +- GuardManager: source=L['self'].state[list(L['self'].state.keys())[133]]['exp_avg'], accessed_by=DictGetItemGuardAccessor(exp_avg) + | | | | | | +- ID_MATCH: ___check_obj_id(L['self'].state[list(L['self'].state.keys())[133]]['exp_avg'], 140561608507936) + | | | | | | +- TENSOR_MATCH: check_tensor(L['self'].state[list(L['self'].state.keys())[133]]['exp_avg'], Tensor, DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), torch.float32, device=0, requires_grad=False, size=[768], stride=[1]) + | | | | | | +- NO_HASATTR: hasattr(L['self'].state[list(L['self'].state.keys())[133]]['exp_avg'], '_dynamo_dynamic_indices') == False + | | | | | | +- NO_TENSOR_ALIASING + | | | | | +- GuardManager: source=L['self'].state[list(L['self'].state.keys())[133]]['exp_avg_sq'], accessed_by=DictGetItemGuardAccessor(exp_avg_sq) + | | | | | | +- ID_MATCH: ___check_obj_id(L['self'].state[list(L['self'].state.keys())[133]]['exp_avg_sq'], 140561608508016) + | | | | | | +- TENSOR_MATCH: check_tensor(L['self'].state[list(L['self'].state.keys())[133]]['exp_avg_sq'], Tensor, DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), torch.float32, device=0, requires_grad=False, size=[768], stride=[1]) + | | | | | | +- NO_HASATTR: hasattr(L['self'].state[list(L['self'].state.keys())[133]]['exp_avg_sq'], '_dynamo_dynamic_indices') == False + | | | | | | +- NO_TENSOR_ALIASING + | | | +- KeyValueManager pair at index=134 + | | | | +- ValueManager: GuardManager: source=L['self'].state[list(L['self'].state.keys())[134]] + | | | | | +- GuardManager: source=L['self'].state[list(L['self'].state.keys())[134]]['step'], accessed_by=DictGetItemGuardAccessor(step) + | | | | | | +- ID_MATCH: ___check_obj_id(L['self'].state[list(L['self'].state.keys())[134]]['step'], 140561608508176) + | | | | | | +- TENSOR_MATCH: check_tensor(L['self'].state[list(L['self'].state.keys())[134]]['step'], Tensor, DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), torch.float32, device=0, requires_grad=False, size=[], stride=[]) + | | | | | | +- NO_HASATTR: hasattr(L['self'].state[list(L['self'].state.keys())[134]]['step'], '_dynamo_dynamic_indices') == False + | | | | | | +- NO_TENSOR_ALIASING + | | | | | +- GuardManager: source=L['self'].state[list(L['self'].state.keys())[134]]['exp_avg'], accessed_by=DictGetItemGuardAccessor(exp_avg) + | | | | | | +- ID_MATCH: ___check_obj_id(L['self'].state[list(L['self'].state.keys())[134]]['exp_avg'], 140561608508256) + | | | | | | +- TENSOR_MATCH: check_tensor(L['self'].state[list(L['self'].state.keys())[134]]['exp_avg'], Tensor, DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), torch.float32, device=0, requires_grad=False, size=[768], stride=[1]) + | | | | | | +- NO_HASATTR: hasattr(L['self'].state[list(L['self'].state.keys())[134]]['exp_avg'], '_dynamo_dynamic_indices') == False + | | | | | | +- NO_TENSOR_ALIASING + | | | | | +- GuardManager: source=L['self'].state[list(L['self'].state.keys())[134]]['exp_avg_sq'], accessed_by=DictGetItemGuardAccessor(exp_avg_sq) + | | | | | | +- ID_MATCH: 
___check_obj_id(L['self'].state[list(L['self'].state.keys())[134]]['exp_avg_sq'], 140561608508336) + | | | | | | +- TENSOR_MATCH: check_tensor(L['self'].state[list(L['self'].state.keys())[134]]['exp_avg_sq'], Tensor, DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), torch.float32, device=0, requires_grad=False, size=[768], stride=[1]) + | | | | | | +- NO_HASATTR: hasattr(L['self'].state[list(L['self'].state.keys())[134]]['exp_avg_sq'], '_dynamo_dynamic_indices') == False + | | | | | | +- NO_TENSOR_ALIASING + | | | +- KeyValueManager pair at index=135 + | | | | +- ValueManager: GuardManager: source=L['self'].state[list(L['self'].state.keys())[135]] + | | | | | +- GuardManager: source=L['self'].state[list(L['self'].state.keys())[135]]['step'], accessed_by=DictGetItemGuardAccessor(step) + | | | | | | +- ID_MATCH: ___check_obj_id(L['self'].state[list(L['self'].state.keys())[135]]['step'], 140561608508496) + | | | | | | +- TENSOR_MATCH: check_tensor(L['self'].state[list(L['self'].state.keys())[135]]['step'], Tensor, DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), torch.float32, device=0, requires_grad=False, size=[], stride=[]) + | | | | | | +- NO_HASATTR: hasattr(L['self'].state[list(L['self'].state.keys())[135]]['step'], '_dynamo_dynamic_indices') == False + | | | | | | +- NO_TENSOR_ALIASING + | | | | | +- GuardManager: source=L['self'].state[list(L['self'].state.keys())[135]]['exp_avg'], accessed_by=DictGetItemGuardAccessor(exp_avg) + | | | | | | +- ID_MATCH: ___check_obj_id(L['self'].state[list(L['self'].state.keys())[135]]['exp_avg'], 140561608508576) + | | | | | | +- TENSOR_MATCH: check_tensor(L['self'].state[list(L['self'].state.keys())[135]]['exp_avg'], Tensor, DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), torch.float32, device=0, requires_grad=False, size=[768], stride=[1]) + | | | | | | +- NO_HASATTR: hasattr(L['self'].state[list(L['self'].state.keys())[135]]['exp_avg'], '_dynamo_dynamic_indices') == False + | | | | | | +- NO_TENSOR_ALIASING + | | | | | +- GuardManager: source=L['self'].state[list(L['self'].state.keys())[135]]['exp_avg_sq'], accessed_by=DictGetItemGuardAccessor(exp_avg_sq) + | | | | | | +- ID_MATCH: ___check_obj_id(L['self'].state[list(L['self'].state.keys())[135]]['exp_avg_sq'], 140561608508656) + | | | | | | +- TENSOR_MATCH: check_tensor(L['self'].state[list(L['self'].state.keys())[135]]['exp_avg_sq'], Tensor, DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), torch.float32, device=0, requires_grad=False, size=[768], stride=[1]) + | | | | | | +- NO_HASATTR: hasattr(L['self'].state[list(L['self'].state.keys())[135]]['exp_avg_sq'], '_dynamo_dynamic_indices') == False + | | | | | | +- NO_TENSOR_ALIASING + | | | +- KeyValueManager pair at index=136 + | | | | +- ValueManager: GuardManager: source=L['self'].state[list(L['self'].state.keys())[136]] + | | | | | +- GuardManager: source=L['self'].state[list(L['self'].state.keys())[136]]['step'], accessed_by=DictGetItemGuardAccessor(step) + | | | | | | +- ID_MATCH: ___check_obj_id(L['self'].state[list(L['self'].state.keys())[136]]['step'], 140561608508816) + | | | | | | +- TENSOR_MATCH: check_tensor(L['self'].state[list(L['self'].state.keys())[136]]['step'], Tensor, DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), torch.float32, device=0, requires_grad=False, size=[], stride=[]) + | | | | | | +- NO_HASATTR: hasattr(L['self'].state[list(L['self'].state.keys())[136]]['step'], '_dynamo_dynamic_indices') == False + | | | | | | +- 
NO_TENSOR_ALIASING + | | | | | +- GuardManager: source=L['self'].state[list(L['self'].state.keys())[136]]['exp_avg'], accessed_by=DictGetItemGuardAccessor(exp_avg) + | | | | | | +- ID_MATCH: ___check_obj_id(L['self'].state[list(L['self'].state.keys())[136]]['exp_avg'], 140561608508896) + | | | | | | +- TENSOR_MATCH: check_tensor(L['self'].state[list(L['self'].state.keys())[136]]['exp_avg'], Tensor, DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), torch.float32, device=0, requires_grad=False, size=[2304, 768], stride=[768, 1]) + | | | | | | +- NO_HASATTR: hasattr(L['self'].state[list(L['self'].state.keys())[136]]['exp_avg'], '_dynamo_dynamic_indices') == False + | | | | | | +- NO_TENSOR_ALIASING + | | | | | +- GuardManager: source=L['self'].state[list(L['self'].state.keys())[136]]['exp_avg_sq'], accessed_by=DictGetItemGuardAccessor(exp_avg_sq) + | | | | | | +- ID_MATCH: ___check_obj_id(L['self'].state[list(L['self'].state.keys())[136]]['exp_avg_sq'], 140561608508976) + | | | | | | +- TENSOR_MATCH: check_tensor(L['self'].state[list(L['self'].state.keys())[136]]['exp_avg_sq'], Tensor, DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), torch.float32, device=0, requires_grad=False, size=[2304, 768], stride=[768, 1]) + | | | | | | +- NO_HASATTR: hasattr(L['self'].state[list(L['self'].state.keys())[136]]['exp_avg_sq'], '_dynamo_dynamic_indices') == False + | | | | | | +- NO_TENSOR_ALIASING + | | | +- KeyValueManager pair at index=137 + | | | | +- ValueManager: GuardManager: source=L['self'].state[list(L['self'].state.keys())[137]] + | | | | | +- GuardManager: source=L['self'].state[list(L['self'].state.keys())[137]]['step'], accessed_by=DictGetItemGuardAccessor(step) + | | | | | | +- ID_MATCH: ___check_obj_id(L['self'].state[list(L['self'].state.keys())[137]]['step'], 140561608509136) + | | | | | | +- TENSOR_MATCH: check_tensor(L['self'].state[list(L['self'].state.keys())[137]]['step'], Tensor, DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), torch.float32, device=0, requires_grad=False, size=[], stride=[]) + | | | | | | +- NO_HASATTR: hasattr(L['self'].state[list(L['self'].state.keys())[137]]['step'], '_dynamo_dynamic_indices') == False + | | | | | | +- NO_TENSOR_ALIASING + | | | | | +- GuardManager: source=L['self'].state[list(L['self'].state.keys())[137]]['exp_avg'], accessed_by=DictGetItemGuardAccessor(exp_avg) + | | | | | | +- ID_MATCH: ___check_obj_id(L['self'].state[list(L['self'].state.keys())[137]]['exp_avg'], 140561608509216) + | | | | | | +- TENSOR_MATCH: check_tensor(L['self'].state[list(L['self'].state.keys())[137]]['exp_avg'], Tensor, DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), torch.float32, device=0, requires_grad=False, size=[2304], stride=[1]) + | | | | | | +- NO_HASATTR: hasattr(L['self'].state[list(L['self'].state.keys())[137]]['exp_avg'], '_dynamo_dynamic_indices') == False + | | | | | | +- NO_TENSOR_ALIASING + | | | | | +- GuardManager: source=L['self'].state[list(L['self'].state.keys())[137]]['exp_avg_sq'], accessed_by=DictGetItemGuardAccessor(exp_avg_sq) + | | | | | | +- ID_MATCH: ___check_obj_id(L['self'].state[list(L['self'].state.keys())[137]]['exp_avg_sq'], 140561608509296) + | | | | | | +- TENSOR_MATCH: check_tensor(L['self'].state[list(L['self'].state.keys())[137]]['exp_avg_sq'], Tensor, DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), torch.float32, device=0, requires_grad=False, size=[2304], stride=[1]) + | | | | | | +- NO_HASATTR: 
hasattr(L['self'].state[list(L['self'].state.keys())[137]]['exp_avg_sq'], '_dynamo_dynamic_indices') == False + | | | | | | +- NO_TENSOR_ALIASING + | | | +- KeyValueManager pair at index=138 + | | | | +- ValueManager: GuardManager: source=L['self'].state[list(L['self'].state.keys())[138]] + | | | | | +- GuardManager: source=L['self'].state[list(L['self'].state.keys())[138]]['step'], accessed_by=DictGetItemGuardAccessor(step) + | | | | | | +- ID_MATCH: ___check_obj_id(L['self'].state[list(L['self'].state.keys())[138]]['step'], 140561608509456) + | | | | | | +- TENSOR_MATCH: check_tensor(L['self'].state[list(L['self'].state.keys())[138]]['step'], Tensor, DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), torch.float32, device=0, requires_grad=False, size=[], stride=[]) + | | | | | | +- NO_HASATTR: hasattr(L['self'].state[list(L['self'].state.keys())[138]]['step'], '_dynamo_dynamic_indices') == False + | | | | | | +- NO_TENSOR_ALIASING + | | | | | +- GuardManager: source=L['self'].state[list(L['self'].state.keys())[138]]['exp_avg'], accessed_by=DictGetItemGuardAccessor(exp_avg) + | | | | | | +- ID_MATCH: ___check_obj_id(L['self'].state[list(L['self'].state.keys())[138]]['exp_avg'], 140561608509536) + | | | | | | +- TENSOR_MATCH: check_tensor(L['self'].state[list(L['self'].state.keys())[138]]['exp_avg'], Tensor, DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), torch.float32, device=0, requires_grad=False, size=[768, 768], stride=[768, 1]) + | | | | | | +- NO_HASATTR: hasattr(L['self'].state[list(L['self'].state.keys())[138]]['exp_avg'], '_dynamo_dynamic_indices') == False + | | | | | | +- NO_TENSOR_ALIASING + | | | | | +- GuardManager: source=L['self'].state[list(L['self'].state.keys())[138]]['exp_avg_sq'], accessed_by=DictGetItemGuardAccessor(exp_avg_sq) + | | | | | | +- ID_MATCH: ___check_obj_id(L['self'].state[list(L['self'].state.keys())[138]]['exp_avg_sq'], 140561608509616) + | | | | | | +- TENSOR_MATCH: check_tensor(L['self'].state[list(L['self'].state.keys())[138]]['exp_avg_sq'], Tensor, DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), torch.float32, device=0, requires_grad=False, size=[768, 768], stride=[768, 1]) + | | | | | | +- NO_HASATTR: hasattr(L['self'].state[list(L['self'].state.keys())[138]]['exp_avg_sq'], '_dynamo_dynamic_indices') == False + | | | | | | +- NO_TENSOR_ALIASING + | | | +- KeyValueManager pair at index=139 + | | | | +- ValueManager: GuardManager: source=L['self'].state[list(L['self'].state.keys())[139]] + | | | | | +- GuardManager: source=L['self'].state[list(L['self'].state.keys())[139]]['step'], accessed_by=DictGetItemGuardAccessor(step) + | | | | | | +- ID_MATCH: ___check_obj_id(L['self'].state[list(L['self'].state.keys())[139]]['step'], 140561608509776) + | | | | | | +- TENSOR_MATCH: check_tensor(L['self'].state[list(L['self'].state.keys())[139]]['step'], Tensor, DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), torch.float32, device=0, requires_grad=False, size=[], stride=[]) + | | | | | | +- NO_HASATTR: hasattr(L['self'].state[list(L['self'].state.keys())[139]]['step'], '_dynamo_dynamic_indices') == False + | | | | | | +- NO_TENSOR_ALIASING + | | | | | +- GuardManager: source=L['self'].state[list(L['self'].state.keys())[139]]['exp_avg'], accessed_by=DictGetItemGuardAccessor(exp_avg) + | | | | | | +- ID_MATCH: ___check_obj_id(L['self'].state[list(L['self'].state.keys())[139]]['exp_avg'], 140561608509856) + | | | | | | +- TENSOR_MATCH: 
check_tensor(L['self'].state[list(L['self'].state.keys())[139]]['exp_avg'], Tensor, DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), torch.float32, device=0, requires_grad=False, size=[768], stride=[1]) + | | | | | | +- NO_HASATTR: hasattr(L['self'].state[list(L['self'].state.keys())[139]]['exp_avg'], '_dynamo_dynamic_indices') == False + | | | | | | +- NO_TENSOR_ALIASING + | | | | | +- GuardManager: source=L['self'].state[list(L['self'].state.keys())[139]]['exp_avg_sq'], accessed_by=DictGetItemGuardAccessor(exp_avg_sq) + | | | | | | +- ID_MATCH: ___check_obj_id(L['self'].state[list(L['self'].state.keys())[139]]['exp_avg_sq'], 140561608509936) + | | | | | | +- TENSOR_MATCH: check_tensor(L['self'].state[list(L['self'].state.keys())[139]]['exp_avg_sq'], Tensor, DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), torch.float32, device=0, requires_grad=False, size=[768], stride=[1]) + | | | | | | +- NO_HASATTR: hasattr(L['self'].state[list(L['self'].state.keys())[139]]['exp_avg_sq'], '_dynamo_dynamic_indices') == False + | | | | | | +- NO_TENSOR_ALIASING + | | | +- KeyValueManager pair at index=140 + | | | | +- ValueManager: GuardManager: source=L['self'].state[list(L['self'].state.keys())[140]] + | | | | | +- GuardManager: source=L['self'].state[list(L['self'].state.keys())[140]]['step'], accessed_by=DictGetItemGuardAccessor(step) + | | | | | | +- ID_MATCH: ___check_obj_id(L['self'].state[list(L['self'].state.keys())[140]]['step'], 140561608510096) + | | | | | | +- TENSOR_MATCH: check_tensor(L['self'].state[list(L['self'].state.keys())[140]]['step'], Tensor, DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), torch.float32, device=0, requires_grad=False, size=[], stride=[]) + | | | | | | +- NO_HASATTR: hasattr(L['self'].state[list(L['self'].state.keys())[140]]['step'], '_dynamo_dynamic_indices') == False + | | | | | | +- NO_TENSOR_ALIASING + | | | | | +- GuardManager: source=L['self'].state[list(L['self'].state.keys())[140]]['exp_avg'], accessed_by=DictGetItemGuardAccessor(exp_avg) + | | | | | | +- ID_MATCH: ___check_obj_id(L['self'].state[list(L['self'].state.keys())[140]]['exp_avg'], 140561608510176) + | | | | | | +- TENSOR_MATCH: check_tensor(L['self'].state[list(L['self'].state.keys())[140]]['exp_avg'], Tensor, DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), torch.float32, device=0, requires_grad=False, size=[768], stride=[1]) + | | | | | | +- NO_HASATTR: hasattr(L['self'].state[list(L['self'].state.keys())[140]]['exp_avg'], '_dynamo_dynamic_indices') == False + | | | | | | +- NO_TENSOR_ALIASING + | | | | | +- GuardManager: source=L['self'].state[list(L['self'].state.keys())[140]]['exp_avg_sq'], accessed_by=DictGetItemGuardAccessor(exp_avg_sq) + | | | | | | +- ID_MATCH: ___check_obj_id(L['self'].state[list(L['self'].state.keys())[140]]['exp_avg_sq'], 140561608510256) + | | | | | | +- TENSOR_MATCH: check_tensor(L['self'].state[list(L['self'].state.keys())[140]]['exp_avg_sq'], Tensor, DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), torch.float32, device=0, requires_grad=False, size=[768], stride=[1]) + | | | | | | +- NO_HASATTR: hasattr(L['self'].state[list(L['self'].state.keys())[140]]['exp_avg_sq'], '_dynamo_dynamic_indices') == False + | | | | | | +- NO_TENSOR_ALIASING + | | | +- KeyValueManager pair at index=141 + | | | | +- ValueManager: GuardManager: source=L['self'].state[list(L['self'].state.keys())[141]] + | | | | | +- GuardManager: 
source=L['self'].state[list(L['self'].state.keys())[141]]['step'], accessed_by=DictGetItemGuardAccessor(step) + | | | | | | +- ID_MATCH: ___check_obj_id(L['self'].state[list(L['self'].state.keys())[141]]['step'], 140561608510416) + | | | | | | +- TENSOR_MATCH: check_tensor(L['self'].state[list(L['self'].state.keys())[141]]['step'], Tensor, DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), torch.float32, device=0, requires_grad=False, size=[], stride=[]) + | | | | | | +- NO_HASATTR: hasattr(L['self'].state[list(L['self'].state.keys())[141]]['step'], '_dynamo_dynamic_indices') == False + | | | | | | +- NO_TENSOR_ALIASING + | | | | | +- GuardManager: source=L['self'].state[list(L['self'].state.keys())[141]]['exp_avg'], accessed_by=DictGetItemGuardAccessor(exp_avg) + | | | | | | +- ID_MATCH: ___check_obj_id(L['self'].state[list(L['self'].state.keys())[141]]['exp_avg'], 140561608510496) + | | | | | | +- TENSOR_MATCH: check_tensor(L['self'].state[list(L['self'].state.keys())[141]]['exp_avg'], Tensor, DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), torch.float32, device=0, requires_grad=False, size=[768], stride=[1]) + | | | | | | +- NO_HASATTR: hasattr(L['self'].state[list(L['self'].state.keys())[141]]['exp_avg'], '_dynamo_dynamic_indices') == False + | | | | | | +- NO_TENSOR_ALIASING + | | | | | +- GuardManager: source=L['self'].state[list(L['self'].state.keys())[141]]['exp_avg_sq'], accessed_by=DictGetItemGuardAccessor(exp_avg_sq) + | | | | | | +- ID_MATCH: ___check_obj_id(L['self'].state[list(L['self'].state.keys())[141]]['exp_avg_sq'], 140561608510576) + | | | | | | +- TENSOR_MATCH: check_tensor(L['self'].state[list(L['self'].state.keys())[141]]['exp_avg_sq'], Tensor, DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), torch.float32, device=0, requires_grad=False, size=[768], stride=[1]) + | | | | | | +- NO_HASATTR: hasattr(L['self'].state[list(L['self'].state.keys())[141]]['exp_avg_sq'], '_dynamo_dynamic_indices') == False + | | | | | | +- NO_TENSOR_ALIASING + | | | +- KeyValueManager pair at index=142 + | | | | +- ValueManager: GuardManager: source=L['self'].state[list(L['self'].state.keys())[142]] + | | | | | +- GuardManager: source=L['self'].state[list(L['self'].state.keys())[142]]['step'], accessed_by=DictGetItemGuardAccessor(step) + | | | | | | +- ID_MATCH: ___check_obj_id(L['self'].state[list(L['self'].state.keys())[142]]['step'], 140561608510736) + | | | | | | +- TENSOR_MATCH: check_tensor(L['self'].state[list(L['self'].state.keys())[142]]['step'], Tensor, DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), torch.float32, device=0, requires_grad=False, size=[], stride=[]) + | | | | | | +- NO_HASATTR: hasattr(L['self'].state[list(L['self'].state.keys())[142]]['step'], '_dynamo_dynamic_indices') == False + | | | | | | +- NO_TENSOR_ALIASING + | | | | | +- GuardManager: source=L['self'].state[list(L['self'].state.keys())[142]]['exp_avg'], accessed_by=DictGetItemGuardAccessor(exp_avg) + | | | | | | +- ID_MATCH: ___check_obj_id(L['self'].state[list(L['self'].state.keys())[142]]['exp_avg'], 140561608510816) + | | | | | | +- TENSOR_MATCH: check_tensor(L['self'].state[list(L['self'].state.keys())[142]]['exp_avg'], Tensor, DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), torch.float32, device=0, requires_grad=False, size=[3072, 768], stride=[768, 1]) + | | | | | | +- NO_HASATTR: hasattr(L['self'].state[list(L['self'].state.keys())[142]]['exp_avg'], '_dynamo_dynamic_indices') == False + | | | | | | +- 
NO_TENSOR_ALIASING + | | | | | +- GuardManager: source=L['self'].state[list(L['self'].state.keys())[142]]['exp_avg_sq'], accessed_by=DictGetItemGuardAccessor(exp_avg_sq) + | | | | | | +- ID_MATCH: ___check_obj_id(L['self'].state[list(L['self'].state.keys())[142]]['exp_avg_sq'], 140561608510896) + | | | | | | +- TENSOR_MATCH: check_tensor(L['self'].state[list(L['self'].state.keys())[142]]['exp_avg_sq'], Tensor, DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), torch.float32, device=0, requires_grad=False, size=[3072, 768], stride=[768, 1]) + | | | | | | +- NO_HASATTR: hasattr(L['self'].state[list(L['self'].state.keys())[142]]['exp_avg_sq'], '_dynamo_dynamic_indices') == False + | | | | | | +- NO_TENSOR_ALIASING + | | | +- KeyValueManager pair at index=143 + | | | | +- ValueManager: GuardManager: source=L['self'].state[list(L['self'].state.keys())[143]] + | | | | | +- GuardManager: source=L['self'].state[list(L['self'].state.keys())[143]]['step'], accessed_by=DictGetItemGuardAccessor(step) + | | | | | | +- ID_MATCH: ___check_obj_id(L['self'].state[list(L['self'].state.keys())[143]]['step'], 140561608511056) + | | | | | | +- TENSOR_MATCH: check_tensor(L['self'].state[list(L['self'].state.keys())[143]]['step'], Tensor, DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), torch.float32, device=0, requires_grad=False, size=[], stride=[]) + | | | | | | +- NO_HASATTR: hasattr(L['self'].state[list(L['self'].state.keys())[143]]['step'], '_dynamo_dynamic_indices') == False + | | | | | | +- NO_TENSOR_ALIASING + | | | | | +- GuardManager: source=L['self'].state[list(L['self'].state.keys())[143]]['exp_avg'], accessed_by=DictGetItemGuardAccessor(exp_avg) + | | | | | | +- ID_MATCH: ___check_obj_id(L['self'].state[list(L['self'].state.keys())[143]]['exp_avg'], 140561608511136) + | | | | | | +- TENSOR_MATCH: check_tensor(L['self'].state[list(L['self'].state.keys())[143]]['exp_avg'], Tensor, DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), torch.float32, device=0, requires_grad=False, size=[3072], stride=[1]) + | | | | | | +- NO_HASATTR: hasattr(L['self'].state[list(L['self'].state.keys())[143]]['exp_avg'], '_dynamo_dynamic_indices') == False + | | | | | | +- NO_TENSOR_ALIASING + | | | | | +- GuardManager: source=L['self'].state[list(L['self'].state.keys())[143]]['exp_avg_sq'], accessed_by=DictGetItemGuardAccessor(exp_avg_sq) + | | | | | | +- ID_MATCH: ___check_obj_id(L['self'].state[list(L['self'].state.keys())[143]]['exp_avg_sq'], 140561608511216) + | | | | | | +- TENSOR_MATCH: check_tensor(L['self'].state[list(L['self'].state.keys())[143]]['exp_avg_sq'], Tensor, DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), torch.float32, device=0, requires_grad=False, size=[3072], stride=[1]) + | | | | | | +- NO_HASATTR: hasattr(L['self'].state[list(L['self'].state.keys())[143]]['exp_avg_sq'], '_dynamo_dynamic_indices') == False + | | | | | | +- NO_TENSOR_ALIASING + | | | +- KeyValueManager pair at index=144 + | | | | +- ValueManager: GuardManager: source=L['self'].state[list(L['self'].state.keys())[144]] + | | | | | +- GuardManager: source=L['self'].state[list(L['self'].state.keys())[144]]['step'], accessed_by=DictGetItemGuardAccessor(step) + | | | | | | +- ID_MATCH: ___check_obj_id(L['self'].state[list(L['self'].state.keys())[144]]['step'], 140561608511376) + | | | | | | +- TENSOR_MATCH: check_tensor(L['self'].state[list(L['self'].state.keys())[144]]['step'], Tensor, DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), 
torch.float32, device=0, requires_grad=False, size=[], stride=[]) + | | | | | | +- NO_HASATTR: hasattr(L['self'].state[list(L['self'].state.keys())[144]]['step'], '_dynamo_dynamic_indices') == False + | | | | | | +- NO_TENSOR_ALIASING + | | | | | +- GuardManager: source=L['self'].state[list(L['self'].state.keys())[144]]['exp_avg'], accessed_by=DictGetItemGuardAccessor(exp_avg) + | | | | | | +- ID_MATCH: ___check_obj_id(L['self'].state[list(L['self'].state.keys())[144]]['exp_avg'], 140561608511456) + | | | | | | +- TENSOR_MATCH: check_tensor(L['self'].state[list(L['self'].state.keys())[144]]['exp_avg'], Tensor, DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), torch.float32, device=0, requires_grad=False, size=[768, 3072], stride=[3072, 1]) + | | | | | | +- NO_HASATTR: hasattr(L['self'].state[list(L['self'].state.keys())[144]]['exp_avg'], '_dynamo_dynamic_indices') == False + | | | | | | +- NO_TENSOR_ALIASING + | | | | | +- GuardManager: source=L['self'].state[list(L['self'].state.keys())[144]]['exp_avg_sq'], accessed_by=DictGetItemGuardAccessor(exp_avg_sq) + | | | | | | +- ID_MATCH: ___check_obj_id(L['self'].state[list(L['self'].state.keys())[144]]['exp_avg_sq'], 140561608511536) + | | | | | | +- TENSOR_MATCH: check_tensor(L['self'].state[list(L['self'].state.keys())[144]]['exp_avg_sq'], Tensor, DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), torch.float32, device=0, requires_grad=False, size=[768, 3072], stride=[3072, 1]) + | | | | | | +- NO_HASATTR: hasattr(L['self'].state[list(L['self'].state.keys())[144]]['exp_avg_sq'], '_dynamo_dynamic_indices') == False + | | | | | | +- NO_TENSOR_ALIASING + | | | +- KeyValueManager pair at index=145 + | | | | +- ValueManager: GuardManager: source=L['self'].state[list(L['self'].state.keys())[145]] + | | | | | +- GuardManager: source=L['self'].state[list(L['self'].state.keys())[145]]['step'], accessed_by=DictGetItemGuardAccessor(step) + | | | | | | +- ID_MATCH: ___check_obj_id(L['self'].state[list(L['self'].state.keys())[145]]['step'], 140561608511696) + | | | | | | +- TENSOR_MATCH: check_tensor(L['self'].state[list(L['self'].state.keys())[145]]['step'], Tensor, DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), torch.float32, device=0, requires_grad=False, size=[], stride=[]) + | | | | | | +- NO_HASATTR: hasattr(L['self'].state[list(L['self'].state.keys())[145]]['step'], '_dynamo_dynamic_indices') == False + | | | | | | +- NO_TENSOR_ALIASING + | | | | | +- GuardManager: source=L['self'].state[list(L['self'].state.keys())[145]]['exp_avg'], accessed_by=DictGetItemGuardAccessor(exp_avg) + | | | | | | +- ID_MATCH: ___check_obj_id(L['self'].state[list(L['self'].state.keys())[145]]['exp_avg'], 140561608511776) + | | | | | | +- TENSOR_MATCH: check_tensor(L['self'].state[list(L['self'].state.keys())[145]]['exp_avg'], Tensor, DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), torch.float32, device=0, requires_grad=False, size=[768], stride=[1]) + | | | | | | +- NO_HASATTR: hasattr(L['self'].state[list(L['self'].state.keys())[145]]['exp_avg'], '_dynamo_dynamic_indices') == False + | | | | | | +- NO_TENSOR_ALIASING + | | | | | +- GuardManager: source=L['self'].state[list(L['self'].state.keys())[145]]['exp_avg_sq'], accessed_by=DictGetItemGuardAccessor(exp_avg_sq) + | | | | | | +- ID_MATCH: ___check_obj_id(L['self'].state[list(L['self'].state.keys())[145]]['exp_avg_sq'], 140561608511856) + | | | | | | +- TENSOR_MATCH: 
check_tensor(L['self'].state[list(L['self'].state.keys())[145]]['exp_avg_sq'], Tensor, DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), torch.float32, device=0, requires_grad=False, size=[768], stride=[1]) + | | | | | | +- NO_HASATTR: hasattr(L['self'].state[list(L['self'].state.keys())[145]]['exp_avg_sq'], '_dynamo_dynamic_indices') == False + | | | | | | +- NO_TENSOR_ALIASING + | | | +- KeyValueManager pair at index=146 + | | | | +- ValueManager: GuardManager: source=L['self'].state[list(L['self'].state.keys())[146]] + | | | | | +- GuardManager: source=L['self'].state[list(L['self'].state.keys())[146]]['step'], accessed_by=DictGetItemGuardAccessor(step) + | | | | | | +- ID_MATCH: ___check_obj_id(L['self'].state[list(L['self'].state.keys())[146]]['step'], 140561608512016) + | | | | | | +- TENSOR_MATCH: check_tensor(L['self'].state[list(L['self'].state.keys())[146]]['step'], Tensor, DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), torch.float32, device=0, requires_grad=False, size=[], stride=[]) + | | | | | | +- NO_HASATTR: hasattr(L['self'].state[list(L['self'].state.keys())[146]]['step'], '_dynamo_dynamic_indices') == False + | | | | | | +- NO_TENSOR_ALIASING + | | | | | +- GuardManager: source=L['self'].state[list(L['self'].state.keys())[146]]['exp_avg'], accessed_by=DictGetItemGuardAccessor(exp_avg) + | | | | | | +- ID_MATCH: ___check_obj_id(L['self'].state[list(L['self'].state.keys())[146]]['exp_avg'], 140561608512096) + | | | | | | +- TENSOR_MATCH: check_tensor(L['self'].state[list(L['self'].state.keys())[146]]['exp_avg'], Tensor, DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), torch.float32, device=0, requires_grad=False, size=[768], stride=[1]) + | | | | | | +- NO_HASATTR: hasattr(L['self'].state[list(L['self'].state.keys())[146]]['exp_avg'], '_dynamo_dynamic_indices') == False + | | | | | | +- NO_TENSOR_ALIASING + | | | | | +- GuardManager: source=L['self'].state[list(L['self'].state.keys())[146]]['exp_avg_sq'], accessed_by=DictGetItemGuardAccessor(exp_avg_sq) + | | | | | | +- ID_MATCH: ___check_obj_id(L['self'].state[list(L['self'].state.keys())[146]]['exp_avg_sq'], 140561608512176) + | | | | | | +- TENSOR_MATCH: check_tensor(L['self'].state[list(L['self'].state.keys())[146]]['exp_avg_sq'], Tensor, DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), torch.float32, device=0, requires_grad=False, size=[768], stride=[1]) + | | | | | | +- NO_HASATTR: hasattr(L['self'].state[list(L['self'].state.keys())[146]]['exp_avg_sq'], '_dynamo_dynamic_indices') == False + | | | | | | +- NO_TENSOR_ALIASING + | | | +- KeyValueManager pair at index=147 + | | | | +- ValueManager: GuardManager: source=L['self'].state[list(L['self'].state.keys())[147]] + | | | | | +- GuardManager: source=L['self'].state[list(L['self'].state.keys())[147]]['step'], accessed_by=DictGetItemGuardAccessor(step) + | | | | | | +- ID_MATCH: ___check_obj_id(L['self'].state[list(L['self'].state.keys())[147]]['step'], 140561608512336) + | | | | | | +- TENSOR_MATCH: check_tensor(L['self'].state[list(L['self'].state.keys())[147]]['step'], Tensor, DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), torch.float32, device=0, requires_grad=False, size=[], stride=[]) + | | | | | | +- NO_HASATTR: hasattr(L['self'].state[list(L['self'].state.keys())[147]]['step'], '_dynamo_dynamic_indices') == False + | | | | | | +- NO_TENSOR_ALIASING + | | | | | +- GuardManager: source=L['self'].state[list(L['self'].state.keys())[147]]['exp_avg'], 
accessed_by=DictGetItemGuardAccessor(exp_avg) + | | | | | | +- ID_MATCH: ___check_obj_id(L['self'].state[list(L['self'].state.keys())[147]]['exp_avg'], 140561608512416) + | | | | | | +- TENSOR_MATCH: check_tensor(L['self'].state[list(L['self'].state.keys())[147]]['exp_avg'], Tensor, DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), torch.float32, device=0, requires_grad=False, size=[768], stride=[1]) + | | | | | | +- NO_HASATTR: hasattr(L['self'].state[list(L['self'].state.keys())[147]]['exp_avg'], '_dynamo_dynamic_indices') == False + | | | | | | +- NO_TENSOR_ALIASING + | | | | | +- GuardManager: source=L['self'].state[list(L['self'].state.keys())[147]]['exp_avg_sq'], accessed_by=DictGetItemGuardAccessor(exp_avg_sq) + | | | | | | +- ID_MATCH: ___check_obj_id(L['self'].state[list(L['self'].state.keys())[147]]['exp_avg_sq'], 140561608512496) + | | | | | | +- TENSOR_MATCH: check_tensor(L['self'].state[list(L['self'].state.keys())[147]]['exp_avg_sq'], Tensor, DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), torch.float32, device=0, requires_grad=False, size=[768], stride=[1]) + | | | | | | +- NO_HASATTR: hasattr(L['self'].state[list(L['self'].state.keys())[147]]['exp_avg_sq'], '_dynamo_dynamic_indices') == False + | | | | | | +- NO_TENSOR_ALIASING + | | | +- GuardManager: source=L['self'].state.default_factory, accessed_by=GetAttrGuardAccessor(default_factory) + | | | | +- ID_MATCH: ___check_obj_id(L['self'].state.default_factory, 94206128762464) + | | +- GuardManager: source=L['self'].param_groups, accessed_by=GetAttrGuardAccessor(param_groups) + | | | +- TYPE_MATCH: ___check_type_id(L['self'].param_groups, 94206128766016) + | | | +- LENGTH_CHECK: len(L['self'].param_groups) == 1 + | | | +- GuardManager: source=L['self'].param_groups[0], accessed_by=ListGetItemGuardAccessor(0) + | | | | +- DICT_LENGTH: len(L['self'].param_groups[0]) == 11 + | | | | +- GuardManager: source=L['self'].param_groups[0]['params'], accessed_by=DictGetItemGuardAccessor(params) + | | | | | +- TYPE_MATCH: ___check_type_id(L['self'].param_groups[0]['params'], 94206128766016) + | | | | | +- LENGTH_CHECK: len(L['self'].param_groups[0]['params']) == 148 + | | | | | +- GuardManager: source=L['self'].param_groups[0]['params'][0], accessed_by=ListGetItemGuardAccessor(0) + | | | | | | +- ID_MATCH: ___check_obj_id(L['self'].param_groups[0]['params'][0], 140561606584704) + | | | | | | +- TENSOR_MATCH: check_tensor(L['self'].param_groups[0]['params'][0], Parameter, DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), torch.float32, device=0, requires_grad=True, size=[50304, 768], stride=[768, 1]) + | | | | | | +- NO_TENSOR_ALIASING + | | | | | | +- GuardManager: source=L['self'].param_groups[0]['params'][0].grad, accessed_by=GradGuardAccessor(grad) + | | | | | | | +- NOT_NONE: L['self'].param_groups[0]['params'][0].grad is not None + | | | | | +- GuardManager: source=L['self'].param_groups[0]['params'][1], accessed_by=ListGetItemGuardAccessor(1) + | | | | | | +- ID_MATCH: ___check_obj_id(L['self'].param_groups[0]['params'][1], 140561606574304) + | | | | | | +- TENSOR_MATCH: check_tensor(L['self'].param_groups[0]['params'][1], Parameter, DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), torch.float32, device=0, requires_grad=True, size=[1024, 768], stride=[768, 1]) + | | | | | | +- NO_TENSOR_ALIASING + | | | | | | +- GuardManager: source=L['self'].param_groups[0]['params'][1].grad, accessed_by=GradGuardAccessor(grad) + | | | | | | | +- NOT_NONE: 
L['self'].param_groups[0]['params'][1].grad is not None + | | | | | +- GuardManager: source=L['self'].param_groups[0]['params'][2], accessed_by=ListGetItemGuardAccessor(2) + | | | | | | +- ID_MATCH: ___check_obj_id(L['self'].param_groups[0]['params'][2], 140561606584384) + | | | | | | +- TENSOR_MATCH: check_tensor(L['self'].param_groups[0]['params'][2], Parameter, DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), torch.float32, device=0, requires_grad=True, size=[768], stride=[1]) + | | | | | | +- NO_TENSOR_ALIASING + | | | | | | +- GuardManager: source=L['self'].param_groups[0]['params'][2].grad, accessed_by=GradGuardAccessor(grad) + | | | | | | | +- NOT_NONE: L['self'].param_groups[0]['params'][2].grad is not None + | | | | | +- GuardManager: source=L['self'].param_groups[0]['params'][3], accessed_by=ListGetItemGuardAccessor(3) + | | | | | | +- ID_MATCH: ___check_obj_id(L['self'].param_groups[0]['params'][3], 140561606584544) + | | | | | | +- TENSOR_MATCH: check_tensor(L['self'].param_groups[0]['params'][3], Parameter, DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), torch.float32, device=0, requires_grad=True, size=[768], stride=[1]) + | | | | | | +- NO_TENSOR_ALIASING + | | | | | | +- GuardManager: source=L['self'].param_groups[0]['params'][3].grad, accessed_by=GradGuardAccessor(grad) + | | | | | | | +- NOT_NONE: L['self'].param_groups[0]['params'][3].grad is not None + | | | | | +- GuardManager: source=L['self'].param_groups[0]['params'][4], accessed_by=ListGetItemGuardAccessor(4) + | | | | | | +- ID_MATCH: ___check_obj_id(L['self'].param_groups[0]['params'][4], 140561606583584) + | | | | | | +- TENSOR_MATCH: check_tensor(L['self'].param_groups[0]['params'][4], Parameter, DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), torch.float32, device=0, requires_grad=True, size=[2304, 768], stride=[768, 1]) + | | | | | | +- NO_TENSOR_ALIASING + | | | | | | +- GuardManager: source=L['self'].param_groups[0]['params'][4].grad, accessed_by=GradGuardAccessor(grad) + | | | | | | | +- NOT_NONE: L['self'].param_groups[0]['params'][4].grad is not None + | | | | | +- GuardManager: source=L['self'].param_groups[0]['params'][5], accessed_by=ListGetItemGuardAccessor(5) + | | | | | | +- ID_MATCH: ___check_obj_id(L['self'].param_groups[0]['params'][5], 140561606583504) + | | | | | | +- TENSOR_MATCH: check_tensor(L['self'].param_groups[0]['params'][5], Parameter, DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), torch.float32, device=0, requires_grad=True, size=[2304], stride=[1]) + | | | | | | +- NO_TENSOR_ALIASING + | | | | | | +- GuardManager: source=L['self'].param_groups[0]['params'][5].grad, accessed_by=GradGuardAccessor(grad) + | | | | | | | +- NOT_NONE: L['self'].param_groups[0]['params'][5].grad is not None + | | | | | +- GuardManager: source=L['self'].param_groups[0]['params'][6], accessed_by=ListGetItemGuardAccessor(6) + | | | | | | +- ID_MATCH: ___check_obj_id(L['self'].param_groups[0]['params'][6], 140561606583824) + | | | | | | +- TENSOR_MATCH: check_tensor(L['self'].param_groups[0]['params'][6], Parameter, DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), torch.float32, device=0, requires_grad=True, size=[768, 768], stride=[768, 1]) + | | | | | | +- NO_TENSOR_ALIASING + | | | | | | +- GuardManager: source=L['self'].param_groups[0]['params'][6].grad, accessed_by=GradGuardAccessor(grad) + | | | | | | | +- NOT_NONE: L['self'].param_groups[0]['params'][6].grad is not None + | | | | | +- GuardManager: 
source=L['self'].param_groups[0]['params'][7], accessed_by=ListGetItemGuardAccessor(7) + | | | | | | +- ID_MATCH: ___check_obj_id(L['self'].param_groups[0]['params'][7], 140561606583104) + | | | | | | +- TENSOR_MATCH: check_tensor(L['self'].param_groups[0]['params'][7], Parameter, DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), torch.float32, device=0, requires_grad=True, size=[768], stride=[1]) + | | | | | | +- NO_TENSOR_ALIASING + | | | | | | +- GuardManager: source=L['self'].param_groups[0]['params'][7].grad, accessed_by=GradGuardAccessor(grad) + | | | | | | | +- NOT_NONE: L['self'].param_groups[0]['params'][7].grad is not None + | | | | | +- GuardManager: source=L['self'].param_groups[0]['params'][8], accessed_by=ListGetItemGuardAccessor(8) + | | | | | | +- ID_MATCH: ___check_obj_id(L['self'].param_groups[0]['params'][8], 140561606583904) + | | | | | | +- TENSOR_MATCH: check_tensor(L['self'].param_groups[0]['params'][8], Parameter, DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), torch.float32, device=0, requires_grad=True, size=[768], stride=[1]) + | | | | | | +- NO_TENSOR_ALIASING + | | | | | | +- GuardManager: source=L['self'].param_groups[0]['params'][8].grad, accessed_by=GradGuardAccessor(grad) + | | | | | | | +- NOT_NONE: L['self'].param_groups[0]['params'][8].grad is not None + | | | | | +- GuardManager: source=L['self'].param_groups[0]['params'][9], accessed_by=ListGetItemGuardAccessor(9) + | | | | | | +- ID_MATCH: ___check_obj_id(L['self'].param_groups[0]['params'][9], 140561606584464) + | | | | | | +- TENSOR_MATCH: check_tensor(L['self'].param_groups[0]['params'][9], Parameter, DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), torch.float32, device=0, requires_grad=True, size=[768], stride=[1]) + | | | | | | +- NO_TENSOR_ALIASING + | | | | | | +- GuardManager: source=L['self'].param_groups[0]['params'][9].grad, accessed_by=GradGuardAccessor(grad) + | | | | | | | +- NOT_NONE: L['self'].param_groups[0]['params'][9].grad is not None + | | | | | +- GuardManager: source=L['self'].param_groups[0]['params'][10], accessed_by=ListGetItemGuardAccessor(10) + | | | | | | +- ID_MATCH: ___check_obj_id(L['self'].param_groups[0]['params'][10], 140561606582224) + | | | | | | +- TENSOR_MATCH: check_tensor(L['self'].param_groups[0]['params'][10], Parameter, DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), torch.float32, device=0, requires_grad=True, size=[3072, 768], stride=[768, 1]) + | | | | | | +- NO_TENSOR_ALIASING + | | | | | | +- GuardManager: source=L['self'].param_groups[0]['params'][10].grad, accessed_by=GradGuardAccessor(grad) + | | | | | | | +- NOT_NONE: L['self'].param_groups[0]['params'][10].grad is not None + | | | | | +- GuardManager: source=L['self'].param_groups[0]['params'][11], accessed_by=ListGetItemGuardAccessor(11) + | | | | | | +- ID_MATCH: ___check_obj_id(L['self'].param_groups[0]['params'][11], 140561606582544) + | | | | | | +- TENSOR_MATCH: check_tensor(L['self'].param_groups[0]['params'][11], Parameter, DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), torch.float32, device=0, requires_grad=True, size=[3072], stride=[1]) + | | | | | | +- NO_TENSOR_ALIASING + | | | | | | +- GuardManager: source=L['self'].param_groups[0]['params'][11].grad, accessed_by=GradGuardAccessor(grad) + | | | | | | | +- NOT_NONE: L['self'].param_groups[0]['params'][11].grad is not None + | | | | | +- GuardManager: source=L['self'].param_groups[0]['params'][12], accessed_by=ListGetItemGuardAccessor(12) + 
| | | | | | +- ID_MATCH: ___check_obj_id(L['self'].param_groups[0]['params'][12], 140561606583024) + | | | | | | +- TENSOR_MATCH: check_tensor(L['self'].param_groups[0]['params'][12], Parameter, DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), torch.float32, device=0, requires_grad=True, size=[768, 3072], stride=[3072, 1]) + | | | | | | +- NO_TENSOR_ALIASING + | | | | | | +- GuardManager: source=L['self'].param_groups[0]['params'][12].grad, accessed_by=GradGuardAccessor(grad) + | | | | | | | +- NOT_NONE: L['self'].param_groups[0]['params'][12].grad is not None + | | | | | +- GuardManager: source=L['self'].param_groups[0]['params'][13], accessed_by=ListGetItemGuardAccessor(13) + | | | | | | +- ID_MATCH: ___check_obj_id(L['self'].param_groups[0]['params'][13], 140561606581584) + | | | | | | +- TENSOR_MATCH: check_tensor(L['self'].param_groups[0]['params'][13], Parameter, DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), torch.float32, device=0, requires_grad=True, size=[768], stride=[1]) + | | | | | | +- NO_TENSOR_ALIASING + | | | | | | +- GuardManager: source=L['self'].param_groups[0]['params'][13].grad, accessed_by=GradGuardAccessor(grad) + | | | | | | | +- NOT_NONE: L['self'].param_groups[0]['params'][13].grad is not None + | | | | | +- GuardManager: source=L['self'].param_groups[0]['params'][14], accessed_by=ListGetItemGuardAccessor(14) + | | | | | | +- ID_MATCH: ___check_obj_id(L['self'].param_groups[0]['params'][14], 140561606582704) + | | | | | | +- TENSOR_MATCH: check_tensor(L['self'].param_groups[0]['params'][14], Parameter, DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), torch.float32, device=0, requires_grad=True, size=[768], stride=[1]) + | | | | | | +- NO_TENSOR_ALIASING + | | | | | | +- GuardManager: source=L['self'].param_groups[0]['params'][14].grad, accessed_by=GradGuardAccessor(grad) + | | | | | | | +- NOT_NONE: L['self'].param_groups[0]['params'][14].grad is not None + | | | | | +- GuardManager: source=L['self'].param_groups[0]['params'][15], accessed_by=ListGetItemGuardAccessor(15) + | | | | | | +- ID_MATCH: ___check_obj_id(L['self'].param_groups[0]['params'][15], 140561606583664) + | | | | | | +- TENSOR_MATCH: check_tensor(L['self'].param_groups[0]['params'][15], Parameter, DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), torch.float32, device=0, requires_grad=True, size=[768], stride=[1]) + | | | | | | +- NO_TENSOR_ALIASING + | | | | | | +- GuardManager: source=L['self'].param_groups[0]['params'][15].grad, accessed_by=GradGuardAccessor(grad) + | | | | | | | +- NOT_NONE: L['self'].param_groups[0]['params'][15].grad is not None + | | | | | +- GuardManager: source=L['self'].param_groups[0]['params'][16], accessed_by=ListGetItemGuardAccessor(16) + | | | | | | +- ID_MATCH: ___check_obj_id(L['self'].param_groups[0]['params'][16], 140561606581424) + | | | | | | +- TENSOR_MATCH: check_tensor(L['self'].param_groups[0]['params'][16], Parameter, DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), torch.float32, device=0, requires_grad=True, size=[2304, 768], stride=[768, 1]) + | | | | | | +- NO_TENSOR_ALIASING + | | | | | | +- GuardManager: source=L['self'].param_groups[0]['params'][16].grad, accessed_by=GradGuardAccessor(grad) + | | | | | | | +- NOT_NONE: L['self'].param_groups[0]['params'][16].grad is not None + | | | | | +- GuardManager: source=L['self'].param_groups[0]['params'][17], accessed_by=ListGetItemGuardAccessor(17) + | | | | | | +- ID_MATCH: 
___check_obj_id(L['self'].param_groups[0]['params'][17], 140561606581744) + | | | | | | +- TENSOR_MATCH: check_tensor(L['self'].param_groups[0]['params'][17], Parameter, DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), torch.float32, device=0, requires_grad=True, size=[2304], stride=[1]) + | | | | | | +- NO_TENSOR_ALIASING + | | | | | | +- GuardManager: source=L['self'].param_groups[0]['params'][17].grad, accessed_by=GradGuardAccessor(grad) + | | | | | | | +- NOT_NONE: L['self'].param_groups[0]['params'][17].grad is not None + | | | | | +- GuardManager: source=L['self'].param_groups[0]['params'][18], accessed_by=ListGetItemGuardAccessor(18) + | | | | | | +- ID_MATCH: ___check_obj_id(L['self'].param_groups[0]['params'][18], 140561606582064) + | | | | | | +- TENSOR_MATCH: check_tensor(L['self'].param_groups[0]['params'][18], Parameter, DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), torch.float32, device=0, requires_grad=True, size=[768, 768], stride=[768, 1]) + | | | | | | +- NO_TENSOR_ALIASING + | | | | | | +- GuardManager: source=L['self'].param_groups[0]['params'][18].grad, accessed_by=GradGuardAccessor(grad) + | | | | | | | +- NOT_NONE: L['self'].param_groups[0]['params'][18].grad is not None + | | | | | +- GuardManager: source=L['self'].param_groups[0]['params'][19], accessed_by=ListGetItemGuardAccessor(19) + | | | | | | +- ID_MATCH: ___check_obj_id(L['self'].param_groups[0]['params'][19], 140561606580944) + | | | | | | +- TENSOR_MATCH: check_tensor(L['self'].param_groups[0]['params'][19], Parameter, DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), torch.float32, device=0, requires_grad=True, size=[768], stride=[1]) + | | | | | | +- NO_TENSOR_ALIASING + | | | | | | +- GuardManager: source=L['self'].param_groups[0]['params'][19].grad, accessed_by=GradGuardAccessor(grad) + | | | | | | | +- NOT_NONE: L['self'].param_groups[0]['params'][19].grad is not None + | | | | | +- GuardManager: source=L['self'].param_groups[0]['params'][20], accessed_by=ListGetItemGuardAccessor(20) + | | | | | | +- ID_MATCH: ___check_obj_id(L['self'].param_groups[0]['params'][20], 140561606582144) + | | | | | | +- TENSOR_MATCH: check_tensor(L['self'].param_groups[0]['params'][20], Parameter, DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), torch.float32, device=0, requires_grad=True, size=[768], stride=[1]) + | | | | | | +- NO_TENSOR_ALIASING + | | | | | | +- GuardManager: source=L['self'].param_groups[0]['params'][20].grad, accessed_by=GradGuardAccessor(grad) + | | | | | | | +- NOT_NONE: L['self'].param_groups[0]['params'][20].grad is not None + | | | | | +- GuardManager: source=L['self'].param_groups[0]['params'][21], accessed_by=ListGetItemGuardAccessor(21) + | | | | | | +- ID_MATCH: ___check_obj_id(L['self'].param_groups[0]['params'][21], 140561606583184) + | | | | | | +- TENSOR_MATCH: check_tensor(L['self'].param_groups[0]['params'][21], Parameter, DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), torch.float32, device=0, requires_grad=True, size=[768], stride=[1]) + | | | | | | +- NO_TENSOR_ALIASING + | | | | | | +- GuardManager: source=L['self'].param_groups[0]['params'][21].grad, accessed_by=GradGuardAccessor(grad) + | | | | | | | +- NOT_NONE: L['self'].param_groups[0]['params'][21].grad is not None + | | | | | +- GuardManager: source=L['self'].param_groups[0]['params'][22], accessed_by=ListGetItemGuardAccessor(22) + | | | | | | +- ID_MATCH: ___check_obj_id(L['self'].param_groups[0]['params'][22], 140561606580464) 
+ | | | | | | +- TENSOR_MATCH: check_tensor(L['self'].param_groups[0]['params'][22], Parameter, DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), torch.float32, device=0, requires_grad=True, size=[3072, 768], stride=[768, 1]) + | | | | | | +- NO_TENSOR_ALIASING + | | | | | | +- GuardManager: source=L['self'].param_groups[0]['params'][22].grad, accessed_by=GradGuardAccessor(grad) + | | | | | | | +- NOT_NONE: L['self'].param_groups[0]['params'][22].grad is not None + | | | | | +- GuardManager: source=L['self'].param_groups[0]['params'][23], accessed_by=ListGetItemGuardAccessor(23) + | | | | | | +- ID_MATCH: ___check_obj_id(L['self'].param_groups[0]['params'][23], 140561606581104) + | | | | | | +- TENSOR_MATCH: check_tensor(L['self'].param_groups[0]['params'][23], Parameter, DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), torch.float32, device=0, requires_grad=True, size=[3072], stride=[1]) + | | | | | | +- NO_TENSOR_ALIASING + | | | | | | +- GuardManager: source=L['self'].param_groups[0]['params'][23].grad, accessed_by=GradGuardAccessor(grad) + | | | | | | | +- NOT_NONE: L['self'].param_groups[0]['params'][23].grad is not None + | | | | | +- GuardManager: source=L['self'].param_groups[0]['params'][24], accessed_by=ListGetItemGuardAccessor(24) + | | | | | | +- ID_MATCH: ___check_obj_id(L['self'].param_groups[0]['params'][24], 140561606580864) + | | | | | | +- TENSOR_MATCH: check_tensor(L['self'].param_groups[0]['params'][24], Parameter, DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), torch.float32, device=0, requires_grad=True, size=[768, 3072], stride=[3072, 1]) + | | | | | | +- NO_TENSOR_ALIASING + | | | | | | +- GuardManager: source=L['self'].param_groups[0]['params'][24].grad, accessed_by=GradGuardAccessor(grad) + | | | | | | | +- NOT_NONE: L['self'].param_groups[0]['params'][24].grad is not None + | | | | | +- GuardManager: source=L['self'].param_groups[0]['params'][25], accessed_by=ListGetItemGuardAccessor(25) + | | | | | | +- ID_MATCH: ___check_obj_id(L['self'].param_groups[0]['params'][25], 140561606580144) + | | | | | | +- TENSOR_MATCH: check_tensor(L['self'].param_groups[0]['params'][25], Parameter, DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), torch.float32, device=0, requires_grad=True, size=[768], stride=[1]) + | | | | | | +- NO_TENSOR_ALIASING + | | | | | | +- GuardManager: source=L['self'].param_groups[0]['params'][25].grad, accessed_by=GradGuardAccessor(grad) + | | | | | | | +- NOT_NONE: L['self'].param_groups[0]['params'][25].grad is not None + | | | | | +- GuardManager: source=L['self'].param_groups[0]['params'][26], accessed_by=ListGetItemGuardAccessor(26) + | | | | | | +- ID_MATCH: ___check_obj_id(L['self'].param_groups[0]['params'][26], 140561606581184) + | | | | | | +- TENSOR_MATCH: check_tensor(L['self'].param_groups[0]['params'][26], Parameter, DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), torch.float32, device=0, requires_grad=True, size=[768], stride=[1]) + | | | | | | +- NO_TENSOR_ALIASING + | | | | | | +- GuardManager: source=L['self'].param_groups[0]['params'][26].grad, accessed_by=GradGuardAccessor(grad) + | | | | | | | +- NOT_NONE: L['self'].param_groups[0]['params'][26].grad is not None + | | | | | +- GuardManager: source=L['self'].param_groups[0]['params'][27], accessed_by=ListGetItemGuardAccessor(27) + | | | | | | +- ID_MATCH: ___check_obj_id(L['self'].param_groups[0]['params'][27], 140561606581984) + | | | | | | +- TENSOR_MATCH: 
check_tensor(L['self'].param_groups[0]['params'][27], Parameter, DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), torch.float32, device=0, requires_grad=True, size=[768], stride=[1]) + | | | | | | +- NO_TENSOR_ALIASING + | | | | | | +- GuardManager: source=L['self'].param_groups[0]['params'][27].grad, accessed_by=GradGuardAccessor(grad) + | | | | | | | +- NOT_NONE: L['self'].param_groups[0]['params'][27].grad is not None + | | | | | +- GuardManager: source=L['self'].param_groups[0]['params'][28], accessed_by=ListGetItemGuardAccessor(28) + | | | | | | +- ID_MATCH: ___check_obj_id(L['self'].param_groups[0]['params'][28], 140561606579984) + | | | | | | +- TENSOR_MATCH: check_tensor(L['self'].param_groups[0]['params'][28], Parameter, DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), torch.float32, device=0, requires_grad=True, size=[2304, 768], stride=[768, 1]) + | | | | | | +- NO_TENSOR_ALIASING + | | | | | | +- GuardManager: source=L['self'].param_groups[0]['params'][28].grad, accessed_by=GradGuardAccessor(grad) + | | | | | | | +- NOT_NONE: L['self'].param_groups[0]['params'][28].grad is not None + | | | | | +- GuardManager: source=L['self'].param_groups[0]['params'][29], accessed_by=ListGetItemGuardAccessor(29) + | | | | | | +- ID_MATCH: ___check_obj_id(L['self'].param_groups[0]['params'][29], 140561606580224) + | | | | | | +- TENSOR_MATCH: check_tensor(L['self'].param_groups[0]['params'][29], Parameter, DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), torch.float32, device=0, requires_grad=True, size=[2304], stride=[1]) + | | | | | | +- NO_TENSOR_ALIASING + | | | | | | +- GuardManager: source=L['self'].param_groups[0]['params'][29].grad, accessed_by=GradGuardAccessor(grad) + | | | | | | | +- NOT_NONE: L['self'].param_groups[0]['params'][29].grad is not None + | | | | | +- GuardManager: source=L['self'].param_groups[0]['params'][30], accessed_by=ListGetItemGuardAccessor(30) + | | | | | | +- ID_MATCH: ___check_obj_id(L['self'].param_groups[0]['params'][30], 140561606579904) + | | | | | | +- TENSOR_MATCH: check_tensor(L['self'].param_groups[0]['params'][30], Parameter, DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), torch.float32, device=0, requires_grad=True, size=[768, 768], stride=[768, 1]) + | | | | | | +- NO_TENSOR_ALIASING + | | | | | | +- GuardManager: source=L['self'].param_groups[0]['params'][30].grad, accessed_by=GradGuardAccessor(grad) + | | | | | | | +- NOT_NONE: L['self'].param_groups[0]['params'][30].grad is not None + | | | | | +- GuardManager: source=L['self'].param_groups[0]['params'][31], accessed_by=ListGetItemGuardAccessor(31) + | | | | | | +- ID_MATCH: ___check_obj_id(L['self'].param_groups[0]['params'][31], 140561606579104) + | | | | | | +- TENSOR_MATCH: check_tensor(L['self'].param_groups[0]['params'][31], Parameter, DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), torch.float32, device=0, requires_grad=True, size=[768], stride=[1]) + | | | | | | +- NO_TENSOR_ALIASING + | | | | | | +- GuardManager: source=L['self'].param_groups[0]['params'][31].grad, accessed_by=GradGuardAccessor(grad) + | | | | | | | +- NOT_NONE: L['self'].param_groups[0]['params'][31].grad is not None + | | | | | +- GuardManager: source=L['self'].param_groups[0]['params'][32], accessed_by=ListGetItemGuardAccessor(32) + | | | | | | +- ID_MATCH: ___check_obj_id(L['self'].param_groups[0]['params'][32], 140561606580384) + | | | | | | +- TENSOR_MATCH: check_tensor(L['self'].param_groups[0]['params'][32], 
Parameter, DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), torch.float32, device=0, requires_grad=True, size=[768], stride=[1]) + | | | | | | +- NO_TENSOR_ALIASING + | | | | | | +- GuardManager: source=L['self'].param_groups[0]['params'][32].grad, accessed_by=GradGuardAccessor(grad) + | | | | | | | +- NOT_NONE: L['self'].param_groups[0]['params'][32].grad is not None + | | | | | +- GuardManager: source=L['self'].param_groups[0]['params'][33], accessed_by=ListGetItemGuardAccessor(33) + | | | | | | +- ID_MATCH: ___check_obj_id(L['self'].param_groups[0]['params'][33], 140561606581344) + | | | | | | +- TENSOR_MATCH: check_tensor(L['self'].param_groups[0]['params'][33], Parameter, DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), torch.float32, device=0, requires_grad=True, size=[768], stride=[1]) + | | | | | | +- NO_TENSOR_ALIASING + | | | | | | +- GuardManager: source=L['self'].param_groups[0]['params'][33].grad, accessed_by=GradGuardAccessor(grad) + | | | | | | | +- NOT_NONE: L['self'].param_groups[0]['params'][33].grad is not None + | | | | | +- GuardManager: source=L['self'].param_groups[0]['params'][34], accessed_by=ListGetItemGuardAccessor(34) + | | | | | | +- ID_MATCH: ___check_obj_id(L['self'].param_groups[0]['params'][34], 140561606579024) + | | | | | | +- TENSOR_MATCH: check_tensor(L['self'].param_groups[0]['params'][34], Parameter, DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), torch.float32, device=0, requires_grad=True, size=[3072, 768], stride=[768, 1]) + | | | | | | +- NO_TENSOR_ALIASING + | | | | | | +- GuardManager: source=L['self'].param_groups[0]['params'][34].grad, accessed_by=GradGuardAccessor(grad) + | | | | | | | +- NOT_NONE: L['self'].param_groups[0]['params'][34].grad is not None + | | | | | +- GuardManager: source=L['self'].param_groups[0]['params'][35], accessed_by=ListGetItemGuardAccessor(35) + | | | | | | +- ID_MATCH: ___check_obj_id(L['self'].param_groups[0]['params'][35], 140561606578944) + | | | | | | +- TENSOR_MATCH: check_tensor(L['self'].param_groups[0]['params'][35], Parameter, DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), torch.float32, device=0, requires_grad=True, size=[3072], stride=[1]) + | | | | | | +- NO_TENSOR_ALIASING + | | | | | | +- GuardManager: source=L['self'].param_groups[0]['params'][35].grad, accessed_by=GradGuardAccessor(grad) + | | | | | | | +- NOT_NONE: L['self'].param_groups[0]['params'][35].grad is not None + | | | | | +- GuardManager: source=L['self'].param_groups[0]['params'][36], accessed_by=ListGetItemGuardAccessor(36) + | | | | | | +- ID_MATCH: ___check_obj_id(L['self'].param_groups[0]['params'][36], 140561606579584) + | | | | | | +- TENSOR_MATCH: check_tensor(L['self'].param_groups[0]['params'][36], Parameter, DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), torch.float32, device=0, requires_grad=True, size=[768, 3072], stride=[3072, 1]) + | | | | | | +- NO_TENSOR_ALIASING + | | | | | | +- GuardManager: source=L['self'].param_groups[0]['params'][36].grad, accessed_by=GradGuardAccessor(grad) + | | | | | | | +- NOT_NONE: L['self'].param_groups[0]['params'][36].grad is not None + | | | | | +- GuardManager: source=L['self'].param_groups[0]['params'][37], accessed_by=ListGetItemGuardAccessor(37) + | | | | | | +- ID_MATCH: ___check_obj_id(L['self'].param_groups[0]['params'][37], 140561606577984) + | | | | | | +- TENSOR_MATCH: check_tensor(L['self'].param_groups[0]['params'][37], Parameter, DispatchKeySet(CUDA, BackendSelect, 
ADInplaceOrView, AutogradCUDA), torch.float32, device=0, requires_grad=True, size=[768], stride=[1]) + | | | | | | +- NO_TENSOR_ALIASING + | | | | | | +- GuardManager: source=L['self'].param_groups[0]['params'][37].grad, accessed_by=GradGuardAccessor(grad) + | | | | | | | +- NOT_NONE: L['self'].param_groups[0]['params'][37].grad is not None + | | | | | +- GuardManager: source=L['self'].param_groups[0]['params'][38], accessed_by=ListGetItemGuardAccessor(38) + | | | | | | +- ID_MATCH: ___check_obj_id(L['self'].param_groups[0]['params'][38], 140561606579424) + | | | | | | +- TENSOR_MATCH: check_tensor(L['self'].param_groups[0]['params'][38], Parameter, DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), torch.float32, device=0, requires_grad=True, size=[768], stride=[1]) + | | | | | | +- NO_TENSOR_ALIASING + | | | | | | +- GuardManager: source=L['self'].param_groups[0]['params'][38].grad, accessed_by=GradGuardAccessor(grad) + | | | | | | | +- NOT_NONE: L['self'].param_groups[0]['params'][38].grad is not None + | | | | | +- GuardManager: source=L['self'].param_groups[0]['params'][39], accessed_by=ListGetItemGuardAccessor(39) + | | | | | | +- ID_MATCH: ___check_obj_id(L['self'].param_groups[0]['params'][39], 140561606580304) + | | | | | | +- TENSOR_MATCH: check_tensor(L['self'].param_groups[0]['params'][39], Parameter, DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), torch.float32, device=0, requires_grad=True, size=[768], stride=[1]) + | | | | | | +- NO_TENSOR_ALIASING + | | | | | | +- GuardManager: source=L['self'].param_groups[0]['params'][39].grad, accessed_by=GradGuardAccessor(grad) + | | | | | | | +- NOT_NONE: L['self'].param_groups[0]['params'][39].grad is not None + | | | | | +- GuardManager: source=L['self'].param_groups[0]['params'][40], accessed_by=ListGetItemGuardAccessor(40) + | | | | | | +- ID_MATCH: ___check_obj_id(L['self'].param_groups[0]['params'][40], 140561606577424) + | | | | | | +- TENSOR_MATCH: check_tensor(L['self'].param_groups[0]['params'][40], Parameter, DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), torch.float32, device=0, requires_grad=True, size=[2304, 768], stride=[768, 1]) + | | | | | | +- NO_TENSOR_ALIASING + | | | | | | +- GuardManager: source=L['self'].param_groups[0]['params'][40].grad, accessed_by=GradGuardAccessor(grad) + | | | | | | | +- NOT_NONE: L['self'].param_groups[0]['params'][40].grad is not None + | | | | | +- GuardManager: source=L['self'].param_groups[0]['params'][41], accessed_by=ListGetItemGuardAccessor(41) + | | | | | | +- ID_MATCH: ___check_obj_id(L['self'].param_groups[0]['params'][41], 140561606578144) + | | | | | | +- TENSOR_MATCH: check_tensor(L['self'].param_groups[0]['params'][41], Parameter, DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), torch.float32, device=0, requires_grad=True, size=[2304], stride=[1]) + | | | | | | +- NO_TENSOR_ALIASING + | | | | | | +- GuardManager: source=L['self'].param_groups[0]['params'][41].grad, accessed_by=GradGuardAccessor(grad) + | | | | | | | +- NOT_NONE: L['self'].param_groups[0]['params'][41].grad is not None + | | | | | +- GuardManager: source=L['self'].param_groups[0]['params'][42], accessed_by=ListGetItemGuardAccessor(42) + | | | | | | +- ID_MATCH: ___check_obj_id(L['self'].param_groups[0]['params'][42], 140561606578464) + | | | | | | +- TENSOR_MATCH: check_tensor(L['self'].param_groups[0]['params'][42], Parameter, DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), torch.float32, device=0, 
requires_grad=True, size=[768, 768], stride=[768, 1]) + | | | | | | +- NO_TENSOR_ALIASING + | | | | | | +- GuardManager: source=L['self'].param_groups[0]['params'][42].grad, accessed_by=GradGuardAccessor(grad) + | | | | | | | +- NOT_NONE: L['self'].param_groups[0]['params'][42].grad is not None + | | | | | +- GuardManager: source=L['self'].param_groups[0]['params'][43], accessed_by=ListGetItemGuardAccessor(43) + | | | | | | +- ID_MATCH: ___check_obj_id(L['self'].param_groups[0]['params'][43], 140561606577904) + | | | | | | +- TENSOR_MATCH: check_tensor(L['self'].param_groups[0]['params'][43], Parameter, DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), torch.float32, device=0, requires_grad=True, size=[768], stride=[1]) + | | | | | | +- NO_TENSOR_ALIASING + | | | | | | +- GuardManager: source=L['self'].param_groups[0]['params'][43].grad, accessed_by=GradGuardAccessor(grad) + | | | | | | | +- NOT_NONE: L['self'].param_groups[0]['params'][43].grad is not None + | | | | | +- GuardManager: source=L['self'].param_groups[0]['params'][44], accessed_by=ListGetItemGuardAccessor(44) + | | | | | | +- ID_MATCH: ___check_obj_id(L['self'].param_groups[0]['params'][44], 140561606578624) + | | | | | | +- TENSOR_MATCH: check_tensor(L['self'].param_groups[0]['params'][44], Parameter, DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), torch.float32, device=0, requires_grad=True, size=[768], stride=[1]) + | | | | | | +- NO_TENSOR_ALIASING + | | | | | | +- GuardManager: source=L['self'].param_groups[0]['params'][44].grad, accessed_by=GradGuardAccessor(grad) + | | | | | | | +- NOT_NONE: L['self'].param_groups[0]['params'][44].grad is not None + | | | | | +- GuardManager: source=L['self'].param_groups[0]['params'][45], accessed_by=ListGetItemGuardAccessor(45) + | | | | | | +- ID_MATCH: ___check_obj_id(L['self'].param_groups[0]['params'][45], 140561606578784) + | | | | | | +- TENSOR_MATCH: check_tensor(L['self'].param_groups[0]['params'][45], Parameter, DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), torch.float32, device=0, requires_grad=True, size=[768], stride=[1]) + | | | | | | +- NO_TENSOR_ALIASING + | | | | | | +- GuardManager: source=L['self'].param_groups[0]['params'][45].grad, accessed_by=GradGuardAccessor(grad) + | | | | | | | +- NOT_NONE: L['self'].param_groups[0]['params'][45].grad is not None + | | | | | +- GuardManager: source=L['self'].param_groups[0]['params'][46], accessed_by=ListGetItemGuardAccessor(46) + | | | | | | +- ID_MATCH: ___check_obj_id(L['self'].param_groups[0]['params'][46], 140561606576704) + | | | | | | +- TENSOR_MATCH: check_tensor(L['self'].param_groups[0]['params'][46], Parameter, DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), torch.float32, device=0, requires_grad=True, size=[3072, 768], stride=[768, 1]) + | | | | | | +- NO_TENSOR_ALIASING + | | | | | | +- GuardManager: source=L['self'].param_groups[0]['params'][46].grad, accessed_by=GradGuardAccessor(grad) + | | | | | | | +- NOT_NONE: L['self'].param_groups[0]['params'][46].grad is not None + | | | | | +- GuardManager: source=L['self'].param_groups[0]['params'][47], accessed_by=ListGetItemGuardAccessor(47) + | | | | | | +- ID_MATCH: ___check_obj_id(L['self'].param_groups[0]['params'][47], 140561606577184) + | | | | | | +- TENSOR_MATCH: check_tensor(L['self'].param_groups[0]['params'][47], Parameter, DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), torch.float32, device=0, requires_grad=True, size=[3072], stride=[1]) + | | | | | | +- 
NO_TENSOR_ALIASING + | | | | | | +- GuardManager: source=L['self'].param_groups[0]['params'][47].grad, accessed_by=GradGuardAccessor(grad) + | | | | | | | +- NOT_NONE: L['self'].param_groups[0]['params'][47].grad is not None + | | | | | +- GuardManager: source=L['self'].param_groups[0]['params'][48], accessed_by=ListGetItemGuardAccessor(48) + | | | | | | +- ID_MATCH: ___check_obj_id(L['self'].param_groups[0]['params'][48], 140561606577824) + | | | | | | +- TENSOR_MATCH: check_tensor(L['self'].param_groups[0]['params'][48], Parameter, DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), torch.float32, device=0, requires_grad=True, size=[768, 3072], stride=[3072, 1]) + | | | | | | +- NO_TENSOR_ALIASING + | | | | | | +- GuardManager: source=L['self'].param_groups[0]['params'][48].grad, accessed_by=GradGuardAccessor(grad) + | | | | | | | +- NOT_NONE: L['self'].param_groups[0]['params'][48].grad is not None + | | | | | +- GuardManager: source=L['self'].param_groups[0]['params'][49], accessed_by=ListGetItemGuardAccessor(49) + | | | | | | +- ID_MATCH: ___check_obj_id(L['self'].param_groups[0]['params'][49], 140561606576944) + | | | | | | +- TENSOR_MATCH: check_tensor(L['self'].param_groups[0]['params'][49], Parameter, DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), torch.float32, device=0, requires_grad=True, size=[768], stride=[1]) + | | | | | | +- NO_TENSOR_ALIASING + | | | | | | +- GuardManager: source=L['self'].param_groups[0]['params'][49].grad, accessed_by=GradGuardAccessor(grad) + | | | | | | | +- NOT_NONE: L['self'].param_groups[0]['params'][49].grad is not None + | | | | | +- GuardManager: source=L['self'].param_groups[0]['params'][50], accessed_by=ListGetItemGuardAccessor(50) + | | | | | | +- ID_MATCH: ___check_obj_id(L['self'].param_groups[0]['params'][50], 140561606577664) + | | | | | | +- TENSOR_MATCH: check_tensor(L['self'].param_groups[0]['params'][50], Parameter, DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), torch.float32, device=0, requires_grad=True, size=[768], stride=[1]) + | | | | | | +- NO_TENSOR_ALIASING + | | | | | | +- GuardManager: source=L['self'].param_groups[0]['params'][50].grad, accessed_by=GradGuardAccessor(grad) + | | | | | | | +- NOT_NONE: L['self'].param_groups[0]['params'][50].grad is not None + | | | | | +- GuardManager: source=L['self'].param_groups[0]['params'][51], accessed_by=ListGetItemGuardAccessor(51) + | | | | | | +- ID_MATCH: ___check_obj_id(L['self'].param_groups[0]['params'][51], 140561606578544) + | | | | | | +- TENSOR_MATCH: check_tensor(L['self'].param_groups[0]['params'][51], Parameter, DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), torch.float32, device=0, requires_grad=True, size=[768], stride=[1]) + | | | | | | +- NO_TENSOR_ALIASING + | | | | | | +- GuardManager: source=L['self'].param_groups[0]['params'][51].grad, accessed_by=GradGuardAccessor(grad) + | | | | | | | +- NOT_NONE: L['self'].param_groups[0]['params'][51].grad is not None + | | | | | +- GuardManager: source=L['self'].param_groups[0]['params'][52], accessed_by=ListGetItemGuardAccessor(52) + | | | | | | +- ID_MATCH: ___check_obj_id(L['self'].param_groups[0]['params'][52], 140561606575664) + | | | | | | +- TENSOR_MATCH: check_tensor(L['self'].param_groups[0]['params'][52], Parameter, DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), torch.float32, device=0, requires_grad=True, size=[2304, 768], stride=[768, 1]) + | | | | | | +- NO_TENSOR_ALIASING + | | | | | | +- GuardManager: 
source=L['self'].param_groups[0]['params'][52].grad, accessed_by=GradGuardAccessor(grad) + | | | | | | | +- NOT_NONE: L['self'].param_groups[0]['params'][52].grad is not None + | | | | | +- GuardManager: source=L['self'].param_groups[0]['params'][53], accessed_by=ListGetItemGuardAccessor(53) + | | | | | | +- ID_MATCH: ___check_obj_id(L['self'].param_groups[0]['params'][53], 140561606576464) + | | | | | | +- TENSOR_MATCH: check_tensor(L['self'].param_groups[0]['params'][53], Parameter, DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), torch.float32, device=0, requires_grad=True, size=[2304], stride=[1]) + | | | | | | +- NO_TENSOR_ALIASING + | | | | | | +- GuardManager: source=L['self'].param_groups[0]['params'][53].grad, accessed_by=GradGuardAccessor(grad) + | | | | | | | +- NOT_NONE: L['self'].param_groups[0]['params'][53].grad is not None + | | | | | +- GuardManager: source=L['self'].param_groups[0]['params'][54], accessed_by=ListGetItemGuardAccessor(54) + | | | | | | +- ID_MATCH: ___check_obj_id(L['self'].param_groups[0]['params'][54], 140561606576864) + | | | | | | +- TENSOR_MATCH: check_tensor(L['self'].param_groups[0]['params'][54], Parameter, DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), torch.float32, device=0, requires_grad=True, size=[768, 768], stride=[768, 1]) + | | | | | | +- NO_TENSOR_ALIASING + | | | | | | +- GuardManager: source=L['self'].param_groups[0]['params'][54].grad, accessed_by=GradGuardAccessor(grad) + | | | | | | | +- NOT_NONE: L['self'].param_groups[0]['params'][54].grad is not None + | | | | | +- GuardManager: source=L['self'].param_groups[0]['params'][55], accessed_by=ListGetItemGuardAccessor(55) + | | | | | | +- ID_MATCH: ___check_obj_id(L['self'].param_groups[0]['params'][55], 140561606576384) + | | | | | | +- TENSOR_MATCH: check_tensor(L['self'].param_groups[0]['params'][55], Parameter, DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), torch.float32, device=0, requires_grad=True, size=[768], stride=[1]) + | | | | | | +- NO_TENSOR_ALIASING + | | | | | | +- GuardManager: source=L['self'].param_groups[0]['params'][55].grad, accessed_by=GradGuardAccessor(grad) + | | | | | | | +- NOT_NONE: L['self'].param_groups[0]['params'][55].grad is not None + | | | | | +- GuardManager: source=L['self'].param_groups[0]['params'][56], accessed_by=ListGetItemGuardAccessor(56) + | | | | | | +- ID_MATCH: ___check_obj_id(L['self'].param_groups[0]['params'][56], 140561606577104) + | | | | | | +- TENSOR_MATCH: check_tensor(L['self'].param_groups[0]['params'][56], Parameter, DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), torch.float32, device=0, requires_grad=True, size=[768], stride=[1]) + | | | | | | +- NO_TENSOR_ALIASING + | | | | | | +- GuardManager: source=L['self'].param_groups[0]['params'][56].grad, accessed_by=GradGuardAccessor(grad) + | | | | | | | +- NOT_NONE: L['self'].param_groups[0]['params'][56].grad is not None + | | | | | +- GuardManager: source=L['self'].param_groups[0]['params'][57], accessed_by=ListGetItemGuardAccessor(57) + | | | | | | +- ID_MATCH: ___check_obj_id(L['self'].param_groups[0]['params'][57], 140561606577744) + | | | | | | +- TENSOR_MATCH: check_tensor(L['self'].param_groups[0]['params'][57], Parameter, DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), torch.float32, device=0, requires_grad=True, size=[768], stride=[1]) + | | | | | | +- NO_TENSOR_ALIASING + | | | | | | +- GuardManager: source=L['self'].param_groups[0]['params'][57].grad, 
accessed_by=GradGuardAccessor(grad) + | | | | | | | +- NOT_NONE: L['self'].param_groups[0]['params'][57].grad is not None + | | | | | +- GuardManager: source=L['self'].param_groups[0]['params'][58], accessed_by=ListGetItemGuardAccessor(58) + | | | | | | +- ID_MATCH: ___check_obj_id(L['self'].param_groups[0]['params'][58], 140561606574864) + | | | | | | +- TENSOR_MATCH: check_tensor(L['self'].param_groups[0]['params'][58], Parameter, DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), torch.float32, device=0, requires_grad=True, size=[3072, 768], stride=[768, 1]) + | | | | | | +- NO_TENSOR_ALIASING + | | | | | | +- GuardManager: source=L['self'].param_groups[0]['params'][58].grad, accessed_by=GradGuardAccessor(grad) + | | | | | | | +- NOT_NONE: L['self'].param_groups[0]['params'][58].grad is not None + | | | | | +- GuardManager: source=L['self'].param_groups[0]['params'][59], accessed_by=ListGetItemGuardAccessor(59) + | | | | | | +- ID_MATCH: ___check_obj_id(L['self'].param_groups[0]['params'][59], 140561606575504) + | | | | | | +- TENSOR_MATCH: check_tensor(L['self'].param_groups[0]['params'][59], Parameter, DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), torch.float32, device=0, requires_grad=True, size=[3072], stride=[1]) + | | | | | | +- NO_TENSOR_ALIASING + | | | | | | +- GuardManager: source=L['self'].param_groups[0]['params'][59].grad, accessed_by=GradGuardAccessor(grad) + | | | | | | | +- NOT_NONE: L['self'].param_groups[0]['params'][59].grad is not None + | | | | | +- GuardManager: source=L['self'].param_groups[0]['params'][60], accessed_by=ListGetItemGuardAccessor(60) + | | | | | | +- ID_MATCH: ___check_obj_id(L['self'].param_groups[0]['params'][60], 140561606575984) + | | | | | | +- TENSOR_MATCH: check_tensor(L['self'].param_groups[0]['params'][60], Parameter, DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), torch.float32, device=0, requires_grad=True, size=[768, 3072], stride=[3072, 1]) + | | | | | | +- NO_TENSOR_ALIASING + | | | | | | +- GuardManager: source=L['self'].param_groups[0]['params'][60].grad, accessed_by=GradGuardAccessor(grad) + | | | | | | | +- NOT_NONE: L['self'].param_groups[0]['params'][60].grad is not None + | | | | | +- GuardManager: source=L['self'].param_groups[0]['params'][61], accessed_by=ListGetItemGuardAccessor(61) + | | | | | | +- ID_MATCH: ___check_obj_id(L['self'].param_groups[0]['params'][61], 140561606575424) + | | | | | | +- TENSOR_MATCH: check_tensor(L['self'].param_groups[0]['params'][61], Parameter, DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), torch.float32, device=0, requires_grad=True, size=[768], stride=[1]) + | | | | | | +- NO_TENSOR_ALIASING + | | | | | | +- GuardManager: source=L['self'].param_groups[0]['params'][61].grad, accessed_by=GradGuardAccessor(grad) + | | | | | | | +- NOT_NONE: L['self'].param_groups[0]['params'][61].grad is not None + | | | | | +- GuardManager: source=L['self'].param_groups[0]['params'][62], accessed_by=ListGetItemGuardAccessor(62) + | | | | | | +- ID_MATCH: ___check_obj_id(L['self'].param_groups[0]['params'][62], 140561606575824) + | | | | | | +- TENSOR_MATCH: check_tensor(L['self'].param_groups[0]['params'][62], Parameter, DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), torch.float32, device=0, requires_grad=True, size=[768], stride=[1]) + | | | | | | +- NO_TENSOR_ALIASING + | | | | | | +- GuardManager: source=L['self'].param_groups[0]['params'][62].grad, accessed_by=GradGuardAccessor(grad) + | | | | | | | +- 
NOT_NONE: L['self'].param_groups[0]['params'][62].grad is not None + | | | | | +- GuardManager: source=L['self'].param_groups[0]['params'][63], accessed_by=ListGetItemGuardAccessor(63) + | | | | | | +- ID_MATCH: ___check_obj_id(L['self'].param_groups[0]['params'][63], 140561606576784) + | | | | | | +- TENSOR_MATCH: check_tensor(L['self'].param_groups[0]['params'][63], Parameter, DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), torch.float32, device=0, requires_grad=True, size=[768], stride=[1]) + | | | | | | +- NO_TENSOR_ALIASING + | | | | | | +- GuardManager: source=L['self'].param_groups[0]['params'][63].grad, accessed_by=GradGuardAccessor(grad) + | | | | | | | +- NOT_NONE: L['self'].param_groups[0]['params'][63].grad is not None + | | | | | +- GuardManager: source=L['self'].param_groups[0]['params'][64], accessed_by=ListGetItemGuardAccessor(64) + | | | | | | +- ID_MATCH: ___check_obj_id(L['self'].param_groups[0]['params'][64], 140561606574544) + | | | | | | +- TENSOR_MATCH: check_tensor(L['self'].param_groups[0]['params'][64], Parameter, DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), torch.float32, device=0, requires_grad=True, size=[2304, 768], stride=[768, 1]) + | | | | | | +- NO_TENSOR_ALIASING + | | | | | | +- GuardManager: source=L['self'].param_groups[0]['params'][64].grad, accessed_by=GradGuardAccessor(grad) + | | | | | | | +- NOT_NONE: L['self'].param_groups[0]['params'][64].grad is not None + | | | | | +- GuardManager: source=L['self'].param_groups[0]['params'][65], accessed_by=ListGetItemGuardAccessor(65) + | | | | | | +- ID_MATCH: ___check_obj_id(L['self'].param_groups[0]['params'][65], 140561606574704) + | | | | | | +- TENSOR_MATCH: check_tensor(L['self'].param_groups[0]['params'][65], Parameter, DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), torch.float32, device=0, requires_grad=True, size=[2304], stride=[1]) + | | | | | | +- NO_TENSOR_ALIASING + | | | | | | +- GuardManager: source=L['self'].param_groups[0]['params'][65].grad, accessed_by=GradGuardAccessor(grad) + | | | | | | | +- NOT_NONE: L['self'].param_groups[0]['params'][65].grad is not None + | | | | | +- GuardManager: source=L['self'].param_groups[0]['params'][66], accessed_by=ListGetItemGuardAccessor(66) + | | | | | | +- ID_MATCH: ___check_obj_id(L['self'].param_groups[0]['params'][66], 140561606575024) + | | | | | | +- TENSOR_MATCH: check_tensor(L['self'].param_groups[0]['params'][66], Parameter, DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), torch.float32, device=0, requires_grad=True, size=[768, 768], stride=[768, 1]) + | | | | | | +- NO_TENSOR_ALIASING + | | | | | | +- GuardManager: source=L['self'].param_groups[0]['params'][66].grad, accessed_by=GradGuardAccessor(grad) + | | | | | | | +- NOT_NONE: L['self'].param_groups[0]['params'][66].grad is not None + | | | | | +- GuardManager: source=L['self'].param_groups[0]['params'][67], accessed_by=ListGetItemGuardAccessor(67) + | | | | | | +- ID_MATCH: ___check_obj_id(L['self'].param_groups[0]['params'][67], 140561606585664) + | | | | | | +- TENSOR_MATCH: check_tensor(L['self'].param_groups[0]['params'][67], Parameter, DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), torch.float32, device=0, requires_grad=True, size=[768], stride=[1]) + | | | | | | +- NO_TENSOR_ALIASING + | | | | | | +- GuardManager: source=L['self'].param_groups[0]['params'][67].grad, accessed_by=GradGuardAccessor(grad) + | | | | | | | +- NOT_NONE: L['self'].param_groups[0]['params'][67].grad is not 
None + | | | | | +- GuardManager: source=L['self'].param_groups[0]['params'][68], accessed_by=ListGetItemGuardAccessor(68) + | | | | | | +- ID_MATCH: ___check_obj_id(L['self'].param_groups[0]['params'][68], 140561606575264) + | | | | | | +- TENSOR_MATCH: check_tensor(L['self'].param_groups[0]['params'][68], Parameter, DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), torch.float32, device=0, requires_grad=True, size=[768], stride=[1]) + | | | | | | +- NO_TENSOR_ALIASING + | | | | | | +- GuardManager: source=L['self'].param_groups[0]['params'][68].grad, accessed_by=GradGuardAccessor(grad) + | | | | | | | +- NOT_NONE: L['self'].param_groups[0]['params'][68].grad is not None + | | | | | +- GuardManager: source=L['self'].param_groups[0]['params'][69], accessed_by=ListGetItemGuardAccessor(69) + | | | | | | +- ID_MATCH: ___check_obj_id(L['self'].param_groups[0]['params'][69], 140561606576304) + | | | | | | +- TENSOR_MATCH: check_tensor(L['self'].param_groups[0]['params'][69], Parameter, DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), torch.float32, device=0, requires_grad=True, size=[768], stride=[1]) + | | | | | | +- NO_TENSOR_ALIASING + | | | | | | +- GuardManager: source=L['self'].param_groups[0]['params'][69].grad, accessed_by=GradGuardAccessor(grad) + | | | | | | | +- NOT_NONE: L['self'].param_groups[0]['params'][69].grad is not None + | | | | | +- GuardManager: source=L['self'].param_groups[0]['params'][70], accessed_by=ListGetItemGuardAccessor(70) + | | | | | | +- ID_MATCH: ___check_obj_id(L['self'].param_groups[0]['params'][70], 140561606581664) + | | | | | | +- TENSOR_MATCH: check_tensor(L['self'].param_groups[0]['params'][70], Parameter, DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), torch.float32, device=0, requires_grad=True, size=[3072, 768], stride=[768, 1]) + | | | | | | +- NO_TENSOR_ALIASING + | | | | | | +- GuardManager: source=L['self'].param_groups[0]['params'][70].grad, accessed_by=GradGuardAccessor(grad) + | | | | | | | +- NOT_NONE: L['self'].param_groups[0]['params'][70].grad is not None + | | | | | +- GuardManager: source=L['self'].param_groups[0]['params'][71], accessed_by=ListGetItemGuardAccessor(71) + | | | | | | +- ID_MATCH: ___check_obj_id(L['self'].param_groups[0]['params'][71], 140561606582304) + | | | | | | +- TENSOR_MATCH: check_tensor(L['self'].param_groups[0]['params'][71], Parameter, DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), torch.float32, device=0, requires_grad=True, size=[3072], stride=[1]) + | | | | | | +- NO_TENSOR_ALIASING + | | | | | | +- GuardManager: source=L['self'].param_groups[0]['params'][71].grad, accessed_by=GradGuardAccessor(grad) + | | | | | | | +- NOT_NONE: L['self'].param_groups[0]['params'][71].grad is not None + | | | | | +- GuardManager: source=L['self'].param_groups[0]['params'][72], accessed_by=ListGetItemGuardAccessor(72) + | | | | | | +- ID_MATCH: ___check_obj_id(L['self'].param_groups[0]['params'][72], 140561606584064) + | | | | | | +- TENSOR_MATCH: check_tensor(L['self'].param_groups[0]['params'][72], Parameter, DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), torch.float32, device=0, requires_grad=True, size=[768, 3072], stride=[3072, 1]) + | | | | | | +- NO_TENSOR_ALIASING + | | | | | | +- GuardManager: source=L['self'].param_groups[0]['params'][72].grad, accessed_by=GradGuardAccessor(grad) + | | | | | | | +- NOT_NONE: L['self'].param_groups[0]['params'][72].grad is not None + | | | | | +- GuardManager: 
source=L['self'].param_groups[0]['params'][73], accessed_by=ListGetItemGuardAccessor(73) + | | | | | | +- ID_MATCH: ___check_obj_id(L['self'].param_groups[0]['params'][73], 140561606580704) + | | | | | | +- TENSOR_MATCH: check_tensor(L['self'].param_groups[0]['params'][73], Parameter, DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), torch.float32, device=0, requires_grad=True, size=[768], stride=[1]) + | | | | | | +- NO_TENSOR_ALIASING + | | | | | | +- GuardManager: source=L['self'].param_groups[0]['params'][73].grad, accessed_by=GradGuardAccessor(grad) + | | | | | | | +- NOT_NONE: L['self'].param_groups[0]['params'][73].grad is not None + | | | | | +- GuardManager: source=L['self'].param_groups[0]['params'][74], accessed_by=ListGetItemGuardAccessor(74) + | | | | | | +- ID_MATCH: ___check_obj_id(L['self'].param_groups[0]['params'][74], 140561606583264) + | | | | | | +- TENSOR_MATCH: check_tensor(L['self'].param_groups[0]['params'][74], Parameter, DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), torch.float32, device=0, requires_grad=True, size=[768], stride=[1]) + | | | | | | +- NO_TENSOR_ALIASING + | | | | | | +- GuardManager: source=L['self'].param_groups[0]['params'][74].grad, accessed_by=GradGuardAccessor(grad) + | | | | | | | +- NOT_NONE: L['self'].param_groups[0]['params'][74].grad is not None + | | | | | +- GuardManager: source=L['self'].param_groups[0]['params'][75], accessed_by=ListGetItemGuardAccessor(75) + | | | | | | +- ID_MATCH: ___check_obj_id(L['self'].param_groups[0]['params'][75], 140561606575344) + | | | | | | +- TENSOR_MATCH: check_tensor(L['self'].param_groups[0]['params'][75], Parameter, DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), torch.float32, device=0, requires_grad=True, size=[768], stride=[1]) + | | | | | | +- NO_TENSOR_ALIASING + | | | | | | +- GuardManager: source=L['self'].param_groups[0]['params'][75].grad, accessed_by=GradGuardAccessor(grad) + | | | | | | | +- NOT_NONE: L['self'].param_groups[0]['params'][75].grad is not None + | | | | | +- GuardManager: source=L['self'].param_groups[0]['params'][76], accessed_by=ListGetItemGuardAccessor(76) + | | | | | | +- ID_MATCH: ___check_obj_id(L['self'].param_groups[0]['params'][76], 140561606576544) + | | | | | | +- TENSOR_MATCH: check_tensor(L['self'].param_groups[0]['params'][76], Parameter, DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), torch.float32, device=0, requires_grad=True, size=[2304, 768], stride=[768, 1]) + | | | | | | +- NO_TENSOR_ALIASING + | | | | | | +- GuardManager: source=L['self'].param_groups[0]['params'][76].grad, accessed_by=GradGuardAccessor(grad) + | | | | | | | +- NOT_NONE: L['self'].param_groups[0]['params'][76].grad is not None + | | | | | +- GuardManager: source=L['self'].param_groups[0]['params'][77], accessed_by=ListGetItemGuardAccessor(77) + | | | | | | +- ID_MATCH: ___check_obj_id(L['self'].param_groups[0]['params'][77], 140561606577344) + | | | | | | +- TENSOR_MATCH: check_tensor(L['self'].param_groups[0]['params'][77], Parameter, DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), torch.float32, device=0, requires_grad=True, size=[2304], stride=[1]) + | | | | | | +- NO_TENSOR_ALIASING + | | | | | | +- GuardManager: source=L['self'].param_groups[0]['params'][77].grad, accessed_by=GradGuardAccessor(grad) + | | | | | | | +- NOT_NONE: L['self'].param_groups[0]['params'][77].grad is not None + | | | | | +- GuardManager: source=L['self'].param_groups[0]['params'][78], 
accessed_by=ListGetItemGuardAccessor(78) + | | | | | | +- ID_MATCH: ___check_obj_id(L['self'].param_groups[0]['params'][78], 140561606578304) + | | | | | | +- TENSOR_MATCH: check_tensor(L['self'].param_groups[0]['params'][78], Parameter, DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), torch.float32, device=0, requires_grad=True, size=[768, 768], stride=[768, 1]) + | | | | | | +- NO_TENSOR_ALIASING + | | | | | | +- GuardManager: source=L['self'].param_groups[0]['params'][78].grad, accessed_by=GradGuardAccessor(grad) + | | | | | | | +- NOT_NONE: L['self'].param_groups[0]['params'][78].grad is not None + | | | | | +- GuardManager: source=L['self'].param_groups[0]['params'][79], accessed_by=ListGetItemGuardAccessor(79) + | | | | | | +- ID_MATCH: ___check_obj_id(L['self'].param_groups[0]['params'][79], 140561606575584) + | | | | | | +- TENSOR_MATCH: check_tensor(L['self'].param_groups[0]['params'][79], Parameter, DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), torch.float32, device=0, requires_grad=True, size=[768], stride=[1]) + | | | | | | +- NO_TENSOR_ALIASING + | | | | | | +- GuardManager: source=L['self'].param_groups[0]['params'][79].grad, accessed_by=GradGuardAccessor(grad) + | | | | | | | +- NOT_NONE: L['self'].param_groups[0]['params'][79].grad is not None + | | | | | +- GuardManager: source=L['self'].param_groups[0]['params'][80], accessed_by=ListGetItemGuardAccessor(80) + | | | | | | +- ID_MATCH: ___check_obj_id(L['self'].param_groups[0]['params'][80], 140561606579824) + | | | | | | +- TENSOR_MATCH: check_tensor(L['self'].param_groups[0]['params'][80], Parameter, DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), torch.float32, device=0, requires_grad=True, size=[768], stride=[1]) + | | | | | | +- NO_TENSOR_ALIASING + | | | | | | +- GuardManager: source=L['self'].param_groups[0]['params'][80].grad, accessed_by=GradGuardAccessor(grad) + | | | | | | | +- NOT_NONE: L['self'].param_groups[0]['params'][80].grad is not None + | | | | | +- GuardManager: source=L['self'].param_groups[0]['params'][81], accessed_by=ListGetItemGuardAccessor(81) + | | | | | | +- ID_MATCH: ___check_obj_id(L['self'].param_groups[0]['params'][81], 140561606585024) + | | | | | | +- TENSOR_MATCH: check_tensor(L['self'].param_groups[0]['params'][81], Parameter, DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), torch.float32, device=0, requires_grad=True, size=[768], stride=[1]) + | | | | | | +- NO_TENSOR_ALIASING + | | | | | | +- GuardManager: source=L['self'].param_groups[0]['params'][81].grad, accessed_by=GradGuardAccessor(grad) + | | | | | | | +- NOT_NONE: L['self'].param_groups[0]['params'][81].grad is not None + | | | | | +- GuardManager: source=L['self'].param_groups[0]['params'][82], accessed_by=ListGetItemGuardAccessor(82) + | | | | | | +- ID_MATCH: ___check_obj_id(L['self'].param_groups[0]['params'][82], 140561606584944) + | | | | | | +- TENSOR_MATCH: check_tensor(L['self'].param_groups[0]['params'][82], Parameter, DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), torch.float32, device=0, requires_grad=True, size=[3072, 768], stride=[768, 1]) + | | | | | | +- NO_TENSOR_ALIASING + | | | | | | +- GuardManager: source=L['self'].param_groups[0]['params'][82].grad, accessed_by=GradGuardAccessor(grad) + | | | | | | | +- NOT_NONE: L['self'].param_groups[0]['params'][82].grad is not None + | | | | | +- GuardManager: source=L['self'].param_groups[0]['params'][83], accessed_by=ListGetItemGuardAccessor(83) + | | | | | | +- 
ID_MATCH: ___check_obj_id(L['self'].param_groups[0]['params'][83], 140561606585104) + | | | | | | +- TENSOR_MATCH: check_tensor(L['self'].param_groups[0]['params'][83], Parameter, DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), torch.float32, device=0, requires_grad=True, size=[3072], stride=[1]) + | | | | | | +- NO_TENSOR_ALIASING + | | | | | | +- GuardManager: source=L['self'].param_groups[0]['params'][83].grad, accessed_by=GradGuardAccessor(grad) + | | | | | | | +- NOT_NONE: L['self'].param_groups[0]['params'][83].grad is not None + | | | | | +- GuardManager: source=L['self'].param_groups[0]['params'][84], accessed_by=ListGetItemGuardAccessor(84) + | | | | | | +- ID_MATCH: ___check_obj_id(L['self'].param_groups[0]['params'][84], 140561606583744) + | | | | | | +- TENSOR_MATCH: check_tensor(L['self'].param_groups[0]['params'][84], Parameter, DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), torch.float32, device=0, requires_grad=True, size=[768, 3072], stride=[3072, 1]) + | | | | | | +- NO_TENSOR_ALIASING + | | | | | | +- GuardManager: source=L['self'].param_groups[0]['params'][84].grad, accessed_by=GradGuardAccessor(grad) + | | | | | | | +- NOT_NONE: L['self'].param_groups[0]['params'][84].grad is not None + | | | | | +- GuardManager: source=L['self'].param_groups[0]['params'][85], accessed_by=ListGetItemGuardAccessor(85) + | | | | | | +- ID_MATCH: ___check_obj_id(L['self'].param_groups[0]['params'][85], 140561606582464) + | | | | | | +- TENSOR_MATCH: check_tensor(L['self'].param_groups[0]['params'][85], Parameter, DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), torch.float32, device=0, requires_grad=True, size=[768], stride=[1]) + | | | | | | +- NO_TENSOR_ALIASING + | | | | | | +- GuardManager: source=L['self'].param_groups[0]['params'][85].grad, accessed_by=GradGuardAccessor(grad) + | | | | | | | +- NOT_NONE: L['self'].param_groups[0]['params'][85].grad is not None + | | | | | +- GuardManager: source=L['self'].param_groups[0]['params'][86], accessed_by=ListGetItemGuardAccessor(86) + | | | | | | +- ID_MATCH: ___check_obj_id(L['self'].param_groups[0]['params'][86], 140561606585584) + | | | | | | +- TENSOR_MATCH: check_tensor(L['self'].param_groups[0]['params'][86], Parameter, DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), torch.float32, device=0, requires_grad=True, size=[768], stride=[1]) + | | | | | | +- NO_TENSOR_ALIASING + | | | | | | +- GuardManager: source=L['self'].param_groups[0]['params'][86].grad, accessed_by=GradGuardAccessor(grad) + | | | | | | | +- NOT_NONE: L['self'].param_groups[0]['params'][86].grad is not None + | | | | | +- GuardManager: source=L['self'].param_groups[0]['params'][87], accessed_by=ListGetItemGuardAccessor(87) + | | | | | | +- ID_MATCH: ___check_obj_id(L['self'].param_groups[0]['params'][87], 140561606578864) + | | | | | | +- TENSOR_MATCH: check_tensor(L['self'].param_groups[0]['params'][87], Parameter, DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), torch.float32, device=0, requires_grad=True, size=[768], stride=[1]) + | | | | | | +- NO_TENSOR_ALIASING + | | | | | | +- GuardManager: source=L['self'].param_groups[0]['params'][87].grad, accessed_by=GradGuardAccessor(grad) + | | | | | | | +- NOT_NONE: L['self'].param_groups[0]['params'][87].grad is not None + | | | | | +- GuardManager: source=L['self'].param_groups[0]['params'][88], accessed_by=ListGetItemGuardAccessor(88) + | | | | | | +- ID_MATCH: ___check_obj_id(L['self'].param_groups[0]['params'][88], 
140561606583344) + | | | | | | +- TENSOR_MATCH: check_tensor(L['self'].param_groups[0]['params'][88], Parameter, DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), torch.float32, device=0, requires_grad=True, size=[2304, 768], stride=[768, 1]) + | | | | | | +- NO_TENSOR_ALIASING + | | | | | | +- GuardManager: source=L['self'].param_groups[0]['params'][88].grad, accessed_by=GradGuardAccessor(grad) + | | | | | | | +- NOT_NONE: L['self'].param_groups[0]['params'][88].grad is not None + | | | | | +- GuardManager: source=L['self'].param_groups[0]['params'][89], accessed_by=ListGetItemGuardAccessor(89) + | | | | | | +- ID_MATCH: ___check_obj_id(L['self'].param_groups[0]['params'][89], 140561606583984) + | | | | | | +- TENSOR_MATCH: check_tensor(L['self'].param_groups[0]['params'][89], Parameter, DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), torch.float32, device=0, requires_grad=True, size=[2304], stride=[1]) + | | | | | | +- NO_TENSOR_ALIASING + | | | | | | +- GuardManager: source=L['self'].param_groups[0]['params'][89].grad, accessed_by=GradGuardAccessor(grad) + | | | | | | | +- NOT_NONE: L['self'].param_groups[0]['params'][89].grad is not None + | | | | | +- GuardManager: source=L['self'].param_groups[0]['params'][90], accessed_by=ListGetItemGuardAccessor(90) + | | | | | | +- ID_MATCH: ___check_obj_id(L['self'].param_groups[0]['params'][90], 140561606584224) + | | | | | | +- TENSOR_MATCH: check_tensor(L['self'].param_groups[0]['params'][90], Parameter, DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), torch.float32, device=0, requires_grad=True, size=[768, 768], stride=[768, 1]) + | | | | | | +- NO_TENSOR_ALIASING + | | | | | | +- GuardManager: source=L['self'].param_groups[0]['params'][90].grad, accessed_by=GradGuardAccessor(grad) + | | | | | | | +- NOT_NONE: L['self'].param_groups[0]['params'][90].grad is not None + | | | | | +- GuardManager: source=L['self'].param_groups[0]['params'][91], accessed_by=ListGetItemGuardAccessor(91) + | | | | | | +- ID_MATCH: ___check_obj_id(L['self'].param_groups[0]['params'][91], 140561606582784) + | | | | | | +- TENSOR_MATCH: check_tensor(L['self'].param_groups[0]['params'][91], Parameter, DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), torch.float32, device=0, requires_grad=True, size=[768], stride=[1]) + | | | | | | +- NO_TENSOR_ALIASING + | | | | | | +- GuardManager: source=L['self'].param_groups[0]['params'][91].grad, accessed_by=GradGuardAccessor(grad) + | | | | | | | +- NOT_NONE: L['self'].param_groups[0]['params'][91].grad is not None + | | | | | +- GuardManager: source=L['self'].param_groups[0]['params'][92], accessed_by=ListGetItemGuardAccessor(92) + | | | | | | +- ID_MATCH: ___check_obj_id(L['self'].param_groups[0]['params'][92], 140561606584784) + | | | | | | +- TENSOR_MATCH: check_tensor(L['self'].param_groups[0]['params'][92], Parameter, DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), torch.float32, device=0, requires_grad=True, size=[768], stride=[1]) + | | | | | | +- NO_TENSOR_ALIASING + | | | | | | +- GuardManager: source=L['self'].param_groups[0]['params'][92].grad, accessed_by=GradGuardAccessor(grad) + | | | | | | | +- NOT_NONE: L['self'].param_groups[0]['params'][92].grad is not None + | | | | | +- GuardManager: source=L['self'].param_groups[0]['params'][93], accessed_by=ListGetItemGuardAccessor(93) + | | | | | | +- ID_MATCH: ___check_obj_id(L['self'].param_groups[0]['params'][93], 140561606574944) + | | | | | | +- TENSOR_MATCH: 
check_tensor(L['self'].param_groups[0]['params'][93], Parameter, DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), torch.float32, device=0, requires_grad=True, size=[768], stride=[1]) + | | | | | | +- NO_TENSOR_ALIASING + | | | | | | +- GuardManager: source=L['self'].param_groups[0]['params'][93].grad, accessed_by=GradGuardAccessor(grad) + | | | | | | | +- NOT_NONE: L['self'].param_groups[0]['params'][93].grad is not None + | | | | | +- GuardManager: source=L['self'].param_groups[0]['params'][94], accessed_by=ListGetItemGuardAccessor(94) + | | | | | | +- ID_MATCH: ___check_obj_id(L['self'].param_groups[0]['params'][94], 140561606581824) + | | | | | | +- TENSOR_MATCH: check_tensor(L['self'].param_groups[0]['params'][94], Parameter, DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), torch.float32, device=0, requires_grad=True, size=[3072, 768], stride=[768, 1]) + | | | | | | +- NO_TENSOR_ALIASING + | | | | | | +- GuardManager: source=L['self'].param_groups[0]['params'][94].grad, accessed_by=GradGuardAccessor(grad) + | | | | | | | +- NOT_NONE: L['self'].param_groups[0]['params'][94].grad is not None + | | | | | +- GuardManager: source=L['self'].param_groups[0]['params'][95], accessed_by=ListGetItemGuardAccessor(95) + | | | | | | +- ID_MATCH: ___check_obj_id(L['self'].param_groups[0]['params'][95], 140561606582624) + | | | | | | +- TENSOR_MATCH: check_tensor(L['self'].param_groups[0]['params'][95], Parameter, DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), torch.float32, device=0, requires_grad=True, size=[3072], stride=[1]) + | | | | | | +- NO_TENSOR_ALIASING + | | | | | | +- GuardManager: source=L['self'].param_groups[0]['params'][95].grad, accessed_by=GradGuardAccessor(grad) + | | | | | | | +- NOT_NONE: L['self'].param_groups[0]['params'][95].grad is not None + | | | | | +- GuardManager: source=L['self'].param_groups[0]['params'][96], accessed_by=ListGetItemGuardAccessor(96) + | | | | | | +- ID_MATCH: ___check_obj_id(L['self'].param_groups[0]['params'][96], 140561606582944) + | | | | | | +- TENSOR_MATCH: check_tensor(L['self'].param_groups[0]['params'][96], Parameter, DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), torch.float32, device=0, requires_grad=True, size=[768, 3072], stride=[3072, 1]) + | | | | | | +- NO_TENSOR_ALIASING + | | | | | | +- GuardManager: source=L['self'].param_groups[0]['params'][96].grad, accessed_by=GradGuardAccessor(grad) + | | | | | | | +- NOT_NONE: L['self'].param_groups[0]['params'][96].grad is not None + | | | | | +- GuardManager: source=L['self'].param_groups[0]['params'][97], accessed_by=ListGetItemGuardAccessor(97) + | | | | | | +- ID_MATCH: ___check_obj_id(L['self'].param_groups[0]['params'][97], 140561606581904) + | | | | | | +- TENSOR_MATCH: check_tensor(L['self'].param_groups[0]['params'][97], Parameter, DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), torch.float32, device=0, requires_grad=True, size=[768], stride=[1]) + | | | | | | +- NO_TENSOR_ALIASING + | | | | | | +- GuardManager: source=L['self'].param_groups[0]['params'][97].grad, accessed_by=GradGuardAccessor(grad) + | | | | | | | +- NOT_NONE: L['self'].param_groups[0]['params'][97].grad is not None + | | | | | +- GuardManager: source=L['self'].param_groups[0]['params'][98], accessed_by=ListGetItemGuardAccessor(98) + | | | | | | +- ID_MATCH: ___check_obj_id(L['self'].param_groups[0]['params'][98], 140561606582864) + | | | | | | +- TENSOR_MATCH: check_tensor(L['self'].param_groups[0]['params'][98], 
Parameter, DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), torch.float32, device=0, requires_grad=True, size=[768], stride=[1]) + | | | | | | +- NO_TENSOR_ALIASING + | | | | | | +- GuardManager: source=L['self'].param_groups[0]['params'][98].grad, accessed_by=GradGuardAccessor(grad) + | | | | | | | +- NOT_NONE: L['self'].param_groups[0]['params'][98].grad is not None + | | | | | +- GuardManager: source=L['self'].param_groups[0]['params'][99], accessed_by=ListGetItemGuardAccessor(99) + | | | | | | +- ID_MATCH: ___check_obj_id(L['self'].param_groups[0]['params'][99], 140561606584304) + | | | | | | +- TENSOR_MATCH: check_tensor(L['self'].param_groups[0]['params'][99], Parameter, DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), torch.float32, device=0, requires_grad=True, size=[768], stride=[1]) + | | | | | | +- NO_TENSOR_ALIASING + | | | | | | +- GuardManager: source=L['self'].param_groups[0]['params'][99].grad, accessed_by=GradGuardAccessor(grad) + | | | | | | | +- NOT_NONE: L['self'].param_groups[0]['params'][99].grad is not None + | | | | | +- GuardManager: source=L['self'].param_groups[0]['params'][100], accessed_by=ListGetItemGuardAccessor(100) + | | | | | | +- ID_MATCH: ___check_obj_id(L['self'].param_groups[0]['params'][100], 140561606580784) + | | | | | | +- TENSOR_MATCH: check_tensor(L['self'].param_groups[0]['params'][100], Parameter, DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), torch.float32, device=0, requires_grad=True, size=[2304, 768], stride=[768, 1]) + | | | | | | +- NO_TENSOR_ALIASING + | | | | | | +- GuardManager: source=L['self'].param_groups[0]['params'][100].grad, accessed_by=GradGuardAccessor(grad) + | | | | | | | +- NOT_NONE: L['self'].param_groups[0]['params'][100].grad is not None + | | | | | +- GuardManager: source=L['self'].param_groups[0]['params'][101], accessed_by=ListGetItemGuardAccessor(101) + | | | | | | +- ID_MATCH: ___check_obj_id(L['self'].param_groups[0]['params'][101], 140561606581024) + | | | | | | +- TENSOR_MATCH: check_tensor(L['self'].param_groups[0]['params'][101], Parameter, DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), torch.float32, device=0, requires_grad=True, size=[2304], stride=[1]) + | | | | | | +- NO_TENSOR_ALIASING + | | | | | | +- GuardManager: source=L['self'].param_groups[0]['params'][101].grad, accessed_by=GradGuardAccessor(grad) + | | | | | | | +- NOT_NONE: L['self'].param_groups[0]['params'][101].grad is not None + | | | | | +- GuardManager: source=L['self'].param_groups[0]['params'][102], accessed_by=ListGetItemGuardAccessor(102) + | | | | | | +- ID_MATCH: ___check_obj_id(L['self'].param_groups[0]['params'][102], 140561606581264) + | | | | | | +- TENSOR_MATCH: check_tensor(L['self'].param_groups[0]['params'][102], Parameter, DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), torch.float32, device=0, requires_grad=True, size=[768, 768], stride=[768, 1]) + | | | | | | +- NO_TENSOR_ALIASING + | | | | | | +- GuardManager: source=L['self'].param_groups[0]['params'][102].grad, accessed_by=GradGuardAccessor(grad) + | | | | | | | +- NOT_NONE: L['self'].param_groups[0]['params'][102].grad is not None + | | | | | +- GuardManager: source=L['self'].param_groups[0]['params'][103], accessed_by=ListGetItemGuardAccessor(103) + | | | | | | +- ID_MATCH: ___check_obj_id(L['self'].param_groups[0]['params'][103], 140561606580544) + | | | | | | +- TENSOR_MATCH: check_tensor(L['self'].param_groups[0]['params'][103], Parameter, DispatchKeySet(CUDA, 
BackendSelect, ADInplaceOrView, AutogradCUDA), torch.float32, device=0, requires_grad=True, size=[768], stride=[1]) + | | | | | | +- NO_TENSOR_ALIASING + | | | | | | +- GuardManager: source=L['self'].param_groups[0]['params'][103].grad, accessed_by=GradGuardAccessor(grad) + | | | | | | | +- NOT_NONE: L['self'].param_groups[0]['params'][103].grad is not None + | | | | | +- GuardManager: source=L['self'].param_groups[0]['params'][104], accessed_by=ListGetItemGuardAccessor(104) + | | | | | | +- ID_MATCH: ___check_obj_id(L['self'].param_groups[0]['params'][104], 140561606581504) + | | | | | | +- TENSOR_MATCH: check_tensor(L['self'].param_groups[0]['params'][104], Parameter, DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), torch.float32, device=0, requires_grad=True, size=[768], stride=[1]) + | | | | | | +- NO_TENSOR_ALIASING + | | | | | | +- GuardManager: source=L['self'].param_groups[0]['params'][104].grad, accessed_by=GradGuardAccessor(grad) + | | | | | | | +- NOT_NONE: L['self'].param_groups[0]['params'][104].grad is not None + | | | | | +- GuardManager: source=L['self'].param_groups[0]['params'][105], accessed_by=ListGetItemGuardAccessor(105) + | | | | | | +- ID_MATCH: ___check_obj_id(L['self'].param_groups[0]['params'][105], 140561606580624) + | | | | | | +- TENSOR_MATCH: check_tensor(L['self'].param_groups[0]['params'][105], Parameter, DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), torch.float32, device=0, requires_grad=True, size=[768], stride=[1]) + | | | | | | +- NO_TENSOR_ALIASING + | | | | | | +- GuardManager: source=L['self'].param_groups[0]['params'][105].grad, accessed_by=GradGuardAccessor(grad) + | | | | | | | +- NOT_NONE: L['self'].param_groups[0]['params'][105].grad is not None + | | | | | +- GuardManager: source=L['self'].param_groups[0]['params'][106], accessed_by=ListGetItemGuardAccessor(106) + | | | | | | +- ID_MATCH: ___check_obj_id(L['self'].param_groups[0]['params'][106], 140561606579344) + | | | | | | +- TENSOR_MATCH: check_tensor(L['self'].param_groups[0]['params'][106], Parameter, DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), torch.float32, device=0, requires_grad=True, size=[3072, 768], stride=[768, 1]) + | | | | | | +- NO_TENSOR_ALIASING + | | | | | | +- GuardManager: source=L['self'].param_groups[0]['params'][106].grad, accessed_by=GradGuardAccessor(grad) + | | | | | | | +- NOT_NONE: L['self'].param_groups[0]['params'][106].grad is not None + | | | | | +- GuardManager: source=L['self'].param_groups[0]['params'][107], accessed_by=ListGetItemGuardAccessor(107) + | | | | | | +- ID_MATCH: ___check_obj_id(L['self'].param_groups[0]['params'][107], 140561606579504) + | | | | | | +- TENSOR_MATCH: check_tensor(L['self'].param_groups[0]['params'][107], Parameter, DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), torch.float32, device=0, requires_grad=True, size=[3072], stride=[1]) + | | | | | | +- NO_TENSOR_ALIASING + | | | | | | +- GuardManager: source=L['self'].param_groups[0]['params'][107].grad, accessed_by=GradGuardAccessor(grad) + | | | | | | | +- NOT_NONE: L['self'].param_groups[0]['params'][107].grad is not None + | | | | | +- GuardManager: source=L['self'].param_groups[0]['params'][108], accessed_by=ListGetItemGuardAccessor(108) + | | | | | | +- ID_MATCH: ___check_obj_id(L['self'].param_groups[0]['params'][108], 140561606579664) + | | | | | | +- TENSOR_MATCH: check_tensor(L['self'].param_groups[0]['params'][108], Parameter, DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, 
AutogradCUDA), torch.float32, device=0, requires_grad=True, size=[768, 3072], stride=[3072, 1]) + | | | | | | +- NO_TENSOR_ALIASING + | | | | | | +- GuardManager: source=L['self'].param_groups[0]['params'][108].grad, accessed_by=GradGuardAccessor(grad) + | | | | | | | +- NOT_NONE: L['self'].param_groups[0]['params'][108].grad is not None + | | | | | +- GuardManager: source=L['self'].param_groups[0]['params'][109], accessed_by=ListGetItemGuardAccessor(109) + | | | | | | +- ID_MATCH: ___check_obj_id(L['self'].param_groups[0]['params'][109], 140561606579264) + | | | | | | +- TENSOR_MATCH: check_tensor(L['self'].param_groups[0]['params'][109], Parameter, DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), torch.float32, device=0, requires_grad=True, size=[768], stride=[1]) + | | | | | | +- NO_TENSOR_ALIASING + | | | | | | +- GuardManager: source=L['self'].param_groups[0]['params'][109].grad, accessed_by=GradGuardAccessor(grad) + | | | | | | | +- NOT_NONE: L['self'].param_groups[0]['params'][109].grad is not None + | | | | | +- GuardManager: source=L['self'].param_groups[0]['params'][110], accessed_by=ListGetItemGuardAccessor(110) + | | | | | | +- ID_MATCH: ___check_obj_id(L['self'].param_groups[0]['params'][110], 140561606577024) + | | | | | | +- TENSOR_MATCH: check_tensor(L['self'].param_groups[0]['params'][110], Parameter, DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), torch.float32, device=0, requires_grad=True, size=[768], stride=[1]) + | | | | | | +- NO_TENSOR_ALIASING + | | | | | | +- GuardManager: source=L['self'].param_groups[0]['params'][110].grad, accessed_by=GradGuardAccessor(grad) + | | | | | | | +- NOT_NONE: L['self'].param_groups[0]['params'][110].grad is not None + | | | | | +- GuardManager: source=L['self'].param_groups[0]['params'][111], accessed_by=ListGetItemGuardAccessor(111) + | | | | | | +- ID_MATCH: ___check_obj_id(L['self'].param_groups[0]['params'][111], 140561606579184) + | | | | | | +- TENSOR_MATCH: check_tensor(L['self'].param_groups[0]['params'][111], Parameter, DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), torch.float32, device=0, requires_grad=True, size=[768], stride=[1]) + | | | | | | +- NO_TENSOR_ALIASING + | | | | | | +- GuardManager: source=L['self'].param_groups[0]['params'][111].grad, accessed_by=GradGuardAccessor(grad) + | | | | | | | +- NOT_NONE: L['self'].param_groups[0]['params'][111].grad is not None + | | | | | +- GuardManager: source=L['self'].param_groups[0]['params'][112], accessed_by=ListGetItemGuardAccessor(112) + | | | | | | +- ID_MATCH: ___check_obj_id(L['self'].param_groups[0]['params'][112], 140561606578064) + | | | | | | +- TENSOR_MATCH: check_tensor(L['self'].param_groups[0]['params'][112], Parameter, DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), torch.float32, device=0, requires_grad=True, size=[2304, 768], stride=[768, 1]) + | | | | | | +- NO_TENSOR_ALIASING + | | | | | | +- GuardManager: source=L['self'].param_groups[0]['params'][112].grad, accessed_by=GradGuardAccessor(grad) + | | | | | | | +- NOT_NONE: L['self'].param_groups[0]['params'][112].grad is not None + | | | | | +- GuardManager: source=L['self'].param_groups[0]['params'][113], accessed_by=ListGetItemGuardAccessor(113) + | | | | | | +- ID_MATCH: ___check_obj_id(L['self'].param_groups[0]['params'][113], 140561606575744) + | | | | | | +- TENSOR_MATCH: check_tensor(L['self'].param_groups[0]['params'][113], Parameter, DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), torch.float32, 
device=0, requires_grad=True, size=[2304], stride=[1]) + | | | | | | +- NO_TENSOR_ALIASING + | | | | | | +- GuardManager: source=L['self'].param_groups[0]['params'][113].grad, accessed_by=GradGuardAccessor(grad) + | | | | | | | +- NOT_NONE: L['self'].param_groups[0]['params'][113].grad is not None + | | | | | +- GuardManager: source=L['self'].param_groups[0]['params'][114], accessed_by=ListGetItemGuardAccessor(114) + | | | | | | +- ID_MATCH: ___check_obj_id(L['self'].param_groups[0]['params'][114], 140561606578224) + | | | | | | +- TENSOR_MATCH: check_tensor(L['self'].param_groups[0]['params'][114], Parameter, DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), torch.float32, device=0, requires_grad=True, size=[768, 768], stride=[768, 1]) + | | | | | | +- NO_TENSOR_ALIASING + | | | | | | +- GuardManager: source=L['self'].param_groups[0]['params'][114].grad, accessed_by=GradGuardAccessor(grad) + | | | | | | | +- NOT_NONE: L['self'].param_groups[0]['params'][114].grad is not None + | | | | | +- GuardManager: source=L['self'].param_groups[0]['params'][115], accessed_by=ListGetItemGuardAccessor(115) + | | | | | | +- ID_MATCH: ___check_obj_id(L['self'].param_groups[0]['params'][115], 140561606577584) + | | | | | | +- TENSOR_MATCH: check_tensor(L['self'].param_groups[0]['params'][115], Parameter, DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), torch.float32, device=0, requires_grad=True, size=[768], stride=[1]) + | | | | | | +- NO_TENSOR_ALIASING + | | | | | | +- GuardManager: source=L['self'].param_groups[0]['params'][115].grad, accessed_by=GradGuardAccessor(grad) + | | | | | | | +- NOT_NONE: L['self'].param_groups[0]['params'][115].grad is not None + | | | | | +- GuardManager: source=L['self'].param_groups[0]['params'][116], accessed_by=ListGetItemGuardAccessor(116) + | | | | | | +- ID_MATCH: ___check_obj_id(L['self'].param_groups[0]['params'][116], 140561606578704) + | | | | | | +- TENSOR_MATCH: check_tensor(L['self'].param_groups[0]['params'][116], Parameter, DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), torch.float32, device=0, requires_grad=True, size=[768], stride=[1]) + | | | | | | +- NO_TENSOR_ALIASING + | | | | | | +- GuardManager: source=L['self'].param_groups[0]['params'][116].grad, accessed_by=GradGuardAccessor(grad) + | | | | | | | +- NOT_NONE: L['self'].param_groups[0]['params'][116].grad is not None + | | | | | +- GuardManager: source=L['self'].param_groups[0]['params'][117], accessed_by=ListGetItemGuardAccessor(117) + | | | | | | +- ID_MATCH: ___check_obj_id(L['self'].param_groups[0]['params'][117], 140561606580064) + | | | | | | +- TENSOR_MATCH: check_tensor(L['self'].param_groups[0]['params'][117], Parameter, DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), torch.float32, device=0, requires_grad=True, size=[768], stride=[1]) + | | | | | | +- NO_TENSOR_ALIASING + | | | | | | +- GuardManager: source=L['self'].param_groups[0]['params'][117].grad, accessed_by=GradGuardAccessor(grad) + | | | | | | | +- NOT_NONE: L['self'].param_groups[0]['params'][117].grad is not None + | | | | | +- GuardManager: source=L['self'].param_groups[0]['params'][118], accessed_by=ListGetItemGuardAccessor(118) + | | | | | | +- ID_MATCH: ___check_obj_id(L['self'].param_groups[0]['params'][118], 140561606585744) + | | | | | | +- TENSOR_MATCH: check_tensor(L['self'].param_groups[0]['params'][118], Parameter, DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), torch.float32, device=0, requires_grad=True, size=[3072, 
768], stride=[768, 1]) + | | | | | | +- NO_TENSOR_ALIASING + | | | | | | +- GuardManager: source=L['self'].param_groups[0]['params'][118].grad, accessed_by=GradGuardAccessor(grad) + | | | | | | | +- NOT_NONE: L['self'].param_groups[0]['params'][118].grad is not None + | | | | | +- GuardManager: source=L['self'].param_groups[0]['params'][119], accessed_by=ListGetItemGuardAccessor(119) + | | | | | | +- ID_MATCH: ___check_obj_id(L['self'].param_groups[0]['params'][119], 140561606576064) + | | | | | | +- TENSOR_MATCH: check_tensor(L['self'].param_groups[0]['params'][119], Parameter, DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), torch.float32, device=0, requires_grad=True, size=[3072], stride=[1]) + | | | | | | +- NO_TENSOR_ALIASING + | | | | | | +- GuardManager: source=L['self'].param_groups[0]['params'][119].grad, accessed_by=GradGuardAccessor(grad) + | | | | | | | +- NOT_NONE: L['self'].param_groups[0]['params'][119].grad is not None + | | | | | +- GuardManager: source=L['self'].param_groups[0]['params'][120], accessed_by=ListGetItemGuardAccessor(120) + | | | | | | +- ID_MATCH: ___check_obj_id(L['self'].param_groups[0]['params'][120], 140561606577264) + | | | | | | +- TENSOR_MATCH: check_tensor(L['self'].param_groups[0]['params'][120], Parameter, DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), torch.float32, device=0, requires_grad=True, size=[768, 3072], stride=[3072, 1]) + | | | | | | +- NO_TENSOR_ALIASING + | | | | | | +- GuardManager: source=L['self'].param_groups[0]['params'][120].grad, accessed_by=GradGuardAccessor(grad) + | | | | | | | +- NOT_NONE: L['self'].param_groups[0]['params'][120].grad is not None + | | | | | +- GuardManager: source=L['self'].param_groups[0]['params'][121], accessed_by=ListGetItemGuardAccessor(121) + | | | | | | +- ID_MATCH: ___check_obj_id(L['self'].param_groups[0]['params'][121], 140561606576224) + | | | | | | +- TENSOR_MATCH: check_tensor(L['self'].param_groups[0]['params'][121], Parameter, DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), torch.float32, device=0, requires_grad=True, size=[768], stride=[1]) + | | | | | | +- NO_TENSOR_ALIASING + | | | | | | +- GuardManager: source=L['self'].param_groups[0]['params'][121].grad, accessed_by=GradGuardAccessor(grad) + | | | | | | | +- NOT_NONE: L['self'].param_groups[0]['params'][121].grad is not None + | | | | | +- GuardManager: source=L['self'].param_groups[0]['params'][122], accessed_by=ListGetItemGuardAccessor(122) + | | | | | | +- ID_MATCH: ___check_obj_id(L['self'].param_groups[0]['params'][122], 140561606576624) + | | | | | | +- TENSOR_MATCH: check_tensor(L['self'].param_groups[0]['params'][122], Parameter, DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), torch.float32, device=0, requires_grad=True, size=[768], stride=[1]) + | | | | | | +- NO_TENSOR_ALIASING + | | | | | | +- GuardManager: source=L['self'].param_groups[0]['params'][122].grad, accessed_by=GradGuardAccessor(grad) + | | | | | | | +- NOT_NONE: L['self'].param_groups[0]['params'][122].grad is not None + | | | | | +- GuardManager: source=L['self'].param_groups[0]['params'][123], accessed_by=ListGetItemGuardAccessor(123) + | | | | | | +- ID_MATCH: ___check_obj_id(L['self'].param_groups[0]['params'][123], 140561606578384) + | | | | | | +- TENSOR_MATCH: check_tensor(L['self'].param_groups[0]['params'][123], Parameter, DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), torch.float32, device=0, requires_grad=True, size=[768], stride=[1]) + | | | | | | +- 
NO_TENSOR_ALIASING + | | | | | | +- GuardManager: source=L['self'].param_groups[0]['params'][123].grad, accessed_by=GradGuardAccessor(grad) + | | | | | | | +- NOT_NONE: L['self'].param_groups[0]['params'][123].grad is not None + | | | | | +- GuardManager: source=L['self'].param_groups[0]['params'][124], accessed_by=ListGetItemGuardAccessor(124) + | | | | | | +- ID_MATCH: ___check_obj_id(L['self'].param_groups[0]['params'][124], 140561606574784) + | | | | | | +- TENSOR_MATCH: check_tensor(L['self'].param_groups[0]['params'][124], Parameter, DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), torch.float32, device=0, requires_grad=True, size=[2304, 768], stride=[768, 1]) + | | | | | | +- NO_TENSOR_ALIASING + | | | | | | +- GuardManager: source=L['self'].param_groups[0]['params'][124].grad, accessed_by=GradGuardAccessor(grad) + | | | | | | | +- NOT_NONE: L['self'].param_groups[0]['params'][124].grad is not None + | | | | | +- GuardManager: source=L['self'].param_groups[0]['params'][125], accessed_by=ListGetItemGuardAccessor(125) + | | | | | | +- ID_MATCH: ___check_obj_id(L['self'].param_groups[0]['params'][125], 140561606575184) + | | | | | | +- TENSOR_MATCH: check_tensor(L['self'].param_groups[0]['params'][125], Parameter, DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), torch.float32, device=0, requires_grad=True, size=[2304], stride=[1]) + | | | | | | +- NO_TENSOR_ALIASING + | | | | | | +- GuardManager: source=L['self'].param_groups[0]['params'][125].grad, accessed_by=GradGuardAccessor(grad) + | | | | | | | +- NOT_NONE: L['self'].param_groups[0]['params'][125].grad is not None + | | | | | +- GuardManager: source=L['self'].param_groups[0]['params'][126], accessed_by=ListGetItemGuardAccessor(126) + | | | | | | +- ID_MATCH: ___check_obj_id(L['self'].param_groups[0]['params'][126], 140565185705792) + | | | | | | +- TENSOR_MATCH: check_tensor(L['self'].param_groups[0]['params'][126], Parameter, DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), torch.float32, device=0, requires_grad=True, size=[768, 768], stride=[768, 1]) + | | | | | | +- NO_TENSOR_ALIASING + | | | | | | +- GuardManager: source=L['self'].param_groups[0]['params'][126].grad, accessed_by=GradGuardAccessor(grad) + | | | | | | | +- NOT_NONE: L['self'].param_groups[0]['params'][126].grad is not None + | | | | | +- GuardManager: source=L['self'].param_groups[0]['params'][127], accessed_by=ListGetItemGuardAccessor(127) + | | | | | | +- ID_MATCH: ___check_obj_id(L['self'].param_groups[0]['params'][127], 140561606574624) + | | | | | | +- TENSOR_MATCH: check_tensor(L['self'].param_groups[0]['params'][127], Parameter, DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), torch.float32, device=0, requires_grad=True, size=[768], stride=[1]) + | | | | | | +- NO_TENSOR_ALIASING + | | | | | | +- GuardManager: source=L['self'].param_groups[0]['params'][127].grad, accessed_by=GradGuardAccessor(grad) + | | | | | | | +- NOT_NONE: L['self'].param_groups[0]['params'][127].grad is not None + | | | | | +- GuardManager: source=L['self'].param_groups[0]['params'][128], accessed_by=ListGetItemGuardAccessor(128) + | | | | | | +- ID_MATCH: ___check_obj_id(L['self'].param_groups[0]['params'][128], 140561606576144) + | | | | | | +- TENSOR_MATCH: check_tensor(L['self'].param_groups[0]['params'][128], Parameter, DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), torch.float32, device=0, requires_grad=True, size=[768], stride=[1]) + | | | | | | +- NO_TENSOR_ALIASING + | | | | | | 
+- GuardManager: source=L['self'].param_groups[0]['params'][128].grad, accessed_by=GradGuardAccessor(grad) + | | | | | | | +- NOT_NONE: L['self'].param_groups[0]['params'][128].grad is not None + | | | | | +- GuardManager: source=L['self'].param_groups[0]['params'][129], accessed_by=ListGetItemGuardAccessor(129) + | | | | | | +- ID_MATCH: ___check_obj_id(L['self'].param_groups[0]['params'][129], 140561606577504) + | | | | | | +- TENSOR_MATCH: check_tensor(L['self'].param_groups[0]['params'][129], Parameter, DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), torch.float32, device=0, requires_grad=True, size=[768], stride=[1]) + | | | | | | +- NO_TENSOR_ALIASING + | | | | | | +- GuardManager: source=L['self'].param_groups[0]['params'][129].grad, accessed_by=GradGuardAccessor(grad) + | | | | | | | +- NOT_NONE: L['self'].param_groups[0]['params'][129].grad is not None + | | | | | +- GuardManager: source=L['self'].param_groups[0]['params'][130], accessed_by=ListGetItemGuardAccessor(130) + | | | | | | +- ID_MATCH: ___check_obj_id(L['self'].param_groups[0]['params'][130], 140561608015712) + | | | | | | +- TENSOR_MATCH: check_tensor(L['self'].param_groups[0]['params'][130], Parameter, DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), torch.float32, device=0, requires_grad=True, size=[3072, 768], stride=[768, 1]) + | | | | | | +- NO_TENSOR_ALIASING + | | | | | | +- GuardManager: source=L['self'].param_groups[0]['params'][130].grad, accessed_by=GradGuardAccessor(grad) + | | | | | | | +- NOT_NONE: L['self'].param_groups[0]['params'][130].grad is not None + | | | | | +- GuardManager: source=L['self'].param_groups[0]['params'][131], accessed_by=ListGetItemGuardAccessor(131) + | | | | | | +- ID_MATCH: ___check_obj_id(L['self'].param_groups[0]['params'][131], 140561608015392) + | | | | | | +- TENSOR_MATCH: check_tensor(L['self'].param_groups[0]['params'][131], Parameter, DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), torch.float32, device=0, requires_grad=True, size=[3072], stride=[1]) + | | | | | | +- NO_TENSOR_ALIASING + | | | | | | +- GuardManager: source=L['self'].param_groups[0]['params'][131].grad, accessed_by=GradGuardAccessor(grad) + | | | | | | | +- NOT_NONE: L['self'].param_groups[0]['params'][131].grad is not None + | | | | | +- GuardManager: source=L['self'].param_groups[0]['params'][132], accessed_by=ListGetItemGuardAccessor(132) + | | | | | | +- ID_MATCH: ___check_obj_id(L['self'].param_groups[0]['params'][132], 140561608015312) + | | | | | | +- TENSOR_MATCH: check_tensor(L['self'].param_groups[0]['params'][132], Parameter, DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), torch.float32, device=0, requires_grad=True, size=[768, 3072], stride=[3072, 1]) + | | | | | | +- NO_TENSOR_ALIASING + | | | | | | +- GuardManager: source=L['self'].param_groups[0]['params'][132].grad, accessed_by=GradGuardAccessor(grad) + | | | | | | | +- NOT_NONE: L['self'].param_groups[0]['params'][132].grad is not None + | | | | | +- GuardManager: source=L['self'].param_groups[0]['params'][133], accessed_by=ListGetItemGuardAccessor(133) + | | | | | | +- ID_MATCH: ___check_obj_id(L['self'].param_groups[0]['params'][133], 140561608015472) + | | | | | | +- TENSOR_MATCH: check_tensor(L['self'].param_groups[0]['params'][133], Parameter, DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), torch.float32, device=0, requires_grad=True, size=[768], stride=[1]) + | | | | | | +- NO_TENSOR_ALIASING + | | | | | | +- GuardManager: 
source=L['self'].param_groups[0]['params'][133].grad, accessed_by=GradGuardAccessor(grad) + | | | | | | | +- NOT_NONE: L['self'].param_groups[0]['params'][133].grad is not None + | | | | | +- GuardManager: source=L['self'].param_groups[0]['params'][134], accessed_by=ListGetItemGuardAccessor(134) + | | | | | | +- ID_MATCH: ___check_obj_id(L['self'].param_groups[0]['params'][134], 140561606584144) + | | | | | | +- TENSOR_MATCH: check_tensor(L['self'].param_groups[0]['params'][134], Parameter, DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), torch.float32, device=0, requires_grad=True, size=[768], stride=[1]) + | | | | | | +- NO_TENSOR_ALIASING + | | | | | | +- GuardManager: source=L['self'].param_groups[0]['params'][134].grad, accessed_by=GradGuardAccessor(grad) + | | | | | | | +- NOT_NONE: L['self'].param_groups[0]['params'][134].grad is not None + | | | | | +- GuardManager: source=L['self'].param_groups[0]['params'][135], accessed_by=ListGetItemGuardAccessor(135) + | | | | | | +- ID_MATCH: ___check_obj_id(L['self'].param_groups[0]['params'][135], 140561606575904) + | | | | | | +- TENSOR_MATCH: check_tensor(L['self'].param_groups[0]['params'][135], Parameter, DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), torch.float32, device=0, requires_grad=True, size=[768], stride=[1]) + | | | | | | +- NO_TENSOR_ALIASING + | | | | | | +- GuardManager: source=L['self'].param_groups[0]['params'][135].grad, accessed_by=GradGuardAccessor(grad) + | | | | | | | +- NOT_NONE: L['self'].param_groups[0]['params'][135].grad is not None + | | | | | +- GuardManager: source=L['self'].param_groups[0]['params'][136], accessed_by=ListGetItemGuardAccessor(136) + | | | | | | +- ID_MATCH: ___check_obj_id(L['self'].param_groups[0]['params'][136], 140561608015232) + | | | | | | +- TENSOR_MATCH: check_tensor(L['self'].param_groups[0]['params'][136], Parameter, DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), torch.float32, device=0, requires_grad=True, size=[2304, 768], stride=[768, 1]) + | | | | | | +- NO_TENSOR_ALIASING + | | | | | | +- GuardManager: source=L['self'].param_groups[0]['params'][136].grad, accessed_by=GradGuardAccessor(grad) + | | | | | | | +- NOT_NONE: L['self'].param_groups[0]['params'][136].grad is not None + | | | | | +- GuardManager: source=L['self'].param_groups[0]['params'][137], accessed_by=ListGetItemGuardAccessor(137) + | | | | | | +- ID_MATCH: ___check_obj_id(L['self'].param_groups[0]['params'][137], 140561608014832) + | | | | | | +- TENSOR_MATCH: check_tensor(L['self'].param_groups[0]['params'][137], Parameter, DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), torch.float32, device=0, requires_grad=True, size=[2304], stride=[1]) + | | | | | | +- NO_TENSOR_ALIASING + | | | | | | +- GuardManager: source=L['self'].param_groups[0]['params'][137].grad, accessed_by=GradGuardAccessor(grad) + | | | | | | | +- NOT_NONE: L['self'].param_groups[0]['params'][137].grad is not None + | | | | | +- GuardManager: source=L['self'].param_groups[0]['params'][138], accessed_by=ListGetItemGuardAccessor(138) + | | | | | | +- ID_MATCH: ___check_obj_id(L['self'].param_groups[0]['params'][138], 140561608014992) + | | | | | | +- TENSOR_MATCH: check_tensor(L['self'].param_groups[0]['params'][138], Parameter, DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), torch.float32, device=0, requires_grad=True, size=[768, 768], stride=[768, 1]) + | | | | | | +- NO_TENSOR_ALIASING + | | | | | | +- GuardManager: 
source=L['self'].param_groups[0]['params'][138].grad, accessed_by=GradGuardAccessor(grad) + | | | | | | | +- NOT_NONE: L['self'].param_groups[0]['params'][138].grad is not None + | | | | | +- GuardManager: source=L['self'].param_groups[0]['params'][139], accessed_by=ListGetItemGuardAccessor(139) + | | | | | | +- ID_MATCH: ___check_obj_id(L['self'].param_groups[0]['params'][139], 140561608014912) + | | | | | | +- TENSOR_MATCH: check_tensor(L['self'].param_groups[0]['params'][139], Parameter, DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), torch.float32, device=0, requires_grad=True, size=[768], stride=[1]) + | | | | | | +- NO_TENSOR_ALIASING + | | | | | | +- GuardManager: source=L['self'].param_groups[0]['params'][139].grad, accessed_by=GradGuardAccessor(grad) + | | | | | | | +- NOT_NONE: L['self'].param_groups[0]['params'][139].grad is not None + | | | | | +- GuardManager: source=L['self'].param_groups[0]['params'][140], accessed_by=ListGetItemGuardAccessor(140) + | | | | | | +- ID_MATCH: ___check_obj_id(L['self'].param_groups[0]['params'][140], 140561608015632) + | | | | | | +- TENSOR_MATCH: check_tensor(L['self'].param_groups[0]['params'][140], Parameter, DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), torch.float32, device=0, requires_grad=True, size=[768], stride=[1]) + | | | | | | +- NO_TENSOR_ALIASING + | | | | | | +- GuardManager: source=L['self'].param_groups[0]['params'][140].grad, accessed_by=GradGuardAccessor(grad) + | | | | | | | +- NOT_NONE: L['self'].param_groups[0]['params'][140].grad is not None + | | | | | +- GuardManager: source=L['self'].param_groups[0]['params'][141], accessed_by=ListGetItemGuardAccessor(141) + | | | | | | +- ID_MATCH: ___check_obj_id(L['self'].param_groups[0]['params'][141], 140561608015552) + | | | | | | +- TENSOR_MATCH: check_tensor(L['self'].param_groups[0]['params'][141], Parameter, DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), torch.float32, device=0, requires_grad=True, size=[768], stride=[1]) + | | | | | | +- NO_TENSOR_ALIASING + | | | | | | +- GuardManager: source=L['self'].param_groups[0]['params'][141].grad, accessed_by=GradGuardAccessor(grad) + | | | | | | | +- NOT_NONE: L['self'].param_groups[0]['params'][141].grad is not None + | | | | | +- GuardManager: source=L['self'].param_groups[0]['params'][142], accessed_by=ListGetItemGuardAccessor(142) + | | | | | | +- ID_MATCH: ___check_obj_id(L['self'].param_groups[0]['params'][142], 140561608014752) + | | | | | | +- TENSOR_MATCH: check_tensor(L['self'].param_groups[0]['params'][142], Parameter, DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), torch.float32, device=0, requires_grad=True, size=[3072, 768], stride=[768, 1]) + | | | | | | +- NO_TENSOR_ALIASING + | | | | | | +- GuardManager: source=L['self'].param_groups[0]['params'][142].grad, accessed_by=GradGuardAccessor(grad) + | | | | | | | +- NOT_NONE: L['self'].param_groups[0]['params'][142].grad is not None + | | | | | +- GuardManager: source=L['self'].param_groups[0]['params'][143], accessed_by=ListGetItemGuardAccessor(143) + | | | | | | +- ID_MATCH: ___check_obj_id(L['self'].param_groups[0]['params'][143], 140561608014432) + | | | | | | +- TENSOR_MATCH: check_tensor(L['self'].param_groups[0]['params'][143], Parameter, DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), torch.float32, device=0, requires_grad=True, size=[3072], stride=[1]) + | | | | | | +- NO_TENSOR_ALIASING + | | | | | | +- GuardManager: 
source=L['self'].param_groups[0]['params'][143].grad, accessed_by=GradGuardAccessor(grad) + | | | | | | | +- NOT_NONE: L['self'].param_groups[0]['params'][143].grad is not None + | | | | | +- GuardManager: source=L['self'].param_groups[0]['params'][144], accessed_by=ListGetItemGuardAccessor(144) + | | | | | | +- ID_MATCH: ___check_obj_id(L['self'].param_groups[0]['params'][144], 140561608015152) + | | | | | | +- TENSOR_MATCH: check_tensor(L['self'].param_groups[0]['params'][144], Parameter, DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), torch.float32, device=0, requires_grad=True, size=[768, 3072], stride=[3072, 1]) + | | | | | | +- NO_TENSOR_ALIASING + | | | | | | +- GuardManager: source=L['self'].param_groups[0]['params'][144].grad, accessed_by=GradGuardAccessor(grad) + | | | | | | | +- NOT_NONE: L['self'].param_groups[0]['params'][144].grad is not None + | | | | | +- GuardManager: source=L['self'].param_groups[0]['params'][145], accessed_by=ListGetItemGuardAccessor(145) + | | | | | | +- ID_MATCH: ___check_obj_id(L['self'].param_groups[0]['params'][145], 140561608014512) + | | | | | | +- TENSOR_MATCH: check_tensor(L['self'].param_groups[0]['params'][145], Parameter, DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), torch.float32, device=0, requires_grad=True, size=[768], stride=[1]) + | | | | | | +- NO_TENSOR_ALIASING + | | | | | | +- GuardManager: source=L['self'].param_groups[0]['params'][145].grad, accessed_by=GradGuardAccessor(grad) + | | | | | | | +- NOT_NONE: L['self'].param_groups[0]['params'][145].grad is not None + | | | | | +- GuardManager: source=L['self'].param_groups[0]['params'][146], accessed_by=ListGetItemGuardAccessor(146) + | | | | | | +- ID_MATCH: ___check_obj_id(L['self'].param_groups[0]['params'][146], 140561606585344) + | | | | | | +- TENSOR_MATCH: check_tensor(L['self'].param_groups[0]['params'][146], Parameter, DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), torch.float32, device=0, requires_grad=True, size=[768], stride=[1]) + | | | | | | +- NO_TENSOR_ALIASING + | | | | | | +- GuardManager: source=L['self'].param_groups[0]['params'][146].grad, accessed_by=GradGuardAccessor(grad) + | | | | | | | +- NOT_NONE: L['self'].param_groups[0]['params'][146].grad is not None + | | | | | +- GuardManager: source=L['self'].param_groups[0]['params'][147], accessed_by=ListGetItemGuardAccessor(147) + | | | | | | +- ID_MATCH: ___check_obj_id(L['self'].param_groups[0]['params'][147], 140561606574384) + | | | | | | +- TENSOR_MATCH: check_tensor(L['self'].param_groups[0]['params'][147], Parameter, DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), torch.float32, device=0, requires_grad=True, size=[768], stride=[1]) + | | | | | | +- NO_TENSOR_ALIASING + | | | | | | +- GuardManager: source=L['self'].param_groups[0]['params'][147].grad, accessed_by=GradGuardAccessor(grad) + | | | | | | | +- NOT_NONE: L['self'].param_groups[0]['params'][147].grad is not None + | | | | +- GuardManager: source=L['self'].param_groups[0]['lr'], accessed_by=DictGetItemGuardAccessor(lr) + | | | | | +- EQUALS_MATCH: L['self'].param_groups[0]['lr'] == 0.01 + | | | | +- GuardManager: source=L['self'].param_groups[0]['betas'], accessed_by=DictGetItemGuardAccessor(betas) + | | | | | +- TYPE_MATCH: ___check_type_id(L['self'].param_groups[0]['betas'], 94206128741824) + | | | | | +- LENGTH_CHECK: len(L['self'].param_groups[0]['betas']) == 2 + | | | | | +- GuardManager: source=L['self'].param_groups[0]['betas'][0], 
accessed_by=TupleGetItemGuardAccessor(0) + | | | | | | +- EQUALS_MATCH: L['self'].param_groups[0]['betas'][0] == 0.9 + | | | | | +- GuardManager: source=L['self'].param_groups[0]['betas'][1], accessed_by=TupleGetItemGuardAccessor(1) + | | | | | | +- EQUALS_MATCH: L['self'].param_groups[0]['betas'][1] == 0.999 + | | | | +- GuardManager: source=L['self'].param_groups[0]['eps'], accessed_by=DictGetItemGuardAccessor(eps) + | | | | | +- EQUALS_MATCH: L['self'].param_groups[0]['eps'] == 1e-08 + | | | | +- GuardManager: source=L['self'].param_groups[0]['weight_decay'], accessed_by=DictGetItemGuardAccessor(weight_decay) + | | | | | +- EQUALS_MATCH: L['self'].param_groups[0]['weight_decay'] == 0 + | | | | +- GuardManager: source=L['self'].param_groups[0]['amsgrad'], accessed_by=DictGetItemGuardAccessor(amsgrad) + | | | | | +- ID_MATCH: ___check_obj_id(L['self'].param_groups[0]['amsgrad'], 94206128801376) + | | | | +- GuardManager: source=L['self'].param_groups[0]['maximize'], accessed_by=DictGetItemGuardAccessor(maximize) + | | | | | +- ID_MATCH: ___check_obj_id(L['self'].param_groups[0]['maximize'], 94206128801376) + | | | | +- GuardManager: source=L['self'].param_groups[0]['foreach'], accessed_by=DictGetItemGuardAccessor(foreach) + | | | | | +- ID_MATCH: ___check_obj_id(L['self'].param_groups[0]['foreach'], 94206128801408) + | | | | +- GuardManager: source=L['self'].param_groups[0]['capturable'], accessed_by=DictGetItemGuardAccessor(capturable) + | | | | | +- ID_MATCH: ___check_obj_id(L['self'].param_groups[0]['capturable'], 94206128801408) + | | | | +- GuardManager: source=L['self'].param_groups[0]['differentiable'], accessed_by=DictGetItemGuardAccessor(differentiable) + | | | | | +- ID_MATCH: ___check_obj_id(L['self'].param_groups[0]['differentiable'], 94206128801376) + | | | | +- GuardManager: source=L['self'].param_groups[0]['fused'], accessed_by=DictGetItemGuardAccessor(fused) + | | | | | +- ID_MATCH: ___check_obj_id(L['self'].param_groups[0]['fused'], 94206128752608) + | +- GuardManager: source=L['closure'], accessed_by=DictGetItemGuardAccessor(closure) + | | +- ID_MATCH: ___check_obj_id(L['closure'], 94206128752608) + | +- GuardManager: source=G, accessed_by=GlobalsGuardAccessor + | | +- GuardManager: source=G['adam'], accessed_by=DictGetItemGuardAccessor(adam) + | | | +- GuardManager: source=G['adam'].__code__, accessed_by=GetAttrGuardAccessor(__code__) + | | | | +- ID_MATCH: ___check_obj_id(G['adam'].__code__, 140563175561632) + | | | +- GuardManager: source=G['adam'].__closure__, accessed_by=GetAttrGuardAccessor(__closure__) + | | | | +- GuardManager: source=G['adam'].__closure__[0], accessed_by=TupleGetItemGuardAccessor(0) + | | | | | +- GuardManager: source=G['adam'].__closure__[0].cell_contents, accessed_by=GetAttrGuardAccessor(cell_contents) + | | | | | | +- ID_MATCH: ___check_obj_id(G['adam'].__closure__[0].cell_contents, 140563175768128) + | | | | +- GuardManager: source=G['adam'].__closure__[1], accessed_by=TupleGetItemGuardAccessor(1) + | | | | | +- GuardManager: source=G['adam'].__closure__[1].cell_contents, accessed_by=GetAttrGuardAccessor(cell_contents) + | | | | | | +- GuardManager: source=G['adam'].__closure__[1].cell_contents.__code__, accessed_by=GetAttrGuardAccessor(__code__) + | | | | | | | +- ID_MATCH: ___check_obj_id(G['adam'].__closure__[1].cell_contents.__code__, 140563175708560) + | | | | +- GuardManager: source=G['adam'].__closure__[2], accessed_by=TupleGetItemGuardAccessor(2) + | | | | | +- GuardManager: source=G['adam'].__closure__[2].cell_contents, 
accessed_by=GetAttrGuardAccessor(cell_contents) + | | | | | | +- ID_MATCH: ___check_obj_id(G['adam'].__closure__[2].cell_contents, 94206128801408) + | | | | +- GuardManager: source=G['adam'].__closure__[3], accessed_by=TupleGetItemGuardAccessor(3) + | | | | | +- GuardManager: source=G['adam'].__closure__[3].cell_contents, accessed_by=GetAttrGuardAccessor(cell_contents) + | | | | | | +- EQUALS_MATCH: G['adam'].__closure__[3].cell_contents == 5 + | | +- GuardManager: source=G['Tensor'], accessed_by=DictGetItemGuardAccessor(Tensor) + | | | +- ID_MATCH: ___check_obj_id(G['Tensor'], 94206193171168) + | | +- GuardManager: source=G['Optimizer'], accessed_by=DictGetItemGuardAccessor(Optimizer) + | | | +- ID_MATCH: ___check_obj_id(G['Optimizer'], 94206202190960) + | | | +- GuardManager: source=G['Optimizer']._group_tensors_by_device_and_dtype, accessed_by=GetAttrGuardAccessor(_group_tensors_by_device_and_dtype) + | | | | +- GuardManager: source=G['Optimizer']._group_tensors_by_device_and_dtype.__code__, accessed_by=GetAttrGuardAccessor(__code__) + | | | | | +- ID_MATCH: ___check_obj_id(G['Optimizer']._group_tensors_by_device_and_dtype.__code__, 140563175565504) + | | +- GuardManager: source=G['_multi_tensor_adam'], accessed_by=DictGetItemGuardAccessor(_multi_tensor_adam) + | | | +- GuardManager: source=G['_multi_tensor_adam'].__code__, accessed_by=GetAttrGuardAccessor(__code__) + | | | | +- ID_MATCH: ___check_obj_id(G['_multi_tensor_adam'].__code__, 140563175707856) + | | +- GuardManager: source=G['__builtins_dict___23'], accessed_by=DictGetItemGuardAccessor(__builtins_dict___23) + | | | +- GuardManager: source=G['__builtins_dict___23']['len'], accessed_by=DictGetItemGuardAccessor(len) + | | | | +- ID_MATCH: ___check_obj_id(G['__builtins_dict___23']['len'], 140565189726736) + | | | +- GuardManager: source=G['__builtins_dict___23']['list'], accessed_by=DictGetItemGuardAccessor(list) + | | | | +- ID_MATCH: ___check_obj_id(G['__builtins_dict___23']['list'], 94206128766016) + | | | +- GuardManager: source=G['__builtins_dict___23']['range'], accessed_by=DictGetItemGuardAccessor(range) + | | | | +- ID_MATCH: ___check_obj_id(G['__builtins_dict___23']['range'], 94206128748288) + | | | +- GuardManager: source=G['__builtins_dict___23']['getattr'], accessed_by=DictGetItemGuardAccessor(getattr) + | | | | +- ID_MATCH: ___check_obj_id(G['__builtins_dict___23']['getattr'], 140565189725856) + | | | +- GuardManager: source=G['__builtins_dict___23']['isinstance'], accessed_by=DictGetItemGuardAccessor(isinstance) + | | | | +- ID_MATCH: ___check_obj_id(G['__builtins_dict___23']['isinstance'], 140565189726416) + | | +- GuardManager: source=G['__import_torch_dot_optim_dot_optimizer'], accessed_by=DictGetItemGuardAccessor(__import_torch_dot_optim_dot_optimizer) + | | | +- ID_MATCH: ___check_obj_id(G['__import_torch_dot_optim_dot_optimizer'], 140563175468672) + | | | +- GuardManager: source=G['__import_torch_dot_optim_dot_optimizer'].torch, accessed_by=GetAttrGuardAccessor(torch) + | | | | +- ID_MATCH: ___check_obj_id(G['__import_torch_dot_optim_dot_optimizer'].torch, 140565184683664) + | | | | +- OBJECT_ALIASING: G['__import_torch_dot_optim_dot_optimizer'].torch is G['torch'] + | | | | +- OBJECT_ALIASING: G['__import_torch_dot_optim_dot_optimizer'].torch is G['torch'] + | | | | +- OBJECT_ALIASING: G['__import_torch_dot_optim_dot_optimizer'].torch is G['torch'] + | | | | +- OBJECT_ALIASING: G['__import_torch_dot_optim_dot_optimizer'].torch is G['torch'] + | | | | +- OBJECT_ALIASING: 
G['__import_torch_dot_optim_dot_optimizer'].torch is G['torch'] + | | | | +- OBJECT_ALIASING: G['__import_torch_dot_optim_dot_optimizer'].torch is G['torch'] + | | | | +- OBJECT_ALIASING: G['__import_torch_dot_optim_dot_optimizer'].torch is G['torch'] + | | | | +- OBJECT_ALIASING: G['__import_torch_dot_optim_dot_optimizer'].torch is G['torch'] + | | | | +- OBJECT_ALIASING: G['__import_torch_dot_optim_dot_optimizer'].torch is G['torch'] + | | | | +- OBJECT_ALIASING: G['__import_torch_dot_optim_dot_optimizer'].torch is G['torch'] + | | | | +- OBJECT_ALIASING: G['__import_torch_dot_optim_dot_optimizer'].torch is G['torch'] + | | | | +- OBJECT_ALIASING: G['__import_torch_dot_optim_dot_optimizer'].torch is G['torch'] + | | | | +- OBJECT_ALIASING: G['__import_torch_dot_optim_dot_optimizer'].torch is G['torch'] + | | | | +- OBJECT_ALIASING: G['__import_torch_dot_optim_dot_optimizer'].torch is G['torch'] + | | | | +- OBJECT_ALIASING: G['__import_torch_dot_optim_dot_optimizer'].torch is G['torch'] + | | | | +- OBJECT_ALIASING: G['__import_torch_dot_optim_dot_optimizer'].torch is G['torch'] + | | | | +- OBJECT_ALIASING: G['__import_torch_dot_optim_dot_optimizer'].torch is G['torch'] + | | | | +- OBJECT_ALIASING: G['__import_torch_dot_optim_dot_optimizer'].torch is G['torch'] + | | | | +- OBJECT_ALIASING: G['__import_torch_dot_optim_dot_optimizer'].torch is G['torch'] + | | | | +- OBJECT_ALIASING: G['__import_torch_dot_optim_dot_optimizer'].torch is G['torch'] + | | | | +- OBJECT_ALIASING: G['__import_torch_dot_optim_dot_optimizer'].torch is G['torch'] + | | | | +- OBJECT_ALIASING: G['__import_torch_dot_optim_dot_optimizer'].torch is G['torch'] + | | | | +- GuardManager: source=G['__import_torch_dot_optim_dot_optimizer'].torch.jit, accessed_by=GetAttrGuardAccessor(jit) + | | | | | +- ID_MATCH: ___check_obj_id(G['__import_torch_dot_optim_dot_optimizer'].torch.jit, 140563222375024) + | | | | | +- GuardManager: source=G['__import_torch_dot_optim_dot_optimizer'].torch.jit.is_scripting, accessed_by=GetAttrGuardAccessor(is_scripting) + | | | | | | +- ID_MATCH: ___check_obj_id(G['__import_torch_dot_optim_dot_optimizer'].torch.jit.is_scripting, 140563303435856) + | | | | +- GuardManager: source=G['__import_torch_dot_optim_dot_optimizer'].torch.compiler, accessed_by=GetAttrGuardAccessor(compiler) + | | | | | +- ID_MATCH: ___check_obj_id(G['__import_torch_dot_optim_dot_optimizer'].torch.compiler, 140562864251632) + | | | | | +- GuardManager: source=G['__import_torch_dot_optim_dot_optimizer'].torch.compiler.is_compiling, accessed_by=GetAttrGuardAccessor(is_compiling) + | | | | | | +- ID_MATCH: ___check_obj_id(G['__import_torch_dot_optim_dot_optimizer'].torch.compiler.is_compiling, 140562863847696) + | | | | +- GuardManager: source=G['__import_torch_dot_optim_dot_optimizer'].torch._foreach_pow, accessed_by=GetAttrGuardAccessor(_foreach_pow) + | | | | | +- ID_MATCH: ___check_obj_id(G['__import_torch_dot_optim_dot_optimizer'].torch._foreach_pow, 140565181190432) + | | | | +- GuardManager: source=G['__import_torch_dot_optim_dot_optimizer'].torch._foreach_add_, accessed_by=GetAttrGuardAccessor(_foreach_add_) + | | | | | +- ID_MATCH: ___check_obj_id(G['__import_torch_dot_optim_dot_optimizer'].torch._foreach_add_, 140565181126176) + | | | | +- GuardManager: source=G['__import_torch_dot_optim_dot_optimizer'].torch._foreach_div_, accessed_by=GetAttrGuardAccessor(_foreach_div_) + | | | | | +- ID_MATCH: ___check_obj_id(G['__import_torch_dot_optim_dot_optimizer'].torch._foreach_div_, 140565181078464) + | | | | +- 
GuardManager: source=G['__import_torch_dot_optim_dot_optimizer'].torch._foreach_mul_, accessed_by=GetAttrGuardAccessor(_foreach_mul_) + | | | | | +- ID_MATCH: ___check_obj_id(G['__import_torch_dot_optim_dot_optimizer'].torch._foreach_mul_, 140565181188672) + | | | | +- GuardManager: source=G['__import_torch_dot_optim_dot_optimizer'].torch._foreach_neg_, accessed_by=GetAttrGuardAccessor(_foreach_neg_) + | | | | | +- ID_MATCH: ___check_obj_id(G['__import_torch_dot_optim_dot_optimizer'].torch._foreach_neg_, 140565181128096) + | | | | +- GuardManager: source=G['__import_torch_dot_optim_dot_optimizer'].torch._foreach_sqrt, accessed_by=GetAttrGuardAccessor(_foreach_sqrt) + | | | | | +- ID_MATCH: ___check_obj_id(G['__import_torch_dot_optim_dot_optimizer'].torch._foreach_sqrt, 140565181128736) + | | | | +- GuardManager: source=G['__import_torch_dot_optim_dot_optimizer'].torch._foreach_sub_, accessed_by=GetAttrGuardAccessor(_foreach_sub_) + | | | | | +- ID_MATCH: ___check_obj_id(G['__import_torch_dot_optim_dot_optimizer'].torch._foreach_sub_, 140565181126336) + | | | | +- GuardManager: source=G['__import_torch_dot_optim_dot_optimizer'].torch._foreach_lerp_, accessed_by=GetAttrGuardAccessor(_foreach_lerp_) + | | | | | +- ID_MATCH: ___check_obj_id(G['__import_torch_dot_optim_dot_optimizer'].torch._foreach_lerp_, 140565181079904) + | | | | +- GuardManager: source=G['__import_torch_dot_optim_dot_optimizer'].torch._foreach_sqrt_, accessed_by=GetAttrGuardAccessor(_foreach_sqrt_) + | | | | | +- ID_MATCH: ___check_obj_id(G['__import_torch_dot_optim_dot_optimizer'].torch._foreach_sqrt_, 140565181128816) + | | | | +- GuardManager: source=G['__import_torch_dot_optim_dot_optimizer'].torch._foreach_addcdiv_, accessed_by=GetAttrGuardAccessor(_foreach_addcdiv_) + | | | | | +- ID_MATCH: ___check_obj_id(G['__import_torch_dot_optim_dot_optimizer'].torch._foreach_addcdiv_, 140565181189552) + | | | | +- GuardManager: source=G['__import_torch_dot_optim_dot_optimizer'].torch._foreach_addcmul_, accessed_by=GetAttrGuardAccessor(_foreach_addcmul_) + | | | | | +- ID_MATCH: ___check_obj_id(G['__import_torch_dot_optim_dot_optimizer'].torch._foreach_addcmul_, 140565181079024) + | | | | +- GuardManager: source=G['__import_torch_dot_optim_dot_optimizer'].torch._foreach_reciprocal_, accessed_by=GetAttrGuardAccessor(_foreach_reciprocal_) + | | | | | +- ID_MATCH: ___check_obj_id(G['__import_torch_dot_optim_dot_optimizer'].torch._foreach_reciprocal_, 140565181190752) + | | +- GuardManager: source=G['__optimizer_140561654732528_140560320971456_c14'](), accessed_by=GlobalWeakRefGuardAccessor(__optimizer_140561654732528_140560320971456_c14) + | | | +- NOT_NONE: G['__optimizer_140561654732528_140560320971456_c14']() is not None + | | +- GuardManager: source=G['torch'], accessed_by=DictGetItemGuardAccessor(torch) + | | | +- OBJECT_ALIASING: G['__import_torch_dot_optim_dot_optimizer'].torch is G['torch'] + | | | +- OBJECT_ALIASING: G['__import_torch_dot_optim_dot_optimizer'].torch is G['torch'] + | | | +- OBJECT_ALIASING: G['__import_torch_dot_optim_dot_optimizer'].torch is G['torch'] + | | | +- OBJECT_ALIASING: G['__import_torch_dot_optim_dot_optimizer'].torch is G['torch'] + | | | +- OBJECT_ALIASING: G['__import_torch_dot_optim_dot_optimizer'].torch is G['torch'] + | | | +- OBJECT_ALIASING: G['__import_torch_dot_optim_dot_optimizer'].torch is G['torch'] + | | | +- OBJECT_ALIASING: G['__import_torch_dot_optim_dot_optimizer'].torch is G['torch'] + | | | +- OBJECT_ALIASING: G['__import_torch_dot_optim_dot_optimizer'].torch is G['torch'] + 
| | | +- OBJECT_ALIASING: G['__import_torch_dot_optim_dot_optimizer'].torch is G['torch'] + | | | +- OBJECT_ALIASING: G['__import_torch_dot_optim_dot_optimizer'].torch is G['torch'] + | | | +- OBJECT_ALIASING: G['__import_torch_dot_optim_dot_optimizer'].torch is G['torch'] + | | | +- OBJECT_ALIASING: G['__import_torch_dot_optim_dot_optimizer'].torch is G['torch'] + | | | +- OBJECT_ALIASING: G['__import_torch_dot_optim_dot_optimizer'].torch is G['torch'] + | | | +- OBJECT_ALIASING: G['__import_torch_dot_optim_dot_optimizer'].torch is G['torch'] + | | | +- OBJECT_ALIASING: G['__import_torch_dot_optim_dot_optimizer'].torch is G['torch'] + | | | +- OBJECT_ALIASING: G['__import_torch_dot_optim_dot_optimizer'].torch is G['torch'] + | | | +- OBJECT_ALIASING: G['__import_torch_dot_optim_dot_optimizer'].torch is G['torch'] + | | | +- OBJECT_ALIASING: G['__import_torch_dot_optim_dot_optimizer'].torch is G['torch'] + | | | +- OBJECT_ALIASING: G['__import_torch_dot_optim_dot_optimizer'].torch is G['torch'] + | | | +- OBJECT_ALIASING: G['__import_torch_dot_optim_dot_optimizer'].torch is G['torch'] + | | | +- OBJECT_ALIASING: G['__import_torch_dot_optim_dot_optimizer'].torch is G['torch'] + | | | +- OBJECT_ALIASING: G['__import_torch_dot_optim_dot_optimizer'].torch is G['torch'] + +V0806 13:56:22.656000 4107173 torch/_dynamo/utils.py:824] {"chromium_event": {}, "has_payload": "6373403406566b0d07d809832e4636e9"} + { + "name": "entire_frame_compile", + "ts": 1722977782656800.0, + "args": null, + "ph": "E", + "pid": 0 + } +V0806 13:56:22.656000 4107173 torch/_dynamo/utils.py:824] {"chromium_event": {}, "has_payload": "403a651c09acb31ec87103d6cb0ce6c4"} + { + "name": "_compile.compile_inner", + "ts": 1722977782656905.5, + "args": null, + "ph": "E", + "pid": 0 + } +V0806 13:56:22.657000 4107173 torch/_dynamo/utils.py:779] {"compilation_metrics": {"compile_id": "7/0", "frame_key": "10", "co_name": "step", "co_filename": "/data/users/jjwu/a/pytorch/torch/optim/adam.py", "co_firstlineno": 197, "cache_size": 0, "accumulated_cache_size": 0, "guard_count": 1564, "shape_env_guard_count": 0, "graph_op_count": 461, "graph_node_count": 1202, "graph_input_count": 740, "start_time": 1722977765.99917, "entire_frame_compile_time_s": 16.65777063369751, "backend_compile_time_s": 15.009882926940918, "inductor_compile_time_s": 12.278924465179443, "code_gen_time_s": 8.707001686096191, "fail_type": null, "fail_reason": null, "fail_user_frame_filename": null, "fail_user_frame_lineno": null, "non_compliant_ops": [], "compliant_custom_ops": [], "restart_reasons": [], "dynamo_time_before_restart_s": 0.0, "has_guarded_code": true}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:22.661000 4107173 torch/_dynamo/utils.py:824] {"chromium_event": {}, "has_payload": "933b77f235a58b3669385c39e9b5c847"} + { + "name": "cudagraphify", + "ts": 1722977782661519.5, + "args": null, + "ph": "B", + "pid": 0 + } +V0806 13:56:22.661000 4107173 torch/_dynamo/utils.py:824] {"chromium_event": {}, "has_payload": "a4d8ac42e95374728a2395c2b83481b7"} + { + "name": "cudagraphify", + "ts": 1722977782661842.5, + "args": null, + "ph": "E", + "pid": 0 + } +V0806 13:56:22.669000 4107173 torch/_dynamo/convert_frame.py:869] {"dynamo_start": {"stack": [{"line": 460, "name": "", "filename": 1}, {"line": 456, "name": "torchbench_main", "filename": 1}, {"line": 4086, "name": "main", "filename": 2}, {"line": 4018, "name": "process_entry", "filename": 2}, {"line": 4680, "name": "run", "filename": 2}, {"line": 3378, "name": "run_one_model", "filename": 
2}, {"line": 3209, "name": "run_performance_test", "filename": 2}, {"line": 3141, "name": "warmup", "filename": 2}, {"line": 464, "name": "_fn", "filename": 3}, {"line": 437, "name": "forward_and_backward_pass", "filename": 1}, {"line": 438, "name": "torch_dynamo_resume_in_forward_and_backward_pass_at_437", "filename": 1}, {"line": 444, "name": "torch_dynamo_resume_in_forward_and_backward_pass_at_438", "filename": 1}, {"line": 445, "name": "torch_dynamo_resume_in_forward_and_backward_pass_at_444", "filename": 1}, {"line": 446, "name": "torch_dynamo_resume_in_forward_and_backward_pass_at_445", "filename": 1}, {"line": 2556, "name": "optimizer_step", "filename": 2}, {"line": 478, "name": "wrapper", "filename": 4}, {"line": 478, "name": "torch_dynamo_resume_in_wrapper_at_478", "filename": 4}]}, "frame_id": 8, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:22.669000 4107173 torch/_dynamo/utils.py:824] {"chromium_event": {}, "has_payload": "be3193b9ffb67bac58a91607b0886088"} + { + "name": "_compile.compile_inner", + "ts": 1722977782669412.5, + "args": null, + "ph": "B", + "pid": 0 + } +V0806 13:56:22.669000 4107173 torch/_dynamo/utils.py:824] {"chromium_event": {}, "has_payload": "f6f07a6528d283fa873344e0ed477c20"} + { + "name": "entire_frame_compile", + "ts": 1722977782669488.5, + "args": null, + "ph": "B", + "pid": 0 + } +V0806 13:56:22.672000 4107173 torch/_dynamo/utils.py:824] {"chromium_event": {}, "has_payload": "a463cd5fab0e85ef4cdb2d49ec071c30"} + { + "name": "entire_frame_compile", + "ts": 1722977782672002.0, + "args": null, + "ph": "E", + "pid": 0 + } +V0806 13:56:22.672000 4107173 torch/_dynamo/utils.py:824] {"chromium_event": {}, "has_payload": "0b15ffeae50ebb8d1de380fc9eddce48"} + { + "name": "_compile.compile_inner", + "ts": 1722977782672101.5, + "args": null, + "ph": "E", + "pid": 0 + } +V0806 13:56:22.672000 4107173 torch/_dynamo/utils.py:779] {"compilation_metrics": {"compile_id": "8/0", "frame_key": "11", "co_name": "torch_dynamo_resume_in_wrapper_at_478", "co_filename": "/data/users/jjwu/a/pytorch/torch/optim/optimizer.py", "co_firstlineno": 478, "cache_size": 0, "accumulated_cache_size": 0, "guard_count": null, "shape_env_guard_count": null, "graph_op_count": null, "graph_node_count": null, "graph_input_count": null, "start_time": 1722977782.6693876, "entire_frame_compile_time_s": null, "backend_compile_time_s": null, "inductor_compile_time_s": null, "code_gen_time_s": null, "fail_type": null, "fail_reason": null, "fail_user_frame_filename": null, "fail_user_frame_lineno": null, "non_compliant_ops": [], "compliant_custom_ops": [], "restart_reasons": [], "dynamo_time_before_restart_s": 0.0028078556060791016, "has_guarded_code": false}, "frame_id": 8, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:22.672000 4107173 torch/_dynamo/convert_frame.py:869] {"dynamo_start": {"stack": [{"line": 460, "name": "", "filename": 1}, {"line": 456, "name": "torchbench_main", "filename": 1}, {"line": 4086, "name": "main", "filename": 2}, {"line": 4018, "name": "process_entry", "filename": 2}, {"line": 4680, "name": "run", "filename": 2}, {"line": 3378, "name": "run_one_model", "filename": 2}, {"line": 3209, "name": "run_performance_test", "filename": 2}, {"line": 3141, "name": "warmup", "filename": 2}, {"line": 464, "name": "_fn", "filename": 3}, {"line": 437, "name": "forward_and_backward_pass", "filename": 1}, {"line": 438, "name": "torch_dynamo_resume_in_forward_and_backward_pass_at_437", "filename": 1}, {"line": 444, "name": "torch_dynamo_resume_in_forward_and_backward_pass_at_438", 
"filename": 1}, {"line": 445, "name": "torch_dynamo_resume_in_forward_and_backward_pass_at_444", "filename": 1}, {"line": 446, "name": "torch_dynamo_resume_in_forward_and_backward_pass_at_445", "filename": 1}, {"line": 446, "name": "torch_dynamo_resume_in_forward_and_backward_pass_at_446", "filename": 1}]}, "frame_id": 9, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:22.672000 4107173 torch/_dynamo/utils.py:824] {"chromium_event": {}, "has_payload": "70a9b0f3c76c037192abe119fdeef60f"} + { + "name": "_compile.compile_inner", + "ts": 1722977782672775.2, + "args": null, + "ph": "B", + "pid": 0 + } +V0806 13:56:22.672000 4107173 torch/_dynamo/utils.py:824] {"chromium_event": {}, "has_payload": "ccea687f80d2b7fe110135062e930fa4"} + { + "name": "entire_frame_compile", + "ts": 1722977782672841.5, + "args": null, + "ph": "B", + "pid": 0 + } +V0806 13:56:22.675000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 0, "describer_id": 316, "size": 201216}, "frame_id": 9, "frame_compile_id": 0, "attempt": 1} +V0806 13:56:22.675000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 0, "ndim": 3, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [1, 1, 50304], "requires_grad": true, "stride": [50304, 50304, 1], "storage": 0, "view_func": "", "describer_id": 316}, "frame_id": 9, "frame_compile_id": 0, "attempt": 1} +V0806 13:56:22.675000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 316, "id": 0, "source": "L['pred']"}, "frame_id": 9, "frame_compile_id": 0, "attempt": 1} +V0806 13:56:22.677000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 1, "describer_id": 316, "size": 4}, "frame_id": 9, "frame_compile_id": 0, "attempt": 1} +V0806 13:56:22.677000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 1, "ndim": 0, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [], "requires_grad": true, "stride": [], "storage": 1, "view_func": "", "describer_id": 316}, "frame_id": 9, "frame_compile_id": 0, "attempt": 1} +V0806 13:56:22.677000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 316, "id": 1, "source": "L['loss']"}, "frame_id": 9, "frame_compile_id": 0, "attempt": 1} +V0806 13:56:22.679000 4107173 torch/_dynamo/guards.py:2195] {"dynamo_cpp_guards_str": {}, "frame_id": 9, "frame_compile_id": 0, "attempt": 1, "has_payload": "fb4ecbe50cbd7aa2ddaad613c4ad31e3"} + + TREE_GUARD_MANAGER: + +- RootGuardManager + | +- DEFAULT_DEVICE: utils_device.CURRENT_DEVICE == None # _dynamo/output_graph.py:462 in init_ambient_guards + | +- GLOBAL_STATE: ___check_global_state() + | +- GuardManager: source=L['mod'], accessed_by=DictGetItemGuardAccessor(mod) + | | +- TYPE_MATCH: ___check_type_id(L['mod'], 94206531299328) + | +- GuardManager: source=L['loss'], accessed_by=DictGetItemGuardAccessor(loss) + | | +- TENSOR_MATCH: check_tensor(L['loss'], Tensor, DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), torch.float32, device=0, requires_grad=True, size=[], stride=[]) + | | +- NO_HASATTR: hasattr(L['loss'], '_dynamo_dynamic_indices') == False + | | +- NO_TENSOR_ALIASING: check_no_aliasing(L['loss'], L['pred']) + | +- GuardManager: source=L['pred'], accessed_by=DictGetItemGuardAccessor(pred) + | | +- TENSOR_MATCH: check_tensor(L['pred'], Tensor, DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), torch.float32, device=0, requires_grad=True, size=[1, 1, 50304], stride=[50304, 50304, 1]) 
+ | | +- NO_HASATTR: hasattr(L['pred'], '_dynamo_dynamic_indices') == False + | | +- NO_TENSOR_ALIASING + | +- GuardManager: source=L['cloned_inputs'], accessed_by=DictGetItemGuardAccessor(cloned_inputs) + | | +- TYPE_MATCH: ___check_type_id(L['cloned_inputs'], 94206128766016) + | | +- LENGTH_CHECK: len(L['cloned_inputs']) == 1 + | +- GuardManager: source=L['collect_outputs'], accessed_by=DictGetItemGuardAccessor(collect_outputs) + | | +- ID_MATCH: ___check_obj_id(L['collect_outputs'], 94206128801408) + | +- GuardManager: source=G, accessed_by=GlobalsGuardAccessor + | | +- GuardManager: source=G['collect_results'], accessed_by=DictGetItemGuardAccessor(collect_results) + | | | +- ID_MATCH: ___check_obj_id(G['collect_results'], 140561699517296) + +V0806 13:56:22.679000 4107173 torch/_dynamo/utils.py:824] {"chromium_event": {}, "has_payload": "f291c5d56f337d7413c8577515cab788"} + { + "name": "entire_frame_compile", + "ts": 1722977782679682.8, + "args": null, + "ph": "E", + "pid": 0 + } +V0806 13:56:22.679000 4107173 torch/_dynamo/utils.py:824] {"chromium_event": {}, "has_payload": "186863a8eb64744b41d4349598d565d8"} + { + "name": "_compile.compile_inner", + "ts": 1722977782679751.8, + "args": null, + "ph": "E", + "pid": 0 + } +V0806 13:56:22.679000 4107173 torch/_dynamo/utils.py:779] {"compilation_metrics": {"compile_id": "9/0", "frame_key": "13", "co_name": "torch_dynamo_resume_in_forward_and_backward_pass_at_446", "co_filename": "/data/users/jjwu/a/pytorch/benchmarks/dynamo/torchbench.py", "co_firstlineno": 446, "cache_size": 0, "accumulated_cache_size": 0, "guard_count": 11, "shape_env_guard_count": 0, "graph_op_count": 0, "graph_node_count": 2, "graph_input_count": 2, "start_time": 1722977782.672749, "entire_frame_compile_time_s": 0.007033109664916992, "backend_compile_time_s": null, "inductor_compile_time_s": null, "code_gen_time_s": null, "fail_type": null, "fail_reason": null, "fail_user_frame_filename": null, "fail_user_frame_lineno": null, "non_compliant_ops": [], "compliant_custom_ops": [], "restart_reasons": ["'skip function collect_results in file /data/users/jjwu/a/pytorch/torch/_dynamo/testing.py'"], "dynamo_time_before_restart_s": 0.0009343624114990234, "has_guarded_code": true}, "frame_id": 9, "frame_compile_id": 0, "attempt": 1} diff --git a/tests/integration_test.rs b/tests/integration_test.rs index 74f19ea..4a3b4d4 100644 --- a/tests/integration_test.rs +++ b/tests/integration_test.rs @@ -128,3 +128,27 @@ fn test_parse_artifact() { ); } } + +#[test] +fn test_parse_chromium_event() { + let expected_files = ["chromium_events.json", "index.html"]; + // Read the test file. + // chromium_nanogpt_cache_miss.log was generated by running the torchbench + // nanogpt training benchmark with structured trace logging enabled, roughly: + // TORCH_TRACE=~/trace_logs/test python benchmarks/dynamo/torchbench.py ... + let path = Path::new("tests/inputs/chromium_nanogpt_cache_miss.log").to_path_buf(); + let config = tlparse::ParseConfig { + strict: true, + ..Default::default() + }; + let output = tlparse::parse_path(&path, config); + assert!(output.is_ok()); + let map: HashMap<PathBuf, String> = output.unwrap().into_iter().collect(); + // Check all expected output files are present + for prefix in expected_files { + assert!( + prefix_exists(&map, prefix), + "{} not found in output", + prefix + ); + } +}
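
Note on the new chromium_events.json build product: each "chromium_event" record in the trace above carries a JSON payload in the Chrome Trace Event Format — a "name", a "ts" timestamp in microseconds, a phase "ph" ("B" opens a slice, "E" closes the matching one), and a "pid". Producing something Perfetto can open is then essentially a matter of collecting those payloads into one JSON array, since the Trace Event Format accepts a bare array of event objects. A minimal sketch of that assembly step, assuming serde_json and anyhow as dependencies (assemble_chromium_events is an illustrative name, not this crate's actual internals):

    use serde_json::Value;

    /// Collect the per-line "chromium_event" JSON payloads into a single
    /// JSON array that Perfetto / chrome://tracing can load directly.
    fn assemble_chromium_events(payloads: &[&str]) -> anyhow::Result<String> {
        let events: Vec<Value> = payloads
            .iter()
            .map(|p| serde_json::from_str(p).map_err(anyhow::Error::from))
            .collect::<anyhow::Result<_>>()?;
        // Paired "B"/"E" events with the same name render as one span on the
        // timeline, so the begin/end pairs emitted around each compile phase
        // (e.g. entire_frame_compile above) show up as visible durations.
        Ok(serde_json::to_string_pretty(&events)?)
    }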