-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathCoreMLInspect-OpenELM-1B-Instruct-Compiled-Model-CPU-NE.txt
3453 lines (3452 loc) · 275 KB
/
CoreMLInspect-OpenELM-1B-Instruct-Compiled-Model-CPU-NE.txt
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
809
810
811
812
813
814
815
816
817
818
819
820
821
822
823
824
825
826
827
828
829
830
831
832
833
834
835
836
837
838
839
840
841
842
843
844
845
846
847
848
849
850
851
852
853
854
855
856
857
858
859
860
861
862
863
864
865
866
867
868
869
870
871
872
873
874
875
876
877
878
879
880
881
882
883
884
885
886
887
888
889
890
891
892
893
894
895
896
897
898
899
900
901
902
903
904
905
906
907
908
909
910
911
912
913
914
915
916
917
918
919
920
921
922
923
924
925
926
927
928
929
930
931
932
933
934
935
936
937
938
939
940
941
942
943
944
945
946
947
948
949
950
951
952
953
954
955
956
957
958
959
960
961
962
963
964
965
966
967
968
969
970
971
972
973
974
975
976
977
978
979
980
981
982
983
984
985
986
987
988
989
990
991
992
993
994
995
996
997
998
999
1000
Analyzing model for compute unit [cpuAndNeuralEngine]...
Key: C=CPU, G=GPU, N=NeuralEngine
<Estimate of total operation cost>% <primary compute|supported compute> <operation>
func main(input_ids) {
var_5 = const()
var_22 = const()
inputs_embeds_axis_0 = const()
inputs_embeds_batch_dims_0 = const()
inputs_embeds_validate_indices_0 = const()
model_transformer_token_embeddings_weight_to_fp16 = const()
input_ids_to_int16_dtype_0 = const()
nan% C| cast_58 = ios17.cast(x: ["input_ids"], dtype: ["input_ids_to_int16_dtype_0"])
nan% C|G inputs_embeds_cast_fp16_cast_int16 = ios17.gather(axis: ["inputs_embeds_axis_0"], indices: ["cast_58"], x: ["model_transformer_token_embeddings_weight_to_fp16"], batch_dims: ["inputs_embeds_batch_dims_0"], validate_indices: ["inputs_embeds_validate_indices_0"])
var_10_promoted_to_fp16 = const()
nan% C|GN var_125_cast_fp16 = ios17.pow(y: ["var_10_promoted_to_fp16"], x: ["inputs_embeds_cast_fp16_cast_int16"])
var_126 = const()
nan% C|GN var_127_cast_fp16 = ios16.reduce_mean(x: ["var_125_cast_fp16"], axes: ["var_126"], keep_dims: ["var_22"])
var_128_to_fp16 = const()
nan% C|GN var_129_cast_fp16 = ios17.add(y: ["var_128_to_fp16"], x: ["var_127_cast_fp16"])
var_130_epsilon_0 = const()
nan% C|GN var_130_cast_fp16 = ios17.rsqrt(x: ["var_129_cast_fp16"], epsilon: ["var_130_epsilon_0"])
nan% C|GN var_131_cast_fp16 = ios17.mul(x: ["inputs_embeds_cast_fp16_cast_int16"], y: ["var_130_cast_fp16"])
model_transformer_layers_0_attn_norm_weight_to_fp16 = const()
nan% C|GN hidden_states_1_cast_fp16 = ios17.mul(x: ["var_131_cast_fp16"], y: ["model_transformer_layers_0_attn_norm_weight_to_fp16"])
model_transformer_layers_0_attn_qkv_proj_weight_to_fp16 = const()
linear_0_bias_0_to_fp16 = const()
nan% C|GN linear_0_cast_fp16 = ios17.linear(weight: ["model_transformer_layers_0_attn_qkv_proj_weight_to_fp16"], bias: ["linear_0_bias_0_to_fp16"], x: ["hidden_states_1_cast_fp16"])
var_145 = const()
nan% C|GN qkv_3_cast_fp16 = ios17.reshape(x: ["linear_0_cast_fp16"], shape: ["var_145"])
var_147_perm_0 = const()
var_148 = const()
var_149_axis_0 = const()
nan% C|GN transpose_167 = ios17.transpose(x: ["qkv_3_cast_fp16"], perm: ["var_147_perm_0"])
nan% C|GN var_149_cast_fp16_0, var_149_cast_fp16_1, var_149_cast_fp16_2 = split(axis: ["var_149_axis_0"], split_sizes: ["var_148"], x: ["transpose_167"])
var_10_promoted_to_fp16_1 = const()
nan% C|GN var_155_cast_fp16 = ios17.pow(x: ["var_149_cast_fp16_0"], y: ["var_10_promoted_to_fp16_1"])
var_156 = const()
nan% C|GN var_157_cast_fp16 = ios16.reduce_mean(keep_dims: ["var_22"], axes: ["var_156"], x: ["var_155_cast_fp16"])
var_158_to_fp16 = const()
nan% C|GN var_159_cast_fp16 = ios17.add(y: ["var_158_to_fp16"], x: ["var_157_cast_fp16"])
var_160_epsilon_0 = const()
nan% C|GN var_160_cast_fp16 = ios17.rsqrt(epsilon: ["var_160_epsilon_0"], x: ["var_159_cast_fp16"])
nan% C|GN var_161_cast_fp16 = ios17.mul(y: ["var_160_cast_fp16"], x: ["var_149_cast_fp16_0"])
model_transformer_layers_0_attn_q_norm_weight_to_fp16 = const()
nan% C|GN query_1_cast_fp16 = ios17.mul(y: ["model_transformer_layers_0_attn_q_norm_weight_to_fp16"], x: ["var_161_cast_fp16"])
var_10_promoted_to_fp16_2 = const()
nan% C|GN var_166_cast_fp16 = ios17.pow(x: ["var_149_cast_fp16_1"], y: ["var_10_promoted_to_fp16_2"])
var_167 = const()
nan% C|GN var_168_cast_fp16 = ios16.reduce_mean(x: ["var_166_cast_fp16"], axes: ["var_167"], keep_dims: ["var_22"])
var_169_to_fp16 = const()
nan% C|GN var_170_cast_fp16 = ios17.add(y: ["var_169_to_fp16"], x: ["var_168_cast_fp16"])
var_171_epsilon_0 = const()
nan% C|GN var_171_cast_fp16 = ios17.rsqrt(epsilon: ["var_171_epsilon_0"], x: ["var_170_cast_fp16"])
nan% C|GN var_172_cast_fp16 = ios17.mul(y: ["var_171_cast_fp16"], x: ["var_149_cast_fp16_1"])
model_transformer_layers_0_attn_k_norm_weight_to_fp16 = const()
nan% C|GN key_1_cast_fp16 = ios17.mul(x: ["var_172_cast_fp16"], y: ["model_transformer_layers_0_attn_k_norm_weight_to_fp16"])
var_187_to_fp16 = const()
nan% C|GN var_189_cast_fp16 = ios17.mul(y: ["var_187_to_fp16"], x: ["query_1_cast_fp16"])
var_190_split_sizes_0 = const()
var_190_axis_0 = const()
nan% C|GN var_190_cast_fp16_0, var_190_cast_fp16_1 = split(split_sizes: ["var_190_split_sizes_0"], x: ["query_1_cast_fp16"], axis: ["var_190_axis_0"])
const_6_promoted_to_fp16 = const()
nan% C|GN var_192_cast_fp16 = ios17.mul(y: ["const_6_promoted_to_fp16"], x: ["var_190_cast_fp16_1"])
var_194_interleave_0 = const()
nan% C|GN var_194_cast_fp16 = ios17.concat(values: ["var_192_cast_fp16", "var_190_cast_fp16_0"], interleave: ["var_194_interleave_0"], axis: ["var_5"])
var_183_to_fp16 = const()
nan% C|GN var_195_cast_fp16 = ios17.mul(y: ["var_183_to_fp16"], x: ["var_194_cast_fp16"])
nan% C|GN query_float_1_cast_fp16 = ios17.add(x: ["var_189_cast_fp16"], y: ["var_195_cast_fp16"])
nan% C|GN var_201_cast_fp16 = ios17.mul(y: ["var_187_to_fp16"], x: ["key_1_cast_fp16"])
var_202_split_sizes_0 = const()
var_202_axis_0 = const()
nan% C|GN var_202_cast_fp16_0, var_202_cast_fp16_1 = split(split_sizes: ["var_202_split_sizes_0"], axis: ["var_202_axis_0"], x: ["key_1_cast_fp16"])
const_7_promoted_to_fp16 = const()
nan% C|GN var_204_cast_fp16 = ios17.mul(x: ["var_202_cast_fp16_1"], y: ["const_7_promoted_to_fp16"])
var_206_interleave_0 = const()
nan% C|GN var_206_cast_fp16 = ios17.concat(interleave: ["var_206_interleave_0"], axis: ["var_5"], values: ["var_204_cast_fp16", "var_202_cast_fp16_0"])
nan% C|GN var_207_cast_fp16 = ios17.mul(x: ["var_206_cast_fp16"], y: ["var_183_to_fp16"])
nan% C|GN key_float_1_cast_fp16 = ios17.add(x: ["var_201_cast_fp16"], y: ["var_207_cast_fp16"])
reshape_0_shape_0 = const()
nan% C|GN reshape_0_cast_fp16 = ios17.reshape(x: ["key_float_1_cast_fp16"], shape: ["reshape_0_shape_0"])
tile_0_reps_0 = const()
nan% C|GN tile_0_cast_fp16 = tile(reps: ["tile_0_reps_0"], x: ["reshape_0_cast_fp16"])
reshape_1_shape_0 = const()
nan% C|GN reshape_1_cast_fp16 = ios17.reshape(shape: ["reshape_1_shape_0"], x: ["tile_0_cast_fp16"])
transpose_0_perm_0 = const()
keys_3_shape_0 = const()
nan% C|GN transpose_166 = ios17.transpose(perm: ["transpose_0_perm_0"], x: ["reshape_1_cast_fp16"])
nan% C|GN keys_3_cast_fp16 = ios17.reshape(x: ["transpose_166"], shape: ["keys_3_shape_0"])
reshape_2_shape_0 = const()
nan% C|GN reshape_2_cast_fp16 = ios17.reshape(shape: ["reshape_2_shape_0"], x: ["var_149_cast_fp16_2"])
tile_1_reps_0 = const()
nan% C|GN tile_1_cast_fp16 = tile(reps: ["tile_1_reps_0"], x: ["reshape_2_cast_fp16"])
reshape_3_shape_0 = const()
nan% C|GN reshape_3_cast_fp16 = ios17.reshape(x: ["tile_1_cast_fp16"], shape: ["reshape_3_shape_0"])
transpose_1_perm_0 = const()
values_3_shape_0 = const()
nan% C|GN transpose_165 = ios17.transpose(x: ["reshape_3_cast_fp16"], perm: ["transpose_1_perm_0"])
nan% C|GN values_3_cast_fp16 = ios17.reshape(shape: ["values_3_shape_0"], x: ["transpose_165"])
mul_0_y_0_to_fp16 = const()
nan% C|GN mul_0_cast_fp16 = ios17.mul(y: ["mul_0_y_0_to_fp16"], x: ["query_float_1_cast_fp16"])
matmul_0_transpose_y_0 = const()
matmul_0_transpose_x_0 = const()
nan% C|GN matmul_0_cast_fp16 = ios17.matmul(transpose_x: ["matmul_0_transpose_x_0"], x: ["mul_0_cast_fp16"], y: ["keys_3_cast_fp16"], transpose_y: ["matmul_0_transpose_y_0"])
causal_mask_3_to_fp16 = const()
nan% C|GN add_1_cast_fp16 = ios17.add(x: ["matmul_0_cast_fp16"], y: ["causal_mask_3_to_fp16"])
softmax_0_axis_0 = const()
nan% C|GN softmax_0_cast_fp16 = ios16.softmax(x: ["add_1_cast_fp16"], axis: ["softmax_0_axis_0"])
attn_output_1_transpose_x_0 = const()
attn_output_1_transpose_y_0 = const()
nan% C|GN attn_output_1_cast_fp16 = ios17.matmul(transpose_x: ["attn_output_1_transpose_x_0"], transpose_y: ["attn_output_1_transpose_y_0"], y: ["values_3_cast_fp16"], x: ["softmax_0_cast_fp16"])
var_223_perm_0 = const()
var_225 = const()
nan% C|GN transpose_164 = ios17.transpose(perm: ["var_223_perm_0"], x: ["attn_output_1_cast_fp16"])
nan% C|GN input_3_cast_fp16 = ios17.reshape(x: ["transpose_164"], shape: ["var_225"])
model_transformer_layers_0_attn_out_proj_weight_to_fp16 = const()
linear_1_bias_0_to_fp16 = const()
nan% C|GN linear_1_cast_fp16 = ios17.linear(bias: ["linear_1_bias_0_to_fp16"], x: ["input_3_cast_fp16"], weight: ["model_transformer_layers_0_attn_out_proj_weight_to_fp16"])
nan% C|GN x_15_cast_fp16 = ios17.add(y: ["linear_1_cast_fp16"], x: ["inputs_embeds_cast_fp16_cast_int16"])
var_10_promoted_to_fp16_3 = const()
nan% C|GN var_232_cast_fp16 = ios17.pow(y: ["var_10_promoted_to_fp16_3"], x: ["x_15_cast_fp16"])
var_233 = const()
nan% C|GN var_234_cast_fp16 = ios16.reduce_mean(keep_dims: ["var_22"], axes: ["var_233"], x: ["var_232_cast_fp16"])
var_235_to_fp16 = const()
nan% C|GN var_236_cast_fp16 = ios17.add(y: ["var_235_to_fp16"], x: ["var_234_cast_fp16"])
var_237_epsilon_0 = const()
nan% C|GN var_237_cast_fp16 = ios17.rsqrt(x: ["var_236_cast_fp16"], epsilon: ["var_237_epsilon_0"])
nan% C|GN var_238_cast_fp16 = ios17.mul(x: ["x_15_cast_fp16"], y: ["var_237_cast_fp16"])
model_transformer_layers_0_ffn_norm_weight_to_fp16 = const()
nan% C|GN input_5_cast_fp16 = ios17.mul(y: ["model_transformer_layers_0_ffn_norm_weight_to_fp16"], x: ["var_238_cast_fp16"])
model_transformer_layers_0_ffn_proj_1_weight_to_fp16 = const()
nan% C|GN linear_2_cast_fp16 = ios17.linear(weight: ["model_transformer_layers_0_ffn_proj_1_weight_to_fp16"], x: ["input_5_cast_fp16"], bias: ["linear_1_bias_0_to_fp16"])
var_248_split_sizes_0 = const()
var_248_axis_0 = const()
nan% C|GN var_248_cast_fp16_0, var_248_cast_fp16_1 = split(x: ["linear_2_cast_fp16"], axis: ["var_248_axis_0"], split_sizes: ["var_248_split_sizes_0"])
nan% C|GN var_250_cast_fp16 = ios16.silu(x: ["var_248_cast_fp16_0"])
nan% C|GN input_9_cast_fp16 = ios17.mul(x: ["var_250_cast_fp16"], y: ["var_248_cast_fp16_1"])
model_transformer_layers_0_ffn_proj_2_weight_to_fp16 = const()
nan% C|GN linear_3_cast_fp16 = ios17.linear(weight: ["model_transformer_layers_0_ffn_proj_2_weight_to_fp16"], bias: ["linear_1_bias_0_to_fp16"], x: ["input_9_cast_fp16"])
nan% C|GN x_19_cast_fp16 = ios17.add(x: ["x_15_cast_fp16"], y: ["linear_3_cast_fp16"])
var_10_promoted_to_fp16_4 = const()
nan% C|GN var_261_cast_fp16 = ios17.pow(x: ["x_19_cast_fp16"], y: ["var_10_promoted_to_fp16_4"])
var_262 = const()
nan% C|GN var_263_cast_fp16 = ios16.reduce_mean(keep_dims: ["var_22"], x: ["var_261_cast_fp16"], axes: ["var_262"])
var_264_to_fp16 = const()
nan% C|GN var_265_cast_fp16 = ios17.add(x: ["var_263_cast_fp16"], y: ["var_264_to_fp16"])
var_266_epsilon_0 = const()
nan% C|GN var_266_cast_fp16 = ios17.rsqrt(x: ["var_265_cast_fp16"], epsilon: ["var_266_epsilon_0"])
nan% C|GN var_267_cast_fp16 = ios17.mul(y: ["var_266_cast_fp16"], x: ["x_19_cast_fp16"])
model_transformer_layers_1_attn_norm_weight_to_fp16 = const()
nan% C|GN hidden_states_7_cast_fp16 = ios17.mul(y: ["model_transformer_layers_1_attn_norm_weight_to_fp16"], x: ["var_267_cast_fp16"])
model_transformer_layers_1_attn_qkv_proj_weight_to_fp16 = const()
nan% C|GN linear_4_cast_fp16 = ios17.linear(weight: ["model_transformer_layers_1_attn_qkv_proj_weight_to_fp16"], bias: ["linear_0_bias_0_to_fp16"], x: ["hidden_states_7_cast_fp16"])
var_281 = const()
nan% C|GN qkv_7_cast_fp16 = ios17.reshape(shape: ["var_281"], x: ["linear_4_cast_fp16"])
var_283_perm_0 = const()
var_284 = const()
var_285_axis_0 = const()
nan% C|GN transpose_163 = ios17.transpose(x: ["qkv_7_cast_fp16"], perm: ["var_283_perm_0"])
nan% C|GN var_285_cast_fp16_0, var_285_cast_fp16_1, var_285_cast_fp16_2 = split(axis: ["var_285_axis_0"], x: ["transpose_163"], split_sizes: ["var_284"])
var_10_promoted_to_fp16_5 = const()
nan% C|GN var_291_cast_fp16 = ios17.pow(y: ["var_10_promoted_to_fp16_5"], x: ["var_285_cast_fp16_0"])
var_292 = const()
nan% C|GN var_293_cast_fp16 = ios16.reduce_mean(axes: ["var_292"], x: ["var_291_cast_fp16"], keep_dims: ["var_22"])
var_294_to_fp16 = const()
nan% C|GN var_295_cast_fp16 = ios17.add(y: ["var_294_to_fp16"], x: ["var_293_cast_fp16"])
var_296_epsilon_0 = const()
nan% C|GN var_296_cast_fp16 = ios17.rsqrt(x: ["var_295_cast_fp16"], epsilon: ["var_296_epsilon_0"])
nan% C|GN var_297_cast_fp16 = ios17.mul(x: ["var_285_cast_fp16_0"], y: ["var_296_cast_fp16"])
model_transformer_layers_1_attn_q_norm_weight_to_fp16 = const()
nan% C|GN query_3_cast_fp16 = ios17.mul(x: ["var_297_cast_fp16"], y: ["model_transformer_layers_1_attn_q_norm_weight_to_fp16"])
var_10_promoted_to_fp16_6 = const()
nan% C|GN var_302_cast_fp16 = ios17.pow(y: ["var_10_promoted_to_fp16_6"], x: ["var_285_cast_fp16_1"])
var_303 = const()
nan% C|GN var_304_cast_fp16 = ios16.reduce_mean(x: ["var_302_cast_fp16"], keep_dims: ["var_22"], axes: ["var_303"])
var_305_to_fp16 = const()
nan% C|GN var_306_cast_fp16 = ios17.add(y: ["var_305_to_fp16"], x: ["var_304_cast_fp16"])
var_307_epsilon_0 = const()
nan% C|GN var_307_cast_fp16 = ios17.rsqrt(epsilon: ["var_307_epsilon_0"], x: ["var_306_cast_fp16"])
nan% C|GN var_308_cast_fp16 = ios17.mul(y: ["var_307_cast_fp16"], x: ["var_285_cast_fp16_1"])
model_transformer_layers_1_attn_k_norm_weight_to_fp16 = const()
nan% C|GN key_3_cast_fp16 = ios17.mul(x: ["var_308_cast_fp16"], y: ["model_transformer_layers_1_attn_k_norm_weight_to_fp16"])
nan% C|GN var_325_cast_fp16 = ios17.mul(y: ["var_187_to_fp16"], x: ["query_3_cast_fp16"])
var_326_split_sizes_0 = const()
var_326_axis_0 = const()
nan% C|GN var_326_cast_fp16_0, var_326_cast_fp16_1 = split(axis: ["var_326_axis_0"], split_sizes: ["var_326_split_sizes_0"], x: ["query_3_cast_fp16"])
const_13_promoted_to_fp16 = const()
nan% C|GN var_328_cast_fp16 = ios17.mul(y: ["const_13_promoted_to_fp16"], x: ["var_326_cast_fp16_1"])
var_330_interleave_0 = const()
nan% C|GN var_330_cast_fp16 = ios17.concat(interleave: ["var_330_interleave_0"], values: ["var_328_cast_fp16", "var_326_cast_fp16_0"], axis: ["var_5"])
nan% C|GN var_331_cast_fp16 = ios17.mul(y: ["var_183_to_fp16"], x: ["var_330_cast_fp16"])
nan% C|GN query_float_3_cast_fp16 = ios17.add(x: ["var_325_cast_fp16"], y: ["var_331_cast_fp16"])
nan% C|GN var_337_cast_fp16 = ios17.mul(y: ["var_187_to_fp16"], x: ["key_3_cast_fp16"])
var_338_split_sizes_0 = const()
var_338_axis_0 = const()
nan% C|GN var_338_cast_fp16_0, var_338_cast_fp16_1 = split(axis: ["var_338_axis_0"], split_sizes: ["var_338_split_sizes_0"], x: ["key_3_cast_fp16"])
const_14_promoted_to_fp16 = const()
nan% C|GN var_340_cast_fp16 = ios17.mul(y: ["const_14_promoted_to_fp16"], x: ["var_338_cast_fp16_1"])
var_342_interleave_0 = const()
nan% C|GN var_342_cast_fp16 = ios17.concat(axis: ["var_5"], values: ["var_340_cast_fp16", "var_338_cast_fp16_0"], interleave: ["var_342_interleave_0"])
nan% C|GN var_343_cast_fp16 = ios17.mul(x: ["var_342_cast_fp16"], y: ["var_183_to_fp16"])
nan% C|GN key_float_3_cast_fp16 = ios17.add(x: ["var_337_cast_fp16"], y: ["var_343_cast_fp16"])
reshape_4_shape_0 = const()
nan% C|GN reshape_4_cast_fp16 = ios17.reshape(x: ["key_float_3_cast_fp16"], shape: ["reshape_4_shape_0"])
tile_2_reps_0 = const()
nan% C|GN tile_2_cast_fp16 = tile(x: ["reshape_4_cast_fp16"], reps: ["tile_2_reps_0"])
reshape_5_shape_0 = const()
nan% C|GN reshape_5_cast_fp16 = ios17.reshape(shape: ["reshape_5_shape_0"], x: ["tile_2_cast_fp16"])
transpose_2_perm_0 = const()
keys_7_shape_0 = const()
nan% C|GN transpose_162 = ios17.transpose(x: ["reshape_5_cast_fp16"], perm: ["transpose_2_perm_0"])
nan% C|GN keys_7_cast_fp16 = ios17.reshape(shape: ["keys_7_shape_0"], x: ["transpose_162"])
reshape_6_shape_0 = const()
nan% C|GN reshape_6_cast_fp16 = ios17.reshape(shape: ["reshape_6_shape_0"], x: ["var_285_cast_fp16_2"])
tile_3_reps_0 = const()
nan% C|GN tile_3_cast_fp16 = tile(x: ["reshape_6_cast_fp16"], reps: ["tile_3_reps_0"])
reshape_7_shape_0 = const()
nan% C|GN reshape_7_cast_fp16 = ios17.reshape(shape: ["reshape_7_shape_0"], x: ["tile_3_cast_fp16"])
transpose_3_perm_0 = const()
values_7_shape_0 = const()
nan% C|GN transpose_161 = ios17.transpose(x: ["reshape_7_cast_fp16"], perm: ["transpose_3_perm_0"])
nan% C|GN values_7_cast_fp16 = ios17.reshape(shape: ["values_7_shape_0"], x: ["transpose_161"])
mul_1_y_0_to_fp16 = const()
nan% C|GN mul_1_cast_fp16 = ios17.mul(y: ["mul_1_y_0_to_fp16"], x: ["query_float_3_cast_fp16"])
matmul_1_transpose_y_0 = const()
matmul_1_transpose_x_0 = const()
nan% C|GN matmul_1_cast_fp16 = ios17.matmul(y: ["keys_7_cast_fp16"], x: ["mul_1_cast_fp16"], transpose_y: ["matmul_1_transpose_y_0"], transpose_x: ["matmul_1_transpose_x_0"])
nan% C|GN add_3_cast_fp16 = ios17.add(x: ["matmul_1_cast_fp16"], y: ["causal_mask_3_to_fp16"])
softmax_1_axis_0 = const()
nan% C|GN softmax_1_cast_fp16 = ios16.softmax(axis: ["softmax_1_axis_0"], x: ["add_3_cast_fp16"])
attn_output_5_transpose_x_0 = const()
attn_output_5_transpose_y_0 = const()
nan% C|GN attn_output_5_cast_fp16 = ios17.matmul(y: ["values_7_cast_fp16"], transpose_y: ["attn_output_5_transpose_y_0"], transpose_x: ["attn_output_5_transpose_x_0"], x: ["softmax_1_cast_fp16"])
var_359_perm_0 = const()
var_361 = const()
nan% C|GN transpose_160 = ios17.transpose(x: ["attn_output_5_cast_fp16"], perm: ["var_359_perm_0"])
nan% C|GN input_11_cast_fp16 = ios17.reshape(x: ["transpose_160"], shape: ["var_361"])
model_transformer_layers_1_attn_out_proj_weight_to_fp16 = const()
nan% C|GN linear_5_cast_fp16 = ios17.linear(bias: ["linear_1_bias_0_to_fp16"], weight: ["model_transformer_layers_1_attn_out_proj_weight_to_fp16"], x: ["input_11_cast_fp16"])
nan% C|GN x_35_cast_fp16 = ios17.add(y: ["linear_5_cast_fp16"], x: ["x_19_cast_fp16"])
var_10_promoted_to_fp16_7 = const()
nan% C|GN var_368_cast_fp16 = ios17.pow(y: ["var_10_promoted_to_fp16_7"], x: ["x_35_cast_fp16"])
var_369 = const()
nan% C|GN var_370_cast_fp16 = ios16.reduce_mean(keep_dims: ["var_22"], x: ["var_368_cast_fp16"], axes: ["var_369"])
var_371_to_fp16 = const()
nan% C|GN var_372_cast_fp16 = ios17.add(x: ["var_370_cast_fp16"], y: ["var_371_to_fp16"])
var_373_epsilon_0 = const()
nan% C|GN var_373_cast_fp16 = ios17.rsqrt(x: ["var_372_cast_fp16"], epsilon: ["var_373_epsilon_0"])
nan% C|GN var_374_cast_fp16 = ios17.mul(y: ["var_373_cast_fp16"], x: ["x_35_cast_fp16"])
model_transformer_layers_1_ffn_norm_weight_to_fp16 = const()
nan% C|GN input_13_cast_fp16 = ios17.mul(y: ["model_transformer_layers_1_ffn_norm_weight_to_fp16"], x: ["var_374_cast_fp16"])
model_transformer_layers_1_ffn_proj_1_weight_to_fp16 = const()
linear_6_bias_0_to_fp16 = const()
nan% C|GN linear_6_cast_fp16 = ios17.linear(bias: ["linear_6_bias_0_to_fp16"], weight: ["model_transformer_layers_1_ffn_proj_1_weight_to_fp16"], x: ["input_13_cast_fp16"])
var_384_split_sizes_0 = const()
var_384_axis_0 = const()
nan% C|GN var_384_cast_fp16_0, var_384_cast_fp16_1 = split(split_sizes: ["var_384_split_sizes_0"], x: ["linear_6_cast_fp16"], axis: ["var_384_axis_0"])
nan% C|GN var_386_cast_fp16 = ios16.silu(x: ["var_384_cast_fp16_0"])
nan% C|GN input_17_cast_fp16 = ios17.mul(y: ["var_384_cast_fp16_1"], x: ["var_386_cast_fp16"])
model_transformer_layers_1_ffn_proj_2_weight_to_fp16 = const()
nan% C|GN linear_7_cast_fp16 = ios17.linear(bias: ["linear_1_bias_0_to_fp16"], x: ["input_17_cast_fp16"], weight: ["model_transformer_layers_1_ffn_proj_2_weight_to_fp16"])
nan% C|GN x_39_cast_fp16 = ios17.add(x: ["x_35_cast_fp16"], y: ["linear_7_cast_fp16"])
var_10_promoted_to_fp16_8 = const()
nan% C|GN var_397_cast_fp16 = ios17.pow(x: ["x_39_cast_fp16"], y: ["var_10_promoted_to_fp16_8"])
var_398 = const()
nan% C|GN var_399_cast_fp16 = ios16.reduce_mean(axes: ["var_398"], keep_dims: ["var_22"], x: ["var_397_cast_fp16"])
var_400_to_fp16 = const()
nan% C|GN var_401_cast_fp16 = ios17.add(y: ["var_400_to_fp16"], x: ["var_399_cast_fp16"])
var_402_epsilon_0 = const()
nan% C|GN var_402_cast_fp16 = ios17.rsqrt(epsilon: ["var_402_epsilon_0"], x: ["var_401_cast_fp16"])
nan% C|GN var_403_cast_fp16 = ios17.mul(y: ["var_402_cast_fp16"], x: ["x_39_cast_fp16"])
model_transformer_layers_2_attn_norm_weight_to_fp16 = const()
nan% C|GN hidden_states_13_cast_fp16 = ios17.mul(y: ["model_transformer_layers_2_attn_norm_weight_to_fp16"], x: ["var_403_cast_fp16"])
model_transformer_layers_2_attn_qkv_proj_weight_to_fp16 = const()
nan% C|GN linear_8_cast_fp16 = ios17.linear(bias: ["linear_0_bias_0_to_fp16"], weight: ["model_transformer_layers_2_attn_qkv_proj_weight_to_fp16"], x: ["hidden_states_13_cast_fp16"])
var_417 = const()
nan% C|GN qkv_11_cast_fp16 = ios17.reshape(x: ["linear_8_cast_fp16"], shape: ["var_417"])
var_419_perm_0 = const()
var_420 = const()
var_421_axis_0 = const()
nan% C|GN transpose_159 = ios17.transpose(x: ["qkv_11_cast_fp16"], perm: ["var_419_perm_0"])
nan% C|GN var_421_cast_fp16_0, var_421_cast_fp16_1, var_421_cast_fp16_2 = split(axis: ["var_421_axis_0"], split_sizes: ["var_420"], x: ["transpose_159"])
var_10_promoted_to_fp16_9 = const()
nan% C|GN var_427_cast_fp16 = ios17.pow(x: ["var_421_cast_fp16_0"], y: ["var_10_promoted_to_fp16_9"])
var_428 = const()
nan% C|GN var_429_cast_fp16 = ios16.reduce_mean(x: ["var_427_cast_fp16"], axes: ["var_428"], keep_dims: ["var_22"])
var_430_to_fp16 = const()
nan% C|GN var_431_cast_fp16 = ios17.add(y: ["var_430_to_fp16"], x: ["var_429_cast_fp16"])
var_432_epsilon_0 = const()
nan% C|GN var_432_cast_fp16 = ios17.rsqrt(epsilon: ["var_432_epsilon_0"], x: ["var_431_cast_fp16"])
nan% C|GN var_433_cast_fp16 = ios17.mul(y: ["var_432_cast_fp16"], x: ["var_421_cast_fp16_0"])
model_transformer_layers_2_attn_q_norm_weight_to_fp16 = const()
nan% C|GN query_5_cast_fp16 = ios17.mul(x: ["var_433_cast_fp16"], y: ["model_transformer_layers_2_attn_q_norm_weight_to_fp16"])
var_10_promoted_to_fp16_10 = const()
nan% C|GN var_438_cast_fp16 = ios17.pow(y: ["var_10_promoted_to_fp16_10"], x: ["var_421_cast_fp16_1"])
var_439 = const()
nan% C|GN var_440_cast_fp16 = ios16.reduce_mean(keep_dims: ["var_22"], x: ["var_438_cast_fp16"], axes: ["var_439"])
var_441_to_fp16 = const()
nan% C|GN var_442_cast_fp16 = ios17.add(y: ["var_441_to_fp16"], x: ["var_440_cast_fp16"])
var_443_epsilon_0 = const()
nan% C|GN var_443_cast_fp16 = ios17.rsqrt(x: ["var_442_cast_fp16"], epsilon: ["var_443_epsilon_0"])
nan% C|GN var_444_cast_fp16 = ios17.mul(x: ["var_421_cast_fp16_1"], y: ["var_443_cast_fp16"])
model_transformer_layers_2_attn_k_norm_weight_to_fp16 = const()
nan% C|GN key_5_cast_fp16 = ios17.mul(y: ["model_transformer_layers_2_attn_k_norm_weight_to_fp16"], x: ["var_444_cast_fp16"])
nan% C|GN var_461_cast_fp16 = ios17.mul(y: ["var_187_to_fp16"], x: ["query_5_cast_fp16"])
var_462_split_sizes_0 = const()
var_462_axis_0 = const()
nan% C|GN var_462_cast_fp16_0, var_462_cast_fp16_1 = split(x: ["query_5_cast_fp16"], axis: ["var_462_axis_0"], split_sizes: ["var_462_split_sizes_0"])
const_20_promoted_to_fp16 = const()
nan% C|GN var_464_cast_fp16 = ios17.mul(x: ["var_462_cast_fp16_1"], y: ["const_20_promoted_to_fp16"])
var_466_interleave_0 = const()
nan% C|GN var_466_cast_fp16 = ios17.concat(axis: ["var_5"], values: ["var_464_cast_fp16", "var_462_cast_fp16_0"], interleave: ["var_466_interleave_0"])
nan% C|GN var_467_cast_fp16 = ios17.mul(x: ["var_466_cast_fp16"], y: ["var_183_to_fp16"])
nan% C|GN query_float_5_cast_fp16 = ios17.add(y: ["var_467_cast_fp16"], x: ["var_461_cast_fp16"])
nan% C|GN var_473_cast_fp16 = ios17.mul(x: ["key_5_cast_fp16"], y: ["var_187_to_fp16"])
var_474_split_sizes_0 = const()
var_474_axis_0 = const()
nan% C|GN var_474_cast_fp16_0, var_474_cast_fp16_1 = split(axis: ["var_474_axis_0"], split_sizes: ["var_474_split_sizes_0"], x: ["key_5_cast_fp16"])
const_21_promoted_to_fp16 = const()
nan% C|GN var_476_cast_fp16 = ios17.mul(y: ["const_21_promoted_to_fp16"], x: ["var_474_cast_fp16_1"])
var_478_interleave_0 = const()
nan% C|GN var_478_cast_fp16 = ios17.concat(interleave: ["var_478_interleave_0"], axis: ["var_5"], values: ["var_476_cast_fp16", "var_474_cast_fp16_0"])
nan% C|GN var_479_cast_fp16 = ios17.mul(y: ["var_183_to_fp16"], x: ["var_478_cast_fp16"])
nan% C|GN key_float_5_cast_fp16 = ios17.add(y: ["var_479_cast_fp16"], x: ["var_473_cast_fp16"])
reshape_8_shape_0 = const()
nan% C|GN reshape_8_cast_fp16 = ios17.reshape(x: ["key_float_5_cast_fp16"], shape: ["reshape_8_shape_0"])
tile_4_reps_0 = const()
nan% C|GN tile_4_cast_fp16 = tile(reps: ["tile_4_reps_0"], x: ["reshape_8_cast_fp16"])
reshape_9_shape_0 = const()
nan% C|GN reshape_9_cast_fp16 = ios17.reshape(shape: ["reshape_9_shape_0"], x: ["tile_4_cast_fp16"])
transpose_4_perm_0 = const()
keys_11_shape_0 = const()
nan% C|GN transpose_158 = ios17.transpose(perm: ["transpose_4_perm_0"], x: ["reshape_9_cast_fp16"])
nan% C|GN keys_11_cast_fp16 = ios17.reshape(x: ["transpose_158"], shape: ["keys_11_shape_0"])
reshape_10_shape_0 = const()
nan% C|GN reshape_10_cast_fp16 = ios17.reshape(shape: ["reshape_10_shape_0"], x: ["var_421_cast_fp16_2"])
tile_5_reps_0 = const()
nan% C|GN tile_5_cast_fp16 = tile(x: ["reshape_10_cast_fp16"], reps: ["tile_5_reps_0"])
reshape_11_shape_0 = const()
nan% C|GN reshape_11_cast_fp16 = ios17.reshape(shape: ["reshape_11_shape_0"], x: ["tile_5_cast_fp16"])
transpose_5_perm_0 = const()
values_11_shape_0 = const()
nan% C|GN transpose_157 = ios17.transpose(perm: ["transpose_5_perm_0"], x: ["reshape_11_cast_fp16"])
nan% C|GN values_11_cast_fp16 = ios17.reshape(x: ["transpose_157"], shape: ["values_11_shape_0"])
mul_2_y_0_to_fp16 = const()
nan% C|GN mul_2_cast_fp16 = ios17.mul(x: ["query_float_5_cast_fp16"], y: ["mul_2_y_0_to_fp16"])
matmul_2_transpose_y_0 = const()
matmul_2_transpose_x_0 = const()
nan% C|GN matmul_2_cast_fp16 = ios17.matmul(x: ["mul_2_cast_fp16"], y: ["keys_11_cast_fp16"], transpose_y: ["matmul_2_transpose_y_0"], transpose_x: ["matmul_2_transpose_x_0"])
nan% C|GN add_5_cast_fp16 = ios17.add(x: ["matmul_2_cast_fp16"], y: ["causal_mask_3_to_fp16"])
softmax_2_axis_0 = const()
nan% C|GN softmax_2_cast_fp16 = ios16.softmax(axis: ["softmax_2_axis_0"], x: ["add_5_cast_fp16"])
attn_output_9_transpose_x_0 = const()
attn_output_9_transpose_y_0 = const()
nan% C|GN attn_output_9_cast_fp16 = ios17.matmul(transpose_y: ["attn_output_9_transpose_y_0"], y: ["values_11_cast_fp16"], transpose_x: ["attn_output_9_transpose_x_0"], x: ["softmax_2_cast_fp16"])
var_495_perm_0 = const()
var_497 = const()
nan% C|GN transpose_156 = ios17.transpose(perm: ["var_495_perm_0"], x: ["attn_output_9_cast_fp16"])
nan% C|GN input_19_cast_fp16 = ios17.reshape(shape: ["var_497"], x: ["transpose_156"])
model_transformer_layers_2_attn_out_proj_weight_to_fp16 = const()
nan% C|GN linear_9_cast_fp16 = ios17.linear(weight: ["model_transformer_layers_2_attn_out_proj_weight_to_fp16"], bias: ["linear_1_bias_0_to_fp16"], x: ["input_19_cast_fp16"])
nan% C|GN x_55_cast_fp16 = ios17.add(x: ["x_39_cast_fp16"], y: ["linear_9_cast_fp16"])
var_10_promoted_to_fp16_11 = const()
nan% C|GN var_504_cast_fp16 = ios17.pow(y: ["var_10_promoted_to_fp16_11"], x: ["x_55_cast_fp16"])
var_505 = const()
nan% C|GN var_506_cast_fp16 = ios16.reduce_mean(keep_dims: ["var_22"], x: ["var_504_cast_fp16"], axes: ["var_505"])
var_507_to_fp16 = const()
nan% C|GN var_508_cast_fp16 = ios17.add(x: ["var_506_cast_fp16"], y: ["var_507_to_fp16"])
var_509_epsilon_0 = const()
nan% C|GN var_509_cast_fp16 = ios17.rsqrt(epsilon: ["var_509_epsilon_0"], x: ["var_508_cast_fp16"])
nan% C|GN var_510_cast_fp16 = ios17.mul(y: ["var_509_cast_fp16"], x: ["x_55_cast_fp16"])
model_transformer_layers_2_ffn_norm_weight_to_fp16 = const()
nan% C|GN input_21_cast_fp16 = ios17.mul(x: ["var_510_cast_fp16"], y: ["model_transformer_layers_2_ffn_norm_weight_to_fp16"])
model_transformer_layers_2_ffn_proj_1_weight_to_fp16 = const()
linear_10_bias_0_to_fp16 = const()
nan% C|GN linear_10_cast_fp16 = ios17.linear(weight: ["model_transformer_layers_2_ffn_proj_1_weight_to_fp16"], x: ["input_21_cast_fp16"], bias: ["linear_10_bias_0_to_fp16"])
var_520_split_sizes_0 = const()
var_520_axis_0 = const()
nan% C|GN var_520_cast_fp16_0, var_520_cast_fp16_1 = split(axis: ["var_520_axis_0"], split_sizes: ["var_520_split_sizes_0"], x: ["linear_10_cast_fp16"])
nan% C|GN var_522_cast_fp16 = ios16.silu(x: ["var_520_cast_fp16_0"])
nan% C|GN input_25_cast_fp16 = ios17.mul(y: ["var_520_cast_fp16_1"], x: ["var_522_cast_fp16"])
model_transformer_layers_2_ffn_proj_2_weight_to_fp16 = const()
nan% C|GN linear_11_cast_fp16 = ios17.linear(weight: ["model_transformer_layers_2_ffn_proj_2_weight_to_fp16"], x: ["input_25_cast_fp16"], bias: ["linear_1_bias_0_to_fp16"])
nan% C|GN x_59_cast_fp16 = ios17.add(x: ["x_55_cast_fp16"], y: ["linear_11_cast_fp16"])
var_10_promoted_to_fp16_12 = const()
nan% C|GN var_533_cast_fp16 = ios17.pow(x: ["x_59_cast_fp16"], y: ["var_10_promoted_to_fp16_12"])
var_534 = const()
nan% C|GN var_535_cast_fp16 = ios16.reduce_mean(x: ["var_533_cast_fp16"], axes: ["var_534"], keep_dims: ["var_22"])
var_536_to_fp16 = const()
nan% C|GN var_537_cast_fp16 = ios17.add(x: ["var_535_cast_fp16"], y: ["var_536_to_fp16"])
var_538_epsilon_0 = const()
nan% C|GN var_538_cast_fp16 = ios17.rsqrt(x: ["var_537_cast_fp16"], epsilon: ["var_538_epsilon_0"])
nan% C|GN var_539_cast_fp16 = ios17.mul(y: ["var_538_cast_fp16"], x: ["x_59_cast_fp16"])
model_transformer_layers_3_attn_norm_weight_to_fp16 = const()
nan% C|GN hidden_states_19_cast_fp16 = ios17.mul(y: ["model_transformer_layers_3_attn_norm_weight_to_fp16"], x: ["var_539_cast_fp16"])
model_transformer_layers_3_attn_qkv_proj_weight_to_fp16 = const()
linear_12_bias_0_to_fp16 = const()
nan% C|GN linear_12_cast_fp16 = ios17.linear(x: ["hidden_states_19_cast_fp16"], bias: ["linear_12_bias_0_to_fp16"], weight: ["model_transformer_layers_3_attn_qkv_proj_weight_to_fp16"])
var_553 = const()
nan% C|GN qkv_15_cast_fp16 = ios17.reshape(x: ["linear_12_cast_fp16"], shape: ["var_553"])
var_555_perm_0 = const()
var_556 = const()
var_557_axis_0 = const()
nan% C|GN transpose_155 = ios17.transpose(x: ["qkv_15_cast_fp16"], perm: ["var_555_perm_0"])
nan% C|GN var_557_cast_fp16_0, var_557_cast_fp16_1, var_557_cast_fp16_2 = split(split_sizes: ["var_556"], x: ["transpose_155"], axis: ["var_557_axis_0"])
var_10_promoted_to_fp16_13 = const()
nan% C|GN var_563_cast_fp16 = ios17.pow(y: ["var_10_promoted_to_fp16_13"], x: ["var_557_cast_fp16_0"])
var_564 = const()
nan% C|GN var_565_cast_fp16 = ios16.reduce_mean(x: ["var_563_cast_fp16"], axes: ["var_564"], keep_dims: ["var_22"])
var_566_to_fp16 = const()
nan% C|GN var_567_cast_fp16 = ios17.add(y: ["var_566_to_fp16"], x: ["var_565_cast_fp16"])
var_568_epsilon_0 = const()
nan% C|GN var_568_cast_fp16 = ios17.rsqrt(x: ["var_567_cast_fp16"], epsilon: ["var_568_epsilon_0"])
nan% C|GN var_569_cast_fp16 = ios17.mul(x: ["var_557_cast_fp16_0"], y: ["var_568_cast_fp16"])
model_transformer_layers_3_attn_q_norm_weight_to_fp16 = const()
nan% C|GN query_7_cast_fp16 = ios17.mul(x: ["var_569_cast_fp16"], y: ["model_transformer_layers_3_attn_q_norm_weight_to_fp16"])
var_10_promoted_to_fp16_14 = const()
nan% C|GN var_574_cast_fp16 = ios17.pow(y: ["var_10_promoted_to_fp16_14"], x: ["var_557_cast_fp16_1"])
var_575 = const()
nan% C|GN var_576_cast_fp16 = ios16.reduce_mean(axes: ["var_575"], keep_dims: ["var_22"], x: ["var_574_cast_fp16"])
var_577_to_fp16 = const()
nan% C|GN var_578_cast_fp16 = ios17.add(y: ["var_577_to_fp16"], x: ["var_576_cast_fp16"])
var_579_epsilon_0 = const()
nan% C|GN var_579_cast_fp16 = ios17.rsqrt(epsilon: ["var_579_epsilon_0"], x: ["var_578_cast_fp16"])
nan% C|GN var_580_cast_fp16 = ios17.mul(x: ["var_557_cast_fp16_1"], y: ["var_579_cast_fp16"])
model_transformer_layers_3_attn_k_norm_weight_to_fp16 = const()
nan% C|GN key_7_cast_fp16 = ios17.mul(x: ["var_580_cast_fp16"], y: ["model_transformer_layers_3_attn_k_norm_weight_to_fp16"])
nan% C|GN var_597_cast_fp16 = ios17.mul(y: ["var_187_to_fp16"], x: ["query_7_cast_fp16"])
var_598_split_sizes_0 = const()
var_598_axis_0 = const()
nan% C|GN var_598_cast_fp16_0, var_598_cast_fp16_1 = split(split_sizes: ["var_598_split_sizes_0"], x: ["query_7_cast_fp16"], axis: ["var_598_axis_0"])
const_27_promoted_to_fp16 = const()
nan% C|GN var_600_cast_fp16 = ios17.mul(y: ["const_27_promoted_to_fp16"], x: ["var_598_cast_fp16_1"])
var_602_interleave_0 = const()
nan% C|GN var_602_cast_fp16 = ios17.concat(interleave: ["var_602_interleave_0"], axis: ["var_5"], values: ["var_600_cast_fp16", "var_598_cast_fp16_0"])
nan% C|GN var_603_cast_fp16 = ios17.mul(y: ["var_183_to_fp16"], x: ["var_602_cast_fp16"])
nan% C|GN query_float_7_cast_fp16 = ios17.add(y: ["var_603_cast_fp16"], x: ["var_597_cast_fp16"])
nan% C|GN var_609_cast_fp16 = ios17.mul(y: ["var_187_to_fp16"], x: ["key_7_cast_fp16"])
var_610_split_sizes_0 = const()
var_610_axis_0 = const()
nan% C|GN var_610_cast_fp16_0, var_610_cast_fp16_1 = split(axis: ["var_610_axis_0"], split_sizes: ["var_610_split_sizes_0"], x: ["key_7_cast_fp16"])
const_28_promoted_to_fp16 = const()
nan% C|GN var_612_cast_fp16 = ios17.mul(y: ["const_28_promoted_to_fp16"], x: ["var_610_cast_fp16_1"])
var_614_interleave_0 = const()
nan% C|GN var_614_cast_fp16 = ios17.concat(axis: ["var_5"], values: ["var_612_cast_fp16", "var_610_cast_fp16_0"], interleave: ["var_614_interleave_0"])
nan% C|GN var_615_cast_fp16 = ios17.mul(x: ["var_614_cast_fp16"], y: ["var_183_to_fp16"])
nan% C|GN key_float_7_cast_fp16 = ios17.add(x: ["var_609_cast_fp16"], y: ["var_615_cast_fp16"])
reshape_12_shape_0 = const()
nan% C|GN reshape_12_cast_fp16 = ios17.reshape(shape: ["reshape_12_shape_0"], x: ["key_float_7_cast_fp16"])
tile_6_reps_0 = const()
nan% C|GN tile_6_cast_fp16 = tile(x: ["reshape_12_cast_fp16"], reps: ["tile_6_reps_0"])
reshape_13_shape_0 = const()
nan% C|GN reshape_13_cast_fp16 = ios17.reshape(x: ["tile_6_cast_fp16"], shape: ["reshape_13_shape_0"])
transpose_6_perm_0 = const()
keys_15_shape_0 = const()
nan% C|GN transpose_154 = ios17.transpose(perm: ["transpose_6_perm_0"], x: ["reshape_13_cast_fp16"])
nan% C|GN keys_15_cast_fp16 = ios17.reshape(x: ["transpose_154"], shape: ["keys_15_shape_0"])
reshape_14_shape_0 = const()
nan% C|GN reshape_14_cast_fp16 = ios17.reshape(shape: ["reshape_14_shape_0"], x: ["var_557_cast_fp16_2"])
tile_7_reps_0 = const()
nan% C|GN tile_7_cast_fp16 = tile(reps: ["tile_7_reps_0"], x: ["reshape_14_cast_fp16"])
reshape_15_shape_0 = const()
nan% C|GN reshape_15_cast_fp16 = ios17.reshape(x: ["tile_7_cast_fp16"], shape: ["reshape_15_shape_0"])
transpose_7_perm_0 = const()
values_15_shape_0 = const()
nan% C|GN transpose_153 = ios17.transpose(perm: ["transpose_7_perm_0"], x: ["reshape_15_cast_fp16"])
nan% C|GN values_15_cast_fp16 = ios17.reshape(x: ["transpose_153"], shape: ["values_15_shape_0"])
mul_3_y_0_to_fp16 = const()
nan% C|GN mul_3_cast_fp16 = ios17.mul(x: ["query_float_7_cast_fp16"], y: ["mul_3_y_0_to_fp16"])
matmul_3_transpose_y_0 = const()
matmul_3_transpose_x_0 = const()
nan% C|GN matmul_3_cast_fp16 = ios17.matmul(transpose_y: ["matmul_3_transpose_y_0"], y: ["keys_15_cast_fp16"], x: ["mul_3_cast_fp16"], transpose_x: ["matmul_3_transpose_x_0"])
nan% C|GN add_7_cast_fp16 = ios17.add(x: ["matmul_3_cast_fp16"], y: ["causal_mask_3_to_fp16"])
softmax_3_axis_0 = const()
nan% C|GN softmax_3_cast_fp16 = ios16.softmax(axis: ["softmax_3_axis_0"], x: ["add_7_cast_fp16"])
attn_output_13_transpose_x_0 = const()
attn_output_13_transpose_y_0 = const()
nan% C|GN attn_output_13_cast_fp16 = ios17.matmul(y: ["values_15_cast_fp16"], x: ["softmax_3_cast_fp16"], transpose_y: ["attn_output_13_transpose_y_0"], transpose_x: ["attn_output_13_transpose_x_0"])
var_631_perm_0 = const()
var_633 = const()
nan% C|GN transpose_152 = ios17.transpose(x: ["attn_output_13_cast_fp16"], perm: ["var_631_perm_0"])
nan% C|GN input_27_cast_fp16 = ios17.reshape(shape: ["var_633"], x: ["transpose_152"])
model_transformer_layers_3_attn_out_proj_weight_to_fp16 = const()
nan% C|GN linear_13_cast_fp16 = ios17.linear(x: ["input_27_cast_fp16"], weight: ["model_transformer_layers_3_attn_out_proj_weight_to_fp16"], bias: ["linear_1_bias_0_to_fp16"])
nan% C|GN x_75_cast_fp16 = ios17.add(y: ["linear_13_cast_fp16"], x: ["x_59_cast_fp16"])
var_10_promoted_to_fp16_15 = const()
nan% C|GN var_640_cast_fp16 = ios17.pow(x: ["x_75_cast_fp16"], y: ["var_10_promoted_to_fp16_15"])
var_641 = const()
nan% C|GN var_642_cast_fp16 = ios16.reduce_mean(x: ["var_640_cast_fp16"], axes: ["var_641"], keep_dims: ["var_22"])
var_643_to_fp16 = const()
nan% C|GN var_644_cast_fp16 = ios17.add(x: ["var_642_cast_fp16"], y: ["var_643_to_fp16"])
var_645_epsilon_0 = const()
nan% C|GN var_645_cast_fp16 = ios17.rsqrt(epsilon: ["var_645_epsilon_0"], x: ["var_644_cast_fp16"])
nan% C|GN var_646_cast_fp16 = ios17.mul(x: ["x_75_cast_fp16"], y: ["var_645_cast_fp16"])
model_transformer_layers_3_ffn_norm_weight_to_fp16 = const()
nan% C|GN input_29_cast_fp16 = ios17.mul(y: ["model_transformer_layers_3_ffn_norm_weight_to_fp16"], x: ["var_646_cast_fp16"])
model_transformer_layers_3_ffn_proj_1_weight_to_fp16 = const()
linear_14_bias_0_to_fp16 = const()
nan% C|GN linear_14_cast_fp16 = ios17.linear(x: ["input_29_cast_fp16"], weight: ["model_transformer_layers_3_ffn_proj_1_weight_to_fp16"], bias: ["linear_14_bias_0_to_fp16"])
var_656_split_sizes_0 = const()
var_656_axis_0 = const()
nan% C|GN var_656_cast_fp16_0, var_656_cast_fp16_1 = split(x: ["linear_14_cast_fp16"], split_sizes: ["var_656_split_sizes_0"], axis: ["var_656_axis_0"])
nan% C|GN var_658_cast_fp16 = ios16.silu(x: ["var_656_cast_fp16_0"])
nan% C|GN input_33_cast_fp16 = ios17.mul(y: ["var_656_cast_fp16_1"], x: ["var_658_cast_fp16"])
model_transformer_layers_3_ffn_proj_2_weight_to_fp16 = const()
nan% C|GN linear_15_cast_fp16 = ios17.linear(x: ["input_33_cast_fp16"], weight: ["model_transformer_layers_3_ffn_proj_2_weight_to_fp16"], bias: ["linear_1_bias_0_to_fp16"])
nan% C|GN x_79_cast_fp16 = ios17.add(x: ["x_75_cast_fp16"], y: ["linear_15_cast_fp16"])
var_10_promoted_to_fp16_16 = const()
nan% C|GN var_669_cast_fp16 = ios17.pow(y: ["var_10_promoted_to_fp16_16"], x: ["x_79_cast_fp16"])
var_670 = const()
nan% C|GN var_671_cast_fp16 = ios16.reduce_mean(axes: ["var_670"], x: ["var_669_cast_fp16"], keep_dims: ["var_22"])
var_672_to_fp16 = const()
nan% C|GN var_673_cast_fp16 = ios17.add(y: ["var_672_to_fp16"], x: ["var_671_cast_fp16"])
var_674_epsilon_0 = const()
nan% C|GN var_674_cast_fp16 = ios17.rsqrt(x: ["var_673_cast_fp16"], epsilon: ["var_674_epsilon_0"])
nan% C|GN var_675_cast_fp16 = ios17.mul(x: ["x_79_cast_fp16"], y: ["var_674_cast_fp16"])
model_transformer_layers_4_attn_norm_weight_to_fp16 = const()
nan% C|GN hidden_states_25_cast_fp16 = ios17.mul(x: ["var_675_cast_fp16"], y: ["model_transformer_layers_4_attn_norm_weight_to_fp16"])
model_transformer_layers_4_attn_qkv_proj_weight_to_fp16 = const()
nan% C|GN linear_16_cast_fp16 = ios17.linear(x: ["hidden_states_25_cast_fp16"], weight: ["model_transformer_layers_4_attn_qkv_proj_weight_to_fp16"], bias: ["linear_12_bias_0_to_fp16"])
var_689 = const()
nan% C|GN qkv_19_cast_fp16 = ios17.reshape(shape: ["var_689"], x: ["linear_16_cast_fp16"])
var_691_perm_0 = const()
var_692 = const()
var_693_axis_0 = const()
nan% C|GN transpose_151 = ios17.transpose(x: ["qkv_19_cast_fp16"], perm: ["var_691_perm_0"])
nan% C|GN var_693_cast_fp16_0, var_693_cast_fp16_1, var_693_cast_fp16_2 = split(split_sizes: ["var_692"], x: ["transpose_151"], axis: ["var_693_axis_0"])
var_10_promoted_to_fp16_17 = const()
nan% C|GN var_699_cast_fp16 = ios17.pow(y: ["var_10_promoted_to_fp16_17"], x: ["var_693_cast_fp16_0"])
var_700 = const()
nan% C|GN var_701_cast_fp16 = ios16.reduce_mean(axes: ["var_700"], x: ["var_699_cast_fp16"], keep_dims: ["var_22"])
var_702_to_fp16 = const()
nan% C|GN var_703_cast_fp16 = ios17.add(x: ["var_701_cast_fp16"], y: ["var_702_to_fp16"])
var_704_epsilon_0 = const()
nan% C|GN var_704_cast_fp16 = ios17.rsqrt(epsilon: ["var_704_epsilon_0"], x: ["var_703_cast_fp16"])
nan% C|GN var_705_cast_fp16 = ios17.mul(x: ["var_693_cast_fp16_0"], y: ["var_704_cast_fp16"])
model_transformer_layers_4_attn_q_norm_weight_to_fp16 = const()
nan% C|GN query_9_cast_fp16 = ios17.mul(y: ["model_transformer_layers_4_attn_q_norm_weight_to_fp16"], x: ["var_705_cast_fp16"])
var_10_promoted_to_fp16_18 = const()
nan% C|GN var_710_cast_fp16 = ios17.pow(x: ["var_693_cast_fp16_1"], y: ["var_10_promoted_to_fp16_18"])
var_711 = const()
nan% C|GN var_712_cast_fp16 = ios16.reduce_mean(x: ["var_710_cast_fp16"], keep_dims: ["var_22"], axes: ["var_711"])
var_713_to_fp16 = const()
nan% C|GN var_714_cast_fp16 = ios17.add(y: ["var_713_to_fp16"], x: ["var_712_cast_fp16"])
var_715_epsilon_0 = const()
nan% C|GN var_715_cast_fp16 = ios17.rsqrt(epsilon: ["var_715_epsilon_0"], x: ["var_714_cast_fp16"])
nan% C|GN var_716_cast_fp16 = ios17.mul(y: ["var_715_cast_fp16"], x: ["var_693_cast_fp16_1"])
model_transformer_layers_4_attn_k_norm_weight_to_fp16 = const()
nan% C|GN key_9_cast_fp16 = ios17.mul(x: ["var_716_cast_fp16"], y: ["model_transformer_layers_4_attn_k_norm_weight_to_fp16"])
nan% C|GN var_733_cast_fp16 = ios17.mul(y: ["var_187_to_fp16"], x: ["query_9_cast_fp16"])
var_734_split_sizes_0 = const()
var_734_axis_0 = const()
nan% C|GN var_734_cast_fp16_0, var_734_cast_fp16_1 = split(split_sizes: ["var_734_split_sizes_0"], x: ["query_9_cast_fp16"], axis: ["var_734_axis_0"])
const_34_promoted_to_fp16 = const()
nan% C|GN var_736_cast_fp16 = ios17.mul(y: ["const_34_promoted_to_fp16"], x: ["var_734_cast_fp16_1"])
var_738_interleave_0 = const()
nan% C|GN var_738_cast_fp16 = ios17.concat(interleave: ["var_738_interleave_0"], values: ["var_736_cast_fp16", "var_734_cast_fp16_0"], axis: ["var_5"])
nan% C|GN var_739_cast_fp16 = ios17.mul(y: ["var_183_to_fp16"], x: ["var_738_cast_fp16"])
nan% C|GN query_float_9_cast_fp16 = ios17.add(x: ["var_733_cast_fp16"], y: ["var_739_cast_fp16"])
nan% C|GN var_745_cast_fp16 = ios17.mul(x: ["key_9_cast_fp16"], y: ["var_187_to_fp16"])
var_746_split_sizes_0 = const()
var_746_axis_0 = const()
nan% C|GN var_746_cast_fp16_0, var_746_cast_fp16_1 = split(axis: ["var_746_axis_0"], split_sizes: ["var_746_split_sizes_0"], x: ["key_9_cast_fp16"])
const_35_promoted_to_fp16 = const()
nan% C|GN var_748_cast_fp16 = ios17.mul(y: ["const_35_promoted_to_fp16"], x: ["var_746_cast_fp16_1"])
var_750_interleave_0 = const()
nan% C|GN var_750_cast_fp16 = ios17.concat(axis: ["var_5"], interleave: ["var_750_interleave_0"], values: ["var_748_cast_fp16", "var_746_cast_fp16_0"])
nan% C|GN var_751_cast_fp16 = ios17.mul(x: ["var_750_cast_fp16"], y: ["var_183_to_fp16"])
nan% C|GN key_float_9_cast_fp16 = ios17.add(y: ["var_751_cast_fp16"], x: ["var_745_cast_fp16"])
reshape_16_shape_0 = const()
nan% C|GN reshape_16_cast_fp16 = ios17.reshape(shape: ["reshape_16_shape_0"], x: ["key_float_9_cast_fp16"])
tile_8_reps_0 = const()
nan% C|GN tile_8_cast_fp16 = tile(x: ["reshape_16_cast_fp16"], reps: ["tile_8_reps_0"])
reshape_17_shape_0 = const()
nan% C|GN reshape_17_cast_fp16 = ios17.reshape(x: ["tile_8_cast_fp16"], shape: ["reshape_17_shape_0"])
transpose_8_perm_0 = const()
keys_19_shape_0 = const()
nan% C|GN transpose_150 = ios17.transpose(perm: ["transpose_8_perm_0"], x: ["reshape_17_cast_fp16"])
nan% C|GN keys_19_cast_fp16 = ios17.reshape(x: ["transpose_150"], shape: ["keys_19_shape_0"])
reshape_18_shape_0 = const()
nan% C|GN reshape_18_cast_fp16 = ios17.reshape(x: ["var_693_cast_fp16_2"], shape: ["reshape_18_shape_0"])
tile_9_reps_0 = const()
nan% C|GN tile_9_cast_fp16 = tile(x: ["reshape_18_cast_fp16"], reps: ["tile_9_reps_0"])
reshape_19_shape_0 = const()
nan% C|GN reshape_19_cast_fp16 = ios17.reshape(x: ["tile_9_cast_fp16"], shape: ["reshape_19_shape_0"])
transpose_9_perm_0 = const()
values_19_shape_0 = const()
nan% C|GN transpose_149 = ios17.transpose(perm: ["transpose_9_perm_0"], x: ["reshape_19_cast_fp16"])
nan% C|GN values_19_cast_fp16 = ios17.reshape(x: ["transpose_149"], shape: ["values_19_shape_0"])
mul_4_y_0_to_fp16 = const()
nan% C|GN mul_4_cast_fp16 = ios17.mul(y: ["mul_4_y_0_to_fp16"], x: ["query_float_9_cast_fp16"])
matmul_4_transpose_y_0 = const()
matmul_4_transpose_x_0 = const()
nan% C|GN matmul_4_cast_fp16 = ios17.matmul(transpose_x: ["matmul_4_transpose_x_0"], x: ["mul_4_cast_fp16"], y: ["keys_19_cast_fp16"], transpose_y: ["matmul_4_transpose_y_0"])
nan% C|GN add_9_cast_fp16 = ios17.add(y: ["causal_mask_3_to_fp16"], x: ["matmul_4_cast_fp16"])
softmax_4_axis_0 = const()
nan% C|GN softmax_4_cast_fp16 = ios16.softmax(x: ["add_9_cast_fp16"], axis: ["softmax_4_axis_0"])
attn_output_17_transpose_x_0 = const()
attn_output_17_transpose_y_0 = const()
nan% C|GN attn_output_17_cast_fp16 = ios17.matmul(transpose_y: ["attn_output_17_transpose_y_0"], x: ["softmax_4_cast_fp16"], y: ["values_19_cast_fp16"], transpose_x: ["attn_output_17_transpose_x_0"])
var_767_perm_0 = const()
var_769 = const()
nan% C|GN transpose_148 = ios17.transpose(x: ["attn_output_17_cast_fp16"], perm: ["var_767_perm_0"])
nan% C|GN input_35_cast_fp16 = ios17.reshape(x: ["transpose_148"], shape: ["var_769"])
model_transformer_layers_4_attn_out_proj_weight_to_fp16 = const()
nan% C|GN linear_17_cast_fp16 = ios17.linear(bias: ["linear_1_bias_0_to_fp16"], x: ["input_35_cast_fp16"], weight: ["model_transformer_layers_4_attn_out_proj_weight_to_fp16"])
nan% C|GN x_95_cast_fp16 = ios17.add(x: ["x_79_cast_fp16"], y: ["linear_17_cast_fp16"])
var_10_promoted_to_fp16_19 = const()
nan% C|GN var_776_cast_fp16 = ios17.pow(x: ["x_95_cast_fp16"], y: ["var_10_promoted_to_fp16_19"])
var_777 = const()
nan% C|GN var_778_cast_fp16 = ios16.reduce_mean(axes: ["var_777"], x: ["var_776_cast_fp16"], keep_dims: ["var_22"])
var_779_to_fp16 = const()
nan% C|GN var_780_cast_fp16 = ios17.add(x: ["var_778_cast_fp16"], y: ["var_779_to_fp16"])
var_781_epsilon_0 = const()
nan% C|GN var_781_cast_fp16 = ios17.rsqrt(epsilon: ["var_781_epsilon_0"], x: ["var_780_cast_fp16"])
nan% C|GN var_782_cast_fp16 = ios17.mul(x: ["x_95_cast_fp16"], y: ["var_781_cast_fp16"])
model_transformer_layers_4_ffn_norm_weight_to_fp16 = const()
nan% C|GN input_37_cast_fp16 = ios17.mul(x: ["var_782_cast_fp16"], y: ["model_transformer_layers_4_ffn_norm_weight_to_fp16"])
model_transformer_layers_4_ffn_proj_1_weight_to_fp16 = const()
linear_18_bias_0_to_fp16 = const()
nan% C|GN linear_18_cast_fp16 = ios17.linear(x: ["input_37_cast_fp16"], weight: ["model_transformer_layers_4_ffn_proj_1_weight_to_fp16"], bias: ["linear_18_bias_0_to_fp16"])
var_792_split_sizes_0 = const()
var_792_axis_0 = const()
nan% C|GN var_792_cast_fp16_0, var_792_cast_fp16_1 = split(axis: ["var_792_axis_0"], x: ["linear_18_cast_fp16"], split_sizes: ["var_792_split_sizes_0"])
nan% C|GN var_794_cast_fp16 = ios16.silu(x: ["var_792_cast_fp16_0"])
nan% C|GN input_41_cast_fp16 = ios17.mul(y: ["var_792_cast_fp16_1"], x: ["var_794_cast_fp16"])
model_transformer_layers_4_ffn_proj_2_weight_to_fp16 = const()
nan% C|GN linear_19_cast_fp16 = ios17.linear(weight: ["model_transformer_layers_4_ffn_proj_2_weight_to_fp16"], bias: ["linear_1_bias_0_to_fp16"], x: ["input_41_cast_fp16"])
nan% C|GN x_99_cast_fp16 = ios17.add(y: ["linear_19_cast_fp16"], x: ["x_95_cast_fp16"])
var_10_promoted_to_fp16_20 = const()
nan% C|GN var_805_cast_fp16 = ios17.pow(y: ["var_10_promoted_to_fp16_20"], x: ["x_99_cast_fp16"])
var_806 = const()
nan% C|GN var_807_cast_fp16 = ios16.reduce_mean(x: ["var_805_cast_fp16"], keep_dims: ["var_22"], axes: ["var_806"])
var_808_to_fp16 = const()
nan% C|GN var_809_cast_fp16 = ios17.add(y: ["var_808_to_fp16"], x: ["var_807_cast_fp16"])
var_810_epsilon_0 = const()
nan% C|GN var_810_cast_fp16 = ios17.rsqrt(x: ["var_809_cast_fp16"], epsilon: ["var_810_epsilon_0"])
nan% C|GN var_811_cast_fp16 = ios17.mul(x: ["x_99_cast_fp16"], y: ["var_810_cast_fp16"])
model_transformer_layers_5_attn_norm_weight_to_fp16 = const()
nan% C|GN hidden_states_31_cast_fp16 = ios17.mul(x: ["var_811_cast_fp16"], y: ["model_transformer_layers_5_attn_norm_weight_to_fp16"])
model_transformer_layers_5_attn_qkv_proj_weight_to_fp16 = const()
nan% C|GN linear_20_cast_fp16 = ios17.linear(weight: ["model_transformer_layers_5_attn_qkv_proj_weight_to_fp16"], x: ["hidden_states_31_cast_fp16"], bias: ["linear_12_bias_0_to_fp16"])
var_825 = const()
nan% C|GN qkv_23_cast_fp16 = ios17.reshape(shape: ["var_825"], x: ["linear_20_cast_fp16"])
var_827_perm_0 = const()
var_828 = const()
var_829_axis_0 = const()
nan% C|GN transpose_147 = ios17.transpose(x: ["qkv_23_cast_fp16"], perm: ["var_827_perm_0"])
nan% C|GN var_829_cast_fp16_0, var_829_cast_fp16_1, var_829_cast_fp16_2 = split(x: ["transpose_147"], axis: ["var_829_axis_0"], split_sizes: ["var_828"])
var_10_promoted_to_fp16_21 = const()
nan% C|GN var_835_cast_fp16 = ios17.pow(x: ["var_829_cast_fp16_0"], y: ["var_10_promoted_to_fp16_21"])
var_836 = const()
nan% C|GN var_837_cast_fp16 = ios16.reduce_mean(axes: ["var_836"], keep_dims: ["var_22"], x: ["var_835_cast_fp16"])
var_838_to_fp16 = const()
nan% C|GN var_839_cast_fp16 = ios17.add(y: ["var_838_to_fp16"], x: ["var_837_cast_fp16"])
var_840_epsilon_0 = const()
nan% C|GN var_840_cast_fp16 = ios17.rsqrt(epsilon: ["var_840_epsilon_0"], x: ["var_839_cast_fp16"])
nan% C|GN var_841_cast_fp16 = ios17.mul(y: ["var_840_cast_fp16"], x: ["var_829_cast_fp16_0"])
model_transformer_layers_5_attn_q_norm_weight_to_fp16 = const()
nan% C|GN query_11_cast_fp16 = ios17.mul(y: ["model_transformer_layers_5_attn_q_norm_weight_to_fp16"], x: ["var_841_cast_fp16"])
var_10_promoted_to_fp16_22 = const()
nan% C|GN var_846_cast_fp16 = ios17.pow(y: ["var_10_promoted_to_fp16_22"], x: ["var_829_cast_fp16_1"])
var_847 = const()
nan% C|GN var_848_cast_fp16 = ios16.reduce_mean(axes: ["var_847"], keep_dims: ["var_22"], x: ["var_846_cast_fp16"])
var_849_to_fp16 = const()
nan% C|GN var_850_cast_fp16 = ios17.add(x: ["var_848_cast_fp16"], y: ["var_849_to_fp16"])
var_851_epsilon_0 = const()
nan% C|GN var_851_cast_fp16 = ios17.rsqrt(x: ["var_850_cast_fp16"], epsilon: ["var_851_epsilon_0"])
nan% C|GN var_852_cast_fp16 = ios17.mul(x: ["var_829_cast_fp16_1"], y: ["var_851_cast_fp16"])
model_transformer_layers_5_attn_k_norm_weight_to_fp16 = const()
nan% C|GN key_11_cast_fp16 = ios17.mul(y: ["model_transformer_layers_5_attn_k_norm_weight_to_fp16"], x: ["var_852_cast_fp16"])
nan% C|GN var_869_cast_fp16 = ios17.mul(y: ["var_187_to_fp16"], x: ["query_11_cast_fp16"])
var_870_split_sizes_0 = const()
var_870_axis_0 = const()
nan% C|GN var_870_cast_fp16_0, var_870_cast_fp16_1 = split(axis: ["var_870_axis_0"], x: ["query_11_cast_fp16"], split_sizes: ["var_870_split_sizes_0"])
const_41_promoted_to_fp16 = const()
nan% C|GN var_872_cast_fp16 = ios17.mul(y: ["const_41_promoted_to_fp16"], x: ["var_870_cast_fp16_1"])
var_874_interleave_0 = const()
nan% C|GN var_874_cast_fp16 = ios17.concat(values: ["var_872_cast_fp16", "var_870_cast_fp16_0"], interleave: ["var_874_interleave_0"], axis: ["var_5"])
nan% C|GN var_875_cast_fp16 = ios17.mul(x: ["var_874_cast_fp16"], y: ["var_183_to_fp16"])
nan% C|GN query_float_11_cast_fp16 = ios17.add(y: ["var_875_cast_fp16"], x: ["var_869_cast_fp16"])
nan% C|GN var_881_cast_fp16 = ios17.mul(x: ["key_11_cast_fp16"], y: ["var_187_to_fp16"])
var_882_split_sizes_0 = const()
var_882_axis_0 = const()
nan% C|GN var_882_cast_fp16_0, var_882_cast_fp16_1 = split(axis: ["var_882_axis_0"], split_sizes: ["var_882_split_sizes_0"], x: ["key_11_cast_fp16"])
const_42_promoted_to_fp16 = const()
nan% C|GN var_884_cast_fp16 = ios17.mul(y: ["const_42_promoted_to_fp16"], x: ["var_882_cast_fp16_1"])
var_886_interleave_0 = const()
nan% C|GN var_886_cast_fp16 = ios17.concat(axis: ["var_5"], values: ["var_884_cast_fp16", "var_882_cast_fp16_0"], interleave: ["var_886_interleave_0"])
nan% C|GN var_887_cast_fp16 = ios17.mul(y: ["var_183_to_fp16"], x: ["var_886_cast_fp16"])
nan% C|GN key_float_11_cast_fp16 = ios17.add(x: ["var_881_cast_fp16"], y: ["var_887_cast_fp16"])
reshape_20_shape_0 = const()
nan% C|GN reshape_20_cast_fp16 = ios17.reshape(shape: ["reshape_20_shape_0"], x: ["key_float_11_cast_fp16"])
tile_10_reps_0 = const()
nan% C|GN tile_10_cast_fp16 = tile(reps: ["tile_10_reps_0"], x: ["reshape_20_cast_fp16"])
reshape_21_shape_0 = const()
nan% C|GN reshape_21_cast_fp16 = ios17.reshape(shape: ["reshape_21_shape_0"], x: ["tile_10_cast_fp16"])
transpose_10_perm_0 = const()
keys_23_shape_0 = const()
nan% C|GN transpose_146 = ios17.transpose(perm: ["transpose_10_perm_0"], x: ["reshape_21_cast_fp16"])
nan% C|GN keys_23_cast_fp16 = ios17.reshape(x: ["transpose_146"], shape: ["keys_23_shape_0"])
reshape_22_shape_0 = const()
nan% C|GN reshape_22_cast_fp16 = ios17.reshape(shape: ["reshape_22_shape_0"], x: ["var_829_cast_fp16_2"])
tile_11_reps_0 = const()
nan% C|GN tile_11_cast_fp16 = tile(reps: ["tile_11_reps_0"], x: ["reshape_22_cast_fp16"])
reshape_23_shape_0 = const()
nan% C|GN reshape_23_cast_fp16 = ios17.reshape(shape: ["reshape_23_shape_0"], x: ["tile_11_cast_fp16"])
transpose_11_perm_0 = const()
values_23_shape_0 = const()
nan% C|GN transpose_145 = ios17.transpose(perm: ["transpose_11_perm_0"], x: ["reshape_23_cast_fp16"])
nan% C|GN values_23_cast_fp16 = ios17.reshape(x: ["transpose_145"], shape: ["values_23_shape_0"])
mul_5_y_0_to_fp16 = const()
nan% C|GN mul_5_cast_fp16 = ios17.mul(x: ["query_float_11_cast_fp16"], y: ["mul_5_y_0_to_fp16"])
matmul_5_transpose_y_0 = const()
matmul_5_transpose_x_0 = const()
nan% C|GN matmul_5_cast_fp16 = ios17.matmul(transpose_y: ["matmul_5_transpose_y_0"], x: ["mul_5_cast_fp16"], y: ["keys_23_cast_fp16"], transpose_x: ["matmul_5_transpose_x_0"])
nan% C|GN add_11_cast_fp16 = ios17.add(y: ["causal_mask_3_to_fp16"], x: ["matmul_5_cast_fp16"])
softmax_5_axis_0 = const()
nan% C|GN softmax_5_cast_fp16 = ios16.softmax(axis: ["softmax_5_axis_0"], x: ["add_11_cast_fp16"])
attn_output_21_transpose_x_0 = const()
attn_output_21_transpose_y_0 = const()
nan% C|GN attn_output_21_cast_fp16 = ios17.matmul(y: ["values_23_cast_fp16"], transpose_x: ["attn_output_21_transpose_x_0"], transpose_y: ["attn_output_21_transpose_y_0"], x: ["softmax_5_cast_fp16"])
var_903_perm_0 = const()
var_905 = const()
nan% C|GN transpose_144 = ios17.transpose(perm: ["var_903_perm_0"], x: ["attn_output_21_cast_fp16"])
nan% C|GN input_43_cast_fp16 = ios17.reshape(x: ["transpose_144"], shape: ["var_905"])
model_transformer_layers_5_attn_out_proj_weight_to_fp16 = const()
nan% C|GN linear_21_cast_fp16 = ios17.linear(x: ["input_43_cast_fp16"], weight: ["model_transformer_layers_5_attn_out_proj_weight_to_fp16"], bias: ["linear_1_bias_0_to_fp16"])
nan% C|GN x_115_cast_fp16 = ios17.add(x: ["x_99_cast_fp16"], y: ["linear_21_cast_fp16"])
var_10_promoted_to_fp16_23 = const()
nan% C|GN var_912_cast_fp16 = ios17.pow(y: ["var_10_promoted_to_fp16_23"], x: ["x_115_cast_fp16"])
var_913 = const()
nan% C|GN var_914_cast_fp16 = ios16.reduce_mean(axes: ["var_913"], x: ["var_912_cast_fp16"], keep_dims: ["var_22"])
var_915_to_fp16 = const()
nan% C|GN var_916_cast_fp16 = ios17.add(y: ["var_915_to_fp16"], x: ["var_914_cast_fp16"])
var_917_epsilon_0 = const()
nan% C|GN var_917_cast_fp16 = ios17.rsqrt(x: ["var_916_cast_fp16"], epsilon: ["var_917_epsilon_0"])
nan% C|GN var_918_cast_fp16 = ios17.mul(y: ["var_917_cast_fp16"], x: ["x_115_cast_fp16"])
model_transformer_layers_5_ffn_norm_weight_to_fp16 = const()
nan% C|GN input_45_cast_fp16 = ios17.mul(y: ["model_transformer_layers_5_ffn_norm_weight_to_fp16"], x: ["var_918_cast_fp16"])
model_transformer_layers_5_ffn_proj_1_weight_to_fp16 = const()
linear_22_bias_0_to_fp16 = const()
nan% C|GN linear_22_cast_fp16 = ios17.linear(bias: ["linear_22_bias_0_to_fp16"], x: ["input_45_cast_fp16"], weight: ["model_transformer_layers_5_ffn_proj_1_weight_to_fp16"])
var_928_split_sizes_0 = const()
var_928_axis_0 = const()
nan% C|GN var_928_cast_fp16_0, var_928_cast_fp16_1 = split(split_sizes: ["var_928_split_sizes_0"], x: ["linear_22_cast_fp16"], axis: ["var_928_axis_0"])
nan% C|GN var_930_cast_fp16 = ios16.silu(x: ["var_928_cast_fp16_0"])
nan% C|GN input_49_cast_fp16 = ios17.mul(y: ["var_928_cast_fp16_1"], x: ["var_930_cast_fp16"])
model_transformer_layers_5_ffn_proj_2_weight_to_fp16 = const()
nan% C|GN linear_23_cast_fp16 = ios17.linear(bias: ["linear_1_bias_0_to_fp16"], x: ["input_49_cast_fp16"], weight: ["model_transformer_layers_5_ffn_proj_2_weight_to_fp16"])
nan% C|GN x_119_cast_fp16 = ios17.add(x: ["x_115_cast_fp16"], y: ["linear_23_cast_fp16"])
var_10_promoted_to_fp16_24 = const()
nan% C|GN var_941_cast_fp16 = ios17.pow(x: ["x_119_cast_fp16"], y: ["var_10_promoted_to_fp16_24"])
var_942 = const()
nan% C|GN var_943_cast_fp16 = ios16.reduce_mean(keep_dims: ["var_22"], x: ["var_941_cast_fp16"], axes: ["var_942"])
var_944_to_fp16 = const()
nan% C|GN var_945_cast_fp16 = ios17.add(x: ["var_943_cast_fp16"], y: ["var_944_to_fp16"])
var_946_epsilon_0 = const()
nan% C|GN var_946_cast_fp16 = ios17.rsqrt(epsilon: ["var_946_epsilon_0"], x: ["var_945_cast_fp16"])
nan% C|GN var_947_cast_fp16 = ios17.mul(y: ["var_946_cast_fp16"], x: ["x_119_cast_fp16"])
model_transformer_layers_6_attn_norm_weight_to_fp16 = const()
nan% C|GN hidden_states_37_cast_fp16 = ios17.mul(y: ["model_transformer_layers_6_attn_norm_weight_to_fp16"], x: ["var_947_cast_fp16"])
model_transformer_layers_6_attn_qkv_proj_weight_to_fp16 = const()
nan% C|GN linear_24_cast_fp16 = ios17.linear(weight: ["model_transformer_layers_6_attn_qkv_proj_weight_to_fp16"], x: ["hidden_states_37_cast_fp16"], bias: ["linear_12_bias_0_to_fp16"])
var_961 = const()
nan% C|GN qkv_27_cast_fp16 = ios17.reshape(shape: ["var_961"], x: ["linear_24_cast_fp16"])
var_963_perm_0 = const()
var_964 = const()
var_965_axis_0 = const()
nan% C|GN transpose_143 = ios17.transpose(perm: ["var_963_perm_0"], x: ["qkv_27_cast_fp16"])
nan% C|GN var_965_cast_fp16_0, var_965_cast_fp16_1, var_965_cast_fp16_2 = split(x: ["transpose_143"], split_sizes: ["var_964"], axis: ["var_965_axis_0"])
var_10_promoted_to_fp16_25 = const()
nan% C|GN var_971_cast_fp16 = ios17.pow(x: ["var_965_cast_fp16_0"], y: ["var_10_promoted_to_fp16_25"])
var_972 = const()
nan% C|GN var_973_cast_fp16 = ios16.reduce_mean(keep_dims: ["var_22"], x: ["var_971_cast_fp16"], axes: ["var_972"])
var_974_to_fp16 = const()
nan% C|GN var_975_cast_fp16 = ios17.add(x: ["var_973_cast_fp16"], y: ["var_974_to_fp16"])
var_976_epsilon_0 = const()
nan% C|GN var_976_cast_fp16 = ios17.rsqrt(epsilon: ["var_976_epsilon_0"], x: ["var_975_cast_fp16"])
nan% C|GN var_977_cast_fp16 = ios17.mul(x: ["var_965_cast_fp16_0"], y: ["var_976_cast_fp16"])
model_transformer_layers_6_attn_q_norm_weight_to_fp16 = const()
nan% C|GN query_13_cast_fp16 = ios17.mul(y: ["model_transformer_layers_6_attn_q_norm_weight_to_fp16"], x: ["var_977_cast_fp16"])
var_10_promoted_to_fp16_26 = const()
nan% C|GN var_982_cast_fp16 = ios17.pow(y: ["var_10_promoted_to_fp16_26"], x: ["var_965_cast_fp16_1"])
var_983 = const()
nan% C|GN var_984_cast_fp16 = ios16.reduce_mean(x: ["var_982_cast_fp16"], keep_dims: ["var_22"], axes: ["var_983"])
var_985_to_fp16 = const()
nan% C|GN var_986_cast_fp16 = ios17.add(x: ["var_984_cast_fp16"], y: ["var_985_to_fp16"])
var_987_epsilon_0 = const()
nan% C|GN var_987_cast_fp16 = ios17.rsqrt(epsilon: ["var_987_epsilon_0"], x: ["var_986_cast_fp16"])
nan% C|GN var_988_cast_fp16 = ios17.mul(x: ["var_965_cast_fp16_1"], y: ["var_987_cast_fp16"])
model_transformer_layers_6_attn_k_norm_weight_to_fp16 = const()
nan% C|GN key_13_cast_fp16 = ios17.mul(x: ["var_988_cast_fp16"], y: ["model_transformer_layers_6_attn_k_norm_weight_to_fp16"])
nan% C|GN var_1005_cast_fp16 = ios17.mul(x: ["query_13_cast_fp16"], y: ["var_187_to_fp16"])
var_1006_split_sizes_0 = const()
var_1006_axis_0 = const()
nan% C|GN var_1006_cast_fp16_0, var_1006_cast_fp16_1 = split(axis: ["var_1006_axis_0"], x: ["query_13_cast_fp16"], split_sizes: ["var_1006_split_sizes_0"])
const_48_promoted_to_fp16 = const()
nan% C|GN var_1008_cast_fp16 = ios17.mul(y: ["const_48_promoted_to_fp16"], x: ["var_1006_cast_fp16_1"])
var_1010_interleave_0 = const()
nan% C|GN var_1010_cast_fp16 = ios17.concat(axis: ["var_5"], values: ["var_1008_cast_fp16", "var_1006_cast_fp16_0"], interleave: ["var_1010_interleave_0"])
nan% C|GN var_1011_cast_fp16 = ios17.mul(y: ["var_183_to_fp16"], x: ["var_1010_cast_fp16"])
nan% C|GN query_float_13_cast_fp16 = ios17.add(x: ["var_1005_cast_fp16"], y: ["var_1011_cast_fp16"])
nan% C|GN var_1017_cast_fp16 = ios17.mul(x: ["key_13_cast_fp16"], y: ["var_187_to_fp16"])
var_1018_split_sizes_0 = const()
var_1018_axis_0 = const()
nan% C|GN var_1018_cast_fp16_0, var_1018_cast_fp16_1 = split(x: ["key_13_cast_fp16"], split_sizes: ["var_1018_split_sizes_0"], axis: ["var_1018_axis_0"])
const_49_promoted_to_fp16 = const()
nan% C|GN var_1020_cast_fp16 = ios17.mul(x: ["var_1018_cast_fp16_1"], y: ["const_49_promoted_to_fp16"])
var_1022_interleave_0 = const()
nan% C|GN var_1022_cast_fp16 = ios17.concat(interleave: ["var_1022_interleave_0"], values: ["var_1020_cast_fp16", "var_1018_cast_fp16_0"], axis: ["var_5"])
nan% C|GN var_1023_cast_fp16 = ios17.mul(y: ["var_183_to_fp16"], x: ["var_1022_cast_fp16"])
nan% C|GN key_float_13_cast_fp16 = ios17.add(y: ["var_1023_cast_fp16"], x: ["var_1017_cast_fp16"])
reshape_24_shape_0 = const()
nan% C|GN reshape_24_cast_fp16 = ios17.reshape(shape: ["reshape_24_shape_0"], x: ["key_float_13_cast_fp16"])
tile_12_reps_0 = const()
nan% C|GN tile_12_cast_fp16 = tile(reps: ["tile_12_reps_0"], x: ["reshape_24_cast_fp16"])
reshape_25_shape_0 = const()
nan% C|GN reshape_25_cast_fp16 = ios17.reshape(x: ["tile_12_cast_fp16"], shape: ["reshape_25_shape_0"])
transpose_12_perm_0 = const()
keys_27_shape_0 = const()
nan% C|GN transpose_142 = ios17.transpose(perm: ["transpose_12_perm_0"], x: ["reshape_25_cast_fp16"])
nan% C|GN keys_27_cast_fp16 = ios17.reshape(shape: ["keys_27_shape_0"], x: ["transpose_142"])
reshape_26_shape_0 = const()
nan% C|GN reshape_26_cast_fp16 = ios17.reshape(shape: ["reshape_26_shape_0"], x: ["var_965_cast_fp16_2"])
tile_13_reps_0 = const()
nan% C|GN tile_13_cast_fp16 = tile(x: ["reshape_26_cast_fp16"], reps: ["tile_13_reps_0"])
reshape_27_shape_0 = const()
nan% C|GN reshape_27_cast_fp16 = ios17.reshape(shape: ["reshape_27_shape_0"], x: ["tile_13_cast_fp16"])
transpose_13_perm_0 = const()
values_27_shape_0 = const()
nan% C|GN transpose_141 = ios17.transpose(x: ["reshape_27_cast_fp16"], perm: ["transpose_13_perm_0"])
nan% C|GN values_27_cast_fp16 = ios17.reshape(x: ["transpose_141"], shape: ["values_27_shape_0"])
mul_6_y_0_to_fp16 = const()
nan% C|GN mul_6_cast_fp16 = ios17.mul(x: ["query_float_13_cast_fp16"], y: ["mul_6_y_0_to_fp16"])
matmul_6_transpose_y_0 = const()
matmul_6_transpose_x_0 = const()
nan% C|GN matmul_6_cast_fp16 = ios17.matmul(y: ["keys_27_cast_fp16"], transpose_y: ["matmul_6_transpose_y_0"], transpose_x: ["matmul_6_transpose_x_0"], x: ["mul_6_cast_fp16"])
nan% C|GN add_13_cast_fp16 = ios17.add(y: ["causal_mask_3_to_fp16"], x: ["matmul_6_cast_fp16"])
softmax_6_axis_0 = const()
nan% C|GN softmax_6_cast_fp16 = ios16.softmax(x: ["add_13_cast_fp16"], axis: ["softmax_6_axis_0"])
attn_output_25_transpose_x_0 = const()
attn_output_25_transpose_y_0 = const()
nan% C|GN attn_output_25_cast_fp16 = ios17.matmul(y: ["values_27_cast_fp16"], transpose_y: ["attn_output_25_transpose_y_0"], transpose_x: ["attn_output_25_transpose_x_0"], x: ["softmax_6_cast_fp16"])
var_1039_perm_0 = const()
var_1041 = const()
nan% C|GN transpose_140 = ios17.transpose(x: ["attn_output_25_cast_fp16"], perm: ["var_1039_perm_0"])
nan% C|GN input_51_cast_fp16 = ios17.reshape(x: ["transpose_140"], shape: ["var_1041"])
model_transformer_layers_6_attn_out_proj_weight_to_fp16 = const()
nan% C|GN linear_25_cast_fp16 = ios17.linear(weight: ["model_transformer_layers_6_attn_out_proj_weight_to_fp16"], bias: ["linear_1_bias_0_to_fp16"], x: ["input_51_cast_fp16"])
nan% C|GN x_135_cast_fp16 = ios17.add(y: ["linear_25_cast_fp16"], x: ["x_119_cast_fp16"])
var_10_promoted_to_fp16_27 = const()
nan% C|GN var_1048_cast_fp16 = ios17.pow(x: ["x_135_cast_fp16"], y: ["var_10_promoted_to_fp16_27"])
var_1049 = const()
nan% C|GN var_1050_cast_fp16 = ios16.reduce_mean(keep_dims: ["var_22"], axes: ["var_1049"], x: ["var_1048_cast_fp16"])
var_1051_to_fp16 = const()
nan% C|GN var_1052_cast_fp16 = ios17.add(x: ["var_1050_cast_fp16"], y: ["var_1051_to_fp16"])
var_1053_epsilon_0 = const()
nan% C|GN var_1053_cast_fp16 = ios17.rsqrt(epsilon: ["var_1053_epsilon_0"], x: ["var_1052_cast_fp16"])
nan% C|GN var_1054_cast_fp16 = ios17.mul(x: ["x_135_cast_fp16"], y: ["var_1053_cast_fp16"])
model_transformer_layers_6_ffn_norm_weight_to_fp16 = const()
nan% C|GN input_53_cast_fp16 = ios17.mul(x: ["var_1054_cast_fp16"], y: ["model_transformer_layers_6_ffn_norm_weight_to_fp16"])
model_transformer_layers_6_ffn_proj_1_weight_to_fp16 = const()
linear_26_bias_0_to_fp16 = const()
nan% C|GN linear_26_cast_fp16 = ios17.linear(x: ["input_53_cast_fp16"], weight: ["model_transformer_layers_6_ffn_proj_1_weight_to_fp16"], bias: ["linear_26_bias_0_to_fp16"])
var_1064_split_sizes_0 = const()
var_1064_axis_0 = const()
nan% C|GN var_1064_cast_fp16_0, var_1064_cast_fp16_1 = split(x: ["linear_26_cast_fp16"], split_sizes: ["var_1064_split_sizes_0"], axis: ["var_1064_axis_0"])
nan% C|GN var_1066_cast_fp16 = ios16.silu(x: ["var_1064_cast_fp16_0"])
nan% C|GN input_57_cast_fp16 = ios17.mul(x: ["var_1066_cast_fp16"], y: ["var_1064_cast_fp16_1"])
model_transformer_layers_6_ffn_proj_2_weight_to_fp16 = const()
nan% C|GN linear_27_cast_fp16 = ios17.linear(x: ["input_57_cast_fp16"], bias: ["linear_1_bias_0_to_fp16"], weight: ["model_transformer_layers_6_ffn_proj_2_weight_to_fp16"])
nan% C|GN x_139_cast_fp16 = ios17.add(y: ["linear_27_cast_fp16"], x: ["x_135_cast_fp16"])
var_10_promoted_to_fp16_28 = const()
nan% C|GN var_1077_cast_fp16 = ios17.pow(x: ["x_139_cast_fp16"], y: ["var_10_promoted_to_fp16_28"])
var_1078 = const()
nan% C|GN var_1079_cast_fp16 = ios16.reduce_mean(axes: ["var_1078"], keep_dims: ["var_22"], x: ["var_1077_cast_fp16"])
var_1080_to_fp16 = const()
nan% C|GN var_1081_cast_fp16 = ios17.add(x: ["var_1079_cast_fp16"], y: ["var_1080_to_fp16"])
var_1082_epsilon_0 = const()
nan% C|GN var_1082_cast_fp16 = ios17.rsqrt(epsilon: ["var_1082_epsilon_0"], x: ["var_1081_cast_fp16"])
nan% C|GN var_1083_cast_fp16 = ios17.mul(x: ["x_139_cast_fp16"], y: ["var_1082_cast_fp16"])
model_transformer_layers_7_attn_norm_weight_to_fp16 = const()
nan% C|GN hidden_states_43_cast_fp16 = ios17.mul(x: ["var_1083_cast_fp16"], y: ["model_transformer_layers_7_attn_norm_weight_to_fp16"])
model_transformer_layers_7_attn_qkv_proj_weight_to_fp16 = const()
nan% C|GN linear_28_cast_fp16 = ios17.linear(bias: ["linear_12_bias_0_to_fp16"], weight: ["model_transformer_layers_7_attn_qkv_proj_weight_to_fp16"], x: ["hidden_states_43_cast_fp16"])
var_1097 = const()
nan% C|GN qkv_31_cast_fp16 = ios17.reshape(x: ["linear_28_cast_fp16"], shape: ["var_1097"])
var_1099_perm_0 = const()
var_1100 = const()
var_1101_axis_0 = const()
nan% C|GN transpose_139 = ios17.transpose(perm: ["var_1099_perm_0"], x: ["qkv_31_cast_fp16"])
nan% C|GN var_1101_cast_fp16_0, var_1101_cast_fp16_1, var_1101_cast_fp16_2 = split(split_sizes: ["var_1100"], x: ["transpose_139"], axis: ["var_1101_axis_0"])
var_10_promoted_to_fp16_29 = const()
nan% C|GN var_1107_cast_fp16 = ios17.pow(x: ["var_1101_cast_fp16_0"], y: ["var_10_promoted_to_fp16_29"])
var_1108 = const()
nan% C|GN var_1109_cast_fp16 = ios16.reduce_mean(keep_dims: ["var_22"], x: ["var_1107_cast_fp16"], axes: ["var_1108"])
var_1110_to_fp16 = const()
nan% C|GN var_1111_cast_fp16 = ios17.add(y: ["var_1110_to_fp16"], x: ["var_1109_cast_fp16"])
var_1112_epsilon_0 = const()
nan% C|GN var_1112_cast_fp16 = ios17.rsqrt(x: ["var_1111_cast_fp16"], epsilon: ["var_1112_epsilon_0"])
nan% C|GN var_1113_cast_fp16 = ios17.mul(x: ["var_1101_cast_fp16_0"], y: ["var_1112_cast_fp16"])
model_transformer_layers_7_attn_q_norm_weight_to_fp16 = const()
nan% C|GN query_15_cast_fp16 = ios17.mul(x: ["var_1113_cast_fp16"], y: ["model_transformer_layers_7_attn_q_norm_weight_to_fp16"])
var_10_promoted_to_fp16_30 = const()
nan% C|GN var_1118_cast_fp16 = ios17.pow(x: ["var_1101_cast_fp16_1"], y: ["var_10_promoted_to_fp16_30"])
var_1119 = const()
nan% C|GN var_1120_cast_fp16 = ios16.reduce_mean(keep_dims: ["var_22"], x: ["var_1118_cast_fp16"], axes: ["var_1119"])
var_1121_to_fp16 = const()
nan% C|GN var_1122_cast_fp16 = ios17.add(x: ["var_1120_cast_fp16"], y: ["var_1121_to_fp16"])
var_1123_epsilon_0 = const()
nan% C|GN var_1123_cast_fp16 = ios17.rsqrt(epsilon: ["var_1123_epsilon_0"], x: ["var_1122_cast_fp16"])
nan% C|GN var_1124_cast_fp16 = ios17.mul(x: ["var_1101_cast_fp16_1"], y: ["var_1123_cast_fp16"])
model_transformer_layers_7_attn_k_norm_weight_to_fp16 = const()
nan% C|GN key_15_cast_fp16 = ios17.mul(x: ["var_1124_cast_fp16"], y: ["model_transformer_layers_7_attn_k_norm_weight_to_fp16"])
nan% C|GN var_1141_cast_fp16 = ios17.mul(y: ["var_187_to_fp16"], x: ["query_15_cast_fp16"])
var_1142_split_sizes_0 = const()
var_1142_axis_0 = const()
nan% C|GN var_1142_cast_fp16_0, var_1142_cast_fp16_1 = split(split_sizes: ["var_1142_split_sizes_0"], axis: ["var_1142_axis_0"], x: ["query_15_cast_fp16"])
const_55_promoted_to_fp16 = const()
nan% C|GN var_1144_cast_fp16 = ios17.mul(x: ["var_1142_cast_fp16_1"], y: ["const_55_promoted_to_fp16"])
var_1146_interleave_0 = const()
nan% C|GN var_1146_cast_fp16 = ios17.concat(values: ["var_1144_cast_fp16", "var_1142_cast_fp16_0"], axis: ["var_5"], interleave: ["var_1146_interleave_0"])
nan% C|GN var_1147_cast_fp16 = ios17.mul(x: ["var_1146_cast_fp16"], y: ["var_183_to_fp16"])
nan% C|GN query_float_15_cast_fp16 = ios17.add(y: ["var_1147_cast_fp16"], x: ["var_1141_cast_fp16"])
nan% C|GN var_1153_cast_fp16 = ios17.mul(x: ["key_15_cast_fp16"], y: ["var_187_to_fp16"])
var_1154_split_sizes_0 = const()
var_1154_axis_0 = const()
nan% C|GN var_1154_cast_fp16_0, var_1154_cast_fp16_1 = split(split_sizes: ["var_1154_split_sizes_0"], x: ["key_15_cast_fp16"], axis: ["var_1154_axis_0"])
const_56_promoted_to_fp16 = const()
nan% C|GN var_1156_cast_fp16 = ios17.mul(y: ["const_56_promoted_to_fp16"], x: ["var_1154_cast_fp16_1"])
var_1158_interleave_0 = const()
nan% C|GN var_1158_cast_fp16 = ios17.concat(values: ["var_1156_cast_fp16", "var_1154_cast_fp16_0"], interleave: ["var_1158_interleave_0"], axis: ["var_5"])
nan% C|GN var_1159_cast_fp16 = ios17.mul(y: ["var_183_to_fp16"], x: ["var_1158_cast_fp16"])
nan% C|GN key_float_15_cast_fp16 = ios17.add(y: ["var_1159_cast_fp16"], x: ["var_1153_cast_fp16"])
reshape_28_shape_0 = const()
nan% C|GN reshape_28_cast_fp16 = ios17.reshape(x: ["key_float_15_cast_fp16"], shape: ["reshape_28_shape_0"])
tile_14_reps_0 = const()
nan% C|GN tile_14_cast_fp16 = tile(reps: ["tile_14_reps_0"], x: ["reshape_28_cast_fp16"])
reshape_29_shape_0 = const()
nan% C|GN reshape_29_cast_fp16 = ios17.reshape(shape: ["reshape_29_shape_0"], x: ["tile_14_cast_fp16"])
transpose_14_perm_0 = const()
keys_31_shape_0 = const()
nan% C|GN transpose_138 = ios17.transpose(perm: ["transpose_14_perm_0"], x: ["reshape_29_cast_fp16"])
nan% C|GN keys_31_cast_fp16 = ios17.reshape(shape: ["keys_31_shape_0"], x: ["transpose_138"])
reshape_30_shape_0 = const()
nan% C|GN reshape_30_cast_fp16 = ios17.reshape(x: ["var_1101_cast_fp16_2"], shape: ["reshape_30_shape_0"])
tile_15_reps_0 = const()
nan% C|GN tile_15_cast_fp16 = tile(reps: ["tile_15_reps_0"], x: ["reshape_30_cast_fp16"])
reshape_31_shape_0 = const()
nan% C|GN reshape_31_cast_fp16 = ios17.reshape(x: ["tile_15_cast_fp16"], shape: ["reshape_31_shape_0"])
transpose_15_perm_0 = const()
values_31_shape_0 = const()
nan% C|GN transpose_137 = ios17.transpose(perm: ["transpose_15_perm_0"], x: ["reshape_31_cast_fp16"])
nan% C|GN values_31_cast_fp16 = ios17.reshape(x: ["transpose_137"], shape: ["values_31_shape_0"])
mul_7_y_0_to_fp16 = const()
nan% C|GN mul_7_cast_fp16 = ios17.mul(x: ["query_float_15_cast_fp16"], y: ["mul_7_y_0_to_fp16"])
matmul_7_transpose_y_0 = const()
matmul_7_transpose_x_0 = const()
nan% C|GN matmul_7_cast_fp16 = ios17.matmul(y: ["keys_31_cast_fp16"], transpose_x: ["matmul_7_transpose_x_0"], transpose_y: ["matmul_7_transpose_y_0"], x: ["mul_7_cast_fp16"])
nan% C|GN add_15_cast_fp16 = ios17.add(x: ["matmul_7_cast_fp16"], y: ["causal_mask_3_to_fp16"])
softmax_7_axis_0 = const()
nan% C|GN softmax_7_cast_fp16 = ios16.softmax(axis: ["softmax_7_axis_0"], x: ["add_15_cast_fp16"])
attn_output_29_transpose_x_0 = const()
attn_output_29_transpose_y_0 = const()
nan% C|GN attn_output_29_cast_fp16 = ios17.matmul(y: ["values_31_cast_fp16"], transpose_x: ["attn_output_29_transpose_x_0"], x: ["softmax_7_cast_fp16"], transpose_y: ["attn_output_29_transpose_y_0"])
var_1175_perm_0 = const()
var_1177 = const()
nan% C|GN transpose_136 = ios17.transpose(x: ["attn_output_29_cast_fp16"], perm: ["var_1175_perm_0"])
nan% C|GN input_59_cast_fp16 = ios17.reshape(x: ["transpose_136"], shape: ["var_1177"])
model_transformer_layers_7_attn_out_proj_weight_to_fp16 = const()
nan% C|GN linear_29_cast_fp16 = ios17.linear(bias: ["linear_1_bias_0_to_fp16"], weight: ["model_transformer_layers_7_attn_out_proj_weight_to_fp16"], x: ["input_59_cast_fp16"])
nan% C|GN x_155_cast_fp16 = ios17.add(x: ["x_139_cast_fp16"], y: ["linear_29_cast_fp16"])
var_10_promoted_to_fp16_31 = const()
nan% C|GN var_1184_cast_fp16 = ios17.pow(x: ["x_155_cast_fp16"], y: ["var_10_promoted_to_fp16_31"])
var_1185 = const()
nan% C|GN var_1186_cast_fp16 = ios16.reduce_mean(x: ["var_1184_cast_fp16"], axes: ["var_1185"], keep_dims: ["var_22"])
var_1187_to_fp16 = const()
nan% C|GN var_1188_cast_fp16 = ios17.add(x: ["var_1186_cast_fp16"], y: ["var_1187_to_fp16"])
var_1189_epsilon_0 = const()
nan% C|GN var_1189_cast_fp16 = ios17.rsqrt(epsilon: ["var_1189_epsilon_0"], x: ["var_1188_cast_fp16"])
nan% C|GN var_1190_cast_fp16 = ios17.mul(x: ["x_155_cast_fp16"], y: ["var_1189_cast_fp16"])
model_transformer_layers_7_ffn_norm_weight_to_fp16 = const()
nan% C|GN input_61_cast_fp16 = ios17.mul(y: ["model_transformer_layers_7_ffn_norm_weight_to_fp16"], x: ["var_1190_cast_fp16"])
model_transformer_layers_7_ffn_proj_1_weight_to_fp16 = const()
linear_30_bias_0_to_fp16 = const()
nan% C|GN linear_30_cast_fp16 = ios17.linear(weight: ["model_transformer_layers_7_ffn_proj_1_weight_to_fp16"], bias: ["linear_30_bias_0_to_fp16"], x: ["input_61_cast_fp16"])
var_1200_split_sizes_0 = const()
var_1200_axis_0 = const()
nan% C|GN var_1200_cast_fp16_0, var_1200_cast_fp16_1 = split(axis: ["var_1200_axis_0"], x: ["linear_30_cast_fp16"], split_sizes: ["var_1200_split_sizes_0"])
nan% C|GN var_1202_cast_fp16 = ios16.silu(x: ["var_1200_cast_fp16_0"])
nan% C|GN input_65_cast_fp16 = ios17.mul(y: ["var_1200_cast_fp16_1"], x: ["var_1202_cast_fp16"])
model_transformer_layers_7_ffn_proj_2_weight_to_fp16 = const()
nan% C|GN linear_31_cast_fp16 = ios17.linear(x: ["input_65_cast_fp16"], bias: ["linear_1_bias_0_to_fp16"], weight: ["model_transformer_layers_7_ffn_proj_2_weight_to_fp16"])
nan% C|GN x_159_cast_fp16 = ios17.add(y: ["linear_31_cast_fp16"], x: ["x_155_cast_fp16"])
var_10_promoted_to_fp16_32 = const()
nan% C|GN var_1213_cast_fp16 = ios17.pow(y: ["var_10_promoted_to_fp16_32"], x: ["x_159_cast_fp16"])
var_1214 = const()
nan% C|GN var_1215_cast_fp16 = ios16.reduce_mean(keep_dims: ["var_22"], axes: ["var_1214"], x: ["var_1213_cast_fp16"])
var_1216_to_fp16 = const()