File size: 12,967 Bytes
3eb68fa | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 | layer,module,loss,samples,damp,time
0,self_attn.v_proj,0.0000000055,0.05000,4.851
0,self_attn.k_proj,0.0000000067,0.05000,4.868
0,self_attn.q_proj,0.0000000245,0.05000,4.875
0,self_attn.o_proj,0.0000001023,0.05000,1.463
0,mlp.gate_proj,0.0000014832,0.05000,3.275
0,mlp.up_proj,0.0000012088,0.05000,3.334
0,mlp.down_proj,0.0000007021,0.05000,5.384
1,self_attn.q_proj,0.0000000516,0.05000,4.584
1,self_attn.v_proj,0.0000000140,0.05000,4.683
1,self_attn.k_proj,0.0000000128,0.05000,4.723
1,self_attn.o_proj,0.0000001024,0.05000,1.334
1,mlp.up_proj,0.0000235446,0.05000,3.642
1,mlp.gate_proj,0.0000679487,0.05000,3.677
1,mlp.down_proj,0.0000010552,0.05000,5.334
2,self_attn.k_proj,0.0000000361,0.05000,4.120
2,self_attn.v_proj,0.0000000386,0.05000,4.189
2,self_attn.q_proj,0.0000001411,0.05000,4.244
2,self_attn.o_proj,0.0000002282,0.05000,1.332
2,mlp.gate_proj,0.0000977210,0.05000,3.152
2,mlp.up_proj,0.0000492783,0.05000,3.153
2,mlp.down_proj,0.0000008231,0.05000,5.410
3,self_attn.q_proj,0.0000003171,0.05000,4.748
3,self_attn.k_proj,0.0000000801,0.05000,4.758
3,self_attn.v_proj,0.0000000896,0.05000,4.779
3,self_attn.o_proj,0.0000002193,0.05000,1.414
3,mlp.gate_proj,0.0001480885,0.05000,3.153
3,mlp.up_proj,0.0000745903,0.05000,3.224
3,mlp.down_proj,0.0000013200,0.05000,5.582
4,self_attn.v_proj,0.0000001349,0.05000,5.075
4,self_attn.q_proj,0.0000004968,0.05000,5.101
4,self_attn.k_proj,0.0000001217,0.05000,5.105
4,self_attn.o_proj,0.0000003706,0.05000,1.499
4,mlp.up_proj,0.0001282589,0.05000,3.353
4,mlp.gate_proj,0.0002110812,0.05000,3.373
4,mlp.down_proj,0.0000014401,0.05000,5.379
5,self_attn.q_proj,0.0000005028,0.05000,4.952
5,self_attn.v_proj,0.0000001383,0.05000,4.969
5,self_attn.k_proj,0.0000001241,0.05000,4.978
5,self_attn.o_proj,0.0000003825,0.05000,1.373
5,mlp.up_proj,0.0000865598,0.05000,3.568
5,mlp.gate_proj,0.0001970606,0.05000,3.592
5,mlp.down_proj,0.0000036706,0.05000,5.314
6,self_attn.v_proj,0.0000002469,0.05000,4.978
6,self_attn.q_proj,0.0000008867,0.05000,5.061
6,self_attn.k_proj,0.0000002098,0.05000,5.067
6,self_attn.o_proj,0.0000005215,0.05000,1.377
6,mlp.up_proj,0.0001740953,0.05000,2.960
6,mlp.gate_proj,0.0003093244,0.05000,3.049
6,mlp.down_proj,0.0000451711,0.05000,5.501
7,self_attn.q_proj,0.0000032589,0.05000,4.941
7,self_attn.k_proj,0.0000007494,0.05000,4.953
7,self_attn.v_proj,0.0000008548,0.05000,4.969
7,self_attn.o_proj,0.0000011350,0.05000,1.371
7,mlp.gate_proj,0.0002542938,0.05000,3.356
7,mlp.up_proj,0.0001292021,0.05000,3.370
7,mlp.down_proj,0.0000053761,0.05000,5.684
8,self_attn.v_proj,0.0000008264,0.05000,4.899
8,self_attn.q_proj,0.0000030318,0.05000,4.924
8,self_attn.k_proj,0.0000007100,0.05000,4.975
8,self_attn.o_proj,0.0000013708,0.05000,1.409
8,mlp.up_proj,0.0000463701,0.05000,3.645
8,mlp.gate_proj,0.0000779276,0.05000,3.699
8,mlp.down_proj,0.0000070508,0.05000,5.465
9,self_attn.v_proj,0.0000006791,0.05000,5.006
9,self_attn.q_proj,0.0000025509,0.05000,5.023
9,self_attn.k_proj,0.0000006144,0.05000,5.032
9,self_attn.o_proj,0.0000013103,0.05000,1.424
9,mlp.gate_proj,0.0000426071,0.05000,3.332
9,mlp.up_proj,0.0000384853,0.05000,3.368
9,mlp.down_proj,0.0000094974,0.05000,5.585
10,self_attn.k_proj,0.0000008871,0.05000,5.058
10,self_attn.v_proj,0.0000010250,0.05000,5.070
10,self_attn.q_proj,0.0000039080,0.05000,5.100
10,self_attn.o_proj,0.0000023923,0.05000,1.394
10,mlp.gate_proj,0.0000476698,0.05000,2.910
10,mlp.up_proj,0.0000438941,0.05000,2.918
10,mlp.down_proj,0.0000099568,0.05000,5.422
11,self_attn.v_proj,0.0000016619,0.05000,5.104
11,self_attn.q_proj,0.0000061273,0.05000,5.127
11,self_attn.k_proj,0.0000014110,0.05000,5.151
11,self_attn.o_proj,0.0000034863,0.05000,1.411
11,mlp.up_proj,0.0000549275,0.05000,3.648
11,mlp.gate_proj,0.0000592392,0.05000,3.697
11,mlp.down_proj,0.0000136331,0.05000,5.316
12,self_attn.k_proj,0.0000015385,0.05000,4.950
12,self_attn.v_proj,0.0000017779,0.05000,4.997
12,self_attn.q_proj,0.0000064274,0.05000,5.019
12,self_attn.o_proj,0.0000026011,0.05000,1.423
12,mlp.gate_proj,0.0000721614,0.05000,3.257
12,mlp.up_proj,0.0000664742,0.05000,3.284
12,mlp.down_proj,0.0000165714,0.05000,5.587
13,self_attn.v_proj,0.0000035299,0.05000,4.510
13,self_attn.q_proj,0.0000127921,0.05000,4.531
13,self_attn.k_proj,0.0000029702,0.05000,4.564
13,self_attn.o_proj,0.0000040227,0.05000,1.394
13,mlp.up_proj,0.0000723846,0.05000,2.925
13,mlp.gate_proj,0.0000838850,0.05000,2.940
13,mlp.down_proj,0.0000187251,0.05000,5.496
14,self_attn.v_proj,0.0000024263,0.05000,4.790
14,self_attn.q_proj,0.0000085204,0.05000,4.817
14,self_attn.k_proj,0.0000020249,0.05000,4.848
14,self_attn.o_proj,0.0000040741,0.05000,1.353
14,mlp.gate_proj,0.0000798361,0.05000,2.961
14,mlp.up_proj,0.0000716579,0.05000,2.974
14,mlp.down_proj,0.0000192025,0.05000,5.455
15,self_attn.v_proj,0.0000024046,0.05000,4.509
15,self_attn.k_proj,0.0000021407,0.05000,4.558
15,self_attn.q_proj,0.0000092248,0.05000,4.569
15,self_attn.o_proj,0.0000045808,0.05000,1.348
15,mlp.up_proj,0.0000751190,0.05000,3.045
15,mlp.gate_proj,0.0000782121,0.05000,3.079
15,mlp.down_proj,0.0000209370,0.05000,5.471
16,self_attn.q_proj,0.0000100945,0.05000,5.142
16,self_attn.v_proj,0.0000027845,0.05000,5.142
16,self_attn.k_proj,0.0000023761,0.05000,5.241
16,self_attn.o_proj,0.0000062379,0.05000,1.380
16,mlp.up_proj,0.0000748809,0.05000,2.870
16,mlp.gate_proj,0.0000727667,0.05000,2.884
16,mlp.down_proj,0.0000219730,0.05000,5.353
17,self_attn.k_proj,0.0000028254,0.05000,4.584
17,self_attn.q_proj,0.0000126655,0.05000,4.615
17,self_attn.v_proj,0.0000032815,0.05000,4.642
17,self_attn.o_proj,0.0000061028,0.05000,1.371
17,mlp.gate_proj,0.0000801209,0.05000,3.312
17,mlp.up_proj,0.0000837841,0.05000,3.340
17,mlp.down_proj,0.0000245772,0.05000,5.519
18,self_attn.v_proj,0.0000047651,0.05000,4.578
18,self_attn.k_proj,0.0000041778,0.05000,4.587
18,self_attn.q_proj,0.0000184517,0.05000,4.621
18,self_attn.o_proj,0.0000063895,0.05000,1.366
18,mlp.up_proj,0.0000915147,0.05000,3.025
18,mlp.gate_proj,0.0000842209,0.05000,3.032
18,mlp.down_proj,0.0000275443,0.05000,5.559
19,self_attn.k_proj,0.0000049127,0.05000,4.855
19,self_attn.v_proj,0.0000055556,0.05000,4.940
19,self_attn.q_proj,0.0000216752,0.05000,4.962
19,self_attn.o_proj,0.0000083279,0.05000,1.398
19,mlp.up_proj,0.0001011631,0.05000,2.898
19,mlp.gate_proj,0.0000937019,0.05000,2.930
19,mlp.down_proj,0.0000327259,0.05000,5.399
20,self_attn.v_proj,0.0000109982,0.05000,4.754
20,self_attn.k_proj,0.0000086623,0.05000,4.805
20,self_attn.q_proj,0.0000405637,0.05000,4.808
20,self_attn.o_proj,0.0000105337,0.05000,1.392
20,mlp.gate_proj,0.0000998617,0.05000,3.189
20,mlp.up_proj,0.0001093770,0.05000,3.217
20,mlp.down_proj,0.0000415528,0.05000,5.438
21,self_attn.k_proj,0.0000117670,0.05000,4.439
21,self_attn.q_proj,0.0000514724,0.05000,4.453
21,self_attn.v_proj,0.0000140596,0.05000,4.516
21,self_attn.o_proj,0.0000151081,0.05000,1.453
21,mlp.gate_proj,0.0001088591,0.05000,2.926
21,mlp.up_proj,0.0001167877,0.05000,2.938
21,mlp.down_proj,0.0000477426,0.05000,5.551
22,self_attn.v_proj,0.0000117180,0.05000,4.538
22,self_attn.k_proj,0.0000094837,0.05000,4.573
22,self_attn.q_proj,0.0000441959,0.05000,4.617
22,self_attn.o_proj,0.0000190037,0.05000,1.410
22,mlp.up_proj,0.0001366769,0.05000,3.587
22,mlp.gate_proj,0.0001259929,0.05000,3.622
22,mlp.down_proj,0.0000632469,0.05000,5.323
23,self_attn.v_proj,0.0000206423,0.05000,5.066
23,self_attn.q_proj,0.0000767674,0.05000,5.099
23,self_attn.k_proj,0.0000157870,0.05000,5.111
23,self_attn.o_proj,0.0000160790,0.05000,1.398
23,mlp.up_proj,0.0001518091,0.05000,3.378
23,mlp.gate_proj,0.0001383609,0.05000,3.395
23,mlp.down_proj,0.0000856998,0.05000,5.398
24,self_attn.v_proj,0.0000329967,0.05000,5.030
24,self_attn.k_proj,0.0000237478,0.05000,5.054
24,self_attn.q_proj,0.0001217306,0.05000,5.056
24,self_attn.o_proj,0.0000220392,0.05000,1.360
24,mlp.gate_proj,0.0001632924,0.05000,3.625
24,mlp.up_proj,0.0001751961,0.05000,3.657
24,mlp.down_proj,0.0000941419,0.05000,5.674
25,self_attn.k_proj,0.0000201400,0.05000,4.789
25,self_attn.q_proj,0.0000949212,0.05000,4.936
25,self_attn.v_proj,0.0000253956,0.05000,4.953
25,self_attn.o_proj,0.0000235002,0.05000,1.350
25,mlp.up_proj,0.0002028768,0.05000,3.754
25,mlp.gate_proj,0.0001922596,0.05000,3.767
25,mlp.down_proj,0.0001267621,0.05000,5.595
26,self_attn.k_proj,0.0000225750,0.05000,4.881
26,self_attn.v_proj,0.0000295418,0.05000,4.921
26,self_attn.q_proj,0.0001108223,0.05000,4.949
26,self_attn.o_proj,0.0000265471,0.05000,1.385
26,mlp.up_proj,0.0002334800,0.05000,3.388
26,mlp.gate_proj,0.0002268478,0.05000,3.422
26,mlp.down_proj,0.0001893703,0.05000,5.634
27,self_attn.q_proj,0.0001638606,0.05000,5.063
27,self_attn.v_proj,0.0000431965,0.05000,5.074
27,self_attn.k_proj,0.0000341267,0.05000,5.090
27,self_attn.o_proj,0.0000498931,0.05000,1.395
27,mlp.up_proj,0.0002877421,0.05000,3.456
27,mlp.gate_proj,0.0002798097,0.05000,3.483
27,mlp.down_proj,0.0002758613,0.05000,5.413
28,self_attn.q_proj,0.0002983511,0.05000,5.019
28,self_attn.k_proj,0.0000565795,0.05000,5.021
28,self_attn.v_proj,0.0000813798,0.05000,5.037
28,self_attn.o_proj,0.0000459752,0.05000,1.567
28,mlp.gate_proj,0.0003511385,0.05000,3.554
28,mlp.up_proj,0.0003660403,0.05000,3.605
28,mlp.down_proj,0.0003470357,0.05000,5.451
29,self_attn.v_proj,0.0000988175,0.05000,4.698
29,self_attn.q_proj,0.0003714425,0.05000,4.765
29,self_attn.k_proj,0.0000788545,0.05000,4.812
29,self_attn.o_proj,0.0000517854,0.05000,1.332
29,mlp.up_proj,0.0004072312,0.05000,3.735
29,mlp.gate_proj,0.0003876266,0.05000,3.765
29,mlp.down_proj,0.0004905408,0.05000,5.507
30,self_attn.q_proj,0.0005447937,0.05000,4.696
30,self_attn.v_proj,0.0001534902,0.05000,4.769
30,self_attn.k_proj,0.0001148308,0.05000,4.786
30,self_attn.o_proj,0.0000775836,0.05000,1.395
30,mlp.gate_proj,0.0004827204,0.05000,3.138
30,mlp.up_proj,0.0005053384,0.05000,3.150
30,mlp.down_proj,0.0005774746,0.05000,5.607
31,self_attn.k_proj,0.0001142838,0.05000,4.573
31,self_attn.v_proj,0.0001417302,0.05000,4.617
31,self_attn.q_proj,0.0005521921,0.05000,4.641
31,self_attn.o_proj,0.0000683407,0.05000,1.471
31,mlp.gate_proj,0.0005245574,0.05000,3.371
31,mlp.up_proj,0.0005602224,0.05000,3.391
31,mlp.down_proj,0.0007208518,0.05000,5.466
32,self_attn.k_proj,0.0002052982,0.05000,4.842
32,self_attn.q_proj,0.0009621031,0.05000,4.916
32,self_attn.v_proj,0.0002829138,0.05000,4.930
32,self_attn.o_proj,0.0000929007,0.05000,1.420
32,mlp.gate_proj,0.0005781620,0.05000,3.086
32,mlp.up_proj,0.0006268456,0.05000,3.146
32,mlp.down_proj,0.0007982043,0.05000,5.426
33,self_attn.v_proj,0.0003873876,0.05000,4.661
33,self_attn.q_proj,0.0013329935,0.05000,4.679
33,self_attn.k_proj,0.0002684569,0.05000,4.699
33,self_attn.o_proj,0.0001004743,0.05000,1.397
33,mlp.gate_proj,0.0006186912,0.05000,3.072
33,mlp.up_proj,0.0006873148,0.05000,3.080
33,mlp.down_proj,0.0008831521,0.05000,5.923
34,self_attn.k_proj,0.0004681831,0.05000,4.314
34,self_attn.q_proj,0.0022188602,0.05000,4.330
34,self_attn.v_proj,0.0006818156,0.05000,4.369
34,self_attn.o_proj,0.0001390811,0.05000,1.400
34,mlp.gate_proj,0.0006815079,0.05000,2.850
34,mlp.up_proj,0.0007714760,0.05000,2.856
34,mlp.down_proj,0.0010444433,0.05000,5.671
35,self_attn.v_proj,0.0009464067,0.05000,4.743
35,self_attn.q_proj,0.0028209405,0.05000,4.756
35,self_attn.k_proj,0.0005987322,0.05000,4.770
35,self_attn.o_proj,0.0001368956,0.05000,1.364
35,mlp.up_proj,0.0008114978,0.05000,3.085
35,mlp.gate_proj,0.0007047556,0.05000,3.087
35,mlp.down_proj,0.0012339309,0.05000,5.406
36,self_attn.k_proj,0.0005470300,0.05000,4.830
36,self_attn.q_proj,0.0024462599,0.05000,4.882
36,self_attn.v_proj,0.0008094013,0.05000,4.888
36,self_attn.o_proj,0.0002597887,0.05000,1.454
36,mlp.up_proj,0.0008480489,0.05000,3.371
36,mlp.gate_proj,0.0007133146,0.05000,3.388
36,mlp.down_proj,0.0015971187,0.05000,5.952
37,self_attn.q_proj,0.0035302981,0.05000,4.597
37,self_attn.k_proj,0.0007176153,0.05000,4.631
37,self_attn.v_proj,0.0012357151,0.05000,4.644
37,self_attn.o_proj,0.0002945466,0.05000,1.404
37,mlp.gate_proj,0.0007103522,0.05000,3.077
37,mlp.up_proj,0.0008592182,0.05000,3.084
37,mlp.down_proj,0.0022288549,0.05000,5.345
38,self_attn.v_proj,0.0011880093,0.05000,4.358
38,self_attn.q_proj,0.0031908930,0.05000,4.372
38,self_attn.k_proj,0.0006751707,0.05000,4.399
38,self_attn.o_proj,0.0003943942,0.05000,1.340
38,mlp.gate_proj,0.0008062866,0.05000,3.148
38,mlp.up_proj,0.0009217460,0.05000,3.174
38,mlp.down_proj,0.0034524150,0.05000,5.362
39,self_attn.k_proj,0.0002554454,0.05000,4.257
39,self_attn.v_proj,0.0003735272,0.05000,4.294
39,self_attn.q_proj,0.0012166777,0.05000,4.295
39,self_attn.o_proj,0.0003847522,0.05000,1.344
39,mlp.gate_proj,0.0009165438,0.05000,3.561
39,mlp.up_proj,0.0010139998,0.05000,3.578
39,mlp.down_proj,0.0066875947,0.05000,5.467
|