File size: 12,967 Bytes
3eb68fa
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
layer,module,loss,damp,time
0,self_attn.v_proj,0.0000000055,0.05000,4.851
0,self_attn.k_proj,0.0000000067,0.05000,4.868
0,self_attn.q_proj,0.0000000245,0.05000,4.875
0,self_attn.o_proj,0.0000001023,0.05000,1.463
0,mlp.gate_proj,0.0000014832,0.05000,3.275
0,mlp.up_proj,0.0000012088,0.05000,3.334
0,mlp.down_proj,0.0000007021,0.05000,5.384
1,self_attn.q_proj,0.0000000516,0.05000,4.584
1,self_attn.v_proj,0.0000000140,0.05000,4.683
1,self_attn.k_proj,0.0000000128,0.05000,4.723
1,self_attn.o_proj,0.0000001024,0.05000,1.334
1,mlp.up_proj,0.0000235446,0.05000,3.642
1,mlp.gate_proj,0.0000679487,0.05000,3.677
1,mlp.down_proj,0.0000010552,0.05000,5.334
2,self_attn.k_proj,0.0000000361,0.05000,4.120
2,self_attn.v_proj,0.0000000386,0.05000,4.189
2,self_attn.q_proj,0.0000001411,0.05000,4.244
2,self_attn.o_proj,0.0000002282,0.05000,1.332
2,mlp.gate_proj,0.0000977210,0.05000,3.152
2,mlp.up_proj,0.0000492783,0.05000,3.153
2,mlp.down_proj,0.0000008231,0.05000,5.410
3,self_attn.q_proj,0.0000003171,0.05000,4.748
3,self_attn.k_proj,0.0000000801,0.05000,4.758
3,self_attn.v_proj,0.0000000896,0.05000,4.779
3,self_attn.o_proj,0.0000002193,0.05000,1.414
3,mlp.gate_proj,0.0001480885,0.05000,3.153
3,mlp.up_proj,0.0000745903,0.05000,3.224
3,mlp.down_proj,0.0000013200,0.05000,5.582
4,self_attn.v_proj,0.0000001349,0.05000,5.075
4,self_attn.q_proj,0.0000004968,0.05000,5.101
4,self_attn.k_proj,0.0000001217,0.05000,5.105
4,self_attn.o_proj,0.0000003706,0.05000,1.499
4,mlp.up_proj,0.0001282589,0.05000,3.353
4,mlp.gate_proj,0.0002110812,0.05000,3.373
4,mlp.down_proj,0.0000014401,0.05000,5.379
5,self_attn.q_proj,0.0000005028,0.05000,4.952
5,self_attn.v_proj,0.0000001383,0.05000,4.969
5,self_attn.k_proj,0.0000001241,0.05000,4.978
5,self_attn.o_proj,0.0000003825,0.05000,1.373
5,mlp.up_proj,0.0000865598,0.05000,3.568
5,mlp.gate_proj,0.0001970606,0.05000,3.592
5,mlp.down_proj,0.0000036706,0.05000,5.314
6,self_attn.v_proj,0.0000002469,0.05000,4.978
6,self_attn.q_proj,0.0000008867,0.05000,5.061
6,self_attn.k_proj,0.0000002098,0.05000,5.067
6,self_attn.o_proj,0.0000005215,0.05000,1.377
6,mlp.up_proj,0.0001740953,0.05000,2.960
6,mlp.gate_proj,0.0003093244,0.05000,3.049
6,mlp.down_proj,0.0000451711,0.05000,5.501
7,self_attn.q_proj,0.0000032589,0.05000,4.941
7,self_attn.k_proj,0.0000007494,0.05000,4.953
7,self_attn.v_proj,0.0000008548,0.05000,4.969
7,self_attn.o_proj,0.0000011350,0.05000,1.371
7,mlp.gate_proj,0.0002542938,0.05000,3.356
7,mlp.up_proj,0.0001292021,0.05000,3.370
7,mlp.down_proj,0.0000053761,0.05000,5.684
8,self_attn.v_proj,0.0000008264,0.05000,4.899
8,self_attn.q_proj,0.0000030318,0.05000,4.924
8,self_attn.k_proj,0.0000007100,0.05000,4.975
8,self_attn.o_proj,0.0000013708,0.05000,1.409
8,mlp.up_proj,0.0000463701,0.05000,3.645
8,mlp.gate_proj,0.0000779276,0.05000,3.699
8,mlp.down_proj,0.0000070508,0.05000,5.465
9,self_attn.v_proj,0.0000006791,0.05000,5.006
9,self_attn.q_proj,0.0000025509,0.05000,5.023
9,self_attn.k_proj,0.0000006144,0.05000,5.032
9,self_attn.o_proj,0.0000013103,0.05000,1.424
9,mlp.gate_proj,0.0000426071,0.05000,3.332
9,mlp.up_proj,0.0000384853,0.05000,3.368
9,mlp.down_proj,0.0000094974,0.05000,5.585
10,self_attn.k_proj,0.0000008871,0.05000,5.058
10,self_attn.v_proj,0.0000010250,0.05000,5.070
10,self_attn.q_proj,0.0000039080,0.05000,5.100
10,self_attn.o_proj,0.0000023923,0.05000,1.394
10,mlp.gate_proj,0.0000476698,0.05000,2.910
10,mlp.up_proj,0.0000438941,0.05000,2.918
10,mlp.down_proj,0.0000099568,0.05000,5.422
11,self_attn.v_proj,0.0000016619,0.05000,5.104
11,self_attn.q_proj,0.0000061273,0.05000,5.127
11,self_attn.k_proj,0.0000014110,0.05000,5.151
11,self_attn.o_proj,0.0000034863,0.05000,1.411
11,mlp.up_proj,0.0000549275,0.05000,3.648
11,mlp.gate_proj,0.0000592392,0.05000,3.697
11,mlp.down_proj,0.0000136331,0.05000,5.316
12,self_attn.k_proj,0.0000015385,0.05000,4.950
12,self_attn.v_proj,0.0000017779,0.05000,4.997
12,self_attn.q_proj,0.0000064274,0.05000,5.019
12,self_attn.o_proj,0.0000026011,0.05000,1.423
12,mlp.gate_proj,0.0000721614,0.05000,3.257
12,mlp.up_proj,0.0000664742,0.05000,3.284
12,mlp.down_proj,0.0000165714,0.05000,5.587
13,self_attn.v_proj,0.0000035299,0.05000,4.510
13,self_attn.q_proj,0.0000127921,0.05000,4.531
13,self_attn.k_proj,0.0000029702,0.05000,4.564
13,self_attn.o_proj,0.0000040227,0.05000,1.394
13,mlp.up_proj,0.0000723846,0.05000,2.925
13,mlp.gate_proj,0.0000838850,0.05000,2.940
13,mlp.down_proj,0.0000187251,0.05000,5.496
14,self_attn.v_proj,0.0000024263,0.05000,4.790
14,self_attn.q_proj,0.0000085204,0.05000,4.817
14,self_attn.k_proj,0.0000020249,0.05000,4.848
14,self_attn.o_proj,0.0000040741,0.05000,1.353
14,mlp.gate_proj,0.0000798361,0.05000,2.961
14,mlp.up_proj,0.0000716579,0.05000,2.974
14,mlp.down_proj,0.0000192025,0.05000,5.455
15,self_attn.v_proj,0.0000024046,0.05000,4.509
15,self_attn.k_proj,0.0000021407,0.05000,4.558
15,self_attn.q_proj,0.0000092248,0.05000,4.569
15,self_attn.o_proj,0.0000045808,0.05000,1.348
15,mlp.up_proj,0.0000751190,0.05000,3.045
15,mlp.gate_proj,0.0000782121,0.05000,3.079
15,mlp.down_proj,0.0000209370,0.05000,5.471
16,self_attn.q_proj,0.0000100945,0.05000,5.142
16,self_attn.v_proj,0.0000027845,0.05000,5.142
16,self_attn.k_proj,0.0000023761,0.05000,5.241
16,self_attn.o_proj,0.0000062379,0.05000,1.380
16,mlp.up_proj,0.0000748809,0.05000,2.870
16,mlp.gate_proj,0.0000727667,0.05000,2.884
16,mlp.down_proj,0.0000219730,0.05000,5.353
17,self_attn.k_proj,0.0000028254,0.05000,4.584
17,self_attn.q_proj,0.0000126655,0.05000,4.615
17,self_attn.v_proj,0.0000032815,0.05000,4.642
17,self_attn.o_proj,0.0000061028,0.05000,1.371
17,mlp.gate_proj,0.0000801209,0.05000,3.312
17,mlp.up_proj,0.0000837841,0.05000,3.340
17,mlp.down_proj,0.0000245772,0.05000,5.519
18,self_attn.v_proj,0.0000047651,0.05000,4.578
18,self_attn.k_proj,0.0000041778,0.05000,4.587
18,self_attn.q_proj,0.0000184517,0.05000,4.621
18,self_attn.o_proj,0.0000063895,0.05000,1.366
18,mlp.up_proj,0.0000915147,0.05000,3.025
18,mlp.gate_proj,0.0000842209,0.05000,3.032
18,mlp.down_proj,0.0000275443,0.05000,5.559
19,self_attn.k_proj,0.0000049127,0.05000,4.855
19,self_attn.v_proj,0.0000055556,0.05000,4.940
19,self_attn.q_proj,0.0000216752,0.05000,4.962
19,self_attn.o_proj,0.0000083279,0.05000,1.398
19,mlp.up_proj,0.0001011631,0.05000,2.898
19,mlp.gate_proj,0.0000937019,0.05000,2.930
19,mlp.down_proj,0.0000327259,0.05000,5.399
20,self_attn.v_proj,0.0000109982,0.05000,4.754
20,self_attn.k_proj,0.0000086623,0.05000,4.805
20,self_attn.q_proj,0.0000405637,0.05000,4.808
20,self_attn.o_proj,0.0000105337,0.05000,1.392
20,mlp.gate_proj,0.0000998617,0.05000,3.189
20,mlp.up_proj,0.0001093770,0.05000,3.217
20,mlp.down_proj,0.0000415528,0.05000,5.438
21,self_attn.k_proj,0.0000117670,0.05000,4.439
21,self_attn.q_proj,0.0000514724,0.05000,4.453
21,self_attn.v_proj,0.0000140596,0.05000,4.516
21,self_attn.o_proj,0.0000151081,0.05000,1.453
21,mlp.gate_proj,0.0001088591,0.05000,2.926
21,mlp.up_proj,0.0001167877,0.05000,2.938
21,mlp.down_proj,0.0000477426,0.05000,5.551
22,self_attn.v_proj,0.0000117180,0.05000,4.538
22,self_attn.k_proj,0.0000094837,0.05000,4.573
22,self_attn.q_proj,0.0000441959,0.05000,4.617
22,self_attn.o_proj,0.0000190037,0.05000,1.410
22,mlp.up_proj,0.0001366769,0.05000,3.587
22,mlp.gate_proj,0.0001259929,0.05000,3.622
22,mlp.down_proj,0.0000632469,0.05000,5.323
23,self_attn.v_proj,0.0000206423,0.05000,5.066
23,self_attn.q_proj,0.0000767674,0.05000,5.099
23,self_attn.k_proj,0.0000157870,0.05000,5.111
23,self_attn.o_proj,0.0000160790,0.05000,1.398
23,mlp.up_proj,0.0001518091,0.05000,3.378
23,mlp.gate_proj,0.0001383609,0.05000,3.395
23,mlp.down_proj,0.0000856998,0.05000,5.398
24,self_attn.v_proj,0.0000329967,0.05000,5.030
24,self_attn.k_proj,0.0000237478,0.05000,5.054
24,self_attn.q_proj,0.0001217306,0.05000,5.056
24,self_attn.o_proj,0.0000220392,0.05000,1.360
24,mlp.gate_proj,0.0001632924,0.05000,3.625
24,mlp.up_proj,0.0001751961,0.05000,3.657
24,mlp.down_proj,0.0000941419,0.05000,5.674
25,self_attn.k_proj,0.0000201400,0.05000,4.789
25,self_attn.q_proj,0.0000949212,0.05000,4.936
25,self_attn.v_proj,0.0000253956,0.05000,4.953
25,self_attn.o_proj,0.0000235002,0.05000,1.350
25,mlp.up_proj,0.0002028768,0.05000,3.754
25,mlp.gate_proj,0.0001922596,0.05000,3.767
25,mlp.down_proj,0.0001267621,0.05000,5.595
26,self_attn.k_proj,0.0000225750,0.05000,4.881
26,self_attn.v_proj,0.0000295418,0.05000,4.921
26,self_attn.q_proj,0.0001108223,0.05000,4.949
26,self_attn.o_proj,0.0000265471,0.05000,1.385
26,mlp.up_proj,0.0002334800,0.05000,3.388
26,mlp.gate_proj,0.0002268478,0.05000,3.422
26,mlp.down_proj,0.0001893703,0.05000,5.634
27,self_attn.q_proj,0.0001638606,0.05000,5.063
27,self_attn.v_proj,0.0000431965,0.05000,5.074
27,self_attn.k_proj,0.0000341267,0.05000,5.090
27,self_attn.o_proj,0.0000498931,0.05000,1.395
27,mlp.up_proj,0.0002877421,0.05000,3.456
27,mlp.gate_proj,0.0002798097,0.05000,3.483
27,mlp.down_proj,0.0002758613,0.05000,5.413
28,self_attn.q_proj,0.0002983511,0.05000,5.019
28,self_attn.k_proj,0.0000565795,0.05000,5.021
28,self_attn.v_proj,0.0000813798,0.05000,5.037
28,self_attn.o_proj,0.0000459752,0.05000,1.567
28,mlp.gate_proj,0.0003511385,0.05000,3.554
28,mlp.up_proj,0.0003660403,0.05000,3.605
28,mlp.down_proj,0.0003470357,0.05000,5.451
29,self_attn.v_proj,0.0000988175,0.05000,4.698
29,self_attn.q_proj,0.0003714425,0.05000,4.765
29,self_attn.k_proj,0.0000788545,0.05000,4.812
29,self_attn.o_proj,0.0000517854,0.05000,1.332
29,mlp.up_proj,0.0004072312,0.05000,3.735
29,mlp.gate_proj,0.0003876266,0.05000,3.765
29,mlp.down_proj,0.0004905408,0.05000,5.507
30,self_attn.q_proj,0.0005447937,0.05000,4.696
30,self_attn.v_proj,0.0001534902,0.05000,4.769
30,self_attn.k_proj,0.0001148308,0.05000,4.786
30,self_attn.o_proj,0.0000775836,0.05000,1.395
30,mlp.gate_proj,0.0004827204,0.05000,3.138
30,mlp.up_proj,0.0005053384,0.05000,3.150
30,mlp.down_proj,0.0005774746,0.05000,5.607
31,self_attn.k_proj,0.0001142838,0.05000,4.573
31,self_attn.v_proj,0.0001417302,0.05000,4.617
31,self_attn.q_proj,0.0005521921,0.05000,4.641
31,self_attn.o_proj,0.0000683407,0.05000,1.471
31,mlp.gate_proj,0.0005245574,0.05000,3.371
31,mlp.up_proj,0.0005602224,0.05000,3.391
31,mlp.down_proj,0.0007208518,0.05000,5.466
32,self_attn.k_proj,0.0002052982,0.05000,4.842
32,self_attn.q_proj,0.0009621031,0.05000,4.916
32,self_attn.v_proj,0.0002829138,0.05000,4.930
32,self_attn.o_proj,0.0000929007,0.05000,1.420
32,mlp.gate_proj,0.0005781620,0.05000,3.086
32,mlp.up_proj,0.0006268456,0.05000,3.146
32,mlp.down_proj,0.0007982043,0.05000,5.426
33,self_attn.v_proj,0.0003873876,0.05000,4.661
33,self_attn.q_proj,0.0013329935,0.05000,4.679
33,self_attn.k_proj,0.0002684569,0.05000,4.699
33,self_attn.o_proj,0.0001004743,0.05000,1.397
33,mlp.gate_proj,0.0006186912,0.05000,3.072
33,mlp.up_proj,0.0006873148,0.05000,3.080
33,mlp.down_proj,0.0008831521,0.05000,5.923
34,self_attn.k_proj,0.0004681831,0.05000,4.314
34,self_attn.q_proj,0.0022188602,0.05000,4.330
34,self_attn.v_proj,0.0006818156,0.05000,4.369
34,self_attn.o_proj,0.0001390811,0.05000,1.400
34,mlp.gate_proj,0.0006815079,0.05000,2.850
34,mlp.up_proj,0.0007714760,0.05000,2.856
34,mlp.down_proj,0.0010444433,0.05000,5.671
35,self_attn.v_proj,0.0009464067,0.05000,4.743
35,self_attn.q_proj,0.0028209405,0.05000,4.756
35,self_attn.k_proj,0.0005987322,0.05000,4.770
35,self_attn.o_proj,0.0001368956,0.05000,1.364
35,mlp.up_proj,0.0008114978,0.05000,3.085
35,mlp.gate_proj,0.0007047556,0.05000,3.087
35,mlp.down_proj,0.0012339309,0.05000,5.406
36,self_attn.k_proj,0.0005470300,0.05000,4.830
36,self_attn.q_proj,0.0024462599,0.05000,4.882
36,self_attn.v_proj,0.0008094013,0.05000,4.888
36,self_attn.o_proj,0.0002597887,0.05000,1.454
36,mlp.up_proj,0.0008480489,0.05000,3.371
36,mlp.gate_proj,0.0007133146,0.05000,3.388
36,mlp.down_proj,0.0015971187,0.05000,5.952
37,self_attn.q_proj,0.0035302981,0.05000,4.597
37,self_attn.k_proj,0.0007176153,0.05000,4.631
37,self_attn.v_proj,0.0012357151,0.05000,4.644
37,self_attn.o_proj,0.0002945466,0.05000,1.404
37,mlp.gate_proj,0.0007103522,0.05000,3.077
37,mlp.up_proj,0.0008592182,0.05000,3.084
37,mlp.down_proj,0.0022288549,0.05000,5.345
38,self_attn.v_proj,0.0011880093,0.05000,4.358
38,self_attn.q_proj,0.0031908930,0.05000,4.372
38,self_attn.k_proj,0.0006751707,0.05000,4.399
38,self_attn.o_proj,0.0003943942,0.05000,1.340
38,mlp.gate_proj,0.0008062866,0.05000,3.148
38,mlp.up_proj,0.0009217460,0.05000,3.174
38,mlp.down_proj,0.0034524150,0.05000,5.362
39,self_attn.k_proj,0.0002554454,0.05000,4.257
39,self_attn.v_proj,0.0003735272,0.05000,4.294
39,self_attn.q_proj,0.0012166777,0.05000,4.295
39,self_attn.o_proj,0.0003847522,0.05000,1.344
39,mlp.gate_proj,0.0009165438,0.05000,3.561
39,mlp.up_proj,0.0010139998,0.05000,3.578
39,mlp.down_proj,0.0066875947,0.05000,5.467