Model save
Browse files- README.md +58 -0
- generation_config.json +12 -0
- model-00001-of-00004.safetensors +1 -1
- model-00002-of-00004.safetensors +1 -1
- model-00003-of-00004.safetensors +1 -1
- model-00004-of-00004.safetensors +1 -1
- trainer_log.jsonl +36 -0
README.md
ADDED
|
@@ -0,0 +1,58 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
---
|
| 2 |
+
library_name: transformers
|
| 3 |
+
tags:
|
| 4 |
+
- llama-factory
|
| 5 |
+
- generated_from_trainer
|
| 6 |
+
model-index:
|
| 7 |
+
- name: bugs-swesmithseq
|
| 8 |
+
results: []
|
| 9 |
+
---
|
| 10 |
+
|
| 11 |
+
<!-- This model card has been generated automatically according to the information the Trainer had access to. You
|
| 12 |
+
should probably proofread and complete it, then remove this comment. -->
|
| 13 |
+
|
| 14 |
+
# bugs-swesmithseq
|
| 15 |
+
|
| 16 |
+
This model was trained from scratch on an unknown dataset.
|
| 17 |
+
|
| 18 |
+
## Model description
|
| 19 |
+
|
| 20 |
+
More information needed
|
| 21 |
+
|
| 22 |
+
## Intended uses & limitations
|
| 23 |
+
|
| 24 |
+
More information needed
|
| 25 |
+
|
| 26 |
+
## Training and evaluation data
|
| 27 |
+
|
| 28 |
+
More information needed
|
| 29 |
+
|
| 30 |
+
## Training procedure
|
| 31 |
+
|
| 32 |
+
### Training hyperparameters
|
| 33 |
+
|
| 34 |
+
The following hyperparameters were used during training:
|
| 35 |
+
- learning_rate: 4e-05
|
| 36 |
+
- train_batch_size: 1
|
| 37 |
+
- eval_batch_size: 8
|
| 38 |
+
- seed: 42
|
| 39 |
+
- distributed_type: multi-GPU
|
| 40 |
+
- num_devices: 8
|
| 41 |
+
- gradient_accumulation_steps: 2
|
| 42 |
+
- total_train_batch_size: 16
|
| 43 |
+
- total_eval_batch_size: 64
|
| 44 |
+
- optimizer: adamw_torch_fused with betas=(0.9,0.98) and epsilon=1e-08 (no additional optimizer arguments)
|
| 45 |
+
- lr_scheduler_type: cosine
|
| 46 |
+
- lr_scheduler_warmup_ratio: 0.1
|
| 47 |
+
- num_epochs: 7.0
|
| 48 |
+
|
| 49 |
+
### Training results
|
| 50 |
+
|
| 51 |
+
|
| 52 |
+
|
| 53 |
+
### Framework versions
|
| 54 |
+
|
| 55 |
+
- Transformers 4.56.1
|
| 56 |
+
- Pytorch 2.9.1+cu128
|
| 57 |
+
- Datasets 4.4.1
|
| 58 |
+
- Tokenizers 0.22.1
|
generation_config.json
ADDED
|
@@ -0,0 +1,12 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"do_sample": true,
|
| 3 |
+
"eos_token_id": [
|
| 4 |
+
151645,
|
| 5 |
+
151643
|
| 6 |
+
],
|
| 7 |
+
"pad_token_id": 151643,
|
| 8 |
+
"temperature": 0.6,
|
| 9 |
+
"top_k": 20,
|
| 10 |
+
"top_p": 0.95,
|
| 11 |
+
"transformers_version": "4.56.1"
|
| 12 |
+
}
|
model-00001-of-00004.safetensors
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 4902257696
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:0b3bd6421a529c4d03a66c4189556f9ff0cc38d7c637a57cb9fa6cdb7c392ed5
|
| 3 |
size 4902257696
|
model-00002-of-00004.safetensors
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 4915960368
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:8a8e222efff1e752aa1f16fdde47a505692110262c2ec23f6186910661000f15
|
| 3 |
size 4915960368
|
model-00003-of-00004.safetensors
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 4983068496
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:a407465add0cc20e2292104fb85cf822e8d30094c55a852c1c5a360e7662e2cb
|
| 3 |
size 4983068496
|
model-00004-of-00004.safetensors
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 1580230264
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:6859171af1efc2981a4f24267a4493a40a6b8d5c32456c94e4678c8943b80b50
|
| 3 |
size 1580230264
|
trainer_log.jsonl
CHANGED
|
@@ -1205,3 +1205,39 @@
|
|
| 1205 |
{"current_steps": 6000, "total_steps": 6188, "loss": 0.1545, "lr": 1.1356839068632053e-07, "epoch": 6.788907753254103, "percentage": 96.96, "elapsed_time": "21:48:44", "remaining_time": "0:41:00"}
|
| 1206 |
{"current_steps": 6005, "total_steps": 6188, "loss": 0.1438, "lr": 1.0764428885686073e-07, "epoch": 6.794567062818336, "percentage": 97.04, "elapsed_time": "21:52:50", "remaining_time": "0:40:00"}
|
| 1207 |
{"current_steps": 6010, "total_steps": 6188, "loss": 0.1422, "lr": 1.0187844705857875e-07, "epoch": 6.8002263723825696, "percentage": 97.12, "elapsed_time": "21:55:34", "remaining_time": "0:38:57"}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1205 |
{"current_steps": 6000, "total_steps": 6188, "loss": 0.1545, "lr": 1.1356839068632053e-07, "epoch": 6.788907753254103, "percentage": 96.96, "elapsed_time": "21:48:44", "remaining_time": "0:41:00"}
|
| 1206 |
{"current_steps": 6005, "total_steps": 6188, "loss": 0.1438, "lr": 1.0764428885686073e-07, "epoch": 6.794567062818336, "percentage": 97.04, "elapsed_time": "21:52:50", "remaining_time": "0:40:00"}
|
| 1207 |
{"current_steps": 6010, "total_steps": 6188, "loss": 0.1422, "lr": 1.0187844705857875e-07, "epoch": 6.8002263723825696, "percentage": 97.12, "elapsed_time": "21:55:34", "remaining_time": "0:38:57"}
|
| 1208 |
+
{"current_steps": 6015, "total_steps": 6188, "loss": 0.1373, "lr": 9.627091116348076e-08, "epoch": 6.805885681946802, "percentage": 97.2, "elapsed_time": "21:58:32", "remaining_time": "0:37:55"}
|
| 1209 |
+
{"current_steps": 6020, "total_steps": 6188, "loss": 0.1437, "lr": 9.082172578412263e-08, "epoch": 6.8115449915110355, "percentage": 97.29, "elapsed_time": "22:01:25", "remaining_time": "0:36:52"}
|
| 1210 |
+
{"current_steps": 6025, "total_steps": 6188, "loss": 0.1521, "lr": 8.553093427325243e-08, "epoch": 6.817204301075269, "percentage": 97.37, "elapsed_time": "22:04:21", "remaining_time": "0:35:49"}
|
| 1211 |
+
{"current_steps": 6030, "total_steps": 6188, "loss": 0.1379, "lr": 8.039857872345736e-08, "epoch": 6.8228636106395015, "percentage": 97.45, "elapsed_time": "22:07:15", "remaining_time": "0:34:46"}
|
| 1212 |
+
{"current_steps": 6035, "total_steps": 6188, "loss": 0.1538, "lr": 7.542469996684843e-08, "epoch": 6.828522920203735, "percentage": 97.53, "elapsed_time": "22:10:04", "remaining_time": "0:33:43"}
|
| 1213 |
+
{"current_steps": 6040, "total_steps": 6188, "loss": 0.1359, "lr": 7.06093375747141e-08, "epoch": 6.834182229767968, "percentage": 97.61, "elapsed_time": "22:12:56", "remaining_time": "0:32:39"}
|
| 1214 |
+
{"current_steps": 6045, "total_steps": 6188, "loss": 0.1235, "lr": 6.595252985721834e-08, "epoch": 6.839841539332202, "percentage": 97.69, "elapsed_time": "22:15:55", "remaining_time": "0:31:36"}
|
| 1215 |
+
{"current_steps": 6050, "total_steps": 6188, "loss": 0.1291, "lr": 6.145431386309186e-08, "epoch": 6.845500848896434, "percentage": 97.77, "elapsed_time": "22:18:59", "remaining_time": "0:30:32"}
|
| 1216 |
+
{"current_steps": 6055, "total_steps": 6188, "loss": 0.1313, "lr": 5.711472537933693e-08, "epoch": 6.851160158460668, "percentage": 97.85, "elapsed_time": "22:22:01", "remaining_time": "0:29:28"}
|
| 1217 |
+
{"current_steps": 6060, "total_steps": 6188, "loss": 0.146, "lr": 5.293379893094752e-08, "epoch": 6.856819468024901, "percentage": 97.93, "elapsed_time": "22:25:00", "remaining_time": "0:28:24"}
|
| 1218 |
+
{"current_steps": 6065, "total_steps": 6188, "loss": 0.1723, "lr": 4.891156778062734e-08, "epoch": 6.862478777589134, "percentage": 98.01, "elapsed_time": "22:27:39", "remaining_time": "0:27:19"}
|
| 1219 |
+
{"current_steps": 6070, "total_steps": 6188, "loss": 0.1578, "lr": 4.5048063928527785e-08, "epoch": 6.868138087153367, "percentage": 98.09, "elapsed_time": "22:30:40", "remaining_time": "0:26:15"}
|
| 1220 |
+
{"current_steps": 6075, "total_steps": 6188, "loss": 0.138, "lr": 4.134331811199932e-08, "epoch": 6.873797396717601, "percentage": 98.17, "elapsed_time": "22:33:39", "remaining_time": "0:25:10"}
|
| 1221 |
+
{"current_steps": 6080, "total_steps": 6188, "loss": 0.1455, "lr": 3.7797359805333836e-08, "epoch": 6.879456706281833, "percentage": 98.25, "elapsed_time": "22:36:42", "remaining_time": "0:24:05"}
|
| 1222 |
+
{"current_steps": 6085, "total_steps": 6188, "loss": 0.15, "lr": 3.441021721954485e-08, "epoch": 6.885116015846067, "percentage": 98.34, "elapsed_time": "22:39:45", "remaining_time": "0:23:00"}
|
| 1223 |
+
{"current_steps": 6090, "total_steps": 6188, "loss": 0.1575, "lr": 3.11819173021366e-08, "epoch": 6.8907753254103, "percentage": 98.42, "elapsed_time": "22:42:53", "remaining_time": "0:21:55"}
|
| 1224 |
+
{"current_steps": 6095, "total_steps": 6188, "loss": 0.1485, "lr": 2.8112485736881967e-08, "epoch": 6.896434634974533, "percentage": 98.5, "elapsed_time": "22:45:58", "remaining_time": "0:20:50"}
|
| 1225 |
+
{"current_steps": 6100, "total_steps": 6188, "loss": 0.1502, "lr": 2.520194694363376e-08, "epoch": 6.902093944538766, "percentage": 98.58, "elapsed_time": "22:49:03", "remaining_time": "0:19:45"}
|
| 1226 |
+
{"current_steps": 6105, "total_steps": 6188, "loss": 0.1376, "lr": 2.2450324078120423e-08, "epoch": 6.907753254103, "percentage": 98.66, "elapsed_time": "22:52:07", "remaining_time": "0:18:39"}
|
| 1227 |
+
{"current_steps": 6110, "total_steps": 6188, "loss": 0.1371, "lr": 1.9857639031759522e-08, "epoch": 6.913412563667233, "percentage": 98.74, "elapsed_time": "22:55:12", "remaining_time": "0:17:33"}
|
| 1228 |
+
{"current_steps": 6115, "total_steps": 6188, "loss": 0.1872, "lr": 1.7423912431489e-08, "epoch": 6.9190718732314656, "percentage": 98.82, "elapsed_time": "22:58:15", "remaining_time": "0:16:27"}
|
| 1229 |
+
{"current_steps": 6120, "total_steps": 6188, "loss": 0.1477, "lr": 1.51491636396095e-08, "epoch": 6.924731182795699, "percentage": 98.9, "elapsed_time": "23:01:18", "remaining_time": "0:15:20"}
|
| 1230 |
+
{"current_steps": 6125, "total_steps": 6188, "loss": 0.1389, "lr": 1.3033410753608977e-08, "epoch": 6.930390492359932, "percentage": 98.98, "elapsed_time": "23:04:23", "remaining_time": "0:14:14"}
|
| 1231 |
+
{"current_steps": 6130, "total_steps": 6188, "loss": 0.1472, "lr": 1.1076670606045004e-08, "epoch": 6.936049801924165, "percentage": 99.06, "elapsed_time": "23:07:23", "remaining_time": "0:13:07"}
|
| 1232 |
+
{"current_steps": 6135, "total_steps": 6188, "loss": 0.1473, "lr": 9.278958764391554e-09, "epoch": 6.941709111488398, "percentage": 99.14, "elapsed_time": "23:10:26", "remaining_time": "0:12:00"}
|
| 1233 |
+
{"current_steps": 6140, "total_steps": 6188, "loss": 0.1285, "lr": 7.64028953092133e-09, "epoch": 6.947368421052632, "percentage": 99.22, "elapsed_time": "23:13:30", "remaining_time": "0:10:53"}
|
| 1234 |
+
{"current_steps": 6145, "total_steps": 6188, "loss": 0.1455, "lr": 6.16067594259695e-09, "epoch": 6.953027730616864, "percentage": 99.31, "elapsed_time": "23:16:34", "remaining_time": "0:09:46"}
|
| 1235 |
+
{"current_steps": 6150, "total_steps": 6188, "loss": 0.1347, "lr": 4.840129770957713e-09, "epoch": 6.958687040181098, "percentage": 99.39, "elapsed_time": "23:19:35", "remaining_time": "0:08:38"}
|
| 1236 |
+
{"current_steps": 6155, "total_steps": 6188, "loss": 0.1389, "lr": 3.6786615220352208e-09, "epoch": 6.964346349745331, "percentage": 99.47, "elapsed_time": "23:22:31", "remaining_time": "0:07:31"}
|
| 1237 |
+
{"current_steps": 6160, "total_steps": 6188, "loss": 0.1384, "lr": 2.6762804362623353e-09, "epoch": 6.970005659309564, "percentage": 99.55, "elapsed_time": "23:25:31", "remaining_time": "0:06:23"}
|
| 1238 |
+
{"current_steps": 6165, "total_steps": 6188, "loss": 0.1449, "lr": 1.8329944884021288e-09, "epoch": 6.975664968873797, "percentage": 99.63, "elapsed_time": "23:28:32", "remaining_time": "0:05:15"}
|
| 1239 |
+
{"current_steps": 6170, "total_steps": 6188, "loss": 0.1355, "lr": 1.1488103874923717e-09, "epoch": 6.981324278438031, "percentage": 99.71, "elapsed_time": "23:31:37", "remaining_time": "0:04:07"}
|
| 1240 |
+
{"current_steps": 6175, "total_steps": 6188, "loss": 0.1413, "lr": 6.237335767744767e-10, "epoch": 6.986983588002264, "percentage": 99.79, "elapsed_time": "23:34:41", "remaining_time": "0:02:58"}
|
| 1241 |
+
{"current_steps": 6180, "total_steps": 6188, "loss": 0.1537, "lr": 2.577682336690757e-10, "epoch": 6.992642897566497, "percentage": 99.87, "elapsed_time": "23:37:46", "remaining_time": "0:01:50"}
|
| 1242 |
+
{"current_steps": 6185, "total_steps": 6188, "loss": 0.1527, "lr": 5.091726972938915e-11, "epoch": 6.99830220713073, "percentage": 99.95, "elapsed_time": "23:40:49", "remaining_time": "0:00:41"}
|
| 1243 |
+
{"current_steps": 6187, "total_steps": 6188, "epoch": 7.0, "percentage": 99.98, "elapsed_time": "23:41:44", "remaining_time": "0:00:13"}
|