atomwalk12 committed (verified)
Commit 16f55cb · Parent: 7916d8a

Model save
README.md CHANGED
@@ -4,8 +4,8 @@ library_name: transformers
  model_name: Qwen2.5-3B-Instruct-SFT
  tags:
  - generated_from_trainer
- - sft
  - trl
+ - sft
  licence: license
  ---

@@ -27,7 +27,7 @@ print(output["generated_text"])

  ## Training procedure

- [<img src="https://raw.githubusercontent.com/wandb/assets/main/wandb-github-badge-28.svg" alt="Visualize in Weights & Biases" width="150" height="24"/>](https://wandb.ai/walks/huggingface/runs/2vfrb84p)
+ [<img src="https://raw.githubusercontent.com/wandb/assets/main/wandb-github-badge-28.svg" alt="Visualize in Weights & Biases" width="150" height="24"/>](https://wandb.ai/walks/huggingface/runs/54e1zct5)


  This model was trained with SFT.
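For context, the `print(output["generated_text"])` in the second hunk header comes from the card's quick-start snippet. A minimal sketch of that usage, assuming the repo id `atomwalk12/Qwen2.5-3B-Instruct-SFT` (inferred from the model name and committer; the diff does not confirm it):

```python
from transformers import pipeline

# Hypothetical repo id; the diff only shows the model_name field.
generator = pipeline("text-generation", model="atomwalk12/Qwen2.5-3B-Instruct-SFT")

output = generator(
    [{"role": "user", "content": "What is supervised fine-tuning?"}],
    max_new_tokens=128,
    return_full_text=False,
)[0]
print(output["generated_text"])
```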
adapter_config.json CHANGED
@@ -25,12 +25,12 @@
  "rank_pattern": {},
  "revision": null,
  "target_modules": [
- "down_proj",
- "gate_proj",
- "v_proj",
  "up_proj",
+ "v_proj",
  "q_proj",
+ "down_proj",
  "o_proj",
+ "gate_proj",
  "k_proj"
  ],
  "task_type": "CAUSAL_LM",
adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:36b01e77a772cd58ef97247bf15cb76132ebefe8f9a14d0bfbe9a7eefe265d54
+ oid sha256:92354c006341b1e40cccb66eeddd961992916aebc74f57b16476485c0bbb9399
  size 119801528
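This entry is a Git LFS pointer, so the commit swaps the content hash rather than storing weights inline; the size is unchanged because only parameter values moved. A small sketch for checking a locally downloaded copy against the new oid, assuming the file sits in the current directory:

```python
import hashlib

# sha256 oid from the LFS pointer above.
EXPECTED_OID = "92354c006341b1e40cccb66eeddd961992916aebc74f57b16476485c0bbb9399"

h = hashlib.sha256()
with open("adapter_model.safetensors", "rb") as f:
    # Stream in 1 MiB chunks so large weight files don't load fully into memory.
    for chunk in iter(lambda: f.read(1 << 20), b""):
        h.update(chunk)

assert h.hexdigest() == EXPECTED_OID, "checksum mismatch with the LFS pointer"
```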
config.json ADDED
@@ -0,0 +1,81 @@
+ {
+   "architectures": [
+     "Qwen2ForCausalLM"
+   ],
+   "attention_dropout": 0.0,
+   "bos_token_id": 151643,
+   "eos_token_id": 151645,
+   "hidden_act": "silu",
+   "hidden_size": 2048,
+   "initializer_range": 0.02,
+   "intermediate_size": 11008,
+   "layer_types": [
+     "full_attention",
+     "full_attention",
+     "full_attention",
+     "full_attention",
+     "full_attention",
+     "full_attention",
+     "full_attention",
+     "full_attention",
+     "full_attention",
+     "full_attention",
+     "full_attention",
+     "full_attention",
+     "full_attention",
+     "full_attention",
+     "full_attention",
+     "full_attention",
+     "full_attention",
+     "full_attention",
+     "full_attention",
+     "full_attention",
+     "full_attention",
+     "full_attention",
+     "full_attention",
+     "full_attention",
+     "full_attention",
+     "full_attention",
+     "full_attention",
+     "full_attention",
+     "full_attention",
+     "full_attention",
+     "full_attention",
+     "full_attention",
+     "full_attention",
+     "full_attention",
+     "full_attention",
+     "full_attention"
+   ],
+   "max_position_embeddings": 32768,
+   "max_window_layers": 70,
+   "model_type": "qwen2",
+   "num_attention_heads": 16,
+   "num_hidden_layers": 36,
+   "num_key_value_heads": 2,
+   "quantization_config": {
+     "_load_in_4bit": true,
+     "_load_in_8bit": false,
+     "bnb_4bit_compute_dtype": "bfloat16",
+     "bnb_4bit_quant_storage": "bfloat16",
+     "bnb_4bit_quant_type": "nf4",
+     "bnb_4bit_use_double_quant": false,
+     "llm_int8_enable_fp32_cpu_offload": false,
+     "llm_int8_has_fp16_weight": false,
+     "llm_int8_skip_modules": null,
+     "llm_int8_threshold": 6.0,
+     "load_in_4bit": true,
+     "load_in_8bit": false,
+     "quant_method": "bitsandbytes"
+   },
+   "rms_norm_eps": 1e-06,
+   "rope_scaling": null,
+   "rope_theta": 1000000.0,
+   "sliding_window": null,
+   "tie_word_embeddings": true,
+   "torch_dtype": "bfloat16",
+   "transformers_version": "4.55.2",
+   "use_cache": true,
+   "use_sliding_window": false,
+   "vocab_size": 151936
+ }
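The quantization_config block records a bitsandbytes 4-bit NF4 setup (a QLoRA-style configuration). A sketch of expressing the same settings when loading the base model; the hub id Qwen/Qwen2.5-3B-Instruct is an assumption, since config.json names only the architecture:

```python
import torch
from transformers import AutoModelForCausalLM, BitsAndBytesConfig

# Mirrors the quantization_config fields above: 4-bit NF4 weights,
# bfloat16 compute and storage, no double quantization.
bnb_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_quant_type="nf4",
    bnb_4bit_compute_dtype=torch.bfloat16,
    bnb_4bit_quant_storage=torch.bfloat16,
    bnb_4bit_use_double_quant=False,
)

# Assumed base checkpoint; config.json records the architecture, not the hub id.
model = AutoModelForCausalLM.from_pretrained(
    "Qwen/Qwen2.5-3B-Instruct",
    quantization_config=bnb_config,
    torch_dtype=torch.bfloat16,
)
```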
training_args.bin CHANGED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:f6526168574bd64e8c1c7151559f9f905f9a8ca5019d8736af3bcecab8002ff5
+ oid sha256:1d1cc73962fc3af5f40bdd49375b15a8552503fbade42febe4b8096adfbe4b1c
  size 6673
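training_args.bin is likewise an LFS pointer, here to a pickled TrainingArguments object (likely trl's SFTConfig for this run, though the diff does not say). A sketch of inspecting it locally; weights_only=False deserializes arbitrary pickled objects, so only do this with files you trust:

```python
import torch

# Hypothetical local path; the Trainer saves this file alongside checkpoints.
args = torch.load("training_args.bin", weights_only=False)
print(args.learning_rate, args.num_train_epochs, args.per_device_train_batch_size)
```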