Upload 3 files

Files changed (3) hide show

config.json CHANGED Viewed

@@ -1,11 +1,17 @@
 {
   "architectures": [
-    "LlamaForCausalLM"
   ],
   "attention_bias": false,
   "attention_dropout": 0.0,
   "bos_token_id": 128000,
   "dtype": "bfloat16",
   "eos_token_id": [
     128001,
     128008,
@@ -20,7 +26,8 @@
   "intermediate_size": 8192,
   "max_position_embeddings": 131072,
   "mlp_bias": false,
-  "model_type": "llama",
   "num_attention_heads": 32,
   "num_hidden_layers": 16,
   "num_key_value_heads": 8,
@@ -38,4 +45,4 @@
   "transformers_version": "4.57.0",
   "use_cache": true,
   "vocab_size": 128256
-}

 {
   "architectures": [
+    "FlashHeadLlamaForCausalLM"
   ],
   "attention_bias": false,
   "attention_dropout": 0.0,
+  "auto_map": {
+    "AutoConfig": "configuration_flash_head_llama.FlashHeadLlamaConfig",
+    "AutoModelForCausalLM": "modeling_flash_head_llama.FlashHeadLlamaForCausalLM"
+  },
   "bos_token_id": 128000,
+  "creation_time": 1753785304.4597948,
   "dtype": "bfloat16",
+  "enforce_equal_cluster_sizes": true,
   "eos_token_id": [
     128001,
     128008,
   "intermediate_size": 8192,
   "max_position_embeddings": 131072,
   "mlp_bias": false,
+  "model_type": "flash_head_llama",
+  "n_clusters": 8016,
   "num_attention_heads": 32,
   "num_hidden_layers": 16,
   "num_key_value_heads": 8,
   "transformers_version": "4.57.0",
   "use_cache": true,
   "vocab_size": 128256
+}

configuration_flash_head_llama.py ADDED Viewed

	@@ -0,0 +1 @@


1	+ from embedl.models.llama.modeling_flash_head import FlashHeadLlamaConfig

modeling_flash_head_llama.py ADDED Viewed

	@@ -0,0 +1 @@


1	+ from embedl.models.llama.modeling_flash_head import FlashHeadLlamaForCausalLM