ner-span-glirel / glirel_config.json
skv03's picture
Upload multi-domain zero-shot GLiREL model
da85fb0 verified
{
"lr_encoder": "1e-5",
"lr_others": "1e-4",
"weight_decay_encoder": 0.01,
"weight_decay_other": 0.01,
"num_steps": 500000,
"warmup_ratio": 0.1,
"train_batch_size": 8,
"eval_every": 15000,
"gradient_accumulation": 8,
"eval_batch_size": 32,
"num_layers_freeze": null,
"early_stopping_patience": null,
"early_stopping_delta": 0.0,
"save_at": [
15000,
30000,
45000,
60000,
75000,
90000,
105000,
120000,
135000,
150000,
165000,
180000,
195000,
210000,
225000,
240000,
255000,
270000,
285000,
300000,
315000,
330000,
345000,
360000,
375000,
390000,
405000,
420000,
435000,
450000,
465000,
480000,
495000,
500000
],
"max_saves": 8,
"max_width": 6,
"model_name": "microsoft/deberta-v3-large",
"fine_tune": true,
"subtoken_pooling": "first",
"hidden_size": 768,
"scorer": "dot",
"rel_mode": "marker",
"span_marker_mode": "markerv1",
"refine_prompt": false,
"refine_relation": false,
"ffn_mul": 4,
"dropout": 0.4,
"scheduler": "cosine_with_warmup",
"loss_func": "binary_cross_entropy_loss",
"alpha": 0.6,
"gamma": 3,
"label_embed_strategy": "both",
"use_typed_relations": true,
"consistency_loss_weight": 0.1,
"enable_ner_module": true,
"ner_threshold": 0.5,
"ner_fn_loss_weight": 1.5,
"ner_loss_weight": 100.0,
"rel_loss_weight": 1.0,
"ner_threshold_offset": -0.02,
"training_phase": "ner_only",
"span_f1_target": 0.7,
"relation_f1_target": 0.7,
"coref_classifier": false,
"coref_loss_weight": 10.0,
"coreference_label": null,
"dataset_name": "custom",
"root_dir": "multi_domain",
"train_data": [
"data/multi_domain_train_processed.jsonl"
],
"eval_data": [
"data/multi_domain_test_processed.jsonl"
],
"prev_path": "./ner-glirel-log/saved_at/model_60000",
"size_sup": -1,
"num_train_rel_types": 40,
"num_unseen_rel_types": 15,
"top_k": 1,
"random_drop": false,
"max_len": 512,
"eval_threshold": [
0.1,
0.2,
0.3,
0.5,
0.6,
0.7
],
"max_entity_pair_distance": null,
"fixed_relation_types": false,
"name": "large",
"log_dir": "ner-glirel-log-2/"
}