{
  "dataset_reader": {
    "type": "conll2003",
    "coding_scheme": "BIOUL",
    "tag_label": "ner",
    "token_indexers": {
      "bert": {
        "type": "bert-pretrained",
        "do_lowercase": true,
        "pretrained_model": "scibert_scivocab_uncased",
        "use_starting_offsets": true
      },
      "token_characters": {
        "type": "characters",
        "min_padding_length": 3
      }
    }
  },
  "model": {
    "type": "crf_tagger",
    "calculate_span_f1": true,
    "constrain_crf_decoding": true,
    "dropout": 0.5,
    "encoder": {
      "type": "lstm",
      "bidirectional": true,
      "dropout": 0.5,
      "hidden_size": 768,
      "input_size": 896,
      "num_layers": 2
    },
    "include_start_end_transitions": false,
    "label_encoding": "BIOUL",
    "text_field_embedder": {
      "allow_unmatched_keys": true,
      "embedder_to_indexer_map": {
        "bert": ["bert", "bert-offsets"],
        "token_characters": ["token_characters"]
      },
      "token_embedders": {
        "bert": {
          "type": "bert-pretrained",
          "pretrained_model": "scibert_scivocab_uncased"
        },
        "token_characters": {
          "type": "character_encoding",
          "embedding": {
            "embedding_dim": 16
          },
          "encoder": {
            "type": "cnn",
            "conv_layer_activation": "relu",
            "embedding_dim": 16,
            "ngram_filter_sizes": [3],
            "num_filters": 128
          }
        }
      }
    }
  }
}