| name: "dyu_fr_transformer-sp" | |
| joeynmt_version: "2.3.0" | |
| model_dir: "/app/saved_model" | |
| use_cuda: False # False for CPU training | |
| fp16: False | |
data:
    train: "../data/dyu_fr"
    dev: "../data/dyu_fr"
    test: "../data/dyu_fr"
    dataset_type: "huggingface"
    dataset_cfg:
        name: "dyu-fr"
    sample_dev_subset: 1460
    # src and trg share one SentencePiece model and one vocabulary file,
    # which is what allows the tied embeddings in the model section below.
    src:
        lang: "dyu"
        max_length: 100
        lowercase: False
        normalize: False
        level: "bpe"
        voc_limit: 4000
        voc_min_freq: 1
        voc_file: "/app/saved_model/vocab.txt"
        tokenizer_type: "sentencepiece"
        tokenizer_cfg:
            model_file: "/app/saved_model/sp.model"
    trg:
        lang: "fr"
        max_length: 100
        lowercase: False
        normalize: False
        level: "bpe"
        voc_limit: 4000
        voc_min_freq: 1
        voc_file: "/app/saved_model/vocab.txt"
        tokenizer_type: "sentencepiece"
        tokenizer_cfg:
            model_file: "/app/saved_model/sp.model"
    special_symbols:
        unk_token: "<unk>"
        unk_id: 0
        pad_token: "<pad>"
        pad_id: 1
        bos_token: "<s>"
        bos_id: 2
        eos_token: "</s>"
        eos_id: 3
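# The tokenizer_cfg entries above point at a shared SentencePiece model whose
# piece ids must line up with special_symbols. A minimal sketch of how such a
# model could be trained with the sentencepiece Python API (the raw-text input
# path is an assumption, not part of this config):
#
#   import sentencepiece as spm
#   spm.SentencePieceTrainer.train(
#       input="../data/dyu_fr/train.txt",      # assumed combined dyu+fr text
#       model_prefix="/app/saved_model/sp",    # writes sp.model / sp.vocab
#       vocab_size=4000,                       # matches voc_limit
#       model_type="bpe",                      # matches level: "bpe"
#       unk_id=0, pad_id=1, bos_id=2, eos_id=3)  # matches special_symbols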
testing:
    load_model: "/app/saved_model/best.ckpt"
    n_best: 1
    beam_size: 5
    beam_alpha: 1.0
    batch_size: 256
    batch_type: "token"
    max_output_length: 100
    eval_metrics: ["bleu"]
    #return_prob: "hyp"
    #return_attention: False
    sacrebleu_cfg:
        tokenize: "13a"
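# With load_model pointing at the best checkpoint, evaluation on the test
# split runs through the standard JoeyNMT CLI (the config filename here is an
# assumption):
#
#   python -m joeynmt test config.yaml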
training:
    #load_model: "/app/saved_model/latest.ckpt"
    #reset_best_ckpt: False
    #reset_scheduler: False
    #reset_optimizer: False
    #reset_iter_state: False
    random_seed: 42
    optimizer: "adamw"
    normalization: "tokens"
    adam_betas: [0.9, 0.999]
    scheduling: "warmupinversesquareroot"
    learning_rate_warmup: 100
    learning_rate: 0.0003
    learning_rate_min: 0.00000001
    weight_decay: 0.0
    label_smoothing: 0.1
    loss: "crossentropy"
    batch_size: 512
    batch_type: "token"
    batch_multiplier: 4
    early_stopping_metric: "bleu"
    epochs: 6
    updates: 550
    validation_freq: 30
    logging_freq: 5
    overwrite: True
    shuffle: True
    print_valid_sents: [0, 1, 2, 3]
    keep_best_ckpts: 3
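# Training uses the standard JoeyNMT entry point (config filename is an
# assumption). Note that with batch_size 512 tokens and batch_multiplier 4,
# each optimizer update accumulates gradients over roughly 2048 tokens:
#
#   python -m joeynmt train config.yaml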
model:
    initializer: "xavier_uniform"
    bias_initializer: "zeros"
    init_gain: 1.0
    embed_initializer: "xavier_uniform"
    embed_init_gain: 1.0
    # tying requires identical src/trg vocabularies (the shared vocab.txt above)
    tied_embeddings: True
    tied_softmax: True
    encoder:
        type: "transformer"
        num_layers: 6
        num_heads: 4
        embeddings:
            embedding_dim: 256
            scale: True
            dropout: 0.0
        # typically ff_size = 4 x hidden_size
        hidden_size: 256
        ff_size: 1024
        dropout: 0.2
        layer_norm: "pre"
    decoder:
        type: "transformer"
        num_layers: 6
        num_heads: 8
        embeddings:
            embedding_dim: 256
            scale: True
            dropout: 0.0
        # typically ff_size = 4 x hidden_size
        hidden_size: 256
        ff_size: 1024
        dropout: 0.1
        layer_norm: "pre"
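# After training, the same config drives interactive decoding from stdin
# (filename again an assumption):
#
#   python -m joeynmt translate config.yaml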