Add files using upload-large-folder tool
Browse files
This view is limited to 50 files because it contains too many changes.
See raw diff
- .gitattributes +19 -0
- data/de2en/0.4/trainable_data/dict.de.txt +0 -0
- data/de2en/0.4/trainable_data/dict.en.txt +0 -0
- data/de2en/0.4/trainable_data/preprocess.log +7 -0
- data/de2en/0.4/trainable_data/train.de-en.de.idx +3 -0
- data/de2en/0.4/trainable_data/train.de-en.en.idx +3 -0
- data/de2en/0.4/trainable_data/valid.de-en.de.bin +3 -0
- data/de2en/0.4/trainable_data/valid.de-en.de.idx +0 -0
- data/de2en/0.4/trainable_data/valid.de-en.en.bin +3 -0
- data/de2en/0.4/trainable_data/valid.de-en.en.idx +0 -0
- data/de2en/0.5/trainable_data/dict.de.txt +0 -0
- data/de2en/0.5/trainable_data/dict.en.txt +0 -0
- data/de2en/0.5/trainable_data/preprocess.log +6 -0
- data/de2en/0.5/trainable_data/train.de-en.de.idx +3 -0
- data/de2en/0.5/trainable_data/train.de-en.en.idx +3 -0
- data/de2en/0.5/trainable_data/valid.de-en.de.bin +3 -0
- data/de2en/0.5/trainable_data/valid.de-en.de.idx +0 -0
- data/de2en/0.5/trainable_data/valid.de-en.en.bin +3 -0
- data/de2en/0.5/trainable_data/valid.de-en.en.idx +0 -0
- data/de2en/0.6/trainable_data/dict.de.txt +0 -0
- data/de2en/0.6/trainable_data/dict.en.txt +0 -0
- data/de2en/0.6/trainable_data/preprocess.log +6 -0
- data/de2en/0.6/trainable_data/train.de-en.de.idx +3 -0
- data/de2en/0.6/trainable_data/train.de-en.en.bin +3 -0
- data/de2en/0.6/trainable_data/train.de-en.en.idx +3 -0
- data/de2en/0.6/trainable_data/valid.de-en.de.bin +3 -0
- data/de2en/0.6/trainable_data/valid.de-en.en.bin +3 -0
- data/de2en/0.7/trainable_data/dict.de.txt +0 -0
- data/de2en/0.7/trainable_data/dict.en.txt +0 -0
- data/de2en/0.7/trainable_data/preprocess.log +6 -0
- data/de2en/0.7/trainable_data/train.de-en.de.idx +3 -0
- data/de2en/0.7/trainable_data/train.de-en.en.idx +3 -0
- data/de2en/0.7/trainable_data/valid.de-en.de.bin +3 -0
- data/de2en/0.7/trainable_data/valid.de-en.de.idx +0 -0
- data/de2en/0.7/trainable_data/valid.de-en.en.bin +3 -0
- data/de2en/0.7/trainable_data/valid.de-en.en.idx +0 -0
- data/en2de/0.4/trainable_data/dict.de.txt +0 -0
- data/en2de/0.4/trainable_data/dict.en.txt +0 -0
- data/en2de/0.4/trainable_data/preprocess.log +6 -0
- data/en2de/0.4/trainable_data/valid.de-en.de.idx +0 -0
- data/en2de/0.4/trainable_data/valid.de-en.en.idx +0 -0
- data/en2de/0.5/trainable_data/valid.de-en.de.idx +0 -0
- data/en2de/0.5/trainable_data/valid.de-en.en.idx +0 -0
- data/en2de/0.6/trainable_data/dict.de.txt +0 -0
- data/en2de/0.6/trainable_data/dict.en.txt +0 -0
- data/en2de/0.6/trainable_data/preprocess.log +6 -0
- data/en2de/0.6/trainable_data/valid.de-en.de.idx +0 -0
- data/en2de/0.6/trainable_data/valid.de-en.en.idx +0 -0
- data/en2de/0.7/trainable_data/dict.de.txt +0 -0
- data/en2de/0.7/trainable_data/dict.en.txt +0 -0
.gitattributes
CHANGED
|
@@ -41,3 +41,22 @@ mosesdecoder/moses/TranslationModel/UG/util/ibm1-align filter=lfs diff=lfs merge
|
|
| 41 |
mosesdecoder/contrib/iSenWeb/Introduction/iSenWeb[[:space:]]A[[:space:]]Web-based[[:space:]]Machine[[:space:]]Translation[[:space:]]System[[:space:]]to[[:space:]]Translate[[:space:]]Sentences.docx filter=lfs diff=lfs merge=lfs -text
|
| 42 |
FacebookAI/xlm-roberta-large/onnx/tokenizer.json filter=lfs diff=lfs merge=lfs -text
|
| 43 |
mosesdecoder/contrib/promix/test_data/esen.nc.model.filtered/phrase-table.0-0.1.1.binphr.tgtdata.wa filter=lfs diff=lfs merge=lfs -text
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 41 |
mosesdecoder/contrib/iSenWeb/Introduction/iSenWeb[[:space:]]A[[:space:]]Web-based[[:space:]]Machine[[:space:]]Translation[[:space:]]System[[:space:]]to[[:space:]]Translate[[:space:]]Sentences.docx filter=lfs diff=lfs merge=lfs -text
|
| 42 |
FacebookAI/xlm-roberta-large/onnx/tokenizer.json filter=lfs diff=lfs merge=lfs -text
|
| 43 |
mosesdecoder/contrib/promix/test_data/esen.nc.model.filtered/phrase-table.0-0.1.1.binphr.tgtdata.wa filter=lfs diff=lfs merge=lfs -text
|
| 44 |
+
mosesdecoder/contrib/iSenWeb/themes/images/common/Logo[[:space:]](2000x2000).png filter=lfs diff=lfs merge=lfs -text
|
| 45 |
+
mosesdecoder/contrib/iSenWeb/Introduction/iSenWeb[[:space:]]A[[:space:]]Web-based[[:space:]]Machine[[:space:]]Translation[[:space:]]System[[:space:]]to[[:space:]]Translate[[:space:]]Sentences.pdf filter=lfs diff=lfs merge=lfs -text
|
| 46 |
+
mosesdecoder/contrib/promix/test_data/esen.ep.model.filtered/phrase-table.0-0.1.1 filter=lfs diff=lfs merge=lfs -text
|
| 47 |
+
mosesdecoder/contrib/promix/test_data/esen.ep.model.filtered/phrase-table.0-0.1.1.binphr.tgtdata.wa filter=lfs diff=lfs merge=lfs -text
|
| 48 |
+
data/de2en/0.6/trainable_data/train.de-en.de.idx filter=lfs diff=lfs merge=lfs -text
|
| 49 |
+
data/de2en/0.6/trainable_data/train.de-en.en.idx filter=lfs diff=lfs merge=lfs -text
|
| 50 |
+
data/de2en/0.4/trainable_data/train.de-en.de.idx filter=lfs diff=lfs merge=lfs -text
|
| 51 |
+
data/zh2en/0.4/trainable_data/train.zh-en.en.idx filter=lfs diff=lfs merge=lfs -text
|
| 52 |
+
data/zh2en/0.6/trainable_data/train.zh-en.en.idx filter=lfs diff=lfs merge=lfs -text
|
| 53 |
+
data/de2en/0.5/trainable_data/train.de-en.en.idx filter=lfs diff=lfs merge=lfs -text
|
| 54 |
+
data/zh2en/0.7/trainable_data/train.zh-en.en.idx filter=lfs diff=lfs merge=lfs -text
|
| 55 |
+
data/zh2en/0.4/trainable_data/train.zh-en.zh.idx filter=lfs diff=lfs merge=lfs -text
|
| 56 |
+
data/de2en/0.5/trainable_data/train.de-en.de.idx filter=lfs diff=lfs merge=lfs -text
|
| 57 |
+
data/zh2en/0.7/trainable_data/train.zh-en.zh.idx filter=lfs diff=lfs merge=lfs -text
|
| 58 |
+
data/zh2en/0.5/trainable_data/train.zh-en.en.idx filter=lfs diff=lfs merge=lfs -text
|
| 59 |
+
data/de2en/0.4/trainable_data/train.de-en.en.idx filter=lfs diff=lfs merge=lfs -text
|
| 60 |
+
data/de2en/0.7/trainable_data/train.de-en.de.idx filter=lfs diff=lfs merge=lfs -text
|
| 61 |
+
data/de2en/0.7/trainable_data/train.de-en.en.idx filter=lfs diff=lfs merge=lfs -text
|
| 62 |
+
data/zh2en/0.6/trainable_data/train.zh-en.zh.idx filter=lfs diff=lfs merge=lfs -text
|
data/de2en/0.4/trainable_data/dict.de.txt
ADDED
|
The diff for this file is too large to render.
See raw diff
|
|
|
data/de2en/0.4/trainable_data/dict.en.txt
ADDED
|
The diff for this file is too large to render.
See raw diff
|
|
|
data/de2en/0.4/trainable_data/preprocess.log
ADDED
|
@@ -0,0 +1,7 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
Namespace(no_progress_bar=False, log_interval=100, log_format=None, tensorboard_logdir=None, seed=42, cpu=False, tpu=False, bf16=False, memory_efficient_bf16=False, fp16=False, memory_efficient_fp16=False, fp16_no_flatten_grads=False, fp16_init_scale=128, fp16_scale_window=None, fp16_scale_tolerance=0.0, min_loss_scale=0.0001, threshold_loss_scale=None, user_dir=None, empty_cache_freq=0, all_gather_list_size=16384, model_parallel_size=1, checkpoint_suffix='', checkpoint_shard_count=1, quantization_config_path=None, profile=False, criterion='cross_entropy', optimizer=None, lr_scheduler='fixed', tokenizer=None, bpe=None, scoring='bleu', task='translation', source_lang='de', target_lang='en', trainpref='/home/luoyf/xzq/data/de2en/0.4/tokenized/bpe.train', validpref=None, testpref=None, align_suffix=None, destdir='/home/luoyf/xzq/data/de2en/0.4/trainable_data', thresholdtgt=0, thresholdsrc=0, tgtdict='/home/luoyf/xzq/data/de2en/0.4/trainable_data/dict.en.txt', srcdict='/home/luoyf/xzq/data/de2en/0.4/trainable_data/dict.de.txt', nwordstgt=-1, nwordssrc=-1, alignfile=None, dataset_impl='mmap', joined_dictionary=False, only_source=False, padding_factor=8, workers=32)
|
| 2 |
+
Namespace(no_progress_bar=False, log_interval=100, log_format=None, tensorboard_logdir=None, seed=42, cpu=False, tpu=False, bf16=False, memory_efficient_bf16=False, fp16=False, memory_efficient_fp16=False, fp16_no_flatten_grads=False, fp16_init_scale=128, fp16_scale_window=None, fp16_scale_tolerance=0.0, min_loss_scale=0.0001, threshold_loss_scale=None, user_dir=None, empty_cache_freq=0, all_gather_list_size=16384, model_parallel_size=1, checkpoint_suffix='', checkpoint_shard_count=1, quantization_config_path=None, profile=False, criterion='cross_entropy', optimizer=None, lr_scheduler='fixed', tokenizer=None, bpe=None, scoring='bleu', task='translation', source_lang='de', target_lang='en', trainpref='/home/luoyf/xzq/data/de2en/0.4/tokenized/bpe.train', validpref=None, testpref=None, align_suffix=None, destdir='/home/luoyf/xzq/data/de2en/0.4/trainable_data', thresholdtgt=0, thresholdsrc=0, tgtdict='/home/luoyf/xzq/data/de-en/wmt23/trainable_data/dict.en.txt', srcdict='/home/luoyf/xzq/data/de-en/wmt23/trainable_data/dict.de.txt', nwordstgt=-1, nwordssrc=-1, alignfile=None, dataset_impl='mmap', joined_dictionary=False, only_source=False, padding_factor=8, workers=32)
|
| 3 |
+
[de] Dictionary: 47776 types
|
| 4 |
+
[de] /home/luoyf/xzq/data/de2en/0.4/tokenized/bpe.train.de: 46388489 sents, 1161403088 tokens, 0.0% replaced by <unk>
|
| 5 |
+
[en] Dictionary: 47776 types
|
| 6 |
+
[en] /home/luoyf/xzq/data/de2en/0.4/tokenized/bpe.train.en: 46388489 sents, 1093341883 tokens, 3.37e-05% replaced by <unk>
|
| 7 |
+
Wrote preprocessed data to /home/luoyf/xzq/data/de2en/0.4/trainable_data
|
data/de2en/0.4/trainable_data/train.de-en.de.idx
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:45630adf99dab73623937030327c96aad2a8173be734d9290264bb343c400ada
|
| 3 |
+
size 556661894
|
data/de2en/0.4/trainable_data/train.de-en.en.idx
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:fa589765b69df43d7101556633b330d917797f1a6dca5bb780e4d2f36cabad74
|
| 3 |
+
size 556661894
|
data/de2en/0.4/trainable_data/valid.de-en.de.bin
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:bffa8af7d0a005a67d5bab8e6bfbc14b9abd27898d509a28d2ff8d3f17b556ed
|
| 3 |
+
size 116454
|
data/de2en/0.4/trainable_data/valid.de-en.de.idx
ADDED
|
Binary file (24 kB). View file
|
|
|
data/de2en/0.4/trainable_data/valid.de-en.en.bin
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:77c1ee37e89aafb067d9c7528fd7502ef13268b3554f5232a244d54156846e5c
|
| 3 |
+
size 108124
|
data/de2en/0.4/trainable_data/valid.de-en.en.idx
ADDED
|
Binary file (24 kB). View file
|
|
|
data/de2en/0.5/trainable_data/dict.de.txt
ADDED
|
The diff for this file is too large to render.
See raw diff
|
|
|
data/de2en/0.5/trainable_data/dict.en.txt
ADDED
|
The diff for this file is too large to render.
See raw diff
|
|
|
data/de2en/0.5/trainable_data/preprocess.log
ADDED
|
@@ -0,0 +1,6 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
Namespace(no_progress_bar=False, log_interval=100, log_format=None, tensorboard_logdir=None, seed=42, cpu=False, tpu=False, bf16=False, memory_efficient_bf16=False, fp16=False, memory_efficient_fp16=False, fp16_no_flatten_grads=False, fp16_init_scale=128, fp16_scale_window=None, fp16_scale_tolerance=0.0, min_loss_scale=0.0001, threshold_loss_scale=None, user_dir=None, empty_cache_freq=0, all_gather_list_size=16384, model_parallel_size=1, checkpoint_suffix='', checkpoint_shard_count=1, quantization_config_path=None, profile=False, criterion='cross_entropy', optimizer=None, lr_scheduler='fixed', tokenizer=None, bpe=None, scoring='bleu', task='translation', source_lang='de', target_lang='en', trainpref='/home/luoyf/xzq/data/de2en/0.5/tokenized/bpe.train', validpref=None, testpref=None, align_suffix=None, destdir='/home/luoyf/xzq/data/de2en/0.5/trainable_data', thresholdtgt=0, thresholdsrc=0, tgtdict='/home/luoyf/xzq/data/de-en/wmt23/trainable_data/dict.en.txt', srcdict='/home/luoyf/xzq/data/de-en/wmt23/trainable_data/dict.de.txt', nwordstgt=-1, nwordssrc=-1, alignfile=None, dataset_impl='mmap', joined_dictionary=False, only_source=False, padding_factor=8, workers=32)
|
| 2 |
+
[de] Dictionary: 47776 types
|
| 3 |
+
[de] /home/luoyf/xzq/data/de2en/0.5/tokenized/bpe.train.de: 46388489 sents, 1161403088 tokens, 0.0% replaced by <unk>
|
| 4 |
+
[en] Dictionary: 47776 types
|
| 5 |
+
[en] /home/luoyf/xzq/data/de2en/0.5/tokenized/bpe.train.en: 46388489 sents, 1091759627 tokens, 6.59e-05% replaced by <unk>
|
| 6 |
+
Wrote preprocessed data to /home/luoyf/xzq/data/de2en/0.5/trainable_data
|
data/de2en/0.5/trainable_data/train.de-en.de.idx
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:45630adf99dab73623937030327c96aad2a8173be734d9290264bb343c400ada
|
| 3 |
+
size 556661894
|
data/de2en/0.5/trainable_data/train.de-en.en.idx
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:3d8cac62ca77ef3256226853e31b02dd0f3d618969a95b44b5073b63a555475f
|
| 3 |
+
size 556661894
|
data/de2en/0.5/trainable_data/valid.de-en.de.bin
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:bffa8af7d0a005a67d5bab8e6bfbc14b9abd27898d509a28d2ff8d3f17b556ed
|
| 3 |
+
size 116454
|
data/de2en/0.5/trainable_data/valid.de-en.de.idx
ADDED
|
Binary file (24 kB). View file
|
|
|
data/de2en/0.5/trainable_data/valid.de-en.en.bin
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:77c1ee37e89aafb067d9c7528fd7502ef13268b3554f5232a244d54156846e5c
|
| 3 |
+
size 108124
|
data/de2en/0.5/trainable_data/valid.de-en.en.idx
ADDED
|
Binary file (24 kB). View file
|
|
|
data/de2en/0.6/trainable_data/dict.de.txt
ADDED
|
The diff for this file is too large to render.
See raw diff
|
|
|
data/de2en/0.6/trainable_data/dict.en.txt
ADDED
|
The diff for this file is too large to render.
See raw diff
|
|
|
data/de2en/0.6/trainable_data/preprocess.log
ADDED
|
@@ -0,0 +1,6 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
Namespace(no_progress_bar=False, log_interval=100, log_format=None, tensorboard_logdir=None, seed=42, cpu=False, tpu=False, bf16=False, memory_efficient_bf16=False, fp16=False, memory_efficient_fp16=False, fp16_no_flatten_grads=False, fp16_init_scale=128, fp16_scale_window=None, fp16_scale_tolerance=0.0, min_loss_scale=0.0001, threshold_loss_scale=None, user_dir=None, empty_cache_freq=0, all_gather_list_size=16384, model_parallel_size=1, checkpoint_suffix='', checkpoint_shard_count=1, quantization_config_path=None, profile=False, criterion='cross_entropy', optimizer=None, lr_scheduler='fixed', tokenizer=None, bpe=None, scoring='bleu', task='translation', source_lang='de', target_lang='en', trainpref='/home/luoyf/xzq/data/de2en/0.6/tokenized/bpe.train', validpref=None, testpref=None, align_suffix=None, destdir='/home/luoyf/xzq/data/de2en/0.6/trainable_data', thresholdtgt=0, thresholdsrc=0, tgtdict='/home/luoyf/xzq/data/de-en/wmt23/trainable_data/dict.en.txt', srcdict='/home/luoyf/xzq/data/de-en/wmt23/trainable_data/dict.de.txt', nwordstgt=-1, nwordssrc=-1, alignfile=None, dataset_impl='mmap', joined_dictionary=False, only_source=False, padding_factor=8, workers=32)
|
| 2 |
+
[de] Dictionary: 47776 types
|
| 3 |
+
[de] /home/luoyf/xzq/data/de2en/0.6/tokenized/bpe.train.de: 46388489 sents, 1161403088 tokens, 0.0% replaced by <unk>
|
| 4 |
+
[en] Dictionary: 47776 types
|
| 5 |
+
[en] /home/luoyf/xzq/data/de2en/0.6/tokenized/bpe.train.en: 46388489 sents, 1088982249 tokens, 9e-05% replaced by <unk>
|
| 6 |
+
Wrote preprocessed data to /home/luoyf/xzq/data/de2en/0.6/trainable_data
|
data/de2en/0.6/trainable_data/train.de-en.de.idx
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:45630adf99dab73623937030327c96aad2a8173be734d9290264bb343c400ada
|
| 3 |
+
size 556661894
|
data/de2en/0.6/trainable_data/train.de-en.en.bin
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:0fee349d545908c8d1b7d595c46dc0732978da2d04018371a2d67ee643145f83
|
| 3 |
+
size 2177964498
|
data/de2en/0.6/trainable_data/train.de-en.en.idx
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:135a8960c4d6ad8304017ca9b5e2356aedf3e610c4160f8362728723028f75e2
|
| 3 |
+
size 556661894
|
data/de2en/0.6/trainable_data/valid.de-en.de.bin
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:bffa8af7d0a005a67d5bab8e6bfbc14b9abd27898d509a28d2ff8d3f17b556ed
|
| 3 |
+
size 116454
|
data/de2en/0.6/trainable_data/valid.de-en.en.bin
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:77c1ee37e89aafb067d9c7528fd7502ef13268b3554f5232a244d54156846e5c
|
| 3 |
+
size 108124
|
data/de2en/0.7/trainable_data/dict.de.txt
ADDED
|
The diff for this file is too large to render.
See raw diff
|
|
|
data/de2en/0.7/trainable_data/dict.en.txt
ADDED
|
The diff for this file is too large to render.
See raw diff
|
|
|
data/de2en/0.7/trainable_data/preprocess.log
ADDED
|
@@ -0,0 +1,6 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
Namespace(no_progress_bar=False, log_interval=100, log_format=None, tensorboard_logdir=None, seed=42, cpu=False, tpu=False, bf16=False, memory_efficient_bf16=False, fp16=False, memory_efficient_fp16=False, fp16_no_flatten_grads=False, fp16_init_scale=128, fp16_scale_window=None, fp16_scale_tolerance=0.0, min_loss_scale=0.0001, threshold_loss_scale=None, user_dir=None, empty_cache_freq=0, all_gather_list_size=16384, model_parallel_size=1, checkpoint_suffix='', checkpoint_shard_count=1, quantization_config_path=None, profile=False, criterion='cross_entropy', optimizer=None, lr_scheduler='fixed', tokenizer=None, bpe=None, scoring='bleu', task='translation', source_lang='de', target_lang='en', trainpref='/home/luoyf/xzq/data/de2en/0.7/tokenized/bpe.train', validpref=None, testpref=None, align_suffix=None, destdir='/home/luoyf/xzq/data/de2en/0.7/trainable_data', thresholdtgt=0, thresholdsrc=0, tgtdict='/home/luoyf/xzq/data/de2en/0.7/trainable_data/dict.en.txt', srcdict='/home/luoyf/xzq/data/de2en/0.7/trainable_data/dict.de.txt', nwordstgt=-1, nwordssrc=-1, alignfile=None, dataset_impl='mmap', joined_dictionary=False, only_source=False, padding_factor=8, workers=32)
|
| 2 |
+
[de] Dictionary: 47776 types
|
| 3 |
+
[de] /home/luoyf/xzq/data/de2en/0.7/tokenized/bpe.train.de: 46388489 sents, 1161403088 tokens, 0.0% replaced by <unk>
|
| 4 |
+
[en] Dictionary: 47776 types
|
| 5 |
+
[en] /home/luoyf/xzq/data/de2en/0.7/tokenized/bpe.train.en: 46388489 sents, 1086119495 tokens, 0.000119% replaced by <unk>
|
| 6 |
+
Wrote preprocessed data to /home/luoyf/xzq/data/de2en/0.7/trainable_data
|
data/de2en/0.7/trainable_data/train.de-en.de.idx
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:45630adf99dab73623937030327c96aad2a8173be734d9290264bb343c400ada
|
| 3 |
+
size 556661894
|
data/de2en/0.7/trainable_data/train.de-en.en.idx
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:a96e991f89953b98e5f61a89c8e58f7e9f53856104b0e09dbac21f39bdd49e26
|
| 3 |
+
size 556661894
|
data/de2en/0.7/trainable_data/valid.de-en.de.bin
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:bffa8af7d0a005a67d5bab8e6bfbc14b9abd27898d509a28d2ff8d3f17b556ed
|
| 3 |
+
size 116454
|
data/de2en/0.7/trainable_data/valid.de-en.de.idx
ADDED
|
Binary file (24 kB). View file
|
|
|
data/de2en/0.7/trainable_data/valid.de-en.en.bin
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:77c1ee37e89aafb067d9c7528fd7502ef13268b3554f5232a244d54156846e5c
|
| 3 |
+
size 108124
|
data/de2en/0.7/trainable_data/valid.de-en.en.idx
ADDED
|
Binary file (24 kB). View file
|
|
|
data/en2de/0.4/trainable_data/dict.de.txt
ADDED
|
The diff for this file is too large to render.
See raw diff
|
|
|
data/en2de/0.4/trainable_data/dict.en.txt
ADDED
|
The diff for this file is too large to render.
See raw diff
|
|
|
data/en2de/0.4/trainable_data/preprocess.log
ADDED
|
@@ -0,0 +1,6 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
Namespace(no_progress_bar=False, log_interval=100, log_format=None, tensorboard_logdir=None, seed=42, cpu=False, tpu=False, bf16=False, memory_efficient_bf16=False, fp16=False, memory_efficient_fp16=False, fp16_no_flatten_grads=False, fp16_init_scale=128, fp16_scale_window=None, fp16_scale_tolerance=0.0, min_loss_scale=0.0001, threshold_loss_scale=None, user_dir=None, empty_cache_freq=0, all_gather_list_size=16384, model_parallel_size=1, checkpoint_suffix='', checkpoint_shard_count=1, quantization_config_path=None, profile=False, criterion='cross_entropy', tokenizer=None, bpe=None, optimizer=None, lr_scheduler='fixed', scoring='bleu', task='translation', source_lang='en', target_lang='de', trainpref='/home/luoyf/xzq/data/en2de/0.4/tokenized/bpe.train', validpref=None, testpref=None, align_suffix=None, destdir='/home/luoyf/xzq/data/en2de/0.4/trainable_data', thresholdtgt=0, thresholdsrc=0, tgtdict='/home/luoyf/xzq/data/de-en/wmt23/trainable_data/dict.de.txt', srcdict='/home/luoyf/xzq/data/de-en/wmt23/trainable_data/dict.en.txt', nwordstgt=-1, nwordssrc=-1, alignfile=None, dataset_impl='mmap', joined_dictionary=False, only_source=False, padding_factor=8, workers=32)
|
| 2 |
+
[en] Dictionary: 47776 types
|
| 3 |
+
[en] /home/luoyf/xzq/data/en2de/0.4/tokenized/bpe.train.en: 46388489 sents, 1094684830 tokens, 0.0% replaced by <unk>
|
| 4 |
+
[de] Dictionary: 47776 types
|
| 5 |
+
[de] /home/luoyf/xzq/data/en2de/0.4/tokenized/bpe.train.de: 46388489 sents, 1160669688 tokens, 2.21e-05% replaced by <unk>
|
| 6 |
+
Wrote preprocessed data to /home/luoyf/xzq/data/en2de/0.4/trainable_data
|
data/en2de/0.4/trainable_data/valid.de-en.de.idx
ADDED
|
Binary file (24 kB). View file
|
|
|
data/en2de/0.4/trainable_data/valid.de-en.en.idx
ADDED
|
Binary file (24 kB). View file
|
|
|
data/en2de/0.5/trainable_data/valid.de-en.de.idx
ADDED
|
Binary file (24 kB). View file
|
|
|
data/en2de/0.5/trainable_data/valid.de-en.en.idx
ADDED
|
Binary file (24 kB). View file
|
|
|
data/en2de/0.6/trainable_data/dict.de.txt
ADDED
|
The diff for this file is too large to render.
See raw diff
|
|
|
data/en2de/0.6/trainable_data/dict.en.txt
ADDED
|
The diff for this file is too large to render.
See raw diff
|
|
|
data/en2de/0.6/trainable_data/preprocess.log
ADDED
|
@@ -0,0 +1,6 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
Namespace(no_progress_bar=False, log_interval=100, log_format=None, tensorboard_logdir=None, seed=42, cpu=False, tpu=False, bf16=False, memory_efficient_bf16=False, fp16=False, memory_efficient_fp16=False, fp16_no_flatten_grads=False, fp16_init_scale=128, fp16_scale_window=None, fp16_scale_tolerance=0.0, min_loss_scale=0.0001, threshold_loss_scale=None, user_dir=None, empty_cache_freq=0, all_gather_list_size=16384, model_parallel_size=1, checkpoint_suffix='', checkpoint_shard_count=1, quantization_config_path=None, profile=False, criterion='cross_entropy', optimizer=None, lr_scheduler='fixed', tokenizer=None, bpe=None, scoring='bleu', task='translation', source_lang='en', target_lang='de', trainpref='/home/luoyf/xzq/data/en2de/0.6/tokenized/bpe.train', validpref=None, testpref=None, align_suffix=None, destdir='/home/luoyf/xzq/data/en2de/0.6/trainable_data', thresholdtgt=0, thresholdsrc=0, tgtdict='/home/luoyf/xzq/data/de-en/wmt23/trainable_data/dict.de.txt', srcdict='/home/luoyf/xzq/data/de-en/wmt23/trainable_data/dict.en.txt', nwordstgt=-1, nwordssrc=-1, alignfile=None, dataset_impl='mmap', joined_dictionary=False, only_source=False, padding_factor=8, workers=32)
|
| 2 |
+
[en] Dictionary: 47776 types
|
| 3 |
+
[en] /home/luoyf/xzq/data/en2de/0.6/tokenized/bpe.train.en: 46388489 sents, 1094684830 tokens, 0.0% replaced by <unk>
|
| 4 |
+
[de] Dictionary: 47776 types
|
| 5 |
+
[de] /home/luoyf/xzq/data/en2de/0.6/tokenized/bpe.train.de: 46388489 sents, 1160113236 tokens, 0.000117% replaced by <unk>
|
| 6 |
+
Wrote preprocessed data to /home/luoyf/xzq/data/en2de/0.6/trainable_data
|
data/en2de/0.6/trainable_data/valid.de-en.de.idx
ADDED
|
Binary file (24 kB). View file
|
|
|
data/en2de/0.6/trainable_data/valid.de-en.en.idx
ADDED
|
Binary file (24 kB). View file
|
|
|
data/en2de/0.7/trainable_data/dict.de.txt
ADDED
|
The diff for this file is too large to render.
See raw diff
|
|
|
data/en2de/0.7/trainable_data/dict.en.txt
ADDED
|
The diff for this file is too large to render.
See raw diff
|
|
|