sleepyhead111 committed on
Commit
5d9d0f5
·
verified ·
1 Parent(s): 7cd8a0d

Add files using upload-large-folder tool

Browse files
This view is limited to 50 files because it contains too many changes. See raw diff
Files changed (50) hide show
  1. .gitattributes +19 -0
  2. data/de2en/0.4/trainable_data/dict.de.txt +0 -0
  3. data/de2en/0.4/trainable_data/dict.en.txt +0 -0
  4. data/de2en/0.4/trainable_data/preprocess.log +7 -0
  5. data/de2en/0.4/trainable_data/train.de-en.de.idx +3 -0
  6. data/de2en/0.4/trainable_data/train.de-en.en.idx +3 -0
  7. data/de2en/0.4/trainable_data/valid.de-en.de.bin +3 -0
  8. data/de2en/0.4/trainable_data/valid.de-en.de.idx +0 -0
  9. data/de2en/0.4/trainable_data/valid.de-en.en.bin +3 -0
  10. data/de2en/0.4/trainable_data/valid.de-en.en.idx +0 -0
  11. data/de2en/0.5/trainable_data/dict.de.txt +0 -0
  12. data/de2en/0.5/trainable_data/dict.en.txt +0 -0
  13. data/de2en/0.5/trainable_data/preprocess.log +6 -0
  14. data/de2en/0.5/trainable_data/train.de-en.de.idx +3 -0
  15. data/de2en/0.5/trainable_data/train.de-en.en.idx +3 -0
  16. data/de2en/0.5/trainable_data/valid.de-en.de.bin +3 -0
  17. data/de2en/0.5/trainable_data/valid.de-en.de.idx +0 -0
  18. data/de2en/0.5/trainable_data/valid.de-en.en.bin +3 -0
  19. data/de2en/0.5/trainable_data/valid.de-en.en.idx +0 -0
  20. data/de2en/0.6/trainable_data/dict.de.txt +0 -0
  21. data/de2en/0.6/trainable_data/dict.en.txt +0 -0
  22. data/de2en/0.6/trainable_data/preprocess.log +6 -0
  23. data/de2en/0.6/trainable_data/train.de-en.de.idx +3 -0
  24. data/de2en/0.6/trainable_data/train.de-en.en.bin +3 -0
  25. data/de2en/0.6/trainable_data/train.de-en.en.idx +3 -0
  26. data/de2en/0.6/trainable_data/valid.de-en.de.bin +3 -0
  27. data/de2en/0.6/trainable_data/valid.de-en.en.bin +3 -0
  28. data/de2en/0.7/trainable_data/dict.de.txt +0 -0
  29. data/de2en/0.7/trainable_data/dict.en.txt +0 -0
  30. data/de2en/0.7/trainable_data/preprocess.log +6 -0
  31. data/de2en/0.7/trainable_data/train.de-en.de.idx +3 -0
  32. data/de2en/0.7/trainable_data/train.de-en.en.idx +3 -0
  33. data/de2en/0.7/trainable_data/valid.de-en.de.bin +3 -0
  34. data/de2en/0.7/trainable_data/valid.de-en.de.idx +0 -0
  35. data/de2en/0.7/trainable_data/valid.de-en.en.bin +3 -0
  36. data/de2en/0.7/trainable_data/valid.de-en.en.idx +0 -0
  37. data/en2de/0.4/trainable_data/dict.de.txt +0 -0
  38. data/en2de/0.4/trainable_data/dict.en.txt +0 -0
  39. data/en2de/0.4/trainable_data/preprocess.log +6 -0
  40. data/en2de/0.4/trainable_data/valid.de-en.de.idx +0 -0
  41. data/en2de/0.4/trainable_data/valid.de-en.en.idx +0 -0
  42. data/en2de/0.5/trainable_data/valid.de-en.de.idx +0 -0
  43. data/en2de/0.5/trainable_data/valid.de-en.en.idx +0 -0
  44. data/en2de/0.6/trainable_data/dict.de.txt +0 -0
  45. data/en2de/0.6/trainable_data/dict.en.txt +0 -0
  46. data/en2de/0.6/trainable_data/preprocess.log +6 -0
  47. data/en2de/0.6/trainable_data/valid.de-en.de.idx +0 -0
  48. data/en2de/0.6/trainable_data/valid.de-en.en.idx +0 -0
  49. data/en2de/0.7/trainable_data/dict.de.txt +0 -0
  50. data/en2de/0.7/trainable_data/dict.en.txt +0 -0
.gitattributes CHANGED
@@ -41,3 +41,22 @@ mosesdecoder/moses/TranslationModel/UG/util/ibm1-align filter=lfs diff=lfs merge
41
  mosesdecoder/contrib/iSenWeb/Introduction/iSenWeb[[:space:]]A[[:space:]]Web-based[[:space:]]Machine[[:space:]]Translation[[:space:]]System[[:space:]]to[[:space:]]Translate[[:space:]]Sentences.docx filter=lfs diff=lfs merge=lfs -text
42
  FacebookAI/xlm-roberta-large/onnx/tokenizer.json filter=lfs diff=lfs merge=lfs -text
43
  mosesdecoder/contrib/promix/test_data/esen.nc.model.filtered/phrase-table.0-0.1.1.binphr.tgtdata.wa filter=lfs diff=lfs merge=lfs -text
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
41
  mosesdecoder/contrib/iSenWeb/Introduction/iSenWeb[[:space:]]A[[:space:]]Web-based[[:space:]]Machine[[:space:]]Translation[[:space:]]System[[:space:]]to[[:space:]]Translate[[:space:]]Sentences.docx filter=lfs diff=lfs merge=lfs -text
42
  FacebookAI/xlm-roberta-large/onnx/tokenizer.json filter=lfs diff=lfs merge=lfs -text
43
  mosesdecoder/contrib/promix/test_data/esen.nc.model.filtered/phrase-table.0-0.1.1.binphr.tgtdata.wa filter=lfs diff=lfs merge=lfs -text
44
+ mosesdecoder/contrib/iSenWeb/themes/images/common/Logo[[:space:]](2000x2000).png filter=lfs diff=lfs merge=lfs -text
45
+ mosesdecoder/contrib/iSenWeb/Introduction/iSenWeb[[:space:]]A[[:space:]]Web-based[[:space:]]Machine[[:space:]]Translation[[:space:]]System[[:space:]]to[[:space:]]Translate[[:space:]]Sentences.pdf filter=lfs diff=lfs merge=lfs -text
46
+ mosesdecoder/contrib/promix/test_data/esen.ep.model.filtered/phrase-table.0-0.1.1 filter=lfs diff=lfs merge=lfs -text
47
+ mosesdecoder/contrib/promix/test_data/esen.ep.model.filtered/phrase-table.0-0.1.1.binphr.tgtdata.wa filter=lfs diff=lfs merge=lfs -text
48
+ data/de2en/0.6/trainable_data/train.de-en.de.idx filter=lfs diff=lfs merge=lfs -text
49
+ data/de2en/0.6/trainable_data/train.de-en.en.idx filter=lfs diff=lfs merge=lfs -text
50
+ data/de2en/0.4/trainable_data/train.de-en.de.idx filter=lfs diff=lfs merge=lfs -text
51
+ data/zh2en/0.4/trainable_data/train.zh-en.en.idx filter=lfs diff=lfs merge=lfs -text
52
+ data/zh2en/0.6/trainable_data/train.zh-en.en.idx filter=lfs diff=lfs merge=lfs -text
53
+ data/de2en/0.5/trainable_data/train.de-en.en.idx filter=lfs diff=lfs merge=lfs -text
54
+ data/zh2en/0.7/trainable_data/train.zh-en.en.idx filter=lfs diff=lfs merge=lfs -text
55
+ data/zh2en/0.4/trainable_data/train.zh-en.zh.idx filter=lfs diff=lfs merge=lfs -text
56
+ data/de2en/0.5/trainable_data/train.de-en.de.idx filter=lfs diff=lfs merge=lfs -text
57
+ data/zh2en/0.7/trainable_data/train.zh-en.zh.idx filter=lfs diff=lfs merge=lfs -text
58
+ data/zh2en/0.5/trainable_data/train.zh-en.en.idx filter=lfs diff=lfs merge=lfs -text
59
+ data/de2en/0.4/trainable_data/train.de-en.en.idx filter=lfs diff=lfs merge=lfs -text
60
+ data/de2en/0.7/trainable_data/train.de-en.de.idx filter=lfs diff=lfs merge=lfs -text
61
+ data/de2en/0.7/trainable_data/train.de-en.en.idx filter=lfs diff=lfs merge=lfs -text
62
+ data/zh2en/0.6/trainable_data/train.zh-en.zh.idx filter=lfs diff=lfs merge=lfs -text
data/de2en/0.4/trainable_data/dict.de.txt ADDED
The diff for this file is too large to render. See raw diff
 
data/de2en/0.4/trainable_data/dict.en.txt ADDED
The diff for this file is too large to render. See raw diff
 
data/de2en/0.4/trainable_data/preprocess.log ADDED
@@ -0,0 +1,7 @@
 
 
 
 
 
 
 
 
1
+ Namespace(no_progress_bar=False, log_interval=100, log_format=None, tensorboard_logdir=None, seed=42, cpu=False, tpu=False, bf16=False, memory_efficient_bf16=False, fp16=False, memory_efficient_fp16=False, fp16_no_flatten_grads=False, fp16_init_scale=128, fp16_scale_window=None, fp16_scale_tolerance=0.0, min_loss_scale=0.0001, threshold_loss_scale=None, user_dir=None, empty_cache_freq=0, all_gather_list_size=16384, model_parallel_size=1, checkpoint_suffix='', checkpoint_shard_count=1, quantization_config_path=None, profile=False, criterion='cross_entropy', optimizer=None, lr_scheduler='fixed', tokenizer=None, bpe=None, scoring='bleu', task='translation', source_lang='de', target_lang='en', trainpref='/home/luoyf/xzq/data/de2en/0.4/tokenized/bpe.train', validpref=None, testpref=None, align_suffix=None, destdir='/home/luoyf/xzq/data/de2en/0.4/trainable_data', thresholdtgt=0, thresholdsrc=0, tgtdict='/home/luoyf/xzq/data/de2en/0.4/trainable_data/dict.en.txt', srcdict='/home/luoyf/xzq/data/de2en/0.4/trainable_data/dict.de.txt', nwordstgt=-1, nwordssrc=-1, alignfile=None, dataset_impl='mmap', joined_dictionary=False, only_source=False, padding_factor=8, workers=32)
2
+ Namespace(no_progress_bar=False, log_interval=100, log_format=None, tensorboard_logdir=None, seed=42, cpu=False, tpu=False, bf16=False, memory_efficient_bf16=False, fp16=False, memory_efficient_fp16=False, fp16_no_flatten_grads=False, fp16_init_scale=128, fp16_scale_window=None, fp16_scale_tolerance=0.0, min_loss_scale=0.0001, threshold_loss_scale=None, user_dir=None, empty_cache_freq=0, all_gather_list_size=16384, model_parallel_size=1, checkpoint_suffix='', checkpoint_shard_count=1, quantization_config_path=None, profile=False, criterion='cross_entropy', optimizer=None, lr_scheduler='fixed', tokenizer=None, bpe=None, scoring='bleu', task='translation', source_lang='de', target_lang='en', trainpref='/home/luoyf/xzq/data/de2en/0.4/tokenized/bpe.train', validpref=None, testpref=None, align_suffix=None, destdir='/home/luoyf/xzq/data/de2en/0.4/trainable_data', thresholdtgt=0, thresholdsrc=0, tgtdict='/home/luoyf/xzq/data/de-en/wmt23/trainable_data/dict.en.txt', srcdict='/home/luoyf/xzq/data/de-en/wmt23/trainable_data/dict.de.txt', nwordstgt=-1, nwordssrc=-1, alignfile=None, dataset_impl='mmap', joined_dictionary=False, only_source=False, padding_factor=8, workers=32)
3
+ [de] Dictionary: 47776 types
4
+ [de] /home/luoyf/xzq/data/de2en/0.4/tokenized/bpe.train.de: 46388489 sents, 1161403088 tokens, 0.0% replaced by <unk>
5
+ [en] Dictionary: 47776 types
6
+ [en] /home/luoyf/xzq/data/de2en/0.4/tokenized/bpe.train.en: 46388489 sents, 1093341883 tokens, 3.37e-05% replaced by <unk>
7
+ Wrote preprocessed data to /home/luoyf/xzq/data/de2en/0.4/trainable_data
data/de2en/0.4/trainable_data/train.de-en.de.idx ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:45630adf99dab73623937030327c96aad2a8173be734d9290264bb343c400ada
3
+ size 556661894
data/de2en/0.4/trainable_data/train.de-en.en.idx ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:fa589765b69df43d7101556633b330d917797f1a6dca5bb780e4d2f36cabad74
3
+ size 556661894
data/de2en/0.4/trainable_data/valid.de-en.de.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:bffa8af7d0a005a67d5bab8e6bfbc14b9abd27898d509a28d2ff8d3f17b556ed
3
+ size 116454
data/de2en/0.4/trainable_data/valid.de-en.de.idx ADDED
Binary file (24 kB). View file
 
data/de2en/0.4/trainable_data/valid.de-en.en.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:77c1ee37e89aafb067d9c7528fd7502ef13268b3554f5232a244d54156846e5c
3
+ size 108124
data/de2en/0.4/trainable_data/valid.de-en.en.idx ADDED
Binary file (24 kB). View file
 
data/de2en/0.5/trainable_data/dict.de.txt ADDED
The diff for this file is too large to render. See raw diff
 
data/de2en/0.5/trainable_data/dict.en.txt ADDED
The diff for this file is too large to render. See raw diff
 
data/de2en/0.5/trainable_data/preprocess.log ADDED
@@ -0,0 +1,6 @@
 
 
 
 
 
 
 
1
+ Namespace(no_progress_bar=False, log_interval=100, log_format=None, tensorboard_logdir=None, seed=42, cpu=False, tpu=False, bf16=False, memory_efficient_bf16=False, fp16=False, memory_efficient_fp16=False, fp16_no_flatten_grads=False, fp16_init_scale=128, fp16_scale_window=None, fp16_scale_tolerance=0.0, min_loss_scale=0.0001, threshold_loss_scale=None, user_dir=None, empty_cache_freq=0, all_gather_list_size=16384, model_parallel_size=1, checkpoint_suffix='', checkpoint_shard_count=1, quantization_config_path=None, profile=False, criterion='cross_entropy', optimizer=None, lr_scheduler='fixed', tokenizer=None, bpe=None, scoring='bleu', task='translation', source_lang='de', target_lang='en', trainpref='/home/luoyf/xzq/data/de2en/0.5/tokenized/bpe.train', validpref=None, testpref=None, align_suffix=None, destdir='/home/luoyf/xzq/data/de2en/0.5/trainable_data', thresholdtgt=0, thresholdsrc=0, tgtdict='/home/luoyf/xzq/data/de-en/wmt23/trainable_data/dict.en.txt', srcdict='/home/luoyf/xzq/data/de-en/wmt23/trainable_data/dict.de.txt', nwordstgt=-1, nwordssrc=-1, alignfile=None, dataset_impl='mmap', joined_dictionary=False, only_source=False, padding_factor=8, workers=32)
2
+ [de] Dictionary: 47776 types
3
+ [de] /home/luoyf/xzq/data/de2en/0.5/tokenized/bpe.train.de: 46388489 sents, 1161403088 tokens, 0.0% replaced by <unk>
4
+ [en] Dictionary: 47776 types
5
+ [en] /home/luoyf/xzq/data/de2en/0.5/tokenized/bpe.train.en: 46388489 sents, 1091759627 tokens, 6.59e-05% replaced by <unk>
6
+ Wrote preprocessed data to /home/luoyf/xzq/data/de2en/0.5/trainable_data
data/de2en/0.5/trainable_data/train.de-en.de.idx ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:45630adf99dab73623937030327c96aad2a8173be734d9290264bb343c400ada
3
+ size 556661894
data/de2en/0.5/trainable_data/train.de-en.en.idx ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3d8cac62ca77ef3256226853e31b02dd0f3d618969a95b44b5073b63a555475f
3
+ size 556661894
data/de2en/0.5/trainable_data/valid.de-en.de.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:bffa8af7d0a005a67d5bab8e6bfbc14b9abd27898d509a28d2ff8d3f17b556ed
3
+ size 116454
data/de2en/0.5/trainable_data/valid.de-en.de.idx ADDED
Binary file (24 kB). View file
 
data/de2en/0.5/trainable_data/valid.de-en.en.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:77c1ee37e89aafb067d9c7528fd7502ef13268b3554f5232a244d54156846e5c
3
+ size 108124
data/de2en/0.5/trainable_data/valid.de-en.en.idx ADDED
Binary file (24 kB). View file
 
data/de2en/0.6/trainable_data/dict.de.txt ADDED
The diff for this file is too large to render. See raw diff
 
data/de2en/0.6/trainable_data/dict.en.txt ADDED
The diff for this file is too large to render. See raw diff
 
data/de2en/0.6/trainable_data/preprocess.log ADDED
@@ -0,0 +1,6 @@
 
 
 
 
 
 
 
1
+ Namespace(no_progress_bar=False, log_interval=100, log_format=None, tensorboard_logdir=None, seed=42, cpu=False, tpu=False, bf16=False, memory_efficient_bf16=False, fp16=False, memory_efficient_fp16=False, fp16_no_flatten_grads=False, fp16_init_scale=128, fp16_scale_window=None, fp16_scale_tolerance=0.0, min_loss_scale=0.0001, threshold_loss_scale=None, user_dir=None, empty_cache_freq=0, all_gather_list_size=16384, model_parallel_size=1, checkpoint_suffix='', checkpoint_shard_count=1, quantization_config_path=None, profile=False, criterion='cross_entropy', optimizer=None, lr_scheduler='fixed', tokenizer=None, bpe=None, scoring='bleu', task='translation', source_lang='de', target_lang='en', trainpref='/home/luoyf/xzq/data/de2en/0.6/tokenized/bpe.train', validpref=None, testpref=None, align_suffix=None, destdir='/home/luoyf/xzq/data/de2en/0.6/trainable_data', thresholdtgt=0, thresholdsrc=0, tgtdict='/home/luoyf/xzq/data/de-en/wmt23/trainable_data/dict.en.txt', srcdict='/home/luoyf/xzq/data/de-en/wmt23/trainable_data/dict.de.txt', nwordstgt=-1, nwordssrc=-1, alignfile=None, dataset_impl='mmap', joined_dictionary=False, only_source=False, padding_factor=8, workers=32)
2
+ [de] Dictionary: 47776 types
3
+ [de] /home/luoyf/xzq/data/de2en/0.6/tokenized/bpe.train.de: 46388489 sents, 1161403088 tokens, 0.0% replaced by <unk>
4
+ [en] Dictionary: 47776 types
5
+ [en] /home/luoyf/xzq/data/de2en/0.6/tokenized/bpe.train.en: 46388489 sents, 1088982249 tokens, 9e-05% replaced by <unk>
6
+ Wrote preprocessed data to /home/luoyf/xzq/data/de2en/0.6/trainable_data
data/de2en/0.6/trainable_data/train.de-en.de.idx ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:45630adf99dab73623937030327c96aad2a8173be734d9290264bb343c400ada
3
+ size 556661894
data/de2en/0.6/trainable_data/train.de-en.en.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:0fee349d545908c8d1b7d595c46dc0732978da2d04018371a2d67ee643145f83
3
+ size 2177964498
data/de2en/0.6/trainable_data/train.de-en.en.idx ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:135a8960c4d6ad8304017ca9b5e2356aedf3e610c4160f8362728723028f75e2
3
+ size 556661894
data/de2en/0.6/trainable_data/valid.de-en.de.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:bffa8af7d0a005a67d5bab8e6bfbc14b9abd27898d509a28d2ff8d3f17b556ed
3
+ size 116454
data/de2en/0.6/trainable_data/valid.de-en.en.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:77c1ee37e89aafb067d9c7528fd7502ef13268b3554f5232a244d54156846e5c
3
+ size 108124
data/de2en/0.7/trainable_data/dict.de.txt ADDED
The diff for this file is too large to render. See raw diff
 
data/de2en/0.7/trainable_data/dict.en.txt ADDED
The diff for this file is too large to render. See raw diff
 
data/de2en/0.7/trainable_data/preprocess.log ADDED
@@ -0,0 +1,6 @@
 
 
 
 
 
 
 
1
+ Namespace(no_progress_bar=False, log_interval=100, log_format=None, tensorboard_logdir=None, seed=42, cpu=False, tpu=False, bf16=False, memory_efficient_bf16=False, fp16=False, memory_efficient_fp16=False, fp16_no_flatten_grads=False, fp16_init_scale=128, fp16_scale_window=None, fp16_scale_tolerance=0.0, min_loss_scale=0.0001, threshold_loss_scale=None, user_dir=None, empty_cache_freq=0, all_gather_list_size=16384, model_parallel_size=1, checkpoint_suffix='', checkpoint_shard_count=1, quantization_config_path=None, profile=False, criterion='cross_entropy', optimizer=None, lr_scheduler='fixed', tokenizer=None, bpe=None, scoring='bleu', task='translation', source_lang='de', target_lang='en', trainpref='/home/luoyf/xzq/data/de2en/0.7/tokenized/bpe.train', validpref=None, testpref=None, align_suffix=None, destdir='/home/luoyf/xzq/data/de2en/0.7/trainable_data', thresholdtgt=0, thresholdsrc=0, tgtdict='/home/luoyf/xzq/data/de2en/0.7/trainable_data/dict.en.txt', srcdict='/home/luoyf/xzq/data/de2en/0.7/trainable_data/dict.de.txt', nwordstgt=-1, nwordssrc=-1, alignfile=None, dataset_impl='mmap', joined_dictionary=False, only_source=False, padding_factor=8, workers=32)
2
+ [de] Dictionary: 47776 types
3
+ [de] /home/luoyf/xzq/data/de2en/0.7/tokenized/bpe.train.de: 46388489 sents, 1161403088 tokens, 0.0% replaced by <unk>
4
+ [en] Dictionary: 47776 types
5
+ [en] /home/luoyf/xzq/data/de2en/0.7/tokenized/bpe.train.en: 46388489 sents, 1086119495 tokens, 0.000119% replaced by <unk>
6
+ Wrote preprocessed data to /home/luoyf/xzq/data/de2en/0.7/trainable_data
data/de2en/0.7/trainable_data/train.de-en.de.idx ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:45630adf99dab73623937030327c96aad2a8173be734d9290264bb343c400ada
3
+ size 556661894
data/de2en/0.7/trainable_data/train.de-en.en.idx ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a96e991f89953b98e5f61a89c8e58f7e9f53856104b0e09dbac21f39bdd49e26
3
+ size 556661894
data/de2en/0.7/trainable_data/valid.de-en.de.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:bffa8af7d0a005a67d5bab8e6bfbc14b9abd27898d509a28d2ff8d3f17b556ed
3
+ size 116454
data/de2en/0.7/trainable_data/valid.de-en.de.idx ADDED
Binary file (24 kB). View file
 
data/de2en/0.7/trainable_data/valid.de-en.en.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:77c1ee37e89aafb067d9c7528fd7502ef13268b3554f5232a244d54156846e5c
3
+ size 108124
data/de2en/0.7/trainable_data/valid.de-en.en.idx ADDED
Binary file (24 kB). View file
 
data/en2de/0.4/trainable_data/dict.de.txt ADDED
The diff for this file is too large to render. See raw diff
 
data/en2de/0.4/trainable_data/dict.en.txt ADDED
The diff for this file is too large to render. See raw diff
 
data/en2de/0.4/trainable_data/preprocess.log ADDED
@@ -0,0 +1,6 @@
 
 
 
 
 
 
 
1
+ Namespace(no_progress_bar=False, log_interval=100, log_format=None, tensorboard_logdir=None, seed=42, cpu=False, tpu=False, bf16=False, memory_efficient_bf16=False, fp16=False, memory_efficient_fp16=False, fp16_no_flatten_grads=False, fp16_init_scale=128, fp16_scale_window=None, fp16_scale_tolerance=0.0, min_loss_scale=0.0001, threshold_loss_scale=None, user_dir=None, empty_cache_freq=0, all_gather_list_size=16384, model_parallel_size=1, checkpoint_suffix='', checkpoint_shard_count=1, quantization_config_path=None, profile=False, criterion='cross_entropy', tokenizer=None, bpe=None, optimizer=None, lr_scheduler='fixed', scoring='bleu', task='translation', source_lang='en', target_lang='de', trainpref='/home/luoyf/xzq/data/en2de/0.4/tokenized/bpe.train', validpref=None, testpref=None, align_suffix=None, destdir='/home/luoyf/xzq/data/en2de/0.4/trainable_data', thresholdtgt=0, thresholdsrc=0, tgtdict='/home/luoyf/xzq/data/de-en/wmt23/trainable_data/dict.de.txt', srcdict='/home/luoyf/xzq/data/de-en/wmt23/trainable_data/dict.en.txt', nwordstgt=-1, nwordssrc=-1, alignfile=None, dataset_impl='mmap', joined_dictionary=False, only_source=False, padding_factor=8, workers=32)
2
+ [en] Dictionary: 47776 types
3
+ [en] /home/luoyf/xzq/data/en2de/0.4/tokenized/bpe.train.en: 46388489 sents, 1094684830 tokens, 0.0% replaced by <unk>
4
+ [de] Dictionary: 47776 types
5
+ [de] /home/luoyf/xzq/data/en2de/0.4/tokenized/bpe.train.de: 46388489 sents, 1160669688 tokens, 2.21e-05% replaced by <unk>
6
+ Wrote preprocessed data to /home/luoyf/xzq/data/en2de/0.4/trainable_data
data/en2de/0.4/trainable_data/valid.de-en.de.idx ADDED
Binary file (24 kB). View file
 
data/en2de/0.4/trainable_data/valid.de-en.en.idx ADDED
Binary file (24 kB). View file
 
data/en2de/0.5/trainable_data/valid.de-en.de.idx ADDED
Binary file (24 kB). View file
 
data/en2de/0.5/trainable_data/valid.de-en.en.idx ADDED
Binary file (24 kB). View file
 
data/en2de/0.6/trainable_data/dict.de.txt ADDED
The diff for this file is too large to render. See raw diff
 
data/en2de/0.6/trainable_data/dict.en.txt ADDED
The diff for this file is too large to render. See raw diff
 
data/en2de/0.6/trainable_data/preprocess.log ADDED
@@ -0,0 +1,6 @@
 
 
 
 
 
 
 
1
+ Namespace(no_progress_bar=False, log_interval=100, log_format=None, tensorboard_logdir=None, seed=42, cpu=False, tpu=False, bf16=False, memory_efficient_bf16=False, fp16=False, memory_efficient_fp16=False, fp16_no_flatten_grads=False, fp16_init_scale=128, fp16_scale_window=None, fp16_scale_tolerance=0.0, min_loss_scale=0.0001, threshold_loss_scale=None, user_dir=None, empty_cache_freq=0, all_gather_list_size=16384, model_parallel_size=1, checkpoint_suffix='', checkpoint_shard_count=1, quantization_config_path=None, profile=False, criterion='cross_entropy', optimizer=None, lr_scheduler='fixed', tokenizer=None, bpe=None, scoring='bleu', task='translation', source_lang='en', target_lang='de', trainpref='/home/luoyf/xzq/data/en2de/0.6/tokenized/bpe.train', validpref=None, testpref=None, align_suffix=None, destdir='/home/luoyf/xzq/data/en2de/0.6/trainable_data', thresholdtgt=0, thresholdsrc=0, tgtdict='/home/luoyf/xzq/data/de-en/wmt23/trainable_data/dict.de.txt', srcdict='/home/luoyf/xzq/data/de-en/wmt23/trainable_data/dict.en.txt', nwordstgt=-1, nwordssrc=-1, alignfile=None, dataset_impl='mmap', joined_dictionary=False, only_source=False, padding_factor=8, workers=32)
2
+ [en] Dictionary: 47776 types
3
+ [en] /home/luoyf/xzq/data/en2de/0.6/tokenized/bpe.train.en: 46388489 sents, 1094684830 tokens, 0.0% replaced by <unk>
4
+ [de] Dictionary: 47776 types
5
+ [de] /home/luoyf/xzq/data/en2de/0.6/tokenized/bpe.train.de: 46388489 sents, 1160113236 tokens, 0.000117% replaced by <unk>
6
+ Wrote preprocessed data to /home/luoyf/xzq/data/en2de/0.6/trainable_data
data/en2de/0.6/trainable_data/valid.de-en.de.idx ADDED
Binary file (24 kB). View file
 
data/en2de/0.6/trainable_data/valid.de-en.en.idx ADDED
Binary file (24 kB). View file
 
data/en2de/0.7/trainable_data/dict.de.txt ADDED
The diff for this file is too large to render. See raw diff
 
data/en2de/0.7/trainable_data/dict.en.txt ADDED
The diff for this file is too large to render. See raw diff