#!/usr/bin/env bash
#
# Usage: print-vocabulary <MODEL_DIR> <VOCAB_DIR>
#
# For every *.zip file found under MODEL_DIR, writes a vocabulary file to
# VOCAB_DIR/<name>.txt, where <name> is the archive's file name without the
# ".zip" suffix. Archive members are looked up under a top-level directory
# called <name>, in this order:
#
#   <name>/graph/Gr.fst, <name>/Gr.fst, <name>/words.txt, <name>/graph/words.txt
#
# Only the first member that extracts successfully is used. From words.txt the
# first space-separated field of each line is kept; from Gr.fst the third
# tab-separated field of the `fstprint` output is kept. Either way the result
# is sorted and de-duplicated.
#
# Archives whose vocabulary file already exists and is non-empty are skipped.
# A model whose vocabulary cannot be produced is reported on stderr and the
# script carries on with the next archive.
set -euo pipefail

if [[ -z "${2:-}" ]]; then
  echo 'Usage: print-vocabulary <MODEL_DIR> <VOCAB_DIR>' >&2
  exit 1
fi

model_dir="$1"
vocab_dir="$2"

mkdir -p -- "${vocab_dir}"

# Scratch space for extracted archive members; removed on every exit path.
temp_dir="$(mktemp -d)"

# EXIT trap handler: delete the scratch directory.
finish() {
  rm -rf -- "${temp_dir}"
}

trap finish EXIT

#######################################
# Print the sorted, de-duplicated vocabulary found in a directory.
# Arguments: $1 - directory holding an extracted words.txt or Gr.fst
# Outputs:   one vocabulary entry per line on stdout
# Returns:   non-zero if neither file is present or if any command of the
#            pipeline fails (relies on `pipefail` being set)
#######################################
print_vocabulary() {
  local dir="$1"

  if [[ -f "${dir}/words.txt" ]]; then
    cut -d' ' -f1 < "${dir}/words.txt" | sort -u
  elif [[ -f "${dir}/Gr.fst" ]]; then
    fstprint "${dir}/Gr.fst" | cut -f3 | sort -u
  else
    return 1
  fi
}

# NUL-delimited names + `IFS= read -r -d ''` keep paths containing spaces,
# backslashes or newlines intact.
while IFS= read -r -d '' zip_file; do
  model_name="$(basename -- "${zip_file}" .zip)"
  vocab_file="${vocab_dir}/${model_name}.txt"

  if [[ -s "${vocab_file}" ]]; then
    echo "Skipping ${model_name} (${vocab_file})"
    continue
  fi

  # Separate name on purpose: the input argument lives in model_dir.
  extract_dir="${temp_dir}/${model_name}"
  mkdir -p -- "${extract_dir}"

  # Try the candidate members in order and stop at the first one unzip manages
  # to extract. A miss is not an error here; the check below reports it.
  # stdin is detached so unzip cannot read from the file list feeding this loop.
  for member in graph/Gr.fst Gr.fst words.txt graph/words.txt; do
    if unzip -j "${zip_file}" "${model_name}/${member}" -d "${extract_dir}" \
      < /dev/null; then
      break
    fi
  done

  # Build the vocabulary in the scratch directory first and only move it into
  # place on success, so a failed run never leaves a truncated file that the
  # non-empty check above would skip next time.
  partial_vocab="${extract_dir}/vocabulary.partial"
  if print_vocabulary "${extract_dir}" > "${partial_vocab}"; then
    mv -- "${partial_vocab}" "${vocab_file}"
  else
    echo "ERROR: can't get vocabulary for ${model_name}" >&2
  fi
done < <(find "${model_dir}" -name '*.zip' -type f -print0)