{
  "nbformat": 4,
  "nbformat_minor": 0,
  "metadata": {
    "colab": { "name": "Vosk Adaptation", "provenance": [], "collapsed_sections": [] },
    "kernelspec": { "name": "python3", "display_name": "Python 3" },
    "language_info": { "name": "python" },
    "accelerator": "GPU",
    "gpuClass": "standard"
  },
  "cells": [
    {
      "cell_type": "markdown",
      "metadata": {},
      "source": [
        "# Vosk language model adaptation\n",
        "\n",
        "Downloads Kaldi and the `vosk-model-small-en-us-0.15` compile package, builds decoding graphs from the stock language model and from one merged with the extra text in `db/extra.txt`, then decodes the bundled `test_small` set with both graphs and prints the word error rates."
      ]
    },
    {
      "cell_type": "code",
      "execution_count": null,
      "metadata": {
        "colab": { "base_uri": "https://localhost:8080/" },
        "id": "URzWMmv50-Ba",
        "outputId": "0e096a99-74dd-42e2-efb1-9cba784c3664"
      },
      "outputs": [
        {
          "output_type": "stream",
          "name": "stdout",
          "text": [
            "/content\n",
            "--2022-08-17 09:48:52-- https://alphacephei.com/vosk-colab/kaldi.tar.gz\n",
            "Resolving alphacephei.com (alphacephei.com)... 188.40.21.16, 2a01:4f8:13a:279f::2\n",
            "Connecting to alphacephei.com (alphacephei.com)|188.40.21.16|:443... connected.\n",
            "HTTP request sent, awaiting response... 200 OK\n",
            "Length: 809174554 (772M) [application/octet-stream]\n",
            "Saving to: ‘kaldi.tar.gz’\n",
            "\n",
            "kaldi.tar.gz 100%[===================>] 771.69M 20.3MB/s in 40s \n",
            "\n",
            "2022-08-17 09:49:33 (19.4 MB/s) - ‘kaldi.tar.gz’ saved [809174554/809174554]\n",
            "\n"
          ]
        }
      ],
      "source": [
        "# Fetch the Kaldi archive into /content and unpack it; `wget -c` resumes a partial download.\n",
        "# The two steps are chained with && so tar never runs on a missing or incomplete archive.\n",
        "%cd /content\n",
        "!wget -c https://alphacephei.com/vosk-colab/kaldi.tar.gz && tar xzf kaldi.tar.gz"
      ]
    },
    {
      "cell_type": "code",
      "source": [
        "# Fetch the model compile package into /content/kaldi/egs/ac and unpack a clean copy of it.\n",
        "# Chained with && so a failed download cannot delete an already extracted tree.\n",
        "%cd /content/kaldi/egs/ac\n",
        "!wget -c https://alphacephei.com/vosk-colab/vosk-model-small-en-us-0.15-compile-colab.tar.gz && rm -rf vosk-model-small-en-us-0.15-compile-colab && tar xf vosk-model-small-en-us-0.15-compile-colab.tar.gz"
      ],
      "metadata": {
        "id": "-065p7WC2SHh",
        "colab": { "base_uri": "https://localhost:8080/" },
        "outputId": "241c7473-7464-48d5-b48d-dc6e3bf4971d"
      },
      "execution_count": 8,
      "outputs": [
        {
          "output_type": "stream",
          "name": "stdout",
          "text": [
            "/content/kaldi/egs/ac\n", "--2022-08-17 10:28:26-- https://alphacephei.com/vosk-colab/vosk-model-small-en-us-0.15-compile-colab.tar.gz\n", "Resolving alphacephei.com (alphacephei.com)... 
188.40.21.16, 2a01:4f8:13a:279f::2\n", "Connecting to alphacephei.com (alphacephei.com)|188.40.21.16|:443... connected.\n", "HTTP request sent, awaiting response... 200 OK\n", "Length: 59618100 (57M) [application/octet-stream]\n", "Saving to: ‘vosk-model-small-en-us-0.15-compile-colab.tar.gz’\n", "\n", "vosk-model-small-en 100%[===================>] 56.86M 18.6MB/s in 3.6s \n", "\n", "2022-08-17 10:28:30 (15.7 MB/s) - ‘vosk-model-small-en-us-0.15-compile-colab.tar.gz’ saved [59618100/59618100]\n", "\n" ] } ] }, { "cell_type": "code", "source": [ "%cd /content/kaldi/egs/ac/vosk-model-small-en-us-0.15-compile-colab\n", "!ls\n", "!cat compile-graph.sh\n", "!bash compile-graph.sh" ], "metadata": { "colab": { "base_uri": "https://localhost:8080/" }, "id": "wuDjvNbd2sf9", "outputId": "34a1d2fe-d443-4574-e25d-824e38eb3a78" }, "execution_count": 9, "outputs": [ { "output_type": "stream", "name": "stdout", "text": [ "/content/kaldi/egs/ac/vosk-model-small-en-us-0.15-compile-colab\n", "compile-graph.sh data_test decode.sh\texp\t local path.sh steps\n", "conf\t\t db\t dict.py\tget_vocab.py mfcc RESULTS utils\n", "#!/bin/bash\n", "\n", "set -x\n", "\n", ". 
path.sh\n", "\n", "pip3 install phonetisaurus\n", "\n", "rm -rf data\n", "rm -rf exp/tdnn/lgraph\n", "rm -rf exp/tdnn/lgraph_orig\n", "\n", "mkdir -p data/dict\n", "cp db/phone/* data/dict\n", "./dict.py > data/dict/lexicon.txt\n", "\n", "python3 ./get_vocab.py > data/mix.vocab\n", "ngramsymbols data/mix.vocab data/mix.syms\n", "farcompilestrings --fst_type=compact --symbols=data/mix.syms --keep_symbols --unknown_symbol=\"[unk]\" db/extra.txt data/extra.far\n", "ngramcount --order=3 data/extra.far - |\n", " ngramprint --integers | grep -v \"\" | ngramread |\n", " ngramshrink --method=count_prune --count_pattern=\"3+:3\" |\n", " ngrammake --method=witten_bell - data/extra.mod\n", "gunzip -c db/en-50k-0.4-android.lm.gz | ngramread --renormalize_arpa --ARPA --symbols=data/mix.syms - data/en-us.mod\n", "ngrammerge --method=\"bayes_model_merge\" --normalize --alpha=0.95 --beta=0.05 data/en-us.mod data/extra.mod data/en-us-mix.mod\n", "ngramprint --ARPA data/en-us-mix.mod | gzip -c > data/en-us-mix.lm.gz\n", "\n", "# Prune for the first stage if needed\n", "# ngramshrink --method=relative_entropy --theta=2e-8 data/en-us-mix.mod data/en-us-mix-prune.mod\n", "# ngramprint --ARPA data/en-us-mix-prune.mod | gzip -c > data/en-us-mix-small.lm.gz\n", "\n", "utils/prepare_lang.sh data/dict \"[unk]\" data/lang_local data/lang\n", "utils/format_lm.sh data/lang db/en-50k-0.4-android.lm.gz data/dict/lexicon.txt data/lang_test\n", "utils/format_lm.sh data/lang data/en-us-mix.lm.gz data/dict/lexicon.txt data/lang_test_adapt\n", "\n", "utils/mkgraph.sh --self-loop-scale 1.0 data/lang_test exp/tdnn exp/tdnn/graph\n", "utils/mkgraph.sh --self-loop-scale 1.0 data/lang_test_adapt exp/tdnn exp/tdnn/graph_adapt\n", "\n", "# Lookahead part goes OOM\n", "#utils/mkgraph_lookahead.sh \\\n", "# --self-loop-scale 1.0 data/lang \\\n", "# exp/tdnn data/en-us-mix.lm.gz exp/tdnn/lgraph\n", "#utils/mkgraph_lookahead.sh \\\n", "# --self-loop-scale 1.0 data/lang \\\n", "# exp/tdnn 
db/en-50k-0.4-android.lm.gz exp/tdnn/lgraph_orig\n", "+ . path.sh\n", "+++ pwd\n", "++ export KALDI_ROOT=/content/kaldi/egs/ac/vosk-model-small-en-us-0.15-compile-colab/../../..\n", "++ KALDI_ROOT=/content/kaldi/egs/ac/vosk-model-small-en-us-0.15-compile-colab/../../..\n", "++ export PATH=/content/kaldi/egs/ac/vosk-model-small-en-us-0.15-compile-colab/utils:/content/kaldi/egs/ac/vosk-model-small-en-us-0.15-compile-colab/../../../src/bin:/content/kaldi/egs/ac/vosk-model-small-en-us-0.15-compile-colab/../../../tools/openfst/bin:/content/kaldi/egs/ac/vosk-model-small-en-us-0.15-compile-colab/../../../src/fstbin:/content/kaldi/egs/ac/vosk-model-small-en-us-0.15-compile-colab/../../../src/gmmbin:/content/kaldi/egs/ac/vosk-model-small-en-us-0.15-compile-colab/../../../src/featbin:/content/kaldi/egs/ac/vosk-model-small-en-us-0.15-compile-colab/../../../src/lm:/content/kaldi/egs/ac/vosk-model-small-en-us-0.15-compile-colab/../../../src/sgmmbin:/content/kaldi/egs/ac/vosk-model-small-en-us-0.15-compile-colab/../../../src/sgmm2bin:/content/kaldi/egs/ac/vosk-model-small-en-us-0.15-compile-colab/../../../src/fgmmbin:/content/kaldi/egs/ac/vosk-model-small-en-us-0.15-compile-colab/../../../src/latbin:/content/kaldi/egs/ac/vosk-model-small-en-us-0.15-compile-colab/../../../src/nnetbin:/content/kaldi/egs/ac/vosk-model-small-en-us-0.15-compile-colab/../../../src/nnet2bin:/content/kaldi/egs/ac/vosk-model-small-en-us-0.15-compile-colab/../../../src/online2bin:/content/kaldi/egs/ac/vosk-model-small-en-us-0.15-compile-colab/../../../src/ivectorbin:/content/kaldi/egs/ac/vosk-model-small-en-us-0.15-compile-colab/../../../src/lmbin:/content/kaldi/egs/ac/vosk-model-small-en-us-0.15-compile-colab/../../../src/chainbin:/content/kaldi/egs/ac/vosk-model-small-en-us-0.15-compile-colab/../../../src/nnet3bin:/content/kaldi/egs/ac/vosk-model-small-en-us-0.15-compile-colab:/opt/bin:/usr/local/nvidia/bin:/usr/local/cuda/bin:/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin:/tools/node/bin:/
tools/google-cloud-sdk/bin:/content/kaldi/egs/ac/vosk-model-small-en-us-0.15-compile-colab/../../../tools/sph2pipe_v2.5\n", "++ PATH=/content/kaldi/egs/ac/vosk-model-small-en-us-0.15-compile-colab/utils:/content/kaldi/egs/ac/vosk-model-small-en-us-0.15-compile-colab/../../../src/bin:/content/kaldi/egs/ac/vosk-model-small-en-us-0.15-compile-colab/../../../tools/openfst/bin:/content/kaldi/egs/ac/vosk-model-small-en-us-0.15-compile-colab/../../../src/fstbin:/content/kaldi/egs/ac/vosk-model-small-en-us-0.15-compile-colab/../../../src/gmmbin:/content/kaldi/egs/ac/vosk-model-small-en-us-0.15-compile-colab/../../../src/featbin:/content/kaldi/egs/ac/vosk-model-small-en-us-0.15-compile-colab/../../../src/lm:/content/kaldi/egs/ac/vosk-model-small-en-us-0.15-compile-colab/../../../src/sgmmbin:/content/kaldi/egs/ac/vosk-model-small-en-us-0.15-compile-colab/../../../src/sgmm2bin:/content/kaldi/egs/ac/vosk-model-small-en-us-0.15-compile-colab/../../../src/fgmmbin:/content/kaldi/egs/ac/vosk-model-small-en-us-0.15-compile-colab/../../../src/latbin:/content/kaldi/egs/ac/vosk-model-small-en-us-0.15-compile-colab/../../../src/nnetbin:/content/kaldi/egs/ac/vosk-model-small-en-us-0.15-compile-colab/../../../src/nnet2bin:/content/kaldi/egs/ac/vosk-model-small-en-us-0.15-compile-colab/../../../src/online2bin:/content/kaldi/egs/ac/vosk-model-small-en-us-0.15-compile-colab/../../../src/ivectorbin:/content/kaldi/egs/ac/vosk-model-small-en-us-0.15-compile-colab/../../../src/lmbin:/content/kaldi/egs/ac/vosk-model-small-en-us-0.15-compile-colab/../../../src/chainbin:/content/kaldi/egs/ac/vosk-model-small-en-us-0.15-compile-colab/../../../src/nnet3bin:/content/kaldi/egs/ac/vosk-model-small-en-us-0.15-compile-colab:/opt/bin:/usr/local/nvidia/bin:/usr/local/cuda/bin:/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin:/tools/node/bin:/tools/google-cloud-sdk/bin:/content/kaldi/egs/ac/vosk-model-small-en-us-0.15-compile-colab/../../../tools/sph2pipe_v2.5\n", "++ export 
PATH=/content/kaldi/egs/ac/vosk-model-small-en-us-0.15-compile-colab/../../../tools/ngram-1.3.7/src/bin:/content/kaldi/egs/ac/vosk-model-small-en-us-0.15-compile-colab/utils:/content/kaldi/egs/ac/vosk-model-small-en-us-0.15-compile-colab/../../../src/bin:/content/kaldi/egs/ac/vosk-model-small-en-us-0.15-compile-colab/../../../tools/openfst/bin:/content/kaldi/egs/ac/vosk-model-small-en-us-0.15-compile-colab/../../../src/fstbin:/content/kaldi/egs/ac/vosk-model-small-en-us-0.15-compile-colab/../../../src/gmmbin:/content/kaldi/egs/ac/vosk-model-small-en-us-0.15-compile-colab/../../../src/featbin:/content/kaldi/egs/ac/vosk-model-small-en-us-0.15-compile-colab/../../../src/lm:/content/kaldi/egs/ac/vosk-model-small-en-us-0.15-compile-colab/../../../src/sgmmbin:/content/kaldi/egs/ac/vosk-model-small-en-us-0.15-compile-colab/../../../src/sgmm2bin:/content/kaldi/egs/ac/vosk-model-small-en-us-0.15-compile-colab/../../../src/fgmmbin:/content/kaldi/egs/ac/vosk-model-small-en-us-0.15-compile-colab/../../../src/latbin:/content/kaldi/egs/ac/vosk-model-small-en-us-0.15-compile-colab/../../../src/nnetbin:/content/kaldi/egs/ac/vosk-model-small-en-us-0.15-compile-colab/../../../src/nnet2bin:/content/kaldi/egs/ac/vosk-model-small-en-us-0.15-compile-colab/../../../src/online2bin:/content/kaldi/egs/ac/vosk-model-small-en-us-0.15-compile-colab/../../../src/ivectorbin:/content/kaldi/egs/ac/vosk-model-small-en-us-0.15-compile-colab/../../../src/lmbin:/content/kaldi/egs/ac/vosk-model-small-en-us-0.15-compile-colab/../../../src/chainbin:/content/kaldi/egs/ac/vosk-model-small-en-us-0.15-compile-colab/../../../src/nnet3bin:/content/kaldi/egs/ac/vosk-model-small-en-us-0.15-compile-colab:/opt/bin:/usr/local/nvidia/bin:/usr/local/cuda/bin:/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin:/tools/node/bin:/tools/google-cloud-sdk/bin:/content/kaldi/egs/ac/vosk-model-small-en-us-0.15-compile-colab/../../../tools/sph2pipe_v2.5\n", "++ 
PATH=/content/kaldi/egs/ac/vosk-model-small-en-us-0.15-compile-colab/../../../tools/ngram-1.3.7/src/bin:/content/kaldi/egs/ac/vosk-model-small-en-us-0.15-compile-colab/utils:/content/kaldi/egs/ac/vosk-model-small-en-us-0.15-compile-colab/../../../src/bin:/content/kaldi/egs/ac/vosk-model-small-en-us-0.15-compile-colab/../../../tools/openfst/bin:/content/kaldi/egs/ac/vosk-model-small-en-us-0.15-compile-colab/../../../src/fstbin:/content/kaldi/egs/ac/vosk-model-small-en-us-0.15-compile-colab/../../../src/gmmbin:/content/kaldi/egs/ac/vosk-model-small-en-us-0.15-compile-colab/../../../src/featbin:/content/kaldi/egs/ac/vosk-model-small-en-us-0.15-compile-colab/../../../src/lm:/content/kaldi/egs/ac/vosk-model-small-en-us-0.15-compile-colab/../../../src/sgmmbin:/content/kaldi/egs/ac/vosk-model-small-en-us-0.15-compile-colab/../../../src/sgmm2bin:/content/kaldi/egs/ac/vosk-model-small-en-us-0.15-compile-colab/../../../src/fgmmbin:/content/kaldi/egs/ac/vosk-model-small-en-us-0.15-compile-colab/../../../src/latbin:/content/kaldi/egs/ac/vosk-model-small-en-us-0.15-compile-colab/../../../src/nnetbin:/content/kaldi/egs/ac/vosk-model-small-en-us-0.15-compile-colab/../../../src/nnet2bin:/content/kaldi/egs/ac/vosk-model-small-en-us-0.15-compile-colab/../../../src/online2bin:/content/kaldi/egs/ac/vosk-model-small-en-us-0.15-compile-colab/../../../src/ivectorbin:/content/kaldi/egs/ac/vosk-model-small-en-us-0.15-compile-colab/../../../src/lmbin:/content/kaldi/egs/ac/vosk-model-small-en-us-0.15-compile-colab/../../../src/chainbin:/content/kaldi/egs/ac/vosk-model-small-en-us-0.15-compile-colab/../../../src/nnet3bin:/content/kaldi/egs/ac/vosk-model-small-en-us-0.15-compile-colab:/opt/bin:/usr/local/nvidia/bin:/usr/local/cuda/bin:/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin:/tools/node/bin:/tools/google-cloud-sdk/bin:/content/kaldi/egs/ac/vosk-model-small-en-us-0.15-compile-colab/../../../tools/sph2pipe_v2.5\n", "++ export 
LD_LIBRARY_PATH=/content/kaldi/egs/ac/vosk-model-small-en-us-0.15-compile-colab/../../../tools/openfst/lib/fst/\n", "++ LD_LIBRARY_PATH=/content/kaldi/egs/ac/vosk-model-small-en-us-0.15-compile-colab/../../../tools/openfst/lib/fst/\n", "++ export LC_ALL=C\n", "++ LC_ALL=C\n", "+ pip3 install phonetisaurus\n", "Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/\n", "Requirement already satisfied: phonetisaurus in /usr/local/lib/python3.7/dist-packages (0.3.0)\n", "+ rm -rf data\n", "+ rm -rf exp/tdnn/lgraph\n", "+ rm -rf exp/tdnn/lgraph_orig\n", "+ mkdir -p data/dict\n", "+ cp db/phone/extra_questions.txt db/phone/nonsilence_phones.txt db/phone/optional_silence.txt db/phone/silence_phones.txt data/dict\n", "+ ./dict.py\n", "+ python3 ./get_vocab.py\n", "+ ngramsymbols data/mix.vocab data/mix.syms\n", "+ farcompilestrings --fst_type=compact --symbols=data/mix.syms --keep_symbols '--unknown_symbol=[unk]' db/extra.txt data/extra.far\n", "+ ngramcount --order=3 data/extra.far -\n", "+ ngrammake --method=witten_bell - data/extra.mod\n", "+ ngramshrink --method=count_prune --count_pattern=3+:3\n", "+ ngramread\n", "+ ngramprint --integers\n", "+ grep -v ''\n", "+ ngramread --renormalize_arpa --ARPA --symbols=data/mix.syms - data/en-us.mod\n", "+ gunzip -c db/en-50k-0.4-android.lm.gz\n", "+ ngrammerge --method=bayes_model_merge --normalize --alpha=0.95 --beta=0.05 data/en-us.mod data/extra.mod data/en-us-mix.mod\n", "+ ngramprint --ARPA data/en-us-mix.mod\n", "+ gzip -c\n", "+ utils/prepare_lang.sh data/dict '[unk]' data/lang_local data/lang\n", "utils/prepare_lang.sh data/dict [unk] data/lang_local data/lang\n", "Checking data/dict/silence_phones.txt ...\n", "--> reading data/dict/silence_phones.txt\n", "--> text seems to be UTF-8 or ASCII, checking whitespaces\n", "--> text contains only allowed whitespaces\n", "--> data/dict/silence_phones.txt is OK\n", "\n", "Checking data/dict/optional_silence.txt ...\n", "--> reading 
data/dict/optional_silence.txt\n", "--> text seems to be UTF-8 or ASCII, checking whitespaces\n", "--> text contains only allowed whitespaces\n", "--> data/dict/optional_silence.txt is OK\n", "\n", "Checking data/dict/nonsilence_phones.txt ...\n", "--> reading data/dict/nonsilence_phones.txt\n", "--> text seems to be UTF-8 or ASCII, checking whitespaces\n", "--> text contains only allowed whitespaces\n", "--> data/dict/nonsilence_phones.txt is OK\n", "\n", "Checking disjoint: silence_phones.txt, nonsilence_phones.txt\n", "--> disjoint property is OK.\n", "\n", "Checking data/dict/lexicon.txt\n", "--> reading data/dict/lexicon.txt\n", "--> text seems to be UTF-8 or ASCII, checking whitespaces\n", "--> text contains only allowed whitespaces\n", "--> data/dict/lexicon.txt is OK\n", "\n", "Checking data/dict/extra_questions.txt ...\n", "--> data/dict/extra_questions.txt is empty (this is OK)\n", "--> SUCCESS [validating dictionary directory data/dict]\n", "\n", "**Creating data/dict/lexiconp.txt from data/dict/lexicon.txt\n", "fstaddselfloops data/lang/phones/wdisambig_phones.int data/lang/phones/wdisambig_words.int \n", "prepare_lang.sh: validating output directory\n", "utils/validate_lang.pl data/lang\n", "Checking existence of separator file\n", "separator file data/lang/subword_separator.txt is empty or does not exist, deal in word case.\n", "Checking data/lang/phones.txt ...\n", "--> text seems to be UTF-8 or ASCII, checking whitespaces\n", "--> text contains only allowed whitespaces\n", "--> data/lang/phones.txt is OK\n", "\n", "Checking words.txt: #0 ...\n", "--> text seems to be UTF-8 or ASCII, checking whitespaces\n", "--> text contains only allowed whitespaces\n", "--> data/lang/words.txt is OK\n", "\n", "Checking disjoint: silence.txt, nonsilence.txt, disambig.txt ...\n", "--> silence.txt and nonsilence.txt are disjoint\n", "--> silence.txt and disambig.txt are disjoint\n", "--> disambig.txt and nonsilence.txt are disjoint\n", "--> disjoint property is 
OK\n", "\n", "Checking sumation: silence.txt, nonsilence.txt, disambig.txt ...\n", "--> found no unexplainable phones in phones.txt\n", "\n", "Checking data/lang/phones/context_indep.{txt, int, csl} ...\n", "--> text seems to be UTF-8 or ASCII, checking whitespaces\n", "--> text contains only allowed whitespaces\n", "--> 10 entry/entries in data/lang/phones/context_indep.txt\n", "--> data/lang/phones/context_indep.int corresponds to data/lang/phones/context_indep.txt\n", "--> data/lang/phones/context_indep.csl corresponds to data/lang/phones/context_indep.txt\n", "--> data/lang/phones/context_indep.{txt, int, csl} are OK\n", "\n", "Checking data/lang/phones/nonsilence.{txt, int, csl} ...\n", "--> text seems to be UTF-8 or ASCII, checking whitespaces\n", "--> text contains only allowed whitespaces\n", "--> 156 entry/entries in data/lang/phones/nonsilence.txt\n", "--> data/lang/phones/nonsilence.int corresponds to data/lang/phones/nonsilence.txt\n", "--> data/lang/phones/nonsilence.csl corresponds to data/lang/phones/nonsilence.txt\n", "--> data/lang/phones/nonsilence.{txt, int, csl} are OK\n", "\n", "Checking data/lang/phones/silence.{txt, int, csl} ...\n", "--> text seems to be UTF-8 or ASCII, checking whitespaces\n", "--> text contains only allowed whitespaces\n", "--> 10 entry/entries in data/lang/phones/silence.txt\n", "--> data/lang/phones/silence.int corresponds to data/lang/phones/silence.txt\n", "--> data/lang/phones/silence.csl corresponds to data/lang/phones/silence.txt\n", "--> data/lang/phones/silence.{txt, int, csl} are OK\n", "\n", "Checking data/lang/phones/optional_silence.{txt, int, csl} ...\n", "--> text seems to be UTF-8 or ASCII, checking whitespaces\n", "--> text contains only allowed whitespaces\n", "--> 1 entry/entries in data/lang/phones/optional_silence.txt\n", "--> data/lang/phones/optional_silence.int corresponds to data/lang/phones/optional_silence.txt\n", "--> data/lang/phones/optional_silence.csl corresponds to 
data/lang/phones/optional_silence.txt\n", "--> data/lang/phones/optional_silence.{txt, int, csl} are OK\n", "\n", "Checking data/lang/phones/disambig.{txt, int, csl} ...\n", "--> text seems to be UTF-8 or ASCII, checking whitespaces\n", "--> text contains only allowed whitespaces\n", "--> 14 entry/entries in data/lang/phones/disambig.txt\n", "--> data/lang/phones/disambig.int corresponds to data/lang/phones/disambig.txt\n", "--> data/lang/phones/disambig.csl corresponds to data/lang/phones/disambig.txt\n", "--> data/lang/phones/disambig.{txt, int, csl} are OK\n", "\n", "Checking data/lang/phones/roots.{txt, int} ...\n", "--> text seems to be UTF-8 or ASCII, checking whitespaces\n", "--> text contains only allowed whitespaces\n", "--> 41 entry/entries in data/lang/phones/roots.txt\n", "--> data/lang/phones/roots.int corresponds to data/lang/phones/roots.txt\n", "--> data/lang/phones/roots.{txt, int} are OK\n", "\n", "Checking data/lang/phones/sets.{txt, int} ...\n", "--> text seems to be UTF-8 or ASCII, checking whitespaces\n", "--> text contains only allowed whitespaces\n", "--> 41 entry/entries in data/lang/phones/sets.txt\n", "--> data/lang/phones/sets.int corresponds to data/lang/phones/sets.txt\n", "--> data/lang/phones/sets.{txt, int} are OK\n", "\n", "Checking data/lang/phones/extra_questions.{txt, int} ...\n", "--> text seems to be UTF-8 or ASCII, checking whitespaces\n", "--> text contains only allowed whitespaces\n", "--> 9 entry/entries in data/lang/phones/extra_questions.txt\n", "--> data/lang/phones/extra_questions.int corresponds to data/lang/phones/extra_questions.txt\n", "--> data/lang/phones/extra_questions.{txt, int} are OK\n", "\n", "Checking data/lang/phones/word_boundary.{txt, int} ...\n", "--> text seems to be UTF-8 or ASCII, checking whitespaces\n", "--> text contains only allowed whitespaces\n", "--> 166 entry/entries in data/lang/phones/word_boundary.txt\n", "--> data/lang/phones/word_boundary.int corresponds to 
data/lang/phones/word_boundary.txt\n", "--> data/lang/phones/word_boundary.{txt, int} are OK\n", "\n", "Checking optional_silence.txt ...\n", "--> reading data/lang/phones/optional_silence.txt\n", "--> data/lang/phones/optional_silence.txt is OK\n", "\n", "Checking disambiguation symbols: #0 and #1\n", "--> data/lang/phones/disambig.txt has \"#0\" and \"#1\"\n", "--> data/lang/phones/disambig.txt is OK\n", "\n", "Checking topo ...\n", "\n", "Checking word_boundary.txt: silence.txt, nonsilence.txt, disambig.txt ...\n", "--> data/lang/phones/word_boundary.txt doesn't include disambiguation symbols\n", "--> data/lang/phones/word_boundary.txt is the union of nonsilence.txt and silence.txt\n", "--> data/lang/phones/word_boundary.txt is OK\n", "\n", "Checking word-level disambiguation symbols...\n", "--> data/lang/phones/wdisambig.txt exists (newer prepare_lang.sh)\n", "Checking word_boundary.int and disambig.int\n", "--> generating a 98 word/subword sequence\n", "--> resulting phone sequence from L.fst corresponds to the word sequence\n", "--> L.fst is OK\n", "--> generating a 49 word/subword sequence\n", "--> resulting phone sequence from L_disambig.fst corresponds to the word sequence\n", "--> L_disambig.fst is OK\n", "\n", "Checking data/lang/oov.{txt, int} ...\n", "--> text seems to be UTF-8 or ASCII, checking whitespaces\n", "--> text contains only allowed whitespaces\n", "--> 1 entry/entries in data/lang/oov.txt\n", "--> data/lang/oov.int corresponds to data/lang/oov.txt\n", "--> data/lang/oov.{txt, int} are OK\n", "\n", "--> data/lang/L.fst is olabel sorted\n", "--> data/lang/L_disambig.fst is olabel sorted\n", "--> SUCCESS [validating lang directory data/lang]\n", "+ utils/format_lm.sh data/lang db/en-50k-0.4-android.lm.gz data/dict/lexicon.txt data/lang_test\n", "Converting 'db/en-50k-0.4-android.lm.gz' to FST\n", "arpa2fst --disambig-symbol=#0 --read-symbol-table=data/lang_test/words.txt - data/lang_test/G.fst \n", "LOG 
(arpa2fst[5.5.1046~1-76cd5]:Read():arpa-file-parser.cc:94) Reading \\data\\ section.\n", "LOG (arpa2fst[5.5.1046~1-76cd5]:Read():arpa-file-parser.cc:149) Reading \\1-grams: section.\n", "LOG (arpa2fst[5.5.1046~1-76cd5]:Read():arpa-file-parser.cc:149) Reading \\2-grams: section.\n", "LOG (arpa2fst[5.5.1046~1-76cd5]:Read():arpa-file-parser.cc:149) Reading \\3-grams: section.\n", "LOG (arpa2fst[5.5.1046~1-76cd5]:RemoveRedundantStates():arpa-lm-compiler.cc:359) Reduced num-states from 1217362 to 185036\n", "fstisstochastic data/lang_test/G.fst \n", "0.476411 -3.03779\n", "Succeeded in formatting LM: 'db/en-50k-0.4-android.lm.gz'\n", "+ utils/format_lm.sh data/lang data/en-us-mix.lm.gz data/dict/lexicon.txt data/lang_test_adapt\n", "Converting 'data/en-us-mix.lm.gz' to FST\n", "arpa2fst --disambig-symbol=#0 --read-symbol-table=data/lang_test_adapt/words.txt - data/lang_test_adapt/G.fst \n", "LOG (arpa2fst[5.5.1046~1-76cd5]:Read():arpa-file-parser.cc:94) Reading \\data\\ section.\n", "LOG (arpa2fst[5.5.1046~1-76cd5]:Read():arpa-file-parser.cc:149) Reading \\1-grams: section.\n", "LOG (arpa2fst[5.5.1046~1-76cd5]:Read():arpa-file-parser.cc:149) Reading \\2-grams: section.\n", "LOG (arpa2fst[5.5.1046~1-76cd5]:Read():arpa-file-parser.cc:149) Reading \\3-grams: section.\n", "LOG (arpa2fst[5.5.1046~1-76cd5]:RemoveRedundantStates():arpa-lm-compiler.cc:359) Reduced num-states from 1217646 to 185095\n", "fstisstochastic data/lang_test_adapt/G.fst \n", "6.81902e-07 -3.03779\n", "Succeeded in formatting LM: 'data/en-us-mix.lm.gz'\n", "+ utils/mkgraph.sh --self-loop-scale 1.0 data/lang_test exp/tdnn exp/tdnn/graph\n", "tree-info exp/tdnn/tree \n", "tree-info exp/tdnn/tree \n", "fstdeterminizestar --use-log=true \n", "fsttablecompose data/lang_test/L_disambig.fst data/lang_test/G.fst \n", "fstminimizeencoded \n", "fstpushspecial \n", "fstisstochastic data/lang_test/tmp/LG.fst \n", "-0.145498 -0.146281\n", "[info]: LG not stochastic.\n", "fstcomposecontext --context-size=2 
--central-position=1 --read-disambig-syms=data/lang_test/phones/disambig.int --write-disambig-syms=data/lang_test/tmp/disambig_ilabels_2_1.int data/lang_test/tmp/ilabels_2_1.905 data/lang_test/tmp/LG.fst \n", "fstisstochastic data/lang_test/tmp/CLG_2_1.fst \n", "-0.145498 -0.146281\n", "[info]: CLG not stochastic.\n", "make-h-transducer --disambig-syms-out=exp/tdnn/graph/disambig_tid.int --transition-scale=1.0 data/lang_test/tmp/ilabels_2_1 exp/tdnn/tree exp/tdnn/final.mdl \n", "fstrmepslocal \n", "fsttablecompose exp/tdnn/graph/Ha.fst data/lang_test/tmp/CLG_2_1.fst \n", "fstdeterminizestar --use-log=true \n", "fstminimizeencoded \n", "fstrmsymbols exp/tdnn/graph/disambig_tid.int \n", "fstisstochastic exp/tdnn/graph/HCLGa.fst \n", "-0.109817 -0.571742\n", "HCLGa is not stochastic\n", "add-self-loops --self-loop-scale=1.0 --reorder=true exp/tdnn/final.mdl exp/tdnn/graph/HCLGa.fst \n", "fstisstochastic exp/tdnn/graph/HCLG.fst \n", "1.90465e-09 -0.415046\n", "[info]: final HCLG is not stochastic.\n", "+ utils/mkgraph.sh --self-loop-scale 1.0 data/lang_test_adapt exp/tdnn exp/tdnn/graph_adapt\n", "tree-info exp/tdnn/tree \n", "tree-info exp/tdnn/tree \n", "fstdeterminizestar --use-log=true \n", "fsttablecompose data/lang_test_adapt/L_disambig.fst data/lang_test_adapt/G.fst \n", "fstminimizeencoded \n", "fstpushspecial \n", "fstisstochastic data/lang_test_adapt/tmp/LG.fst \n", "-0.148474 -0.149181\n", "[info]: LG not stochastic.\n", "fstcomposecontext --context-size=2 --central-position=1 --read-disambig-syms=data/lang_test_adapt/phones/disambig.int --write-disambig-syms=data/lang_test_adapt/tmp/disambig_ilabels_2_1.int data/lang_test_adapt/tmp/ilabels_2_1.979 data/lang_test_adapt/tmp/LG.fst \n", "fstisstochastic data/lang_test_adapt/tmp/CLG_2_1.fst \n", "-0.148474 -0.149181\n", "[info]: CLG not stochastic.\n", "make-h-transducer --disambig-syms-out=exp/tdnn/graph_adapt/disambig_tid.int --transition-scale=1.0 data/lang_test_adapt/tmp/ilabels_2_1 exp/tdnn/tree 
exp/tdnn/final.mdl \n", "fstrmepslocal \n", "fsttablecompose exp/tdnn/graph_adapt/Ha.fst data/lang_test_adapt/tmp/CLG_2_1.fst \n", "fstdeterminizestar --use-log=true \n", "fstminimizeencoded \n", "fstrmsymbols exp/tdnn/graph_adapt/disambig_tid.int \n", "fstisstochastic exp/tdnn/graph_adapt/HCLGa.fst \n", "-0.113907 -0.5857\n", "HCLGa is not stochastic\n", "add-self-loops --self-loop-scale=1.0 --reorder=true exp/tdnn/final.mdl exp/tdnn/graph_adapt/HCLGa.fst \n", "fstisstochastic exp/tdnn/graph_adapt/HCLG.fst \n", "1.90465e-09 -0.423618\n", "[info]: final HCLG is not stochastic.\n" ] } ] }, { "cell_type": "code", "source": [ "!cat decode.sh\n", "!bash decode.sh" ], "metadata": { "colab": { "base_uri": "https://localhost:8080/" }, "id": "Sl3QBI1MXpc-", "outputId": "affac8a3-782f-4000-e31f-81bfed47a37a" }, "execution_count": 10, "outputs": [ { "output_type": "stream", "name": "stdout", "text": [ "#!/bin/bash\n", "\n", ". path.sh\n", "\n", "steps/make_mfcc.sh --nj 10 data_test/test_small exp/make_mfcc/test mfcc\n", "steps/compute_cmvn_stats.sh data_test/test_small exp/make_mfcc/test mfcc\n", "utils/fix_data_dir.sh data_test/test_small\n", "\n", "steps/online/nnet2/extract_ivectors_online.sh --nj 4 \\\n", " data_test/test_small exp/extractor \\\n", " exp/ivectors_test\n", "\n", "steps/nnet3/decode.sh --nj 4 \\\n", " --acwt 1.0 --post-decode-acwt 10.0 \\\n", " --online-ivector-dir exp/ivectors_test \\\n", " exp/tdnn/graph_adapt data_test/test_small exp/tdnn/decode_test_adapt\n", "\n", "steps/nnet3/decode.sh --nj 4 \\\n", " --acwt 1.0 --post-decode-acwt 10.0 \\\n", " --online-ivector-dir exp/ivectors_test \\\n", " exp/tdnn/graph data_test/test_small exp/tdnn/decode_test\n", "\n", "#steps/nnet3/decode_lookahead.sh --nj 4 \\\n", "# --acwt 1.0 --post-decode-acwt 10.0 \\\n", "# --online-ivector-dir exp/ivectors_test \\\n", "# exp/tdnn/lgraph data_test/test_small exp/tdnn/decode_test_adapt\n", "#steps/nnet3/decode_lookahead.sh --nj 4 \\\n", "# --acwt 1.0 --post-decode-acwt 10.0 
\\\n", "# --online-ivector-dir exp/ivectors_test \\\n", "# exp/tdnn/lgraph_orig data_test/test_small exp/tdnn/decode_test\n", "steps/make_mfcc.sh --nj 10 data_test/test_small exp/make_mfcc/test mfcc\n", "steps/make_mfcc.sh: moving data_test/test_small/feats.scp to data_test/test_small/.backup\n", "utils/validate_data_dir.sh: Successfully validated data-directory data_test/test_small\n", "steps/make_mfcc.sh: [info]: no segments file exists: assuming wav.scp indexed by utterance.\n", "steps/make_mfcc.sh: Succeeded creating MFCC features for test_small\n", "steps/compute_cmvn_stats.sh data_test/test_small exp/make_mfcc/test mfcc\n", "Succeeded creating CMVN stats for test_small\n", "fix_data_dir.sh: kept all 50 utterances.\n", "fix_data_dir.sh: old files are kept in data_test/test_small/.backup\n", "steps/online/nnet2/extract_ivectors_online.sh --nj 4 data_test/test_small exp/extractor exp/ivectors_test\n", "steps/online/nnet2/extract_ivectors_online.sh: extracting iVectors\n", "steps/online/nnet2/extract_ivectors_online.sh: combining iVectors across jobs\n", "steps/online/nnet2/extract_ivectors_online.sh: done extracting (online) iVectors to exp/ivectors_test using the extractor in exp/extractor.\n", "steps/nnet3/decode.sh --nj 4 --acwt 1.0 --post-decode-acwt 10.0 --online-ivector-dir exp/ivectors_test exp/tdnn/graph_adapt data_test/test_small exp/tdnn/decode_test_adapt\n", "steps/nnet2/check_ivectors_compatible.sh: WARNING: One of the directories do not contain iVector ID.\n", "steps/nnet2/check_ivectors_compatible.sh: WARNING: That means it's you who's reponsible for keeping \n", "steps/nnet2/check_ivectors_compatible.sh: WARNING: the directories compatible\n", "steps/nnet3/decode.sh: feature type is raw\n", "steps/diagnostic/analyze_lats.sh --cmd run.pl --iter final exp/tdnn/graph_adapt exp/tdnn/decode_test_adapt\n", "steps/diagnostic/analyze_lats.sh: see stats in exp/tdnn/decode_test_adapt/log/analyze_alignments.log\n", "Overall, lattice depth 
(10,50,90-percentile)=(1,1,4) and mean=2.4\n", "steps/diagnostic/analyze_lats.sh: see stats in exp/tdnn/decode_test_adapt/log/analyze_lattice_depth_stats.log\n", "score best paths\n", "local/score.sh --cmd run.pl data_test/test_small exp/tdnn/graph_adapt exp/tdnn/decode_test_adapt\n", "local/score.sh: scoring with word insertion penalty=0.0,0.5,1.0\n", "score confidence and timing with sclite\n", "Decoding done.\n", "steps/nnet3/decode.sh --nj 4 --acwt 1.0 --post-decode-acwt 10.0 --online-ivector-dir exp/ivectors_test exp/tdnn/graph data_test/test_small exp/tdnn/decode_test\n", "steps/nnet2/check_ivectors_compatible.sh: WARNING: One of the directories do not contain iVector ID.\n", "steps/nnet2/check_ivectors_compatible.sh: WARNING: That means it's you who's reponsible for keeping \n", "steps/nnet2/check_ivectors_compatible.sh: WARNING: the directories compatible\n", "steps/nnet3/decode.sh: feature type is raw\n", "steps/diagnostic/analyze_lats.sh --cmd run.pl --iter final exp/tdnn/graph exp/tdnn/decode_test\n", "steps/diagnostic/analyze_lats.sh: see stats in exp/tdnn/decode_test/log/analyze_alignments.log\n", "Overall, lattice depth (10,50,90-percentile)=(1,5,23) and mean=10.4\n", "steps/diagnostic/analyze_lats.sh: see stats in exp/tdnn/decode_test/log/analyze_lattice_depth_stats.log\n", "score best paths\n", "local/score.sh --cmd run.pl data_test/test_small exp/tdnn/graph exp/tdnn/decode_test\n", "local/score.sh: scoring with word insertion penalty=0.0,0.5,1.0\n", "score confidence and timing with sclite\n", "Decoding done.\n" ] } ] }, { "cell_type": "code", "source": [ "!bash RESULTS" ], "metadata": { "id": "ABtcNyUDX4S8", "outputId": "d5e50be7-3293-4a59-94b8-9bfa46736481", "colab": { "base_uri": "https://localhost:8080/" } }, "execution_count": 11, "outputs": [ { "output_type": "stream", "name": "stdout", "text": [ "%WER 11.77 [ 107 / 909, 13 ins, 7 del, 87 sub ] exp/tdnn/decode_test/wer_7_1.0\n", "%WER 0.22 [ 2 / 909, 0 ins, 1 del, 1 sub ] 
exp/tdnn/decode_test_adapt/wer_10_1.0\n" ] } ] } ] }