# examples/deepspeech2/config.yml

# Copyright 2020 Huy Le Nguyen (@usimarit)
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

# Audio front-end / feature-extraction settings.
# NOTE(review): units and meanings are inferred from the key names
# (e.g. `*_ms` presumably milliseconds, `sample_rate` presumably Hz);
# confirm against the featurizer that consumes this section.
speech_config:
  sample_rate: 16000
  frame_ms: 25
  stride_ms: 10
  num_feature_bins: 80
  feature_type: spectrogram
  preemphasis: 0.97
  # Booleans are written in canonical lowercase so every YAML loader
  # (1.1 and 1.2) reads them as booleans rather than strings.
  normalize_signal: true
  normalize_feature: true
  normalize_per_frame: false

# Text decoding settings.
decoder_config:
  # null: no vocabulary file is given here.
  # NOTE(review): presumably the consumer falls back to a built-in
  # default vocabulary — confirm.
  vocabulary: null
  # Canonical lowercase boolean.
  blank_at_zero: false
  beam_width: 500
  lm_config:
    # null: no language-model file is configured.
    model_path: null
    # NOTE(review): alpha/beta look like the usual LM weight and
    # word-insertion bonus for beam search — confirm with the decoder.
    alpha: 2.0
    beta: 1.0

# Network architecture settings for the model named below.
model_config:
  name: deepspeech2
  conv_type: conv2d
  # The three conv_* lists each have three entries; keep their lengths
  # in sync when editing.
  # NOTE(review): presumably one entry per convolution layer — confirm.
  conv_kernels: [[11, 41], [11, 21], [11, 11]]
  conv_strides: [[2, 2], [1, 2], [1, 2]]
  conv_filters: [32, 32, 96]
  conv_dropout: 0.1
  rnn_nlayers: 5
  rnn_type: lstm
  rnn_units: 512
  # Canonical lowercase boolean.
  rnn_bidirectional: true
  rnn_rowconv: 0
  rnn_dropout: 0.1
  # NOTE(review): with fc_nlayers set to 0, fc_units is presumably
  # unused — confirm.
  fc_nlayers: 0
  fc_units: 1024

# Dataset, optimizer and training-run settings.
# NOTE(review): every absolute path below (/mnt/h/..., /mnt/e/...) is
# specific to one machine; adjust them before running elsewhere.
# Booleans are written in canonical lowercase `true`/`false`.
learning_config:
  # Training split.
  train_dataset_config:
    use_tf: true
    data_paths:
      - /mnt/h/ML/Datasets/ASR/Raw/LibriSpeech/train-clean-100/transcripts.tsv
    tfrecords_dir: null
    shuffle: true
    cache: true
    buffer_size: 100
    drop_remainder: true
    stage: train

  # Evaluation split. data_paths is null, so no evaluation transcripts
  # are configured here.
  eval_dataset_config:
    use_tf: true
    data_paths: null
    tfrecords_dir: null
    shuffle: false
    cache: true
    buffer_size: 100
    drop_remainder: true
    stage: eval

  # Test split.
  test_dataset_config:
    use_tf: true
    data_paths:
      - /mnt/h/ML/Datasets/ASR/Raw/LibriSpeech/test-clean/transcripts.tsv
    tfrecords_dir: null
    shuffle: false
    cache: true
    buffer_size: 100
    drop_remainder: true
    stage: test

  optimizer_config:
    class_name: adam
    config:
      learning_rate: 0.0001

  running_config:
    batch_size: 4
    num_epochs: 20
    # NOTE(review): these keys mirror the arguments of
    # tf.keras.callbacks.ModelCheckpoint — presumably forwarded as-is;
    # confirm in the trainer.
    checkpoint:
      # Quoted so the braces and colon of the `{epoch:02d}` placeholder
      # are always read as literal string content; the value itself is
      # unchanged.
      filepath: '/mnt/e/Models/local/deepspeech2/checkpoints/{epoch:02d}.h5'
      save_best_only: false
      save_weights_only: true
      save_freq: epoch
    states_dir: /mnt/e/Models/local/deepspeech2/states
    # NOTE(review): these keys mirror the arguments of
    # tf.keras.callbacks.TensorBoard — presumably forwarded as-is;
    # confirm in the trainer.
    tensorboard:
      log_dir: /mnt/e/Models/local/deepspeech2/tensorboard
      histogram_freq: 1
      write_graph: true
      write_images: true
      update_freq: epoch
      profile_batch: 2