Skip to content

Commit b6e2682

Browse files
authored
Merge pull request #13 from sooftware/dev
Release v0.2 (resolved #11 resolved #12)
2 parents 39b0328 + 697c04b commit b6e2682

File tree

64 files changed

+504
-81
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

64 files changed

+504
-81
lines changed

README.md

Lines changed: 26 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -179,6 +179,32 @@ $ python ./openspeech_cli/hydra_train.py \
179179
criterion=ctc
180180
```
181181

182+
### Evaluation examples
183+
184+
- Example1: Evaluate the `listen_attend_spell` model:
185+
186+
```
187+
$ python ./openspeech_cli/hydra_eval.py \
188+
audio=melspectrogram \
189+
eval.model_name=listen_attend_spell \
190+
eval.dataset_path=$DATASET_PATH \
191+
eval.checkpoint_path=$CHECKPOINT_PATH \
192+
eval.manifest_file_path=$MANIFEST_FILE_PATH
193+
```
194+
195+
- Example2: Evaluate the `listen_attend_spell` and `conformer_lstm` models as an ensemble:
196+
197+
```
198+
$ python ./openspeech_cli/hydra_eval.py \
199+
audio=melspectrogram \
200+
eval.model_names=(listen_attend_spell, conformer_lstm) \
201+
eval.dataset_path=$DATASET_PATH \
202+
eval.checkpoint_paths=($CHECKPOINT_PATH1, $CHECKPOINT_PATH2) \
203+
eval.ensemble_weights=(0.3, 0.7) \
204+
eval.ensemble_method=weighted \
205+
eval.manifest_file_path=$MANIFEST_FILE_PATH
206+
```
207+
182208
## Installation
183209

184210
This project recommends Python 3.7 or higher.

openspeech/configs/eval.yaml

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,5 @@
1+
# @package _group_
2+
3+
defaults:
4+
- audio: null
5+
- eval: default

openspeech/data/audio/filter_bank/configuration.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -35,7 +35,7 @@ class FilterBankConfigs(OpenspeechDataclass):
3535
3636
Configuration objects inherit from :class: `~openspeech.dataclass.configs.OpenspeechDataclass`.
3737
38-
Configurations:
38+
Args:
3939
name (str): name of feature transform. (default: fbank)
4040
sample_rate (int): sampling rate of audio (default: 16000)
4141
frame_length (float): frame length for spectrogram (default: 20.0)

openspeech/data/audio/melspectrogram/configuration.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -35,7 +35,7 @@ class MelSpectrogramConfigs(OpenspeechDataclass):
3535
3636
Configuration objects inherit from :class: `~openspeech.dataclass.OpenspeechDataclass`.
3737
38-
Configurations:
38+
Args:
3939
name (str): name of feature transform. (default: melspectrogram)
4040
sample_rate (int): sampling rate of audio (default: 16000)
4141
frame_length (float): frame length for spectrogram (default: 20.0)

openspeech/data/audio/mfcc/configuration.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -35,7 +35,7 @@ class MFCCConfigs(OpenspeechDataclass):
3535
3636
Configuration objects inherit from :class: `~openspeech.dataclass.OpenspeechDataclass`.
3737
38-
Configurations:
38+
Args:
3939
name (str): name of feature transform. (default: mfcc)
4040
sample_rate (int): sampling rate of audio (default: 16000)
4141
frame_length (float): frame length for spectrogram (default: 20.0)

openspeech/data/audio/spectrogram/configuration.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -35,7 +35,7 @@ class SpectrogramConfigs(OpenspeechDataclass):
3535
3636
Configuration objects inherit from :class: `~openspeech.dataclass.OpenspeechDataclass`.
3737
38-
Configurations:
38+
Args:
3939
name (str): name of feature transform. (default: spectrogram)
4040
sample_rate (int): sampling rate of audio (default: 16000)
4141
frame_length (float): frame length for spectrogram (default: 20.0)

openspeech/data/data_loader.py

Lines changed: 25 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -19,6 +19,7 @@
1919
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
2020
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
2121
# SOFTWARE.
22+
from typing import Tuple
2223

2324
import torch
2425
import numpy as np
@@ -132,3 +133,27 @@ def __len__(self):
132133

133134
def shuffle(self, epoch):
134135
np.random.shuffle(self.bins)
136+
137+
138+
def load_dataset(manifest_file_path: str) -> Tuple[list, list]:
    """
    Read an evaluation manifest and collect its audio paths and transcripts.

    Every non-empty line of the manifest must hold exactly three tab-separated
    fields, in this order: ``audio_path``, ``korean_transcript``, ``transcript``.
    Only the first and the third field are returned; the second one is ignored.
    Empty lines are skipped.

    Args:
        manifest_file_path (str): evaluation manifest file path.

    Returns: audio_paths, transcripts
        * audio_paths (list): first field of every manifest line, in file order
        * transcripts (list): third field of every manifest line, without the trailing newline

    Raises:
        ValueError: if a non-empty line does not consist of exactly three tab-separated fields.
    """
    audio_paths = list()
    transcripts = list()

    with open(manifest_file_path) as f:
        # Iterate the file lazily instead of materialising it with readlines().
        for line_number, line in enumerate(f, start=1):
            line = line.rstrip('\n')

            # A stray empty line (e.g. at the end of the file) carries no entry.
            if not line:
                continue

            fields = line.split('\t')
            if len(fields) != 3:
                raise ValueError(
                    f"{manifest_file_path}:{line_number}: expected 3 tab-separated fields "
                    f"(audio_path, korean_transcript, transcript), got {len(fields)}"
                )

            # The middle field (korean_transcript) is not needed for evaluation.
            audio_path, _, transcript = fields

            audio_paths.append(audio_path)
            transcripts.append(transcript)

    return audio_paths, transcripts

openspeech/data/dataset.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -90,7 +90,7 @@ def __init__(
9090
self.apply_noise_augment = apply_noise_augment
9191
self.apply_time_stretch_augment = apply_time_stretch_augment
9292
self.apply_joining_augment = apply_joining_augment
93-
self.transforms = AUDIO_FEATURE_TRANSFORM_DATACLASS_REGISTRY[configs.name](configs)
93+
self.transforms = AUDIO_FEATURE_TRANSFORM_DATACLASS_REGISTRY[configs.audio.name](configs)
9494
self._load_audio = load_audio
9595

9696
if self.apply_spec_augment:

openspeech/dataclass/__init__.py

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -31,6 +31,8 @@
3131
Fp16GPUTrainerConfigs,
3232
Fp16TPUTrainerConfigs,
3333
Fp64CPUTrainerConfigs,
34+
EvaluationConfigs,
35+
EnsembleEvaluationConfigs,
3436
)
3537

3638
OPENSPEECH_CONFIGS = [
@@ -62,3 +64,7 @@
6264
AUGMENT_DATACLASS_REGISTRY = {
6365
"default": AugmentConfigs,
6466
}
67+
EVAL_DATACLASS_REGISTRY = {
68+
"default": EvaluationConfigs,
69+
"ensemble": EnsembleEvaluationConfigs,
70+
}

0 commit comments

Comments
 (0)