openspeech-team
diff --git a/‎README.md‎
Lines changed: 26 additions & 0 deletions b/‎README.md‎
Lines changed: 26 additions & 0 deletions
diff --git a/‎openspeech/configs/eval.yaml‎
Lines changed: 5 additions & 0 deletions b/‎openspeech/configs/eval.yaml‎
Lines changed: 5 additions & 0 deletions
diff --git a/‎openspeech/configs/configs.yaml‎ ‎openspeech/configs/train.yaml‎openspeech/configs/configs.yaml renamed to openspeech/configs/train.yaml b/‎openspeech/configs/configs.yaml‎ ‎openspeech/configs/train.yaml‎openspeech/configs/configs.yaml renamed to openspeech/configs/train.yaml
diff --git a/‎openspeech/data/audio/filter_bank/configuration.py‎
Lines changed: 1 addition & 1 deletion b/‎openspeech/data/audio/filter_bank/configuration.py‎
Lines changed: 1 addition & 1 deletion
diff --git a/‎openspeech/data/audio/melspectrogram/configuration.py‎
Lines changed: 1 addition & 1 deletion b/‎openspeech/data/audio/melspectrogram/configuration.py‎
Lines changed: 1 addition & 1 deletion
diff --git a/‎openspeech/data/audio/mfcc/configuration.py‎
Lines changed: 1 addition & 1 deletion b/‎openspeech/data/audio/mfcc/configuration.py‎
Lines changed: 1 addition & 1 deletion
diff --git a/‎openspeech/data/audio/spectrogram/configuration.py‎
Lines changed: 1 addition & 1 deletion b/‎openspeech/data/audio/spectrogram/configuration.py‎
Lines changed: 1 addition & 1 deletion
diff --git a/‎openspeech/data/data_loader.py‎
Lines changed: 25 additions & 0 deletions b/‎openspeech/data/data_loader.py‎
Lines changed: 25 additions & 0 deletions
diff --git a/‎openspeech/data/dataset.py‎
Lines changed: 1 addition & 1 deletion b/‎openspeech/data/dataset.py‎
Lines changed: 1 addition & 1 deletion
diff --git a/‎openspeech/dataclass/__init__.py‎
Lines changed: 6 additions & 0 deletions b/‎openspeech/dataclass/__init__.py‎
Lines changed: 6 additions & 0 deletions
@@ -179,6 +179,32 @@ $ python ./openspeech_cli/hydra_train.py \
     criterion=ctc
 ```
 
+### Evaluation examples
+  
+- Example1: Evaluate the `listen_attend_spell` model:
+  
+```
+$ python ./openspeech_cli/hydra_eval.py \
+    audio=melspectrogram \
+    eval.model_name=listen_attend_spell \
+    eval.dataset_path=$DATASET_PATH \
+    eval.checkpoint_path=$CHECKPOINT_PATH \
+    eval.manifest_file_path=$MANIFEST_FILE_PATH  
+```
+
+- Example2: Evaluate an ensemble of the `listen_attend_spell` and `conformer_lstm` models:
+  
+```
+$ python ./openspeech_cli/hydra_eval.py \
+    audio=melspectrogram \
+    eval.model_names=(listen_attend_spell, conformer_lstm) \
+    eval.dataset_path=$DATASET_PATH \
+    eval.checkpoint_paths=($CHECKPOINT_PATH1, $CHECKPOINT_PATH2) \
+    eval.ensemble_weights=(0.3, 0.7) \
+    eval.ensemble_method=weighted \
+    eval.manifest_file_path=$MANIFEST_FILE_PATH  
+```
+  
 ## Installation
 
 This project recommends Python 3.7 or higher.  
 
@@ -0,0 +1,5 @@
+# @package _group_
+
+defaults:
+  - audio: null
+  - eval: default
@@ -35,7 +35,7 @@ class FilterBankConfigs(OpenspeechDataclass):
 
     Configuration objects inherit from :class: `~openspeech.dataclass.configs.OpenspeechDataclass`.
 
-    Configurations:
+    Args:
         name (str): name of feature transform. (default: fbank)
         sample_rate (int): sampling rate of audio (default: 16000)
         frame_length (float): frame length for spectrogram (default: 20.0)
 
@@ -35,7 +35,7 @@ class MelSpectrogramConfigs(OpenspeechDataclass):
 
     Configuration objects inherit from :class: `~openspeech.dataclass.OpenspeechDataclass`.
 
-    Configurations:
+    Args:
         name (str): name of feature transform. (default: melspectrogram)
         sample_rate (int): sampling rate of audio (default: 16000)
         frame_length (float): frame length for spectrogram (default: 20.0)
 
@@ -35,7 +35,7 @@ class MFCCConfigs(OpenspeechDataclass):
 
     Configuration objects inherit from :class: `~openspeech.dataclass.OpenspeechDataclass`.
 
-    Configurations:
+    Args:
         name (str): name of feature transform. (default: mfcc)
         sample_rate (int): sampling rate of audio (default: 16000)
         frame_length (float): frame length for spectrogram (default: 20.0)
 
@@ -35,7 +35,7 @@ class SpectrogramConfigs(OpenspeechDataclass):
 
     Configuration objects inherit from :class: `~openspeech.dataclass.OpenspeechDataclass`.
 
-    Configurations:
+    Args:
         name (str): name of feature transform. (default: spectrogram)
         sample_rate (int): sampling rate of audio (default: 16000)
         frame_length (float): frame length for spectrogram (default: 20.0)
 
@@ -19,6 +19,7 @@
 # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 # SOFTWARE.
+from typing import Tuple
 
 import torch
 import numpy as np
@@ -132,3 +133,27 @@ def __len__(self):
 
     def shuffle(self, epoch):
         np.random.shuffle(self.bins)
+
+
+def load_dataset(manifest_file_path: str) -> Tuple[list, list]:
+    """
+    Reads audio paths and transcripts from a tab-separated manifest file.
+
+    Args:
+        manifest_file_path (str): evaluation manifest file path.
+
+    Returns: audio_paths, transcripts
+        * audio_paths (list), transcripts (list): first and third tab-separated fields of each manifest line
+    """
+    audio_paths = list()
+    transcripts = list()
+
+    with open(manifest_file_path) as f:
+        for idx, line in enumerate(f.readlines()):
+            audio_path, korean_transcript, transcript = line.split('\t')
+            transcript = transcript.replace('\n', '')
+
+            audio_paths.append(audio_path)
+            transcripts.append(transcript)
+
+    return audio_paths, transcripts
@@ -90,7 +90,7 @@ def __init__(
         self.apply_noise_augment = apply_noise_augment
         self.apply_time_stretch_augment = apply_time_stretch_augment
         self.apply_joining_augment = apply_joining_augment
-        self.transforms = AUDIO_FEATURE_TRANSFORM_DATACLASS_REGISTRY[configs.name](configs)
+        self.transforms = AUDIO_FEATURE_TRANSFORM_DATACLASS_REGISTRY[configs.audio.name](configs)
         self._load_audio = load_audio
 
         if self.apply_spec_augment:
 
@@ -31,6 +31,8 @@
     Fp16GPUTrainerConfigs,
     Fp16TPUTrainerConfigs,
     Fp64CPUTrainerConfigs,
+    EvaluationConfigs,
+    EnsembleEvaluationConfigs,
 )
 
 OPENSPEECH_CONFIGS = [
@@ -62,3 +64,7 @@
 AUGMENT_DATACLASS_REGISTRY = {
     "default": AugmentConfigs,
 }
+EVAL_DATACLASS_REGISTRY = {
+    "default": EvaluationConfigs,
+    "ensemble": EnsembleEvaluationConfigs,
+}