update hfa version3 - power

wolfgitpr · wolfgitpr · commit 2b99b88f9ff6 · 2025-09-27T17:36:26.000+08:00
diff --git a/src/apps/HubertFA/gui/MainWindow.cpp b/src/apps/HubertFA/gui/MainWindow.cpp
@@ -55,14 +55,6 @@ namespace HFA {
             return false;
         }
 
-        YAML::Node config = YAML::LoadFile(config_file.string());
-        const fs::path encoder_path = model_path / (config["hubert_config"]["encoder"].as<std::string>() + "-" +
-                                                    config["hubert_config"]["channel"].as<std::string>() + ".onnx");
-        if (!fs::exists(encoder_path)) {
-            error = encoder_path.string() + " does not exist";
-            return false;
-        }
-
         YAML::Node vocab = YAML::LoadFile(vocab_file.string());
         const YAML::Node &dictionaries = vocab["dictionaries"];
         if (dictionaries) {
@@ -234,7 +226,7 @@ namespace HFA {
 
         connect(remove, &QPushButton::clicked, this, &MainWindow::slot_removeListItem);
         connect(clear, &QPushButton::clicked, this, &MainWindow::slot_clearTaskList);
-        connect(runHfa, &QPushButton::clicked, this, &MainWindow::slot_runFbl);
+        connect(runHfa, &QPushButton::clicked, this, &MainWindow::slot_runHfa);
 
         connect(btnOutTg, &QPushButton::clicked, this, &MainWindow::slot_outTgPath);
 
@@ -349,7 +341,7 @@ namespace HFA {
         taskList->clear();
     }
 
-    void MainWindow::slot_runFbl() {
+    void MainWindow::slot_runHfa() {
         out->clear();
         m_threadpool->clear();
 
@@ -422,7 +414,7 @@ namespace HFA {
     }
 
     void MainWindow::slot_threadFinished() {
-        const auto msg = QString("Fbl complete! Total: %3, Success: %1, Failed: %2")
+        const auto msg = QString("Hfa complete! Total: %3, Success: %1, Failed: %2")
                              .arg(m_workTotal - m_workError)
                              .arg(m_workError)
                              .arg(m_workTotal);
diff --git a/src/apps/HubertFA/gui/MainWindow.h b/src/apps/HubertFA/gui/MainWindow.h
@@ -78,7 +78,7 @@ namespace HFA {
 
         void slot_removeListItem() const;
         void slot_clearTaskList() const;
-        void slot_runFbl();
+        void slot_runHfa();
 
         void slot_oneFailed(const QString &filename, const QString &msg);
         void slot_oneFinished(const QString &filename, const QString &msg);
diff --git a/src/apps/HubertFA/util/AlignmentDecoder.cpp b/src/apps/HubertFA/util/AlignmentDecoder.cpp
@@ -44,7 +44,7 @@ namespace HFA {
                                        const std::vector<std::string> &class_names,
                                        const std::map<std::string, float> &mel_spec_config)
         : vocab_(vocab), non_speech_phs_(class_names) {
-        melspec_config_.hop_length = mel_spec_config.at("hop_length");
+        melspec_config_.hop_length = mel_spec_config.at("hop_size");
         melspec_config_.sample_rate = mel_spec_config.at("sample_rate");
         frame_length_ = static_cast<float>(melspec_config_.hop_length) / melspec_config_.sample_rate;
     }
diff --git a/src/apps/HubertFA/util/Hfa.cpp b/src/apps/HubertFA/util/Hfa.cpp
@@ -27,11 +27,8 @@ namespace HFA {
         const auto melspec_config = config["melspec_config"].as<std::map<std::string, float>>();
         hfa_input_sample_rate = static_cast<int>(melspec_config.find("sample_rate")->second);
 
-        const fs::path encoder_path = model_folder / (config["hubert_config"]["encoder"].as<std::string>() + "-" +
-                                                      config["hubert_config"]["channel"].as<std::string>() + ".onnx");
-        const fs::path predictor_path = model_folder / "model.onnx";
-
-        m_hfa = std::make_unique<HfaModel>(encoder_path, predictor_path, provider, device_id);
+        const fs::path model_path = model_folder / "model.onnx";
+        m_hfa = std::make_unique<HfaModel>(model_path, provider, device_id);
 
         const fs::path vocab_file = model_folder / "vocab.yaml";
         YAML::Node vocab = YAML::LoadFile(vocab_file.string());
diff --git a/src/apps/HubertFA/util/HfaModel.cpp b/src/apps/HubertFA/util/HfaModel.cpp
@@ -10,13 +10,9 @@ namespace HFA {
     static bool initDirectML(Ort::SessionOptions &options, int deviceIndex, std::string *errorMessage = nullptr);
     static bool initCUDA(Ort::SessionOptions &options, int deviceIndex, std::string *errorMessage = nullptr);
 
-    HfaModel::HfaModel(const std::filesystem::path &encoder_Path, const std::filesystem::path &predictor_Path,
-                       const ExecutionProvider provider, int device_id)
+    HfaModel::HfaModel(const std::filesystem::path &model_Path, const ExecutionProvider provider, const int device_id)
         : m_env(Ort::Env(ORT_LOGGING_LEVEL_WARNING, "HfaModel")), m_session_options(Ort::SessionOptions()),
-          m_encoder_session(nullptr), m_predictor_session(nullptr) {
-
-        m_input_name = "waveform";
-
+          m_model_session(nullptr) {
         m_session_options.SetInterOpNumThreads(4);
 
         // Choose execution provider based on the provided option
@@ -52,20 +48,17 @@ namespace HFA {
         // Create ONNX Runtime Session
         try {
 #ifdef _WIN32
-            m_encoder_session = new Ort::Session(m_env, encoder_Path.wstring().c_str(), m_session_options);
-            m_predictor_session = new Ort::Session(m_env, predictor_Path.wstring().c_str(), m_session_options);
+            m_model_session = new Ort::Session(m_env, model_Path.wstring().c_str(), m_session_options);
 #else
-            m_encoder_session = new Ort::Session(m_env, encoder_Path.c_str(), m_session_options);
-            m_predictor_session = new Ort::Session(m_env, predictor_Path.c_str(), m_session_options); // Fixed extra dot
+            m_model_session = new Ort::Session(m_env, model_Path.c_str(), m_session_options);
 #endif
         } catch (const Ort::Exception &e) {
             std::cout << "Failed to create session: " << e.what() << std::endl;
         }
     }
 
     HfaModel::~HfaModel() {
-        delete m_encoder_session;
-        delete m_predictor_session;
+        delete m_model_session;
         m_input_name = {};
     }
 
@@ -76,7 +69,6 @@ namespace HFA {
             return false;
         }
 
-        // 1. 准备输入张量
         const size_t batch_size = input_data.size();
         size_t max_len = 0;
         for (const auto &vec : input_data) {
@@ -87,7 +79,6 @@ namespace HFA {
         flattened_input.reserve(batch_size * max_len);
         for (const auto &vec : input_data) {
             flattened_input.insert(flattened_input.end(), vec.begin(), vec.end());
-            // 填充不足部分
             flattened_input.insert(flattened_input.end(), max_len - vec.size(), 0.0f);
         }
 
@@ -96,34 +87,13 @@ namespace HFA {
         const Ort::Value input_tensor = Ort::Value::CreateTensor<float>(
             m_memoryInfo, flattened_input.data(), flattened_input.size(), input_shape.data(), input_shape.size());
 
-        // 2. 运行编码器
-        std::vector<Ort::Value> encoder_outputs;
-        try {
-            encoder_outputs = m_encoder_session->Run(Ort::RunOptions{nullptr}, &m_input_name, &input_tensor, 1,
-                                                     &m_encoder_output_name, 1);
-        } catch (const Ort::Exception &e) {
-            msg = "编码器推理错误: " + std::string(e.what());
-            return false;
-        }
-
-        // 检查编码器输出是否有效
-        if (encoder_outputs.empty()) {
-            msg = "编码器未返回输出";
-            return false;
-        }
-
-        // 3. 运行预测器
         try {
-            // 准备输出节点名称
             const std::vector<const char *> output_names = {m_predictor_output_name[0], m_predictor_output_name[1],
                                                             m_predictor_output_name[2]};
 
-            // 直接使用编码器输出作为预测器输入
-            auto predictor_outputs =
-                m_predictor_session->Run(Ort::RunOptions{nullptr}, &m_encoder_output_name, &encoder_outputs[0], 1,
-                                         output_names.data(), output_names.size());
+            auto predictor_outputs = m_model_session->Run(Ort::RunOptions{nullptr}, &m_input_name, &input_tensor, 1,
+                                                          output_names.data(), output_names.size());
 
-            // 4. 解析输出结果
             // ph_frame_logits [batch, time, classes]
             auto parse_3d_output = [](Ort::Value &tensor) {
                 const auto shape = tensor.GetTensorTypeAndShapeInfo().GetShape();
diff --git a/src/apps/HubertFA/util/HfaModel.h b/src/apps/HubertFA/util/HfaModel.h
@@ -17,19 +17,18 @@ namespace HFA {
 
     class HfaModel {
     public:
-        explicit HfaModel(const std::filesystem::path &encoder_Path, const std::filesystem::path &predictor_Path, ExecutionProvider provider, int device_id);
+        explicit HfaModel(const std::filesystem::path &model_Path, ExecutionProvider provider, int device_id);
         ~HfaModel();
         bool forward(const std::vector<std::vector<float>> &input_data, HfaLogits &result, std::string &msg) const;
 
     private:
         Ort::Env m_env;
         Ort::SessionOptions m_session_options;
-        Ort::Session *m_encoder_session;
-        Ort::Session *m_predictor_session;
+        Ort::Session *m_model_session;
         Ort::AllocatorWithDefaultOptions m_allocator;
-        const char *m_input_name;
-        const char *m_encoder_output_name = "input_feature";
-        const char* m_predictor_output_name[3] = {"ph_frame_logits", "ph_edge_logits", "cvnt_logits"};
+
+        const char *m_input_name = "waveform";
+        const char *m_predictor_output_name[3] = {"ph_frame_logits", "ph_edge_logits", "cvnt_logits"};
 
 #ifdef _WIN_X86
         Ort::MemoryInfo m_memoryInfo = Ort::MemoryInfo::CreateCpu(OrtDeviceAllocator, OrtMemTypeCPU);

Original file line number	Diff line number	Diff line change
`@@ -44,7 +44,7 @@ namespace HFA {`
`44`	`44`	`const std::vector<std::string> &class_names,`
`45`	`45`	`const std::map<std::string, float> &mel_spec_config)`
`46`	`46`	`: vocab_(vocab), non_speech_phs_(class_names) {`
`47`		`- melspec_config_.hop_length = mel_spec_config.at("hop_length");`
	`47`	`+ melspec_config_.hop_length = mel_spec_config.at("hop_size");`
`48`	`48`	`melspec_config_.sample_rate = mel_spec_config.at("sample_rate");`
`49`	`49`	`frame_length_ = static_cast<float>(melspec_config_.hop_length) / melspec_config_.sample_rate;`
`50`	`50`	`}`