fix: 修复paddleocr模型报错(#239)

hhhhsc701 · hefanli · web-flow · commit d8e357e7185e · 2026-01-14T11:15:20.000+08:00
* refactor: replace the database from mysql to pgsql

* refactor: replace the database from mysql to pgsql

* refactor: merge the databases of DataMate and LabelStudio

* refactor: merge the databases of DataMate and LabelStudio

* fix: resolve the conflict

* feat: 适配pgsql

* fix: resolve the annotation task bug

* fix: fix the system param presetting data

* fix: 修复paddleocr模型报错

* fix: 修复paddleocr模型报错

* fix: 修复paddleocr模型报错

---------

Co-authored-by: uname <2986773479@qq.com>
diff --git a/runtime/ops/filter/img_blurred_images_cleaner/process.py b/runtime/ops/filter/img_blurred_images_cleaner/process.py
@@ -35,7 +35,7 @@ def execute(self, sample: Dict[str, Any]):
             data = bytes_transform.bytes_to_numpy(img_bytes)
             blurred_images = self._blurred_images_filter(data, file_name)
             sample[self.data_key] = bytes_transform.numpy_to_bytes(blurred_images, file_type)
-        logger.info(f"fileName: ｛file_name｝, method: ImagesBlurredCleaner costs {(time.time() - start):6f} s")
+        logger.info(f"fileName: {file_name}, method: ImagesBlurredCleaner costs {(time.time() - start):6f} s")
         return sample
 
     def _blurred_images_filter(self, image, file_name):
@@ -46,6 +46,6 @@ def _blurred_images_filter(self, image, file_name):
         score = cv2.Laplacian(gray, cv2.CV_64F).var()
         if score <= self._blurred_threshold:
             logger.info(f"The image blur is {self._blurred_threshold}, "
-                        f"which exceeds the threshold of ｛score｝. ｛file_name｝ is filtered out.")
+                        f"which exceeds the threshold of {score}. {file_name} is filtered out.")
             return np.array([])
         return image
diff --git a/runtime/ops/filter/img_similar_images_cleaner/sql/sql_config.json b/runtime/ops/filter/img_similar_images_cleaner/sql/sql_config.json
@@ -2,5 +2,5 @@
   "query_sql": "SELECT * FROM operator_similar_img_features WHERE task_uuid = :task_uuid ORDER BY timestamp LIMIT :ge OFFSET :le",
   "insert_sql": "INSERT INTO operator_similar_img_features (task_uuid,p_hash,des_matrix,matrix_shape,file_name,timestamp) VALUES (:task_uuid,:p_hash,:des_matrix,:matrix_shape,:file_name,:timestamp)",
   "query_task_uuid_sql": "SELECT * FROM operator_similar_img_features WHERE task_uuid = :task_uuid",
-  "create_tables_sql": "CREATE TABLE IF NOT EXISTS operator_similar_img_features (id SERIAL PRIMARY KEY,task_uuid VARCHAR(255),p_hash TEXT,des_matrix BLOB,matrix_shape TEXT,file_name TEXT,timestamp TIMESTAMP);"
+  "create_tables_sql": "CREATE TABLE IF NOT EXISTS operator_similar_img_features (id SERIAL PRIMARY KEY,task_uuid VARCHAR(255),p_hash TEXT,des_matrix BYTEA,matrix_shape TEXT,file_name TEXT,timestamp TIMESTAMP);"
 }
diff --git a/runtime/ops/mapper/img_direction_correct/base_model.py b/runtime/ops/mapper/img_direction_correct/base_model.py
@@ -4,34 +4,15 @@
 import os
 from pathlib import Path
 
-from argparse import Namespace
-
 
 class BaseModel:
 
-    def __init__(self, model_type='vertical'):
+    def __init__(self, *args, **kwargs):
         models_path = os.getenv("MODELS_PATH", "/home/models")
-        args = Namespace()
-        args.cls_image_shape = '3, 224, 224'
-        args.cls_batch_num = 6
-        args.cls_thresh = 0.9
-        args.use_onnx = False
-        args.use_gpu = False
-        args.use_npu = False
-        args.use_xpu = False
-        args.use_mlu = False
-        args.enable_mkldnn = False
-        if model_type == 'vertical':
-            args.cls_model_dir = str(Path(models_path, 'ch_ppocr_mobile_v2.0_cls_infer'))
-            self.model_name = 'standard model to detect image 0 or 90 rotated'
-            args.label_list = ['0', '90']
-        else:
-            args.cls_model_dir = str(Path(models_path, 'ch_ppocr_mobile_v2.0_cls_infer'))
-            self.model_name = 'standard model to detect image 0 or 180 rotated'
-            args.label_list = ['0', '180']
+        model_dir = str(Path(models_path, 'PP-LCNet_x1_0_doc_ori_infer'))
 
-        from paddleocr.tools.infer.predict_cls import TextClassifier
-        self.infer = TextClassifier(args)
+        from paddleocr import DocImgOrientationClassification
+        self.infer = DocImgOrientationClassification(model_dir=model_dir)
 
     def __del__(self):
         del self.infer
diff --git a/runtime/ops/mapper/img_direction_correct/process.py b/runtime/ops/mapper/img_direction_correct/process.py
@@ -24,7 +24,7 @@ def __init__(self, *args, **kwargs):
         self.img_resize = 1000
         self.limit_size = 30000
         self.use_model = True
-        self.vertical_model, self.standard_model = self.get_model(*args, **kwargs)
+        self.model = self.get_model(*args, **kwargs)
 
     @staticmethod
     def _detect_angle(img):
@@ -60,15 +60,17 @@ def _detect_direction(image, file_name, model):
         Returns: 旋转后的图片
         """
         # cls_res为模型预测结果，格式应当类似于: [('90', 0.9815167)]
-        _, cls_res, _ = model.infer([image])
-        rotate_angle = int(cls_res[0][0])
-        pro = float(cls_res[0][1])
+        cls_res = model.infer.predict([image])[0]
+        rotate_angle = int(cls_res.get("class_ids", np.array([0], dtype='int32')).item())
+        pro = float(cls_res.get("scores", np.array([0], dtype='int32')).item())
         logger.info(
             f"fileName: ｛file_name｝, model ｛model.model_name｝ detect result is {rotate_angle} with confidence ｛pro｝")
         if rotate_angle == 90 and pro > 0.89:
             return cv2.rotate(image, cv2.ROTATE_90_CLOCKWISE)
         if rotate_angle == 180 and pro > 0.89:
-            return cv2.rotate(image, 1)
+            return cv2.rotate(image, cv2.ROTATE_180)
+        if rotate_angle == 270 and pro > 0.89:
+            return cv2.rotate(image, cv2.ROTATE_90_COUNTERCLOCKWISE)
         return image
 
     @staticmethod
@@ -93,7 +95,7 @@ def _rotate_bound(image, angle):
         return dst_img
 
     def init_model(self, *args, **kwargs):
-        return BaseModel(model_type='vertical'), BaseModel(model_type='standard')
+        return BaseModel(*args, **kwargs)
 
     def execute(self, sample: Dict[str, Any]):
         start = time.time()
@@ -103,12 +105,12 @@ def execute(self, sample: Dict[str, Any]):
         img_bytes = sample[self.data_key]
         if img_bytes:
             data = bytes_transform.bytes_to_numpy(img_bytes)
-            correct_data = self._img_direction_correct(data, file_name, self.vertical_model, self.standard_model)
+            correct_data = self._img_direction_correct(data, file_name, self.model)
             sample[self.data_key] = bytes_transform.numpy_to_bytes(correct_data, file_type)
             logger.info(f"fileName: ｛file_name｝, method: ImgDirectionCorrect costs {time.time() - start:6f} s")
         return sample
 
-    def _img_direction_correct(self, img, file_name, vertical_model, standard_model):
+    def _img_direction_correct(self, img, file_name, standard_model):
         height, width = img.shape[:2]
         if max(height, width) > self.limit_size:
             logger.info(
@@ -119,8 +121,6 @@ def _img_direction_correct(self, img, file_name, vertical_model, standard_model)
         angle = self._detect_angle(detect_angle_img)
         # 将图片处理为 0, 90, 180, 270旋转角度的图片
         rotated_img = self._rotate_bound(img, angle)
-        # 水平垂直方向识别：二分类模型，检测图片方向角为 0, 90, 将其处理为 0和180二分类图片
-        rotated_img = self._detect_direction(rotated_img, file_name, vertical_model)
         # 0-180方向识别：二分类模型，检测图片方向角为 0, 180, 将其处理为 0和180二分类图片
         rotated_img = self._detect_direction(rotated_img, file_name, standard_model)
         return rotated_img
diff --git a/scripts/images/runtime/Dockerfile b/scripts/images/runtime/Dockerfile
@@ -6,9 +6,9 @@ RUN --mount=type=cache,target=/var/cache/apt \
     && apt install -y libgl1 libglib2.0-0 vim libmagic1 libreoffice dos2unix swig poppler-utils tesseract-ocr
 
 RUN mkdir -p /home/models \
-    && wget https://paddleocr.bj.bcebos.com/dygraph_v2.0/ch/ch_ppocr_mobile_v2.0_cls_infer.tar \
-    && tar -xf ch_ppocr_mobile_v2.0_cls_infer.tar -C /home/models \
-    && rm -f ch_*.tar
+    && wget https://paddle-model-ecology.bj.bcebos.com/paddlex/official_inference_model/paddle3.0.0/PP-LCNet_x1_0_doc_ori_infer.tar \
+    && tar -xf PP-LCNet_x1_0_doc_ori_infer.tar -C /home/models \
+    && rm -f PP_*.tar
 
 COPY runtime/python-executor /opt/runtime
 COPY runtime/ops /opt/runtime/datamate/ops
@@ -22,7 +22,7 @@ ENV UV_INDEX_STRATEGY=unsafe-best-match
 WORKDIR /opt/runtime
 
 RUN --mount=type=cache,target=/root/.cache/uv \
-    uv pip install -e .[all] --system \
+    uv pip install -e . --system \
     && uv pip install -r /opt/runtime/datamate/ops/pyproject.toml --system \
     && python -m spacy download zh_core_web_sm \
     && python -c "import nltk; nltk.download('punkt_tab'); nltk.download('averaged_perceptron_tagger_eng')" \

Original file line number	Diff line number	Diff line change
`@@ -2,5 +2,5 @@`
`2`	`2`	`"query_sql": "SELECT * FROM operator_similar_img_features WHERE task_uuid = :task_uuid ORDER BY timestamp LIMIT :ge OFFSET :le",`
`3`	`3`	`"insert_sql": "INSERT INTO operator_similar_img_features (task_uuid,p_hash,des_matrix,matrix_shape,file_name,timestamp) VALUES (:task_uuid,:p_hash,:des_matrix,:matrix_shape,:file_name,:timestamp)",`
`4`	`4`	`"query_task_uuid_sql": "SELECT * FROM operator_similar_img_features WHERE task_uuid = :task_uuid",`
`5`		`- "create_tables_sql": "CREATE TABLE IF NOT EXISTS operator_similar_img_features (id SERIAL PRIMARY KEY,task_uuid VARCHAR(255),p_hash TEXT,des_matrix BLOB,matrix_shape TEXT,file_name TEXT,timestamp TIMESTAMP);"`
	`5`	`+ "create_tables_sql": "CREATE TABLE IF NOT EXISTS operator_similar_img_features (id SERIAL PRIMARY KEY,task_uuid VARCHAR(255),p_hash TEXT,des_matrix BYTEA,matrix_shape TEXT,file_name TEXT,timestamp TIMESTAMP);"`
`6`	`6`	`}`