Skip to content

Commit a964b0e

Browse files
committed
feat: 优化mineru构建部署参数,适配310P
1 parent 80892d9 commit a964b0e

File tree

9 files changed

+106
-216
lines changed

9 files changed

+106
-216
lines changed

Makefile

Lines changed: 22 additions & 30 deletions
Original file line number | Diff line number | Diff line change
@@ -152,7 +152,7 @@ endef
152152
# ========== Build Targets ==========
153153

154154
# Valid build targets
155-
VALID_BUILD_TARGETS := backend database frontend runtime backend-python deer-flow mineru mineru-npu gateway label-studio
155+
VALID_BUILD_TARGETS := frontend backend gateway database runtime backend-python deer-flow label-studio mineru mineru-910B mineru-910C mineru-310P
156156

157157
# Generic docker build target with service name as parameter
158158
# Automatically prefixes image names with "datamate-" unless it's deer-flow
@@ -169,6 +169,12 @@ VALID_BUILD_TARGETS := backend database frontend runtime backend-python deer-flo
169169
@if [ "$*" = "deer-flow" ]; then \
170170
$(call docker-build,deer-flow-backend,deer-flow-backend); \
171171
$(call docker-build,deer-flow-frontend,deer-flow-frontend); \
172+
elif [ "$*" = "mineru" ] || [ "$*" = "mineru-910B" ]; then \
173+
wget -qO - https://gcore.jsdelivr.net/gh/opendatalab/MinerU@master/docker/china/npu.Dockerfile | docker build --network=host -t datamate-mineru -f - .; \
174+
elif [ "$*" = "mineru-910C" ]; then \
175+
wget -qO - https://gcore.jsdelivr.net/gh/opendatalab/MinerU@master/docker/china/npu.Dockerfile | sed 's/v0.11.0/v0.11.0-a3/g' | docker build --network=host -t datamate-mineru -f - .; \
176+
elif [ "$*" = "mineru-310P" ]; then \
177+
wget -qO - https://gcore.jsdelivr.net/gh/opendatalab/MinerU@master/docker/china/npu.Dockerfile | sed 's/v0.11.0/v0.10.0rc1-310p/g' | docker build --network=host -t datamate-mineru -f - .; \
172178
else \
173179
$(call docker-build,$*,datamate-$*); \
174180
fi
@@ -235,7 +241,7 @@ endif
235241
# ========== Docker Install/Uninstall Targets ==========
236242

237243
# Valid service targets for docker install/uninstall
238-
VALID_SERVICE_TARGETS := datamate backend frontend runtime backend-python database gateway redis mineru deer-flow milvus label-studio data-juicer dj
244+
VALID_SERVICE_TARGETS := datamate backend frontend runtime backend-python database gateway redis deer-flow milvus label-studio data-juicer mineru mineru-910B mineru-910C mineru-310P
239245

240246
# Generic docker service install target
241247
.PHONY: %-docker-install
@@ -252,9 +258,11 @@ VALID_SERVICE_TARGETS := datamate backend frontend runtime backend-python databa
252258
REGISTRY=$(REGISTRY) docker compose -f deployment/docker/datamate/docker-compose.yml --profile label-studio up -d; \
253259
elif [ "$*" = "datamate" ]; then \
254260
REGISTRY=$(REGISTRY) docker compose -f deployment/docker/datamate/docker-compose.yml up -d; \
255-
elif [ "$*" = "mineru" ]; then \
261+
elif [ "$*" = "mineru" ] || [ "$*" = "mineru-910B" ] || [ "$*" = "mineru-910C" ]; then \
256262
REGISTRY=$(REGISTRY) docker compose -f deployment/docker/datamate/docker-compose.yml --profile mineru up -d datamate-mineru; \
257-
elif [ "$*" = "data-juicer" ] || [ "$*" = "dj" ]; then \
263+
elif [ "$*" = "mineru-310P" ]; then \
264+
REGISTRY=$(REGISTRY) EXTRA_ARGS="--enforce-eager --dtype float16" docker compose -f deployment/docker/datamate/docker-compose.yml --profile mineru up -d datamate-mineru; \
265+
elif [ "$*" = "data-juicer" ]; then \
258266
REGISTRY=$(REGISTRY) docker compose -f deployment/docker/datamate/docker-compose.yml --profile data-juicer up -d datamate-data-juicer; \
259267
elif [ "$*" = "redis" ]; then \
260268
REGISTRY=$(REGISTRY) docker compose -f deployment/docker/datamate/docker-compose.yml --profile redis up -d datamate-redis; \
@@ -281,7 +289,7 @@ VALID_SERVICE_TARGETS := datamate backend frontend runtime backend-python databa
281289
fi
282290
@if [ "$*" = "label-studio" ]; then \
283291
docker compose -f deployment/docker/datamate/docker-compose.yml rm -f -s label-studio; \
284-
elif [ "$*" = "mineru" ]; then \
292+
elif [ "$*" = "mineru" ] || [ "$*" = "mineru-910B" ] || [ "$*" = "mineru-910C" ] || [ "$*" = "mineru-310P" ]; then \
285293
docker compose -f deployment/docker/datamate/docker-compose.yml rm -f -s datamate-mineru; \
286294
elif [ "$*" = "data-juicer" ] || [ "$*" = "dj" ]; then \
287295
docker compose -f deployment/docker/datamate/docker-compose.yml rm -f -s datamate-data-juicer; \
@@ -304,7 +312,7 @@ VALID_SERVICE_TARGETS := datamate backend frontend runtime backend-python databa
304312
# ========== Kubernetes Install/Uninstall Targets ==========
305313

306314
# Valid k8s targets
307-
VALID_K8S_TARGETS := mineru datamate deer-flow milvus label-studio data-juicer dj
315+
VALID_K8S_TARGETS := datamate deer-flow milvus label-studio data-juicer mineru mineru-910B mineru-910C mineru-310P
308316

309317
# Generic k8s install target
310318
.PHONY: %-k8s-install
@@ -319,8 +327,10 @@ VALID_K8S_TARGETS := mineru datamate deer-flow milvus label-studio data-juicer d
319327
fi
320328
@if [ "$*" = "label-studio" ]; then \
321329
helm upgrade label-studio deployment/helm/label-studio/ -n $(NAMESPACE) --install; \
322-
elif [ "$*" = "mineru" ]; then \
323-
kubectl apply -f deployment/kubernetes/mineru/deploy.yaml -n $(NAMESPACE); \
330+
elif [ "$*" = "mineru" ] || [ "$*" = "mineru-910B" ] || [ "$*" = "mineru-910C" ]; then \
331+
kubectl apply -f deployment/kubernetes/mineru/deploy-910.yaml -n $(NAMESPACE); \
332+
elif [ "$*" = "mineru-310P" ]; then \
333+
kubectl apply -f deployment/kubernetes/mineru/deploy-310.yaml -n $(NAMESPACE); \
324334
elif [ "$*" = "datamate" ]; then \
325335
helm upgrade datamate deployment/helm/datamate/ -n $(NAMESPACE) --install --set global.image.repository=$(REGISTRY); \
326336
elif [ "$*" = "deer-flow" ]; then \
@@ -346,8 +356,10 @@ VALID_K8S_TARGETS := mineru datamate deer-flow milvus label-studio data-juicer d
346356
done; \
347357
exit 1; \
348358
fi
349-
@if [ "$*" = "mineru" ]; then \
350-
kubectl delete -f deployment/kubernetes/mineru/deploy.yaml -n $(NAMESPACE); \
359+
@if [ "$*" = "mineru" ] || [ "$*" = "mineru-910B" ] || [ "$*" = "mineru-910C" ]; then \
360+
kubectl delete -f deployment/kubernetes/mineru/deploy-910.yaml -n $(NAMESPACE); \
361+
elif [ "$*" = "mineru-310P" ]; then \
362+
kubectl delete -f deployment/kubernetes/mineru/deploy-310.yaml -n $(NAMESPACE); \
351363
elif [ "$*" = "datamate" ]; then \
352364
helm uninstall datamate -n $(NAMESPACE) --ignore-not-found; \
353365
elif [ "$*" = "deer-flow" ]; then \
@@ -360,26 +372,6 @@ VALID_K8S_TARGETS := mineru datamate deer-flow milvus label-studio data-juicer d
360372
kubectl delete -f deployment/kubernetes/data-juicer/deploy.yaml -n $(NAMESPACE); \
361373
fi
362374

363-
# ========== Upgrade Targets ==========
364-
365-
# Valid upgrade targets
366-
VALID_UPGRADE_TARGETS := datamate
367-
368-
# Generic docker upgrade target
369-
.PHONY: %-docker-upgrade
370-
%-docker-upgrade:
371-
@if ! echo " $(VALID_UPGRADE_TARGETS) " | grep -q " $* "; then \
372-
echo "Error: Unknown upgrade target '$*'"; \
373-
echo "Valid upgrade targets are:"; \
374-
for target in $(VALID_UPGRADE_TARGETS); do \
375-
echo " - $$target"; \
376-
done; \
377-
exit 1; \
378-
fi
379-
@if [ "$*" = "datamate" ]; then \
380-
docker compose -f deployment/docker/datamate/docker-compose.yml --profile mineru up -d --force-recreate --remove-orphans; \
381-
fi
382-
383375
# ========== Download Targets ==========
384376

385377
# List of all images to download

deployment/docker/datamate/docker-compose.yml

Lines changed: 6 additions & 6 deletions
Original file line number | Diff line number | Diff line change
@@ -42,6 +42,8 @@ services:
4242
image: ${REGISTRY:-}datamate-gateway
4343
restart: on-failure
4444
privileged: true
45+
ports:
46+
- '8080:8080'
4547
environment:
4648
- JWT_SECRET=default-insecure-key-change-in-production
4749
networks: [ datamate ]
@@ -111,14 +113,14 @@ services:
111113
MINERU_DEVICE_MODE: npu
112114
VLLM_WORKER_MULTIPROC_METHOD: spawn
113115
privileged: true
114-
entrypoint: mineru-openai-server
115-
command:
116+
entrypoint: /bin/sh -c
117+
command: >
118+
"exec mineru-openai-server
116119
--engine vllm
117120
--host 0.0.0.0
118121
--port 8000
122+
${EXTRA_ARGS:-}"
119123
volumes:
120-
- dataset_volume:/dataset
121-
- mineru_log_volume:/var/log/datamate/mineru
122124
- /var/log/npu/:/usr/slog
123125
- /usr/local/dcmi:/usr/local/dcmi
124126
- /usr/local/bin/npu-smi:/usr/local/bin/npu-smi
@@ -334,8 +336,6 @@ volumes:
334336
name: datamate-operator-runtime-volume
335337
operator-packages-volume:
336338
name: datamate-operator-packages-volume
337-
mineru_log_volume:
338-
name: datamate-mineru_log_volume
339339
graph_data_volume:
340340
name: datamate-graph-data-volume
341341

deployment/kubernetes/mineru/deploy-310.yaml

Lines changed: 71 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -0,0 +1,71 @@
1+
apiVersion: apps/v1
2+
kind: Deployment
3+
metadata:
4+
name: datamate-mineru
5+
labels:
6+
app: datamate
7+
tier: mineru
8+
spec:
9+
replicas: 1
10+
selector:
11+
matchLabels:
12+
app: datamate
13+
tier: mineru
14+
template:
15+
metadata:
16+
labels:
17+
app: datamate
18+
tier: mineru
19+
spec:
20+
containers:
21+
- name: mineru
22+
image: datamate-mineru
23+
imagePullPolicy: IfNotPresent
24+
command:
25+
- mineru-openai-server
26+
args:
27+
- --engine
28+
- vllm
29+
- --host
30+
- 0.0.0.0
31+
- --port
32+
- "8000"
33+
- --enforce-eager
34+
- --dtype
35+
- float16
36+
env:
37+
- name: MINERU_MODEL_SOURCE
38+
value: local
39+
- name: MINERU_DEVICE_MODE
40+
value: npu
41+
- name: VLLM_WORKER_MULTIPROC_METHOD
42+
value: spawn
43+
ports:
44+
- containerPort: 8000
45+
resources:
46+
limits:
47+
cpu: 8
48+
memory: 32Gi
49+
huawei.com/Ascend310P: 1
50+
requests:
51+
cpu: 100m
52+
memory: 100Mi
53+
huawei.com/Ascend310P: 1
54+
55+
---
56+
apiVersion: v1
57+
kind: Service
58+
metadata:
59+
name: datamate-mineru
60+
labels:
61+
app: datamate
62+
tier: mineru
63+
spec:
64+
type: ClusterIP
65+
ports:
66+
- port: 8000
67+
targetPort: 8000
68+
protocol: TCP
69+
selector:
70+
app: datamate
71+
tier: mineru

deployment/kubernetes/mineru/deploy.yaml renamed to deployment/kubernetes/mineru/deploy-910.yaml

Lines changed: 0 additions & 13 deletions
Original file line number | Diff line number | Diff line change
@@ -48,19 +48,6 @@ spec:
4848
cpu: 100m
4949
memory: 100Mi
5050
huawei.com/Ascend910: 1
51-
volumeMounts:
52-
- name: dataset-volume
53-
mountPath: /dataset
54-
- name: log-volume
55-
mountPath: /var/log/datamate/mineru
56-
subPath: mineru
57-
volumes:
58-
- name: dataset-volume
59-
persistentVolumeClaim:
60-
claimName: datamate-dataset-pvc
61-
- name: log-volume
62-
persistentVolumeClaim:
63-
claimName: datamate-log-pvc
6451

6552
---
6653
apiVersion: v1

runtime/mineru/mineru_api.py

Lines changed: 0 additions & 112 deletions
This file was deleted.

runtime/ops/formatter/mineru_formatter/metadata.yml

Lines changed: 6 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -15,6 +15,12 @@ effect:
1515
inputs: 'text'
1616
outputs: 'text'
1717
settings:
18+
mineruApi:
19+
name: 'Mineru Api地址'
20+
description: '指定mineru服务的api地址,默认为本地同一集群内地址。'
21+
type: 'input'
22+
defaultVal: 'http://datamate-mineru:8000'
23+
required: false
1824
exportType:
1925
name: '导出类型'
2026
description: '指定清洗结果文件类型。若指定为md且后续存在其他清洗算子,可能导致文件格式错乱。'

runtime/ops/formatter/mineru_formatter/process.py

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -27,7 +27,7 @@ class MineruFormatter(Mapper):
2727

2828
def __init__(self, *args, **kwargs):
2929
super(MineruFormatter, self).__init__(*args, **kwargs)
30-
self.server_url = "http://datamate-mineru:8000"
30+
self.server_url = kwargs.get("mineruApi", "http://datamate-mineru:8000")
3131
self.backend = "vlm-http-client"
3232
self.output_dir = "/dataset/outputs"
3333
self.max_retries = 3

0 commit comments

Comments
 (0)