Skip to content

Commit 4938657

Browse files
authored
Release/v1.8.1
Release/v1.8.1
2 parents 31a1752 + 7ca3790 commit 4938657

File tree

147 files changed

+15479
-2247
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

147 files changed

+15479
-2247
lines changed

backend/apps/config_app.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -6,6 +6,7 @@
66
from apps.datamate_app import router as datamate_router
77
from apps.vectordatabase_app import router as vectordatabase_router
88
from apps.dify_app import router as dify_router
9+
from apps.idata_app import router as idata_router
910
from apps.file_management_app import file_management_config_router as file_manager_router
1011
from apps.image_app import router as proxy_router
1112
from apps.knowledge_summary_app import router as summary_router
@@ -39,6 +40,7 @@
3940
app.include_router(proxy_router)
4041
app.include_router(tool_config_router)
4142
app.include_router(dify_router)
43+
app.include_router(idata_router)
4244

4345
# Choose user management router based on IS_SPEED_MODE
4446
if IS_SPEED_MODE:

backend/apps/data_process_app.py

Lines changed: 33 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -11,6 +11,7 @@
1111
ConvertStateRequest,
1212
TaskRequest,
1313
)
14+
from consts.exceptions import OfficeConversionException
1415
from data_process.tasks import process_and_forward, process_sync
1516
from services.data_process_service import get_data_process_service
1617

@@ -311,3 +312,35 @@ async def convert_state(request: ConvertStateRequest):
311312
status_code=HTTPStatus.INTERNAL_SERVER_ERROR,
312313
detail=f"Error converting state: {str(e)}"
313314
)
315+
316+
317+
@router.post("/convert_to_pdf")
async def convert_office_to_pdf(
    object_name: str = Form(...),
    pdf_object_name: str = Form(...)
):
    """
    Convert an Office document stored in MinIO to PDF.

    Delegates the conversion to the data process service and reports the
    outcome as JSON.

    Parameters:
        object_name: Source Office file path in MinIO
        pdf_object_name: Destination PDF path in MinIO

    Returns:
        JSONResponse with status 200 and body {"success": True}.

    Raises:
        HTTPException: 500 for any failure. A known conversion error is
            reported with its own message; anything else is reported with
            an "Office conversion failed" prefix.
    """
    try:
        await service.convert_office_to_pdf_impl(
            object_name=object_name,
            pdf_object_name=pdf_object_name,
        )
        return JSONResponse(content={"success": True}, status_code=HTTPStatus.OK)
    except Exception as exc:
        # Both failure kinds map to 500; only the log line and the detail differ.
        if isinstance(exc, OfficeConversionException):
            logger.error(f"Office conversion failed for '{object_name}': {exc}")
            detail = str(exc)
        else:
            logger.error(f"Unexpected error during conversion for '{object_name}': {exc}")
            detail = f"Office conversion failed: {exc}"
        raise HTTPException(
            status_code=HTTPStatus.INTERNAL_SERVER_ERROR,
            detail=detail
        )

backend/apps/file_management_app.py

Lines changed: 79 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -9,22 +9,29 @@
99
from fastapi import APIRouter, Body, File, Form, Header, HTTPException, Path as PathParam, Query, UploadFile
1010
from fastapi.responses import JSONResponse, RedirectResponse, StreamingResponse
1111

12+
from consts.exceptions import FileTooLargeException, NotFoundException, OfficeConversionException, UnsupportedFileTypeException
1213
from consts.model import ProcessParams
1314
from services.file_management_service import upload_to_minio, upload_files_impl, \
14-
get_file_url_impl, get_file_stream_impl, delete_file_impl, list_files_impl
15+
get_file_url_impl, get_file_stream_impl, delete_file_impl, list_files_impl, \
16+
preview_file_impl
1517
from utils.file_management_utils import trigger_data_process
1618

1719
logger = logging.getLogger("file_management_app")
1820

1921

20-
def build_content_disposition_header(filename: Optional[str]) -> str:
22+
def build_content_disposition_header(filename: Optional[str], inline: bool = False) -> str:
2123
"""
2224
Build a Content-Disposition header that keeps the original filename.
2325
26+
Args:
27+
filename: Original filename to include in header
28+
inline: If True, use 'inline' disposition (for preview); otherwise 'attachment' (for download)
29+
2430
- ASCII filenames are returned directly.
2531
- Non-ASCII filenames include both an ASCII fallback and RFC 5987 encoded value
2632
so modern browsers keep the original name.
2733
"""
34+
disposition = "inline" if inline else "attachment"
2835
safe_name = (filename or "download").strip() or "download"
2936

3037
def _sanitize_ascii(value: str) -> str:
@@ -40,26 +47,26 @@ def _sanitize_ascii(value: str) -> str:
4047

4148
try:
4249
safe_name.encode("ascii")
43-
return f'attachment; filename="{_sanitize_ascii(safe_name)}"'
50+
return f'{disposition}; filename="{_sanitize_ascii(safe_name)}"'
4451
except UnicodeEncodeError:
4552
try:
4653
encoded = quote(safe_name, safe="")
4754
except Exception:
4855
# quote failure, fallback to sanitized ASCII only
4956
logger.warning("Failed to encode filename '%s', using fallback", safe_name)
50-
return f'attachment; filename="{_sanitize_ascii(safe_name)}"'
57+
return f'{disposition}; filename="{_sanitize_ascii(safe_name)}"'
5158

5259
fallback = _sanitize_ascii(
5360
safe_name.encode("ascii", "ignore").decode("ascii") or "download"
5461
)
55-
return f'attachment; filename="{fallback}"; filename*=UTF-8\'\'{encoded}'
62+
return f'{disposition}; filename="{fallback}"; filename*=UTF-8\'\'{encoded}'
5663
except Exception as exc: # pragma: no cover
5764
logger.warning(
5865
"Failed to encode filename '%s': %s. Using fallback.",
5966
safe_name,
6067
exc,
6168
)
62-
return 'attachment; filename="download"'
69+
return f'{disposition}; filename="download"'
6370

6471
# Create API router
6572
file_management_runtime_router = APIRouter(prefix="/file")
@@ -567,3 +574,69 @@ async def get_storage_file_batch_urls(
567574
"failed_count": sum(1 for r in results if not r.get("success", False)),
568575
"results": results
569576
}
577+
578+
@file_management_config_router.get("/preview/{object_name:path}")
async def preview_file(
    object_name: str = PathParam(..., description="File object name to preview"),
    filename: Optional[str] = Query(None, description="Original filename for display (optional)")
):
    """
    Preview file inline in browser

    - **object_name**: File object name in storage
    - **filename**: Original filename for Content-Disposition header (optional)

    Returns file stream with Content-Disposition: inline for browser preview

    Error mapping:
    - FileTooLargeException -> 413
    - NotFoundException -> 404
    - UnsupportedFileTypeException -> 400
    - OfficeConversionException and any other exception -> 500
    """
    try:
        # Get file stream from preview service
        file_stream, content_type = await preview_file_impl(object_name=object_name)

        # Use provided filename, otherwise the last path segment of object_name
        # (split() returns the whole string when there is no "/").
        display_filename = filename or object_name.split("/")[-1]

        # Build Content-Disposition header for inline display
        content_disposition = build_content_disposition_header(display_filename, inline=True)

        # Header values must be latin-1 encodable and an entity-tag may not
        # contain '"' or spaces, so percent-encode the object name instead of
        # embedding it raw; a non-ASCII object name would otherwise fail when
        # the response headers are encoded.
        # NOTE(review): the tag is derived from the name only, so it does not
        # change if the stored content is overwritten - confirm this is intended.
        etag_token = quote(object_name, safe="/")

        return StreamingResponse(
            file_stream,
            media_type=content_type,
            headers={
                "Content-Disposition": content_disposition,
                "Cache-Control": "public, max-age=3600",
                "ETag": f'"{etag_token}"',
            }
        )

    except FileTooLargeException as e:
        logger.warning(f"[preview_file] File too large: object_name={object_name}, error={str(e)}")
        raise HTTPException(
            status_code=HTTPStatus.REQUEST_ENTITY_TOO_LARGE,
            detail=str(e)
        ) from e
    except NotFoundException as e:
        logger.error(f"[preview_file] File not found: object_name={object_name}, error={str(e)}")
        raise HTTPException(
            status_code=HTTPStatus.NOT_FOUND,
            detail=f"File not found: {object_name}"
        ) from e
    except UnsupportedFileTypeException as e:
        logger.error(f"[preview_file] Unsupported file type: object_name={object_name}, error={str(e)}")
        raise HTTPException(
            status_code=HTTPStatus.BAD_REQUEST,
            detail=f"File format not supported for preview: {str(e)}"
        ) from e
    except OfficeConversionException as e:
        logger.error(f"[preview_file] Conversion failed: object_name={object_name}, error={str(e)}")
        raise HTTPException(
            status_code=HTTPStatus.INTERNAL_SERVER_ERROR,
            detail=f"Failed to preview file: {str(e)}"
        ) from e
    except Exception as e:
        logger.error(f"[preview_file] Unexpected error: object_name={object_name}, error={str(e)}")
        raise HTTPException(
            status_code=HTTPStatus.INTERNAL_SERVER_ERROR,
            detail=f"Failed to preview file: {str(e)}"
        ) from e

backend/apps/idata_app.py

Lines changed: 109 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,109 @@
1+
"""
2+
iData App Layer
3+
FastAPI endpoints for iData knowledge space operations.
4+
5+
This module provides API endpoints to interact with iData's API,
6+
including fetching knowledge spaces and transforming responses to a format
7+
compatible with the frontend.
8+
"""
9+
import logging
10+
from http import HTTPStatus
11+
12+
from fastapi import APIRouter, Query
13+
from fastapi.responses import JSONResponse
14+
15+
from consts.error_code import ErrorCode
16+
from consts.exceptions import AppException
17+
from services.idata_service import (
18+
fetch_idata_knowledge_spaces_impl,
19+
fetch_idata_datasets_impl,
20+
)
21+
22+
router = APIRouter(prefix="/idata")
23+
logger = logging.getLogger("idata_app")
24+
25+
26+
@router.get("/knowledge-space")
async def fetch_idata_knowledge_spaces_api(
    idata_api_base: str = Query(..., description="iData API base URL"),
    api_key: str = Query(..., description="iData API key"),
    user_id: str = Query(..., description="iData user ID"),
):
    """
    Fetch knowledge spaces from iData API.

    Returns knowledge spaces in a format with id and name for frontend compatibility.

    Args:
        idata_api_base: iData API base URL; trailing slashes are stripped.
        api_key: iData API key.
        user_id: iData user ID.

    Returns:
        JSONResponse with status 200 whose content is the service result.

    Raises:
        AppException: IDATA_CONFIG_INVALID if the base URL cannot be
            normalized; IDATA_SERVICE_ERROR for any other failure. An
            AppException raised by the service is propagated unchanged.
    """
    # fastapi is already a dependency of this module; imported here so the
    # handler does not rely on edits elsewhere in the file.
    from fastapi.concurrency import run_in_threadpool

    # NOTE(review): api_key travels in the query string, which commonly ends
    # up in access logs - confirm this is acceptable for this deployment.
    try:
        # Normalize URL by removing trailing slash
        idata_api_base = idata_api_base.rstrip('/')
    except Exception as e:
        logger.error(f"Invalid iData configuration: {e}")
        raise AppException(
            ErrorCode.IDATA_CONFIG_INVALID,
            f"Invalid URL format: {str(e)}"
        ) from e

    try:
        # The service function is synchronous (it is not awaited), so run it
        # in the threadpool rather than on the event loop; presumably it
        # performs a blocking call to the iData API.
        result = await run_in_threadpool(
            fetch_idata_knowledge_spaces_impl,
            idata_api_base=idata_api_base,
            api_key=api_key,
            user_id=user_id,
        )
        return JSONResponse(
            status_code=HTTPStatus.OK,
            content=result
        )
    except AppException:
        # Re-raise AppException to be handled by global middleware
        raise
    except Exception as e:
        logger.error(f"Failed to fetch iData knowledge spaces: {e}")
        raise AppException(
            ErrorCode.IDATA_SERVICE_ERROR,
            f"Failed to fetch iData knowledge spaces: {str(e)}"
        ) from e
66+
67+
68+
@router.get("/datasets")
async def fetch_idata_datasets_api(
    idata_api_base: str = Query(..., description="iData API base URL"),
    api_key: str = Query(..., description="iData API key"),
    user_id: str = Query(..., description="iData user ID"),
    knowledge_space_id: str = Query(..., description="Knowledge space ID"),
):
    """
    Fetch datasets (knowledge bases) from iData API.

    Returns knowledge bases in a format consistent with DataMate for frontend compatibility.

    Args:
        idata_api_base: iData API base URL; trailing slashes are stripped.
        api_key: iData API key.
        user_id: iData user ID.
        knowledge_space_id: Knowledge space ID whose datasets are requested.

    Returns:
        JSONResponse with status 200 whose content is the service result.

    Raises:
        AppException: IDATA_CONFIG_INVALID if the base URL cannot be
            normalized; IDATA_SERVICE_ERROR for any other failure. An
            AppException raised by the service is propagated unchanged.
    """
    # fastapi is already a dependency of this module; imported here so the
    # handler does not rely on edits elsewhere in the file.
    from fastapi.concurrency import run_in_threadpool

    # NOTE(review): api_key travels in the query string, which commonly ends
    # up in access logs - confirm this is acceptable for this deployment.
    try:
        # Normalize URL by removing trailing slash
        idata_api_base = idata_api_base.rstrip('/')
    except Exception as e:
        logger.error(f"Invalid iData configuration: {e}")
        raise AppException(
            ErrorCode.IDATA_CONFIG_INVALID,
            f"Invalid URL format: {str(e)}"
        ) from e

    try:
        # The service function is synchronous (it is not awaited), so run it
        # in the threadpool rather than on the event loop; presumably it
        # performs a blocking call to the iData API.
        result = await run_in_threadpool(
            fetch_idata_datasets_impl,
            idata_api_base=idata_api_base,
            api_key=api_key,
            user_id=user_id,
            knowledge_space_id=knowledge_space_id,
        )
        return JSONResponse(
            status_code=HTTPStatus.OK,
            content=result
        )
    except AppException:
        # Re-raise AppException to be handled by global middleware
        raise
    except Exception as e:
        logger.error(f"Failed to fetch iData datasets: {e}")
        raise AppException(
            ErrorCode.IDATA_SERVICE_ERROR,
            f"Failed to fetch iData datasets: {str(e)}"
        ) from e

0 commit comments

Comments
 (0)