Skip to content

Commit 64c5552

Browse files
authored
fix csv file reader (#2248)
* fix
* fix test with inline content
* fix format
1 parent a82d186 commit 64c5552

File tree

3 files changed

+29
-1
lines changed

3 files changed

+29
-1
lines changed
Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,4 @@
1+
{
2+
"type": "patch",
3+
"description": "fix csv reader"
4+
}

packages/graphrag-input/graphrag_input/csv.py

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,7 @@
44
"""A module containing 'CSVFileReader' model."""
55

66
import csv
7+
import io
78
import logging
89
import sys
910

@@ -39,6 +40,6 @@ async def read_file(self, path: str) -> list[TextDocument]:
3940
"""
4041
file = await self._storage.get(path, encoding=self._encoding)
4142

42-
reader = csv.DictReader(file.splitlines())
43+
reader = csv.DictReader(io.StringIO(file))
4344
rows = list(reader)
4445
return await self.process_data_columns(rows, path)

tests/unit/indexing/input/test_csv_loader.py

Lines changed: 23 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -54,3 +54,26 @@ async def test_csv_loader_multiple_files():
5454
reader = create_input_reader(config, storage)
5555
documents = await reader.read_files()
5656
assert len(documents) == 4
57+
58+
59+
async def test_csv_loader_preserves_multiline_fields(tmp_path):
60+
"""Multiline quoted CSV fields must retain their internal newlines."""
61+
csv_content = (
62+
"title,text\r\n"
63+
'"Post 1","Line one.\nLine two.\nLine three."\r\n'
64+
'"Post 2","Single line."\r\n'
65+
)
66+
(tmp_path / "input.csv").write_text(csv_content, encoding="utf-8")
67+
config = InputConfig(
68+
type=InputType.Csv,
69+
text_column="text",
70+
title_column="title",
71+
)
72+
storage = create_storage(StorageConfig(base_dir=str(tmp_path)))
73+
reader = create_input_reader(config, storage)
74+
documents = await reader.read_files()
75+
assert len(documents) == 2
76+
assert documents[0].title == "Post 1"
77+
assert documents[0].text == "Line one.\nLine two.\nLine three."
78+
assert documents[1].title == "Post 2"
79+
assert documents[1].text == "Single line."

0 commit comments

Comments
 (0)