Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions regress/age_load/data/bad_delim_edges.csv
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
start_id|start_vertex_type|end_id|end_vertex_type
1|V|2|V
2 changes: 2 additions & 0 deletions regress/age_load/data/edges_long_row.csv
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
start_id,start_vertex_type,end_id,end_vertex_type
1,V,2,V,extra
2 changes: 2 additions & 0 deletions regress/age_load/data/edges_short_row.csv
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
start_id,start_vertex_type,end_id,end_vertex_type
1,V
2 changes: 2 additions & 0 deletions regress/age_load/data/labels_long_row.csv
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
id,name
1,Alice,extra
57 changes: 57 additions & 0 deletions regress/expected/age_load.out
Original file line number Diff line number Diff line change
Expand Up @@ -454,6 +454,63 @@ NOTICE: graph "agload_conversion" has been dropped

(1 row)

--
-- Issue 2449: mis-delimited / malformed load files must fail with a clear
-- error instead of segfaulting or silently corrupting data. Edge files
-- require the 4 fixed columns; a file that is not comma-delimited parses as
-- a single column, so this must be rejected at the header.
--
SELECT create_graph('agload_delim');
NOTICE: graph "agload_delim" has been created
create_graph
--------------

(1 row)

SELECT create_vlabel('agload_delim', 'V');
NOTICE: VLabel "V" has been created
create_vlabel
---------------

(1 row)

SELECT create_elabel('agload_delim', 'E');
NOTICE: ELabel "E" has been created
create_elabel
---------------

(1 row)

-- pipe-delimited edge file -> parses to 1 column -> clean error at the header
-- (was a segfault)
SELECT load_edges_from_file('agload_delim', 'E', 'age_load/bad_delim_edges.csv');
ERROR: edge file must have at least 4 columns (start_id, start_vertex_type, end_id, end_vertex_type), but the header has 1
HINT: load_edges_from_file expects a comma-delimited CSV; check the file's delimiter.
-- per-row guards (header is valid, but an individual data row is ragged):
-- an edge row with fewer than 4 columns -> clean error (was an OOB read of
-- the fixed fields[1..3])
SELECT load_edges_from_file('agload_delim', 'E', 'age_load/edges_short_row.csv');
ERROR: edge file row has 2 columns; expected at least 4 and no more than the header's 4 columns
-- an edge row with more columns than the header -> clean error (was an OOB
-- read of header[i] in create_agtype_from_list_i)
SELECT load_edges_from_file('agload_delim', 'E', 'age_load/edges_long_row.csv');
ERROR: edge file row has 5 columns; expected at least 4 and no more than the header's 4 columns
-- a label row with more columns than the header -> clean error (was an OOB
-- read of header[i] in create_agtype_from_list)
SELECT load_labels_from_file('agload_delim', 'V', 'age_load/labels_long_row.csv');
ERROR: label file row has 3 columns, more than the header's 2 columns
SELECT drop_graph('agload_delim', true);
NOTICE: drop cascades to 4 other objects
DETAIL: drop cascades to table agload_delim._ag_label_vertex
drop cascades to table agload_delim._ag_label_edge
drop cascades to table agload_delim."V"
drop cascades to table agload_delim."E"
NOTICE: graph "agload_delim" has been dropped
drop_graph
------------

(1 row)

--
-- Test security and permissions
--
Expand Down
29 changes: 29 additions & 0 deletions regress/sql/age_load.sql
Original file line number Diff line number Diff line change
Expand Up @@ -194,6 +194,35 @@ SELECT load_edges_from_file('agload_conversion', 'Edges1', '../../etc/passwd', t
--
SELECT drop_graph('agload_conversion', true);

--
-- Issue 2449: mis-delimited / malformed load files must fail with a clear
-- error instead of segfaulting or silently corrupting data. Edge files
-- require the 4 fixed columns; a file that is not comma-delimited parses as
-- a single column, so this must be rejected at the header.
--
SELECT create_graph('agload_delim');
SELECT create_vlabel('agload_delim', 'V');
SELECT create_elabel('agload_delim', 'E');

-- pipe-delimited edge file -> parses to 1 column -> clean error at the header
-- (was a segfault)
SELECT load_edges_from_file('agload_delim', 'E', 'age_load/bad_delim_edges.csv');

-- per-row guards (header is valid, but an individual data row is ragged):
-- an edge row with fewer than 4 columns -> clean error (was an OOB read of
-- the fixed fields[1..3])
SELECT load_edges_from_file('agload_delim', 'E', 'age_load/edges_short_row.csv');

-- an edge row with more columns than the header -> clean error (was an OOB
-- read of header[i] in create_agtype_from_list_i)
SELECT load_edges_from_file('agload_delim', 'E', 'age_load/edges_long_row.csv');

-- a label row with more columns than the header -> clean error (was an OOB
-- read of header[i] in create_agtype_from_list)
SELECT load_labels_from_file('agload_delim', 'V', 'age_load/labels_long_row.csv');

SELECT drop_graph('agload_delim', true);

--
-- Test security and permissions
--
Expand Down
39 changes: 39 additions & 0 deletions src/backend/utils/load/ag_load_edges.c
Original file line number Diff line number Diff line change
Expand Up @@ -56,6 +56,24 @@ static void process_edge_row(char **fields, int nfields,
char *end_vertex_type;
agtype *edge_properties;

/*
* Guard the fixed fields[0..3] accesses below and the header[i]/fields[i]
* pairing in create_agtype_from_list_i() against out-of-bounds reads on
* malformed or mis-delimited rows. A row must have at least the 4 fixed
* columns and no more columns than the header (rows with fewer trailing
* property columns than the header are allowed, matching existing
* behavior). A single-column row from a non-comma-delimited file is
* rejected here (previously it segfaulted).
*/
if (nfields < 4 || nfields > header_count)
{
ereport(ERROR,
(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
errmsg("edge file row has %d columns; expected at least 4 "
"and no more than the header's %d columns",
nfields, header_count)));
}
Comment thread
gregfelice marked this conversation as resolved.

/* Generate edge ID */
entry_id = nextval_internal(label_seq_relid, true);
edge_id = make_graphid(label_id, entry_id);
Expand Down Expand Up @@ -219,6 +237,27 @@ int create_edges_from_csv_file(char *file_path,
header[i] = trim_whitespace(fields[i]);
}

/*
* Edge files require the four fixed columns start_id,
* start_vertex_type, end_id and end_vertex_type. A smaller
* count almost always means the file is not comma-delimited
* (COPY in CSV format defaults to a comma delimiter). Fail
* clearly here instead of reading past the parsed fields in
* process_edge_row(), which previously caused a segfault.
*/
if (header_count < 4)
{
ereport(ERROR,
(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
errmsg("edge file must have at least 4 columns "
"(start_id, start_vertex_type, end_id, "
"end_vertex_type), but the header has %d",
header_count),
errhint("load_edges_from_file expects a "
"comma-delimited CSV; check the file's "
"delimiter.")));
}

is_first_row = false;
}
else
Expand Down
17 changes: 17 additions & 0 deletions src/backend/utils/load/ag_load_labels.c
Original file line number Diff line number Diff line change
Expand Up @@ -46,6 +46,23 @@ static void process_vertex_row(char **fields, int nfields,
TupleTableSlot *slot;
agtype *vertex_properties;

/*
* Guard the header[i]/fields[i] pairing in create_agtype_from_list()
* against out-of-bounds reads on malformed rows that have more fields
* than the header. Rows with fewer fields than the header are allowed
* (matching existing behavior). Note: a file delimited by something
* other than comma is parsed as a single column throughout, so header
* and rows still match and the data lands in properties verbatim --
* specifying the delimiter is the separate fix for that.
*/
if (nfields > header_count)
{
ereport(ERROR,
(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
errmsg("label file row has %d columns, more than the "
"header's %d columns", nfields, header_count)));
}
Comment thread
gregfelice marked this conversation as resolved.

/* Generate or use provided entry_id */
if (id_field_exists)
{
Expand Down
Loading