diff --git a/Makefile b/Makefile index 32f34d6e6..2ea8f64ed 100644 --- a/Makefile +++ b/Makefile @@ -223,6 +223,7 @@ REGRESS = scan \ age_reduce \ map_projection \ direct_field_access \ + generated_columns \ security \ reserved_keyword_alias \ agtype_jsonb_cast \ diff --git a/regress/expected/generated_columns.out b/regress/expected/generated_columns.out new file mode 100644 index 000000000..4e929110c --- /dev/null +++ b/regress/expected/generated_columns.out @@ -0,0 +1,222 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ +/* + * Regression test for issue #2450. + * + * A GENERATED ALWAYS ... STORED column (or any user-added column) on a label + * table adds an attribute AGE's CREATE/MERGE/SET paths do not populate. The + * slot's tuple descriptor is the full relation descriptor, so heap_form_tuple() + * used to read the uninitialized slot entry and segfault. AGE now null-inits the + * entity slot and computes stored generated columns before materializing the + * tuple. Generated columns must be computed on insert and recomputed on update; + * plain user columns must default to NULL rather than crash. + */ +LOAD 'age'; +SET search_path TO ag_catalog; +SELECT create_graph('generated_columns'); +NOTICE: graph "generated_columns" has been created + create_graph +-------------- + +(1 row) + +-- +-- GENERATED ALWAYS ... STORED column on a vertex label +-- +SELECT create_vlabel('generated_columns', 'Product'); +NOTICE: VLabel "Product" has been created + create_vlabel +--------------- + +(1 row) + +ALTER TABLE generated_columns."Product" + ADD COLUMN category varchar(25) + GENERATED ALWAYS AS (agtype_access_operator(properties, '"category"')) STORED; +-- CREATE: the generated column is computed from the new properties +SELECT * FROM cypher('generated_columns', $$ + CREATE (p:Product {category: 'disk', type: 'M1234'}) RETURN p +$$) AS (p agtype); + p +---------------------------------------------------------------------------------------------------------- + {"id": 844424930131969, "label": "Product", "properties": {"type": "M1234", "category": "disk"}}::vertex +(1 row) + +-- CREATE where the generation expression yields NULL (no such key) +SELECT * FROM cypher('generated_columns', $$ + CREATE (p:Product {type: 'no-cat'}) RETURN p +$$) AS (p agtype); + p +--------------------------------------------------------------------------------------- + {"id": 844424930131970, "label": "Product", "properties": {"type": "no-cat"}}::vertex +(1 row) + +-- MERGE also computes the generated column +SELECT * FROM cypher('generated_columns', $$ + MERGE (p:Product {category: 'merged'}) RETURN p +$$) AS (p agtype); + p +------------------------------------------------------------------------------------------- + {"id": 844424930131971, "label": "Product", "properties": {"category": "merged"}}::vertex +(1 row) + +-- MERGE again on the same pattern must MATCH (not re-insert); this exercises the +-- matched path where the slot is intentionally left uncleared (issue #2450) +SELECT * FROM cypher('generated_columns', $$ + MERGE (p:Product {category: 'merged'}) RETURN p +$$) AS (p agtype); + p +------------------------------------------------------------------------------------------- + {"id": 844424930131971, "label": "Product", "properties": {"category": "merged"}}::vertex +(1 row) + +-- SET recomputes the generated column from the updated properties +SELECT * FROM cypher('generated_columns', $$ + MATCH (p:Product {category: 'disk'}) SET p.category = 'ssd', p.note = 'x' RETURN p +$$) AS (p agtype); + p +---------------------------------------------------------------------------------------------------------------------- + {"id": 844424930131969, "label": "Product", "properties": {"note": "x", "type": "M1234", "category": "ssd"}}::vertex +(1 row) + +-- The stored generated column always mirrors properties.category +SELECT category, properties FROM generated_columns."Product" ORDER BY category NULLS LAST; + category | properties +----------+--------------------------------------------------- + "merged" | {"category": "merged"} + "ssd" | {"note": "x", "type": "M1234", "category": "ssd"} + | {"type": "no-cat"} +(3 rows) + +-- +-- GENERATED ALWAYS ... STORED column on an edge label +-- +SELECT create_elabel('generated_columns', 'REL'); +NOTICE: ELabel "REL" has been created + create_elabel +--------------- + +(1 row) + +ALTER TABLE generated_columns."REL" + ADD COLUMN kind varchar(25) + GENERATED ALWAYS AS (agtype_access_operator(properties, '"kind"')) STORED; +SELECT * FROM cypher('generated_columns', $$ + CREATE (:Product {category: 'a'})-[r:REL {kind: 'link'}]->(:Product {category: 'b'}) + RETURN r +$$) AS (r agtype); + r +---------------------------------------------------------------------------------------------------------------------------------------- + {"id": 1125899906842625, "label": "REL", "end_id": 844424930131973, "start_id": 844424930131972, "properties": {"kind": "link"}}::edge +(1 row) + +SELECT kind FROM generated_columns."REL"; + kind +-------- + "link" +(1 row) + +-- +-- Plain (non-generated) user column: must not crash; defaults to NULL +-- +SELECT create_vlabel('generated_columns', 'Plain'); +NOTICE: VLabel "Plain" has been created + create_vlabel +--------------- + +(1 row) + +ALTER TABLE generated_columns."Plain" ADD COLUMN tag text; +SELECT * FROM cypher('generated_columns', $$ + CREATE (p:Plain {n: 1}) RETURN p +$$) AS (p agtype); + p +---------------------------------------------------------------------------- + {"id": 1407374883553281, "label": "Plain", "properties": {"n": 1}}::vertex +(1 row) + +SELECT * FROM cypher('generated_columns', $$ + MATCH (p:Plain) SET p.n = 2 RETURN p +$$) AS (p agtype); + p +---------------------------------------------------------------------------- + {"id": 1407374883553281, "label": "Plain", "properties": {"n": 2}}::vertex +(1 row) + +SELECT properties, tag FROM generated_columns."Plain"; + properties | tag +------------+----- + {"n": 2} | +(1 row) + +-- +-- Generated column referencing the tableoid system column: the entity slot must +-- carry tts_tableOid before the generated columns are computed, otherwise the +-- expression sees InvalidOid instead of the label table's OID (issue #2450). +-- +SELECT create_vlabel('generated_columns', 'T'); +NOTICE: VLabel "T" has been created + create_vlabel +--------------- + +(1 row) + +ALTER TABLE generated_columns."T" + ADD COLUMN tbl oid GENERATED ALWAYS AS (tableoid) STORED; +SELECT * FROM cypher('generated_columns', $$ + CREATE (n:T {k: 1}) RETURN n +$$) AS (n agtype); + n +------------------------------------------------------------------------ + {"id": 1688849860263937, "label": "T", "properties": {"k": 1}}::vertex +(1 row) + +SELECT * FROM cypher('generated_columns', $$ + MATCH (n:T) SET n.k = 2 RETURN n +$$) AS (n agtype); + n +------------------------------------------------------------------------ + {"id": 1688849860263937, "label": "T", "properties": {"k": 2}}::vertex +(1 row) + +-- tbl must equal the real label-table OID on both the CREATE and SET paths +SELECT tbl = 'generated_columns."T"'::regclass::oid AS tableoid_ok +FROM generated_columns."T"; + tableoid_ok +------------- + t +(1 row) + +-- +-- Cleanup +-- +SELECT drop_graph('generated_columns', true); +NOTICE: drop cascades to 6 other objects +DETAIL: drop cascades to table generated_columns._ag_label_vertex +drop cascades to table generated_columns._ag_label_edge +drop cascades to table generated_columns."Product" +drop cascades to table generated_columns."REL" +drop cascades to table generated_columns."Plain" +drop cascades to table generated_columns."T" +NOTICE: graph "generated_columns" has been dropped + drop_graph +------------ + +(1 row) + diff --git a/regress/sql/generated_columns.sql b/regress/sql/generated_columns.sql new file mode 100644 index 000000000..5229c28f1 --- /dev/null +++ b/regress/sql/generated_columns.sql @@ -0,0 +1,129 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +/* + * Regression test for issue #2450. + * + * A GENERATED ALWAYS ... STORED column (or any user-added column) on a label + * table adds an attribute AGE's CREATE/MERGE/SET paths do not populate. The + * slot's tuple descriptor is the full relation descriptor, so heap_form_tuple() + * used to read the uninitialized slot entry and segfault. AGE now null-inits the + * entity slot and computes stored generated columns before materializing the + * tuple. Generated columns must be computed on insert and recomputed on update; + * plain user columns must default to NULL rather than crash. + */ + +LOAD 'age'; +SET search_path TO ag_catalog; + +SELECT create_graph('generated_columns'); + +-- +-- GENERATED ALWAYS ... STORED column on a vertex label +-- +SELECT create_vlabel('generated_columns', 'Product'); +ALTER TABLE generated_columns."Product" + ADD COLUMN category varchar(25) + GENERATED ALWAYS AS (agtype_access_operator(properties, '"category"')) STORED; + +-- CREATE: the generated column is computed from the new properties +SELECT * FROM cypher('generated_columns', $$ + CREATE (p:Product {category: 'disk', type: 'M1234'}) RETURN p +$$) AS (p agtype); + +-- CREATE where the generation expression yields NULL (no such key) +SELECT * FROM cypher('generated_columns', $$ + CREATE (p:Product {type: 'no-cat'}) RETURN p +$$) AS (p agtype); + +-- MERGE also computes the generated column +SELECT * FROM cypher('generated_columns', $$ + MERGE (p:Product {category: 'merged'}) RETURN p +$$) AS (p agtype); + +-- MERGE again on the same pattern must MATCH (not re-insert); this exercises the +-- matched path where the slot is intentionally left uncleared (issue #2450) +SELECT * FROM cypher('generated_columns', $$ + MERGE (p:Product {category: 'merged'}) RETURN p +$$) AS (p agtype); + +-- SET recomputes the generated column from the updated properties +SELECT * FROM cypher('generated_columns', $$ + MATCH (p:Product {category: 'disk'}) SET p.category = 'ssd', p.note = 'x' RETURN p +$$) AS (p agtype); + +-- The stored generated column always mirrors properties.category +SELECT category, properties FROM generated_columns."Product" ORDER BY category NULLS LAST; + +-- +-- GENERATED ALWAYS ... STORED column on an edge label +-- +SELECT create_elabel('generated_columns', 'REL'); +ALTER TABLE generated_columns."REL" + ADD COLUMN kind varchar(25) + GENERATED ALWAYS AS (agtype_access_operator(properties, '"kind"')) STORED; + +SELECT * FROM cypher('generated_columns', $$ + CREATE (:Product {category: 'a'})-[r:REL {kind: 'link'}]->(:Product {category: 'b'}) + RETURN r +$$) AS (r agtype); + +SELECT kind FROM generated_columns."REL"; + +-- +-- Plain (non-generated) user column: must not crash; defaults to NULL +-- +SELECT create_vlabel('generated_columns', 'Plain'); +ALTER TABLE generated_columns."Plain" ADD COLUMN tag text; + +SELECT * FROM cypher('generated_columns', $$ + CREATE (p:Plain {n: 1}) RETURN p +$$) AS (p agtype); + +SELECT * FROM cypher('generated_columns', $$ + MATCH (p:Plain) SET p.n = 2 RETURN p +$$) AS (p agtype); + +SELECT properties, tag FROM generated_columns."Plain"; + +-- +-- Generated column referencing the tableoid system column: the entity slot must +-- carry tts_tableOid before the generated columns are computed, otherwise the +-- expression sees InvalidOid instead of the label table's OID (issue #2450). +-- +SELECT create_vlabel('generated_columns', 'T'); +ALTER TABLE generated_columns."T" + ADD COLUMN tbl oid GENERATED ALWAYS AS (tableoid) STORED; + +SELECT * FROM cypher('generated_columns', $$ + CREATE (n:T {k: 1}) RETURN n +$$) AS (n agtype); + +SELECT * FROM cypher('generated_columns', $$ + MATCH (n:T) SET n.k = 2 RETURN n +$$) AS (n agtype); + +-- tbl must equal the real label-table OID on both the CREATE and SET paths +SELECT tbl = 'generated_columns."T"'::regclass::oid AS tableoid_ok +FROM generated_columns."T"; + +-- +-- Cleanup +-- +SELECT drop_graph('generated_columns', true); diff --git a/src/backend/executor/cypher_create.c b/src/backend/executor/cypher_create.c index 40f446bf6..36ef61b32 100644 --- a/src/backend/executor/cypher_create.c +++ b/src/backend/executor/cypher_create.c @@ -396,7 +396,7 @@ static void create_edge(cypher_create_custom_scan_state *css, estate->es_result_relations = &resultRelInfo; - ExecClearTuple(elemTupleSlot); + clear_entity_slot(elemTupleSlot); /* Graph Id for the edge */ id = ExecEvalExpr(node->id_expr_state, econtext, &isNull); @@ -491,7 +491,7 @@ static Datum create_vertex(cypher_create_custom_scan_state *css, estate->es_result_relations = &resultRelInfo; - ExecClearTuple(elemTupleSlot); + clear_entity_slot(elemTupleSlot); /* get the next graphid for this vertex. */ id = ExecEvalExpr(node->id_expr_state, econtext, &isNull); diff --git a/src/backend/executor/cypher_merge.c b/src/backend/executor/cypher_merge.c index 6f2e9376e..9c52073c2 100644 --- a/src/backend/executor/cypher_merge.c +++ b/src/backend/executor/cypher_merge.c @@ -1199,7 +1199,16 @@ static Datum merge_vertex(cypher_merge_custom_scan_state *css, estate->es_result_relations = &resultRelInfo; - ExecClearTuple(elemTupleSlot); + /* + * Only clear and null-init the slot when we will actually insert. On a + * matched MERGE the slot is never materialized (the output is built from + * id/prop directly), so the null-init memset would be wasted work that + * scales with any user-added columns (issue #2450 review follow-up). + */ + if (should_insert) + { + clear_entity_slot(elemTupleSlot); + } /* if we not are going to insert, we need our structure pointers */ if (should_insert == false && @@ -1526,7 +1535,16 @@ static void merge_edge(cypher_merge_custom_scan_state *css, estate->es_result_relations = &resultRelInfo; - ExecClearTuple(elemTupleSlot); + /* + * Only clear and null-init the slot when we will actually insert. On a + * matched MERGE the slot is never materialized (the output is built from + * id/prop directly), so the null-init memset would be wasted work that + * scales with any user-added columns (issue #2450 review follow-up). + */ + if (should_insert) + { + clear_entity_slot(elemTupleSlot); + } /* if we not are going to insert, we need our structure pointers */ if (should_insert == false && diff --git a/src/backend/executor/cypher_set.c b/src/backend/executor/cypher_set.c index fe3633f8d..7a0d48f0c 100644 --- a/src/backend/executor/cypher_set.c +++ b/src/backend/executor/cypher_set.c @@ -21,6 +21,7 @@ #include "common/hashfn.h" #include "executor/executor.h" +#include "executor/nodeModifyTable.h" #include "storage/bufmgr.h" #include "utils/rls.h" @@ -131,6 +132,28 @@ static HeapTuple update_entity_tuple(ResultRelInfo *resultRelInfo, close_indices = true; } ExecStoreVirtualTuple(elemTupleSlot); + + /* + * Recompute any stored generated columns before materializing the heap + * tuple. The slot's tuple descriptor is the full relation descriptor, + * which may include a GENERATED ALWAYS ... STORED column that the SET + * path does not populate; leaving those slot entries uninitialized makes + * heap_form_tuple() segfault on the garbage values (issue #2450). + */ + if (resultRelInfo->ri_RelationDesc->rd_att->constr != NULL && + resultRelInfo->ri_RelationDesc->rd_att->constr->has_generated_stored) + { + /* + * A generation expression may reference the tableoid system column, + * so the slot must carry the relation's OID before we recompute the + * stored generated columns (mirrors PostgreSQL's own ExecUpdate path). + */ + elemTupleSlot->tts_tableOid = + RelationGetRelid(resultRelInfo->ri_RelationDesc); + ExecComputeStoredGenerated(resultRelInfo, estate, elemTupleSlot, + CMD_UPDATE); + } + tuple = ExecFetchSlotHeapTuple(elemTupleSlot, true, NULL); tuple->t_self = old_tuple->t_self; diff --git a/src/backend/executor/cypher_utils.c b/src/backend/executor/cypher_utils.c index 8237cdcce..c697a560c 100644 --- a/src/backend/executor/cypher_utils.c +++ b/src/backend/executor/cypher_utils.c @@ -25,6 +25,7 @@ #include "postgres.h" #include "executor/executor.h" +#include "executor/nodeModifyTable.h" #include "miscadmin.h" #include "nodes/makefuncs.h" #include "parser/parse_relation.h" @@ -128,6 +129,24 @@ void destroy_entity_result_rel_info(ResultRelInfo *result_rel_info) table_close(result_rel_info->ri_RelationDesc, RowExclusiveLock); } +/* + * Clear an entity slot and mark every attribute NULL before AGE fills in the + * columns it manages (id/start_id/end_id/properties). + * + * The slot's tuple descriptor is the full label-table descriptor, which may + * contain columns AGE does not populate -- e.g. a user-added plain column, or a + * GENERATED ALWAYS ... STORED column. Without this, those attributes keep stale + * slot memory and heap_form_tuple() segfaults dereferencing the garbage + * (issue #2450). Plain columns then default to NULL; generated columns are + * recomputed via ExecComputeStoredGenerated() before the tuple is materialized. + */ +void clear_entity_slot(TupleTableSlot *elemTupleSlot) +{ + ExecClearTuple(elemTupleSlot); + memset(elemTupleSlot->tts_isnull, true, + elemTupleSlot->tts_tupleDescriptor->natts * sizeof(bool)); +} + TupleTableSlot *populate_vertex_tts( TupleTableSlot *elemTupleSlot, agtype_value *id, agtype_value *properties) { @@ -139,6 +158,8 @@ TupleTableSlot *populate_vertex_tts( errmsg("vertex id field cannot be NULL"))); } + clear_entity_slot(elemTupleSlot); + properties_isnull = properties == NULL; elemTupleSlot->tts_values[vertex_tuple_id] = GRAPHID_GET_DATUM(id->val.int_value); @@ -174,6 +195,8 @@ TupleTableSlot *populate_edge_tts( errmsg("edge end_id field cannot be NULL"))); } + clear_entity_slot(elemTupleSlot); + properties_isnull = properties == NULL; elemTupleSlot->tts_values[edge_tuple_id] = @@ -318,6 +341,30 @@ HeapTuple insert_entity_tuple_cid(ResultRelInfo *resultRelInfo, HeapTuple tuple = NULL; ExecStoreVirtualTuple(elemTupleSlot); + + /* + * The slot's tuple descriptor is the full relation descriptor, which may + * contain columns AGE does not populate itself -- most notably a + * GENERATED ALWAYS ... STORED column added to the label table. Those slot + * entries are left uninitialized by the create/merge/set paths, so we must + * compute the stored generated columns here before materializing the heap + * tuple. Otherwise heap_form_tuple() reads the uninitialized slot values + * and segfaults dereferencing garbage (issue #2450). + */ + if (resultRelInfo->ri_RelationDesc->rd_att->constr != NULL && + resultRelInfo->ri_RelationDesc->rd_att->constr->has_generated_stored) + { + /* + * A generation expression may reference the tableoid system column, so + * the slot must carry the relation's OID before we compute the stored + * generated columns (mirrors PostgreSQL's own ExecInsert path). + */ + elemTupleSlot->tts_tableOid = + RelationGetRelid(resultRelInfo->ri_RelationDesc); + ExecComputeStoredGenerated(resultRelInfo, estate, elemTupleSlot, + CMD_INSERT); + } + tuple = ExecFetchSlotHeapTuple(elemTupleSlot, true, NULL); /* Check the constraints of the tuple */ diff --git a/src/include/executor/cypher_utils.h b/src/include/executor/cypher_utils.h index ac4b5ea5a..8b65bc964 100644 --- a/src/include/executor/cypher_utils.h +++ b/src/include/executor/cypher_utils.h @@ -119,6 +119,7 @@ typedef struct cypher_merge_custom_scan_state void apply_update_list(CustomScanState *node, cypher_update_information *set_info); +void clear_entity_slot(TupleTableSlot *elemTupleSlot); TupleTableSlot *populate_vertex_tts(TupleTableSlot *elemTupleSlot, agtype_value *id, agtype_value *properties); TupleTableSlot *populate_edge_tts(