66
77"""
88
9+ import sqlalchemy as sa
910from alembic import op
1011
1112# revision identifiers, used by Alembic.
@@ -19,6 +20,122 @@ def upgrade():
1920 op .drop_constraint ("ux_neb_specification_keys" , "neb_specification" , type_ = "unique" )
2021 op .drop_constraint ("ux_reaction_specification_keys" , "reaction_specification" , type_ = "unique" )
2122
23+ # Find duplicate rows that exist because optimization_specification_id is NULL,
24+ # then repoint references to the lowest-id (canonical) row and delete the duplicates
25+ op .execute (sa .text ("""
26+ WITH ranked AS (
27+ SELECT
28+ id,
29+ MIN(id) OVER (
30+ PARTITION BY specification_hash, singlepoint_specification_id
31+ ) AS canonical_id
32+ FROM neb_specification
33+ WHERE optimization_specification_id IS NULL
34+ ),
35+ dupes AS (
36+ SELECT id AS duplicate_id, canonical_id
37+ FROM ranked
38+ WHERE id <> canonical_id
39+ )
40+ UPDATE neb_record r
41+ SET specification_id = d.canonical_id
42+ FROM dupes d
43+ WHERE r.specification_id = d.duplicate_id;
44+ """ ))
45+
46+ op .execute (sa .text ("""
47+ WITH ranked AS (
48+ SELECT
49+ id,
50+ MIN(id) OVER (
51+ PARTITION BY specification_hash, singlepoint_specification_id
52+ ) AS canonical_id
53+ FROM neb_specification
54+ WHERE optimization_specification_id IS NULL
55+ ),
56+ dupes AS (
57+ SELECT id AS duplicate_id, canonical_id
58+ FROM ranked
59+ WHERE id <> canonical_id
60+ )
61+ UPDATE neb_dataset_specification rds
62+ SET specification_id = d.canonical_id
63+ FROM dupes d
64+ WHERE rds.specification_id = d.duplicate_id;
65+ """ ))
66+
67+ op .execute (sa .text ("""
68+ WITH ranked AS (
69+ SELECT
70+ id,
71+ MIN(id) OVER (
72+ PARTITION BY specification_hash, singlepoint_specification_id
73+ ) AS canonical_id
74+ FROM neb_specification
75+ WHERE optimization_specification_id IS NULL
76+ )
77+ DELETE FROM neb_specification r
78+ USING ranked d
79+ WHERE r.id = d.id
80+ AND d.id <> d.canonical_id;
81+ """ ))
82+
83+ # Now for reactions, where either the singlepoint or the optimization specification id can be NULL
84+ op .execute (sa .text ("""
85+ WITH ranked AS (
86+ SELECT
87+ id,
88+ MIN(id) OVER (
89+ PARTITION BY specification_hash, singlepoint_specification_id, optimization_specification_id
90+ ) AS canonical_id
91+ FROM reaction_specification
92+ ),
93+ dupes AS (
94+ SELECT id AS duplicate_id, canonical_id
95+ FROM ranked
96+ WHERE id <> canonical_id
97+ )
98+ UPDATE reaction_record r
99+ SET specification_id = d.canonical_id
100+ FROM dupes d
101+ WHERE r.specification_id = d.duplicate_id;
102+ """ ))
103+
104+ op .execute (sa .text ("""
105+ WITH ranked AS (
106+ SELECT
107+ id,
108+ MIN(id) OVER (
109+ PARTITION BY specification_hash, singlepoint_specification_id, optimization_specification_id
110+ ) AS canonical_id
111+ FROM reaction_specification
112+ ),
113+ dupes AS (
114+ SELECT id AS duplicate_id, canonical_id
115+ FROM ranked
116+ WHERE id <> canonical_id
117+ )
118+ UPDATE reaction_dataset_specification rds
119+ SET specification_id = d.canonical_id
120+ FROM dupes d
121+ WHERE rds.specification_id = d.duplicate_id;
122+ """ ))
123+
124+ op .execute (sa .text ("""
125+ WITH ranked AS (
126+ SELECT
127+ id,
128+ MIN(id) OVER (
129+ PARTITION BY specification_hash, singlepoint_specification_id, optimization_specification_id
130+ ) AS canonical_id
131+ FROM reaction_specification
132+ )
133+ DELETE FROM reaction_specification r
134+ USING ranked d
135+ WHERE r.id = d.id
136+ AND d.id <> d.canonical_id;
137+ """ ))
138+
22139 op .create_unique_constraint (
23140 "ux_neb_specification_keys" ,
24141 "neb_specification" ,
0 commit comments