11import csv
22from collections import defaultdict
3- from functools import cmp_to_key
43from io import IOBase , StringIO
5- from typing import Dict , Iterable , Set
4+ from typing import Any , Dict , Iterable , Set
65
76from cassis import Cas
8- from cassis .typesystem import FEATURE_BASE_NAME_SOFA , TYPE_NAME_ANNOTATION , FeatureStructure , Type , is_array
7+ from cassis .typesystem import (
8+ FEATURE_BASE_NAME_SOFA ,
9+ TYPE_NAME_ANNOTATION ,
10+ FeatureStructure ,
11+ Type ,
12+ is_annotation ,
13+ is_array ,
14+ )
915
1016_EXCLUDED_FEATURES = {FEATURE_BASE_NAME_SOFA }
1117_NULL_VALUE = "<NULL>"
@@ -74,7 +80,7 @@ def _render_feature_structure(
7480) -> []:
7581 row_data = [fs_id_to_anchor .get (fs .xmiID )]
7682
77- if max_covered_text > 0 and _is_annotation_fs (fs ):
83+ if max_covered_text > 0 and is_annotation (fs ):
7884 covered_text = fs .get_covered_text ()
7985 if covered_text and len (covered_text ) >= max_covered_text :
8086 prefix = covered_text [0 : (max_covered_text // 2 )]
@@ -143,7 +149,19 @@ def _generate_anchors(
143149 for t in types_sorted :
144150 type_ = cas .typesystem .get_type (t )
145151 feature_structures = all_feature_structures_by_type [type_ .name ]
146- feature_structures .sort (key = cmp_to_key (lambda a , b : _compare_fs (type_ , a , b )))
152+ # Sort deterministically using a stable key function. We avoid using
153+ # the comparator-based approach to prevent unpredictable comparisons
154+ # between mixed types during lexicographic tuple comparisons.
155+ feature_structures .sort (
156+ key = lambda fs : (
157+ 0 ,
158+ fs .begin ,
159+ fs .end ,
160+ str (_feature_structure_hash (type_ , fs )),
161+ )
162+ if is_annotation (fs )
163+ else (1 , None , None , str (_feature_structure_hash (type_ , fs )))
164+ )
147165
148166 for fs in feature_structures :
149167 add_index_mark = mark_indexed and fs in indexed_feature_structures
@@ -159,7 +177,7 @@ def _generate_anchors(
159177def _generate_anchor (fs : FeatureStructure , add_index_mark : bool ) -> str :
160178 anchor = fs .type .name .rsplit ("." , 2 )[- 1 ] # Get the short type name (no package)
161179
162- if _is_annotation_fs (fs ):
180+ if is_annotation (fs ):
163181 anchor += f"[{ fs .begin } -{ fs .end } ]"
164182
165183 if add_index_mark :
@@ -171,7 +189,7 @@ def _generate_anchor(fs: FeatureStructure, add_index_mark: bool) -> str:
171189 return anchor
172190
173191
174- def _is_primitive_value (value : any ) -> bool :
192+ def _is_primitive_value (value : Any ) -> bool :
175193 return type (value ) in (int , float , bool , str )
176194
177195
@@ -182,65 +200,34 @@ def _is_array_fs(fs: FeatureStructure) -> bool:
182200 return is_array (fs .type )
183201
184202
185- def _is_annotation_fs (fs : FeatureStructure ) -> bool :
186- return hasattr (fs , "begin" ) and isinstance (fs .begin , int ) and hasattr (fs , "end" ) and isinstance (fs .end , int )
187-
188-
189- def _compare_fs (type_ : Type , a : FeatureStructure , b : FeatureStructure ) -> int :
190- if a is b :
191- return 0
192-
193- # duck-typing check if something is a annotation - if yes, try sorting by offets
194- fs_a_is_annotation = _is_annotation_fs (a )
195- fs_b_is_annotation = _is_annotation_fs (b )
196- if fs_a_is_annotation != fs_b_is_annotation :
197- return - 1
198- if fs_a_is_annotation and fs_b_is_annotation :
199- begin_cmp = a .begin - b .begin
200- if begin_cmp != 0 :
201- return begin_cmp
202-
203- begin_cmp = b .end - a .end
204- if begin_cmp != 0 :
205- return begin_cmp
206-
207- # Alternative implementation
208- # Doing arithmetics on the hash value as we have done with the offsets does not work because the hashes do not
209- # provide a global order. Hence, we map all results to 0, -1 and 1 here.
210- fs_hash_a = _feature_structure_hash (type_ , a )
211- fs_hash_b = _feature_structure_hash (type_ , b )
212- if fs_hash_a == fs_hash_b :
213- return 0
214- return - 1 if fs_hash_a < fs_hash_b else 1
215-
216-
def _feature_structure_hash(type_: Type, fs: FeatureStructure) -> str:
    """Return a deterministic string rendering of the contents of ``fs``.

    The result serves as a tie-breaker when sorting feature structures. It is
    built only from feature values, never from object identity or Python's
    ``hash``. Despite the historical name, the return value is a string, not
    a numeric hash.

    Args:
        type_: Type whose ``all_features`` decide which features of ``fs`` are
            rendered and in which order.
        fs: Feature structure (or array feature structure) to render.

    Returns:
        For an array, its elements rendered as ``[e1,e2,...]``. Otherwise the
        rendered values of all features except the sofa feature, joined
        with ``|``.
    """

    def _render_val(v) -> str:
        # Render a single feature value (or array element) as a string.
        if v is None:
            return _NULL_VALUE
        if _is_primitive_value(v):
            return str(v)
        if _is_array_fs(v):
            # Elements are joined with ','; a missing element list renders as "[]".
            return "[" + ",".join(_render_val(e) for e in (v.elements or [])) + "]"
        # Feature structure reference: only the type name (plus offsets for
        # annotations) is rendered; the referenced structure's own features
        # are not followed.
        try:
            if is_annotation(v):
                return f"{v.type.name}@{v.begin}-{v.end}"
            return f"{v.type.name}"
        except Exception:
            # Deliberate best-effort fallback for values without the expected
            # attributes.
            return str(v)

    if _is_array_fs(fs):
        # Hand over the array FS itself. A bare ``fs.elements`` list matches
        # none of the branches in ``_render_val`` and would not yield the
        # element-wise "[...]" rendering.
        return _render_val(fs)

    return "|".join(
        _render_val(getattr(fs, feature.name))
        for feature in type_.all_features
        if feature.name != FEATURE_BASE_NAME_SOFA
    )
0 commit comments