@@ -266,4 +266,97 @@ def test_readingorder_multipage():
266266 pred_elements = romodel.predict_reading_order(page_elements=true_elements)
267267 for true_elem, pred_elem in zip(true_elements, pred_elements):
268268 print("true: ", str(true_elem), ", pred: ", str(pred_elem))
269- """
269+ """
270+
def test_close_elements_with_floating_point_precision():
    """
    Regression test for GitHub issue #140: nearly-touching vertical edges.

    The values reported in the issue are reused verbatim:
        pelem_j.t: 93.79399999999998
        pelem_i.b: 93.793212890625

    Because the second box's ``t`` is marginally larger than the first box's
    ``b``, the issue reports a y-range with y_min > y_max, which surfaced as
    an RTreeError ("Coordinates must not have minimums more than maximums").
    The test passes when the predictor returns both elements without raising.
    """
    from docling_core.types.doc.base import Size, CoordOrigin
    from docling_core.types.doc.labels import DocItemLabel

    def build_element(cid, text, left, right, top, bottom):
        # Every element gets its own Size instance; nothing is shared.
        return PageElement(
            cid=cid,
            text=text,
            page_no=0,
            page_size=Size(width=612, height=792),
            label=DocItemLabel.TEXT,
            l=left,
            r=right,
            t=top,
            b=bottom,
            coord_origin=CoordOrigin.BOTTOMLEFT,
        )

    # NOTE(review): t < b on both boxes while coord_origin is BOTTOMLEFT --
    # confirm this orientation is the one intended for the reproduction.
    first = build_element(
        cid=0,
        text="Element I",
        left=64.34620666503906,
        right=533.4917602539062,
        top=80.0,
        bottom=93.793212890625,  # just below the second element's t
    )
    second = build_element(
        cid=1,
        text="Element J",
        left=66.492,
        right=525.236,
        top=93.79399999999998,  # exceeds first.b by less than 1e-3
        bottom=110.0,
    )

    predictor = ReadingOrderPredictor()

    # Must complete without an RTreeError being raised.
    ordered = predictor.predict_reading_order(page_elements=[first, second])

    # Both input elements must be present in the prediction.
    assert len(ordered) == 2
323+
324+
def test_identical_boundaries():
    """
    Regression test for issue #140: two elements sharing the same t and b.

    The boxes sit side by side (disjoint l/r ranges) with exactly equal
    vertical boundaries; the predictor must return both of them.
    """
    from docling_core.types.doc.base import Size, CoordOrigin
    from docling_core.types.doc.labels import DocItemLabel

    # Vertical extent shared by both elements.
    # NOTE(review): t < b while coord_origin is BOTTOMLEFT -- confirm intended.
    shared_t = 100.0
    shared_b = 150.0

    def build_element(cid, text, left, right):
        # Every element gets its own Size instance; nothing is shared.
        return PageElement(
            cid=cid,
            text=text,
            page_no=0,
            page_size=Size(width=612, height=792),
            label=DocItemLabel.TEXT,
            l=left,
            r=right,
            t=shared_t,
            b=shared_b,
            coord_origin=CoordOrigin.BOTTOMLEFT,
        )

    left_box = build_element(cid=0, text="Element I", left=100.0, right=200.0)
    right_box = build_element(cid=1, text="Element J", left=250.0, right=350.0)

    predictor = ReadingOrderPredictor()
    ordered = predictor.predict_reading_order(page_elements=[left_box, right_box])

    assert len(ordered) == 2
0 commit comments