Skip to content

Commit e963fca

Browse files
committed
Update document sorting key
1 parent e74c088 commit e963fca

File tree

1 file changed

+26
-4
lines changed

1 file changed

+26
-4
lines changed

compass/download.py

Lines changed: 26 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -9,7 +9,11 @@
99
from compass.llm import StructuredLLMCaller
1010
from compass.extraction import check_for_ordinance_info
1111
from compass.services.threaded import TempFileCache
12-
from compass.validation.location import CountyValidator
12+
from compass.validation.location import (
13+
CountyJurisdictionValidator,
14+
CountyNameValidator,
15+
CountyValidator,
16+
)
1317

1418

1519
logger = logging.getLogger(__name__)
@@ -153,6 +157,24 @@ def _sort_final_ord_docs(all_ord_docs):
153157

154158

155159
def _ord_doc_sorting_key(doc):
156-
"""All text sorting key"""
157-
year, month, day = doc.metadata.get("date", (-1, -1, -1))
158-
return year, isinstance(doc, PDFDocument), -1 * len(doc.text), month, day
160+
"""Sorting key for documents. The higher this value, the better"""
161+
latest_year, latest_month, latest_day = doc.metadata.get(
162+
"date", (-1, -1, -1)
163+
)
164+
prefer_pdf_files = isinstance(doc, PDFDocument)
165+
highest_name_score = doc.metadata.get(
166+
CountyNameValidator.META_SCORE_KEY, 0
167+
)
168+
highest_jurisdiction_score = doc.metadata.get(
169+
CountyJurisdictionValidator.META_SCORE_KEY, 0
170+
)
171+
shortest_text_length = -1 * len(doc.text)
172+
return (
173+
latest_year,
174+
prefer_pdf_files,
175+
highest_name_score,
176+
highest_jurisdiction_score,
177+
shortest_text_length,
178+
latest_month,
179+
latest_day,
180+
)

0 commit comments

Comments
 (0)