|
9 | 9 | from compass.llm import StructuredLLMCaller |
10 | 10 | from compass.extraction import check_for_ordinance_info |
11 | 11 | from compass.services.threaded import TempFileCache |
12 | | -from compass.validation.location import CountyValidator |
| 12 | +from compass.validation.location import ( |
| 13 | + CountyJurisdictionValidator, |
| 14 | + CountyNameValidator, |
| 15 | + CountyValidator, |
| 16 | +) |
13 | 17 |
|
14 | 18 |
|
15 | 19 | logger = logging.getLogger(__name__) |
@@ -153,6 +157,24 @@ def _sort_final_ord_docs(all_ord_docs): |
153 | 157 |
|
154 | 158 |
|
155 | 159 | def _ord_doc_sorting_key(doc): |
156 | | - """All text sorting key""" |
157 | | - year, month, day = doc.metadata.get("date", (-1, -1, -1)) |
158 | | - return year, isinstance(doc, PDFDocument), -1 * len(doc.text), month, day |
| 160 | + """Sorting key for documents. The higher this value, the better""" |
| 161 | + latest_year, latest_month, latest_day = doc.metadata.get( |
| 162 | + "date", (-1, -1, -1) |
| 163 | + ) |
| 164 | + prefer_pdf_files = isinstance(doc, PDFDocument) |
| 165 | + highest_name_score = doc.metadata.get( |
| 166 | + CountyNameValidator.META_SCORE_KEY, 0 |
| 167 | + ) |
| 168 | + highest_jurisdiction_score = doc.metadata.get( |
| 169 | + CountyJurisdictionValidator.META_SCORE_KEY, 0 |
| 170 | + ) |
| 171 | + shortest_text_length = -1 * len(doc.text) |
| 172 | + return ( |
| 173 | + latest_year, |
| 174 | + prefer_pdf_files, |
| 175 | + highest_name_score, |
| 176 | + highest_jurisdiction_score, |
| 177 | + shortest_text_length, |
| 178 | + latest_month, |
| 179 | + latest_day, |
| 180 | + ) |
0 commit comments