Skip to content

Commit bb89dc1

Browse files
authored
Update to official name (#19)
* Use official name * Rename directory * Update pyproject toml * Update import for new elm version * Init compass logger * More name updates * Init compass logger also * Docstring updates * Minor formatting * Docstring updates * Update exceptions * Drop dep on `TEST_DATA_DIR` * Add back in missing imports
1 parent b8c4984 commit bb89dc1

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

60 files changed

+7642
-1680
lines changed

.github/workflows/lint-python.yml

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -5,12 +5,12 @@ on:
55
branches-ignore:
66
- 'gh-pages'
77
paths:
8-
- scraper/**
8+
- compass/**
99
- pyproject.toml
1010
- .github/workflows/lint-python.yml
1111
pull_request:
1212
paths:
13-
- scraper/**
13+
- compass/**
1414
- pyproject.toml
1515
- .github/workflows/lint-python.yml
1616

@@ -24,9 +24,9 @@ jobs:
2424
with:
2525
version: "latest"
2626
args: "check"
27-
src: "./scraper"
27+
src: "./compass"
2828
- uses: astral-sh/ruff-action@v3
2929
with:
3030
version: "latest"
3131
args: "format --check"
32-
src: "./scraper"
32+
src: "./compass"

.gitignore

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -24,7 +24,7 @@ docs/source/jupyter/*.png
2424
docs/source/jupyter/*_*.rst
2525

2626
# Dynamic version
27-
scraper/_version.py
27+
compass/_version.py
2828

2929
# Byte-compiled / optimized / DLL files
3030
__pycache__/
File renamed without changes.

scraper/cli.py renamed to compass/cli.py

Lines changed: 7 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -8,8 +8,8 @@
88
import multiprocessing
99
from pathlib import Path
1010

11-
from scraper import __version__
12-
from scraper.process import process_counties_with_openai
11+
from compass import __version__
12+
from compass.process import process_counties_with_openai
1313

1414

1515
@click.group()
@@ -28,7 +28,7 @@ def main(ctx):
2828
type=click.Path(exists=True),
2929
help="Path to ordinance configuration JSON file. This file "
3030
"should contain any/all the arguments to pass to "
31-
":func:`scraper.process.process_counties_with_openai`.",
31+
":func:`compass.process.process_counties_with_openai`.",
3232
)
3333
@click.option(
3434
"-v",
@@ -43,9 +43,10 @@ def process(config, verbose):
4343
config = json.load(fh)
4444

4545
if verbose:
46-
logger = logging.getLogger("scraper")
47-
logger.addHandler(logging.StreamHandler(stream=sys.stdout))
48-
logger.setLevel(config.get("log_level", "INFO"))
46+
for lib in ["compass", "elm"]:
47+
logger = logging.getLogger(lib)
48+
logger.addHandler(logging.StreamHandler(stream=sys.stdout))
49+
logger.setLevel(config.get("log_level", "INFO"))
4950

5051
# Need to set start method to "spawn" instead of "fork" for unix
5152
# systems. If this call is not present, software hangs when process
Lines changed: 10 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -3,12 +3,13 @@
33
import logging
44

55
from elm.web.document import PDFDocument
6-
from elm.web.google_search import google_results_as_docs, filter_documents
6+
from elm.web.search.run import web_search_links_as_docs
7+
from elm.web.utilities import filter_documents
78

8-
from scraper.llm import StructuredLLMCaller
9-
from scraper.extraction import check_for_ordinance_info
10-
from scraper.services.threaded import TempFileCache
11-
from scraper.validation.location import CountyValidator
9+
from compass.llm import StructuredLLMCaller
10+
from compass.extraction import check_for_ordinance_info
11+
from compass.services.threaded import TempFileCache
12+
from compass.validation.location import CountyValidator
1213

1314

1415
logger = logging.getLogger(__name__)
@@ -34,7 +35,7 @@ async def download_county_ordinance(
3435
3536
Parameters
3637
----------
37-
location : :class:`scraper.utilities.location.Location`
38+
location : :class:`compass.utilities.location.Location`
3839
Location object representing the county.
3940
text_splitter : obj, optional
4041
Instance of an object that implements a `split_text` method.
@@ -50,15 +51,15 @@ async def download_county_ordinance(
5051
Dictionary of keyword-argument pairs to initialize
5152
:class:`elm.web.file_loader.AsyncFileLoader` with. If found, the
5253
"pw_launch_kwargs" key in these will also be used to initialize
53-
the :class:`elm.web.google_search.PlaywrightGoogleLinkSearch`
54+
the :class:`elm.web.search.google.PlaywrightGoogleLinkSearch`
5455
used for the Google URL search. By default, ``None``.
5556
browser_semaphore : :class:`asyncio.Semaphore`, optional
5657
Semaphore instance that can be used to limit the number of
5758
playwright browsers open concurrently. If ``None``, no limits
5859
are applied. By default, ``None``.
5960
**kwargs
6061
Keyword-value pairs used to initialize a
61-
`scraper.llm.LLMCaller` instance.
62+
`compass.llm.LLMCaller` instance.
6263
6364
Returns
6465
-------
@@ -101,7 +102,7 @@ async def _docs_from_google_search(
101102
"file_cache_coroutine": TempFileCache.call,
102103
}
103104
)
104-
return await google_results_as_docs(
105+
return await web_search_links_as_docs(
105106
queries,
106107
num_urls=num_urls,
107108
browser_semaphore=browser_semaphore,

compass/exceptions.py

Lines changed: 24 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,24 @@
1+
"""Custom Exceptions and Errors for COMPASS"""
2+
3+
import logging
4+
5+
6+
logger = logging.getLogger("compass")
7+
8+
9+
class COMPASSError(Exception):
10+
"""Generic COMPASS Error"""
11+
12+
def __init__(self, *args, **kwargs):
13+
"""Init exception and broadcast message to logger"""
14+
super().__init__(*args, **kwargs)
15+
if args:
16+
logger.error(str(args[0]), stacklevel=2)
17+
18+
19+
class COMPASSNotInitializedError(COMPASSError):
20+
"""COMPASS not initialized error"""
21+
22+
23+
class COMPASSValueError(COMPASSError, ValueError):
24+
"""COMPASS ValueError"""
Lines changed: 13 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -3,13 +3,13 @@
33
import logging
44
from warnings import warn
55

6-
from scraper.llm import LLMCaller, StructuredLLMCaller
7-
from scraper.extraction.date import DateExtractor
8-
from scraper.extraction.ordinance import (
6+
from compass.llm import LLMCaller, StructuredLLMCaller
7+
from compass.extraction.date import DateExtractor
8+
from compass.extraction.ordinance import (
99
OrdinanceValidator,
1010
OrdinanceExtractor,
1111
)
12-
from scraper.extraction.parse import StructuredOrdinanceParser
12+
from compass.extraction.parse import StructuredOrdinanceParser
1313

1414

1515
logger = logging.getLogger(__name__)
@@ -33,7 +33,7 @@ async def check_for_ordinance_info(doc, text_splitter, **kwargs):
3333
input.
3434
**kwargs
3535
Keyword-value pairs used to initialize a
36-
`scraper.llm.LLMCaller` instance.
36+
`compass.llm.LLMCaller` instance.
3737
3838
Returns
3939
-------
@@ -72,7 +72,7 @@ async def extract_ordinance_text_with_llm(doc, text_splitter, extractor):
7272
A document known to contain ordinance information. This means it
7373
must contain an ``"ordinance_text"`` key in the metadata. You
7474
can run
75-
:func:`~scraper.extraction.apply.check_for_ordinance_info`
75+
:func:`~compass.extraction.apply.check_for_ordinance_info`
7676
to have this attribute populated automatically for documents
7777
that are found to contain ordinance data. Note that if the
7878
document's metadata does not contain the ``"ordinance_text"``
@@ -82,8 +82,8 @@ async def extract_ordinance_text_with_llm(doc, text_splitter, extractor):
8282
The method should take text as input (str) and return a list
8383
of text chunks. Langchain's text splitters should work for this
8484
input.
85-
extractor : scraper.extraction.ordinance.OrdinanceExtractor
86-
Instance of `~scraper.extraction.ordinance.OrdinanceExtractor`
85+
extractor : compass.extraction.ordinance.OrdinanceExtractor
86+
Instance of `~compass.extraction.ordinance.OrdinanceExtractor`
8787
used for ordinance text extraction.
8888
8989
Returns
@@ -130,7 +130,7 @@ async def extract_ordinance_text_with_ngram_validation(
130130
A document known to contain ordinance information. This means it
131131
must contain an ``"ordinance_text"`` key in the metadata. You
132132
can run
133-
:func:`~scraper.extraction.apply.check_for_ordinance_info`
133+
:func:`~compass.extraction.apply.check_for_ordinance_info`
134134
to have this attribute populated automatically for documents
135135
that are found to contain ordinance data. Note that if the
136136
document's metadata does not contain the ``"ordinance_text"``
@@ -156,7 +156,7 @@ async def extract_ordinance_text_with_ngram_validation(
156156
By default, ``0.95``.
157157
**kwargs
158158
Keyword-value pairs used to initialize a
159-
`scraper.llm.LLMCaller` instance.
159+
`compass.llm.LLMCaller` instance.
160160
161161
Returns
162162
-------
@@ -199,7 +199,7 @@ async def _extract_with_ngram_check(
199199
ngram_fraction_threshold=0.95,
200200
):
201201
"""Extract ordinance info from doc and validate using ngrams."""
202-
from scraper.extraction.ngrams import sentence_ngram_containment # noqa
202+
from compass.extraction.ngrams import sentence_ngram_containment # noqa
203203

204204
source = doc.metadata.get("source", "Unknown")
205205
og_text = doc.metadata["ordinance_text"]
@@ -277,14 +277,14 @@ async def extract_ordinance_values(doc, **kwargs):
277277
A document known to contain ordinance text. This means it must
278278
contain a ``"cleaned_ordinance_text"`` key in the metadata. You
279279
can run
280-
:func:`~scraper.extraction.apply.extract_ordinance_text_with_llm`
280+
:func:`~compass.extraction.apply.extract_ordinance_text_with_llm`
281281
to have this attribute populated automatically for documents
282282
that are found to contain ordinance data. Note that if the
283283
document's metadata does not contain the
284284
``"cleaned_ordinance_text"`` key, it will not be processed.
285285
**kwargs
286286
Keyword-value pairs used to initialize a
287-
`scraper.llm.LLMCaller` instance.
287+
`compass.llm.LLMCaller` instance.
288288
289289
Returns
290290
-------
Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -30,7 +30,7 @@ def __init__(self, structured_llm_caller):
3030
3131
Parameters
3232
----------
33-
structured_llm_caller : scraper.llm.StructuredLLMCaller
33+
structured_llm_caller : compass.llm.StructuredLLMCaller
3434
StructuredLLMCaller instance. Used for structured validation
3535
queries.
3636
"""

0 commit comments

Comments
 (0)