From cbed384664ddb6f3de3ce55762ec913c4337107f Mon Sep 17 00:00:00 2001 From: Guillermo-Vargas_shell Date: Sat, 12 Apr 2025 15:39:33 -0500 Subject: [PATCH 1/3] Using concurrent futures for processing multiple las files in parallel --- .gitignore | 3 +++ welly/project.py | 49 ++++++++++++++++++++++++++++++++++-------------- 2 files changed, 38 insertions(+), 14 deletions(-) diff --git a/.gitignore b/.gitignore index e8d514ab..e9684762 100644 --- a/.gitignore +++ b/.gitignore @@ -71,3 +71,6 @@ csv-plugin.xml # temporary test folder tests/temp + +# UV related files +uv.lock \ No newline at end of file diff --git a/welly/project.py b/welly/project.py index 9befd2b5..db16b338 100644 --- a/welly/project.py +++ b/welly/project.py @@ -21,6 +21,20 @@ from .plot import plot_kdes_project, plot_map_project +def _load_well_from_las(filepath, remap=None, funcs=None, data=True, req=None, alias=None, encoding=None, printfname=None, index=None, **kwargs): + """Helper function for concurrent well loading.""" + return Well.from_las(filepath, + remap=remap, + funcs=funcs, + data=data, + req=req, + alias=alias, + encoding=encoding, + printfname=printfname, + index=index, + **kwargs) + + class Project(object): """ Just a list of Well objects. @@ -163,6 +177,9 @@ def from_las(cls, Returns: project. The project object. """ + import concurrent.futures + from tqdm import tqdm + if max is None: max = 1e12 if (req is not None) and (alias is None): @@ -180,20 +197,24 @@ def from_las(cls, else: uris = path # It's a list-like of files and/or URLs. - wells = [Well.from_las(f, - remap=remap, - funcs=funcs, - data=data, - req=req, - alias=alias, - encoding=encoding, - printfname=printfname, - index=index, - **kwargs, - ) - for i, f in tqdm(enumerate(uris)) if i < max] - - return cls(list(filter(None, wells))) + # Limit to the maximum number of wells requested + uris = [f for i, f in enumerate(uris) if i < max] + + wells = [] + with concurrent.futures.ProcessPoolExecutor() as executor: + # Submit all tasks and create a mapping of futures to original indices + future_to_idx = {executor.submit(_load_well_from_las, uri, remap=remap, funcs=funcs, data=data, req=req, alias=alias, encoding=encoding, printfname=printfname, index=index, **kwargs): i for i, uri in enumerate(uris)} + + # Use tqdm to show a progress bar + for future in tqdm(concurrent.futures.as_completed(future_to_idx), total=len(uris), desc="Loading wells"): + try: + well = future.result() + if well is not None: + wells.append(well) + except Exception as e: + print(f"Error loading well: {e}") + + return cls(wells, source=path) def add_canstrat_striplogs(self, path, uwi_transform=None, name='canstrat'): """ From a6a3bea0387975bb6addcb79ef3f35484840e3ac Mon Sep 17 00:00:00 2001 From: Guillermo-Vargas_shell Date: Sun, 13 Apr 2025 01:21:55 -0500 Subject: [PATCH 2/3] Fixing URL sourced LAS for testing --- .gitignore | 3 ++- pyproject.toml | 9 +++++---- welly/las.py | 10 +++++----- welly/project.py | 26 ++++++++++++++++---------- welly/well.py | 14 +++++++++----- 5 files changed, 37 insertions(+), 25 deletions(-) diff --git a/.gitignore b/.gitignore index e9684762..6aa2707b 100644 --- a/.gitignore +++ b/.gitignore @@ -73,4 +73,5 @@ csv-plugin.xml tests/temp # UV related files -uv.lock \ No newline at end of file +uv.lock +.python-version \ No newline at end of file diff --git a/pyproject.toml b/pyproject.toml index 92330e00..6be62264 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,7 +1,3 @@ -[build-system] -requires = ["hatchling", "hatch-vcs"] -build-backend = "hatchling.build" - [project] name = "welly" dynamic = ["version"] @@ -22,6 +18,7 @@ classifiers = [ "License :: OSI Approved :: Apache Software License", "Operating System :: OS Independent" ] +requires-python = ">=3.9" dependencies = [ "numpy", "scipy", @@ -61,3 +58,7 @@ testpaths = ["tests"] [tool.setuptools_scm] write_to = "welly/_version.py" git_describe_command = "git describe --dirty --tags --long --match v* --first-parent" + +[build-system] +requires = ["hatchling", "hatch-vcs"] +build-backend = "hatchling.build" diff --git a/welly/las.py b/welly/las.py index 07beda78..3b9fc781 100644 --- a/welly/las.py +++ b/welly/las.py @@ -267,7 +267,7 @@ def datasets_to_las(path, datasets, **kwargs): for dataset_name, df in datasets.items(): # dataset is the header - if dataset_name == 'Header': + if (dataset_name == 'Header'): # parse header pd.DataFrame to LASFile for section_name in set(df.section.values): # get header section df @@ -482,11 +482,11 @@ def file_from_url(url): text_file (StringIO): an in-memory stream for text. """ try: - text_file = StringIO(request.urlopen(url).read().decode()) + with request.urlopen(url) as response: + content = response.read().decode() + return content # Return the content directly instead of wrapping in StringIO except error.HTTPError as e: - raise Exception('Could not retrieve url: ', e) - - return text_file + raise Exception(f'Could not retrieve url: {url} - {e}') def get_las_version(las): diff --git a/welly/project.py b/welly/project.py index db16b338..f9f9a41e 100644 --- a/welly/project.py +++ b/welly/project.py @@ -23,16 +23,22 @@ def _load_well_from_las(filepath, remap=None, funcs=None, data=True, req=None, alias=None, encoding=None, printfname=None, index=None, **kwargs): """Helper function for concurrent well loading.""" - return Well.from_las(filepath, - remap=remap, - funcs=funcs, - data=data, - req=req, - alias=alias, - encoding=encoding, - printfname=printfname, - index=index, - **kwargs) + try: + # Handle URLs directly in the subprocess to avoid file handle issues + # when passing file objects between processes + return Well.from_las(filepath, + remap=remap, + funcs=funcs, + data=data, + req=req, + alias=alias, + encoding=encoding, + printfname=printfname, + index=index, + **kwargs) + except Exception as e: + print(f"Error loading well {filepath}: {e}") + return None class Project(object): diff --git a/welly/well.py b/welly/well.py index 6a161a4a..7c0501cc 100644 --- a/welly/well.py +++ b/welly/well.py @@ -298,11 +298,15 @@ def from_las(cls, if printfname: print(fname) - # If https URL is passed try reading and formatting it to text file. - if re.match(r'https?://.+\..+/.+?', fname) is not None: - fname = file_from_url(fname) - - datasets = from_las(fname, encoding=encoding, **kwargs) + # If https URL is passed, download the content + is_url = re.match(r'https?://.+\..+/.+?', fname) is not None + if is_url: + content = file_from_url(fname) + # Pass the content string directly to from_las + datasets = from_las(content, encoding=encoding, **kwargs) + else: + # Regular file path + datasets = from_las(fname, encoding=encoding, **kwargs) # Create well from datasets. well = cls.from_datasets(datasets, From 0e3f8127303275d55b709c7bafa9556bdb2ee6bf Mon Sep 17 00:00:00 2001 From: Frank Male Date: Tue, 15 Jul 2025 23:14:49 -0400 Subject: [PATCH 3/3] Update welly/las.py --- welly/las.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/welly/las.py b/welly/las.py index 3b9fc781..4c05b41a 100644 --- a/welly/las.py +++ b/welly/las.py @@ -267,7 +267,7 @@ def datasets_to_las(path, datasets, **kwargs): for dataset_name, df in datasets.items(): # dataset is the header - if (dataset_name == 'Header'): + if dataset_name == 'Header': # parse header pd.DataFrame to LASFile for section_name in set(df.section.values): # get header section df