Skip to content

Commit 227b6b3

Browse files
committed
Remove join_util, add JoinManager tests (wip)
1 parent 54939ac commit 227b6b3

File tree

4 files changed

+109
-2124
lines changed

4 files changed

+109
-2124
lines changed

pygeoapi/api/joins.py

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -30,7 +30,10 @@
3030
from typing import Any
3131
from datetime import datetime, timedelta, timezone
3232

33-
from pygeoapi import l10n, join_util
33+
from pygeoapi import l10n
34+
from pygeoapi.join.manager import (
35+
JoinManager, JoinSourceNotFoundError, JoinSourceMissingError
36+
)
3437
from pygeoapi.api import (
3538
APIRequest, API, SYSTEM_LOCALE, FORMAT_TYPES,
3639
F_JSON, F_JSONLD, F_HTML, HTTPStatus

pygeoapi/join/manager.py

Lines changed: 105 additions & 81 deletions
Original file line numberDiff line numberDiff line change
@@ -175,18 +175,24 @@ def _build_refs(self):
175175
with self._db() as db:
176176
for file in self.source_dir.iterdir():
177177
if file.is_file() and _SOURCE_FILE_PATTERN.match(file.name):
178-
with open(file, 'r') as f:
179-
data = json.load(f)
180-
doc = {
181-
'id': data['id'],
182-
'collectionId': data['collectionId'],
183-
'timeStamp': data['timeStamp'],
184-
'joinSource': data['joinSource'],
185-
'ref': str(file)
186-
}
187-
q = Query()
188-
db.upsert(doc, q.id == data['id'] & q.collectionId == data['collectionId']) # noqa
189-
result.setdefault(data['collectionId'], {})[data['id']] = doc # noqa
178+
with FileLock(file.with_suffix(file.suffix + '.lock')):
179+
with open(file, 'r') as f:
180+
try:
181+
data = json.load(f)
182+
doc = {
183+
'id': data['id'],
184+
'collectionId': data['collectionId'],
185+
'timeStamp': data['timeStamp'],
186+
'joinSource': data['joinSource'],
187+
'ref': str(file)
188+
}
189+
except Exception as e:
190+
# Ignore file if not valid JSON
191+
LOGGER.debug(str(e), exc_info=True)
192+
continue
193+
q = Query()
194+
db.upsert(doc, (q.id == data['id']) & (q.collectionId == data['collectionId'])) # noqa
195+
result.setdefault(data['collectionId'], {})[data['id']] = doc # noqa
190196
return result
191197

192198
@staticmethod
@@ -204,34 +210,17 @@ def _delete_source(path: Path, silent: bool = False) -> bool:
204210
LOGGER.debug(f'file {path} already removed')
205211
return True
206212

207-
try:
208-
# Acquire exclusive lock before deletion
209-
# This prevents readers from starting while we delete
210-
lock = FileLock(path)
211-
212-
with lock.acquire(timeout=10):
213-
# Remove file
214-
try:
215-
path.unlink(missing_ok=True)
216-
LOGGER.debug(f'removed join source file: {path}')
217-
except Exception as e:
218-
LOGGER.warning(f'failed to remove join source {path}: {e}')
219-
if not silent:
220-
raise
221-
else:
222-
return False
223-
finally:
224-
# Remove lock file
225-
lock_file = path.with_suffix(path.suffix + '.lock')
226-
lock_file.unlink(missing_ok=True)
227-
228-
except Exception as e:
229-
LOGGER.error(f'Error during file deletion: {e}',
230-
exc_info=True)
231-
if not silent:
232-
raise
233-
else:
234-
return False
213+
with FileLock(path.with_suffix(path.suffix + '.lock')):
214+
# Remove file
215+
try:
216+
path.unlink(missing_ok=True)
217+
LOGGER.debug(f'removed join source file: {path}')
218+
except Exception as e:
219+
LOGGER.warning(f'failed to remove join source {path}: {e}')
220+
if not silent:
221+
raise
222+
else:
223+
return False
235224

236225
return True
237226

@@ -252,7 +241,7 @@ def _cleanup_sources(self):
252241
# and output as tuple (timestamp, id, ref)
253242
source_items = sorted(
254243
[(util.str_to_datetime(info['timeStamp']), info['id'],
255-
info['ref']) for info in sources.values()],
244+
Path(info['ref'])) for info in sources.values()],
256245
key=itemgetter(0)
257246
)
258247

@@ -262,16 +251,16 @@ def _cleanup_sources(self):
262251
if now - timestamp <= max_age:
263252
continue
264253
if self._delete_source(ref, True):
265-
db.remove(q.collectionId == collection_id & q.id == source_id) # noqa
254+
db.remove((q.collectionId == collection_id) & (q.id == source_id)) # noqa
266255
LOGGER.debug(f'removed stale source: {ref}')
267256
else:
268257
LOGGER.warning(f'could not remove stale source: {ref}') # noqa
269258

270259
# pass 2: limit by max_files (if configured)
271260
if 0 < self.max_files < len(sources):
272-
for _, ref in list(reversed(source_items))[:self.max_files]: # noqa
261+
for _, source_id, ref in list(reversed(source_items))[:self.max_files]: # noqa
273262
if self._delete_source(ref, True):
274-
db.remove(q.collectionId == collection_id & q.id == source_id) # noqa
263+
db.remove((q.collectionId == collection_id) & (q.id == source_id)) # noqa
275264
LOGGER.debug(f'removed stale source: {ref}')
276265
else:
277266
LOGGER.warning(f'could not remove stale source: {ref}') # noqa
@@ -288,6 +277,38 @@ def _make_source_path(self, join_id: str) -> Path:
288277
json_file = self.source_dir / f'table-{join_id}.json'
289278
return json_file
290279

280+
def _find_source_path(self, collection_id: str, join_id: str) -> Path:
281+
"""
282+
Finds the path to a join source file on disk.
283+
Raises a JoinSourceNotFoundError if the source is not found.
284+
Raises a JoinSourceMissingError if the source reference is missing.
285+
286+
:param collection_id: Collection identifier
287+
:param join_id: Join source identifier
288+
289+
:returns: `Path` instance to the join source file
290+
"""
291+
with self._db() as db:
292+
q = Query()
293+
result = db.get((q.id == join_id) &
294+
(q.collectionId == collection_id))
295+
if not result:
296+
raise JoinSourceNotFoundError(
297+
f'join source {join_id} not found for collection {collection_id}') # noqa
298+
299+
file_path = Path(result['ref'])
300+
301+
# Verify file still exists
302+
if not file_path.is_file():
303+
# Clean up orphaned database entry
304+
LOGGER.warning(f'Join source file missing: {file_path}')
305+
db.remove((q.id == join_id) &
306+
(q.collectionId == collection_id))
307+
raise JoinSourceMissingError(
308+
f'join source {join_id} for collection {collection_id} was removed') # noqa
309+
310+
return file_path
311+
291312
@staticmethod
292313
def _valid_id(join_id: Any) -> bool:
293314
"""
@@ -308,6 +329,23 @@ def process_csv(self, collection_id: str,
308329
Processes the CSV form data and stores the result as a JSON file
309330
in a temporary directory.
310331
332+
Example response:
333+
{
334+
"id": "13b40adb-aef3-4f6b-8d32-acf3ab082d2d",
335+
"timeStamp": "2025-12-10T12:26:17.542928Z",
336+
"collectionId": "cities",
337+
"collectionKey": "id",
338+
"joinSource": "city_data.csv",
339+
"joinKey": "city_id",
340+
"joinFields": ["city_name", "population"],
341+
"numberOfRows": 50,
342+
"data": {
343+
"12345": ["Amsterdam", "1100000"],
344+
"67890": ["Rotterdam", "650000"]
345+
}
346+
}
347+
348+
311349
:param collection_id: collection name to apply join source to
312350
:param collection_provider: feature collection provider
313351
:param form_data: parameters dict (from request form data)
@@ -364,8 +402,9 @@ def process_csv(self, collection_id: str,
364402
LOGGER.debug('Reading CSV data from stream')
365403
try:
366404
# Wrap binary stream in TextIOWrapper for reading
367-
# TODO: support other encodings
368-
text_stream = TextIOWrapper(csv_data.buffer, encoding='utf-8')
405+
# TODO: support other encodings (in OGC API - Joins spec)
406+
text_stream = TextIOWrapper(csv_data.buffer,
407+
encoding='utf-8', errors='replace')
369408
all_lines = text_stream.readlines()
370409
num_lines = len(all_lines)
371410

@@ -471,10 +510,14 @@ def process_csv(self, collection_id: str,
471510
"data": join_data
472511
}
473512

513+
# Lazily clean up any stale sources
514+
self._cleanup_sources()
515+
474516
# Store the output as JSON file named 'table-{uuid}.json'
475517
json_file = self._make_source_path(source_id)
476-
with open(json_file, 'w', encoding='utf-8') as f:
477-
json.dump(output, f, indent=4)
518+
with FileLock(json_file.with_suffix(json_file.suffix + '.lock')):
519+
with open(json_file, 'w', encoding='utf-8') as f:
520+
json.dump(output, f, indent=4)
478521

479522
# Write source file reference to TinyDB for lookup
480523
with self._db() as db:
@@ -487,9 +530,6 @@ def process_csv(self, collection_id: str,
487530
}
488531
db.insert(doc)
489532

490-
# Lazily clean up any stale sources
491-
self._cleanup_sources()
492-
493533
return output
494534

495535
def list_sources(self, collection_id: str) -> dict:
@@ -499,7 +539,7 @@ def list_sources(self, collection_id: str) -> dict:
499539
500540
:param collection_id: name of feature collection
501541
502-
:returns: list of dict with source references
542+
:returns: dict[str, dict] with references for each source ID
503543
"""
504544
with self._db() as db:
505545
q = Query()
@@ -527,6 +567,7 @@ def read_join_source(self, collection_id: str, join_id: str) -> dict:
527567
"""
528568
Read specific join source metadata.
529569
Raises a JoinSourceNotFoundError if the source is not found.
570+
Raises a JoinSourceMissingError if the source reference is missing.
530571
531572
:param collection_id: Collection identifier
532573
:param join_id: Join source identifier
@@ -535,32 +576,15 @@ def read_join_source(self, collection_id: str, join_id: str) -> dict:
535576
if not self._valid_id(join_id):
536577
raise ValueError('invalid join source ID')
537578

538-
with self._db() as db:
539-
q = Query()
540-
result = db.get((q.id == join_id) &
541-
(q.collectionId == collection_id))
542-
if not result:
543-
raise JoinSourceNotFoundError(
544-
f'join source {join_id} not found for collection {collection_id}') # noqa
545-
546-
file_path = Path(result['ref'])
547-
548-
# Verify file still exists
549-
if not file_path.is_file():
550-
# Clean up orphaned database entry
551-
LOGGER.warning(f'Join source file missing: {file_path}')
552-
db.remove((q.id == join_id) &
553-
(q.collectionId == collection_id))
554-
raise JoinSourceMissingError(
555-
f'join source {join_id} for collection {collection_id} was removed') # noqa
579+
# This may raise JoinSourceNotFoundError or JoinSourceMissingError
580+
source_path = self._find_source_path(collection_id, join_id)
556581

557-
# Read full JSON source and return document
558-
lock = FileLock(file_path)
559-
with lock.acquire(timeout=5):
560-
with open(file_path, 'r', encoding='utf-8') as f:
561-
source_dict = json.load(f)
582+
# Read full JSON source and return document
583+
with FileLock(source_path.with_suffix(source_path.suffix + '.lock')):
584+
with open(source_path, 'r', encoding='utf-8') as f:
585+
source_dict = json.load(f)
562586

563-
return source_dict
587+
return source_dict
564588

565589
def perform_join(self, features: dict, collection_id: str, join_id: str):
566590
"""
@@ -617,7 +641,7 @@ def remove_source(self, collection_id: str, join_id: str) -> bool:
617641
raise ValueError('invalid join source ID')
618642

619643
try:
620-
source = self.read_join_source(collection_id, join_id)
644+
source_path = self._find_source_path(collection_id, join_id)
621645
except JoinSourceNotFoundError:
622646
# If the join source was not found, we should respond with a 404
623647
return False
@@ -626,9 +650,9 @@ def remove_source(self, collection_id: str, join_id: str) -> bool:
626650
return True
627651

628652
# Remove the JSON file from disk
629-
deleted = self._delete_source(Path(source['ref']))
653+
deleted = self._delete_source(source_path)
630654

631-
# Remove reference
655+
# Remove reference (clean up orphan)
632656
if deleted:
633657
with self._db() as db:
634658
q = Query()
@@ -639,10 +663,10 @@ def remove_source(self, collection_id: str, join_id: str) -> bool:
639663

640664

641665
class JoinSourceNotFoundError(Exception):
642-
"""Join source is not found (by ID or collection)."""
666+
"""Join source is not found (by ID and/or collection)."""
643667
pass
644668

645669

646670
class JoinSourceMissingError(FileNotFoundError):
647-
"""Join source is missing (but still referenced)."""
671+
"""Join source is missing but still referenced (orphan)."""
648672
pass

0 commit comments

Comments (0)