Add LoMa (#63)

davnords · web-flow · commit 6aa83d67e086 · 2026-04-19T18:28:06.000-07:00
* add loma

* remove tyro dep

* Add LoMa-R and fix pixel offset
diff --git a/.gitmodules b/.gitmodules
@@ -94,3 +94,6 @@
 	path = vismatch/third_party/ZippyPoint
 	url = https://github.com/menelaoskanakis/ZippyPoint
 	ignore = untracked
+[submodule "vismatch/third_party/LoMa"]
+	path = vismatch/third_party/LoMa
+	url = https://github.com/davnords/LoMa
diff --git a/README.md b/README.md
@@ -177,7 +177,7 @@ We support the following methods:
 
 **Semi-dense**: ```loftr, eloftr, se2loftr, xoftr, minima-loftr, aspanformer, matchformer, xfeat-star, xfeat-star-steerers[-perm/-learned], edm, rdd-star, topicfm[-plus]```
 
-**Sparse**: ```[sift, superpoint, disk, aliked, dedode, doghardnet, gim, xfeat]-lightglue, dedode, steerers, affine-steerers, xfeat-steerers[-perm/learned], dedode-kornia, [sift, orb, doghardnet]-nn, patch2pix, superglue, r2d2, d2net,  gim-dkm, xfeat, omniglue, [dedode, xfeat, aliked]-subpx, [sift, superpoint]-sphereglue, minima-superpoint-lightglue, liftfeat, rdd-[sparse,lightglue, aliked], ripe, lisrd, zippypoint```
+**Sparse**: ```[sift, superpoint, disk, aliked, dedode, doghardnet, gim, xfeat]-lightglue, dedode, steerers, affine-steerers, xfeat-steerers[-perm/learned], dedode-kornia, [sift, orb, doghardnet]-nn, patch2pix, superglue, r2d2, d2net,  gim-dkm, xfeat, omniglue, [dedode, xfeat, aliked]-subpx, [sift, superpoint]-sphereglue, minima-superpoint-lightglue, liftfeat, rdd-[sparse,lightglue, aliked], ripe, lisrd, zippypoint, loma```
 
 See [Model Details](docs/source/model_details.md) to see runtimes, supported devices, source, and license of each model.
 
diff --git a/vismatch/__init__.py b/vismatch/__init__.py
@@ -95,6 +95,7 @@
     "xfeat-steerers-learned",
     "xfeat-star-steerers-perm",
     "xfeat-star-steerers-learned",
+    "loma",
 ]
 
 
@@ -478,6 +479,10 @@ def get_matcher(
         from vismatch.im_models import zippypoint
 
         return zippypoint.ZippyPointMatcher(device, max_num_keypoints=max_num_keypoints, *args, **kwargs)
+    elif matcher_name == "loma":
+        from vismatch.im_models import loma
+
+        return loma.LoMaMatcher(device, max_num_keypoints, *args, **kwargs)
     else:
         raise RuntimeError(
             f"Matcher {matcher_name} not yet supported. Consider submitted a PR to add it. Available models: {available_models}"
diff --git a/vismatch/im_models/loma.py b/vismatch/im_models/loma.py
@@ -0,0 +1,92 @@
+import torch
+
+from typing import Literal
+
+from vismatch import THIRD_PARTY_DIR, BaseMatcher  # noqa: F401
+from vismatch.utils import add_to_path, resize_to_divisible
+
+add_to_path(THIRD_PARTY_DIR.joinpath("LoMa/src"))
+
+from loma.loma import (
+    LoMaB,
+    LoMaB128,
+    LoMaL,
+    LoMaG,
+    LoMaR,
+    LoMa,
+    filter_matches,
+    to_pixel_coords,
+)
+
+
+class LoMaMatcher(BaseMatcher):
+    divisible_size = 14  # for DINOv2 in the descriptor of LoMa-{B, L, G, R}. LoMa-B128 can handle arbitrary resolutions and is more lightweight.
+
+    def __init__(
+        self,
+        device="cpu",
+        max_num_keypoints=2048,
+        arch: Literal["LoMa-B", "LoMa-L", "LoMa-G", "LoMa-B128", "LoMa-R"] = "LoMa-B",
+        **kwargs,
+    ):
+        super().__init__(device, **kwargs)
+        self.max_num_keypoints = max_num_keypoints
+
+        if arch == "LoMa-B":
+            cfg = LoMaB()
+        elif arch == "LoMa-L":
+            cfg = LoMaL()
+        elif arch == "LoMa-G":
+            cfg = LoMaG()
+        elif arch == "LoMa-B128":
+            cfg = LoMaB128()
+        elif arch == "LoMa-R":
+            cfg = LoMaR()
+        else:
+            raise ValueError(
+                f"Unsupported architecture '{arch}' for LoMa. Supported: 'LoMa-B', 'LoMa-L', 'LoMa-G', 'LoMa-B128', 'LoMa-R'."
+            )
+
+        # This automatically loads weights using torch.hub.load_state_dict_from_url
+        self.matcher = LoMa(cfg)
+
+    def preprocess(self, img):
+        _, h, w = img.shape
+        orig_shape = h, w
+        img = resize_to_divisible(img, self.divisible_size)
+        img = img.unsqueeze(0)
+        return img, orig_shape
+
+    def _forward(self, img0, img1):
+        img0, img0_orig_shape = self.preprocess(img0)
+        img1, img1_orig_shape = self.preprocess(img1)
+
+        H0, W0 = img0.shape[-2:]
+        H1, W1 = img1.shape[-2:]
+
+        kpts0, desc0, _, _ = self.matcher.detect_and_describe(img0, self.max_num_keypoints)
+        kpts1, desc1, _, _ = self.matcher.detect_and_describe(img1, self.max_num_keypoints)
+
+        scores = self.matcher(kpts0, kpts1, desc0, desc1)["scores"]
+        m0, _, _, _ = filter_matches(scores, self.matcher.cfg.filter_threshold)
+
+        valid = m0[0] > -1
+        matched_kpts0 = to_pixel_coords(kpts0[0][torch.where(valid)[0]], H0, W0)
+        matched_kpts1 = to_pixel_coords(kpts1[0][m0[0][valid]], H1, W1)
+
+        all_kpts0 = to_pixel_coords(kpts0[0], H0, W0)
+        all_kpts1 = to_pixel_coords(kpts1[0], H1, W1)
+
+        matched_kpts0 = self.rescale_coords(matched_kpts0, *img0_orig_shape, H0, W0)
+        matched_kpts1 = self.rescale_coords(matched_kpts1, *img1_orig_shape, H1, W1)
+        all_kpts0 = self.rescale_coords(all_kpts0, *img0_orig_shape, H0, W0)
+        all_kpts1 = self.rescale_coords(all_kpts1, *img1_orig_shape, H1, W1)
+
+        # LoMa uses the COLMAP convention for pixel coords (see https://github.com/gmberton/vismatch/pull/63), so we subtract 0.5 for repo compatibility
+        offset = 0.5
+        matched_kpts0 -= offset
+        matched_kpts1 -= offset
+        all_kpts0 -= offset
+        all_kpts1 -= offset
+
+        return matched_kpts0, matched_kpts1, all_kpts0, all_kpts1, desc0[0], desc1[0]
diff --git a/vismatch/third_party/LoMa b/vismatch/third_party/LoMa
@@ -0,0 +1 @@
+Subproject commit 9105854833f55d18194d0505d913f0a74b194ef0