commit 5723ddf349dcdcb5d0a837bf447cb784ff0f77f6
Author: jianjiang
Date:   Tue Apr 25 20:01:16 2023 +0800

    SuperGlue-Image-Matching

diff --git a/.gitattributes b/.gitattributes
new file mode 100644
index 0000000..e2e6830
--- /dev/null
+++ b/.gitattributes
@@ -0,0 +1,33 @@
+*.7z filter=lfs diff=lfs merge=lfs -text
+*.arrow filter=lfs diff=lfs merge=lfs -text
+*.bin filter=lfs diff=lfs merge=lfs -text
+*.bz2 filter=lfs diff=lfs merge=lfs -text
+*.ftz filter=lfs diff=lfs merge=lfs -text
+*.gz filter=lfs diff=lfs merge=lfs -text
+*.h5 filter=lfs diff=lfs merge=lfs -text
+*.joblib filter=lfs diff=lfs merge=lfs -text
+*.lfs.* filter=lfs diff=lfs merge=lfs -text
+*.mlmodel filter=lfs diff=lfs merge=lfs -text
+*.model filter=lfs diff=lfs merge=lfs -text
+*.msgpack filter=lfs diff=lfs merge=lfs -text
+*.npy filter=lfs diff=lfs merge=lfs -text
+*.npz filter=lfs diff=lfs merge=lfs -text
+*.onnx filter=lfs diff=lfs merge=lfs -text
+*.ot filter=lfs diff=lfs merge=lfs -text
+*.parquet filter=lfs diff=lfs merge=lfs -text
+*.pb filter=lfs diff=lfs merge=lfs -text
+*.pickle filter=lfs diff=lfs merge=lfs -text
+*.pkl filter=lfs diff=lfs merge=lfs -text
+*.pt filter=lfs diff=lfs merge=lfs -text
+*.pth filter=lfs diff=lfs merge=lfs -text
+*.rar filter=lfs diff=lfs merge=lfs -text
+*.safetensors filter=lfs diff=lfs merge=lfs -text
+saved_model/**/* filter=lfs diff=lfs merge=lfs -text
+*.tar.* filter=lfs diff=lfs merge=lfs -text
+*.tflite filter=lfs diff=lfs merge=lfs -text
+*.tgz filter=lfs diff=lfs merge=lfs -text
+*.wasm filter=lfs diff=lfs merge=lfs -text
+*.xz filter=lfs diff=lfs merge=lfs -text
+*.zip filter=lfs diff=lfs merge=lfs -text
+*.zst filter=lfs diff=lfs merge=lfs -text
+*tfevents* filter=lfs diff=lfs merge=lfs -text
diff --git a/.gitea/workflows/build.yaml b/.gitea/workflows/build.yaml
new file mode 100644
index 0000000..1966567
--- /dev/null
+++ b/.gitea/workflows/build.yaml
@@ -0,0 +1,47 @@
+name: Build
+run-name: ${{ github.actor }} is upgrading the release 🚀
+on: [push]
+env:
+  REPOSITORY: ${{ github.repository }}
+  COMMIT_ID: ${{ github.sha }}
+jobs:
+  Build-Deploy-Actions:
+    runs-on: ubuntu-latest
+    steps:
+      - run: echo "🎉 The job was automatically triggered by a ${{ github.event_name }} event."
+      - run: echo "🐧 This job is now running on a ${{ runner.os }} server hosted by Gitea!"
+      - run: echo "🔎 The name of your branch is ${{ github.ref }} and your repository is ${{ github.repository }}."
+      - name: Check out repository code
+        uses: actions/checkout@v3
+      - name: Setup Git LFS
+        run: |
+          git lfs install
+          git lfs fetch
+          git lfs checkout
+      - name: List files in the repository
+        run: |
+          ls ${{ github.workspace }}
+      - name: Docker Image Info
+        id: image-info
+        # The ::set-output workflow command is deprecated; step outputs are
+        # written to the $GITHUB_OUTPUT file instead.
+        run: |
+          echo "image_name=$(echo $REPOSITORY | tr '[:upper:]' '[:lower:]')" >> "$GITHUB_OUTPUT"
+          echo "image_tag=${COMMIT_ID:0:10}" >> "$GITHUB_OUTPUT"
+      - name: Login to Docker Hub
+        uses: docker/login-action@v2
+        with:
+          registry: artifacts.iflytek.com
+          username: ${{ secrets.DOCKERHUB_USERNAME }}
+          password: ${{ secrets.DOCKERHUB_TOKEN }}
+      - name: Set up Docker Buildx
+        uses: docker/setup-buildx-action@v2
+      - name: Build and push
+        run: |
+          docker version
+          docker buildx build -t artifacts.iflytek.com/docker-private/atp/${{ steps.image-info.outputs.image_name }}:${{ steps.image-info.outputs.image_tag }} . --file ${{ github.workspace }}/Dockerfile --load
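+          # --load imports the buildx result into the local Docker daemon so
+          # the plain `docker push` below can find the tagged image.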
+          docker push artifacts.iflytek.com/docker-private/atp/${{ steps.image-info.outputs.image_name }}:${{ steps.image-info.outputs.image_tag }}
+          docker rmi artifacts.iflytek.com/docker-private/atp/${{ steps.image-info.outputs.image_name }}:${{ steps.image-info.outputs.image_tag }}
+      - run: echo "🍏 This job's status is ${{ job.status }}."
diff --git a/Dockerfile b/Dockerfile
new file mode 100644
index 0000000..b7fde67
--- /dev/null
+++ b/Dockerfile
@@ -0,0 +1,15 @@
+#FROM python:3.8.13
+FROM artifacts.iflytek.com/docker-private/atp/base_image_for_ailab:0.0.1
+
+WORKDIR /app
+
+COPY . /app
+
+RUN pip config set global.index-url https://pypi.mirrors.ustc.edu.cn/simple
+RUN pip install -r requirements.txt
+
+RUN sed -i 's/deb.debian.org/mirrors.ustc.edu.cn/g' /etc/apt/sources.list
+RUN apt-get update && apt-get install -y ffmpeg libsm6 libxext6
+
+CMD ["python", "app.py"]
diff --git a/README.md b/README.md
new file mode 100644
index 0000000..83fe879
--- /dev/null
+++ b/README.md
@@ -0,0 +1,12 @@
+---
+title: SuperGlue Image Matching
+emoji: 🧚‍♀️
+colorFrom: purple
+colorTo: indigo
+sdk: gradio
+sdk_version: 3.8.1
+app_file: app.py
+pinned: false
+---
+
+Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
diff --git a/app.py b/app.py
new file mode 100644
index 0000000..564c962
--- /dev/null
+++ b/app.py
@@ -0,0 +1,129 @@
+import matplotlib.cm as cm
+import torch
+import gradio as gr
+from models.matching import Matching
+from models.utils import (make_matching_plot_fast, process_image)
+
+torch.set_grad_enabled(False)
+
+# Load the SuperPoint and SuperGlue models.
+device = 'cuda' if torch.cuda.is_available() else 'cpu'
+
+resize = [640, 640]
+max_keypoints = 1024
+keypoint_threshold = 0.005
+nms_radius = 4
+sinkhorn_iterations = 20
+match_threshold = 0.2
+resize_float = False
+
+config_indoor = {
+    'superpoint': {
+        'nms_radius': nms_radius,
+        'keypoint_threshold': keypoint_threshold,
+        'max_keypoints': max_keypoints
+    },
+    'superglue': {
+        'weights': "indoor",
+        'sinkhorn_iterations': sinkhorn_iterations,
+        'match_threshold': match_threshold,
+    }
+}
+
+config_outdoor = {
+    'superpoint': {
+        'nms_radius': nms_radius,
+        'keypoint_threshold': keypoint_threshold,
+        'max_keypoints': max_keypoints
+    },
+    'superglue': {
+        'weights': "outdoor",
+        'sinkhorn_iterations': sinkhorn_iterations,
+        'match_threshold': match_threshold,
+    }
+}
+
+matching_indoor = Matching(config_indoor).eval().to(device)
+matching_outdoor = Matching(config_outdoor).eval().to(device)
+
+def run(input0, input1, superglue):
+    if superglue == "indoor":
+        matching = matching_indoor
+    else:
+        matching = matching_outdoor
+
+    name0 = 'image1'
+    name1 = 'image2'
+
+    # If a rotation integer is provided (e.g. from EXIF data), use it:
+    rot0, rot1 = 0, 0
+
+    # Load the image pair.
+    image0, inp0, scales0 = process_image(input0, device, resize, rot0, resize_float)
+    image1, inp1, scales1 = process_image(input1, device, resize, rot1, resize_float)
+
+    if image0 is None or image1 is None:
+        print('Problem reading image pair')
+        return
+
+    # Perform the matching.
+    pred = matching({'image0': inp0, 'image1': inp1})
+    # Move tensors to the CPU before converting to NumPy, so this also works
+    # when the models run on CUDA.
+    pred = {k: v[0].detach().cpu().numpy() for k, v in pred.items()}
+    kpts0, kpts1 = pred['keypoints0'], pred['keypoints1']
+    matches, conf = pred['matches0'], pred['matching_scores0']
+
+    valid = matches > -1
+    mkpts0 = kpts0[valid]
+    mkpts1 = kpts1[matches[valid]]
+    mconf = conf[valid]
+
+    # Visualize the matches.
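+    # cm.jet maps each match confidence in [0, 1] to an RGBA color
+    # (blue = low, red = high), so the drawn match lines encode score.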
+ color = cm.jet(mconf) + text = [ + 'SuperGlue', + 'Keypoints: {}:{}'.format(len(kpts0), len(kpts1)), + '{}'.format(len(mkpts0)), + ] + + if rot0 != 0 or rot1 != 0: + text.append('Rotation: {}:{}'.format(rot0, rot1)) + + # Display extra parameter info. + k_thresh = matching.superpoint.config['keypoint_threshold'] + m_thresh = matching.superglue.config['match_threshold'] + small_text = [ + 'Keypoint Threshold: {:.4f}'.format(k_thresh), + 'Match Threshold: {:.2f}'.format(m_thresh), + 'Image Pair: {}:{}'.format(name0, name1), + ] + + output = make_matching_plot_fast( + image0, image1, kpts0, kpts1, mkpts0, mkpts1, color, + text, show_keypoints=True, small_text=small_text) + + print('Source Image - {}, Destination Image - {}, {}, Match Percentage - {}'.format(name0, name1, text[2], len(mkpts0)/len(kpts0))) + return output, text[2], str((len(mkpts0)/len(kpts0))*100.0) + '%' + +if __name__ == '__main__': + + glue = gr.Interface( + fn=run, + inputs=[ + gr.Image(label='Input Image'), + gr.Image(label='Match Image'), + gr.Radio(choices=["indoor", "outdoor"], value="indoor", type="value", label="SuperGlueType", interactive=True), + ], + outputs=[gr.Image( + type="pil", + label="Result"), + gr.Textbox(label="Keypoints Matched"), + gr.Textbox(label="Match Percentage") + ], + examples=[ + ['./taj-1.jpg', './taj-2.jpg', "outdoor"], + ['./outdoor-1.JPEG', './outdoor-2.JPEG', "outdoor"] + ] + ) + glue.queue() + glue.launch(server_name = "0.0.0.0") diff --git a/models/__init__.py b/models/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/models/__pycache__/__init__.cpython-38.pyc b/models/__pycache__/__init__.cpython-38.pyc new file mode 100644 index 0000000..7c84b2a Binary files /dev/null and b/models/__pycache__/__init__.cpython-38.pyc differ diff --git a/models/__pycache__/matching.cpython-38.pyc b/models/__pycache__/matching.cpython-38.pyc new file mode 100644 index 0000000..792bf95 Binary files /dev/null and b/models/__pycache__/matching.cpython-38.pyc differ diff --git a/models/__pycache__/superglue.cpython-38.pyc b/models/__pycache__/superglue.cpython-38.pyc new file mode 100644 index 0000000..cdbbec2 Binary files /dev/null and b/models/__pycache__/superglue.cpython-38.pyc differ diff --git a/models/__pycache__/superpoint.cpython-38.pyc b/models/__pycache__/superpoint.cpython-38.pyc new file mode 100644 index 0000000..de670a6 Binary files /dev/null and b/models/__pycache__/superpoint.cpython-38.pyc differ diff --git a/models/__pycache__/utils.cpython-38.pyc b/models/__pycache__/utils.cpython-38.pyc new file mode 100644 index 0000000..080dabc Binary files /dev/null and b/models/__pycache__/utils.cpython-38.pyc differ diff --git a/models/matching.py b/models/matching.py new file mode 100644 index 0000000..5d17420 --- /dev/null +++ b/models/matching.py @@ -0,0 +1,84 @@ +# %BANNER_BEGIN% +# --------------------------------------------------------------------- +# %COPYRIGHT_BEGIN% +# +# Magic Leap, Inc. ("COMPANY") CONFIDENTIAL +# +# Unpublished Copyright (c) 2020 +# Magic Leap, Inc., All Rights Reserved. +# +# NOTICE: All information contained herein is, and remains the property +# of COMPANY. The intellectual and technical concepts contained herein +# are proprietary to COMPANY and may be covered by U.S. and Foreign +# Patents, patents in process, and are protected by trade secret or +# copyright law. Dissemination of this information or reproduction of +# this material is strictly forbidden unless prior written permission is +# obtained from COMPANY. 
Access to the source code contained herein is +# hereby forbidden to anyone except current COMPANY employees, managers +# or contractors who have executed Confidentiality and Non-disclosure +# agreements explicitly covering such access. +# +# The copyright notice above does not evidence any actual or intended +# publication or disclosure of this source code, which includes +# information that is confidential and/or proprietary, and is a trade +# secret, of COMPANY. ANY REPRODUCTION, MODIFICATION, DISTRIBUTION, +# PUBLIC PERFORMANCE, OR PUBLIC DISPLAY OF OR THROUGH USE OF THIS +# SOURCE CODE WITHOUT THE EXPRESS WRITTEN CONSENT OF COMPANY IS +# STRICTLY PROHIBITED, AND IN VIOLATION OF APPLICABLE LAWS AND +# INTERNATIONAL TREATIES. THE RECEIPT OR POSSESSION OF THIS SOURCE +# CODE AND/OR RELATED INFORMATION DOES NOT CONVEY OR IMPLY ANY RIGHTS +# TO REPRODUCE, DISCLOSE OR DISTRIBUTE ITS CONTENTS, OR TO MANUFACTURE, +# USE, OR SELL ANYTHING THAT IT MAY DESCRIBE, IN WHOLE OR IN PART. +# +# %COPYRIGHT_END% +# ---------------------------------------------------------------------- +# %AUTHORS_BEGIN% +# +# Originating Authors: Paul-Edouard Sarlin +# +# %AUTHORS_END% +# --------------------------------------------------------------------*/ +# %BANNER_END% + +import torch + +from .superpoint import SuperPoint +from .superglue import SuperGlue + + +class Matching(torch.nn.Module): + """ Image Matching Frontend (SuperPoint + SuperGlue) """ + def __init__(self, config={}): + super().__init__() + self.superpoint = SuperPoint(config.get('superpoint', {})) + self.superglue = SuperGlue(config.get('superglue', {})) + + def forward(self, data): + """ Run SuperPoint (optionally) and SuperGlue + SuperPoint is skipped if ['keypoints0', 'keypoints1'] exist in input + Args: + data: dictionary with minimal keys: ['image0', 'image1'] + """ + pred = {} + + # Extract SuperPoint (keypoints, scores, descriptors) if not provided + if 'keypoints0' not in data: + pred0 = self.superpoint({'image': data['image0']}) + pred = {**pred, **{k+'0': v for k, v in pred0.items()}} + if 'keypoints1' not in data: + pred1 = self.superpoint({'image': data['image1']}) + pred = {**pred, **{k+'1': v for k, v in pred1.items()}} + + # Batch all features + # We should either have i) one image per batch, or + # ii) the same number of local features for all images in the batch. + data = {**data, **pred} + + for k in data: + if isinstance(data[k], (list, tuple)): + data[k] = torch.stack(data[k]) + + # Perform the matching + pred = {**pred, **self.superglue(data)} + + return pred diff --git a/models/superglue.py b/models/superglue.py new file mode 100644 index 0000000..5a89b03 --- /dev/null +++ b/models/superglue.py @@ -0,0 +1,285 @@ +# %BANNER_BEGIN% +# --------------------------------------------------------------------- +# %COPYRIGHT_BEGIN% +# +# Magic Leap, Inc. ("COMPANY") CONFIDENTIAL +# +# Unpublished Copyright (c) 2020 +# Magic Leap, Inc., All Rights Reserved. +# +# NOTICE: All information contained herein is, and remains the property +# of COMPANY. The intellectual and technical concepts contained herein +# are proprietary to COMPANY and may be covered by U.S. and Foreign +# Patents, patents in process, and are protected by trade secret or +# copyright law. Dissemination of this information or reproduction of +# this material is strictly forbidden unless prior written permission is +# obtained from COMPANY. 
Access to the source code contained herein is
+# hereby forbidden to anyone except current COMPANY employees, managers
+# or contractors who have executed Confidentiality and Non-disclosure
+# agreements explicitly covering such access.
+#
+# The copyright notice above does not evidence any actual or intended
+# publication or disclosure of this source code, which includes
+# information that is confidential and/or proprietary, and is a trade
+# secret, of COMPANY. ANY REPRODUCTION, MODIFICATION, DISTRIBUTION,
+# PUBLIC PERFORMANCE, OR PUBLIC DISPLAY OF OR THROUGH USE OF THIS
+# SOURCE CODE WITHOUT THE EXPRESS WRITTEN CONSENT OF COMPANY IS
+# STRICTLY PROHIBITED, AND IN VIOLATION OF APPLICABLE LAWS AND
+# INTERNATIONAL TREATIES. THE RECEIPT OR POSSESSION OF THIS SOURCE
+# CODE AND/OR RELATED INFORMATION DOES NOT CONVEY OR IMPLY ANY RIGHTS
+# TO REPRODUCE, DISCLOSE OR DISTRIBUTE ITS CONTENTS, OR TO MANUFACTURE,
+# USE, OR SELL ANYTHING THAT IT MAY DESCRIBE, IN WHOLE OR IN PART.
+#
+# %COPYRIGHT_END%
+# ----------------------------------------------------------------------
+# %AUTHORS_BEGIN%
+#
+#  Originating Authors: Paul-Edouard Sarlin
+#
+# %AUTHORS_END%
+# --------------------------------------------------------------------*/
+# %BANNER_END%
+
+from copy import deepcopy
+from pathlib import Path
+from typing import List, Tuple
+
+import torch
+from torch import nn
+
+
+def MLP(channels: List[int], do_bn: bool = True) -> nn.Module:
+    """ Multi-layer perceptron """
+    n = len(channels)
+    layers = []
+    for i in range(1, n):
+        layers.append(
+            nn.Conv1d(channels[i - 1], channels[i], kernel_size=1, bias=True))
+        if i < (n-1):
+            if do_bn:
+                layers.append(nn.BatchNorm1d(channels[i]))
+            layers.append(nn.ReLU())
+    return nn.Sequential(*layers)
+
+
+def normalize_keypoints(kpts, image_shape):
+    """ Normalize keypoint locations based on the image shape """
+    _, _, height, width = image_shape
+    one = kpts.new_tensor(1)
+    size = torch.stack([one*width, one*height])[None]
+    center = size / 2
+    scaling = size.max(1, keepdim=True).values * 0.7
+    return (kpts - center[:, None, :]) / scaling[:, None, :]
+
+
+class KeypointEncoder(nn.Module):
+    """ Joint encoding of visual appearance and location using MLPs """
+    def __init__(self, feature_dim: int, layers: List[int]) -> None:
+        super().__init__()
+        self.encoder = MLP([3] + layers + [feature_dim])
+        nn.init.constant_(self.encoder[-1].bias, 0.0)
+
+    def forward(self, kpts, scores):
+        inputs = [kpts.transpose(1, 2), scores.unsqueeze(1)]
+        return self.encoder(torch.cat(inputs, dim=1))
+
+
+def attention(query: torch.Tensor, key: torch.Tensor, value: torch.Tensor) -> Tuple[torch.Tensor, torch.Tensor]:
+    dim = query.shape[1]
+    scores = torch.einsum('bdhn,bdhm->bhnm', query, key) / dim**.5
+    prob = torch.nn.functional.softmax(scores, dim=-1)
+    return torch.einsum('bhnm,bdhm->bdhn', prob, value), prob
+
+
+class MultiHeadedAttention(nn.Module):
+    """ Multi-head attention to increase model expressivity """
+    def __init__(self, num_heads: int, d_model: int):
+        super().__init__()
+        assert d_model % num_heads == 0
+        self.dim = d_model // num_heads
+        self.num_heads = num_heads
+        self.merge = nn.Conv1d(d_model, d_model, kernel_size=1)
+        self.proj = nn.ModuleList([deepcopy(self.merge) for _ in range(3)])
+
+    def forward(self, query: torch.Tensor, key: torch.Tensor, value: torch.Tensor) -> torch.Tensor:
+        batch_dim = query.size(0)
+        query, key, value = [l(x).view(batch_dim, self.dim, self.num_heads, -1)
+                             for l, x in zip(self.proj, (query, key, value))]
+        x, _ =
attention(query, key, value) + return self.merge(x.contiguous().view(batch_dim, self.dim*self.num_heads, -1)) + + +class AttentionalPropagation(nn.Module): + def __init__(self, feature_dim: int, num_heads: int): + super().__init__() + self.attn = MultiHeadedAttention(num_heads, feature_dim) + self.mlp = MLP([feature_dim*2, feature_dim*2, feature_dim]) + nn.init.constant_(self.mlp[-1].bias, 0.0) + + def forward(self, x: torch.Tensor, source: torch.Tensor) -> torch.Tensor: + message = self.attn(x, source, source) + return self.mlp(torch.cat([x, message], dim=1)) + + +class AttentionalGNN(nn.Module): + def __init__(self, feature_dim: int, layer_names: List[str]) -> None: + super().__init__() + self.layers = nn.ModuleList([ + AttentionalPropagation(feature_dim, 4) + for _ in range(len(layer_names))]) + self.names = layer_names + + def forward(self, desc0: torch.Tensor, desc1: torch.Tensor) -> Tuple[torch.Tensor,torch.Tensor]: + for layer, name in zip(self.layers, self.names): + if name == 'cross': + src0, src1 = desc1, desc0 + else: # if name == 'self': + src0, src1 = desc0, desc1 + delta0, delta1 = layer(desc0, src0), layer(desc1, src1) + desc0, desc1 = (desc0 + delta0), (desc1 + delta1) + return desc0, desc1 + + +def log_sinkhorn_iterations(Z: torch.Tensor, log_mu: torch.Tensor, log_nu: torch.Tensor, iters: int) -> torch.Tensor: + """ Perform Sinkhorn Normalization in Log-space for stability""" + u, v = torch.zeros_like(log_mu), torch.zeros_like(log_nu) + for _ in range(iters): + u = log_mu - torch.logsumexp(Z + v.unsqueeze(1), dim=2) + v = log_nu - torch.logsumexp(Z + u.unsqueeze(2), dim=1) + return Z + u.unsqueeze(2) + v.unsqueeze(1) + + +def log_optimal_transport(scores: torch.Tensor, alpha: torch.Tensor, iters: int) -> torch.Tensor: + """ Perform Differentiable Optimal Transport in Log-space for stability""" + b, m, n = scores.shape + one = scores.new_tensor(1) + ms, ns = (m*one).to(scores), (n*one).to(scores) + + bins0 = alpha.expand(b, m, 1) + bins1 = alpha.expand(b, 1, n) + alpha = alpha.expand(b, 1, 1) + + couplings = torch.cat([torch.cat([scores, bins0], -1), + torch.cat([bins1, alpha], -1)], 1) + + norm = - (ms + ns).log() + log_mu = torch.cat([norm.expand(m), ns.log()[None] + norm]) + log_nu = torch.cat([norm.expand(n), ms.log()[None] + norm]) + log_mu, log_nu = log_mu[None].expand(b, -1), log_nu[None].expand(b, -1) + + Z = log_sinkhorn_iterations(couplings, log_mu, log_nu, iters) + Z = Z - norm # multiply probabilities by M+N + return Z + + +def arange_like(x, dim: int): + return x.new_ones(x.shape[dim]).cumsum(0) - 1 # traceable in 1.1 + + +class SuperGlue(nn.Module): + """SuperGlue feature matching middle-end + + Given two sets of keypoints and locations, we determine the + correspondences by: + 1. Keypoint Encoding (normalization + visual feature and location fusion) + 2. Graph Neural Network with multiple self and cross-attention layers + 3. Final projection layer + 4. Optimal Transport Layer (a differentiable Hungarian matching algorithm) + 5. Thresholding matrix based on mutual exclusivity and a match_threshold + + The correspondence ids use -1 to indicate non-matching points. + + Paul-Edouard Sarlin, Daniel DeTone, Tomasz Malisiewicz, and Andrew + Rabinovich. SuperGlue: Learning Feature Matching with Graph Neural + Networks. In CVPR, 2020. 
https://arxiv.org/abs/1911.11763 + + """ + default_config = { + 'descriptor_dim': 256, + 'weights': 'indoor', + 'keypoint_encoder': [32, 64, 128, 256], + 'GNN_layers': ['self', 'cross'] * 9, + 'sinkhorn_iterations': 100, + 'match_threshold': 0.2, + } + + def __init__(self, config): + super().__init__() + self.config = {**self.default_config, **config} + + self.kenc = KeypointEncoder( + self.config['descriptor_dim'], self.config['keypoint_encoder']) + + self.gnn = AttentionalGNN( + feature_dim=self.config['descriptor_dim'], layer_names=self.config['GNN_layers']) + + self.final_proj = nn.Conv1d( + self.config['descriptor_dim'], self.config['descriptor_dim'], + kernel_size=1, bias=True) + + bin_score = torch.nn.Parameter(torch.tensor(1.)) + self.register_parameter('bin_score', bin_score) + + assert self.config['weights'] in ['indoor', 'outdoor'] + path = Path(__file__).parent + path = path / 'weights/superglue_{}.pth'.format(self.config['weights']) + self.load_state_dict(torch.load(str(path))) + print('Loaded SuperGlue model (\"{}\" weights)'.format( + self.config['weights'])) + + def forward(self, data): + """Run SuperGlue on a pair of keypoints and descriptors""" + desc0, desc1 = data['descriptors0'], data['descriptors1'] + kpts0, kpts1 = data['keypoints0'], data['keypoints1'] + + if kpts0.shape[1] == 0 or kpts1.shape[1] == 0: # no keypoints + shape0, shape1 = kpts0.shape[:-1], kpts1.shape[:-1] + return { + 'matches0': kpts0.new_full(shape0, -1, dtype=torch.int), + 'matches1': kpts1.new_full(shape1, -1, dtype=torch.int), + 'matching_scores0': kpts0.new_zeros(shape0), + 'matching_scores1': kpts1.new_zeros(shape1), + } + + # Keypoint normalization. + kpts0 = normalize_keypoints(kpts0, data['image0'].shape) + kpts1 = normalize_keypoints(kpts1, data['image1'].shape) + + # Keypoint MLP encoder. + desc0 = desc0 + self.kenc(kpts0, data['scores0']) + desc1 = desc1 + self.kenc(kpts1, data['scores1']) + + # Multi-layer Transformer network. + desc0, desc1 = self.gnn(desc0, desc1) + + # Final MLP projection. + mdesc0, mdesc1 = self.final_proj(desc0), self.final_proj(desc1) + + # Compute matching descriptor distance. + scores = torch.einsum('bdn,bdm->bnm', mdesc0, mdesc1) + scores = scores / self.config['descriptor_dim']**.5 + + # Run the optimal transport. + scores = log_optimal_transport( + scores, self.bin_score, + iters=self.config['sinkhorn_iterations']) + + # Get the matches with score above "match_threshold". 
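+        # scores has shape (b, m+1, n+1); the extra row/column is the dustbin
+        # that absorbs unmatched keypoints, so it is sliced off before the
+        # row/column argmax. A pair is kept only if it is a mutual best match
+        # in both directions and its probability (exp of the log-score)
+        # exceeds match_threshold; everything else is assigned -1.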
+ max0, max1 = scores[:, :-1, :-1].max(2), scores[:, :-1, :-1].max(1) + indices0, indices1 = max0.indices, max1.indices + mutual0 = arange_like(indices0, 1)[None] == indices1.gather(1, indices0) + mutual1 = arange_like(indices1, 1)[None] == indices0.gather(1, indices1) + zero = scores.new_tensor(0) + mscores0 = torch.where(mutual0, max0.values.exp(), zero) + mscores1 = torch.where(mutual1, mscores0.gather(1, indices1), zero) + valid0 = mutual0 & (mscores0 > self.config['match_threshold']) + valid1 = mutual1 & valid0.gather(1, indices1) + indices0 = torch.where(valid0, indices0, indices0.new_tensor(-1)) + indices1 = torch.where(valid1, indices1, indices1.new_tensor(-1)) + + return { + 'matches0': indices0, # use -1 for invalid match + 'matches1': indices1, # use -1 for invalid match + 'matching_scores0': mscores0, + 'matching_scores1': mscores1, + } diff --git a/models/superpoint.py b/models/superpoint.py new file mode 100644 index 0000000..b837d93 --- /dev/null +++ b/models/superpoint.py @@ -0,0 +1,202 @@ +# %BANNER_BEGIN% +# --------------------------------------------------------------------- +# %COPYRIGHT_BEGIN% +# +# Magic Leap, Inc. ("COMPANY") CONFIDENTIAL +# +# Unpublished Copyright (c) 2020 +# Magic Leap, Inc., All Rights Reserved. +# +# NOTICE: All information contained herein is, and remains the property +# of COMPANY. The intellectual and technical concepts contained herein +# are proprietary to COMPANY and may be covered by U.S. and Foreign +# Patents, patents in process, and are protected by trade secret or +# copyright law. Dissemination of this information or reproduction of +# this material is strictly forbidden unless prior written permission is +# obtained from COMPANY. Access to the source code contained herein is +# hereby forbidden to anyone except current COMPANY employees, managers +# or contractors who have executed Confidentiality and Non-disclosure +# agreements explicitly covering such access. +# +# The copyright notice above does not evidence any actual or intended +# publication or disclosure of this source code, which includes +# information that is confidential and/or proprietary, and is a trade +# secret, of COMPANY. ANY REPRODUCTION, MODIFICATION, DISTRIBUTION, +# PUBLIC PERFORMANCE, OR PUBLIC DISPLAY OF OR THROUGH USE OF THIS +# SOURCE CODE WITHOUT THE EXPRESS WRITTEN CONSENT OF COMPANY IS +# STRICTLY PROHIBITED, AND IN VIOLATION OF APPLICABLE LAWS AND +# INTERNATIONAL TREATIES. THE RECEIPT OR POSSESSION OF THIS SOURCE +# CODE AND/OR RELATED INFORMATION DOES NOT CONVEY OR IMPLY ANY RIGHTS +# TO REPRODUCE, DISCLOSE OR DISTRIBUTE ITS CONTENTS, OR TO MANUFACTURE, +# USE, OR SELL ANYTHING THAT IT MAY DESCRIBE, IN WHOLE OR IN PART. 
+# +# %COPYRIGHT_END% +# ---------------------------------------------------------------------- +# %AUTHORS_BEGIN% +# +# Originating Authors: Paul-Edouard Sarlin +# +# %AUTHORS_END% +# --------------------------------------------------------------------*/ +# %BANNER_END% + +from pathlib import Path +import torch +from torch import nn + +def simple_nms(scores, nms_radius: int): + """ Fast Non-maximum suppression to remove nearby points """ + assert(nms_radius >= 0) + + def max_pool(x): + return torch.nn.functional.max_pool2d( + x, kernel_size=nms_radius*2+1, stride=1, padding=nms_radius) + + zeros = torch.zeros_like(scores) + max_mask = scores == max_pool(scores) + for _ in range(2): + supp_mask = max_pool(max_mask.float()) > 0 + supp_scores = torch.where(supp_mask, zeros, scores) + new_max_mask = supp_scores == max_pool(supp_scores) + max_mask = max_mask | (new_max_mask & (~supp_mask)) + return torch.where(max_mask, scores, zeros) + + +def remove_borders(keypoints, scores, border: int, height: int, width: int): + """ Removes keypoints too close to the border """ + mask_h = (keypoints[:, 0] >= border) & (keypoints[:, 0] < (height - border)) + mask_w = (keypoints[:, 1] >= border) & (keypoints[:, 1] < (width - border)) + mask = mask_h & mask_w + return keypoints[mask], scores[mask] + + +def top_k_keypoints(keypoints, scores, k: int): + if k >= len(keypoints): + return keypoints, scores + scores, indices = torch.topk(scores, k, dim=0) + return keypoints[indices], scores + + +def sample_descriptors(keypoints, descriptors, s: int = 8): + """ Interpolate descriptors at keypoint locations """ + b, c, h, w = descriptors.shape + keypoints = keypoints - s / 2 + 0.5 + keypoints /= torch.tensor([(w*s - s/2 - 0.5), (h*s - s/2 - 0.5)], + ).to(keypoints)[None] + keypoints = keypoints*2 - 1 # normalize to (-1, 1) + args = {'align_corners': True} if torch.__version__ >= '1.3' else {} + descriptors = torch.nn.functional.grid_sample( + descriptors, keypoints.view(b, 1, -1, 2), mode='bilinear', **args) + descriptors = torch.nn.functional.normalize( + descriptors.reshape(b, c, -1), p=2, dim=1) + return descriptors + + +class SuperPoint(nn.Module): + """SuperPoint Convolutional Detector and Descriptor + + SuperPoint: Self-Supervised Interest Point Detection and + Description. Daniel DeTone, Tomasz Malisiewicz, and Andrew + Rabinovich. In CVPRW, 2019. 
https://arxiv.org/abs/1712.07629 + + """ + default_config = { + 'descriptor_dim': 256, + 'nms_radius': 4, + 'keypoint_threshold': 0.005, + 'max_keypoints': -1, + 'remove_borders': 4, + } + + def __init__(self, config): + super().__init__() + self.config = {**self.default_config, **config} + + self.relu = nn.ReLU(inplace=True) + self.pool = nn.MaxPool2d(kernel_size=2, stride=2) + c1, c2, c3, c4, c5 = 64, 64, 128, 128, 256 + + self.conv1a = nn.Conv2d(1, c1, kernel_size=3, stride=1, padding=1) + self.conv1b = nn.Conv2d(c1, c1, kernel_size=3, stride=1, padding=1) + self.conv2a = nn.Conv2d(c1, c2, kernel_size=3, stride=1, padding=1) + self.conv2b = nn.Conv2d(c2, c2, kernel_size=3, stride=1, padding=1) + self.conv3a = nn.Conv2d(c2, c3, kernel_size=3, stride=1, padding=1) + self.conv3b = nn.Conv2d(c3, c3, kernel_size=3, stride=1, padding=1) + self.conv4a = nn.Conv2d(c3, c4, kernel_size=3, stride=1, padding=1) + self.conv4b = nn.Conv2d(c4, c4, kernel_size=3, stride=1, padding=1) + + self.convPa = nn.Conv2d(c4, c5, kernel_size=3, stride=1, padding=1) + self.convPb = nn.Conv2d(c5, 65, kernel_size=1, stride=1, padding=0) + + self.convDa = nn.Conv2d(c4, c5, kernel_size=3, stride=1, padding=1) + self.convDb = nn.Conv2d( + c5, self.config['descriptor_dim'], + kernel_size=1, stride=1, padding=0) + + path = Path(__file__).parent / 'weights/superpoint_v1.pth' + self.load_state_dict(torch.load(str(path))) + + mk = self.config['max_keypoints'] + if mk == 0 or mk < -1: + raise ValueError('\"max_keypoints\" must be positive or \"-1\"') + + print('Loaded SuperPoint model') + + def forward(self, data): + """ Compute keypoints, scores, descriptors for image """ + # Shared Encoder + x = self.relu(self.conv1a(data['image'])) + x = self.relu(self.conv1b(x)) + x = self.pool(x) + x = self.relu(self.conv2a(x)) + x = self.relu(self.conv2b(x)) + x = self.pool(x) + x = self.relu(self.conv3a(x)) + x = self.relu(self.conv3b(x)) + x = self.pool(x) + x = self.relu(self.conv4a(x)) + x = self.relu(self.conv4b(x)) + + # Compute the dense keypoint scores + cPa = self.relu(self.convPa(x)) + scores = self.convPb(cPa) + scores = torch.nn.functional.softmax(scores, 1)[:, :-1] + b, _, h, w = scores.shape + scores = scores.permute(0, 2, 3, 1).reshape(b, h, w, 8, 8) + scores = scores.permute(0, 1, 3, 2, 4).reshape(b, h*8, w*8) + scores = simple_nms(scores, self.config['nms_radius']) + + # Extract keypoints + keypoints = [ + torch.nonzero(s > self.config['keypoint_threshold']) + for s in scores] + scores = [s[tuple(k.t())] for s, k in zip(scores, keypoints)] + + # Discard keypoints near the image borders + keypoints, scores = list(zip(*[ + remove_borders(k, s, self.config['remove_borders'], h*8, w*8) + for k, s in zip(keypoints, scores)])) + + # Keep the k keypoints with highest score + if self.config['max_keypoints'] >= 0: + keypoints, scores = list(zip(*[ + top_k_keypoints(k, s, self.config['max_keypoints']) + for k, s in zip(keypoints, scores)])) + + # Convert (h, w) to (x, y) + keypoints = [torch.flip(k, [1]).float() for k in keypoints] + + # Compute the dense descriptors + cDa = self.relu(self.convDa(x)) + descriptors = self.convDb(cDa) + descriptors = torch.nn.functional.normalize(descriptors, p=2, dim=1) + + # Extract descriptors + descriptors = [sample_descriptors(k[None], d[None], 8)[0] + for k, d in zip(keypoints, descriptors)] + + return { + 'keypoints': keypoints, + 'scores': scores, + 'descriptors': descriptors, + } diff --git a/models/utils.py b/models/utils.py new file mode 100644 index 0000000..1a506ec --- /dev/null +++ 
b/models/utils.py
@@ -0,0 +1,567 @@
+# %BANNER_BEGIN%
+# ---------------------------------------------------------------------
+# %COPYRIGHT_BEGIN%
+#
+# Magic Leap, Inc. ("COMPANY") CONFIDENTIAL
+#
+# Unpublished Copyright (c) 2020
+# Magic Leap, Inc., All Rights Reserved.
+#
+# NOTICE: All information contained herein is, and remains the property
+# of COMPANY. The intellectual and technical concepts contained herein
+# are proprietary to COMPANY and may be covered by U.S. and Foreign
+# Patents, patents in process, and are protected by trade secret or
+# copyright law. Dissemination of this information or reproduction of
+# this material is strictly forbidden unless prior written permission is
+# obtained from COMPANY. Access to the source code contained herein is
+# hereby forbidden to anyone except current COMPANY employees, managers
+# or contractors who have executed Confidentiality and Non-disclosure
+# agreements explicitly covering such access.
+#
+# The copyright notice above does not evidence any actual or intended
+# publication or disclosure of this source code, which includes
+# information that is confidential and/or proprietary, and is a trade
+# secret, of COMPANY. ANY REPRODUCTION, MODIFICATION, DISTRIBUTION,
+# PUBLIC PERFORMANCE, OR PUBLIC DISPLAY OF OR THROUGH USE OF THIS
+# SOURCE CODE WITHOUT THE EXPRESS WRITTEN CONSENT OF COMPANY IS
+# STRICTLY PROHIBITED, AND IN VIOLATION OF APPLICABLE LAWS AND
+# INTERNATIONAL TREATIES. THE RECEIPT OR POSSESSION OF THIS SOURCE
+# CODE AND/OR RELATED INFORMATION DOES NOT CONVEY OR IMPLY ANY RIGHTS
+# TO REPRODUCE, DISCLOSE OR DISTRIBUTE ITS CONTENTS, OR TO MANUFACTURE,
+# USE, OR SELL ANYTHING THAT IT MAY DESCRIBE, IN WHOLE OR IN PART.
+#
+# %COPYRIGHT_END%
+# ----------------------------------------------------------------------
+# %AUTHORS_BEGIN%
+#
+#  Originating Authors: Paul-Edouard Sarlin
+#                       Daniel DeTone
+#                       Tomasz Malisiewicz
+#
+# %AUTHORS_END%
+# --------------------------------------------------------------------*/
+# %BANNER_END%
+
+from pathlib import Path
+import time
+from collections import OrderedDict
+from threading import Thread
+import numpy as np
+import cv2
+import torch
+import matplotlib
+matplotlib.use('Agg')  # select the non-interactive backend before pyplot is imported
+import matplotlib.pyplot as plt
+
+
+class AverageTimer:
+    """ Class to help manage printing simple timing of code execution. """
+
+    def __init__(self, smoothing=0.3, newline=False):
+        self.smoothing = smoothing
+        self.newline = newline
+        self.times = OrderedDict()
+        self.will_print = OrderedDict()
+        self.reset()
+
+    def reset(self):
+        now = time.time()
+        self.start = now
+        self.last_time = now
+        for name in self.will_print:
+            self.will_print[name] = False
+
+    def update(self, name='default'):
+        now = time.time()
+        dt = now - self.last_time
+        if name in self.times:
+            dt = self.smoothing * dt + (1 - self.smoothing) * self.times[name]
+        self.times[name] = dt
+        self.will_print[name] = True
+        self.last_time = now
+
+    def print(self, text='Timer'):
+        total = 0.
+        print('[{}]'.format(text), end=' ')
+        for key in self.times:
+            val = self.times[key]
+            if self.will_print[key]:
+                print('%s=%.3f' % (key, val), end=' ')
+                total += val
+        print('total=%.3f sec {%.1f FPS}' % (total, 1./total), end=' ')
+        if self.newline:
+            print(flush=True)
+        else:
+            print(end='\r', flush=True)
+        self.reset()
+
+
+class VideoStreamer:
+    """ Class to help process image streams. Four types of possible inputs:
+        1.) USB Webcam.
+        2.) An IP camera
+        3.) A directory of images (files in directory matching 'image_glob').
+        4.)
A video file, such as an .mp4 or .avi file. + """ + def __init__(self, basedir, resize, skip, image_glob, max_length=1000000): + self._ip_grabbed = False + self._ip_running = False + self._ip_camera = False + self._ip_image = None + self._ip_index = 0 + self.cap = [] + self.camera = True + self.video_file = False + self.listing = [] + self.resize = resize + self.interp = cv2.INTER_AREA + self.i = 0 + self.skip = skip + self.max_length = max_length + if isinstance(basedir, int) or basedir.isdigit(): + print('==> Processing USB webcam input: {}'.format(basedir)) + self.cap = cv2.VideoCapture(int(basedir)) + self.listing = range(0, self.max_length) + elif basedir.startswith(('http', 'rtsp')): + print('==> Processing IP camera input: {}'.format(basedir)) + self.cap = cv2.VideoCapture(basedir) + self.start_ip_camera_thread() + self._ip_camera = True + self.listing = range(0, self.max_length) + elif Path(basedir).is_dir(): + print('==> Processing image directory input: {}'.format(basedir)) + self.listing = list(Path(basedir).glob(image_glob[0])) + for j in range(1, len(image_glob)): + image_path = list(Path(basedir).glob(image_glob[j])) + self.listing = self.listing + image_path + self.listing.sort() + self.listing = self.listing[::self.skip] + self.max_length = np.min([self.max_length, len(self.listing)]) + if self.max_length == 0: + raise IOError('No images found (maybe bad \'image_glob\' ?)') + self.listing = self.listing[:self.max_length] + self.camera = False + elif Path(basedir).exists(): + print('==> Processing video input: {}'.format(basedir)) + self.cap = cv2.VideoCapture(basedir) + self.cap.set(cv2.CAP_PROP_BUFFERSIZE, 1) + num_frames = int(self.cap.get(cv2.CAP_PROP_FRAME_COUNT)) + self.listing = range(0, num_frames) + self.listing = self.listing[::self.skip] + self.video_file = True + self.max_length = np.min([self.max_length, len(self.listing)]) + self.listing = self.listing[:self.max_length] + else: + raise ValueError('VideoStreamer input \"{}\" not recognized.'.format(basedir)) + if self.camera and not self.cap.isOpened(): + raise IOError('Could not read camera') + + def load_image(self, impath): + """ Read image as grayscale and resize to img_size. + Inputs + impath: Path to input image. + Returns + grayim: uint8 numpy array sized H x W. + """ + grayim = cv2.imread(impath, 0) + if grayim is None: + raise Exception('Error reading image %s' % impath) + w, h = grayim.shape[1], grayim.shape[0] + w_new, h_new = process_resize(w, h, self.resize) + grayim = cv2.resize( + grayim, (w_new, h_new), interpolation=self.interp) + return grayim + + def next_frame(self): + """ Return the next frame, and increment internal counter. + Returns + image: Next H x W image. + status: True or False depending whether image was loaded. 
+ """ + + if self.i == self.max_length: + return (None, False) + if self.camera: + + if self._ip_camera: + #Wait for first image, making sure we haven't exited + while self._ip_grabbed is False and self._ip_exited is False: + time.sleep(.001) + + ret, image = self._ip_grabbed, self._ip_image.copy() + if ret is False: + self._ip_running = False + else: + ret, image = self.cap.read() + if ret is False: + print('VideoStreamer: Cannot get image from camera') + return (None, False) + w, h = image.shape[1], image.shape[0] + if self.video_file: + self.cap.set(cv2.CAP_PROP_POS_FRAMES, self.listing[self.i]) + + w_new, h_new = process_resize(w, h, self.resize) + image = cv2.resize(image, (w_new, h_new), + interpolation=self.interp) + image = cv2.cvtColor(image, cv2.COLOR_RGB2GRAY) + else: + image_file = str(self.listing[self.i]) + image = self.load_image(image_file) + self.i = self.i + 1 + return (image, True) + + def start_ip_camera_thread(self): + self._ip_thread = Thread(target=self.update_ip_camera, args=()) + self._ip_running = True + self._ip_thread.start() + self._ip_exited = False + return self + + def update_ip_camera(self): + while self._ip_running: + ret, img = self.cap.read() + if ret is False: + self._ip_running = False + self._ip_exited = True + self._ip_grabbed = False + return + + self._ip_image = img + self._ip_grabbed = ret + self._ip_index += 1 + #print('IPCAMERA THREAD got frame {}'.format(self._ip_index)) + + + def cleanup(self): + self._ip_running = False + +# --- PREPROCESSING --- + +def process_resize(w, h, resize): + assert(len(resize) > 0 and len(resize) <= 2) + if len(resize) == 1 and resize[0] > -1: + scale = resize[0] / max(h, w) + w_new, h_new = int(round(w*scale)), int(round(h*scale)) + elif len(resize) == 1 and resize[0] == -1: + w_new, h_new = w, h + else: # len(resize) == 2: + w_new, h_new = resize[0], resize[1] + + # Issue warning if resolution is too small or too large. 
+    if max(w_new, h_new) < 160:
+        print('Warning: input resolution is very small, results may vary')
+    elif max(w_new, h_new) > 2000:
+        print('Warning: input resolution is very large, results may vary')
+
+    return w_new, h_new
+
+
+def frame2tensor(frame, device):
+    return torch.from_numpy(frame/255.).float()[None, None].to(device)
+
+
+def read_image(path, device, resize, rotation, resize_float):
+    image = cv2.imread(str(path), cv2.IMREAD_GRAYSCALE)
+    if image is None:
+        return None, None, None
+    w, h = image.shape[1], image.shape[0]
+    w_new, h_new = process_resize(w, h, resize)
+    scales = (float(w) / float(w_new), float(h) / float(h_new))
+
+    if resize_float:
+        image = cv2.resize(image.astype('float32'), (w_new, h_new))
+    else:
+        image = cv2.resize(image, (w_new, h_new)).astype('float32')
+
+    if rotation != 0:
+        image = np.rot90(image, k=rotation)
+        if rotation % 2:
+            scales = scales[::-1]
+
+    inp = frame2tensor(image, device)
+    return image, inp, scales
+
+
+def process_image(image, device, resize, rotation, resize_float):
+    # Check for a missing image before any OpenCV call, which would
+    # otherwise raise on a None input.
+    if image is None:
+        return None, None, None
+    image = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
+    w, h = image.shape[1], image.shape[0]
+    w_new, h_new = process_resize(w, h, resize)
+    scales = (float(w) / float(w_new), float(h) / float(h_new))
+
+    if resize_float:
+        image = cv2.resize(image.astype('float32'), (w_new, h_new))
+    else:
+        image = cv2.resize(image, (w_new, h_new)).astype('float32')
+
+    if rotation != 0:
+        image = np.rot90(image, k=rotation)
+        if rotation % 2:
+            scales = scales[::-1]
+
+    inp = frame2tensor(image, device)
+    return image, inp, scales
+
+# --- GEOMETRY ---
+
+
+def estimate_pose(kpts0, kpts1, K0, K1, thresh, conf=0.99999):
+    if len(kpts0) < 5:
+        return None
+
+    # Average the focal lengths of both cameras.
+    f_mean = np.mean([K0[0, 0], K0[1, 1], K1[0, 0], K1[1, 1]])
+    norm_thresh = thresh / f_mean
+
+    kpts0 = (kpts0 - K0[[0, 1], [2, 2]][None]) / K0[[0, 1], [0, 1]][None]
+    kpts1 = (kpts1 - K1[[0, 1], [2, 2]][None]) / K1[[0, 1], [0, 1]][None]
+
+    E, mask = cv2.findEssentialMat(
+        kpts0, kpts1, np.eye(3), threshold=norm_thresh, prob=conf,
+        method=cv2.RANSAC)
+
+    assert E is not None
+
+    best_num_inliers = 0
+    ret = None
+    for _E in np.split(E, len(E) / 3):
+        n, R, t, _ = cv2.recoverPose(
+            _E, kpts0, kpts1, np.eye(3), 1e9, mask=mask)
+        if n > best_num_inliers:
+            best_num_inliers = n
+            ret = (R, t[:, 0], mask.ravel() > 0)
+    return ret
+
+
+def rotate_intrinsics(K, image_shape, rot):
+    """image_shape is the shape of the image after rotation"""
+    assert rot <= 3
+    h, w = image_shape[:2][::-1 if (rot % 2) else 1]
+    fx, fy, cx, cy = K[0, 0], K[1, 1], K[0, 2], K[1, 2]
+    rot = rot % 4
+    if rot == 1:
+        return np.array([[fy, 0., cy],
+                         [0., fx, w-1-cx],
+                         [0., 0., 1.]], dtype=K.dtype)
+    elif rot == 2:
+        return np.array([[fx, 0., w-1-cx],
+                         [0., fy, h-1-cy],
+                         [0., 0., 1.]], dtype=K.dtype)
+    else:  # if rot == 3:
+        return np.array([[fy, 0., h-1-cy],
+                         [0., fx, cx],
+                         [0., 0., 1.]], dtype=K.dtype)
+
+
+def rotate_pose_inplane(i_T_w, rot):
+    rotation_matrices = [
+        np.array([[np.cos(r), -np.sin(r), 0., 0.],
+                  [np.sin(r), np.cos(r), 0., 0.],
+                  [0., 0., 1., 0.],
+                  [0., 0., 0., 1.]], dtype=np.float32)
+        for r in [np.deg2rad(d) for d in (0, 270, 180, 90)]
+    ]
+    return np.dot(rotation_matrices[rot], i_T_w)
+
+
+def scale_intrinsics(K, scales):
+    scales = np.diag([1./scales[0], 1./scales[1], 1.])
+    return np.dot(scales, K)
+
+
+def to_homogeneous(points):
+    return np.concatenate([points, np.ones_like(points[:, :1])], axis=-1)
+
+
+def compute_epipolar_error(kpts0, kpts1, T_0to1, K0, K1):
+    kpts0 = (kpts0 - K0[[0, 1], [2, 2]][None]) / K0[[0, 1], [0, 1]][None]
+    kpts1 = (kpts1 - K1[[0, 1], [2, 2]][None]) / K1[[0, 1], [0, 1]][None]
+    kpts0 = to_homogeneous(kpts0)
+    kpts1 = to_homogeneous(kpts1)
+
+    t0, t1, t2 = T_0to1[:3, 3]
+    t_skew = np.array([
+        [0, -t2, t1],
+        [t2, 0, -t0],
+        [-t1, t0, 0]
+    ])
+    E = t_skew @ T_0to1[:3, :3]
+
+    Ep0 = kpts0 @ E.T  # N x 3
+    p1Ep0 = np.sum(kpts1 * Ep0, -1)  # N
+    Etp1 = kpts1 @ E  # N x 3
+    d = p1Ep0**2 * (1.0 / (Ep0[:, 0]**2 + Ep0[:, 1]**2)
+                    + 1.0 / (Etp1[:, 0]**2 + Etp1[:, 1]**2))
+    return d
+
+
+def angle_error_mat(R1, R2):
+    cos = (np.trace(np.dot(R1.T, R2)) - 1) / 2
+    cos = np.clip(cos, -1., 1.)  # numerical errors can make it out of bounds
+    return np.rad2deg(np.abs(np.arccos(cos)))
+
+
+def angle_error_vec(v1, v2):
+    n = np.linalg.norm(v1) * np.linalg.norm(v2)
+    return np.rad2deg(np.arccos(np.clip(np.dot(v1, v2) / n, -1.0, 1.0)))
+
+
+def compute_pose_error(T_0to1, R, t):
+    R_gt = T_0to1[:3, :3]
+    t_gt = T_0to1[:3, 3]
+    error_t = angle_error_vec(t, t_gt)
+    error_t = np.minimum(error_t, 180 - error_t)  # ambiguity of E estimation
+    error_R = angle_error_mat(R, R_gt)
+    return error_t, error_R
+
+
+def pose_auc(errors, thresholds):
+    sort_idx = np.argsort(errors)
+    errors = np.array(errors.copy())[sort_idx]
+    recall = (np.arange(len(errors)) + 1) / len(errors)
+    errors = np.r_[0., errors]
+    recall = np.r_[0., recall]
+    aucs = []
+    for t in thresholds:
+        last_index = np.searchsorted(errors, t)
+        r = np.r_[recall[:last_index], recall[last_index-1]]
+        e = np.r_[errors[:last_index], t]
+        aucs.append(np.trapz(r, x=e)/t)
+    return aucs
+
+
+# --- VISUALIZATION ---
+
+
+def plot_image_pair(imgs, dpi=100, size=6, pad=.5):
+    n = len(imgs)
+    assert n == 2, 'number of images must be two'
+    figsize = (size*n, size*3/4) if size is not None else None
+    _, ax = plt.subplots(1, n, figsize=figsize, dpi=dpi)
+    for i in range(n):
+        ax[i].imshow(imgs[i], cmap=plt.get_cmap('gray'), vmin=0, vmax=255)
+        ax[i].get_yaxis().set_ticks([])
+        ax[i].get_xaxis().set_ticks([])
+        for spine in ax[i].spines.values():  # remove frame
+            spine.set_visible(False)
+    plt.tight_layout(pad=pad)
+
+
+def plot_keypoints(kpts0, kpts1, color='w', ps=2):
+    ax = plt.gcf().axes
+    ax[0].scatter(kpts0[:, 0], kpts0[:, 1], c=color, s=ps)
+    ax[1].scatter(kpts1[:, 0], kpts1[:, 1], c=color, s=ps)
+
+
+def plot_matches(kpts0, kpts1, color, lw=1.5, ps=4):
+    fig = plt.gcf()
+    ax = fig.axes
+    fig.canvas.draw()
+
+    transFigure = fig.transFigure.inverted()
+    fkpts0 = transFigure.transform(ax[0].transData.transform(kpts0))
+    fkpts1 = transFigure.transform(ax[1].transData.transform(kpts1))
+
+    fig.lines = [matplotlib.lines.Line2D(
+        (fkpts0[i, 0], fkpts1[i, 0]), (fkpts0[i, 1], fkpts1[i, 1]), zorder=1,
+        transform=fig.transFigure, c=color[i], linewidth=lw)
+        for i in range(len(kpts0))]
+    ax[0].scatter(kpts0[:, 0], kpts0[:, 1], c=color, s=ps)
+    ax[1].scatter(kpts1[:, 0], kpts1[:, 1], c=color, s=ps)
+
+
+def make_matching_plot(image0, image1, kpts0, kpts1, mkpts0, mkpts1,
+                       color, text, path, show_keypoints=False,
+                       fast_viz=False, opencv_display=False,
+                       opencv_title='matches', small_text=[]):
+
+    if fast_viz:
+        make_matching_plot_fast(image0, image1, kpts0, kpts1, mkpts0, mkpts1,
+                                color, text, path, show_keypoints, 10,
+                                opencv_display, opencv_title, small_text)
+        return
+
+    plot_image_pair([image0, image1])
+    if show_keypoints:
+        plot_keypoints(kpts0, kpts1, color='k', ps=4)
+        plot_keypoints(kpts0, kpts1, color='w', ps=2)
+    plot_matches(mkpts0, mkpts1, color)
+
+    fig = plt.gcf()
+    txt_color = 'k' if image0[:100, :150].mean() > 200
else 'w' + fig.text( + 0.01, 0.99, '\n'.join(text), transform=fig.axes[0].transAxes, + fontsize=15, va='top', ha='left', color=txt_color) + + txt_color = 'k' if image0[-100:, :150].mean() > 200 else 'w' + fig.text( + 0.01, 0.01, '\n'.join(small_text), transform=fig.axes[0].transAxes, + fontsize=5, va='bottom', ha='left', color=txt_color) + + plt.savefig(str(path), bbox_inches='tight', pad_inches=0) + plt.close() + + +def make_matching_plot_fast(image0, image1, kpts0, kpts1, mkpts0, + mkpts1, color, text, path=None, + show_keypoints=False, margin=10, + opencv_display=False, opencv_title='', + small_text=[]): + H0, W0 = image0.shape + H1, W1 = image1.shape + H, W = max(H0, H1), W0 + W1 + margin + + out = 255*np.ones((H, W), np.uint8) + out[:H0, :W0] = image0 + out[:H1, W0+margin:] = image1 + out = np.stack([out]*3, -1) + + if show_keypoints: + kpts0, kpts1 = np.round(kpts0).astype(int), np.round(kpts1).astype(int) + white = (255, 255, 255) + black = (0, 0, 0) + for x, y in kpts0: + cv2.circle(out, (x, y), 2, black, -1, lineType=cv2.LINE_AA) + cv2.circle(out, (x, y), 1, white, -1, lineType=cv2.LINE_AA) + for x, y in kpts1: + cv2.circle(out, (x + margin + W0, y), 2, black, -1, + lineType=cv2.LINE_AA) + cv2.circle(out, (x + margin + W0, y), 1, white, -1, + lineType=cv2.LINE_AA) + + mkpts0, mkpts1 = np.round(mkpts0).astype(int), np.round(mkpts1).astype(int) + color = (np.array(color[:, :3])*255).astype(int)[:, ::-1] + for (x0, y0), (x1, y1), c in zip(mkpts0, mkpts1, color): + c = c.tolist() + cv2.line(out, (x0, y0), (x1 + margin + W0, y1), + color=c, thickness=1, lineType=cv2.LINE_AA) + # display line end-points as circles + cv2.circle(out, (x0, y0), 2, c, -1, lineType=cv2.LINE_AA) + cv2.circle(out, (x1 + margin + W0, y1), 2, c, -1, + lineType=cv2.LINE_AA) + + # Scale factor for consistent visualization across scales. + sc = min(H / 640., 2.0) + + # Big text. + Ht = int(30 * sc) # text height + txt_color_fg = (255, 255, 255) + txt_color_bg = (0, 0, 0) + for i, t in enumerate(text): + cv2.putText(out, t, (int(8*sc), Ht*(i+1)), cv2.FONT_HERSHEY_DUPLEX, + 1.0*sc, txt_color_bg, 2, cv2.LINE_AA) + cv2.putText(out, t, (int(8*sc), Ht*(i+1)), cv2.FONT_HERSHEY_DUPLEX, + 1.0*sc, txt_color_fg, 1, cv2.LINE_AA) + + # Small text. 
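+    # The small text is rendered bottom-up from the lower-left corner (hence
+    # reversed(small_text)), at half the scale of the big text above.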
+ Ht = int(18 * sc) # text height + for i, t in enumerate(reversed(small_text)): + cv2.putText(out, t, (int(8*sc), int(H-Ht*(i+.6))), cv2.FONT_HERSHEY_DUPLEX, + 0.5*sc, txt_color_bg, 2, cv2.LINE_AA) + cv2.putText(out, t, (int(8*sc), int(H-Ht*(i+.6))), cv2.FONT_HERSHEY_DUPLEX, + 0.5*sc, txt_color_fg, 1, cv2.LINE_AA) + return out + + +def error_colormap(x): + return np.clip( + np.stack([2-x*2, x*2, np.zeros_like(x), np.ones_like(x)], -1), 0, 1) diff --git a/models/weights/superglue_indoor.pth b/models/weights/superglue_indoor.pth new file mode 100644 index 0000000..9692521 --- /dev/null +++ b/models/weights/superglue_indoor.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0e710469be25ebe1e2ccf68edcae8b2945b0617c8e7e68412251d9d47f5052b1 +size 48233807 diff --git a/models/weights/superglue_outdoor.pth b/models/weights/superglue_outdoor.pth new file mode 100644 index 0000000..79db4b5 --- /dev/null +++ b/models/weights/superglue_outdoor.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2f5f5e9bb3febf07b69df633c4c3ff7a17f8af26a023aae2b9303d22339195bd +size 48233807 diff --git a/models/weights/superpoint_v1.pth b/models/weights/superpoint_v1.pth new file mode 100644 index 0000000..7648726 --- /dev/null +++ b/models/weights/superpoint_v1.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:52b6708629640ca883673b5d5c097c4ddad37d8048b33f09c8ca0d69db12c40e +size 5206086 diff --git a/outdoor-1.JPEG b/outdoor-1.JPEG new file mode 100644 index 0000000..a49cbd3 Binary files /dev/null and b/outdoor-1.JPEG differ diff --git a/outdoor-2.JPEG b/outdoor-2.JPEG new file mode 100644 index 0000000..7a3d49f Binary files /dev/null and b/outdoor-2.JPEG differ diff --git a/requirements.txt b/requirements.txt new file mode 100644 index 0000000..4fd2382 --- /dev/null +++ b/requirements.txt @@ -0,0 +1,61 @@ +aiohttp==3.8.3 +aiosignal==1.2.0 +anyio==3.6.2 +async-timeout==4.0.2 +attrs==22.1.0 +bcrypt==4.0.1 +certifi==2022.9.24 +cffi==1.15.1 +charset-normalizer==2.1.1 +click==8.1.3 +contourpy==1.0.6 +cryptography==38.0.1 +cycler==0.11.0 +fastapi==0.85.1 +ffmpy==0.3.0 +fonttools==4.38.0 +frozenlist==1.3.1 +fsspec==2022.10.0 +gradio==3.8.1 +h11==0.12.0 +httpcore==0.15.0 +httpx==0.23.0 +idna==3.4 +Jinja2==3.1.2 +kiwisolver==1.4.4 +linkify-it-py==1.0.3 +markdown-it-py==2.1.0 +MarkupSafe==2.1.1 +matplotlib==3.6.1 +mdit-py-plugins==0.3.1 +mdurl==0.1.2 +multidict==6.0.2 +numpy==1.23.4 +opencv-python==4.6.0.66 +orjson==3.8.1 +packaging==21.3 +pandas==1.5.1 +paramiko==2.11.0 +Pillow==9.3.0 +pycparser==2.21 +pycryptodome==3.15.0 +pydantic==1.10.2 +pydub==0.25.1 +PyNaCl==1.5.0 +pyparsing==3.0.9 +python-dateutil==2.8.2 +python-multipart==0.0.5 +pytz==2022.5 +PyYAML==6.0 +requests==2.28.1 +rfc3986==1.5.0 +six==1.16.0 +sniffio==1.3.0 +starlette==0.20.4 +torch==1.13.0 +typing_extensions==4.4.0 +uc-micro-py==1.0.1 +urllib3==1.26.12 +uvicorn==0.19.0 +websockets==10.4 +yarl==1.8.1 diff --git a/taj-1.jpg b/taj-1.jpg new file mode 100644 index 0000000..ea6b62a Binary files /dev/null and b/taj-1.jpg differ diff --git a/taj-2.jpg b/taj-2.jpg new file mode 100644 index 0000000..313e639 Binary files /dev/null and b/taj-2.jpg differ
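For readers who want to drive the matcher from a plain script rather than the Gradio app, a minimal sketch follows. It reuses Matching and read_image from this commit and mirrors the config values in app.py; the image paths (the bundled taj-*.jpg pair) and the settings shown are illustrative assumptions, not a prescribed interface.

import torch
from models.matching import Matching
from models.utils import read_image

torch.set_grad_enabled(False)
device = 'cuda' if torch.cuda.is_available() else 'cpu'

# Same structure as the configs in app.py; values are illustrative.
config = {
    'superpoint': {'nms_radius': 4, 'keypoint_threshold': 0.005,
                   'max_keypoints': 1024},
    'superglue': {'weights': 'outdoor', 'sinkhorn_iterations': 20,
                  'match_threshold': 0.2},
}
matching = Matching(config).eval().to(device)

# read_image returns (grayscale image, input tensor, rescale factors).
# resize=[640, 640] forces 640x640; per process_resize, a single value would
# scale the longest side and [-1] would keep the original resolution.
image0, inp0, scales0 = read_image('./taj-1.jpg', device, [640, 640], 0, False)
image1, inp1, scales1 = read_image('./taj-2.jpg', device, [640, 640], 0, False)
assert image0 is not None and image1 is not None, 'could not read the image pair'

pred = matching({'image0': inp0, 'image1': inp1})
pred = {k: v[0].detach().cpu().numpy() for k, v in pred.items()}

matches = pred['matches0']          # index into keypoints1, or -1 if unmatched
valid = matches > -1
mkpts0 = pred['keypoints0'][valid]
mkpts1 = pred['keypoints1'][matches[valid]]
print('{} matches out of {} keypoints'.format(len(mkpts0), len(pred['keypoints0'])))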