SuperGlue-Image-Matching
Build-Deploy-Actions Details

This commit is contained in:
jianjiang 2023-04-25 20:01:16 +08:00
commit 5723ddf349
23 changed files with 1444 additions and 0 deletions

33
.gitattributes vendored Normal file
View File

@ -0,0 +1,33 @@
# Every pattern below is stored through Git LFS (filter/diff/merge all set to
# the `lfs` driver). `-text` unsets the text attribute so Git performs no
# end-of-line conversion on these files.
*.7z filter=lfs diff=lfs merge=lfs -text
*.arrow filter=lfs diff=lfs merge=lfs -text
*.bin filter=lfs diff=lfs merge=lfs -text
*.bz2 filter=lfs diff=lfs merge=lfs -text
*.ftz filter=lfs diff=lfs merge=lfs -text
*.gz filter=lfs diff=lfs merge=lfs -text
*.h5 filter=lfs diff=lfs merge=lfs -text
*.joblib filter=lfs diff=lfs merge=lfs -text
*.lfs.* filter=lfs diff=lfs merge=lfs -text
*.mlmodel filter=lfs diff=lfs merge=lfs -text
*.model filter=lfs diff=lfs merge=lfs -text
*.msgpack filter=lfs diff=lfs merge=lfs -text
*.npy filter=lfs diff=lfs merge=lfs -text
*.npz filter=lfs diff=lfs merge=lfs -text
*.onnx filter=lfs diff=lfs merge=lfs -text
*.ot filter=lfs diff=lfs merge=lfs -text
*.parquet filter=lfs diff=lfs merge=lfs -text
*.pb filter=lfs diff=lfs merge=lfs -text
*.pickle filter=lfs diff=lfs merge=lfs -text
*.pkl filter=lfs diff=lfs merge=lfs -text
*.pt filter=lfs diff=lfs merge=lfs -text
*.pth filter=lfs diff=lfs merge=lfs -text
*.rar filter=lfs diff=lfs merge=lfs -text
*.safetensors filter=lfs diff=lfs merge=lfs -text
saved_model/**/* filter=lfs diff=lfs merge=lfs -text
*.tar.* filter=lfs diff=lfs merge=lfs -text
*.tflite filter=lfs diff=lfs merge=lfs -text
*.tgz filter=lfs diff=lfs merge=lfs -text
*.wasm filter=lfs diff=lfs merge=lfs -text
*.xz filter=lfs diff=lfs merge=lfs -text
*.zip filter=lfs diff=lfs merge=lfs -text
*.zst filter=lfs diff=lfs merge=lfs -text
*tfevents* filter=lfs diff=lfs merge=lfs -text

View File

@ -0,0 +1,47 @@
# Builds the Docker image for every push and publishes it to the private
# registry, tagged with the first 10 characters of the commit SHA.
name: Build
run-name: ${{ github.actor }} is upgrade release 🚀
on: [push]
env:
  REPOSITORY: ${{ github.repository }}
  COMMIT_ID: ${{ github.sha }}
jobs:
  Build-Deploy-Actions:
    runs-on: ubuntu-latest
    steps:
      - run: echo "🎉 The job was automatically triggered by a ${{ github.event_name }} event."
      - run: echo "🐧 This job is now running on a ${{ runner.os }} server hosted by Gitea!"
      - run: echo "🔎 The name of your branch is ${{ github.ref }} and your repository is ${{ github.repository }}."
      - name: Check out repository code
        uses: actions/checkout@v3
      # The model weights are tracked by Git LFS (see .gitattributes), so the
      # real files must be fetched before the image is built.
      - name: Setup Git LFS
        run: |
          git lfs install
          git lfs fetch
          git lfs checkout
      - name: List files in the repository
        run: |
          ls ${{ github.workspace }}
      # Step outputs are written to $GITHUB_OUTPUT; the `::set-output`
      # workflow command used previously is deprecated.
      - name: Docker Image Info
        id: image-info
        run: |
          echo "image_name=$(echo "$REPOSITORY" | tr '[:upper:]' '[:lower:]')" >> "$GITHUB_OUTPUT"
          echo "image_tag=${COMMIT_ID:0:10}" >> "$GITHUB_OUTPUT"
      - name: Login to Docker Hub
        uses: docker/login-action@v2
        with:
          registry: artifacts.iflytek.com
          username: ${{ secrets.DOCKERHUB_USERNAME }}
          password: ${{ secrets.DOCKERHUB_TOKEN }}
      - name: Set up Docker Buildx
        uses: docker/setup-buildx-action@v2
      - name: Build and push
        env:
          # Single definition of the image reference used by build, push and cleanup.
          IMAGE: artifacts.iflytek.com/docker-private/atp/${{ steps.image-info.outputs.image_name }}:${{ steps.image-info.outputs.image_tag }}
        run: |
          docker version
          docker buildx build -t "$IMAGE" . --file "${{ github.workspace }}/Dockerfile" --load
          docker push "$IMAGE"
          # Remove the local copy so the runner does not accumulate images.
          docker rmi "$IMAGE"
      - run: echo "🍏 This job's status is ${{ job.status }}."

15
Dockerfile Normal file
View File

@ -0,0 +1,15 @@
#FROM python:3.8.13
FROM artifacts.iflytek.com/docker-private/atp/base_image_for_ailab:0.0.1
WORKDIR /app

# System packages first: they change least often, so this layer stays cached.
# The apt package lists are removed in the same layer to keep the image small.
RUN sed -i 's/deb.debian.org/mirrors.ustc.edu.cn/g' /etc/apt/sources.list \
    && apt-get update \
    && apt-get install ffmpeg libsm6 libxext6 -y \
    && rm -rf /var/lib/apt/lists/*

# Copy only the dependency manifest before installing, so that editing the
# application sources does not invalidate the (slow) pip layer.
COPY requirements.txt /app/requirements.txt
RUN pip config set global.index-url https://pypi.mirrors.ustc.edu.cn/simple \
    && pip install --no-cache-dir -r requirements.txt

# Application sources and bundled weights.
COPY . /app
CMD ["python", "app.py"]

12
README.md Normal file
View File

@ -0,0 +1,12 @@
---
title: SuperGlue Image Matching
emoji: 🧚‍♀️
colorFrom: purple
colorTo: indigo
sdk: gradio
sdk_version: 3.8.1
app_file: app.py
pinned: false
---
Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference

129
app.py Normal file
View File

@ -0,0 +1,129 @@
import matplotlib.cm as cm
import torch
import gradio as gr
from models.matching import Matching
from models.utils import (make_matching_plot_fast, process_image)
# Gradient tracking is switched off globally for the whole process.
torch.set_grad_enabled(False)

# Load the SuperPoint and SuperGlue models.
device = 'cuda' if torch.cuda.is_available() else 'cpu'
resize = [640, 640]
max_keypoints = 1024
keypoint_threshold = 0.005
nms_radius = 4
sinkhorn_iterations = 20
match_threshold = 0.2
resize_float = False


def _matching_config(weights):
    """Return a fresh Matching config that differs only in the SuperGlue weights."""
    superpoint_cfg = {
        'nms_radius': nms_radius,
        'keypoint_threshold': keypoint_threshold,
        'max_keypoints': max_keypoints
    }
    superglue_cfg = {
        'weights': weights,
        'sinkhorn_iterations': sinkhorn_iterations,
        'match_threshold': match_threshold,
    }
    return {'superpoint': superpoint_cfg, 'superglue': superglue_cfg}


config_indoor = _matching_config("indoor")
config_outdoor = _matching_config("outdoor")

# Both models are built once at import time and reused for every request.
matching_indoor = Matching(config_indoor).eval().to(device)
matching_outdoor = Matching(config_outdoor).eval().to(device)
def run(input0, input1, superglue):
    """Match keypoints between two images and render the result.

    Args:
        input0: First image, forwarded unchanged to ``process_image``
            (presumably the array produced by ``gr.Image`` -- confirm).
        input1: Second image, handled like ``input0``.
        superglue: ``"indoor"`` selects the indoor model; any other value
            selects the outdoor model.

    Returns:
        A 3-tuple ``(plot, match_count, match_percentage)`` where ``plot`` is
        the value returned by ``make_matching_plot_fast`` and the other two
        are strings. If either image could not be processed, ``plot`` is
        ``None`` and the second element carries the error message, so the
        caller always receives three values.
    """
    matching = matching_indoor if superglue == "indoor" else matching_outdoor

    name0, name1 = 'image1', 'image2'

    # Rotation is fixed to 0 for both images (no EXIF handling here).
    rot0, rot1 = 0, 0

    # Load the image pair; the third return value is not needed here.
    image0, inp0, _ = process_image(input0, device, resize, rot0, resize_float)
    image1, inp1, _ = process_image(input1, device, resize, rot1, resize_float)

    if image0 is None or image1 is None:
        print('Problem reading image pair')
        # Keep the arity of the success path: three output components.
        return None, 'Problem reading image pair', ''

    # Perform the matching.
    pred = matching({'image0': inp0, 'image1': inp1})
    # Tensors may live on the GPU; .numpy() only works on CPU tensors.
    pred = {k: v[0].detach().cpu().numpy() for k, v in pred.items()}
    kpts0, kpts1 = pred['keypoints0'], pred['keypoints1']
    matches, conf = pred['matches0'], pred['matching_scores0']

    # -1 marks keypoints of image 0 that have no partner in image 1.
    valid = matches > -1
    mkpts0 = kpts0[valid]
    mkpts1 = kpts1[matches[valid]]
    mconf = conf[valid]

    # Visualize the matches.
    color = cm.jet(mconf)
    text = [
        'SuperGlue',
        'Keypoints: {}:{}'.format(len(kpts0), len(kpts1)),
        '{}'.format(len(mkpts0)),
    ]
    if rot0 != 0 or rot1 != 0:
        text.append('Rotation: {}:{}'.format(rot0, rot1))

    # Display extra parameter info.
    k_thresh = matching.superpoint.config['keypoint_threshold']
    m_thresh = matching.superglue.config['match_threshold']
    small_text = [
        'Keypoint Threshold: {:.4f}'.format(k_thresh),
        'Match Threshold: {:.2f}'.format(m_thresh),
        'Image Pair: {}:{}'.format(name0, name1),
    ]
    output = make_matching_plot_fast(
        image0, image1, kpts0, kpts1, mkpts0, mkpts1, color,
        text, show_keypoints=True, small_text=small_text)

    # An image without any detected keypoint would otherwise divide by zero.
    match_ratio = len(mkpts0) / len(kpts0) if len(kpts0) else 0.0
    print('Source Image - {}, Destination Image - {}, {}, Match Percentage - {}'.format(name0, name1, text[2], match_ratio))
    return output, text[2], str(match_ratio * 100.0) + '%'
if __name__ == '__main__':
    # Widgets are created in the same order as the arguments/results of run().
    input_widgets = [
        gr.Image(label='Input Image'),
        gr.Image(label='Match Image'),
        gr.Radio(choices=["indoor", "outdoor"], value="indoor", type="value", label="SuperGlueType", interactive=True),
    ]
    output_widgets = [
        gr.Image(type="pil", label="Result"),
        gr.Textbox(label="Keypoints Matched"),
        gr.Textbox(label="Match Percentage"),
    ]
    example_rows = [
        ['./taj-1.jpg', './taj-2.jpg', "outdoor"],
        ['./outdoor-1.JPEG', './outdoor-2.JPEG', "outdoor"],
    ]

    glue = gr.Interface(
        fn=run,
        inputs=input_widgets,
        outputs=output_widgets,
        examples=example_rows,
    )
    glue.queue()
    # Bind to all interfaces so the app is reachable from outside a container.
    glue.launch(server_name="0.0.0.0")

0
models/__init__.py Normal file
View File

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

84
models/matching.py Normal file
View File

@ -0,0 +1,84 @@
# %BANNER_BEGIN%
# ---------------------------------------------------------------------
# %COPYRIGHT_BEGIN%
#
# Magic Leap, Inc. ("COMPANY") CONFIDENTIAL
#
# Unpublished Copyright (c) 2020
# Magic Leap, Inc., All Rights Reserved.
#
# NOTICE: All information contained herein is, and remains the property
# of COMPANY. The intellectual and technical concepts contained herein
# are proprietary to COMPANY and may be covered by U.S. and Foreign
# Patents, patents in process, and are protected by trade secret or
# copyright law. Dissemination of this information or reproduction of
# this material is strictly forbidden unless prior written permission is
# obtained from COMPANY. Access to the source code contained herein is
# hereby forbidden to anyone except current COMPANY employees, managers
# or contractors who have executed Confidentiality and Non-disclosure
# agreements explicitly covering such access.
#
# The copyright notice above does not evidence any actual or intended
# publication or disclosure of this source code, which includes
# information that is confidential and/or proprietary, and is a trade
# secret, of COMPANY. ANY REPRODUCTION, MODIFICATION, DISTRIBUTION,
# PUBLIC PERFORMANCE, OR PUBLIC DISPLAY OF OR THROUGH USE OF THIS
# SOURCE CODE WITHOUT THE EXPRESS WRITTEN CONSENT OF COMPANY IS
# STRICTLY PROHIBITED, AND IN VIOLATION OF APPLICABLE LAWS AND
# INTERNATIONAL TREATIES. THE RECEIPT OR POSSESSION OF THIS SOURCE
# CODE AND/OR RELATED INFORMATION DOES NOT CONVEY OR IMPLY ANY RIGHTS
# TO REPRODUCE, DISCLOSE OR DISTRIBUTE ITS CONTENTS, OR TO MANUFACTURE,
# USE, OR SELL ANYTHING THAT IT MAY DESCRIBE, IN WHOLE OR IN PART.
#
# %COPYRIGHT_END%
# ----------------------------------------------------------------------
# %AUTHORS_BEGIN%
#
# Originating Authors: Paul-Edouard Sarlin
#
# %AUTHORS_END%
# --------------------------------------------------------------------*/
# %BANNER_END%
import torch
from .superpoint import SuperPoint
from .superglue import SuperGlue
class Matching(torch.nn.Module):
    """ Image Matching Frontend (SuperPoint + SuperGlue) """

    def __init__(self, config=None):
        """Build both sub-models.

        Args:
            config: optional dict; its ``'superpoint'`` and ``'superglue'``
                entries (each defaulting to ``{}``) are passed to the
                respective sub-model. ``None`` means "all defaults".
        """
        super().__init__()
        # A ``{}`` default would be one dict shared by every call.
        config = {} if config is None else config
        self.superpoint = SuperPoint(config.get('superpoint', {}))
        self.superglue = SuperGlue(config.get('superglue', {}))

    def forward(self, data):
        """ Run SuperPoint (optionally) and SuperGlue
        SuperPoint is skipped if ['keypoints0', 'keypoints1'] exist in input
        Args:
          data: dictionary with minimal keys: ['image0', 'image1']
        Returns:
          dictionary holding the SuperPoint outputs computed here (keys
          suffixed with '0' / '1') merged with the SuperGlue outputs
        """
        pred = {}

        # Extract SuperPoint (keypoints, scores, descriptors) if not provided
        if 'keypoints0' not in data:
            pred0 = self.superpoint({'image': data['image0']})
            pred = {**pred, **{k+'0': v for k, v in pred0.items()}}
        if 'keypoints1' not in data:
            pred1 = self.superpoint({'image': data['image1']})
            pred = {**pred, **{k+'1': v for k, v in pred1.items()}}

        # Batch all features
        # We should either have i) one image per batch, or
        # ii) the same number of local features for all images in the batch.
        data = {**data, **pred}

        for k in data:
            if isinstance(data[k], (list, tuple)):
                data[k] = torch.stack(data[k])

        # Perform the matching
        pred = {**pred, **self.superglue(data)}

        return pred

285
models/superglue.py Normal file
View File

@ -0,0 +1,285 @@
# %BANNER_BEGIN%
# ---------------------------------------------------------------------
# %COPYRIGHT_BEGIN%
#
# Magic Leap, Inc. ("COMPANY") CONFIDENTIAL
#
# Unpublished Copyright (c) 2020
# Magic Leap, Inc., All Rights Reserved.
#
# NOTICE: All information contained herein is, and remains the property
# of COMPANY. The intellectual and technical concepts contained herein
# are proprietary to COMPANY and may be covered by U.S. and Foreign
# Patents, patents in process, and are protected by trade secret or
# copyright law. Dissemination of this information or reproduction of
# this material is strictly forbidden unless prior written permission is
# obtained from COMPANY. Access to the source code contained herein is
# hereby forbidden to anyone except current COMPANY employees, managers
# or contractors who have executed Confidentiality and Non-disclosure
# agreements explicitly covering such access.
#
# The copyright notice above does not evidence any actual or intended
# publication or disclosure of this source code, which includes
# information that is confidential and/or proprietary, and is a trade
# secret, of COMPANY. ANY REPRODUCTION, MODIFICATION, DISTRIBUTION,
# PUBLIC PERFORMANCE, OR PUBLIC DISPLAY OF OR THROUGH USE OF THIS
# SOURCE CODE WITHOUT THE EXPRESS WRITTEN CONSENT OF COMPANY IS
# STRICTLY PROHIBITED, AND IN VIOLATION OF APPLICABLE LAWS AND
# INTERNATIONAL TREATIES. THE RECEIPT OR POSSESSION OF THIS SOURCE
# CODE AND/OR RELATED INFORMATION DOES NOT CONVEY OR IMPLY ANY RIGHTS
# TO REPRODUCE, DISCLOSE OR DISTRIBUTE ITS CONTENTS, OR TO MANUFACTURE,
# USE, OR SELL ANYTHING THAT IT MAY DESCRIBE, IN WHOLE OR IN PART.
#
# %COPYRIGHT_END%
# ----------------------------------------------------------------------
# %AUTHORS_BEGIN%
#
# Originating Authors: Paul-Edouard Sarlin
#
# %AUTHORS_END%
# --------------------------------------------------------------------*/
# %BANNER_END%
from copy import deepcopy
from pathlib import Path
from typing import List, Tuple
import torch
from torch import nn
def MLP(channels: List[int], do_bn: bool = True) -> nn.Module:
    """ Stack of 1x1 Conv1d layers.

    Consecutive entries of ``channels`` give the in/out width of each
    convolution. Every convolution except the last is followed by an
    optional BatchNorm1d (``do_bn``) and a ReLU.
    """
    last = len(channels) - 1
    modules = []
    pairs = zip(channels[:-1], channels[1:])
    for idx, (c_in, c_out) in enumerate(pairs, start=1):
        modules.append(nn.Conv1d(c_in, c_out, kernel_size=1, bias=True))
        if idx == last:
            # The output layer stays linear.
            continue
        if do_bn:
            modules.append(nn.BatchNorm1d(c_out))
        modules.append(nn.ReLU())
    return nn.Sequential(*modules)
def normalize_keypoints(kpts, image_shape):
    """ Centre keypoints on the image and scale them by 0.7 * max(width, height).

    ``image_shape`` must unpack into four values; only the last two
    (height, width) are used.
    """
    _, _, height, width = image_shape
    unit = kpts.new_tensor(1)
    # (1, 2) tensor holding (width, height) in the dtype/device of kpts.
    wh = torch.stack([unit * width, unit * height]).unsqueeze(0)
    midpoint = wh / 2
    scale = wh.max(1, keepdim=True).values * 0.7
    return (kpts - midpoint.unsqueeze(1)) / scale.unsqueeze(1)
class KeypointEncoder(nn.Module):
    """ Encodes keypoint position and detection score jointly with an MLP """

    def __init__(self, feature_dim: int, layers: List[int]) -> None:
        super().__init__()
        # Three input channels: two coordinates plus the score.
        widths = [3] + layers + [feature_dim]
        self.encoder = MLP(widths)
        nn.init.constant_(self.encoder[-1].bias, 0.0)

    def forward(self, kpts, scores):
        """Concatenate transposed keypoints and scores on dim 1 and encode them."""
        coords = kpts.transpose(1, 2)
        confidence = scores.unsqueeze(1)
        stacked = torch.cat([coords, confidence], dim=1)
        return self.encoder(stacked)
def attention(query: torch.Tensor, key: torch.Tensor, value: torch.Tensor) -> Tuple[torch.Tensor,torch.Tensor]:
    """Scaled dot-product attention over tensors laid out as (b, d, h, n).

    Returns the attended values and the attention probabilities.
    """
    head_dim = query.shape[1]
    logits = torch.einsum('bdhn,bdhm->bhnm', query, key) / head_dim**.5
    weights = torch.nn.functional.softmax(logits, dim=-1)
    attended = torch.einsum('bhnm,bdhm->bdhn', weights, value)
    return attended, weights
class MultiHeadedAttention(nn.Module):
    """ Multi-head attention to increase model expressivity """

    def __init__(self, num_heads: int, d_model: int):
        super().__init__()
        assert d_model % num_heads == 0
        self.dim = d_model // num_heads
        self.num_heads = num_heads
        self.merge = nn.Conv1d(d_model, d_model, kernel_size=1)
        # The three input projections start as copies of the merge layer.
        self.proj = nn.ModuleList([deepcopy(self.merge) for _ in range(3)])

    def forward(self, query: torch.Tensor, key: torch.Tensor, value: torch.Tensor) -> torch.Tensor:
        """Project the inputs, attend per head, then merge the heads again."""
        batch = query.size(0)
        q_proj, k_proj, v_proj = self.proj
        query = q_proj(query).view(batch, self.dim, self.num_heads, -1)
        key = k_proj(key).view(batch, self.dim, self.num_heads, -1)
        value = v_proj(value).view(batch, self.dim, self.num_heads, -1)
        attended, _ = attention(query, key, value)
        flat = attended.contiguous().view(batch, self.dim*self.num_heads, -1)
        return self.merge(flat)
class AttentionalPropagation(nn.Module):
    """Attention message from ``source`` followed by an MLP update."""

    def __init__(self, feature_dim: int, num_heads: int):
        super().__init__()
        self.attn = MultiHeadedAttention(num_heads, feature_dim)
        # Input width is doubled because x and the message are concatenated.
        self.mlp = MLP([feature_dim*2, feature_dim*2, feature_dim])
        nn.init.constant_(self.mlp[-1].bias, 0.0)

    def forward(self, x: torch.Tensor, source: torch.Tensor) -> torch.Tensor:
        """Attend from ``x`` to ``source`` and map [x, message] through the MLP."""
        message = self.attn(x, source, source)
        joined = torch.cat([x, message], dim=1)
        return self.mlp(joined)
class AttentionalGNN(nn.Module):
    """Sequence of attentional propagation layers applied to two descriptor sets."""

    def __init__(self, feature_dim: int, layer_names: List[str]) -> None:
        super().__init__()
        # One 4-head propagation layer per entry of layer_names.
        self.layers = nn.ModuleList([
            AttentionalPropagation(feature_dim, 4)
            for _ in range(len(layer_names))])
        self.names = layer_names

    def forward(self, desc0: torch.Tensor, desc1: torch.Tensor) -> Tuple[torch.Tensor,torch.Tensor]:
        """Apply every layer as a residual update to both descriptor sets.

        A layer named 'cross' attends to the other set; any other name is
        treated as self-attention.
        """
        for layer, name in zip(self.layers, self.names):
            is_cross = name == 'cross'
            src0 = desc1 if is_cross else desc0
            src1 = desc0 if is_cross else desc1
            # Both deltas are computed from the pre-update descriptors.
            delta0 = layer(desc0, src0)
            delta1 = layer(desc1, src1)
            desc0 = desc0 + delta0
            desc1 = desc1 + delta1
        return desc0, desc1
def log_sinkhorn_iterations(Z: torch.Tensor, log_mu: torch.Tensor, log_nu: torch.Tensor, iters: int) -> torch.Tensor:
    """ Sinkhorn normalization carried out in log-space.

    Alternately updates a row term and a column term ``iters`` times and
    returns ``Z`` with both terms added.
    """
    row_term = torch.zeros_like(log_mu)
    col_term = torch.zeros_like(log_nu)
    for _ in range(iters):
        row_term = log_mu - torch.logsumexp(Z + col_term.unsqueeze(1), dim=2)
        col_term = log_nu - torch.logsumexp(Z + row_term.unsqueeze(2), dim=1)
    return Z + row_term.unsqueeze(2) + col_term.unsqueeze(1)
def log_optimal_transport(scores: torch.Tensor, alpha: torch.Tensor, iters: int) -> torch.Tensor:
    """ Differentiable optimal transport in log-space.

    ``scores`` (b, m, n) is extended by one extra row and one extra column
    filled with ``alpha`` before running the log-space Sinkhorn iterations.
    """
    b, m, n = scores.shape
    unit = scores.new_tensor(1)
    ms = (m*unit).to(scores)
    ns = (n*unit).to(scores)

    # Extra column, extra row and the corner element, all equal to alpha.
    extra_col = alpha.expand(b, m, 1)
    extra_row = alpha.expand(b, 1, n)
    corner = alpha.expand(b, 1, 1)

    top = torch.cat([scores, extra_col], -1)
    bottom = torch.cat([extra_row, corner], -1)
    couplings = torch.cat([top, bottom], 1)

    norm = - (ms + ns).log()
    log_mu = torch.cat([norm.expand(m), ns.log()[None] + norm])
    log_nu = torch.cat([norm.expand(n), ms.log()[None] + norm])
    log_mu = log_mu[None].expand(b, -1)
    log_nu = log_nu[None].expand(b, -1)

    transport = log_sinkhorn_iterations(couplings, log_mu, log_nu, iters)
    return transport - norm  # multiply probabilities by M+N
def arange_like(x, dim: int):
    """Return 0, 1, ..., x.shape[dim]-1 built from a cumulative sum of ones."""
    ones = x.new_ones(x.shape[dim])
    return ones.cumsum(0) - 1  # traceable in 1.1
class SuperGlue(nn.Module):
    """SuperGlue feature matching middle-end

    Given two sets of keypoints and locations, we determine the
    correspondences by:
      1. Keypoint Encoding (normalization + visual feature and location fusion)
      2. Graph Neural Network with multiple self and cross-attention layers
      3. Final projection layer
      4. Optimal Transport Layer (a differentiable Hungarian matching algorithm)
      5. Thresholding matrix based on mutual exclusivity and a match_threshold

    The correspondence ids use -1 to indicate non-matching points.

    Paul-Edouard Sarlin, Daniel DeTone, Tomasz Malisiewicz, and Andrew
    Rabinovich. SuperGlue: Learning Feature Matching with Graph Neural
    Networks. In CVPR, 2020. https://arxiv.org/abs/1911.11763

    """
    # Values used for any key missing from the config passed to __init__.
    default_config = {
        'descriptor_dim': 256,
        'weights': 'indoor',
        'keypoint_encoder': [32, 64, 128, 256],
        'GNN_layers': ['self', 'cross'] * 9,
        'sinkhorn_iterations': 100,
        'match_threshold': 0.2,
    }

    def __init__(self, config):
        """Build the network and load ``weights/superglue_<weights>.pth``.

        Args:
            config: dict overriding entries of ``default_config``;
                ``config['weights']`` must be 'indoor' or 'outdoor'.
        """
        super().__init__()
        self.config = {**self.default_config, **config}

        self.kenc = KeypointEncoder(
            self.config['descriptor_dim'], self.config['keypoint_encoder'])

        self.gnn = AttentionalGNN(
            feature_dim=self.config['descriptor_dim'], layer_names=self.config['GNN_layers'])

        self.final_proj = nn.Conv1d(
            self.config['descriptor_dim'], self.config['descriptor_dim'],
            kernel_size=1, bias=True)

        bin_score = torch.nn.Parameter(torch.tensor(1.))
        self.register_parameter('bin_score', bin_score)

        assert self.config['weights'] in ['indoor', 'outdoor']
        path = Path(__file__).parent
        path = path / 'weights/superglue_{}.pth'.format(self.config['weights'])
        # map_location keeps loading independent of the device the checkpoint
        # was saved from; load_state_dict copies into the existing parameters.
        # NOTE(review): torch.load unpickles the file -- only ship trusted weights.
        self.load_state_dict(torch.load(str(path), map_location='cpu'))
        print('Loaded SuperGlue model (\"{}\" weights)'.format(
            self.config['weights']))

    def forward(self, data):
        """Run SuperGlue on a pair of keypoints and descriptors

        Reads 'descriptors0/1', 'keypoints0/1', 'scores0/1' and 'image0/1'
        from ``data`` and returns a dict with 'matches0', 'matches1',
        'matching_scores0' and 'matching_scores1'.
        """
        desc0, desc1 = data['descriptors0'], data['descriptors1']
        kpts0, kpts1 = data['keypoints0'], data['keypoints1']

        if kpts0.shape[1] == 0 or kpts1.shape[1] == 0:  # no keypoints
            shape0, shape1 = kpts0.shape[:-1], kpts1.shape[:-1]
            return {
                'matches0': kpts0.new_full(shape0, -1, dtype=torch.int),
                'matches1': kpts1.new_full(shape1, -1, dtype=torch.int),
                'matching_scores0': kpts0.new_zeros(shape0),
                'matching_scores1': kpts1.new_zeros(shape1),
            }

        # Keypoint normalization.
        kpts0 = normalize_keypoints(kpts0, data['image0'].shape)
        kpts1 = normalize_keypoints(kpts1, data['image1'].shape)

        # Keypoint MLP encoder.
        desc0 = desc0 + self.kenc(kpts0, data['scores0'])
        desc1 = desc1 + self.kenc(kpts1, data['scores1'])

        # Multi-layer Transformer network.
        desc0, desc1 = self.gnn(desc0, desc1)

        # Final MLP projection.
        mdesc0, mdesc1 = self.final_proj(desc0), self.final_proj(desc1)

        # Compute matching descriptor distance.
        scores = torch.einsum('bdn,bdm->bnm', mdesc0, mdesc1)
        scores = scores / self.config['descriptor_dim']**.5

        # Run the optimal transport.
        scores = log_optimal_transport(
            scores, self.bin_score,
            iters=self.config['sinkhorn_iterations'])

        # Get the matches with score above "match_threshold".
        # The last row and column (added by the transport step) are excluded.
        max0, max1 = scores[:, :-1, :-1].max(2), scores[:, :-1, :-1].max(1)
        indices0, indices1 = max0.indices, max1.indices
        # A match is kept only if each side is the other's best candidate.
        mutual0 = arange_like(indices0, 1)[None] == indices1.gather(1, indices0)
        mutual1 = arange_like(indices1, 1)[None] == indices0.gather(1, indices1)
        zero = scores.new_tensor(0)
        mscores0 = torch.where(mutual0, max0.values.exp(), zero)
        mscores1 = torch.where(mutual1, mscores0.gather(1, indices1), zero)
        valid0 = mutual0 & (mscores0 > self.config['match_threshold'])
        valid1 = mutual1 & valid0.gather(1, indices1)
        indices0 = torch.where(valid0, indices0, indices0.new_tensor(-1))
        indices1 = torch.where(valid1, indices1, indices1.new_tensor(-1))

        return {
            'matches0': indices0,  # use -1 for invalid match
            'matches1': indices1,  # use -1 for invalid match
            'matching_scores0': mscores0,
            'matching_scores1': mscores1,
        }

202
models/superpoint.py Normal file
View File

@ -0,0 +1,202 @@
# %BANNER_BEGIN%
# ---------------------------------------------------------------------
# %COPYRIGHT_BEGIN%
#
# Magic Leap, Inc. ("COMPANY") CONFIDENTIAL
#
# Unpublished Copyright (c) 2020
# Magic Leap, Inc., All Rights Reserved.
#
# NOTICE: All information contained herein is, and remains the property
# of COMPANY. The intellectual and technical concepts contained herein
# are proprietary to COMPANY and may be covered by U.S. and Foreign
# Patents, patents in process, and are protected by trade secret or
# copyright law. Dissemination of this information or reproduction of
# this material is strictly forbidden unless prior written permission is
# obtained from COMPANY. Access to the source code contained herein is
# hereby forbidden to anyone except current COMPANY employees, managers
# or contractors who have executed Confidentiality and Non-disclosure
# agreements explicitly covering such access.
#
# The copyright notice above does not evidence any actual or intended
# publication or disclosure of this source code, which includes
# information that is confidential and/or proprietary, and is a trade
# secret, of COMPANY. ANY REPRODUCTION, MODIFICATION, DISTRIBUTION,
# PUBLIC PERFORMANCE, OR PUBLIC DISPLAY OF OR THROUGH USE OF THIS
# SOURCE CODE WITHOUT THE EXPRESS WRITTEN CONSENT OF COMPANY IS
# STRICTLY PROHIBITED, AND IN VIOLATION OF APPLICABLE LAWS AND
# INTERNATIONAL TREATIES. THE RECEIPT OR POSSESSION OF THIS SOURCE
# CODE AND/OR RELATED INFORMATION DOES NOT CONVEY OR IMPLY ANY RIGHTS
# TO REPRODUCE, DISCLOSE OR DISTRIBUTE ITS CONTENTS, OR TO MANUFACTURE,
# USE, OR SELL ANYTHING THAT IT MAY DESCRIBE, IN WHOLE OR IN PART.
#
# %COPYRIGHT_END%
# ----------------------------------------------------------------------
# %AUTHORS_BEGIN%
#
# Originating Authors: Paul-Edouard Sarlin
#
# %AUTHORS_END%
# --------------------------------------------------------------------*/
# %BANNER_END%
from pathlib import Path
import torch
from torch import nn
def simple_nms(scores, nms_radius: int):
    """ Fast non-maximum suppression: keep local maxima, zero everything else.

    The window is (2 * nms_radius + 1) wide; two refinement passes recover
    maxima that lie outside the suppression zone of earlier ones.
    """
    assert(nms_radius >= 0)

    window = nms_radius*2+1

    def local_max(t):
        return torch.nn.functional.max_pool2d(
            t, kernel_size=window, stride=1, padding=nms_radius)

    blank = torch.zeros_like(scores)
    keep = scores == local_max(scores)
    for _ in range(2):
        # Everything within one window of a kept maximum is suppressed.
        suppressed = local_max(keep.float()) > 0
        remaining = torch.where(suppressed, blank, scores)
        extra = remaining == local_max(remaining)
        keep = keep | (extra & (~suppressed))
    return torch.where(keep, scores, blank)
def remove_borders(keypoints, scores, border: int, height: int, width: int):
    """ Drop keypoints closer than ``border`` to any image edge.

    Column 0 of ``keypoints`` is compared against ``height`` and column 1
    against ``width``.
    """
    rows = keypoints[:, 0]
    cols = keypoints[:, 1]
    rows_ok = (rows >= border) & (rows < (height - border))
    cols_ok = (cols >= border) & (cols < (width - border))
    keep = rows_ok & cols_ok
    return keypoints[keep], scores[keep]
def top_k_keypoints(keypoints, scores, k: int):
    """Keep the ``k`` highest-scoring keypoints (all of them if there are at most k)."""
    if len(keypoints) <= k:
        return keypoints, scores
    best_scores, best_idx = torch.topk(scores, k, dim=0)
    return keypoints[best_idx], best_scores
def _grid_sample_takes_align_corners():
    """Return True when the installed torch is at least version 1.3."""
    release = torch.__version__.split('+')[0].split('.')
    try:
        return (int(release[0]), int(release[1])) >= (1, 3)
    except (ValueError, IndexError):
        # Unparseable version string: assume a modern torch.
        return True


def sample_descriptors(keypoints, descriptors, s: int = 8):
    """ Interpolate descriptors at keypoint locations

    Args:
        keypoints: (b, n, 2) keypoint coordinates; the first coordinate is
            scaled with the descriptor width, the second with its height.
        descriptors: (b, c, h, w) dense descriptor map.
        s: size in pixels of one descriptor cell.

    Returns:
        (b, c, n) descriptors, L2-normalized along the channel dimension.
    """
    b, c, h, w = descriptors.shape
    keypoints = keypoints - s / 2 + 0.5
    keypoints = keypoints / torch.tensor(
        [(w*s - s/2 - 0.5), (h*s - s/2 - 0.5)]).to(keypoints)[None]
    keypoints = keypoints*2 - 1  # normalize to (-1, 1)
    # Versions must be compared numerically: as strings, '1.10' < '1.3', which
    # would silently drop align_corners on torch 1.10 - 1.13.
    args = {'align_corners': True} if _grid_sample_takes_align_corners() else {}
    descriptors = torch.nn.functional.grid_sample(
        descriptors, keypoints.view(b, 1, -1, 2), mode='bilinear', **args)
    descriptors = torch.nn.functional.normalize(
        descriptors.reshape(b, c, -1), p=2, dim=1)
    return descriptors
class SuperPoint(nn.Module):
    """SuperPoint Convolutional Detector and Descriptor

    SuperPoint: Self-Supervised Interest Point Detection and
    Description. Daniel DeTone, Tomasz Malisiewicz, and Andrew
    Rabinovich. In CVPRW, 2019. https://arxiv.org/abs/1712.07629

    """
    # Values used for any key missing from the config passed to __init__.
    default_config = {
        'descriptor_dim': 256,
        'nms_radius': 4,
        'keypoint_threshold': 0.005,
        'max_keypoints': -1,
        'remove_borders': 4,
    }

    def __init__(self, config):
        """Build the network and load ``weights/superpoint_v1.pth``.

        Args:
            config: dict overriding entries of ``default_config``.

        Raises:
            ValueError: if ``max_keypoints`` is 0 or smaller than -1.
        """
        super().__init__()
        self.config = {**self.default_config, **config}

        self.relu = nn.ReLU(inplace=True)
        self.pool = nn.MaxPool2d(kernel_size=2, stride=2)
        c1, c2, c3, c4, c5 = 64, 64, 128, 128, 256

        # Shared encoder.
        self.conv1a = nn.Conv2d(1, c1, kernel_size=3, stride=1, padding=1)
        self.conv1b = nn.Conv2d(c1, c1, kernel_size=3, stride=1, padding=1)
        self.conv2a = nn.Conv2d(c1, c2, kernel_size=3, stride=1, padding=1)
        self.conv2b = nn.Conv2d(c2, c2, kernel_size=3, stride=1, padding=1)
        self.conv3a = nn.Conv2d(c2, c3, kernel_size=3, stride=1, padding=1)
        self.conv3b = nn.Conv2d(c3, c3, kernel_size=3, stride=1, padding=1)
        self.conv4a = nn.Conv2d(c3, c4, kernel_size=3, stride=1, padding=1)
        self.conv4b = nn.Conv2d(c4, c4, kernel_size=3, stride=1, padding=1)

        # Keypoint-score head (65 output channels).
        self.convPa = nn.Conv2d(c4, c5, kernel_size=3, stride=1, padding=1)
        self.convPb = nn.Conv2d(c5, 65, kernel_size=1, stride=1, padding=0)

        # Descriptor head.
        self.convDa = nn.Conv2d(c4, c5, kernel_size=3, stride=1, padding=1)
        self.convDb = nn.Conv2d(
            c5, self.config['descriptor_dim'],
            kernel_size=1, stride=1, padding=0)

        path = Path(__file__).parent / 'weights/superpoint_v1.pth'
        # map_location keeps loading independent of the device the checkpoint
        # was saved from; load_state_dict copies into the existing parameters.
        # NOTE(review): torch.load unpickles the file -- only ship trusted weights.
        self.load_state_dict(torch.load(str(path), map_location='cpu'))

        mk = self.config['max_keypoints']
        if mk == 0 or mk < -1:
            raise ValueError('\"max_keypoints\" must be positive or \"-1\"')

        print('Loaded SuperPoint model')

    def forward(self, data):
        """ Compute keypoints, scores, descriptors for image

        Reads ``data['image']`` and returns a dict with 'keypoints',
        'scores' and 'descriptors', each holding one entry per batch element.
        """
        # Shared Encoder
        x = self.relu(self.conv1a(data['image']))
        x = self.relu(self.conv1b(x))
        x = self.pool(x)
        x = self.relu(self.conv2a(x))
        x = self.relu(self.conv2b(x))
        x = self.pool(x)
        x = self.relu(self.conv3a(x))
        x = self.relu(self.conv3b(x))
        x = self.pool(x)
        x = self.relu(self.conv4a(x))
        x = self.relu(self.conv4b(x))

        # Compute the dense keypoint scores
        cPa = self.relu(self.convPa(x))
        scores = self.convPb(cPa)
        # Softmax over the 65 channels, then drop the last one.
        scores = torch.nn.functional.softmax(scores, 1)[:, :-1]
        b, _, h, w = scores.shape
        # Unfold the remaining 64 channels of every cell into an 8x8 patch,
        # giving a (b, h*8, w*8) score map.
        scores = scores.permute(0, 2, 3, 1).reshape(b, h, w, 8, 8)
        scores = scores.permute(0, 1, 3, 2, 4).reshape(b, h*8, w*8)
        scores = simple_nms(scores, self.config['nms_radius'])

        # Extract keypoints
        keypoints = [
            torch.nonzero(s > self.config['keypoint_threshold'])
            for s in scores]
        scores = [s[tuple(k.t())] for s, k in zip(scores, keypoints)]

        # Discard keypoints near the image borders
        keypoints, scores = list(zip(*[
            remove_borders(k, s, self.config['remove_borders'], h*8, w*8)
            for k, s in zip(keypoints, scores)]))

        # Keep the k keypoints with highest score
        if self.config['max_keypoints'] >= 0:
            keypoints, scores = list(zip(*[
                top_k_keypoints(k, s, self.config['max_keypoints'])
                for k, s in zip(keypoints, scores)]))

        # Convert (h, w) to (x, y)
        keypoints = [torch.flip(k, [1]).float() for k in keypoints]

        # Compute the dense descriptors
        cDa = self.relu(self.convDa(x))
        descriptors = self.convDb(cDa)
        descriptors = torch.nn.functional.normalize(descriptors, p=2, dim=1)

        # Extract descriptors
        descriptors = [sample_descriptors(k[None], d[None], 8)[0]
                       for k, d in zip(keypoints, descriptors)]

        return {
            'keypoints': keypoints,
            'scores': scores,
            'descriptors': descriptors,
        }

567
models/utils.py Normal file
View File

@ -0,0 +1,567 @@
# %BANNER_BEGIN%
# ---------------------------------------------------------------------
# %COPYRIGHT_BEGIN%
#
# Magic Leap, Inc. ("COMPANY") CONFIDENTIAL
#
# Unpublished Copyright (c) 2020
# Magic Leap, Inc., All Rights Reserved.
#
# NOTICE: All information contained herein is, and remains the property
# of COMPANY. The intellectual and technical concepts contained herein
# are proprietary to COMPANY and may be covered by U.S. and Foreign
# Patents, patents in process, and are protected by trade secret or
# copyright law. Dissemination of this information or reproduction of
# this material is strictly forbidden unless prior written permission is
# obtained from COMPANY. Access to the source code contained herein is
# hereby forbidden to anyone except current COMPANY employees, managers
# or contractors who have executed Confidentiality and Non-disclosure
# agreements explicitly covering such access.
#
# The copyright notice above does not evidence any actual or intended
# publication or disclosure of this source code, which includes
# information that is confidential and/or proprietary, and is a trade
# secret, of COMPANY. ANY REPRODUCTION, MODIFICATION, DISTRIBUTION,
# PUBLIC PERFORMANCE, OR PUBLIC DISPLAY OF OR THROUGH USE OF THIS
# SOURCE CODE WITHOUT THE EXPRESS WRITTEN CONSENT OF COMPANY IS
# STRICTLY PROHIBITED, AND IN VIOLATION OF APPLICABLE LAWS AND
# INTERNATIONAL TREATIES. THE RECEIPT OR POSSESSION OF THIS SOURCE
# CODE AND/OR RELATED INFORMATION DOES NOT CONVEY OR IMPLY ANY RIGHTS
# TO REPRODUCE, DISCLOSE OR DISTRIBUTE ITS CONTENTS, OR TO MANUFACTURE,
# USE, OR SELL ANYTHING THAT IT MAY DESCRIBE, IN WHOLE OR IN PART.
#
# %COPYRIGHT_END%
# ----------------------------------------------------------------------
# %AUTHORS_BEGIN%
#
# Originating Authors: Paul-Edouard Sarlin
# Daniel DeTone
# Tomasz Malisiewicz
#
# %AUTHORS_END%
# --------------------------------------------------------------------*/
# %BANNER_END%
from pathlib import Path
import time
from collections import OrderedDict
from threading import Thread
import numpy as np
import cv2
import torch
import matplotlib.pyplot as plt
import matplotlib
matplotlib.use('Agg')
class AverageTimer:
    """ Class to help manage printing simple timing of code execution. """

    def __init__(self, smoothing=0.3, newline=False):
        """
        Args:
            smoothing: weight of the newest measurement in the running average.
            newline: end each report with a newline instead of a carriage return.
        """
        self.smoothing = smoothing
        self.newline = newline
        self.times = OrderedDict()
        self.will_print = OrderedDict()
        self.reset()

    def reset(self):
        """Restart the clock and mark every known timer as not yet updated."""
        now = time.time()
        self.start = now
        self.last_time = now
        for name in self.will_print:
            self.will_print[name] = False

    def update(self, name='default'):
        """Record the time elapsed since the previous update/reset under ``name``."""
        now = time.time()
        dt = now - self.last_time
        if name in self.times:
            # Exponential moving average with the previous value.
            dt = self.smoothing * dt + (1 - self.smoothing) * self.times[name]
        self.times[name] = dt
        self.will_print[name] = True
        self.last_time = now

    def print(self, text='Timer'):
        """Print all timers updated since the last reset, then reset."""
        total = 0.
        print('[{}]'.format(text), end=' ')
        for key, val in self.times.items():
            if self.will_print[key]:
                print('%s=%.3f' % (key, val), end=' ')
                total += val
        # No time accumulated (e.g. no update() since reset): avoid 1/0.
        fps = 1. / total if total > 0 else float('inf')
        print('total=%.3f sec {%.1f FPS}' % (total, fps), end=' ')
        if self.newline:
            print(flush=True)
        else:
            print(end='\r', flush=True)
        self.reset()
class VideoStreamer:
    """Helper for reading grayscale frames from an image stream.

    Four kinds of input are supported:
        1.) USB webcam (an int, or a string made only of digits).
        2.) An IP camera (a string starting with 'http' or 'rtsp').
        3.) A directory of images (files in it matching 'image_glob').
        4.) A video file, such as an .mp4 or .avi file.
    """

    def __init__(self, basedir, resize, skip, image_glob, max_length=1000000):
        """
        Args:
            basedir: input source, see the class docstring.
            resize: resize specification forwarded to `process_resize`.
            skip: keep every `skip`-th image/frame (directory and video
                file inputs only).
            image_glob: sequence of glob patterns used for directory input.
            max_length: maximum number of frames to return.
        Raises:
            IOError: no image matched, or the capture could not be opened.
            ValueError: `basedir` is none of the supported inputs.
        """
        self._ip_grabbed = False
        self._ip_running = False
        # Initialised here so next_frame() can always read it.
        self._ip_exited = False
        self._ip_camera = False
        self._ip_image = None
        self._ip_index = 0
        self.cap = []
        self.camera = True
        self.video_file = False
        self.listing = []
        self.resize = resize
        self.interp = cv2.INTER_AREA
        self.i = 0
        self.skip = skip
        self.max_length = max_length
        if isinstance(basedir, int) or basedir.isdigit():
            print('==> Processing USB webcam input: {}'.format(basedir))
            self.cap = cv2.VideoCapture(int(basedir))
            self.listing = range(0, self.max_length)
        elif basedir.startswith(('http', 'rtsp')):
            print('==> Processing IP camera input: {}'.format(basedir))
            self.cap = cv2.VideoCapture(basedir)
            self.start_ip_camera_thread()
            self._ip_camera = True
            self.listing = range(0, self.max_length)
        elif Path(basedir).is_dir():
            print('==> Processing image directory input: {}'.format(basedir))
            self.listing = []
            for pattern in image_glob:
                self.listing.extend(Path(basedir).glob(pattern))
            self.listing.sort()
            self.listing = self.listing[::self.skip]
            self.max_length = min(self.max_length, len(self.listing))
            if self.max_length == 0:
                raise IOError('No images found (maybe bad \'image_glob\' ?)')
            self.listing = self.listing[:self.max_length]
            self.camera = False
        elif Path(basedir).exists():
            print('==> Processing video input: {}'.format(basedir))
            self.cap = cv2.VideoCapture(basedir)
            self.cap.set(cv2.CAP_PROP_BUFFERSIZE, 1)
            num_frames = int(self.cap.get(cv2.CAP_PROP_FRAME_COUNT))
            self.listing = range(0, num_frames)
            self.listing = self.listing[::self.skip]
            self.video_file = True
            self.max_length = min(self.max_length, len(self.listing))
            self.listing = self.listing[:self.max_length]
        else:
            raise ValueError('VideoStreamer input \"{}\" not recognized.'.format(basedir))
        # `camera` stays True for every capture-backed input (video files too).
        if self.camera and not self.cap.isOpened():
            raise IOError('Could not read camera')

    def load_image(self, impath):
        """ Read image as grayscale and resize to img_size.
        Inputs
            impath: Path to input image.
        Returns
            grayim: uint8 numpy array sized H x W.
        """
        grayim = cv2.imread(impath, 0)
        if grayim is None:
            raise Exception('Error reading image %s' % impath)
        w, h = grayim.shape[1], grayim.shape[0]
        w_new, h_new = process_resize(w, h, self.resize)
        grayim = cv2.resize(
            grayim, (w_new, h_new), interpolation=self.interp)
        return grayim

    def next_frame(self):
        """ Return the next frame, and increment internal counter.
        Returns
            image: Next H x W image.
            status: True or False depending whether image was loaded.
        """
        if self.i == self.max_length:
            return (None, False)
        if self.camera:
            if self._ip_camera:
                # Wait for first image, making sure we haven't exited
                while self._ip_grabbed is False and self._ip_exited is False:
                    time.sleep(.001)
                ret, image = self._ip_grabbed, self._ip_image
                if ret is False or image is None:
                    # The reader stopped (possibly before any frame arrived):
                    # report failure instead of calling .copy() on None.
                    ret = False
                    self._ip_running = False
                else:
                    image = image.copy()
            else:
                if self.video_file:
                    # Seek BEFORE reading so that frame i really is
                    # listing[i]; seeking afterwards returned frame 0 twice
                    # and shifted every later frame by one.
                    self.cap.set(cv2.CAP_PROP_POS_FRAMES, self.listing[self.i])
                ret, image = self.cap.read()
            if ret is False:
                print('VideoStreamer: Cannot get image from camera')
                return (None, False)
            w, h = image.shape[1], image.shape[0]
            w_new, h_new = process_resize(w, h, self.resize)
            image = cv2.resize(image, (w_new, h_new),
                               interpolation=self.interp)
            # OpenCV captures deliver BGR frames (same conversion as
            # process_image in this file).
            image = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
        else:
            image_file = str(self.listing[self.i])
            image = self.load_image(image_file)
        self.i = self.i + 1
        return (image, True)

    def start_ip_camera_thread(self):
        """Start the background thread that keeps grabbing IP camera frames."""
        # Set the flags before the thread starts: assigning _ip_exited
        # afterwards could overwrite the thread's own exit signal and leave
        # next_frame() waiting forever.
        self._ip_running = True
        self._ip_exited = False
        self._ip_thread = Thread(target=self.update_ip_camera, args=())
        self._ip_thread.start()
        return self

    def update_ip_camera(self):
        """Thread body: store the latest frame until stopped or a read fails."""
        while self._ip_running:
            ret, img = self.cap.read()
            if ret is False:
                self._ip_running = False
                self._ip_exited = True
                self._ip_grabbed = False
                return
            self._ip_image = img
            self._ip_grabbed = ret
            self._ip_index += 1

    def cleanup(self):
        """Ask the IP camera thread (if any) to stop."""
        self._ip_running = False
# --- PREPROCESSING ---
def process_resize(w, h, resize):
    """Compute the target (width, height) for an image of size w x h.

    Args:
        w: original width.
        h: original height.
        resize: sequence of one or two numbers:
            [-1]           keep the original size,
            [s], s > -1    scale so that the longer side becomes s,
            [w_new, h_new] use exactly this size.
    Returns:
        Tuple (w_new, h_new).
    Raises:
        ValueError: `resize` has the wrong length, or is a single value
            below -1 (previously an AssertionError / obscure IndexError).
    """
    if not 1 <= len(resize) <= 2:
        raise ValueError(
            'resize must contain one or two values, got {}'.format(
                len(resize)))
    if len(resize) == 2:
        w_new, h_new = resize[0], resize[1]
    elif resize[0] == -1:
        w_new, h_new = w, h
    elif resize[0] > -1:
        scale = resize[0] / max(h, w)
        w_new, h_new = int(round(w*scale)), int(round(h*scale))
    else:
        raise ValueError(
            'a single resize value must be -1 or non-negative, got {}'.format(
                resize[0]))
    # Issue warning if resolution is too small or too large.
    if max(w_new, h_new) < 160:
        print('Warning: input resolution is very small, results may vary')
    elif max(w_new, h_new) > 2000:
        print('Warning: input resolution is very large, results may vary')
    return w_new, h_new
def frame2tensor(frame, device):
    """Convert a numpy frame into a float tensor on `device`.

    The frame is divided by 255, cast to float32 and given two extra
    leading dimensions (an H x W frame becomes 1 x 1 x H x W).
    """
    scaled = torch.from_numpy(frame / 255.)
    batched = scaled.float()[None, None]
    return batched.to(device)
def read_image(path, device, resize, rotation, resize_float):
    """Load an image file as grayscale, resize/rotate it and tensorize it.

    Args:
        path: image file to read.
        device: torch device for the returned tensor.
        resize: resize specification forwarded to `process_resize`.
        rotation: number of 90-degree rotations passed to `np.rot90`.
        resize_float: if True, convert to float32 before resizing,
            otherwise after.
    Returns:
        (image, inp, scales): the float32 image, its tensor from
        `frame2tensor`, and the (w, h) ratios original / resized (swapped
        for odd rotations); (None, None, None) if the file cannot be read.
    """
    gray = cv2.imread(str(path), cv2.IMREAD_GRAYSCALE)
    if gray is None:
        return None, None, None

    h, w = gray.shape[0], gray.shape[1]
    w_new, h_new = process_resize(w, h, resize)
    target_size = (w_new, h_new)
    scales = (float(w) / float(w_new), float(h) / float(h_new))

    if resize_float:
        gray = cv2.resize(gray.astype('float32'), target_size)
    else:
        gray = cv2.resize(gray, target_size).astype('float32')

    if rotation != 0:
        gray = np.rot90(gray, k=rotation)
        if rotation % 2:
            # An odd number of quarter turns swaps the two image axes.
            scales = scales[::-1]

    return gray, frame2tensor(gray, device), scales
def process_image(image, device, resize, rotation, resize_float):
    """Convert an in-memory BGR image to the grayscale network input.

    Args:
        image: BGR image array, or None.
        device: torch device for the returned tensor.
        resize: resize specification forwarded to `process_resize`.
        rotation: number of 90-degree rotations passed to `np.rot90`.
        resize_float: if True, convert to float32 before resizing,
            otherwise after.
    Returns:
        (image, inp, scales): the float32 grayscale image, its tensor from
        `frame2tensor`, and the (w, h) ratios original / resized (swapped
        for odd rotations); (None, None, None) if `image` is None.
    """
    # Check before converting: cvtColor cannot handle None, so testing its
    # result afterwards could never produce the documented failure triple.
    if image is None:
        return None, None, None
    image = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
    w, h = image.shape[1], image.shape[0]
    w_new, h_new = process_resize(w, h, resize)
    scales = (float(w) / float(w_new), float(h) / float(h_new))
    if resize_float:
        image = cv2.resize(image.astype('float32'), (w_new, h_new))
    else:
        image = cv2.resize(image, (w_new, h_new)).astype('float32')
    if rotation != 0:
        image = np.rot90(image, k=rotation)
        if rotation % 2:
            # An odd number of quarter turns swaps the two image axes.
            scales = scales[::-1]
    inp = frame2tensor(image, device)
    return image, inp, scales
# --- GEOMETRY ---
def estimate_pose(kpts0, kpts1, K0, K1, thresh, conf=0.99999):
    """Estimate the relative pose between two views from matched keypoints.

    Args:
        kpts0, kpts1: N x 2 arrays of matched pixel coordinates.
        K0, K1: 3 x 3 intrinsics of the two cameras.
        thresh: RANSAC inlier threshold in pixels.
        conf: RANSAC confidence.
    Returns:
        (R, t, inliers) for the candidate essential matrix with the most
        inliers after the cheirality check, or None if there are fewer than
        five matches or no pose could be recovered.
    """
    if len(kpts0) < 5:
        return None
    # Mean over all four focal lengths (the previous expression counted
    # K0's fx and K1's fy twice and ignored the other two).
    f_mean = np.mean([K0[0, 0], K0[1, 1], K1[0, 0], K1[1, 1]])
    # Keypoints are normalized below, so express the threshold in the same
    # normalized units.
    norm_thresh = thresh / f_mean
    kpts0 = (kpts0 - K0[[0, 1], [2, 2]][None]) / K0[[0, 1], [0, 1]][None]
    kpts1 = (kpts1 - K1[[0, 1], [2, 2]][None]) / K1[[0, 1], [0, 1]][None]
    E, mask = cv2.findEssentialMat(
        kpts0, kpts1, np.eye(3), threshold=norm_thresh, prob=conf,
        method=cv2.RANSAC)
    if E is None:
        # Estimation failed: report "no pose" like the too-few-matches case
        # instead of aborting on an assertion.
        return None
    best_num_inliers = 0
    ret = None
    # E may stack several 3 x 3 candidates vertically; try each of them.
    for _E in np.split(E, len(E) // 3):
        n, R, t, _ = cv2.recoverPose(
            _E, kpts0, kpts1, np.eye(3), 1e9, mask=mask)
        if n > best_num_inliers:
            best_num_inliers = n
            ret = (R, t[:, 0], mask.ravel() > 0)
    return ret
def rotate_intrinsics(K, image_shape, rot):
    """Return the intrinsics of a camera whose image was rotated.

    Args:
        K: 3 x 3 intrinsics of the unrotated image.
        image_shape: shape of the image after rotation.
        rot: number of quarter turns (at most 3), taken modulo 4.
            Presumably the same convention as the `k` passed to `np.rot90`
            elsewhere in this file -- TODO confirm.
    Returns:
        A new 3 x 3 array with the dtype of `K`.
    """
    assert rot <= 3
    rot = rot % 4
    if rot == 0:
        # No rotation: the intrinsics are unchanged (this case previously
        # fell through to the rot == 3 formula).
        return np.array(K, dtype=K.dtype)
    # Odd rotations swap the roles of the two image dimensions.
    h, w = image_shape[:2][::-1 if (rot % 2) else 1]
    fx, fy, cx, cy = K[0, 0], K[1, 1], K[0, 2], K[1, 2]
    if rot == 1:
        return np.array([[fy, 0., cy],
                         [0., fx, w-1-cx],
                         [0., 0., 1.]], dtype=K.dtype)
    elif rot == 2:
        return np.array([[fx, 0., w-1-cx],
                         [0., fy, h-1-cy],
                         [0., 0., 1.]], dtype=K.dtype)
    else:  # rot == 3
        return np.array([[fy, 0., h-1-cy],
                         [0., fx, cx],
                         [0., 0., 1.]], dtype=K.dtype)
def rotate_pose_inplane(i_T_w, rot):
    """Left-multiply a pose by a rotation about the z axis.

    `rot` indexes the angles (0, 270, 180, 90) degrees; the resulting 4 x 4
    float32 rotation matrix is multiplied onto `i_T_w`.
    """
    angle = np.deg2rad((0, 270, 180, 90)[rot])
    cos_a, sin_a = np.cos(angle), np.sin(angle)
    in_plane = np.array([[cos_a, -sin_a, 0., 0.],
                         [sin_a, cos_a, 0., 0.],
                         [0., 0., 1., 0.],
                         [0., 0., 0., 1.]], dtype=np.float32)
    return np.dot(in_plane, i_T_w)
def scale_intrinsics(K, scales):
    """Rescale intrinsics `K` by the inverse of `scales`.

    The first row of K is divided by scales[0] and the second row by
    scales[1]; the third row is left unchanged.
    """
    inv_x = 1./scales[0]
    inv_y = 1./scales[1]
    scaling = np.diag([inv_x, inv_y, 1.])
    return np.dot(scaling, K)
def to_homogeneous(points):
    """Append a column of ones (same dtype) along the last axis of `points`."""
    ones_column = np.ones_like(points[:, :1])
    return np.concatenate((points, ones_column), axis=-1)
def compute_epipolar_error(kpts0, kpts1, T_0to1, K0, K1):
    """Per-match epipolar error of keypoint pairs under a given pose.

    Args:
        kpts0, kpts1: N x 2 arrays of matched pixel coordinates.
        T_0to1: transform whose upper-left 3 x 3 block is the rotation and
            whose last column holds the translation.
        K0, K1: 3 x 3 camera intrinsics.
    Returns:
        Length-N array: the squared epipolar constraint value scaled by the
        sum of the inverse squared norms of the first two components of the
        epipolar lines in both images.
    """
    def _normalized_homogeneous(kpts, K):
        # Subtract the principal point, divide by the focal lengths, then
        # append a column of ones.
        center = K[[0, 1], [2, 2]][None]
        focal = K[[0, 1], [0, 1]][None]
        xy = (kpts - center) / focal
        return np.concatenate([xy, np.ones_like(xy[:, :1])], axis=-1)

    pts0 = _normalized_homogeneous(kpts0, K0)
    pts1 = _normalized_homogeneous(kpts1, K1)

    # Essential matrix E = [t]_x R.
    tx, ty, tz = T_0to1[:3, 3]
    t_skew = np.array([
        [0, -tz, ty],
        [tz, 0, -tx],
        [-ty, tx, 0]
    ])
    E = t_skew @ T_0to1[:3, :3]

    Ep0 = pts0 @ E.T  # N x 3
    Etp1 = pts1 @ E  # N x 3
    p1Ep0 = np.sum(pts1 * Ep0, -1)  # N
    inv_norm0 = 1.0 / (Ep0[:, 0]**2 + Ep0[:, 1]**2)
    inv_norm1 = 1.0 / (Etp1[:, 0]**2 + Etp1[:, 1]**2)
    return p1Ep0**2 * (inv_norm0 + inv_norm1)
def angle_error_mat(R1, R2):
    """Angle in degrees of the relative rotation between R1 and R2."""
    relative = np.dot(R1.T, R2)
    cos_angle = (np.trace(relative) - 1) / 2
    # Numerical errors can push the cosine slightly out of [-1, 1].
    cos_angle = np.clip(cos_angle, -1., 1.)
    return np.rad2deg(np.abs(np.arccos(cos_angle)))
def angle_error_vec(v1, v2):
    """Angle in degrees between the vectors v1 and v2."""
    norm_product = np.linalg.norm(v1) * np.linalg.norm(v2)
    cos_angle = np.clip(np.dot(v1, v2) / norm_product, -1.0, 1.0)
    return np.rad2deg(np.arccos(cos_angle))
def compute_pose_error(T_0to1, R, t):
    """Angular errors (degrees) of an estimated pose against ground truth.

    Args:
        T_0to1: ground-truth transform; its upper-left 3 x 3 block is the
            rotation and its last column the translation.
        R, t: estimated rotation and translation.
    Returns:
        (error_t, error_R): translation-direction and rotation errors.
    """
    gt_rotation, gt_translation = T_0to1[:3, :3], T_0to1[:3, 3]
    translation_err = angle_error_vec(t, gt_translation)
    # ambiguity of E estimation
    translation_err = np.minimum(translation_err, 180 - translation_err)
    rotation_err = angle_error_mat(R, gt_rotation)
    return translation_err, rotation_err
def pose_auc(errors, thresholds):
    """Area under the cumulative pose-error (recall) curve.

    Args:
        errors: sequence of per-pair pose errors (any order).
        thresholds: error thresholds up to which the curve is integrated.
    Returns:
        List with one AUC per threshold, each normalized by its threshold.
    """
    # np.trapz was renamed np.trapezoid in NumPy 2.0 and deprecated; use
    # whichever the installed NumPy provides.
    integrate = getattr(np, 'trapezoid', None) or np.trapz
    # np.asarray accepts any sequence (the old `.copy()` required a list or
    # an ndarray); np.sort returns a new array, leaving the input untouched.
    errors = np.sort(np.asarray(errors))
    recall = (np.arange(len(errors)) + 1) / len(errors)
    # Start the curve at the origin.
    errors = np.r_[0., errors]
    recall = np.r_[0., recall]
    aucs = []
    for t in thresholds:
        last_index = np.searchsorted(errors, t)
        # Extend the curve horizontally up to the threshold itself.
        r = np.r_[recall[:last_index], recall[last_index-1]]
        e = np.r_[errors[:last_index], t]
        aucs.append(integrate(r, x=e)/t)
    return aucs
# --- VISUALIZATION ---
def plot_image_pair(imgs, dpi=100, size=6, pad=.5):
    """Show two images side by side in a new figure without ticks or frames.

    Args:
        imgs: the two images to display (drawn in gray with range 0..255).
        dpi: figure resolution.
        size: per-image width used to derive the figure size; None keeps
            matplotlib's default figure size.
        pad: padding passed to `plt.tight_layout`.
    """
    n = len(imgs)
    assert n == 2, 'number of images must be two'
    figsize = None if size is None else (size*n, size*3/4)
    _, axes = plt.subplots(1, n, figsize=figsize, dpi=dpi)
    for idx in range(n):
        axis = axes[idx]
        axis.imshow(imgs[idx], cmap=plt.get_cmap('gray'), vmin=0, vmax=255)
        axis.get_yaxis().set_ticks([])
        axis.get_xaxis().set_ticks([])
        # remove frame
        for spine in axis.spines.values():
            spine.set_visible(False)
    plt.tight_layout(pad=pad)
def plot_keypoints(kpts0, kpts1, color='w', ps=2):
    """Scatter kpts0 / kpts1 on the first / second axes of the current figure."""
    axes = plt.gcf().axes
    left, right = axes[0], axes[1]
    left.scatter(kpts0[:, 0], kpts0[:, 1], c=color, s=ps)
    right.scatter(kpts1[:, 0], kpts1[:, 1], c=color, s=ps)
def plot_matches(kpts0, kpts1, color, lw=1.5, ps=4):
    """Draw match lines between the two axes of the current figure.

    Args:
        kpts0, kpts1: N x 2 arrays of matched keypoints, in the data
            coordinates of the first and second axes respectively.
        color: one color per match.
        lw: line width.
        ps: marker size of the match end-points.
    """
    fig = plt.gcf()
    ax = fig.axes
    # Draw once so the transforms below reflect the final layout.
    fig.canvas.draw()
    # Map each keypoint from data coordinates to figure coordinates so a
    # single line can span both axes.
    transFigure = fig.transFigure.inverted()
    fkpts0 = transFigure.transform(ax[0].transData.transform(kpts0))
    fkpts1 = transFigure.transform(ax[1].transData.transform(kpts1))
    for i in range(len(kpts0)):
        # Register each line through the public API instead of overwriting
        # `fig.lines`, which dropped lines already on the figure and left
        # the new artists unattached to it.
        fig.add_artist(matplotlib.lines.Line2D(
            (fkpts0[i, 0], fkpts1[i, 0]), (fkpts0[i, 1], fkpts1[i, 1]),
            zorder=1, transform=fig.transFigure, c=color[i], linewidth=lw))
    ax[0].scatter(kpts0[:, 0], kpts0[:, 1], c=color, s=ps)
    ax[1].scatter(kpts1[:, 0], kpts1[:, 1], c=color, s=ps)
def make_matching_plot(image0, image1, kpts0, kpts1, mkpts0, mkpts1,
                       color, text, path, show_keypoints=False,
                       fast_viz=False, opencv_display=False,
                       opencv_title='matches', small_text=[]):
    """Visualize the matches between two images.

    Args:
        image0, image1: the two images.
        kpts0, kpts1: all detected keypoints (drawn if `show_keypoints`).
        mkpts0, mkpts1: matched keypoints, row i of each forming a match.
        color: one color per match.
        text: lines of text drawn in the top-left corner.
        path: output file of the matplotlib figure (non-fast mode).
        show_keypoints: also draw the unmatched keypoints.
        fast_viz: render with `make_matching_plot_fast` instead of
            matplotlib.
        opencv_display, opencv_title: forwarded to the fast renderer.
        small_text: lines of small text drawn in the bottom-left corner.
    Returns:
        With `fast_viz`, the image produced by `make_matching_plot_fast`
        (that renderer does not write to `path` itself); otherwise None,
        after the figure has been saved to `path`.
    """
    if fast_viz:
        # Hand the rendered image back to the caller; it used to be
        # discarded, which made fast mode produce no output at all.
        return make_matching_plot_fast(
            image0, image1, kpts0, kpts1, mkpts0, mkpts1,
            color, text, path, show_keypoints, 10,
            opencv_display, opencv_title, small_text)
    plot_image_pair([image0, image1])
    if show_keypoints:
        # Black dots under smaller white ones give every keypoint an outline.
        plot_keypoints(kpts0, kpts1, color='k', ps=4)
        plot_keypoints(kpts0, kpts1, color='w', ps=2)
    plot_matches(mkpts0, mkpts1, color)
    fig = plt.gcf()
    # Pick a text color that contrasts with the image corner underneath.
    txt_color = 'k' if image0[:100, :150].mean() > 200 else 'w'
    fig.text(
        0.01, 0.99, '\n'.join(text), transform=fig.axes[0].transAxes,
        fontsize=15, va='top', ha='left', color=txt_color)
    txt_color = 'k' if image0[-100:, :150].mean() > 200 else 'w'
    fig.text(
        0.01, 0.01, '\n'.join(small_text), transform=fig.axes[0].transAxes,
        fontsize=5, va='bottom', ha='left', color=txt_color)
    plt.savefig(str(path), bbox_inches='tight', pad_inches=0)
    plt.close()
def make_matching_plot_fast(image0, image1, kpts0, kpts1, mkpts0,
                            mkpts1, color, text, path=None,
                            show_keypoints=False, margin=10,
                            opencv_display=False, opencv_title='',
                            small_text=[]):
    """Render two grayscale images side by side with their matches (OpenCV).

    Args:
        image0, image1: 2-D grayscale images.
        kpts0, kpts1: all detected keypoints (drawn if `show_keypoints`).
        mkpts0, mkpts1: matched keypoints, row i of each forming a match.
        color: per-match colors; the first three columns are scaled by 255
            and reversed in order before drawing.
        text: lines of large text for the top-left corner.
        show_keypoints: also draw every detected keypoint.
        margin: width of the white gap between the two images.
        small_text: lines of small text for the bottom-left corner.
        path, opencv_display, opencv_title: accepted but not used by this
            function.
    Returns:
        The H x W x 3 uint8 visualization.
    """
    H0, W0 = image0.shape
    H1, W1 = image1.shape
    H, W = max(H0, H1), W0 + W1 + margin
    x_offset = margin + W0  # column where the second image starts

    # White canvas holding both images, replicated to three channels.
    canvas = np.full((H, W), 255, np.uint8)
    canvas[:H0, :W0] = image0
    canvas[:H1, x_offset:] = image1
    out = np.stack([canvas, canvas, canvas], -1)

    if show_keypoints:
        kpts0 = np.round(kpts0).astype(int)
        kpts1 = np.round(kpts1).astype(int)
        white = (255, 255, 255)
        black = (0, 0, 0)
        # A black disc under a smaller white one outlines every keypoint.
        for x, y in kpts0:
            cv2.circle(out, (x, y), 2, black, -1, lineType=cv2.LINE_AA)
            cv2.circle(out, (x, y), 1, white, -1, lineType=cv2.LINE_AA)
        for x, y in kpts1:
            cv2.circle(out, (x + x_offset, y), 2, black, -1,
                       lineType=cv2.LINE_AA)
            cv2.circle(out, (x + x_offset, y), 1, white, -1,
                       lineType=cv2.LINE_AA)

    mkpts0 = np.round(mkpts0).astype(int)
    mkpts1 = np.round(mkpts1).astype(int)
    color = (np.array(color[:, :3])*255).astype(int)[:, ::-1]
    for (x0, y0), (x1, y1), match_color in zip(mkpts0, mkpts1, color):
        bgr = match_color.tolist()
        start, end = (x0, y0), (x1 + x_offset, y1)
        cv2.line(out, start, end,
                 color=bgr, thickness=1, lineType=cv2.LINE_AA)
        # display line end-points as circles
        cv2.circle(out, start, 2, bgr, -1, lineType=cv2.LINE_AA)
        cv2.circle(out, end, 2, bgr, -1, lineType=cv2.LINE_AA)

    # Scale factor for consistent visualization across scales.
    sc = min(H / 640., 2.0)
    txt_color_fg = (255, 255, 255)
    txt_color_bg = (0, 0, 0)

    def _put_outlined(line, origin, font_scale):
        # Thick dark text under thin light text gives a readable outline.
        cv2.putText(out, line, origin, cv2.FONT_HERSHEY_DUPLEX,
                    font_scale, txt_color_bg, 2, cv2.LINE_AA)
        cv2.putText(out, line, origin, cv2.FONT_HERSHEY_DUPLEX,
                    font_scale, txt_color_fg, 1, cv2.LINE_AA)

    # Big text.
    big_height = int(30 * sc)
    for row, line in enumerate(text):
        _put_outlined(line, (int(8*sc), big_height*(row+1)), 1.0*sc)

    # Small text.
    small_height = int(18 * sc)
    for row, line in enumerate(reversed(small_text)):
        _put_outlined(
            line, (int(8*sc), int(H-small_height*(row+.6))), 0.5*sc)

    return out
def error_colormap(x):
    """Map values to RGBA colors: 0 gives red, 0.5 yellow and 1 green.

    Returns an array with one extra trailing axis of size 4, clipped to
    [0, 1]; the blue channel is always 0 and alpha always 1.
    """
    red = 2 - x*2
    green = x*2
    blue = np.zeros_like(x)
    alpha = np.ones_like(x)
    rgba = np.stack([red, green, blue, alpha], -1)
    return np.clip(rgba, 0, 1)

BIN
models/weights/superglue_indoor.pth (Stored with Git LFS) Normal file

Binary file not shown.

BIN
models/weights/superglue_outdoor.pth (Stored with Git LFS) Normal file

Binary file not shown.

BIN
models/weights/superpoint_v1.pth (Stored with Git LFS) Normal file

Binary file not shown.

BIN
outdoor-1.JPEG Normal file

Binary file not shown.

After

Width:  |  Height:  |  Size: 113 KiB

BIN
outdoor-2.JPEG Normal file

Binary file not shown.

After

Width:  |  Height:  |  Size: 67 KiB

61
requirements.txt Normal file
View File

@ -0,0 +1,61 @@
aiohttp==3.8.3
aiosignal==1.2.0
anyio==3.6.2
async-timeout==4.0.2
attrs==22.1.0
bcrypt==4.0.1
certifi==2022.9.24
cffi==1.15.1
charset-normalizer==2.1.1
click==8.1.3
contourpy==1.0.6
cryptography==38.0.1
cycler==0.11.0
fastapi==0.85.1
ffmpy==0.3.0
fonttools==4.38.0
frozenlist==1.3.1
fsspec==2022.10.0
gradio==3.8.1
h11==0.12.0
httpcore==0.15.0
httpx==0.23.0
idna==3.4
Jinja2==3.1.2
kiwisolver==1.4.4
linkify-it-py==1.0.3
markdown-it-py==2.1.0
MarkupSafe==2.1.1
matplotlib==3.6.1
mdit-py-plugins==0.3.1
mdurl==0.1.2
multidict==6.0.2
numpy==1.23.4
opencv-python==4.6.0.66
orjson==3.8.1
packaging==21.3
pandas==1.5.1
paramiko==2.11.0
Pillow==9.3.0
pycparser==2.21
pycryptodome==3.15.0
pydantic==1.10.2
pydub==0.25.1
PyNaCl==1.5.0
pyparsing==3.0.9
python-dateutil==2.8.2
python-multipart==0.0.5
pytz==2022.5
PyYAML==6.0
requests==2.28.1
rfc3986==1.5.0
six==1.16.0
sniffio==1.3.0
starlette==0.20.4
torch==1.13.0
typing_extensions==4.4.0
uc-micro-py==1.0.1
urllib3==1.26.12
uvicorn==0.19.0
websockets==10.4
yarl==1.8.1

BIN
taj-1.jpg Normal file

Binary file not shown.

After

Width:  |  Height:  |  Size: 478 KiB

BIN
taj-2.jpg Normal file

Binary file not shown.

After

Width:  |  Height:  |  Size: 47 KiB