%matplotlib inline
import numpy as np
from PIL import Image
import matplotlib.pyplot as plt
import matplotlib.patches as patches
import time
# open the input images
I1 = np.array(Image.open('price_center20.JPG'), dtype='float')/255.
I2 = np.array(Image.open('price_center21.JPG'), dtype='float')/255.
# Display the input images
plt.figure(figsize=(14,8))
plt.subplot(1,2,1)
plt.imshow(I1)
plt.subplot(1,2,2)
plt.imshow(I2)
plt.show()
Download input data from the course website. The file price_center20.JPG contains image 1 and the file price_center21.JPG contains image 2.
For each input image, calculate an image where each pixel value is the minor eigenvalue of the gradient matrix
$N=\left[ \begin{array}{cc} \sum\limits_w I_x^2 & \sum\limits_w I_x I_y\\ \sum\limits_w I_x I_y & \sum\limits_w I_y^2 \end{array} \right]$
where w is the window about the pixel, and $I_x$ and $I_y$ are the gradient images in the x and y direction, respectively. Calculate the gradient images using the fivepoint central difference operator. Set resulting values that are below a specified threshold value to zero (hint: calculating the mean instead of the sum in N allows for adjusting the size of the window without changing the threshold value). Apply an operation that suppresses (sets to 0) local (i.e., about a window) nonmaximum pixel values in the minor eigenvalue image. Vary these parameters such that around 600–650 features are detected in each image. For resulting nonzero pixel values, determine the subpixel feature coordinate using the Forstner corner point operator.
from scipy import signal
def rgb2gray(rgb):
return np.dot(rgb[...,:3], [0.299, 0.587, 0.114])
def ImageGradient(I, w, t):
# inputs:
# I is the input image (may be mxn for Grayscale or mxnx3 for RGB)
# w is the size of the window used to compute the gradient matrix N
# t is the minor eigenvalue threshold
#
# outputs:
# N is the 2x2xmxn gradient matrix
# b in the 2x1xmxn vector used in the Forstner corner detector
# J0 is the mxn minor eigenvalue image of N before thresholding
# J1 is the mxn minor eigenvalue image of N after thresholding
"""your code here"""
m,n = I.shape[:2]
I = I*255
I = rgb2gray(I)
N = np.zeros((2,2,m,n))
b = np.zeros((2,1,m,n))
J0 = np.zeros((m,n))
J1 = np.zeros((m,n))
kx = np.reshape(1/12*np.array([-1, 8, 0, -8, 1]),(1,5))
ky = kx.transpose()
Ix = signal.convolve2d(I, kx, boundary='symm', mode='same')
Iy = signal.convolve2d(I, ky, boundary='symm', mode='same')
Ix2 = Ix*Ix
Iy2 = Iy*Iy
Ixy = Ix*Iy
X = np.array([[i for i in range(n)] for j in range(m)])
Y = np.array([[j for i in range(n)] for j in range(m)])
xIx2_ = X*Ix2 + Y*Ix*Iy
xIxIy_ = X*Ix*Iy + Y*Iy2
d = int((w-1)/2)
for y in range(d,m-d):
for x in range(d,n-d):
Ix2_win = Ix2[int(y-d):int(y+d+1),int(x-d):int(x+d+1)]
Iy2_win = Iy2[int(y-d):int(y+d+1),int(x-d):int(x+d+1)]
Ixy_win = Ixy[int(y-d):int(y+d+1),int(x-d):int(x+d+1)]
x2 = np.mean(Ix2_win)
y2 = np.mean(Iy2_win)
xy = np.mean(Ixy_win)
nxy = np.array([[x2, xy], [xy, y2]])
N[:,:,y,x] = nxy
xIx2_win = xIx2_[int(y-d):int(y+d+1),int(x-d):int(x+d+1)]
xIxIy_win = xIxIy_[int(y-d):int(y+d+1),int(x-d):int(x+d+1)]
b1 = np.mean(xIx2_win)
b2 = np.mean(xIxIy_win)
b[:,:,y,x] = [[b1],[b2]]
meig = (np.trace(nxy)- (np.trace(nxy)**2 - 4*np.linalg.det(nxy))**0.5)*0.5
J0[y,x] = meig
if meig > t:
J1[y,x] = meig
return N, b, J0, J1
def NMS(J, w_nms):
# Apply nonmaximum supression to J using window w
# For any window in J, the result should only contain 1 nonzero value
# In the case of multiple identical maxima in the same window,
# the tie may be broken arbitrarily
#
# inputs:
# J is the minor eigenvalue image input image after thresholding
# w_nms is the size of the local nonmaximum suppression window
#
# outputs:
# J2 is the mxn resulting image after applying nonmaximum suppression
#
J2 = J.copy()
"""your code here"""
m,n = J.shape[:2]
d = int((w_nms-1)/2)
for y in range(d,m-d):
for x in range(d,n-d):
Imax = np.max(J[int(y-d):int(y+d+1),int(x-d):int(x+d+1)])
if J2[y,x] < Imax:
J2[y,x] = 0
return J2
def ForstnerCornerDetector(J, N, b):
# Gather the coordinates of the nonzero pixels in J
# Then compute the sub pixel location of each point using the Forstner operator
#
# inputs:
# J is the NMS image
# N is the 2x2xmxn gradient matrix
# b is the 2x1xmxn vector computed in the image_gradient function
#
# outputs:
# C is the number of corners detected in each image
# pts is the 2xC list of coordinates of subpixel accurate corners
# found using the Forstner corner detector
"""your code here"""
coord = np.transpose(np.nonzero(J))
C = coord.shape[0]
pts = np.zeros((2,C))
for i in range(coord.shape[0]):
y = coord[i][0]
x = coord[i][1]
A = N[:,:,y,x]
b_= b[:,:,y,x]
corner = np.linalg.solve(A,b_)
corner = corner.reshape((2)).astype(int)
pts[:,i] = corner
return C, pts
# feature detection
def RunFeatureDetection(I, w, t, w_nms):
N, b, J0, J1 = ImageGradient(I, w, t)
J2 = NMS(J1, w_nms)
C, pts = ForstnerCornerDetector(J2, N, b)
return C, pts, J0, J1, J2
# input images
I1 = np.array(Image.open('price_center20.JPG'), dtype='float')/255.
I2 = np.array(Image.open('price_center21.JPG'), dtype='float')/255.
# parameters to tune
w = 9
t = 8
w_nms = 9
tic = time.time()
# run feature detection algorithm on input images
C1, pts1, J1_0, J1_1, J1_2 = RunFeatureDetection(I1, w, t, w_nms)
C2, pts2, J2_0, J2_1, J2_2 = RunFeatureDetection(I2, w, t, w_nms)
toc = time.time() - tic
print('took %f secs'%toc)
# display results
plt.figure(figsize=(14,24))
# show corners on original images
ax = plt.subplot(1,2,1)
plt.imshow(I1)
for i in range(C1): # draw rectangles of size w around corners
x,y = pts1[:,i]
ax.add_patch(patches.Rectangle((x-w/2,y-w/2),w,w, fill=False))
# plt.plot(pts1[0,:], pts1[1,:], '.b') # display subpixel corners
plt.title('found %d corners'%C1)
ax = plt.subplot(1,2,2)
plt.imshow(I2)
for i in range(C2):
x,y = pts2[:,i]
ax.add_patch(patches.Rectangle((x-w/2,y-w/2),w,w, fill=False))
# plt.plot(pts2[0,:], pts2[1,:], '.b')
plt.title('found %d corners'%C2)
plt.show()
Determine the set of one-to-one putative feature correspondences by performing a brute-force search for the greatest correlation coefficient value (in the range [-1, 1]) between the detected features in image 1 and the detected features in image 2. Only allow matches that are above a specified correlation coefficient threshold value (note that calculating the correlation coefficient allows for adjusting the size of the matching window without changing the threshold value). Further, only allow matches that are above a specified distance ratio threshold value, where distance is measured to the next best match for a given feature. Vary these parameters such that around 200 putative feature correspondences are established. Optional: constrain the search to coordinates in image 2 that are within a proximity of the detected feature coordinates in image 1.
def compute_ncc(img1, img2,c1,c2,R):
'''
helper: compute ncc for two windows
'''
matching_score = 0
num = 0
sum1 = 0
sum2 = 0
mean1 = np.sum(img1[int(c1[1]-R):int(c1[1]+R+1),int(c1[0]-R):int(c1[0]+R+1)])/(R*R)
mean2 = np.sum(img2[int(c2[1]-R):int(c2[1]+R+1),int(c2[0]-R):int(c2[0]+R+1)])/(R*R)
for y in range((-R),(R+1)):
for x in range((-R),(R+1)):
num += (img1[c1[1]+y,c1[0]+x]-mean1)*(img2[c2[1]+y,c2[0]+x]-mean2)
sum1 += (img1[c1[1]+y,c1[0]+x]-mean1)**2
sum2 += (img2[c2[1]+y,c2[0]+x]-mean2)**2
matching_score = num/np.sqrt(sum1*sum2)
return matching_score
def NCC(I1, I2, pts1, pts2, w, p):
# compute the normalized cross correlation between image patches I1, I2
# result should be in the range [-1,1]
#
# inputs:
# I1, I2 are the input images
# pts1, pts2 are the point to be matched
# w is the size of the matching window to compute correlation coefficients
# p is the size of the proximity window
#
# output:
# normalized cross correlation matrix of scores between all windows in
# image 1 and all windows in image 2
"""your code here"""
R = int((w-1)/2)
scores = np.zeros((pts1.shape[1],pts2.shape[1]))
I1_gray = rgb2gray(I1)
I2_gray = rgb2gray(I2)
for i in range(pts1.shape[1]):
for j in range(pts2.shape[1]):
c1 = pts1[:,i].astype(int)
c2 = pts2[:,j].astype(int)
if np.sqrt((c1[0]-c2[0])**2 + (c1[1]-c2[1])**2) < p:
if c1[0] > R and c1[0]+R+1 < I1.shape[1] and c1[1] > R and c1[1]+R+1 < I1.shape[0]:
if c2[0] > R and c2[0]+R+1 < I2.shape[1] and c2[1] > R and c2[1]+R+1 < I2.shape[0]:
scor = compute_ncc(I1_gray,I2_gray,c1,c2,R)
scores[i,j] = scor
return scores
def Match(scores, t, d):
# perform the one-to-one correspondence matching on the correlation coefficient matrix
#
# inputs:
# scores is the NCC matrix
# t is the correlation coefficient threshold
# d distance ration threshold
#
# output:
# list of the feature coordinates in image 1 and image 2
"""your code here"""
inds = np.zeros((2,1))
mask = np.ones(scores.shape)
masked_scores = mask * scores
max_cc = np.max(masked_scores)
next_max = 0
while t < max_cc :
indices = np.where(masked_scores == max_cc)
scores[int(indices[0]),int(indices[1])] = -1
next_max = np.max(scores[int(indices[0]),:])
if np.max(scores[:,int(indices[1])]) > next_max:
next_max = np.max(scores[:,int(indices[1])])
scores[int(indices[0]),int(indices[1])] = max_cc
if (1-max_cc) < (1-next_max)*d:
inds = np.hstack((inds,[[int(indices[0])],[int(indices[1])]]))
mask[int(indices[0]),:] = 0
mask[:,int(indices[1])] = 0
masked_scores = mask * scores
max_cc = np.max(masked_scores)
inds = inds[:,1:].astype(int)
return inds
def RunFeatureMatching(I1, I2, pts1, pts2, w, t, d, p=0):
# inputs:
# I1, I2 are the input images
# pts1, pts2 are the point to be matched
# w is the size of the matching window to compute correlation coefficients
# t is the correlation coefficient threshold
# d distance ration threshold
# p is the size of the proximity window
#
# outputs:
# inds is a 2xk matrix of matches where inds[0,i] indexs a point pts1
# and inds[1,i] indexs a point in pts2, where k is the number of matches
scores = NCC(I1, I2, pts1, pts2, w, p)
inds = Match(scores, t, d)
return inds
# parameters to tune
w = 11
t = 0.75
d = 0.8
p = 90
tic = time.time()
# run the feature matching algorithm on the input images and detected features
inds = RunFeatureMatching(I1, I2, pts1, pts2, w, t, d, p)
toc = time.time() - tic
print('took %f secs'%toc)
# create new matrices of points which contain only the matched features
match1 = pts1[:,inds[0,:]]
match2 = pts2[:,inds[1,:]]
# # display the results
plt.figure(figsize=(14,24))
ax1 = plt.subplot(1,2,1)
ax2 = plt.subplot(1,2,2)
ax1.imshow(I1)
ax2.imshow(I2)
plt.title('found %d putative matches'%match1.shape[1])
for i in range(match1.shape[1]):
x1,y1 = match1[:,i]
x2,y2 = match2[:,i]
ax1.plot([x1, x2],[y1, y2],'-r')
ax1.add_patch(patches.Rectangle((x1-w/2,y1-w/2),w,w, fill=False))
ax2.plot([x2, x1],[y2, y1],'-r')
ax2.add_patch(patches.Rectangle((x2-w/2,y2-w/2),w,w, fill=False))
plt.show()
print('unique points in image 1: %d'%np.unique(inds[0,:]).shape[0])
print('unique points in image 2: %d'%np.unique(inds[1,:]).shape[0])
The resulting set of putative point correspondences should contain both inlier and outlier correspondences (i.e., false matches). Determine the set of inlier point correspondences using the M-estimator Sample Consensus (MSAC) algorithm, where the maximum number of attempts to find a consensus set is determined adaptively. For each trial, you must use the 4-point algorithm (as described in lecture) to estimate the planar projective transformation from the 2D points in image 1 to the 2D points in image 2. Calculate the (squared) Sampson error as a first order approximation to the geometric error.
hint: this problem has codimension 2
from scipy.stats import chi2
import random
def Homogenize(x):
# converts points from inhomogeneous to homogeneous coordinates
return np.vstack((x,np.ones((1,x.shape[1]))))
def Dehomogenize(x):
# converts points from homogeneous to inhomogeneous coordinates
return x[:-1]/x[-1]
def sampson_error(H,x1,x2):
# calculate sampson error of x1 and x2
error = np.zeros((x1.shape[1]))
delta = np.zeros((4,x1.shape[1]))
for i in range(x1.shape[1]):
epsilon = np.array([[-(x1[0,i]*H[1,0]+x1[1,i]*H[1,1]+H[1,2])
+ x2[1,i]*(x1[0,i]*H[2,0]+x1[1,i]*H[2,1]+H[2,2])],
[x1[0,i]*H[0,0]+x1[1,i]*H[0,1]+H[0,2]
- x2[0,i]*(x1[0,i]*H[2,0]+x1[1,i]*H[2,1]+H[2,2])]])
J = np.array([[-H[1,0]+x2[1,i]*H[2,0], -H[1,1]+x2[1,i]*H[2,1],
0, x1[0,i]*H[2,0]+x1[1,i]*H[2,1]+H[2,2]],
[H[0,0]-x2[0,i]*H[2,0],H[0,1]-x2[0,i]*H[2,1],
-(x1[0,i]*H[2,0]+x1[1,i]*H[2,1]+H[2,2]),0]])
error[i] = epsilon.T @ np.linalg.inv( J @ J.T ) @ epsilon
d = J.T @ np.linalg.solve((J@J.T), -epsilon)
delta[:,i] = d.reshape(-1)
return error ,delta
def MSAC(pts1, pts2, thresh, tol, p):
# Inputs:
# pts1 - matched feature correspondences in image 1
# pts2 - matched feature correspondences in image 2
# thresh - cost threshold
# tol - reprojection error tolerance
# p - probability that as least one of the random samples does not contain any outliers
#
# Output:
# consensus_min_cost - final cost from MSAC
# consensus_min_cost_model - planar projective transformation matrix H
# inliers - list of indices of the inliers corresponding to input data
# trials - number of attempts taken to find consensus set
"""your code here"""
x1_homo = Homogenize(pts1)
x2_homo = Homogenize(pts2)
x1_inhomo = pts1
x2_inhomo = pts2
p = 0.99
s = 4
trials = 0
max_trials = np.inf
consensus_min_cost = np.inf
consensus_min_cost_model = np.zeros((3,4))
while (trials < max_trials and consensus_min_cost > thresh):
trials = trials + 1
# random samples
num = random.sample(range(0, x1_homo.shape[1]), 4)
x1_1 = x1_homo[:,num[0]].reshape(-1,1)
x1_2 = x1_homo[:,num[1]].reshape(-1,1)
x1_3 = x1_homo[:,num[2]].reshape(-1,1)
x1_4 = x1_homo[:,num[3]].reshape(-1,1)
x2_1 = x2_homo[:,num[0]].reshape(-1,1)
x2_2 = x2_homo[:,num[1]].reshape(-1,1)
x2_3 = x2_homo[:,num[2]].reshape(-1,1)
x2_4 = x2_homo[:,num[3]].reshape(-1,1)
# calculate model
A1 = np.hstack((x1_1,x1_2,x1_3))
lam1 = np.linalg.solve(A1,x1_4)
H1 = np.linalg.inv(np.hstack((lam1[0]*x1_1,lam1[1]*x1_2,lam1[2]*x1_3)))
A2 = np.hstack((x2_1,x2_2,x2_3))
lam2 = np.linalg.solve(A2,x2_4)
H2 = np.linalg.inv(np.hstack((lam2[0]*x2_1,lam2[1]*x2_2,lam2[2]*x2_3)))
H = np.linalg.inv(H2) @ H1
# calculate error
error,delta = sampson_error(H,x1_inhomo,x2_inhomo)
# calculate cost
cost = 0
for i in range(x1_homo.shape[1]):
if error[i] <= tol:
cost = cost + error[i]
else:
cost = cost + tol
if cost < consensus_min_cost:
consensus_min_cost = cost
consensus_min_cost_model = H
n_inliers = (error <= tol)
w = np.sum(n_inliers)/x1_homo.shape[1]
max_trials = np.log(1-p) / np.log(1-w**s)
inliers = np.where(n_inliers == True)[0]
return consensus_min_cost, consensus_min_cost_model, inliers, trials
# MSAC parameters
thresh = 300
tol = chi2.ppf(0.95,2)
p = 0.99
alpha = 0.95
tic=time.time()
cost_MSAC, H_MSAC, inliers, trials = MSAC(match1, match2, thresh, tol, p)
# choose just the inliers
x1 = match1[:,inliers]
x2 = match2[:,inliers]
outliers = np.setdiff1d(np.arange(pts1.shape[1]),inliers)
toc=time.time()
time_total=toc-tic
# display the results
print('took %f secs'%time_total)
print('%d iterations'%trials)
print('inlier count: ',len(inliers))
print('inliers: ',inliers)
print('MSAC Cost=%.9f'%cost_MSAC)
print('H_MSAC = ')
print(H_MSAC)
# display the figures
"""your code here"""
plt.figure(figsize=(14,24))
ax1 = plt.subplot(1,2,1)
ax2 = plt.subplot(1,2,2)
ax1.imshow(I1)
ax2.imshow(I2)
plt.title('found %d putative matches'%match1.shape[1])
for i in range(x1.shape[1]):
x_1,y_1 = x1[:,i]
x_2,y_2 = x2[:,i]
ax1.plot([x_1, x_2],[y_1, y_2],'-r')
ax1.add_patch(patches.Rectangle((x_1-w/2,y_1-w/2),w,w, fill=False))
ax2.plot([x_2, x_1],[y_2, y_1],'-r')
ax2.add_patch(patches.Rectangle((x_2-w/2,y_2-w/2),w,w, fill=False))
plt.show()
Estimate the planar projective transformation $\boldsymbol{H}_\text{DLT}$ from the resulting set of inlier correspondences using the direct linear transformation (DLT) algorithm (with data normalization). You must express $\boldsymbol{x}'_i = \boldsymbol{H} \boldsymbol{x}_i$ as $[\boldsymbol{x}'_i]^\perp \boldsymbol{H} \boldsymbol{x}_i = \boldsymbol{0}$ (not $\boldsymbol{x}'_i \times \boldsymbol{H} \boldsymbol{x}_i = \boldsymbol{0}$), where $[\boldsymbol{x}'_i]^\perp \boldsymbol{x}'_i = \boldsymbol{0}$, when forming the solution. Return $\boldsymbol{H}_\text{DLT}$, scaled such that $||\boldsymbol{H}_\text{DLT}||_\text{Fro} = 1$
def ComputeCost(H, x1, x2,sigma1,sigma2):
# x1 homo points in image1
# x2 homo points in image2
n = x1.shape[1]
error,delta = sampson_error(H,Dehomogenize(x1),Dehomogenize(x2))
e1 = Dehomogenize(x1) - (Dehomogenize(x1) + delta[:2,:])
e1 = e1.reshape(-1,1)
e2 = (Dehomogenize(x2) -
Dehomogenize((H @ Homogenize((Dehomogenize(x1) + delta[:2,:])))))
e2 = e2.reshape(-1,1)
cost = (e1.T @ np.linalg.inv(sigma1) @ e1
+ e2.T @ np.linalg.inv(sigma2) @ e2)
return cost
def Normalize(pts):
# data normalization of n dimensional pts
#
# Input:
# pts - is in inhomogeneous coordinates
# Outputs:
# pts - data normalized points
# T - corresponding transformation matrix
"""your code here"""
homo_pts = Homogenize(pts)
mean = np.mean(pts,axis=1)
# for 2D points
if pts.shape[0] == 2:
s = (2/np.sum(np.var(pts,axis=1)))**0.5
T = np.array([[s,0,-mean[0]*s],
[0,s,-mean[1]*s],
[0,0,1]])
# for 3D points
if pts.shape[0] == 3:
s = (3/np.sum(np.var(pts,axis=1)))**0.5
T = np.array([[s, 0, 0, -mean[0]*s],
[0, s, 0, -mean[1]*s],
[0, 0, s, -mean[2]*s],
[0, 0, 0, 1]])
pts = T @ homo_pts
return pts, T
def DLT(x1, x2, normalize=True):
# Inputs:
# x1 - inhomogeneous inlier correspondences in image 1
# x2 - inhomogeneous inlier correspondences in image 1
# normalize - if True, apply data normalization to x1 and x2
#
# Outputs:
# H - the DLT estimate of the planar projective transformation
# cost - linear estimate cost
"""your code here"""
n = x1.shape[1]
# data normalization
if normalize:
print('normalize')
x1, T1 = Normalize(x1)
x2, T2 = Normalize(x2)
else:
x1 = Homogenize(x1)
x2 = Homogenize(x2)
A = np.zeros((1,9))
for i in range(n):
xi_1 = x1[:,i].reshape(-1,1)
xi_2 = x2[:,i].reshape(-1,1)
# left nullspace of x2
v = (xi_2 + np.sign(xi_2[0])
*np.linalg.norm(xi_2)
*np.array([1,0,0]).reshape(-1,1))
Hv = np.eye(3) - 2*((v @ v.T)/(v.T @ v))
x_ln = Hv[1:,:]
A = np.vstack((A, np.kron(x_ln,xi_1.T)))
A = A[1:,:]
u, s, vh = np.linalg.svd(A)
H = vh[-1,:].reshape(3,3)
# data denormalize
if normalize:
print('denormalize')
# calculate cost
sigma1 = np.eye(2*x1.shape[1])*(T1[0,0]**2) # 2n*2n
sigma2 = np.eye(2*x2.shape[1])*(T2[0,0]**2) # 2n*2n
cost = ComputeCost(H,x1,x2,sigma1,sigma2)
H = np.linalg.inv(T2) @ H @ T1
else :
# calculate cost
cost = ComputeCost(H,x1,x2,np.eye(2*x1.shape[1]),np.eye(2*x1.shape[1]))
H = H/np.linalg.norm(H,'fro')
return H, cost
# compute the linear estimate without data normalization
print ('Running DLT without data normalization')
time_start=time.time()
H_DLT, cost = DLT(x1, x2, normalize=False)
time_total=time.time()-time_start
# display the results
print('took %f secs'%time_total)
print('Cost=%.9f'%cost)
# compute the linear estimate with data normalization
print ('Running DLT with data normalization')
time_start=time.time()
H_DLT, cost = DLT(x1, x2, normalize=True)
time_total=time.time()-time_start
# display the results
print('took %f secs'%time_total)
print('Cost=%.9f'%cost)
# display your H_DLT, scaled with its frobenius norm
print('H_DLT = ')
print(H_DLT)
Use $\boldsymbol{H}_\text{DLT}$ and the Sampson corrected points (in image 1) as an initial estimate to an iterative estimation method, specifically the sparse Levenberg-Marquardt algorithm, to determine the Maximum Likelihood estimate of the planar projective transformation that minimizes the reprojection error. You must parameterize the planar projective transformation matrix and the homogeneous 2D scene points that are being adjusted using the parameterization of homogeneous vectors (see section A6.9.2 (page 624) of the textbook, and the corrections and errata).
Report the initial cost (i.e. cost at iteration 0) and the cost at the end of each successive iteration. Show the numerical values for the final estimate of the planar projective transformation matrix $\boldsymbol{H}_\text{LM}$, scaled such that $||\boldsymbol{H}_\text{LM}||_\text{Fro} = 1$.
# Note that np.sinc is different than defined in class
def Sinc(x):
# Returns a scalar valued sinc value
"""your code here"""
if x == 0:
y = 1
else :
y = np.sin(x)/x
return y
def d_sinc(x):
if x == 0:
return 0
else:
return (np.cos(x)/(x)-np.sin(x)/(x**2))
def Parameterize(H):
# wrapper function to interface with LM
# takes all optimization variables and parameterizes all of them
return ParameterizeHomog(H.reshape(-1,1))
def Deparameterize(p):
# Deparameterize all optimization variables
return DeParameterizeHomog(p).reshape(3,3)
def ParameterizeHomog(V):
# Given a homogeneous vector V return its minimal parameterization
"""your code here"""
V = V/np.linalg.norm(V)
V = V.reshape(-1,1)
a = V[0]
b = V[1:]
v_hat = (2/(Sinc(np.arccos(a))))*b
if np.linalg.norm(v_hat) > np.pi:
v_hat = (1 - 2*np.pi/np.linalg.norm(v_hat)*
np.ceil((np.linalg.norm(v_hat)-np.pi)/(2*np.pi)))*v_hat
return v_hat
def DeParameterizeHomog(v):
# Given a parameterized homogeneous vector return its deparameterization
"""your code here"""
v_norm = np.linalg.norm(v)
a = np.cos(v_norm/2).reshape(-1,1)
b = (Sinc(v_norm/2)/2)*v
b = b.reshape(-1,1)
v_bar = np.vstack((a,b))
return v_bar
def d_V_v(v):
# v is parametrized vector
v = v.reshape(-1,1)
n = v.shape[0]
if np.linalg.norm(v) == 0:
dV_v = np.concatenate([np.zeros((1,n)),np.eye(n)*0.5],axis = 0)
else:
v_norm = np.linalg.norm(v)
a = np.cos(v_norm/2).reshape(-1,1)
b = (Sinc(v_norm/2)/2)*v
b = b.reshape(-1,1)
db_v = ((Sinc(v_norm/2)/2)*np.eye(n)
+ 1/(4*v_norm)*d_sinc(v_norm/2)*(v @ v.T))
dV_v = np.concatenate([b.T*(-0.5),db_v],axis = 0)
return dV_v
def Jacobian(H,xs_hat):
# xs_hat 2 X n
# H 3X3
h_hat = Parameterize(H).reshape(-1,1)
xs = np.zeros((3,1))
for i in range(xs_hat.shape[1]):
xs = np.hstack((xs,DeParameterizeHomog(xs_hat[:,i])))
xs = xs[:,1:] # 3 X n
x1_hat = Dehomogenize(xs) # 2 * n
x2_hat = H @ xs
x2_hat = Dehomogenize(x2_hat) # 2 * n
x_1 = Homogenize(x1_hat) # 3 * n
x_2 = Homogenize(x2_hat) # 3 * n
A = np.zeros((1,8))
B1 = np.zeros((1,2))
B2 = np.zeros((1,2))
for i in range(xs_hat.shape[1]):
# A
w = H[2,:].reshape(1,-1) @ xs[:,i]
dx2_h = np.array([[xs[0,i],xs[1,i],xs[2,i],
0,0,0,-x2_hat[0,i]*xs[0,i],
-x2_hat[0,i]*xs[1,i],
-x2_hat[0,i]*xs[2,i]],
[0,0,0,xs[0,i],xs[1,i],xs[2,i],
-x2_hat[1,i]*xs[0,i],
-x2_hat[1,i]*xs[1,i],
-x2_hat[1,i]*xs[2,i]]])
dx2_h = dx2_h / w
dh_h = d_V_v(h_hat)
Ai = dx2_h @ dh_h
A = np.vstack((A,Ai))
# B1
H_I = np.eye(3)
w = H_I[2,:].reshape(1,-1) @ xs[:,i]
dx1_x = np.vstack((H_I[0,:] - x1_hat[0,i]*H_I[2,:],
H_I[1,:] - x1_hat[1,i]*H_I[2,:]))
dx1_x = dx1_x / w
dX_x = d_V_v(xs_hat[:,i])
B1i = dx1_x @ dX_x
B1 = np.vstack((B1,B1i))
# B2
w = H[2,:].reshape(1,-1) @ xs[:,i]
dx2_x = np.vstack((H[0,:] - x2_hat[0,i]*H[2,:],
H[1,:] - x2_hat[1,i]*H[2,:]))
dx2_x = dx2_x / w
dX_x = d_V_v(xs_hat[:,i])
B2i = dx2_x @ dX_x
B2 = np.vstack((B2,B2i))
A = A[1:,:]
B1= B1[1:,:]
B2= B2[1:,:]
return A,B1,B2
from scipy.linalg import block_diag
def LM(H, x1, x2, max_iters, lam):
# Input:
# H - DLT estimate of planar projective transformation matrix
# x1 - inhomogeneous inlier points in image 1
# x2 - inhomogeneous inlier points in image 2
# max_iters - maximum number of iterations
# lam - lambda parameter
# Output:
# H - Final H (3x3) obtained after convergence
"""your code here"""
n = x1.shape[1]
# data normalization
x1_norm, T1 = Normalize(x1)
x2_norm, T2 = Normalize(x2)
H = T2 @ H @ np.linalg.inv(T1)
sigma1 = np.eye(2*x1.shape[1])*(T1[0,0]**2) # 2n*2n
sigma2 = np.eye(2*x2.shape[1])*(T2[0,0]**2) # 2n*2n
# parameterize
h_hat = Parameterize(H) # 8 X 1
error,delta = sampson_error(H,Dehomogenize(x1_norm),Dehomogenize(x2_norm))
xs_inhomo = Dehomogenize(x1_norm) + delta[:2,:]
xs_homo = Homogenize(xs_inhomo)
xs_hat = np.zeros((2,1))
for i in range(xs_homo.shape[1]):
xs_hat = np.hstack((xs_hat,ParameterizeHomog(xs_homo[:,i])))
xs_hat = xs_hat[:,1:] # 2 X n
#step 1
# deparametrize
xs = np.zeros((3,1))
for i in range(xs_hat.shape[1]):
xs = np.hstack((xs,DeParameterizeHomog(xs_hat[:,i])))
xs = xs[:,1:] # 3 X n
H = Deparameterize(h_hat)
# cost
epsilon1 = Dehomogenize(x1_norm) - Dehomogenize(xs)
e1 = epsilon1.reshape(-1,1)
epsilon2 = Dehomogenize(x2_norm) - Dehomogenize((H @ xs))
e2 = epsilon2.reshape(-1,1)
cost = (e1.T @ np.linalg.inv(sigma1) @ e1
+ e2.T @ np.linalg.inv(sigma2) @ e2)
iters = 0
for i in range(max_iters):
# step 2
A,B1,B2 = Jacobian(H,xs_hat)
# step 3
# normal equation matrix
U = A.T @ np.linalg.inv(sigma2) @ A
V = np.zeros((2,2,n))
for j in range(n):
V[:,:,j] = ((B1[2*j:2*j+2,:].T
@ np.linalg.inv(sigma1[:2,:2])
@ B1[2*j:2*j+2,:])
+(B2[2*j:2*j+2,:].T
@ np.linalg.inv(sigma2[:2,:2])
@ B2[2*j:2*j+2,:]))
W = np.zeros((8,2,n))
for j in range(n):
W[:,:,j] = (A[2*j:2*j+2,:].T
@ np.linalg.inv(sigma2[:2,:2])
@ B2[2*j:2*j+2,:])
# normal equation vector
epsilon_a = np.zeros((8,1))
epsilon_b = np.zeros((2,n))
for j in range(n):
epsilon_a = epsilon_a + (A[2*j:2*j+2,:].T
@ np.linalg.inv(sigma2[:2,:2])
@ epsilon2[:,j]).reshape(-1,1)
epsilon_b[:,j] = ((B1[2*j:2*j+2,:].T
@ np.linalg.inv(sigma1[:2,:2])
@ epsilon1[:,j])
+ (B2[2*j:2*j+2,:].T
@ np.linalg.inv(sigma2[:2,:2])
@ epsilon2[:,j]))
flag = 1
while flag:
#step 4
delta_b = np.zeros((2,n))
S = U + lam*np.eye(8)
e = epsilon_a
for j in range(n):
S = S - (W[:,:,j]
@ np.linalg.inv(V[:,:,j]+lam*np.eye(2))
@ W[:,:,j].T)
e = e - (W[:,:,j]
@ np.linalg.inv(V[:,:,j]+lam*np.eye(2))
@ epsilon_b[:,j]).reshape(-1,1)
delta_a = np.linalg.solve(S,e)
for j in range(n):
delta_b[:,j] = (np.linalg.inv(V[:,:,j]+lam*np.eye(2))
@ (epsilon_b[:,j]
- (W[:,:,j].T @ delta_a).reshape(-1)))
#step 5
h_hat0 = h_hat + delta_a
xs_hat0 = xs_hat + delta_b
#step 6
# deparametrize
xs0 = np.zeros((3,1))
for j in range(xs_hat0.shape[1]):
xs0 = np.hstack((xs0,DeParameterizeHomog(xs_hat0[:,j])))
xs0 = xs0[:,1:] # 3 X n
H0 = Deparameterize(h_hat0)
# cost
epsilon10 = Dehomogenize(x1_norm) - Dehomogenize(xs0)
e10 = epsilon10.reshape(-1,1)
epsilon20 = Dehomogenize(x2_norm) - Dehomogenize((H0 @ xs0))
e20 = epsilon20.reshape(-1,1)
cost0 = (e10.T @ np.linalg.inv(sigma1) @ e10
+ e20.T @ np.linalg.inv(sigma2) @ e20)
cost = (e1.T @ np.linalg.inv(sigma1) @ e1
+ e2.T @ np.linalg.inv(sigma2) @ e2)
#step 7
if cost0 < cost:
h_hat = h_hat0
xs_hat = xs_hat0
H = H0
e1 = e10
e2 = e20
lam = 0.1 * lam
iters = iters + 1
flag = 0
else :
lam = 10 * lam
print ('iter %03d Cost %.9f'%(i+1, cost0))
# terminate condition
if 1e-7 > (1 - (cost0 / cost)):
break
# data denormalization
H = np.linalg.inv(T2) @ H @ T1
H = H/np.linalg.norm(H,'fro')
return H
# LM hyperparameters
lam = .001
max_iters = 100
# Run LM initialized by DLT estimate with data normalization
print ('Running sparse LM with data normalization')
print ('iter %03d Cost %.9f'%(0, cost))
time_start=time.time()
H_LM = LM(H_DLT, x1, x2, max_iters, lam)
time_total=time.time()-time_start
print('took %f secs'%time_total)
# display your converged H_LM, scaled with its frobenius norm
print('H_LM = ')
print(H_LM)