Download input data from the course website. The file price_center20.JPG contains image 1 and the file price_center21.JPG contains image 2.
For each input image, calculate an image where each pixel value is the minor eigenvalue of the gradient matrix
$N=\left[ \begin{array}{cc} \sum\limits_w I_x^2 & \sum\limits_w I_x I_y\\ \sum\limits_w I_x I_y & \sum\limits_w I_y^2 \end{array} \right]$
where w is the window about the pixel, and $I_x$ and $I_y$ are the gradient images in the x and y direction, respectively. Calculate the gradient images using the fivepoint central difference operator. Set resulting values that are below a specified threshold value to zero (hint: calculating the mean instead of the sum in N allows for adjusting the size of the window without changing the threshold value). Apply an operation that suppresses (sets to 0) local (i.e., about a window) nonmaximum pixel values in the minor eigenvalue image. Vary these parameters such that around 600–650 features are detected in each image. For resulting nonzero pixel values, determine the subpixel feature coordinate using the Forstner corner point operator.
My implementation takes around 25 seconds to run. If yours is more than 250 seconds you may lose points.
%matplotlib inline
import numpy as np
from PIL import Image
import matplotlib.pyplot as plt
import matplotlib.patches as patches
import time
# open the input images
I1 = np.array(Image.open('price_center20.JPG'), dtype='float')/255.
I2 = np.array(Image.open('price_center21.JPG'), dtype='float')/255.
# Display the input images
plt.figure(figsize=(14,8))
plt.subplot(1,2,1)
plt.imshow(I1)
plt.subplot(1,2,2)
plt.imshow(I2)
plt.show()
from scipy import signal
def rgb2gray(rgb):
return np.dot(rgb[...,:3], [0.299, 0.587, 0.114])
def ImageGradient(I, w, t):
# inputs:
# I is the input image (may be mxn for Grayscale or mxnx3 for RGB)
# w is the size of the window used to compute the gradient matrix N
# t is the minor eigenvalue threshold
#
# outputs:
# N is the 2x2xmxn gradient matrix
# b in the 2x1xmxn vector used in the Forstner corner detector
# J0 is the mxn minor eigenvalue image of N before thresholding
# J1 is the mxn minor eigenvalue image of N after thresholding
m,n = I.shape[:2]
I = I*255
I = rgb2gray(I)
N = np.zeros((2,2,m,n))
b = np.zeros((2,1,m,n))
J0 = np.zeros((m,n))
J1 = np.zeros((m,n))
"""your code here"""
kx = np.reshape(1/12*np.array([-1, 8, 0, -8, 1]),(1,5))
ky = kx.transpose()
Ix = signal.convolve2d(I, kx, boundary='symm', mode='same')
Iy = signal.convolve2d(I, ky, boundary='symm', mode='same')
Ix2 = Ix*Ix
Iy2 = Iy*Iy
Ixy = Ix*Iy
X = np.array([[i for i in range(n)] for j in range(m)])
Y = np.array([[j for i in range(n)] for j in range(m)])
xIx2_ = X*Ix2 + Y*Ix*Iy
xIxIy_ = X*Ix*Iy + Y*Iy2
d = int((w-1)/2)
for y in range(d,m-d):
for x in range(d,n-d):
Ix2_win = Ix2[int(y-d):int(y+d+1),int(x-d):int(x+d+1)]
Iy2_win = Iy2[int(y-d):int(y+d+1),int(x-d):int(x+d+1)]
Ixy_win = Ixy[int(y-d):int(y+d+1),int(x-d):int(x+d+1)]
x2 = np.mean(Ix2_win)
y2 = np.mean(Iy2_win)
xy = np.mean(Ixy_win)
nxy = np.array([[x2, xy], [xy, y2]])
N[:,:,y,x] = nxy
xIx2_win = xIx2_[int(y-d):int(y+d+1),int(x-d):int(x+d+1)]
xIxIy_win = xIxIy_[int(y-d):int(y+d+1),int(x-d):int(x+d+1)]
b1 = np.mean(xIx2_win)
b2 = np.mean(xIxIy_win)
b[:,:,y,x] = [[b1],[b2]]
meig = (np.trace(nxy)- (np.trace(nxy)**2 - 4*np.linalg.det(nxy))**0.5)*0.5
J0[y,x] = meig
if meig > t:
J1[y,x] = meig
return N, b, J0, J1
def NMS(J, w_nms):
# Apply nonmaximum supression to J using window w
# For any window in J, the result should only contain 1 nonzero value
# In the case of multiple identical maxima in the same window,
# the tie may be broken arbitrarily
#
# inputs:
# J is the minor eigenvalue image input image after thresholding
# w_nms is the size of the local nonmaximum suppression window
#
# outputs:
# J2 is the mxn resulting image after applying nonmaximum suppression
#
J2 = J.copy()
"""your code here"""
m,n = J.shape[:2]
d = int((w_nms-1)/2)
for y in range(d,m-d):
for x in range(d,n-d):
Imax = np.max(J[int(y-d):int(y+d+1),int(x-d):int(x+d+1)])
if J2[y,x] < Imax:
J2[y,x] = 0
return J2
def ForstnerCornerDetector(J, N, b):
# Gather the coordinates of the nonzero pixels in J
# Then compute the sub pixel location of each point using the Forstner operator
#
# inputs:
# J is the NMS image
# N is the 2x2xmxn gradient matrix
# b is the 2x1xmxn vector computed in the image_gradient function
#
# outputs:
# C is the number of corners detected in each image
# pts is the 2xC list of coordinates of subpixel accurate corners
# found using the Forstner corner detector
"""your code here"""
coord = np.transpose(np.nonzero(J))
C = coord.shape[0]
pts = np.zeros((2,C))
for i in range(coord.shape[0]):
y = coord[i][0]
x = coord[i][1]
A = N[:,:,y,x]
b_= b[:,:,y,x]
corner = np.linalg.solve(A,b_)
corner = corner.reshape((2)).astype(int)
pts[:,i] = corner
return C, pts
# feature detection
def RunFeatureDetection(I, w, t, w_nms):
N, b, J0, J1 = ImageGradient(I, w, t)
J2 = NMS(J1, w_nms)
C, pts = ForstnerCornerDetector(J2, N, b)
return C, pts, J0, J1, J2
# input images
I1 = np.array(Image.open('price_center20.JPG'), dtype='float')/255.
I2 = np.array(Image.open('price_center21.JPG'), dtype='float')/255.
# parameters to tune
w = 9
t = 8
w_nms = 9
tic = time.time()
# run feature detection algorithm on input images
C1, pts1, J1_0, J1_1, J1_2 = RunFeatureDetection(I1, w, t, w_nms)
C2, pts2, J2_0, J2_1, J2_2 = RunFeatureDetection(I2, w, t, w_nms)
toc = time.time() - tic
print('took %f secs'%toc)
# display results
plt.figure(figsize=(14,24))
# show pre-thresholded corner heat map from gradient image function
plt.subplot(3,2,1)
plt.imshow(J1_0, cmap='gray')
plt.title('pre-thresholded gradient image')
plt.subplot(3,2,2)
plt.imshow(J2_0, cmap='gray')
plt.title('pre-thresholded gradient image')
# show thresholded corner heat map from gradient image function
plt.subplot(3,2,3)
plt.imshow(J1_1, cmap='gray')
plt.title('thresholded gradient image')
plt.subplot(3,2,4)
plt.imshow(J2_1, cmap='gray')
plt.title('thresholded gradient image')
# show corners on original images
ax = plt.subplot(3,2,5)
plt.imshow(I1)
for i in range(C1): # draw rectangles of size w around corners
x,y = pts1[:,i]
ax.add_patch(patches.Rectangle((x-w/2,y-w/2),w,w, fill=False))
# plt.plot(pts1[0,:], pts1[1,:], '.b') # display subpixel corners
plt.title('found %d corners'%C1)
ax = plt.subplot(3,2,6)
plt.imshow(I2)
for i in range(C2):
x,y = pts2[:,i]
ax.add_patch(patches.Rectangle((x-w/2,y-w/2),w,w, fill=False))
# plt.plot(pts2[0,:], pts2[1,:], '.b')
plt.title('found %d corners'%C2)
plt.show()
Determine the set of one-to-one putative feature correspondences by performing a brute-force search for the greatest correlation coefficient value (in the range [-1, 1]) between the detected features in image 1 and the detected features in image 2. Only allow matches that are above a specified correlation coefficient threshold value (note that calculating the correlation coefficient allows for adjusting the size of the matching window without changing the threshold value). Further, only allow matches that are above a specified distance ratio threshold value, where distance is measured to the next best match for a given feature. Vary these parameters such that around 200 putative feature correspondences are established. Optional: constrain the search to coordinates in image 2 that are within a proximity of the detected feature coordinates in image 1.
My implementation takes around 40 seconds to run. If yours is more than 400 seconds you may lose points.
def compute_ncc(img1, img2,c1,c2,R):
'''
helper: compute ncc for two windows
'''
matching_score = 0
num = 0
sum1 = 0
sum2 = 0
mean1 = np.sum(img1[int(c1[1]-R):int(c1[1]+R+1),int(c1[0]-R):int(c1[0]+R+1)])/(R*R)
mean2 = np.sum(img2[int(c2[1]-R):int(c2[1]+R+1),int(c2[0]-R):int(c2[0]+R+1)])/(R*R)
for y in range((-R),(R+1)):
for x in range((-R),(R+1)):
num += (img1[c1[1]+y,c1[0]+x]-mean1)*(img2[c2[1]+y,c2[0]+x]-mean2)
sum1 += (img1[c1[1]+y,c1[0]+x]-mean1)**2
sum2 += (img2[c2[1]+y,c2[0]+x]-mean2)**2
matching_score = num/np.sqrt(sum1*sum2)
return matching_score
def NCC(I1, I2, pts1, pts2, w, p):
# compute the normalized cross correlation between image patches I1, I2
# result should be in the range [-1,1]
#
# inputs:
# I1, I2 are the input images
# pts1, pts2 are the point to be matched
# w is the size of the matching window to compute correlation coefficients
#
# output:
# normalized cross correlation matrix of scores between all windows in
# image 1 and all windows in image 2
"""your code here"""
R = int((w-1)/2)
scores = np.zeros((pts1.shape[1],pts2.shape[1]))
I1_gray = rgb2gray(I1)
I2_gray = rgb2gray(I2)
for i in range(pts1.shape[1]):
for j in range(pts2.shape[1]):
c1 = pts1[:,i].astype(int)
c2 = pts2[:,j].astype(int)
if np.sqrt((c1[0]-c2[0])**2 + (c1[1]-c2[1])**2) < p:
if c1[0] > R and c1[0]+R+1 < I1.shape[1] and c1[1] > R and c1[1]+R+1 < I1.shape[0]:
if c2[0] > R and c2[0]+R+1 < I2.shape[1] and c2[1] > R and c2[1]+R+1 < I2.shape[0]:
scor = compute_ncc(I1_gray,I2_gray,c1,c2,R)
scores[i,j] = scor
return scores
def Match(scores, t, d, p):
# perform the one-to-one correspondence matching on the correlation coefficient matrix
#
# inputs:
# scores is the NCC matrix
# t is the correlation coefficient threshold
# d distance ration threshold
# p is the size of the proximity window
#
# output:
# list of the feature coordinates in image 1 and image 2
"""your code here"""
#inds = np.vstack((np.random.choice(pts1.shape[1],200,replace=False),
# np.random.choice(pts1.shape[1],200,replace=False)))
inds = np.zeros((2,1))
mask = np.ones(scores.shape)
masked_scores = mask * scores
max_cc = np.max(masked_scores)
next_max = 0
while t < max_cc :
indices = np.where(masked_scores == max_cc)
scores[int(indices[0]),int(indices[1])] = -1
next_max = np.max(scores[int(indices[0]),:])
if np.max(scores[:,int(indices[1])]) > next_max:
next_max = np.max(scores[:,int(indices[1])])
scores[int(indices[0]),int(indices[1])] = max_cc
if (1-max_cc) < (1-next_max)*d:
inds = np.hstack((inds,[[int(indices[0])],[int(indices[1])]]))
mask[int(indices[0]),:] = 0
mask[:,int(indices[1])] = 0
masked_scores = mask * scores
max_cc = np.max(masked_scores)
inds = inds[:,1:].astype(int)
return inds
def RunFeatureMatching(I1, I2, pts1, pts2, w, t, d, p):
# inputs:
# I1, I2 are the input images
# pts1, pts2 are the point to be matched
# w is the size of the matching window to compute correlation coefficients
# t is the correlation coefficient threshold
# d distance ration threshold
# p is the size of the proximity window
#
# outputs:
# inds is a 2xk matrix of matches where inds[0,i] indexs a point pts1
# and inds[1,i] indexs a point in pts2, where k is the number of matches
scores = NCC(I1, I2, pts1, pts2, w,p)
inds = Match(scores, t, d, p)
return inds
# parameters to tune
w = 11
t = 0.75
d = 0.8
p = 90
w1 = 11
tic = time.time()
# run the feature matching algorithm on the input images and detected features
inds = RunFeatureMatching(I1, I2, pts1, pts2, w, t, d, p)
toc = time.time() - tic
print('took %f secs'%toc)
# create new matrices of points which contain only the matched features
match1 = pts1[:,inds[0,:]]
match2 = pts2[:,inds[1,:]]
# display the results
plt.figure(figsize=(14,24))
ax1 = plt.subplot(1,2,1)
ax2 = plt.subplot(1,2,2)
ax1.imshow(I1)
ax2.imshow(I2)
plt.title('found %d putative matches'%match1.shape[1])
for i in range(match1.shape[1]):
x1,y1 = match1[:,i]
x2,y2 = match2[:,i]
ax1.plot([x1, x2],[y1, y2],'-r')
ax1.add_patch(patches.Rectangle((x1-w1/2,y1-w1/2),w1,w1, fill=False))
ax2.plot([x2, x1],[y2, y1],'-r')
ax2.add_patch(patches.Rectangle((x2-w1/2,y2-w1/2),w1,w1, fill=False))
plt.show()
# test 1-1
print('unique points in image 1: %d'%np.unique(inds[0,:]).shape[0])
print('unique points in image 2: %d'%np.unique(inds[1,:]).shape[0])