Initial commit

andrewdcampbell · Jul 11, 2017 · 5a6060b · 5a6060b
commit 5a6060b
Show file tree

Hide file tree

Showing 25 changed files with 600 additions and 0 deletions.
diff --git a/.gitignore b/.gitignore
@@ -0,0 +1,2 @@
+*.pyc 
+.DS_Store
diff --git a/README.md b/README.md
@@ -0,0 +1,29 @@
+# Document Scanner
+
+### An interactive document scanner built in Python using OpenCV
+
+The scanner takes a poorly scanned image, finds the corners of the document, applies the perspective transformation to get a top-down view of the document, sharpens the image, and applies an adaptive color threshold to clean up the image.
+
+On my test dataset of 280 images, the program correctly detected the corners of the document 92.8% of the time.
+
+This project makes use of the transform and imutils modules from pyimagesearch (which can be accessed [here](http://www.pyimagesearch.com/2014/09/01/build-kick-ass-mobile-document-scanner-just-5-minutes/)). The UI code for the interactive mode is adapted from `poly_editor.py` from [here](https://matplotlib.org/examples/event_handling/poly_editor.html).
+
+* You can manually click and drag the corners of the document to be perspective transformed:
+![Example of interactive GUI](https://github.com/andrewdcampbell/doc_scanner/blob/master/ui.gif)
+
+* The scanner can also process an entire directory of images automatically and save the output in an output directory:
+![Image Directory of images to be processed](https://github.com/andrewdcampbell/doc_scanner/blob/master/before_after.gif)
+
+
+### Usage
+```
+python scan.py (--images <IMG_DIR> | --image <IMG_PATH>) [-i]
+```
+* For example, to scan a single image with interactive mode:
+```
+python scan.py --image images/page.jpg -i
+```
+* To scan all images in a directory automatically:
+```
+python scan.py --images images
+```
diff --git a/before_after.gif b/before_after.gif
diff --git a/output/cell_pic.jpg b/output/cell_pic.jpg
diff --git a/output/chart.JPG b/output/chart.JPG
diff --git a/output/desk.JPG b/output/desk.JPG
diff --git a/output/dollar_bill.JPG b/output/dollar_bill.JPG
diff --git a/output/math_cheat_sheet.JPG b/output/math_cheat_sheet.JPG
diff --git a/output/notepad.JPG b/output/notepad.JPG
diff --git a/output/receipt.jpg b/output/receipt.jpg
diff --git a/output/tax.jpeg b/output/tax.jpeg
diff --git a/polygon_interacter.py b/polygon_interacter.py
@@ -0,0 +1,107 @@
+import numpy as np
+from matplotlib.lines import Line2D
+from matplotlib.artist import Artist
+from matplotlib.mlab import dist_point_to_segment
+
+
+class PolygonInteractor(object):
+    """
+    Interactive editor that lets the user drag the vertices of a polygon.
+
+    A marker line is drawn on top of the polygon's vertices.  Pressing the
+    left mouse button within `epsilon` display pixels of a vertex grabs it;
+    moving the mouse with the button held moves that vertex, and releasing
+    the button drops it.
+
+    Parameters
+    ----------
+    ax : the axes the marker line is added to and artists are drawn on.
+    poly : the polygon artist to edit; it must already belong to a figure.
+
+    Raises
+    ------
+    RuntimeError
+        If `poly.figure` is None.
+    """
+
+    showverts = True
+    epsilon = 5  # max pixel distance to count as a vertex hit
+
+    def __init__(self, ax, poly):
+        if poly.figure is None:
+            raise RuntimeError('You must first add the polygon to a figure or canvas before defining the interactor')
+        self.ax = ax
+        self.poly = poly
+        self.canvas = poly.figure.canvas
+
+        # Saved canvas region used for blitting; it is captured by the first
+        # draw event, so it must exist (as None) before any mouse event fires.
+        self.background = None
+
+        x, y = zip(*self.poly.xy)
+        self.line = Line2D(x, y, marker='o', markerfacecolor='r', animated=True)
+        self.ax.add_line(self.line)
+
+        self.poly.add_callback(self.poly_changed)
+        self._ind = None  # index of the vertex being dragged, if any
+
+        self.canvas.mpl_connect('draw_event', self.draw_callback)
+        self.canvas.mpl_connect('button_press_event', self.button_press_callback)
+        self.canvas.mpl_connect('button_release_event', self.button_release_callback)
+        self.canvas.mpl_connect('motion_notify_event', self.motion_notify_callback)
+
+    def get_poly_points(self):
+        """Return the polygon's vertices (`poly.xy`) as a numpy array."""
+        return np.asarray(self.poly.xy)
+
+    def draw_callback(self, event):
+        """Save the axes region as the blit background, then draw the polygon and markers."""
+        self.background = self.canvas.copy_from_bbox(self.ax.bbox)
+        self.ax.draw_artist(self.poly)
+        self.ax.draw_artist(self.line)
+        self.canvas.blit(self.ax.bbox)
+
+    def poly_changed(self, poly):
+        """Callback registered on the polygon: copy its artist properties to the marker line."""
+        # only copy the artist props to the line (except visibility)
+        vis = self.line.get_visible()
+        Artist.update_from(self.line, poly)
+        self.line.set_visible(vis)  # don't use the poly visibility state
+
+    def get_ind_under_point(self, event):
+        """
+        Return the index of the vertex closest to the event position, or
+        None when that vertex is `epsilon` or more display pixels away.
+        """
+        # Compare in display coordinates, which is what event.x / event.y use.
+        xy = np.asarray(self.poly.xy)
+        xyt = self.poly.get_transform().transform(xy)
+        xt, yt = xyt[:, 0], xyt[:, 1]
+        d = np.sqrt((xt - event.x)**2 + (yt - event.y)**2)
+
+        # argmin returns the first index of the minimum, so ties resolve to
+        # the lowest vertex index.
+        ind = np.argmin(d)
+        if d[ind] >= self.epsilon:
+            return None
+        return ind
+
+    def button_press_callback(self, event):
+        """On a left-button press inside the axes, grab the vertex under the cursor."""
+        if not self.showverts:
+            return
+        if event.inaxes is None:
+            return
+        if event.button != 1:
+            return
+        self._ind = self.get_ind_under_point(event)
+
+    def button_release_callback(self, event):
+        """On a left-button release, drop the currently grabbed vertex."""
+        if not self.showverts:
+            return
+        if event.button != 1:
+            return
+        self._ind = None
+
+    def motion_notify_callback(self, event):
+        """While a vertex is grabbed, move it to the cursor and redraw via blitting."""
+        if not self.showverts:
+            return
+        if self._ind is None:
+            return
+        if event.inaxes is None:
+            return
+        if event.button != 1:
+            return
+        x, y = event.xdata, event.ydata
+
+        self.poly.xy[self._ind] = x, y
+        # The first and last entries are kept identical, so moving either
+        # one moves the other as well.
+        last = len(self.poly.xy) - 1
+        if self._ind == 0:
+            self.poly.xy[last] = x, y
+        elif self._ind == last:
+            self.poly.xy[0] = x, y
+
+        # Hand set_data explicit sequences rather than a one-shot iterator.
+        xs, ys = zip(*self.poly.xy)
+        self.line.set_data(xs, ys)
+
+        # No background has been captured until the first draw event.
+        if self.background is not None:
+            self.canvas.restore_region(self.background)
+        self.ax.draw_artist(self.poly)
+        self.ax.draw_artist(self.line)
+        self.canvas.blit(self.ax.bbox)
diff --git a/pyimagesearch/__init__.py b/pyimagesearch/__init__.py
diff --git a/pyimagesearch/imutils.py b/pyimagesearch/imutils.py
@@ -0,0 +1,58 @@
+# Import the necessary packages
+import numpy as np
+import cv2
+
+def translate(image, x, y):
+	"""Apply the affine translation [[1, 0, x], [0, 1, y]] to `image`.
+
+	The output has the same width and height as the input.
+	"""
+	height, width = image.shape[0], image.shape[1]
+	shift_matrix = np.float32([[1, 0, x], [0, 1, y]])
+	return cv2.warpAffine(image, shift_matrix, (width, height))
+
+def rotate(image, angle, center = None, scale = 1.0):
+	"""Rotate `image` by `angle` about `center`, scaling by `scale`.
+
+	`angle`, `center` and `scale` are passed to cv2.getRotationMatrix2D.
+	When `center` is None the point (width / 2, height / 2) is used.  The
+	output has the same width and height as the input.
+	"""
+	(height, width) = image.shape[:2]
+
+	# Fall back to the middle of the image when no pivot was supplied
+	pivot = (width / 2, height / 2) if center is None else center
+
+	rotation_matrix = cv2.getRotationMatrix2D(pivot, angle, scale)
+	return cv2.warpAffine(image, rotation_matrix, (width, height))
+
+def resize(image, width = None, height = None, inter = cv2.INTER_AREA):
+	"""Resize `image` to a target width or height, keeping its aspect ratio.
+
+	If both `width` and `height` are None the original image is returned
+	unchanged.  If `width` is given it determines the scale (and `height`
+	is ignored); otherwise `height` determines the scale.  `inter` is the
+	interpolation flag passed to cv2.resize.
+	"""
+	(src_h, src_w) = image.shape[:2]
+
+	# Nothing requested: hand back the input as-is
+	if width is None and height is None:
+		return image
+
+	if width is not None:
+		# Scale by the requested width and derive the matching height
+		ratio = width / float(src_w)
+		target = (width, int(src_h * ratio))
+	else:
+		# Scale by the requested height and derive the matching width
+		ratio = height / float(src_h)
+		target = (int(src_w * ratio), height)
+
+	return cv2.resize(image, target, interpolation = inter)
diff --git a/pyimagesearch/transform.py b/pyimagesearch/transform.py
@@ -0,0 +1,69 @@
+# import the necessary packages
+from scipy.spatial import distance as dist
+import numpy as np
+import cv2
+
+def order_points(pts):
+    """Order four 2-D points as top-left, top-right, bottom-right, bottom-left.
+
+    `pts` is indexed as a 2-D array with x in column 0 and y in column 1.
+    Returns a float32 array holding the four points in that order.
+    """
+    # Split the points into the two with the smallest x and the two with
+    # the largest x.
+    by_x = pts[np.argsort(pts[:, 0]), :]
+    left_pair, right_pair = by_x[:2, :], by_x[2:, :]
+
+    # Of the left pair, the one with the smaller y is the top-left corner.
+    top_left, bottom_left = left_pair[np.argsort(left_pair[:, 1]), :]
+
+    # Of the right pair, the one farther (Euclidean distance) from the
+    # top-left corner is taken as bottom-right, the nearer as top-right.
+    distances = dist.cdist(top_left[np.newaxis], right_pair, "euclidean")[0]
+    far_to_near = np.argsort(distances)[::-1]
+    bottom_right, top_right = right_pair[far_to_near, :]
+
+    return np.array([top_left, top_right, bottom_right, bottom_left], dtype = "float32")
+
+def four_point_transform(image, pts):
+    """Warp the quadrilateral `pts` in `image` to an axis-aligned rectangle.
+
+    The points are first put in top-left, top-right, bottom-right,
+    bottom-left order with order_points.  The output width is the longer of
+    the top and bottom edge lengths and the output height is the longer of
+    the left and right edge lengths (each truncated to an int).  Returns the
+    warped image produced by cv2.warpPerspective.
+    """
+    rect = order_points(pts)
+    (top_left, top_right, bottom_right, bottom_left) = rect
+
+    def edge_length(p, q):
+        # Euclidean distance between two corner points
+        return np.sqrt(((p[0] - q[0]) ** 2) + ((p[1] - q[1]) ** 2))
+
+    # Output width: the longer of the bottom and top edges
+    out_width = max(int(edge_length(bottom_right, bottom_left)),
+                    int(edge_length(top_right, top_left)))
+
+    # Output height: the longer of the right and left edges
+    out_height = max(int(edge_length(top_right, bottom_right)),
+                     int(edge_length(top_left, bottom_left)))
+
+    # Destination corners, in the same order as `rect`, describing the
+    # top-down rectangle the quadrilateral is mapped onto
+    dst = np.array([
+        [0, 0],
+        [out_width - 1, 0],
+        [out_width - 1, out_height - 1],
+        [0, out_height - 1]], dtype = "float32")
+
+    # Build the perspective matrix and apply it
+    matrix = cv2.getPerspectiveTransform(rect, dst)
+    return cv2.warpPerspective(image, matrix, (out_width, out_height))
diff --git a/sample_images/cell_pic.jpg b/sample_images/cell_pic.jpg
diff --git a/sample_images/chart.JPG b/sample_images/chart.JPG
diff --git a/sample_images/desk.JPG b/sample_images/desk.JPG
diff --git a/sample_images/dollar_bill.JPG b/sample_images/dollar_bill.JPG
diff --git a/sample_images/math_cheat_sheet.JPG b/sample_images/math_cheat_sheet.JPG
diff --git a/sample_images/notepad.JPG b/sample_images/notepad.JPG
diff --git a/sample_images/receipt.jpg b/sample_images/receipt.jpg
diff --git a/sample_images/tax.jpeg b/sample_images/tax.jpeg