-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathbookFindingTest.cpp
More file actions
424 lines (401 loc) · 16.6 KB
/
bookFindingTest.cpp
File metadata and controls
424 lines (401 loc) · 16.6 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
#include <opencv2/core.hpp>
#include <opencv2/core/utility.hpp>
#include <opencv2/highgui.hpp>
#include <opencv2/imgproc.hpp>
#include <opencv2/features2d/features2d.hpp>
#include <iostream>
#include <stdio.h>
#include <stdlib.h>
#include <vector>
#include <cmath>
#include <algorithm>
using namespace cv;
using namespace std;
// Number of histogram buckets along each colour channel (the histogram is BIN x BIN x BIN).
const int BIN = 4;
// isSolidColor reports a region as solid when more than this fraction of its pixels
// lies near the dominant colour.
const double PERCENT_SOLID_COLOR = .85;
// Width of one histogram bucket in intensity levels (256 levels split into BIN buckets).
const int bucketSize = 256 / BIN;
// findMarkers accepts a contour only when more than this fraction of the pixels inside
// its bounding box is non-zero in the mask.
const double PERCENT_OF_COLOR = .40;
// Minimum bounding-box width/height (exclusive) for a contour to count as a marker in findMarkers.
const int MIN_RECT_WIDTH = 8;
const int MIN_RECT_HEIGHT = 8;
// Tolerance, in pixels, used by bookAlreadyFound when comparing top-left corners.
const int AREA_AROUND_RECT = 10;
// Contours with an area below this value are discarded by findBooks.
const int MIN_AREA_RECT = 1000;
// Number of vertices the approximated polygon must have to be kept by findBooks.
const int NUM_SIDE_RECT = 4;
// Number of dimensions of the colour histogram (one per B, G, R channel).
const int COLOR_CHANNEL = 3;
// approxPolyDP epsilon in findBooks, expressed as a fraction of the contour perimeter.
const double PERCENT_DISTANCE_CONTOUR = .02;
// OpenCV stores pixels as BGR - these are the channel indices inside a Vec3b
const int BLUE_POSITION = 0;
const int GREEN_POSITION = 1;
const int RED_POSITION = 2;
// Drawing colours, given in BGR order.
const Scalar RED = Scalar(0, 0, 255);
const Scalar GREEN = Scalar(0, 255, 0);
// NOTE: despite its name this value is (255, 255, 255), i.e. white in BGR.
// findMarkers uses it as the fill value when drawing contours into its mask.
const Scalar BLACK = Scalar(255, 255, 255);
/*
* createHistogram:
* Builds a 3D colour histogram (BIN x BIN x BIN, indexed [blue][green][red]) by visiting
* every pixel of the image and incrementing the bucket its BGR value falls into.
* Preconditions: image is read as Vec3b, so it must be an 8-bit, 3-channel image.
*                An image with zero rows or columns simply yields an all-zero histogram.
* Postconditions: returns a CV_32S histogram whose bucket counts add up to
*                 image.rows * image.cols (every pixel is counted exactly once).
*/
Mat createHistogram(const Mat& image) {
    // number of buckets along each of the three colour dimensions
    int dims[COLOR_CHANNEL] = { BIN, BIN, BIN };
    // 3D histogram of integers with every bucket starting at zero
    Mat hist(COLOR_CHANNEL, dims, CV_32S, Scalar::all(0));
    // visit every pixel, including the last row and the last column
    // (the previous bounds of rows - 1 / cols - 1 skipped them)
    for (int row = 0; row < image.rows; row++) {
        for (int col = 0; col < image.cols; col++) {
            // read the pixel once instead of once per channel
            const Vec3b& pixel = image.at<Vec3b>(row, col);
            // integer division maps an intensity 0..255 onto a bucket index 0..BIN-1
            const int b = pixel[BLUE_POSITION] / bucketSize;
            const int g = pixel[GREEN_POSITION] / bucketSize;
            const int r = pixel[RED_POSITION] / bucketSize;
            // one more vote for this colour bucket
            hist.at<int>(b, g, r)++;
        }
    }
    return hist;
}
/*
* findMostCommonColor:
* Scans the BIN x BIN x BIN histogram for the bucket holding the most votes and converts
* that bucket's indices into a representative colour, the centre of the bucket:
*     channel = index * bucketSize + bucketSize / 2
* Ties are resolved in favour of the bucket encountered first in (blue, green, red) scan order.
* Preconditions: hist is a CV_32S histogram with BIN buckets per dimension, indexed
*                [blue][green][red] (as produced by createHistogram).
* Postconditions: cBlue, cGreen and cRed receive the most common colour. The values they
*                 held on entry are ignored, so callers need not pre-initialise them.
*/
void findMostCommonColor(int& cBlue, int& cGreen, int& cRed, const Mat& hist) {
    // Track the winning bucket locally. Previously the output parameters doubled as the
    // running indices, so when bucket (0, 0, 0) won they were never written and the
    // result depended on whatever the caller had passed in.
    int bestBlue = 0;
    int bestGreen = 0;
    int bestRed = 0;
    // seed the search with the votes of the first bucket
    int mostVotes = hist.at<int>(0, 0, 0);
    for (int i = 0; i < BIN; i++) {
        for (int j = 0; j < BIN; j++) {
            for (int k = 0; k < BIN; k++) {
                const int votes = hist.at<int>(i, j, k);
                // strictly greater: the first bucket reaching the maximum is kept
                if (votes > mostVotes) {
                    bestBlue = i;
                    bestGreen = j;
                    bestRed = k;
                    mostVotes = votes;
                }
            }
        }
    }
    // offset that moves from the lower edge of a bucket to its centre
    const int halfBucket = bucketSize / 2;
    cRed = bestRed * bucketSize + halfBucket;
    cGreen = bestGreen * bucketSize + halfBucket;
    cBlue = bestBlue * bucketSize + halfBucket;
}
/*
* isSolidColor:
* This function determines if the object is a solid color (rectangle not book) by summing up the total pixels that
* match the most common color within up to 85% of the pixels and returns true if it's mostly that color.
* Preconditions: this requires the image for comparison
* Postconditions: this function produces a boolean for whether or not the object is mostly a solid color.
*/
bool isSolidColor(const Mat& input) {
// create color histogram
Mat hist = createHistogram(input);
// initialize values for most common b,g,r values
int b = 0;
int g = 0;
int r = 0;
// create an accumulator for the total pixels that match the most common color within a bucketSize
double total = 0;
// find the most common color in the histogram of the image
findMostCommonColor(b, g, r, hist);
for (int row = 0; row < input.rows; row++) {
for (int col = 0; col < input.cols; col++) {
// store the image color values for comparison and debugging
int bPrime = static_cast<int>(input.at<Vec3b>(row, col)[BLUE_POSITION]);
int gPrime = static_cast<int>(input.at<Vec3b>(row, col)[GREEN_POSITION]);
int rPrime = static_cast<int>(input.at<Vec3b>(row, col)[RED_POSITION]);
// compare the pixels in the image to the most common color +/- a bucketSize
if (input.at<Vec3b>(row, col)[BLUE_POSITION] - bucketSize <= b && input.at<Vec3b>(row, col)[BLUE_POSITION] + bucketSize >= b &&
input.at<Vec3b>(row, col)[GREEN_POSITION] - bucketSize <= g && input.at<Vec3b>(row, col)[GREEN_POSITION] + bucketSize >= g &&
input.at<Vec3b>(row, col)[RED_POSITION] - bucketSize <= r && input.at<Vec3b>(row, col)[RED_POSITION] + bucketSize >= r)
{
// if so increase the total
total++;
}
}
}
// divide the sum by the total number of pixels
total = total / ((double)input.rows * (double)input.cols);
// if the total pixels that match the most common color exceed 85%
if (total > PERCENT_SOLID_COLOR)
// the object is most likely a solid color (considering lighting, texture, shade, etc.)
return true;
// otherwise return false
return false;
}
/*
* findBooks:
* Looks for book candidates in an image. The image is converted to grayscale and blurred,
* then for several Canny thresholds the edges are detected, blurred, dilated and traced
* into contours. A contour is kept when its polygon approximation has NUM_SIDE_RECT
* vertices and its area is at least MIN_AREA_RECT.
* All filtering is done in place on one working image, so each threshold pass operates
* on the result of the previous pass.
* Preconditions: image is a BGR image (it is converted with COLOR_BGR2GRAY).
* Postconditions: returns the bounding rectangle of every candidate. The matching contour
*                 is appended to bookContours for each rectangle added, so with an
*                 initially empty bookContours the two share indices. An empty image
*                 yields an empty result.
*/
vector<Rect> findBooks(Mat& image, vector<vector<Point>>& bookContours)
{
    // bounding rectangles of the objects detected as books
    vector<Rect> books;
    // cvtColor cannot work on an image without data
    if (image.empty()) {
        return books;
    }
    Mat grayImage;
    // convert the image to grayscale
    cvtColor(image, grayImage, COLOR_BGR2GRAY);
    // smooth the image before edge detection
    const double initialSigma = 2.5;
    GaussianBlur(grayImage, grayImage, Size(3, 3), initialSigma, initialSigma);
    // try several lower thresholds for the edge detector
    for (int lowerThreshold = 55; lowerThreshold < 255; lowerThreshold += 75)
    {
        vector<vector<Point>> contours;
        vector<Vec4i> hierarchy;
        // upper threshold is three times the lower one, capped at the maximum intensity
        const int upperThreshold = min(lowerThreshold * 3, 255);
        // detect edges
        Canny(grayImage, grayImage, lowerThreshold, upperThreshold);
        // soften the edge image; BORDER_REFLECT is the named form of the border value 2
        const double edgeSigma = 1.5;
        GaussianBlur(grayImage, grayImage, Size(3, 3), edgeSigma, edgeSigma, BORDER_REFLECT);
        // rectangular kernel for the dilation; Size holds integers, so the former
        // Size(4.5, 4.5) was already being truncated to 4 x 4
        Mat element = getStructuringElement(MORPH_RECT, Size(4, 4), Point(1, 1));
        // thicken the edges that were found
        dilate(grayImage, grayImage, element);
        // trace the outer contours of the edge image
        findContours(grayImage, contours, hierarchy, RETR_EXTERNAL, CHAIN_APPROX_SIMPLE);
        // no contours means hierarchy is empty too: reading hierarchy[0] would be out of bounds
        if (contours.empty()) {
            continue;
        }
        // polygon approximation of the contour currently being examined
        vector<Point> points;
        // follow the "next contour" links stored in the hierarchy until they end (-1)
        for (int idx = 0; idx >= 0; idx = hierarchy[idx][0])
        {
            const vector<Point>& contour = contours[idx];
            // area enclosed by the contour
            const double area = contourArea(contour);
            // maximum distance between contour and approximation: 2% of the perimeter
            const double epsilon = PERCENT_DISTANCE_CONTOUR * arcLength(contour, true);
            approxPolyDP(contour, points, epsilon, true);
            // anything that is not a four-cornered shape is discarded
            if (points.size() != static_cast<size_t>(NUM_SIDE_RECT))
            {
                continue;
            }
            // small shapes are discarded to reduce false positives
            if (area < MIN_AREA_RECT) {
                continue;
            }
            // record the bounding rectangle and its contour side by side
            books.push_back(boundingRect(contour));
            bookContours.push_back(contour);
        }
    }
    // return the vector of books found
    return books;
}
/*
* findMarkers:
* Takes a region that has already been identified as a possible book and looks for a
* marker on it - a sufficiently large, well-filled blob such as a title or a cover image.
* Preconditions: image is a BGR image (it is converted with COLOR_BGR2GRAY).
* Postconditions: returns true as soon as one marker is found and draws a green rectangle
*                 around it. image is a Mat header passed by value, so the rectangle is
*                 drawn into the pixel data shared with the caller's Mat. Returns false
*                 when no marker is found or the image is empty.
*/
bool findMarkers(Mat image)
{
    // cvtColor cannot work on an image without data
    if (image.empty()) {
        return false;
    }
    // create a grayscale image
    Mat grayImage;
    cvtColor(image, grayImage, COLOR_BGR2GRAY);
    // small elliptical kernel for the morphological gradient
    Mat element = getStructuringElement(MORPH_ELLIPSE, Size(3, 3));
    // the morphological gradient highlights intensity transitions
    morphologyEx(grayImage, grayImage, MORPH_GRADIENT, element);
    // binarise the gradient image; with THRESH_OTSU the threshold is chosen automatically
    const double minThresholdValue = 0.0;
    const double maxThresholdValue = 255;
    threshold(grayImage, grayImage, minThresholdValue, maxThresholdValue, THRESH_BINARY | THRESH_OTSU);
    // wide, flat kernel (9 x 1): closing with it joins neighbouring blobs horizontally
    Mat connected;
    element = getStructuringElement(MORPH_RECT, Size(9, 1));
    morphologyEx(grayImage, connected, MORPH_CLOSE, element);
    // mask into which the examined contours are drawn (filled) one after another
    Mat mask = Mat::zeros(grayImage.size(), CV_8UC1);
    vector<vector<Point>> contours;
    vector<Vec4i> hierarchy;
    // find the contours within the image
    findContours(connected, contours, hierarchy, RETR_CCOMP, CHAIN_APPROX_SIMPLE, Point(0, 0));
    // no contours means hierarchy is empty too: reading hierarchy[0] would be out of bounds
    if (contours.empty()) {
        return false;
    }
    // follow the "next contour" links stored in the hierarchy until they end (-1)
    for (int idx = 0; idx >= 0; idx = hierarchy[idx][0])
    {
        // bounding box of the current contour
        Rect rect = boundingRect(contours[idx]);
        // fill the contour in the mask (BLACK holds 255 in every channel)
        drawContours(mask, contours, idx, BLACK, FILLED);
        // view of the mask restricted to the bounding box
        Mat nonZero(mask, rect);
        // fraction of the bounding box that is covered by filled contour pixels
        const double ratioOfNonZeroPixel =
            static_cast<double>(countNonZero(nonZero)) / (double(rect.width) * double(rect.height));
        // only count it as a title/word when it is large enough and sufficiently filled
        if (ratioOfNonZeroPixel > PERCENT_OF_COLOR &&
            (rect.height > MIN_RECT_HEIGHT && rect.width > MIN_RECT_WIDTH))
        {
            // outline the marker that was found
            const int lineThickness = 2;
            rectangle(image, rect, GREEN, lineThickness);
            // one marker is enough
            return true;
        }
    }
    // no marker was found
    return false;
}
/*
* bookAlreadyFound:
* Function takes a vector that has the location of books and check it with
* new points to see if we already got this book
* images seen on book covers
* Preconditions: booksFound contains points of books, x and y are valid points
* Postconditions: true or false of return depending on whether the book has already been found.
*/
bool bookAlreadyFound(vector<Point> booksFound, int x, int y)
{
// for each of the books found in the image
for (int i = 0; i < booksFound.size(); i++)
{
// check to see if the points have alread been found within the image
if ((booksFound[i].x - AREA_AROUND_RECT) < x && x < (booksFound[i].x + AREA_AROUND_RECT))
{
if ((booksFound[i].y - AREA_AROUND_RECT) < y && y < (booksFound[i].y + AREA_AROUND_RECT))
{
// return true if the book exists within our book vector
return true;
}
}
}
// otherwise it is a new book
return false;
}
/*
* Main:
* The primary function will search the current program directory to find all .jpg images and search for books in
* each image. We decided to add in the directory search functionality to reduce naming errors and demonstration
* purposes. This applies the algorithms mentioned above to produce output images of each of the test photos
* Preconditions: This can be run either directly or with use of the bash script
* Postconditions: Output images with books detected are produced with the images found in the directory
*/
int main(int argc, char* argv[])
{
// create a vector of directory images;
vector<String> directoryImages;
// set the directory location to the current folder, find photos and add to directoryImages
glob("./*.jpg", directoryImages, false);
// if directory is empty, notify user and close program
if (directoryImages.size() == 0) {
cout << "Error - no images were detected. Make sure the file extensions are '.jpg' format " <<
"and try again." << endl;
return 0;
}
// lambda to remove the output images so it doesn't infinitely create images
directoryImages.erase(
// remove if the directory name begins with .\output
remove_if(directoryImages.begin(), directoryImages.end(),
[](const std::string& s) {return s.find("output") != string::npos; }
),
directoryImages.end()
);
// vector to hold the images found in current directory
vector<Mat> images;
//number of jpg files in images folder
int count = directoryImages.size();
// for each image in the directory look for books
for (int i = 0; i < count; i++) {
// create a number for the book
int bookNumber = 1;
// read in the image from the directory
Mat image = imread(directoryImages[i]);
// create a contour vector for the points
vector<vector<Point>> contours;
// create the initial image for each file it scans (image1.jpg, image2.jpg ... etc.)
string initialImage = "image" + std::to_string(i + 1);
// create a window to display the image
namedWindow(initialImage, WINDOW_AUTOSIZE);
// display the image
imshow(initialImage, image);
// display the image until user closes the window
waitKey(0);
// find the books within the image
vector<Rect> books = findBooks(image, contours);
// create a vector for the coordinates where the books were found
vector<Point> foundBooks;
// for each of the objects identified as a book
for (int i = 0; i < books.size(); i++)
{
// create an image of the current book being examined
Mat disp(image, books[i]);
// check to see if it is a solid color or doesn't have markers
if (isSolidColor(disp) || !findMarkers(disp))
{
// if it lacks the descriptors, it is a false positive, continue
continue;
}
// otherwise check if we have already found the book
else
{
// if the object detected at that location has not already been defined as a found book
if (!bookAlreadyFound(foundBooks, books[i].x, books[i].y))
{
// draw the contour around the book
double contourLineThickness = 2;
drawContours(image, contours, i, GREEN, contourLineThickness);
// add the book to our collection of found books
foundBooks.push_back(Point(books[i].x, books[i].y));
// create a string of text to output the book number
string bookNum = "Book [" + std::to_string(bookNumber) + "]";
// output the book number above the
double lineThickness = .9;
double lineType = 2.9;
putText(image, bookNum, Point(books[i].x + 1, books[i].y + 20),
FONT_HERSHEY_COMPLEX_SMALL, lineThickness, RED, lineType);
// increment the bookNumber
bookNumber++;
}
}
}
// create the output image for each file it scans (output1.jpg, output2.jpg ... etc.)
string name = "output" + std::to_string(i + 1);
// create a window to display the image
namedWindow(name, WINDOW_AUTOSIZE);
// display the image
imshow(name, image);
// write the output image to the file directory
imwrite(name + ".jpg", image);
// display the image until user closes the window
waitKey(0);
}
// clear the directory of images to free the memory
directoryImages.clear();
// return successful
return 0;
}