tesseract  4.1.1
thresholder.cpp
Go to the documentation of this file.
1 // File: thresholder.cpp
3 // Description: Base API for thresholding images in tesseract.
4 // Author: Ray Smith
5 //
6 // (C) Copyright 2008, Google Inc.
7 // Licensed under the Apache License, Version 2.0 (the "License");
8 // you may not use this file except in compliance with the License.
9 // You may obtain a copy of the License at
10 // http://www.apache.org/licenses/LICENSE-2.0
11 // Unless required by applicable law or agreed to in writing, software
12 // distributed under the License is distributed on an "AS IS" BASIS,
13 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 // See the License for the specific language governing permissions and
15 // limitations under the License.
16 //
18 
19 #include "allheaders.h"
20 
21 #include "thresholder.h"
22 
23 #include <cstdint> // for uint32_t
24 #include <cstring>
25 
26 #include "otsuthr.h"
27 #include "tprintf.h" // for tprintf
28 
29 #if defined(USE_OPENCL)
30 #include "openclwrapper.h" // for OpenclDevice
31 #endif
32 
33 namespace tesseract {
34 
36  : pix_(nullptr),
37  image_width_(0), image_height_(0),
38  pix_channels_(0), pix_wpl_(0),
39  scale_(1), yres_(300), estimated_res_(300) {
40  SetRectangle(0, 0, 0, 0);
41 }
42 
44  Clear();
45 }
46 
47 // Destroy the Pix if there is one, freeing memory.
49  pixDestroy(&pix_);
50 }
51 
52 // Return true if no image has been set.
54  return pix_ == nullptr;
55 }
56 
57 // SetImage makes a copy of all the image data, so it may be deleted
58 // immediately after this call.
59 // Greyscale of 8 and color of 24 or 32 bits per pixel may be given.
60 // Palette color images will not work properly and must be converted to
61 // 24 bit.
62 // Binary images of 1 bit per pixel may also be given but they must be
63 // byte packed with the MSB of the first byte being the first pixel, and a
64 // one pixel is WHITE. For binary images set bytes_per_pixel=0.
65 void ImageThresholder::SetImage(const unsigned char* imagedata,
66  int width, int height,
67  int bytes_per_pixel, int bytes_per_line) {
68  int bpp = bytes_per_pixel * 8;
69  if (bpp == 0) bpp = 1;
70  Pix* pix = pixCreate(width, height, bpp == 24 ? 32 : bpp);
71  l_uint32* data = pixGetData(pix);
72  int wpl = pixGetWpl(pix);
73  switch (bpp) {
74  case 1:
75  for (int y = 0; y < height; ++y, data += wpl, imagedata += bytes_per_line) {
76  for (int x = 0; x < width; ++x) {
77  if (imagedata[x / 8] & (0x80 >> (x % 8)))
78  CLEAR_DATA_BIT(data, x);
79  else
80  SET_DATA_BIT(data, x);
81  }
82  }
83  break;
84 
85  case 8:
86  // Greyscale just copies the bytes in the right order.
87  for (int y = 0; y < height; ++y, data += wpl, imagedata += bytes_per_line) {
88  for (int x = 0; x < width; ++x)
89  SET_DATA_BYTE(data, x, imagedata[x]);
90  }
91  break;
92 
93  case 24:
94  // Put the colors in the correct places in the line buffer.
95  for (int y = 0; y < height; ++y, imagedata += bytes_per_line) {
96  for (int x = 0; x < width; ++x, ++data) {
97  SET_DATA_BYTE(data, COLOR_RED, imagedata[3 * x]);
98  SET_DATA_BYTE(data, COLOR_GREEN, imagedata[3 * x + 1]);
99  SET_DATA_BYTE(data, COLOR_BLUE, imagedata[3 * x + 2]);
100  }
101  }
102  break;
103 
104  case 32:
105  // Maintain byte order consistency across different endianness.
106  for (int y = 0; y < height; ++y, imagedata += bytes_per_line, data += wpl) {
107  for (int x = 0; x < width; ++x) {
108  data[x] = (imagedata[x * 4] << 24) | (imagedata[x * 4 + 1] << 16) |
109  (imagedata[x * 4 + 2] << 8) | imagedata[x * 4 + 3];
110  }
111  }
112  break;
113 
114  default:
115  tprintf("Cannot convert RAW image to Pix with bpp = %d\n", bpp);
116  }
117  SetImage(pix);
118  pixDestroy(&pix);
119 }
120 
121 // Store the coordinates of the rectangle to process for later use.
122 // Doesn't actually do any thresholding.
123 void ImageThresholder::SetRectangle(int left, int top, int width, int height) {
124  rect_left_ = left;
125  rect_top_ = top;
126  rect_width_ = width;
127  rect_height_ = height;
128 }
129 
130 // Get enough parameters to be able to rebuild bounding boxes in the
131 // original image (not just within the rectangle).
132 // Left and top are enough with top-down coordinates, but
133 // the height of the rectangle and the image are needed for bottom-up.
134 void ImageThresholder::GetImageSizes(int* left, int* top,
135  int* width, int* height,
136  int* imagewidth, int* imageheight) {
137  *left = rect_left_;
138  *top = rect_top_;
139  *width = rect_width_;
140  *height = rect_height_;
141  *imagewidth = image_width_;
142  *imageheight = image_height_;
143 }
144 
145 // Pix vs raw, which to use? Pix is the preferred input for efficiency,
146 // since raw buffers are copied.
147 // SetImage for Pix takes its own copy of the input (removing any colormap
148 // and converting the depth where needed), so the source pix may be
149 // pixDestroyed immediately after this call.
150 void ImageThresholder::SetImage(const Pix* pix) {
151  if (pix_ != nullptr)
152  pixDestroy(&pix_);
153  Pix* src = const_cast<Pix*>(pix);
154  int depth;
155  pixGetDimensions(src, &image_width_, &image_height_, &depth);
156  // Convert the image as necessary so it is one of binary, plain RGB, or
157  // 8 bit with no colormap. Guarantee that we always end up with our own copy,
158  // not just a clone of the input.
159  if (pixGetColormap(src)) {
160  Pix* tmp = pixRemoveColormap(src, REMOVE_CMAP_BASED_ON_SRC);
161  depth = pixGetDepth(tmp);
162  if (depth > 1 && depth < 8) {
163  pix_ = pixConvertTo8(tmp, false);
164  pixDestroy(&tmp);
165  } else {
166  pix_ = tmp;
167  }
168  } else if (depth > 1 && depth < 8) {
169  pix_ = pixConvertTo8(src, false);
170  } else {
171  pix_ = pixCopy(nullptr, src);
172  }
173  depth = pixGetDepth(pix_);
174  pix_channels_ = depth / 8;
175  pix_wpl_ = pixGetWpl(pix_);
176  scale_ = 1;
177  estimated_res_ = yres_ = pixGetYRes(pix_);
178  Init();
179 }
180 
181 // Threshold the source image as efficiently as possible to the output Pix.
182 // Creates a Pix and sets pix to point to the resulting pointer.
183 // Caller must use pixDestroy to free the created Pix.
185 bool ImageThresholder::ThresholdToPix(PageSegMode pageseg_mode, Pix** pix) {
186  if (image_width_ > INT16_MAX || image_height_ > INT16_MAX) {
187  tprintf("Image too large: (%d, %d)\n", image_width_, image_height_);
188  return false;
189  }
190  if (pix_channels_ == 0) {
191  // We have a binary image, but it still has to be copied, as this API
192  // allows the caller to modify the output.
193  Pix* original = GetPixRect();
194  *pix = pixCopy(nullptr, original);
195  pixDestroy(&original);
196  } else {
198  }
199  return true;
200 }
201 
202 // Gets a pix that contains an 8 bit threshold value at each pixel. The
203 // returned pix may be an integer reduction of the binary image such that
204 // the scale factor may be inferred from the ratio of the sizes, even down
205 // to the extreme of a 1x1 pixel thresholds image.
206 // Ideally the 8 bit threshold should be the exact threshold used to generate
207 // the binary image in ThresholdToPix, but this is not a hard constraint.
208 // Returns nullptr if the input is binary. PixDestroy after use.
210  if (IsBinary()) return nullptr;
211  Pix* pix_grey = GetPixRectGrey();
212  int width = pixGetWidth(pix_grey);
213  int height = pixGetHeight(pix_grey);
214  int* thresholds;
215  int* hi_values;
216  OtsuThreshold(pix_grey, 0, 0, width, height, &thresholds, &hi_values);
217  pixDestroy(&pix_grey);
218  Pix* pix_thresholds = pixCreate(width, height, 8);
219  int threshold = thresholds[0] > 0 ? thresholds[0] : 128;
220  pixSetAllArbitrary(pix_thresholds, threshold);
221  delete [] thresholds;
222  delete [] hi_values;
223  return pix_thresholds;
224 }
225 
226 // Common initialization shared between SetImage methods.
229 }
230 
231 // Get a clone/copy of the source image rectangle.
232 // The returned Pix must be pixDestroyed.
233 // This function will be used in the future by the page layout analysis, and
234 // the layout analysis that uses it will only be available with Leptonica,
235 // so there is no raw equivalent.
237  if (IsFullImage()) {
238  // Just clone the whole thing.
239  return pixClone(pix_);
240  } else {
241  // Crop to the given rectangle.
242  Box* box = boxCreate(rect_left_, rect_top_, rect_width_, rect_height_);
243  Pix* cropped = pixClipRectangle(pix_, box, nullptr);
244  boxDestroy(&box);
245  return cropped;
246  }
247 }
248 
249 // Get a clone/copy of the source image rectangle, reduced to greyscale,
250 // and at the same resolution as the output binary.
251 // The returned Pix must be pixDestroyed.
252 // Provided to the classifier to extract features from the greyscale image.
254  Pix* pix = GetPixRect(); // May have to be reduced to grey.
255  int depth = pixGetDepth(pix);
256  if (depth != 8) {
257  Pix* result = depth < 8 ? pixConvertTo8(pix, false)
258  : pixConvertRGBToLuminance(pix);
259  pixDestroy(&pix);
260  return result;
261  }
262  return pix;
263 }
264 
265 // Otsu thresholds the rectangle, taking the rectangle from *this.
267  Pix** out_pix) const {
268  int* thresholds;
269  int* hi_values;
270 
271  int num_channels = OtsuThreshold(src_pix, rect_left_, rect_top_, rect_width_,
272  rect_height_, &thresholds, &hi_values);
273  // only use opencl if compiled w/ OpenCL and selected device is opencl
274 #ifdef USE_OPENCL
275  OpenclDevice od;
276  if (num_channels == 4 &&
277  od.selectedDeviceIsOpenCL() && rect_top_ == 0 && rect_left_ == 0) {
278  od.ThresholdRectToPixOCL((unsigned char*)pixGetData(src_pix), num_channels,
279  pixGetWpl(src_pix) * 4, thresholds, hi_values,
280  out_pix /*pix_OCL*/, rect_height_, rect_width_,
282  } else {
283 #endif
284  ThresholdRectToPix(src_pix, num_channels, thresholds, hi_values, out_pix);
285 #ifdef USE_OPENCL
286  }
287 #endif
288  delete [] thresholds;
289  delete [] hi_values;
290 }
291 
295 // Note: num_channels is the size of the thresholds and hi_values
295 // arrays and also the bytes per pixel in src_pix.
297  int num_channels,
298  const int* thresholds,
299  const int* hi_values,
300  Pix** pix) const {
301  *pix = pixCreate(rect_width_, rect_height_, 1);
302  uint32_t* pixdata = pixGetData(*pix);
303  int wpl = pixGetWpl(*pix);
304  int src_wpl = pixGetWpl(src_pix);
305  uint32_t* srcdata = pixGetData(src_pix);
306  pixSetXRes(*pix, pixGetXRes(src_pix));
307  pixSetYRes(*pix, pixGetYRes(src_pix));
308  for (int y = 0; y < rect_height_; ++y) {
309  const uint32_t* linedata = srcdata + (y + rect_top_) * src_wpl;
310  uint32_t* pixline = pixdata + y * wpl;
311  for (int x = 0; x < rect_width_; ++x) {
312  bool white_result = true;
313  for (int ch = 0; ch < num_channels; ++ch) {
314  int pixel =
315  GET_DATA_BYTE(linedata, (x + rect_left_) * num_channels + ch);
316  if (hi_values[ch] >= 0 &&
317  (pixel > thresholds[ch]) == (hi_values[ch] == 0)) {
318  white_result = false;
319  break;
320  }
321  }
322  if (white_result)
323  CLEAR_DATA_BIT(pixline, x);
324  else
325  SET_DATA_BIT(pixline, x);
326  }
327  }
328 }
329 
330 } // namespace tesseract.
virtual bool ThresholdToPix(PageSegMode pageseg_mode, Pix **pix)
Returns false on error.
DLLSYM void tprintf(const char *format,...)
Definition: tprintf.cpp:35
int pix_channels_
Number of 8-bit channels in pix_.
Definition: thresholder.h:175
int yres_
y pixels/inch in source image.
Definition: thresholder.h:179
int pix_wpl_
Words per line of pix_.
Definition: thresholder.h:176
void ThresholdRectToPix(Pix *src_pix, int num_channels, const int *thresholds, const int *hi_values, Pix **pix) const
int image_height_
Height of source pix_.
Definition: thresholder.h:174
bool IsBinary() const
Returns true if the source image is binary.
Definition: thresholder.h:74
int OtsuThreshold(Pix *src_pix, int left, int top, int width, int height, int **thresholds, int **hi_values)
Definition: otsuthr.cpp:40
void SetRectangle(int left, int top, int width, int height)
virtual void Init()
Common initialization shared between SetImage methods.
virtual void Clear()
Destroy the Pix if there is one, freeing memory.
Definition: thresholder.cpp:48
virtual Pix * GetPixRectGrey()
int estimated_res_
Resolution estimate from text size.
Definition: thresholder.h:180
int scale_
Scale factor from original image.
Definition: thresholder.h:178
virtual void GetImageSizes(int *left, int *top, int *width, int *height, int *imagewidth, int *imageheight)
void SetImage(const unsigned char *imagedata, int width, int height, int bytes_per_pixel, int bytes_per_line)
Definition: thresholder.cpp:65
virtual Pix * GetPixRectThresholds()
bool IsEmpty() const
Return true if no image has been set.
Definition: thresholder.cpp:53
void OtsuThresholdRectToPix(Pix *src_pix, Pix **out_pix) const
int image_width_
Width of source pix_.
Definition: thresholder.h:173
bool IsFullImage() const
Return true if we are processing the full image.
Definition: thresholder.h:152