tesseract  4.1.1
picofeat.cpp
Go to the documentation of this file.
1 /******************************************************************************
2  ** Filename: picofeat.cpp
3  ** Purpose: Definition of pico-features.
4  ** Author: Dan Johnson
5  **
6  ** (c) Copyright Hewlett-Packard Company, 1988.
7  ** Licensed under the Apache License, Version 2.0 (the "License");
8  ** you may not use this file except in compliance with the License.
9  ** You may obtain a copy of the License at
10  ** http://www.apache.org/licenses/LICENSE-2.0
11  ** Unless required by applicable law or agreed to in writing, software
12  ** distributed under the License is distributed on an "AS IS" BASIS,
13  ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14  ** See the License for the specific language governing permissions and
15  ** limitations under the License.
16  ******************************************************************************/
17 /*----------------------------------------------------------------------------
18  Include Files and Type Defines
19 ----------------------------------------------------------------------------*/
20 #include "picofeat.h"
21 
22 #include "classify.h"
23 #include "featdefs.h"
24 #include "fpoint.h"
25 #include "mfoutline.h"
26 #include "ocrfeatures.h"
27 #include "params.h"
28 #include "trainingsample.h"
29 
30 #include <cmath>
31 #include <cstdio>
32 
33 /*---------------------------------------------------------------------------
34  Variables
35 ----------------------------------------------------------------------------*/
36 
/* Tunable pico-feature length (default 0.05). ConvertSegmentToPicoFeat
   divides each segment's length by this value and rounds the result to
   decide how many pico-features to emit for that segment. */
37 double_VAR(classify_pico_feature_length, 0.05, "Pico Feature Length");
38 
39 /*---------------------------------------------------------------------------
40  Private Function Prototypes
41 ----------------------------------------------------------------------------*/
43  FPOINT *End,
44  FEATURE_SET FeatureSet);
45 
/* Walks Outline and appends pico-features to FeatureSet for every edge
   whose end point is not marked hidden. */
46 void ConvertToPicoFeatures2(MFOUTLINE Outline, FEATURE_SET FeatureSet);
47 
/* Subtracts the mean PicoFeatX value of FeatureSet from the PicoFeatX
   parameter of each of its features (modifies the set in place). */
48 void NormalizePicoX(FEATURE_SET FeatureSet);
49 
50 /*----------------------------------------------------------------------------
51  Public Code
52 ----------------------------------------------------------------------------*/
53 /*---------------------------------------------------------------------------*/
54 namespace tesseract {
/* ExtractPicoFeatures (per the closing comment below): builds the
   pico-feature set for `Blob` and returns it.
   NOTE(review): the function's signature line is not present in this
   listing, and listing line 78 is also absent, so whether the
   NormalizePicoX call below is unconditional cannot be confirmed from here.
   NOTE(review): the returned set is allocated by NewFeatureSet and is not
   freed here; presumably the caller owns it -- confirm against callers. */
64  LIST Outlines;
65  LIST RemainingOutlines;
66  MFOUTLINE Outline;
67  FEATURE_SET FeatureSet;
/* Receives output from NormalizeOutlines; not read again in this function. */
68  float XScale, YScale;
69 
/* Allocate the result set with room for MAX_PICO_FEATURES features. */
70  FeatureSet = NewFeatureSet(MAX_PICO_FEATURES);
/* Convert the blob to a list of outlines and normalize them in place. */
71  Outlines = ConvertBlob(Blob);
72  NormalizeOutlines(Outlines, &XScale, &YScale);
/* Iterate with a second list handle so that `Outlines` still refers to the
   head of the list when it is handed to FreeOutlines afterwards. */
73  RemainingOutlines = Outlines;
74  iterate(RemainingOutlines) {
75  Outline = static_cast<MFOUTLINE>first_node (RemainingOutlines);
76  ConvertToPicoFeatures2(Outline, FeatureSet);
77  }
/* Re-centre the X parameter of the collected features on their mean. */
79  NormalizePicoX(FeatureSet);
/* The outlines are only an intermediate representation; release them. */
80  FreeOutlines(Outlines);
81  return (FeatureSet);
82 
83 } /* ExtractPicoFeatures */
84 } // namespace tesseract
85 
86 /*----------------------------------------------------------------------------
87  Private Code
88 ----------------------------------------------------------------------------*/
89 /*---------------------------------------------------------------------------*/
104  FPOINT *End,
105  FEATURE_SET FeatureSet) {
/* ConvertSegmentToPicoFeat (per the closing comment below): splits the
   segment from *Start to *End into equal pieces and appends one
   pico-feature per piece to FeatureSet. Every feature of the segment gets
   the same direction; its X/Y parameters are the midpoint of its piece.
   NOTE(review): the first line of the signature (return type, function name
   and the `Start` parameter) is not present in this listing. */
106  FEATURE Feature;
107  float Angle;
108  float Length;
109  int NumFeatures;
110  FPOINT Center;
111  FPOINT Delta;
112  int i;
113 
/* Direction of the segment, requested with a full-scale value of 1.0. */
114  Angle = NormalizedAngleFrom (Start, End, 1.0);
115  Length = DistanceBetween (*Start, *End);
/* Round Length / classify_pico_feature_length to the nearest integer and
   emit at least one feature, even for very short segments. */
116  NumFeatures = static_cast<int>(floor (Length / classify_pico_feature_length + 0.5));
117  if (NumFeatures < 1)
118  NumFeatures = 1;
119 
120  /* compute vector for one pico feature */
121  Delta.x = XDelta (*Start, *End) / NumFeatures;
122  Delta.y = YDelta (*Start, *End) / NumFeatures;
123 
124  /* compute position of first pico feature */
/* Half a step past Start, i.e. the centre of the first piece. */
125  Center.x = Start->x + Delta.x / 2.0;
126  Center.y = Start->y + Delta.y / 2.0;
127 
128  /* compute each pico feature in segment and add to feature set */
129  for (i = 0; i < NumFeatures; i++) {
130  Feature = NewFeature (&PicoFeatDesc);
131  Feature->Params[PicoFeatDir] = Angle;
132  Feature->Params[PicoFeatX] = Center.x;
133  Feature->Params[PicoFeatY] = Center.y;
/* NOTE(review): the return value of AddFeature is ignored here; presumably
   it reports whether the feature was accepted -- confirm what happens to
   Feature when it is not. */
134  AddFeature(FeatureSet, Feature);
135 
/* Advance one full step to the centre of the next piece. */
136  Center.x += Delta.x;
137  Center.y += Delta.y;
138  }
139 } /* ConvertSegmentToPicoFeat */
140 
141 
142 /*---------------------------------------------------------------------------*/
155 void ConvertToPicoFeatures2(MFOUTLINE Outline, FEATURE_SET FeatureSet) {
156  MFOUTLINE Next;
157  MFOUTLINE First;
158  MFOUTLINE Current;
159 
160  if (DegenerateOutline(Outline))
161  return;
162 
163  First = Outline;
164  Current = First;
165  Next = NextPointAfter(Current);
166  do {
167  /* note that an edge is hidden if the ending point of the edge is
168  marked as hidden. This situation happens because the order of
169  the outlines is reversed when they are converted from the old
170  format. In the old format, a hidden edge is marked by the
171  starting point for that edge. */
172  if (!(PointAt(Next)->Hidden))
173  ConvertSegmentToPicoFeat (&(PointAt(Current)->Point),
174  &(PointAt(Next)->Point), FeatureSet);
175 
176  Current = Next;
177  Next = NextPointAfter(Current);
178  }
179  while (Current != First);
180 
181 } /* ConvertToPicoFeatures2 */
182 
183 
184 /*---------------------------------------------------------------------------*/
193 void NormalizePicoX(FEATURE_SET FeatureSet) {
194  int i;
195  FEATURE Feature;
196  float Origin = 0.0;
197 
198  for (i = 0; i < FeatureSet->NumFeatures; i++) {
199  Feature = FeatureSet->Features[i];
200  Origin += Feature->Params[PicoFeatX];
201  }
202  Origin /= FeatureSet->NumFeatures;
203 
204  for (i = 0; i < FeatureSet->NumFeatures; i++) {
205  Feature = FeatureSet->Features[i];
206  Feature->Params[PicoFeatX] -= Origin;
207  }
208 } /* NormalizePicoX */
209 
210 namespace tesseract {
211 /*---------------------------------------------------------------------------*/
218  const TBLOB& blob, const INT_FX_RESULT_STRUCT& fx_info) {
/* ExtractIntCNFeatures (per the closing comment below): converts the integer
   features of a training sample built from `blob` into a FEATURE_SET, one
   FEATURE per integer feature. Returns nullptr when no sample is produced.
   NOTE(review): the signature's first line and the lines that declare
   `bl_features` and `sample` are not present in this listing; the visible
   argument list suggests `sample` comes from BlobToTrainingSample -- confirm. */
/* Work on a copy: fx_info is const, but a non-const pointer is passed on. */
219  INT_FX_RESULT_STRUCT local_fx_info(fx_info);
222  blob, false, &local_fx_info, &bl_features);
223  if (sample == nullptr) return nullptr;
224 
225  uint32_t num_features = sample->num_features();
226  const INT_FEATURE_STRUCT* features = sample->features();
227  FEATURE_SET feature_set = NewFeatureSet(num_features);
228  for (uint32_t f = 0; f < num_features; ++f) {
229  FEATURE feature = NewFeature(&IntFeatDesc);
230 
/* Copy X, Y and Theta of the integer feature into the float parameters. */
231  feature->Params[IntX] = features[f].X;
232  feature->Params[IntY] = features[f].Y;
233  feature->Params[IntDir] = features[f].Theta;
234  AddFeature(feature_set, feature);
235  }
/* The sample was only needed as a source of features; release it here. */
236  delete sample;
237 
238  return feature_set;
239 } /* ExtractIntCNFeatures */
240 
241 /*---------------------------------------------------------------------------*/
248  const TBLOB& blob, const INT_FX_RESULT_STRUCT& fx_info) {
/* ExtractIntGeoFeatures (per the closing comment below): returns a
   FEATURE_SET holding a single feature whose GeoBottom, GeoTop and GeoWidth
   parameters are read from a training sample built from `blob`. Returns
   nullptr when no sample is produced.
   NOTE(review): the signature's first line and the lines that declare
   `bl_features` and `sample` are not present in this listing. */
/* Work on a copy: fx_info is const, but a non-const pointer is passed on. */
249  INT_FX_RESULT_STRUCT local_fx_info(fx_info);
252  blob, false, &local_fx_info, &bl_features);
253  if (sample == nullptr) return nullptr;
254 
255  FEATURE_SET feature_set = NewFeatureSet(1);
/* NOTE(review): the feature is created from IntFeatDesc although it is
   filled with Geo* parameters -- confirm that this descriptor is intended. */
256  FEATURE feature = NewFeature(&IntFeatDesc);
257 
258  feature->Params[GeoBottom] = sample->geo_feature(GeoBottom);
259  feature->Params[GeoTop] = sample->geo_feature(GeoTop);
260  feature->Params[GeoWidth] = sample->geo_feature(GeoWidth);
261  AddFeature(feature_set, feature);
/* The sample was only needed as a source of features; release it here. */
262  delete sample;
263 
264  return feature_set;
265 } /* ExtractIntGeoFeatures */
266 
267 } // namespace tesseract.
#define iterate(l)
Definition: oldlist.h:101
FEATURE Features[1]
Definition: ocrfeatures.h:68
#define XDelta(A, B)
Definition: fpoint.h:38
float DistanceBetween(FPOINT A, FPOINT B)
Definition: fpoint.cpp:29
Definition: blobs.h:284
FEATURE NewFeature(const FEATURE_DESC_STRUCT *FeatureDesc)
Definition: ocrfeatures.cpp:78
Definition: picofeat.h:31
void ConvertToPicoFeatures2(MFOUTLINE Outline, FEATURE_SET FeatureSet)
Definition: picofeat.cpp:155
#define YDelta(A, B)
Definition: fpoint.h:39
FEATURE_SET NewFeatureSet(int NumFeatures)
Definition: ocrfeatures.cpp:94
const FEATURE_DESC_STRUCT IntFeatDesc
#define double_VAR(name, val, comment)
Definition: params.h:312
#define MAX_PICO_FEATURES
Definition: picofeat.h:46
bool AddFeature(FEATURE_SET FeatureSet, FEATURE Feature)
Definition: ocrfeatures.cpp:40
float y
Definition: fpoint.h:30
Definition: picofeat.h:30
TESS_API const FEATURE_DESC_STRUCT PicoFeatDesc
Definition: fpoint.h:29
double classify_pico_feature_length
Definition: picofeat.cpp:37
FEATURE_SET ExtractIntCNFeatures(const TBLOB &blob, const INT_FX_RESULT_STRUCT &fx_info)
Definition: picofeat.cpp:217
float Params[1]
Definition: ocrfeatures.h:61
float x
Definition: fpoint.h:30
float NormalizedAngleFrom(FPOINT *Point1, FPOINT *Point2, float FullScale)
Definition: fpoint.cpp:44
FEATURE_SET ExtractPicoFeatures(TBLOB *Blob)
Definition: picofeat.cpp:63
TrainingSample * BlobToTrainingSample(const TBLOB &blob, bool nonlinear_norm, INT_FX_RESULT_STRUCT *fx_info, GenericVector< INT_FEATURE_STRUCT > *bl_features)
Definition: intfx.cpp:77
void NormalizePicoX(FEATURE_SET FeatureSet)
Definition: picofeat.cpp:193
uint16_t NumFeatures
Definition: ocrfeatures.h:66
void ConvertSegmentToPicoFeat(FPOINT *Start, FPOINT *End, FEATURE_SET FeatureSet)
Definition: picofeat.cpp:103
#define first_node(l)
Definition: oldlist.h:92
void FreeOutlines(LIST Outlines)
Definition: mfoutline.cpp:167
LIST ConvertBlob(TBLOB *blob)
Definition: mfoutline.cpp:37
void NormalizeOutlines(LIST Outlines, float *XScale, float *YScale)
Definition: mfoutline.cpp:276
Definition: cluster.h:32
FEATURE_SET ExtractIntGeoFeatures(const TBLOB &blob, const INT_FX_RESULT_STRUCT &fx_info)
Definition: picofeat.cpp:247