00001 #ifndef __objectInfoClass_H
00002 #define __objectInfoClass_H
00003
00004
00005 #ifndef _JSCRIPT_H_
00006 #include <JScript.h>
00007 #endif
00008
00009 #ifndef __getLines_H
00010 #include <getLines.h>
00011 #endif
00012
00013 #ifndef __polyClass_H
00014 #include <polyClass.h>
00015 #endif
00016
00017
/**
 * Tag stored in objectInfo::type to say what kind of record an objectInfo is.
 *
 * Numbering starts at 1 (there is no enumerator with value 0).  The explicit
 * values below are exactly the ones the original implicit numbering produced;
 * they are spelled out so that inserting a new enumerator in the middle
 * cannot silently renumber the others.
 *
 * NOTE(review): the meaning of each tag is not visible in this header; the
 * scansoft_* names presumably correspond to segment kinds reported by the
 * ScanSoft-related routines of objectInfoClass -- confirm against the
 * implementation.
 */
enum objectType {
    nullObject           = 1,
    taggedForDelete      = 2,
    textLineCount        = 3,
    script               = 4,
    caereWord            = 5,
    pageType             = 6,
    textLine             = 7,
    scansoft_flowed_text = 8,
    scansoft_table       = 9,
    scansoft_graphics    = 10
};
00024
00025
00036 struct objectInfo
00037 {
00038 objectType type;
00039 char *qualifier;
00040 int int_qualifier;
00041 float upper_ext;
00042 float lower_ext;
00043 struct pointLocation location[2];
00044
00045
00046 unsigned int occuranceCount;
00047 };
00052 class objectInfoClass {
00053
00054 public:
00071 objectInfoClass::objectInfoClass(std::string filename,
00072 JScript *scriptInfo,int line_count_limit, char *output_directory,
00073 int pixelDataFlag,int scansoftFlag,int dctFlag);
00074
00085 objectInfoClass::objectInfoClass(std::string filename,
00086 JScript *scriptInfo,int scansoftFlag, int dctFlag);
00087
00091 objectInfoClass::~objectInfoClass();
00092
00093
00094
00095 int getObjectCount() const {return( _objectCount);}
00096
00097
00098
00099
00100
00101 objectInfo *objectStructure() const{return( imageObjects);}
00102
00103
00104
00105
00106 void objectInfoClass::printObjectInfo(char *filename);
00107
00108
00109
00110
00111
00112 void objectInfoClass::printTextLineInfo(FILE *fp);
00113
00117 unsigned long getImageHeight() const {return( _height);}
00118
00122 unsigned long getImageWidth() const {return( _width);}
00123
00127 unsigned long getOriginalImageHeight() const {return( _originalHeight);}
00128
00132 unsigned long getOriginalImageWidth() const {return( _originalWidth);}
00133
00137 double getOriginalSkew() const {return( _originalImageSkew);}
00138
00142 unsigned char *getPixelData() const {return( _pixelData);}
00143
00147 double getSlopeVarSqr() const {return (_lineSlopeVarianceSqr);}
00148
00152 int getLineCount() const {return (_lineCount);}
00153
00157 char *getScript() {return (&_script[0]);}
00158
00164 char *getBestFitScript() {return (&_bestFitScript[0]);}
00165
00169 int getlineDirection() {return(_lineDirection);}
00170
00174 int getscriptIdOrientation() {return(_scriptIdOrientation);}
00175
00179 void objectInfoClass::writeG4Tiff(char *fname);
00180
00186 void objectInfoClass::writeG4TiffGray(char *fname, float *skew,
00187 int image_rotation);
00188
00195 void objectInfoClass::writeG4TiffGray(char *fname, int orientation);
00196
00201 float getPeakValue() const {return( _peakValue);}
00202
00207 float getPeakRatio() const {return( _peakRatio);}
00208
00212 double getNormLineHeightVar() const {return( _normLineHeightVar);}
00213
00217 double getNormLineWidthVar() const {return( _normLineWidthVar);}
00218
00222 double getNormLineLeftIndentVar() const {return( _normLineLeftIndentVar);}
00223
00224
00228 double getNormLineRightIndentVar() const {return( _normLineRightIndentVar);}
00229
00233 double getAvgLineHeight() const {return (_avgLineHeight);}
00234
00235
00236 void computeScansoftSegments(DLImage *doink,objectInfo *imageObjects ,int *index,char *filename);
00237
00238 int countScansoftSegments(char *filename);
00239
00240 objectInfo *computeObjectInfo(std::string fname,int pixelDataFlag);
00241
00242 void computeInterlineSpacingFeatures(DLImage *doink,DLlineInfo *textLines,
00243 int long_line_count,float *peakValue, float *peakRatio);
00244 void find_first_peak(double *input_array,int length,int offset, int *location);
00245
00246 void computeLineStatistics( DLlineInfo *textLines, int long_line_count);
00247
00248 void getSkewRotationCorrectedImageData(char *fname,float *skew, int rotation,
00249 unsigned char **ptr, int *new_height, int *new_width);
00250
00251 int tbComputeDCT(double *input, int winsize, double **dct_array);
00252
00253 void tbCalcMeanStddev(float *values, int value_count, double *mean, double *stddev);
00254
00255 void tbStructureComputeMeanStddev(double *values, int value_count, int separation,
00256 double *mean, double *stddev);
00257
00258 void tbComputeMeanStddev(double *values, int value_count, double *mean,double *stddev);
00259
00260
00261
00262 private:
00263
00264 double _lineSlopeVarianceSqr;
00265
00268 unsigned long _originalHeight;
00269
00271 unsigned long _originalWidth;
00272
00275 unsigned long _height;
00276
00278 unsigned long _width;
00279
00280 JScript *_scriptInfo;
00281
00285 unsigned char *_pixelData;
00287 float _horRes;
00289 float _verRes;
00290
00293 unsigned char *_grayPixelData;
00294 int _objectCount;
00295 int _lineCount;
00296 int _scansoftFlag;
00298 int _dctFlag;
00299 char _script[20];
00300 char _bestFitScript[20];
00301 double _originalImageSkew;
00302 int _lineDirection;
00303 int _scriptIdOrientation;
00304 struct objectInfo *imageObjects;
00305 struct _upper_ext;
00306 struct _lower_ext;
00307 float _peakValue;
00308 float _peakRatio;
00309
00312 float _threshold;
00314 double _normLineHeightVar;
00315
00317 double _avgLineHeight;
00318
00320 double _normLineWidthVar;
00321
00323 double _normLineLeftIndentVar;
00324
00326 double _normLineRightIndentVar;
00327
00328 };
00329
00330 #endif
00331
00332
00333
00334