#include <objectInfoClass.h>
Public Member Functions | |
objectInfoClass (std::string filename, JScript *scriptInfo, int line_count_limit, char *output_directory, int pixelDataFlag, int scansoftFlag, int dctFlag) | |
objectInfoClass (std::string filename, JScript *scriptInfo, int scansoftFlag, int dctFlag) | |
~objectInfoClass () | |
int | getObjectCount () const |
objectInfo * | objectStructure () const |
void | printObjectInfo (char *filename) |
void | printTextLineInfo (FILE *fp) |
unsigned long | getImageHeight () const |
unsigned long | getImageWidth () const |
unsigned long | getOriginalImageHeight () const |
unsigned long | getOriginalImageWidth () const |
double | getOriginalSkew () const |
unsigned char * | getPixelData () const |
double | getSlopeVarSqr () const |
int | getLineCount () const |
char * | getScript () |
char * | getBestFitScript () |
int | getlineDirection () |
int | getscriptIdOrientation () |
void | writeG4Tiff (char *fname) |
void | writeG4TiffGray (char *fname, float *skew, int image_rotation) |
void | writeG4TiffGray (char *fname, int orientation) |
float | getPeakValue () const |
float | getPeakRatio () const |
double | getNormLineHeightVar () const |
double | getNormLineWidthVar () const |
double | getNormLineLeftIndentVar () const |
double | getNormLineRightIndentVar () const |
double | getAvgLineHeight () const |
void | computeScansoftSegments (DLImage *doink, objectInfo *imageObjects, int *index, char *filename) |
int | countScansoftSegments (char *filename) |
objectInfo * | computeObjectInfo (std::string fname, int pixelDataFlag) |
void | computeInterlineSpacingFeatures (DLImage *doink, DLlineInfo *textLines, int long_line_count, float *peakValue, float *peakRatio) |
void | find_first_peak (double *input_array, int length, int offset, int *location) |
void | computeLineStatistics (DLlineInfo *textLines, int long_line_count) |
void | getSkewRotationCorrectedImageData (char *fname, float *skew, int rotation, unsigned char **ptr, int *new_height, int *new_width) |
int | tbComputeDCT (double *input, int winsize, double **dct_array) |
void | tbCalcMeanStddev (float *values, int value_count, double *mean, double *stddev) |
void | tbStructureComputeMeanStddev (double *values, int value_count, int separation, double *mean, double *stddev) |
void | tbComputeMeanStddev (double *values, int value_count, double *mean, double *stddev) |
|
This method creates an instance of an objectInfoClass. The following parameters are used: filename - path to the image file. scriptInfo - an instance of the JScript class that is used to calculate page-level script identification. line_count_limit - maximum number of text lines to include in the class. output_directory - directory where output data files containing the object info are stored. Prior to computing the needed features, the software will look in the specified data directory to see if a .dat file corresponding to the image exists. If so, the data in that file will be read in instead of recalculating the features. pixelDataFlag - indicates whether the pixel data for the image should be stored in the class. scansoftFlag - unused at this point. Can be used, with code modifications, to indicate if scansoft bounding boxes should be included. dctFlag - indicates if features relating to interline spacing should be included. |
|
This method creates an instance of an objectInfoClass. The following parameters are used: filename - path to the image file. scriptInfo - an instance of the JScript class that is used to calculate page-level script identification. scansoftFlag - unused at this point. Can be used, with code modifications, to indicate if scansoft bounding boxes should be included. dctFlag - indicates if features relating to interline spacing should be included. |
|
This method destroys an instance of an objectInfoClass. |
|
|
|
|
|
|
|
|
|
|
|
|
|
getAvgLineHeight returns the average line height |
|
getBestFitScript returns the best-fitting image script as calculated by the JScript class. This may or may not be the same value as returned by the getScript method. There are occasions where an image is classified as unknown. This occurs when the best fitting script is |
|
getImageHeight returns the height of the deskewed image |
|
getImageWidth returns the width of the deskewed image |
|
getLineCount returns the number of text lines calculated |
|
getlineDirection returns the line orientation (horizontal/vertical) of the text |
|
getNormLineHeightVar returns the square of the normalized height variation |
|
getNormLineLeftIndentVar returns the square of the left indent variation |
|
getNormLineRightIndentVar returns the square of the right indent variation |
|
getNormLineWidthVar returns the square of the normalized width variation |
|
|
|
getOriginalImageHeight returns the height of the original image |
|
getOriginalImageWidth returns the width of the original image |
|
getOriginalSkew returns the skew value calculated |
|
getPeakRatio - This is not yet implemented. |
|
getPeakValue returns the value corresponding to the first peak of the DCT used to compute the interline spacing. This is not yet implemented. |
|
getPixelData returns an 8 bit/pixel representation of the pixel data |
|
getScript returns the image script as calculated by the JScript class |
|
getscriptIdOrientation returns the image orientation as calculated by the JScript class |
|
|
|
getSlopeVarSqr returns the variance of the slope values squared |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
writeG4Tiff writes out a group 4 compressed version of the original image |
|
writeG4TiffGray writes out a group 4 compressed version of the image rotated according to the values specified by the input parameters. This is not yet implemented. |
|
writeG4TiffGray writes out a group 4 compressed version of the image rotated and deskewed according to the values specified by the input parameters. |