Main Page | Namespace List | Class List | Directories | File List | Class Members | File Members

utility.h

Go to the documentation of this file.
00001 
00002 #ifndef UTILITY_H_HM_2005_06_16
00003 #define UTILITY_H_HM_2005_06_16
00004 
00005 #include "../PageLayoutLib/objectPairClass.h"
00006 #include "../PairVector_hm/pairshape.h"
00007 #include "../ObjectVector_hm/objcluster.h"
00008 #include "../Layout_hm/layout.h"
00009 #include <string>
00010 #include <deque>
00011 typedef deque<string> cstring_array; // sequence of strings; used below to collect file names (see FindFile / LoadTiffImageSet)
00012 
00013 
00014 // training methods: each value selects how textlines / textline pairs are represented
00015 #define PAIR_GENERIC            0               // uses Arkin's turning function to represent the polygon formed by a textline pair
00016 #define PAIR_VECTOR_BOTH                1       // uses 5D vectors to describe the shape of the polygon formed by a textline pair
00017 #define SINGLE_VECTOR_BOTH      2               // uses 5D vectors to describe the properties of a single textline
00018 #define PAIR_VECTOR_WANTED              3       // same as 1 (PAIR_VECTOR_BOTH), but without using the unwanted training samples
00019 #define SINGLE_VECTOR_WANTED    4               // same as 2 (SINGLE_VECTOR_BOTH), but without using the unwanted training samples
00020 
00022 //#define PAIR_VECTOR_BOTH_N_1  5        
00023 //#define PAIR_VECTOR_WANTED_N_1        6  
00024 
00025 #define PSV_N_1                                 1 // NOTE(review): purpose is not documented in this header -- confirm against the code that uses PSV_N_1
00026 #define PSV_N_2                                 0 // NOTE(review): purpose is not documented in this header -- confirm against the code that uses PSV_N_2
00027 
00028 // merge ("mergy") styles: how a cluster centroid is chosen
00029 #define FIRST_CENTROID              0           // used in method 0 (PAIR_GENERIC): choose the first polygon as the cluster centroid
00030 #define DYN_APPROXIMATED                1               // used in method 0 (PAIR_GENERIC): dynamically change the centroid to the member with the minimum sum of distances to all cluster members
00031 #define KMEANS_MERGY            2           // for all vector-based methods
00032 
00033 
00034 // definitions used for TestingClass::m_neighbor
00035 // scoring methods used in the testing process; valid only for the PAIR_GENERIC method
00036 // all methods choose training clusters within a fixed similarity range of a testing polygon (cluster)
00037 #define BEST_NEIGHBOR               0           // choose the training cluster with the highest training score
00038 #define NEAREST_NEIGHBOR                1               // choose the training cluster with the highest similarity to the testing polygon
00039 #define GAUSS_NEIGHBORHOOD              2       // use the sum of all the training clusters within the whole range, weighted by similarity distance
00040 
00041 
00042 // find the filenames whose suffix matches "file_attr" in the folder "path", including its subfolders,
00043 // and put all matching entries into "filearray"
00044 void    FindFile(string path, char * file_attr, cstring_array * filearray);
00045 
00046 // load the filenames of all TIFF images under the folder "filefolder" into "retSet".
00047 // this function is a simple call to "FindFile()" (NOTE(review): meaning of the int return value is not documented here -- confirm)
00048 int             LoadTiffImageSet(cstring_array &retSet, string filefolder);
00049 
00050 // routine used to extract textlines from the document "fname"
00051 // and form textline pairs for the PAIR_GENERIC method
00052 objectPairClass* ExtractObjPair(string fname, int maxNbTxline = 128);
00053 
00054 // routine used to extract textlines from the document "fname"
00055 // and form 5D textline-pair vectors for the PAIR_VECTOR_XX methods
00056 pairshape_cluster_list* ExtractObjPairVector(string fname, int style , int nline = 128, int docID = 0);
00057 
00058 // comparison helper used in TestingClass::SortScores(); the signature matches a qsort-style comparator (presumably used that way -- confirm)
00059 int             MyCmp(const void * arg1, const void * arg2);
00060 
00061 // compute the probability that a testing layout belongs to the training class;
00062 // used in the 5D SINGLE_VECTOR_XX methods
00063 double  BelongProb(objcluster *pClusters, int nCluster, Clayout *pTest);
00064 
00065 #endif

Generated on Tue Aug 29 11:42:40 2006 for PageLayoutDOCLIB by  doxygen 1.4.2