00001
00002 #ifndef UTILITY_H_HM_2005_06_16
00003 #define UTILITY_H_HM_2005_06_16
00004
00005 #include "../PageLayoutLib/objectPairClass.h"
00006 #include "../PairVector_hm/pairshape.h"
00007 #include "../ObjectVector_hm/objcluster.h"
00008 #include "../Layout_hm/layout.h"
00009 #include <string>
00010 #include <deque>
// Container of strings used by the file-listing helpers declared below
// (FindFile takes a pointer to one, LoadTiffImageSet takes a reference).
// NOTE(review): `deque` and `string` are written without a `std::` qualifier,
// so this header only compiles if names from namespace std have already been
// made visible (e.g. by a using-directive in an earlier include) — confirm,
// or qualify the names explicitly.
00011 typedef deque<string> cstring_array;
00012
00013
00014
// Shape-description methods, numbered 0-4.
// NOTE(review): presumably these are the values accepted by the `style`
// argument of ExtractObjPairVector — confirm against the implementation.
00015 #define PAIR_GENERIC 0 // method 0: Arkin's turning function represents the polygon formed by a textline pair
00016 #define PAIR_VECTOR_BOTH 1 // method 1: 5D vectors describe the shape of the polygon formed by a textline pair
00017 #define SINGLE_VECTOR_BOTH 2 // method 2: 5D vectors describe the properties of a single textline
00018 #define PAIR_VECTOR_WANTED 3 // method 3: same as method 1, but without the unwanted training samples
00019 #define SINGLE_VECTOR_WANTED 4 // method 4: same as method 2, but without the unwanted training samples
00020
00022
00023
00024
// NOTE(review): the meaning of these two constants is not documented in this
// header (the original comment appears to be missing) — confirm their purpose
// against the code that uses them before changing either value.
00025 #define PSV_N_1 1
00026 #define PSV_N_2 0
00027
00028
// Strategies for choosing or maintaining a cluster centroid.
00029 #define FIRST_CENTROID 0 // method 0 only: the first polygon is used as the cluster centroid
00030 #define DYN_APPROXIMATED 1 // method 0 only: the centroid is updated dynamically to the member with the minimum sum of distances to all cluster members
00031 #define KMEANS_MERGY 2 // used by all vector-based methods
00032
00033
00034
00035
00036
// Strategies for selecting training clusters when evaluating a test polygon.
00037 #define BEST_NEIGHBOR 0 // choose the training cluster with the highest training score
00038 #define NEAREST_NEIGHBOR 1 // choose the training cluster with the highest similarity to the testing polygon
00039 #define GAUSS_NEIGHBORHOOD 2 // use the sum of all training clusters within the whole range, weighted by similarity distance
00040
00041
00042
00043
// Declaration only; the definition lives elsewhere.
// Parameters: `path` (string, by value), `file_attr` (mutable char pointer),
// `filearray` (pointer to a cstring_array).
// NOTE(review): presumably collects the files found under `path` that match
// `file_attr` into `*filearray` — confirm against the implementation,
// including whether `filearray` may be null and whether it is cleared first.
00044 void FindFile(string path, char * file_attr, cstring_array * filearray);
00045
00046
00047
// Declaration only; the definition lives elsewhere.
// Parameters: `retSet` (cstring_array passed by non-const reference, so the
// callee can modify it), `filefolder` (string, by value).
// NOTE(review): presumably fills `retSet` with the TIFF image files found in
// `filefolder`; the meaning of the int return value (count vs. status code)
// is not visible here — confirm against the implementation.
00048 int LoadTiffImageSet(cstring_array &retSet, string filefolder);
00049
00050
00051
// Declaration only; the definition lives elsewhere.
// Parameters: `fname` (string, by value), `maxNbTxline` (int, defaults to 128).
// Returns a pointer to an objectPairClass.
// NOTE(review): presumably builds the textline-pair objects from the file
// `fname`, considering at most `maxNbTxline` textlines; ownership of the
// returned pointer and the failure value are not visible here — confirm.
00052 objectPairClass* ExtractObjPair(string fname, int maxNbTxline = 128);
00053
00054
00055
// Declaration only; the definition lives elsewhere.
// Parameters: `fname` (string, by value), `style` (int, no default),
// `nline` (int, defaults to 128), `docID` (int, defaults to 0).
// Returns a pointer to a pairshape_cluster_list.
// NOTE(review): presumably `style` is one of the shape-description method
// constants defined in this header (PAIR_GENERIC ... SINGLE_VECTOR_WANTED)
// and `nline` bounds the number of textlines read; ownership of the returned
// pointer and the failure value are not visible here — confirm.
00056 pairshape_cluster_list* ExtractObjPairVector(string fname, int style , int nline = 128, int docID = 0);
00057
00058
// Declaration only; the definition lives elsewhere.
// Takes two untyped const pointers and returns an int, which is the signature
// shape the C library expects for qsort/bsearch comparison callbacks.
// NOTE(review): the element type being compared and the resulting sort order
// are not visible here — confirm against the implementation and its callers.
00059 int MyCmp(const void * arg1, const void * arg2);
00060
00061
00062
// Declaration only; the definition lives elsewhere.
// Parameters: `pClusters` (pointer to objcluster), `nCluster` (int),
// `pTest` (pointer to Clayout). Returns a double.
// NOTE(review): presumably `pClusters` points to an array of `nCluster`
// training clusters and the result is the probability that the layout
// `*pTest` belongs to them; the value range and the handling of null or
// empty input are not visible here — confirm against the implementation.
00063 double BelongProb(objcluster *pClusters, int nCluster, Clayout *pTest);
00064
00065 #endif