00001 #ifndef PAIRSHAPE_H_HM_2005
00002 #define PAIRSHAPE_H_HM_2005
00003
00004 #include <fstream>
00005
00006 #include "../PageLayoutLib/objectInfoClass.h"
00007 #include "math.h"
00008 #include <string>
00009
00010 #include "../Utility_hm/TMatrix.h"
00011
00012
00022 struct pairshape_vector
00023 {
00024 double Len1;
00025 double Len2;
00026 double Len3;
00027 double alpha;
00028 double belta;
00029
00030 int type1;
00031 int type2;
00032 int FontHeight1;
00033 int FontHeight2;
00034
00035 int doc_ID;
00036
00041 void write_stream( ofstream &f )
00042 {
00043 f << "---------pairshape-vector---------" << endl;
00044
00045 f << Len1 << endl;
00046 f << Len2 << endl;
00047 f << Len3 << endl;
00048 f << alpha << endl;
00049 f << belta << endl;
00050
00051 f << type1 << endl;
00052 f << type2 << endl;
00053 f << FontHeight1 << endl;
00054 f << FontHeight2 << endl;
00055
00056 f << doc_ID << endl;
00057
00058 }
00059
00064 void read_stream( ifstream &f )
00065 {
00066 char buf[100];
00067 f >> buf;
00068
00069 f >> Len1 ;
00070 f >> Len2 ;
00071 f >> Len3 ;
00072 f >> alpha ;
00073 f >> belta ;
00074
00075 f >> type1 ;
00076 f >> type2 ;
00077 f >> FontHeight1 ;
00078 f >> FontHeight2 ;
00079
00080 f >> doc_ID ;
00081
00082 }
00083
00084
00088 pairshape_vector()
00089 {
00090 Len1 = Len2 = Len3 = alpha = belta = 0;
00091
00092 FontHeight1 = 0;
00093 FontHeight2 = 0;
00094
00095 type1 = type2 = -1;
00096 doc_ID = -1;
00097 }
00098
00102 pairshape_vector(double l1, double l2, double l3, double a, double b)
00103 {
00104 Len1 = l1; Len2 = l2; Len3 = l3; alpha = a; belta = b;
00105 }
00106
00107 public:
00108 void Construction(objectInfo &obj, pointLocation & ref1, pointLocation & ref2);
00109
00110 pairshape_vector Mirror();
00111
00112 double Euc_dist(pairshape_vector &other);
00113
00114 double Hur_dist(pairshape_vector &other);
00115
00116 void Normalization();
00117
00118 pairshape_vector Reverse();
00119
00120 double match(pairshape_vector &a,bool upside = false);
00121
00122 void Construction(struct objectInfo& O1, struct objectInfo& O2);
00123
00124 pairshape_vector operator = (pairshape_vector a);
00125
00126 bool operator == (pairshape_vector a);
00127
00128 double norm();
00129
00130 double normsquare();
00131
00132 pairshape_vector operator - (pairshape_vector a);
00133
00134 pairshape_vector operator + (pairshape_vector a);
00135
00136 pairshape_vector operator * (double a);
00137
00138 pairshape_vector operator / (double a);
00139
00140 pairshape_vector dotproduct(pairshape_vector a);
00141
00142 pairshape_vector dotsqrt();
00143
00144 pairshape_vector dotdivide(pairshape_vector a);
00145
00146 pairshape_vector objsqrt();
00147
00148 };
00149
00150
00151
00152
00153 struct pairshape_vector_list
00154 {
00155 struct pairshape_vector * m_pVectorList;
00156 int m_nVectors;
00157
00161 pairshape_vector_list(){ m_pVectorList = NULL; m_nVectors = 0; }
00162 void Construction(struct objectInfo *obj, int object_count,int documentId, int style);
00163
00167 ~pairshape_vector_list()
00168 {
00169 if(m_pVectorList)
00170 delete []m_pVectorList;
00171 }
00172 public:
00173
00174
00175 void Append(pairshape_vector_list *addon);
00176 void Reset();
00177
00182 void read_stream( ifstream &f )
00183 {
00184 char buf[100];
00185 f >> buf;
00186 f >> buf; f >> m_nVectors;
00187
00188 if( m_pVectorList ) delete []m_pVectorList;
00189 m_pVectorList = new pairshape_vector[m_nVectors];
00190 for( int i=0; i<m_nVectors; i++ )
00191 m_pVectorList[i].read_stream( f ) ;
00192 };
00193
00198 void write_stream( ofstream &f )
00199 {
00200 f << "***********pairshape-vector-list************" << endl;
00201 f << "nbVector_in_list: " << m_nVectors << endl;
00202
00203 int i = 0;
00204 for(; i<m_nVectors; i++)
00205 m_pVectorList[i].write_stream(f);
00206
00207 };
00208 };
00209
00210
00211
00212 class pairshape_cluster
00213 {
00214 public:
00215 pairshape_vector_list m_MemberList;
00216 int m_nMember;
00217 pairshape_vector m_centroid;
00218 pairshape_vector m_sigma;
00219
00220 double m_weight;
00221 double m_avgNum_perdoc;
00222 double m_sigmaNum_perdoc;
00223
00227 pairshape_cluster()
00228 {
00229 m_weight = 1;
00230 }
00231
00232 void write_stream( ofstream &f );
00233 void read_stream( ifstream &f );
00234 void Reset();
00235 pairshape_cluster operator = (pairshape_cluster a);
00236 };
00237
00238
// Integer selector constants.
// NOTE(review): presumably these pick a clustering method (k-means vs. the
// dyn_Clustering routines) -- confirm where they are consumed.
#define KMEANS 0
#define DYN 1

// Fractional tuning constants.
// NOTE(review): their exact role is not visible in this header; verify
// against the implementation file before changing any value.
#define range_vector 0.2
#define range_cluster 0.1
#define range_weightcluster 0.1
#define range_score 0.2

// NOTE(review): looks like an iteration count for the clustering code -- confirm.
#define this_iterations 2
00249
00250
00251
00252
00253
00254
00255
00256
00257
00258
00259
00260 class pairshape_cluster_list
00261 {
00262 public:
00263 pairshape_cluster * m_pClusterList;
00264 int m_nClusters;
00265
00269 pairshape_cluster_list()
00270 {
00271 m_pClusterList = NULL;
00272 m_nClusters = 0;
00273 }
00277 ~pairshape_cluster_list()
00278 {
00279 if(m_pClusterList)
00280 delete []m_pClusterList;
00281 }
00282
00286 void Reset()
00287 {
00288 int i;
00289 for(i=0;i<m_nClusters;i++)
00290 m_pClusterList[i].Reset();
00291
00292 delete []m_pClusterList;
00293
00294 m_pClusterList = NULL;
00295 m_nClusters = 0;
00296 }
00297 public:
00298 double ProbScoreAPage(pairshape_vector_list * pTest, bool upside = false, bool mirror = false);
00299 double m_nAvgVperDoc;
00300 double m_noiseVRatio;
00301 int m_nTotalVectors;
00302
00303 void ClusterStat(int nDoc=1, bool flag = false);
00304 double Probability(pairshape_vector_list *pTest);
00305 void CheckSTD(pairshape_vector &targ);
00306 double Probability(pairshape_cluster_list * pTest);
00307
00308 void dyn_Clustering(pairshape_cluster_list **pClusterList, int nList, bool stat_flag = true, bool flag = false);
00309 void dyn_Clustering(pairshape_vector_list * pMultiDocLists, int nDoc=1, bool flag = false);
00310 double ScoreAPage(pairshape_cluster_list *pTest,int method,bool upside = true);
00311 double ScoreOnClusterList(pairshape_cluster_list *pList, int method = 1);
00312
00313 void WeightingAgainst(pairshape_cluster_list *pOther);
00314 void SetDefaultSigma(pairshape_vector &sigma);
00315
00316
00317 void SaveToDisk(string outfile, int style, bool saveweight = true);
00318 void LoadFromDisk( string infile , int & style);
00319 };
00320
00321
00322
// Number of rows b_Cluster::Add() adds to its member table each time the
// table is full. Can be overridden by defining it before this header.
#ifndef INCREAMENTAL_AMOUNT
#define INCREAMENTAL_AMOUNT 12
#endif
00326
00327 struct b_Cluster
00328 {
00329
00330
00331 CTMatrix<int> memberlist;
00332 int nMember;
00333 pairshape_vector centroid;
00334
00335 int nTotalMember;
00336
00340 b_Cluster()
00341 {
00342
00343 nMember = 0;
00344 nTotalMember = 0;
00345
00346 }
00347
00351 ~b_Cluster()
00352 {
00353 memberlist.Destruction();
00354 }
00355
00363 void Add(int page_index, int member_index, pairshape_vector &newone, int nNewMember=1)
00364 {
00365 if(nMember == memberlist.GetRows())
00366 {
00367 CTMatrix<int> tmp = memberlist;
00368 memberlist.Construction(nMember + INCREAMENTAL_AMOUNT,2);
00369 int i,j;
00370 for(i=0;i<nMember;i++)
00371 for(j=0;j<2;j++)
00372 memberlist[i][j] = tmp[i][j];
00373 }
00374
00375 memberlist[nMember][0] = page_index;
00376 memberlist[nMember][1] = member_index;
00377
00378 pairshape_vector tmp = centroid*(double)nTotalMember + newone*(double)nNewMember;
00379 centroid = tmp/(double)(nTotalMember+nNewMember);
00380
00381
00382
00383 nMember ++;
00384 nTotalMember += nNewMember;
00385 }
00386 };
00387
00388 #endif