Main Page | Namespace List | Class List | Directories | File List | Class Members | File Members

pairshape.h

Go to the documentation of this file.
00001 #ifndef PAIRSHAPE_H_HM_2005
00002 #define PAIRSHAPE_H_HM_2005
00003 
00004 #include <fstream>
00005 
00006 #include "../PageLayoutLib/objectInfoClass.h"
00007 #include "math.h"
00008 #include <string>
00009 
00010 #include "../Utility_hm/TMatrix.h"
00011 
00012 
00022 struct pairshape_vector
00023 {
00024         double Len1;   // length of one textline
00025         double Len2;   // length of the other textline
00026         double Len3;   // length of the line which connect the midpoints of the two textlines
00027         double alpha;  // Length Between left endpoint of textline1 and right endpoint of textline2;
00028         double belta;  // Length Between right endpoint of textline1 and left endpoint of textline2;
00029 
00030         int             type1;                  // type of the first object: textline, tableline, etc.
00031         int             type2;
00032         int             FontHeight1;    // font size of the first textline
00033         int             FontHeight2;    // font size of the second textline
00034 
00035         int             doc_ID; // reserved
00036 
00041         void write_stream( ofstream &f )
00042         {
00043                 f << "---------pairshape-vector---------" << endl;
00044 
00045                 f << Len1 << endl;   // length of one textline
00046                 f << Len2 << endl;   // length of the other textline
00047                 f << Len3 << endl;   // length of the line which connect the midpoints of the two textlines
00048                 f << alpha << endl;  // Length Between left endpoint of textline1 and right endpoint of textline2;
00049                 f << belta << endl;  // Length Between right endpoint of textline1 and left endpoint of textline2;
00050 
00051                 f << type1 << endl;                     // type of the first object: textline, tableline, etc.
00052                 f << type2 << endl;
00053                 f << FontHeight1 << endl;       // font size of the first textline
00054                 f << FontHeight2 << endl;       // font size of the second textline
00055 
00056                 f << doc_ID << endl; // reserved
00057         
00058         }
00059 
00064         void read_stream( ifstream &f )
00065         {
00066                 char    buf[100];
00067                 f >> buf;
00068 
00069                 f >> Len1 ;   // length of one textline
00070                 f >> Len2 ;   // length of the other textline
00071                 f >> Len3 ;   // length of the line which connect the midpoints of the two textlines
00072                 f >> alpha ;  // Length Between left endpoint of textline1 and right endpoint of textline2;
00073                 f >> belta ;  // Length Between right endpoint of textline1 and left endpoint of textline2;
00074 
00075                 f >> type1 ;                    // type of the first object: textline, tableline, etc.
00076                 f >> type2 ;
00077                 f >> FontHeight1 ;      // font size of the first textline
00078                 f >> FontHeight2 ;      // font size of the second textline
00079 
00080                 f >> doc_ID ;      // reserved
00081 
00082         }
00083 
00084 
00088         pairshape_vector()
00089         { 
00090                 Len1 = Len2 = Len3 = alpha = belta = 0;
00091 
00092                 FontHeight1 = 0;
00093                 FontHeight2 = 0;
00094 
00095                 type1 = type2 = -1;
00096                 doc_ID = -1;
00097         }
00098         
00102         pairshape_vector(double l1, double l2, double l3, double a, double b)
00103         {
00104                 Len1 = l1; Len2 = l2; Len3 = l3; alpha = a; belta = b;
00105         }
00106         
00107 public:
00108         void Construction(objectInfo &obj, pointLocation & ref1, pointLocation & ref2);
00109         
00110         pairshape_vector Mirror();
00111 
00112         double Euc_dist(pairshape_vector &other);
00113 
00114         double Hur_dist(pairshape_vector &other);
00115 
00116         void Normalization();
00117 
00118         pairshape_vector Reverse();
00119 
00120         double match(pairshape_vector &a,bool upside = false);
00121 
00122         void Construction(struct objectInfo& O1, struct objectInfo& O2);        
00123 
00124         pairshape_vector operator = (pairshape_vector a);
00125 
00126         bool operator == (pairshape_vector a);
00127 
00128         double norm();
00129 
00130         double normsquare();
00131 
00132         pairshape_vector operator - (pairshape_vector a);
00133 
00134         pairshape_vector operator + (pairshape_vector a);
00135 
00136         pairshape_vector operator * (double a);
00137 
00138         pairshape_vector operator / (double a);
00139 
00140         pairshape_vector dotproduct(pairshape_vector a);
00141 
00142         pairshape_vector dotsqrt();
00143         
00144         pairshape_vector dotdivide(pairshape_vector a);
00145         
00146         pairshape_vector objsqrt();
00147 
00148 };
00149 
00150 
00151 
00152 
00153 struct pairshape_vector_list
00154 {
00155         struct pairshape_vector * m_pVectorList;
00156         int m_nVectors;
00157 
00161         pairshape_vector_list(){ m_pVectorList = NULL; m_nVectors = 0; }
00162         void Construction(struct objectInfo *obj, int object_count,int documentId, int style);
00163 
00167         ~pairshape_vector_list()
00168         {
00169                 if(m_pVectorList)
00170                         delete []m_pVectorList;
00171         }
00172 public:
00173         //void Construction2(struct objectInfo *obj, int object_count, int documentId); 
00174         
00175         void Append(pairshape_vector_list *addon);
00176         void Reset();
00177 
00182         void read_stream( ifstream &f )
00183         {
00184                 char buf[100];
00185                 f >> buf;
00186                 f >> buf; f >> m_nVectors;
00187 
00188                 if( m_pVectorList ) delete []m_pVectorList;
00189                 m_pVectorList   = new pairshape_vector[m_nVectors];
00190                 for( int i=0; i<m_nVectors; i++ )
00191                         m_pVectorList[i].read_stream( f )  ;
00192         };
00193 
00198         void write_stream( ofstream &f )
00199         {
00200                 f << "***********pairshape-vector-list************" << endl;
00201                 f << "nbVector_in_list: " << m_nVectors << endl;
00202 
00203                 int i = 0;
00204                 for(; i<m_nVectors; i++)
00205                         m_pVectorList[i].write_stream(f);
00206                 
00207         };
00208 };
00209 
00210 
00211 
00212 class pairshape_cluster
00213 {
00214 public:
00215         pairshape_vector_list   m_MemberList;   
00216         int                                             m_nMember;
00217         pairshape_vector                m_centroid;
00218         pairshape_vector                m_sigma;
00219 
00220         double              m_weight;
00221         double              m_avgNum_perdoc;
00222         double              m_sigmaNum_perdoc;
00223 
00227         pairshape_cluster()
00228         {
00229                 m_weight = 1;
00230         }
00231 
00232         void write_stream( ofstream &f );
00233         void read_stream( ifstream &f );
00234         void Reset();
00235         pairshape_cluster operator = (pairshape_cluster a);
00236 };
00237 
00238 
/* Clustering method identifiers.
 * NOTE(review): presumably the values passed as the `method` arguments of
 * the scoring routines -- confirm against the callers. */
#define KMEANS 0
#define DYN    1

/* Tuning constants. Their consumers are not visible in this header; an
 * alternative value set is kept in the comment block that follows. */
#define range_vector        0.2
#define range_cluster       0.1
#define range_weightcluster 0.1
#define range_score         0.2

#define this_iterations     2
00249 
00250 /*
00251 // for 5D txtline shape vector
00252 #define         range_vector            0.05
00253 #define         range_cluster           0.03
00254 #define         range_weightcluster 0.03
00255 #define         range_score                     0.1
00256 
00257 #define     this_iterations             2
00258 */
00259 
00260 class pairshape_cluster_list
00261 {
00262 public:
00263         pairshape_cluster * m_pClusterList;
00264         int m_nClusters;
00265 
00269         pairshape_cluster_list()
00270         {
00271                 m_pClusterList = NULL; 
00272                 m_nClusters = 0;
00273         }
00277         ~pairshape_cluster_list()
00278         {
00279                 if(m_pClusterList)
00280                         delete []m_pClusterList;
00281         }
00282 
00286         void Reset()
00287         {
00288                 int i;
00289                 for(i=0;i<m_nClusters;i++)
00290                         m_pClusterList[i].Reset();
00291 
00292                 delete []m_pClusterList;
00293 
00294                 m_pClusterList = NULL;
00295                 m_nClusters = 0;
00296         }
00297 public:
00298         double  ProbScoreAPage(pairshape_vector_list * pTest, bool upside = false, bool mirror = false);
00299         double  m_nAvgVperDoc;
00300         double  m_noiseVRatio;  
00301         int             m_nTotalVectors;
00302 
00303         void   ClusterStat(int nDoc=1, bool flag = false);
00304         double Probability(pairshape_vector_list *pTest);
00305         void   CheckSTD(pairshape_vector &targ);
00306         double Probability(pairshape_cluster_list * pTest);
00307         
00308         void   dyn_Clustering(pairshape_cluster_list **pClusterList, int nList, bool stat_flag = true, bool flag = false);
00309         void   dyn_Clustering(pairshape_vector_list * pMultiDocLists, int nDoc=1, bool flag = false);
00310         double ScoreAPage(pairshape_cluster_list *pTest,int method,bool upside = true);
00311         double ScoreOnClusterList(pairshape_cluster_list *pList, int method = 1);
00312         
00313         void   WeightingAgainst(pairshape_cluster_list *pOther);
00314         void   SetDefaultSigma(pairshape_vector &sigma);
00315 
00316         //void   kmeans_Clustering(pairshape_vector_list * pDocLists, int nDoc, int nTargetClusters);
00317         void   SaveToDisk(string outfile, int style, bool saveweight = true);
00318         void   LoadFromDisk( string infile , int & style);
00319 };
00320 
00321 
00322 
00323 #ifndef INCREAMENTAL_AMOUNT
00324 #define INCREAMENTAL_AMOUNT 12
00325 #endif
00326 
00327 struct b_Cluster
00328 {
00329 //      int                             centroid[2]; // index of centroid object/pair, page_index/member_index
00330 //      double                  sum_dist;    //sum of distance from each object pair to centroid pair
00331         CTMatrix<int>   memberlist;
00332         int                             nMember;        
00333         pairshape_vector centroid;
00334 
00335         int                             nTotalMember; //for clustering on clusters
00336 
00340         b_Cluster()
00341         {
00342 //              sum_dist = 0;
00343                 nMember = 0;
00344                 nTotalMember = 0;
00345 //              centroid[0] = centroid[1] = -1;
00346         }
00347 
00351         ~b_Cluster()
00352         {
00353                 memberlist.Destruction();
00354         }       
00355         
00363         void Add(int page_index, int member_index, pairshape_vector &newone, int nNewMember=1)
00364         {
00365                 if(nMember == memberlist.GetRows())
00366                 {
00367                         CTMatrix<int> tmp = memberlist;
00368                         memberlist.Construction(nMember + INCREAMENTAL_AMOUNT,2);
00369                         int i,j;
00370                         for(i=0;i<nMember;i++)
00371                             for(j=0;j<2;j++)
00372                                 memberlist[i][j] = tmp[i][j];
00373                 }
00374 
00375                 memberlist[nMember][0] = page_index;
00376                 memberlist[nMember][1] = member_index;
00377 
00378                 pairshape_vector tmp = centroid*(double)nTotalMember + newone*(double)nNewMember;
00379                 centroid = tmp/(double)(nTotalMember+nNewMember);
00380 
00381 //              ASSERT(centroid.norm()>0);
00382 
00383                 nMember ++;
00384                 nTotalMember += nNewMember;
00385         }
00386 };
00387 
00388 #endif

Generated on Tue Aug 29 11:42:40 2006 for PageLayoutDOCLIB by  doxygen 1.4.2