Main Page | Namespace List | Class List | Directories | File List | Class Members | File Members

layout.h

Go to the documentation of this file.
00001 /* layout.h
00002    Class Clayout is used to form 5-dimensional feature vectors (objvector) 
00003    for each textline in a document page. 
00004 */
00005 
00006 #if !defined(AFX_LAYOUT_H__41A5307C_A00B_418A_A668_D1C2E20618DF__INCLUDED_)
00007 #define AFX_LAYOUT_H__41A5307C_A00B_418A_A668_D1C2E20618DF__INCLUDED_
00008 
00009 #include <string>
00010 #include "../ObjectVector_hm/objcluster.h"      
00011 #include "../PageLayoutLib/objectInfoClass.h"
00012 
00013 struct rectangle   // Four long coordinates (left, top, right, bottom); Clayout::BoundingBox takes one by reference.
00014 {
00015         long left;
00016         long top;
00017         long right;
00018         long bottom;     // NOTE(review): units and coordinate origin are not shown in this header -- confirm.
00019         // Member-wise assignment: copies left, top, right and bottom from b and returns *this.
00020         rectangle & operator = (rectangle b)   // b is taken by value, so the argument is copied on every call
00021         {
00022                 left = b.left;
00023                 top = b.top;
00024                 right = b.right;
00025                 bottom = b.bottom;
00026                 return *this;   // returning *this allows chained assignment (a = b = c)
00027         }
00028 
00029 
00030 };
00031 
00032 
00033 class Clayout  // Forms the 5-dimensional feature vectors (objvector) for the textlines of a document page.
00034 {
00035 public:
00036         Clayout & operator = (Clayout & a);   // assignment from another layout; takes a non-const reference, defined outside this header
00037         
00038         double                  m_pageheight;          // height between the topmost and the lowest textline 
00039         double                  m_pagewidth;           // width between the leftmost end point and the rightmost end point
00040         
00041         objvector *             m_pObjectList;          // NOTE(review): presumably an array of m_nbObjects object vectors; ownership not visible here -- confirm
00042         int                             m_nbObjects;
00043 
00044         objcluster*             m_pClusterList;   // NOTE(review): presumably an array of m_nbClusters clusters; ownership not visible here -- confirm
00045         int                             m_nbClusters;
00046         
00047         
00048         void    BoundingBox(objectInfo * obj, int objcount, struct rectangle &r); // find the minimum bounding box of all the objects; presumably returned through r -- confirm
00049         void    Rotate(CTMatrix<double> &obj);                                             // rotate the page so that the bounding box is upright
00050         void    ImportObjects(objectInfo *obj, int objcount);   // NOTE(review): presumably fills m_pObjectList from the objcount entries of obj -- confirm
00051         
00052 
00053         // K-means clustering of the 5D object vectors
00054         void    KmeansObjClustering(Clayout * pLayouts, int nObj, int nDoc, int nCluster, int maxiteration, objvector sigma, bool use_zero_flag = false);
00055         // use_zero_flag defaults to false. NOTE(review): nDoc is presumably the number of entries in pLayouts; meaning of nObj and sigma is not documented here -- confirm
00056         // compute the mean and variance of all samples, for use by the K-means clustering
00057         // NOTE(review): avg and sigma are non-const references, presumably outputs -- confirm
00058         void    NormalizeObjects(Clayout * pLayouts, int nDoc, objvector &avg, objvector &sigma);
00059         
00060         // weighting of the wanted 5D object clusters; probability_range defaults to 0.5
00061         void    Weight_Clusters_against(Clayout * pLayout, double probability_range = 0.5);
00062 
00063         // disk operations on the file named fname; saveweights defaults to true
00064         void SaveClustersToDisk(string fname, bool saveweights = true);
00065         void LoadClustersFromDisk(string fname);
00066         // NOTE(review): `string` is unqualified and this header has no visible using-declaration, so it depends on one reaching it from an included header
00067         Clayout();
00068         virtual ~Clayout();
00069         void Reset();   // NOTE(review): presumably returns the object to its initial state -- confirm against the definition
00070 
00071 private:
00072         CTArray<int>    m_index;                                // designates the index of the cluster that the object belongs to
00073                 
00074         //objvector     m_avg;                          // used for objClustering               
00075         //objvector     m_sigma;
00076         //objvector     m_weight;
00077                                 
00078 };
00079 
00080 #endif // !defined(AFX_LAYOUT_H__41A5307C_A00B_418A_A668_D1C2E20618DF__INCLUDED_)

Generated on Tue Aug 29 11:42:39 2006 for PageLayoutDOCLIB by  doxygen 1.4.2