Main Page | Namespace List | Class List | Directories | File List | Class Members | File Members

objectPairClass.h

Go to the documentation of this file.
00001 #ifndef __objectPairClass_H
00002 #define __objectPairClass_H
00003 
00004 #ifndef __objectInfoClass_H
00005 #include "objectInfoClass.h"
00006 #endif
00007 
00008 #include <valarray>
00009 #include "../Utility_hm/TArray.h"
00010 
00011 #define CALLOC_BLOCK_SIZE 20000
00012 #define DOCLIST_INIT  500
00013 #define AREA_MATCH_THRESHOLD  .1
00014 #define AREA_MATCH_THRESHOLD_PIXEL_AREA  .1
00015 #define POLY_MATCH_THRESHOLD  .5
00016 #define LINE_MATCH_THRESHOLD_PERCENT  .1
00017 
00018 /* 50 pixels equate to about 1/4 inch (i.e. about 200 pixels per inch) */
00019 #define LINE_MATCH_THRESHOLD_PIXEL_DISTANCE 50
00020 
00021 /*
00022 #ifndef __polyClass_H
00023 #include "polyClass.h"
00024 #endif
00025 */
00026 #ifndef __polyMatchClass_H
00027 #include "polyMatchClass.h"
00028 #endif
00029 
00030 #define MAX_QUALIFIER_LEN 80
00031 
00032 struct objectPairInfo
00033 {
00034         int                     object1;        /* used to store the first object number in the pair -not stored */
00035         int                     object2;        /* used to store the second object number in the pair -not stored*/
00036         objectType      type1;          /* note the types are defined in objectInfoClass.h> */
00037         objectType      type2;
00038         char            obj1_qualifier[MAX_QUALIFIER_LEN];
00039         char            obj2_qualifier[MAX_QUALIFIER_LEN];
00040         int                     obj1_int_qualifier;
00041         int                     obj2_int_qualifier;
00042         pointLocation      vertices[4];
00043 
00044         float           pageInstanceCount;     /* occurances on page; avg_per doc after merge*/
00045         int                     docsCount;             /* # docs containing this pair */
00046         int                     totalInstanceCount;
00047         int                     availableInstanceCount;
00048         float           area;
00049         float           area_upper_thresh;
00050         float           area_lower_thresh;
00051         int                     crossing;
00052         int                     tag;
00053         float           score;           /* score used to measure how unique pair is to data set relative to another data set */
00054 
00055         TURN_REP_REC t;
00056 
00057 public:
00058         double nPairPerDoc_sigma;
00059 
00060         int          doc_index;  //hm   
00061         CTArray<int> doc_array;  //hm, for clustering 
00062 
00063         objectPairInfo& operator = (objectPairInfo a)
00064         {
00065                 object1 = a.object1;
00066                 object2 = a.object2;
00067                 type1   = a.type1;
00068                 type2   = a.type2;
00069                 strcpy(obj1_qualifier, a.obj1_qualifier);
00070                 strcpy(obj2_qualifier, a.obj2_qualifier);
00071                 obj1_int_qualifier = a.obj1_int_qualifier;
00072                 obj2_int_qualifier = a.obj2_int_qualifier;
00073                 int i;
00074                 for(i=0;i<4;i++)
00075                         vertices[i] = a.vertices[i];
00076 
00077                 pageInstanceCount = a.pageInstanceCount;
00078                 docsCount = a.docsCount;
00079                 totalInstanceCount = a.totalInstanceCount;
00080                 availableInstanceCount = a.availableInstanceCount;
00081                 area = a.area;
00082                 area_upper_thresh = a.area_upper_thresh;
00083                 area_lower_thresh = a.area_lower_thresh;
00084                 crossing = a.crossing;
00085                 tag = a.tag;
00086                 score = a.score;
00087                 memcpy(&t, &a.t, sizeof(TURN_REP_REC));
00088                 
00089                 doc_index = a.doc_index;
00090                 doc_array = a.doc_array;
00091 
00092                 return *this;
00093         }
00094 };
00095 
00096 
00097 // local structure used in objectPairClass::prune2();   --- hm 
00098 struct pairCluster
00099 {
00100         int                             centroid;                       // index of centroid object pair
00101         int                             nMember;
00102         double                  sum_dist;                       // sum of distance from each object pair to centroid pair
00103         CTArray<int>    memberlist;                     // index of all members belong to one cluster
00104         pairCluster()
00105         {
00106                 sum_dist = 0;
00107                 nMember = 0;
00108                 centroid = -1;
00109         }
00110         void Add(int index)                                // appending a new member into current cluster member list
00111         {
00112                 if(nMember == memberlist.GetDimension())
00113                 {
00114                         CTArray<int> tmp = memberlist;
00115                         memberlist.Construction(nMember + 11);
00116                         int i;
00117                         for(i=0;i<nMember;i++)
00118                                 memberlist[i] = tmp[i];
00119                 }
00120                 memberlist[nMember++] = index;
00121         }
00122 };
00123 
00124 
00125 class objectPairClass 
00126 {
00127 public: 
00128 
00129         // constructor and distructor
00130         objectPairClass();
00131         objectPairClass(FILE *fp);
00132         objectPairClass(const char *data_path,FILE *fp); /* opens data from file*/
00133         objectPairClass(struct objectInfo *obj1, int object_count,int documentId,FILE *fp);
00134         ~objectPairClass();
00135         
00136         // loading data from disk, called in constructor
00137         void readDataWithScores(FILE *fp);
00138 
00139         // retrieve routines
00140         int  getPairCount() const {return( _numPairsAfterPrune);}
00141         objectPairInfo *getPairs() const {return(_objectPairs);}
00142 
00143         // weighting wanted polygon clusters against unwanted clusters
00144         void assignScore (objectPairClass *otherClass); // int nDoc_wanted, int nDoc_unwanted); //updated by hm
00145         void scoreImage(objectPairClass *objects,float *score, int neighbor);  //updated by hm
00146         void dumpHighScoringObjects(struct objectInfo *obj1,float *threshold);
00147 
00148         // Save training results to disk
00149         void printInfoSummary(char *filename);
00150         void printVertices();
00151         void printInfo(string filename);
00152         void printScoreInfo(string filename);
00153 
00154         // merge similar polygons
00155         void merge(objectPairClass *newData, int new_doc_number);
00156         void prune();   
00157         void prune2(bool isMerge = false, bool reallocmemory = true); //added by hm
00158         //double        MyTestScore(objectPairClass *pTest);
00159 
00160         // post prune: to delete low weight wanted clusters
00161         void scoreBasedPrune(float threshold);
00162         void frequencyBasedPrune(int threshold, FILE * coref_file);
00163         void frequencyBasedPruneForScoring(int threshold);      
00164         
00165         // compute similarity between two polygons
00166         int  matches(objectPairInfo *pair1,  objectPairInfo *pair2, double *score = 0);
00167         bool match2(objectPairInfo *pair1, objectPairInfo *pair2, double* dist); //hm, obsolete
00168 
00169         float   getNormalizationFactor() const {return( _normalizationFactor);}
00170         void    setNormalizationFactor(float *normalizationFactor); 
00171         void    setLookupBins(valarray <float> precision);
00172         float   lookupScore(float *normalizedScore);
00173 
00174         float                           corrective_factor;
00175         objectPairInfo *        _objectPairs;
00176         FILE *                          _coref_file;
00177         int                                     _numPairs;
00178         int                                     _numPairsAfterPrune;
00179         int                                     _availableEntries;                      //used for memory reallocation
00180         int                                     _frequencyPrunedLevel;
00181 
00182         int                                     _numDoc; //number of doc where _objectPairs come from
00183 
00184         float                           _normalizationFactor;
00185         valarray <float>        _lookupBins;
00186 
00187         void SaveToDisk(string fname);    // added by hm
00188         void LoadFromDisk(string fname);
00189 };
00190 
00191 class objectPairMatrix 
00192 {
00193 
00194 public:
00195 
00196 objectPairMatrix (vector<std::string> inputVector,
00197         FILE *fp,char *directory_path, char *final_pair_filename,
00198         JScript *scriptId);
00199 
00200 private:
00201 
00202 
00203 };
00204 
00205 
00206 #endif

Generated on Tue Aug 29 11:42:39 2006 for PageLayoutDOCLIB by  doxygen 1.4.2