00001 #ifndef __objectPairClass_H
00002 #define __objectPairClass_H
00003
00004 #ifndef __objectInfoClass_H
00005 #include "objectInfoClass.h"
00006 #endif
00007
00008 #include <valarray>
00009 #include "../Utility_hm/TArray.h"
00010
// ---------------------------------------------------------------------------
// Sizing constants.
// NOTE(review): the code that consumes these is not visible in this header;
// the names suggest an allocation block size and an initial document-list
// size -- confirm against the implementation file.
// ---------------------------------------------------------------------------
#define CALLOC_BLOCK_SIZE 20000
#define DOCLIST_INIT      500

// ---------------------------------------------------------------------------
// Matching thresholds (fractions).
// NOTE(review): presumably relative tolerances used when comparing pairs;
// verify units against the matching code.
// ---------------------------------------------------------------------------
#define AREA_MATCH_THRESHOLD            0.1
#define AREA_MATCH_THRESHOLD_PIXEL_AREA 0.1
#define POLY_MATCH_THRESHOLD            0.5
#define LINE_MATCH_THRESHOLD_PERCENT    0.1

// Matching threshold expressed as a whole number.
// NOTE(review): the name suggests a distance in pixels -- confirm.
#define LINE_MATCH_THRESHOLD_PIXEL_DISTANCE 50
00020
00021
00022
00023
00024
00025
00026 #ifndef __polyMatchClass_H
00027 #include "polyMatchClass.h"
00028 #endif
00029
00030 #define MAX_QUALIFIER_LEN 80
00031
00032 struct objectPairInfo
00033 {
00034 int object1;
00035 int object2;
00036 objectType type1;
00037 objectType type2;
00038 char obj1_qualifier[MAX_QUALIFIER_LEN];
00039 char obj2_qualifier[MAX_QUALIFIER_LEN];
00040 int obj1_int_qualifier;
00041 int obj2_int_qualifier;
00042 pointLocation vertices[4];
00043
00044 float pageInstanceCount;
00045 int docsCount;
00046 int totalInstanceCount;
00047 int availableInstanceCount;
00048 float area;
00049 float area_upper_thresh;
00050 float area_lower_thresh;
00051 int crossing;
00052 int tag;
00053 float score;
00054
00055 TURN_REP_REC t;
00056
00057 public:
00058 double nPairPerDoc_sigma;
00059
00060 int doc_index;
00061 CTArray<int> doc_array;
00062
00063 objectPairInfo& operator = (objectPairInfo a)
00064 {
00065 object1 = a.object1;
00066 object2 = a.object2;
00067 type1 = a.type1;
00068 type2 = a.type2;
00069 strcpy(obj1_qualifier, a.obj1_qualifier);
00070 strcpy(obj2_qualifier, a.obj2_qualifier);
00071 obj1_int_qualifier = a.obj1_int_qualifier;
00072 obj2_int_qualifier = a.obj2_int_qualifier;
00073 int i;
00074 for(i=0;i<4;i++)
00075 vertices[i] = a.vertices[i];
00076
00077 pageInstanceCount = a.pageInstanceCount;
00078 docsCount = a.docsCount;
00079 totalInstanceCount = a.totalInstanceCount;
00080 availableInstanceCount = a.availableInstanceCount;
00081 area = a.area;
00082 area_upper_thresh = a.area_upper_thresh;
00083 area_lower_thresh = a.area_lower_thresh;
00084 crossing = a.crossing;
00085 tag = a.tag;
00086 score = a.score;
00087 memcpy(&t, &a.t, sizeof(TURN_REP_REC));
00088
00089 doc_index = a.doc_index;
00090 doc_array = a.doc_array;
00091
00092 return *this;
00093 }
00094 };
00095
00096
00097
00098 struct pairCluster
00099 {
00100 int centroid;
00101 int nMember;
00102 double sum_dist;
00103 CTArray<int> memberlist;
00104 pairCluster()
00105 {
00106 sum_dist = 0;
00107 nMember = 0;
00108 centroid = -1;
00109 }
00110 void Add(int index)
00111 {
00112 if(nMember == memberlist.GetDimension())
00113 {
00114 CTArray<int> tmp = memberlist;
00115 memberlist.Construction(nMember + 11);
00116 int i;
00117 for(i=0;i<nMember;i++)
00118 memberlist[i] = tmp[i];
00119 }
00120 memberlist[nMember++] = index;
00121 }
00122 };
00123
00124
00125 class objectPairClass
00126 {
00127 public:
00128
00129
00130 objectPairClass();
00131 objectPairClass(FILE *fp);
00132 objectPairClass(const char *data_path,FILE *fp);
00133 objectPairClass(struct objectInfo *obj1, int object_count,int documentId,FILE *fp);
00134 ~objectPairClass();
00135
00136
00137 void readDataWithScores(FILE *fp);
00138
00139
00140 int getPairCount() const {return( _numPairsAfterPrune);}
00141 objectPairInfo *getPairs() const {return(_objectPairs);}
00142
00143
00144 void assignScore (objectPairClass *otherClass);
00145 void scoreImage(objectPairClass *objects,float *score, int neighbor);
00146 void dumpHighScoringObjects(struct objectInfo *obj1,float *threshold);
00147
00148
00149 void printInfoSummary(char *filename);
00150 void printVertices();
00151 void printInfo(string filename);
00152 void printScoreInfo(string filename);
00153
00154
00155 void merge(objectPairClass *newData, int new_doc_number);
00156 void prune();
00157 void prune2(bool isMerge = false, bool reallocmemory = true);
00158
00159
00160
00161 void scoreBasedPrune(float threshold);
00162 void frequencyBasedPrune(int threshold, FILE * coref_file);
00163 void frequencyBasedPruneForScoring(int threshold);
00164
00165
00166 int matches(objectPairInfo *pair1, objectPairInfo *pair2, double *score = 0);
00167 bool match2(objectPairInfo *pair1, objectPairInfo *pair2, double* dist);
00168
00169 float getNormalizationFactor() const {return( _normalizationFactor);}
00170 void setNormalizationFactor(float *normalizationFactor);
00171 void setLookupBins(valarray <float> precision);
00172 float lookupScore(float *normalizedScore);
00173
00174 float corrective_factor;
00175 objectPairInfo * _objectPairs;
00176 FILE * _coref_file;
00177 int _numPairs;
00178 int _numPairsAfterPrune;
00179 int _availableEntries;
00180 int _frequencyPrunedLevel;
00181
00182 int _numDoc;
00183
00184 float _normalizationFactor;
00185 valarray <float> _lookupBins;
00186
00187 void SaveToDisk(string fname);
00188 void LoadFromDisk(string fname);
00189 };
00190
00191 class objectPairMatrix
00192 {
00193
00194 public:
00195
00196 objectPairMatrix (vector<std::string> inputVector,
00197 FILE *fp,char *directory_path, char *final_pair_filename,
00198 JScript *scriptId);
00199
00200 private:
00201
00202
00203 };
00204
00205
00206 #endif