Main Page | Namespace List | Class List | Directories | File List | Class Members | File Members

Kmeans.h

Go to the documentation of this file.
00001 /*
00002 Kmeans : class for kmeans clustering
00003 *
00004 */
00005 
00006 #if !defined(AFX_KMEANS_H__A2716842_3DD9_4D85_882B_54EB8C40624A__INCLUDED_)
00007 #define AFX_KMEANS_H__A2716842_3DD9_4D85_882B_54EB8C40624A__INCLUDED_
00008 
00009 #if _MSC_VER > 1000
00010 #pragma once
00011 #endif // _MSC_VER > 1000
00012 
00013 #include "../Utility_hm/TArray.h"
00014 #include "../Utility_hm/TMatrix.h"
00015 
00016 #define MAXVECTDIM 12   // capacity of the per-cluster Center[] and SDev[] arrays in aCluster
00017 #define MAXCLUSTER 500  // capacity of the fixed-size CKmeans::Cluster[] array
00018 
00019 #ifndef BOOL   // fallback: alias BOOL to the built-in bool only if no BOOL macro is already defined
00020 #define BOOL bool
00021 #endif
00022 
00023 #ifndef FALSE  // fallback: define FALSE as false only if no FALSE macro is already defined
00024 #define FALSE false
00025 #endif
00026 
00027 #ifndef TRUE   // fallback: define TRUE as true only if no TRUE macro is already defined
00028 #define TRUE true
00029 #endif
00030 
00031 /*
00032 struct aCluster is used only in the Kmeans implementation.
00033 One instance of aCluster records the information for one cluster.
00034 */
00035 struct aCluster {  // per-cluster record; member functions are only declared here, their definitions are not in this header
00036    double       Center[MAXVECTDIM];         // cluster center, one slot per dimension (capacity MAXVECTDIM)
00037    double       SDev[MAXVECTDIM];           // sample deviation of this cluster, one slot per dimension (capacity MAXVECTDIM)
00038    CTArray<int> Member;                                 // indices of the vectors belonging to this cluster
00039    int          NumMembers;                 // zeroed by the default constructor; presumably the number of entries in Member -- confirm in the implementation
00040    int                  SizeVector;         // zeroed by the default constructor; presumably the number of dimensions in use -- confirm in the implementation
00041 
00042 public:  // redundant in a struct (members are public by default); kept as in the original
00043 
00044         void Add(int);      // presumably adds a vector index to Member -- confirm in the implementation
00045         void Reset();       // definition not visible in this header
00046         BOOL Initiate(int); // meaning of the int argument and of the BOOL result is defined in the implementation -- not visible here
00047         void ComputeSampleDev(CTMatrix<double> & pattern, CTArray<double> &def_std);  // presumably fills SDev from the pattern matrix; role of def_std not visible here -- confirm
00048         const aCluster & operator = (aCluster b);  // NOTE(review): takes its argument by value, so every assignment first copies the source cluster
00049         aCluster(){NumMembers = 0; SizeVector=0; };  // zeroes both counters; Center[] and SDev[] are not initialized here
00050 };
00051 
00052 
00053 class CKmeans  
00054 {
00055 public:
00056         CKmeans(int nPat,int patDim,int size_vector,bool dyn,int num);  // constructor; argument semantics are defined in the implementation, not visible here
00057         virtual ~CKmeans();  // virtual destructor
00058 
00059 //protected:  (access restriction disabled: everything below stays public)
00060    CTMatrix<double>    Pattern;            // raw data
00061    aCluster     Cluster[MAXCLUSTER];       // cluster info; fixed-size array of MAXCLUSTER entries
00062 
00063    CTArray<double> def_stdDev;             // presumably the default deviations installed by SetDefaultStdDev -- confirm in the implementation
00064    int          NumPatterns;               // Number of patterns
00065    int          SizeVector;                // Number of dimensions in vector
00066    int                  PatVectorDim;                      // vector dimension of each pattern, usually the same as SizeVector        
00067    int          NumClusters;               // Number of clusters
00068 
00069    void                 CalcSampleDev(int ClustID);  // presumably computes the sample deviation of cluster ClustID -- confirm in the implementation
00070    void         DistributeSamples();                            // Step 2 of K-means algorithm
00071    BOOL         CalcNewClustCenters();                          // Step 3 of K-means algorithm
00072 inline   double       EucNorm(int, int);                        // Calc Euclidean norm vector. NOTE(review): declared inline but no definition is visible in this header -- confirm where it is defined
00073    int          FindClosestCluster(int,double&);        // returns the index of the cluster closest to the pattern whose index is the int argument; the double& is presumably an output -- confirm
00074 public:  // already in a public section; this label changes nothing
00075         void SetDefaultStdDev(CTArray<double> &def);
00076         void SetConstraint(int Dim, double c);
00077         bool ConditionSatisfied(double diff, int iDem);
00078         CTArray<double> maxRange;                                               // max range of each dimension
00079         CTArray<double> constraint;                                             // special constraint on each dimension
00080         BOOL bConverged;                                // convergence status flag
00081         BOOL bUseInitialValue;                          // if false, random values are used as the initial centroid of each cluster
00082         BOOL DynNumCluster;                                                             // if true, dynamically decide the number of clusters
00083         int  runtime;                                   // NOTE(review): meaning not documented in this header
00084 //      double threshold_ratio;                         
00085         double Je;                                                                              // value of Judge function
00086 //      int nBestCluster;                                                               
00087 
00088         void SetBound4EachDim(double *boundarray);      // set the value range of each dimension
00089         void SetInitialValue(const CTMatrix<double> &); // set the initial value of each cluster
00090         
00091         void Initiate();
00092         void CalcJe();                                                                  // evaluate Judge function
00093         void SetDynamic(BOOL,int);                      // set the flag for dynamic determination of the number of clusters; when
00094                                                                                                         // the flag is false, the second parameter sets the predefined number of clusters
00095 
00096         CKmeans();                                      // default constructor
00097         void Reset();
00098     void InitClusterCenter(bool useinitalcenter);   // Step 1 of K-means algorithm
00099     void RunKMeans(int maxiteration = 0, bool flag = false);            // Overall control of the K-means process; both arguments have defaults (0 and false)
00100 
00101 private:
00102         CTMatrix<double> InitialValue;                   // initial value of each cluster
00103 };
00104 
00105 #endif // !defined(AFX_KMEANS_H__A2716842_3DD9_4D85_882B_54EB8C40624A__INCLUDED_)

Generated on Tue Aug 29 11:42:39 2006 for PageLayoutDOCLIB by  doxygen 1.4.2