Main Page | Modules | Namespace List | Class Hierarchy | Alphabetical List | Class List | Directories | File List | Namespace Members | Class Members | File Members | Related Pages

GenKMeans.TCC

Go to the documentation of this file.
00001 /*---------------------------------------------------------------------+
00002  | Library QgarLib, graphics analysis and recognition                  |
00003  | Copyright (C) 2002  Qgar Project, LORIA                             |
00004  |                                                                     |
00005  | This library is free software; you can redistribute it and/or       |
00006  | modify it under the terms of the GNU Lesser General Public          |
00007  | License version 2.1, as published by the Free Software Foundation.  |
00008  |                                                                     |
00009  | This library is distributed in the hope that it will be useful,     |
00010  | but WITHOUT ANY WARRANTY; without even the implied warranty of      |
00011  | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.                |
00012  | See the GNU Lesser General Public License for more details.         |
00013  |                                                                     |
00014  | The GNU Lesser General Public License is included in the file       |
00015  | LICENSE.LGPL, in the root directory of the Qgar packaging. See      |
00016  | http://www.gnu.org/licenses/lgpl.html for the terms of the licence. |
00017  | To receive a paper copy, write to the Free Software Foundation,     |
00018  | Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307, USA.       |
00019  |                                                                     |
00020  | Contact Project Qgar for any information:                           |
00021  |   LORIA - équipe Qgar                                               |
00022  |   B.P. 239, 54506 Vandoeuvre-lès-Nancy Cedex, France                |
00023  |   email: qgar-contact@loria.fr                                      |
00024  |   http://www.qgar.org/                                              |
00025  *---------------------------------------------------------------------*/
00026 
00027 
00028 /**
00029  * @file   GenKMeans.TCC
00030  * @brief  Implementation of function members of classes
00031  *   qgar::GenCluster and qgar::GenKMeans.
00032  *
00033  * @author <a href="mailto:qgar-develop@loria.fr?subject=Qgar fwd Gérald Masini">Gérald Masini</a>
00034  * @date   January 27, 2005  21:21
00035  * @since  Qgar 2.2
00036  */
00037 
00038 
00039 
00040 // STD
00041 #include <algorithm>
00042 #include <limits>
00043 #include <list>
00044 #include <sstream>
00045 #include <string>
00046 #include <vector>
00047 // QGAR
00048 #include <qgarlib/array.H>
00049 #include <qgarlib/math.H>
00050 #include <qgarlib/QgarErrorAlgorithm.H>
00051 
00052 
00053 
00054 namespace qgar
00055 {
00056 
00057 
00058 /*---------------------------------------------------------------------*
00059  |                                                                     |
00060  |          C  L  A  S  S      G  E  N  C  L  U  S  T  E  R            |
00061  |                                                                     |
00062  *---------------------------------------------------------------------*/
00063 
00064 
00065 // ============
00066 // CONSTRUCTORS
00067 // ============
00068 
00069 template <class T>
00070 GenCluster<T>::GenCluster(const std::vector<value_type>& elements, 
00071                           const_reference center)
00072 
00073   : _center(center), 
00074     _elements(elements)
00075 
00076 {
00077   /* EMPTY */
00078 }
00079 
00080 
00081 template <class T>
00082 GenCluster<T>::GenCluster(const GenCluster<T>& other)
00083 
00084   : _center(other._center),
00085     _elements(other._elements)
00086 
00087 {
00088   /* EMPTY */ 
00089 }
00090 
00091 
00092 // =========
00093 // OPERATORS
00094 // =========
00095 
00096 
00097 template <class T>
00098 GenCluster<T>& 
00099 GenCluster<T>::operator=(const GenCluster<T>& rhs)
00100 {
00101   if (this != &rhs) 
00102     {
00103       _center   = rhs._center;
00104       _elements = rhs._elements;
00105     }
00106 
00107   return *this;
00108 }
00109 
00110 
00111 // ======
00112 // ACCESS
00113 // ======
00114 
00115 
00116 template <class T>
00117 inline size_t
00118 GenCluster<T>::size() const
00119 { 
00120   return _elements.size();
00121 }
00122 
00123 
00124 template <class T>
00125 inline const T&
00126 GenCluster<T>::center() const
00127 {
00128   return _center;
00129 }
00130 
00131 
00132 template <class T>
00133 inline const std::vector<T>&
00134 GenCluster<T>::elements() const
00135 {
00136   return _elements;
00137 }
00138 
00139 
00140 /*---------------------------------------------------------------------*
00141  |                                                                     |
00142  *---------------------------------------------------------------------*/
00143 
00144 
00145 
00146 
00147 
00148 /*---------------------------------------------------------------------*
00149  |                                                                     |
00150  |            C  L  A  S  S      G  E  N  K  M  E  A  N  S             |
00151  |                                                                     |
00152  *---------------------------------------------------------------------*/
00153 
00154 
00155 // ============
00156 // CONSTRUCTORS
00157 // ============
00158 
00159 
00160 // CONSTRUCT A PARTITION OF A GIVEN NUMBER OF CLUSTERS,
00161 // USING A GIVEN DISTANCE FUNCTION
00162 // anObjVect       vector of objects to be partitioned
00163 // aDistFunction   distance function
00164 // aClusterCnt     number of resulting clusters
00165 
00166 template<class T>
00167 GenKMeans<T>::GenKMeans(const std::vector<T>& anObjVect,
00168                         DistanceFunction aDistFunction,
00169                         int aClusterCnt)
00170 
00171   throw(QgarErrorAlgorithm)
00172 
00173   : _clusterCnt(aClusterCnt),
00174     _clusterElts(aClusterCnt),
00175     _distance(aDistFunction)
00176 
00177 {
00178   _objVect.reserve(anObjVect.size());
00179 
00180   for (typename std::vector<T>::const_iterator it = anObjVect.begin();
00181        it != anObjVect.end();
00182        ++it)
00183     {
00184       _objVect.push_back(&(*it));
00185     }
00186 
00187   PRIVATEinitCenters();
00188   PRIVATEgetClusters();
00189 }
00190 
00191 
00192 // CONSTRUCT A PARTITION OF A GIVEN NUMBER OF CLUSTERS,
00193 // USING A GIVEN DISTANCE FUNCTION AND GIVEN INITIAL CENTERS
00194 // anObjVect       vector of objects to be partitioned
00195 // aDistFunction   distance function
00196 // aCenterVector   initial cluster centers
00197 // aClusterCnt     number of clusters
00198 
00199 template<class T>
00200 GenKMeans<T>::GenKMeans(const std::vector<T>& anObjVect,
00201                         DistanceFunction aDistFunction,
00202                         const std::vector<T>& aCenterVector,
00203                         int aClusterCnt)
00204 
00205   throw(QgarErrorAlgorithm)
00206   : _clusterCnt(aClusterCnt),
00207     _clusterElts(aClusterCnt),
00208     _distance(aDistFunction)
00209 
00210 {
00211   _objVect.reserve(anObjVect.size());
00212 
00213   for(unsigned int i=0; i < anObjVect.size(); ++i)
00214     {
00215       _objVect.push_back(&anObjVect[i]);
00216     }
00217 
00218   for (int idx = 0 ; idx < aClusterCnt ; idx++)
00219     {
00220       _clusterSizes.push_back(0);                        // Cluster size
00221       _clusterCenters.push_back(&aCenterVector[idx]);    // Cluster center
00222     }
00223   
00224   PRIVATEgetClusters();
00225 }
00226 
00227 
00228 // ==========
00229 // DESTRUCTOR
00230 // ==========
00231 
00232 
00233 template<class T>
00234 GenKMeans<T>::~GenKMeans()
00235 {
00236   // VOID
00237 }
00238 
00239 
00240 // ======
00241 // ACCESS
00242 // ======
00243 
00244 
00245 // GET NUMBER OF RESULTING CLUSTERS
00246 
00247 template<class T>
00248 inline int
00249 GenKMeans<T>::clusterCnt() const
00250 {
00251   return _clusterCnt;
00252 }
00253 
00254 
00255 // GET VECTOR OF RESULTING CLUSTERS SIZES
00256 
00257 template<class T>
00258 inline const std::vector<int>&
00259 GenKMeans<T>::clusterSizes() const
00260 {
00261   return _clusterSizes;
00262 }
00263 
00264 
00265 // GET VECTOR OF RESULTING CLUSTER CENTERS
00266 
00267 template<class T>
00268 inline const std::vector<const T*>&
00269 GenKMeans<T>::clusterCenters() const
00270 {
00271   return _clusterCenters;
00272 }
00273 
00274 
00275 // GET THE VECTOR OF CLUSTER ELEMENTS
00276 
00277 template<class T>
00278 inline const std::vector< std::vector<const T*> >&
00279 GenKMeans<T>::accessClusterElts() const
00280 {
00281   return _clusterElts;
00282 }
00283 
00284 
00285 // GET A COPY OF THE VECTOR OF CLUSTER ELEMENTS
00286 
00287 template<class T>
00288 inline std::vector< std::vector<const T*> >
00289 GenKMeans<T>::clusterElts() const
00290 {
00291   return _clusterElts;
00292 }
00293 
00294 
00295 // GET THE VECTOR OF CLUSTERS
00296 
00297 template<class T>
00298 const std::vector< GenCluster<T> >&
00299 GenKMeans<T>::accessClusters() const
00300 {
00301   PRIVATEconsFinalClusters();
00302   return _clusters; 
00303 }
00304 
00305 
00306 // GET A COPY OF THE VECTOR OF CLUSTERS
00307 
00308 template<class T>
00309 std::vector< GenCluster<T> >
00310 GenKMeans<T>::clusters() const
00311 {
00312   PRIVATEconsFinalClusters();
00313   return _clusters; 
00314 }
00315 
00316 
00317 // ===========
00318 // AUXILIARIES
00319 // ===========
00320 
00321 
00322 // INITIALIZE CLUSTER CENTERS, USING A GIVEN NUMBER
00323 // OF THE FIRST OBJECTS FROM THE GIVEN LIST OF OBJECTS
00324 
00325 template <class T>
00326 void
00327 GenKMeans<T>::PRIVATEinitCenters()
00328 
00329   throw(QgarErrorAlgorithm)
00330 
00331 {
00332   typename std::vector<T const *>::iterator iterObjs
00333     = _objVect.begin();
00334   
00335   // While the expected number of clusters is not reached
00336   // ====================================================
00337   int  idx = 0;
00338   while (idx < _clusterCnt)
00339     {
00340       if (iterObjs == _objVect.end())
00341         {
00342           // No more objects to get remaining centers
00343           std::ostringstream os;
00344           os << "Unable to get given number of initial clusters: "
00345              << _clusterCnt;
00346           throw QgarErrorAlgorithm(__FILE__, __LINE__,
00347                                    "template <class T> void qgar::GenKMeans<T>::PRIVATEinitCenters()",
00348                                    os.str());
00349         }
00350 
00351       // Make sure that the current object is not very close
00352       // (in the sense of the distance function) to an existing center,
00353       // to avoid getting empty clusters after distribution of the objects
00354       bool sameCenter = false;
00355       for (int jdx = 0 ; jdx < idx ; jdx++)
00356         {
00357           if (qgEq0((*_distance)(*_clusterCenters[jdx], **iterObjs)))
00358           {
00359             sameCenter = true;
00360             break;
00361           }
00362         } // for jdx
00363 
00364       if (!sameCenter)
00365         {
00366           // Record the center
00367           _clusterCenters.push_back(*iterObjs);
00368           // Initialize the size of the new cluster
00369           _clusterSizes.push_back(0);
00370           // One more cluster has been created
00371           idx++;
00372         }
00373       
00374       iterObjs++;
00375     } // while ====================================================
00376 
00377 } // function PRIVATEinitCenters
00378 
00379 
00380 
00381 // DISTRIBUTE OBJECTS INTO CLUSTERS
00382 
00383 template <class T>
00384 void
00385 GenKMeans<T>::PRIVATEdistributeIntoClusters()
00386 {
00387   // index of the corresponding cluster
00388   int idxMin = 0;
00389 
00390   // For each object
00391   // ==============================================================
00392   for(typename std::vector<const T*>::const_iterator
00393         iterObjs = _objVect.begin();
00394       iterObjs != _objVect.end();
00395       ++iterObjs)
00396     {
00397 
00398       double distMin = std::numeric_limits<double>::max();
00399       
00400       // Computer the minimum distance 
00401       // between the current object and each center
00402       for(int idx = 0 ; idx < _clusterCnt ; ++idx)
00403         {
00404           double dist = (*_distance)(**iterObjs, *_clusterCenters[idx]);
00405           if (dist < distMin)
00406             {
00407               distMin = dist;
00408               idxMin = idx;
00409             }
00410         }
00411       
00412       // Put the current object in the right cluster
00413       _clusterElts[idxMin].push_back(*iterObjs);
00414       _clusterSizes[idxMin]++;
00415     } // for ======================================================
00416 
00417 } // function PRIVATEdistributeIntoClusters
00418 
00419 
00420 
00421 // COMPUTE CLUSTER CENTERS
00422 
00423 template <class T>
00424 void
00425 GenKMeans<T>::PRIVATEgetCenters(std::vector<const T*>& aNewCenterVector)
00426 
00427   throw(QgarErrorAlgorithm)
00428 
00429 {
00430   // For each cluster
00431   // ===============================================================
00432   for(int idxClu = 0 ; idxClu < _clusterCnt ; ++idxClu)
00433     {
00434       int sizeClu = _clusterSizes[idxClu]; // Size of the current cluster
00435       if (sizeClu == 0)
00436         {
00437           throw QgarErrorAlgorithm(__FILE__, __LINE__,
00438                                    "template <class T> void qgar::GenKMeans<T>::PRIVATEgetCenters(std::vector<const T*>&)",
00439                                    "At least one cluster is empty");
00440         }
00441       else
00442         {
00443           // Pointer to the current cluster
00444           std::vector<const T*>& pClu = _clusterElts[idxClu];
00445 
00446           // Table to cumulate the squared distances
00447           // between the objects of the current cluster:
00448           // distTab[i] = SUM distance(current object, j)
00449           //              for each j in the current cluster
00450 
00451           double* distTab = new double[sizeClu];
00452           qgFill(distTab, sizeClu, 0.);
00453 
00454           // For each object of the cluster, except the last one
00455           // -------------------------------------------------------
00456           for(int idxObj = 0 ; idxObj < (sizeClu - 1) ; idxObj++)
00457             {
00458               // The current object
00459               const T* currObj = (pClu)[idxObj];
00460 
00461               // For each object of the current cluster, whose index
00462               // is greater than the index of the current object
00463 
00464               for(int jdxObj = idxObj + 1 ; jdxObj < sizeClu ; ++jdxObj)
00465                 {
00466                   // Squared distance between current object and object `jdxObj'
00467                   double dist = (*_distance)(*currObj, *(pClu)[jdxObj]);
00468                   dist *= dist;
00469                   // Cumulate the distance for current object and object `jdxObj'
00470                   distTab[idxObj] += dist;
00471                   distTab[jdxObj] += dist;
00472                 }  // for each object jdxObj
00473 
00474             } // for -----------------------------------------------
00475 
00476           // The new center of the cluster is the object whose index
00477           // corresponds to the minimum value in table `distTab'
00478           aNewCenterVector[idxClu] = (pClu)[qgMinElement(distTab, sizeClu)];
00479 
00480           delete [] distTab;
00481         }
00482 
00483     } // for each cluster ==========================================
00484 
00485 } // function PRIVATEgetCenters
00486 
00487 
00488 
00489 // LOOP ON CONSTRUCTING CLUSTERS UNTIL GETTING STABLE CENTERS
00490 
00491 template <class T>
00492 void
00493 GenKMeans<T>::PRIVATEgetClusters()
00494 {
00495   // New cluster centers
00496   std::vector<const T*> newCenterVector(_clusterCnt);
00497   
00498   // Loop until getting stable centers
00499   // ===============================================================
00500   while (true)
00501     {
00502       // Distribute objects into clusters
00503       PRIVATEdistributeIntoClusters();
00504 
00505       // Compute new cluster centers
00506       PRIVATEgetCenters(newCenterVector);
00507 
00508       // True when centers remain unchanged
00509       bool stable = true;
00510 
00511       // Terminate if new centers are the same as the old ones
00512       for(int idx = 0 ; idx < _clusterCnt ; ++idx)
00513         {
00514           stable = stable && (_clusterCenters[idx] == newCenterVector[idx]);
00515           if (!stable)
00516             {
00517               break;
00518             }
00519         } // END for
00520 
00521       if (stable)
00522         {
00523           break;
00524         }
00525 
00526       // For each cluster
00527       for(int idx = 0 ; idx < _clusterCnt ; ++idx)
00528         {
00529           // Pointer on current cluster
00530           std::vector<const T*>& pClu = _clusterElts[idx];
00531 
00532           // Erase current cluster
00533           pClu.erase(pClu.begin(), pClu.end());
00534 
00535           // Re-create current cluster
00536           _clusterSizes[idx] = 0;
00537 
00538           // Store new center
00539           _clusterCenters[idx] = newCenterVector[idx];
00540         }
00541 
00542     } // loop ======================================================
00543 
00544 } // function PRIVATEgetClusters
00545 
00546 
00547 
00548 // CONSTRUCT THE FINAL CLUSTERS
00549 
00550 template <class T>
00551 void
00552 GenKMeans<T>::PRIVATEconsFinalClusters()
00553 {
00554   // Lazy computation
00555 
00556   if (_clusters.empty())
00557     {
00558 
00559       for (int clusterIdx = 0; clusterIdx < _clusterCnt; ++clusterIdx) 
00560         {
00561           std::vector<T> vect;
00562           vect.reserve(_clusterSizes[clusterIdx]);
00563 
00564           for(int eltIdx = 0; eltIdx < _clusterSizes[clusterIdx]; ++eltIdx) 
00565             {
00566               vect.push_back(*_clusterElts[clusterIdx][eltIdx]);
00567             }
00568 
00569           _clusters.push_back(GenCluster<T>(vect, *_clusterCenters[clusterIdx]));
00570         } // END for
00571 
00572     } // END if
00573 }
00574 
00575 
00576 /*---------------------------------------------------------------------*
00577  |                                                                     |
00578  *---------------------------------------------------------------------*/
00579 
00580 
00581 
00582 
00583 
00584 /*---------------------------------------------------------------------*
00585  |                                                                     |
00586  |                         H  E  L  P  E  R  S                         |
00587  |                                                                     |
00588  *---------------------------------------------------------------------*/
00589 
00590 
00591 // CONSTRUCT A PARTITION OF CLUSTERS USING A DISTANCE FUNCTION
00592 
00593 template <class T>
00594 std::vector< GenCluster<T> > 
00595 qgKMeans(const std::vector<T>& anObjVect,
00596          double (*aDistFunction)(const T&, const T&),
00597          int aClusterCnt)
00598 
00599   throw (QgarErrorAlgorithm)
00600 
00601 {
00602   return (GenKMeans<T>(anObjVect, aDistFunction, aClusterCnt)).clusters();
00603 }
00604 
00605 
00606 // CONSTRUCT A PARTITION OF CLUSTERS OF GIVEN CENTERS,
00607 // USING A DISTANCE FUNCTION
00608 
00609 template <class T>
00610 std::vector< GenCluster<T> >
00611 qgKMeans(const std::vector<T>&   anObjVect,
00612          double (*aDistFunction) (const T&, const T&),
00613          std::vector<T*>&        aCenterVector,
00614          int                     aClusterCnt)
00615 
00616   throw (QgarErrorAlgorithm)
00617 
00618 {
00619   return
00620     (GenKMeans<T>(anObjVect, aDistFunction, aCenterVector, aClusterCnt)).clusters();
00621 }
00622 
00623 
00624 /*---------------------------------------------------------------------*
00625  |                                                                     |
00626  *---------------------------------------------------------------------*/
00627 
00628 
00629 } // namespace qgar