Changeset 5331


Ignore:
Timestamp:
Sep 2, 2022 10:07:31 AM (15 months ago)
Author:
vondreele
Message:

Add metrics to cluster analysis. The cluster list is now colored to match the PCA plot colors. NB: matplotlib & wx don't render a color the same way, so the wx colors were adjusted to match as closely as I can.

File:
1 edited

Legend:

Unmodified
Added
Removed
  • trunk/GSASIIseqGUI.py

    r5330 r5331  
    15701570    import sklearn.cluster as SKC
    15711571    import sklearn.ensemble as SKE
    1572     import sklearn.covariance as SKCO
    15731572    import sklearn.neighbors as SKN
    15741573    import sklearn.svm as SKVM
     1574    import sklearn.metrics as SKM
    15751575       
    15761576    SKLearnCite = '''If you use Scikit-Learn Cluster Analysis, please cite:
     
    18181818            whitMat = SCV.whiten(ClusData['DataMatrix'])
    18191819            if ClusData['Scikit'] == 'K-Means':
    1820                 result = SKC.KMeans(n_clusters=ClusData['NumClust'],algorithm='elkan').fit(whitMat)
     1820                result = SKC.KMeans(n_clusters=ClusData['NumClust'],algorithm='elkan',init='k-means++').fit(whitMat)
    18211821                print('K-Means sum squared dist. to means %.2f'%result.inertia_)
    18221822            elif ClusData['Scikit'] == 'Spectral clustering':
     
    18331833           
    18341834            ClusData['codes'] = result.labels_
     1835            ClusData['Metrics'] = Metrics(whitMat,result)
    18351836            wx.CallAfter(UpdateClusterAnalysis,G2frame,ClusData)
     1837           
     1838        def Metrics(whitMat,result):
                     # Compute and print three cluster-quality scores for a finished clustering run.
                     #   whitMat: data matrix handed to each SKM scoring call (the caller passes the whitened matrix)
                     #   result: fitted clustering object; only its labels_ attribute is read here
                     # Returns the tuple (Scoeff,CHcoeff,DBcoeff), or None when the guard below is not satisfied.
                     # NOTE(review): the guard tests only the largest label value. sklearn documents that
                     # silhouette_score needs 2 <= n_labels <= n_samples-1, so label sets such as one cluster
                     # per sample (or negative labels, if any method produces them) may need a stricter check - confirm.
     1839            if np.max(result.labels_) >= 1:
                         # Silhouette coefficient, using Euclidean distances between rows of whitMat
     1840                Scoeff = SKM.silhouette_score(whitMat,result.labels_,metric='euclidean')
     1841                print('Silhouette Coefficient: %.3f'%Scoeff)
                         # Calinski-Harabasz index (reported to the user as the variance ratio)
     1842                CHcoeff = SKM.calinski_harabasz_score(whitMat,result.labels_)
     1843                print('Calinski-Harabasz index (Variance ratio): %.3f'%CHcoeff)
                         # Davies-Bouldin index
     1844                DBcoeff = SKM.davies_bouldin_score(whitMat,result.labels_)
     1845                print('Davies-Bouldin Index: %.3f'%DBcoeff)
     1846                return Scoeff,CHcoeff,DBcoeff
     1847            else:
                         # No label >= 1 was found, so the scores are not computed; callers must handle None
     1848                print('number of clusters found must be > 1 for metrics to be determined')
     1849                return None
    18361850                               
    18371851        scikitSizer = wx.BoxSizer(wx.VERTICAL)
     
    18581872        if ClusData['Scikit'] in ['Spectral clustering','Agglomerative clustering']:
    18591873            useTxt = '%s used %s for distance method'%(ClusData['Scikit'],ClusData['Method'])
    1860         print(useTxt)
    18611874        scikitSizer.Add(wx.StaticText(G2frame.dataWindow,label=useTxt))
     1875        if ClusData.get('Metrics',None) is not None:
     1876            metrics = ClusData['Metrics']
     1877            scikitSizer.Add(wx.StaticText(G2frame.dataWindow,
     1878                label='Metrics: Silhoutte: %.3f, Variance: %.3f, Davies-Bouldin: %.3f'%(metrics[0],metrics[1],metrics[2])))
    18621879        return scikitSizer
    18631880   
     
    18771894                data = G2frame.GPXtree.GetItemPyData(G2gd.GetGPXtreeItemId(G2frame, item,'PDF Controls'))
    18781895                G2plt.PlotISFG(G2frame,data,plotType='G(R)')
    1879            
     1896               
     1897        #need 15 colors; values adjusted to match xkcd/PCA plot colors. NB: wx values are 0xBBGGRR, i.e. byte order reversed from xkcd RGB values.
     1898        Colors = [['xkcd:blue',0xff0000],['xkcd:red',0x0000ff],['xkcd:green',0x00a000],['xkcd:cyan',0xd0d000],
     1899                  ['xkcd:magenta',0xa000a0],['xkcd:black',0x000000],['xkcd:pink',0xb469ff],['xkcd:brown',0x13458b],
     1900                  ['xkcd:teal',0x808000],['xkcd:orange',0x008cff],['xkcd:grey',0x808080],['xkcd:violet',0xe22b8a],
     1901                  ['xkcd:aqua',0xaaaa00],['xkcd:blueberry',0xcd5a6a],['xkcd:bordeaux',0x00008b]]
    18801902        NClust = np.max(ClusData['codes'])
    18811903        memSizer = wx.BoxSizer(wx.VERTICAL)
    1882         memSizer.Add(wx.StaticText(G2frame.dataWindow,label='Cluster populations:'))       
     1904        memSizer.Add(wx.StaticText(G2frame.dataWindow,label='Cluster populations (colors refer to cluster colors in PCA plot):'))       
    18831905        for i in range(NClust+1):
    18841906            nPop= len(ClusData['codes'])-np.count_nonzero(ClusData['codes']-i)
    1885             memSizer.Add(wx.StaticText(G2frame.dataWindow,label='Cluster #%d has %d members'%(i,nPop)))       
     1907            txt = wx.StaticText(G2frame.dataWindow,label='Cluster #%d has %d members'%(i,nPop))
     1908            txt.SetForegroundColour(wx.Colour(Colors[i][1]))
     1909            memSizer.Add(txt)       
    18861910        headSizer = wx.BoxSizer(wx.HORIZONTAL)
    18871911        headSizer.Add(wx.StaticText(G2frame.dataWindow,label='Select cluster to list members: '),0,WACV)       
     
    18981922                     ClusList.append(item)               
    18991923            cluslist = wx.ListBox(G2frame.dataWindow, choices=ClusList)
     1924            cluslist.SetForegroundColour(wx.Colour(Colors[shoNum][1]))
    19001925            cluslist.Bind(wx.EVT_LISTBOX,OnSelection)
    19011926            memSizer.Add(cluslist)
     
    19161941                ClusData['codes'] = SKN.LocalOutlierFactor().fit_predict(ClusData['DataMatrix'])
    19171942            wx.CallAfter(UpdateClusterAnalysis,G2frame,ClusData,shoNum)
    1918        
    19191943           
    19201944        outSizer = wx.BoxSizer(wx.VERTICAL)
     
    20352059            else:
    20362060                mainSizer.Add(wx.StaticText(G2frame.dataWindow,label='No outlier data found'))
    2037        
    2038    
    2039            
    20402061               
    20412062    bigSizer.Add(mainSizer)
Note: See TracChangeset for help on using the changeset viewer.