# Source code for tlseparation.classification.gmm

# Copyright (c) 2017-2019, Matheus Boni Vicari, TLSeparation Project
# All rights reserved.
#
#
#    This program is free software: you can redistribute it and/or modify
#    it under the terms of the GNU General Public License as published by
#    the Free Software Foundation, either version 3 of the License, or
#    (at your option) any later version.
#
#    This program is distributed in the hope that it will be useful,
#    but WITHOUT ANY WARRANTY; without even the implied warranty of
#    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
#    GNU General Public License for more details.
#
#    You should have received a copy of the GNU General Public License
#    along with this program.  If not, see <http://www.gnu.org/licenses/>.


__author__ = "Matheus Boni Vicari"
__copyright__ = "Copyright 2017-2019, TLSeparation Project"
__credits__ = ["Matheus Boni Vicari"]
__license__ = "GPL3"
__version__ = "1.3.2"
__maintainer__ = "Matheus Boni Vicari"
__email__ = "matheus.boni.vicari@gmail.com"
__status__ = "Development"

import numpy as np
from sklearn.mixture import GaussianMixture as GMM


def classify(variables, n_classes):
    """
    Cluster a dataset with sklearn's Gaussian Mixture Model, which is fitted
    by Expectation Maximization.

    Parameters
    ----------
    variables : array
        N-dimensional array (m x n) holding n parameters for each of the
        m observations.
    n_classes : int
        Number of mixture components (classes) to fit.

    Returns
    -------
    classes : array
        Class label predicted for each observation in ``variables``.
    means : array
        N-dimensional array (c x n) with the parameter space means (n) of
        each class (c).
    probability : array
        Probability of each observation belonging to each class. The
        probabilities of a single observation should sum to 1.

    """

    # Build the mixture model with one component per requested class and
    # fit it to the very same data that is classified below.
    model = GMM(n_components=n_classes).fit(variables)

    labels = model.predict(variables)
    class_means = model.means_
    membership = model.predict_proba(variables)

    return labels, class_means, membership
def class_select_ref(classes, cm, classes_ref):
    """
    Relabel classification results with the index of the closest reference
    class.

    The mean of every class in ``cm`` is compared, by Euclidean distance, to
    every entry of ``classes_ref``. Each observation then receives the index
    of the reference entry that is closest to the mean of its class.

    Parameters
    ----------
    classes : list or array
        Integer class label of each observation. Every label is used as a
        row index into ``cm``.
    cm : array
        N-dimensional array (c x n) of each class (c) parameter space mean
        values (n).
    classes_ref : array
        Reference classes values, one entry per reference class, expressed
        in the same parameter space as ``cm``.

    Returns
    -------
    new_classes : array
        Array of floats with one value per observation: the index (position
        in ``classes_ref``) of the reference class closest to the mean of
        the observation's class.

    Notes
    -----
    When two references are equally distant the lowest reference index is
    kept. A class with no reference at a finite distance (including the case
    of an empty ``classes_ref``) is mapped to index 0.

    """

    cm = np.asarray(cm)
    classes_ref = np.asarray(classes_ref)

    # Initializing array of class ids (one reference index per class).
    class_ids = np.zeros([cm.shape[0]])

    # Looping over each index in the classes means array. Both loops only
    # run over classes/references, which are few, so they are kept explicit.
    for c in range(cm.shape[0]):
        # Setting initial minimum distance value.
        mindist = np.inf
        # Looping over indices in classes reference values.
        for i in range(classes_ref.shape[0]):
            # Calculating distance of current class mean parameters and
            # current reference parameters.
            d = np.linalg.norm(cm[c] - classes_ref[i])
            # Strict comparison: the first (lowest index) reference wins
            # ties and NaN distances are never selected.
            if d < mindist:
                class_ids[c] = i
                mindist = d

    labels = np.asarray(classes)
    # An empty list is converted to a float array, which cannot be used as
    # an index, so the empty case is answered directly.
    if labels.size == 0:
        return np.zeros([labels.shape[0]])

    # Assigning final classes values to all observations at once; indexing
    # with the label array replaces a per-observation Python loop.
    return class_ids[labels]
def class_select_abs(classes, cm, nbrs_idx, feature=5, threshold=0.5):
    """
    Select from GMM classification results which class is wood, based on
    the classes means of a single feature in the parameter space.

    The class whose mean is the largest in column ``feature`` of ``cm`` is
    selected, provided that at least one class has a sum of means that
    reaches ``threshold``.

    Parameters
    ----------
    classes : list or array
        Classes labels for each observation from the input variables.
    cm : array
        N-dimensional array (c x n) of each class (c) parameter space mean
        values (n).
    nbrs_idx : array
        Nearest Neighbors indices relative to every point of the array that
        originated the classes labels. Currently not used by this function;
        kept so existing calls remain valid.
    feature : int
        Column index of the feature used to pick the selected class.
    threshold : float
        Minimum value that the largest per-class sum of means (sum over all
        columns of ``cm``) has to reach for a class to be selected.

    Returns
    -------
    mask : array or list
        Array of booleans, one per observation, where True marks the
        observations of the selected class (wood points) and False marks
        all others (leaf points). An empty list is returned when no class
        reaches ``threshold``.

    """

    cm = np.asarray(cm)

    # Gate on the largest sum of means among all classes: if not even the
    # best class reaches the threshold, no class is selected.
    if np.max(np.sum(cm, axis=1)) >= threshold:
        # The selected class is the one with the largest mean value in the
        # chosen feature column.
        class_id = np.argmax(cm[:, feature])

        # Masking classes based on the criteria set above. The labels are
        # converted to an array first: comparing a plain list to an integer
        # evaluates to a single False instead of an element-wise mask.
        mask = np.asarray(classes) == class_id
    else:
        # Empty list kept as the "nothing selected" result so existing
        # callers that check for it keep working.
        mask = []

    return mask