# Source code for knn

# This file is part of Vegetto.

# Vegetto is free software: you can redistribute it and/or modify it under the
# terms of the GNU General Public License as published by the Free Software
# Foundation, either version 3 of the License, or (at your option) any later
# version.

# Vegetto is distributed in the hope that it will be useful, but WITHOUT ANY
# WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR
# A PARTICULAR PURPOSE. See the GNU General Public License for more details.

# You should have received a copy of the GNU General Public License along with
# Vegetto. If not, see <http://www.gnu.org/licenses/>.

# This work was supported by project PGC2018-098813-B-C31 (Spanish "Ministerio
# de Ciencia, Innovación y Universidades"), and by the European Regional
# Development Fund (ERDF).

import math
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.neighbors import KNeighborsClassifier
from sklearn.metrics import accuracy_score, cohen_kappa_score
from sklearn import preprocessing

from config import Config

__author__ = 'Juan Carlos Gómez-López'
__copyright__ = 'Copyright 2022, EFFICOMP'
__license__ = 'GNU GPL-3.0-or-later'
__version__ = '0.1.0'
__maintainer__ = 'Juan Carlos Gómez-López'
__email__ = 'goloj@ugr.es'
__status__ = 'Development'


class Knn():
    """
    k-NN evaluator for feature subsets.

    Loads the train/test partitions of a dataset from ``db/<folder_dataset>/``
    and scores an individual (a selection of feature columns) with a
    brute-force :class:`sklearn.neighbors.KNeighborsClassifier`.
    """

    def __init__(self, config: 'Config'):
        """
        Constructor.

        Loads the four ``.npy`` files of the dataset and encodes the labels of
        both partitions with one shared mapping, so that predictions of a model
        fitted on ``labels_train`` are directly comparable with ``labels_test``.

        :param config: Config object where all the hyperparameter values are
            loaded (``folder_dataset`` and ``k`` are read here)
        :type config: :py:mod:`config`
        """
        dataset_dir = "db/{}".format(config.folder_dataset)

        # NOTE(review): allow_pickle=True makes np.load unpickle object arrays,
        # which can execute arbitrary code. Only point folder_dataset at
        # trusted files; drop the flag if the arrays are plain numeric.
        self.data_train = np.load("{}/data_train.npy".format(dataset_dir), allow_pickle=True)
        raw_labels_train = np.ravel(
            np.load("{}/labels_train.npy".format(dataset_dir), allow_pickle=True).astype('int'))
        self.data_test = np.load("{}/data_test.npy".format(dataset_dir), allow_pickle=True)
        raw_labels_test = np.ravel(
            np.load("{}/labels_test.npy".format(dataset_dir), allow_pickle=True).astype('int'))

        # One sorted class table for BOTH partitions. Encoding each partition
        # with its own encoder (or leaving the training labels raw) gives the
        # same class different codes in train and test whenever the raw labels
        # are not already 0..n-1 or a class is absent from one partition,
        # which silently corrupts the test accuracy/kappa.
        classes = np.unique(np.concatenate((raw_labels_train, raw_labels_test)))
        self.labels_train = np.searchsorted(classes, raw_labels_train)
        self.labels_test = np.searchsorted(classes, raw_labels_test)
        # Kept for backward compatibility: earlier versions exposed the encoded
        # training labels under this name.
        self.labels = self.labels_train

        self.k = config.k
        self.accuracy_validation = 0.0
        self.number_of_selected_features = 0.0

    def _n_neighbors(self, n_samples):
        """Return the neighbour count: ``self.k``, or round(sqrt(n_samples)) when ``self.k == -1``."""
        if self.k == -1:
            return int(round(math.sqrt(n_samples)))
        return self.k

    def _build_model(self, n_samples):
        """Create the (unfitted) brute-force k-NN classifier for a training set of ``n_samples`` rows."""
        return KNeighborsClassifier(n_neighbors=self._n_neighbors(n_samples), algorithm='brute')

    def calculate_kappa_coefficiente_validation(self, individual):
        """
        Calculation of the validation Kappa coefficient.

        The training data, restricted to the selected columns, is split 50/50
        (stratified by label) into a fitting part and a validation part. The
        split is not seeded, so repeated calls may give different values. The
        result is stored in ``self.accuracy_validation`` (which therefore holds
        a Kappa value, not an accuracy) and ``len(individual)`` is stored in
        ``self.number_of_selected_features``.

        :param individual: Chromosome of the individual (selected features),
            used as a column selector on the training data
        :type individual: Individual
        """
        data_to_knn = self.data_train[:, individual]
        data_train, data_validation, labels_train, labels_validation = train_test_split(
            data_to_knn, self.labels_train, test_size=0.5, stratify=self.labels_train)

        model = self._build_model(len(data_train))
        model.fit(data_train, labels_train)

        self.accuracy_validation = cohen_kappa_score(model.predict(data_validation), labels_validation)
        self.number_of_selected_features = len(individual)

    def calculate_accuracy_test(self, individual):
        """
        Calculation of the test accuracy.

        Fits the classifier on the whole training partition (selected columns
        only) and evaluates it on the test partition.

        :param individual: Chromosome of the individual (selected features),
            used as a column selector on the training and test data
        :type individual: Individual
        :return: Test accuracy and test Kappa coefficient, in that order
        :rtype: tuple
        """
        data_to_knn_train = self.data_train[:, individual]
        data_to_knn_test = self.data_test[:, individual]

        model = self._build_model(len(data_to_knn_train))
        model.fit(data_to_knn_train, self.labels_train)

        # Predict once and reuse the result for both metrics.
        predictions = model.predict(data_to_knn_test)
        return accuracy_score(predictions, self.labels_test), cohen_kappa_score(
            predictions, self.labels_test)