# Source code for knn

# This file is part of Vegetto.

# Vegetto is free software: you can redistribute it and/or modify it under the
# terms of the GNU General Public License as published by the Free Software
# Foundation, either version 3 of the License, or (at your option) any later
# version.

# Vegetto is distributed in the hope that it will be useful, but WITHOUT ANY
# WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR
# A PARTICULAR PURPOSE. See the GNU General Public License for more details.

# You should have received a copy of the GNU General Public License along with
# Vegetto. If not, see <http://www.gnu.org/licenses/>.

# This work was supported by project PGC2018-098813-B-C31 (Spanish "Ministerio
# de Ciencia, Innovación y Universidades"), and by the European Regional
# Development Fund (ERDF).

import math
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.neighbors import KNeighborsClassifier
from sklearn.metrics import accuracy_score, cohen_kappa_score
from sklearn import preprocessing

from config import Config

__author__ = 'Juan Carlos Gómez-López'
__copyright__ = 'Copyright 2022, EFFICOMP'
__license__ = 'GNU GPL-3.0-or-later'
__version__ = '0.1.0'
__maintainer__ = 'Juan Carlos Gómez-López'
__email__ = 'goloj@ugr.es'
__status__ = 'Development'


class Knn:
    """
    k-NN based evaluator of feature subsets.

    The train/test arrays of a dataset are loaded from ``db/<folder_dataset>/``
    and a :class:`sklearn.neighbors.KNeighborsClassifier` is fitted on the
    columns selected by an individual in order to score it.
    """

    def __init__(self, config: "Config"):
        """
        Constructor.

        Loads the four ``.npy`` files of the dataset and encodes the class
        labels of both splits as consecutive integers ``0..n_classes-1``.

        :param config: Config object where all the hyperparameter values are
            loaded. Only ``config.folder_dataset`` and ``config.k`` are read here.
        :type config: :py:mod:`config`

        """

        folder = config.folder_dataset

        def load(name):
            # NOTE(security): allow_pickle=True lets np.load unpickle arbitrary
            # objects, so the dataset files must come from a trusted source.
            return np.load("db/{}/{}.npy".format(folder, name), allow_pickle=True)

        self.data_train = load("data_train")
        raw_labels_train = load("labels_train").astype('int').ravel()
        self.data_test = load("data_test")
        raw_labels_test = load("labels_test").astype('int').ravel()

        # Both splits must share ONE class -> code mapping. Encoding them
        # independently (or encoding only one of them) makes the predictions,
        # which live in the training label space, incomparable with the test
        # labels whenever the raw labels are not already 0..n_classes-1 or a
        # class is missing from one split.
        self.classes = np.unique(np.concatenate((raw_labels_train, raw_labels_test)))
        self.labels_train = np.searchsorted(self.classes, raw_labels_train)
        self.labels_test = np.searchsorted(self.classes, raw_labels_test)
        # Kept for backward compatibility: encoded training labels.
        self.labels = self.labels_train

        self.k = config.k
        # Despite its name, this attribute holds the validation Kappa
        # coefficient written by calculate_kappa_coefficiente_validation().
        self.accuracy_validation = 0.0
        self.number_of_selected_features = 0.0

    def _n_neighbors(self, n_samples):
        """
        Number of neighbours to use for a training set of ``n_samples`` rows.

        ``self.k == -1`` selects the rounded square root of the number of
        training samples (never less than 1); any other value is used as is.

        :param n_samples: Number of samples the classifier will be fitted on
        :type n_samples: int
        :return: Value for the ``n_neighbors`` hyperparameter
        :rtype: int

        """
        if self.k == -1:
            return max(1, int(round(math.sqrt(n_samples))))
        return self.k

    def _build_model(self, n_samples):
        """
        Create the (unfitted) brute-force k-NN classifier.

        :param n_samples: Number of samples the classifier will be fitted on
        :type n_samples: int
        :return: Unfitted classifier
        :rtype: KNeighborsClassifier

        """
        return KNeighborsClassifier(n_neighbors=self._n_neighbors(n_samples), algorithm='brute')

    def calculate_kappa_coefficiente_validation(self, individual):
        """
        Calculation of the validation Kappa coefficient.

        The training data restricted to the selected features is split 50/50
        (stratified by class) into a fitting part and a validation part. The
        split is not seeded, so repeated calls may give different scores. The
        result is stored in ``self.accuracy_validation`` and the number of
        selected features in ``self.number_of_selected_features``.

        :param individual: Chromosome of the individual (selected features),
            used to index the columns of the training data
        :type individual: Individual

        """

        data_to_knn = self.data_train[:, individual]

        data_train, data_validation, labels_train, labels_validation = train_test_split(
            data_to_knn, self.labels_train, test_size=0.5, stratify=self.labels_train)

        model = self._build_model(len(data_train))
        model.fit(data_train, labels_train)

        self.accuracy_validation = cohen_kappa_score(labels_validation, model.predict(data_validation))
        self.number_of_selected_features = len(individual)

    def calculate_accuracy_test(self, individual):
        """
        Calculation of the test accuracy.

        The classifier is fitted on the whole training set restricted to the
        selected features and evaluated on the test set.

        :param individual: Chromosome of the individual (selected features),
            used to index the columns of the training and test data
        :type individual: Individual
        :return: Test accuracy and test Kappa coefficient, in that order
        :rtype: tuple

        """
        data_to_knn_train = self.data_train[:, individual]
        data_to_knn_test = self.data_test[:, individual]

        model = self._build_model(len(data_to_knn_train))
        model.fit(data_to_knn_train, self.labels_train)

        # Predict once and reuse the result for both metrics.
        predictions = model.predict(data_to_knn_test)

        return (accuracy_score(self.labels_test, predictions),
                cohen_kappa_score(self.labels_test, predictions))