Реализация алгоритма FRIS-STOLP (python 3)
Теория по данному алгоритму приведена здесь, а также в курсе Константина Воронцова.
В данной статье мы рассмотрим только реализацию на python 3.
Код можно копировать в том же порядке «как есть».
Подключаем необходимые пакеты
import numpy as np
import matplotlib
from matplotlib import pyplot as plt
from math import floor, ceil # for rounding up and down
from scipy.stats import pearsonr
from sklearn import datasets
import pandas as pd
import math
from typing import Dict, Tuple, List
import copy
from random import randint
from scipy.stats import pearsonr
from mpl_toolkits.mplot3d import Axes3D
from sklearn import datasets
import matplotlib.cm as cm
from scipy.stats import pearsonr
import matplotlib.colors as matplotcolor
from sklearn.decomposition import PCA
from random import randint
import matplotlib.cm as cm
def Get_Class_Label_Of_Object(x):
return x[len(x)-1]
Метод вычисления расстояния между двумя объектами
def Distance(x, y):
if len(x) != len(y):
return -1
sum = 0
for i in range(len(x)):
sum += (x[i]-y[i])*(x[i]-y[i])
return sum
Чтение данных из файла:
def PrepareData():
all_data = np.loadtxt(open("./wine_data.csv","r"), delimiter = ",", skiprows = 0, dtype = np.float64)
# load class labels from column 1
y_wine = all_data[:,0]
#print (y_wine)
# conversion of the class labels to integer-type array
y_wine = y_wine.astype(np.int64, copy = False)
# load the 14 features
X_wine = all_data[:,1:]
data = all_data[:, 6:8] #!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! 6:9
x_min, x_max = data[:, 0].min() - .5, data[:, 0].max() + .5
y_min, y_max = data[:, 1].min() - .5, data[:, 1].max() + .5
plt.figure(2, figsize = (16, 12))
plt.clf()
# Plot the training points
plt.scatter(data[:, 0], data[:, 1], c = y_wine, cmap = plt.cm.Set1, edgecolor = 'k')
plt.xlabel('Flavanoids')
plt.ylabel('Nonflavanoid phenols')
plt.xlim(x_min, x_max)
plt.ylim(y_min, y_max)
plt.xticks(())
plt.yticks(())
plt.show()
uniq = [1 for x in range(len(data))]
cnt_of_uniq = 0
for i in range(len(data)):
for j in range(i+1,len(data)):
if data[i, 0] == data[j, 0] and data[i, 1] == data[j, 1]:
uniq[j] = 0
for i in range(len(data)):
if uniq[i] == 1:
cnt_of_uniq += 1
w, h = 3, cnt_of_uniq
m = [[0 for x in range(w)] for y in range(h)]
cur_cnt = 0
for i in range(len(data)):
if uniq[i] == 1:
for j in range(2):
m[cur_cnt][j] = data[i, j]
m[cur_cnt][2] = y_wine[i]-1
cur_cnt += 1
data = m
return data
data = PrepareData()
Нормализация данных, отображение данных на графике, убираем одинаковые объекты в выборке:
def Prepare_Iris_Data():
# import some data to play with
iris = datasets.load_iris()
X = iris.data[:, 2:4] # we only take the first two features.
y = iris.target
x_min, x_max = X[:, 0].min() - .5, X[:, 0].max() + .5
y_min, y_max = X[:, 1].min() - .5, X[:, 1].max() + .5
plt.figure(2, figsize = (16, 12))
plt.clf()
# Plot the training points
plt.scatter(X[:, 0], X[:, 1], c = y, cmap = plt.cm.Set1, edgecolor = 'k')
plt.xlabel('Sepal length')
plt.ylabel('Sepal width')
plt.xlim(x_min, x_max)
plt.ylim(y_min, y_max)
plt.xticks(())
plt.yticks(())
plt.show()
uniq = [1 for x in range(len(X))]
cnt_of_uniq = 0
for i in range(len(X)):
for j in range(i+1,len(X)):
if X[i, 0] == X[j, 0] and X[i, 1] == X[j, 1]:
uniq[j] = 0
for i in range(len(X)):
if uniq[i] == 1:
cnt_of_uniq += 1
w, h = 3, cnt_of_uniq
m = [[0 for x in range(w)] for y in range(h)]
cur_cnt = 0
for i in range(150):
if uniq[i] == 1:
for j in range(2):
m[cur_cnt][j] = X[i, j]
m[cur_cnt][2] = y[i]
cur_cnt += 1
data = m
return data
iris_data = Prepare_Iris_Data()
Выборка данных «Ирисы Фишера»
Необходимые столпы (эталоны) для алгоритма
Класс FRIS-STOLP:
class FRiS_STOLP:
EPS = 1E-7
# Найти ближайшего соседа
def Get_Nearest_Neighbour(self, x, Omega):
dist = [0]*len(Omega)
for i in range(len(Omega)):
dist[i] = Distance(x[0:len(x)-1], Omega[i][0:len(x)-1])
min_dist = min(dist)
for i in range(len(Omega)):
if dist[i] + self.EPS > min_dist and dist[i] - self.EPS < min_dist:
return Omega[i]
# Найти эталоны
def FindEtalon(self, X, Omega, Xl, defence_to_tolerance_ratio):
label = Get_Class_Label_Of_Object(X[0])
Efficiency = [0]*len(X)
Tolerance_denom = 0
for item in Xl:
if Get_Class_Label_Of_Object(item) != label:
Tolerance_denom += 1
if Tolerance_denom == 0:
Tolerance_denom = 1
NN_enemy_etalon_For_Defence = [list() for t in range(len(X))]
for p in range(len(X)):
item = X[p]
if Get_Class_Label_Of_Object(item) == label:
NN_enemy_etalon_For_Defence[p] = self.Get_Nearest_Neighbour(item, Omega)
NN_enemy_etalon_For_Tolerance = [list() for t in range(len(Xl))]
for p in range(len(Xl)):
item = Xl[p]
if Get_Class_Label_Of_Object(item) != label:
NN_enemy_etalon_For_Tolerance[p] = self.Get_Nearest_Neighbour(item, Omega)
for i in range(len(X)):
x = X[i]
sum_of_defence = 0
for j in range(len(X)):
item = X[j]
if item != x and Get_Class_Label_Of_Object(item) == label:
S = self.FRiS(NN_enemy_etalon_For_Defence[j], item, x)
sum_of_defence += S
Defence = sum_of_defence / (len(X)-1)
Tolerance_sum = 0
NN_enemy_etalon = []
NN_enemy_etalon = [list() for t in range(len(Xl))]
for j in range(len(Xl)):
item = Xl[j]
if Get_Class_Label_Of_Object(item) != label:
S = self.FRiS(x, item, NN_enemy_etalon_For_Tolerance[j])
Tolerance_sum += S
Tolerance = Tolerance_sum / Tolerance_denom
Efficiency[i] = defence_to_tolerance_ratio * Defence + (1.0 - defence_to_tolerance_ratio) * Tolerance
max_Efficiency = max(Efficiency)
for i in range(len(X)):
if Efficiency[i] + self.EPS > max_Efficiency and Efficiency[i] - self.EPS < max_Efficiency:
return X[i]
# Получить объекты i-го класса
def Get_Objects_From_Ith_Class(self, X, class_label):
cnt = 0
for i in range(len(X)):
obj = X[i]
if Get_Class_Label_Of_Object(obj) == class_label:
cnt += 1
w, h = 4, cnt
res = [[0 for x in range(w)] for y in range(h)]
cnt = 0
for i in range(len(X)):
obj = X[i]
if Get_Class_Label_Of_Object(obj) == class_label:
res[cnt] = obj
cnt += 1
return res
# --------------------------------------------------------------
# Получить "врагов" i-го класса
def Get_Enemies_Of_Ith_Class(self, X, class_label):
cnt = 0
for i in range(len(X)):
obj = X[i]
if Get_Class_Label_Of_Object(obj) != class_label:
cnt += 1
w, h = 4, cnt
res = [[0 for x in range(w)] for y in range(h)]
cnt = 0
for i in range(len(X)):
obj = X[i]
if Get_Class_Label_Of_Object(obj) != class_label:
res[cnt] = obj
cnt += 1
return res
# --------------------------------------------------------------
def Truncate_Set(self, X, Y):
res = []
for i in range(len(X)):
is_exist = False
for j in range(len(Y)):
if X[i] == Y[j]:
is_exist = True
break
if is_exist == False:
res[len(res):] = [X[i]]
return res
# --------------------------------------------------------------
# FRIS-функция
def FRiS(self, a, x, c):
S_nom = math.sqrt(Distance(a[0:len(x)-1], x[0:len(x)-1])) - math.sqrt(Distance(x[0:len(x)-1], c[0:len(x)-1]))
S_denom = math.sqrt(Distance(a[0:len(x)-1], x[0:len(x)-1])) + math.sqrt(Distance(x[0:len(x)-1], c[0:len(x)-1]))
S = S_nom / S_denom
return S
# Метод "получить множество правильно классифицированных объектов"
def Construct_A_Set_Of_Correctly_Classified_Objects(self, Xl, Etalons):
res = []
for i in range(len(Xl)):
x = Xl[i]
label_of_cur_class = Get_Class_Label_Of_Object(x)
Enemies_Etalons = copy.deepcopy(Etalons)
del Enemies_Etalons[label_of_cur_class]
Enemies_Etalons = self.Merge_Elements(Enemies_Etalons)
nearest_neighbour_friend = self.Get_Nearest_Neighbour(x, Etalons[label_of_cur_class])
nearest_neighbour_enemy = self.Get_Nearest_Neighbour(x, Enemies_Etalons)
S = self.FRiS(nearest_neighbour_enemy, x, nearest_neighbour_friend)
if S > 0:
res[len(res):] = [x]
return res
# --------------------------------------------------------------
# Получить количество классов в выборке
def Get_Number_Of_Classes_In_Dataset(self, Xl):
len_of_obj = len(Xl[0])
labels = set()
for obj in Xl:
cur_label = obj[len_of_obj-1]
labels.add(cur_label)
return len(labels)
# Объекдинить элементы
def Merge_Elements(self, lst):
res = []
for i in range(len(lst)):
res += lst[i]
return res
# Классификация по одному ближайшему соседу
def Classify_By_1NN(self, u, Etalons):
number_of_classes = self.Get_Number_Of_Classes_In_Dataset(Etalons)
nn = self.Get_Nearest_Neighbour(u, Etalons)
return Get_Class_Label_Of_Object(nn)
# Получить эталоны
def Get_Etalons(self, Xl):
number_of_classes = self.Get_Number_Of_Classes_In_Dataset(Xl)
Etalons = [list() for i in range(number_of_classes)]
for i in range(3):
Xy = self.Get_Objects_From_Ith_Class(Xl, i)
X_enemies = self.Get_Enemies_Of_Ith_Class(Xl, i)
Etalons[i][len(Etalons[i]):] = [self.FindEtalon(Xy, X_enemies, Xl, 0.5)]
# print (Etalons)
for i in range(3):
Xy = self.Get_Objects_From_Ith_Class(Xl, i)
Enemies_Etalons = copy.deepcopy(Etalons)
del Enemies_Etalons[i]
Enemies_Etalons = self.Merge_Elements(Enemies_Etalons)
Etalons[i] = []
Etalons[i][len(Etalons[i]):] = [self.FindEtalon(Xy, Enemies_Etalons, Xl, 0.5)]
while len(Xl) > 0:
U = self.Construct_A_Set_Of_Correctly_Classified_Objects(Xl, Etalons)
Xl = self.Truncate_Set(Xl, U)
for i in range(3):
Xy = self.Get_Objects_From_Ith_Class(Xl, i)
if len(Xy) == 0:
continue
Enemies_Etalons = copy.deepcopy(Etalons)
del Enemies_Etalons[i]
Enemies_Etalons = self.Merge_Elements(Enemies_Etalons)
if len(Xy) == 1:
obj = Xy[0]
Etalons[i][len(Etalons[i]):] = [obj]
else:
Etalons[i][len(Etalons[i]):] = [self.FindEtalon(Xy, Enemies_Etalons, Xl, 0.5)]
return Etalons
def FRiS_Support_Classify(self, u, xl, k, draw_plot_if_two_dimentional, draw_lines_between_u_nn_and_supports):
number_of_classes = self.Get_Number_Of_Classes_In_Dataset(xl)
nn = [list() for t in range(number_of_classes)]
for i in range(number_of_classes):
Xy = self.Get_Objects_From_Ith_Class(xl, i)
nn[i] = self.Get_Nearest_Neighbour(u, Xy)
candidates_for_supporting = [list() for t in range(number_of_classes)]
for cur_class in range(number_of_classes):
candidates_for_supporting[cur_class] = self.Get_Objects_From_Ith_Class(xl, cur_class)
candidates_for_supporting[cur_class].remove(nn[cur_class])
supporting_list = [list() for t in range(number_of_classes)]
for cur_class in range(number_of_classes):
for it in range(k-1):
supporting_element = fris.Get_Nearest_Neighbour(nn[cur_class], candidates_for_supporting[cur_class])
candidates_for_supporting[cur_class].remove(supporting_element)
supporting_list[cur_class].append(supporting_element)
sum_of_fris = [0 for t in range(number_of_classes)]
for cur_class in range(number_of_classes):
for i in range(len(supporting_list[cur_class])):
sum_of_fris[cur_class] += self.FRiS(supporting_list[cur_class][i], nn[cur_class], u)
if draw_lines_between_u_nn_and_supports == True:
plt.plot([supporting_list[cur_class][i][0], nn[cur_class][0]], [supporting_list[cur_class][i][1], nn[cur_class][1]], color = 'yellow')
plt.plot([nn[cur_class][0], u[0]], [nn[cur_class][1], u[1]], color = 'yellow')
sum_of_fris[cur_class] /= len(supporting_list[cur_class])
if draw_lines_between_u_nn_and_supports == True:
plt.scatter([u[0]], [u[1]], c = 'green', s = 25)
label = sum_of_fris.index(max(sum_of_fris))
cmap_light = matplotlib.colors.ListedColormap(['red', 'green', '#AAAAFF'])
#---------------------------------------------------------
if len(u)-1 == 2 and draw_plot_if_two_dimentional == True:
cur_col = 'green'
if label == 0:
cur_col = 'red'
if label == 1:
cur_col = 'blue'
plt.scatter([u[0]], [u[1]], c = cur_col, s = 25)
x1_coord = []
x2_coord = []
y = []
for i in range(len(xl)):
x1_coord.append(xl[i][0])
x2_coord.append(xl[i][1])
y.append(xl[i][2])
plt.scatter(x1_coord, x2_coord, c = y, cmap = cmap_light, s = 25)
#---------------------------------------------------------
return label
Основная программа:
# Инициализация выборки
xl_0 = [[-1, 1, 0], [-1, 3, 0], [-1, 5, 0], [1, 1, 0], [1, 3, 0], [1, 5, 0], [3, 1, 0], [3, 3, 0], [3, 5, 0], [5, 1, 0], [5, 3, 0], [5, 5, 0]]
xl_1 = [[8, 3, 1], [7.8, 3, 1], [8.2, 3, 1], [8, 3.2, 1], [8, 2.8, 1], [7.9, 2.9, 1], [7.9, 3.1, 1], [8.1, 2.9, 1], [8.1, 3.1, 1]]
xl = xl_0 + xl_1
fris = FRiS_STOLP()
x_min = 5
y_min = 1
x_max = 9
y_max = 5
N = 30
M = 30
plt.figure(2, figsize = (16, 12))
dx = (x_max-x_min)/N
dy = (y_max-y_min)/M
classification_map = []
for i in range(N+1):
x = x_min + i * dx
for j in range(M+1):
y = y_min + j * dy
u = [x, y, -1]
classification_map.append(fris.FRiS_Support_Classify(u = u, xl = xl, k = 5, draw_plot_if_two_dimentional = True, draw_lines_between_u_nn_and_supports = False))
#----------------------------------------------------------
u = [6.0, 3.0, -1]
fris.FRiS_Support_Classify(u = u, xl = xl, k = 5, draw_plot_if_two_dimentional = False, draw_lines_between_u_nn_and_supports = True)
plt.show()
#----------------------------------------------------------
plt.figure(2, figsize = (16, 12))
N = 30
M = 30
dx = (x_max - x_min) / (N+1)
dy = (y_max - y_min) / (M+1)
xx, yy = np.arange(x_min, x_max, dx), np.arange(y_min, y_max, dy)
xv, yv = np.meshgrid(xx, yy)
cmap_light = matplotlib.colors.ListedColormap(['#FFAAAA', '#AAFFAA', '#AAAAFF'])
c = np.empty(xv.shape, dtype = int)
for x_it in range(xv.shape[0]):
for y_it in range(yv.shape[0]):
c[y_it][x_it] = classification_map[x_it*yv.shape[0]+y_it]
plt.pcolormesh(xv, yv, c, cmap = cmap_light)
x1_coord = []
x2_coord = []
y = []
for i in range(len(xl)):
x1_coord.append(xl[i][0])
x2_coord.append(xl[i][1])
y.append(xl[i][2])
plt.scatter(x1_coord, x2_coord, c = y, cmap = plt.cm.Set1, s = 25)
plt.show()
Пример работы алгоритма FRIS-STOLP
Classification map для алгоритма FRIS-STOLP
# Создание объекта FRIS-STOLP
fris = FRiS_STOLP()
# Получили эталоны для выборки по всем классам
Etalons = fris.Get_Etalons(data)
all_data = np.loadtxt(open("./wine_data.csv","r"), delimiter = ",", skiprows = 0, dtype = np.float64)
# load class labels from column 1
y_wine = all_data[:,0]
#print (y_wine)
# conversion of the class labels to integer-type array
y_wine = y_wine.astype(np.int64, copy = False)
# load the 14 features
X_wine = all_data[:,1:]
data = all_data[:, 6:8] #!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! 6:9
x_min, x_max = data[:, 0].min() - .5, data[:, 0].max() + .5
y_min, y_max = data[:, 1].min() - .5, data[:, 1].max() + .5
plt.figure(2, figsize = (8, 6))
plt.clf()
# Plot the training points
plt.scatter(data[:, 0], data[:, 1], c = y_wine, cmap = plt.cm.Set1, edgecolor = 'k')
plt.xlabel('Flavanoids')
plt.ylabel('Nonflavanoid phenols')
plt.xlim(x_min, x_max)
plt.ylim(y_min, y_max)
plt.xticks(())
plt.yticks(())
x_coord = []
y_coord = []
labels = []
Etalons = fris.Merge_Elements(Etalons)
for i in range(len(Etalons)):
x_coord.append(Etalons[i][0])
for i in range(len(Etalons)):
y_coord.append(Etalons[i][1])
#plt.plot(iris_data[119][0], iris_data[119][1], 'ro', color = 'g')
plt.plot(x_coord, y_coord, 'ro', color = 'b')
plt.show()
Создание classification map:
def Create_Classification_Map(x_min, x_max, N, y_min, y_max, M, Etalons):
dx = (x_max - x_min)/N
dy = (y_max - y_min)/M
obj_list = [list() for t in range(N*M)]
for i in range(N):
x = x_min + i * dx
for j in range(M):
y = y_min + j * dy
u = [x, y, -1]
label = fris_for_iris.Classify_By_1NN(u, Etalons)
obj = [x, y, label]
obj_list[i*M+j] = obj
return obj_list
fris_for_iris = FRiS_STOLP()
Etalons_iris = fris_for_iris.Get_Etalons(iris_data)
# import some data to play with
iris = datasets.load_iris()
X = iris.data[:, 2:4] # we only take the first two features.
y = iris.target
x_min, x_max = X[:, 0].min() - .5, X[:, 0].max() + .5
y_min, y_max = X[:, 1].min() - .5, X[:, 1].max() + .5
plt.figure(2, figsize = (10, 8))
plt.clf()
# Plot the training points
plt.scatter(X[:, 0], X[:, 1], c = y, edgecolor = 'k', alpha = 0.5)
plt.xlabel('Sepal length')
plt.ylabel('Sepal width')
plt.xlim(x_min, x_max)
plt.ylim(y_min, y_max)
plt.axis
x_coord = []
y_coord = []
labels = []
Etalons_iris = fris_for_iris.Merge_Elements(Etalons_iris)
classification_map = Create_Classification_Map(x_min, x_max, 100, y_min, y_max, 100, Etalons_iris)
x_coords_for_classification_map = []
y_coords_for_classification_map = []
c_for_classification_map = []
for i in range(len(classification_map)):
obj = classification_map[i]
label = Get_Class_Label_Of_Object(obj)
cur_col = "white"
if label == 0:
cur_col = 'red'
if label == 1:
cur_col = 'green'
if label == 2:
cur_col = 'yellow'
x_coords_for_classification_map.append(classification_map[i][0])
y_coords_for_classification_map.append(classification_map[i][1])
c_for_classification_map.append(label)
N = 100
M = 100
dx = (x_max - x_min) / N
dy = (y_max - y_min) / M
xx, yy = np.arange(x_min, x_max, dx), np.arange(y_min, y_max, dy)
xv, yv = np.meshgrid(xx, yy)
cmap_light = matplotlib.colors.ListedColormap(['#FFAAAA', '#AAFFAA', '#AAAAFF'])
c = np.empty(xv.shape, dtype = int)
for x_it in range(xv.shape[0]):
for y_it in range(yv.shape[0]):
c[x_it][y_it] = c_for_classification_map[y_it*xv.shape[0]+x_it]
plt.pcolormesh(xv, yv, c, cmap = cmap_light)
for i in range(len(X)):
obj = X[i]
label = y[i]
cur_col = "white"
if label == 0:
cur_col = 'red'
if label == 1:
cur_col = 'green'
if label == 2:
cur_col = 'yellow'
plt.plot([obj[0]], [obj[1]], 'ro', color = cur_col, alpha = 1.0)
for i in range(len(Etalons_iris)):
x_coord.append(Etalons_iris[i][0])
for i in range(len(Etalons_iris)):
y_coord.append(Etalons_iris[i][1])
for i in range(len(Etalons_iris)):
shape = 's'
obj = Etalons_iris[i]
label = Get_Class_Label_Of_Object(obj)
cur_col = "white"
if label == 0:
cur_col = "red"
if label == 1:
cur_col = "green"
if label == 2:
cur_col = "yellow"
plt.plot([x_coord[i]], [y_coord[i]], "ro", color = cur_col, markersize = 10, linestyle = '-', linewidth = 5, markeredgecolor = 'black')
plt.show()
Результат работы алгоритма FRIS-STOLP для выборки «Ирисы Фишера» (classification map)