Рекурсивный отбор признаков. Динамический шаг в танце feature selection

40512d6db49239017e1867e5bb87957b.png
import matplotlib.pyplot as plt
from matplotlib.ticker import MaxNLocator

# Index of the iteration with the highest F1-score for each strategy.
# np.argmax reports the first position when the maximum occurs more than once.
best_dynamic_iter = np.argmax(dynamic_f1_scores)
best_super_dynamic_iter = np.argmax(super_dynamic_f1_scores)
best_fixed_iter = np.argmax(fixed_f1_scores)

# Figure 1.1: number of selected features at each iteration.
fig, ax = plt.subplots(figsize=(24, 8))

# One row per strategy: (feature counts, best iteration, legend tag, marker).
_runs = (
    (features_dynamic, best_dynamic_iter, 'ДШ(К)', 'o'),
    (features_super_dynamic, best_super_dynamic_iter, 'ДШ(З)', 'v'),
    (features_fixed, best_fixed_iter, 'ФШ', 's'),
)

# Draw all curves first so they come first in the legend and in the colour cycle.
for _counts, _best, _tag, _marker in _runs:
    ax.plot(range(len(_counts)), _counts, label=_tag, marker=_marker)

# Red highlights mark the iteration at which each strategy reached its best F1-score.
for _counts, _best, _tag, _marker in _runs:
    ax.scatter(
        [_best], [_counts[_best]],
        c='red', s=80, marker=_marker, zorder=3,
        label='f1_score_max_' + _tag,
    )

# Allow denser ticks than the default: the MaxNLocator argument is an upper
# bound, scaled here from the number of ticks the axis currently has.
ax.xaxis.set_major_locator(MaxNLocator(4 * len(ax.get_xticks())))
ax.yaxis.set_major_locator(MaxNLocator(3 * len(ax.get_yticks())))

ax.set(
    title='Number of Features Selected vs. Iterations',
    xlabel='Iterations',
    ylabel='Number of Features Selected',
)
ax.legend()
ax.grid(True)

plt.show()

# Figure 1.2: F1-score and cumulative cross-validation time against the
# iteration index.
fig, axs = plt.subplots(1, 2, figsize=(24, 8))

# One row per strategy:
# (F1 scores, cumulative CV times, best iteration, legend tag, marker).
# Each cumulative sum is computed once and reused by the curve and its highlight.
_runs = (
    (dynamic_f1_scores, np.cumsum(dynamic_f1_times), best_dynamic_iter, 'ДШ(К)', 'o'),
    (super_dynamic_f1_scores, np.cumsum(super_dynamic_f1_times), best_super_dynamic_iter, 'ДШ(З)', 'v'),
    (fixed_f1_scores, np.cumsum(fixed_f1_times), best_fixed_iter, 'ФШ', 's'),
)
# Styling shared by the red "best F1-score" highlights.
_best_style = dict(c='red', s=30, zorder=3)

# Left panel: F1-score at each iteration.
for _scores, _cum_times, _best, _tag, _marker in _runs:
    axs[0].plot(range(len(_scores)), _scores, label=_tag, marker=_marker, markersize=4)

for _scores, _cum_times, _best, _tag, _marker in _runs:
    axs[0].scatter([_best], [_scores[_best]], marker=_marker,
                   label='f1_score_max_' + _tag, **_best_style)

# Allow denser ticks than the default (the MaxNLocator argument is an upper bound).
axs[0].xaxis.set_major_locator(MaxNLocator(4 * len(axs[0].get_xticks())))
axs[0].yaxis.set_major_locator(MaxNLocator(2 * len(axs[0].get_yticks())))

axs[0].set_title('F1-Score vs. Number of Iterations')
axs[0].set_xlabel('Iterations')
axs[0].set_ylabel('Cross-Validation F1-Score')
axs[0].legend()
axs[0].grid(True)

# Right panel: cumulative CV time at each iteration.
# Each curve uses the same marker as its highlight and as the left panel
# ('v' for ДШ(З), 's' for ФШ); previously these two curve markers were swapped.
for _scores, _cum_times, _best, _tag, _marker in _runs:
    axs[1].plot(range(len(_cum_times)), _cum_times, label=_tag, marker=_marker, markersize=4)

for _scores, _cum_times, _best, _tag, _marker in _runs:
    axs[1].scatter([_best], [_cum_times[_best]], marker=_marker,
                   label='f1_score_max_' + _tag, **_best_style)

axs[1].xaxis.set_major_locator(MaxNLocator(4 * len(axs[1].get_xticks())))
axs[1].yaxis.set_major_locator(MaxNLocator(2 * len(axs[1].get_yticks())))

axs[1].set_title('Cumulative CV Time vs. Number of Iterations')
axs[1].set_xlabel('Iterations')
axs[1].set_ylabel('Cumulative CV Time (seconds)')
axs[1].legend()
axs[1].grid(True)

plt.show()

# Figure 1.3.1: F1-score and cumulative cross-validation time against the
# number of selected features.
fig, axs = plt.subplots(1, 2, figsize=(24, 8))
_score_ax, _time_ax = axs[0], axs[1]

# One row per strategy:
# (feature counts, F1 scores, cumulative CV times, best iteration, legend tag, marker).
_runs = (
    (features_dynamic, dynamic_f1_scores, np.cumsum(dynamic_f1_times),
     best_dynamic_iter, 'ДШ(К)', 'o'),
    (features_super_dynamic, super_dynamic_f1_scores, np.cumsum(super_dynamic_f1_times),
     best_super_dynamic_iter, 'ДШ(З)', 'v'),
    (features_fixed, fixed_f1_scores, np.cumsum(fixed_f1_times),
     best_fixed_iter, 'ФШ', 's'),
)
# Styling shared by the red "best F1-score" highlights.
_best_style = dict(c='red', s=30, zorder=3)

# Left panel: F1-score against the number of features.
for _counts, _scores, _cum_times, _best, _tag, _marker in _runs:
    _score_ax.plot(_counts, _scores, label=_tag, marker=_marker, markersize=4)

for _counts, _scores, _cum_times, _best, _tag, _marker in _runs:
    _score_ax.scatter([_counts[_best]], [_scores[_best]], marker=_marker,
                      label='f1_score_max_' + _tag, **_best_style)

# Allow denser ticks than the default (the MaxNLocator argument is an upper bound).
_score_ax.xaxis.set_major_locator(MaxNLocator(4 * len(_score_ax.get_xticks())))
_score_ax.yaxis.set_major_locator(MaxNLocator(2 * len(_score_ax.get_yticks())))

# Fixed y-window for the score panel.
_score_ax.set_ylim(0.54, 0.64)

_score_ax.set(
    title='F1-Score vs. Number of Features',
    xlabel='Number of Features',
    ylabel='Cross-Validation F1-Score',
)
_score_ax.legend()
# Reverse the x axis so that larger feature counts are on the left.
_score_ax.invert_xaxis()
_score_ax.grid(True)

# Right panel: cumulative CV time against the number of features.
for _counts, _scores, _cum_times, _best, _tag, _marker in _runs:
    _time_ax.plot(_counts, _cum_times, label=_tag, marker=_marker, markersize=4)

for _counts, _scores, _cum_times, _best, _tag, _marker in _runs:
    _time_ax.scatter([_counts[_best]], [_cum_times[_best]], marker=_marker,
                     label='f1_score_max_' + _tag, **_best_style)

_time_ax.xaxis.set_major_locator(MaxNLocator(4 * len(_time_ax.get_xticks())))
_time_ax.yaxis.set_major_locator(MaxNLocator(2 * len(_time_ax.get_yticks())))

_time_ax.set(
    title='Cumulative CV Time vs. Number of Features',
    xlabel='Number of Features',
    ylabel='Cumulative CV Time (seconds)',
)
_time_ax.legend()
_time_ax.invert_xaxis()
_time_ax.grid(True)

plt.show()

# Figure 1.3.2: F1-score against the number of features, zoomed in.

fig, ax = plt.subplots(figsize=(24, 8))

# One row per strategy: (feature counts, F1 scores, best iteration, legend tag, marker).
_runs = (
    (features_dynamic, dynamic_f1_scores, best_dynamic_iter, 'ДШ(К)', 'o'),
    (features_super_dynamic, super_dynamic_f1_scores, best_super_dynamic_iter, 'ДШ(З)', 'v'),
    (features_fixed, fixed_f1_scores, best_fixed_iter, 'ФШ', 's'),
)

for _counts, _scores, _best, _tag, _marker in _runs:
    ax.plot(_counts, _scores, label=_tag, marker=_marker)

# Red highlights mark each strategy's best F1-score.
for _counts, _scores, _best, _tag, _marker in _runs:
    ax.scatter(
        [_counts[_best]], [_scores[_best]],
        c='red', s=80, marker=_marker, zorder=3,
        label='f1_score_max_' + _tag,
    )

# Allow denser ticks than the default (the MaxNLocator argument is an upper bound).
ax.xaxis.set_major_locator(MaxNLocator(4 * len(ax.get_xticks())))
ax.yaxis.set_major_locator(MaxNLocator(2 * len(ax.get_yticks())))

ax.set(
    title='F1-Score vs. Number of Features',
    xlabel='Number of Features',
    ylabel='Cross-Validation F1-Score',
)
ax.legend()
ax.invert_xaxis()
ax.grid(True)

# Zoom window; the x limits are given high-to-low, so the axis stays reversed.
ax.set_xlim(180, 20)
ax.set_ylim(0.58, 0.64)

plt.show()

# Figure 1.4: ratio of discarded importance at each iteration.
# Only the first 50 points of every curve are drawn.
fig, axs = plt.subplots(figsize=(24, 12))

# One row per strategy:
# (feature counts, discarded-importance values, best iteration, legend tag, marker).
_runs = (
    (features_dynamic, info_out_dynamic, best_dynamic_iter, 'ДШ(К)', 'o'),
    (features_super_dynamic, info_out_super_dynamic, best_super_dynamic_iter, 'ДШ(З)', 'v'),
    (features_fixed, info_out_fixed, best_fixed_iter, 'ФШ', 's'),
)

# The x axis is 1-based on this figure: info_out[k - 1] is drawn at x = k.
for _counts, _info_out, _best, _tag, _marker in _runs:
    axs.plot(range(1, len(_counts) + 1)[:50], _info_out[:50], label=_tag, marker=_marker)

# Red highlights mark the iteration with the best F1-score. Each highlight
# reads its y value from its own strategy's series; the ДШ(З) highlight
# previously read from the ДШ(К) series.
for _counts, _info_out, _best, _tag, _marker in _runs:
    if _best < 1:
        # No curve point exists at x = 0, and info_out[-1] would silently wrap
        # around to the last element, so no highlight is drawn in that case.
        # NOTE(review): presumably nothing has been discarded yet at iteration 0 - confirm.
        continue
    axs.scatter(
        [_best], [_info_out[_best - 1]],
        c='red', s=80, marker=_marker, zorder=3,
        label='f1_score_max_' + _tag,
    )

# Allow denser ticks than the default (the MaxNLocator argument is an upper bound).
axs.xaxis.set_major_locator(MaxNLocator(4 * len(axs.get_xticks())))
axs.yaxis.set_major_locator(MaxNLocator(3 * len(axs.get_yticks())))

axs.set_title('Ratio of discarded importance vs. Iterations')
axs.set_xlabel('Iterations')
axs.set_ylabel('ratio of discarded importance')
axs.legend()
axs.grid(True)

plt.show()

© Habrahabr.ru