Рекурсивный отбор признаков. Динамический шаг в танце feature selection (08.08.2024 08:00)
import matplotlib.pyplot as plt
from matplotlib.ticker import MaxNLocator
# Position of the highest F1 score in each strategy's score history
# (np.argmax reports the first occurrence when the maximum is repeated).
best_dynamic_iter, best_super_dynamic_iter, best_fixed_iter = (
    np.argmax(score_history)
    for score_history in (
        dynamic_f1_scores,
        super_dynamic_f1_scores,
        fixed_f1_scores,
    )
)
# Figure 1.1: number of selected features at each iteration.
fig, ax = plt.subplots(figsize=(24, 8))

# One row per strategy: (feature counts, best-F1 iteration, legend label, marker).
# Row order determines both the colour-cycle assignment and the legend order.
feature_count_series = (
    (features_dynamic, best_dynamic_iter, 'ДШ(К)', 'o'),
    (features_super_dynamic, best_super_dynamic_iter, 'ДШ(З)', 'v'),
    (features_fixed, best_fixed_iter, 'ФШ', 's'),
)

# Curves first ...
for counts, best_iter, series_label, series_marker in feature_count_series:
    ax.plot(range(len(counts)), counts, label=series_label, marker=series_marker)

# ... then the red highlights at the best-F1 iteration, so the legend lists
# all curves before all highlights.
for counts, best_iter, series_label, series_marker in feature_count_series:
    ax.scatter(
        [best_iter],
        [counts[best_iter]],
        c='red',
        s=80,
        marker=series_marker,
        zorder=3,
        label=f'f1_score_max_{series_label}',
    )

# Denser ticks: allow 4x (x axis) / 3x (y axis) the current number of ticks.
ax.xaxis.set_major_locator(MaxNLocator(4 * len(ax.get_xticks())))
ax.yaxis.set_major_locator(MaxNLocator(3 * len(ax.get_yticks())))

ax.set(
    title='Number of Features Selected vs. Iterations',
    xlabel='Iterations',
    ylabel='Number of Features Selected',
)
ax.legend()
ax.grid(True)
plt.show()
# Figure 1.2: F1 score (left) and cumulative CV time (right) versus the
# iteration number.
fig, axs = plt.subplots(1, 2, figsize=(24, 8))

# Each row: (y values, best-F1 iteration, legend label, marker).
# The curve and its red highlight are both drawn from the same row, so a
# series always uses one marker shape for both (the time panel previously
# drew ДШ(З) with squares and ФШ with triangles while highlighting them with
# the opposite shapes).
f1_by_iteration = (
    (dynamic_f1_scores, best_dynamic_iter, 'ДШ(К)', 'o'),
    (super_dynamic_f1_scores, best_super_dynamic_iter, 'ДШ(З)', 'v'),
    (fixed_f1_scores, best_fixed_iter, 'ФШ', 's'),
)
# Cumulative sums are computed once and reused for the curve and the highlight.
cumulative_time_by_iteration = (
    (np.cumsum(dynamic_f1_times), best_dynamic_iter, 'ДШ(К)', 'o'),
    (np.cumsum(super_dynamic_f1_times), best_super_dynamic_iter, 'ДШ(З)', 'v'),
    (np.cumsum(fixed_f1_times), best_fixed_iter, 'ФШ', 's'),
)

# --- Left panel: F1 score per iteration ---
for values, best_iter, series_label, series_marker in f1_by_iteration:
    axs[0].plot(range(len(values)), values, label=series_label,
                marker=series_marker, markersize=4)
# Highlights are added after all curves so the legend lists curves first.
for values, best_iter, series_label, series_marker in f1_by_iteration:
    axs[0].scatter([best_iter], [values[best_iter]], c='red', s=30,
                   marker=series_marker, zorder=3,
                   label=f'f1_score_max_{series_label}')
# Denser ticks: allow 4x (x axis) / 2x (y axis) the current number of ticks.
axs[0].xaxis.set_major_locator(MaxNLocator(4 * len(axs[0].get_xticks())))
axs[0].yaxis.set_major_locator(MaxNLocator(2 * len(axs[0].get_yticks())))
axs[0].set_title('F1-Score vs. Number of Iterations')
axs[0].set_xlabel('Iterations')
axs[0].set_ylabel('Cross-Validation F1-Score')
axs[0].legend()
axs[0].grid(True)

# --- Right panel: cumulative time per iteration ---
for values, best_iter, series_label, series_marker in cumulative_time_by_iteration:
    axs[1].plot(range(len(values)), values, label=series_label,
                marker=series_marker, markersize=4)
for values, best_iter, series_label, series_marker in cumulative_time_by_iteration:
    axs[1].scatter([best_iter], [values[best_iter]], c='red', s=30,
                   marker=series_marker, zorder=3,
                   label=f'f1_score_max_{series_label}')
axs[1].xaxis.set_major_locator(MaxNLocator(4 * len(axs[1].get_xticks())))
axs[1].yaxis.set_major_locator(MaxNLocator(2 * len(axs[1].get_yticks())))
axs[1].set_title('Cumulative CV Time vs. Number of Iterations')
axs[1].set_xlabel('Iterations')
axs[1].set_ylabel('Cumulative CV Time (seconds)')
axs[1].legend()
axs[1].grid(True)
plt.show()
# Figure 1.3.1: F1 score (left) and cumulative CV time (right) versus the
# number of features.
fig, axs = plt.subplots(1, 2, figsize=(24, 8))

# Each row: (feature counts for x, y values, best-F1 iteration, label, marker).
f1_by_feature_count = (
    (features_dynamic, dynamic_f1_scores, best_dynamic_iter, 'ДШ(К)', 'o'),
    (features_super_dynamic, super_dynamic_f1_scores, best_super_dynamic_iter, 'ДШ(З)', 'v'),
    (features_fixed, fixed_f1_scores, best_fixed_iter, 'ФШ', 's'),
)
cumulative_time_by_feature_count = (
    (features_dynamic, np.cumsum(dynamic_f1_times), best_dynamic_iter, 'ДШ(К)', 'o'),
    (features_super_dynamic, np.cumsum(super_dynamic_f1_times), best_super_dynamic_iter, 'ДШ(З)', 'v'),
    (features_fixed, np.cumsum(fixed_f1_times), best_fixed_iter, 'ФШ', 's'),
)

# --- Left panel: F1 score per feature count ---
for counts, values, best_iter, series_label, series_marker in f1_by_feature_count:
    axs[0].plot(counts, values, label=series_label, marker=series_marker, markersize=4)
# Red highlights mark the best-F1 point; added after the curves so the legend
# lists curves first.
for counts, values, best_iter, series_label, series_marker in f1_by_feature_count:
    axs[0].scatter(
        [counts[best_iter]],
        [values[best_iter]],
        c='red',
        s=30,
        marker=series_marker,
        zorder=3,
        label=f'f1_score_max_{series_label}',
    )
# Tick density is derived from the autoscaled ticks, i.e. before the y range
# is clamped and before the x axis is flipped.
axs[0].xaxis.set_major_locator(MaxNLocator(4 * len(axs[0].get_xticks())))
axs[0].yaxis.set_major_locator(MaxNLocator(2 * len(axs[0].get_yticks())))
axs[0].set_ylim(0.54, 0.64)
axs[0].set(
    title='F1-Score vs. Number of Features',
    xlabel='Number of Features',
    ylabel='Cross-Validation F1-Score',
)
axs[0].legend()
# Reverse the x axis so the feature count decreases from left to right.
axs[0].invert_xaxis()
axs[0].grid(True)

# --- Right panel: cumulative time per feature count ---
for counts, values, best_iter, series_label, series_marker in cumulative_time_by_feature_count:
    axs[1].plot(counts, values, label=series_label, marker=series_marker, markersize=4)
for counts, values, best_iter, series_label, series_marker in cumulative_time_by_feature_count:
    axs[1].scatter(
        [counts[best_iter]],
        [values[best_iter]],
        c='red',
        s=30,
        marker=series_marker,
        zorder=3,
        label=f'f1_score_max_{series_label}',
    )
axs[1].xaxis.set_major_locator(MaxNLocator(4 * len(axs[1].get_xticks())))
axs[1].yaxis.set_major_locator(MaxNLocator(2 * len(axs[1].get_yticks())))
axs[1].set(
    title='Cumulative CV Time vs. Number of Features',
    xlabel='Number of Features',
    ylabel='Cumulative CV Time (seconds)',
)
axs[1].legend()
axs[1].invert_xaxis()
axs[1].grid(True)
plt.show()
# Figure 1.3.2: F1 score versus the number of features, zoomed in.
fig, ax = plt.subplots(figsize=(24, 8))

# Each row: (feature counts for x, F1 scores for y, best-F1 iteration, label, marker).
zoomed_f1_series = (
    (features_dynamic, dynamic_f1_scores, best_dynamic_iter, 'ДШ(К)', 'o'),
    (features_super_dynamic, super_dynamic_f1_scores, best_super_dynamic_iter, 'ДШ(З)', 'v'),
    (features_fixed, fixed_f1_scores, best_fixed_iter, 'ФШ', 's'),
)

for counts, scores, best_iter, series_label, series_marker in zoomed_f1_series:
    ax.plot(counts, scores, label=series_label, marker=series_marker)
# Red highlights at each strategy's best F1 score, added after the curves so
# the legend lists curves first.
for counts, scores, best_iter, series_label, series_marker in zoomed_f1_series:
    ax.scatter(
        [counts[best_iter]],
        [scores[best_iter]],
        c='red',
        s=80,
        marker=series_marker,
        zorder=3,
        label=f'f1_score_max_{series_label}',
    )

# Tick density is derived from the autoscaled ticks, before zooming.
ax.xaxis.set_major_locator(MaxNLocator(4 * len(ax.get_xticks())))
ax.yaxis.set_major_locator(MaxNLocator(2 * len(ax.get_yticks())))

ax.set(
    title='F1-Score vs. Number of Features',
    xlabel='Number of Features',
    ylabel='Cross-Validation F1-Score',
)
ax.legend()
ax.invert_xaxis()
ax.grid(True)

# Zoom window; the x limits are given in descending order, which keeps the
# axis running from more features (left) to fewer (right).
ax.set_xlim(180, 20)
ax.set_ylim(0.58, 0.64)
plt.show()
# Figure 1.4: ratio of discarded importance versus the iteration number.
fig, axs = plt.subplots(figsize=(24, 12))

# Each row: (feature counts, discarded-importance values, best-F1 iteration,
# legend label, marker). The curve and its highlight both read from the same
# row, so every highlight is taken from its own series (the ДШ(З) highlight
# previously read its y value from info_out_dynamic).
discarded_importance_series = (
    (features_dynamic, info_out_dynamic, best_dynamic_iter, 'ДШ(К)', 'o'),
    (features_super_dynamic, info_out_super_dynamic, best_super_dynamic_iter, 'ДШ(З)', 'v'),
    (features_fixed, info_out_fixed, best_fixed_iter, 'ФШ', 's'),
)

# Only the first 50 points are drawn. The x axis is numbered from 1, so the
# value plotted at x = k is info_out[k - 1].
for counts, info_out, best_iter, series_label, series_marker in discarded_importance_series:
    axs.plot(range(1, len(counts) + 1)[:50], info_out[:50],
             label=series_label, marker=series_marker)

# Red highlights at the best-F1 iteration, added after the curves so the
# legend lists curves first.
# NOTE(review): when best_iter is 0, info_out[best_iter - 1] wraps around to
# the last element and x = 0 lies left of the curve — confirm whether that
# case can occur and how it should be drawn.
for counts, info_out, best_iter, series_label, series_marker in discarded_importance_series:
    axs.scatter([best_iter], [info_out[best_iter - 1]], c='red', s=80,
                marker=series_marker, zorder=3,
                label=f'f1_score_max_{series_label}')

# Denser ticks: allow 4x (x axis) / 3x (y axis) the current number of ticks.
axs.xaxis.set_major_locator(MaxNLocator(4 * len(axs.get_xticks())))
axs.yaxis.set_major_locator(MaxNLocator(3 * len(axs.get_yticks())))
axs.set_title('Ratio of discarded importance vs. Iterations')
axs.set_xlabel('Iterations')
axs.set_ylabel('ratio of discarded importance')
axs.legend()
axs.grid(True)
plt.show()
© Habrahabr.ru