Предсказываем результаты плей-офф и победителя Евро 2024 при помощи машинного обучения и чата GPT 4.0 (часть 2)

18845fa8c47e2b308b484b12e35ab300.jpg

Дисклеймер

По приведённым ниже прогнозам не стоит делать ставки, т. к. они не учитывают букмекерскую маржу, текущую форму команд и многие другие факторы. Ставки на спорт — весьма специфичное занятие, во многом рассчитанное на психологию человека, его скрытые слабости и т. д., поэтому не стоит ставить на основании любых прогнозов из интернета.

Результаты предсказаний группового этапа:

  • Correct Predictions (+): 15

  • Incorrect Predictions (-): 21

  • Percentage of Correct Predictions: 41.67%

  • Sum of Coefficients for Correct Predictions: 32.2

Доля удачных предсказаний оказалась примерно на 16 процентных пунктов ниже, чем было заявлено изначально. При этом в первых двух турах доля верных прогнозов держалась довольно высокой, но в третьем туре, где для многих команд вопрос выхода из группы уже не решался, она сильно снизилась.

Делая ставки по 100 долларов на 36 матчей, мы бы потеряли примерно 380 долларов.

Прогноз на плей-офф:

library(randomForest)
library(dplyr)

# Load historical match results.
# NOTE(review): only the columns date, home_team, away_team, home_score and
# away_score are referenced below; the rest of the CSV schema is not visible
# from this script.
data <- read.csv("filtered_results.csv")

# Parse the date column ("YYYY-MM-DD" strings) into Date objects.
data$date <- as.Date(data$date, format = "%Y-%m-%d")

# Keep decisive matches only. Without this filter a draw would be labelled 0
# ("away win") for the home side and, after the flip below, 1 ("win") for the
# away side, which inflates away-side win rates. which() also drops rows whose
# score is missing instead of labelling them as away wins.
decisive_rows <- which(data$home_score != data$away_score)
data <- data[decisive_rows, , drop = FALSE]

# Binary target from the home side's point of view: 1 = home win, 0 = away win.
data$result <- ifelse(data$home_score > data$away_score, 1, 0)

# Reshape to one row per (team, match) so every team's matches can be
# aggregated regardless of whether it played at home or away.
home_games <- data %>%
  select(team = home_team, opponent = away_team, score = home_score,
         opponent_score = away_score, result)

# For the away side the outcome is the mirror image of the home-side label.
away_games <- data %>%
  select(team = away_team, opponent = home_team, score = away_score,
         opponent_score = home_score, result) %>%
  mutate(result = 1 - result)

all_games <- bind_rows(home_games, away_games)

# Per-team aggregates: matches played, share of wins, mean goals scored.
team_stats <- all_games %>%
  group_by(team) %>%
  summarise(total_games = n(),
            total_win_rate = mean(result == 1),
            total_avg_score = mean(score))

# Attach the aggregates twice: once for the home side, once for the away side.
data <- data %>%
  left_join(team_stats, by = c("home_team" = "team")) %>%
  rename(home_team_total_games = total_games,
         home_team_total_win_rate = total_win_rate,
         home_team_total_avg_score = total_avg_score) %>%
  left_join(team_stats, by = c("away_team" = "team")) %>%
  rename(away_team_total_games = total_games,
         away_team_total_win_rate = total_win_rate,
         away_team_total_avg_score = total_avg_score)

# Model inputs, in the order the model is trained on.
features <- c("home_team_total_win_rate", "away_team_total_win_rate",
              "home_team_total_games", "away_team_total_games",
              "home_team_total_avg_score", "away_team_total_avg_score")

# Zero-fill missing values in the feature columns only, so that unrelated
# columns (dates, names, ...) are not overwritten with 0.
data[features][is.na(data[features])] <- 0

X <- data[features]
y <- factor(data$result)

# 80/20 train/test split with a fixed seed for reproducibility.
# NOTE(review): team_stats above is computed from all rows, including the rows
# that end up in the test set, so the accuracy printed below is optimistic.
set.seed(42)
n_train <- floor(0.8 * nrow(data))
train_indices <- sample(seq_len(nrow(data)), size = n_train)
X_train <- X[train_indices, ]
y_train <- y[train_indices]
X_test <- X[-train_indices, ]
y_test <- y[-train_indices]

# Fit the random forest classifier.
rf_model <- randomForest(X_train, y_train, ntree = 200, mtry = 3,
                         importance = TRUE)

# Share of correctly classified held-out matches.
y_pred <- predict(rf_model, X_test)
accuracy <- sum(y_pred == y_test) / length(y_test)
print(paste("Accuracy:", accuracy))

# Predict the outcome of every fixture in `matches`.
#
# Args:
#   matches: data frame with columns `home_team` and `away_team`.
#   model:   fitted classifier accepted by predict(); a predicted class equal
#            to 1 is reported as a home win, anything else as an away win.
#   stats:   per-team table with columns `team`, `total_games`,
#            `total_win_rate` and `total_avg_score`.
#
# Returns:
#   Character vector with one element per row of `matches`, each either
#   "Home Win" or "Away Win".
#
# Teams that are absent from `stats` get all-zero features (as before), but a
# warning is now raised so that a misspelt team name does not go unnoticed.
predict_matches <- function(matches, model, stats) {
  # Row of `stats` for each side of every fixture; NA when the team is unknown.
  home_idx <- match(matches$home_team, stats$team)
  away_idx <- match(matches$away_team, stats$team)

  unknown <- unique(c(as.character(matches$home_team[is.na(home_idx)]),
                      as.character(matches$away_team[is.na(away_idx)])))
  if (length(unknown) > 0) {
    warning("No historical stats for: ", paste(unknown, collapse = ", "),
            "; their features are set to 0", call. = FALSE)
  }

  # Pull one statistic for the given rows, zero-filling unknown teams.
  stat_for <- function(idx, column) {
    values <- stats[[column]][idx]
    values[is.na(values)] <- 0
    values
  }

  # Feature columns are built here, in the order used for training, instead of
  # being read from a global variable.
  model_input <- data.frame(
    home_team_total_win_rate = stat_for(home_idx, "total_win_rate"),
    away_team_total_win_rate = stat_for(away_idx, "total_win_rate"),
    home_team_total_games = stat_for(home_idx, "total_games"),
    away_team_total_games = stat_for(away_idx, "total_games"),
    home_team_total_avg_score = stat_for(home_idx, "total_avg_score"),
    away_team_total_avg_score = stat_for(away_idx, "total_avg_score")
  )

  predictions <- predict(model, model_input)
  ifelse(predictions == 1, "Home Win", "Away Win")
}

# Knockout-stage simulation: each round is predicted with predict_matches(),
# and the predicted winners are fed into the next round.

# Team predicted to advance from every fixture, given the matching vector of
# "Home Win" / "Away Win" outcomes.
advancing_teams <- function(fixtures, outcomes) {
  ifelse(outcomes == "Home Win", fixtures$home_team, fixtures$away_team)
}

# Print one "<home> vs <away> -> Prediction: <outcome>" line per fixture.
print_round <- function(fixtures, outcomes) {
  round_lines <- paste(fixtures$home_team, "vs", fixtures$away_team,
                       "-> Prediction:", outcomes)
  for (round_line in round_lines) {
    print(round_line)
  }
}

# Round of 16
# stringsAsFactors = FALSE keeps team names as character on R < 4.0 as well,
# so advancing_teams() returns names rather than factor codes.
round_of_16 <- data.frame(
  home_team = c("Switzerland", "Germany", "England", "Spain",
                "France", "Portugal", "Romania", "Austria"),
  away_team = c("Italy", "Denmark", "Slovakia", "Georgia",
                "Belgium", "Slovenia", "Netherlands", "Turkey"),
  stringsAsFactors = FALSE
)

round_of_16_results <- predict_matches(round_of_16, rf_model, team_stats)
round_of_16

# Quarter-finals: winner of tie i (i = 1..4) hosts the winner of tie i + 4.
# NOTE(review): these pairings follow index order only — confirm against the
# official tournament bracket before relying on later rounds.
round_of_16_winners <- advancing_teams(round_of_16, round_of_16_results)
quarterfinals <- data.frame(
  home_team = round_of_16_winners[1:4],
  away_team = round_of_16_winners[5:8],
  stringsAsFactors = FALSE
)

quarterfinal_results <- predict_matches(quarterfinals, rf_model, team_stats)
quarterfinals

# Semi-finals: winners of quarter-finals 1-2 host winners of quarter-finals 3-4.
quarterfinal_winners <- advancing_teams(quarterfinals, quarterfinal_results)
semifinals <- data.frame(
  home_team = quarterfinal_winners[1:2],
  away_team = quarterfinal_winners[3:4],
  stringsAsFactors = FALSE
)

semifinal_results <- predict_matches(semifinals, rf_model, team_stats)
semifinals

# Final: winner of semi-final 1 is listed as home, winner of semi-final 2 as away.
semifinal_winners <- advancing_teams(semifinals, semifinal_results)
final <- data.frame(
  home_team = semifinal_winners[1],
  away_team = semifinal_winners[2],
  stringsAsFactors = FALSE
)

final_result <- predict_matches(final, rf_model, team_stats)
final

# Report every round.
print("1/8 финала результаты:")
print_round(round_of_16, round_of_16_results)

print("1/4 финала результаты:")
print_round(quarterfinals, quarterfinal_results)

print("1/2 финала результаты:")
print_round(semifinals, semifinal_results)

print("Финал результат:")
print_round(final, final_result)

1/8 финала:

[1] "Switzerland vs Italy -> Prediction: Away Win"

[1] "Germany vs Denmark -> Prediction: Away Win"

[1] "England vs Slovakia -> Prediction: Home Win"

[1] "Spain vs Georgia -> Prediction: Home Win"

[1] "France vs Belgium -> Prediction: Home Win"

[1] "Portugal vs Slovenia -> Prediction: Home Win"

[1] "Romania vs Netherlands -> Prediction: Away Win"

[1] "Austria vs Turkey -> Prediction: Away Win"

Процент точности вырос — «Accuracy: 0.724324324324324». Логично, учитывая игру на два результата.

Далее буду давать прогноз в комментариях в зависимости от реально сложившихся пар.

Спрогнозированный финал: «England vs Spain → Prediction: Away Win»

Победителем снова определена Испания, коэффициент на победу снизился с 9.00 до 5.50.

Habrahabr.ru прочитано 1260 раз