In a head-to-head (H2H) league, every team matches up against one other team each week. Certain teams will have easier matchups than others. Throughout a season, a team might get lucky for a few weeks and play a few teams that are not having a good week. This degree of luck is reflected in the final standings. I wanted to see how the standings would change with different schedules, so I created a simulation that randomizes the schedule for every team.

In order to be sure my simulation was working properly, I recreated the standings from our league. I worked out the schedule format that Yahoo uses and used it in my simulation. My simulated standings came out exactly the same as the actual league standings. I then ran the simulation 1000 times, each time randomizing the team ID of each team.

Within the simulation I saved the winning stat value each week for every stat category for later analysis.

library(tidyverse)
library(dplyr)
library(knitr)
library(kableExtra)
library(htmlTable)

# Build the 10-team, 9-week round-robin opponent table.
#
# Returns a 10 x 9 numeric matrix in which entry [team, week] is the id of the
# opponent that `team` faces in `week`. Every team meets each of the other
# nine teams exactly once, and the table is symmetric: if [a, w] == b then
# [b, w] == a.
yahoo_scheduale_func <- function() {
  team.scheduale <- matrix(0, 10, 9)

  # Teams 1-9 rotate: team i meets team j in week (i + j - 2) mod 9.
  for (i in seq_len(9)) {
    for (j in seq_len(9)) {
      week <- (i + j - 2) %% 9
      # Residue 0 is the final week (column 9), which is filled in below.
      if (week == 0) {
        next
      }
      # The rotation would pair a team with itself here; that is the week in
      # which it plays team 10 instead.
      team.scheduale[i, week] <- if (i == j) 10 else j
    }
  }

  # Team 10 plays whichever team id is not yet used in each of weeks 1-8.
  team.scheduale[10, 1:8] <- sum(1:10) - colSums(team.scheduale[1:9, 1:8])

  # Final week: team i plays team 11 - i.
  team.scheduale[, 9] <- 10:1

  team.scheduale
}


# Stretch the 9-week round robin from yahoo_scheduale_func() to a 21-week
# season by cycling through its week columns in order (weeks 1-9, then 1-9
# again, then 1-3). Returns a matrix with one row per team and 21 columns.
full_scheduale <- function() {
  base_sched <- yahoo_scheduale_func()
  week_cycle <- rep(seq_len(ncol(base_sched)), length.out = 21)
  base_sched[, week_cycle]
}

# Season-long opponent table, built once and read as a global by
# sim_standings(): yahoo[team, week] is the id of that team's opponent.
yahoo <- full_scheduale()

# Simulate `num` seasons of head-to-head category standings, each time
# randomly reassigning the real teams to the slots of the schedule.
#
# Arguments:
#   num             Number of simulated seasons to run.
#   all_week_stats  Long-format weekly stats. The code reads the columns
#                   `week`, `Stat`, `stat_value` and `real_name`. Weeks 22-24
#                   and the "Hits / At Bats" and "IP" rows are dropped.
#   names, team_id  Lookup tables joined on `team_name`; the joined table must
#                   contain `real_name` and have one row per schedule slot.
#   Stats           Categories to score. "ERA" and "WHIP" are won by the lower
#                   value, every other category by the higher value. Equal
#                   values count as neither a win nor a loss.
#   sim_standings   List that receives one standings table per simulation
#                   (element `value` for simulation number `value`).
#   stat_list       List that accumulates, per category, the stat value of the
#                   winning side of every decided weekly matchup.
#
# Returns list(sim_standings, stat_list). Each standings table has one row per
# `real_name` with `wins`, `losses` and `Pct`, sorted by descending `Pct`.
#
# Uses the global opponent matrix `yahoo` (team slot x week) for both the
# number of team slots and the number of weeks.
# NOTE(review): the weekly rows of each team/category are assumed to already
# be ordered by week, because values are matched to weeks by position; confirm
# against the input file.
sim_standings <- function(num,
                          all_week_stats = read_delim("/Users/noahknoblauch/Dropbox/Baseball/all_week_stats.txt",delim = "\t",guess_max = 10000),
                          names = read_delim("/Users/noahknoblauch/Dropbox/Baseball/Team_names.txt",delim = "\t"),
                          team_id = read_delim("/Users/noahknoblauch/Dropbox/Baseball/team_id.txt",delim = "\t"),
                          Stats = c("Runs","Runs Batted In","Home Runs","Stolen Bases","AVG","Wins","Saves","Strikeouts","ERA","WHIP"),
                          sim_standings = list(),
                          stat_list = list()) {

  n_teams <- nrow(yahoo)
  weeks <- seq_len(ncol(yahoo))
  lower_is_better <- c("ERA", "WHIP")

  # Weekly values of one category as a week x team-slot matrix. Fails with a
  # descriptive message when a slot lacks a complete, non-missing season,
  # instead of the opaque error a scalar `if` raises on NA.
  weekly_matrix <- function(by_team, stat) {
    vapply(seq_len(n_teams), function(slot) {
      weekly <- by_team[[slot]][[stat]][["stat_value"]]
      if (length(weekly) != length(weeks) || anyNA(weekly)) {
        stop("Category '", stat, "' for team slot ", slot, " must have ",
             length(weeks), " non-missing weekly values", call. = FALSE)
      }
      weekly
    }, numeric(length(weeks)))
  }

  team_id <- inner_join(names, team_id, by = "team_name")

  all_week_stats_n <- all_week_stats %>%
    filter(week != 24, week != 23, week != 22) %>%
    filter(Stat != "Hits / At Bats", Stat != "IP") %>%
    mutate(stat_value = as.numeric(stat_value))

  # seq_len() so that num = 0 runs no simulations at all.
  for (value in seq_len(num)) {
    # Shuffle which real team occupies which slot of the fixed schedule.
    slot_ids <- sample(seq_len(n_teams))
    team_id <- team_id %>%
      mutate(team_id = slot_ids)

    all_week_stats <- inner_join(all_week_stats_n, team_id, by = "real_name")
    all_team_stats <- split(all_week_stats, all_week_stats$team_id)

    # list_df[[slot]][[category]] holds that slot's weekly rows.
    list_df <- lapply(seq_len(n_teams), function(z) {
      split(all_team_stats[[z]], all_team_stats[[z]]$Stat)
    })

    stat_values <- lapply(Stats, function(i) weekly_matrix(list_df, i))
    names(stat_values) <- Stats

    season_standings <- vector("list", n_teams)
    for (j in seq_len(n_teams)) {
      # (week, opponent slot) pairs used to index the week x slot matrices.
      opponent_cells <- cbind(weeks, yahoo[j, ])

      for (i in Stats) {
        own <- stat_values[[i]][, j]
        opp <- stat_values[[i]][opponent_cells]

        if (i %in% lower_is_better) {
          week_wins <- own < opp
          week_losses <- own > opp
        } else {
          week_wins <- own > opp
          week_losses <- own < opp
        }

        # Record only the winner's value; ties contribute nothing.
        if (any(week_wins)) {
          stat_list[[i]] <- c(stat_list[[i]], own[week_wins])
        }

        list_df[[j]][[i]] <- mutate(list_df[[j]][[i]],
                                    wins = as.numeric(week_wins),
                                    losses = as.numeric(week_losses))
      }

      season_standings[[j]] <- bind_rows(list_df[[j]]) %>%
        group_by(real_name) %>%
        summarise(wins = sum(wins), losses = sum(losses), Pct = wins / (wins + losses))
    }

    sim_standings[[value]] <- bind_rows(season_standings) %>%
      arrange(desc(Pct))
  }

  list(sim_standings, stat_list)
}

This R Markdown site was created with workflowr