from bs4 import BeautifulSoup
import datetime
import requests
from ..conference import Conference
from ..division import Division
from ..event import Event
from ..franchise import Franchise
from ..game import Game
from ..gameinfo import GameInfo
from ..gametime import Gametime
from ..list import List
from ..location import Location
from ..official import Official
from ..player import Player
from ..playerstats import PlayerStats
from .. import rink
from ..shift import Shift
from ..team import Team
from ..venue import Venue
def _fetch_shifts(game_id, side):
    """Download the NHL time-on-ice HTML report for one side of a game.

    Args:
        game_id: Integer game id. ``game_id // 1000000`` is used as the
            season's first year and the remaining six digits as the game
            number within that season.
        side: ``"home"`` selects the ``TH...`` report; any other value
            selects the ``TV...`` (visitor) report.

    Returns:
        The response body as text. The HTTP status is not checked, so an
        error page is returned as-is.

    Raises:
        Whatever ``requests.get`` raises for connection failures or when the
        timeout expires.
    """
    import logging

    logger = logging.getLogger(__name__)
    season_id, lower_game_id = divmod(game_id, 1000000)
    side_id = "H" if side == "home" else "V"
    url = "http://www.nhl.com/scores/htmlreports/{:04d}{:04d}/T{:1s}{:06d}.HTM".format(
        season_id, season_id + 1, side_id, lower_game_id)
    logger.debug("Fetching shift report: %s", url)
    # Without a timeout a stalled connection would block the caller forever.
    result = requests.get(url, timeout=30)
    logger.debug("Shift report response: %s", result)
    return result.text
def _parse_date(date_str):
    """Convert a dash-separated ``YYYY-MM-DD`` string into a ``datetime.date``.

    Only the first three dash-separated fields are read.
    """
    fields = date_str.split("-")
    return datetime.date(int(fields[0]), int(fields[1]), int(fields[2]))
def _parse_height(height_str):
    """Convert a height written like ``6' 2"`` into a total number of inches."""
    pieces = height_str.split("' ")
    feet = int(pieces[0])
    # The inches field ends with a double quote; keep only what precedes it.
    inches = int(pieces[1].split('"')[0])
    return 12 * feet + inches
def _parse_gametime(period_time):
    """Convert an ``MM:SS`` period clock string into a number of seconds."""
    fields = period_time.split(":")
    minutes = int(fields[0])
    seconds = int(fields[1])
    return minutes * 60 + seconds
def parse_conference(json):
    """Build a ``Conference`` from a JSON object, reusing a registered one.

    Args:
        json: Mapping with ``"id"`` and ``"name"``, and optionally
            ``"shortName"`` and ``"abbreviation"``.

    Returns:
        The ``Conference`` already registered under the id if there is one,
        otherwise a newly constructed ``Conference``.
    """
    conference_id = json["id"]
    if not Conference.has_key(conference_id):
        full_name = json["name"]
        # NOTE: the `shortName` and `abbreviation` keys are not guaranteed; it
        # depends on which json object is returned. The fallbacks are the name
        # without its last three characters and the name's first character.
        short = json.get("shortName", full_name[0:-3])
        abbrev = json.get("abbreviation", full_name[0])
        return Conference(conference_id, full_name, short, abbrev)
    return Conference.from_key(conference_id)
def parse_conferences(json):
    """Parse every conference object in ``json`` and return them as a ``List``."""
    parsed = [parse_conference(entry) for entry in json]
    return List(parsed)
def parse_division(json):
    """Build a ``Division`` from a JSON object, reusing a registered one.

    Args:
        json: Mapping with ``"id"``, ``"name"``, ``"nameShort"`` and
            ``"abbreviation"``.

    Returns:
        The ``Division`` already registered under the id if there is one,
        otherwise a newly constructed ``Division``.
    """
    division_id = json["id"]
    if not Division.has_key(division_id):
        return Division(
            division_id,
            json["name"],
            json["nameShort"],
            json["abbreviation"],
        )
    return Division.from_key(division_id)
# Normalized subtype spellings that are rewritten to a canonical name.
_EVENT_SUBTYPE_ALIASES = {
    "INTERFERENCE_GOALKEEPER": "GOALTENDER_INTERFERENCE",
    "HI_STICKING": "HIGH_STICKING",
    "HI_STICK_DOUBLE_MINOR": "HIGH_STICKING_DOUBLE_MINOR",
    "GOALPOST": "HIT_GOALPOST",
    "DELAYING_GAME_PUCK_OVER_GLASS": "DELAY_OF_GAME_PUCK_OVER_GLASS",
    "DELAY_GM_FACE_OFF_VIOLATION": "DELAY_OF_GAME_FACE_OFF_VIOLATION",
}


def _normalize_event_subtype(subtype):
    """Upper-case a raw subtype, underscore its separators and apply aliases."""
    if not subtype:
        return subtype
    subtype = subtype.upper().replace(":", "")
    subtype = subtype.replace(" ", "_").replace("-", "_")
    # "A - B" has become "A___B" at this point; collapse it to "A_B".
    subtype = subtype.replace("___", "_")
    return _EVENT_SUBTYPE_ALIASES.get(subtype, subtype)


def parse_events(json, info, home_score, away_score, home_shifts, away_shifts, flip_sides):
    """Turn one play object into a ``List`` of ``Event`` objects.

    A goal yields up to three events: a secondary assist (id + 2), a primary
    assist (id + 1) and the goal itself (id), in that order. Every other play
    yields a single event. The event id is ``eventIdx * 10``.

    Args:
        json: Play mapping. ``"about"``, ``"result"`` and ``"coordinates"``
            are read unconditionally; ``"team"`` and ``"players"`` are
            optional.
        info: Game info object; its ``id``, ``home_team`` and ``away_team``
            are used.
        home_score: Home score stored with the event.
        away_score: Away score stored with the event.
        home_shifts: Home shifts, filtered to find who is on the ice.
        away_shifts: Away shifts, filtered to find who is on the ice.
        flip_sides: Forwarded to ``parse_location``.

    Returns:
        A ``List`` of events. If the event id is already registered, the
        registered event and any registered assist events derived from it
        are returned (also as a ``List``) instead of being rebuilt.
    """
    event_id = json["about"]["eventIdx"] * 10
    game_id = info.id
    if Event.has_key(game_id, event_id):
        # Keep the return type consistent with the uncached path (callers
        # extend a List with the result) and keep the same ordering:
        # secondary assist, primary assist, then the event itself.
        cached = List()
        for offset in (2, 1, 0):
            if Event.has_key(game_id, event_id + offset):
                cached.append(Event.from_key(game_id, event_id + offset))
        return cached

    result = json["result"]
    event_type = result["eventTypeId"]
    if event_type == "STOP":
        event_type = "STOPPAGE"
        subtype = result["description"]
    elif event_type == "MISSED_SHOT":
        # The subtype is whatever follows the first " - " in the description.
        subtype = result["description"].split(" - ")[1].upper()
    elif event_type in ("SHOT", "GOAL"):
        subtype = result["secondaryType"].upper()
    else:
        subtype = result.get("secondaryType", None)
    subtype = _normalize_event_subtype(subtype)

    # The score tuple is ordered (acting team's score, other team's score).
    if "team" in json:
        if info.home_team.id == json["team"]["id"]:
            score = (home_score, away_score)
            by_team = info.home_team
            on_team = info.away_team
        else:
            score = (away_score, home_score)
            by_team = info.away_team
            on_team = info.home_team
    else:
        score = (None, None)
        by_team = None
        on_team = None

    # NOTE: Player.from_key is safe because the players have necessarily
    # already been parsed and are in memory.
    # NOTE(review): the last entry is always taken as `on_player` when there
    # is more than one player, and everything between the first and last is
    # an assist. If a goal's list has no trailing opposing player, the last
    # assisting player would be treated as `on_player` - confirm against the
    # feed.
    players = json.get("players", [])
    by_player = Player.from_key(players[0]["player"]["id"]) if len(players) > 0 else None
    on_player = Player.from_key(players[-1]["player"]["id"]) if len(players) > 1 else None
    if len(players) > 2:
        assist_players = List([Player.from_key(p["player"]["id"]) for p in players[1:-1]])
    else:
        assist_players = List()

    gametime = Gametime(json["about"]["period"], _parse_gametime(json["about"]["periodTime"]))
    location = parse_location(json["coordinates"], gametime, flip_sides)

    if event_type == "PENALTY":
        value = result["penaltyMinutes"]
    elif event_type in ("BLOCKED_SHOT", "MISSED_SHOT", "SHOT", "GOAL"):
        # The value is the distance from the event to a goal. Blocked shots
        # pick the goal from `on_team`, all other shots from `by_team`
        # (presumably because a block is attributed to the blocker - TODO
        # confirm).
        attacking_team = on_team if event_type == "BLOCKED_SHOT" else by_team
        if location is None or attacking_team is None:
            # No coordinates (or no team) in the play: no distance to report.
            value = None
        elif attacking_team.id == info.home_team.id:
            value = location.distance(rink.AWAY_GOAL)
        else:
            value = location.distance(rink.HOME_GOAL)
    else:
        value = None

    if by_team is None:
        home_players_on_ice = List()
        away_players_on_ice = List()
    else:
        if event_type in ("ASSIST", "GOAL", "PENALTY"):
            # Shift interval is (on, off]: a shift ending exactly at this
            # time still counts, a shift starting exactly at this time does
            # not.
            on_op, off_op = "<", ">="
        else:
            # Shift interval is [on, off).
            on_op, off_op = "<=", ">"
        now = gametime.sec
        home_players_on_ice = home_shifts.filter("on.sec", now, on_op).filter("off.sec", now, off_op).sort("player_id")
        away_players_on_ice = away_shifts.filter("on.sec", now, on_op).filter("off.sec", now, off_op).sort("player_id")

    if by_team is info.home_team:
        by_players_on_ice = home_players_on_ice
        on_players_on_ice = away_players_on_ice
    else:
        by_players_on_ice = away_players_on_ice
        on_players_on_ice = home_players_on_ice

    def build(new_id, new_type, new_subtype, acting_player):
        # Every event produced from this play shares the same context.
        return Event(game_id, new_id, new_type, new_subtype, gametime, location, value, score,
                     acting_player, on_player, by_team, on_team,
                     by_players_on_ice, on_players_on_ice)

    events = List()
    if event_type == "GOAL":
        if len(assist_players) >= 2:
            events.append(build(event_id + 2, "ASSIST", "SECONDARY", assist_players[1]))
        if len(assist_players) >= 1:
            events.append(build(event_id + 1, "ASSIST", "PRIMARY", assist_players[0]))
    events.append(build(event_id, event_type, subtype, by_player))
    return events
def parse_franchise(json):
    """Build a ``Franchise`` from a JSON object, reusing a registered one.

    Args:
        json: Mapping with ``"franchiseId"`` and ``"teamName"``.

    Returns:
        The ``Franchise`` already registered under the id if there is one,
        otherwise a newly constructed ``Franchise``.
    """
    franchise_id = json["franchiseId"]
    if not Franchise.has_key(franchise_id):
        return Franchise(franchise_id, json["teamName"])
    return Franchise.from_key(franchise_id)
# Short game-type codes that are expanded; any other code is kept as-is.
_GAME_TYPE_NAMES = {
    "PR": "PRE_SEASON",
    "R": "REGULAR_SEASON",
    "P": "PLAYOFF",
}


def _parse_team_shifts(json, game_id, team_id, side):
    """Fetch one side's shift report and parse the shifts of each non-scratched player."""
    shifts = List()
    html = _fetch_shifts(game_id, side)
    boxscore_team = json["liveData"]["boxscore"]["teams"][side]
    scratches = boxscore_team["scratches"]
    for entry in boxscore_team["players"].values():
        player_id = entry["person"]["id"]
        if player_id in scratches:
            continue
        player_number = int(entry["jerseyNumber"])
        shifts.extend(parse_shifts(game_id, team_id, player_id, player_number, html))
    return shifts


def parse_game(json):
    """Build a ``Game`` from a live-feed JSON object, reusing a registered one.

    Besides reading ``json`` this downloads the home and visitor shift
    reports through ``_fetch_shifts``.

    Args:
        json: Mapping with ``"gamePk"``, ``"gameData"`` (game, datetime,
            venue, teams, players) and ``"liveData"`` (boxscore, plays,
            linescore).

    Returns:
        The ``Game`` already registered under ``gamePk`` if there is one,
        otherwise a newly constructed ``Game`` holding the game info, both
        teams, per-player stats and all events.
    """
    game_id = json["gamePk"]
    if Game.has_key(game_id):
        return Game.from_key(game_id)

    game_data = json["gameData"]
    season_id = game_data["game"]["season"]
    raw_type = game_data["game"]["type"]
    game_type = _GAME_TYPE_NAMES.get(raw_type, raw_type)
    start_datetime = game_data["datetime"]["dateTime"]
    end_datetime = game_data["datetime"]["endDateTime"]
    venue = parse_venue(game_data["venue"])
    home_team = parse_team(game_data["teams"]["home"])
    away_team = parse_team(game_data["teams"]["away"])

    live_data = json["liveData"]
    boxscore = live_data["boxscore"]
    home_score = boxscore["teams"]["home"]["teamStats"]["teamSkaterStats"]["goals"]
    away_score = boxscore["teams"]["away"]["teamStats"]["teamSkaterStats"]["goals"]

    referees = List()
    linesmen = List()
    for entry in boxscore["officials"]:
        official = parse_official(entry["official"])
        if entry["officialType"] == "Referee":
            referees.append(official)
        elif entry["officialType"] == "Linesman":
            linesmen.append(official)

    # If several GAME_END plays exist, the last one wins.
    all_plays = live_data["plays"]["allPlays"]
    end_type = None
    end_gametime = None
    for play in all_plays:
        if play["result"]["eventTypeId"] == "GAME_END":
            end_type = play["about"]["periodType"]
            end_gametime = Gametime(play["about"]["period"], _parse_gametime(play["about"]["periodTime"]))

    info = GameInfo(id=game_id, season_id=season_id, game_type=game_type,
                    start_datetime=start_datetime, end_datetime=end_datetime,
                    venue=venue, home_team=home_team, away_team=away_team,
                    score=(home_score, away_score), end_type=end_type,
                    end_gametime=end_gametime, referees=referees,
                    linesmen=linesmen) if False else GameInfo(
        game_id, season_id, game_type, start_datetime, end_datetime, venue,
        home_team, away_team, (home_score, away_score), end_type,
        end_gametime, referees, linesmen)

    # Players are parsed before events because parse_events looks them up
    # by key.
    players = List([parse_player(p) for p in game_data["players"].values()])

    home_shifts = _parse_team_shifts(json, game_id, home_team.id, "home")
    away_shifts = _parse_team_shifts(json, game_id, away_team.id, "away")

    # TODO: Find a way to estimate this if not provided
    first_period_home = live_data["linescore"]["periods"][0]["home"]
    flip_sides = first_period_home.get("rinkSide", "right") == "left"

    events = List()
    for play in all_plays:
        # The score passed to parse_events is the goal count so far, i.e.
        # before the play being parsed.
        goals_so_far = events.filter("type", "GOAL")
        current_home_score = goals_so_far.filter("by_team.id", home_team.id).len
        current_away_score = goals_so_far.filter("by_team.id", away_team.id).len
        events.extend(parse_events(play, info, current_home_score, current_away_score,
                                   home_shifts, away_shifts, flip_sides))

    shifts = home_shifts + away_shifts
    player_stats = List()
    for player in players:
        player_stats.append(PlayerStats(
            player,
            shifts.filter("player_id", player.id),
            events.filter("by_player.id", player.id),
        ))
    return Game(info, home_team, away_team, player_stats, events)
def parse_location(json, gametime, flip_sides):
    """Build a ``Location`` from a coordinates mapping, or return ``None``.

    Both coordinates are negated once when ``flip_sides`` is truthy and once
    more when ``gametime.period`` is even, so they end up negated exactly
    when one of those two conditions holds but not both.

    Args:
        json: Coordinates mapping; ``"x"`` and ``"y"`` are converted with
            ``int``.
        gametime: Object whose ``period`` decides the per-period flip.
        flip_sides: Whether to negate the coordinates up front.

    Returns:
        A ``Location``, or ``None`` when ``json`` has no ``"x"`` key.
    """
    if "x" not in json:
        return None
    x = int(json["x"])
    y = int(json["y"])
    even_period = gametime.period % 2 == 0
    # Two negations cancel out, so only an odd number of flips matters.
    if bool(flip_sides) != even_period:
        return Location(-x, -y)
    return Location(x, y)
def parse_official(json):
    """Build an ``Official`` from a JSON object, reusing a registered one.

    Args:
        json: Mapping with ``"id"`` and ``"fullName"``.

    Returns:
        The ``Official`` already registered under the id if there is one,
        otherwise a newly constructed ``Official``.
    """
    official_id = json["id"]
    if not Official.has_key(official_id):
        return Official(official_id, json["fullName"])
    return Official.from_key(official_id)
def parse_player(json):
    """Build a ``Player`` from a JSON object, reusing a registered one.

    Args:
        json: Mapping with ``"id"``, ``"fullName"``, ``"primaryPosition"``,
            ``"height"``, ``"weight"``, ``"shootsCatches"``, ``"birthDate"``,
            ``"birthCity"`` and ``"birthCountry"``; ``"primaryNumber"`` is
            optional.

    Returns:
        The ``Player`` already registered under the id if there is one,
        otherwise a newly constructed ``Player`` with the height in inches
        and the birth date as a ``datetime.date``.
    """
    player_id = json["id"]
    if Player.has_key(player_id):
        return Player.from_key(player_id)

    full_name = json["fullName"]
    # NOTE: Occasionally the `primaryNumber` key is not provided; a missing
    # or empty value becomes None.
    raw_number = json.get("primaryNumber")
    jersey = None if not raw_number else int(raw_number)
    position = json["primaryPosition"]["abbreviation"]
    height_inches = _parse_height(json["height"])
    weight = json["weight"]
    handedness = json["shootsCatches"]
    born_on = _parse_date(json["birthDate"])
    born_city = json["birthCity"]
    born_country = json["birthCountry"]
    return Player(
        player_id, full_name, jersey, position, height_inches, weight,
        handedness, born_on, born_city, born_country,
    )
def parse_shifts(game_id, team_id, player_id, player_number, html):
    """Extract one player's shifts from a time-on-ice HTML report.

    The player's section is found by matching the jersey number at the start
    of each ``td`` whose class is ``playerHeading + border``. Rows after the
    section's column-header row are read as shifts for as long as they have
    exactly six cells.

    Args:
        game_id: Game id stored on each ``Shift``.
        team_id: Not used by the parsing; kept for interface compatibility.
        player_id: Player id stored on each ``Shift``.
        player_number: Jersey number used to find the player's section.
        html: Text of the report.

    Returns:
        A ``List`` of ``Shift`` objects, empty when no section matches.
    """
    shifts = List()
    tree = BeautifulSoup(html, features="html.parser")
    for heading in tree.find_all("td", "playerHeading + border"):
        # The heading text starts with the jersey number.
        number = int(heading.get_text().split(" ")[0])
        if number != player_number:
            continue
        # Move to the row after the heading. The loop advances once more
        # before reading, so that row itself is never parsed as a shift.
        row = heading.find_next("tr")
        while row is not None:
            row = row.find_next("tr")
            if row is None:
                # Ran off the end of the document.
                break
            cells = row.find_all("td")
            if len(cells) != 6:
                # First row that is not shaped like a shift ends the section.
                break
            # Shift numbers in the report are 1-based; ids are stored 0-based.
            shift_id = int(cells[0].get_text()) - 1
            period = int(cells[1].get_text())
            # Start/end cells hold two clocks separated by " /"; the first
            # one is used.
            on_period_mmss = cells[2].get_text().split(" /")[0]
            off_period_mmss = cells[3].get_text().split(" /")[0]
            on = Gametime(period, _parse_gametime(on_period_mmss))
            off = Gametime(period, _parse_gametime(off_period_mmss))
            shifts.append(Shift(game_id, player_id, shift_id, on, off))
    return shifts
def parse_team(json):
    """Build a ``Team`` from a JSON object, reusing a registered one.

    Args:
        json: Mapping with ``"id"``, ``"locationName"``, ``"teamName"``,
            ``"abbreviation"``, ``"firstYearOfPlay"``, ``"division"``,
            ``"conference"`` and ``"franchise"``.

    Returns:
        The ``Team`` already registered under the id if there is one,
        otherwise a newly constructed ``Team`` whose division, conference
        and franchise are parsed from their nested objects.
    """
    team_id = json["id"]
    if Team.has_key(team_id):
        return Team.from_key(team_id)

    city = json["locationName"]
    nickname = json["teamName"]
    abbrev = json["abbreviation"]
    founded = int(json["firstYearOfPlay"])
    # Nested objects are parsed in this order: division, conference, franchise.
    division = parse_division(json["division"])
    conference = parse_conference(json["conference"])
    franchise = parse_franchise(json["franchise"])
    return Team(team_id, city, nickname, abbrev, founded, division, conference, franchise)
def parse_teams(json):
    """Parse every team object in ``json`` and return them as a ``List``."""
    parsed = [parse_team(entry) for entry in json]
    return List(parsed)
def parse_venue(json):
    """Build a ``Venue`` from a JSON object, reusing a registered one.

    Args:
        json: Mapping with ``"id"`` and ``"name"``.

    Returns:
        The ``Venue`` already registered under the id if there is one,
        otherwise a newly constructed ``Venue``.
    """
    venue_id = json["id"]
    if not Venue.has_key(venue_id):
        return Venue(venue_id, json["name"])
    return Venue.from_key(venue_id)