Analyse af Forventede Mål (xG) i Superligaen med Python
(Del 1) (Del 0) I moderne fodboldanalyse spiller statistikker en stadig vigtigere rolle, og én af de mest centrale statistikker er “Expected Goals” (xG). Expected Goals giver en dybere indsigt i holdenes og spillernes præstationer ved at kvantificere kvaliteten af de chancer, de skaber og afslutter. I dette indlæg vil jeg præsentere et Python-script, jeg har udviklet, som analyserer og visualiserer xG-data fra Superligaen.
Hvad er Expected Goals (xG)?
Expected Goals er en statistisk måling, der vurderer sandsynligheden for, at et skud resulterer i et mål, baseret på forskellige faktorer såsom skuddets afstand til målet, vinkel, skuddets type, og meget mere. Denne måling giver en mere nuanceret forståelse af et holds offensive præstationer end blot at se på antallet af scorede mål.
Introduktion til Scriptet
Mit script henter xG-data fra Superliga.dk og genererer detaljerede visualiseringer for hver kamp. Disse visualiseringer inkluderer et skudplot for både hjemme- og udeholdet samt en oversigt over de mest betydningsfulde chancer i kampen. Hvert hold præsenteres med deres respektive logoer for en nem og hurtig identifikation.
Hvordan Scriptet Fungerer
- Dataindsamling: Scriptet henter kampdata fra Superligaens officielle API.
- Datafiltrering: Kun færdigspillede kampe fra udvalgte runder analyseres.
- Visualisering: For hver kamp genereres et skudplot, der viser xG-værdierne for hver afslutning. Plottet gemmes automatisk som en PNG-fil navngivet efter kampens event-ID (expected_goals_event_<eventId>.png).
import os
import requests
from PIL import Image
import json
import pandas as pd
import matplotlib.pyplot as plt
from mplsoccer import Pitch
import time
# Rounds to fetch event IDs for. Entries are compared as-is against each
# event's 'round' value, so they must use the same type the API returns.
selected_rounds = ['2']  # Modify this list as needed

# Step 1: Fetch the season's events.
events_url = 'https://api.superliga.dk/events-v2?appName=dk.releaze.livecenter.spdk&access_token=5b6ab6f5eb84c60031bbbd24&env=production&locale=da&seasonId=20962'
# Without a timeout requests can block forever on a stalled connection, and
# raise_for_status() reports HTTP errors directly instead of failing later
# with a confusing JSON/KeyError.
response = requests.get(events_url, timeout=30)
response.raise_for_status()
events_data = response.json()

# Step 2: Keep only finished events from the selected rounds.
finished_events = [
    event for event in events_data['events']
    if event['statusType'] == 'finished' and event['round'] in selected_rounds
]
print(f"Total finished events found: {len(finished_events)}")
# Default locations; both can be overridden per call to generate_plots().
DEFAULT_LOGO_DIRECTORY = '/home/xxxxxxx/python_scripts/TeamPngs'  # Update with the path to your logos directory
DEFAULT_OUTPUT_DIRECTORY = '/home/xxxxxx/R_Superliga'  # Specify your directory here
REQUEST_TIMEOUT_SECONDS = 30
XG_COLUMN = 'expectedGoalsValue'
TABLE_COLUMNS = [XG_COLUMN, 'playerName']


def _load_logo(logo_directory, team_id):
    """Return the image stored as ``<team_id>.png`` in *logo_directory*."""
    logo_path = os.path.join(logo_directory, f'{team_id}.png')
    # Image.open() keeps the file open until the data is read; copy the image
    # and let the context manager close the file handle right away.
    with Image.open(logo_path) as logo:
        return logo.copy()


def _prepare_shots(records):
    """Build a shots DataFrame with coordinates rescaled from 0-100 to 120x80."""
    shots = pd.DataFrame(records)
    if shots.empty:
        # A side without any recorded shot has no 'x'/'y' columns to rescale.
        return shots
    shots['x_adjusted'] = shots['x'] * 120 / 100
    shots['y_adjusted'] = shots['y'] * 80 / 100
    return shots


def _top_chances(shots, limit=5):
    """Return up to *limit* table rows: goals first, then the highest-xG shots.

    The result has the TABLE_COLUMNS plus a boolean ``is_goal`` column.
    """
    goals = shots[shots['type'] == 'goal']
    highest = shots.nlargest(limit, XG_COLUMN)
    chances = pd.concat([goals, highest])
    # A goal that is also a top chance appears twice; remove it by row index so
    # that two different shots with equal xG and player are both kept.
    chances = chances.loc[~chances.index.duplicated()]
    # Defensive: a missing first or last name would otherwise render as "nan"
    # (whether the API ever omits one is not visible from here).
    first_names = chances['firstName'].fillna('')
    last_names = chances['lastName'].fillna('')
    chances = chances.assign(
        playerName=(first_names + ' ' + last_names).str.strip(),
        is_goal=chances['type'] == 'goal',
    )
    return chances[TABLE_COLUMNS + ['is_goal']].head(limit)


def _draw_team_panel(ax, pitch, shots, title, color, side):
    """Draw one team's pitch, shot markers, top-chances table and legend on *ax*."""
    pitch.draw(ax=ax)
    ax.set_title(title, fontsize=20)
    if shots.empty:
        # Nothing to scatter or tabulate; leave an empty, titled pitch.
        return

    pitch.scatter(shots['x_adjusted'], shots['y_adjusted'], s=shots[XG_COLUMN] * 1000,
                  c=color, label=f'{side} xG', alpha=0.6, edgecolors='black', ax=ax)
    # Overlay goal markers only if there are goals.
    goals = shots[shots['type'] == 'goal']
    if not goals.empty:
        pitch.scatter(goals['x_adjusted'], goals['y_adjusted'], s=goals[XG_COLUMN] * 1000,
                      c='gold', label=f'{side} Goals', alpha=1, edgecolors='black', ax=ax)

    chances = _top_chances(shots)
    table = ax.table(cellText=chances[TABLE_COLUMNS].values, colLabels=TABLE_COLUMNS,
                     loc='bottom', cellLoc='center')
    # Adjust table properties for better readability.
    table.auto_set_font_size(False)
    table.set_fontsize(10)
    table.scale(1, 1.5)
    # Emphasise the xG cell of rows that are goals. Table row 0 is the header,
    # so data rows start at 1.
    for row_number, scored in enumerate(chances['is_goal'], start=1):
        if scored:
            xg_cell = table[row_number, 0]
            xg_cell.set_fontsize(12)
            xg_cell.set_text_props(fontweight='bold')

    ax.legend(loc='upper left', fontsize=10)


def generate_plots(event_id, logo_directory=DEFAULT_LOGO_DIRECTORY,
                   output_directory=DEFAULT_OUTPUT_DIRECTORY):
    """Fetch xG data for one event and save a two-panel shot plot as a PNG.

    Args:
        event_id: Event identifier inserted into the detail URL and the
            output filename.
        logo_directory: Directory holding team logos named ``<teamId>.png``.
        output_directory: Directory the PNG is written to; created if missing.

    Returns:
        None. The plot is written to
        ``<output_directory>/expected_goals_event_<event_id>.png``.

    Any ``Exception`` raised while processing is caught and printed so that a
    caller looping over many events can continue with the next one.
    """
    fig = None
    try:
        details_url = f'https://api.superliga.dk/opta-stats/event/{event_id}/detail-expected-goals?appName=superligadk&access_token=5b6ab6f5eb84c60031bbbd24&env=production&locale=da'
        # Time out instead of hanging forever, and fail on HTTP errors before
        # trying to parse the body.
        response = requests.get(details_url, timeout=REQUEST_TIMEOUT_SECONDS)
        response.raise_for_status()
        data = response.json()

        # Load team logos.
        home_team_logo = _load_logo(logo_directory, data.get('homeId'))
        away_team_logo = _load_logo(logo_directory, data.get('awayId'))

        # Extract home and away expected goals data.
        home_data = _prepare_shots(data['expectedGoalsData']['home'])
        away_data = _prepare_shots(data['expectedGoalsData']['away'])

        # Extract scores.
        home_score = data['score']['home']
        away_score = data['score']['away']

        # Set up the pitch.
        # NOTE(review): no pitch_type is given, so mplsoccer's default
        # coordinate system applies; confirm the 0-100 -> 120x80 scaling and
        # the y-axis direction match it.
        pitch = Pitch(pitch_length=120, pitch_width=80, pitch_color='grass', line_color='white', goal_type='box')
        fig, axes = plt.subplots(nrows=1, ncols=2, figsize=(20, 10))

        _draw_team_panel(axes[0], pitch, home_data,
                         f"{data['homeName']} ({home_score}) - Expected Goals", 'red', 'Home')
        _draw_team_panel(axes[1], pitch, away_data,
                         f"{data['awayName']} ({away_score}) - Expected Goals", 'blue', 'Away')

        # Add team logos near team names.
        for logo, left in ((home_team_logo, 0.07), (away_team_logo, 0.52)):
            logo_ax = fig.add_axes([left, 0.8, 0.1, 0.1], anchor='NE', zorder=1)
            logo_ax.imshow(logo)
            logo_ax.axis('off')

        # Add a heading across the whole figure.
        fig.text(0.5, 0.95, 'EXPECTED GOALS ANALYSIS', ha='center', va='center', fontsize=38, fontweight='bold', color='blue')

        # Save the plot, creating the output directory if it doesn't exist.
        os.makedirs(output_directory, exist_ok=True)
        filename = f'expected_goals_event_{event_id}.png'
        fig.savefig(os.path.join(output_directory, filename), bbox_inches='tight')
    except Exception as e:
        print(f"An error occurred while processing event ID {event_id}: {e}")
    finally:
        # Close the figure even when plotting or saving failed, so figures do
        # not accumulate in memory across events.
        if fig is not None:
            plt.close(fig)
# Produce one plot per finished event, pausing between events.
for match in finished_events:
    match_id = match['eventId']
    print(f"Processing event ID: {match_id}")
    generate_plots(match_id)
    # One-second pause between events to avoid hitting the rate limit.
    time.sleep(1)
