In [7]:
# ----------------------------------------------------------
# IMPORT PACKAGES + SET DIRECTORY + LOAD DATA
# ----------------------------------------------------------
import pandas as pd
import numpy as np
import os
import matplotlib.pyplot as plt
import numpy as np
# Set working directory
os.chdir("/Users/alexsantos/Desktop/Self Learning")
# Load data
df = pd.read_csv("NbaPlayoffStats.csv")
#Show first rows
print(df.head())
Rk Player Age Team Pos G GS MP FG FGA ... ORB \ 0 1 Shai Gilgeous-Alexander 26 OKC PG 23 23 851 233 504 ... 23 1 2 Jalen Brunson 28 NYK PG 18 18 680 181 393 ... 12 2 3 Jalen Williams 23 OKC SG 23 23 796 178 396 ... 24 3 4 Pascal Siakam 30 IND PF 23 23 771 175 341 ... 30 4 5 Tyrese Haliburton 24 IND PG 23 23 772 146 315 ... 7 DRB TRB AST STL BLK TOV PF PTS Trp-Dbl 0 100 123 150 38 20 60 65 688 0 1 50 62 126 8 5 55 59 530 0 2 103 127 111 32 9 41 50 492 0 3 116 146 78 28 16 30 66 472 0 4 116 123 197 30 15 53 39 399 1 [5 rows x 31 columns]
In [9]:
# =====================================================================
# CLEANING + PREPPING NBA DATA
# =====================================================================
# ---------------------------
# CREATE NEEDED METRICS
# ---------------------------
# Field Goal Percentage
df["FG%"] = df["FG"] / df["FGA"]
# 3-Point Makes = 3P column
df["3PM"] = df["3P"]
# 3-Point Percentage
df["3P%"] = df["3P"] / df["3PA"].replace(0, np.nan)
# Rename TRB → REB for easier use
df["REB"] = df["TRB"]
# Assist/Turnover ratio
# Defensive impact later uses STL + BLK
print("Columns after cleaning:")
print(df.columns)
print(df.head())
# =====================================================================
# REALITY FILTER: REMOVE LOW-MINUTE, LOW-USAGE PLAYERS
# Keeps only real rotation players so analysis is meaningful.
# =====================================================================
# Minimum games played
min_games = 10
# Minimum minutes played
min_minutes = 150 # ~7 MPG over 20 games
# Minimum field-goal attempts
min_fga = 50
df = df[
(df["G"] >= min_games) &
(df["MP"] >= min_minutes) &
(df["FGA"] >= min_fga)
].copy()
print("Players remaining after filter:", len(df))
Columns after cleaning:
Index(['Rk', 'Player', 'Age', 'Team', 'Pos', 'G', 'GS', 'MP', 'FG', 'FGA',
'FG%', '3P', '3PA', '3P%', '2P', '2PA', '2P%', 'eFG%', 'FT', 'FTA',
'FT%', 'ORB', 'DRB', 'TRB', 'AST', 'STL', 'BLK', 'TOV', 'PF', 'PTS',
'Trp-Dbl', '3PM', 'REB'],
dtype='object')
Rk Player Age Team Pos G GS MP FG FGA ... TRB \
0 1 Shai Gilgeous-Alexander 26 OKC PG 23 23 851 233 504 ... 123
1 2 Jalen Brunson 28 NYK PG 18 18 680 181 393 ... 62
2 3 Jalen Williams 23 OKC SG 23 23 796 178 396 ... 127
3 4 Pascal Siakam 30 IND PF 23 23 771 175 341 ... 146
4 5 Tyrese Haliburton 24 IND PG 23 23 772 146 315 ... 123
AST STL BLK TOV PF PTS Trp-Dbl 3PM REB
0 150 38 20 60 65 688 0 32 123
1 126 8 5 55 59 530 0 48 62
2 111 32 9 41 50 492 0 35 127
3 78 28 16 30 66 472 0 35 146
4 197 30 15 53 39 399 1 54 123
[5 rows x 33 columns]
Players remaining after filter: 50
In [11]:
# =====================================================================
# 1. TOP SCORERS – VOLUME LEADERS
# =====================================================================
top_scorers = df.sort_values("PTS", ascending=False).head(10)
print(top_scorers[["Player", "PTS"]])
Player PTS 0 Shai Gilgeous-Alexander 688 1 Jalen Brunson 530 2 Jalen Williams 492 3 Pascal Siakam 472 4 Tyrese Haliburton 399 5 Karl-Anthony Towns 386 6 Anthony Edwards 380 7 Nikola Jokić 367 8 Chet Holmgren 349 9 Julius Randle 326
In [13]:
# =====================================================================
# 1. TOP SCORERS – HORIZONTAL BAR CHART
# =====================================================================
top_scorers = df.sort_values("PTS", ascending=False).head(10)
plt.figure(figsize=(10,6))
plt.barh(top_scorers["Player"], top_scorers["PTS"])
plt.title("Top Scorers (PTS)")
plt.xlabel("Points")
plt.ylabel("Player")
plt.gca().invert_yaxis() # highest at top
plt.tight_layout()
plt.show()
In [15]:
# =====================================================================
# 2. MOST EFFICIENT SCORERS – FG% OR TS%
# =====================================================================
efficient_scorers = df.sort_values("FG%", ascending=False).head(10)
print(efficient_scorers[["Player", "PTS", "FG%"]])
Player PTS FG% 28 Isaiah Hartenstein 186 0.619403 75 Mitchell Robinson 84 0.607843 56 Rudy Gobert 118 0.582278 22 T.J. McConnell 218 0.537143 21 Jaden McDaniels 220 0.514793 3 Pascal Siakam 472 0.513196 34 Naz Reid 156 0.509091 9 Julius Randle 326 0.502183 60 Isaiah Joe 107 0.493151 7 Nikola Jokić 367 0.488806
In [17]:
import matplotlib.pyplot as plt
# Filter real contributors
efficient = df[df["FGA"] >= 50].sort_values("FG%", ascending=False).head(12)
plt.figure(figsize=(10,6))
# RED bars
plt.bar(efficient["Player"].astype(str), efficient["FG%"] * 100, color="darkred")
plt.title("Most Efficient Scorers (FG% | Min 50 FGA)", fontsize=14)
plt.xlabel("Player")
plt.ylabel("Field Goal %")
plt.xticks(rotation=45, ha='right')
plt.tight_layout()
plt.show()
In [19]:
# =====================================================================
# 3. ELITE PERFORMERS – CLEAN REALISTIC VERSION
# =====================================================================
# Elite Score = Points × Field Goal Efficiency
df["EliteScore"] = df["PTS"] * df["FG%"]
elite_performers = df.sort_values("EliteScore", ascending=False).head(10)
print(
elite_performers[["Player", "PTS", "FG%", "EliteScore"]]
.to_string(index=False)
)
Player PTS FG% EliteScore
Shai Gilgeous-Alexander 688 0.462302 318.063492
Jalen Brunson 530 0.460560 244.096692
Pascal Siakam 472 0.513196 242.228739
Jalen Williams 492 0.449495 221.151515
Karl-Anthony Towns 386 0.487719 188.259649
Tyrese Haliburton 399 0.463492 184.933333
Nikola Jokić 367 0.488806 179.391791
Anthony Edwards 380 0.453020 172.147651
Julius Randle 326 0.502183 163.711790
Chet Holmgren 349 0.462406 161.379699
In [150]:
# =====================================================================
# 4. PLAYMAKERS – REALISTIC ASSIST / TURNOVER RATIO
# =====================================================================
# Filter out players with small samples to keep results realistic
filtered_df = df[
(df["MP"] >= 150) & # must play real minutes
(df["G"] >= 10) & # must play real games
(df["AST"] >= 30) # optional, ensures real assist volume
].copy()
# Safe AST/TO calculation (avoids division by zero)
filtered_df["AST_TO"] = filtered_df["AST"] / filtered_df["TOV"].replace(0, np.nan)
# Get top playmakers
playmakers = filtered_df.sort_values("AST_TO", ascending=False).head(10)
# Clean readable output
print(
playmakers[["Player", "AST", "TOV", "AST_TO"]]
.to_string(index=False)
)
Player AST TOV AST_TO
Jimmy Butler 57 13 4.384615
Mike Conley 50 12 4.166667
Tyrese Haliburton 197 53 3.716981
Cason Wallace 48 16 3.000000
Derrick White 39 13 3.000000
Jalen Williams 111 41 2.707317
Alex Caruso 51 19 2.684211
Pascal Siakam 78 30 2.600000
Andrew Nembhard 107 42 2.547619
Shai Gilgeous-Alexander 150 60 2.500000
In [21]:
# =====================================================================
# 5. REBOUNDERS – DOMINANCE ON THE GLASS
# =====================================================================
rebounders = df.sort_values("TRB", ascending=False).head(10)
print(rebounders[["Player", "TRB"]])
Player TRB 5 Karl-Anthony Towns 209 8 Chet Holmgren 199 7 Nikola Jokić 178 28 Isaiah Hartenstein 173 26 Josh Hart 158 3 Pascal Siakam 146 13 Aaron Nesmith 130 56 Rudy Gobert 129 2 Jalen Williams 127 75 Mitchell Robinson 127
In [106]:
# =====================================================================
# 6. 3-POINT SHOOTERS
# =====================================================================
three_point_shooters = df.sort_values("3PM", ascending=False).head(10)
print(three_point_shooters[["Player", "3PM", "3P%"]])
Player 3PM 3P% 13 Aaron Nesmith 60 0.491803 4 Tyrese Haliburton 54 0.339623 29 Luguentz Dort 48 0.342857 1 Jalen Brunson 48 0.358209 6 Anthony Edwards 46 0.353846 12 OG Anunoby 41 0.338843 27 Derrick White 40 0.384615 14 Andrew Nembhard 40 0.465116 24 Alex Caruso 37 0.411111 2 Jalen Williams 35 0.304348
In [23]:
import matplotlib.pyplot as plt
# Make sure the 3PM column exists
df["3PM"] = df["3P"]
# Recreate the three_point DataFrame
three_point = df.sort_values("3PM", ascending=False).head(10)
plt.figure(figsize=(10,6))
plt.stem(three_point["Player"], three_point["3PM"], linefmt='blue', markerfmt='o', basefmt=" ")
plt.title("Top 3-Point Shooters (3PM)")
plt.xlabel("Player")
plt.ylabel("3PM")
plt.xticks(rotation=45, ha="right")
plt.tight_layout()
plt.show()
In [25]:
# =====================================================================
# 7. DEFENSIVE IMPACT – STEALS + BLOCKS
# =====================================================================
df["DEF_IMPACT"] = df["STL"] + df["BLK"]
defenders = df.sort_values("DEF_IMPACT", ascending=False).head(10)
print(defenders[["Player", "STL", "BLK", "DEF_IMPACT"]])
Player STL BLK DEF_IMPACT 8 Chet Holmgren 17 43 60 0 Shai Gilgeous-Alexander 38 20 58 10 Myles Turner 12 46 58 24 Alex Caruso 45 13 58 12 OG Anunoby 36 22 58 15 Mikal Bridges 30 17 47 4 Tyrese Haliburton 30 15 45 3 Pascal Siakam 28 16 44 50 Cason Wallace 32 10 42 2 Jalen Williams 32 9 41
In [31]:
# =====================================================================
# 8. ALL-AROUND PLAYERS – COMPLETE STAT INDEX
# =====================================================================
# Create weighted all-around score
df["AllAroundScore"] = (
df["PTS"] * 0.4 +
df["REB"] * 0.2 +
df["AST"] * 0.25 +
(df["STL"] + df["BLK"]) * 0.15
)
# Top 10 all-around players
all_around = df.sort_values("AllAroundScore", ascending=False).head(10)
# Clean output
print(
all_around[["Player", "PTS", "REB", "AST", "STL", "BLK", "AllAroundScore"]]
.to_string(index=False)
)
Player PTS REB AST STL BLK AllAroundScore
Shai Gilgeous-Alexander 688 123 150 38 20 346.00
Jalen Brunson 530 62 126 8 5 257.85
Jalen Williams 492 127 111 32 9 256.10
Pascal Siakam 472 146 78 28 16 244.10
Tyrese Haliburton 399 123 197 30 15 240.20
Nikola Jokić 367 178 112 28 12 216.40
Karl-Anthony Towns 386 209 24 13 12 205.95
Anthony Edwards 380 117 82 17 11 200.10
Chet Holmgren 349 199 22 17 43 193.90
Julius Randle 326 88 74 12 2 168.60
In [29]:
import matplotlib.pyplot as plt
# Make sure the AllAroundScore exists
df["AllAroundScore"] = (
df["PTS"] * 0.4 +
df["REB"] * 0.2 +
df["AST"] * 0.25 +
(df["STL"] + df["BLK"]) * 0.15
)
# Top 10 all-around players
all_around = df.sort_values("AllAroundScore", ascending=False).head(10)
# Line Chart
plt.figure(figsize=(12,6))
plt.plot(all_around["Player"], all_around["AllAroundScore"], marker='o', linestyle='-', color='red')
plt.title("Top All-Around Players (Complete Stat Index) – Line Chart", fontsize=14)
plt.xlabel("Player")
plt.ylabel("All-Around Score")
plt.xticks(rotation=45, ha='right')
plt.grid(True, linestyle='--', alpha=0.3)
plt.tight_layout()
plt.show()
In [57]:
# =====================================================================
# 9. LEAGUE AVERAGES – CONTEXT
# =====================================================================
league_averages = df.mean(numeric_only=True)
print(league_averages)
Rk 100.500000 Age 26.700000 G 8.805000 GS 4.200000 MP 202.095000 FG 32.690000 FGA 71.720000 FG% 0.466366 3P 10.450000 3PA 29.470000 3P% 0.350057 2P 22.240000 2PA 42.250000 2P% 0.530186 eFG% 0.545370 FT 15.480000 FTA 19.975000 FT% 0.735293 ORB 9.205000 DRB 26.385000 TRB 35.590000 AST 18.980000 STL 6.640000 BLK 4.155000 TOV 10.960000 PF 17.375000 PTS 91.310000 Trp-Dbl 0.040000 3PM 10.450000 REB 35.590000 DEF_IMPACT 10.795000 AllAroundScore 50.006250 dtype: float64
In [112]:
# =====================================================================
# 10. STATISTICAL OUTLIERS – Z-SCORES
# =====================================================================
from scipy.stats import zscore
numeric_cols = df.select_dtypes(include=[np.number]).columns
df_z = df[numeric_cols].apply(zscore)
df["Outlier"] = (df_z.abs() > 2.5).any(axis=1)
outliers = df[df["Outlier"] == True][["Player"] + list(numeric_cols)]
print(outliers)
Player Rk Age G GS MP FG FGA FG% 3P \
0 Shai Gilgeous-Alexander 1 26 23 23 851 233 504 0.462302 32
4 Tyrese Haliburton 5 24 23 23 772 146 315 0.463492 54
5 Karl-Anthony Towns 6 29 18 18 639 139 285 0.487719 26
7 Nikola Jokić 8 29 14 14 563 131 268 0.488806 27
8 Chet Holmgren 9 22 23 23 686 123 266 0.462406 27
10 Myles Turner 11 28 23 23 675 106 219 0.484018 31
13 Aaron Nesmith 14 25 23 23 650 100 212 0.471698 60
24 Alex Caruso 25 30 23 0 562 72 160 0.450000 37
25 Jimmy Butler 26 35 11 11 397 68 152 0.447368 11
28 Isaiah Hartenstein 29 26 23 20 516 83 134 0.619403 0
60 Isaiah Joe 61 25 21 0 211 36 73 0.493151 23
67 Mike Conley 68 37 15 15 356 29 96 0.302083 20
69 Al Horford 70 38 11 9 348 34 72 0.472222 14
73 Kristaps Porziņģis 74 29 11 7 231 25 79 0.316456 4
75 Mitchell Robinson 76 26 18 4 370 31 51 0.607843 0
89 Peyton Watson 90 22 14 0 199 24 59 0.406780 7
... BLK TOV PF PTS Trp-Dbl 3PM REB EliteScore AST_TO \
0 ... 20 60 65 688 0 32 123 318.063492 2.500000
4 ... 15 53 39 399 1 54 123 184.933333 3.716981
5 ... 12 40 78 386 0 26 209 188.259649 0.600000
7 ... 12 55 48 367 3 27 178 179.391791 2.036364
8 ... 43 28 47 349 0 27 199 161.379699 0.785714
10 ... 46 38 68 317 0 31 110 153.433790 0.842105
13 ... 18 23 95 291 0 60 130 137.264151 1.130435
24 ... 13 19 63 212 0 37 62 95.400000 2.684211
25 ... 3 13 19 211 0 11 73 94.394737 4.384615
28 ... 12 27 57 186 0 0 173 115.208955 1.888889
60 ... 2 7 13 107 0 23 29 52.767123 2.000000
67 ... 3 12 21 90 0 20 44 27.187500 4.166667
69 ... 14 7 30 88 0 14 66 41.555556 2.857143
73 ... 9 10 26 85 0 4 51 26.898734 0.800000
75 ... 14 13 38 84 0 0 127 51.058824 0.538462
89 ... 9 8 21 63 0 7 41 25.627119 0.500000
DEF_IMPACT
0 58
4 45
5 25
7 40
8 60
10 58
13 38
24 58
25 17
28 31
60 9
67 12
69 21
73 19
75 31
89 20
[16 rows x 34 columns]