patrickramos committed on
Commit
1f837c6
·
1 Parent(s): 82df431

Add Strike%, F-Str%, PAR%, and Behind%

Browse files
Files changed (3) hide show
  1. pitch_leaderboard.py +3 -3
  2. player_team_leaderboard.py +3 -3
  3. stats.py +19 -5
pitch_leaderboard.py CHANGED
@@ -10,9 +10,9 @@ from stats import compute_pitch_stats, filter_data_by_date_and_game_kind
10
  from convert import ball_kind, ball_kind_to_color, get_text_color_from_color, team_names_short_to_color, get_text_color_from_team
11
  from plotting import stat_cmap
12
 
13
- STATS = ['Count', 'Usage', 'Avg Velo', 'Max Velo', 'Swing%', 'Z-Swing%', 'Chase%', 'Contact%', 'Z-Contact%', 'O-Contact%', 'SwStr%', 'Whiff%', 'CSW%', 'Ball%', 'GB%', 'FB%', 'LD%', 'Zone%', 'Arm%', 'Glove%', 'High%', 'Low%', 'MM%']
14
- PCT_STATS = ['Usage', 'Swing%', 'Z-Swing%', 'Chase%', 'Contact%', 'Z-Contact%', 'O-Contact%', 'SwStr%', 'Whiff%', 'CSW%', 'Ball%', 'GB%', 'FB%', 'LD%', 'Zone%', 'Arm%', 'Glove%', 'High%', 'Low%', 'MM%']
15
- STATS_WITH_PCTLS = ['Swing%', 'Z-Swing%', 'Chase%', 'Contact%', 'Z-Contact%', 'O-Contact%', 'SwStr%', 'Whiff%', 'CSW%', 'Ball%', 'GB%', 'FB%', 'LD%', 'Zone%']
16
  COLUMNS = ['Pitcher', 'Team', 'Throws', 'Pitch', 'Pitch (General)'] + STATS
17
 
18
  PITCH_TYPES = [pitch_type for pitch_type in ball_kind.values() if pitch_type != '-']
 
10
  from convert import ball_kind, ball_kind_to_color, get_text_color_from_color, team_names_short_to_color, get_text_color_from_team
11
  from plotting import stat_cmap
12
 
13
+ STATS = ['Count', 'Usage', 'Avg Velo', 'Max Velo', 'Swing%', 'Z-Swing%', 'Chase%', 'Contact%', 'Z-Contact%', 'O-Contact%', 'SwStr%', 'Whiff%', 'CSW%', 'Strike%', 'Ball%', 'F-Str%', 'PAR%', 'GB%', 'FB%', 'LD%', 'Zone%', 'Arm%', 'Glove%', 'High%', 'Low%', 'MM%', 'Behind%']
14
+ PCT_STATS = ['Usage', 'Swing%', 'Z-Swing%', 'Chase%', 'Contact%', 'Z-Contact%', 'O-Contact%', 'SwStr%', 'Whiff%', 'CSW%', 'Strike%', 'Ball%', 'F-Str%', 'PAR%', 'GB%', 'FB%', 'LD%', 'Zone%', 'Arm%', 'Glove%', 'High%', 'Low%', 'MM%', 'Behind%']
15
+ STATS_WITH_PCTLS = ['Swing%', 'Z-Swing%', 'Chase%', 'Contact%', 'Z-Contact%', 'O-Contact%', 'SwStr%', 'Whiff%', 'CSW%', 'Strike%', 'Ball%', 'F-Str%', 'PAR%', 'GB%', 'FB%', 'LD%', 'Zone%']
16
  COLUMNS = ['Pitcher', 'Team', 'Throws', 'Pitch', 'Pitch (General)'] + STATS
17
 
18
  PITCH_TYPES = [pitch_type for pitch_type in ball_kind.values() if pitch_type != '-']
player_team_leaderboard.py CHANGED
@@ -57,9 +57,9 @@ def create_player_team_leaderboard_app(player_team_type):
57
 
58
  # stats
59
  if pitching:
60
- pct_stats = ['K%', 'BB%', 'Swing%', 'Z-Swing%', 'Chase%', 'Contact%', 'Z-Contact%', 'O-Contact%', 'SwStr%', 'Whiff%', 'CSW%', 'Ball%', 'GB%', 'FB%', 'LD%', 'Zone%', 'Arm%', 'Glove%', 'High%', 'Low%', 'MM%']
61
- stats_with_pctls = ['FB Velo', 'K%', 'BB%', 'Swing%', 'Z-Swing%', 'Chase%', 'Contact%', 'Z-Contact%', 'O-Contact%', 'SwStr%', 'Whiff%', 'CSW%', 'Ball%', 'GB%', 'FB%', 'LD%', 'Zone%']
62
- cols = ['Pitcher', 'Team', 'Throws', 'IP', 'TBF', 'FB Velo', 'K%', 'BB%', 'Swing%', 'Z-Swing%', 'Chase%', 'Contact%', 'Z-Contact%', 'O-Contact%', 'SwStr%', 'Whiff%', 'CSW%', 'Ball%', 'GB%', 'FB%', 'LD%', 'Zone%', 'Arm%', 'Glove%', 'High%', 'Low%', 'MM%']
63
  if team:
64
  cols = [col for col in cols if col not in ('Pitcher', 'Throws')]
65
  else:
 
57
 
58
  # stats
59
  if pitching:
60
+ pct_stats = ['K%', 'BB%', 'Swing%', 'Z-Swing%', 'Chase%', 'Contact%', 'Z-Contact%', 'O-Contact%', 'SwStr%', 'Whiff%', 'CSW%', 'Strike%', 'Ball%', 'F-Str%', 'PAR%', 'GB%', 'FB%', 'LD%', 'Zone%', 'Arm%', 'Glove%', 'High%', 'Low%', 'MM%', 'Behind%']
61
+ stats_with_pctls = ['FB Velo', 'K%', 'BB%', 'Swing%', 'Z-Swing%', 'Chase%', 'Contact%', 'Z-Contact%', 'O-Contact%', 'SwStr%', 'Whiff%', 'CSW%', 'Strike%', 'F-Str%', 'PAR%', 'Ball%', 'GB%', 'FB%', 'LD%', 'Zone%', 'Behind%']
62
+ cols = ['Pitcher', 'Team', 'Throws', 'IP', 'TBF', 'FB Velo', 'K%', 'BB%', 'Swing%', 'Z-Swing%', 'Chase%', 'Contact%', 'Z-Contact%', 'O-Contact%', 'SwStr%', 'Whiff%', 'CSW%', 'Strike%', 'Ball%', 'F-Str%', 'PAR%', 'GB%', 'FB%', 'LD%', 'Zone%', 'Arm%', 'Glove%', 'High%', 'Low%', 'MM%', 'Behind%']
63
  if team:
64
  cols = [col for col in cols if col not in ('Pitcher', 'Throws')]
65
  else:
stats.py CHANGED
@@ -18,6 +18,12 @@ whiff = (pl.col('whiff').sum() / pl.col('swing').sum()).alias('Whiff%')
18
  swstr = (pl.col('whiff').sum() / pl.col('pitch').sum()).alias('SwStr%')
19
  csw = (pl.col('csw').sum() / pl.col('pitch').sum()).alias('CSW%')
20
  ball = ((pl.col('presult') == 'Ball').sum() / pl.col('pitch').sum()).alias('Ball%')
 
 
 
 
 
 
21
  zone = (pl.col('zone').sum() / pl.col('pitch').sum()).alias('Zone%')
22
  glove = (pl.when(pl.col('pitLR') == 'r').then(pl.col('x') < 0).otherwise(pl.col('x') > 0)).mean().alias('Glove%')
23
  arm = (pl.when(pl.col('pitLR') == 'r').then(pl.col('x') >= 0).otherwise(pl.col('x') <= 0)).mean().alias('Arm%')
@@ -139,13 +145,17 @@ def compute_pitch_stats(data, player_type, pitch_class_type, min_pitches=1, pitc
139
  whiff,
140
  swstr,
141
  csw,
 
142
  ball,
 
 
143
  zone,
144
  glove,
145
  arm,
146
  high,
147
  low,
148
  mm,
 
149
  )
150
  .with_columns(
151
  (pl.col('count')/pl.sum('count').over('pitId')).alias('usage'),
@@ -162,8 +172,8 @@ def compute_pitch_stats(data, player_type, pitch_class_type, min_pitches=1, pitc
162
  )
163
  .drop('G', 'F', 'B', 'P', 'L', 'null')
164
  .with_columns(
165
- (pl.when(pl.col('qualified')).then(pl.col(stat)).rank(descending=((stat in ['FB%', 'LD%', 'Ball%'] or 'Contact%' in stat)))/pl.when(pl.col('qualified')).then(pl.col(stat)).count()).alias(f'{stat}_pctl')
166
- for stat in ['Avg KPH', 'Max KPH', 'Avg MPH', 'Max MPH', 'Swing%', 'Z-Swing%', 'Chase%', 'Contact%', 'Z-Contact%', 'O-Contact%', 'SwStr%', 'Whiff%', 'CSW%', 'Ball%', 'GB%', 'FB%', 'LD%', 'Zone%']
167
  )
168
  .rename({pitch_col: 'ballKind_code', pitch_name_col: 'ballKind'} if pitch_class_type == 'general' else {})
169
  .sort(id_cols[0], 'count', descending=[False, True])
@@ -224,9 +234,9 @@ def compute_player_stats(data, player_type, qual='qualified', pitcher_lr='both',
224
 
225
  # percentile ascending/descending
226
  if pitching:
227
- stat_descending_pctl = lambda stat: stat in ['BB%', 'Ball%', 'FB%', 'LD%', 'Z-Swing%', 'OBP'] or 'Contact%' in stat
228
  else:
229
- stat_descending_pctl = lambda stat: not (stat in ['BB%', 'Ball%', 'FB%', 'LD%', 'Swing%', 'Z-Swing%', 'OBP'] or 'Contact%' in stat)
230
 
231
  # col names
232
  match player_type:
@@ -273,13 +283,17 @@ def compute_player_stats(data, player_type, qual='qualified', pitcher_lr='both',
273
  whiff,
274
  swstr,
275
  csw,
 
276
  ball,
 
 
277
  zone,
278
  glove,
279
  arm,
280
  high,
281
  low,
282
  mm,
 
283
  pl.col('AB').first(),
284
  h,
285
  bb,
@@ -300,7 +314,7 @@ def compute_player_stats(data, player_type, qual='qualified', pitcher_lr='both',
300
  .drop('G', 'F', 'B', 'P', 'L')
301
  .with_columns(
302
  (pl.when(pl.col('qualified')).then(pl.col(stat)).rank(descending=stat_descending_pctl(stat))/pl.when(pl.col('qualified')).then(pl.col(stat)).count()).alias(f'{stat}_pctl')
303
- for stat in ['FB Velo', 'K%', 'BB%', 'Swing%', 'Z-Swing%', 'Chase%', 'Contact%', 'Z-Contact%', 'O-Contact%', 'SwStr%', 'Whiff%', 'CSW%', 'Ball%', 'GB%', 'FB%', 'LD%', 'Zone%', 'OBP']
304
  )
305
  .sort(qual_col, descending=True)
306
  )
 
18
  swstr = (pl.col('whiff').sum() / pl.col('pitch').sum()).alias('SwStr%')
19
  csw = (pl.col('csw').sum() / pl.col('pitch').sum()).alias('CSW%')
20
  ball = ((pl.col('presult') == 'Ball').sum() / pl.col('pitch').sum()).alias('Ball%')
21
+ strike = ((pl.col('pitch') & (pl.col('presult') != 'Ball')).sum() / pl.col('pitch').sum()).alias('Strike%')
22
+ is_two_str = pl.col('before_s') == 2 # named this way in case I use two_str for 2-Str%
23
+ first_count = (pl.col('before_s') == 0) & (pl.col('before_b') == 0)
24
+ f_strike = ((pl.col('csw') & first_count).sum() / first_count.sum()).alias('F-Str%')
25
+ par = (((is_two_str & pl.col('presult').str.contains('strikeout')).sum()) / is_two_str.sum()).alias('PAR%')
26
+ behind = (((pl.col('before_b') > pl.col('before_s')) & (pl.col('before_s') < 2) & (pl.col('before_b') > 1)).sum() / pl.len()).alias('Behind%')
27
  zone = (pl.col('zone').sum() / pl.col('pitch').sum()).alias('Zone%')
28
  glove = (pl.when(pl.col('pitLR') == 'r').then(pl.col('x') < 0).otherwise(pl.col('x') > 0)).mean().alias('Glove%')
29
  arm = (pl.when(pl.col('pitLR') == 'r').then(pl.col('x') >= 0).otherwise(pl.col('x') <= 0)).mean().alias('Arm%')
 
145
  whiff,
146
  swstr,
147
  csw,
148
+ strike,
149
  ball,
150
+ f_strike,
151
+ par,
152
  zone,
153
  glove,
154
  arm,
155
  high,
156
  low,
157
  mm,
158
+ behind
159
  )
160
  .with_columns(
161
  (pl.col('count')/pl.sum('count').over('pitId')).alias('usage'),
 
172
  )
173
  .drop('G', 'F', 'B', 'P', 'L', 'null')
174
  .with_columns(
175
+ (pl.when(pl.col('qualified')).then(pl.col(stat)).rank(descending=((stat in ['FB%', 'LD%', 'Ball%', 'Behind%'] or 'Contact%' in stat)))/pl.when(pl.col('qualified')).then(pl.col(stat)).count()).alias(f'{stat}_pctl')
176
+ for stat in ['Avg KPH', 'Max KPH', 'Avg MPH', 'Max MPH', 'Swing%', 'Z-Swing%', 'Chase%', 'Contact%', 'Z-Contact%', 'O-Contact%', 'SwStr%', 'Whiff%', 'CSW%', 'Strike%', 'Ball%', 'F-Str%', 'PAR%', 'GB%', 'FB%', 'LD%', 'Zone%', 'Behind%']
177
  )
178
  .rename({pitch_col: 'ballKind_code', pitch_name_col: 'ballKind'} if pitch_class_type == 'general' else {})
179
  .sort(id_cols[0], 'count', descending=[False, True])
 
234
 
235
  # percentile ascending/descending
236
  if pitching:
237
+ stat_descending_pctl = lambda stat: stat in ['BB%', 'Ball%', 'FB%', 'LD%', 'Z-Swing%', 'Behind%', 'OBP'] or 'Contact%' in stat
238
  else:
239
+ stat_descending_pctl = lambda stat: not (stat in ['BB%', 'Ball%', 'FB%', 'LD%', 'Swing%', 'Z-Swing%', 'Behind%', 'OBP'] or 'Contact%' in stat)
240
 
241
  # col names
242
  match player_type:
 
283
  whiff,
284
  swstr,
285
  csw,
286
+ strike,
287
  ball,
288
+ f_strike,
289
+ par,
290
  zone,
291
  glove,
292
  arm,
293
  high,
294
  low,
295
  mm,
296
+ behind,
297
  pl.col('AB').first(),
298
  h,
299
  bb,
 
314
  .drop('G', 'F', 'B', 'P', 'L')
315
  .with_columns(
316
  (pl.when(pl.col('qualified')).then(pl.col(stat)).rank(descending=stat_descending_pctl(stat))/pl.when(pl.col('qualified')).then(pl.col(stat)).count()).alias(f'{stat}_pctl')
317
+ for stat in ['FB Velo', 'K%', 'BB%', 'Swing%', 'Z-Swing%', 'Chase%', 'Contact%', 'Z-Contact%', 'O-Contact%', 'SwStr%', 'Whiff%', 'CSW%', 'Strike%', 'Ball%', 'F-Str%', 'PAR%', 'GB%', 'FB%', 'LD%', 'Zone%', 'Behind%', 'OBP']
318
  )
319
  .sort(qual_col, descending=True)
320
  )