Simple Zone Rating

This is my zone rating system, as written about in The Hardball Times. So far, only the Retrosheet version of SZR is being made available.

WARNING: Some of these queries can take a long time to run!

CREATE TABLE bat_hand_averages AS
SELECT YEAR_ID
    , BAT_HAND_CD
    , PIT_HAND_CD
    , AVG(IF(FLD_CD="1",1,0)) AS FLD1
    , AVG(IF(FLD_CD="2",1,0)) AS FLD2
    , AVG(IF(FLD_CD="3",1,0)) AS FLD3
    , AVG(IF(FLD_CD="4",1,0)) AS FLD4
    , AVG(IF(FLD_CD="5",1,0)) AS FLD5
    , AVG(IF(FLD_CD="6",1,0)) AS FLD6
    , AVG(IF(FLD_CD="7",1,0)) AS FLD7
    , AVG(IF(FLD_CD="8",1,0)) AS FLD8
    , AVG(IF(FLD_CD="9",1,0)) AS FLD9
FROM retrosheet.events
WHERE BAT_EVENT_FL = "T"
AND BAT_DEST_ID = "0"
AND EVENT_CD != "3"
GROUP BY YEAR_ID, BAT_HAND_CD, PIT_HAND_CD;
 
CREATE INDEX bat_hand_averages_idx 
ON bat_hand_averages (YEAR_ID, BAT_HAND_CD, PIT_HAND_CD);
 
CREATE TABLE BAT_AiB_RAW AS
SELECT YEAR_ID
    , BAT_ID
    , SUM(IF(BATTEDBALL_CD!="",1,0)) AS BAT_OPS
    , SUM(IF(BATTEDBALL_CD IN ("F","L","P"),1,0))/SUM(IF(BATTEDBALL_CD!="",1,0)) AS AiB_RATE
FROM retrosheet.events
GROUP BY YEAR_ID, BAT_ID;
 
CREATE TABLE PIT_AiB_RAW AS
SELECT YEAR_ID
    , PIT_ID
    , SUM(IF(BATTEDBALL_CD!="",1,0)) AS PIT_OPS
    , SUM(IF(BATTEDBALL_CD IN ("F","L","P"),1,0))/SUM(IF(BATTEDBALL_CD!="",1,0)) AS AiB_RATE
FROM retrosheet.events
GROUP BY YEAR_ID, PIT_ID;
 
CREATE TABLE BAT_AiB_AVG AS
SELECT *
    , - lgAiB_OPS_BAT * (R-1)/ R AS C
FROM (SELECT YEAR_ID
    , (SUM(AiB_RATE*BAT_OPS)/SUM(BAT_OPS)) AS lgAiB_RATE_BAT
    , AVG(BAT_OPS) AS lgAiB_OPS_BAT
    , VAR_SAMP(AiB_RATE) AS VAR_OBS
    , (SUM(AiB_RATE*BAT_OPS)/SUM(BAT_OPS))*(1 - (SUM(AiB_RATE*BAT_OPS)/SUM(BAT_OPS)))/AVG(BAT_OPS) AS VAR_RAND
    , (VAR_SAMP(AiB_RATE)- (SUM(AiB_RATE*BAT_OPS)/SUM(BAT_OPS))*(1 - (SUM(AiB_RATE*BAT_OPS)/SUM(BAT_OPS)))/AVG(BAT_OPS))/ VAR_SAMP(AiB_RATE) AS R
FROM bat_aib_raw
GROUP BY YEAR_ID) A;
 
CREATE TABLE PIT_AiB_AVG AS
SELECT *
    , - lgAiB_OPS_PIT * (R-1)/ R AS C
FROM (SELECT YEAR_ID
    , (SUM(AiB_RATE*PIT_OPS)/SUM(PIT_OPS)) AS lgAiB_RATE_PIT
    , AVG(PIT_OPS) AS lgAiB_OPS_PIT
    , VAR_SAMP(AiB_RATE) AS VAR_OBS
    , (SUM(AiB_RATE*pit_OPS)/SUM(pit_OPS))*(1 - (SUM(AiB_RATE*pit_OPS)/SUM(pit_OPS)))/AVG(pit_OPS) AS VAR_RAND
    , (VAR_SAMP(AiB_RATE)- (SUM(AiB_RATE*pit_OPS)/SUM(pit_OPS))*(1 - (SUM(AiB_RATE*pit_OPS)/SUM(pit_OPS)))/AVG(pit_OPS))/ VAR_SAMP(AiB_RATE) AS R
FROM pit_aib_raw
GROUP BY YEAR_ID) A;
 
CREATE TABLE bat_aib AS
SELECT r.*
    , lgAiB_RATE_BAT
    , (COALESCE(AiB_RATE,0) * BAT_OPS + lgAiB_RATE_BAT * C) / (BAT_OPS + C) AS AiB_RATE_R
FROM bat_aib_avg a, bat_aib_raw r
WHERE a.YEAR_ID = r.YEAR_ID;
 
CREATE TABLE pit_aib AS
SELECT r.*
    , (COALESCE(AiB_RATE,0) * PIT_OPS + lgAiB_RATE_PIT * C) / (PIT_OPS + C) AS AiB_RATE_R
FROM pit_aib_avg a, pit_aib_raw r
WHERE a.YEAR_ID = r.YEAR_ID;
 
CREATE INDEX bat_aib_idx
ON bat_aib (BAT_ID, YEAR_ID);
 
CREATE INDEX pit_aib_idx
ON pit_aib (PIT_ID, YEAR_ID);
 
CREATE TABLE infield_plays_chances AS
SELECT POS3_FLD_ID AS FLD_ID
    , e.YEAR_ID
    , FLD_TEAM_ID
    , "3" AS POS
    , SUM(IF(FLD_CD = "3" AND BATTEDBALL_CD NOT IN ("F","L","P") AND (PO1_FLD_CD = "3" OR ASS1_FLD_CD = "3"),1,0)) AS PM
    , SUM(CASE WHEN H_CD BETWEEN 1 AND 3 THEN a.FLD3 * (1 - (b.AiB_RATE_R + p.AiB_RATE_R - b.lgAiB_RATE_BAT * 2))
        WHEN FLD_CD = "3" AND BATTEDBALL_CD NOT IN ("F","L","P") AND (PO1_FLD_CD = "3" OR ASS1_FLD_CD = "3" OR ERR1_FLD_CD = "3") THEN 1
        ELSE 0 END) AS CHANCES
FROM retrosheet.events e, bat_hand_averages a, bat_aib b, pit_aib p
WHERE e.YEAR_ID = a.YEAR_ID
AND e.BAT_HAND_CD = a.BAT_HAND_CD
AND e.PIT_HAND_CD = a.PIT_HAND_CD
AND e.BAT_ID = b.BAT_ID
AND e.YEAR_ID = b.YEAR_ID
AND e.PIT_ID = p.PIT_ID
AND e.YEAR_ID = p.YEAR_ID
GROUP BY POS3_FLD_ID, e.YEAR_ID, FLD_TEAM_ID
UNION ALL
SELECT POS4_FLD_ID
    , e.YEAR_ID
    , FLD_TEAM_ID
    , "4" AS POS
    , SUM(IF(FLD_CD = "4" AND BATTEDBALL_CD NOT IN ("F","L","P") AND (PO1_FLD_CD = "4" OR ASS1_FLD_CD = "4"),1,0)) AS PM
    , SUM(CASE WHEN H_CD BETWEEN 1 AND 3 THEN a.FLD4 * (1 - (b.AiB_RATE_R + p.AiB_RATE_R - b.lgAiB_RATE_BAT * 2))
        WHEN FLD_CD = "4" AND BATTEDBALL_CD NOT IN ("F","L","P") AND (PO1_FLD_CD = "4" OR ASS1_FLD_CD = "4" OR ERR1_FLD_CD = "4") THEN 1
        ELSE 0 END) AS CHANCES
FROM retrosheet.events e, bat_hand_averages a, bat_aib b, pit_aib p
WHERE e.YEAR_ID = a.YEAR_ID
AND e.BAT_HAND_CD = a.BAT_HAND_CD
AND e.PIT_HAND_CD = a.PIT_HAND_CD
AND e.BAT_ID = b.BAT_ID
AND e.YEAR_ID = b.YEAR_ID
AND e.PIT_ID = p.PIT_ID
AND e.YEAR_ID = p.YEAR_ID
GROUP BY POS4_FLD_ID, e.YEAR_ID, FLD_TEAM_ID
UNION ALL
SELECT POS5_FLD_ID
    , e.YEAR_ID
    , FLD_TEAM_ID
    , "5" AS POS
    , SUM(IF(FLD_CD = "5" AND BATTEDBALL_CD NOT IN ("F","L","P") AND (PO1_FLD_CD = "5" OR ASS1_FLD_CD = "5"),1,0)) AS PM
    , SUM(CASE WHEN H_CD BETWEEN 1 AND 3 THEN a.FLD5 * (1 - (b.AiB_RATE_R + p.AiB_RATE_R - b.lgAiB_RATE_BAT * 2))
        WHEN FLD_CD = "5" AND BATTEDBALL_CD NOT IN ("F","L","P") AND (PO1_FLD_CD = "5" OR ASS1_FLD_CD = "5" OR ERR1_FLD_CD = "5") THEN 1
        ELSE 0 END) AS CHANCES
FROM retrosheet.events e, bat_hand_averages a, bat_aib b, pit_aib p
WHERE e.YEAR_ID = a.YEAR_ID
AND e.BAT_HAND_CD = a.BAT_HAND_CD
AND e.PIT_HAND_CD = a.PIT_HAND_CD
AND e.BAT_ID = b.BAT_ID
AND e.YEAR_ID = b.YEAR_ID
AND e.PIT_ID = p.PIT_ID
AND e.YEAR_ID = p.YEAR_ID
GROUP BY POS5_FLD_ID, e.YEAR_ID, FLD_TEAM_ID
UNION ALL
SELECT POS6_FLD_ID
    , e.YEAR_ID
    , FLD_TEAM_ID
    , "6" AS POS
    , SUM(IF(FLD_CD = "6" AND BATTEDBALL_CD NOT IN ("F","L","P") AND (PO1_FLD_CD = "6" OR ASS1_FLD_CD = "6"),1,0)) AS PM
    , SUM(CASE WHEN H_CD BETWEEN 1 AND 3 THEN a.FLD6 * (1 - (b.AiB_RATE_R + p.AiB_RATE_R - b.lgAiB_RATE_BAT * 2))
        WHEN FLD_CD = "6" AND BATTEDBALL_CD NOT IN ("F","L","P") AND (PO1_FLD_CD = "6" OR ASS1_FLD_CD = "6" OR ERR1_FLD_CD = "6") THEN 1
        ELSE 0 END) AS CHANCES
FROM retrosheet.events e, bat_hand_averages a, bat_aib b, pit_aib p
WHERE e.YEAR_ID = a.YEAR_ID
AND e.BAT_HAND_CD = a.BAT_HAND_CD
AND e.PIT_HAND_CD = a.PIT_HAND_CD
AND e.BAT_ID = b.BAT_ID
AND e.YEAR_ID = b.YEAR_ID
AND e.PIT_ID = p.PIT_ID
AND e.YEAR_ID = p.YEAR_ID
GROUP BY POS6_FLD_ID, e.YEAR_ID, FLD_TEAM_ID;
 
CREATE TABLE avg_infield_zr AS
SELECT YEAR_ID
    , POS
    , SUM(PM) AS PM
    , SUM(CHANCES) AS CH
    , SUM(PM)/SUM(CHANCES) AS ZR
FROM infield_plays_chances
GROUP BY YEAR_ID, POS;
 
CREATE INDEX avg_infield_zr_idx
ON avg_infield_zr (YEAR_ID, POS);
 
CREATE TABLE infield_pm AS
SELECT i.FLD_ID
    , i.YEAR_ID
    , i.FLD_TEAM_ID
    , i.POS
    , i.PM
    , i.CHANCES
    , i.PM/i.CHANCES AS ZR
    , i.PM - i.CHANCES*a.ZR AS PLUS_MINUS
FROM infield_plays_chances i, avg_infield_zr a
WHERE i.YEAR_ID = a.YEAR_ID
AND i.POS = a.POS
ORDER BY PLUS_MINUS DESC;
 
CREATE TABLE outfield_plays_chances AS
SELECT POS7_FLD_ID AS FLD_ID
    , e.YEAR_ID
    , FLD_TEAM_ID
    , HOME_TEAM_ID
    , "7" AS POS
    , SUM(IF(FLD_CD = "7" AND BATTEDBALL_CD != "G" AND PO1_FLD_CD = "7",1,0)) AS PM
    , SUM(CASE WHEN H_CD BETWEEN 1 AND 3 THEN a.FLD7 * (1 + (b.AiB_RATE_R + p.AiB_RATE_R - b.lgAiB_RATE_BAT * 2))
        WHEN FLD_CD = "7" AND BATTEDBALL_CD != "G" AND PO1_FLD_CD = "7" THEN 1
        ELSE 0 END) AS CHANCES
FROM retrosheet.events e, bat_hand_averages a, bat_aib b, pit_aib p
WHERE e.YEAR_ID = a.YEAR_ID
AND e.BAT_HAND_CD = a.BAT_HAND_CD
AND e.PIT_HAND_CD = a.PIT_HAND_CD
AND e.BAT_ID = b.BAT_ID
AND e.YEAR_ID = b.YEAR_ID
AND e.PIT_ID = p.PIT_ID
AND e.YEAR_ID = p.YEAR_ID
GROUP BY POS7_FLD_ID, e.HOME_TEAM_ID, e.YEAR_ID, FLD_TEAM_ID
UNION ALL
SELECT POS8_FLD_ID AS FLD_ID
    , e.YEAR_ID
    , FLD_TEAM_ID
    , HOME_TEAM_ID
    , "8" AS POS
    , SUM(IF(FLD_CD = "8" AND BATTEDBALL_CD != "G" AND PO1_FLD_CD = "8",1,0)) AS PM
    , SUM(CASE WHEN H_CD BETWEEN 1 AND 3 THEN a.FLD8 * (1 + (b.AiB_RATE_R + p.AiB_RATE_R - b.lgAiB_RATE_BAT * 2))
        WHEN FLD_CD = "8" AND BATTEDBALL_CD != "G" AND PO1_FLD_CD = "8" THEN 1
        ELSE 0 END) AS CHANCES
FROM retrosheet.events e, bat_hand_averages a, bat_aib b, pit_aib p
WHERE e.YEAR_ID = a.YEAR_ID
AND e.BAT_HAND_CD = a.BAT_HAND_CD
AND e.PIT_HAND_CD = a.PIT_HAND_CD
AND e.BAT_ID = b.BAT_ID
AND e.YEAR_ID = b.YEAR_ID
AND e.PIT_ID = p.PIT_ID
AND e.YEAR_ID = p.YEAR_ID
GROUP BY POS8_FLD_ID, e.HOME_TEAM_ID, e.YEAR_ID, FLD_TEAM_ID
UNION ALL
SELECT POS9_FLD_ID AS FLD_ID
    , e.YEAR_ID
    , FLD_TEAM_ID
    , HOME_TEAM_ID
    , "9" AS POS
    , SUM(IF(FLD_CD = "9" AND BATTEDBALL_CD != "G" AND PO1_FLD_CD = "9",1,0)) AS PM
    , SUM(CASE WHEN H_CD BETWEEN 1 AND 3 THEN a.FLD9 * (1 + (b.AiB_RATE_R + p.AiB_RATE_R - b.lgAiB_RATE_BAT * 2))
        WHEN FLD_CD = "9" AND BATTEDBALL_CD != "G" AND PO1_FLD_CD = "9" THEN 1
        ELSE 0 END) AS CHANCES
FROM retrosheet.events e, bat_hand_averages a, bat_aib b, pit_aib p
WHERE e.YEAR_ID = a.YEAR_ID
AND e.BAT_HAND_CD = a.BAT_HAND_CD
AND e.PIT_HAND_CD = a.PIT_HAND_CD
AND e.BAT_ID = b.BAT_ID
AND e.YEAR_ID = b.YEAR_ID
AND e.PIT_ID = p.PIT_ID
AND e.YEAR_ID = p.YEAR_ID
GROUP BY POS9_FLD_ID, e.HOME_TEAM_ID, e.YEAR_ID, FLD_TEAM_ID;
 
CREATE TABLE team_leagues AS
SELECT t.YEAR_ID, t.TEAM_ID, a.NUM
FROM retrosheet.teams t
, (SELECT YEAR_ID, LG_ID, COUNT(1) AS NUM FROM retrosheet.teams GROUP BY YEAR_ID, LG_ID) a
WHERE t.YEAR_ID = a.YEAR_ID
AND t.LG_ID = a.LG_ID;
 
CREATE INDEX outfield_plays_chances_idx
ON outfield_plays_chances(YEAR_ID, HOME_TEAM_ID);
 
CREATE INDEX team_leagues_idx
ON team_leagues (YEAR_ID, TEAM_ID);
 
CREATE TABLE avg_outfield_zr AS
SELECT o.YEAR_ID
    , HOME_TEAM_ID
    , POS
    , SUM(IF(FLD_TEAM_ID = HOME_TEAM_ID,PM,0)) * 1/t.NUM + SUM(IF(FLD_TEAM_ID != HOME_TEAM_ID,PM,0)) AS PM
    , SUM(IF(FLD_TEAM_ID = HOME_TEAM_ID,CHANCES,0)) * 1/t.NUM + SUM(IF(FLD_TEAM_ID != HOME_TEAM_ID,CHANCES,0)) AS CH
    , (SUM(IF(FLD_TEAM_ID = HOME_TEAM_ID,PM,0)) * 1/t.NUM + SUM(IF(FLD_TEAM_ID != HOME_TEAM_ID,PM,0)))/(SUM(IF(FLD_TEAM_ID = HOME_TEAM_ID,CHANCES,0)) * 1/t.NUM + SUM(IF(FLD_TEAM_ID != HOME_TEAM_ID,CHANCES,0))) AS ZR
FROM outfield_plays_chances o, team_leagues t
WHERE o.YEAR_ID = t.YEAR_ID
AND o.HOME_TEAM_ID = t.TEAM_ID
GROUP BY o.YEAR_ID, POS, HOME_TEAM_ID;
 
CREATE INDEX avg_outfield_zr_idx
ON avg_outfield_zr (YEAR_ID, HOME_TEAM_ID);
 
CREATE TABLE outfield_pm AS
SELECT i.FLD_ID
    , i.YEAR_ID
    , i.FLD_TEAM_ID
    , i.POS
    , SUM(i.PM) AS PM
    , SUM(i.CHANCES) AS CHANCES
    , SUM(i.PM)/SUM(i.CHANCES) AS ZR
    , SUM(i.PM) - SUM(i.CHANCES)*SUM(a.ZR*i.CHANCES)/SUM(i.CHANCES) AS PLUS_MINUS
FROM outfield_plays_chances i, avg_outfield_zr a
WHERE i.YEAR_ID = a.YEAR_ID
AND i.POS = a.POS
AND i.HOME_TEAM_ID = a.HOME_TEAM_ID
GROUP BY i.FLD_ID, i.YEAR_ID, i.FLD_TEAM_ID, i.POS
ORDER BY PLUS_MINUS DESC;
Unless otherwise stated, the content of this page is licensed under Creative Commons Attribution-ShareAlike 3.0 License