Agency Separations Analysis

Author

Abigail Haddad

Published

July 28, 2025

THIS IS A DRAFT! Please talk to me before you use this.

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from great_tables import GT
from pathlib import Path
import gc
import plotly.express as px

# Set up pandas display options
pd.set_option('display.max_columns', None)
pd.set_option('display.width', None)
pd.set_option('display.max_colwidth', None)

# Constants
EMPLOYMENT_PARQUET_DIR = "../../employment_cube/parquet/"
SEPARATIONS_PARQUET_DIR = "../../separations_accessions/parquet/"
MIN_EMPLOYEES = 5000
TOP_N_AGENCIES = 50

# Color configuration
HIGH_SEPARATIONS_COLOR = "#d62728"
LOW_SEPARATIONS_COLOR = "#2ca02c"
NET_POSITIVE_COLOR = "#2ca02c"
NET_NEGATIVE_COLOR = "#d62728"
ACCESSIONS_COLOR = "#1f77b4"
SEPARATIONS_COLOR = "#ff7f0e"

# Utility Functions
def clean_employment(x):
    """Convert employment values to numeric, handling REDACTED and ***** values"""
    if pd.isna(x) or x in ['REDACTED', '*****', '']:
        return 0
    try:
        return int(x)
    except:
        return 0

def calculate_fiscal_year(efdate):
    """Convert efdate (YYYYMM) to fiscal year.
    Fiscal year runs Oct-Sept, so Oct 2018 (201810) = FY2019"""
    efdate_str = str(efdate)
    year = int(efdate_str[:4])
    month = int(efdate_str[4:6])
    # Oct-Dec of year X = FY X+1
    return year + 1 if month >= 10 else year

Data Loading and Processing

Show code

def load_employment_data(fiscal_years, parquet_dir):
    """Load September employment data for fiscal year baselines."""
    employment_aggregated = []
    employment_tenure_aggregated = []
    agency_lookups = None
    loading_summary = []
    
    for fy in fiscal_years:
        baseline_year = fy - 1
        file_path = f"fedscope_employment_September_{baseline_year}.parquet"
        try:
            df = pd.read_parquet(parquet_dir + file_path)
            raw_count = len(df)
            
            df['employment_num'] = df['employment'].apply(clean_employment)
            
            if agency_lookups is None:
                agency_lookups = df[['agysub', 'agysubt']].drop_duplicates()
            
            # Overall employment
            grouped = df.groupby(['agysub']).agg({'employment_num': 'sum'}).reset_index()
            grouped['fiscal_year'] = fy
            grouped.rename(columns={'employment_num': 'employment'}, inplace=True)
            employment_aggregated.append(grouped)
            
            # Tenure-specific employment (A and B levels)
            tenure_grouped = df[df['loslvl'].isin(['A', 'B'])].groupby(['agysub', 'loslvl']).agg({
                'employment_num': 'sum'
            }).reset_index()
            tenure_grouped['fiscal_year'] = fy
            tenure_grouped.rename(columns={'employment_num': 'employment'}, inplace=True)
            employment_tenure_aggregated.append(tenure_grouped)
            
            loading_summary.append({
                'Year': f'FY{fy}',
                'Data Type': 'Employment (Sept)',
                'Raw Records': raw_count,
                'Aggregated Groups': len(grouped)
            })
            
            del df
            gc.collect()
        except Exception as e:
            continue
    
    return (pd.concat(employment_aggregated, ignore_index=True),
            pd.concat(employment_tenure_aggregated, ignore_index=True),
            agency_lookups,
            loading_summary)

def load_separations_data(fiscal_years, parquet_dir):
    """Load separations data from multiple files."""
    separations_files = [
        "fedscope_separations_FY2015-2019.parquet",
        "fedscope_separations_FY2020-2024.parquet",
        "fedscope_separations_March_2025.parquet"
    ]
    
    separations_aggregated = []
    separations_tenure_aggregated = []
    loading_summary = []
    
    for file in separations_files:
        try:
            df = pd.read_parquet(parquet_dir + file)
            raw_count = len(df)
            
            df['fiscal_year'] = df['efdate'].apply(calculate_fiscal_year)
            df = df[df['fiscal_year'].isin(fiscal_years)]
            
            # Overall separations
            grouped = df.groupby(['fiscal_year', 'agysub']).size().reset_index(name='separations')
            separations_aggregated.append(grouped)
            
            # Tenure-specific separations
            tenure_seps = df[df['loslvl'].isin(['A', 'B'])].groupby(
                ['fiscal_year', 'agysub', 'loslvl']
            ).size().reset_index(name='separations')
            separations_tenure_aggregated.append(tenure_seps)
            
            year_range = 'March 2025' if 'March_2025' in file else (
                '2015-2019' if '2015-2019' in file else '2020-2024'
            )
            loading_summary.append({
                'Year': year_range,
                'Data Type': 'Separations',
                'Raw Records': raw_count,
                'Aggregated Groups': len(grouped)
            })
            
            del df
            gc.collect()
        except Exception as e:
            continue
    
    return (pd.concat(separations_aggregated, ignore_index=True),
            pd.concat(separations_tenure_aggregated, ignore_index=True),
            loading_summary)

def load_accessions_data(fiscal_years, parquet_dir):
    """Load accessions data from multiple files."""
    accessions_files = [
        "fedscope_accessions_FY2015-2019.parquet",
        "fedscope_accessions_FY2020-2024.parquet",
        "fedscope_accessions_March_2025.parquet"
    ]
    
    accessions_aggregated = []
    loading_summary = []
    
    for file in accessions_files:
        try:
            df = pd.read_parquet(parquet_dir + file)
            raw_count = len(df)
            
            df['fiscal_year'] = df['efdate'].apply(calculate_fiscal_year)
            
            if 'March_2025' not in file:
                df = df[df['fiscal_year'].isin(fiscal_years)]
            
            grouped = df.groupby(['fiscal_year', 'agysub']).size().reset_index(name='accessions')
            
            if len(grouped) > 0:
                accessions_aggregated.append(grouped)
                aggregated_count = len(grouped)
            else:
                aggregated_count = 0
            
            year_range = 'March 2025' if 'March_2025' in file else (
                '2015-2019' if '2015-2019' in file else '2020-2024'
            )
            loading_summary.append({
                'Year': year_range,
                'Data Type': 'Accessions',
                'Raw Records': raw_count,
                'Aggregated Groups': aggregated_count
            })
            
            del df
            gc.collect()
        except Exception as e:
            continue
    
    return pd.concat(accessions_aggregated, ignore_index=True), loading_summary

def load_tenure_2plus_data(fiscal_years, employment_parquet_dir, separations_parquet_dir):
    """Load employment and separations data for employees with 2+ years tenure."""
    employment_2plus_aggregated = []
    
    for fy in fiscal_years:
        baseline_year = fy - 1
        file_path = f"fedscope_employment_September_{baseline_year}.parquet"
        try:
            df = pd.read_parquet(employment_parquet_dir + file_path)
            df['employment_num'] = df['employment'].apply(clean_employment)
            
            tenure_2plus = df[~df['loslvl'].isin(['A', 'B'])].groupby(['agysub']).agg({
                'employment_num': 'sum'
            }).reset_index()
            tenure_2plus['fiscal_year'] = fy
            tenure_2plus['loslvl'] = 'C+'
            tenure_2plus.rename(columns={'employment_num': 'employment'}, inplace=True)
            employment_2plus_aggregated.append(tenure_2plus)
            
            del df
            gc.collect()
        except Exception as e:
            continue
    
    separations_files = [
        "fedscope_separations_FY2015-2019.parquet",
        "fedscope_separations_FY2020-2024.parquet",
        "fedscope_separations_March_2025.parquet"
    ]
    
    separations_2plus_aggregated = []
    for file in separations_files:
        try:
            df = pd.read_parquet(separations_parquet_dir + file)
            df['fiscal_year'] = df['efdate'].apply(calculate_fiscal_year)
            df = df[df['fiscal_year'].isin(fiscal_years)]
            tenure_2plus_seps = df[~df['loslvl'].isin(['A', 'B'])].groupby(
                ['fiscal_year', 'agysub']
            ).size().reset_index(name='separations')
            tenure_2plus_seps['loslvl'] = 'C+'
            separations_2plus_aggregated.append(tenure_2plus_seps)
            del df
            gc.collect()
        except Exception as e:
            continue
    
    return (pd.concat(employment_2plus_aggregated, ignore_index=True),
            pd.concat(separations_2plus_aggregated, ignore_index=True))

def load_monthly_data_for_heatmaps(parquet_dir):
    """Load monthly accessions and separations data for heatmaps."""
    files = [
        "fedscope_accessions_FY2015-2019.parquet",
        "fedscope_accessions_FY2020-2024.parquet",
        "fedscope_accessions_March_2025.parquet"
    ]
    
    monthly_accessions_data = {}
    for file in files:
        try:
            df = pd.read_parquet(parquet_dir + file)
            df['year'] = df['efdate'].astype(str).str[:4].astype(int)
            df['month'] = df['efdate'].astype(str).str[4:6].astype(int)
            df = df[df['year'].between(2016, 2025)]
            monthly_counts = df.groupby(['agysub', 'year', 'month']).size().reset_index(name='count')
            for agysub in monthly_counts['agysub'].unique():
                if agysub not in monthly_accessions_data:
                    monthly_accessions_data[agysub] = []
                monthly_accessions_data[agysub].append(monthly_counts[monthly_counts['agysub'] == agysub])
            del df
            gc.collect()
        except Exception as e:
            continue
    
    # Same for separations
    files = [
        "fedscope_separations_FY2015-2019.parquet",
        "fedscope_separations_FY2020-2024.parquet",
        "fedscope_separations_March_2025.parquet"
    ]
    
    monthly_separations_data = {}
    for file in files:
        try:
            df = pd.read_parquet(parquet_dir + file)
            df['year'] = df['efdate'].astype(str).str[:4].astype(int)
            df['month'] = df['efdate'].astype(str).str[4:6].astype(int)
            df = df[df['year'].between(2016, 2025)]
            monthly_counts = df.groupby(['agysub', 'year', 'month']).size().reset_index(name='count')
            for agysub in monthly_counts['agysub'].unique():
                if agysub not in monthly_separations_data:
                    monthly_separations_data[agysub] = []
                monthly_separations_data[agysub].append(monthly_counts[monthly_counts['agysub'] == agysub])
            del df
            gc.collect()
        except Exception as e:
            continue
    
    return monthly_accessions_data, monthly_separations_data

def load_employment_time_series(employment_parquet_dir, years_range):
    """Load March and September employment data for time series."""
    employment_time_series_data = {}
    
    for year in years_range:
        # March data
        march_file = f"fedscope_employment_March_{year}.parquet"
        march_path = employment_parquet_dir + march_file
        if Path(march_path).exists():
            try:
                df = pd.read_parquet(march_path)
                df['employment_num'] = df['employment'].apply(clean_employment)
                agency_totals = df.groupby('agysub')['employment_num'].sum().reset_index()
                for _, row in agency_totals.iterrows():
                    agysub = row['agysub']
                    if agysub not in employment_time_series_data:
                        employment_time_series_data[agysub] = []
                    employment_time_series_data[agysub].append({
                        'period': f"March {year}",
                        'year': year,
                        'month': 'March',
                        'employment': int(row['employment_num'])
                    })
                del df
                gc.collect()
            except Exception as e:
                pass
        
        # September data
        sept_file = f"fedscope_employment_September_{year}.parquet"
        sept_path = employment_parquet_dir + sept_file
        if Path(sept_path).exists():
            try:
                df = pd.read_parquet(sept_path)
                df['employment_num'] = df['employment'].apply(clean_employment)
                agency_totals = df.groupby('agysub')['employment_num'].sum().reset_index()
                for _, row in agency_totals.iterrows():
                    agysub = row['agysub']
                    if agysub not in employment_time_series_data:
                        employment_time_series_data[agysub] = []
                    employment_time_series_data[agysub].append({
                        'period': f"September {year}",
                        'year': year,
                        'month': 'September',
                        'employment': int(row['employment_num'])
                    })
                del df
                gc.collect()
            except Exception as e:
                pass
    
    return employment_time_series_data

def get_fiscal_years():
    """Return the fiscal years for analysis."""
    return [2016, 2017, 2018, 2019, 2020, 2021, 2022]

def get_time_series_years():
    """Return the years for time series analysis."""
    return list(range(2013, 2026))

def combine_loading_summaries(emp_summary, sep_summary, acc_summary):
    """Combine all loading summaries into one list."""
    return emp_summary + sep_summary + acc_summary

def combine_tenure_data(employment_tenure_annual, employment_tenure_2plus):
    """Combine tenure annual and 2+ years data."""
    return pd.concat([employment_tenure_annual, employment_tenure_2plus], ignore_index=True)

def load_all_data():
    """Master function to load all data needed for the analysis."""
    fiscal_years = get_fiscal_years()
    time_series_years = get_time_series_years()
    
    employment_annual, employment_tenure_annual, agency_lookups, emp_loading_summary = \
        load_employment_data(fiscal_years, EMPLOYMENT_PARQUET_DIR)
    
    separations_annual, separations_tenure_annual, sep_loading_summary = \
        load_separations_data(fiscal_years, SEPARATIONS_PARQUET_DIR)
    
    accessions_annual, acc_loading_summary = \
        load_accessions_data(fiscal_years, SEPARATIONS_PARQUET_DIR)
    
    employment_tenure_2plus, separations_tenure_2plus = \
        load_tenure_2plus_data(fiscal_years, EMPLOYMENT_PARQUET_DIR, SEPARATIONS_PARQUET_DIR)
    
    monthly_accessions_data, monthly_separations_data = \
        load_monthly_data_for_heatmaps(SEPARATIONS_PARQUET_DIR)
    
    employment_time_series_data = \
        load_employment_time_series(EMPLOYMENT_PARQUET_DIR, time_series_years)
    
    loading_summary = combine_loading_summaries(emp_loading_summary, sep_loading_summary, acc_loading_summary)
    tenure_data = combine_tenure_data(employment_tenure_annual, employment_tenure_2plus)
    
    return {
        'employment_annual': employment_annual,
        'separations_annual': separations_annual,
        'accessions_annual': accessions_annual,
        'tenure_data': tenure_data,
        'separations_tenure_annual': separations_tenure_annual,
        'separations_tenure_2plus': separations_tenure_2plus,
        'agency_lookups': agency_lookups,
        'loading_summary': loading_summary,
        'monthly_accessions_data': monthly_accessions_data,
        'monthly_separations_data': monthly_separations_data,
        'employment_time_series_data': employment_time_series_data
    }

Show code

# Load all data
all_data = load_all_data()

Methodology

How we calculate separation rates:

Separation Rate = (Number of Separations ÷ Total Employment) × 100
Example: If an agency has 10,000 employees and 1,200 separations in a year, the separation rate is 12%

Data timing: - Employment baseline: September data from the previous calendar year (e.g., for FY2016 we use September 2015 employment) - Separations: Count of all employees who left during the fiscal year - Time period: Analysis covers FY2016-FY2022

Minimum thresholds: - Agencies: 5,000+ employees (except Agricultural Marketing Service shown separately) - States: 1,000+ federal employees

Data Loading Summary

Show code

def create_loading_summary_table(loading_summary):
    """Create a formatted table showing data loading summary."""
    loading_df = pd.DataFrame(loading_summary)
    
    gt_loading = (
        GT(loading_df)
        .tab_header(
            title="Data Loading Summary",
            subtitle="Raw records loaded and aggregated by agency"
        )
        .fmt_number(columns=['Raw Records', 'Aggregated Groups'], decimals=0, use_seps=True)
        .cols_width({
            'Year': '100px',
            'Data Type': '120px',
            'Raw Records': '140px',
            'Aggregated Groups': '160px'
        })
    )
    
    return gt_loading

# Display loading summary
create_loading_summary_table(all_data['loading_summary']).show()

Data Loading Summary
Raw records loaded and aggregated by agency
Year	Data Type	Raw Records	Aggregated Groups
FY2016	Employment (Sept)	2,058,924	525
FY2017	Employment (Sept)	2,097,038	533
FY2018	Employment (Sept)	2,087,747	529
FY2019	Employment (Sept)	2,100,802	529
FY2020	Employment (Sept)	2,132,812	527
FY2021	Employment (Sept)	2,181,106	533
FY2022	Employment (Sept)	2,191,011	537
2015-2019	Separations	1,113,647	2,044
2020-2024	Separations	953,844	1,559
2015-2019	Accessions	1,278,187	2,013
2020-2024	Accessions	1,170,401	1,538
March 2025	Accessions	243,563	970

Data Processing and Calculations

Calculate Annual Separation Rates

Show code

def calculate_annual_separation_rates(employment_annual, separations_annual, agency_lookups):
    """Calculate annual separation rates by merging employment and separations data."""
    # Merge employment and separations by fiscal year
    annual_data = pd.merge(
        employment_annual, 
        separations_annual, 
        on=['fiscal_year', 'agysub'],
        how='outer'
    )
    
    # Fill NaN values
    annual_data['employment'] = annual_data['employment'].fillna(0)
    annual_data['separations'] = annual_data['separations'].fillna(0)
    
    # Calculate separation rate (percentage of separations)
    annual_data['separation_rate'] = np.where(
        annual_data['employment'] > 0,
        (annual_data['separations'] / annual_data['employment']) * 100,
        np.nan
    )
    
    # Filter for meaningful sample sizes - keep agencies that meet threshold in ANY year
    agencies_above_threshold = annual_data[annual_data['employment'] >= MIN_EMPLOYEES]['agysub'].unique()
    annual_data = annual_data[annual_data['agysub'].isin(agencies_above_threshold)]
    annual_data = annual_data.dropna(subset=['separation_rate'])
    
    # Merge with agency lookups
    annual_data = pd.merge(annual_data, agency_lookups, on='agysub', how='left')
    
    return annual_data

def calculate_tenure_separation_rates(tenure_data, separations_tenure_annual, 
                                   separations_tenure_2plus, agency_lookups):
    """Calculate tenure-specific separation rates."""
    # Merge A and B tenure data
    tenure_separations = pd.merge(
        tenure_data[tenure_data['loslvl'].isin(['A', 'B'])],
        separations_tenure_annual,
        on=['fiscal_year', 'agysub', 'loslvl'],
        how='outer'
    )
    
    # Merge C+ tenure data
    tenure_2plus_separations = pd.merge(
        tenure_data[tenure_data['loslvl'] == 'C+'],
        separations_tenure_2plus,
        on=['fiscal_year', 'agysub', 'loslvl'],
        how='outer'
    )
    
    # Combine all tenure data
    all_tenure_data = pd.concat([tenure_separations, tenure_2plus_separations], ignore_index=True)
    
    all_tenure_data['employment'] = all_tenure_data['employment'].fillna(0)
    all_tenure_data['separations'] = all_tenure_data['separations'].fillna(0)
    
    all_tenure_data['separation_rate'] = np.where(
        all_tenure_data['employment'] > 0,
        (all_tenure_data['separations'] / all_tenure_data['employment']) * 100,
        np.nan
    )
    
    all_tenure_data = pd.merge(all_tenure_data, agency_lookups, on='agysub', how='left')
    
    return all_tenure_data

# Calculate rates
annual_data = calculate_annual_separation_rates(
    all_data['employment_annual'], 
    all_data['separations_annual'], 
    all_data['agency_lookups']
)

tenure_data = calculate_tenure_separation_rates(
    all_data['tenure_data'],
    all_data['separations_tenure_annual'],
    all_data['separations_tenure_2plus'],
    all_data['agency_lookups']
)

Analysis Results

Top 50 Agencies with Highest Separation Rates

Show code

def get_top_separation_agencies(annual_data, min_employees=5000, top_n=50):
    """Get top N agencies by average separation rate, filtered by minimum employment."""
    # Calculate average statistics across all years for each agency
    agency_avg_stats = annual_data.groupby(['agysub', 'agysubt']).agg({
        'employment': 'mean',
        'separation_rate': 'mean'
    }).reset_index()
    agency_avg_stats.columns = ['agysub', 'agysubt', 'avg_employment', 'avg_separation_rate']
    
    # Filter for agencies with min_employees+ employees
    agencies_filtered = agency_avg_stats[agency_avg_stats['avg_employment'] >= min_employees]
    
    # Sort agencies by average separation rate and get top N
    return agencies_filtered.nlargest(top_n, 'avg_separation_rate').copy()

def create_annual_separations_table(annual_data, high_separation_agencies):
    """Create pivot table of annual separation rates for specified agencies."""
    # Get the annual data for these agencies
    high_separation_annual = annual_data[
        annual_data['agysub'].isin(high_separation_agencies['agysub'])
    ].copy()
    
    # Create pivot table with fiscal years as columns
    annual_pivot = high_separation_annual.pivot_table(
        index=['agysub', 'agysubt'],
        columns='fiscal_year',
        values='separation_rate',
        aggfunc='mean'
    ).reset_index()
    
    # Calculate average for sorting
    year_cols = [col for col in annual_pivot.columns if isinstance(col, (int, float))]
    annual_pivot['avg_rate'] = annual_pivot[year_cols].mean(axis=1)
    
    # Sort by average rate descending
    annual_pivot = annual_pivot.sort_values('avg_rate', ascending=False)
    
    return annual_pivot, year_cols

def format_annual_separations_table(annual_pivot, year_cols):
    """Format the annual separations table for display."""
    display_data = annual_pivot.copy()
    
    # Select columns for display
    table_cols = ['agysubt'] + year_cols + ['avg_rate']
    display_data = display_data[table_cols]
    
    # Rename columns
    new_columns = ['Agency'] + [f'FY{int(col)}' for col in year_cols] + ['Avg']
    display_data.columns = new_columns
    
    # Round all numeric columns to integers
    for col in display_data.columns[1:]:
        display_data[col] = display_data[col].round(0)
        # Fill NaN with 0 before converting to int to avoid Int64 NA issues
        display_data[col] = display_data[col].fillna(0).astype(int)
    
    # Reset index for clean display
    display_data = display_data.reset_index(drop=True)
    
    # Create GT table
    gt_annual = (
        GT(display_data.head(50))
        .tab_header(
            title="Top 50 Agencies with Highest Separation Rates",
            subtitle="Annual separation rates by fiscal year (percentage) - agencies with 5000+ employees - FY2016-FY2022"
        )
        .fmt_number(columns=list(display_data.columns[1:]), decimals=0, pattern="{x}%")
        .data_color(
            columns=list(display_data.columns[1:]),
            palette=["white", HIGH_SEPARATIONS_COLOR],
            domain=[0, display_data[display_data.columns[1:]].max().max()],
            na_color="lightgray"
        )
        .cols_width({
            'Agency': '400px'
        })
    )
    
    return gt_annual

# Execute the analysis
high_separation_agencies = get_top_separation_agencies(annual_data, MIN_EMPLOYEES, 50)
annual_pivot, year_cols = create_annual_separations_table(annual_data, high_separation_agencies)
gt_annual = format_annual_separations_table(annual_pivot, year_cols)
gt_annual.show()

Top 50 Agencies with Highest Separation Rates
Annual separation rates by fiscal year (percentage) - agencies with 5000+ employees - FY2016-FY2022
Agency	FY2016	FY2017	FY2018	FY2019	FY2020	FY2021	FY2022	Avg
SB00-SMALL BUSINESS ADMINISTRATION	19%	27%	71%	33%	40%	40%	63%	42%
IN10-NATIONAL PARK SERVICE	45%	43%	45%	42%	36%	37%	38%	41%
AG11-FOREST SERVICE	37%	38%	37%	37%	34%	33%	33%	35%
AFNG-AIR NATIONAL GUARD UNITS (TITLE 32)	28%	29%	28%	30%	29%	31%	35%	30%
CM63-BUREAU OF THE CENSUS	19%	25%	21%	16%	34%	46%	32%	28%
IN05-BUREAU OF LAND MANAGEMENT	30%	27%	25%	27%	24%	26%	25%	26%
NV18-NAVAL MEDICAL COMMAND	11%	11%	12%	13%	11%	13%	90%	23%
DD16-DEPARTMENT OF DEFENSE EDUCATION ACTIVITY	24%	22%	20%	22%	20%	24%	25%	22%
DD34-DEFENSE COMMISSARY AGENCY	21%	19%	18%	19%	22%	23%	22%	21%
ARNG-ARMY NATIONAL GUARD UNITS (TITLE 32)	17%	19%	20%	23%	19%	21%	23%	20%
AG03-AGRICULTURAL RESEARCH SERVICE	20%	20%	21%	22%	18%	18%	18%	19%
HE37-INDIAN HEALTH SERVICE	18%	16%	16%	15%	14%	15%	15%	15%
HSCB-FEDERAL EMERGENCY MANAGEMENT AGENCY	13%	13%	28%	12%	11%	13%	13%	15%
IN15-U.S. FISH AND WILDLIFE SERVICE	13%	13%	15%	13%	11%	12%	15%	13%
ARMC-U.S. ARMY MEDICAL COMMAND	13%	12%	12%	13%	11%	14%	14%	13%
NV52-COMMANDER, NAVY INSTALLATIONS	11%	12%	12%	12%	12%	14%	15%	13%
AF0J-AIR EDUCATION AND TRAINING COMMAND	13%	9%	10%	12%	10%	11%	21%	12%
IN08-GEOLOGICAL SURVEY	13%	12%	14%	13%	11%	11%	13%	12%
IN07-BUREAU OF RECLAMATION	13%	13%	12%	12%	10%	11%	13%	12%
AG34-ANIMAL AND PLANT HEALTH INSPECTION SERVICE	11%	11%	12%	12%	11%	12%	13%	12%
TR93-INTERNAL REVENUE SERVICE	12%	11%	12%	12%	9%	12%	14%	12%
HSBC-TRANSPORTATION SECURITY ADMINISTRATION	12%	12%	12%	12%	9%	11%	15%	12%
AF1L-AIR MOBILITY COMMAND	11%	9%	10%	12%	9%	11%	18%	12%
ARHR-U.S. ARMY RESERVE COMMAND	11%	11%	10%	10%	11%	11%	14%	11%
DJ01-OFFICES, BOARDS AND DIVISIONS	11%	12%	10%	12%	10%	11%	11%	11%
ARCE-U.S. ARMY CORPS OF ENGINEERS	12%	11%	11%	11%	10%	10%	11%	11%
AF0M-HEADQUARTERS, AIR FORCE RESERVE COMMAND	13%	10%	10%	12%	9%	10%	13%	11%
AF1C-AIR COMBAT COMMAND	10%	9%	10%	11%	9%	11%	16%	11%
IN06-INDIAN AFFAIRS	12%	11%	13%	10%	9%	9%	11%	11%
NV27-U.S. MARINE CORPS	10%	10%	10%	10%	9%	11%	13%	10%
FD00-FEDERAL DEPOSIT INSURANCE CORPORATION	12%	10%	10%	9%	9%	10%	12%	10%
ARBA-U.S. ARMY INSTALLATION MANAGEMENT COMMAND	12%	12%	12%	12%	0%	0%	0%	10%
VATA-VETERANS HEALTH ADMINISTRATION	10%	9%	9%	9%	9%	10%	11%	10%
DD04-DEFENSE INFORMATION SYSTEMS AGENCY	9%	7%	11%	9%	10%	10%	10%	10%
NV23-NAVAL SUPPLY SYSTEMS COMMAND	10%	9%	10%	10%	8%	9%	11%	10%
DD35-DEFENSE FINANCE AND ACCOUNTING SERVICE	9%	8%	9%	8%	7%	14%	10%	9%
HSAC-U.S. COAST GUARD	10%	8%	9%	9%	9%	9%	11%	9%
DN00-DEPARTMENT OF ENERGY	9%	10%	9%	8%	8%	10%	10%	9%
DJ09-EXECUTIVE OFFICE FOR U.S. ATTORNEYS AND THE OFFICES OF THE U.S. ATTORNEYS	8%	9%	8%	10%	8%	9%	11%	9%
DD07-DEFENSE LOGISTICS AGENCY	9%	8%	8%	9%	9%	9%	11%	9%
NV25-NAVAL FACILITIES ENGINEERING COMMAND	9%	8%	9%	9%	9%	10%	10%	9%
DD63-DEFENSE CONTRACT MANAGEMENT AGENCY	10%	8%	9%	9%	10%	7%	9%	9%
ARX7-U.S. ARMY TANK-AUTOMOTIVE AND ARMAMENT COMMAND (TACOM)	8%	6%	9%	7%	7%	9%	13%	8%
AG16-NATURAL RESOURCES CONSERVATION SERVICE	8%	8%	9%	8%	8%	8%	10%	8%
ARTC-U.S. ARMY TRAINING AND DOCTRINE COMMAND	8%	8%	7%	8%	9%	9%	10%	8%
AG37-FOOD SAFETY AND INSPECTION SERVICE	7%	7%	8%	8%	9%	9%	10%	8%
ARXQ-U.S. ARMY JOINT MUNITIONS COMMAND	8%	7%	8%	9%	8%	8%	11%	8%
HE38-NATIONAL INSTITUTES OF HEALTH	9%	8%	9%	8%	7%	8%	8%	8%
HE10-OFFICE OF THE SECRETARY OF HEALTH AND HUMAN SERVICES	6%	9%	11%	8%	5%	10%	8%	8%
DJ03-BUREAU OF PRISONS/FEDERAL PRISON SYSTEM	7%	8%	7%	7%	7%	8%	10%	8%

Tenure-Specific Separation Analysis

Show code

def create_tenure_analysis_table(tenure_data, high_separation_agencies):
    """Create tenure-specific separation analysis for specified agencies."""
    # Get tenure data for the same agencies
    tenure_summary_filtered = tenure_data[
        tenure_data['agysub'].isin(high_separation_agencies['agysub'])
    ].groupby(['agysub', 'agysubt', 'loslvl']).agg({
        'employment': 'mean',
        'separations': 'mean',
        'separation_rate': 'mean'
    }).reset_index()
    
    # Pivot tenure data to show side-by-side comparison
    tenure_pivot = tenure_summary_filtered.pivot_table(
        index=['agysub', 'agysubt'],
        columns='loslvl',
        values='separation_rate',
        aggfunc='mean'
    ).reset_index()
    
    # Add overall separation rate from the high_separation_agencies data
    overall_rates = high_separation_agencies[['agysub', 'avg_separation_rate', 'avg_employment']].copy()
    tenure_pivot = pd.merge(
        tenure_pivot,
        overall_rates,
        on='agysub',
        how='left'
    )
    
    # Sort by average separation rate to match the order from the previous table
    tenure_pivot = tenure_pivot.sort_values('avg_separation_rate', ascending=False).head(50)
    
    return tenure_pivot

def format_tenure_table(tenure_pivot):
    """Format the tenure analysis table for display."""
    # Prepare display data
    tenure_cols = ['agysubt', 'A', 'B', 'C+', 'avg_separation_rate', 'avg_employment']
    available_cols = [col for col in tenure_cols if col in tenure_pivot.columns]
    tenure_display = tenure_pivot[available_cols].copy()
    
    # Rename columns
    col_mapping = {
        'agysubt': 'Agency',
        'A': '<1 Year', 
        'B': '1-2 Years',
        'C+': '2+ Years',
        'avg_separation_rate': 'Overall',
        'avg_employment': 'Average Employment'
    }
    tenure_display.rename(columns=col_mapping, inplace=True)
    
    # Round numeric columns to integers
    separation_cols = ['<1 Year', '1-2 Years', '2+ Years', 'Overall']
    for col in separation_cols:
        if col in tenure_display.columns:
            tenure_display[col] = tenure_display[col].round(0).fillna(0).astype(int)
    if 'Average Employment' in tenure_display.columns:
        tenure_display['Average Employment'] = tenure_display['Average Employment'].round(0).fillna(0).astype(int)
    
    # Reset index
    tenure_display = tenure_display.reset_index(drop=True)
    
    # Find the maximum value across all separation columns for color scaling
    separation_cols_present = [col for col in separation_cols if col in tenure_display.columns]
    max_separations = tenure_display[separation_cols_present].max().max()
    
    # Create GT table
    gt_tenure = (
        GT(tenure_display)
        .tab_header(
            title="Annual Agency Separations by Employee Tenure",
            subtitle="Average annual separation rates by tenure group and overall (FY2016-FY2022)"
        )
        .fmt_number(columns=separation_cols_present, decimals=0, pattern="{x}%")
        .fmt_number(columns=['Average Employment'], decimals=0, use_seps=True)
        .data_color(
            columns=separation_cols_present,
            palette=["white", HIGH_SEPARATIONS_COLOR],
            domain=[0, max_separations],
            na_color="lightgray"
        )
        .cols_width({
            'Agency': '350px',
            '<1 Year': '80px',
            '1-2 Years': '80px',
            '2+ Years': '80px',
            'Overall': '80px',
            'Average Employment': '120px'
        })
    )
    
    return gt_tenure

# Execute the analysis
tenure_pivot = create_tenure_analysis_table(tenure_data, high_separation_agencies)
gt_tenure = format_tenure_table(tenure_pivot)
gt_tenure.show()

Annual Agency Separations by Employee Tenure
Average annual separation rates by tenure group and overall (FY2016-FY2022)
Agency	<1 Year	1-2 Years	2+ Years	Overall	Average Employment
SB00-SMALL BUSINESS ADMINISTRATION	107%	47%	18%	42%	6,126
IN10-NATIONAL PARK SERVICE	117%	93%	21%	41%	21,673
AG11-FOREST SERVICE	96%	72%	16%	35%	36,646
AFNG-AIR NATIONAL GUARD UNITS (TITLE 32)	76%	29%	17%	30%	20,885
CM63-BUREAU OF THE CENSUS	66%	26%	12%	28%	17,452
IN05-BUREAU OF LAND MANAGEMENT	103%	60%	14%	26%	10,468
NV18-NAVAL MEDICAL COMMAND	29%	27%	22%	23%	11,106
DD16-DEPARTMENT OF DEFENSE EDUCATION ACTIVITY	30%	49%	13%	22%	13,543
DD34-DEFENSE COMMISSARY AGENCY	61%	30%	12%	21%	12,748
ARNG-ARMY NATIONAL GUARD UNITS (TITLE 32)	45%	20%	14%	20%	27,237
AG03-AGRICULTURAL RESEARCH SERVICE	55%	43%	9%	19%	7,104
HE37-INDIAN HEALTH SERVICE	48%	20%	10%	15%	13,687
HSCB-FEDERAL EMERGENCY MANAGEMENT AGENCY	34%	12%	9%	15%	18,546
IN15-U.S. FISH AND WILDLIFE SERVICE	60%	31%	8%	13%	8,679
ARMC-U.S. ARMY MEDICAL COMMAND	17%	20%	11%	13%	38,750
NV52-COMMANDER, NAVY INSTALLATIONS	21%	21%	11%	13%	11,112
AF0J-AIR EDUCATION AND TRAINING COMMAND	33%	15%	10%	12%	15,045
IN08-GEOLOGICAL SURVEY	41%	23%	9%	12%	8,105
IN07-BUREAU OF RECLAMATION	39%	16%	10%	12%	5,314
AG34-ANIMAL AND PLANT HEALTH INSPECTION SERVICE	32%	21%	8%	12%	8,208
TR93-INTERNAL REVENUE SERVICE	140%	25%	8%	12%	76,615
HSBC-TRANSPORTATION SECURITY ADMINISTRATION	33%	16%	8%	12%	60,896
AF1L-AIR MOBILITY COMMAND	30%	14%	10%	12%	7,989
ARHR-U.S. ARMY RESERVE COMMAND	11%	13%	11%	11%	8,512
DJ01-OFFICES, BOARDS AND DIVISIONS	21%	20%	9%	11%	6,425
ARCE-U.S. ARMY CORPS OF ENGINEERS	38%	16%	9%	11%	35,022
AF0M-HEADQUARTERS, AIR FORCE RESERVE COMMAND	13%	13%	10%	11%	11,952
AF1C-AIR COMBAT COMMAND	17%	16%	10%	11%	11,337
IN06-INDIAN AFFAIRS	36%	18%	8%	11%	7,008
NV27-U.S. MARINE CORPS	16%	14%	10%	10%	18,364
FD00-FEDERAL DEPOSIT INSURANCE CORPORATION	42%	19%	8%	10%	6,072
ARBA-U.S. ARMY INSTALLATION MANAGEMENT COMMAND	28%	16%	8%	10%	23,787
VATA-VETERANS HEALTH ADMINISTRATION	23%	12%	7%	10%	354,495
DD04-DEFENSE INFORMATION SYSTEMS AGENCY	15%	10%	9%	10%	5,944
NV23-NAVAL SUPPLY SYSTEMS COMMAND	12%	9%	10%	10%	6,772
DD35-DEFENSE FINANCE AND ACCOUNTING SERVICE	16%	11%	9%	9%	11,470
HSAC-U.S. COAST GUARD	10%	10%	9%	9%	8,610
DN00-DEPARTMENT OF ENERGY	14%	12%	9%	9%	13,258
DJ09-EXECUTIVE OFFICE FOR U.S. ATTORNEYS AND THE OFFICES OF THE U.S. ATTORNEYS	9%	12%	9%	9%	11,479
DD07-DEFENSE LOGISTICS AGENCY	16%	11%	8%	9%	24,611
NV25-NAVAL FACILITIES ENGINEERING COMMAND	14%	9%	9%	9%	15,969
DD63-DEFENSE CONTRACT MANAGEMENT AGENCY	9%	8%	9%	9%	11,299
ARX7-U.S. ARMY TANK-AUTOMOTIVE AND ARMAMENT COMMAND (TACOM)	13%	15%	8%	8%	11,677
AG16-NATURAL RESOURCES CONSERVATION SERVICE	21%	16%	7%	8%	9,932
ARTC-U.S. ARMY TRAINING AND DOCTRINE COMMAND	13%	11%	8%	8%	10,554
AG37-FOOD SAFETY AND INSPECTION SERVICE	19%	11%	7%	8%	9,196
ARXQ-U.S. ARMY JOINT MUNITIONS COMMAND	16%	12%	7%	8%	5,401
HE38-NATIONAL INSTITUTES OF HEALTH	25%	12%	6%	8%	18,600
HE10-OFFICE OF THE SECRETARY OF HEALTH AND HUMAN SERVICES	121%	13%	4%	8%	9,610
DJ03-BUREAU OF PRISONS/FEDERAL PRISON SYSTEM	17%	8%	7%	8%	37,179

Average Number of Separations vs. Separation Rate

Show code

def create_scatter_plot_data(annual_data, min_employees=5000):
    """Prepare data for average separations vs separation rate scatter plot."""
    scatter_data = annual_data.groupby(['agysub', 'agysubt']).agg({
        'employment': 'mean',
        'separations': 'mean',
        'separation_rate': 'mean'
    }).reset_index()
    scatter_data.columns = ['agysub', 'agysubt', 'avg_employment', 'avg_separations', 'avg_separation_rate']
    scatter_data = scatter_data[scatter_data['avg_employment'] >= min_employees]
    scatter_data['avg_separation_rate'] = scatter_data['avg_separation_rate'].round(0)
    scatter_data['avg_separations'] = scatter_data['avg_separations'].round(0)
    scatter_data['avg_employment'] = scatter_data['avg_employment'].round(0)
    return scatter_data

def create_scatter_plot(scatter_data):
    """Create interactive scatter plot of average separations vs separation rate."""
    fig = px.scatter(
        scatter_data, 
        x='avg_separations', 
        y='avg_separation_rate',
        hover_data={'agysubt': True, 'avg_separations': ':,.0f', 'avg_separation_rate': ':.0f'},
        labels={
            'avg_separations': 'Average Annual Number of Separations',
            'avg_separation_rate': 'Average Annual Separation Rate (%)',
            'agysubt': 'Agency'
        },
        title='Average Number of Separations vs. Separation Rate'
    )
    
    # Update layout
    fig.update_traces(
        marker=dict(size=10, color=HIGH_SEPARATIONS_COLOR, opacity=0.7),
        hovertemplate='<b>%{customdata[0]}</b><br>' +
                      'Average Separations: %{x:,.0f}<br>' +
                      'Separation Rate: %{y:.0f}%<br>' +
                      '<extra></extra>'
    )
    
    fig.update_layout(
        xaxis_title="Average Annual Number of Separations",
        yaxis_title="Average Annual Separation Rate (%)",
        hovermode='closest',
        height=600,
        annotations=[
            dict(
                text="Federal Agencies with 5,000+ Employees (FY2016-FY2022)",
                showarrow=False,
                xref="paper",
                yref="paper",
                x=0.5,
                y=1.05,
                xanchor="center",
                yanchor="bottom",
                font=dict(size=12, color="gray")
            )
        ]
    )
    
    return fig

# Execute the analysis
scatter_data = create_scatter_plot_data(annual_data, MIN_EMPLOYEES)
fig = create_scatter_plot(scatter_data)
fig.show()

Detailed Analysis of Top 10 Agencies with Most Separations

Net Change Heatmaps

Show code

def get_top_10_separation_agencies(annual_data, min_employees=5000):
    """Get top 10 agencies by separation rate."""
    agency_stats = annual_data.groupby(['agysub', 'agysubt']).agg({
        'employment': 'mean',
        'separation_rate': 'mean'
    }).reset_index()
    agency_stats.columns = ['agysub', 'agysubt', 'avg_employment', 'avg_separation_rate']
    agency_stats = agency_stats[agency_stats['avg_employment'] >= min_employees]
    return agency_stats.nlargest(10, 'avg_separation_rate').copy()

def create_agency_heatmap(agency_code, agency_name, avg_separation_rate, 
                         monthly_accessions_data, monthly_separations_data):
    """Create net change heatmap for a single agency."""
    # Get monthly data for this agency
    monthly_acc_data = monthly_accessions_data.get(agency_code, [])
    monthly_sep_data = monthly_separations_data.get(agency_code, [])
    
    if monthly_acc_data and monthly_sep_data:
        # Combine monthly data
        monthly_accessions = pd.concat(monthly_acc_data).groupby(['year', 'month'])['count'].sum().reset_index()
        monthly_separations = pd.concat(monthly_sep_data).groupby(['year', 'month'])['count'].sum().reset_index()
        
        # Merge accessions and separations
        monthly_combined = pd.merge(
            monthly_accessions, 
            monthly_separations, 
            on=['year', 'month'], 
            how='outer',
            suffixes=('_acc', '_sep')
        ).fillna(0)
        
        monthly_combined['accessions'] = monthly_combined['count_acc'].astype(int)
        monthly_combined['separations'] = monthly_combined['count_sep'].astype(int)
        monthly_combined['net_change'] = monthly_combined['accessions'] - monthly_combined['separations']
        monthly_combined = monthly_combined.drop(['count_acc', 'count_sep'], axis=1)
        
        # Create pivot table for net change
        net_pivot = monthly_combined.pivot(index='month', columns='year', values='net_change')
        
        # Ensure all years 2016-2025 are present
        all_years = list(range(2016, 2026))
        for year in all_years:
            if year not in net_pivot.columns:
                net_pivot[year] = np.nan
        net_pivot = net_pivot[all_years]
        
        # Create dataframe for GT table
        net_df = net_pivot.reset_index()
        net_df['month'] = pd.to_datetime(net_df['month'], format='%m').dt.strftime('%B')
        
        # Convert year columns to strings
        net_df.columns = ['month'] + [str(col) for col in net_df.columns[1:]]
        
        # Calculate max absolute value for symmetric color scale
        max_abs_val = abs(net_pivot.fillna(0).values).max()
        
        # Create GT table
        year_columns = [col for col in net_df.columns if col != 'month']
        gt_net = (
            GT(net_df)
            .tab_header(
                title=f"{agency_name}",
                subtitle=f"Monthly Net Change (2016-2025) - Average Separation Rate: {avg_separation_rate:.0f}%"
            )
            .fmt_number(columns=year_columns, decimals=0, use_seps=True)
            .data_color(
                columns=year_columns,
                palette=[NET_NEGATIVE_COLOR, "white", NET_POSITIVE_COLOR],
                domain=[-max_abs_val, max_abs_val],
                na_color="lightgray"
            )
            .cols_width({
                'month': '100px'
            })
        )
        
        return gt_net
    return None

def display_all_heatmaps(annual_data, monthly_accessions_data, monthly_separations_data):
    """Display heatmaps for all top 10 agencies."""
    high_separation_agencies = get_top_10_separation_agencies(annual_data, MIN_EMPLOYEES)
    
    for idx, agency_row in high_separation_agencies.iterrows():
        agency_code = agency_row['agysub']
        agency_name = agency_row['agysubt']
        avg_separation_rate = agency_row['avg_separation_rate']
        
        gt_table = create_agency_heatmap(
            agency_code, agency_name, avg_separation_rate,
            monthly_accessions_data, 
            monthly_separations_data
        )
        
        if gt_table:
            gt_table.show()

# Execute the heatmap display
display_all_heatmaps(annual_data, all_data['monthly_accessions_data'], all_data['monthly_separations_data'])

SB00-SMALL BUSINESS ADMINISTRATION
Monthly Net Change (2016-2025) - Average Separation Rate: 42%
month	2016	2017	2018	2019	2020	2021	2022	2023	2024	2025
January	−35	−169	−226	−333	−32	−511	76	507		−189
February	−8	−42	−393	−176	−23	−294	79	162		−65
March	−29	−32	−439	−160	165	−272	−319	−67		−118
April	−77	−87	−432	−38	424	−96	−303	25	−20
May	−31	−16	−371	−22	675	−49	−3,353	20	22
June	−19	−6	−98	−42	779	187	−95	71	43
July	9	−30	−103	30	1,071	168	−115	−70	−96
August	25	−1	−28	−15	1,327	117	−32	−33	16
September	302	1,420	212	87	842	169	−58	−4	−40
October	140	1,592	187	−156	463	163	451		355
November	−150	425	80	−69	554	139	630		279
December	−56	75	−275	−49	−233	32	459		53

IN10-NATIONAL PARK SERVICE
Monthly Net Change (2016-2025) - Average Separation Rate: 41%
month	2016	2017	2018	2019	2020	2021	2022	2023	2024	2025
January	−113	46	−185	−171	−52	−52	44	160		98
February	−13	−43	−76	111	−15	42	−25	86		−160
March	269	−42	−36	312	359	298	241	97		−144
April	1,150	2,057	2,172	2,022	312	1,641	1,542	1,590	1,495
May	4,056	2,752	2,992	2,899	2,626	2,732	2,649	2,468	2,392
June	852	871	854	935	1,453	607	772	673	704
July	143	171	113	32	283	−80	35	109	−9
August	−547	−496	−586	−622	−225	−392	−477	−395	−400
September	−2,299	−2,543	−2,383	−2,007	−1,400	−1,557	−1,495	−1,458	−1,246
October	−2,192	−2,066	−2,469	−2,236	−1,631	−2,154	−1,973		−1,628
November	−871	−1,031	−797	−827	−813	−799	−657		−927
December	−378	−423	−323	−230	−377	−244	−276		−12

AG11-FOREST SERVICE
Monthly Net Change (2016-2025) - Average Separation Rate: 35%
month	2016	2017	2018	2019	2020	2021	2022	2023	2024	2025
January	−167	251	−211	−299	−182	−153	−89	290		−255
February	29	−65	−17	−134	−33	64	90	99		−168
March	433	108	86	520	1,078	561	684	724		68
April	1,708	4,094	3,694	3,366	2,338	2,623	2,570	2,522	1,510
May	7,366	4,879	5,001	4,618	5,266	4,190	3,206	2,937	2,538
June	970	1,011	888	1,074	1,140	787	1,068	1,671	1,392
July	103	−20	53	−33	−38	−314	390	407	−148
August	−1,351	−1,315	−1,195	−1,208	−856	−1,102	−800	−840	−892
September	−1,955	−2,549	−2,177	−2,027	−1,389	−1,474	−1,254	−865	−1,004
October	−4,103	−3,685	−3,398	−2,935	−2,432	−2,654	−1,754		−1,642
November	−2,128	−2,342	−2,238	−2,163	−2,306	−2,017	−1,479		−1,137
December	−716	−675	−697	−852	−873	−963	−757		−480

AFNG-AIR NATIONAL GUARD UNITS (TITLE 32)
Monthly Net Change (2016-2025) - Average Separation Rate: 30%
month	2016	2017	2018	2019	2020	2021	2022	2023	2024	2025
January	115	58	−21	92	−43	65	−47	184		−66
February	196	−33	173	245	25	28	−150	62		−38
March	−69	−38	−158	−89	3	−82	−365	−95		−319
April	−64	80	232	−131	−136	−170	−300	3	−139
May	398	297	119	−38	−142	73	−181	21	36
June	123	118	−66	−168	−128	−20	−163	34	136
July	−20	−31	−83	−42	−140	−155	−224	105	−32
August	−173	−316	−285	−241	−94	−27	−197	−121	−218
September	−747	−1,154	−506	−462	−559	−742	−739	−554	−610
October	724	1,127	478	294	411	113	175		284
November	227	158	398	45	165	−10	39		68
December	−148	−117	−128	−137	−90	−242	−167		−133

CM63-BUREAU OF THE CENSUS
Monthly Net Change (2016-2025) - Average Separation Rate: 28%
month	2016	2017	2018	2019	2020	2021	2022	2023	2024	2025
January	−55	326	26	167	766	−268	−84	318		−129
February	98	76	−13	194	798	169	−372	692		−392
March	232	149	−56	178	831	667	−505	1,177		−415
April	174	373	185	1,055	104	840	−173	525	−37
May	126	465	63	1,289	121	442	−98	119	72
June	110	381	70	999	−48	−112	−175	−586	−94
July	77	−97	67	821	−381	−191	−140	−271	−139
August	−67	−139	11	438	−347	−294	−65	−354	−7
September	170	−279	0	434	−1,324	−1,242	−125	−360	−3
October	143	−352	210	207	−1,499	−532	−52		−20
November	220	−582	95	−206	−1,734	107	59		23
December	17	−100	−55	353	−377	121	177		15

IN05-BUREAU OF LAND MANAGEMENT
Monthly Net Change (2016-2025) - Average Separation Rate: 26%
month	2016	2017	2018	2019	2020	2021	2022	2023	2024	2025
January	−53	115	−53	−55	−55	−7	−32	35		38
February	24	67	−23	−19	−65	6	24	55		−83
March	35	−46	−10	85	63	95	60	81		−47
April	227	376	449	382	334	310	307	354	290
May	1,296	923	929	911	1,036	949	892	797	676
June	323	246	215	286	309	282	174	256	327
July	5	2	52	56	13	−50	0	107	64
August	−306	−283	−292	−301	−209	−204	−246	−154	−174
September	−563	−621	−445	−501	−343	−349	−274	−259	−138
October	−587	−417	−567	−438	−599	−597	−436		−347
November	−183	−215	−186	−261	−299	−219	−220		−177
December	−87	−149	−148	−141	−157	−173	−174		−54

NV18-NAVAL MEDICAL COMMAND
Monthly Net Change (2016-2025) - Average Separation Rate: 23%
month	2016	2017	2018	2019	2020	2021	2022	2023	2024	2025
January	−33	−7	15	49	−50	−12	−6	2		−2
February	100	−48	41	8	−60	−12	0	−1		13
March	61	−70	−18	23	30	51	16	3		−15
April	14	0	71	94	−28	−10	−13	−10	−2
May	80	39	19	−11	−13	−14	−597	−1	6
June	38	0	9	−41	3	−34	−23	−3	14
July	35	−9	28	−5	−19	−90	−3,209	−14	16
August	27	8	19	−72	−11	8	−3,724	−17	2
September	4	−25	17	49	−28	−21	−609	19	−4
October	76	58	73	14	−11	−21	−135		15
November	55	41	47	−17	24	−91	4		6
December	−51	−75	−77	−83	−98	−114	−21		−1

DD16-DEPARTMENT OF DEFENSE EDUCATION ACTIVITY
Monthly Net Change (2016-2025) - Average Separation Rate: 22%
month	2016	2017	2018	2019	2020	2021	2022	2023	2024	2025
January	112	56	100	179	162	−16	197	263		168
February	101	−60	208	93	60	92	67	117		158
March	77	46	150	78	94	145	49	57		−72
April	33	51	170	203	22	61	66	82	120
May	−3	−66	−1	−15	−81	−17	−14	7	13
June	−1,014	−1,031	−734	−1,120	−804	−777	−736	−774	−872
July	−305	−300	−266	−192	−260	−373	−389	−373	−303
August	164	274	486	295	227	438	344	422	625
September	49	19	116	187	109	141	201	221	248
October	169	170	209	189	111	125	214		246
November	129	94	154	186	168	−53	150		229
December	49	4	20	72	−20	−6	98		165

DD34-DEFENSE COMMISSARY AGENCY
Monthly Net Change (2016-2025) - Average Separation Rate: 21%
month	2016	2017	2018	2019	2020	2021	2022	2023	2024	2025
January	−17	14	−73	−3	6	−7	−32	115		31
February	10	−79	3	−29	−11	5	69	37		56
March	28	−167	−27	−73	356	−137	37	74		−180
April	7	−109	28	23	674	−166	−8	26	42
May	106	227	30	−94	169	−150	25	−25	−7
June	171	82	−109	−78	−15	−56	8	−26	42
July	6	34	−95	−72	−112	−90	−31	53	105
August	−89	−74	−91	−153	−250	2	31	28	−49
September	−145	−183	−133	−42	−128	−42	9	4	−6
October	129	141	−38	−33	−105	−57	13		67
November	130	−27	−31	−28	19	−103	45		23
December	−38	−33	−94	−29	−44	−81	−73		53

ARNG-ARMY NATIONAL GUARD UNITS (TITLE 32)
Monthly Net Change (2016-2025) - Average Separation Rate: 20%
month	2016	2017	2018	2019	2020	2021	2022	2023	2024	2025
January	−42	57	25	49	68	178	−119	365		47
February	30	14	194	105	57	−33	−152	126		79
March	−19	−32	−12	53	299	−251	−195	26		−186
April	−54	203	482	39	114	−33	−111	72	37
May	225	109	314	30	−19	−23	−131	38	−15
June	82	195	222	−34	−11	−128	−100	44	120
July	8	31	24	10	27	−203	−74	229	5
August	−11	−54	−96	−135	163	113	−101	46	−67
September	−380	−586	−330	−222	−321	−534	−305	−266	−267
October	257	442	332	257	353	−4	184		233
November	−34	83	112	191	326	−56	93		69
December	−59	10	−59	−38	67	−39	−22		125

Employment Over Time

Show code

def create_employment_graph(agency_code, agency_name, employment_time_series_data):
    """Create employment over time graph for a single agency."""
    employment_timeline = employment_time_series_data.get(agency_code, [])
    
    if employment_timeline:
        # Convert to DataFrame and sort
        emp_df = pd.DataFrame(employment_timeline)
        emp_df['sort_key'] = emp_df['year'] * 10 + emp_df['month'].map({'March': 1, 'September': 2})
        emp_df = emp_df.sort_values('sort_key')
        
        # Create line graph
        fig, ax = plt.subplots(figsize=(12, 6))
        
        # Separate March and September data for different colors
        march_data = emp_df[emp_df['month'] == 'March']
        sept_data = emp_df[emp_df['month'] == 'September']
        
        # Plot lines
        ax.plot(emp_df['period'], emp_df['employment'], 
                'k-', alpha=0.3, linewidth=1)
        
        # Plot points
        if not march_data.empty:
            ax.scatter(march_data['period'], march_data['employment'], 
                      color='blue', s=100, label='March', zorder=5)
        if not sept_data.empty:
            ax.scatter(sept_data['period'], sept_data['employment'], 
                      color='orange', s=100, label='September', zorder=5)
        
        # Formatting
        ax.set_xlabel('Period', fontsize=12)
        ax.set_ylabel('Total Employment', fontsize=12)
        ax.set_title(f'{agency_name} Employment Over Time', fontsize=14, fontweight='bold')
        
        # Rotate x-axis labels
        plt.xticks(rotation=45, ha='right')
        
        # Add grid
        ax.grid(True, alpha=0.3)
        
        # Set y-axis to start at 0
        ax.set_ylim(bottom=0)
        
        # Format y-axis with commas
        ax.yaxis.set_major_formatter(plt.FuncFormatter(lambda x, p: f'{int(x):,}'))
        
        # Add legend
        ax.legend()
        
        # Tight layout
        plt.tight_layout()
        plt.show()

def display_all_employment_graphs(annual_data, employment_time_series_data):
    """Display employment graphs for all top 10 agencies."""
    high_separation_agencies = get_top_10_separation_agencies(annual_data, MIN_EMPLOYEES)
    
    for idx, agency_row in high_separation_agencies.iterrows():
        agency_code = agency_row['agysub']
        agency_name = agency_row['agysubt']
        
        create_employment_graph(agency_code, agency_name, employment_time_series_data)

# Execute the employment graph display
display_all_employment_graphs(annual_data, all_data['employment_time_series_data'])

State-Level Analysis

Show code

# State mapping from numeric codes to abbreviations
state_mapping = {
    '01': 'AL', '02': 'AK', '04': 'AZ', '05': 'AR', '06': 'CA',
    '08': 'CO', '09': 'CT', '10': 'DE', '11': 'DC', '12': 'FL',
    '13': 'GA', '15': 'HI', '16': 'ID', '17': 'IL', '18': 'IN',
    '19': 'IA', '20': 'KS', '21': 'KY', '22': 'LA', '23': 'ME',
    '24': 'MD', '25': 'MA', '26': 'MI', '27': 'MN', '28': 'MS',
    '29': 'MO', '30': 'MT', '31': 'NE', '32': 'NV', '33': 'NH',
    '34': 'NJ', '35': 'NM', '36': 'NY', '37': 'NC', '38': 'ND',
    '39': 'OH', '40': 'OK', '41': 'OR', '42': 'PA', '44': 'RI',
    '45': 'SC', '46': 'SD', '47': 'TN', '48': 'TX', '49': 'UT',
    '50': 'VT', '51': 'VA', '53': 'WA', '54': 'WV', '55': 'WI',
    '56': 'WY'
}

# Load state-level employment data
state_employment_all = []
fiscal_years = get_fiscal_years()

for fy in fiscal_years:
    baseline_year = fy - 1
    file_path = f"fedscope_employment_September_{baseline_year}.parquet"
    
    try:
        df = pd.read_parquet(EMPLOYMENT_PARQUET_DIR + file_path)
        df['employment_num'] = df['employment'].apply(clean_employment)
        
        # Filter for US states only
        df_states = df[df['loc'].isin(state_mapping.keys())].copy()
        df_states['state'] = df_states['loc'].map(state_mapping)
        
        # Aggregate by state
        state_grouped = df_states.groupby(['state']).agg({
            'employment_num': 'sum'
        }).reset_index()
        state_grouped['fiscal_year'] = fy
        state_grouped.rename(columns={'employment_num': 'employment'}, inplace=True)
        
        state_employment_all.append(state_grouped)
        
    except Exception as e:
        print(f"Error loading FY{fy}: {e}")

employment_state = pd.concat(state_employment_all, ignore_index=True)

# Load state-level separations data
separations_files = [
    "fedscope_separations_FY2015-2019.parquet",
    "fedscope_separations_FY2020-2024.parquet"
]

state_separations_all = []

for file in separations_files:
    try:
        df = pd.read_parquet(SEPARATIONS_PARQUET_DIR + file)
        
        if 'loc' not in df.columns:
            continue
            
        df['fiscal_year'] = df['efdate'].apply(calculate_fiscal_year)
        df = df[df['fiscal_year'].isin(fiscal_years)]
        
        # Filter for US states only
        df_states = df[df['loc'].isin(state_mapping.keys())].copy()
        df_states['state'] = df_states['loc'].map(state_mapping)
        
        # Aggregate by state and fiscal year
        state_grouped = df_states.groupby(['fiscal_year', 'state']).size().reset_index(name='separations')
        
        state_separations_all.append(state_grouped)
        
    except Exception as e:
        print(f"Error: {e}")

separations_state = pd.concat(state_separations_all, ignore_index=True)

# Merge employment and separations
state_annual = pd.merge(
    employment_state,
    separations_state,
    on=['fiscal_year', 'state'],
    how='outer'
)

# Fill NaN values
state_annual['employment'] = state_annual['employment'].fillna(0)
state_annual['separations'] = state_annual['separations'].fillna(0)

# Calculate separation rate
state_annual['separation_rate'] = np.where(
    state_annual['employment'] > 0,
    (state_annual['separations'] / state_annual['employment']) * 100,
    np.nan
)

# Filter for meaningful sample sizes (at least 1000 federal employees)
state_annual = state_annual[state_annual['employment'] >= 1000]
state_annual = state_annual.dropna(subset=['separation_rate'])

# Calculate average statistics by state
state_avg_stats = state_annual.groupby('state').agg({
    'employment': 'mean',
    'separations': 'mean', 
    'separation_rate': 'mean'
}).reset_index()

state_avg_stats.columns = ['State', 'Avg Employment', 'Avg Separations', 'Avg Separation Rate']

# Sort by separation rate
state_avg_stats = state_avg_stats.sort_values('Avg Separation Rate', ascending=False)

State Separation Rates Map

Show code

# Create map data
map_data = state_avg_stats.copy()
map_data.rename(columns={'State': 'state', 'Avg Separation Rate': 'separation_rate'}, inplace=True)
map_data['separation_rate'] = map_data['separation_rate'].round(1)

# Create choropleth
fig = px.choropleth(
    map_data,
    locations='state',
    locationmode='USA-states',
    color='separation_rate',
    color_continuous_scale='Reds',
    scope='usa',
    labels={'separation_rate': 'Avg Separation Rate (%)'},
    title='Federal Employee Separation Rates by State (FY2016-2022)',
    hover_data={'separation_rate': ':.1f'}
)

fig.update_layout(
    title={
        'text': 'Federal Employee Separation Rates by State (FY2016-2022)',
        'x': 0.5,
        'xanchor': 'center',
        'font': {'size': 16}
    },
    geo=dict(
        bgcolor='rgba(0,0,0,0)',
        lakecolor='rgb(255, 255, 255)',
    ),
    width=1100,
    height=600
)

fig.show()

State Separation Rates by Year

Show code

# Get top 15 states by separation rate
top_states = state_avg_stats.head(15)['State'].tolist()

# Filter data for these states
annual_data = state_annual[state_annual['state'].isin(top_states)].copy()

# Create pivot table for annual rates
annual_pivot = annual_data.pivot_table(
    index='state',
    columns='fiscal_year',
    values='separation_rate',
    aggfunc='mean'
)

# Get summary stats for these states
summary_stats = state_avg_stats[state_avg_stats['State'].isin(top_states)].set_index('State')

# Merge annual rates with summary stats
combined_data = annual_pivot.merge(
    summary_stats[['Avg Employment', 'Avg Separations', 'Avg Separation Rate']], 
    left_index=True, 
    right_index=True
)

# Sort by average separation rate
combined_data = combined_data.sort_values('Avg Separation Rate', ascending=False)

# Reset index to make State a column
combined_data = combined_data.reset_index()
combined_data.rename(columns={'index': 'State'}, inplace=True)

# Prepare columns for display
year_columns = [col for col in combined_data.columns if isinstance(col, (int, float)) and col < 2100]
year_columns = sorted(year_columns)

# Select and order columns
display_columns = ['State'] + year_columns + ['Avg Separation Rate', 'Avg Employment', 'Avg Separations']
combined_data = combined_data[display_columns]

# Rename year columns
column_rename = {'State': 'State'}
for year in year_columns:
    column_rename[year] = f'FY{int(year)}'
column_rename['Avg Separation Rate'] = 'Avg Rate'
column_rename['Avg Employment'] = 'Avg Employment'
column_rename['Avg Separations'] = 'Avg Separations'

combined_data.rename(columns=column_rename, inplace=True)

# Round values
fy_columns = [col for col in combined_data.columns if col.startswith('FY')]
for col in fy_columns:
    combined_data[col] = combined_data[col].round(0).fillna(0).astype(int)
combined_data['Avg Rate'] = combined_data['Avg Rate'].round(1)
combined_data['Avg Employment'] = combined_data['Avg Employment'].round(0).astype(int)
combined_data['Avg Separations'] = combined_data['Avg Separations'].round(0).astype(int)

# Create GT table
gt_states_comprehensive = (
    GT(combined_data)
    .tab_header(
        title="Top 15 States by Federal Employee Separation Rate",
        subtitle="Annual separation rates (FY2016-2022) with average employment and separations"
    )
    .fmt_number(columns=fy_columns, decimals=0, pattern="{x}%")
    .fmt_number(columns=['Avg Rate'], decimals=1, pattern="{x}%")
    .fmt_number(columns=['Avg Employment', 'Avg Separations'], decimals=0, use_seps=True)
    .data_color(
        columns=fy_columns + ['Avg Rate'],
        palette=["white", HIGH_SEPARATIONS_COLOR],
        domain=[0, combined_data[fy_columns + ['Avg Rate']].max().max()],
        na_color="lightgray"
    )
    .cols_width({
        'State': '80px',
        'Avg Employment': '120px',
        'Avg Separations': '120px',
        'Avg Rate': '80px'
    })
    .tab_spanner(
        label="Annual Separation Rates",
        columns=fy_columns
    )
    .tab_spanner(
        label="Averages",
        columns=['Avg Rate', 'Avg Employment', 'Avg Separations']
    )
)

gt_states_comprehensive.show()

Top 15 States by Federal Employee Separation Rate
Annual separation rates (FY2016-2022) with average employment and separations
State	Annual Separation Rates							Averages
State	FY2016	FY2017	FY2018	FY2019	FY2020	FY2021	FY2022	Avg Rate	Avg Employment	Avg Separations
WY	29%	30%	29%	27%	25%	25%	27%	27.6%	6,374	1,760
MT	28%	27%	29%	27%	26%	25%	26%	26.7%	10,387	2,774
ID	28%	29%	27%	26%	24%	25%	26%	26.3%	9,773	2,567
OR	21%	20%	20%	20%	18%	20%	21%	20.0%	20,227	4,040
AK	20%	19%	19%	18%	17%	18%	20%	18.7%	11,231	2,104
SD	18%	17%	18%	18%	15%	17%	18%	17.1%	8,386	1,436
NV	15%	14%	18%	14%	13%	16%	16%	15.2%	11,646	1,770
AZ	16%	15%	14%	14%	14%	15%	16%	14.9%	32,209	4,801
CO	15%	14%	15%	15%	13%	14%	16%	14.6%	37,722	5,522
NM	15%	14%	15%	14%	13%	14%	16%	14.4%	21,899	3,157
ND	14%	13%	15%	14%	12%	15%	17%	14.2%	5,500	780
CA	13%	12%	13%	13%	12%	13%	16%	13.2%	143,798	19,005
VT	14%	12%	14%	13%	12%	13%	15%	13.1%	3,249	426
UT	13%	13%	13%	13%	11%	13%	14%	13.0%	29,486	3,847
MO	14%	12%	13%	13%	11%	12%	14%	12.7%	35,236	4,483

State Separations vs. Rate Scatter Plot

Show code

# Round for display
scatter_data = state_avg_stats.copy()
scatter_data['Avg Employment'] = scatter_data['Avg Employment'].round(0)
scatter_data['Avg Separations'] = scatter_data['Avg Separations'].round(0)
scatter_data['Avg Separation Rate'] = scatter_data['Avg Separation Rate'].round(1)

# Create interactive scatter plot
fig = px.scatter(
    scatter_data, 
    x='Avg Separations', 
    y='Avg Separation Rate',
    text='State',
    hover_data={
        'State': True, 
        'Avg Separations': ':,.0f', 
        'Avg Separation Rate': ':.1f'
    },
    labels={
        'Avg Separations': 'Average Annual Number of Separations',
        'Avg Separation Rate': 'Average Annual Separation Rate (%)',
        'State': 'State'
    },
    title='State-Level: Average Separations vs. Separation Rate'
)

# Update layout
fig.update_traces(
    marker=dict(size=12, color=HIGH_SEPARATIONS_COLOR, opacity=0.7),
    textposition='top right',
    textfont=dict(size=9),
    hovertemplate='<b>%{text}</b><br>' +
                  'Average Separations: %{x:,.0f}<br>' +
                  'Separation Rate: %{y:.1f}%<br>' +
                  '<extra></extra>'
)

fig.update_layout(
    xaxis_title="Average Annual Number of Separations",
    yaxis_title="Average Annual Separation Rate (%)",
    hovermode='closest',
    height=600,
    annotations=[
        dict(
            text="All states shown have 1,000+ federal employees",
            showarrow=False,
            xref="paper",
            yref="paper",
            x=0.02,
            y=0.98,
            xanchor="left",
            yanchor="top",
            font=dict(size=10, color="gray", style="italic")
        )
    ]
)

fig.show()