Agency Separations Analysis

Author

Abigail Haddad

Published

July 28, 2025

THIS IS A DRAFT! Please talk to me before you use this.

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from great_tables import GT
from pathlib import Path
import gc
import plotly.express as px

# Set up pandas display options
pd.set_option('display.max_columns', None)
pd.set_option('display.width', None)
pd.set_option('display.max_colwidth', None)

# Constants
EMPLOYMENT_PARQUET_DIR = "../../employment_cube/parquet/"
SEPARATIONS_PARQUET_DIR = "../../separations_accessions/parquet/"
MIN_EMPLOYEES = 5000
TOP_N_AGENCIES = 50

# Color configuration
HIGH_SEPARATIONS_COLOR = "#d62728"
LOW_SEPARATIONS_COLOR = "#2ca02c"
NET_POSITIVE_COLOR = "#2ca02c"
NET_NEGATIVE_COLOR = "#d62728"
ACCESSIONS_COLOR = "#1f77b4"
SEPARATIONS_COLOR = "#ff7f0e"

# Utility Functions
def clean_employment(x):
    """Convert employment values to numeric, handling REDACTED and ***** values"""
    if pd.isna(x) or x in ['REDACTED', '*****', '']:
        return 0
    try:
        return int(x)
    except:
        return 0

def calculate_fiscal_year(efdate):
    """Convert efdate (YYYYMM) to fiscal year.
    Fiscal year runs Oct-Sept, so Oct 2018 (201810) = FY2019"""
    efdate_str = str(efdate)
    year = int(efdate_str[:4])
    month = int(efdate_str[4:6])
    # Oct-Dec of year X = FY X+1
    return year + 1 if month >= 10 else year

Data Loading and Processing

Show code
def load_employment_data(fiscal_years, parquet_dir):
    """Load September employment data for fiscal year baselines."""
    employment_aggregated = []
    employment_tenure_aggregated = []
    agency_lookups = None
    loading_summary = []
    
    for fy in fiscal_years:
        baseline_year = fy - 1
        file_path = f"fedscope_employment_September_{baseline_year}.parquet"
        try:
            df = pd.read_parquet(parquet_dir + file_path)
            raw_count = len(df)
            
            df['employment_num'] = df['employment'].apply(clean_employment)
            
            if agency_lookups is None:
                agency_lookups = df[['agysub', 'agysubt']].drop_duplicates()
            
            # Overall employment
            grouped = df.groupby(['agysub']).agg({'employment_num': 'sum'}).reset_index()
            grouped['fiscal_year'] = fy
            grouped.rename(columns={'employment_num': 'employment'}, inplace=True)
            employment_aggregated.append(grouped)
            
            # Tenure-specific employment (A and B levels)
            tenure_grouped = df[df['loslvl'].isin(['A', 'B'])].groupby(['agysub', 'loslvl']).agg({
                'employment_num': 'sum'
            }).reset_index()
            tenure_grouped['fiscal_year'] = fy
            tenure_grouped.rename(columns={'employment_num': 'employment'}, inplace=True)
            employment_tenure_aggregated.append(tenure_grouped)
            
            loading_summary.append({
                'Year': f'FY{fy}',
                'Data Type': 'Employment (Sept)',
                'Raw Records': raw_count,
                'Aggregated Groups': len(grouped)
            })
            
            del df
            gc.collect()
        except Exception as e:
            continue
    
    return (pd.concat(employment_aggregated, ignore_index=True),
            pd.concat(employment_tenure_aggregated, ignore_index=True),
            agency_lookups,
            loading_summary)

def load_separations_data(fiscal_years, parquet_dir):
    """Load separations data from multiple files."""
    separations_files = [
        "fedscope_separations_FY2015-2019.parquet",
        "fedscope_separations_FY2020-2024.parquet",
        "fedscope_separations_March_2025.parquet"
    ]
    
    separations_aggregated = []
    separations_tenure_aggregated = []
    loading_summary = []
    
    for file in separations_files:
        try:
            df = pd.read_parquet(parquet_dir + file)
            raw_count = len(df)
            
            df['fiscal_year'] = df['efdate'].apply(calculate_fiscal_year)
            df = df[df['fiscal_year'].isin(fiscal_years)]
            
            # Overall separations
            grouped = df.groupby(['fiscal_year', 'agysub']).size().reset_index(name='separations')
            separations_aggregated.append(grouped)
            
            # Tenure-specific separations
            tenure_seps = df[df['loslvl'].isin(['A', 'B'])].groupby(
                ['fiscal_year', 'agysub', 'loslvl']
            ).size().reset_index(name='separations')
            separations_tenure_aggregated.append(tenure_seps)
            
            year_range = 'March 2025' if 'March_2025' in file else (
                '2015-2019' if '2015-2019' in file else '2020-2024'
            )
            loading_summary.append({
                'Year': year_range,
                'Data Type': 'Separations',
                'Raw Records': raw_count,
                'Aggregated Groups': len(grouped)
            })
            
            del df
            gc.collect()
        except Exception as e:
            continue
    
    return (pd.concat(separations_aggregated, ignore_index=True),
            pd.concat(separations_tenure_aggregated, ignore_index=True),
            loading_summary)

def load_accessions_data(fiscal_years, parquet_dir):
    """Load accessions data from multiple files."""
    accessions_files = [
        "fedscope_accessions_FY2015-2019.parquet",
        "fedscope_accessions_FY2020-2024.parquet",
        "fedscope_accessions_March_2025.parquet"
    ]
    
    accessions_aggregated = []
    loading_summary = []
    
    for file in accessions_files:
        try:
            df = pd.read_parquet(parquet_dir + file)
            raw_count = len(df)
            
            df['fiscal_year'] = df['efdate'].apply(calculate_fiscal_year)
            
            if 'March_2025' not in file:
                df = df[df['fiscal_year'].isin(fiscal_years)]
            
            grouped = df.groupby(['fiscal_year', 'agysub']).size().reset_index(name='accessions')
            
            if len(grouped) > 0:
                accessions_aggregated.append(grouped)
                aggregated_count = len(grouped)
            else:
                aggregated_count = 0
            
            year_range = 'March 2025' if 'March_2025' in file else (
                '2015-2019' if '2015-2019' in file else '2020-2024'
            )
            loading_summary.append({
                'Year': year_range,
                'Data Type': 'Accessions',
                'Raw Records': raw_count,
                'Aggregated Groups': aggregated_count
            })
            
            del df
            gc.collect()
        except Exception as e:
            continue
    
    return pd.concat(accessions_aggregated, ignore_index=True), loading_summary

def load_tenure_2plus_data(fiscal_years, employment_parquet_dir, separations_parquet_dir):
    """Load employment and separations data for employees with 2+ years tenure."""
    employment_2plus_aggregated = []
    
    for fy in fiscal_years:
        baseline_year = fy - 1
        file_path = f"fedscope_employment_September_{baseline_year}.parquet"
        try:
            df = pd.read_parquet(employment_parquet_dir + file_path)
            df['employment_num'] = df['employment'].apply(clean_employment)
            
            tenure_2plus = df[~df['loslvl'].isin(['A', 'B'])].groupby(['agysub']).agg({
                'employment_num': 'sum'
            }).reset_index()
            tenure_2plus['fiscal_year'] = fy
            tenure_2plus['loslvl'] = 'C+'
            tenure_2plus.rename(columns={'employment_num': 'employment'}, inplace=True)
            employment_2plus_aggregated.append(tenure_2plus)
            
            del df
            gc.collect()
        except Exception as e:
            continue
    
    separations_files = [
        "fedscope_separations_FY2015-2019.parquet",
        "fedscope_separations_FY2020-2024.parquet",
        "fedscope_separations_March_2025.parquet"
    ]
    
    separations_2plus_aggregated = []
    for file in separations_files:
        try:
            df = pd.read_parquet(separations_parquet_dir + file)
            df['fiscal_year'] = df['efdate'].apply(calculate_fiscal_year)
            df = df[df['fiscal_year'].isin(fiscal_years)]
            tenure_2plus_seps = df[~df['loslvl'].isin(['A', 'B'])].groupby(
                ['fiscal_year', 'agysub']
            ).size().reset_index(name='separations')
            tenure_2plus_seps['loslvl'] = 'C+'
            separations_2plus_aggregated.append(tenure_2plus_seps)
            del df
            gc.collect()
        except Exception as e:
            continue
    
    return (pd.concat(employment_2plus_aggregated, ignore_index=True),
            pd.concat(separations_2plus_aggregated, ignore_index=True))

def load_monthly_data_for_heatmaps(parquet_dir):
    """Load monthly accessions and separations data for heatmaps."""
    files = [
        "fedscope_accessions_FY2015-2019.parquet",
        "fedscope_accessions_FY2020-2024.parquet",
        "fedscope_accessions_March_2025.parquet"
    ]
    
    monthly_accessions_data = {}
    for file in files:
        try:
            df = pd.read_parquet(parquet_dir + file)
            df['year'] = df['efdate'].astype(str).str[:4].astype(int)
            df['month'] = df['efdate'].astype(str).str[4:6].astype(int)
            df = df[df['year'].between(2016, 2025)]
            monthly_counts = df.groupby(['agysub', 'year', 'month']).size().reset_index(name='count')
            for agysub in monthly_counts['agysub'].unique():
                if agysub not in monthly_accessions_data:
                    monthly_accessions_data[agysub] = []
                monthly_accessions_data[agysub].append(monthly_counts[monthly_counts['agysub'] == agysub])
            del df
            gc.collect()
        except Exception as e:
            continue
    
    # Same for separations
    files = [
        "fedscope_separations_FY2015-2019.parquet",
        "fedscope_separations_FY2020-2024.parquet",
        "fedscope_separations_March_2025.parquet"
    ]
    
    monthly_separations_data = {}
    for file in files:
        try:
            df = pd.read_parquet(parquet_dir + file)
            df['year'] = df['efdate'].astype(str).str[:4].astype(int)
            df['month'] = df['efdate'].astype(str).str[4:6].astype(int)
            df = df[df['year'].between(2016, 2025)]
            monthly_counts = df.groupby(['agysub', 'year', 'month']).size().reset_index(name='count')
            for agysub in monthly_counts['agysub'].unique():
                if agysub not in monthly_separations_data:
                    monthly_separations_data[agysub] = []
                monthly_separations_data[agysub].append(monthly_counts[monthly_counts['agysub'] == agysub])
            del df
            gc.collect()
        except Exception as e:
            continue
    
    return monthly_accessions_data, monthly_separations_data

def load_employment_time_series(employment_parquet_dir, years_range):
    """Load March and September employment data for time series."""
    employment_time_series_data = {}
    
    for year in years_range:
        # March data
        march_file = f"fedscope_employment_March_{year}.parquet"
        march_path = employment_parquet_dir + march_file
        if Path(march_path).exists():
            try:
                df = pd.read_parquet(march_path)
                df['employment_num'] = df['employment'].apply(clean_employment)
                agency_totals = df.groupby('agysub')['employment_num'].sum().reset_index()
                for _, row in agency_totals.iterrows():
                    agysub = row['agysub']
                    if agysub not in employment_time_series_data:
                        employment_time_series_data[agysub] = []
                    employment_time_series_data[agysub].append({
                        'period': f"March {year}",
                        'year': year,
                        'month': 'March',
                        'employment': int(row['employment_num'])
                    })
                del df
                gc.collect()
            except Exception as e:
                pass
        
        # September data
        sept_file = f"fedscope_employment_September_{year}.parquet"
        sept_path = employment_parquet_dir + sept_file
        if Path(sept_path).exists():
            try:
                df = pd.read_parquet(sept_path)
                df['employment_num'] = df['employment'].apply(clean_employment)
                agency_totals = df.groupby('agysub')['employment_num'].sum().reset_index()
                for _, row in agency_totals.iterrows():
                    agysub = row['agysub']
                    if agysub not in employment_time_series_data:
                        employment_time_series_data[agysub] = []
                    employment_time_series_data[agysub].append({
                        'period': f"September {year}",
                        'year': year,
                        'month': 'September',
                        'employment': int(row['employment_num'])
                    })
                del df
                gc.collect()
            except Exception as e:
                pass
    
    return employment_time_series_data

def get_fiscal_years():
    """Return the fiscal years for analysis."""
    return [2016, 2017, 2018, 2019, 2020, 2021, 2022]

def get_time_series_years():
    """Return the years for time series analysis."""
    return list(range(2013, 2026))

def combine_loading_summaries(emp_summary, sep_summary, acc_summary):
    """Combine all loading summaries into one list."""
    return emp_summary + sep_summary + acc_summary

def combine_tenure_data(employment_tenure_annual, employment_tenure_2plus):
    """Combine tenure annual and 2+ years data."""
    return pd.concat([employment_tenure_annual, employment_tenure_2plus], ignore_index=True)

def load_all_data():
    """Master function to load all data needed for the analysis."""
    fiscal_years = get_fiscal_years()
    time_series_years = get_time_series_years()
    
    employment_annual, employment_tenure_annual, agency_lookups, emp_loading_summary = \
        load_employment_data(fiscal_years, EMPLOYMENT_PARQUET_DIR)
    
    separations_annual, separations_tenure_annual, sep_loading_summary = \
        load_separations_data(fiscal_years, SEPARATIONS_PARQUET_DIR)
    
    accessions_annual, acc_loading_summary = \
        load_accessions_data(fiscal_years, SEPARATIONS_PARQUET_DIR)
    
    employment_tenure_2plus, separations_tenure_2plus = \
        load_tenure_2plus_data(fiscal_years, EMPLOYMENT_PARQUET_DIR, SEPARATIONS_PARQUET_DIR)
    
    monthly_accessions_data, monthly_separations_data = \
        load_monthly_data_for_heatmaps(SEPARATIONS_PARQUET_DIR)
    
    employment_time_series_data = \
        load_employment_time_series(EMPLOYMENT_PARQUET_DIR, time_series_years)
    
    loading_summary = combine_loading_summaries(emp_loading_summary, sep_loading_summary, acc_loading_summary)
    tenure_data = combine_tenure_data(employment_tenure_annual, employment_tenure_2plus)
    
    return {
        'employment_annual': employment_annual,
        'separations_annual': separations_annual,
        'accessions_annual': accessions_annual,
        'tenure_data': tenure_data,
        'separations_tenure_annual': separations_tenure_annual,
        'separations_tenure_2plus': separations_tenure_2plus,
        'agency_lookups': agency_lookups,
        'loading_summary': loading_summary,
        'monthly_accessions_data': monthly_accessions_data,
        'monthly_separations_data': monthly_separations_data,
        'employment_time_series_data': employment_time_series_data
    }
Show code
# Load all data
all_data = load_all_data()

Methodology

How we calculate separation rates:

  • Separation Rate = (Number of Separations ÷ Total Employment) × 100
  • Example: If an agency has 10,000 employees and 1,200 separations in a year, the separation rate is 12%

Data timing: - Employment baseline: September data from the previous calendar year (e.g., for FY2016 we use September 2015 employment) - Separations: Count of all employees who left during the fiscal year - Time period: Analysis covers FY2016-FY2022

Minimum thresholds: - Agencies: 5,000+ employees (except Agricultural Marketing Service shown separately) - States: 1,000+ federal employees

Data Loading Summary

Show code
def create_loading_summary_table(loading_summary):
    """Create a formatted table showing data loading summary."""
    loading_df = pd.DataFrame(loading_summary)
    
    gt_loading = (
        GT(loading_df)
        .tab_header(
            title="Data Loading Summary",
            subtitle="Raw records loaded and aggregated by agency"
        )
        .fmt_number(columns=['Raw Records', 'Aggregated Groups'], decimals=0, use_seps=True)
        .cols_width({
            'Year': '100px',
            'Data Type': '120px',
            'Raw Records': '140px',
            'Aggregated Groups': '160px'
        })
    )
    
    return gt_loading

# Display loading summary
create_loading_summary_table(all_data['loading_summary']).show()
Data Loading Summary
Raw records loaded and aggregated by agency
Year Data Type Raw Records Aggregated Groups
FY2016 Employment (Sept) 2,058,924 525
FY2017 Employment (Sept) 2,097,038 533
FY2018 Employment (Sept) 2,087,747 529
FY2019 Employment (Sept) 2,100,802 529
FY2020 Employment (Sept) 2,132,812 527
FY2021 Employment (Sept) 2,181,106 533
FY2022 Employment (Sept) 2,191,011 537
2015-2019 Separations 1,113,647 2,044
2020-2024 Separations 953,844 1,559
2015-2019 Accessions 1,278,187 2,013
2020-2024 Accessions 1,170,401 1,538
March 2025 Accessions 243,563 970

Data Processing and Calculations

Calculate Annual Separation Rates

Show code
def calculate_annual_separation_rates(employment_annual, separations_annual, agency_lookups):
    """Calculate annual separation rates by merging employment and separations data."""
    # Merge employment and separations by fiscal year
    annual_data = pd.merge(
        employment_annual, 
        separations_annual, 
        on=['fiscal_year', 'agysub'],
        how='outer'
    )
    
    # Fill NaN values
    annual_data['employment'] = annual_data['employment'].fillna(0)
    annual_data['separations'] = annual_data['separations'].fillna(0)
    
    # Calculate separation rate (percentage of separations)
    annual_data['separation_rate'] = np.where(
        annual_data['employment'] > 0,
        (annual_data['separations'] / annual_data['employment']) * 100,
        np.nan
    )
    
    # Filter for meaningful sample sizes - keep agencies that meet threshold in ANY year
    agencies_above_threshold = annual_data[annual_data['employment'] >= MIN_EMPLOYEES]['agysub'].unique()
    annual_data = annual_data[annual_data['agysub'].isin(agencies_above_threshold)]
    annual_data = annual_data.dropna(subset=['separation_rate'])
    
    # Merge with agency lookups
    annual_data = pd.merge(annual_data, agency_lookups, on='agysub', how='left')
    
    return annual_data

def calculate_tenure_separation_rates(tenure_data, separations_tenure_annual, 
                                   separations_tenure_2plus, agency_lookups):
    """Calculate tenure-specific separation rates."""
    # Merge A and B tenure data
    tenure_separations = pd.merge(
        tenure_data[tenure_data['loslvl'].isin(['A', 'B'])],
        separations_tenure_annual,
        on=['fiscal_year', 'agysub', 'loslvl'],
        how='outer'
    )
    
    # Merge C+ tenure data
    tenure_2plus_separations = pd.merge(
        tenure_data[tenure_data['loslvl'] == 'C+'],
        separations_tenure_2plus,
        on=['fiscal_year', 'agysub', 'loslvl'],
        how='outer'
    )
    
    # Combine all tenure data
    all_tenure_data = pd.concat([tenure_separations, tenure_2plus_separations], ignore_index=True)
    
    all_tenure_data['employment'] = all_tenure_data['employment'].fillna(0)
    all_tenure_data['separations'] = all_tenure_data['separations'].fillna(0)
    
    all_tenure_data['separation_rate'] = np.where(
        all_tenure_data['employment'] > 0,
        (all_tenure_data['separations'] / all_tenure_data['employment']) * 100,
        np.nan
    )
    
    all_tenure_data = pd.merge(all_tenure_data, agency_lookups, on='agysub', how='left')
    
    return all_tenure_data

# Calculate rates
annual_data = calculate_annual_separation_rates(
    all_data['employment_annual'], 
    all_data['separations_annual'], 
    all_data['agency_lookups']
)

tenure_data = calculate_tenure_separation_rates(
    all_data['tenure_data'],
    all_data['separations_tenure_annual'],
    all_data['separations_tenure_2plus'],
    all_data['agency_lookups']
)

Analysis Results

Top 50 Agencies with Highest Separation Rates

Show code
def get_top_separation_agencies(annual_data, min_employees=5000, top_n=50):
    """Get top N agencies by average separation rate, filtered by minimum employment."""
    # Calculate average statistics across all years for each agency
    agency_avg_stats = annual_data.groupby(['agysub', 'agysubt']).agg({
        'employment': 'mean',
        'separation_rate': 'mean'
    }).reset_index()
    agency_avg_stats.columns = ['agysub', 'agysubt', 'avg_employment', 'avg_separation_rate']
    
    # Filter for agencies with min_employees+ employees
    agencies_filtered = agency_avg_stats[agency_avg_stats['avg_employment'] >= min_employees]
    
    # Sort agencies by average separation rate and get top N
    return agencies_filtered.nlargest(top_n, 'avg_separation_rate').copy()

def create_annual_separations_table(annual_data, high_separation_agencies):
    """Create pivot table of annual separation rates for specified agencies."""
    # Get the annual data for these agencies
    high_separation_annual = annual_data[
        annual_data['agysub'].isin(high_separation_agencies['agysub'])
    ].copy()
    
    # Create pivot table with fiscal years as columns
    annual_pivot = high_separation_annual.pivot_table(
        index=['agysub', 'agysubt'],
        columns='fiscal_year',
        values='separation_rate',
        aggfunc='mean'
    ).reset_index()
    
    # Calculate average for sorting
    year_cols = [col for col in annual_pivot.columns if isinstance(col, (int, float))]
    annual_pivot['avg_rate'] = annual_pivot[year_cols].mean(axis=1)
    
    # Sort by average rate descending
    annual_pivot = annual_pivot.sort_values('avg_rate', ascending=False)
    
    return annual_pivot, year_cols

def format_annual_separations_table(annual_pivot, year_cols):
    """Format the annual separations table for display."""
    display_data = annual_pivot.copy()
    
    # Select columns for display
    table_cols = ['agysubt'] + year_cols + ['avg_rate']
    display_data = display_data[table_cols]
    
    # Rename columns
    new_columns = ['Agency'] + [f'FY{int(col)}' for col in year_cols] + ['Avg']
    display_data.columns = new_columns
    
    # Round all numeric columns to integers
    for col in display_data.columns[1:]:
        display_data[col] = display_data[col].round(0)
        # Fill NaN with 0 before converting to int to avoid Int64 NA issues
        display_data[col] = display_data[col].fillna(0).astype(int)
    
    # Reset index for clean display
    display_data = display_data.reset_index(drop=True)
    
    # Create GT table
    gt_annual = (
        GT(display_data.head(50))
        .tab_header(
            title="Top 50 Agencies with Highest Separation Rates",
            subtitle="Annual separation rates by fiscal year (percentage) - agencies with 5000+ employees - FY2016-FY2022"
        )
        .fmt_number(columns=list(display_data.columns[1:]), decimals=0, pattern="{x}%")
        .data_color(
            columns=list(display_data.columns[1:]),
            palette=["white", HIGH_SEPARATIONS_COLOR],
            domain=[0, display_data[display_data.columns[1:]].max().max()],
            na_color="lightgray"
        )
        .cols_width({
            'Agency': '400px'
        })
    )
    
    return gt_annual

# Execute the analysis
high_separation_agencies = get_top_separation_agencies(annual_data, MIN_EMPLOYEES, 50)
annual_pivot, year_cols = create_annual_separations_table(annual_data, high_separation_agencies)
gt_annual = format_annual_separations_table(annual_pivot, year_cols)
gt_annual.show()
Top 50 Agencies with Highest Separation Rates
Annual separation rates by fiscal year (percentage) - agencies with 5000+ employees - FY2016-FY2022
Agency FY2016 FY2017 FY2018 FY2019 FY2020 FY2021 FY2022 Avg
SB00-SMALL BUSINESS ADMINISTRATION 19% 27% 71% 33% 40% 40% 63% 42%
IN10-NATIONAL PARK SERVICE 45% 43% 45% 42% 36% 37% 38% 41%
AG11-FOREST SERVICE 37% 38% 37% 37% 34% 33% 33% 35%
AFNG-AIR NATIONAL GUARD UNITS (TITLE 32) 28% 29% 28% 30% 29% 31% 35% 30%
CM63-BUREAU OF THE CENSUS 19% 25% 21% 16% 34% 46% 32% 28%
IN05-BUREAU OF LAND MANAGEMENT 30% 27% 25% 27% 24% 26% 25% 26%
NV18-NAVAL MEDICAL COMMAND 11% 11% 12% 13% 11% 13% 90% 23%
DD16-DEPARTMENT OF DEFENSE EDUCATION ACTIVITY 24% 22% 20% 22% 20% 24% 25% 22%
DD34-DEFENSE COMMISSARY AGENCY 21% 19% 18% 19% 22% 23% 22% 21%
ARNG-ARMY NATIONAL GUARD UNITS (TITLE 32) 17% 19% 20% 23% 19% 21% 23% 20%
AG03-AGRICULTURAL RESEARCH SERVICE 20% 20% 21% 22% 18% 18% 18% 19%
HE37-INDIAN HEALTH SERVICE 18% 16% 16% 15% 14% 15% 15% 15%
HSCB-FEDERAL EMERGENCY MANAGEMENT AGENCY 13% 13% 28% 12% 11% 13% 13% 15%
IN15-U.S. FISH AND WILDLIFE SERVICE 13% 13% 15% 13% 11% 12% 15% 13%
ARMC-U.S. ARMY MEDICAL COMMAND 13% 12% 12% 13% 11% 14% 14% 13%
NV52-COMMANDER, NAVY INSTALLATIONS 11% 12% 12% 12% 12% 14% 15% 13%
AF0J-AIR EDUCATION AND TRAINING COMMAND 13% 9% 10% 12% 10% 11% 21% 12%
IN08-GEOLOGICAL SURVEY 13% 12% 14% 13% 11% 11% 13% 12%
IN07-BUREAU OF RECLAMATION 13% 13% 12% 12% 10% 11% 13% 12%
AG34-ANIMAL AND PLANT HEALTH INSPECTION SERVICE 11% 11% 12% 12% 11% 12% 13% 12%
TR93-INTERNAL REVENUE SERVICE 12% 11% 12% 12% 9% 12% 14% 12%
HSBC-TRANSPORTATION SECURITY ADMINISTRATION 12% 12% 12% 12% 9% 11% 15% 12%
AF1L-AIR MOBILITY COMMAND 11% 9% 10% 12% 9% 11% 18% 12%
ARHR-U.S. ARMY RESERVE COMMAND 11% 11% 10% 10% 11% 11% 14% 11%
DJ01-OFFICES, BOARDS AND DIVISIONS 11% 12% 10% 12% 10% 11% 11% 11%
ARCE-U.S. ARMY CORPS OF ENGINEERS 12% 11% 11% 11% 10% 10% 11% 11%
AF0M-HEADQUARTERS, AIR FORCE RESERVE COMMAND 13% 10% 10% 12% 9% 10% 13% 11%
AF1C-AIR COMBAT COMMAND 10% 9% 10% 11% 9% 11% 16% 11%
IN06-INDIAN AFFAIRS 12% 11% 13% 10% 9% 9% 11% 11%
NV27-U.S. MARINE CORPS 10% 10% 10% 10% 9% 11% 13% 10%
FD00-FEDERAL DEPOSIT INSURANCE CORPORATION 12% 10% 10% 9% 9% 10% 12% 10%
ARBA-U.S. ARMY INSTALLATION MANAGEMENT COMMAND 12% 12% 12% 12% 0% 0% 0% 10%
VATA-VETERANS HEALTH ADMINISTRATION 10% 9% 9% 9% 9% 10% 11% 10%
DD04-DEFENSE INFORMATION SYSTEMS AGENCY 9% 7% 11% 9% 10% 10% 10% 10%
NV23-NAVAL SUPPLY SYSTEMS COMMAND 10% 9% 10% 10% 8% 9% 11% 10%
DD35-DEFENSE FINANCE AND ACCOUNTING SERVICE 9% 8% 9% 8% 7% 14% 10% 9%
HSAC-U.S. COAST GUARD 10% 8% 9% 9% 9% 9% 11% 9%
DN00-DEPARTMENT OF ENERGY 9% 10% 9% 8% 8% 10% 10% 9%
DJ09-EXECUTIVE OFFICE FOR U.S. ATTORNEYS AND THE OFFICES OF THE U.S. ATTORNEYS 8% 9% 8% 10% 8% 9% 11% 9%
DD07-DEFENSE LOGISTICS AGENCY 9% 8% 8% 9% 9% 9% 11% 9%
NV25-NAVAL FACILITIES ENGINEERING COMMAND 9% 8% 9% 9% 9% 10% 10% 9%
DD63-DEFENSE CONTRACT MANAGEMENT AGENCY 10% 8% 9% 9% 10% 7% 9% 9%
ARX7-U.S. ARMY TANK-AUTOMOTIVE AND ARMAMENT COMMAND (TACOM) 8% 6% 9% 7% 7% 9% 13% 8%
AG16-NATURAL RESOURCES CONSERVATION SERVICE 8% 8% 9% 8% 8% 8% 10% 8%
ARTC-U.S. ARMY TRAINING AND DOCTRINE COMMAND 8% 8% 7% 8% 9% 9% 10% 8%
AG37-FOOD SAFETY AND INSPECTION SERVICE 7% 7% 8% 8% 9% 9% 10% 8%
ARXQ-U.S. ARMY JOINT MUNITIONS COMMAND 8% 7% 8% 9% 8% 8% 11% 8%
HE38-NATIONAL INSTITUTES OF HEALTH 9% 8% 9% 8% 7% 8% 8% 8%
HE10-OFFICE OF THE SECRETARY OF HEALTH AND HUMAN SERVICES 6% 9% 11% 8% 5% 10% 8% 8%
DJ03-BUREAU OF PRISONS/FEDERAL PRISON SYSTEM 7% 8% 7% 7% 7% 8% 10% 8%

Tenure-Specific Separation Analysis

Show code
def create_tenure_analysis_table(tenure_data, high_separation_agencies):
    """Create tenure-specific separation analysis for specified agencies."""
    # Get tenure data for the same agencies
    tenure_summary_filtered = tenure_data[
        tenure_data['agysub'].isin(high_separation_agencies['agysub'])
    ].groupby(['agysub', 'agysubt', 'loslvl']).agg({
        'employment': 'mean',
        'separations': 'mean',
        'separation_rate': 'mean'
    }).reset_index()
    
    # Pivot tenure data to show side-by-side comparison
    tenure_pivot = tenure_summary_filtered.pivot_table(
        index=['agysub', 'agysubt'],
        columns='loslvl',
        values='separation_rate',
        aggfunc='mean'
    ).reset_index()
    
    # Add overall separation rate from the high_separation_agencies data
    overall_rates = high_separation_agencies[['agysub', 'avg_separation_rate', 'avg_employment']].copy()
    tenure_pivot = pd.merge(
        tenure_pivot,
        overall_rates,
        on='agysub',
        how='left'
    )
    
    # Sort by average separation rate to match the order from the previous table
    tenure_pivot = tenure_pivot.sort_values('avg_separation_rate', ascending=False).head(50)
    
    return tenure_pivot

def format_tenure_table(tenure_pivot):
    """Format the tenure analysis table for display."""
    # Prepare display data
    tenure_cols = ['agysubt', 'A', 'B', 'C+', 'avg_separation_rate', 'avg_employment']
    available_cols = [col for col in tenure_cols if col in tenure_pivot.columns]
    tenure_display = tenure_pivot[available_cols].copy()
    
    # Rename columns
    col_mapping = {
        'agysubt': 'Agency',
        'A': '<1 Year', 
        'B': '1-2 Years',
        'C+': '2+ Years',
        'avg_separation_rate': 'Overall',
        'avg_employment': 'Average Employment'
    }
    tenure_display.rename(columns=col_mapping, inplace=True)
    
    # Round numeric columns to integers
    separation_cols = ['<1 Year', '1-2 Years', '2+ Years', 'Overall']
    for col in separation_cols:
        if col in tenure_display.columns:
            tenure_display[col] = tenure_display[col].round(0).fillna(0).astype(int)
    if 'Average Employment' in tenure_display.columns:
        tenure_display['Average Employment'] = tenure_display['Average Employment'].round(0).fillna(0).astype(int)
    
    # Reset index
    tenure_display = tenure_display.reset_index(drop=True)
    
    # Find the maximum value across all separation columns for color scaling
    separation_cols_present = [col for col in separation_cols if col in tenure_display.columns]
    max_separations = tenure_display[separation_cols_present].max().max()
    
    # Create GT table
    gt_tenure = (
        GT(tenure_display)
        .tab_header(
            title="Annual Agency Separations by Employee Tenure",
            subtitle="Average annual separation rates by tenure group and overall (FY2016-FY2022)"
        )
        .fmt_number(columns=separation_cols_present, decimals=0, pattern="{x}%")
        .fmt_number(columns=['Average Employment'], decimals=0, use_seps=True)
        .data_color(
            columns=separation_cols_present,
            palette=["white", HIGH_SEPARATIONS_COLOR],
            domain=[0, max_separations],
            na_color="lightgray"
        )
        .cols_width({
            'Agency': '350px',
            '<1 Year': '80px',
            '1-2 Years': '80px',
            '2+ Years': '80px',
            'Overall': '80px',
            'Average Employment': '120px'
        })
    )
    
    return gt_tenure

# Execute the analysis
tenure_pivot = create_tenure_analysis_table(tenure_data, high_separation_agencies)
gt_tenure = format_tenure_table(tenure_pivot)
gt_tenure.show()
Annual Agency Separations by Employee Tenure
Average annual separation rates by tenure group and overall (FY2016-FY2022)
Agency <1 Year 1-2 Years 2+ Years Overall Average Employment
SB00-SMALL BUSINESS ADMINISTRATION 107% 47% 18% 42% 6,126
IN10-NATIONAL PARK SERVICE 117% 93% 21% 41% 21,673
AG11-FOREST SERVICE 96% 72% 16% 35% 36,646
AFNG-AIR NATIONAL GUARD UNITS (TITLE 32) 76% 29% 17% 30% 20,885
CM63-BUREAU OF THE CENSUS 66% 26% 12% 28% 17,452
IN05-BUREAU OF LAND MANAGEMENT 103% 60% 14% 26% 10,468
NV18-NAVAL MEDICAL COMMAND 29% 27% 22% 23% 11,106
DD16-DEPARTMENT OF DEFENSE EDUCATION ACTIVITY 30% 49% 13% 22% 13,543
DD34-DEFENSE COMMISSARY AGENCY 61% 30% 12% 21% 12,748
ARNG-ARMY NATIONAL GUARD UNITS (TITLE 32) 45% 20% 14% 20% 27,237
AG03-AGRICULTURAL RESEARCH SERVICE 55% 43% 9% 19% 7,104
HE37-INDIAN HEALTH SERVICE 48% 20% 10% 15% 13,687
HSCB-FEDERAL EMERGENCY MANAGEMENT AGENCY 34% 12% 9% 15% 18,546
IN15-U.S. FISH AND WILDLIFE SERVICE 60% 31% 8% 13% 8,679
ARMC-U.S. ARMY MEDICAL COMMAND 17% 20% 11% 13% 38,750
NV52-COMMANDER, NAVY INSTALLATIONS 21% 21% 11% 13% 11,112
AF0J-AIR EDUCATION AND TRAINING COMMAND 33% 15% 10% 12% 15,045
IN08-GEOLOGICAL SURVEY 41% 23% 9% 12% 8,105
IN07-BUREAU OF RECLAMATION 39% 16% 10% 12% 5,314
AG34-ANIMAL AND PLANT HEALTH INSPECTION SERVICE 32% 21% 8% 12% 8,208
TR93-INTERNAL REVENUE SERVICE 140% 25% 8% 12% 76,615
HSBC-TRANSPORTATION SECURITY ADMINISTRATION 33% 16% 8% 12% 60,896
AF1L-AIR MOBILITY COMMAND 30% 14% 10% 12% 7,989
ARHR-U.S. ARMY RESERVE COMMAND 11% 13% 11% 11% 8,512
DJ01-OFFICES, BOARDS AND DIVISIONS 21% 20% 9% 11% 6,425
ARCE-U.S. ARMY CORPS OF ENGINEERS 38% 16% 9% 11% 35,022
AF0M-HEADQUARTERS, AIR FORCE RESERVE COMMAND 13% 13% 10% 11% 11,952
AF1C-AIR COMBAT COMMAND 17% 16% 10% 11% 11,337
IN06-INDIAN AFFAIRS 36% 18% 8% 11% 7,008
NV27-U.S. MARINE CORPS 16% 14% 10% 10% 18,364
FD00-FEDERAL DEPOSIT INSURANCE CORPORATION 42% 19% 8% 10% 6,072
ARBA-U.S. ARMY INSTALLATION MANAGEMENT COMMAND 28% 16% 8% 10% 23,787
VATA-VETERANS HEALTH ADMINISTRATION 23% 12% 7% 10% 354,495
DD04-DEFENSE INFORMATION SYSTEMS AGENCY 15% 10% 9% 10% 5,944
NV23-NAVAL SUPPLY SYSTEMS COMMAND 12% 9% 10% 10% 6,772
DD35-DEFENSE FINANCE AND ACCOUNTING SERVICE 16% 11% 9% 9% 11,470
HSAC-U.S. COAST GUARD 10% 10% 9% 9% 8,610
DN00-DEPARTMENT OF ENERGY 14% 12% 9% 9% 13,258
DJ09-EXECUTIVE OFFICE FOR U.S. ATTORNEYS AND THE OFFICES OF THE U.S. ATTORNEYS 9% 12% 9% 9% 11,479
DD07-DEFENSE LOGISTICS AGENCY 16% 11% 8% 9% 24,611
NV25-NAVAL FACILITIES ENGINEERING COMMAND 14% 9% 9% 9% 15,969
DD63-DEFENSE CONTRACT MANAGEMENT AGENCY 9% 8% 9% 9% 11,299
ARX7-U.S. ARMY TANK-AUTOMOTIVE AND ARMAMENT COMMAND (TACOM) 13% 15% 8% 8% 11,677
AG16-NATURAL RESOURCES CONSERVATION SERVICE 21% 16% 7% 8% 9,932
ARTC-U.S. ARMY TRAINING AND DOCTRINE COMMAND 13% 11% 8% 8% 10,554
AG37-FOOD SAFETY AND INSPECTION SERVICE 19% 11% 7% 8% 9,196
ARXQ-U.S. ARMY JOINT MUNITIONS COMMAND 16% 12% 7% 8% 5,401
HE38-NATIONAL INSTITUTES OF HEALTH 25% 12% 6% 8% 18,600
HE10-OFFICE OF THE SECRETARY OF HEALTH AND HUMAN SERVICES 121% 13% 4% 8% 9,610
DJ03-BUREAU OF PRISONS/FEDERAL PRISON SYSTEM 17% 8% 7% 8% 37,179

Average Number of Separations vs. Separation Rate

Show code
def create_scatter_plot_data(annual_data, min_employees=5000):
    """Prepare data for average separations vs separation rate scatter plot."""
    scatter_data = annual_data.groupby(['agysub', 'agysubt']).agg({
        'employment': 'mean',
        'separations': 'mean',
        'separation_rate': 'mean'
    }).reset_index()
    scatter_data.columns = ['agysub', 'agysubt', 'avg_employment', 'avg_separations', 'avg_separation_rate']
    scatter_data = scatter_data[scatter_data['avg_employment'] >= min_employees]
    scatter_data['avg_separation_rate'] = scatter_data['avg_separation_rate'].round(0)
    scatter_data['avg_separations'] = scatter_data['avg_separations'].round(0)
    scatter_data['avg_employment'] = scatter_data['avg_employment'].round(0)
    return scatter_data

def create_scatter_plot(scatter_data):
    """Create interactive scatter plot of average separations vs separation rate."""
    fig = px.scatter(
        scatter_data, 
        x='avg_separations', 
        y='avg_separation_rate',
        hover_data={'agysubt': True, 'avg_separations': ':,.0f', 'avg_separation_rate': ':.0f'},
        labels={
            'avg_separations': 'Average Annual Number of Separations',
            'avg_separation_rate': 'Average Annual Separation Rate (%)',
            'agysubt': 'Agency'
        },
        title='Average Number of Separations vs. Separation Rate'
    )
    
    # Update layout
    fig.update_traces(
        marker=dict(size=10, color=HIGH_SEPARATIONS_COLOR, opacity=0.7),
        hovertemplate='<b>%{customdata[0]}</b><br>' +
                      'Average Separations: %{x:,.0f}<br>' +
                      'Separation Rate: %{y:.0f}%<br>' +
                      '<extra></extra>'
    )
    
    fig.update_layout(
        xaxis_title="Average Annual Number of Separations",
        yaxis_title="Average Annual Separation Rate (%)",
        hovermode='closest',
        height=600,
        annotations=[
            dict(
                text="Federal Agencies with 5,000+ Employees (FY2016-FY2022)",
                showarrow=False,
                xref="paper",
                yref="paper",
                x=0.5,
                y=1.05,
                xanchor="center",
                yanchor="bottom",
                font=dict(size=12, color="gray")
            )
        ]
    )
    
    return fig

# Execute the analysis
scatter_data = create_scatter_plot_data(annual_data, MIN_EMPLOYEES)
fig = create_scatter_plot(scatter_data)
fig.show()

Detailed Analysis of Top 10 Agencies with Most Separations

Net Change Heatmaps

Show code
def get_top_10_separation_agencies(annual_data, min_employees=5000):
    """Get top 10 agencies by separation rate."""
    agency_stats = annual_data.groupby(['agysub', 'agysubt']).agg({
        'employment': 'mean',
        'separation_rate': 'mean'
    }).reset_index()
    agency_stats.columns = ['agysub', 'agysubt', 'avg_employment', 'avg_separation_rate']
    agency_stats = agency_stats[agency_stats['avg_employment'] >= min_employees]
    return agency_stats.nlargest(10, 'avg_separation_rate').copy()

def create_agency_heatmap(agency_code, agency_name, avg_separation_rate, 
                         monthly_accessions_data, monthly_separations_data):
    """Create net change heatmap for a single agency."""
    # Get monthly data for this agency
    monthly_acc_data = monthly_accessions_data.get(agency_code, [])
    monthly_sep_data = monthly_separations_data.get(agency_code, [])
    
    if monthly_acc_data and monthly_sep_data:
        # Combine monthly data
        monthly_accessions = pd.concat(monthly_acc_data).groupby(['year', 'month'])['count'].sum().reset_index()
        monthly_separations = pd.concat(monthly_sep_data).groupby(['year', 'month'])['count'].sum().reset_index()
        
        # Merge accessions and separations
        monthly_combined = pd.merge(
            monthly_accessions, 
            monthly_separations, 
            on=['year', 'month'], 
            how='outer',
            suffixes=('_acc', '_sep')
        ).fillna(0)
        
        monthly_combined['accessions'] = monthly_combined['count_acc'].astype(int)
        monthly_combined['separations'] = monthly_combined['count_sep'].astype(int)
        monthly_combined['net_change'] = monthly_combined['accessions'] - monthly_combined['separations']
        monthly_combined = monthly_combined.drop(['count_acc', 'count_sep'], axis=1)
        
        # Create pivot table for net change
        net_pivot = monthly_combined.pivot(index='month', columns='year', values='net_change')
        
        # Ensure all years 2016-2025 are present
        all_years = list(range(2016, 2026))
        for year in all_years:
            if year not in net_pivot.columns:
                net_pivot[year] = np.nan
        net_pivot = net_pivot[all_years]
        
        # Create dataframe for GT table
        net_df = net_pivot.reset_index()
        net_df['month'] = pd.to_datetime(net_df['month'], format='%m').dt.strftime('%B')
        
        # Convert year columns to strings
        net_df.columns = ['month'] + [str(col) for col in net_df.columns[1:]]
        
        # Calculate max absolute value for symmetric color scale
        max_abs_val = abs(net_pivot.fillna(0).values).max()
        
        # Create GT table
        year_columns = [col for col in net_df.columns if col != 'month']
        gt_net = (
            GT(net_df)
            .tab_header(
                title=f"{agency_name}",
                subtitle=f"Monthly Net Change (2016-2025) - Average Separation Rate: {avg_separation_rate:.0f}%"
            )
            .fmt_number(columns=year_columns, decimals=0, use_seps=True)
            .data_color(
                columns=year_columns,
                palette=[NET_NEGATIVE_COLOR, "white", NET_POSITIVE_COLOR],
                domain=[-max_abs_val, max_abs_val],
                na_color="lightgray"
            )
            .cols_width({
                'month': '100px'
            })
        )
        
        return gt_net
    return None

def display_all_heatmaps(annual_data, monthly_accessions_data, monthly_separations_data):
    """Display heatmaps for all top 10 agencies."""
    high_separation_agencies = get_top_10_separation_agencies(annual_data, MIN_EMPLOYEES)
    
    for idx, agency_row in high_separation_agencies.iterrows():
        agency_code = agency_row['agysub']
        agency_name = agency_row['agysubt']
        avg_separation_rate = agency_row['avg_separation_rate']
        
        gt_table = create_agency_heatmap(
            agency_code, agency_name, avg_separation_rate,
            monthly_accessions_data, 
            monthly_separations_data
        )
        
        if gt_table:
            gt_table.show()

# Execute the heatmap display
display_all_heatmaps(annual_data, all_data['monthly_accessions_data'], all_data['monthly_separations_data'])
SB00-SMALL BUSINESS ADMINISTRATION
Monthly Net Change (2016-2025) - Average Separation Rate: 42%
month 2016 2017 2018 2019 2020 2021 2022 2023 2024 2025
January −35 −169 −226 −333 −32 −511 76 507 −189
February −8 −42 −393 −176 −23 −294 79 162 −65
March −29 −32 −439 −160 165 −272 −319 −67 −118
April −77 −87 −432 −38 424 −96 −303 25 −20
May −31 −16 −371 −22 675 −49 −3,353 20 22
June −19 −6 −98 −42 779 187 −95 71 43
July 9 −30 −103 30 1,071 168 −115 −70 −96
August 25 −1 −28 −15 1,327 117 −32 −33 16
September 302 1,420 212 87 842 169 −58 −4 −40
October 140 1,592 187 −156 463 163 451 355
November −150 425 80 −69 554 139 630 279
December −56 75 −275 −49 −233 32 459 53
IN10-NATIONAL PARK SERVICE
Monthly Net Change (2016-2025) - Average Separation Rate: 41%
month 2016 2017 2018 2019 2020 2021 2022 2023 2024 2025
January −113 46 −185 −171 −52 −52 44 160 98
February −13 −43 −76 111 −15 42 −25 86 −160
March 269 −42 −36 312 359 298 241 97 −144
April 1,150 2,057 2,172 2,022 312 1,641 1,542 1,590 1,495
May 4,056 2,752 2,992 2,899 2,626 2,732 2,649 2,468 2,392
June 852 871 854 935 1,453 607 772 673 704
July 143 171 113 32 283 −80 35 109 −9
August −547 −496 −586 −622 −225 −392 −477 −395 −400
September −2,299 −2,543 −2,383 −2,007 −1,400 −1,557 −1,495 −1,458 −1,246
October −2,192 −2,066 −2,469 −2,236 −1,631 −2,154 −1,973 −1,628
November −871 −1,031 −797 −827 −813 −799 −657 −927
December −378 −423 −323 −230 −377 −244 −276 −12
AG11-FOREST SERVICE
Monthly Net Change (2016-2025) - Average Separation Rate: 35%
month 2016 2017 2018 2019 2020 2021 2022 2023 2024 2025
January −167 251 −211 −299 −182 −153 −89 290 −255
February 29 −65 −17 −134 −33 64 90 99 −168
March 433 108 86 520 1,078 561 684 724 68
April 1,708 4,094 3,694 3,366 2,338 2,623 2,570 2,522 1,510
May 7,366 4,879 5,001 4,618 5,266 4,190 3,206 2,937 2,538
June 970 1,011 888 1,074 1,140 787 1,068 1,671 1,392
July 103 −20 53 −33 −38 −314 390 407 −148
August −1,351 −1,315 −1,195 −1,208 −856 −1,102 −800 −840 −892
September −1,955 −2,549 −2,177 −2,027 −1,389 −1,474 −1,254 −865 −1,004
October −4,103 −3,685 −3,398 −2,935 −2,432 −2,654 −1,754 −1,642
November −2,128 −2,342 −2,238 −2,163 −2,306 −2,017 −1,479 −1,137
December −716 −675 −697 −852 −873 −963 −757 −480
AFNG-AIR NATIONAL GUARD UNITS (TITLE 32)
Monthly Net Change (2016-2025) - Average Separation Rate: 30%
month 2016 2017 2018 2019 2020 2021 2022 2023 2024 2025
January 115 58 −21 92 −43 65 −47 184 −66
February 196 −33 173 245 25 28 −150 62 −38
March −69 −38 −158 −89 3 −82 −365 −95 −319
April −64 80 232 −131 −136 −170 −300 3 −139
May 398 297 119 −38 −142 73 −181 21 36
June 123 118 −66 −168 −128 −20 −163 34 136
July −20 −31 −83 −42 −140 −155 −224 105 −32
August −173 −316 −285 −241 −94 −27 −197 −121 −218
September −747 −1,154 −506 −462 −559 −742 −739 −554 −610
October 724 1,127 478 294 411 113 175 284
November 227 158 398 45 165 −10 39 68
December −148 −117 −128 −137 −90 −242 −167 −133
CM63-BUREAU OF THE CENSUS
Monthly Net Change (2016-2025) - Average Separation Rate: 28%
month 2016 2017 2018 2019 2020 2021 2022 2023 2024 2025
January −55 326 26 167 766 −268 −84 318 −129
February 98 76 −13 194 798 169 −372 692 −392
March 232 149 −56 178 831 667 −505 1,177 −415
April 174 373 185 1,055 104 840 −173 525 −37
May 126 465 63 1,289 121 442 −98 119 72
June 110 381 70 999 −48 −112 −175 −586 −94
July 77 −97 67 821 −381 −191 −140 −271 −139
August −67 −139 11 438 −347 −294 −65 −354 −7
September 170 −279 0 434 −1,324 −1,242 −125 −360 −3
October 143 −352 210 207 −1,499 −532 −52 −20
November 220 −582 95 −206 −1,734 107 59 23
December 17 −100 −55 353 −377 121 177 15
IN05-BUREAU OF LAND MANAGEMENT
Monthly Net Change (2016-2025) - Average Separation Rate: 26%
month 2016 2017 2018 2019 2020 2021 2022 2023 2024 2025
January −53 115 −53 −55 −55 −7 −32 35 38
February 24 67 −23 −19 −65 6 24 55 −83
March 35 −46 −10 85 63 95 60 81 −47
April 227 376 449 382 334 310 307 354 290
May 1,296 923 929 911 1,036 949 892 797 676
June 323 246 215 286 309 282 174 256 327
July 5 2 52 56 13 −50 0 107 64
August −306 −283 −292 −301 −209 −204 −246 −154 −174
September −563 −621 −445 −501 −343 −349 −274 −259 −138
October −587 −417 −567 −438 −599 −597 −436 −347
November −183 −215 −186 −261 −299 −219 −220 −177
December −87 −149 −148 −141 −157 −173 −174 −54
NV18-NAVAL MEDICAL COMMAND
Monthly Net Change (2016-2025) - Average Separation Rate: 23%
month 2016 2017 2018 2019 2020 2021 2022 2023 2024 2025
January −33 −7 15 49 −50 −12 −6 2 −2
February 100 −48 41 8 −60 −12 0 −1 13
March 61 −70 −18 23 30 51 16 3 −15
April 14 0 71 94 −28 −10 −13 −10 −2
May 80 39 19 −11 −13 −14 −597 −1 6
June 38 0 9 −41 3 −34 −23 −3 14
July 35 −9 28 −5 −19 −90 −3,209 −14 16
August 27 8 19 −72 −11 8 −3,724 −17 2
September 4 −25 17 49 −28 −21 −609 19 −4
October 76 58 73 14 −11 −21 −135 15
November 55 41 47 −17 24 −91 4 6
December −51 −75 −77 −83 −98 −114 −21 −1
DD16-DEPARTMENT OF DEFENSE EDUCATION ACTIVITY
Monthly Net Change (2016-2025) - Average Separation Rate: 22%
month 2016 2017 2018 2019 2020 2021 2022 2023 2024 2025
January 112 56 100 179 162 −16 197 263 168
February 101 −60 208 93 60 92 67 117 158
March 77 46 150 78 94 145 49 57 −72
April 33 51 170 203 22 61 66 82 120
May −3 −66 −1 −15 −81 −17 −14 7 13
June −1,014 −1,031 −734 −1,120 −804 −777 −736 −774 −872
July −305 −300 −266 −192 −260 −373 −389 −373 −303
August 164 274 486 295 227 438 344 422 625
September 49 19 116 187 109 141 201 221 248
October 169 170 209 189 111 125 214 246
November 129 94 154 186 168 −53 150 229
December 49 4 20 72 −20 −6 98 165
DD34-DEFENSE COMMISSARY AGENCY
Monthly Net Change (2016-2025) - Average Separation Rate: 21%
month 2016 2017 2018 2019 2020 2021 2022 2023 2024 2025
January −17 14 −73 −3 6 −7 −32 115 31
February 10 −79 3 −29 −11 5 69 37 56
March 28 −167 −27 −73 356 −137 37 74 −180
April 7 −109 28 23 674 −166 −8 26 42
May 106 227 30 −94 169 −150 25 −25 −7
June 171 82 −109 −78 −15 −56 8 −26 42
July 6 34 −95 −72 −112 −90 −31 53 105
August −89 −74 −91 −153 −250 2 31 28 −49
September −145 −183 −133 −42 −128 −42 9 4 −6
October 129 141 −38 −33 −105 −57 13 67
November 130 −27 −31 −28 19 −103 45 23
December −38 −33 −94 −29 −44 −81 −73 53
ARNG-ARMY NATIONAL GUARD UNITS (TITLE 32)
Monthly Net Change (2016-2025) - Average Separation Rate: 20%
month 2016 2017 2018 2019 2020 2021 2022 2023 2024 2025
January −42 57 25 49 68 178 −119 365 47
February 30 14 194 105 57 −33 −152 126 79
March −19 −32 −12 53 299 −251 −195 26 −186
April −54 203 482 39 114 −33 −111 72 37
May 225 109 314 30 −19 −23 −131 38 −15
June 82 195 222 −34 −11 −128 −100 44 120
July 8 31 24 10 27 −203 −74 229 5
August −11 −54 −96 −135 163 113 −101 46 −67
September −380 −586 −330 −222 −321 −534 −305 −266 −267
October 257 442 332 257 353 −4 184 233
November −34 83 112 191 326 −56 93 69
December −59 10 −59 −38 67 −39 −22 125

Employment Over Time

Show code
def create_employment_graph(agency_code, agency_name, employment_time_series_data):
    """Create employment over time graph for a single agency."""
    employment_timeline = employment_time_series_data.get(agency_code, [])
    
    if employment_timeline:
        # Convert to DataFrame and sort
        emp_df = pd.DataFrame(employment_timeline)
        emp_df['sort_key'] = emp_df['year'] * 10 + emp_df['month'].map({'March': 1, 'September': 2})
        emp_df = emp_df.sort_values('sort_key')
        
        # Create line graph
        fig, ax = plt.subplots(figsize=(12, 6))
        
        # Separate March and September data for different colors
        march_data = emp_df[emp_df['month'] == 'March']
        sept_data = emp_df[emp_df['month'] == 'September']
        
        # Plot lines
        ax.plot(emp_df['period'], emp_df['employment'], 
                'k-', alpha=0.3, linewidth=1)
        
        # Plot points
        if not march_data.empty:
            ax.scatter(march_data['period'], march_data['employment'], 
                      color='blue', s=100, label='March', zorder=5)
        if not sept_data.empty:
            ax.scatter(sept_data['period'], sept_data['employment'], 
                      color='orange', s=100, label='September', zorder=5)
        
        # Formatting
        ax.set_xlabel('Period', fontsize=12)
        ax.set_ylabel('Total Employment', fontsize=12)
        ax.set_title(f'{agency_name} Employment Over Time', fontsize=14, fontweight='bold')
        
        # Rotate x-axis labels
        plt.xticks(rotation=45, ha='right')
        
        # Add grid
        ax.grid(True, alpha=0.3)
        
        # Set y-axis to start at 0
        ax.set_ylim(bottom=0)
        
        # Format y-axis with commas
        ax.yaxis.set_major_formatter(plt.FuncFormatter(lambda x, p: f'{int(x):,}'))
        
        # Add legend
        ax.legend()
        
        # Tight layout
        plt.tight_layout()
        plt.show()

def display_all_employment_graphs(annual_data, employment_time_series_data):
    """Display employment graphs for all top 10 agencies."""
    high_separation_agencies = get_top_10_separation_agencies(annual_data, MIN_EMPLOYEES)
    
    for idx, agency_row in high_separation_agencies.iterrows():
        agency_code = agency_row['agysub']
        agency_name = agency_row['agysubt']
        
        create_employment_graph(agency_code, agency_name, employment_time_series_data)

# Execute the employment graph display
display_all_employment_graphs(annual_data, all_data['employment_time_series_data'])

State-Level Analysis

Show code
# State mapping from numeric codes to abbreviations
state_mapping = {
    '01': 'AL', '02': 'AK', '04': 'AZ', '05': 'AR', '06': 'CA',
    '08': 'CO', '09': 'CT', '10': 'DE', '11': 'DC', '12': 'FL',
    '13': 'GA', '15': 'HI', '16': 'ID', '17': 'IL', '18': 'IN',
    '19': 'IA', '20': 'KS', '21': 'KY', '22': 'LA', '23': 'ME',
    '24': 'MD', '25': 'MA', '26': 'MI', '27': 'MN', '28': 'MS',
    '29': 'MO', '30': 'MT', '31': 'NE', '32': 'NV', '33': 'NH',
    '34': 'NJ', '35': 'NM', '36': 'NY', '37': 'NC', '38': 'ND',
    '39': 'OH', '40': 'OK', '41': 'OR', '42': 'PA', '44': 'RI',
    '45': 'SC', '46': 'SD', '47': 'TN', '48': 'TX', '49': 'UT',
    '50': 'VT', '51': 'VA', '53': 'WA', '54': 'WV', '55': 'WI',
    '56': 'WY'
}

# Load state-level employment data
state_employment_all = []
fiscal_years = get_fiscal_years()

for fy in fiscal_years:
    baseline_year = fy - 1
    file_path = f"fedscope_employment_September_{baseline_year}.parquet"
    
    try:
        df = pd.read_parquet(EMPLOYMENT_PARQUET_DIR + file_path)
        df['employment_num'] = df['employment'].apply(clean_employment)
        
        # Filter for US states only
        df_states = df[df['loc'].isin(state_mapping.keys())].copy()
        df_states['state'] = df_states['loc'].map(state_mapping)
        
        # Aggregate by state
        state_grouped = df_states.groupby(['state']).agg({
            'employment_num': 'sum'
        }).reset_index()
        state_grouped['fiscal_year'] = fy
        state_grouped.rename(columns={'employment_num': 'employment'}, inplace=True)
        
        state_employment_all.append(state_grouped)
        
    except Exception as e:
        print(f"Error loading FY{fy}: {e}")

employment_state = pd.concat(state_employment_all, ignore_index=True)

# Load state-level separations data
separations_files = [
    "fedscope_separations_FY2015-2019.parquet",
    "fedscope_separations_FY2020-2024.parquet"
]

state_separations_all = []

for file in separations_files:
    try:
        df = pd.read_parquet(SEPARATIONS_PARQUET_DIR + file)
        
        if 'loc' not in df.columns:
            continue
            
        df['fiscal_year'] = df['efdate'].apply(calculate_fiscal_year)
        df = df[df['fiscal_year'].isin(fiscal_years)]
        
        # Filter for US states only
        df_states = df[df['loc'].isin(state_mapping.keys())].copy()
        df_states['state'] = df_states['loc'].map(state_mapping)
        
        # Aggregate by state and fiscal year
        state_grouped = df_states.groupby(['fiscal_year', 'state']).size().reset_index(name='separations')
        
        state_separations_all.append(state_grouped)
        
    except Exception as e:
        print(f"Error: {e}")

separations_state = pd.concat(state_separations_all, ignore_index=True)

# Merge employment and separations
state_annual = pd.merge(
    employment_state,
    separations_state,
    on=['fiscal_year', 'state'],
    how='outer'
)

# Fill NaN values
state_annual['employment'] = state_annual['employment'].fillna(0)
state_annual['separations'] = state_annual['separations'].fillna(0)

# Calculate separation rate
state_annual['separation_rate'] = np.where(
    state_annual['employment'] > 0,
    (state_annual['separations'] / state_annual['employment']) * 100,
    np.nan
)

# Filter for meaningful sample sizes (at least 1000 federal employees)
state_annual = state_annual[state_annual['employment'] >= 1000]
state_annual = state_annual.dropna(subset=['separation_rate'])

# Calculate average statistics by state
state_avg_stats = state_annual.groupby('state').agg({
    'employment': 'mean',
    'separations': 'mean', 
    'separation_rate': 'mean'
}).reset_index()

state_avg_stats.columns = ['State', 'Avg Employment', 'Avg Separations', 'Avg Separation Rate']

# Sort by separation rate
state_avg_stats = state_avg_stats.sort_values('Avg Separation Rate', ascending=False)

State Separation Rates Map

Show code
# Create map data
map_data = state_avg_stats.copy()
map_data.rename(columns={'State': 'state', 'Avg Separation Rate': 'separation_rate'}, inplace=True)
map_data['separation_rate'] = map_data['separation_rate'].round(1)

# Create choropleth
fig = px.choropleth(
    map_data,
    locations='state',
    locationmode='USA-states',
    color='separation_rate',
    color_continuous_scale='Reds',
    scope='usa',
    labels={'separation_rate': 'Avg Separation Rate (%)'},
    title='Federal Employee Separation Rates by State (FY2016-2022)',
    hover_data={'separation_rate': ':.1f'}
)

fig.update_layout(
    title={
        'text': 'Federal Employee Separation Rates by State (FY2016-2022)',
        'x': 0.5,
        'xanchor': 'center',
        'font': {'size': 16}
    },
    geo=dict(
        bgcolor='rgba(0,0,0,0)',
        lakecolor='rgb(255, 255, 255)',
    ),
    width=1100,
    height=600
)

fig.show()

State Separation Rates by Year

Show code
# Get top 15 states by separation rate
top_states = state_avg_stats.head(15)['State'].tolist()

# Filter data for these states
annual_data = state_annual[state_annual['state'].isin(top_states)].copy()

# Create pivot table for annual rates
annual_pivot = annual_data.pivot_table(
    index='state',
    columns='fiscal_year',
    values='separation_rate',
    aggfunc='mean'
)

# Get summary stats for these states
summary_stats = state_avg_stats[state_avg_stats['State'].isin(top_states)].set_index('State')

# Merge annual rates with summary stats
combined_data = annual_pivot.merge(
    summary_stats[['Avg Employment', 'Avg Separations', 'Avg Separation Rate']], 
    left_index=True, 
    right_index=True
)

# Sort by average separation rate
combined_data = combined_data.sort_values('Avg Separation Rate', ascending=False)

# Reset index to make State a column
combined_data = combined_data.reset_index()
combined_data.rename(columns={'index': 'State'}, inplace=True)

# Prepare columns for display
year_columns = [col for col in combined_data.columns if isinstance(col, (int, float)) and col < 2100]
year_columns = sorted(year_columns)

# Select and order columns
display_columns = ['State'] + year_columns + ['Avg Separation Rate', 'Avg Employment', 'Avg Separations']
combined_data = combined_data[display_columns]

# Rename year columns
column_rename = {'State': 'State'}
for year in year_columns:
    column_rename[year] = f'FY{int(year)}'
column_rename['Avg Separation Rate'] = 'Avg Rate'
column_rename['Avg Employment'] = 'Avg Employment'
column_rename['Avg Separations'] = 'Avg Separations'

combined_data.rename(columns=column_rename, inplace=True)

# Round values
fy_columns = [col for col in combined_data.columns if col.startswith('FY')]
for col in fy_columns:
    combined_data[col] = combined_data[col].round(0).fillna(0).astype(int)
combined_data['Avg Rate'] = combined_data['Avg Rate'].round(1)
combined_data['Avg Employment'] = combined_data['Avg Employment'].round(0).astype(int)
combined_data['Avg Separations'] = combined_data['Avg Separations'].round(0).astype(int)

# Create GT table
gt_states_comprehensive = (
    GT(combined_data)
    .tab_header(
        title="Top 15 States by Federal Employee Separation Rate",
        subtitle="Annual separation rates (FY2016-2022) with average employment and separations"
    )
    .fmt_number(columns=fy_columns, decimals=0, pattern="{x}%")
    .fmt_number(columns=['Avg Rate'], decimals=1, pattern="{x}%")
    .fmt_number(columns=['Avg Employment', 'Avg Separations'], decimals=0, use_seps=True)
    .data_color(
        columns=fy_columns + ['Avg Rate'],
        palette=["white", HIGH_SEPARATIONS_COLOR],
        domain=[0, combined_data[fy_columns + ['Avg Rate']].max().max()],
        na_color="lightgray"
    )
    .cols_width({
        'State': '80px',
        'Avg Employment': '120px',
        'Avg Separations': '120px',
        'Avg Rate': '80px'
    })
    .tab_spanner(
        label="Annual Separation Rates",
        columns=fy_columns
    )
    .tab_spanner(
        label="Averages",
        columns=['Avg Rate', 'Avg Employment', 'Avg Separations']
    )
)

gt_states_comprehensive.show()
Top 15 States by Federal Employee Separation Rate
Annual separation rates (FY2016-2022) with average employment and separations
State Annual Separation Rates Averages
FY2016 FY2017 FY2018 FY2019 FY2020 FY2021 FY2022 Avg Rate Avg Employment Avg Separations
WY 29% 30% 29% 27% 25% 25% 27% 27.6% 6,374 1,760
MT 28% 27% 29% 27% 26% 25% 26% 26.7% 10,387 2,774
ID 28% 29% 27% 26% 24% 25% 26% 26.3% 9,773 2,567
OR 21% 20% 20% 20% 18% 20% 21% 20.0% 20,227 4,040
AK 20% 19% 19% 18% 17% 18% 20% 18.7% 11,231 2,104
SD 18% 17% 18% 18% 15% 17% 18% 17.1% 8,386 1,436
NV 15% 14% 18% 14% 13% 16% 16% 15.2% 11,646 1,770
AZ 16% 15% 14% 14% 14% 15% 16% 14.9% 32,209 4,801
CO 15% 14% 15% 15% 13% 14% 16% 14.6% 37,722 5,522
NM 15% 14% 15% 14% 13% 14% 16% 14.4% 21,899 3,157
ND 14% 13% 15% 14% 12% 15% 17% 14.2% 5,500 780
CA 13% 12% 13% 13% 12% 13% 16% 13.2% 143,798 19,005
VT 14% 12% 14% 13% 12% 13% 15% 13.1% 3,249 426
UT 13% 13% 13% 13% 11% 13% 14% 13.0% 29,486 3,847
MO 14% 12% 13% 13% 11% 12% 14% 12.7% 35,236 4,483

State Separations vs. Rate Scatter Plot

Show code
# Round for display
scatter_data = state_avg_stats.copy()
scatter_data['Avg Employment'] = scatter_data['Avg Employment'].round(0)
scatter_data['Avg Separations'] = scatter_data['Avg Separations'].round(0)
scatter_data['Avg Separation Rate'] = scatter_data['Avg Separation Rate'].round(1)

# Create interactive scatter plot
fig = px.scatter(
    scatter_data, 
    x='Avg Separations', 
    y='Avg Separation Rate',
    text='State',
    hover_data={
        'State': True, 
        'Avg Separations': ':,.0f', 
        'Avg Separation Rate': ':.1f'
    },
    labels={
        'Avg Separations': 'Average Annual Number of Separations',
        'Avg Separation Rate': 'Average Annual Separation Rate (%)',
        'State': 'State'
    },
    title='State-Level: Average Separations vs. Separation Rate'
)

# Update layout
fig.update_traces(
    marker=dict(size=12, color=HIGH_SEPARATIONS_COLOR, opacity=0.7),
    textposition='top right',
    textfont=dict(size=9),
    hovertemplate='<b>%{text}</b><br>' +
                  'Average Separations: %{x:,.0f}<br>' +
                  'Separation Rate: %{y:.1f}%<br>' +
                  '<extra></extra>'
)

fig.update_layout(
    xaxis_title="Average Annual Number of Separations",
    yaxis_title="Average Annual Separation Rate (%)",
    hovermode='closest',
    height=600,
    annotations=[
        dict(
            text="All states shown have 1,000+ federal employees",
            showarrow=False,
            xref="paper",
            yref="paper",
            x=0.02,
            y=0.98,
            xanchor="left",
            yanchor="top",
            font=dict(size=10, color="gray", style="italic")
        )
    ]
)

fig.show()