Forest Service Accessions and Separations Analysis

Author

Abigail Haddad

Published

July 12, 2025

# Import required libraries
import pandas as pd
import numpy as np
from pathlib import Path
import matplotlib.pyplot as plt
import seaborn as sns
from great_tables import GT, style, loc

# Configuration
PARQUET_DIR = "../../separations_accessions/parquet/"
FOREST_SERVICE_CODE = "AG11"

# Define color scales for heatmaps
ACCESSIONS_COLOR = "#1f77b4"  # Blue for accessions
SEPARATIONS_COLOR = "#ff7f0e"  # Orange for separations
NET_POSITIVE_COLOR = "#2ca02c"  # Green for positive net
NET_NEGATIVE_COLOR = "#d62728"  # Red for negative net

Data Loading

Show code
# Load accessions data
accessions_files = [
    "fedscope_accessions_FY2005-2009.parquet",
    "fedscope_accessions_FY2010-2014.parquet", 
    "fedscope_accessions_FY2015-2019.parquet",
    "fedscope_accessions_FY2020-2024.parquet",
    "fedscope_accessions_March_2025.parquet"
]

# Load separations data
separations_files = [
    "fedscope_separations_FY2005-2009.parquet",
    "fedscope_separations_FY2010-2014.parquet",
    "fedscope_separations_FY2015-2019.parquet", 
    "fedscope_separations_FY2020-2024.parquet",
    "fedscope_separations_March_2025.parquet"
]

# Load all accessions data
accessions_data = []
for file in accessions_files:
    try:
        df = pd.read_parquet(PARQUET_DIR + file)
        # Handle both uppercase and lowercase column names
        agysub_col = 'AGYSUB' if 'AGYSUB' in df.columns else 'agysub'
        # Filter for Forest Service
        df_forest = df[df[agysub_col] == FOREST_SERVICE_CODE].copy()
        if not df_forest.empty:
            accessions_data.append(df_forest)
            print(f"Loaded {len(df_forest)} Forest Service accessions records from {file}")
    except Exception as e:
        print(f"Error loading {file}: {e}")

# Load all separations data  
separations_data = []
for file in separations_files:
    try:
        df = pd.read_parquet(PARQUET_DIR + file)
        # Filter for Forest Service
        df_forest = df[df['agysub'] == FOREST_SERVICE_CODE].copy()
        if not df_forest.empty:
            separations_data.append(df_forest)
            print(f"Loaded {len(df_forest)} Forest Service separations records from {file}")
    except Exception as e:
        print(f"Error loading {file}: {e}")

# Combine all data
all_accessions = pd.concat(accessions_data, ignore_index=True) if accessions_data else pd.DataFrame()
all_separations = pd.concat(separations_data, ignore_index=True) if separations_data else pd.DataFrame()

print(f"\nTotal Forest Service accessions records: {len(all_accessions):,}")
print(f"Total Forest Service separations records: {len(all_separations):,}")
Loaded 74461 Forest Service accessions records from fedscope_accessions_FY2005-2009.parquet
Loaded 69871 Forest Service accessions records from fedscope_accessions_FY2010-2014.parquet
Loaded 67422 Forest Service accessions records from fedscope_accessions_FY2015-2019.parquet
Loaded 49603 Forest Service accessions records from fedscope_accessions_FY2020-2024.parquet
Loaded 8031 Forest Service accessions records from fedscope_accessions_March_2025.parquet
Loaded 77754 Forest Service separations records from fedscope_separations_FY2005-2009.parquet
Loaded 71494 Forest Service separations records from fedscope_separations_FY2010-2014.parquet
Loaded 68659 Forest Service separations records from fedscope_separations_FY2015-2019.parquet
Loaded 45472 Forest Service separations records from fedscope_separations_FY2020-2024.parquet
Loaded 8249 Forest Service separations records from fedscope_separations_March_2025.parquet

Total Forest Service accessions records: 269,388
Total Forest Service separations records: 271,628

Monthly Aggregation

Show code
def aggregate_by_month(df, data_type):
    """Aggregate data by month (EFDATE field)."""
    if df.empty:
        return pd.DataFrame()
    
    # Convert EFDATE to string and extract year/month
    df['efdate_str'] = df['efdate'].astype(str)
    df['year'] = df['efdate_str'].str[:4].astype(int)
    df['month'] = df['efdate_str'].str[4:6].astype(int)
    
    # Group by year and month
    monthly = df.groupby(['year', 'month']).size().reset_index(name='count')
    monthly['data_type'] = data_type
    
    return monthly

# Aggregate accessions and separations by month
monthly_accessions = aggregate_by_month(all_accessions, 'accessions')
monthly_separations = aggregate_by_month(all_separations, 'separations')

# Merge to calculate net change
if not monthly_accessions.empty and not monthly_separations.empty:
    monthly_combined = pd.merge(
        monthly_accessions[['year', 'month', 'count']],
        monthly_separations[['year', 'month', 'count']],
        on=['year', 'month'],
        how='outer',
        suffixes=('_acc', '_sep')
    ).fillna(0)
    
    monthly_combined['accessions'] = monthly_combined['count_acc'].astype(int)
    monthly_combined['separations'] = monthly_combined['count_sep'].astype(int)
    monthly_combined['net_change'] = monthly_combined['accessions'] - monthly_combined['separations']
    monthly_combined = monthly_combined.drop(['count_acc', 'count_sep'], axis=1)
else:
    monthly_combined = pd.DataFrame()

print(f"Monthly data points: {len(monthly_combined)}")
Monthly data points: 240

Accessions Heatmap

Show code
if not monthly_combined.empty:
    # Create pivot table for accessions
    accessions_pivot = monthly_combined.pivot(index='month', columns='year', values='accessions')
    
    # Get most recent 10 years for main display
    all_years = sorted(accessions_pivot.columns)
    recent_years = all_years[-10:]  # Last 10 years
    recent_accessions = accessions_pivot[recent_years]
    
    # Create month labels
    month_labels = ['Jan', 'Feb', 'Mar', 'Apr', 'May', 'Jun', 'Jul', 'Aug', 'Sep', 'Oct', 'Nov', 'Dec']
    recent_accessions.index = [month_labels[i-1] for i in recent_accessions.index]
    
    # Create GT heatmap for accessions (recent years)
    accessions_df = recent_accessions.reset_index()
    accessions_df.columns = ['Month'] + [str(col) for col in recent_accessions.columns]
    
    gt_accessions = (
        GT(accessions_df)
        .tab_header(title=f"Forest Service Monthly Accessions ({recent_years[0]}-{recent_years[-1]})")
        .fmt_number(columns=list(accessions_df.columns[1:]), decimals=0, use_seps=True)
        .data_color(
            columns=list(accessions_df.columns[1:]),
            palette=["white", ACCESSIONS_COLOR],
            domain=[0, accessions_pivot.max().max()],
            na_color="lightgray"
        )
    )
    
    gt_accessions.show()
Forest Service Monthly Accessions (2016-2025)
Month 2016 2017 2018 2019 2020 2021 2022 2023 2024 2025
Jan 350 784 238 83 272 351 285 631 123
Feb 300 193 247 233 293 309 441 436 111
Mar 738 422 556 944 1,377 911 1,062 1,125 502
Apr 2,163 4,480 4,088 3,720 2,644 3,035 2,984 2,884 1,949
May 7,735 5,245 5,373 4,987 5,556 4,566 3,605 3,225 2,910
Jun 1,336 1,354 1,252 1,406 1,450 1,144 1,408 2,046 1,744
Jul 488 371 398 341 402 288 899 788 221
Aug 271 235 317 331 503 260 403 415 151
Sep 198 99 269 251 234 229 304 383 82
Oct 300 151 156 234 215 213 393 102
Nov 139 107 156 178 143 154 267 82
Dec 186 97 88 153 143 116 246 54

Separations Heatmap

Show code
if not monthly_combined.empty:
    # Create pivot table for separations
    separations_pivot = monthly_combined.pivot(index='month', columns='year', values='separations')
    
    # Get most recent 10 years for main display
    recent_separations = separations_pivot[recent_years]
    
    # Create month labels
    recent_separations.index = [month_labels[i-1] for i in recent_separations.index]
    
    # Create GT heatmap for separations (recent years)
    separations_df = recent_separations.reset_index()
    separations_df.columns = ['Month'] + [str(col) for col in recent_separations.columns]
    
    gt_separations = (
        GT(separations_df)
        .tab_header(title=f"Forest Service Monthly Separations ({recent_years[0]}-{recent_years[-1]})")
        .fmt_number(columns=list(separations_df.columns[1:]), decimals=0, use_seps=True)
        .data_color(
            columns=list(separations_df.columns[1:]),
            palette=["white", SEPARATIONS_COLOR],
            domain=[0, separations_pivot.max().max()],
            na_color="lightgray"
        )
    )
    
    gt_separations.show()
Forest Service Monthly Separations (2016-2025)
Month 2016 2017 2018 2019 2020 2021 2022 2023 2024 2025
Jan 517 533 449 382 454 504 374 341 378
Feb 271 258 264 367 326 245 351 337 279
Mar 305 314 470 424 299 350 378 401 434
Apr 455 386 394 354 306 412 414 362 439
May 369 366 372 369 290 376 399 288 372
Jun 366 343 364 332 310 357 340 375 352
Jul 385 391 345 374 440 602 509 381 369
Aug 1,622 1,550 1,512 1,539 1,359 1,362 1,203 1,255 1,043
Sep 2,153 2,648 2,446 2,278 1,623 1,703 1,558 1,248 1,086
Oct 4,403 3,836 3,554 3,169 2,647 2,867 2,147 1,744
Nov 2,267 2,449 2,394 2,341 2,449 2,171 1,746 1,219
Dec 902 772 785 1,005 1,016 1,079 1,003 534

Net Change Heatmap

Show code
if not monthly_combined.empty:
    # Create pivot table for net change
    net_pivot = monthly_combined.pivot(index='month', columns='year', values='net_change')
    
    # Get most recent 10 years for main display
    recent_net = net_pivot[recent_years]
    
    # Create month labels
    recent_net.index = [month_labels[i-1] for i in recent_net.index]
    
    # Get max absolute value for symmetric scale (from all data)
    max_abs_val = max(abs(net_pivot.min().min()), abs(net_pivot.max().max()))
    
    # Create GT heatmap for net change with diverging colors (recent years)
    net_df = recent_net.reset_index()
    net_df.columns = ['Month'] + [str(col) for col in recent_net.columns]
    
    gt_net = (
        GT(net_df)
        .tab_header(title=f"Forest Service Monthly Net Change ({recent_years[0]}-{recent_years[-1]})")
        .fmt_number(columns=list(net_df.columns[1:]), decimals=0, use_seps=True)
        .data_color(
            columns=list(net_df.columns[1:]),
            palette=[NET_NEGATIVE_COLOR, "white", NET_POSITIVE_COLOR],
            domain=[-max_abs_val, max_abs_val],
            na_color="lightgray"
        )
    )
    
    gt_net.show()
Forest Service Monthly Net Change (2016-2025)
Month 2016 2017 2018 2019 2020 2021 2022 2023 2024 2025
Jan −167 251 −211 −299 −182 −153 −89 290 −255
Feb 29 −65 −17 −134 −33 64 90 99 −168
Mar 433 108 86 520 1,078 561 684 724 68
Apr 1,708 4,094 3,694 3,366 2,338 2,623 2,570 2,522 1,510
May 7,366 4,879 5,001 4,618 5,266 4,190 3,206 2,937 2,538
Jun 970 1,011 888 1,074 1,140 787 1,068 1,671 1,392
Jul 103 −20 53 −33 −38 −314 390 407 −148
Aug −1,351 −1,315 −1,195 −1,208 −856 −1,102 −800 −840 −892
Sep −1,955 −2,549 −2,177 −2,027 −1,389 −1,474 −1,254 −865 −1,004
Oct −4,103 −3,685 −3,398 −2,935 −2,432 −2,654 −1,754 −1,642
Nov −2,128 −2,342 −2,238 −2,163 −2,306 −2,017 −1,479 −1,137
Dec −716 −675 −697 −852 −873 −963 −757 −480

Complete Historical View (All Years)

Show code
if not monthly_combined.empty:
    # Full historical pivot tables
    accessions_pivot_full = monthly_combined.pivot(index='month', columns='year', values='accessions')
    separations_pivot_full = monthly_combined.pivot(index='month', columns='year', values='separations')
    net_pivot_full = monthly_combined.pivot(index='month', columns='year', values='net_change')
    
    # Apply month labels to all
    for pivot in [accessions_pivot_full, separations_pivot_full, net_pivot_full]:
        pivot.index = [month_labels[i-1] for i in pivot.index]
    
    # Create tiny accessions table
    acc_df_full = accessions_pivot_full.reset_index()
    acc_df_full.columns = ['Month'] + [str(col) for col in accessions_pivot_full.columns]
    
    gt_acc_full = (
        GT(acc_df_full)
        .tab_header(title="Forest Service Monthly Accessions (All Years)")
        .fmt_number(columns=list(acc_df_full.columns[1:]), decimals=0, use_seps=True)
        .data_color(
            columns=list(acc_df_full.columns[1:]),
            palette=["white", ACCESSIONS_COLOR],
            domain=[0, accessions_pivot_full.max().max()],
            na_color="lightgray"
        )
        .tab_options(
            table_font_size="6px",
            data_row_padding="1px",
            column_labels_font_size="5px"
        )
    )
    gt_acc_full.show()
    
    # Create tiny separations table
    sep_df_full = separations_pivot_full.reset_index()
    sep_df_full.columns = ['Month'] + [str(col) for col in separations_pivot_full.columns]
    
    gt_sep_full = (
        GT(sep_df_full)
        .tab_header(title="Forest Service Monthly Separations (All Years)")
        .fmt_number(columns=list(sep_df_full.columns[1:]), decimals=0, use_seps=True)
        .data_color(
            columns=list(sep_df_full.columns[1:]),
            palette=["white", SEPARATIONS_COLOR],
            domain=[0, separations_pivot_full.max().max()],
            na_color="lightgray"
        )
        .tab_options(
            table_font_size="6px",
            data_row_padding="1px",
            column_labels_font_size="5px"
        )
    )
    gt_sep_full.show()
    
    # Create tiny net change table
    net_df_full = net_pivot_full.reset_index()
    net_df_full.columns = ['Month'] + [str(col) for col in net_pivot_full.columns]
    
    max_abs_val_full = max(abs(net_pivot_full.min().min()), abs(net_pivot_full.max().max()))
    
    gt_net_full = (
        GT(net_df_full)
        .tab_header(title="Forest Service Monthly Net Change (All Years)")
        .fmt_number(columns=list(net_df_full.columns[1:]), decimals=0, use_seps=True)
        .data_color(
            columns=list(net_df_full.columns[1:]),
            palette=[NET_NEGATIVE_COLOR, "white", NET_POSITIVE_COLOR],
            domain=[-max_abs_val_full, max_abs_val_full],
            na_color="lightgray"
        )
        .tab_options(
            table_font_size="6px",
            data_row_padding="1px",
            column_labels_font_size="5px"
        )
    )
    gt_net_full.show()
Forest Service Monthly Accessions (All Years)
Month 2004 2005 2006 2007 2008 2009 2010 2011 2012 2013 2014 2015 2016 2017 2018 2019 2020 2021 2022 2023 2024 2025
Jan 411 392 266 217 323 909 525 246 86 220 393 350 784 238 83 272 351 285 631 123
Feb 348 482 284 129 276 594 452 206 142 188 257 300 193 247 233 293 309 441 436 111
Mar 589 616 429 914 1,186 1,002 712 388 439 610 805 738 422 556 944 1,377 911 1,062 1,125 502
Apr 1,545 3,046 3,858 2,563 2,748 2,672 1,863 1,859 1,720 1,757 2,008 2,163 4,480 4,088 3,720 2,644 3,035 2,984 2,884 1,949
May 7,855 6,066 4,431 6,349 7,043 7,014 6,070 6,104 5,400 6,209 7,557 7,735 5,245 5,373 4,987 5,556 4,566 3,605 3,225 2,910
Jun 2,772 2,679 2,014 2,988 3,185 3,545 3,140 2,954 2,926 3,003 1,292 1,336 1,354 1,252 1,406 1,450 1,144 1,408 2,046 1,744
Jul 482 497 368 551 635 568 645 565 348 286 341 488 371 398 341 402 288 899 788 221
Aug 353 297 223 440 489 506 186 188 174 206 265 271 235 317 331 503 260 403 415 151
Sep 215 290 273 213 252 215 115 144 139 145 216 198 99 269 251 234 229 304 383 82
Oct 330 317 262 230 186 226 260 103 263 119 165 196 300 151 156 234 215 213 393 102
Nov 204 160 168 163 189 233 241 119 119 83 231 248 139 107 156 178 143 154 267 82
Dec 170 140 129 79 152 224 178 70 84 164 125 147 186 97 88 153 143 116 246 54
Forest Service Monthly Separations (All Years)
Month 2004 2005 2006 2007 2008 2009 2010 2011 2012 2013 2014 2015 2016 2017 2018 2019 2020 2021 2022 2023 2024 2025
Jan 635 638 656 679 746 750 495 232 552 613 564 517 533 449 382 454 504 374 341 378
Feb 352 298 273 317 263 293 255 205 248 230 221 271 258 264 367 326 245 351 337 279
Mar 459 414 447 403 271 297 261 265 257 255 309 305 314 470 424 299 350 378 401 434
Apr 604 463 456 339 272 316 309 283 256 316 380 455 386 394 354 306 412 414 362 439
May 587 513 507 412 306 304 337 348 365 459 550 369 366 372 369 290 376 399 288 372
Jun 574 531 535 380 277 360 324 400 371 305 317 366 343 364 332 310 357 340 375 352
Jul 635 509 461 435 492 596 558 371 354 337 399 385 391 345 374 440 602 509 381 369
Aug 3,186 3,276 2,873 3,657 3,523 3,395 2,539 2,472 1,566 1,645 1,509 1,622 1,550 1,512 1,539 1,359 1,362 1,203 1,255 1,043
Sep 3,127 3,071 3,070 2,654 2,641 3,074 2,826 2,181 2,224 1,939 1,809 2,153 2,648 2,446 2,278 1,623 1,703 1,558 1,248 1,086
Oct 4,436 3,787 2,719 2,507 2,578 3,672 3,929 3,702 3,161 3,192 3,505 4,291 4,403 3,836 3,554 3,169 2,647 2,867 2,147 1,744
Nov 1,801 2,064 2,006 2,057 2,308 2,575 2,480 2,061 2,796 2,666 3,185 2,527 2,267 2,449 2,394 2,341 2,449 2,171 1,746 1,219
Dec 1,143 798 741 797 795 944 1,102 1,122 1,130 624 709 755 902 772 785 1,005 1,016 1,079 1,003 534
Forest Service Monthly Net Change (All Years)
Month 2004 2005 2006 2007 2008 2009 2010 2011 2012 2013 2014 2015 2016 2017 2018 2019 2020 2021 2022 2023 2024 2025
Jan −224 −246 −390 −462 −423 159 30 14 −466 −393 −171 −167 251 −211 −299 −182 −153 −89 290 −255
Feb −4 184 11 −188 13 301 197 1 −106 −42 36 29 −65 −17 −134 −33 64 90 99 −168
Mar 130 202 −18 511 915 705 451 123 182 355 496 433 108 86 520 1,078 561 684 724 68
Apr 941 2,583 3,402 2,224 2,476 2,356 1,554 1,576 1,464 1,441 1,628 1,708 4,094 3,694 3,366 2,338 2,623 2,570 2,522 1,510
May 7,268 5,553 3,924 5,937 6,737 6,710 5,733 5,756 5,035 5,750 7,007 7,366 4,879 5,001 4,618 5,266 4,190 3,206 2,937 2,538
Jun 2,198 2,148 1,479 2,608 2,908 3,185 2,816 2,554 2,555 2,698 975 970 1,011 888 1,074 1,140 787 1,068 1,671 1,392
Jul −153 −12 −93 116 143 −28 87 194 −6 −51 −58 103 −20 53 −33 −38 −314 390 407 −148
Aug −2,833 −2,979 −2,650 −3,217 −3,034 −2,889 −2,353 −2,284 −1,392 −1,439 −1,244 −1,351 −1,315 −1,195 −1,208 −856 −1,102 −800 −840 −892
Sep −2,912 −2,781 −2,797 −2,441 −2,389 −2,859 −2,711 −2,037 −2,085 −1,794 −1,593 −1,955 −2,549 −2,177 −2,027 −1,389 −1,474 −1,254 −865 −1,004
Oct −4,106 −3,470 −2,457 −2,277 −2,392 −3,446 −3,669 −3,599 −2,898 −3,073 −3,340 −4,095 −4,103 −3,685 −3,398 −2,935 −2,432 −2,654 −1,754 −1,642
Nov −1,597 −1,904 −1,838 −1,894 −2,119 −2,342 −2,239 −1,942 −2,677 −2,583 −2,954 −2,279 −2,128 −2,342 −2,238 −2,163 −2,306 −2,017 −1,479 −1,137
Dec −973 −658 −612 −718 −643 −720 −924 −1,052 −1,046 −460 −584 −608 −716 −675 −697 −852 −873 −963 −757 −480

Key Insights

Show code
if not monthly_combined.empty:
    # Find months with highest accessions/separations
    max_acc_idx = monthly_combined['accessions'].idxmax()
    max_sep_idx = monthly_combined['separations'].idxmax()
    max_net_pos_idx = monthly_combined[monthly_combined['net_change'] > 0]['net_change'].idxmax() if (monthly_combined['net_change'] > 0).any() else None
    max_net_neg_idx = monthly_combined[monthly_combined['net_change'] < 0]['net_change'].idxmin() if (monthly_combined['net_change'] < 0).any() else None
    
    insights = []
    
    if not pd.isna(max_acc_idx):
        max_acc = monthly_combined.loc[max_acc_idx]
        insights.append({
            'Metric': 'Highest Monthly Accessions',
            'Month': f"{month_labels[int(max_acc['month'])-1]} {int(max_acc['year'])}",
            'Value': f"{int(max_acc['accessions']):,}"
        })
    
    if not pd.isna(max_sep_idx):
        max_sep = monthly_combined.loc[max_sep_idx]
        insights.append({
            'Metric': 'Highest Monthly Separations',
            'Month': f"{month_labels[int(max_sep['month'])-1]} {int(max_sep['year'])}",
            'Value': f"{int(max_sep['separations']):,}"
        })
    
    if max_net_pos_idx is not None and not pd.isna(max_net_pos_idx):
        max_net_pos = monthly_combined.loc[max_net_pos_idx]
        insights.append({
            'Metric': 'Largest Net Gain',
            'Month': f"{month_labels[int(max_net_pos['month'])-1]} {int(max_net_pos['year'])}",
            'Value': f"+{int(max_net_pos['net_change']):,}"
        })
    
    if max_net_neg_idx is not None and not pd.isna(max_net_neg_idx):
        max_net_neg = monthly_combined.loc[max_net_neg_idx]
        insights.append({
            'Metric': 'Largest Net Loss',
            'Month': f"{month_labels[int(max_net_neg['month'])-1]} {int(max_net_neg['year'])}",
            'Value': f"{int(max_net_neg['net_change']):,}"
        })
    
    if insights:
        insights_df = pd.DataFrame(insights)
        
        gt_insights = (
            GT(insights_df)
            .tab_header(title="Key Insights")
        )
        
        gt_insights.show()
Key Insights
Metric Month Value
Highest Monthly Accessions May 2005 7,855
Highest Monthly Separations Oct 2004 4,436
Largest Net Gain May 2016 +7,366
Largest Net Loss Oct 2004 -4,106