Form: 990pf Tax year: 2001 Generated: 2026-05-15 11:02:45
❌ FAILED — Strict mode: FALSE
Executive Summary
Row count
9
Column count
189
Vintage-aware completeness
100.00%
Raw completeness (all cols / all rows)
87.07%
Unique EINs
9
Duplicate EINs (amendments)
0
Missing columns vs schema
8
Extra columns vs schema
76
YoY row-count delta vs prior run
no_baseline
Identity-Field Checks
Code
# Identity-field checks: one table row per check on the identity columns,
# built from the validator result `r`. Each sub-object of
# `r$critical_field_issues` is NULL when the validator recorded no issue.
ein <- r$critical_field_issues$ein
tp <- r$critical_field_issues$tax_period
sub <- r$critical_field_issues$subsection
typ <- r$critical_field_issues$types
dup_eins <- r$summary_stats$duplicate_eins

# Format one count-based check as exactly one string.
#
# A missing (NULL / zero-length), NA, or zero count is reported as a pass.
# Returning one string in every case matters: sprintf() on a zero-length
# argument returns character(0), which would silently shorten `Result` and
# misalign it with `Check`.
# NOTE(review): assumes every count field is a scalar whole number -- confirm
# against the validator output.
fmt_count_check <- function(n, pass, fail_fmt) {
  if (length(n) == 0L || is.na(n[[1L]]) || n[[1L]] == 0) {
    pass
  } else {
    sprintf(fail_fmt, n[[1L]])
  }
}

# Duplicate EINs are informational only; show a placeholder when the stat is
# absent instead of dropping the row.
dup_result <- if (length(dup_eins) == 0L || is.na(dup_eins[[1L]])) {
  "ℹ n/a (soft, not a failure)"
} else {
  sprintf("ℹ %d (soft, not a failure)", dup_eins[[1L]])
}

# paste(collapse = ) always yields one string, so this row is length-safe.
types_result <- if (is.null(typ)) {
  "✅ all types correct"
} else {
  sprintf("❌ %s", paste(typ$issues, collapse = "; "))
}

ident <- data.table(
  Check = c(
    "EIN format (XX-XXXXXXX)", "EIN null count", "EIN duplicates (soft)",
    "tax_period format (YYYYMM)", "tax_period out-of-range",
    "subsection_cd whitelist", "Column types"
  ),
  Result = c(
    fmt_count_check(ein$malformed, "✅ all valid", "❌ %d malformed"),
    # The null-count row depends only on the null count itself, not on the
    # duplicate-EIN statistic.
    fmt_count_check(ein$null_count, "✅ no nulls", "❌ %d nulls"),
    dup_result,
    fmt_count_check(tp$malformed_count, "✅ all valid", "❌ %d malformed"),
    fmt_count_check(tp$out_of_range_count, "✅ all in range", "❌ %d out of range"),
    fmt_count_check(sub$unknown_count, "✅ all in IRM whitelist", "❌ %d unknown codes"),
    types_result
  )
)
kable(ident)
EIN format (XX-XXXXXXX)
✅ all valid
EIN null count
✅ no nulls
EIN duplicates (soft)
ℹ 0 (soft, not a failure)
tax_period format (YYYYMM)
✅ all valid
tax_period out-of-range
✅ all in range
subsection_cd whitelist
✅ all in IRM whitelist
Column types
✅ all types correct
Subsection Code Distribution
(IRC 501(c) / 501(d) / 501(e) / 501(f) / 501(k) / 501(n) / 521 / 527 / 529 / 4947 / 1381 subsection codes per IRM 25.7.1. See data/lookups/subsection_codes.csv.)
Code
# Subsection code distribution: one row per entry of `top_values`, shown as
# code / count / pct with display formatting applied to the last two.
sd <- r$summary_stats$subsection_distribution
if (length(sd$top_values) > 0L) {
  # Stack the per-code records into a single table and give the columns
  # their display names.
  tab <- rbindlist(lapply(sd$top_values, as.data.table))
  setnames(tab, c("code", "count", "pct"))
  # Both right-hand sides are computed from the existing columns before
  # either column is overwritten.
  tab[, `:=`(
    pct = paste0(pct, "%"),
    count = format(count, big.mark = ",")
  )]
  kable(tab)
} else {
  cat("(no subsection data)")
}
Financial Summary
Code
# Financial summary: total and median of revenue, expenses and assets, each
# rendered through fmt_money().
fin <- r$summary_stats$financial
if (length(fin) > 0L) {
  # Build each display column first; order matches the `metric` labels below.
  fin_totals <- c(
    fmt_money(fin$revenue$total),
    fmt_money(fin$expenses$total),
    fmt_money(fin$assets$total)
  )
  fin_medians <- c(
    fmt_money(fin$revenue$median),
    fmt_money(fin$expenses$median),
    fmt_money(fin$assets$median)
  )
  tab <- data.table(
    metric = c("total_revenue", "total_expenses", "total_assets_eoy"),
    sum = fin_totals,
    median = fin_medians
  )
  kable(tab)
} else {
  cat("(no financial summary)")
}
total_revenue
—
—
total_expenses
—
—
total_assets_eoy
$32,052,573
$525,323
Tax Period Year Coverage
Code
# Tax-period year coverage: row count and share of rows per tax year.
d <- r$summary_stats$tax_period_year_distribution
if (length(d) > 0L) {
  # Names of `d` are the tax years; values are the per-year row counts.
  tab <- data.table(tax_year = names(d), n = as.integer(unlist(d)))
  setorder(tab, tax_year)
  # The share is computed from the integer counts; `n` is turned into a
  # comma-grouped string in the same step, after both values are evaluated.
  tab[, `:=`(
    pct = sprintf("%.2f%%", 100 * n / sum(n)),
    n = format(n, big.mark = ",")
  )]
  kable(tab)
} else {
  cat("(no coverage data)")
}
Field Completeness by Category
Columns are grouped by Form section parsed from the crosswalk’s location field.
Code
# Field completeness by category: one row per category report, widest
# category (most columns) first.
cats <- r$category_reports
if (length(cats) > 0L) {
  # Turn a single category report into a one-row table.
  category_row <- function(rep) {
    data.table(
      category = rep$category_name,
      n_cols = rep$column_count,
      cols_present = rep$columns_present,
      avg_completeness = sprintf("%.2f%%", rep$avg_completeness)
    )
  }
  tab <- rbindlist(lapply(cats, category_row))
  # Descending sort on the column count.
  setorderv(tab, "n_cols", order = -1L)
  kable(tab)
} else {
  cat("(no category breakdown)")
}
part_xiv
39
39
100.00%
other
32
32
96.88%
part_i
21
21
96.83%
part_vi
12
12
91.67%
header
3
3
100.00%
Completeness by Vintage Cohort
When a single tax-year file blends rows from multiple extract_year × source_form cohorts, the per-cohort completeness reveals whether any vintage is dragging the overall metric down. A clean run produces 100% for every cohort.
Code
# Completeness by vintage cohort: one row per cohort entry in
# `r$completeness_by_cohort`.
pc <- r$completeness_by_cohort
if (length(pc) > 0L) {
  # Turn a single cohort record into a one-row table with display formatting
  # on the row count and the completeness percentage.
  cohort_row <- function(cohort) {
    data.table(
      extract_year = cohort$extract_year,
      source_form = cohort$source_form,
      rows = format(cohort$n_rows, big.mark = ","),
      expected_cols = cohort$n_expected_cols,
      completeness_pct = sprintf("%.2f%%", cohort$completeness_pct)
    )
  }
  tab <- rbindlist(lapply(pc, cohort_row))
  kable(tab)
} else {
  cat("(single cohort)")
}
2012
990pf
3
123
100.00%
2014
990pf
4
182
100.00%
2015
990pf
1
183
100.00%
2016
990pf
1
183
100.00%
Data Issues
Code
# Data issues: gather every hard-check finding as one line of text, then
# print the findings as a bullet list (or a single all-clear line).
missing_cols <- r$missing_columns
extra_cols <- r$extra_columns
field_issues <- r$critical_field_issues

# Each piece contributes nothing when its source is empty, so `issues` ends
# up as character(0) when there is nothing to report. Column lists are capped
# at the first 10 names.
issues <- c(
  if (length(missing_cols) > 0L) {
    sprintf(
      "Missing %d columns: %s",
      length(missing_cols), paste(head(missing_cols, 10), collapse = ", ")
    )
  },
  if (length(extra_cols) > 0L) {
    sprintf(
      "Extra %d columns: %s",
      length(extra_cols), paste(head(extra_cols, 10), collapse = ", ")
    )
  },
  # One line per named critical-field issue, with its contents flattened.
  vapply(
    names(field_issues),
    function(k) {
      sprintf("%s: %s", k, paste(unlist(field_issues[[k]]), collapse = " | "))
    },
    character(1),
    USE.NAMES = FALSE
  )
)

if (length(issues) > 0L) {
  cat(paste0("- ", issues, " \n "), sep = "")
} else {
  cat("✅ No hard-check issues detected. \n ")
}
- Missing 8 columns: accounting_period, activity_code_1, activity_code_2, activity_code_3, general_public_support_4yr_total, total_investment_income, total_adjusted_net_income, undistributed_income_prior_years_cd
- Extra 76 columns: eo_status, operating_foundation_cd, asset_size_code, total_revenue_size_code, interest_revenue, dividends, dqp_paid_compensation_cd, nce_org_transfer_other_assets_cd, undistributed_income_cd, excess_business_holdings_cd
Generated from /mnt/c/Users/tpoongundranar/Documents/Urban/NCCS/nccs-data-core/data/logs/merged/quality_990pf_2001.rds. See R/quality/post_checks.R for the validator implementation and docs/05-quality-gates.qmd for the gate definitions.