Form: 990pf Tax year: 2011 Generated: 2026-05-11 17:04:57
✅ PASSED — Strict mode: TRUE
Executive Summary
Row count
88,108
Column count
187
Vintage-aware completeness
100.00%
Raw completeness (all cols / all rows)
67.13%
Unique EINs
88,026
Duplicate EINs (amendments)
82
Missing columns vs schema
0
Extra columns vs schema
0
YoY row-count delta vs prior run
+4.46% (within ±20%)
Identity-Field Checks
Code
# Identity-field checks: one table row per check on the key columns.
# Each `critical_field_issues` entry is treated as NULL when the validator
# recorded nothing for that field (this is what the NULL tests rely on).
ein <- r$critical_field_issues$ein
tp <- r$critical_field_issues$tax_period
sub <- r$critical_field_issues$subsection
typ <- r$critical_field_issues$types
dup_n <- r$summary_stats$duplicate_eins

# Render one count-based check as exactly one table cell.
#   n        - the recorded count; NULL when the issue entry (or this
#              particular field of it) is absent.
#   ok       - text shown when the check passes.
#   fail_fmt - sprintf() format with a single %d, used on failure.
# A missing count and a zero count both pass. Returning exactly one string
# matters: sprintf("%d", NULL) yields character(0), which would silently
# shorten `Result` and break its alignment with `Check`.
count_cell <- function(n, ok, fail_fmt) {
  if (length(n) == 0L || isTRUE(n[[1L]] == 0)) {
    ok
  } else {
    sprintf(fail_fmt, n[[1L]])
  }
}

ident <- data.table(
  Check = c(
    "EIN format (XX-XXXXXXX)", "EIN null count", "EIN duplicates (soft)",
    "tax_period format (YYYYMM)", "tax_period out-of-range",
    "subsection_cd whitelist", "Column types"
  ),
  Result = c(
    count_cell(ein$malformed, "✅ all valid", "❌ %d malformed"),
    # The null-count row depends only on the EIN issue entry; it must not
    # be gated on the (unrelated) duplicate count.
    count_cell(ein$null_count, "✅ no nulls", "❌ %d nulls"),
    # Duplicates are informational only and never fail the gate.
    if (length(dup_n) == 0L) {
      "ℹ not reported"
    } else {
      sprintf("ℹ %d (soft, not a failure)", dup_n[[1L]])
    },
    count_cell(tp$malformed_count, "✅ all valid", "❌ %d malformed"),
    count_cell(tp$out_of_range_count, "✅ all in range", "❌ %d out of range"),
    count_cell(sub$unknown_count, "✅ all in IRM whitelist", "❌ %d unknown codes"),
    # paste(collapse = ) always returns a single string, so this stays one cell.
    if (is.null(typ)) {
      "✅ all types correct"
    } else {
      sprintf("❌ %s", paste(typ$issues, collapse = "; "))
    }
  )
)
kable(ident)
EIN format (XX-XXXXXXX)
✅ all valid
EIN null count
✅ no nulls
EIN duplicates (soft)
ℹ 82 (soft, not a failure)
tax_period format (YYYYMM)
✅ all valid
tax_period out-of-range
✅ all in range
subsection_cd whitelist
✅ all in IRM whitelist
Column types
✅ all types correct
Subsection Code Distribution
(IRC 501(c) / 501(d) / 501(e) / 501(f) / 501(k) / 501(n) / 521 / 527 / 529 / 4947 / 1381 subsection codes per IRM 25.7.1. See data/lookups/subsection_codes.csv.)
Code
# Subsection code distribution: one row per code with its count and share.
sd <- r$summary_stats$subsection_distribution
if (length(sd$top_values) == 0L) {
  cat("(no subsection data)")
} else {
  # Each `top_values` entry becomes one row. Columns are renamed by
  # position, so entries are assumed to arrive ordered as code, count, pct
  # -- TODO confirm against the validator output.
  tab <- rbindlist(lapply(sd$top_values, as.data.table))
  setnames(tab, c("code", "count", "pct"))
  # Convert to display strings in place (data.table `:=`).
  tab[, pct := paste0(pct, "%")]
  tab[, count := format(count, big.mark = ",")]
  kable(tab)
}
3
82,913
94.1%
92
5,195
5.9%
Financial Summary
Code
# Financial summary: sum and median for revenue, expenses and assets.
fin <- r$summary_stats$financial
if (length(fin) == 0L) {
  cat("(no financial summary)")
} else {
  # `fmt_money` is defined outside this chunk; it is called once per value
  # so each cell is formatted independently of the others.
  tab <- data.table(
    metric = c("total_revenue", "total_expenses", "total_assets_eoy"),
    sum = c(
      fmt_money(fin$revenue$total),
      fmt_money(fin$expenses$total),
      fmt_money(fin$assets$total)
    ),
    median = c(
      fmt_money(fin$revenue$median),
      fmt_money(fin$expenses$median),
      fmt_money(fin$assets$median)
    )
  )
  kable(tab)
}
total_revenue
—
—
total_expenses
—
—
total_assets_eoy
$631,202,753,206
$390,132
Tax Period Year Coverage
Code
# Tax-period year coverage: row count and share of rows per tax year.
d <- r$summary_stats$tax_period_year_distribution
if (length(d) == 0L) {
  cat("(no coverage data)")
} else {
  # Element names supply the year labels; element values supply the counts.
  tab <- data.table(tax_year = names(d), n = as.integer(unlist(d)))
  # `tax_year` is character (it comes from names()), so this is a string sort.
  setorder(tab, tax_year)
  # Compute the share before `n` is turned into a display string. A zero
  # total would otherwise print "NaN%", so it is reported as 0.00% instead.
  total_n <- sum(tab$n)
  tab[, pct := sprintf("%.2f%%", if (isTRUE(total_n > 0)) 100 * n / total_n else 0)]
  tab[, n := format(n, big.mark = ",")]
  kable(tab)
}
Field Completeness by Category
Columns are grouped by Form section parsed from the crosswalk’s location field.
Code
# Field completeness by category: one row per entry of `category_reports`.
cats <- r$category_reports
if (length(cats) == 0L) {
  cat("(no category breakdown)")
} else {
  # The per-entry argument is named `cr` rather than `c` so it does not
  # shadow base::c() inside the function body.
  tab <- rbindlist(lapply(cats, function(cr) {
    data.table(
      category = cr$category_name,
      n_cols = cr$column_count,
      cols_present = cr$columns_present,
      avg_completeness = sprintf("%.2f%%", cr$avg_completeness)
    )
  }))
  # Largest categories first.
  setorder(tab, -n_cols)
  kable(tab)
}
part_vi
48
48
60.02%
part_xiii
41
41
100.00%
part_xiv
41
41
97.67%
part_vii
33
33
50.68%
part_i
31
31
72.21%
part_xvi
29
29
7.57%
part_xv
21
21
4.27%
part_ii
14
14
86.32%
part_v
14
14
86.32%
other
9
9
77.36%
part_xvii
9
9
14.91%
header
4
4
100.00%
part_x
4
4
76.07%
part_ix
3
3
68.09%
part_iv
2
2
50.00%
part_xi
1
1
100.00%
part_xii
1
1
100.00%
Completeness by Vintage Cohort
When a single tax-year file blends rows from multiple extract_year × source_form cohorts, the per-cohort completeness reveals whether any vintage is dragging the overall metric down. A clean run produces 100% for every cohort.
Code
# Completeness by vintage cohort: one row per entry of
# `completeness_by_cohort`.
pc <- r$completeness_by_cohort
if (length(pc) == 0L) {
  cat("(single cohort)")
} else {
  # The per-entry argument is named `co` rather than `c` so it does not
  # shadow base::c() inside the function body.
  tab <- rbindlist(lapply(pc, function(co) {
    data.table(
      extract_year = co$extract_year,
      source_form = co$source_form,
      rows = format(co$n_rows, big.mark = ","),
      expected_cols = co$n_expected_cols,
      completeness_pct = sprintf("%.2f%%", co$completeness_pct)
    )
  }))
  kable(tab)
}
2012
990pf
84,343
123
100.00%
2013
990pf
2,834
182
100.00%
2014
990pf
479
182
100.00%
2015
990pf
367
183
100.00%
2016
990pf
62
183
100.00%
2020
990pf
5
184
100.00%
2021
990pf
6
184
100.00%
2022
990pf
5
184
100.00%
2023
990pf
5
184
100.00%
2024
990pf
2
184
100.00%
Data Issues
Code
# Data issues: collect every hard-check problem into one character vector
# and print it as a bullet list, or a single all-clear line when empty.
issues <- character(0)

# Schema drift: list at most the first 10 offending column names.
if (length(r$missing_columns) > 0L) {
  issues <- c(issues, sprintf(
    "Missing %d columns: %s",
    length(r$missing_columns),
    paste(head(r$missing_columns, 10), collapse = ", ")
  ))
}
if (length(r$extra_columns) > 0L) {
  issues <- c(issues, sprintf(
    "Extra %d columns: %s",
    length(r$extra_columns),
    paste(head(r$extra_columns, 10), collapse = ", ")
  ))
}

# One line per critical-field entry, with its contents flattened and
# joined by " | ". Entries are looked up by name, so an unnamed list
# contributes nothing.
field_issues <- r$critical_field_issues
if (length(field_issues) > 0L) {
  issues <- c(issues, vapply(
    names(field_issues),
    function(k) {
      sprintf("%s: %s", k, paste(unlist(field_issues[[k]]), collapse = " | "))
    },
    character(1),
    USE.NAMES = FALSE
  ))
}

if (length(issues) == 0L) {
  cat("✅ No hard-check issues detected.\n")
} else {
  # Emit each issue as its own "- " bullet line.
  cat(paste0("- ", issues, "\n"), sep = "")
}
✅ No hard-check issues detected.
Generated from /mnt/c/Users/tpoongundranar/Documents/Urban/NCCS/nccs-data-core/data/logs/quality_990pf_2011.rds. See R/quality/post_checks.R for the validator implementation and docs/05-quality-gates.qmd for the gate definitions.