Form: 990combined · Tax year: 2007 · Generated: 2026-05-15 11:02:50
❌ FAILED — Strict mode: FALSE
Executive Summary
Row count
175
Column count
61
Vintage-aware completeness
99.86%
Raw completeness (all cols / all rows)
94.60%
Unique EINs
174
Duplicate EINs (amendments)
1
Missing columns vs schema
89
Extra columns vs schema
22
YoY row-count delta vs prior run
no_baseline
Identity-Field Checks
Code
# Identity-field checks: one table row per hard check on EIN, tax_period,
# subsection code and column types. Each row shows either a pass marker or the
# offending count taken from the matching entry of `r$critical_field_issues`.
ein <- r$critical_field_issues$ein
tp <- r$critical_field_issues$tax_period
sub <- r$critical_field_issues$subsection
typ <- r$critical_field_issues$types

# Render one count-based check as exactly one string.
#
# n          Count recorded for this specific check; may be NULL when the
#            issue entry (or this field of it) is absent.
# pass_label Text shown when nothing was flagged.
# fail_fmt   sprintf() format with a single %d for the count.
#
# Always returning a length-1 string keeps `Result` the same length as `Check`;
# sprintf() on a NULL count would yield character(0) and break data.table().
# NOTE(review): treats an absent or zero count as a pass, i.e. assumes the
# validator only records counts for checks that actually failed -- confirm
# against the validator implementation.
check_cell <- function(n, pass_label, fail_fmt) {
  failed <- length(n) == 1L && !is.na(n) && n > 0
  if (failed) sprintf(fail_fmt, as.integer(n)) else pass_label
}

ident <- data.table(
  Check = c(
    "EIN format (XX-XXXXXXX)",
    "EIN null count",
    "EIN duplicates (soft)",
    "tax_period format (YYYYMM)",
    "tax_period out-of-range",
    "subsection_cd whitelist",
    "Column types"
  ),
  Result = c(
    check_cell(ein$malformed, "✅ all valid", "❌ %d malformed"),
    # The null-count cell depends only on the EIN null count; the duplicate
    # count is reported separately (and softly) on the next row.
    check_cell(ein$null_count, "✅ no nulls", "❌ %d nulls"),
    sprintf("ℹ %d (soft, not a failure)", r$summary_stats$duplicate_eins),
    check_cell(tp$malformed_count, "✅ all valid", "❌ %d malformed"),
    check_cell(tp$out_of_range_count, "✅ all in range", "❌ %d out of range"),
    check_cell(sub$unknown_count, "✅ all in IRM whitelist", "❌ %d unknown codes"),
    # paste() on NULL gives "", so this branch always yields one string.
    if (is.null(typ)) {
      "✅ all types correct"
    } else {
      sprintf("❌ %s", paste(typ$issues, collapse = "; "))
    }
  )
)
kable(ident)
EIN format (XX-XXXXXXX)
✅ all valid
EIN null count
✅ no nulls
EIN duplicates (soft)
ℹ 1 (soft, not a failure)
tax_period format (YYYYMM)
✅ all valid
tax_period out-of-range
✅ all in range
subsection_cd whitelist
✅ all in IRM whitelist
Column types
✅ all types correct
Subsection Code Distribution
(IRC 501(c) / 501(d) / 501(e) / 501(f) / 501(k) / 501(n) / 521 / 527 / 529 / 4947 / 1381 subsection codes per IRM 25.7.1. See data/lookups/subsection_codes.csv.)
Code
# Subsection-code distribution: one row per code with its count and share of
# rows, or a placeholder line when the report carries no such data.
sd <- r$summary_stats$subsection_distribution
top_codes <- sd$top_values
if (length(top_codes) > 0L) {
  # Each entry becomes a one-row table; stack them and label the columns.
  tab <- rbindlist(lapply(top_codes, as.data.table))
  setnames(tab, c("code", "count", "pct"))
  # Turn both numeric columns into display strings in a single update.
  tab[, `:=`(
    pct = paste0(pct, "%"),
    count = format(count, big.mark = ",")
  )]
  kable(tab)
} else {
  cat("(no subsection data)")
}
3
108
61.71%
4
13
7.43%
7
13
7.43%
6
10
5.71%
5
9
5.14%
10
5
2.86%
13
4
2.29%
19
4
2.29%
8
4
2.29%
12
3
1.71%
Financial Summary
Code
# Financial summary: total and median of revenue, expenses and end-of-year
# assets, formatted with fmt_money(); prints a placeholder when absent.
fin <- r$summary_stats$financial
if (length(fin) > 0L) {
  fin_revenue <- fin$revenue
  fin_expenses <- fin$expenses
  fin_assets <- fin$assets
  tab <- data.table(
    metric = c("total_revenue", "total_expenses", "total_assets_eoy"),
    sum = c(
      fmt_money(fin_revenue$total),
      fmt_money(fin_expenses$total),
      fmt_money(fin_assets$total)
    ),
    median = c(
      fmt_money(fin_revenue$median),
      fmt_money(fin_expenses$median),
      fmt_money(fin_assets$median)
    )
  )
  kable(tab)
} else {
  cat("(no financial summary)")
}
total_revenue
$6,140,186
$6,242
total_expenses
—
—
total_assets_eoy
$6,118,417
$5,100
Tax Period Year Coverage
Code
# Tax-period year coverage: row count and percentage share per tax year,
# sorted by year; prints a placeholder when the distribution is empty.
d <- r$summary_stats$tax_period_year_distribution
if (length(d) > 0L) {
  year_counts <- as.integer(unlist(d))
  # Shares are computed from the raw integer counts, before the counts are
  # turned into comma-formatted strings for display.
  tab <- data.table(
    tax_year = names(d),
    n = format(year_counts, big.mark = ","),
    pct = sprintf("%.2f%%", 100 * year_counts / sum(year_counts))
  )
  setorder(tab, tax_year)
  kable(tab)
} else {
  cat("(no coverage data)")
}
Field Completeness by Category
Columns are grouped by Form section parsed from the crosswalk’s location field.
Code
# Field completeness by category: one row per category report, widest
# category first; prints a placeholder when there is no breakdown.
cats <- r$category_reports
if (length(cats) > 0L) {
  # Flatten one category report into a single display row.
  category_row <- function(entry) {
    data.table(
      category = entry$category_name,
      n_cols = entry$column_count,
      cols_present = entry$columns_present,
      avg_completeness = sprintf("%.2f%%", entry$avg_completeness)
    )
  }
  tab <- rbindlist(lapply(cats, category_row))
  # Descending by number of columns in the category.
  setorderv(tab, "n_cols", order = -1L)
  kable(tab)
} else {
  cat("(no category breakdown)")
}
other
17
17
97.88%
sched_a
15
15
97.90%
part_i
8
8
100.00%
header
3
3
100.00%
part_iv
3
3
100.00%
part_ii
2
2
100.00%
part_vi
2
2
100.00%
Completeness by Vintage Cohort
When a single tax-year file blends rows from multiple extract_year × source_form cohorts, the per-cohort completeness reveals whether any vintage is dragging the overall metric down. A clean run produces 100% for every cohort.
Code
# Completeness by vintage cohort: one row per extract_year x source_form
# entry of the report; prints a placeholder when no cohort list is present.
pc <- r$completeness_by_cohort
if (length(pc) > 0L) {
  # Flatten one cohort entry into a single display row. The row count is
  # formatted per entry, so values are not padded to a common width.
  cohort_row <- function(entry) {
    data.table(
      extract_year = entry$extract_year,
      source_form = entry$source_form,
      rows = format(entry$n_rows, big.mark = ","),
      expected_cols = entry$n_expected_cols,
      completeness_pct = sprintf("%.2f%%", entry$completeness_pct)
    )
  }
  tab <- rbindlist(lapply(pc, cohort_row))
  kable(tab)
} else {
  cat("(single cohort)")
}
2012
990
4
35
100.00%
2013
990
2
58
100.00%
2014
990
3
58
100.00%
2015
990
3
59
100.00%
2016
990
3
59
100.00%
2017
990
1
59
100.00%
2018
990
1
59
100.00%
2019
990
1
59
98.31%
2021
990
2
59
100.00%
2012
990ez
17
35
100.00%
2013
990ez
13
58
100.00%
2014
990ez
40
58
100.00%
2015
990ez
29
59
100.00%
2016
990ez
21
59
100.00%
2017
990ez
10
59
100.00%
2018
990ez
7
59
100.00%
2019
990ez
7
59
98.79%
2020
990ez
1
59
98.31%
2021
990ez
2
59
99.15%
2022
990ez
4
59
98.73%
2023
990ez
4
59
98.73%
Data Issues
Code
# Data issues: gather every hard-check problem recorded on the report object
# `r` (missing columns, extra columns, critical-field issues) and print them
# as a bullet list, or a single all-clear line when there are none.
issues <- character(0)

# Describe a set of column names as "<label> <n> columns: a, b, ...".
#
# label    Leading word of the message ("Missing" / "Extra").
# cols     Character vector of column names.
# max_show Maximum number of names to spell out.
#
# When more than `max_show` names exist, the remainder is summarised as
# "(+N more)" so a truncated list is not mistaken for the complete one.
summarise_columns <- function(label, cols, max_show = 10L) {
  shown <- paste(head(cols, max_show), collapse = ", ")
  hidden <- length(cols) - min(length(cols), max_show)
  if (hidden > 0L) {
    shown <- sprintf("%s, ... (+%d more)", shown, hidden)
  }
  sprintf("%s %d columns: %s", label, length(cols), shown)
}

if (length(r$missing_columns) > 0L) {
  issues <- c(issues, summarise_columns("Missing", r$missing_columns))
}
if (length(r$extra_columns) > 0L) {
  issues <- c(issues, summarise_columns("Extra", r$extra_columns))
}

# One line per critical-field entry: its name followed by all of its values,
# flattened and joined with " | ".
critical <- r$critical_field_issues
if (length(critical) > 0L) {
  critical_lines <- vapply(
    names(critical),
    function(k) {
      sprintf("%s: %s", k, paste(unlist(critical[[k]]), collapse = " | "))
    },
    character(1),
    USE.NAMES = FALSE
  )
  issues <- c(issues, critical_lines)
}

if (length(issues) == 0L) {
  cat("✅ No hard-check issues detected. \n ")
} else {
  # Emit all bullets in one call; each issue keeps its own "- " prefix and
  # line terminator.
  cat(paste0("- ", issues, " \n ", collapse = ""))
}
- Missing 89 columns: lobbying_activities_cd, membership_fees_received_sec170, total_direct_lobbying_expense_sec501h, grassroots_lobbying_amount, grassroots_lobbying_ceiling_amount, taxable_lobbying_amount_all_electing_orgs, nontaxable_lobbying_amount, other_lobbying_amount, taxable_grassroots_lobbying_amount_all_electing_orgs, accounting_period
- Extra 22 columns: efile_indicator, excess_benefit_transaction_cd, excess_over_1pct_sec509, excess_over_2pct_sec170, filed_form_990t_cd, gross_income_interest_sec509, gross_receipts_related_activities_sec509, net_ubi_excl_10b_sec509, net_ubi_sec170, other_income_sec509
Generated from /mnt/c/Users/tpoongundranar/Documents/Urban/NCCS/nccs-data-core/data/logs/merged/quality_990combined_2007.rds. See R/quality/post_checks.R for the validator implementation and docs/05-quality-gates.qmd for the gate definitions.