1 Labeling Variables
1.1 Packages
library( tidyverse ) # data wrangling
library( data.table ) # data wrangling
library( pander ) # pretty tables
library( knitr ) # pretty tables
library( DiagrammeR ) # drawing diagrams
library( ggthemes ) # graphics
library( gridExtra ) # graphics
# CUSTOM FUNCTIONS: fable()
source( "R/functions.R" )
1.2 Load Data
# REMOTE OPTION: read the unified BMF directly from the NCCS S3 bucket.
URL <- "https://nccsdata.s3.us-east-1.amazonaws.com/harmonized/bmf/unified/BMF_UNIFIED_V1.1.csv"
d <- fread( URL )

# LOCAL OPTION: read a previously downloaded copy.
# NOTE: when both statements are run, this one overwrites the remote read.
d <- fread( "data/BMF_UNIFIED_V1.1.csv" )
# Build ORG_TYPE_501C ("501C_03", "501C_92", ...) from the subsection code,
# zero-padded to two characters.
n_501c <- str_pad( d$BMF_SUBSECTION_CODE, width=2, pad="0" )
d$ORG_TYPE_501C <- paste0( "501C_", n_501c )

# paste0() turns a missing code into the string "501C_NA"; restore those to NA.
d$ORG_TYPE_501C[ is.na( n_501c ) ] <- NA
# ORG_TYPE_PF: 1 when the PF filing requirement code is 1, 2 or 3, else 0.
d$ORG_TYPE_PF <- as.numeric( d$BMF_PF_FILING_REQ_CODE %in% c(1,2,3) )

# %in% returns FALSE for NA, so restore missingness explicitly.
d$ORG_TYPE_PF[ is.na(d$BMF_PF_FILING_REQ_CODE) ] <- NA
# Rename raw BMF fields to the harmonized names used in the rest of the script.
# BMF_ORGANIZATION_CODE goes straight to its final name ORG_CORP_FORM
# (previously renamed in two steps via BMF_CORP_FORM).
d <- d %>%
  rename(
    ORG_NAME         = ORG_NAME_CURRENT,
    ORG_CORP_FORM    = BMF_ORGANIZATION_CODE,
    BMF_ASSET_LEVEL  = BMF_ASSET_CODE,
    BMF_INCOME_LEVEL = BMF_INCOME_CODE,
    F990_FORM_YEAR   = ORG_FISCAL_YEAR )
# Column order applied to `d` before labeling.
new.order <- c(
  "EIN", "EIN2",
  "ORG_NAME",
  "ORG_NAME_SEC",
  "ORG_PERS_ICO",
  "ORG_FISCAL_PERIOD",
  "ORG_TYPE_501C",
  "ORG_TYPE_PF",
  "ORG_CORP_FORM",
  "ORG_RULING_DATE",
  "ORG_RULING_YEAR",
  "ORG_YEAR_FIRST",
  "ORG_YEAR_LAST",
  "ORG_YEAR_COUNT",
  "NTEE_IRS",
  "NTEE_NCCS",
  "NTEEV2",
  "NCCS_LEVEL_1",
  "NCCS_LEVEL_2",
  "NCCS_LEVEL_3",
  "BMF_SUBSECTION_CODE",
  "BMF_DEDUCTIBILITY_CODE",
  "BMF_FILING_REQ_CODE",
  "BMF_PF_FILING_REQ_CODE",
  "BMF_FOUNDATION_CODE",
  "BMF_STATUS_CODE",
  "BMF_CLASSIFICATION_CODE",
  "BMF_AFFILIATION_CODE",
  "BMF_GROUP_EXEMPT_NUM",
  "BMF_INCOME_LEVEL",
  "BMF_ASSET_LEVEL",
  "F990_FORM_YEAR",
  "F990_TOTAL_REVENUE_RECENT",
  "F990_TOTAL_INCOME_RECENT",
  "F990_TOTAL_ASSETS_RECENT",
  "F990_ORG_ADDR_STREET",
  "F990_ORG_ADDR_CITY",
  "F990_ORG_ADDR_STATE",
  "F990_ORG_ADDR_ZIP",
  "CENSUS_URBAN_AREA",
  "CENSUS_STATE_ABBR",
  "CENSUS_COUNTY_NAME",
  "CENSUS_CBSA_NAME",
  "CENSUS_CBSA_FIPS",
  "CENSUS_BLOCK_FIPS",
  "ORG_ADDR_FULL",
  "ORG_ADDR_MATCH",
  "LATITUDE",
  "LONGITUDE",
  "GEOCODER_SCORE",
  "GEOCODER_MATCH" )
# MAKE SURE ALL LISTED VARIABLES EXIST
setdiff( new.order, names(d) )
character(0)
# DROPPED VARIABLES
setdiff( names(d), new.order )
character(0)
# Reorder columns; all_of() errors if any listed column is missing from d.
d <- select( d, all_of( new.order ) )
1.3 Impute Missing Values
1.3.1 Private Foundations
Private foundations have 501C Status of 501C_03 or 501C_92.
# Cross-tabulate 501C type against the PF filing requirement code, keeping NAs.
V1 <- d$ORG_TYPE_501C
V2 <- d$BMF_PF_FILING_REQ_CODE
table( V1, V2, useNA="ifany" ) %>% kable()
| | NON-PF (0)| OPERATING PF (1)| NON-OPERATING PF (2)| NON-OPERATING PF (3)| NA|
|:-------|----------:|----------------:|--------------------:|--------------------:|------:|
|501C_00 | 27| 0| 0| 0| 226|
|501C_01 | 709| 0| 0| 0| 91|
|501C_02 | 4295| 0| 0| 0| 5386|
|501C_03 | 1452651| 144762| 20| 237| 905717|
|501C_04 | 75102| 0| 0| 0| 112333|
|501C_05 | 45202| 0| 0| 0| 46128|
|501C_06 | 61227| 0| 0| 0| 57316|
|501C_07 | 48675| 0| 0| 0| 48116|
|501C_08 | 38539| 0| 0| 0| 72095|
|501C_09 | 5587| 0| 0| 0| 13978|
|501C_10 | 15446| 0| 0| 0| 14302|
|501C_11 | 6| 0| 0| 0| 6|
|501C_12 | 5449| 0| 0| 0| 2770|
|501C_13 | 9692| 0| 0| 0| 4235|
|501C_14 | 1529| 0| 0| 0| 4136|
|501C_15 | 623| 0| 0| 0| 1849|
|501C_16 | 11| 0| 0| 0| 16|
|501C_17 | 87| 0| 0| 0| 541|
|501C_18 | 3| 0| 0| 0| 3|
|501C_19 | 26908| 0| 0| 0| 30852|
|501C_20 | 1| 0| 0| 0| 64|
|501C_21 | 2| 0| 0| 0| 28|
|501C_23 | 2| 0| 0| 0| 1|
|501C_24 | 0| 0| 0| 0| 2|
|501C_25 | 594| 0| 0| 0| 2186|
|501C_26 | 5| 0| 0| 0| 6|
|501C_27 | 14| 0| 0| 0| 4|
|501C_29 | 15| 0| 0| 0| 8|
|501C_40 | 220| 0| 0| 0| 30|
|501C_50 | 4| 1| 0| 0| 25|
|501C_70 | 0| 0| 0| 0| 1|
|501C_71 | 1| 0| 0| 0| 0|
|501C_80 | 0| 0| 0| 0| 1383|
|501C_81 | 1| 0| 0| 0| 0|
|501C_82 | 11| 0| 0| 0| 18|
|501C_90 | 0| 0| 0| 0| 790|
|501C_91 | 666| 1| 0| 0| 695|
|501C_92 | 140| 5894| 1| 0| 3493|
|501C_93 | 0| 0| 0| 0| 2440|
|NA | 0| 0| 0| 0| 187369|
The rest of the cases can be imputed as non-PFs. Note that BMF_PF_FILING_REQ_CODE is labeled below; at this point it is still coded numerically as 0-3.
Before:
fable( d$ORG_TYPE_PF, p=T )
ORG_TYPE_PF | Freq |
---|---|
0 | 0.518 |
NA | 0.439 |
1 | 0.044 |
fable( d$BMF_PF_FILING_REQ_CODE, p=T )
BMF_PF_FILING_REQ_CODE | Freq |
---|---|
0 | 0.518 |
NA | 0.439 |
1 | 0.044 |
3 | 0.000 |
2 | 0.000 |
# Organizations whose 501C type is neither 501C_03 nor 501C_92 are treated as
# non-PFs. Rows with a missing ORG_TYPE_501C give NA in not.a.pf and are
# left unchanged by the assignments below.
RULE1 <- d$ORG_TYPE_501C == "501C_03"
RULE2 <- d$ORG_TYPE_501C == "501C_92"
not.a.pf <- ! ( RULE1 | RULE2 )

# NOTE(review): this also overwrites non-missing PF codes for those types
# (the cross-tab above shows one 501C_50 and one 501C_91 coded as PF) - confirm
# that is intended, or add `& is.na( d$ORG_TYPE_PF )` to impute only.
d$ORG_TYPE_PF[ not.a.pf ] <- 0
d$BMF_PF_FILING_REQ_CODE[ not.a.pf ] <- 0
After:
fable( d$ORG_TYPE_PF, p=T )
ORG_TYPE_PF | Freq |
---|---|
0 | 0.640 |
NA | 0.317 |
1 | 0.044 |
fable( d$BMF_PF_FILING_REQ_CODE, p=T )
BMF_PF_FILING_REQ_CODE | Freq |
---|---|
0 | 0.640 |
NA | 0.317 |
1 | 0.044 |
3 | 0.000 |
2 | 0.000 |
1.4 Add Labels to Factors
1.4.1 ORG_FISCAL_PERIOD
| code|labels |
|----:|:--------|
| 1|JAN (01) |
| 2|FEB (02) |
| 3|MAR (03) |
| 4|APR (04) |
| 5|MAY (05) |
| 6|JUN (06) |
| 7|JUL (07) |
| 8|AUG (08) |
| 9|SEP (09) |
| 10|OCT (10) |
| 11|NOV (11) |
| 12|DEC (12) |
###
### ORG_FISCAL_PERIOD
###

# Convert month codes to labeled factor levels; codes absent from `levels`
# become NA.
d$ORG_FISCAL_PERIOD <-
  factor( d$ORG_FISCAL_PERIOD,
          levels= fiscal.codes$code,
          labels= fiscal.codes$label )

fable( d$ORG_FISCAL_PERIOD )
ORG_FISCAL_PERIOD | Freq |
---|---|
JAN (01) | 258,398 |
FEB (02) | 252,643 |
MAR (03) | 312,381 |
APR (04) | 269,016 |
MAY (05) | 288,255 |
JUN (06) | 478,666 |
JUL (07) | 267,541 |
AUG (08) | 289,599 |
SEP (09) | 297,430 |
OCT (10) | 279,988 |
NOV (11) | 237,079 |
DEC (12) | 232,002 |
NA | 1 |
1.4.2 BMF_STATUS_CODE
|code |label |
|:----|:----------------------------|
|1 |Unconditional Exemption (01) |
|2 |Conditional Exemption (02) |
|12 |Trust (12) |
|25 |Split Interest Trust (25) |
###
### BMF_STATUS_CODE
###

# Label the exemption status codes; codes absent from `levels` become NA.
d$BMF_STATUS_CODE <-
  factor( d$BMF_STATUS_CODE,
          levels=status$code,
          labels=status$label )

fable( d$BMF_STATUS_CODE )
BMF_STATUS_CODE | Freq |
---|---|
Unconditional Exemption (01) | 1,935,874 |
Conditional Exemption (02) | 662 |
Trust (12) | 6,925 |
Split Interest Trust (25) | 899 |
NA | 1,518,639 |
1.4.3 BMF_AFFILIATION_CODE
| code|role |affiliation |label |
|----:|:--------------|:-----------------|:----------------------------|
| 3|INDEPENDENT |NONE |INDEPENDENT (3) |
| 1|PARENT |NETWORK |NETWORK-PARENT (1) |
| 2|INTERMEDIATE |NETWORK |NETWORK-INTERM (2) |
| 6|PARENT-REGULAR |GROUP-EXM |GROUP-EXM-PARENT-REGULAR (6) |
| 8|PARENT-CHURCH |GROUP-EXM |GROUP-EXM-PARENT-CHURCH (8) |
| 7|INTERMEDIATE |GROUP-EXM |GROUP-EXM-INTERM (7) |
| 9|SUBORDINATE |GROUP-EXM/NETWORK |SUBORDINATE (9) |
fable( d$BMF_AFFILIATION_CODE )
BMF_AFFILIATION_CODE | Freq |
---|---|
NA | 1,518,639 |
3 | 1,501,083 |
9 | 428,701 |
0 | 5,671 |
1 | 3,615 |
6 | 3,378 |
2 | 1,100 |
8 | 713 |
7 | 99 |
###
### BMF_AFFILIATION_CODE
###

# Recode as independent (3) every organization with group exemption number 0
# that is not a network parent (1) or network intermediate (2).
RULE1 <- d$BMF_GROUP_EXEMPT_NUM == 0
RULE2 <- ! d$BMF_AFFILIATION_CODE %in% c(1,2)

d$BMF_AFFILIATION_CODE[ RULE1 & RULE2 ] <- 3

# Codes absent from aff.codes$code become NA.
d$BMF_AFFILIATION_CODE <-
  factor(
    d$BMF_AFFILIATION_CODE,
    levels= aff.codes$code,
    labels= aff.codes$label )

fable( d$BMF_AFFILIATION_CODE )
BMF_AFFILIATION_CODE | Freq |
---|---|
INDEPENDENT (3) | 2,040,545 |
NETWORK-PARENT (1) | 3,615 |
NETWORK-INTERM (2) | 1,100 |
GROUP-EXM-PARENT-REGULAR (6) | 3,378 |
GROUP-EXM-INTERM (7) | 99 |
GROUP-EXM-PARENT-CHURCH (8) | 713 |
SUBORDINATE (9) | 428,691 |
NA | 984,858 |
1.4.4 BMF_DEDUCTIBILITY_CODE
# | code|label |desc |
# |----:|:------------|:--------------------------------------------------------------|
# | 0|UDC (0) |Undocumented category |
# | 1|YES (1) |Contributions are deductible |
# | 2|NO (2) |Contributions are not deductible |
# | 4|BY TREATY(4) |Contributions are deductible by treaty (foreign organizations) |
###
### BMF_DEDUCTIBILITY_CODE
###

# Label the deductibility codes; codes absent from `levels` become NA.
d$BMF_DEDUCTIBILITY_CODE <-
  factor( d$BMF_DEDUCTIBILITY_CODE,
          levels=deduct$code,
          labels=deduct$label )

fable( d$BMF_DEDUCTIBILITY_CODE )
BMF_DEDUCTIBILITY_CODE | Freq |
---|---|
UDC (0) | 35,116 |
YES (1) | 1,658,772 |
NO (2) | 249,920 |
BY TREATY(4) | 552 |
NA | 1,518,639 |
1.4.5 BMF_ASSET_LEVEL & BMF_INCOME_LEVEL
# | code|label |
# |----:|:--------------|
# | 0|$0 |
# | 1|$1 to $10k |
# | 2|$10k to $25k |
# | 3|$25k to $100k |
# | 4|$100k to $500k |
# | 5|$500k to $1m |
# | 6|$1m to $5m |
# | 7|$5m to $10m |
# | 8|$10m to $50m |
# | 9|$50m + |
###
### BMF_ASSET_LEVEL
### BMF_INCOME_LEVEL
###

# Label the asset bracket codes; codes absent from `levels` become NA.
d$BMF_ASSET_LEVEL <-
  factor( d$BMF_ASSET_LEVEL,
          levels=asset.level$code,
          labels=asset.level$label )

fable( d$BMF_ASSET_LEVEL )
BMF_ASSET_LEVEL | Freq |
---|---|
$0 | 1,244,763 |
$1 to $10k | 76,640 |
$10k to $25k | 41,754 |
$25k to $100k | 119,700 |
$100k to $500k | 184,359 |
$500k to $1m | 73,462 |
$1m to $5m | 117,695 |
$5m to $10m | 30,400 |
$10m to $50m | 38,176 |
$50m + | 17,411 |
NA | 1,518,639 |
# Income brackets use the same code/label table as asset brackets.
d$BMF_INCOME_LEVEL <-
  factor( d$BMF_INCOME_LEVEL,
          levels=asset.level$code,
          labels=asset.level$label )

fable( d$BMF_INCOME_LEVEL )
BMF_INCOME_LEVEL | Freq |
---|---|
$0 | 1,255,152 |
$1 to $10k | 59,235 |
$10k to $25k | 34,339 |
$25k to $100k | 149,334 |
$100k to $500k | 226,771 |
$500k to $1m | 66,600 |
$1m to $5m | 93,266 |
$5m to $10m | 22,367 |
$10m to $50m | 26,713 |
$50m + | 10,583 |
NA | 1,518,639 |
1.4.6 ORG_CORP_FORM
# | code|label |
# |----:|:---------------|
# | 0|UDC (0) |
# | 1|CORPORATION (1) |
# | 2|TRUST (2) |
# | 3|COOPERATIVE (3) |
# | 4|PARTNERSHIP (4) |
# | 5|ASSOCIATION (5) |
# | 6|UDC (6) |
#########
#########  ORG_CORP_FORM
#########

# Label the corporate form codes; codes absent from `levels` become NA.
d$ORG_CORP_FORM <-
  factor( d$ORG_CORP_FORM,
          levels=corp$code,
          labels=corp$label )

fable( d$ORG_CORP_FORM )
ORG_CORP_FORM | Freq |
---|---|
UDC (0) | 10,478 |
CORPORATION (1) | 1,582,603 |
TRUST (2) | 60,422 |
COOPERATIVE (3) | 2,052 |
PARTNERSHIP (4) | 129 |
ASSOCIATION (5) | 484,774 |
UDC (6) | 2,885 |
NA | 1,319,656 |
1.4.7 BMF_FOUNDATION_CODE
# | code|label |
# |----:|:--------------------------------|
# | 0|501C_not_3 (0) |
# | 2|priv op foundation (2) |
# | 3|priv op foundation (other) (3) |
# | 4|priv non-op foundation (4) |
# | 9|status suspended (9) |
# | 10|church (10) |
# | 11|school (11) |
# | 12|hospital/ med research (12) |
# | 13|public university support (13) |
# | 14|governmental unit (14) |
# | 15|gov or public support (15) |
# | 16|one-third investment or ubi (16) |
# | 17|subsidiary (17) |
# | 18|public safety (18) |
# | 21|supporting org type1 (21) |
# | 22|supporting org type2 (22) |
# | 23|supporting org type3 (23) |
# | 24|supporting org type3 nfi (24) |
###
### BMF_FOUNDATION_CODE
###

# Label the foundation codes; codes absent from `levels` become NA.
d$BMF_FOUNDATION_CODE <-
  factor( d$BMF_FOUNDATION_CODE,
          levels=fnd$code,
          labels=fnd$label )

fable( d$BMF_FOUNDATION_CODE )
BMF_FOUNDATION_CODE | Freq |
---|---|
501C_not_3 (0) | 766,790 |
priv op foundation (2) | 518 |
priv op foundation (other) (3) | 13,215 |
priv non-op foundation (4) | 234,457 |
status suspended (9) | 2,348 |
church (10) | 315,158 |
school (11) | 38,545 |
hospital/ med research (12) | 10,220 |
public university support (13) | 2,697 |
governmental unit (14) | 527 |
gov or public support (15) | 1,086,466 |
one-third investment or ubi (16) | 680,048 |
subsidiary (17) | 33,274 |
public safety (18) | 102 |
supporting org type1 (21) | 5,211 |
supporting org type2 (22) | 1,122 |
supporting org type3 (23) | 804 |
supporting org type3 nfi (24) | 300 |
NA | 271,197 |
1.4.8 NCCS_LEVEL_1
###
### NCCS_LEVEL_1
###

# Label NCCS level-1 categories; values absent from L1.levels become NA.
d$NCCS_LEVEL_1 <-
  factor( d$NCCS_LEVEL_1,
          levels=L1.levels,
          labels=L1.labels )

fable( d$NCCS_LEVEL_1 )
NCCS_LEVEL_1 | Freq |
---|---|
NONPROFIT (501C_not3) | 953,649 |
CHARITY (501C3) | 2,247,977 |
PRIVATE FOUNDATION (501C3-PF) | 252,928 |
UNDEFINED | 8,428 |
NA | 17 |
1.4.9 NCCS_LEVEL_2
###
### NCCS_LEVEL_2
###

# Label NCCS level-2 categories; values absent from L2.levels become NA.
d$NCCS_LEVEL_2 <-
  factor( d$NCCS_LEVEL_2,
          levels=L2.levels,
          labels=L2.labels )

fable( d$NCCS_LEVEL_2 )
NCCS_LEVEL_2 | Freq |
---|---|
OPERATING (O) | 2,865,354 |
SUPPORTING (S) | 468,038 |
MUTUAL/MEMBER (M) | 129,590 |
NA | 17 |
1.4.10 NCCS_LEVEL_3
|code |label |
|:----|:---------------------------------------|
|AR |Arts, culture and humanities(AR) |
|ED |Education(ED) |
|EN |Environment/animals(EN) |
|HE |Health(HE) |
|HS |Human Services(HS) |
|IN |International, foreign affairs(IN) |
|MO |Other mutual benefit(MO) |
|MR |Pension and retirement funds(MR) |
|PB |Public, societal benefit(PB) |
|RE |Religion related(RE) |
|UN |Unknown, unclassified(UN) |
|ZA |Single organization support(ZA) |
|ZB |Fundraising within NTEE major group(ZB) |
|ZC |Private grantmaking foundations(ZC) |
|ZD |Public foundations(ZD) |
|ZE |General fundraising(ZE) |
|ZF |Other Supporting Public Benefit(ZF) |
###
### NCCS_LEVEL_3
###

# Treat the placeholder "-" and the empty string as missing.
d$NCCS_LEVEL_3[ d$NCCS_LEVEL_3 %in% c( "-", "" ) ] <- NA

# Label NCCS level-3 categories; codes absent from `levels` become NA.
d$NCCS_LEVEL_3 <-
  factor( d$NCCS_LEVEL_3,
          levels= level3.codes$code,
          labels= level3.codes$label )

fable( d$NCCS_LEVEL_3 )
NCCS_LEVEL_3 | Freq |
---|---|
Arts, culture and humanities(AR) | 243,094 |
Education(ED) | 398,845 |
Environment/animals(EN) | 118,606 |
Health(HE) | 198,935 |
Human Services(HS) | 904,014 |
International, foreign affairs(IN) | 42,230 |
Other mutual benefit(MO) | 70,057 |
Pension and retirement funds(MR) | 289 |
Public, societal benefit(PB) | 477,413 |
Religion related(RE) | 406,963 |
Unknown, unclassified(UN) | 167,378 |
Single organization support(ZA) | 47,232 |
Fundraising within NTEE major group(ZB) | 21,291 |
Private grantmaking foundations(ZC) | 143,992 |
Public foundations(ZD) | 26,788 |
General fundraising(ZE) | 6,358 |
Other Supporting Public Benefit(ZF) | 140,162 |
NA | 49,352 |
1.5 501C Types
Do we need to reconcile IRS and NCCS dictionaries while incorporating new waves of BMF files?
|ORG_TYPE_501C | Freq|
|:-------------|---------:|
|501C_00 | 253|
|501C_01 | 800|
|501C_02 | 9,681|
|501C_03 | 2,503,387|
|501C_04 | 187,435|
|501C_05 | 91,330|
|501C_06 | 118,543|
|501C_07 | 96,791|
|501C_08 | 110,634|
|501C_09 | 19,565|
|501C_10 | 29,748|
|501C_11 | 12|
|501C_12 | 8,219|
|501C_13 | 13,927|
|501C_14 | 5,665|
|501C_15 | 2,472|
|501C_16 | 27|
|501C_17 | 628|
|501C_18 | 6|
|501C_19 | 57,760|
|501C_20 | 65|
|501C_21 | 30|
|501C_23 | 3|
|501C_24 | 2|
|501C_25 | 2,780|
|501C_26 | 11|
|501C_27 | 18|
|501C_29 | 23|
|501C_40 | 250|
|501C_50 | 30|
|501C_70 | 1|
|501C_71 | 1|
|501C_80 | 1,383| # missing from IRS
|501C_81 | 1|
|501C_82 | 29| # missing from IRS
|501C_90 | 790| # missing from IRS
|501C_91 | 1,362| # missing from IRS
|501C_92 | 9,528| # different from IRS ?
|501C_93 | 2,440| # missing from IRS
|NA | 187,369|
##########
########## FROM BMF DOCUMENTATION
##########
TABLE OF EO SUBSECTION AND CLASSIFICATION CODES
Subsection Classification Description
Code Code
01 1 Government Instrumentality
02 1 Title-Holding Corporation
03 1 Charitable Organization
03 2 Educational Organization
03 3 Literary Organization
03 4 Organization to Prevent Cruelty to Animals
03 5 Organization to Prevent Cruelty to Children
03 6 Organization for Public Safety Testing
03 7 Religious Organization
03 8 Scientific Organization
04 1 Civic League
04 2 Local Association of Employees
04 3 Social Welfare Organization
05 1 Agricultural Organization
05 2 Horticultural Organization
05 3 Labor Organization
06 1 Board of Trade
06 2 Business League
06 3 Chamber of Commerce
06 4 Real Estate Board
07 1 Pleasure, Recreational, or Social Club
08 1 Fraternal Beneficiary Society, Order or Association
09 1 Voluntary Employees' Beneficiary Association (Non-Govt. Emps.)
09 2 Voluntary Employees' Beneficiary Association (Govt. Emps.)
10 1 Domestic Fraternal Societies and Associations
11 1 Teachers Retirement Fund Assoc.
12 1 Benevolent Life Insurance Assoc.
12 2 Mutual Ditch or Irrigation Co.
12 3 Mutual Cooperative Telephone Co.
12 4 Organization Like Those on Three Preceding Lines
13 1 Burial Association
13 2 Cemetery Company
14 1 Credit Union
14 2 Other Mutual Corp. or Assoc.
15 1 Mutual Insurance Company or Assoc. Other Than Life or Marine
16 1 Corp. Financing Crop Operations
17 1 Supplemental Unemployment Compensation Trust or Plan
18 1 Employee Funded Pension Trust (Created Before 6/25/59)
19 1 Post or Organization of War Veterans
20 1 Legal Service Organization
21 1 Black Lung Trust
22 1 Multiemployer Pension Plan
23 1 Veterans Assoc. Formed Prior to 1880
24 1 Trust Described in Sect. 4049 of ERISA
25 1 Title Holding Co. for Pensions, etc.
26 1 State-Sponsored High Risk Health Insurance Organizations
27 1 State-Sponsored Workers Compensation Reinsurance
29 1 ACA 1322 Qualified Nonprofit Health Insurance Issuers
40 1 Apostolic and Religious Org. (501(d))
50 1 Cooperative Hospital Service Organization (501(e))
60 1 Cooperative Service Organization of Operating Educational Organization (501(f))
70 1 Child Care Organization (501(k))
71 1 Charitable Risk Pool
81 1 Qualified State-Sponsored Tuition Program
92 1 4947(a)(1) - Private Foundation (Form 990PF Filer)
##########
########## FROM NCCS DOCUMENTATION
##########
SUBSECCD
Character20) Subsection code
(code (03=501(c)(3), etc.)
IRS subsection 01 01- Corporations originated under Act of Congress, including Federal Credit Unions
02 02- Title holding corporation for a tax-exempt organization.
03 03- Religious, educational, charitable, scientific, and literary organizations...
04 04- Civic leagues, social welfare organizations, and local associations of employees
05 05- Labor, agricultural, horticultural organizations. These are educational or instruct. grps...
06 06- Business leagues, chambers of commerce, real estate boards, etc. formed to improve conditions...
07 07- Social and recreational clubs which provide pleasure, recreation, and social activities.
08 08- Fraternal beneficiary societies and associations, with lodges providing for payment of life...
09 09- Voluntary employees' beneficiary ass'ns (including fed. employees' voluntary beneficiary...
10 10- Domestic fraternal societies and assoc's-lodges devoting their net earnings to charitable...
11 11- Teachers retirement fund associations.
12 12- Benevolent life insurance associations, mutual ditch or irrigation companies, mutual or coop...
13 13- Cemetery companies, providing burial and incidental activities for members.
14 14- State-chartered credit unions, mutual reserve funds, offering loans to members...
15 15- Mutual insurance cos. or associations, providing insurance to members substantially at cost...
16 16- Cooperative organizations to finance crop operations, in conjunction with activities ...
17 17- Supplemental unemployment benefit trusts, providing payments of suppl. unemployment comp...
18 18- Employee funded pension trusts, providing benefits under a pension plan funded by employees...
19 19- Post or organization of war veterans.
20 20- Trusts for prepaid group legal services, as part of a qual. group legal service plan or plans.
21 21- Black lung trusts, satisfying claims for compensation under Black Lung Acts.
22 22- Multiemployer Pension Plan
23 23- Veterans association formed prior to 1880
24 24-Trust described in Section 4049 of ERISA
25 25- Title Holding Company for Pensions, etc
26 26- State-Sponsored High Risk Health Insurance Organizations
27 27- State-Sponsored Workers Compensation Reinsurance
40 40- Apostolic and religious orgs. - 501(d)
50 50- Cooperative Hospital Service Organization - 501(e)
60 60- Cooperative Service Org. of Operating Educ. Org.- 501(f)
70 70- Child Care Organization - 501(k)
71 71- Charitable Risk Pool
80 80- Farmers' Cooperatives
81 81- Qualified State-Sponsored Tuition Program
82 82- 527 Political Organizations
90 90- 4947(a)(2) Split Interest Trust
91 91- 4947(a)(1) Public Charity (Files 990/990-EZ)
92 92- 4947(a)(1) Private Foundations
93 93- 1381(a)(2) Taxable Farmers Cooperative
CO CO- Unspecified 501(c) Organization Other Than 501(c)(3)
table( d$ORG_TYPE_501C, d$ORG_TYPE_PF, useNA="ifany" ) %>% pander()
0 | 1 | NA | |
---|---|---|---|
501C_00 | 253 | 0 | 0 |
501C_01 | 800 | 0 | 0 |
501C_02 | 9681 | 0 | 0 |
501C_03 | 1452651 | 145019 | 905717 |
501C_04 | 187435 | 0 | 0 |
501C_05 | 91330 | 0 | 0 |
501C_06 | 118543 | 0 | 0 |
501C_07 | 96791 | 0 | 0 |
501C_08 | 110634 | 0 | 0 |
501C_09 | 19565 | 0 | 0 |
501C_10 | 29748 | 0 | 0 |
501C_11 | 12 | 0 | 0 |
501C_12 | 8219 | 0 | 0 |
501C_13 | 13927 | 0 | 0 |
501C_14 | 5665 | 0 | 0 |
501C_15 | 2472 | 0 | 0 |
501C_16 | 27 | 0 | 0 |
501C_17 | 628 | 0 | 0 |
501C_18 | 6 | 0 | 0 |
501C_19 | 57760 | 0 | 0 |
501C_20 | 65 | 0 | 0 |
501C_21 | 30 | 0 | 0 |
501C_23 | 3 | 0 | 0 |
501C_24 | 2 | 0 | 0 |
501C_25 | 2780 | 0 | 0 |
501C_26 | 11 | 0 | 0 |
501C_27 | 18 | 0 | 0 |
501C_29 | 23 | 0 | 0 |
501C_40 | 250 | 0 | 0 |
501C_50 | 30 | 0 | 0 |
501C_70 | 1 | 0 | 0 |
501C_71 | 1 | 0 | 0 |
501C_80 | 1383 | 0 | 0 |
501C_81 | 1 | 0 | 0 |
501C_82 | 29 | 0 | 0 |
501C_90 | 790 | 0 | 0 |
501C_91 | 1362 | 0 | 0 |
501C_92 | 140 | 5895 | 3493 |
501C_93 | 2440 | 0 | 0 |
NA | 0 | 0 | 187369 |
1.6 Composite Codes
1.6.1 Tax Exempt Purpose
Combine 501C Type with BMF “Classification Codes”.
Classification Code is ambiguous. When applying for tax exempt status from the IRS, nonprofit founders report organizational purpose on their application, which is used by the IRS to make a determination on whether the organization qualifies for tax-exempt status. These codes differ from the NTEE taxonomies in that each code is binary (yes/no), and they are NOT mutually exclusive, so a nonprofit mission can fulfill one or several of these purposes.
IRS Tax-Exempt Purpose Codes: [ See Instructions pp 6-7 ]
- Charitable Purpose [yes/no]
- Religious Purpose [yes/no]
- Educational Purpose [yes/no]
- Scientific Purpose [yes/no]
- Literary Purpose [yes/no]
- Public Safety Purpose [yes/no]
- Amateur Sports Purpose [yes/no]
- Prevent Cruelty to Animals and/or Children [yes/no]
The other 501C Types have similar tax-exempt purpose categories:
|CODE |LABEL |
|:----|:------------------------------------------------------------------------------------------|
|1-1 |Government Instrumentality (501C1-1) |
|2-1 |Title-Holding Corporation (501C2-1) |
|3-1 |Charitable Organization (501C3-1) |
|3-2 |Educational Organization (501C3-2) |
|3-3 |Literary Organization (501C3-3) |
|3-4 |Organization to Prevent Cruelty to Animals (501C3-4) |
|3-5 |Organization to Prevent Cruelty to Children (501C3-5) |
|3-6 |Organization for Public Safety Testing (501C3-6) |
|3-7 |Religious Organization (501C3-7) |
|3-8 |Scientific Organization (501C3-8) |
|4-1 |Civic League (501C4-1) |
|4-2 |Local Association of Employees (501C4-2) |
|4-3 |Social Welfare Organization (501C4-3) |
|5-1 |Agricultural Organization (501C5-1) |
|5-2 |Horticultural Organization (501C5-2) |
|5-3 |Labor Organization (501C5-3) |
|6-1 |Board of Trade (501C6-1) |
|6-2 |Business League (501C6-2) |
|6-3 |Chamber of Commerce (501C6-3) |
|6-4 |Real Estate Board (501C6-4) |
|7-1 |Pleasure, Recreational, or Social Club (501C7-1) |
|8-1 |Fraternal Beneficiary Society, Order or Association (501C8-1) |
|9-1 |Voluntary Employees Beneficiary Association (Non-Govt Emps) (501C9-1) |
|9-2 |Voluntary Employees Beneficiary Association (Govt Emps) (501C9-2) |
|10-1 |Domestic Fraternal Societies and Associations (501C10-1) |
|11-1 |Teachers Retirement Fund Assoc (501C11-1) |
|12-1 |Benevolent Life Insurance Assoc (501C12-1) |
|12-2 |Mutual Ditch or Irrigation Co (501C12-2) |
|12-3 |Mutual Cooperative Telephone Co (501C12-3) |
|12-4 |Organization Like Those on Three Preceding Lines (501C12-4) |
|13-1 |Burial Association (501C13-1) |
|13-2 |Cemetery Company (501C13-2) |
|14-1 |Credit Union (501C14-1) |
|14-2 |Other Mutual Corp or Assoc (501C14-2) |
|15-1 |Mutual Insurance Company or Assoc Other Than Life or Marine (501C15-1) |
|16-1 |Corp Financing Crop Operations (501C16-1) |
|17-1 |Supplemental Unemployment Compensation Trust or Plan (501C17-1) |
|18-1 |Employee Funded Pension Trust (501C18-1) |
|19-1 |Post or Organization of War Veterans (501C19-1) |
|20-1 |Legal Service Organization (501C20-1) |
|21-1 |Black Lung Trust (501C21-1) |
|22-1 |Multiemployer Pension Plan (501C22-1) |
|23-1 |Veterans Assoc Formed Prior to 1880 (501C23-1) |
|24-1 |Trust Described in Sect 4049 of ERISA (501C24-1) |
|25-1 |Title Holding Co for Pensions etc (501C25-1) |
|26-1 |State-Sponsored High Risk Health Insurance Organizations (501C26-1) |
|27-1 |State-Sponsored Workers Compensation Reinsurance (501C27-1) |
|29-1 |ACA 1322 Qualified Nonprofit Health Insurance Issuers (501C29-1) |
|40-1 |Apostolic and Religious Org (501(d)) (501C40-1) |
|50-1 |Cooperative Hospital Service Organization (501(e)) (501C50-1) |
|60-1 |Cooperative Service Organization of Operating Educational Organization (501(f)) (501C60-1) |
|70-1 |Child Care Organization (501(k)) (501C70-1) |
|71-1 |Charitable Risk Pool (501C71-1) |
|81-1 |Qualified State-Sponsored Tuition Program (501C81-1) |
|92-1 |4947(a)(1) - Private Foundation (Form 990PF Filer) (501C92-1) |
###
### BMF_TAX_EXEMPT_PURP
###

### THIS TAKES A COUPLE OF HOURS TO RUN -
### CURRENTLY SET TO A CACHED VERSION

# Lookup table used by get_codes().
code.table <- taxexp.purp
# Translate one subsection / classification-code pair into purpose labels.
#
#   x1  subsection code (scalar)
#   x2  classification code (scalar); it is split into single characters and
#       each character is paired with x1 as "<x1>-<char>"
#
# Returns one string with the matching LABEL values joined by " ;; ", or
# NA_character_ when either input is missing. Reads the global `code.table`.
# NOTE(review): merge() joins on the column names the two tables share -
# presumably `CD`; confirm code.table has that column.
get_codes <- function( x1, x2 ){
  # Scalar test: use || and return a character NA so map2_chr() is type-stable.
  if( is.na(x2) || is.na(x1) ){
    return( NA_character_ )
  }
  xx   <- stringr::str_split_1( as.character(x2), "" )
  X    <- paste0( x1, "-", xx )
  df   <- data.frame( CD=X )
  df   <- merge( df, code.table ) %>% na.omit()
  code <- paste0( df$LABEL, collapse=" ;; " )
  return( code )
}
# Apply get_codes() to every subsection / classification pair (slow step).
d.codes <-
  purrr::map2_chr(
    d$BMF_SUBSECTION_CODE,
    d$BMF_CLASSIFICATION_CODE,
    get_codes )

d$BMF_TAX_EXEMPT_PURP <- d.codes

# Cache the non-missing results so the slow step can be skipped.
dsub <- dplyr::select( d, EIN2, BMF_TAX_EXEMPT_PURP )
dsub <- na.omit( dsub )
write.csv( dsub, "data/BMF_TAX_EXEMPT_PURP.csv", row.names=FALSE )
saveRDS( dsub, "data/BMF_TAX_EXEMPT_PURP.rds" )  # one-tenth size

# Reload the cached version and attach it to d.
# NOTE(review): merge() without `by` / `all.x` is an inner join on every
# shared column name - confirm unmatched rows and repeated EIN2 values are
# handled as intended.
purpose.cats <- readRDS( "data/BMF_TAX_EXEMPT_PURP.rds" )
d <- merge( d, purpose.cats )
New labeled version examples:
| BMF_TAX_EXEMPT_PURP| Freq|
|----------------------------------------------------------------------:|---------:|
| NA| 1,570,547|
| Charitable Organization (501C3-1)| 722,351|
| Religious Organization (501C3-7)| 265,283|
|Charitable Organization (501C3-1) ;; Educational Organization (501C3-2)| 229,645|
| Educational Organization (501C3-2)| 214,094|
| Pleasure, Recreational, or Social Club (501C7-1)| 48,616|
| Charitable Organization (501C3-1) ;; Religious Organization (501C3-7)| 47,822|
| Social Welfare Organization (501C4-3)| 42,408|
| Fraternal Beneficiary Society, Order or Association (501C8-1)| 38,498|
| Business League (501C6-2)| 37,744|
| Labor Organization (501C5-3)| 34,396|
| Post or Organization of War Veterans (501C19-1)| 26,790|
| Civic League (501C4-1)| 24,938|
1.6.2 Filing Requirement Codes
Combine BMF_FILING_REQ_CODE and BMF_PF_FILING_REQ_CODE.
Currently private foundations appear in the “not required to file” category (00) in BMF_FILING_REQ_CODE. This is confusing because the field contains every type of 990 form except the 990-PF, so combining BMF_FILING_REQ_CODE and BMF_PF_FILING_REQ_CODE avoids confusion.
# |CODE |FORM |FRC |
# |:----|:-----------------|:--------------------------|
# |01 |990/990-EZ |990/990-EZ (01) |
# |02 |990-N |990-N (02) |
# |PF |990-PF |990-PF (00+PF={1,2,3}) | # new group
# |03 |GROUP RETURN |GROUP RETURN (03) |
# |04 |990-BL |990-BL (04) |
# |06 |NOT RQ: CHURCH |NOT RQ: CHURCH (06) |
# |07 |NOT RQ: 501C1 GOV |NOT RQ: 501C1 GOV (07) |
# |13 |NOT RQ: RELIG ORG |NOT RQ: RELIG ORG (13) |
# |14 |NOT RQ: STATE DIV |NOT RQ: STATE DIV (14) |
# |00 |NOT RQ: ALL OTHER |NOT RQ: ALL OTHER (00) |
# |11 |UNKNOWN |Undocumented Category (11) |
# Filing requirement dictionary: CODE is the recoded value, FORM the form
# name, FRC the factor label applied to BMF_FILING_REQ_CODE.
frc <- data.frame(
  CODE = c( "01", "02", "PF", "03", "04", "06", "07", "13", "14", "00", "11" ),
  FORM = c( "990/990-EZ", "990-N", "990-PF", "GROUP RETURN", "990-BL",
            "NOT RQ: CHURCH", "NOT RQ: 501C1 GOV", "NOT RQ: RELIG ORG",
            "NOT RQ: STATE DIV", "NOT RQ: ALL OTHER", "UNKNOWN" ),
  FRC  = c( "990/990EZ (01)", "990-N (02)", "990PF (00+PF={1,2,3})",
            "GROUP RETURN (03)", "990-BL (04)", "NRQ: CHURCH (06)",
            "NRQ: 501C1 GOV (07)", "NOT RQ: RELIG ORG (13)",
            "NOT RQ: STATE DIV (14)", "NOT RQ: ALL OTHER (00)",
            "Undocumented Category (11)" ),
  stringsAsFactors = FALSE )
# unique(x) %>% as.character() %>% dput()
# c(NA, "10", "20", "1", "0", "2", "6", "60", "3", "11", "30",
# "13", "130", "140", "70", "21", "7", "40", "14", "4", "888")
# |BMF_FILING_REQ_CODE | Freq|
# |:-------------------|---------:|
# |0 | 221,065|
# |1 | 570,568|
# |2 | 1,126,372|
# |3 | 6,856|
# |4 | 14|
# |6 | 279,358|
# |7 | 747|
# |10 | 118,332| # recode as 1
# |11 | 671|
# |13 | 8,701|
# |14 | 2,602|
# |20 | 475,818| # recode as 2
# |21 | 64|
# |30 | 179| # recode as 3
# |40 | 17| # recode as 4
# |60 | 12,620| # recode as 6
# |70 | 27| # recode as 7
# |130 | 1,213| # recode as 13
# |140 | 67| # recode as 14
# |888 | 1|
# |NA | 637,707|
# PF FILING REQUIREMENT CODE
# |BMF_PF_FILING_REQ_CODE | Freq|
# |:----------------------|---------:|
# |0 | 1,793,444|
# |NA | 1,518,639|
# |1 | 150,658| # private operating foundation
# |3 | 237| # private non-operating foundation
# |2 | 21| # private non-operating foundation
# Working copy of the raw filing requirement code.
x <- d$BMF_FILING_REQ_CODE

# DROP ZEROS AT THE END
# (three-digit codes first, e.g. 130 -> 13, then two-digit, e.g. 10 -> 1)
x <- gsub( "([0-9]{2})(0$)", "\\1", x )
x <- gsub( "([0-9]{1})(0$)", "\\1", x )

# 888 is treated as missing.
x[ x == 888 ] <- NA

# ADD LEADING ZEROS
x <- str_pad( x, width=2, side="left", pad="0" )

# FIX PF: IS LISTED AS "NOT REQUIRED (00)" OTHERWISE
x[ d$BMF_PF_FILING_REQ_CODE %in% c(1,2,3) ] <- "PF"
######
######  BMF_FILING_REQ_CODE
######

# CONVERT TO FACTORS
# (values of x absent from frc$CODE become NA)
d$BMF_FILING_REQ_CODE <-
  factor( x,
          levels=frc$CODE,
          labels=frc$FRC )

fable( d$BMF_FILING_REQ_CODE )
BMF_FILING_REQ_CODE | Freq |
---|---|
990/990EZ (01) | 696,890 |
990-N (02) | 1,628,405 |
990PF (00+PF={1,2,3}) | 151,568 |
GROUP RETURN (03) | 7,399 |
990-BL (04) | 31 |
NRQ: CHURCH (06) | 293,132 |
NRQ: 501C1 GOV (07) | 782 |
NOT RQ: RELIG ORG (13) | 10,162 |
NOT RQ: STATE DIV (14) | 2,673 |
NOT RQ: ALL OTHER (00) | 71,408 |
Undocumented Category (11) | 671 |
NA | 653,446 |
######
######  BMF_PF_FILING_REQ_CODE
######

pf.labels <-
  c( "NON-PF (0)","OPERATING PF (1)",
     "NON-OPERATING PF (2)","NON-OPERATING PF (3)" )

pf.levels <- c(0,1,2,3)

# Dictionary entry for the PF filing requirement code.
frc.pf <- data.frame( code=pf.levels, label=pf.labels )

# Reuse pf.levels so the factor and the dictionary cannot drift apart.
d$BMF_PF_FILING_REQ_CODE <-
  factor( d$BMF_PF_FILING_REQ_CODE,
          levels=pf.levels,
          labels=pf.labels )

fable( d$BMF_PF_FILING_REQ_CODE )
BMF_PF_FILING_REQ_CODE | Freq |
---|---|
NON-PF (0) | 2,232,678 |
OPERATING PF (1) | 151,310 |
NON-OPERATING PF (2) | 21 |
NON-OPERATING PF (3) | 237 |
NA | 1,132,321 |
1.7 Select Variables
# Final column order, now including BMF_TAX_EXEMPT_PURP.
final.order <- c(
  "EIN", "EIN2",
  "ORG_NAME",
  "ORG_NAME_SEC",
  "ORG_PERS_ICO",
  "ORG_FISCAL_PERIOD",
  "ORG_TYPE_501C",
  "ORG_TYPE_PF",
  "ORG_CORP_FORM",
  "ORG_RULING_DATE",
  "ORG_RULING_YEAR",
  "ORG_YEAR_FIRST",
  "ORG_YEAR_LAST",
  "ORG_YEAR_COUNT",
  "NTEE_IRS",
  "NTEE_NCCS",
  "NTEEV2",
  "NCCS_LEVEL_1",
  "NCCS_LEVEL_2",
  "NCCS_LEVEL_3",
  "BMF_SUBSECTION_CODE",
  "BMF_TAX_EXEMPT_PURP",
  "BMF_CLASSIFICATION_CODE",
  "BMF_DEDUCTIBILITY_CODE",
  "BMF_FILING_REQ_CODE",
  "BMF_PF_FILING_REQ_CODE",
  "BMF_FOUNDATION_CODE",
  "BMF_STATUS_CODE",
  "BMF_AFFILIATION_CODE",
  "BMF_GROUP_EXEMPT_NUM",
  "BMF_INCOME_LEVEL",
  "BMF_ASSET_LEVEL",
  "F990_FORM_YEAR",
  "F990_TOTAL_REVENUE_RECENT",
  "F990_TOTAL_INCOME_RECENT",
  "F990_TOTAL_ASSETS_RECENT",
  "F990_ORG_ADDR_STREET",
  "F990_ORG_ADDR_CITY",
  "F990_ORG_ADDR_STATE",
  "F990_ORG_ADDR_ZIP",
  "CENSUS_URBAN_AREA",
  "CENSUS_STATE_ABBR",
  "CENSUS_COUNTY_NAME",
  "CENSUS_CBSA_NAME",
  "CENSUS_CBSA_FIPS",
  "CENSUS_BLOCK_FIPS",
  "ORG_ADDR_FULL",
  "ORG_ADDR_MATCH",
  "LATITUDE",
  "LONGITUDE",
  "GEOCODER_SCORE",
  "GEOCODER_MATCH" )
# MAKE SURE ALL LISTED VARIABLES EXIST
setdiff( final.order, names(d) )
character(0)
# DROPPED VARIABLES
setdiff( names(d), final.order )
character(0)
# Reorder columns; all_of() errors if any listed column is missing from d.
d <- select( d, all_of( final.order ) )
1.8 Save Objects
# Write the labeled table in both RDS and CSV form (NA written as empty).
saveRDS( d, "data/BMF-LABELED-TEMP.rds" )
write_csv( d, "data/BMF-LABELED-TEMP.csv", na="" )

# Code/label dictionaries keyed by the variable they describe.
# NOTE(review): level1.codes and level2.codes are not defined in the visible
# part of this script (the factors above use L1.levels / L2.levels) - confirm
# they exist, e.g. in R/functions.R.
dictionaries <-
  list(
    ORG_CORP_FORM          = corp,
    ORG_FISCAL_PERIOD      = fiscal.codes,
    BMF_AFFILIATION_CODE   = aff.codes,
    BMF_ASSET_LEVEL        = asset.level,
    BMF_DEDUCTIBILITY_CODE = deduct,
    BMF_FILING_REQ_CODE    = frc,
    BMF_FOUNDATION_CODE    = fnd,
    BMF_INCOME_LEVEL       = asset.level,
    BMF_PF_FILING_REQ_CODE = frc.pf,
    BMF_STATUS_CODE        = status,
    BMF_TAX_EXEMPT_PURP    = taxexp.purp,
    NCCS_LEVEL_1           = level1.codes,
    NCCS_LEVEL_2           = level2.codes,
    NCCS_LEVEL_3           = level3.codes )

# Write the list as R source so it can be re-created with source().
dump( "dictionaries", file="data/DICTIONARIES.R" )