1  Labeling Variables

1.1 Packages

library( tidyverse )   # data wrangling 
library( data.table )  # data wrangling 
library( pander )      # pretty tables 
library( knitr )       # pretty tables
library( DiagrammeR )  # drawing diagrams
library( ggthemes )    # graphics 
library( gridExtra )   # graphics

# CUSTOM FUNCTIONS: fable()
source( "R/functions.R" )

1.2 Load Data

# Load the unified BMF file.
# Prefer the local copy; fall back to the REMOTE OPTION only when the local
# file is absent. Previously the remote file was downloaded and then
# immediately overwritten by the local read.
URL <- "https://nccsdata.s3.us-east-1.amazonaws.com/harmonized/bmf/unified/BMF_UNIFIED_V1.1.csv"
LOCAL.FILE <- "data/BMF_UNIFIED_V1.1.csv"

if( file.exists( LOCAL.FILE ) ){
  d <- fread( LOCAL.FILE )
} else {
  d <- fread( URL )
}

# ORG_TYPE_501C: subsection code left-padded to two characters and
# prefixed with "501C_"; a missing subsection code pastes as "501C_NA",
# which is reset to NA.
n_501c   <- str_pad( d$BMF_SUBSECTION_CODE, width=2, pad="0" )
org.type <- paste0( "501C_", n_501c )
org.type[ org.type == "501C_NA" ] <- NA
d$ORG_TYPE_501C <- org.type

# ORG_TYPE_PF: 1 when the PF filing requirement code is 1, 2 or 3,
# 0 for any other observed code, NA when the code itself is missing.
pf.code <- d$BMF_PF_FILING_REQ_CODE
pf.flag <- as.numeric( pf.code %in% c(1,2,3) )
pf.flag[ is.na( pf.code ) ] <- NA
d$ORG_TYPE_PF <- pf.flag

# Rename raw BMF fields to the labeled-file naming scheme (new = old).
# BMF_ORGANIZATION_CODE goes straight to its final name ORG_CORP_FORM.
d <- 
  d %>% 
  rename( 
    ORG_NAME         = ORG_NAME_CURRENT,
    ORG_CORP_FORM    = BMF_ORGANIZATION_CODE,
    BMF_ASSET_LEVEL  = BMF_ASSET_CODE,
    BMF_INCOME_LEVEL = BMF_INCOME_CODE,
    F990_FORM_YEAR   = ORG_FISCAL_YEAR )
# Column order for the output file, built from thematic groups.
# The result is a plain (unnamed) character vector.
new.order <- unlist( 
  list( 

    # identifiers and organization type
    c( "EIN", "EIN2", 
       "ORG_NAME", "ORG_NAME_SEC", "ORG_PERS_ICO", 
       "ORG_FISCAL_PERIOD", 
       "ORG_TYPE_501C", "ORG_TYPE_PF", "ORG_CORP_FORM" ),

    # ruling date and years observed
    c( "ORG_RULING_DATE", "ORG_RULING_YEAR",
       "ORG_YEAR_FIRST", "ORG_YEAR_LAST", "ORG_YEAR_COUNT" ),

    # NTEE / NCCS taxonomies
    c( "NTEE_IRS", "NTEE_NCCS", "NTEEV2", 
       "NCCS_LEVEL_1", "NCCS_LEVEL_2", "NCCS_LEVEL_3" ),

    # BMF code fields
    c( "BMF_SUBSECTION_CODE", "BMF_DEDUCTIBILITY_CODE", 
       "BMF_FILING_REQ_CODE", "BMF_PF_FILING_REQ_CODE",
       "BMF_FOUNDATION_CODE", "BMF_STATUS_CODE", 
       "BMF_CLASSIFICATION_CODE", "BMF_AFFILIATION_CODE",   
       "BMF_GROUP_EXEMPT_NUM",
       "BMF_INCOME_LEVEL", "BMF_ASSET_LEVEL" ),

    # most recent 990 financials
    c( "F990_FORM_YEAR",
       "F990_TOTAL_REVENUE_RECENT", 
       "F990_TOTAL_INCOME_RECENT", 
       "F990_TOTAL_ASSETS_RECENT" ),

    # address reported on the 990
    c( "F990_ORG_ADDR_STREET", "F990_ORG_ADDR_CITY", 
       "F990_ORG_ADDR_STATE", "F990_ORG_ADDR_ZIP" ),

    # census geographies
    c( "CENSUS_URBAN_AREA", "CENSUS_STATE_ABBR", "CENSUS_COUNTY_NAME",
       "CENSUS_CBSA_NAME", "CENSUS_CBSA_FIPS", "CENSUS_BLOCK_FIPS" ),

    # geocoding results
    c( "ORG_ADDR_FULL", "ORG_ADDR_MATCH", 
       "LATITUDE", "LONGITUDE", 
       "GEOCODER_SCORE", "GEOCODER_MATCH" ) 

  ), 
  use.names=FALSE )

# MAKE SURE ALL LISTED VARIABLES EXIST 
# (names in new.order that are not columns of d; expect none)
missing.vars <- setdiff( new.order, names(d) )
missing.vars

character(0)

# DROPPED VARIABLES 
# (columns of d not listed in new.order; select() below removes them)
dropped.vars <- setdiff( names(d), new.order )
dropped.vars

character(0)

# keep only the listed columns, in the listed order
d <- d %>% select( all_of( new.order ) )

1.3 Impute Missing Values

1.3.1 Private Foundations

Private foundations have 501C Status of 501C_03 or 501C_92.

# Cross-tabulate 501C type against the PF filing requirement code,
# keeping NA as its own row / column.
V1 <- d$ORG_TYPE_501C
V2 <- d$BMF_PF_FILING_REQ_CODE
pf.by.type <- table( V1, V2, useNA="ifany" )
pf.by.type %>% kable()
|        | NON-PF (0)| OPERATING PF (1)| NON-OPERATING PF (2)| NON-OPERATING PF (3)|     NA|
|:-------|----------:|----------------:|--------------------:|--------------------:|------:|
|501C_00 |         27|                0|                    0|                    0|    226|
|501C_01 |        709|                0|                    0|                    0|     91|
|501C_02 |       4295|                0|                    0|                    0|   5386|
|501C_03 |    1452651|           144762|                   20|                  237| 905717|
|501C_04 |      75102|                0|                    0|                    0| 112333|
|501C_05 |      45202|                0|                    0|                    0|  46128|
|501C_06 |      61227|                0|                    0|                    0|  57316|
|501C_07 |      48675|                0|                    0|                    0|  48116|
|501C_08 |      38539|                0|                    0|                    0|  72095|
|501C_09 |       5587|                0|                    0|                    0|  13978|
|501C_10 |      15446|                0|                    0|                    0|  14302|
|501C_11 |          6|                0|                    0|                    0|      6|
|501C_12 |       5449|                0|                    0|                    0|   2770|
|501C_13 |       9692|                0|                    0|                    0|   4235|
|501C_14 |       1529|                0|                    0|                    0|   4136|
|501C_15 |        623|                0|                    0|                    0|   1849|
|501C_16 |         11|                0|                    0|                    0|     16|
|501C_17 |         87|                0|                    0|                    0|    541|
|501C_18 |          3|                0|                    0|                    0|      3|
|501C_19 |      26908|                0|                    0|                    0|  30852|
|501C_20 |          1|                0|                    0|                    0|     64|
|501C_21 |          2|                0|                    0|                    0|     28|
|501C_23 |          2|                0|                    0|                    0|      1|
|501C_24 |          0|                0|                    0|                    0|      2|
|501C_25 |        594|                0|                    0|                    0|   2186|
|501C_26 |          5|                0|                    0|                    0|      6|
|501C_27 |         14|                0|                    0|                    0|      4|
|501C_29 |         15|                0|                    0|                    0|      8|
|501C_40 |        220|                0|                    0|                    0|     30|
|501C_50 |          4|                1|                    0|                    0|     25|
|501C_70 |          0|                0|                    0|                    0|      1|
|501C_71 |          1|                0|                    0|                    0|      0|
|501C_80 |          0|                0|                    0|                    0|   1383|
|501C_81 |          1|                0|                    0|                    0|      0|
|501C_82 |         11|                0|                    0|                    0|     18|
|501C_90 |          0|                0|                    0|                    0|    790|
|501C_91 |        666|                1|                    0|                    0|    695|
|501C_92 |        140|             5894|                    1|                    0|   3493|
|501C_93 |          0|                0|                    0|                    0|   2440|
|NA      |          0|                0|                    0|                    0| 187369|

The rest of the cases can be imputed as non-PFs. Note that BMF_PF_FILING_REQ_CODE is labeled below; at this point it still holds the raw numeric codes (0-3 in the tables shown here).

Before:

# proportions before imputation (TRUE spelled out: T can be reassigned)
fable( d$ORG_TYPE_PF, p=TRUE )
ORG_TYPE_PF Freq
0 0.518
NA 0.439
1 0.044
# proportions before imputation (TRUE spelled out: T can be reassigned)
fable( d$BMF_PF_FILING_REQ_CODE, p=TRUE )
BMF_PF_FILING_REQ_CODE Freq
0 0.518
NA 0.439
1 0.044
3 0.000
2 0.000
# Organizations whose 501C type is neither 501C_03 nor 501C_92 are
# coded as non-PFs (0) on both PF variables.
# Rows with a missing ORG_TYPE_501C evaluate to NA and are left untouched;
# which() makes that explicit.
# NOTE(review): this also overwrites the two observed PF codes under
# 501C_50 and 501C_91 in the table above - confirm that is intended.
RULE1 <- d$ORG_TYPE_501C == "501C_03"
RULE2 <- d$ORG_TYPE_501C == "501C_92" 
not.a.pf <- ( ! RULE1 ) & ( ! RULE2 )

d$ORG_TYPE_PF[ which( not.a.pf ) ]            <- 0
d$BMF_PF_FILING_REQ_CODE[ which( not.a.pf ) ] <- 0

After:

# proportions after imputation (TRUE spelled out: T can be reassigned)
fable( d$ORG_TYPE_PF, p=TRUE )
ORG_TYPE_PF Freq
0 0.640
NA 0.317
1 0.044
# proportions after imputation (TRUE spelled out: T can be reassigned)
fable( d$BMF_PF_FILING_REQ_CODE, p=TRUE )
BMF_PF_FILING_REQ_CODE Freq
0 0.640
NA 0.317
1 0.044
3 0.000
2 0.000



1.4 Add Labels to Factors

1.4.1 ORG_FISCAL_PERIOD

| code|labels   |
|----:|:--------|
|    1|JAN (01) |
|    2|FEB (02) |
|    3|MAR (03) |
|    4|APR (04) |
|    5|MAY (05) |
|    6|JUN (06) |
|    7|JUL (07) |
|    8|AUG (08) |
|    9|SEP (09) |
|   10|OCT (10) |
|   11|NOV (11) |
|   12|DEC (12) |
###
###   ORG_FISCAL_PERIOD
###

# Recode the fiscal-period codes as a labeled factor using the
# fiscal.codes lookup; values not listed in fiscal.codes$code become NA.
# NOTE(review): the lookup table printed above shows a column named
# "labels", so `$label` may be resolving through partial matching -
# confirm the actual column name.
d$ORG_FISCAL_PERIOD <- factor( 
  x      = d$ORG_FISCAL_PERIOD,
  levels = fiscal.codes$code,
  labels = fiscal.codes$label )

fable( d$ORG_FISCAL_PERIOD )
ORG_FISCAL_PERIOD Freq
JAN (01) 258,398
FEB (02) 252,643
MAR (03) 312,381
APR (04) 269,016
MAY (05) 288,255
JUN (06) 478,666
JUL (07) 267,541
AUG (08) 289,599
SEP (09) 297,430
OCT (10) 279,988
NOV (11) 237,079
DEC (12) 232,002
NA 1

1.4.2 BMF_STATUS_CODE

|code |label                        |
|:----|:----------------------------|
|1    |Unconditional Exemption (01) |
|2    |Conditional Exemption (02)   |
|12   |Trust (12)                   |
|25   |Split Interest Trust (25)    |
###
###   BMF_STATUS_CODE
###

# Recode the status codes as a labeled factor using the status lookup;
# values not listed in status$code become NA.
d$BMF_STATUS_CODE <- factor( 
  x      = d$BMF_STATUS_CODE,
  levels = status$code,
  labels = status$label )

fable( d$BMF_STATUS_CODE )
BMF_STATUS_CODE Freq
Unconditional Exemption (01) 1,935,874
Conditional Exemption (02) 662
Trust (12) 6,925
Split Interest Trust (25) 899
NA 1,518,639

1.4.3 BMF_AFFILIATION_CODE

| code|role           |affiliation       |label                        |
|----:|:--------------|:-----------------|:----------------------------|
|    3|INDEPENDENT    |NONE              |INDEPENDENT (3)              |
|    1|PARENT         |NETWORK           |NETWORK-PARENT (1)           |
|    2|INTERMEDIATE   |NETWORK           |NETWORK-INTERM (2)           |
|    6|PARENT-REGULAR |GROUP-EXM         |GROUP-EXM-PARENT-REGULAR (6) |
|    8|PARENT-CHURCH  |GROUP-EXM         |GROUP-EXM-PARENT-CHURCH (8)  |
|    7|INTERMEDIATE   |GROUP-EXM         |GROUP-EXM-INTERM (7)         |
|    9|SUBORDINATE    |GROUP-EXM/NETWORK |SUBORDINATE (9)              |
# frequency of the raw affiliation codes, before the recode below
# (fable() is the custom table helper sourced from R/functions.R)
fable( d$BMF_AFFILIATION_CODE )
BMF_AFFILIATION_CODE Freq
NA 1,518,639
3 1,501,083
9 428,701
0 5,671
1 3,615
6 3,378
2 1,100
8 713
7 99
###
###   BMF_AFFILIATION_CODE
###

# Organizations with group exemption number 0 that are not coded as a
# network parent (1) or intermediate (2) are recoded as independent (3).
# Rows where the combined rule is NA are left unchanged; which() makes
# that explicit.
RULE1 <- d$BMF_GROUP_EXEMPT_NUM == 0
RULE2 <- ! ( d$BMF_AFFILIATION_CODE %in% c(1,2) )
make.independent <- which( RULE1 & RULE2 )

d$BMF_AFFILIATION_CODE[ make.independent ] <- 3

# label the codes; values not listed in aff.codes$code become NA
d$BMF_AFFILIATION_CODE <- factor( 
  x      = d$BMF_AFFILIATION_CODE, 
  levels = aff.codes$code,
  labels = aff.codes$label )

fable( d$BMF_AFFILIATION_CODE )
BMF_AFFILIATION_CODE Freq
INDEPENDENT (3) 2,040,545
NETWORK-PARENT (1) 3,615
NETWORK-INTERM (2) 1,100
GROUP-EXM-PARENT-REGULAR (6) 3,378
GROUP-EXM-INTERM (7) 99
GROUP-EXM-PARENT-CHURCH (8) 713
SUBORDINATE (9) 428,691
NA 984,858

1.4.4 BMF_DEDUCTIBILITY_CODE

# | code|label        |desc                                                           |
# |----:|:------------|:--------------------------------------------------------------|
# |    0|UDC (0)      |Undocumented category                                          |
# |    1|YES (1)      |Contributions are deductible                                   |
# |    2|NO (2)       |Contributions are not deductible                               |
# |    4|BY TREATY(4) |Contributions are deductible by treaty (foreign organizations) |
###
###   BMF_DEDUCTIBILITY_CODE
###

# Recode the deductibility codes as a labeled factor using the deduct
# lookup; values not listed in deduct$code become NA.
d$BMF_DEDUCTIBILITY_CODE <- factor( 
  x      = d$BMF_DEDUCTIBILITY_CODE,
  levels = deduct$code,
  labels = deduct$label )

fable( d$BMF_DEDUCTIBILITY_CODE )
BMF_DEDUCTIBILITY_CODE Freq
UDC (0) 35,116
YES (1) 1,658,772
NO (2) 249,920
BY TREATY(4) 552
NA 1,518,639

1.4.5 BMF_ASSET_LEVEL & BMF_INCOME_LEVEL

# | code|label          |
# |----:|:--------------|
# |    0|$0             |
# |    1|$1 to $10k     |
# |    2|$10k to $25k   |
# |    3|$25k to $100k  |
# |    4|$100k to $500k |
# |    5|$500k to $1m   |
# |    6|$1m to $5m     |
# |    7|$5m to $10m    |
# |    8|$10m to $50m   |
# |    9|$50m +         |
###
###   BMF_ASSET_LEVEL
###   BMF_INCOME_LEVEL
###

# Recode the asset-level codes as a labeled factor using the asset.level
# lookup; values not listed in asset.level$code become NA.
d$BMF_ASSET_LEVEL <- factor( 
  x      = d$BMF_ASSET_LEVEL,
  levels = asset.level$code,
  labels = asset.level$label )

fable( d$BMF_ASSET_LEVEL )
BMF_ASSET_LEVEL Freq
$0 1,244,763
$1 to $10k 76,640
$10k to $25k 41,754
$25k to $100k 119,700
$100k to $500k 184,359
$500k to $1m 73,462
$1m to $5m 117,695
$5m to $10m 30,400
$10m to $50m 38,176
$50m + 17,411
NA 1,518,639
# Income levels use the same code/label lookup (asset.level) as the
# asset levels; values not listed in asset.level$code become NA.
d$BMF_INCOME_LEVEL <- factor( 
  x      = d$BMF_INCOME_LEVEL,
  levels = asset.level$code,
  labels = asset.level$label )

fable( d$BMF_INCOME_LEVEL )
BMF_INCOME_LEVEL Freq
$0 1,255,152
$1 to $10k 59,235
$10k to $25k 34,339
$25k to $100k 149,334
$100k to $500k 226,771
$500k to $1m 66,600
$1m to $5m 93,266
$5m to $10m 22,367
$10m to $50m 26,713
$50m + 10,583
NA 1,518,639

1.4.6 ORG_CORP_FORM

#  | code|label           |
#  |----:|:---------------|
#  |    0|UDC (0)         |
#  |    1|CORPORATION (1) |
#  |    2|TRUST (2)       |
#  |    3|COOPERATIVE (3) |
#  |    4|PARTNERSHIP (4) |
#  |    5|ASSOCIATION (5) |
#  |    6|UDC (6)         |
#########
#########   ORG_CORP_FORM
#########

# Recode the corporate-form codes as a labeled factor using the corp
# lookup; values not listed in corp$code become NA.
d$ORG_CORP_FORM <- factor( 
  x      = d$ORG_CORP_FORM,
  levels = corp$code,
  labels = corp$label )

fable( d$ORG_CORP_FORM )
ORG_CORP_FORM Freq
UDC (0) 10,478
CORPORATION (1) 1,582,603
TRUST (2) 60,422
COOPERATIVE (3) 2,052
PARTNERSHIP (4) 129
ASSOCIATION (5) 484,774
UDC (6) 2,885
NA 1,319,656

1.4.7 BMF_FOUNDATION_CODE

# | code|label                            |
# |----:|:--------------------------------|
# |    0|501C_not_3 (0)                   |
# |    2|priv op foundation (2)           |
# |    3|priv op foundation (other) (3)   |
# |    4|priv non-op foundation (4)       |
# |    9|status suspended (9)             |
# |   10|church (10)                      |
# |   11|school (11)                      |
# |   12|hospital/ med research (12)      |
# |   13|public university support (13)   |
# |   14|governmental unit (14)           |
# |   15|gov or public support (15)       |
# |   16|one-third investment or ubi (16) |
# |   17|subsidiary (17)                  |
# |   18|public safety (18)               |
# |   21|supporting org type1 (21)        |
# |   22|supporting org type2 (22)        |
# |   23|supporting org type3 (23)        |
# |   24|supporting org type3 nfi (24)    |
###
###   BMF_FOUNDATION_CODE
###

# Recode the foundation codes as a labeled factor using the fnd lookup;
# values not listed in fnd$code become NA.
d$BMF_FOUNDATION_CODE <- factor( 
  x      = d$BMF_FOUNDATION_CODE, 
  levels = fnd$code,
  labels = fnd$label )

fable( d$BMF_FOUNDATION_CODE )
BMF_FOUNDATION_CODE Freq
501C_not_3 (0) 766,790
priv op foundation (2) 518
priv op foundation (other) (3) 13,215
priv non-op foundation (4) 234,457
status suspended (9) 2,348
church (10) 315,158
school (11) 38,545
hospital/ med research (12) 10,220
public university support (13) 2,697
governmental unit (14) 527
gov or public support (15) 1,086,466
one-third investment or ubi (16) 680,048
subsidiary (17) 33,274
public safety (18) 102
supporting org type1 (21) 5,211
supporting org type2 (22) 1,122
supporting org type3 (23) 804
supporting org type3 nfi (24) 300
NA 271,197

1.4.8 NCCS_LEVEL_1

###
###   NCCS_LEVEL_1
###

# Recode NCCS level 1 as a labeled factor: L1.levels supplies the raw
# values, L1.labels the display labels; unlisted values become NA.
d$NCCS_LEVEL_1 <- factor( 
  x      = d$NCCS_LEVEL_1,
  levels = L1.levels,
  labels = L1.labels )

fable( d$NCCS_LEVEL_1 )
NCCS_LEVEL_1 Freq
NONPROFIT (501C_not3) 953,649
CHARITY (501C3) 2,247,977
PRIVATE FOUNDATION (501C3-PF) 252,928
UNDEFINED 8,428
NA 17

1.4.9 NCCS_LEVEL_2

###
###   NCCS_LEVEL_2
###

# Recode NCCS level 2 as a labeled factor: L2.levels supplies the raw
# values, L2.labels the display labels; unlisted values become NA.
d$NCCS_LEVEL_2 <- factor( 
  x      = d$NCCS_LEVEL_2,
  levels = L2.levels,
  labels = L2.labels )

fable( d$NCCS_LEVEL_2 )
NCCS_LEVEL_2 Freq
OPERATING (O) 2,865,354
SUPPORTING (S) 468,038
MUTUAL/MEMBER (M) 129,590
NA 17

1.4.10 NCCS_LEVEL_3

|code |label                                   |
|:----|:---------------------------------------|
|AR   |Arts, culture and humanities(AR)        |
|ED   |Education(ED)                           |
|EN   |Environment/animals(EN)                 |
|HE   |Health(HE)                              |
|HS   |Human Services(HS)                      |
|IN   |International, foreign affairs(IN)      |
|MO   |Other mutual benefit(MO)                |
|MR   |Pension and retirement funds(MR)        |
|PB   |Public, societal benefit(PB)            |
|RE   |Religion related(RE)                    |
|UN   |Unknown, unclassified(UN)               |
|ZA   |Single organization support(ZA)         |
|ZB   |Fundraising within NTEE major group(ZB) |
|ZC   |Private grantmaking foundations(ZC)     |
|ZD   |Public foundations(ZD)                  |
|ZE   |General fundraising(ZE)                 |
|ZF   |Other Supporting Public Benefit(ZF)     |
###
###   NCCS_LEVEL_3
###

# "-" and the empty string are placeholders for a missing level-3 code
is.placeholder <- d$NCCS_LEVEL_3 %in% c( "-", "" )
d$NCCS_LEVEL_3[ is.placeholder ] <- NA

# label the codes; values not listed in level3.codes$code become NA
d$NCCS_LEVEL_3 <- factor( 
  x      = d$NCCS_LEVEL_3,
  levels = level3.codes$code,
  labels = level3.codes$label )

fable( d$NCCS_LEVEL_3 )
NCCS_LEVEL_3 Freq
Arts, culture and humanities(AR) 243,094
Education(ED) 398,845
Environment/animals(EN) 118,606
Health(HE) 198,935
Human Services(HS) 904,014
International, foreign affairs(IN) 42,230
Other mutual benefit(MO) 70,057
Pension and retirement funds(MR) 289
Public, societal benefit(PB) 477,413
Religion related(RE) 406,963
Unknown, unclassified(UN) 167,378
Single organization support(ZA) 47,232
Fundraising within NTEE major group(ZB) 21,291
Private grantmaking foundations(ZC) 143,992
Public foundations(ZD) 26,788
General fundraising(ZE) 6,358
Other Supporting Public Benefit(ZF) 140,162
NA 49,352

1.5 501C Types

Do we need to reconcile IRS and NCCS dictionaries while incorporating new waves of BMF files?

|ORG_TYPE_501C |      Freq|
|:-------------|---------:|
|501C_00       |       253|
|501C_01       |       800|
|501C_02       |     9,681|
|501C_03       | 2,503,387|
|501C_04       |   187,435|
|501C_05       |    91,330|
|501C_06       |   118,543|
|501C_07       |    96,791|
|501C_08       |   110,634|
|501C_09       |    19,565|
|501C_10       |    29,748|
|501C_11       |        12|
|501C_12       |     8,219|
|501C_13       |    13,927|
|501C_14       |     5,665|
|501C_15       |     2,472|
|501C_16       |        27|
|501C_17       |       628|
|501C_18       |         6|
|501C_19       |    57,760|
|501C_20       |        65|
|501C_21       |        30|
|501C_23       |         3|
|501C_24       |         2|
|501C_25       |     2,780|
|501C_26       |        11|
|501C_27       |        18|
|501C_29       |        23|
|501C_40       |       250|
|501C_50       |        30|
|501C_70       |         1|
|501C_71       |         1|
|501C_80       |     1,383|  # missing from IRS
|501C_81       |         1|
|501C_82       |        29|  # missing from IRS
|501C_90       |       790|  # missing from IRS
|501C_91       |     1,362|  # missing from IRS
|501C_92       |     9,528|  # different from IRS ?
|501C_93       |     2,440|  # missing from IRS
|NA            |   187,369|
##########
##########   FROM BMF DOCUMENTATION 
##########


TABLE OF EO SUBSECTION AND CLASSIFICATION CODES
Subsection Classification Description
Code Code
01 1 Government Instrumentality
02 1 Title-Holding Corporation
03 1 Charitable Organization
03 2 Educational Organization
03 3 Literary Organization
03 4 Organization to Prevent Cruelty to Animals
03 5 Organization to Prevent Cruelty to Children
03 6 Organization for Public Safety Testing
03 7 Religious Organization
03 8 Scientific Organization
04 1 Civic League
04 2 Local Association of Employees
04 3 Social Welfare Organization
05 1 Agricultural Organization
05 2 Horticultural Organization
05 3 Labor Organization
06 1 Board of Trade
06 2 Business League
06 3 Chamber of Commerce
06 4 Real Estate Board
07 1 Pleasure, Recreational, or Social Club
08 1 Fraternal Beneficiary Society, Order or Association
09 1 Voluntary Employees' Beneficiary Association (Non-Govt. Emps.)
09 2 Voluntary Employees' Beneficiary Association (Govt. Emps.)
10 1 Domestic Fraternal Societies and Associations
11 1 Teachers Retirement Fund Assoc.
12 1 Benevolent Life Insurance Assoc.
12 2 Mutual Ditch or Irrigation Co.
12 3 Mutual Cooperative Telephone Co.
12 4 Organization Like Those on Three Preceding Lines
13 1 Burial Association
13 2 Cemetery Company
14 1 Credit Union
14 2 Other Mutual Corp. or Assoc.
15 1 Mutual Insurance Company or Assoc. Other Than Life or Marine
16 1 Corp. Financing Crop Operations
17 1 Supplemental Unemployment Compensation Trust or Plan
18 1 Employee Funded Pension Trust (Created Before 6/25/59)
19 1 Post or Organization of War Veterans
20 1 Legal Service Organization
21 1 Black Lung Trust
22 1 Multiemployer Pension Plan
23 1 Veterans Assoc. Formed Prior to 1880
24 1 Trust Described in Sect. 4049 of ERISA
25 1 Title Holding Co. for Pensions, etc.
26 1 State-Sponsored High Risk Health Insurance Organizations
27 1 State-Sponsored Workers Compensation Reinsurance
29 1 ACA 1322 Qualified Nonprofit Health Insurance Issuers
40 1 Apostolic and Religious Org. (501(d))
50 1 Cooperative Hospital Service Organization (501(e))
60 1 Cooperative Service Organization of Operating Educational Organization (501(f))
70 1 Child Care Organization (501(k))
71 1 Charitable Risk Pool
81 1 Qualified State-Sponsored Tuition Program
92 1 4947(a)(1) - Private Foundation (Form 990PF Filer)


##########
##########   FROM NCCS DOCUMENTATION 
##########

SUBSECCD
Character
(20)    Subsection code
IRS subsection code (03=501(c)(3), etc.)
01  01- Corporations originated under Act of Congress, including Federal Credit Unions
02  02- Title holding corporation for a tax-exempt organization.
03  03- Religious, educational, charitable, scientific, and literary organizations...
04  04- Civic leagues, social welfare organizations, and local associations of employees
05  05- Labor, agricultural, horticultural organizations. These are educational or instruct. grps...
06  06- Business leagues, chambers of commerce, real estate boards, etc. formed to improve conditions...
07  07- Social and recreational clubs which provide pleasure, recreation, and social activities.
08  08- Fraternal beneficiary societies and associations, with lodges providing for payment of life...
09  09- Voluntary employees' beneficiary ass'ns (including fed. employees' voluntary beneficiary...
10  10- Domestic fraternal societies and assoc's-lodges devoting their net earnings to charitable...
11  11- Teachers retirement fund associations.
12  12- Benevolent life insurance associations, mutual ditch or irrigation companies, mutual or coop...
13  13- Cemetery companies, providing burial and incidental activities for members.
14  14- State-chartered credit unions, mutual reserve funds, offering loans to members...
15  15- Mutual insurance cos. or associations, providing insurance to members substantially at cost...
16  16- Cooperative organizations to finance crop operations, in conjunction with activities ...
17  17- Supplemental unemployment benefit trusts, providing payments of suppl. unemployment comp...
18  18- Employee funded pension trusts, providing benefits under a pension plan funded by employees...
19  19- Post or organization of war veterans.
20  20- Trusts for prepaid group legal services, as part of a qual. group legal service plan or plans.
21  21- Black lung trusts, satisfying claims for compensation under Black Lung Acts.
22  22- Multiemployer Pension Plan
23  23- Veterans association formed prior to 1880
24  24- Trust described in Section 4049 of ERISA
25  25- Title Holding Company for Pensions, etc
26  26- State-Sponsored High Risk Health Insurance Organizations
27  27- State-Sponsored Workers Compensation Reinsurance
40  40- Apostolic and religious orgs. - 501(d)
50  50- Cooperative Hospital Service Organization - 501(e)
60  60- Cooperative Service Org. of Operating Educ. Org.- 501(f)
70  70- Child Care Organization - 501(k)
71  71- Charitable Risk Pool
80  80- Farmers' Cooperatives
81  81- Qualified State-Sponsored Tuition Program
82  82- 527 Political Organizations
90  90- 4947(a)(2) Split Interest Trust
91  91- 4947(a)(1) Public Charity (Files 990/990-EZ)
92  92- 4947(a)(1) Private Foundations
93  93- 1381(a)(2) Taxable Farmers Cooperative
CO  CO- Unspecified 501(c) Organization Other Than 501(c)(3)
# 501C type by PF indicator, keeping NA as its own row / column
type.by.pf <- table( d$ORG_TYPE_501C, d$ORG_TYPE_PF, useNA="ifany" )
type.by.pf %>% pander()
  0 1 NA
501C_00 253 0 0
501C_01 800 0 0
501C_02 9681 0 0
501C_03 1452651 145019 905717
501C_04 187435 0 0
501C_05 91330 0 0
501C_06 118543 0 0
501C_07 96791 0 0
501C_08 110634 0 0
501C_09 19565 0 0
501C_10 29748 0 0
501C_11 12 0 0
501C_12 8219 0 0
501C_13 13927 0 0
501C_14 5665 0 0
501C_15 2472 0 0
501C_16 27 0 0
501C_17 628 0 0
501C_18 6 0 0
501C_19 57760 0 0
501C_20 65 0 0
501C_21 30 0 0
501C_23 3 0 0
501C_24 2 0 0
501C_25 2780 0 0
501C_26 11 0 0
501C_27 18 0 0
501C_29 23 0 0
501C_40 250 0 0
501C_50 30 0 0
501C_70 1 0 0
501C_71 1 0 0
501C_80 1383 0 0
501C_81 1 0 0
501C_82 29 0 0
501C_90 790 0 0
501C_91 1362 0 0
501C_92 140 5895 3493
501C_93 2440 0 0
NA 0 0 187369

1.6 Composite Codes

1.6.1 Tax Exempt Purpose

Combine 501C Type with BMF “Classification Codes”.

Classification Code is ambiguous. When applying for tax exempt status from the IRS, nonprofit founders report organizational purpose on their application, which is used by the IRS to make a determination on whether the organization qualifies for tax-exempt status. These codes differ from the NTEE taxonomies in that each code is binary (yes/no), and they are NOT mutually exclusive, so a nonprofit mission can fulfill one or several of these purposes.

IRS Tax-Exempt Purpose Codes: [ See Instructions pp 6-7 ]

  • Charitable Purpose [yes/no]
  • Religious Purpose [yes/no]
  • Educational Purpose [yes/no]
  • Scientific Purpose [yes/no]
  • Literary Purpose [yes/no]
  • Public Safety Purpose [yes/no]
  • Amateur Sports Purpose [yes/no]
  • Prevent Cruelty to Animals and/or Children [yes/no]

The other 501C Types have similar tax-exempt purpose categories:

|CODE |LABEL                                                                                      |
|:----|:------------------------------------------------------------------------------------------|
|1-1  |Government Instrumentality (501C1-1)                                                       |
|2-1  |Title-Holding Corporation (501C2-1)                                                        |
|3-1  |Charitable Organization (501C3-1)                                                          |
|3-2  |Educational Organization (501C3-2)                                                         |
|3-3  |Literary Organization (501C3-3)                                                            |
|3-4  |Organization to Prevent Cruelty to Animals (501C3-4)                                       |
|3-5  |Organization to Prevent Cruelty to Children (501C3-5)                                      |
|3-6  |Organization for Public Safety Testing (501C3-6)                                           |
|3-7  |Religious Organization (501C3-7)                                                           |
|3-8  |Scientific Organization (501C3-8)                                                          |
|4-1  |Civic League (501C4-1)                                                                     |
|4-2  |Local Association of Employees (501C4-2)                                                   |
|4-3  |Social Welfare Organization (501C4-3)                                                      |
|5-1  |Agricultural Organization (501C5-1)                                                        |
|5-2  |Horticultural Organization (501C5-2)                                                       |
|5-3  |Labor Organization (501C5-3)                                                               |
|6-1  |Board of Trade (501C6-1)                                                                   |
|6-2  |Business League (501C6-2)                                                                  |
|6-3  |Chamber of Commerce (501C6-3)                                                              |
|6-4  |Real Estate Board (501C6-4)                                                                |
|7-1  |Pleasure, Recreational, or Social Club (501C7-1)                                           |
|8-1  |Fraternal Beneficiary Society, Order or Association (501C8-1)                              |
|9-1  |Voluntary Employees Beneficiary Association (Non-Govt Emps) (501C9-1)                      |
|9-2  |Voluntary Employees Beneficiary Association (Govt Emps) (501C9-2)                          |
|10-1 |Domestic Fraternal Societies and Associations (501C10-1)                                   |
|11-1 |Teachers Retirement Fund Assoc (501C11-1)                                                  |
|12-1 |Benevolent Life Insurance Assoc (501C12-1)                                                 |
|12-2 |Mutual Ditch or Irrigation Co (501C12-2)                                                   |
|12-3 |Mutual Cooperative Telephone Co (501C12-3)                                                 |
|12-4 |Organization Like Those on Three Preceding Lines (501C12-4)                                |
|13-1 |Burial Association (501C13-1)                                                              |
|13-2 |Cemetery Company (501C13-2)                                                                |
|14-1 |Credit Union (501C14-1)                                                                    |
|14-2 |Other Mutual Corp or Assoc (501C14-2)                                                      |
|15-1 |Mutual Insurance Company or Assoc Other Than Life or Marine (501C15-1)                     |
|16-1 |Corp Financing Crop Operations (501C16-1)                                                  |
|17-1 |Supplemental Unemployment Compensation Trust or Plan (501C17-1)                            |
|18-1 |Employee Funded Pension Trust (501C18-1)                                                   |
|19-1 |Post or Organization of War Veterans (501C19-1)                                            |
|20-1 |Legal Service Organization (501C20-1)                                                      |
|21-1 |Black Lung Trust (501C21-1)                                                                |
|22-1 |Multiemployer Pension Plan (501C22-1)                                                      |
|23-1 |Veterans Assoc Formed Prior to 1880 (501C23-1)                                             |
|24-1 |Trust Described in Sect 4049 of ERISA (501C24-1)                                           |
|25-1 |Title Holding Co for Pensions etc (501C25-1)                                               |
|26-1 |State-Sponsored High Risk Health Insurance Organizations (501C26-1)                        |
|27-1 |State-Sponsored Workers Compensation Reinsurance (501C27-1)                                |
|29-1 |ACA 1322 Qualified Nonprofit Health Insurance Issuers (501C29-1)                           |
|40-1 |Apostolic and Religious Org (501(d)) (501C40-1)                                            |
|50-1 |Cooperative Hospital Service Organization (501(e)) (501C50-1)                              |
|60-1 |Cooperative Service Organization of Operating Educational Organization (501(f)) (501C60-1) |
|70-1 |Child Care Organization (501(k)) (501C70-1)                                                |
|71-1 |Charitable Risk Pool (501C71-1)                                                            |
|81-1 |Qualified State-Sponsored Tuition Program (501C81-1)                                       |
|92-1 |4947(a)(1) - Private Foundation (Form 990PF Filer) (501C92-1)                              |
###
###   BMF_TAX_EXEMPT_PURP
###

###    THIS TAKES A COUPLE OF HOURS TO RUN - 
###    CURRENTLY SET TO A CACHED VERSION

code.table <- taxexp.purp

# Build the tax-exempt purpose label for ONE organization.
#
# x1     subsection code (scalar), e.g. 3
# x2     classification code (scalar); each character is treated as one
#        purpose digit, e.g. 1200 -> "1", "2", "0", "0"
# codes  lookup table with columns CD ("<subsection>-<digit>") and LABEL;
#        defaults to the global code.table, so existing two-argument calls
#        behave as before
#
# Returns a single string with the matching labels joined by " ;; ".
# Returns NA_character_ when either input is NA (a typed NA, so the result
# is always character). Returns "" when no digit matches the lookup table.
get_codes <- function( x1, x2, codes = code.table ){

  # scalar test: || is the correct operator inside if()
  if( is.na(x1) || is.na(x2) )
  { return( NA_character_ ) }

  # one lookup key per character of the classification code
  digits <- strsplit( as.character(x2), "" )[[1]]
  keys   <- data.frame( CD = paste0( x1, "-", digits ), 
                        stringsAsFactors = FALSE )

  # explicit key: a lookup table without a CD column now raises an error
  # instead of silently producing a cross join
  hits <- na.omit( merge( keys, codes, by = "CD" ) )

  paste0( hits$LABEL, collapse=" ;; " )
}

# get_codes() depends only on the (subsection, classification) pair, so it
# is evaluated once per distinct pair and the result is mapped back to every
# row, instead of being called once per organization.
# NOTE(review): the pair key pastes NA as the text "NA"; this assumes no
# code is stored as the literal string "NA" - confirm.
pair.id   <- paste( d$BMF_SUBSECTION_CODE, d$BMF_CLASSIFICATION_CODE, sep="|" )
first.row <- which( ! duplicated( pair.id ) )

pair.labels <- 
  purrr::map2_chr( 
    d$BMF_SUBSECTION_CODE[ first.row ], 
    d$BMF_CLASSIFICATION_CODE[ first.row ],
    get_codes )

d.codes <- pair.labels[ match( pair.id, pair.id[ first.row ] ) ]

d$BMF_TAX_EXEMPT_PURP <- d.codes

# cache the non-missing labels, keyed by EIN2
dsub <- dplyr::select( d, EIN2, BMF_TAX_EXEMPT_PURP )
dsub <- na.omit( dsub )
write.csv( dsub, "data/BMF_TAX_EXEMPT_PURP.csv", row.names=FALSE )
saveRDS( dsub, "data/BMF_TAX_EXEMPT_PURP.rds" ) # one-tenth size

# Attach the cached labels.
# The cache was written after na.omit(), so organizations without a label
# may be absent from it. A left join on EIN2 keeps them (with NA) instead
# of dropping their rows.
purpose.cats <- readRDS( "data/BMF_TAX_EXEMPT_PURP.rds" )
if( "BMF_TAX_EXEMPT_PURP" %in% names(d) ){
  # drop the freshly computed copy so the join does not create .x/.y columns
  d$BMF_TAX_EXEMPT_PURP <- NULL
}
d <- merge( d, purpose.cats, by="EIN2", all.x=TRUE )

New labeled version examples:

|                                                    BMF_TAX_EXEMPT_PURP|      Freq|
|----------------------------------------------------------------------:|---------:|
|                                                                     NA| 1,570,547|
|                                      Charitable Organization (501C3-1)|   722,351|
|                                       Religious Organization (501C3-7)|   265,283|
|Charitable Organization (501C3-1) ;; Educational Organization (501C3-2)|   229,645|
|                                     Educational Organization (501C3-2)|   214,094|
|                       Pleasure, Recreational, or Social Club (501C7-1)|    48,616|
|  Charitable Organization (501C3-1) ;; Religious Organization (501C3-7)|    47,822|
|                                  Social Welfare Organization (501C4-3)|    42,408|
|          Fraternal Beneficiary Society, Order or Association (501C8-1)|    38,498|
|                                              Business League (501C6-2)|    37,744|
|                                           Labor Organization (501C5-3)|    34,396|
|                        Post or Organization of War Veterans (501C19-1)|    26,790|
|                                                 Civic League (501C4-1)|    24,938|

1.6.2 Filing Requirement Codes

Combine BMF_FILING_REQ_CODE and BMF_PF_FILING_REQ_CODE.

Private foundations currently appear in the “not required to file” category (00) of BMF_FILING_REQ_CODE, because that field covers every type of 990 form except the 990-PF. This is misleading: the 990-PF requirement is recorded separately in BMF_PF_FILING_REQ_CODE. Combining the two fields places private foundations in their own 990-PF category and avoids the confusion.

# FILING REQUIREMENT DICTIONARY: ONE ROW PER RECODED VALUE 
#
#   CODE = two-character code used as the factor level 
#   FORM = form name 
#   FRC  = label assigned to the factor (the value written to the data)
#
# |CODE |FORM              |FRC                        |
# |:----|:-----------------|:--------------------------|
# |01   |990/990-EZ        |990/990EZ (01)             |
# |02   |990-N             |990-N (02)                 |
# |PF   |990-PF            |990PF (00+PF={1,2,3})      |  # new group
# |03   |GROUP RETURN      |GROUP RETURN (03)          |
# |04   |990-BL            |990-BL (04)                |
# |06   |NOT RQ: CHURCH    |NRQ: CHURCH (06)           |
# |07   |NOT RQ: 501C1 GOV |NRQ: 501C1 GOV (07)        |
# |13   |NOT RQ: RELIG ORG |NOT RQ: RELIG ORG (13)     |
# |14   |NOT RQ: STATE DIV |NOT RQ: STATE DIV (14)     |
# |00   |NOT RQ: ALL OTHER |NOT RQ: ALL OTHER (00)     |
# |11   |UNKNOWN           |Undocumented Category (11) |
#
# NOTE(review): THE FRC LABELS MIX THE PREFIXES "NRQ:" AND "NOT RQ:" - 
# CONFIRM WHETHER THAT IS INTENTIONAL BEFORE HARMONIZING, SINCE THE 
# LABELS ARE WRITTEN TO THE OUTPUT DATA.

frc <- 
  data.frame( 
    CODE = c( "01", "02", "PF", "03", "04", "06", 
              "07", "13", "14", "00", "11" ), 
    FORM = c( "990/990-EZ", 
              "990-N", 
              "990-PF", 
              "GROUP RETURN", 
              "990-BL", 
              "NOT RQ: CHURCH", 
              "NOT RQ: 501C1 GOV", 
              "NOT RQ: RELIG ORG", 
              "NOT RQ: STATE DIV", 
              "NOT RQ: ALL OTHER", 
              "UNKNOWN" ), 
    FRC  = c( "990/990EZ (01)", 
              "990-N (02)", 
              "990PF (00+PF={1,2,3})", 
              "GROUP RETURN (03)", 
              "990-BL (04)", 
              "NRQ: CHURCH (06)", 
              "NRQ: 501C1 GOV (07)", 
              "NOT RQ: RELIG ORG (13)", 
              "NOT RQ: STATE DIV (14)", 
              "NOT RQ: ALL OTHER (00)", 
              "Undocumented Category (11)" ), 
    stringsAsFactors = FALSE )


#  unique(x) %>% as.character() %>% dput()
#  c(NA, "10", "20", "1", "0", "2", "6", "60", "3", "11", "30", 
#   "13", "130", "140", "70", "21", "7", "40", "14", "4", "888")
# |BMF_FILING_REQ_CODE |      Freq|
# |:-------------------|---------:|
# |0                   |   221,065|
# |1                   |   570,568|
# |2                   | 1,126,372|
# |3                   |     6,856|
# |4                   |        14|
# |6                   |   279,358|
# |7                   |       747|
# |10                  |   118,332|  # recode as 1
# |11                  |       671|
# |13                  |     8,701|
# |14                  |     2,602|
# |20                  |   475,818| # recode as 2
# |21                  |        64|
# |30                  |       179| # recode as 3
# |40                  |        17| # recode as 4
# |60                  |    12,620| # recode as 6
# |70                  |        27| # recode as 7
# |130                 |     1,213| # recode as 13
# |140                 |        67| # recode as 14
# |888                 |         1|
# |NA                  |   637,707| 
# PF FILING REQUIREMENT CODE

# |BMF_PF_FILING_REQ_CODE |      Freq|
# |:----------------------|---------:|
# |0                      | 1,793,444|
# |NA                     | 1,518,639|
# |1                      |   150,658|  # private operating foundation 
# |3                      |       237|  # private non-operating foundation
# |2                      |        21|  # private non-operating foundation
# STRIP THE TRAILING ZERO FROM ZERO-PADDED VARIANTS OF THE RAW CODE. 
# BOTH PATTERNS REQUIRE A DIGIT BEFORE THE FINAL ZERO, SO A LONE "0" 
# IS LEFT UNCHANGED. THE PATTERNS ARE ANCHORED AT THE END OF THE STRING, 
# SO EACH VALUE CAN MATCH AT MOST ONCE PER PASS. 
x <- 
  d$BMF_FILING_REQ_CODE %>% 
  sub( pattern="([0-9]{2})(0$)", replacement="\\1" ) %>%   # 130 -> 13, 140 -> 14
  sub( pattern="([0-9]{1})(0$)", replacement="\\1" )       # 10 -> 1, 20 -> 2, ...

# 888 IS TREATED AS MISSING
x[ which( x == 888 ) ] <- NA

# LEFT-PAD TO TWO CHARACTERS SO VALUES LINE UP WITH frc$CODE 
x <- x %>% str_pad( width=2, side="left", pad="0" )

# PRIVATE FOUNDATIONS GET THEIR OWN GROUP; 
# OTHERWISE THEY ARE LISTED AS "NOT REQUIRED (00)"
x <- replace( x, d$BMF_PF_FILING_REQ_CODE %in% c(1,2,3), "PF" )


######
######  BMF_FILING_REQ_CODE
######

# LABELED FACTOR: LEVELS AND LABELS COME FROM THE frc DICTIONARY; 
# ANY VALUE NOT LISTED IN frc$CODE BECOMES NA 
d$BMF_FILING_REQ_CODE <- 
  x %>% 
  factor( levels=frc$CODE, labels=frc$FRC )

fable( d$BMF_FILING_REQ_CODE )
BMF_FILING_REQ_CODE Freq
990/990EZ (01) 696,890
990-N (02) 1,628,405
990PF (00+PF={1,2,3}) 151,568
GROUP RETURN (03) 7,399
990-BL (04) 31
NRQ: CHURCH (06) 293,132
NRQ: 501C1 GOV (07) 782
NOT RQ: RELIG ORG (13) 10,162
NOT RQ: STATE DIV (14) 2,673
NOT RQ: ALL OTHER (00) 71,408
Undocumented Category (11) 671
NA 653,446
######
######  BMF_PF_FILING_REQ_CODE
######

# RAW CODES AND THEIR LABELS, LISTED IN MATCHING ORDER 
pf.levels <- c(0,1,2,3)

pf.labels <- 
  c( "NON-PF (0)",
     "OPERATING PF (1)",
     "NON-OPERATING PF (2)",
     "NON-OPERATING PF (3)" )

# THE SAME MAPPING AS A TWO-COLUMN TABLE 
frc.pf <- data.frame( code=pf.levels, label=pf.labels )

# CONVERT TO A LABELED FACTOR; VALUES OUTSIDE pf.levels BECOME NA 
d$BMF_PF_FILING_REQ_CODE <- 
  d$BMF_PF_FILING_REQ_CODE %>% 
  factor( levels=pf.levels, labels=pf.labels )

fable( d$BMF_PF_FILING_REQ_CODE )
BMF_PF_FILING_REQ_CODE Freq
NON-PF (0) 2,232,678
OPERATING PF (1) 151,310
NON-OPERATING PF (2) 21
NON-OPERATING PF (3) 237
NA 1,132,321

1.7 Select Variables

# COLUMNS KEPT IN THE FINAL DATASET, IN OUTPUT ORDER 
final.order <- c( 

  # ORGANIZATION 
  "EIN", "EIN2", 
  "ORG_NAME", "ORG_NAME_SEC", "ORG_PERS_ICO", 
  "ORG_FISCAL_PERIOD", 
  "ORG_TYPE_501C", "ORG_TYPE_PF", "ORG_CORP_FORM",
  "ORG_RULING_DATE", "ORG_RULING_YEAR",
  "ORG_YEAR_FIRST", "ORG_YEAR_LAST", "ORG_YEAR_COUNT", 

  # NTEE / NCCS FIELDS 
  "NTEE_IRS", "NTEE_NCCS", "NTEEV2", 
  "NCCS_LEVEL_1", "NCCS_LEVEL_2", "NCCS_LEVEL_3",

  # BMF FIELDS 
  "BMF_SUBSECTION_CODE", "BMF_TAX_EXEMPT_PURP", "BMF_CLASSIFICATION_CODE",
  "BMF_DEDUCTIBILITY_CODE", 
  "BMF_FILING_REQ_CODE", "BMF_PF_FILING_REQ_CODE",
  "BMF_FOUNDATION_CODE", "BMF_STATUS_CODE", 
  "BMF_AFFILIATION_CODE", "BMF_GROUP_EXEMPT_NUM",
  "BMF_INCOME_LEVEL", "BMF_ASSET_LEVEL", 

  # F990 FIELDS 
  "F990_FORM_YEAR",
  "F990_TOTAL_REVENUE_RECENT", 
  "F990_TOTAL_INCOME_RECENT", 
  "F990_TOTAL_ASSETS_RECENT",   
  "F990_ORG_ADDR_STREET", "F990_ORG_ADDR_CITY", 
  "F990_ORG_ADDR_STATE", "F990_ORG_ADDR_ZIP", 

  # CENSUS FIELDS 
  "CENSUS_URBAN_AREA", "CENSUS_STATE_ABBR", "CENSUS_COUNTY_NAME",
  "CENSUS_CBSA_NAME", "CENSUS_CBSA_FIPS", "CENSUS_BLOCK_FIPS",

  # ADDRESS AND GEOCODER FIELDS 
  "ORG_ADDR_FULL", "ORG_ADDR_MATCH", 
  "LATITUDE", "LONGITUDE", 
  "GEOCODER_SCORE", "GEOCODER_MATCH"  )



# LISTED VARIABLES THAT ARE MISSING FROM d: EXPECT character(0) 
final.order %>% setdiff( names(d) )

character(0)

# VARIABLES IN d THAT ARE NOT LISTED AND WILL THEREFORE BE DROPPED 
names(d) %>% setdiff( final.order )

character(0)

# KEEP ONLY THE LISTED VARIABLES, IN THE LISTED ORDER 
d <- d %>% select( all_of( final.order ) )

1.8 Save Objects

# WRITE THE LABELED DATA IN BOTH FORMATS; MISSING VALUES ARE EMPTY CELLS IN THE CSV
saveRDS( object=d, file="data/BMF-LABELED-TEMP.rds" )
write_csv( d, "data/BMF-LABELED-TEMP.csv", na="" )

# ONE LOOKUP TABLE PER LABELED VARIABLE, NAMED AFTER THE VARIABLE IT DESCRIBES.
# NOTE(review): BMF_INCOME_LEVEL REUSES asset.level - PRESUMABLY BECAUSE BOTH 
# FIELDS SHARE THE SAME CODE SCHEME; CONFIRM. 
dictionaries <- list(
  ORG_CORP_FORM          = corp,
  ORG_FISCAL_PERIOD      = fiscal.codes,
  BMF_AFFILIATION_CODE   = aff.codes,
  BMF_ASSET_LEVEL        = asset.level,
  BMF_DEDUCTIBILITY_CODE = deduct,
  BMF_FILING_REQ_CODE    = frc,
  BMF_FOUNDATION_CODE    = fnd,
  BMF_INCOME_LEVEL       = asset.level,
  BMF_PF_FILING_REQ_CODE = frc.pf,
  BMF_STATUS_CODE        = status,
  BMF_TAX_EXEMPT_PURP    = taxexp.purp,
  NCCS_LEVEL_1           = level1.codes,
  NCCS_LEVEL_2           = level2.codes,
  NCCS_LEVEL_3           = level3.codes
)

# WRITE THE LIST AS R SOURCE CODE THAT CAN BE RELOADED WITH source()
dump( "dictionaries", file="data/DICTIONARIES.R" )