*************************************
* IV ESTIMATES
*************************************
* Session setup: open the log and size memory before any data are loaded.
log using iv2, replace
set trace off
set more off
capture drop _all
set mem 120m
set matsize 500

*******************************************
* LARGE PLANTS ARE DEFINED AS HAVING
* AT LEAST 1000 WORKERS
********************************************
clear
u data2
*keep if te >0
g npw = te-pw

* Indicator for plants with at least 1000 workers.
* Stata treats missing as larger than any number, so "te >= 1000" is true
* when te is missing. The "if te < ." guard leaves large missing for those
* plants instead of counting them as large.
g large = (te >= 1000) if te < .

* Share of non-production workers (pw/te is subtracted from 1).
g skill = 1- (pw/te)
summ skill, detail
correlate skill te

* Same missing-value guard: skill is missing when te is 0 or missing.
g skilled = (skill > .30) if skill < .
*replace large = large*skilled

* SUMMARY STATS ON LARGE PLANTS
*g small = 1-large
*summ large
*summ te large if large ==1
*sort large
*collapse (sum) sumte = te, by(large)
*summ
*summ if large ==1
*summ if large ==0

* Size classes, used only for the descriptive table below.
g size = 1 if te >=0 & te <=10
replace size = 2 if te >10 & te <=100
replace size = 3 if te >100 & te <=500
replace size = 4 if te >500 & te <=1000
* "& te < ." keeps plants with missing te out of the top class.
replace size = 5 if te >1000 & te < .
sort size
by size: summ te

* NOW, COLLAPSES BY SMSA
* iv1  = mean of large within SMSA (share of large plants)
* obs1 = number of plants with non-missing large within SMSA
sort smsa ind
save tmp2, replace
collapse iv1 = large (count) obs1 = large, by(smsa)
summ
sort smsa
save tmp, replace

* NOW COLLAPSE BY SMSA AND 3-DIGIT INDUSTRY
* iv3/obs3 are the same statistics within SMSA x industry; fillin adds the
* SMSA x industry cells that have no plants (iv3 and obs3 missing there).
clear
u tmp2
collapse iv3 = large (count) obs3 = large, by(smsa ind)
fillin smsa ind
summ
sort smsa ind

* NOW MERGES AND CALCULATE IV THAT EXCLUDES
* 3-DIGIT INDUSTRY
merge smsa using tmp
drop _merge
* w3 = share of the SMSA's plants that belong to this industry.
g w3 = obs3/obs1
* Leave-own-industry-out share: remove the industry's weighted contribution
* from the SMSA mean and rescale by the remaining weight.
g iv = (iv1 - iv3*w3)/(1-w3)
summ
* Cells created by fillin have no own-industry plants, so the SMSA mean is
* used unchanged.
replace iv = iv1 if w3==.
summ
correlate iv1 iv
keep smsa ind iv iv1 iv3 obs1 obs3
sort smsa ind
save tmp3, replace

* Attach the variables stored in smsa8090 (keyed on smsa) and re-save.
sort smsa
merge smsa using smsa8090
drop _merge
summ
sort smsa ind
save tmp3, replace

*******************************************
* MAIN DATA
*******************************************
use D:/drive-e/moretti/July03-revision/tmp0
sort smsa ind
merge smsa ind using tmp3
drop if logVA ==. | Dx ==. | iv ==.
* Cluster identifier for SMSA x industry cells.
* Stored as double: the default float type cannot represent integers above
* 16,777,216 exactly, so smsa80*10000 + ind could round distinct cells to the
* same value and merge clusters.
* NOTE(review): this matters whenever smsa80 >= 1678 - confirm the code range.
g double smsa_ind = smsa80*10000 + ind

*******************************************
* FIRST CHECK THE VALIDITY OF IV
*******************************************
* LIST CITIES, BY SHARE OF NEW LARGE PLANTS
*sort smsa80
*by smsa80: g pp = 1 if _n==1
*keep if pp ==1
*sort iv1
*list smsa80 city iv1

* ADD CITY VARIABLES
* _merge from the preceding merge must be dropped before merging again.
drop _merge
sort smsa80
merge smsa80 using covar
g Dobs = pop90-pop80

* Is IV correlated with region or with pop in 1980?
*xi: reg iv i.division
*xi: reg iv pop80

* Is IV correlated with total number of new openings in RELEVANT industry?
*g n_open = obs3
*summ n_open
*areg iv n_open DlogL1 DlogL4 DlogK mu , cluster(smsa_ind) absorb(ind3_st)
*replace n_open = n_open/pop80
*areg iv n_open DlogL1 DlogL4 DlogK mu , cluster(smsa_ind) absorb(ind3_st)

* Is IV correlated with 1980 wages in RELEVANT industry?
*drop _merge
*sort ppn
*merge ppn using wages80
*g wage = sw80/te80
*summ
*areg iv wage DlogL1 DlogL4 DlogK mu , cluster(smsa_ind) absorb(ind3_st)

* Is IV correlated with 1980 TFP in RELEVANT industry?
*drop _merge
*sort ppn
*merge ppn using tfp80
*g tfp = logA80
*areg iv tfp DlogL1 DlogL4 DlogK mu , cluster(smsa_ind) absorb(ind3_st)
*areg iv tfp Dobs dhisp dblack dcitiz dsex dune DlogL1 DlogL4 DlogK mu , cluster(smsa_ind) absorb(ind3_st)

*******************************************
* NOW RUN IV ESTIMATES
*******************************************
* Regressor lists shared by the specifications below.
*   cd   : input growth terms entering linearly
*   cdi  : the same terms interacted with ind2 (xi syntax)
*   city : city-level controls
local cd DlogL1 DlogL4 DlogK
local cdi i.ind2|DlogL1 i.ind2|DlogL4 i.ind2|DlogK
local city Dobs dhisp dblack dcitiz dsex dune

correlate Dx iv
summ Dx iv, detail

* Each pair below is a first stage (reg Dx on iv) followed by the matching
* IV regression of DlogVA with Dx instrumented by iv.
xi: reg Dx iv `cd' mu, cluster(smsa_ind)
xi: ivreg DlogVA (Dx = iv) `cd' mu, cluster(smsa_ind)

xi: reg Dx iv `cd' mu i.fst, cluster(smsa_ind)
xi: ivreg DlogVA (Dx = iv) `cd' mu i.fst, cluster(smsa_ind)

xi: reg Dx iv `cd' mu i.ind3, cluster(smsa_ind)
xi: ivreg DlogVA (Dx = iv) `cd' mu i.ind3, cluster(smsa_ind)

* Specifications absorbing ind3_st are run as a manual two-step: areg first
* stage, fitted values (including the absorbed effects, option xbd), then
* areg of DlogVA on the fitted values.
* NOTE(review): the second-step standard errors are not adjusted for the
* estimated first stage; only the point estimates correspond to 2SLS.
areg Dx iv `cd' mu , cluster(smsa_ind) absorb(ind3_st)
predict fit , xbd
areg DlogVA fit `cd' mu , cluster(smsa_ind) absorb(ind3_st)
drop fit

xi:areg Dx iv `cdi' mu , cluster(smsa_ind) absorb(ind3_st)
predict fit , xbd
xi: areg DlogVA fit `cdi' mu , cluster(smsa_ind) absorb(ind3_st)
drop fit

xi:areg Dx iv `cdi' `city' mu , cluster(smsa_ind) absorb(ind3_st)
predict fit , xbd
xi: areg DlogVA fit `cdi' `city' mu , cluster(smsa_ind) absorb(ind3_st)
drop fit

* Second-order terms in levels.
g logL1sq = logL1*logL1
g logL4sq = logL4*logL4
g logKsq = logK*logK
g logL1_K = logL1*logK
g logL4_K = logL4*logK
g logL1_L4 = logL1*logL4

* Second-order terms built from the *80 variables.
g logL1sq8 = logL180*logL180
g logL4sq8 = logL480*logL480
g logKsq8 = logK80*logK80
g logL1_K8 = logL180*logK80
g logL4_K8 = logL480*logK80
g logL1_L8 = logL180*logL480

* Second-order terms in changes.
g DlogL1sq = DlogL1*DlogL1
g DlogL4sq = DlogL4*DlogL4
g DlogKsq = DlogK*DlogK
g DlogL1_K = DlogL1*DlogK
g DlogL4_K = DlogL4*DlogK
g DlogL1_L = DlogL1*DlogL4
* The regressions below refer to this interaction as DlogL1_L4, which is not
* an abbreviation of DlogL1_L. Create it under that name unless the loaded
* data already provide it, so the commands below can resolve the variable.
capture confirm variable DlogL1_L4
if _rc != 0 {
    g DlogL1_L4 = DlogL1_L
}

* Regressor lists with the second-order terms added (tl), and the same list
* interacted with ind2 (tli).
local tl DlogL1 DlogL1sq DlogL4 DlogL4sq DlogK DlogKsq DlogL1_K DlogL4_K DlogL1_L4
local tli i.ind2|DlogL1 i.ind2|DlogL1sq i.ind2|DlogL4 i.ind2|DlogL4sq i.ind2|DlogK i.ind2|DlogKsq i.ind2|DlogL1_K i.ind2|DlogL4_K i.ind2|DlogL1_L4

xi: reg Dx iv `tl' mu, cluster(smsa_ind)
xi: ivreg DlogVA (Dx = iv) `tl' mu, cluster(smsa_ind)

xi: reg Dx iv `tl' mu i.fst, cluster(smsa_ind)
xi: ivreg DlogVA (Dx = iv) `tl' mu i.fst, cluster(smsa_ind)

xi: reg Dx iv `tl' mu i.ind3, cluster(smsa_ind)
xi: ivreg DlogVA (Dx = iv) `tl' mu i.ind3, cluster(smsa_ind)

areg Dx iv `tl' mu , cluster(smsa_ind) absorb(ind3_st)
predict fit , xbd
areg DlogVA fit `tl' mu , cluster(smsa_ind) absorb(ind3_st)
drop fit

xi:areg Dx iv `tli' mu , cluster(smsa_ind) absorb(ind3_st)
predict fit , xbd
xi: areg DlogVA fit `tli' mu , cluster(smsa_ind) absorb(ind3_st)
drop fit

xi:areg Dx iv `city' `tli' mu , cluster(smsa_ind) absorb(ind3_st)
predict fit , xbd
xi: areg DlogVA fit `city' `tli' mu , cluster(smsa_ind) absorb(ind3_st)
drop fit

************************************************************************
************************************************************************
************************************************************************
************************************************************************
************************************************************************
************************************************************************
************************************************************************
************************************************************************
************************************************************************
************************************************************************
************************************************************************
*******************************************
*******************************************
* LARGE PLANTS ARE DEFINED AS HAVING
* AT LEAST 500 WORKERS
********************************************
clear
u data2
*keep if te >0
g npw = te-pw

* Indicator for plants with at least 500 workers.
* Stata treats missing as larger than any number, so "te >= 500" is true
* when te is missing. The "if te < ." guard leaves large missing for those
* plants instead of counting them as large.
g large = (te >= 500) if te < .

* Share of non-production workers (pw/te is subtracted from 1).
g skill = 1- (pw/te)
summ skill, detail
correlate skill te

* Same missing-value guard: skill is missing when te is 0 or missing.
g skilled = (skill > .25) if skill < .

* Size classes, used only for the descriptive table below.
g size = 1 if te >=0 & te <=10
replace size = 2 if te >10 & te <=100
replace size = 3 if te >100 & te <=500
replace size = 4 if te >500 & te <=1000
* "& te < ." keeps plants with missing te out of the top class.
replace size = 5 if te >1000 & te < .
sort size
by size: summ skill
*replace large = large*skilled
summ ind

* NOW, COLLAPSES BY SMSA
* iv1  = mean of large within SMSA (share of large plants)
* obs1 = number of plants with non-missing large within SMSA
sort smsa ind
save tmp2, replace
collapse iv1 = large (count) obs1 = large, by(smsa)
summ
sort smsa
save tmp, replace

* NOW COLLAPSE BY SMSA AND 3-DIGIT INDUSTRY
* iv3/obs3 are the same statistics within SMSA x industry; fillin adds the
* SMSA x industry cells that have no plants (iv3 and obs3 missing there).
clear
u tmp2
collapse iv3 = large (count) obs3 = large, by(smsa ind)
fillin smsa ind
summ
sort smsa ind

* NOW MERGES AND CALCULATE IV THAT EXCLUDES
* 3-DIGIT INDUSTRY
merge smsa using tmp
drop _merge
* w3 = share of the SMSA's plants that belong to this industry.
g w3 = obs3/obs1
* Leave-own-industry-out share: remove the industry's weighted contribution
* from the SMSA mean and rescale by the remaining weight.
g iv = (iv1 - iv3*w3)/(1-w3)
summ
* Cells created by fillin have no own-industry plants, so the SMSA mean is
* used unchanged.
replace iv = iv1 if w3==.
summ
correlate iv1 iv
keep smsa ind iv iv1 iv3
sort smsa ind
save tmp3, replace

*******************************************
* MAIN DATA
*******************************************
use D:/drive-e/moretti/July03-revision/tmp0
sort smsa ind
merge smsa ind using tmp3
* Only logVA is screened here; observations with missing Dx or iv are left
* in the data and are excluded by the estimation commands themselves.
drop if logVA ==.
* Cluster identifier for SMSA x industry cells.
* Stored as double: the default float type cannot represent integers above
* 16,777,216 exactly, so smsa80*10000 + ind could round distinct cells to the
* same value and merge clusters.
* NOTE(review): the tmp3 file built for the 500-worker definition is not
* merged with smsa8090, so smsa80 must already be present in tmp0 - confirm.
g double smsa_ind = smsa80*10000 + ind

summ Dx iv
correlate Dx iv

* Regressor lists shared by the specifications below.
*   cd  : input growth terms entering linearly
*   cdi : the same terms interacted with ind2 (xi syntax)
local cd DlogL1 DlogL4 DlogK
local cdi i.ind2|DlogL1 i.ind2|DlogL4 i.ind2|DlogK

* Each pair below is a first stage (reg Dx on iv) followed by the matching
* IV regression of DlogVA with Dx instrumented by iv.
xi: reg Dx iv `cd' mu, cluster(smsa_ind)
xi: ivreg DlogVA (Dx = iv) `cd' mu, cluster(smsa_ind)

xi: reg Dx iv `cd' mu i.fst, cluster(smsa_ind)
xi: ivreg DlogVA (Dx = iv) `cd' mu i.fst, cluster(smsa_ind)

xi: reg Dx iv `cd' mu i.ind3, cluster(smsa_ind)
xi: ivreg DlogVA (Dx = iv) `cd' mu i.ind3, cluster(smsa_ind)

* Specifications absorbing ind3_st are run as a manual two-step: areg first
* stage, fitted values (including the absorbed effects, option xbd), then
* areg of DlogVA on the fitted values.
* NOTE(review): the second-step standard errors are not adjusted for the
* estimated first stage; only the point estimates correspond to 2SLS.
areg Dx iv `cd' mu , cluster(smsa_ind) absorb(ind3_st)
predict fit , xbd
areg DlogVA fit `cd' mu , cluster(smsa_ind) absorb(ind3_st)
drop fit

xi:areg Dx iv `cdi' mu , cluster(smsa_ind) absorb(ind3_st)
predict fit , xbd
xi: areg DlogVA fit `cdi' mu , cluster(smsa_ind) absorb(ind3_st)
drop fit

* Second-order terms in levels.
g logL1sq = logL1*logL1
g logL4sq = logL4*logL4
g logKsq = logK*logK
g logL1_K = logL1*logK
g logL4_K = logL4*logK
g logL1_L4 = logL1*logL4

* Second-order terms built from the *80 variables.
g logL1sq8 = logL180*logL180
g logL4sq8 = logL480*logL480
g logKsq8 = logK80*logK80
g logL1_K8 = logL180*logK80
g logL4_K8 = logL480*logK80
g logL1_L8 = logL180*logL480

* Second-order terms in changes.
g DlogL1sq = DlogL1*DlogL1
g DlogL4sq = DlogL4*DlogL4
g DlogKsq = DlogK*DlogK
g DlogL1_K = DlogL1*DlogK
g DlogL4_K = DlogL4*DlogK
g DlogL1_L = DlogL1*DlogL4
* The regressions below refer to this interaction as DlogL1_L4, which is not
* an abbreviation of DlogL1_L. Create it under that name unless the loaded
* data already provide it, so the commands below can resolve the variable.
capture confirm variable DlogL1_L4
if _rc != 0 {
    g DlogL1_L4 = DlogL1_L
}

* Regressor lists with the second-order terms added (tl), and the same list
* interacted with ind2 (tli).
local tl DlogL1 DlogL1sq DlogL4 DlogL4sq DlogK DlogKsq DlogL1_K DlogL4_K DlogL1_L4
local tli i.ind2|DlogL1 i.ind2|DlogL1sq i.ind2|DlogL4 i.ind2|DlogL4sq i.ind2|DlogK i.ind2|DlogKsq i.ind2|DlogL1_K i.ind2|DlogL4_K i.ind2|DlogL1_L4

xi: reg Dx iv `tl' mu, cluster(smsa_ind)
xi: ivreg DlogVA (Dx = iv) `tl' mu, cluster(smsa_ind)

xi: reg Dx iv `tl' mu i.fst, cluster(smsa_ind)
xi: ivreg DlogVA (Dx = iv) `tl' mu i.fst, cluster(smsa_ind)

xi: reg Dx iv `tl' mu i.ind3, cluster(smsa_ind)
xi: ivreg DlogVA (Dx = iv) `tl' mu i.ind3, cluster(smsa_ind)

areg Dx iv `tl' mu , cluster(smsa_ind) absorb(ind3_st)
predict fit , xbd
areg DlogVA fit `tl' mu , cluster(smsa_ind) absorb(ind3_st)
drop fit

xi:areg Dx iv `tli' mu , cluster(smsa_ind) absorb(ind3_st)
predict fit , xbd
xi: areg DlogVA fit `tli' mu , cluster(smsa_ind) absorb(ind3_st)
drop fit