***************************************************************
* This file creates the final dataset that is used
* in the main empirical analysis
*
* In particular, this file:
* - extracts a subset of variables for 1987.
* - assigns smsa80, ind, fst, and education using ppn.
* - deletes any ppn that is missing either in 87 or 92.
* - merges with errors2.
*
* Inputs read from library IN: extract, Gs87, errors2
* Output written to library IN: st828792
*
* Note: to run this file, you need the following 4 sas data
* in library ON:
*   medu_lr3, medu_lr4, medu_lr5, medu_lr6
* These files contain data on workers education from
* the 1980 and 1990 Censuses of Population
***************************************************************;

options compress=no;
libname in 'D:/cm/';
libname on 'D:/enrico/imported/data';

* 1992 records that pass the missing, error and rural screens.
  Only ppn plus the location and education variables are kept so
  that they can be attached to the 1987 records by ppn;
data y92;
    set in.extract;
    if yr ^= 92 then delete;
    if miss92 = 1 or miss82 = 1 then delete;
    if err1 = 1 or err2 = 1 then delete;
    if rural92 = 1 or rural82 = 1 then delete;
    keep ppn smsa80 fst medu medu80 medu2 medu280
         mcoll mcoll80 mcoll2 mcoll280;
run;

proc sort data=y92;
    by ppn;
run;
*proc means;

* data for 1987 augmented with smsa;
/* NOTE(review): in.Gs87 is assumed to be already ordered by ppn - confirm. */
data y87;
    merge in.Gs87 y92;
    by ppn;
    yr = 87;
    if te = . or medu = . then delete;
    missin = 0;

    * CPI - every variable below is multiplied by 1.234. The original
      notes tag va as output, sw as labor and bae as capital. yr was
      just set to 87, so the former yr=87 guards were always true;
    array cpi_vars{*} va sw lc ww ow vlc le bae mae;
    do _k = 1 to dim(cpi_vars);
        cpi_vars{_k} = cpi_vars{_k} * 1.234;
    end;

    * MULTI-ESTABLISHMENTS and FIRM ID (see LRD manual).
      FID = ID is assigned first so that FID takes the length of ID;
    if substr(id,1,1) = '0' then do;
        MU  = 0;
        FID = ID;
    end;
    else do;
        MU  = 1;
        FID = substr(ID,1,6)||'0000';
    end;
    label MU = 'multi-establishment dummy';
    label FID ='FID=ID if MU=0; FID=1234560000 if MU=1';

    /* The loop index _k is not listed here, so it is dropped. */
    keep ppn MU FID smsa80 fst yr ind
         medu medu80 medu2 medu280 mcoll mcoll80 mcoll2 mcoll280
         te pw ph va sw ww ow lc le vlc bae mae missin;
run;
*proc means;

* list of ppn that are non missing in 1982, 87, 92;
data list_ppn;
    set y87;
    keep ppn missin;
run;

* data for 1982 and 1992: only ppn that are non missing.
  missin is 0 only for ppn that appear in list_ppn;
/* NOTE(review): in.extract is assumed to be already ordered by ppn - confirm. */
data y82y92;
    merge in.extract list_ppn;
    by ppn;
    if missin ^= 0 then delete;
run;
*proc means;

* data for all years;
data st828792;
    set y82y92 y87;
run;
*proc means;

****************************************************
* Merging with other measures of average education;

proc sort data=on.medu_lr3 out=tmp;
    by smsa80;
run;

/* NOTE(review): on.medu_lr4 is assumed to be already ordered by smsa80 - confirm. */
data tmp1;
    merge tmp on.medu_lr4;
    by smsa80;

    * medu6 backs group 4 out of the group 5 mean using the obs counts
      as weights. When group 4 is absent the group 5 mean is used;
    medu6 = (medu5 - (obs4/obs5)*medu4)/((obs5-obs4)/obs5);
    if medu4 = . then medu6 = medu5;
    medu680 = (medu580 - (obs480/obs580)*medu480)/((obs580-obs480)/obs580);
    if medu480 = . then medu680 = medu580;

    *Asymptotic approximation of variance;
    /* FIX: when obs4 (obs480) is missing the fallback used to copy the   */
    /* raw var5 (var580), which at that point had not yet been divided by */
    /* obs5 (obs580). Every other var* written by this step is a variance */
    /* divided by a count, so the fallback is now put on the same scale.  */
    var6 = var5/(obs5-obs4);
    if obs4 = . then var6 = var5/obs5;
    var680 = var580/(obs580-obs480);
    if obs480 = . then var680 = var580/obs580;

    /* These rescale in place and must stay after the var6 lines above. */
    var4   = var4/obs4;
    var480 = var480/obs480;
    var5   = var5/obs5;
    var580 = var580/obs580;
run;

data tmp2;
    set st828792;

    * match Census industry code and SIC code;
    ind2 = int(ind/100);
    ind3 = int(ind/10);
    ind4 = ind;

    /* Three-digit SIC to Census industry code. Each ind3 value appears  */
    /* in exactly one WHEN, so the result does not depend on ordering.   */
    /* FIXES relative to the previous IF chain:                          */
    /*  - 274 is now mapped to 172 (272 was listed twice, 274 never).    */
    /*  - 282, 283, 284, 285 keep 180, 181, 182, 190 (a later run of     */
    /*    statements overwrote all four with 192).                       */
    /*  - 245 keeps 232 (a later statement overwrote it with 241).       */
    /* NOTE(review): confirm these three against the Census crosswalk.   */
    select (ind3);
        /* SIC 20x */
        when (201) indCen = 100;
        when (202) indCen = 101;
        when (203) indCen = 102;
        when (204) indCen = 110;
        when (205) indCen = 111;
        when (206) indCen = 112;
        when (208) indCen = 120;
        when (207, 209) indCen = 121;
        /* SIC 21x */
        when (211) indCen = 130;
        /* SIC 22x */
        when (225) indCen = 132;
        when (226) indCen = 140;
        when (227) indCen = 141;
        when (221, 222, 223, 224, 228) indCen = 142;
        when (229) indCen = 150;
        /* SIC 23x */
        when (231, 232, 233, 234, 235, 236, 237, 238) indCen = 151;
        when (239) indCen = 152;
        /* SIC 26x */
        when (261, 262, 263, 266) indCen = 160;
        when (264) indCen = 161;
        when (265) indCen = 162;
        /* SIC 27x */
        when (271) indCen = 171;
        when (272, 273, 274, 275, 276, 277, 278, 279) indCen = 172;
        /* SIC 28x */
        when (282) indCen = 180;
        when (283) indCen = 181;
        when (284) indCen = 182;
        when (285) indCen = 190;
        when (287) indCen = 191;
        when (281, 286, 289) indCen = 192;
        /* SIC 29x */
        when (291) indCen = 200;
        when (295, 296, 297, 298, 299) indCen = 201;
        /* SIC 30x */
        when (301) indCen = 210;
        when (302, 303, 304, 305, 306) indCen = 211;
        when (308) indCen = 212;
        /* SIC 31x */
        when (311) indCen = 220;
        when (313, 314) indCen = 221;
        when (315, 316, 317, 319) indCen = 222;
        /* SIC 24x and 25x */
        when (241) indCen = 230;
        when (242, 243) indCen = 231;
        when (245) indCen = 232;
        when (244, 246, 247, 248, 249) indCen = 241;
        when (252) indCen = 242;
        /* SIC 32x */
        when (321, 322, 323) indCen = 250;
        when (324, 327) indCen = 251;
        when (325) indCen = 252;
        when (326) indCen = 261;
        when (328, 329) indCen = 262;
        /* SIC 33x (three-digit part, see four-digit rules below) */
        when (331) indCen = 270;
        when (332) indCen = 271;
        when (334, 339) indCen = 280;
        /* SIC 34x */
        when (342) indCen = 281;
        when (344) indCen = 282;
        when (345) indCen = 290;
        when (346) indCen = 291;
        when (348) indCen = 292;
        when (341, 343, 347, 349) indCen = 300;
        /* SIC 35x (3573 is refined below) */
        when (351) indCen = 310;
        when (352) indCen = 311;
        when (353) indCen = 312;
        when (354) indCen = 320;
        when (357) indCen = 321;
        when (355, 356, 358, 359) indCen = 331;
        /* SIC 36x */
        when (363) indCen = 340;
        when (365, 366) indCen = 341;
        when (361, 362, 364, 367, 369) indCen = 342;
        /* SIC 37x */
        when (371) indCen = 351;
        when (372) indCen = 352;
        when (373) indCen = 360;
        when (374) indCen = 361;
        when (376) indCen = 362;
        when (375, 379) indCen = 370;
        /* SIC 38x */
        when (381, 382) indCen = 371;
        when (383, 384, 385) indCen = 372;
        when (386) indCen = 380;
        when (387) indCen = 381;
        /* Anything else keeps a missing indCen unless set below. */
        otherwise;
    end;

    /* Four-digit rules. None of the 333x, 335x, 336x codes has a */
    /* three-digit rule above. 3573 overrides the 357 rule.       */
    if ind4 in (3334, 3353, 3354, 3355, 3361) then indCen = 272;
    if ind4 in (3331, 3332, 3333, 3339, 3351, 3356, 3357,
                3364, 3366, 3369) then indCen = 280;
    if ind4 = 3573 then indCen = 322;

    /* Two-digit rule for SIC 39, then its three-digit exception. */
    /* The order of these two statements matters.                 */
    if ind2 = 39 then indCen = 391;
    if ind3 = 394 then indCen = 390;
run;

proc sort data=tmp2;
    by smsa80 indCen;
run;

***********************************************
* Merging all together;

/* NOTE(review): tmp1 is only known to be ordered by smsa80 at this    */
/* point, and the order of on.medu_lr5 and on.medu_lr6 is not visible  */
/* here - confirm that all three satisfy BY smsa80 indCen.             */
data st828792;
    merge tmp2 tmp1 on.medu_lr5 on.medu_lr6;
    by smsa80 indCen;
    * drop rows that come only from the education files;
    if ppn = . then delete;
run;

proc sort data=st828792;
    by ppn;
run;

/* NOTE(review): in.errors2 is assumed to be already ordered by ppn - confirm. */
data in.st828792;
    merge st828792 in.errors2;
    by ppn;
run;

* Summary statistics by year for the records with all zeros flags equal to 0;
proc means data=in.st828792;
    where (yr =92 and zeros82=0 and zeros282=0 and zeros87=0 and zeros287=0 and zeros92=0 and zeros292=0);
run;

proc means data=in.st828792;
    where (yr =87 and zeros82=0 and zeros282=0 and zeros87=0 and zeros287=0 and zeros92=0 and zeros292=0);
run;

/* The explicit RUN below closes the last step. Without it the final */
/* PROC MEANS is not executed in an interactive session.             */
proc means data=in.st828792;
    where (yr =82 and zeros82=0 and zeros282=0 and zeros87=0 and zeros287=0 and zeros92=0 and zeros292=0);
run;