# Source code for amjobs07.R

# Load the input table into data.frame dd10.
# Earlier versions read public.dta (2000-2010); that call is kept for reference:
#dd10 <- read.dta("public.dta")

# Current input: data substituted from the morg files.
dd10 <- read.table(file = "morg.txt")
# Every missing cell is replaced by zero.
dd10 <- replace(dd10, is.na(dd10), 0)
# Make the state code available under the name statefip as well.
dd10[["statefip"]] <- dd10$state

# Choose which immigrant group the rest of the script analyses.
# Exactly one of the three variants below should be active; each sets the
# axis labels (forlab1, forlab2) and the count column dd10$emp_imm.

# Variant 1 (active): foreign STEM workers with advanced US degrees
forlab1 <- "Employed Foreign STEM Workers with Adv US Degrees"
forlab2 <- "Foreign Share of Total Employment (STEM w/ Adv US Deg)"
dd10$emp_imm <- dd10$emp_edus_stem_grad

# Variant 2: foreign STEM workers with advanced degrees from any university
#forlab1 <- "Employed Foreign STEM Workers with Advanced Degrees"
#forlab2 <- "Foreign Share of Total Employment (STEM w/ Adv Deg)"
#dd10$emp_imm <- dd10$emp_edus_stem_grad+dd10$emp_nedus_stem_grad

# Variant 3: foreign workers in any field with advanced degrees
#forlab1 <- "Employed Foreign Workers with Advanced Degrees"
#forlab2 <- "Foreign Share of Total Employment (all w/ Adv Deg)"
#dd10$emp_imm <- dd10$emp_edus_grad+dd10$emp_nedus_grad

# Report the size of the full table, then restrict it to the analysis window.
#print(summary(dd10))
print("dim(dd10) and dim(dd)")
print(dim(dd10))

# dd holds the rows with year before 2008; labyears is the matching label
# used in printed headings and plot titles.
# To analyse every year instead, use:
#   dd <- dd10
#   labyears <- "2000-2010"
labyears <- "2000-2007"
dd <- dd10[dd10$year < 2008, ]
print(dim(dd))

# Run the companion script now that dd and labyears exist.
source("amjobsg.R")

# Print the estimated number of additional native jobs per 100 foreign-born
# workers for three immigrant groups. Each figure is
#   (total native employment / total group employment) * coefficient * 100
# where the coefficients 0.004, 0.003 and 0.008 are hard-coded here
# (presumably estimates obtained elsewhere -- TODO confirm their source).
print("ADDITIONAL JOBS AMONG US NATIVE CREATED BY 100 FOREIGN-BORN WORKERS IN...")
local({
  native_total  <- sum(dd$emp_native)
  stem_us_total <- sum(dd$emp_edus_stem_grad)
  stem_total    <- stem_us_total + sum(dd$emp_nedus_stem_grad)
  grad_total    <- sum(dd$emp_edus_grad) + sum(dd$emp_nedus_grad)

  print(sprintf("%9.4f  STEM fields with advanced degrees from US universities",
    native_total / stem_us_total * 0.004 * 100))
  print(sprintf("%9.4f  STEM fields with advanced degrees from any universities",
    native_total / stem_total * 0.003 * 100))
  print(sprintf("%9.4f  any field with advanced degrees from any universities",
    native_total / grad_total * 0.008 * 100))
  invisible(NULL)
})

# Derive the three working variants of dd used by the regressions and plots:
#   ddnz - rows whose emp_imm count is greater than zero
#   ddz1 - all rows, with emp_imm counts of 0 replaced by 1
#   ddca - rows with statefip equal to 6 (labelled California in the output)
ddnz <- dd[dd$emp_imm > 0, ]

ddz1 <- dd
ddz1$emp_imm <- replace(ddz1$emp_imm, ddz1$emp_imm == 0, 1)

ddca <- dd[dd$statefip == 6, ]

# Report the dimensions of each variant.
print("dim(ddnz) and dim(ddz1)")
print(dim(ddnz))
print(dim(ddz1))
print("dim(ddca)")
print(dim(ddca))

# Add the derived analysis columns to a data frame and return it.
#
# Columns added, in this order:
#   emprate_native              emp_native / pop_native * 100
#   lnemprate_native            natural log of emprate_native
#   immshare_emp_stem_e_grad    emp_imm / emp_total * 100
#   lnimmshare_emp_stem_e_grad  natural log of immshare_emp_stem_e_grad
# and, when with_weights is TRUE:
#   sum_pop_native              total pop_native over all rows of the same year
#   weight_native               pop_native / sum_pop_native
#
# Arguments:
#   df            data frame with columns emp_native, pop_native, emp_imm,
#                 emp_total and (when with_weights is TRUE) year
#   with_weights  whether to add the two weighting columns
#
# The same formulas were previously repeated once per data frame; keeping them
# in one place guarantees that dd, ddnz, ddz1 and ddca are computed identically.
add_rate_columns <- function(df, with_weights = TRUE) {
  df$emprate_native   <- df$emp_native / df$pop_native * 100
  df$lnemprate_native <- log(df$emprate_native)
  df$immshare_emp_stem_e_grad   <- df$emp_imm / df$emp_total * 100
  df$lnimmshare_emp_stem_e_grad <- log(df$immshare_emp_stem_e_grad)
  if (with_weights) {
    # Each row's weight is its share of that year's native population.
    df$sum_pop_native <- with(df, ave(pop_native, year, FUN = sum))
    df$weight_native  <- df$pop_native / df$sum_pop_native
  }
  df
}

# Full sample, non-zero sample and zero-set-to-one sample get the weights too.
dd   <- add_rate_columns(dd)
ddnz <- add_rate_columns(ddnz)
ddz1 <- add_rate_columns(ddz1)
# The single-state sample is never used in a weighted fit, so no weights here.
ddca <- add_rate_columns(ddca, with_weights = FALSE)

# Smallest non-zero emp_imm count (ddnz contains only rows with emp_imm > 0).
print("Minimum non-zero count of foreign-born workers in STEM fields with advanced degrees from US universities")
print(min(ddnz$emp_imm))

# Column headings for the correlation / regression table that follows.
print(" CORREL")
print("   COEF  INTERCEPT    SLOPE  Y VARIABLE ~ X VARIABLE [, WEIGHTS]")
print("-------  ---------  -------  -----------------------------------")

# First table section: every row of dd, levels (not logs).
print(paste(labyears, "ALL DATA", sep=", "))
lma <- lm(emprate_native ~ immshare_emp_stem_e_grad, data = dd)
print(sprintf("%7.4f %9.4f %9.4f  emprate_native ~ immshare_emp_stem_e_grad",
  cor(dd$emprate_native, dd$immshare_emp_stem_e_grad),
  coef(lma)[1], coef(lma)[2]))

# Table section for ddnz (rows with a zero emp_imm count removed).
# Each row prints: correlation coefficient, intercept, slope, model label.
print(paste(labyears, "EXCLUDING POINTS WITH ZERO FOREIGN WORKERS IN STEM WITH ADVANCED US DEGREES", sep=", "))

# Levels, unweighted.
lma <- lm(emprate_native ~ immshare_emp_stem_e_grad, data = ddnz)
print(sprintf("%7.4f %9.4f %9.4f  emprate_native ~ immshare_emp_stem_e_grad",
  with(ddnz, cor(emprate_native, immshare_emp_stem_e_grad)),
  coef(lma)[1], coef(lma)[2]))

# Logs, unweighted.
lma <- lm(lnemprate_native ~ lnimmshare_emp_stem_e_grad, data = ddnz)
print(sprintf("%7.4f %9.4f %9.4f  lnemprate_native ~ lnimmshare_emp_stem_e_grad",
  with(ddnz, cor(lnemprate_native, lnimmshare_emp_stem_e_grad)),
  coef(lma)[1], coef(lma)[2]))

# Logs, weighted by weight_native.
# The correlation on this row is the weighted correlation (cov.wt), so that it
# matches the weighted fit; previously the unweighted value from the row above
# was repeated here under the "weights=weight_native" label.
lma <- lm(lnemprate_native ~ lnimmshare_emp_stem_e_grad, data = ddnz,
  weights = weight_native)
print(sprintf("%7.4f %9.4f %9.4f  lnemprate_native ~ lnimmshare_emp_stem_e_grad, weights=weight_native",
  with(ddnz, cov.wt(cbind(lnemprate_native, lnimmshare_emp_stem_e_grad),
    wt = weight_native, cor = TRUE)$cor[1, 2]),
  coef(lma)[1], coef(lma)[2]))

# Table section for ddz1 (emp_imm counts of 0 replaced by 1).
# Each row prints: correlation coefficient, intercept, slope, model label.
print(paste(labyears, "FOR VALUES OF ZERO FOREIGN WORKERS IN STEM, CHANGE ZERO TO ONE", sep=", "))

# Levels, unweighted.
lma <- lm(emprate_native ~ immshare_emp_stem_e_grad, data = ddz1)
print(sprintf("%7.4f %9.4f %9.4f  emprate_native ~ immshare_emp_stem_e_grad",
  with(ddz1, cor(emprate_native, immshare_emp_stem_e_grad)),
  coef(lma)[1], coef(lma)[2]))

# Logs, unweighted.
# (A leftover bare lm() call that discarded its result -- and auto-printed into
# the middle of the table when the script was run at top level -- was removed.)
lma <- lm(lnemprate_native ~ lnimmshare_emp_stem_e_grad, data = ddz1)
print(sprintf("%7.4f %9.4f %9.4f  lnemprate_native ~ lnimmshare_emp_stem_e_grad",
  with(ddz1, cor(lnemprate_native, lnimmshare_emp_stem_e_grad)),
  coef(lma)[1], coef(lma)[2]))

# Logs, weighted by weight_native.
# The correlation on this row is the weighted correlation (cov.wt), so that it
# matches the weighted fit; previously the unweighted value from the row above
# was repeated here under the "weights=weight_native" label.
lma <- lm(lnemprate_native ~ lnimmshare_emp_stem_e_grad, data = ddz1,
  weights = weight_native)
print(sprintf("%7.4f %9.4f %9.4f  lnemprate_native ~ lnimmshare_emp_stem_e_grad, weights=weight_native",
  with(ddz1, cov.wt(cbind(lnemprate_native, lnimmshare_emp_stem_e_grad),
    wt = weight_native, cor = TRUE)$cor[1, 2]),
  coef(lma)[1], coef(lma)[2]))

# Table section for ddca (the rows labelled California).
# Six simple regressions are reported, each as: correlation coefficient,
# intercept, slope, and the "y ~ x" label. The pairs are listed as (y, x).
print(paste(labyears, "CALIFORNIA", sep=", "))
for (pair in list(
  c("emprate_native",             "immshare_emp_stem_e_grad"),
  c("lnemprate_native",           "lnimmshare_emp_stem_e_grad"),
  c("emprate_native",             "year"),
  c("lnemprate_native",           "year"),
  c("immshare_emp_stem_e_grad",   "year"),
  c("lnimmshare_emp_stem_e_grad", "year")
)) {
  model_label <- paste(pair[1], "~", pair[2])
  lma <- lm(as.formula(model_label), data = ddca)
  print(sprintf(paste0("%7.4f %9.4f %9.4f  ", model_label),
    cor(ddca[[pair[1]]], ddca[[pair[2]]]), coef(lma)[1], coef(lma)[2]))
}
# Drop the loop temporaries so only lma remains, as before.
rm(pair, model_label)

# zg1: scatter of employed native workers against the employed immigrant
# count (emp_imm) for every row of dd. Point symbols and colours come from
# the sPch / sCol columns of dd; the legend symbols come from gg.
plot(emp_native ~ emp_imm, data = dd,
  pch = dd$sPch, col = dd$sCol, cex = 0.5,
  xlab = forlab1, ylab = "Employed Native Workers")
title(main = paste("Foreign STEM Workers,", labyears))
	#with(dd, text(emp_imm, emp_native, labels=ss, cex=0.5, pos=4))
legend("bottomright", title = "State",
  legend = c("California", "Connecticut", "DC", "Florida", "Georgia",
    "Illinois", "Maryland", "Massachusetts", "Michigan", "New Jersey",
    "New York", "Ohio", "Oregon", "Pennsylvania", "Texas", "Virginia",
    "Washington"),
  pch = gg$pch, col = gg$col, cex = 0.5, inset = 0, horiz = FALSE)
readline("Press enter to continue, escape to exit")

x11()
# zg2: native employment rate against immigrant share of employment, all rows
# of dd (including rows where the share is 0), with the least-squares line.
plot(emprate_native ~ immshare_emp_stem_e_grad, data = dd,
  pch = dd$sPch, col = dd$sCol, cex = 0.5,
  xlab = forlab2, ylab = "Native Worker Employment Rate")
title(main = "Native Employment Rate vs. Immigrant Share\n(with values of 0)")
legend("topright",
  legend = c("California", "Connecticut", "DC", "Florida", "Georgia",
    "Illinois", "Maryland", "Massachusetts", "Michigan", "New Jersey",
    "New York", "Ohio", "Oregon", "Pennsylvania", "Texas", "Virginia",
    "Washington"),
  pch = gg$pch, col = gg$col, cex = 0.5, inset = 0, horiz = FALSE)

# Overlay the unweighted fit.
lma <- lm(emprate_native ~ immshare_emp_stem_e_grad, data = dd)
abline(lma)
readline("Press enter to continue, escape to exit")

x11()
# zg3: log native employment rate against log immigrant share, using ddnz
# (rows with a zero immigrant count removed). Two fitted lines are overlaid:
# black = unweighted, red = weighted by weight_native.
plot(lnemprate_native ~ lnimmshare_emp_stem_e_grad, data = ddnz,
  pch = ddnz$sPch, col = ddnz$sCol, cex = 0.5,
  xlab = paste("Log of", forlab2),
  ylab = "Log of Native Worker Employment Rate")
title(main = "Native Employment Rate vs. Immigrant Share\n(without values of 0)")
legend("bottomleft",
  legend = c("California", "Connecticut", "DC", "Florida", "Georgia",
    "Illinois", "Maryland", "Massachusetts", "Michigan", "New Jersey",
    "New York", "Ohio", "Oregon", "Pennsylvania", "Texas", "Virginia",
    "Washington"),
  pch = gg$pch, col = gg$col, cex = 0.5, inset = 0, horiz = FALSE)
legend("bottomright",
  legend = c("w/o weighting", "with weighting"),
  lty = 1, col = c("black", "red"), cex = 0.5, inset = 0, horiz = FALSE)

# Unweighted fit (black).
lma <- lm(lnemprate_native ~ lnimmshare_emp_stem_e_grad, data = ddnz)
abline(lma)

# Weighted fit (red).
lma <- lm(lnemprate_native ~ lnimmshare_emp_stem_e_grad, data = ddnz,
  weights = weight_native)
abline(lma, col = "red")
readline("Press enter to continue, escape to exit")

x11()
# zg4: log native employment rate against log immigrant share, using ddz1
# (zero immigrant counts replaced by 1). Two fitted lines are overlaid:
# black = unweighted, red = weighted by weight_native.
plot(lnemprate_native ~ lnimmshare_emp_stem_e_grad, data = ddz1,
  pch = ddz1$sPch, col = ddz1$sCol, cex = 0.5,
  xlab = paste("Log of", forlab2),
  ylab = "Log of Native Worker Employment Rate")
title(main = "Native Employment Rate vs. Immigrant Share\n(with values of 0 set to 1)")
# The state legend is shifted right from the bottom-left corner via inset.
legend(x = "bottomleft",
  legend = c("California", "Connecticut", "DC", "Florida", "Georgia",
    "Illinois", "Maryland", "Massachusetts", "Michigan", "New Jersey",
    "New York", "Ohio", "Oregon", "Pennsylvania", "Texas", "Virginia",
    "Washington"),
  pch = gg$pch, col = gg$col, cex = 0.5, inset = c(0.41, 0), horiz = FALSE)
legend("bottomright",
  legend = c("w/o weighting", "with weighting"),
  lty = 1, col = c("black", "red"), cex = 0.5, inset = 0, horiz = FALSE)

# Unweighted fit (black).
lma <- lm(lnemprate_native ~ lnimmshare_emp_stem_e_grad, data = ddz1)
abline(lma)

# Weighted fit (red).
lma <- lm(lnemprate_native ~ lnimmshare_emp_stem_e_grad, data = ddz1,
  weights = weight_native)
abline(lma, col = "red")
readline("Press enter to continue, escape to exit")

x11()
# zg5: native employment rate by year for the ddca rows (California), with
# the least-squares trend line.
plot(emprate_native ~ year, data = ddca,
  pch = ddca$sPch, col = ddca$sCol, cex = 0.5,
  xlab = "Year", ylab = "Native Worker Employment Rate")
title(main = "Native Employment Rate vs. Year in California")

# Linear trend over all years.
lma <- lm(emprate_native ~ year, data = ddca)
abline(lma)
readline("Press enter to continue, escape to exit")

x11()
# zg6: immigrant share of employment by year for the ddca rows (California).
# Two trend lines are overlaid: black uses every year, red leaves out 2002.
plot(immshare_emp_stem_e_grad ~ year, data = ddca,
  pch = ddca$sPch, col = ddca$sCol, cex = 0.5,
  xlab = "Year", ylab = forlab2)
title(main = "Foreign STEM Share vs. Year in California")
legend("bottomright",
  legend = c("with all years", "without 2002"),
  lty = 1, col = c("black", "red"), cex = 0.5, inset = 0, horiz = FALSE)

# Trend over all years (black).
lma <- lm(immshare_emp_stem_e_grad ~ year, data = ddca)
abline(lma)

# Trend with 2002 dropped as an outlier (red).
ddcam02 <- ddca[ddca$year != 2002, ]
lma <- lm(immshare_emp_stem_e_grad ~ year, data = ddcam02)
abline(lma, col = "red")

# Description of code
# Source code for amjobsg.R (sourced code)