## ftse100.R - Display employee productivity for FTSE-100 constituents
## Copyright © 2010 Allan Engelhardt <http://www.cybaea.net/>
## All Rights Reserved.

## Get the index constituents.  The response is read as a header-less CSV and
## its first column is labelled "symbol".
ftse.100 <- read.csv(file = "http://uk.old.finance.yahoo.com/d/quotes.csv?s=@%5EFTSE&f=s&e=.csv", header = FALSE)
names(ftse.100) <- c("symbol")

## Download the page at `page.url` and return its contents as a single string.
## The connection is opened in binary mode (what readChar() is meant for) and
## is closed on exit, including when the read itself fails.
fetch.profile <- function(page.url) {
    con <- url(page.url, open = "rb")
    on.exit(close(con), add = TRUE)
    readChar(con, 2^24)     # read limit; taken to be larger than any page
}

## Return the first capture group of `pattern` in `page`, or NA when `page` is
## not a single string or the pattern does not match.  A bare sub() returns
## the whole input unchanged on a failed match, which cannot be told apart
## from a genuine value for free-text fields such as the sector.
extract.field <- function(pattern, page, ignore.case = FALSE) {
    if (length(page) != 1L || is.na(page)) {
        return(NA_character_)
    }
    hit <- regmatches(page, regexec(pattern, page, ignore.case = ignore.case))[[1]]
    if (length(hit) < 2L) NA_character_ else hit[2]
}

## Convert a digit string with "," thousands separators to an integer.
## Yields NA for NA input and for anything as.integer() warns about.
parse.count <- function(s) {
    tryCatch(as.integer(gsub(",", "", s, fixed = TRUE)),
             warning = function(w) NA_integer_)
}

## For each stock symbol, get employees, profit, and sector.
## Rows are collected in a pre-sized list and bound once after the loop
## instead of growing the data frame with rbind() on every iteration.
symbols <- as.character(ftse.100$symbol)
rows <- vector("list", length(symbols))
for (i in seq_along(symbols)) {
    symbol <- symbols[i]
    profile.url <- paste0("http://uk.finance.yahoo.com/q/pr?s=", symbol)
    page <- fetch.profile(profile.url)

    empl <- parse.count(extract.field(
        '.*Number of employees:</td><td.*?>[[:space:]]*([[:digit:],]+).*',
        page, ignore.case = TRUE))
    ## The scraped figure is scaled by 1e6 (presumably the page reports
    ## millions -- confirm against the page layout).
    profit <- parse.count(extract.field(
        '.*Net Profit.*?</td><td.*?>[[:space:]]*([+-]?[[:digit:],]+).*',
        page)) * 1e6
    sector <- extract.field('.*Sector:</td><td.*?>(.*?)</td>.*', page)

    ## Non-positive values are rejected as well as missing ones: the figures
    ## are later drawn on logarithmic axes.
    unusable <- any(is.na(c(empl, profit))) || is.na(sector) ||
        any(c(empl, profit) <= 0)
    if (unusable) {
        cat("Error parsing symbol", symbol, "see", profile.url, "\n")
    } else {
        rows[[i]] <- data.frame(symbol = symbol, employees = empl,
                                profit = profit, sector = sector,
                                stringsAsFactors = FALSE)
    }
    Sys.sleep(1)            # pause between requests
}

## Drop the slots of symbols that failed to parse and assemble the result.
rows <- Filter(Negate(is.null), rows)
data <- if (length(rows) > 0) {
    do.call(rbind, rows)
} else {
    data.frame(symbol = NULL, employees = NULL, profit = NULL, sector = NULL)
}
## The plotting code iterates over levels(data$sector), so store the sector as
## a factor explicitly (levels in order of first appearance) rather than
## relying on the R-version-dependent stringsAsFactors default.
if (nrow(data) > 0) {
    data$sector <- factor(data$sector, levels = unique(data$sector))
}

## Save the data so we don't have to hit Yahoo all the time.
save(data, file = "data.RData")

## Save plot to file:
#png(filename="ftse100.png", width=800, height=800, pointsize=14, bg="white", res=100)

## Fail early with a clear message instead of an obscure axis-limit error.
if (nrow(data) == 0) {
    stop("no rows in 'data'; nothing to plot", call. = FALSE)
}

## Remember the previous graphics parameters so they can be restored later.
opar <- par(cex.sub = sqrt(sqrt(2)), font.sub = 3, font.lab = 2)

## x and y coordinates of plot and plot limits.
## The limits are the enclosing powers of ten, to suit the log-log axes.
x <- with(data, employees)
y <- with(data, profit/employees)
xlim <- c(10^floor(log10(min(x))), 10^ceiling(log10(max(x))))
ylim <- c(10^floor(log10(min(y))), 10^ceiling(log10(max(y))))

## Set up to display different color and symbols.
## data$sector is a plain character vector when the data frame was built
## without string-to-factor conversion (the default since R 4.0); levels()
## would then return NULL, so derive the sector names via factor(), which
## works for both character and factor input.
sectors <- levels(factor(data$sector))
n.sectors <- length(sectors)
markers <- 21:25                    # the fillable plotting symbols
pchs <- rep(markers, length.out = n.sectors)
## Colours are passed to the drawing calls directly, so the session-wide
## palette() is left untouched.
cols <- rainbow(n.sectors, start = 3/6, end = 6/6)

## Make empty plot: type = "n" sets up axes and titles without drawing points.
## (plot() starts a new frame itself, so no separate plot.new() is needed.)
plot(x, y, type = "n",
     log = "xy", xaxp = c(xlim, 1), yaxp = c(ylim, 1), xlim = xlim, ylim = ylim,
     main = "Profit per employee (FTSE 100)", xlab = "Employees", ylab = "Profit per employees (GBP)")

## Plot each sector with its own symbol and colour.
for (i in seq_along(sectors)) {
    in.sector <- which(data$sector == sectors[i])
    points(x[in.sector], y[in.sector],
           pch = pchs[i], col = cols[i], bg = cols[i])
}
legend(x = "bottomleft", legend = sectors, title = "Industry Sectors",
       col = cols, pt.bg = cols, pch = pchs, cex = 2/3, pt.cex = 1, ncol = 2)

## Fit a straight line in log-log space: log10(profit per employee) against
## log10(employees).  The slope is reported below as the "Power".
fit <- lm(log10(y) ~ log10(x))

## Evaluate the fit at two x positions, each a factor of 5 inside the x axis
## limits, and convert the predictions back from log10 to the data scale.
xl <- c(5 * xlim[1], xlim[2] / 5)
line.ends <- data.frame(x = xl)
yl <- 10^predict(fit, line.ends)
lines(xl, yl, lwd = 2, lty = "dashed", col = "darkred")

## Label the right-hand end of the line with the fitted slope.
slope.label <- sprintf("Power = %0.3g", coef(fit)[2])
text(xl[2], yl[2], slope.label,
     adj = c(0.25, -1.5), font = 2, col = "darkred")

## All done: restore the saved graphics parameters and close the device.
par(opar)
dev.off()


