Loan Interest Rate Associations Analysis

Load some libraries

library(impute)

Load data

Read the data in

# Print the working directory so the relative data path below can be checked.
getwd()
## [1] "/home/jakub/R/DA/assignment1/final"
# Read text columns as factors explicitly. This was the read.csv default before
# R 4.0.0; later code in this script (e.g. colouring points by
# loans$Loan.Length) needs factor columns and breaks on plain character data.
loans <- read.csv("../data/loansData.csv", stringsAsFactors = TRUE)

Preprocessing

Transform data types

# Strip the percent sign so both rate columns become plain numbers.
loans$Interest.Rate <- as.numeric(sub("%", "", loans$Interest.Rate))
loans$Debt.To.Income.Ratio <- as.numeric(sub("%", "", loans$Debt.To.Income.Ratio))

# Treat the literal "n/a" employment length as missing. %in% never yields NA,
# so rows that are already missing do not disturb the logical index.
loans$Employment.Length[loans$Employment.Length %in% "n/a"] <- NA

# Put the employment-length levels in chronological order. The alphabetical
# order returned by levels() depends on the collation locale, so a fixed index
# permutation of it is not portable. Instead, rank each level by the number it
# contains and place a "less than" level just before the plain level with the
# same number.
# NOTE(review): assumes every level contains one number of years and that a
# leading "<" marks "less than" -- confirm against the data file.
new.employment.year.levels <- levels(factor(loans$Employment.Length))
level.years <- as.numeric(gsub("[^0-9]", "", new.employment.year.levels))
level.rank <- level.years - 0.5 * startsWith(new.employment.year.levels, "<")
ordered.year.levels <- new.employment.year.levels[order(level.rank)]
loans$Employment.Length <- factor(loans$Employment.Length, levels = ordered.year.levels)

# Numeric FICO score: drop the hyphen and the three characters after it,
# keeping the lower bound of the range.
loans$FICO.Numeric <- as.numeric(gsub("-...", "", loans$FICO.Range))

Final graphs

Correlation

# Panel (a): absolute correlation of every other column with the interest rate.

# Use every column except the textual FICO range.
loan_names <- names(loans)
loan_names <- loan_names[!(loan_names %in% "FICO.Range")]

# data.matrix() turns the data frame into a numeric matrix; impute.knn() then
# fills in the missing cells. Its console output is kept below.
loansMatrixTmp <- data.matrix(loans[, loan_names])
loansMatrix <- impute.knn(loansMatrixTmp)$data
## Cluster size 2500 broken into 1827 673 
## Cluster size 1827 broken into 1211 616 
## Done cluster 1211 
## Done cluster 616 
## Done cluster 1827 
## Done cluster 673
correlations <- cor(loansMatrix)

# Sort the interest-rate row by decreasing absolute correlation and drop the
# first entry (expected to be the interest rate's correlation with itself).
rate_correlations <- correlations["Interest.Rate", ]
ordered <- rate_correlations[order(-abs(rate_correlations))]
ordered_other <- ordered[seq(2, length(ordered))]

# NOTE(review): these display labels are positional. They are only correct
# while the sorted order of `ordered_other` matches this list -- re-check
# whenever the data or the preprocessing changes.
bar_labels <- c("FICO", "Loan Length", "Funded By Investors", "Requested",
    "Debt To Income Ratio", "Inquiries", "Credit Lines", "Home Ownership",
    "Purpose", "Credit Balance", "Employment Length", "Monthly Income",
    "State")

# 2 x 2 panel layout; the wide left margin leaves room for the bar labels.
par(mfrow = c(2, 2), mgp = c(3, 1, 0), mar = c(6, 10, 4, 2))
barplot(abs(ordered_other), names.arg = bar_labels, horiz = TRUE, las = 2,
    xlab = "Correlation")
mtext(text = "(a)", side = 3, line = 0, cex = 1.5)

plot of chunk unnamed-chunk-3

Box plot of interest rate by FICO range

# Panel (b): distribution of the interest rate within each FICO range.
# mgp pushes the axis titles outwards, clear of the rotated (las = 2) labels.
par(mar = c(6, 6, 4, 2), mgp = c(4.5, 1, 0))
boxplot(Interest.Rate ~ FICO.Range, data = loans, las = 2, col = "blue",
    xlab = "FICO Range", ylab = "Interest Rate (%)")
mtext(text = "(b)", side = 3, line = 1, cex = 1.5)

plot of chunk unnamed-chunk-4

Interest rate by FICO score and loan length

# Panel (c): interest rate against FICO score, coloured by loan length, with
# one fitted regression line per loan length.
par(mar = c(6, 6, 4, 2), mgp = c(3, 1, 0))

# Colour by integer level code. Passing the column itself as `col` only works
# when it is a factor; a character column (the read.csv default since R 4.0.0)
# would be taken as colour names and fail.
loan.length.code <- as.integer(factor(loans$Loan.Length))
plot(loans$FICO.Numeric, loans$Interest.Rate, col = loan.length.code, pch = 19,
    cex = 1, xlab = "FICO Range", ylab = "Interest Rate (%)")

# Linear model with a FICO x loan-length interaction.
lmBoth <- lm(loans$Interest.Rate ~ loans$FICO.Numeric + loans$Loan.Length +
    loans$Loan.Length * loans$FICO.Numeric)

# Coefficients by position: 1 = intercept, 2 = FICO slope, 3 = intercept shift
# and 4 = slope shift for the second loan-length level.
# NOTE(review): the positions assume Loan.Length has exactly two levels --
# confirm against the data.
fit.coef <- coef(lmBoth)

# Line for the first loan-length level, in the colour of level code 1 under
# the default palette.
abline(a = fit.coef[1], b = fit.coef[2], col = "black", lwd = 5)
# Line for the second level (colour of level code 2): base intercept and slope
# plus their shifts.
abline(a = fit.coef[1] + fit.coef[3], b = fit.coef[2] + fit.coef[4],
    col = "red", lwd = 5)
mtext(text = "(c)", side = 3, line = 1, cex = 1.5)

plot of chunk unnamed-chunk-5

# TODO: add the fitted linear model here; it will go into the final analysis

The same scatter plot, coloured by the amount funded by investors

# Panel (d): interest rate against FICO score, coloured by the amount funded
# by investors.

# Earlier variant kept for reference (point size scaled by requested amount):
# plot(loans$Interest.Rate, loans$FICO.Numeric,
# cex=log(loans$Amount.Requested/8000), pch=19)
par(mar = c(6, 6, 4, 2), mgp = c(3, 1, 0))

# 50-step colour ramp from orange (smallest amounts) to blue (largest).
colfunc <- colorRampPalette(c("orange", "blue"))
ramp.colours <- colfunc(50)

# Cut the funded amount into 50 equal-width bins, each labelled with its ramp
# colour, so the bin label can be used directly as the point colour.
z.cols <- cut(loans$Amount.Funded.By.Investors, breaks = 50, labels = ramp.colours)
point.colours <- as.character(z.cols)

plot(x = loans$FICO.Numeric, y = loans$Interest.Rate, pch = 19, cex = 1.2,
    col = point.colours, xlab = "FICO Range", ylab = "Interest Rate (%)")

# The legend shows only the two ends of the colour ramp.
legend("topright", pch = 19, cex = 1, col = c("blue", "orange"),
    legend = c("Big Amount Funded By Investor", "Small Amount Funded By Investor"))
mtext(text = "(d)", side = 3, line = 1, cex = 1.5)

plot of chunk unnamed-chunk-6