One thing that I’ve always wanted is a function that runs corrplot on all of the numerical variables in a data set.

Since I’m still new to R, this is all I could come up with. It definitely needs some more work, but it’ll have to do for now:

#' Plot a correlation matrix of all usable numeric columns of a data frame
#'
#' Cleans `data` in three passes, reporting after each pass how many columns
#' were dropped, then draws a `corrplot::corrplot()` of the remaining columns
#' with statistically insignificant correlations marked.
#'
#' Cleaning passes:
#' 1. Columns of class character are removed.
#' 2. Remaining non-numeric columns (e.g. factors) are converted with
#'    `as.numeric(as.character(x))`; columns that end up entirely `NA` are
#'    removed.
#' 3. Columns whose standard deviation is zero or `NA` are removed. Because
#'    `sd()` is called without `na.rm`, this also removes every column that
#'    still contains an `NA`, so the data handed to `cor()` is complete.
#'
#' @param data A data frame (or anything `as.data.frame()` accepts).
#' @param shape Passed to `corrplot()` as `method`.
#' @param sig Passed to `corrplot()` as `sig.level`.
#' @param insign Passed to `corrplot()` as `insig`.
#' @return The value of `corrplot::corrplot()`. Called mainly for its side
#'   effects: three printed status lines and the plot.
explData <- function(data, shape = "square", sig = 0.05, insign = "pch") {
  data <- as.data.frame(data)

  # ---- 1. Remove character columns ----
  # Test the column *class*; comparing the cell values to "character" (as the
  # earlier version did) never identifies a character column.
  is_chr <- vapply(data, is.character, logical(1))
  data <- data[, !is_chr, drop = FALSE]
  print(paste(sum(is_chr), "columns with class character were deleted.",
              sep = " "))

  # ---- 2. Coerce the rest to numeric; drop what cannot be coerced ----
  to_numeric <- function(column) {
    if (is.numeric(column)) {
      return(column)
    }
    # "NAs introduced by coercion" is the expected signal here, not a problem.
    suppressWarnings(as.numeric(as.character(column)))
  }
  # Store the converted values so sd()/cor() below never see a factor.
  for (i in seq_along(data)) {
    data[[i]] <- to_numeric(data[[i]])
  }
  all_na <- vapply(data, function(column) all(is.na(column)), logical(1))
  data <- data[, !all_na, drop = FALSE]
  print(paste(sum(all_na),
              "factor columns were deleted. Could not coerce into integers.",
              sep = " "))

  # ---- 3. Remove constant columns (and columns whose sd is NA) ----
  col_sd <- vapply(data, stats::sd, numeric(1))
  is_flat <- is.na(col_sd) | col_sd == 0
  data <- data[, !is_flat, drop = FALSE]
  print(paste(sum(is_flat),
              "columns with standard deviation equaling zero, were deleted.",
              sep = " "))

  n_col <- ncol(data)
  if (n_col < 2L) {
    stop("explData() needs at least two numeric, non-constant columns; ",
         n_col, " remained after cleaning.", call. = FALSE)
  }
  if (nrow(data) < 3L) {
    stop("explData() needs at least three rows to test correlations.",
         call. = FALSE)
  }
  if (!requireNamespace("corrplot", quietly = TRUE)) {
    stop("Package 'corrplot' is required by explData().", call. = FALSE)
  }

  # ---- 4. Correlations and their p-values ----
  corrs <- stats::cor(data)
  # Preallocated and symmetric: each pair is tested once and mirrored. The
  # diagonal stays 0, the p-value of a variable correlated with itself.
  p_values <- matrix(0, nrow = n_col, ncol = n_col, dimnames = dimnames(corrs))
  for (i in seq_len(n_col - 1L)) {
    for (j in seq(i + 1L, n_col)) {
      p <- stats::cor.test(data[[i]], data[[j]])$p.value
      p_values[i, j] <- p
      p_values[j, i] <- p
    }
  }

  # ---- 5. Plot, marking statistically insignificant relationships ----
  # NOTE(review): corrplot documents `addrect` as applying only with
  # order = "hclust"; it is kept so the call matches the earlier version.
  corrplot::corrplot(
    corrs,
    p.mat = p_values,
    sig.level = sig,
    method = shape,
    type = "lower",
    order = "FPC",
    addrect = 2,
    insig = insign
  )
}