# End-to-end creditR walkthrough on the German credit data: install and load
# the package, build a small sample, apply WOE binning, eliminate weak
# variables by Gini, fit a logistic regression and attach PD values to the
# train and test sets.

# Setup ----
install.packages("devtools", dependencies = TRUE)
library(devtools)
devtools::install_github("ayhandis/creditR")
library(creditR)
ls("package:creditR")

# Model data and data structure ----
data("germancredit")
str(germancredit)

# Preparing a sample data set
sample_data <- germancredit[, c(
  "duration.in.month",
  "credit.amount",
  "installment.rate.in.percentage.of.disposable.income",
  "age.in.years",
  "creditability"
)]

# Converting the 'creditability' (default flag) variable into numeric type:
# "bad" becomes 1, everything else becomes 0
sample_data$creditability <- ifelse(sample_data$creditability == "bad", 1, 0)

# Calculating the missing ratios
missing_ratio(sample_data)

# Splitting the data into train and test sets
# NOTE(review): 123 and 0.70 are presumably the seed and the train share;
# confirm against the train_test_split() documentation.
traintest <- train_test_split(sample_data, 123, 0.70)
train <- traintest$train
test <- traintest$test

# WOE transformation ----
# Applying WOE transformation on the variables
woerules <- woe.binning(
  df = train,
  target.var = "creditability",
  pred.var = train,
  event.class = 1
)
train_woe <- woe.binning.deploy(train, woerules, add.woe.or.dum.var = "woe")

# Creating a dataset with the transformed variables and default flag
train_woe <- woe.get.clear.data(
  train_woe,
  default_flag = "creditability",
  prefix = "woe"
)

# Applying the WOE rules used on the train data to the test data
test_woe <- woe.binning.deploy(test, woerules, add.woe.or.dum.var = "woe")
test_woe <- woe.get.clear.data(
  test_woe,
  default_flag = "creditability",
  prefix = "woe"
)

# Variable selection ----
# Performing the IV and Gini calculations for the whole data set
IV.calc.data(train_woe, "creditability")
Gini.univariate.data(train_woe, "creditability")

# Creating a new dataset by Gini elimination.
# IV elimination is also possible
eliminated_data <- Gini_elimination(train_woe, "creditability", 0.10)
str(eliminated_data)

# A demonstration of the functions useful in performing clustering
clustering_data <- variable.clustering(eliminated_data, "creditability", 2)
clustering_data

# Returns the data for variables that have the maximum gini value in the
# dataset
selected_data <- variable.clustering.gini(eliminated_data, "creditability", 2)

correlation.cluster(
  eliminated_data,
  clustering_data,
  variables = "variable",
  clusters = "Group"
)

# Modelling ----
# Creating a logistic regression model of the data
model <- glm(
  formula = creditability ~ .,
  family = binomial(link = "logit"),
  data = eliminated_data
)
summary(model)

# Calculating variable weights
woe.glm.feature.importance(eliminated_data, model, "creditability")

# Generating the PD values for the train and test data.
# The PD column is named directly in cbind() and the remaining column names
# are inherited from eliminated_data, so the labels stay correct whichever
# variables (and in whichever order) survive the Gini elimination.
ms_train_data <- cbind(eliminated_data, PD = model$fitted.values)

# drop = FALSE keeps a data frame even if only one column is selected
ms_test_data <- cbind(
  test_woe[, colnames(eliminated_data), drop = FALSE],
  PD = predict(model, type = "response", newdata = test_woe)
)