# Exploratory analysis of employee attrition: how many people left, how the
# numbers break down per division, and how leavers cluster by satisfaction
# level and last evaluation score.
# NOTE(review): assumes `left` is a 0/1 indicator (1 = employee left) and that
# `Division` holds the division label -- confirm against the CSV.

data <- read.csv("Why are employees leaving.csv", header = TRUE)

# Overall attrition ----

# How many people left the job?
n_left <- sum(data$left)
n_left

# Pie chart of leavers vs. everyone else. The labels are written out in the
# same order as the slices; deriving them from unique(data$left) would depend
# on which value happens to appear first in the file.
pie(
  c(n_left, nrow(data) - n_left),
  labels = c("left = 1", "left = 0"),
  col = rainbow(3)
)

# Division-wise analysis ----

# Only numeric columns can be summed or averaged. Aggregating every column
# (including Division itself and any text column) either errors or produces
# all-NA columns with warnings.
value_cols <- names(data)[
  vapply(data, is.numeric, logical(1)) & names(data) != "Division"
]

# Per-division totals of every numeric column.
left_div <- aggregate(
  data[value_cols],
  by = list(Division = data$Division),
  FUN = sum
)

# Per-division count of people who left.
left_div1 <- aggregate(data$left ~ data$Division, data = data, FUN = sum)

# Per-division averages of every numeric column (grouping column: Group.1).
divisional_data <- aggregate(
  data[value_cols],
  by = list(data$Division),
  FUN = mean
)

# Satisfaction vs. latest evaluation ----

# All employees.
plot(data$satisfaction_level, data$last_evaluation)

# Only the employees who left; which() drops rows where `left` is NA.
left_data <- data[which(data$left == 1), , drop = FALSE]
plot(left_data$satisfaction_level, left_data$last_evaluation)

# Cluster labels for leavers ----

# Every leaver starts with the placeholder label "NA" (a string, not a missing
# value, so the == comparisons below always yield TRUE/FALSE). The vector is
# sized to exactly nrow(left_data) so it always lines up with the data frame.
category <- rep("NA", nrow(left_data))

satisfaction <- left_data$satisfaction_level
evaluation <- left_data$last_evaluation

# Rules are applied from lowest to highest priority so that, should they ever
# overlap, the result matches a winner -> unfulfilled -> badmatch if/else
# chain. which() skips rows whose scores are NA, leaving them labelled "NA".
category[which(satisfaction < 0.5 & evaluation < 0.6)] <- "badmatch"
category[which(satisfaction < 0.2 & evaluation > 0.7)] <- "unfulfilled"
category[which(satisfaction > 0.7 & evaluation > 0.7)] <- "winner"

left_data$category <- category

# Size of each cluster.
sum(left_data$category == "winner")
sum(left_data$category == "badmatch")
sum(left_data$category == "unfulfilled")

# The data viewer is only available in an interactive session.
if (interactive()) {
  View(left_data)
}

# Division-wise analysis for the "winner" category ----

# Number of "winner" leavers per division (columns: Group.1, x).
winners <- left_data[left_data$category == "winner", , drop = FALSE]
t1 <- aggregate(winners$left, by = list(winners$Division), FUN = sum)