-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathrandomForest.R
More file actions
115 lines (86 loc) · 3.71 KB
/
randomForest.R
File metadata and controls
115 lines (86 loc) · 3.71 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
# *** Using the randomForest package ***
# Built in Feb. 2009; final release in Oct. 2010.
# Load the randomForest package (CRAN; install.packages("randomForest")).
library(randomForest)
# Load the input *.csv data; replace "file_path" with the real path.
# (The original header line lacked the leading '#' and was a syntax error;
# the path placeholder also contained a stray space.)
fin <- read.csv("file_path", header = TRUE, sep = ",", dec = ".")
# run random forest function
# runRF(): fit a random forest on a random 80/20 train/test split and return
# per-class test counts, pairwise misclassification counts and overall accuracy.
#
# Args:
#   data: a data.frame whose response column "type" is a 4-level factor
#         (levels "A".."D"). Defaults to the globally loaded `fin` so the
#         original no-argument call `runRF()` keeps working.
#
# Returns: a named list — F/M/N/X are the test-set sample counts of the four
#   classes; eF1..eX3 are the 12 off-diagonal confusion-matrix counts;
#   Accuracy is the fraction of correct test predictions.
runRF <- function(data = fin)
{
  # Separate testing data and training data: each row is assigned to
  # group 1 (train, prob 0.8) or group 2 (test, prob 0.2).
  ind <- sample(2, nrow(data), replace = TRUE, prob = c(0.8, 0.2))
  data.rf <- randomForest(type ~ ., data = data[ind == 1, ],
                          ntree = 1000, mtry = 7,
                          importance = TRUE, proximity = TRUE)
  data.pred <- predict(data.rf, data[ind == 2, ])
  # Confusion matrix: rows = observed class, columns = predicted class.
  # Indexing below assumes all four factor levels are present in the table.
  classError <- table(observed = data[ind == 2, "type"],
                      predicted = data.pred)
  # Row sums = number of test samples per observed class.
  fSamples <- sum(classError[1, ])  # class "A"
  mSamples <- sum(classError[2, ])  # class "B"
  nSamples <- sum(classError[3, ])  # class "C"
  xSamples <- sum(classError[4, ])  # class "D"
  # Off-diagonal entries: classError[i, j] = observed class i predicted as j.
  errorF  <- classError[1, 2]  # "A" predicted as "B"
  errorF2 <- classError[1, 3]  # "A" predicted as "C"
  errorF3 <- classError[1, 4]  # "A" predicted as "D"
  errorM  <- classError[2, 1]  # "B" predicted as "A"
  errorM2 <- classError[2, 3]  # "B" predicted as "C"
  errorM3 <- classError[2, 4]  # "B" predicted as "D" (original comment said C->D)
  errorN  <- classError[3, 1]  # "C" predicted as "A"
  errorN2 <- classError[3, 2]  # "C" predicted as "B"
  errorN3 <- classError[3, 4]  # "C" predicted as "D"
  errorX  <- classError[4, 1]  # "D" predicted as "A"
  errorX2 <- classError[4, 2]  # "D" predicted as "B"
  errorX3 <- classError[4, 3]  # "D" predicted as "C"
  # Accuracy = trace of the confusion matrix / number of test predictions.
  accuracy <- sum(diag(classError)) / length(data.pred)
  # Output results; names are kept exactly as before for doCV() compatibility.
  list(F = fSamples, M = mSamples, N = nSamples, X = xSamples,
       eF1 = errorF, eF2 = errorF2, eF3 = errorF3,
       eM1 = errorM, eM2 = errorM2, eM3 = errorM3,
       eN1 = errorN, eN2 = errorN2, eN3 = errorN3,
       eX1 = errorX, eX2 = errorX2, eX3 = errorX3,
       Accuracy = accuracy)
}
# doCV(): Usage doCV(x) - The x express the implementation of times for cross-validation
# doCV(): run runRF() x times (repeated random sub-sampling validation),
# collect the per-run counts, misclassification errors and accuracy into a
# data.frame, save it to "result.csv", and return it.
#
# Args:
#   x: number of cross-validation repetitions (positive integer).
#
# Returns: an x-by-17 data.frame (4 class counts, 12 error counts, Accuracy).
doCV <- function(x)
{
  # Preallocate one row per repetition, 17 columns.
  cvResult <- data.frame(matrix(0, x, 17))
  for (i in seq_len(x))
  {
    RF <- runRF()
    cvResult[i, ] <- c(RF$F, RF$M, RF$N, RF$X,
                       RF$eF1, RF$eF2, RF$eF3,
                       RF$eM1, RF$eM2, RF$eM3,
                       RF$eN1, RF$eN2, RF$eN3,
                       RF$eX1, RF$eX2, RF$eX3,
                       RF$Accuracy)
  }
  # BUG FIX: the original `rownames(cvResult[i, ]) = c(i)` set row names on a
  # temporary copy and had no effect; name the rows on the data.frame itself.
  rownames(cvResult) <- seq_len(x)
  # "A->B" = samples of observed class A predicted as B. (The original
  # headers used mojibake-encoded arrow characters, e.g. "A¡÷B".)
  colnames(cvResult) <- c("A", "B", "C", "D",
                          "A->B", "A->C", "A->D",
                          "B->A", "B->C", "B->D",
                          "C->A", "C->B", "C->D",
                          "D->A", "D->B", "D->C",
                          "Accuracy")
  # Save the result to *.csv.
  write.csv(cvResult, file = "result.csv")
  # Return the result (auto-prints when called at top level).
  cvResult
}
# =============================================================================
# Variable-importance analysis with the varSelRF package
# =============================================================================
# Load the varSelRF package to rank the predictors' importance and perform
# backwards variable elimination (CRAN; depends on randomForest).
library(varSelRF)
fin<- read.csv("file_path", header = TRUE, sep = ",", dec =".")
# Predictor matrix: columns 1..22; response: column 23.
# NOTE(review): assumes the CSV has exactly 23 columns with the class label
# last — confirm against the actual data file.
x<-fin[,1:22]
y<-fin[,23]
# varSelRF requires the class labels as a factor.
cl<-factor(y)
# Backwards elimination of variables: grow ntree trees initially, ntreeIterat
# trees in each iteration, dropping vars.drop.frac of the variables each round;
# c.sd = 1 applies the 1-s.e. rule when picking the final variable set.
fin.vs1 <- varSelRF(x, cl, ntree =200, ntreeIterat = 100,vars.drop.frac = 0.2,c.sd=1)
# Bootstrap the variable-selection procedure (bootnumber resamples) to assess
# the stability of the set selected above (passed in via srf=).
fin.vsb <- varSelRFBoot(x, cl,bootnumber = 10,usingCluster = FALSE,srf = fin.vs1)
# Re-read the data and rebuild x / y / cl for the importance-vs-random
# comparison below. NOTE(review): this repeats the block above with the same
# placeholder path — presumably a second dataset was intended; verify.
fin <- read.csv("file_path", header = TRUE, sep = ",", dec =".")
x<-fin[,1:22]
y<-fin[,23]
cl<-factor(y)
# Fit a large forest on the full data purely for importance scores
# (keep.forest = FALSE: the trees themselves are not retained).
fin.rf <- randomForest(type ~ ., data=fin, ntree=3000, keep.forest=FALSE,importance=TRUE)
# Importances obtained after randomly permuting the class labels numrandom
# times — the null distribution to compare the observed importances against.
fin.rvi <- randomVarImpsRF(x, cl, fin.rf,numrandom = 22, usingCluster = FALSE)
# Plot observed importance spectrum against the permutation-based null.
varSelImpSpecRF(fin.rf, randomImps =fin.rvi)
# Plot the observed variable importances together with the random ones.
randomVarImpsRFplot(fin.rvi, fin.rf)