algorithmica-repository
diff --git a/‎1.introduction/commands.txt‎
Lines changed: 56 additions & 0 deletions b/‎1.introduction/commands.txt‎
Lines changed: 56 additions & 0 deletions
diff --git a/‎2.datastructures/datastructures1.R‎
Lines changed: 73 additions & 0 deletions b/‎2.datastructures/datastructures1.R‎
Lines changed: 73 additions & 0 deletions
diff --git a/‎2.datastructures/datastructures2.R‎
Lines changed: 96 additions & 0 deletions b/‎2.datastructures/datastructures2.R‎
Lines changed: 96 additions & 0 deletions
diff --git a/‎2.datastructures/datastructures3.R‎
Lines changed: 67 additions & 0 deletions b/‎2.datastructures/datastructures3.R‎
Lines changed: 67 additions & 0 deletions
diff --git a/‎3.eda-stats/eda1.R‎
Lines changed: 10 additions & 0 deletions b/‎3.eda-stats/eda1.R‎
Lines changed: 10 additions & 0 deletions
diff --git a/‎3.eda-stats/eda2.R‎
Lines changed: 35 additions & 0 deletions b/‎3.eda-stats/eda2.R‎
Lines changed: 35 additions & 0 deletions
diff --git a/‎4.eda-graphics/eda-graphics.R‎
Lines changed: 16 additions & 0 deletions b/‎4.eda-graphics/eda-graphics.R‎
Lines changed: 16 additions & 0 deletions
@@ -0,0 +1,56 @@
+help related commands
+--------------------
+help.start()
+help(name) / ?name
+example(name)
+
+workspace commands
+------------------
+ls()
+ls(pattern="")
+rm(object name)
+data()
+source("script-file")
+getwd(), setwd()
+save.image()
+
+history commands
+---------------
+history()
+history(max.show=Inf)
+savehistory(file="")
+loadhistory(file="")
+
+packages commands
+---------------------
+install.packages("packagename")
+install.packages("packagename",repos="")
+library(packagename)
+library() -- shows all packages installed on the machine (they may or may not be loaded into the R session)
+search() -- shows all packages currently loaded into the R session, in search-path order
+
+
+common functions
+---------------------
+length(object) # number of elements or components
+str(object)    # structure of an object
+class(object)  # class or type of an object
+names(object)  # names
+
+head(mydata, n=10)/head(mydata, 10)
+
+tail(mydata, n=5)/tail(mydata, 5)
+colnames(mydata)
+rownames(mydata)
+summary(mydata)
+
+summary functions
+----------------
+mean(x)/mean(x, na.rm=TRUE)
+median(x)
+table(x)
+var(x)
+sd(x)
+max(x)-min(x)
+IQR(x)
+quantile(x)
@@ -0,0 +1,73 @@
+a <- 10
+b <- 20.3
+c <- a + b  # `c` is a number here; the c(...) calls below still find the function
+a <- 20  # reassigning `a` does not recompute `c`
+ls()
+rm("b")
+# --- vectors: creation and indexing ---
+v1 <- 1:1000
+v1
+
+v <- c(10, 20, 15, 22, 8, 2)
+v[1:3]  # elements 1 through 3
+v[c(3, 5)]  # elements 3 and 5
+v[v > 15]  # elements selected by a logical mask
+v[-c(1, 2)]  # everything except elements 1 and 2
+which(v > 15)  # positions (not values) of the matches
+# --- element-wise arithmetic on equal-length vectors ---
+v2 <- 1:6
+v3 <- v + v2
+
+v4 <- v * v2
+
+sd(v)
+length(v)
+v2 <- sort(v)
+
+v3 <- seq(1, 100, 10)
+
+v4 <- rep(1, 10)
+
+v5 <- c("aa", "bb", "ccc")
+# --- data frame read from a CSV file (first row holds the column names) ---
+students <- read.csv("E:/data analytics/datasets/students.csv", header = TRUE)
+students
+
+class(students)
+rn <- c("user1", "user2")
+cn <- c("movie1", "movie2", "movie3")
+# --- matrices ---
+user_movies_ratings <- matrix(1:6, 2, 3, dimnames = list(rn, cn))
+user_movies_ratings1 <- rbind(user_movies_ratings, c(5, 2, 5))
+# Three row names only fit the 3-row matrix produced by rbind(); assigning
+# them to the 2-row user_movies_ratings stops with a dimnames length error.
+rownames(user_movies_ratings1) <- c("user1", "user2", "user3")
+user_movies1 <- matrix(rep(1, 6), 2, 3)
+dimnames(user_movies1) <- list(rn, cn)
+dim(user_movies_ratings)
+nrow(user_movies_ratings)
+t(user_movies_ratings)
+# (3 x 3) %*% (3 x 2) gives a 3 x 2 matrix product
+user_movies_ratings2 <- matrix(1:6, 3, 2)
+res <- user_movies_ratings1 %*% user_movies_ratings2
+diag(3)
+# a plain vector has no dim attribute; as.matrix() turns it into a 3 x 1 matrix
+v1 <- 1:3
+dim(v1)
+length(v1)
+v1 <- as.matrix(v1)
+dim(v1)
+# byrow = TRUE fills row by row; the default fills column by column
+m1 <- matrix(1:6, 2, 3, byrow = TRUE)
+m1
+m2 <- matrix(1:6, 2, 3)
+m2
+# --- data frame built from equal-length vectors ---
+d <- c(1, 2, 3, 4)
+e <- c("red", "white", "red", NA)
+f <- c(TRUE, TRUE, TRUE, FALSE)
+mydata <- data.frame(d, e, f)
+names(mydata) <- c("ID", "Color", "Passed")
+# --- factor (note: this reuses the name `f` from above) ---
+f <- factor(c("y", "n"))
+f
@@ -0,0 +1,96 @@
+
+
+### Let's create some data!
+# Builds one example object per core data structure and saves the workspace at the end.
+
+## Vector
+
+vec_1 <- c(1,2,3,4,5,6)
+vec_1
+
+vec_2 <- 1:6 # same values as vec_1, written as a sequence
+vec_2
+
+vec_3 <- c("One", "Two", "Three", "Four", "Five", "Six")
+vec_3
+
+vec_4 <- 1:500
+vec_4
+
+
+## Matrix
+
+mat_1 <- matrix(1:16, nrow = 4, ncol = 4)
+mat_1
+
+mat_2 <- matrix(vec_1, nrow = 3, ncol = 2, byrow = TRUE) # filled row by row
+mat_2
+
+mat_3 <- matrix(vec_1, nrow = 3, ncol = 2, byrow = FALSE) # filled column by column
+mat_3
+
+rows <- c("A", "B", "C")
+cols <- c("AA", "BB")
+
+mat_4 <- matrix(vec_1, nrow = 3, ncol = 2, dimnames = list(rows, cols)) # names set at creation
+mat_4
+
+rownames(mat_3) <- rows # rename rows afterwards
+mat_3
+
+## Array
+
+array_1 <- array(1:18, c(3,2,3)) # 3 rows x 2 columns x 3 slices
+array_1
+
+dims <- c("AAA", "BBB", "CCC")
+array_2 <- array(1:18, c(3, 2, 3), dimnames = list(rows, cols, dims)) # same shape, every dimension named
+array_2
+
+
+## Factor
+
+vec_1
+vec_4 <- c("odd", "even", "odd", "even", "odd", "even") # replaces the 1:500 vector assigned earlier
+vec_4
+
+fac_1 <- factor(vec_4)
+fac_1 
+as.numeric(fac_1) # levels are created in alphabetical order: "even" = 1, "odd" = 2
+
+vec_5 <- c("small", "small", "medium", "medium", "high", "high")
+vec_5
+
+fac_2 <- factor(vec_5, ordered = TRUE)
+fac_2 # ordered factor: the Levels line now shows the order with "<" between levels
+as.numeric(fac_2) # still alphabetical: "high" = 1, "medium" = 2, "small" = 3
+
+fac_3 <- factor(vec_5, ordered = TRUE, levels = c("small", "medium", "high"))
+fac_3 
+as.numeric(fac_3) # codes now follow the declared order: "small" = 1, "medium" = 2, "high" = 3
+
+
+## Data Frame
+
+df_1 <- data.frame(vec_1, vec_2, vec_3, fac_1, fac_3)
+df_1
+str(df_1) # shows structure - before R 4.0 data.frame() converted characters into factors; since 4.0 they stay character
+
+names(df_1) <- c("var_1", "var_2", "var_3", "var_4", "var_5")
+df_1
+
+
+## List
+
+list_1 <- list(vec_1, vec_2, vec_3) # unnamed elements
+list_1
+
+list_2 <- list(vector_1 = vec_1, vector_2 = vec_2, vector_3 = vec_3) # named elements
+list_2
+
+list_3 <- list(text = "Sample text", vector = vec_1, matrix = mat_2, array = array_2, factor = fac_3, data_frame = df_1) # elements of different types
+list_3
+
+
+### saving all data
+save.image(file="data.RData")
@@ -0,0 +1,67 @@
+
+## Vector
+# NOTE(review): vec_1, mat_1, mat_4, array_2, df_1 and list_3 are not created in this script - presumably they come from the previous script or data.RData; confirm.
+vec_1 # whole vector
+vec_1[1] # 1st value
+vec_1[c(1,4)] # 1st and 4th value
+vec_1[1:4] # 1st thru 4th value
+
+## Matrix
+
+mat_1
+mat_1[1,] # 1st row
+mat_1[,1] # 1st column
+mat_1[1,1] # cell in row 1, column 1
+
+mat_4
+mat_4[c("A"),] # row "A"
+
+
+## Array
+
+array_2
+array_2[,,2] # 2nd slice along the 3rd dimension
+array_2[,,c("BBB")] # same as above
+array_2[2,,] # 2nd row of every slice
+
+## Data Frame
+
+df_1
+df_1[1,] #1st row - see Matrix
+df_1$var_2 # column/variable named "var_2"
+df_1$var_3 # column/variable named "var_3"
+df_1[,3] # 3rd column
+
+## List
+
+list_3
+list_3[[3]] # extracts the 3rd element (the matrix) itself
+list_3[3] # 3rd element again, but still wrapped in a list
+
+list_3$factor # selects list entry "factor"
+list_3$data_frame # selects list entry "data_frame"
+
+# why you should use double brackets
+test_1 <- list_3[[3]]
+test_2 <- list_3[3]
+test_1
+test_2
+is.matrix(test_1) # double brackets returned the matrix
+is.list(test_2) # single brackets returned a list
+
+list_3[[c("matrix")]] # works also
+list_3$matrix # works also
+
+
+## nesting
+
+list_3$data_frame$var_4 # selects column "var_4" in list entry "data_frame"
+list_3[[6]]$var_4 # same as above
+list_3[[c("data_frame")]]$var_4 # same as above
+
+
+## different ways to access data
+
+array_2
+array_2[1,2,2] # row 1, column 2, slice 2
+array_2[,,2][1,2] # same as above
@@ -0,0 +1,10 @@
+students <- read.csv("E:/data analytics/datasets/students.csv", header = TRUE)
+class(students)  # type of the loaded object
+dim(students)  # number of rows and columns
+names(students)  # column names
+str(students)  # type and first values of every column
+head(students, n = 10)  # first 10 rows
+tail(students)  # last rows (default count)
+summary(students)  # per-column summary
+summary(students[, "Height"])  # one column, selected by name
+summary(students$Height)  # the same column, selected with $
@@ -0,0 +1,35 @@
+library(dplyr)
+# Numeric summaries of the students data, followed by dplyr filtering and grouping.
+students <- read.csv("E:/data analytics/datasets/students.csv")
+d <- dim(students)
+typeof(d)
+str(students)
+head(students, n = 10)
+tail(students)
+summary(students)
+mean(students$Height)
+median(students$Height)
+mean(c(46, 37, 40, 33, 42, 36, 40, 47, 34, 45))
+sd(students$Height)
+mad(students$Height)
+IQR(students$Height)
+mean(students$MilesHome)  # NA if the column contains missing values
+mean(students$MilesHome, na.rm = TRUE)  # missing values dropped first
+class(students)
+# Two-step version: keep the matching rows, then keep only the Height column.
+students1 <- filter(students, Sleep %in% c(6, 8) & BloodType == "O")
+students2 <- select(students1, Height)
+# Same filter as a pipeline; Family is added, then only the row count is returned.
+students %>%
+  filter(Sleep %in% c(6, 8) & BloodType == "O") %>%
+  arrange(Height) %>%
+  mutate(Family = Brothers + Sisters) %>%
+  summarise(n())
+# Number of students per Major.
+by.major <- group_by(students, Major)
+class(by.major)
+summarise(by.major, count = n())
+
+
+
+
@@ -0,0 +1,16 @@
+library(ggplot2)
+# Frequency tables and ggplot2 charts for the students data.
+students <- read.csv("E:/data analytics/datasets/students.csv")
+dim(students)
+summary(students$Sex)
+table(students$Level)
+with(students, table(Level))  # same counts, column looked up inside the data frame
+
+ggplot(students, aes(x = BloodType)) + geom_bar()
+with(students, table(Sex, Level))  # two-way table of Sex by Level
+ggplot(students, aes(x = Level, fill = BloodType)) + geom_bar(position = "dodge")
+
+ggplot(students, aes(x = Height)) + geom_dotplot()
+
+
+