Skip to content

Commit 1c40d5f

Browse files
author
algorithmica-repository
committed
Uploading kaggle recommendation data and solution
1 parent 6d7396c commit 1c40d5f

File tree

5 files changed

+1000398
-0
lines changed

5 files changed

+1000398
-0
lines changed
Lines changed: 66 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,66 @@
1+
library(recommenderlab)
2+
library(ggplot2)
3+
4+
# Load the MovieLense rating data ----
data(MovieLense)

# Work on a short alias; MovieLense itself stays available under its own name.
r <- MovieLense

# Inspect the structure, class and dimensions of the rating object.
str(r)
class(r)
dim(r)

# Show the first two rows and the first ten columns as a plain matrix.
as(r, "matrix")[1:2, 1:10]

# Distribution of the rating values: numeric summary, then a histogram with
# one bin per unit of rating.
summary(getRatings(r))
qplot(
  getRatings(r),
  binwidth = 1,
  main = "Histogram of ratings",
  xlab = "Rating"
)
17+
18+
# Mean rating of the first row of the rating matrix.
rowMeans(r[1, ])

# Normalize with the default method and show row 1, columns 1-10.
r_norm <- normalize(r)
as(r_norm, "matrix")[1, 1:10]

# Normalize again using Z-scores. This overwrites the previous r_norm, so the
# "Normalized Ratings" image below shows the Z-score version.
r_norm <- normalize(r, method = "Z-score")
as(r_norm, "matrix")[1, 1:10]

# Visual comparison of the raw and the normalized rating matrices.
image(MovieLense, main = "Raw Ratings")
image(r_norm, main = "Normalized Ratings")
28+
29+
# Histogram of the number of ratings per row of the rating matrix.
# rowCounts() returns one count per row (recommenderlab stores users as rows),
# so the x axis is the number of movies a user rated and the bar height is the
# number of users falling into that bin. The axis labels were previously
# swapped.
qplot(
  rowCounts(MovieLense),
  binwidth = 10,
  main = "Movies Rated on average",
  xlab = "# of movies rated",
  ylab = "# of users"
)
33+
34+
# List the recommender methods registered for real-valued rating matrices.
recommenderRegistry$get_entries(dataType = "realRatingMatrix")

# User-based collaborative filtering fitted on the full data set.
rec <- Recommender(
  r,
  method = "UBCF",
  param = list(
    normalize = "Z-score",
    method = "Cosine",
    nn = 5,
    minRating = 1
  )
)
str(rec)
getModel(rec)

# Predict ratings for the first 50 rows and show a small corner of the result.
recom <- predict(rec, r[1:50], type = "ratings")

as(recom, "matrix")[1:2, 1:10]

# Two more models fitted on subsets of the data. Each assignment replaces
# `rec`, so only the item-based model fitted last remains afterwards.
rec <- Recommender(
  r[1:400],
  method = "UBCF",
  param = list(
    normalize = "Z-score",
    method = "Jaccard",
    nn = 5,
    minRating = 1
  )
)
rec <- Recommender(
  r[1:100],
  method = "IBCF",
  param = list(
    normalize = "Z-score",
    method = "Jaccard",
    minRating = 1
  )
)
45+
46+
47+
# Evaluation set-up: a single train/test split with 90% of the data used for
# training, 10 ratings per test user given to the recommender, and 4 as the
# threshold for a "good" rating.
scheme <- evaluationScheme(
  MovieLense,
  method = "split",
  train = 0.9,
  k = 1,
  given = 10,
  goodRating = 4
)

scheme

# Algorithms to compare. Each entry names a method from recommenderRegistry
# and supplies its parameters.
algorithms <- list(
  "random items" = list(
    name = "RANDOM",
    param = list(normalize = "Z-score")
  ),
  "popular items" = list(
    name = "POPULAR",
    param = list(normalize = "Z-score")
  ),
  "user-based CF" = list(
    name = "UBCF",
    param = list(
      normalize = "Z-score",
      method = "Cosine",
      nn = 50,
      minRating = 3
    )
  ),
  # The item-based method is registered as "IBCF"; the former "IBCF2" does not
  # match any registry entry, so this algorithm could not be evaluated.
  "item-based CF" = list(
    name = "IBCF",
    param = list(normalize = "Z-score", method = "Cosine")
  )
)

# Run all algorithms, evaluating top-N lists of several lengths.
results <- evaluate(scheme, algorithms, n = c(1, 3, 5, 10, 15, 20))

# Draw ROC curve, annotating all four algorithms.
plot(results, annotate = 1:4, legend = "topleft")

# See precision / recall, annotating the third algorithm only.
plot(results, "prec/rec", annotate = 3)
65+
66+
#ggplot(r_df, aes(x = ratings)) + geom_histogram(aes(y=..density..),binwidth=0.5,colour="black", fill="white") + geom_density() + xlab("Rating") + labs(title="Histogram of ratings")
Lines changed: 53 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,53 @@
1+
library(recommenderlab)
2+
library(ggplot2)
3+
library(reshape2)
4+
5+
# List the objects provided by the attached recommenderlab package.
ls(pos = "package:recommenderlab")

# Read the training data from the project directory.
# NOTE(review): the absolute path is machine-specific; adjust it before
# running this script elsewhere.
setwd("E:/data analytics/kaggle/movie-rating")
data <- read.csv("train_v2.csv", header = TRUE)
str(data)
head(data)

# Drop the first column (presumably a row identifier -- verify against the
# CSV) so that only the columns used below remain.
data <- data[, -1]

# Cast the long table into a wide matrix: one row per `user`, one column per
# `movie`, cells filled from `rating`.
user_movie <- acast(data, user ~ movie, value.var = "rating")
class(user_movie)
dim(user_movie)
str(user_movie)
user_movie[1:2, 1:2]
17+
18+
# Convert the wide matrix into recommenderlab's realRatingMatrix class.
user_movie_rating <- as(user_movie, "realRatingMatrix")
class(user_movie_rating)
str(user_movie_rating)

# Mean rating of the first row.
rowMeans(user_movie_rating[1, ])

# Two normalized copies: the default method (plotted below as "Centered
# Ratings") and Z-scores. Row 1, columns 1-10 are shown for each.
user_movie_rating_norm1 <- normalize(user_movie_rating)
as(user_movie_rating_norm1, "matrix")[1, 1:10]
user_movie_rating_norm2 <- normalize(user_movie_rating, method = "Z-score")
as(user_movie_rating_norm2, "matrix")[1, 1:10]

# Visual comparison of the raw matrix and both normalized versions.
image(user_movie_rating, main = "Raw Ratings")
image(user_movie_rating_norm1, main = "Centered Ratings")
image(user_movie_rating_norm2, main = "Normalized Ratings")
30+
31+
# Distribution of the rating values: numeric summary, then a histogram with
# one bin per unit of rating.
summary(getRatings(user_movie_rating))
qplot(
  getRatings(user_movie_rating),
  binwidth = 1,
  main = "Histogram of ratings",
  xlab = "Rating"
)

# Histogram of the number of ratings per row of the rating matrix. The matrix
# was cast as user ~ movie, so each row is a user: the x axis is the number of
# movies a user rated and the bar height is the number of users in that bin.
# The axis labels were previously swapped.
qplot(
  rowCounts(user_movie_rating),
  binwidth = 10,
  main = "Movies Rated on average",
  xlab = "# of movies rated",
  ylab = "# of users"
)
38+
39+
# List the recommender methods registered for real-valued rating matrices.
recommenderRegistry$get_entries(dataType = "realRatingMatrix")

# Fit a user-based collaborative filtering model on the full rating matrix,
# then inspect the fitted object and its model description.
rec.model <- Recommender(
  user_movie_rating,
  method = "UBCF",
  param = list(
    normalize = "Z-score",
    method = "Cosine",
    nn = 5,
    minRating = 1
  )
)
str(rec.model)
getModel(rec.model)
44+
45+
# Top-5 recommended items for the first two users.
recommended.items <- predict(
  rec.model,
  user_movie_rating[1:2, ],
  n = 5,
  type = "topNList"
)
class(recommended.items)
str(recommended.items)
as(recommended.items, "list")

# Predicted ratings for the same two users.
recommended.ratings <- predict(
  rec.model,
  user_movie_rating[1:2, ],
  type = "ratings"
)

# Show the second user's predictions for the first 20 items. The previous
# `[2][1:20]` picked a single matrix cell and then indexed past it, which
# returned one value followed by 19 NAs instead of a row slice.
as(recommended.ratings, "matrix")[2, 1:20]
Lines changed: 68 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,68 @@
1+
library(recommenderlab)
2+
library(ggplot2)
3+
library(reshape2)
4+
5+
# List the objects provided by the attached recommenderlab package.
ls(pos = "package:recommenderlab")

# Read the training data from the project directory.
# NOTE(review): the absolute path is machine-specific; adjust it before
# running this script elsewhere.
setwd("E:/data analytics/kaggle/movie-rating")
data <- read.csv("train_v2.csv", header = TRUE)
str(data)
head(data)

# Drop the first column (presumably a row identifier -- verify against the
# CSV) before reshaping.
data <- data[, -1]

# Cast the long table into a wide matrix: one row per `user`, one column per
# `movie`, cells filled from `rating`.
user_movie <- acast(data, user ~ movie, value.var = "rating")
class(user_movie)
dim(user_movie)
user_movie[1:2, 1:2]
16+
17+
# Convert the wide matrix into recommenderlab's realRatingMatrix class and
# plot the raw ratings.
user_movie_rating <- as(user_movie, "realRatingMatrix")
class(user_movie_rating)
image(user_movie_rating, main = "Raw Ratings")

# Distribution of the rating values: numeric summary, then a histogram with
# one bin per unit of rating.
summary(getRatings(user_movie_rating))
qplot(
  getRatings(user_movie_rating),
  binwidth = 1,
  main = "Histogram of ratings",
  xlab = "Rating"
)

# Histogram of the number of ratings per row of the rating matrix. The matrix
# was cast as user ~ movie, so each row is a user: the x axis is the number of
# movies a user rated and the bar height is the number of users in that bin.
# The axis labels were previously swapped.
qplot(
  rowCounts(user_movie_rating),
  binwidth = 10,
  main = "Movies Rated on average",
  xlab = "# of movies rated",
  ylab = "# of users"
)
28+
29+
# List the recommender methods registered for real-valued rating matrices.
recommenderRegistry$get_entries(dataType = "realRatingMatrix")

# Fit a user-based collaborative filtering model on the full rating matrix,
# then inspect the fitted object and its model description.
rec.model <- Recommender(
  user_movie_rating,
  method = "UBCF",
  param = list(
    normalize = "Z-score",
    method = "Cosine",
    nn = 5,
    minRating = 1
  )
)
str(rec.model)
getModel(rec.model)
34+
35+
# Top-5 recommended items for the first two users (predict() is called
# without `type`, so its default output type is used).
recommended.items <- predict(rec.model, user_movie_rating[1:2, ], n = 5)
class(recommended.items)
str(recommended.items)
as(recommended.items, "list")

# Predicted ratings for the same two users.
recommended.ratings <- predict(
  rec.model,
  user_movie_rating[1:2, ],
  type = "ratings"
)

# Show the second user's predictions for the first 20 items. The previous
# `[2][1:20]` picked a single matrix cell and then indexed past it, which
# returned one value followed by 19 NAs instead of a row slice.
as(recommended.ratings, "matrix")[2, 1:20]
44+
45+
46+
#rec=Recommender(r[1:400],method="UBCF", param=list(normalize = "Z-score",method="Jaccard",nn=5, minRating=1))
47+
#rec=Recommender(r[1:100],method="IBCF", param=list(normalize = "Z-score",method="Jaccard",minRating=1))
48+
49+
50+
# Evaluation set-up: 4-fold cross-validation with 5 ratings per test user
# given to the recommender and 5 as the threshold for a "good" rating.
scheme <- evaluationScheme(
  user_movie_rating,
  method = "cross-validation",
  k = 4,
  given = 5,
  goodRating = 5
)

scheme

# Algorithms to compare, each with no explicit parameters (param = NULL).
algorithms <- list(
  "random items"  = list(name = "RANDOM", param = NULL),
  "popular items" = list(name = "POPULAR", param = NULL),
  "user-based CF" = list(name = "UBCF", param = NULL),
  "item-based CF" = list(name = "IBCF", param = NULL),
  "svd-based CF"  = list(name = "SVD", param = NULL)
)

# Run every algorithm under the scheme, using evaluate()'s default settings.
results <- evaluate(scheme, algorithms)

# Draw ROC curve, annotating all five algorithms.
plot(results, annotate = 1:5, legend = "topleft")

# See precision / recall, annotating the third algorithm only.
plot(results, "prec/rec", annotate = 3)

0 commit comments

Comments
 (0)