# Factorization Machines in R ----
#
# Fits factorization machines with the libFMexe wrapper around the libFM
# command-line tool, first on the MovieLens sample shipped with libFMexe and
# then on the Moodplay dataset. Finally, the Moodplay data is dumped to text
# files so that it can be fed to libFM directly.

# Install libFMexe only when it is missing, instead of on every run.
if (!requireNamespace("libFMexe", quietly = TRUE)) {
  devtools::install_github("andland/libFMexe")
}

#library(rrecsys)
#data("ml100k")
library(libFMexe)

# Directory holding the libFM executable. Set the LIBFM_BIN environment
# variable to override it; the default is the path originally hard-coded here.
libfm_bin <- Sys.getenv(
  "LIBFM_BIN",
  unset = "/Users/denisparra/libfm-1.42.src/bin/"
)

# Write one element of `rows` per line to `path`, replacing any existing file.
# Overwriting (rather than appending) keeps the dumps from accumulating
# duplicated records when the script is run more than once.
# NOTE(review): assumes `rows` holds one character string per observation, as
# the original per-element write() calls imply -- confirm.
write_libfm_lines <- function(rows, path) {
  writeLines(unlist(rows, use.names = FALSE), path)
  invisible(path)
}

# MovieLens ----

data(movie_lens)

# Random 2/3 train, 1/3 test split.
set.seed(1)
train_rows <- sample.int(nrow(movie_lens), nrow(movie_lens) * 2 / 3)
train <- movie_lens[train_rows, ]
test <- movie_lens[-train_rows, ]

predFM <- libFM(
  train, test, Rating ~ User + Movie,
  task = "r", dim = 10, iter = 30,
  exe_loc = libfm_bin
)
head(predFM)

# How good is the fit? Mean squared error on the test set, then its RMSE.
mse_fm <- mean((predFM - test$Rating)^2)
mse_fm
sqrt(mse_fm)

# With sparse matrices
train_mat <- Matrix::sparse.model.matrix(Rating ~ User + Movie - 1, train)
test_mat <- Matrix::sparse.model.matrix(Rating ~ User + Movie - 1, test)

predFM <- libFM(
  train_mat, test_mat, train$Rating, test$Rating,
  task = "r", dim = 10, iter = 300,
  exe_loc = libfm_bin
)

mse_fm_sparse <- mean((predFM - test$Rating)^2)
mse_fm_sparse
sqrt(mse_fm_sparse)

# Now, some cross-validation (SLOW)
mses <- cv_libFM(
  train, Rating ~ User + Movie,
  task = "r", dims = seq(0, 20, by = 10), iter = 50,
  exe_loc = libfm_bin
)
mses

# CV with a sparse matrix
movie_mat <- Matrix::sparse.model.matrix(
  Rating ~ User + Movie - 1,
  movie_lens
)
mses <- cv_libFM(
  movie_mat, movie_lens$Rating,
  task = "r", dims = c(0, 5, 10), cv_verbosity = 1,
  exe_loc = libfm_bin
)
mses

# Moodplay ----
# Published at:
# http://dparra.sitios.ing.uc.cl/pdfs/preprint-andjelkovic-IJCHS_Moodplay.pdf

# 1. Read data into dataframe
dfmplay <- read.csv("./datasets/moodplay-data-ckit.csv")

mplay_lfm <- model_frame_libFM(
  rating ~ userid + itemid + primary_artist_mood,
  dfmplay
)

# Cross-validate four feature sets; sqrt() of each result is printed as RMSE.
# NOTE(review): libFMexe appears to document this argument as `init_stdev`;
# confirm that `init_stdevs` is actually honored before relying on it.
mses_1 <- cv_libFM(
  dfmplay, rating ~ userid + itemid,
  task = "r", init_stdevs = 0.001,
  dims = seq(0, 100, by = 50), iter = 500,
  exe_loc = libfm_bin
)
sqrt(mses_1)

mses_2 <- cv_libFM(
  dfmplay, rating ~ userid + itemid + primary_artist_mood,
  task = "r", init_stdevs = 0.01,
  dims = seq(0, 100, by = 25), iter = 500,
  exe_loc = libfm_bin
)
sqrt(mses_2)

mses_3 <- cv_libFM(
  dfmplay, rating ~ userid + itemid + condition,
  task = "r", dims = seq(0, 100, by = 25), iter = 500,
  exe_loc = libfm_bin
)
sqrt(mses_3)

mses_4 <- cv_libFM(
  dfmplay, rating ~ userid + itemid + condition + primary_artist_mood,
  task = "r", dims = seq(0, 100, by = 25), iter = 500,
  exe_loc = libfm_bin
)
sqrt(mses_4)

# MAE has to be computed separately from the squared-error results above, so
# refit with fixed dims on repeated random 2/3 - 1/3 splits and collect the
# test-set MAE of each run.
# NOTE(review): the original comment calls these dims "the optimal values",
# presumably picked from the cross-validation above -- confirm.
set.seed(1)
n_runs <- 30
MAEs_FM <- numeric(n_runs)    # rating ~ userid + itemid
MAEs_FMCI <- numeric(n_runs)  # ... + primary_artist_mood

for (j in seq_len(n_runs)) {
  train_rows_mp <- sample.int(nrow(dfmplay), nrow(dfmplay) * 2 / 3)
  train_mp <- dfmplay[train_rows_mp, ]
  test_mp <- dfmplay[-train_rows_mp, ]

  predFM_1 <- libFM(
    train_mp, test_mp, rating ~ userid + itemid,
    task = "r", dim = 40, iter = 500,
    exe_loc = libfm_bin
  )
  MAEs_FM[j] <- mean(abs(predFM_1 - test_mp$rating))

  # Earlier variant, kept for reference:
  # predFM_2 = libFM(train_mp, test_mp,
  #                  rating ~ userid + itemid + primary_artist_mood,
  #                  task = "r", dim = 10, iter = 500, exe_loc = libfm_bin)
  # mean(abs(predFM_2 - test_mp$rating))
  predFM_2 <- libFM(
    train_mp, test_mp, rating ~ userid + itemid + primary_artist_mood,
    task = "r", dim = 20, iter = 500,
    exe_loc = libfm_bin
  )
  MAEs_FMCI[j] <- mean(abs(predFM_2 - test_mp$rating))
}

# Convert dataframe to LIBSVM format, to ingest by LIBFM ----
dfmplay <- read.csv("./datasets/moodplay-data-ckit.csv")

mplay_lfm1 <- model_frame_libFM(rating ~ userid + itemid, dfmplay)
mplay_lfm2 <- model_frame_libFM(
  rating ~ userid + itemid + primary_artist_mood,
  dfmplay
)

# One split, reused for both feature sets so the dumped files cover the same
# observations.
train_rows_mp <- sample.int(length(mplay_lfm1), length(mplay_lfm1) * 2 / 3)

# Dump data (rating, user, item) to text files for train and test
train_mp <- mplay_lfm1[train_rows_mp]
test_mp <- mplay_lfm1[-train_rows_mp]
write_libfm_lines(train_mp, "moodplay.train")
write_libfm_lines(test_mp, "moodplay.test")

# Dump data (rating, user, item, primary_artist_mood) to text files for train
# and test
train_mp <- mplay_lfm2[train_rows_mp]
test_mp <- mplay_lfm2[-train_rows_mp]
write_libfm_lines(train_mp, "moodplay_mood.train")
write_libfm_lines(test_mp, "moodplay_mood.test")