# Movie dataset from GroupLens. Thanks to ChatGPT for the mentorship!
import pandas as pd
from sklearn.metrics.pairwise import cosine_similarity
# Load the movie metadata and the user ratings from CSV files in the
# current working directory.
df_movies = pd.read_csv('movies.csv')
df_ratings = pd.read_csv('ratings.csv')
df_movies.head()
df_ratings.head()

# Attach every rating to its movie's metadata through the shared 'movieId'
# column.
df_combined = pd.merge(df_movies, df_ratings, on='movieId')
df_combined.head()
df_combined.value_counts('title')  # number of merged rows per title

# Reshape into a table with one row per userId and one column per title,
# holding the rating values. Cells with no rating are NaN until filled below.
df_pivot = df_combined.pivot_table(index='userId', columns='title', values='rating')
df_pivot.head()

# cosine_similarity compares the ROWS of its input. df_pivot has users as
# rows, so it is transposed first: the result is then title-by-title, with
# rows/columns in the same order as df_pivot.columns. find_movie relies on
# that ordering when it indexes this matrix with a column position.
# Missing ratings are filled with 0 before comparison.
# NOTE: the matrix has one row and one column per title, so its memory use
# grows with the square of the number of distinct titles.
similarity_matrix = cosine_similarity(df_pivot.fillna(0).T)
similarity_matrix
def find_movie(movie_title, top_n=5):
    """Return the titles ranked most similar to ``movie_title``.

    Looks up the column position of ``movie_title`` in the module-level
    ``df_pivot``, takes the row at that position from the module-level
    ``similarity_matrix``, and ranks every other position by descending
    score.

    NOTE(review): this assumes ``similarity_matrix`` has one row per title,
    in the same order as ``df_pivot.columns``. Confirm that where the matrix
    is built.

    Args:
        movie_title: Title exactly as it appears in ``df_pivot.columns``.
        top_n: Maximum number of titles to return. Defaults to 5.

    Returns:
        A slice of ``df_pivot.columns`` containing at most ``top_n`` titles,
        highest score first. ``movie_title`` itself is excluded.

    Raises:
        KeyError: If ``movie_title`` is not a column of ``df_pivot``
            (raised by ``Index.get_loc``).
        ValueError: If ``top_n`` is negative.
    """
    if top_n < 0:
        # A negative slice bound would silently drop titles from the end
        # instead of limiting the result, so reject it explicitly.
        raise ValueError("top_n must be non-negative")

    movie_index = df_pivot.columns.get_loc(movie_title)
    similarity_score = similarity_matrix[movie_index]

    # argsort is ascending; reverse it so the highest scores come first.
    similar_movie_indices = similarity_score.argsort()[::-1]
    # Drop the queried position itself so it is never returned as a match.
    similar_movie_indices = similar_movie_indices[similar_movie_indices != movie_index]

    similar_movies = df_pivot.columns[similar_movie_indices][:top_n]
    return similar_movies
# Example query: fetch the titles find_movie ranks closest to "Alfie (2004)".
# The title must match a column of df_pivot exactly, otherwise the lookup
# inside find_movie raises KeyError.
similar_titles = find_movie("Alfie (2004)")
# Combine the returned titles into one comma-separated string. This is a bare
# expression: its value is not stored or printed here (in a notebook cell it
# would be shown as the cell output).
', '.join(similar_titles)