Code fragments of main.py

Graph

# main.py
# See for reference: https://naomy-gomes.medium.com/the-cosine-similarity-and-its-use-in-recommendation-systems-cb2ebd811ce1

import json
import os

import pandas as pd
import psycopg2

from helper import preprocess, cosine_sim, get_combinations, show_welcome_text

'''
This is a simple supplier recommendation system using cosine similarity to
provide recommendations for a specific rating input based on the most
similar rating entries.
'''

# get ratings from database
conn = psycopg2.connect(
    database=os.environ["DB_NAME"],
    user=os.environ["DATA_SOURCE_USERNAME"],
    host=os.environ["DATA_SOURCE_HOST"],
    password=os.environ["DATA_SOURCE_PASSWORD"],
    port=os.environ["DATA_SOURCE_PORT"],
)
try:
    # The cursor context manager closes the cursor; the connection is closed
    # explicitly because psycopg2's `with conn:` only ends the transaction.
    with conn.cursor() as cursor:
        cursor.execute("SELECT * FROM supplier_data")
        ratings = cursor.fetchall()
        column_names = [desc[0] for desc in cursor.description]
finally:
    conn.close()
df = pd.DataFrame(ratings, columns=column_names)


def recommend(input_order_index, df, top_n=4):
    """
    Content based recommendations.

    Compares the row labelled ``input_order_index`` with the other rows of
    ``df`` using ``cosine_sim`` and returns the most similar ones.

    Args:
        input_order_index: Index label of the input row in ``df``. The code
            treats the last row of ``df`` as the input row (the caller in
            this file appends it last).
        df: Ratings frame containing a ``rating_id`` column. It is not
            modified.
        top_n: Number of most similar rows to return (default 4).

    Returns:
        DataFrame with the ``top_n`` most similar rows, including a ``sim``
        column, ordered by descending similarity. The frame is also printed.
    """
    # Columns left out of the comparison: the id column plus every column
    # for which the last (input) row has no value.
    df_temp = df.copy()
    cols = ['rating_id'] + df_temp.columns[df_temp.iloc[-1].isna()].tolist()

    # Vector of the input row restricted to the compared columns.
    value_vec = df.loc[input_order_index].drop(index=cols).values
    df_temp = df_temp.drop(columns=cols)

    # Drop the last row (the input itself) from both frames. Non-inplace, so
    # the frame passed in by the caller is left untouched.
    df_temp = df_temp.drop(df_temp.tail(1).index)
    df = df.drop(df.tail(1).index)

    # Similarity of the input vector to every remaining rating vector.
    df_temp['sim'] = df_temp.apply(
        lambda x: cosine_sim(value_vec, x.values), axis=1
    )

    # Merge the similarity column back onto the full rows (joins on all
    # columns the two frames have in common).
    df = pd.merge(df_temp, df, how='inner', on=None, validate=None)

    # Keep only the top_n most similar rows, ordered by highest similarity.
    df_rec = df.nlargest(top_n, columns='sim')
    df_final = df_rec.sort_values("sim", ascending=False)
    print("\nFound Similarities:\n", df_final)
    return df_final


def calc_total_rating(df, supplier):
    """
    Return the percentage of a supplier's ratings that are 'in time'.

    Args:
        df: Frame with ``supplier_name`` and ``rating`` columns.
        supplier: Supplier name to evaluate.

    Returns:
        Percentage (0-100, rounded to two decimals) of the supplier's rows
        whose ``rating`` equals ``'in time'``. Returns 0.0 when no row matches
        the supplier, instead of raising ``ZeroDivisionError``.
    """
    filtered_df = df[df['supplier_name'] == supplier]
    rating_count = filtered_df.shape[0]
    if rating_count == 0:
        # No rows for this supplier (e.g. a missing name never compares
        # equal), so there is nothing to divide by.
        return 0.0
    in_time_count = filtered_df[filtered_df['rating'] == 'in time'].shape[0]
    return round((in_time_count / rating_count) * 100, 2)


def add_ratings(df, df_original):
    """
    Attach the 'in time' percentage to each recommended supplier.

    Args:
        df: Series of supplier names (anything whose ``to_dict()`` maps an
            index label to a supplier name).
        df_original: Frame passed to ``calc_total_rating`` for the lookup.

    Returns:
        List with one dict per entry of ``df``:
        ``{<index label>: <supplier name>, "ratings": <percentage>}``.
    """
    return [
        {key: s, "ratings": calc_total_rating(df_original, s)}
        for key, s in df.to_dict().items()
    ]


def supplier_recommend(input_order, df, top_n=4):
    """
    Recommend suppliers for a single input order.

    Args:
        input_order: Row values for the order, one per column of ``df``.
        df: Ratings frame loaded from the database. It is not modified.
        top_n: Number of most similar rating rows to consider (default 4).

    Returns:
        List of dicts as produced by ``add_ratings`` for the distinct
        suppliers found among the most similar rows.
    """
    df_original = df.copy()
    input_order_df = pd.DataFrame([input_order], columns=df.columns)

    # append the new row to df; it becomes the last row
    df = pd.concat([df, input_order_df], ignore_index=True)
    input_order_index = df.shape[0] - 1

    # add index column
    df['rating_id'] = df.index
    df = preprocess(df)

    # call recommendation fct
    df_recommendations = recommend(input_order_index, df, top_n)

    # search supplier name for the recommended rows, drop duplicates
    df_suppliers = pd.merge(
        df_recommendations, df_original, how='inner', on=None, validate=None
    )["supplier_name"]
    # Reassign instead of inplace: the Series was selected from the merge
    # result, and in-place edits on such selections are unreliable.
    df_suppliers = df_suppliers.drop_duplicates()

    return add_ratings(df_suppliers, df_original)


show_welcome_text()
combinations = get_combinations()
recommended_supplier_list = []
for combi in combinations:
    list_suppliers = supplier_recommend(combi, df)
    recommended_supplier_list.append([combi, list_suppliers])
print(recommended_supplier_list)

result_file = 'result.json'
with open(result_file, 'w', encoding='utf-8') as f:
    json.dump(recommended_supplier_list, f)