PADME Train Wiki

Train Selection

Trains
SupplierRecommendation
880
main.py

Code fragments of main.py

# See for reference: https://naomy-gomes.medium.com/the-cosine-similarity-and-its-use-in-recommendation-systems-cb2ebd811ce1
import pandas as pd
from helper import preprocess, cosine_sim, get_combinations, show_welcome_text
import psycopg2
import json
import os
''' This is a simple supplier recommendation system that uses cosine similarity to provide recommendations for a specific rating input, based on the most similar rating entries. '''
# Load all supplier ratings from the database into a DataFrame.
# Connection parameters are read from environment variables. Only ONE
# connection is opened (the original opened two and leaked the first), and
# both the cursor and the connection are released once the rows are fetched.
conn = psycopg2.connect(
    database=os.environ["DB_NAME"],
    user=os.environ["DATA_SOURCE_USERNAME"],
    host=os.environ["DATA_SOURCE_HOST"],
    password=os.environ["DATA_SOURCE_PASSWORD"],
    port=os.environ["DATA_SOURCE_PORT"],
)
try:
    # The cursor context manager closes the cursor on exit.
    with conn.cursor() as cursor:
        cursor.execute("SELECT * FROM supplier_data")
        ratings = cursor.fetchall()
        # cursor.description holds one entry per result column; item 0 is its name.
        column_names = [desc[0] for desc in cursor.description]
finally:
    # `with conn:` would only end the transaction, so close explicitly.
    conn.close()
df = pd.DataFrame(ratings, columns=column_names)
def recommend(input_order_index, df, top_n=4):
    """Content-based recommendation: return the rows most similar to the input.

    The last row of ``df`` is treated as the input order: every column that is
    NaN in that row, plus ``rating_id``, is left out of the similarity vectors.
    Each remaining row is scored against the input row with ``cosine_sim``
    (imported from ``helper``).

    Args:
        input_order_index: Index label of the input order row in ``df``.
        df: Frame containing the candidate rows followed by the input order
            as its last row. It is NOT modified by this function.
        top_n: Number of most similar rows to return (default 4).

    Returns:
        DataFrame with the ``top_n`` highest-scoring candidate rows, ordered
        by descending ``sim``. Columns are the feature columns, ``sim``, and
        then the columns that were excluded from the comparison.
    """
    # Columns excluded from the vectors: the id column and everything the
    # input order (last row) has no value for.
    excluded = ['rating_id'] + df.columns[df.iloc[-1].isna()].tolist()

    # Feature vector of the input order.
    value_vec = df.loc[input_order_index].drop(index=excluded).values

    # Candidate rows = everything except the last row. Slicing instead of an
    # in-place drop keeps the caller's DataFrame untouched.
    candidates = df.iloc[:-1]
    features = candidates.drop(columns=excluded)

    # Similarity of the input vector to every candidate row.
    features['sim'] = features.apply(
        lambda row: cosine_sim(value_vec, row.values), axis=1
    )

    # Re-attach the excluded columns by index. A value-based merge on the
    # feature columns would multiply rows that share identical feature values
    # and could fill the top-N with repeats of the same rating.
    extra_cols = [col for col in candidates.columns if col not in features.columns]
    scored = features.join(candidates[extra_cols]).reset_index(drop=True)

    # nlargest already returns the rows ordered by descending similarity.
    df_final = scored.nlargest(top_n, columns='sim')
    print("\nFound Similarities:\n", df_final)
    return df_final
def calc_total_rating(df, supplier):
    """Return the percentage of a supplier's ratings that are 'in time'.

    Args:
        df: DataFrame with at least the columns ``supplier_name`` and ``rating``.
        supplier: Supplier name to evaluate.

    Returns:
        Share of the supplier's rows whose ``rating`` equals ``'in time'``,
        as a percentage rounded to two decimals. Returns ``0.0`` when the
        supplier has no rows in ``df``.
    """
    supplier_ratings = df.loc[df['supplier_name'] == supplier, 'rating']
    total = supplier_ratings.shape[0]
    if total == 0:
        # No ratings for this supplier: the original divided by zero here.
        return 0.0
    in_time_count = int((supplier_ratings == 'in time').sum())
    return round((in_time_count / total) * 100, 2)
def add_ratings(df, df_original):
    """Build one result entry per recommended supplier.

    Args:
        df: Object whose ``to_dict()`` maps a label to a supplier name
            (the caller passes the ``supplier_name`` column of the
            recommendations).
        df_original: Ratings table handed to ``calc_total_rating``.

    Returns:
        List of dicts, each of the form ``{label: supplier_name,
        "ratings": <value from calc_total_rating>}``.
    """
    rated_suppliers = []
    for label, name in df.to_dict().items():
        entry = {label: name, "ratings": calc_total_rating(df_original, name)}
        rated_suppliers.append(entry)
    return rated_suppliers
def supplier_recommend(input_order, df):
    """Recommend suppliers for a single input order.

    Args:
        input_order: Values of the order to find recommendations for; it is
            turned into a one-row DataFrame with the columns of ``df``.
        df: Ratings table.

    Returns:
        The list produced by ``add_ratings`` for the distinct supplier names
        of the recommended rows.
    """
    # Keep an untouched copy for the supplier lookup and rating statistics.
    df_original = df.copy()

    # Put the input order behind the existing ratings as the last row.
    order_row = pd.DataFrame([input_order], columns=df.columns)
    combined = pd.concat([df, order_row], ignore_index=True)
    input_order_index = len(combined) - 1

    # Use the positional index as the rating id, then run the helper's
    # preprocessing step.
    combined['rating_id'] = combined.index
    combined = preprocess(combined)

    df_recommendations = recommend(input_order_index, combined)

    # Match the recommended rows against the original ratings (on all shared
    # columns) to get their supplier names, each name only once.
    matched = df_recommendations.merge(df_original, how='inner')
    df_suppliers = matched["supplier_name"].drop_duplicates()

    return add_ratings(df_suppliers, df_original)
show_welcome_text()

# Compute the supplier recommendations for every combination returned by the
# helper and collect them as [combination, recommendations] pairs.
combinations = get_combinations()
recommended_supplier_list = [
    [combi, supplier_recommend(combi, df)] for combi in combinations
]
print(recommended_supplier_list)

# Persist the collected recommendations as JSON.
result_file = 'result.json'
with open(result_file, 'w') as out_file:
    json.dump(recommended_supplier_list, out_file)

Graph

Train Selection