PADME Train Wiki
Train Selection
Search
Trains
SupplierRecommendation
880
main.py
Code fragments of main.py
# See for reference: https://naomy-gomes.medium.com/the-cosine-similarity-and-its-use-in-recommendation-systems-cb2ebd811ce1
import pandas as pd
from helper import preprocess, cosine_sim, get_combinations, show_welcome_text
import psycopg2
import json
import os
''' This is a simple supplier recommendation system that uses cosine similarity to provide recommendations for a specific rating input, based on the most similar rating entries. '''
# Load all supplier ratings from the database into a DataFrame.
# Connection settings are read from the environment; a missing variable
# raises KeyError before any connection is attempted.
conn = psycopg2.connect(
    database=os.environ["DB_NAME"],
    user=os.environ["DATA_SOURCE_USERNAME"],
    host=os.environ["DATA_SOURCE_HOST"],
    password=os.environ["DATA_SOURCE_PASSWORD"],
    port=os.environ["DATA_SOURCE_PORT"],
)
try:
    # The cursor context manager closes the cursor when the block exits.
    with conn.cursor() as cursor:
        cursor.execute("SELECT * FROM supplier_data")
        ratings = cursor.fetchall()
        # cursor.description holds one entry per result column; item 0 is its name.
        column_names = [desc[0] for desc in cursor.description]
finally:
    # Only one connection is opened, and it is always released, even on error.
    conn.close()
df = pd.DataFrame(ratings, columns=column_names)
def recommend(input_order_index, df):
    """Return the rows of ``df`` most similar to the input order.

    Content-based recommendation: the feature vector of the input order is
    compared with every other row via ``cosine_sim`` and the best matches
    are returned.

    Args:
        input_order_index: Index label of the input order row in ``df``.
        df: Ratings table containing a ``rating_id`` column. Its last row is
            treated as the input order: columns that are NaN there are left
            out of the comparison, and the row itself is excluded from the
            candidates. The frame passed in is not modified.

    Returns:
        DataFrame with at most four rows, ordered by descending ``sim``
        (the cosine similarity to the input order).
    """
    # Columns left out of the similarity computation: the id column plus
    # every column for which the input order (last row) has no value.
    ignored_cols = ['rating_id'] + df.columns[df.iloc[-1].isna()].tolist()

    # Feature vector of the input order, restricted to the compared columns.
    value_vec = df.loc[input_order_index].drop(index=ignored_cols).values

    # Drop the last ROW (the input order itself). This is done on a new
    # frame so the caller's DataFrame is not truncated as a side effect.
    candidates = df.drop(df.tail(1).index)
    df_temp = candidates.drop(columns=ignored_cols)

    # Cosine similarity between the input vector and every candidate row.
    df_temp['sim'] = df_temp.apply(lambda x: cosine_sim(value_vec, x.values), axis=1)

    # Re-attach the ignored columns by joining on all shared columns.
    df_merged = pd.merge(df_temp, candidates, how='inner', on=None, validate=None)

    # Keep the four most similar rows, highest similarity first.
    df_rec = df_merged.nlargest(4, columns='sim')
    df_final = df_rec.sort_values("sim", ascending=False)
    print("\nFound Similarities:\n", df_final)
    return df_final
def calc_total_rating(df, supplier):
    """Return the percentage of a supplier's ratings that are 'in time'.

    Args:
        df: Ratings table with ``supplier_name`` and ``rating`` columns.
        supplier: Supplier name to evaluate.

    Returns:
        Share of the supplier's rows whose ``rating`` equals ``'in time'``,
        as a percentage rounded to two decimals. ``0.0`` when the supplier
        has no rows in ``df``.
    """
    supplier_rows = df[df['supplier_name'] == supplier]
    total = supplier_rows.shape[0]
    if total == 0:
        # No ratings for this supplier: report 0 % instead of dividing by zero.
        return 0.0
    in_time_count = supplier_rows[supplier_rows['rating'] == 'in time'].shape[0]
    return round((in_time_count / total) * 100, 2)
def add_ratings(df, df_original):
    """Attach the 'in time' percentage to each recommended supplier.

    Args:
        df: Object whose ``to_dict()`` yields ``key -> supplier`` pairs.
            ``supplier_recommend`` passes the de-duplicated ``supplier_name``
            column here (presumably keyed by row index; verify against the
            caller).
        df_original: Ratings table handed to ``calc_total_rating``.

    Returns:
        List with one dict per pair: ``{key: supplier, "ratings": <percent>}``.
    """
    entries = []
    for key, supplier in df.to_dict().items():
        entry = {key: supplier, "ratings": calc_total_rating(df_original, supplier)}
        entries.append(entry)
    return entries
def supplier_recommend(input_order, df):
    """Recommend suppliers for one input order.

    Args:
        input_order: Row values for the new order, aligned with ``df.columns``.
        df: Ratings table; it is copied and not modified here.

    Returns:
        The list built by ``add_ratings`` for the distinct suppliers among
        the most similar ratings.
    """
    # Untouched snapshot, used later to look up supplier names and ratings.
    df_original = df.copy()

    # Append the input order as the last row of a fresh, re-indexed table.
    order_row = pd.DataFrame([input_order], columns=df.columns)
    combined = pd.concat([df, order_row], ignore_index=True)
    order_position = len(combined) - 1

    # Give every row an id equal to its index, then run the shared preprocessing.
    combined['rating_id'] = combined.index
    prepared = preprocess(combined)

    similar_rows = recommend(order_position, prepared)

    # Join the recommendations back onto the original table (on all shared
    # columns) to obtain supplier names, then remove repeated suppliers.
    matched = pd.merge(similar_rows, df_original, how='inner')
    supplier_names = matched["supplier_name"]
    supplier_names.drop_duplicates(inplace=True)

    return add_ratings(supplier_names, df_original)
# Entry point of the script: greet, compute recommendations, store them.
show_welcome_text()
combinations = get_combinations()

# One [combination, recommended suppliers] pair per input combination.
recommended_supplier_list = [
    [combi, supplier_recommend(combi, df)] for combi in combinations
]
print(recommended_supplier_list)

# Write the collected recommendations to disk as JSON.
result_file = 'result.json'
with open(result_file, 'w') as f:
    json.dump(recommended_supplier_list, f)
Graph
undefined
undefined
undefined
undefined
undefined
undefined
undefined
undefined
undefined
undefined
undefined
undefined
undefined
undefined
undefined
undefined
undefined
undefined
undefined
undefined
undefined
undefined
undefined
undefined
undefined
undefined
undefined
undefined
undefined
undefined
undefined
undefined
undefined
undefined
undefined
undefined
undefined
undefined
undefined
undefined
undefined
undefined
undefined
undefined
undefined
undefined
undefined
undefined
undefined
undefined
undefined
undefined
undefined
undefined
undefined
undefined
undefined
undefined
undefined
undefined
undefined
undefined
undefined
undefined
undefined
undefined
undefined
undefined
undefined
undefined
undefined
undefined
undefined
undefined
undefined
undefined
undefined
undefined
undefined
undefined
undefined
undefined
undefined
undefined
undefined
undefined
undefined
undefined
undefined
undefined
undefined
undefined
undefined
undefined
undefined
undefined
undefined
undefined
undefined
undefined
undefined
undefined
undefined
undefined
undefined
undefined
undefined
undefined
undefined
undefined
undefined
undefined
undefined
undefined
undefined
undefined
undefined
undefined
undefined
undefined
undefined
undefined
undefined
undefined
undefined
undefined
undefined
undefined
undefined
undefined
undefined
undefined
undefined
undefined
undefined
undefined
undefined
undefined
undefined
undefined
undefined
undefined
undefined
undefined
undefined
undefined
undefined
undefined
undefined
undefined
undefined
undefined
undefined
undefined
undefined
undefined
main.py
# See for reference: https://naomy-gomes.medium.com/the-cosine-similarity-and-its-use-in-recommendation-systems-cb2ebd811ce1
None
import pandas as pd
from helper import preprocess, cosine_sim, get_combinations, show_welcome_text
import psycopg2
import json
import os
''' This is a simple supplier recommendation system that uses cosine similarity to provide recommendations for a specific rating input, based on the most similar rating entries. '''
# Load all supplier ratings from the database into a DataFrame.
# Connection settings are read from the environment; a missing variable
# raises KeyError before any connection is attempted.
conn = psycopg2.connect(
    database=os.environ["DB_NAME"],
    user=os.environ["DATA_SOURCE_USERNAME"],
    host=os.environ["DATA_SOURCE_HOST"],
    password=os.environ["DATA_SOURCE_PASSWORD"],
    port=os.environ["DATA_SOURCE_PORT"],
)
try:
    # The cursor context manager closes the cursor when the block exits.
    with conn.cursor() as cursor:
        cursor.execute("SELECT * FROM supplier_data")
        ratings = cursor.fetchall()
        # cursor.description holds one entry per result column; item 0 is its name.
        column_names = [desc[0] for desc in cursor.description]
finally:
    # Always release the connection, even if the query fails.
    conn.close()
df = pd.DataFrame(ratings, columns=column_names)
def recommend(input_order_index, df):
    """Return the rows of ``df`` most similar to the input order.

    Content-based recommendation: the feature vector of the input order is
    compared with every other row via ``cosine_sim`` and the best matches
    are returned.

    Args:
        input_order_index: Index label of the input order row in ``df``.
        df: Ratings table containing a ``rating_id`` column. Its last row is
            treated as the input order: columns that are NaN there are left
            out of the comparison, and the row itself is excluded from the
            candidates. The frame passed in is not modified.

    Returns:
        DataFrame with at most four rows, ordered by descending ``sim``
        (the cosine similarity to the input order).
    """
    # Columns left out of the similarity computation: the id column plus
    # every column for which the input order (last row) has no value.
    ignored_cols = ['rating_id'] + df.columns[df.iloc[-1].isna()].tolist()

    # Feature vector of the input order, restricted to the compared columns.
    value_vec = df.loc[input_order_index].drop(index=ignored_cols).values

    # Drop the last ROW (the input order itself). This is done on a new
    # frame so the caller's DataFrame is not truncated as a side effect.
    candidates = df.drop(df.tail(1).index)
    df_temp = candidates.drop(columns=ignored_cols)

    # Cosine similarity between the input vector and every candidate row.
    df_temp['sim'] = df_temp.apply(lambda x: cosine_sim(value_vec, x.values), axis=1)

    # Re-attach the ignored columns by joining on all shared columns.
    df_merged = pd.merge(df_temp, candidates, how='inner', on=None, validate=None)

    # Keep the four most similar rows, highest similarity first.
    df_rec = df_merged.nlargest(4, columns='sim')
    df_final = df_rec.sort_values("sim", ascending=False)
    print("\nFound Similarities:\n", df_final)
    return df_final
def calc_total_rating(df, supplier):
    """Return the percentage of a supplier's ratings that are 'in time'.

    Args:
        df: Ratings table with ``supplier_name`` and ``rating`` columns.
        supplier: Supplier name to evaluate.

    Returns:
        Share of the supplier's rows whose ``rating`` equals ``'in time'``,
        as a percentage rounded to two decimals. ``0.0`` when the supplier
        has no rows in ``df``.
    """
    supplier_rows = df[df['supplier_name'] == supplier]
    total = supplier_rows.shape[0]
    if total == 0:
        # No ratings for this supplier: report 0 % instead of dividing by zero.
        return 0.0
    in_time_count = supplier_rows[supplier_rows['rating'] == 'in time'].shape[0]
    return round((in_time_count / total) * 100, 2)
def add_ratings(df, df_original):
    """Attach the 'in time' percentage to each recommended supplier.

    Args:
        df: Object whose ``to_dict()`` yields ``key -> supplier`` pairs.
            ``supplier_recommend`` passes the de-duplicated ``supplier_name``
            column here (presumably keyed by row index; verify against the
            caller).
        df_original: Ratings table handed to ``calc_total_rating``.

    Returns:
        List with one dict per pair: ``{key: supplier, "ratings": <percent>}``.
    """
    entries = []
    for key, supplier in df.to_dict().items():
        entry = {key: supplier, "ratings": calc_total_rating(df_original, supplier)}
        entries.append(entry)
    return entries
def supplier_recommend(input_order, df):
    """Recommend suppliers for one input order.

    Args:
        input_order: Row values for the new order, aligned with ``df.columns``.
        df: Ratings table; it is copied and not modified here.

    Returns:
        The list built by ``add_ratings`` for the distinct suppliers among
        the most similar ratings.
    """
    # Untouched snapshot, used later to look up supplier names and ratings.
    df_original = df.copy()

    # Append the input order as the last row of a fresh, re-indexed table.
    order_row = pd.DataFrame([input_order], columns=df.columns)
    combined = pd.concat([df, order_row], ignore_index=True)
    order_position = len(combined) - 1

    # Give every row an id equal to its index, then run the shared preprocessing.
    combined['rating_id'] = combined.index
    prepared = preprocess(combined)

    similar_rows = recommend(order_position, prepared)

    # Join the recommendations back onto the original table (on all shared
    # columns) to obtain supplier names, then remove repeated suppliers.
    matched = pd.merge(similar_rows, df_original, how='inner')
    supplier_names = matched["supplier_name"]
    supplier_names.drop_duplicates(inplace=True)

    return add_ratings(supplier_names, df_original)
# Entry point of the script: greet, compute recommendations, store them.
show_welcome_text()
combinations = get_combinations()

# One [combination, recommended suppliers] pair per input combination.
recommended_supplier_list = [
    [combi, supplier_recommend(combi, df)] for combi in combinations
]
print(recommended_supplier_list)

# Write the collected recommendations to disk as JSON.
result_file = 'result.json'
with open(result_file, 'w') as f:
    json.dump(recommended_supplier_list, f)
Search
Train Selection