PADME Train Wiki

Train Selection

Trains
SupplierRecommendation
880
helper.py

Code fragments of helper.py

import pandas as pd
from numpy import dot
import numpy as np
from numpy.linalg import norm
import json
import itertools
def normalize(data):
    ''' Min-max scales the input so its non-NaN values lie between 0 and 1.

    NaN entries are ignored when the minimum and maximum are determined and
    are passed through unchanged. If minimum and maximum are equal, the
    input is returned as-is.
    '''
    lowest = np.nanmin(data)
    span = np.nanmax(data) - lowest
    if span == 0:
        # min and max are the same, so there is nothing to rescale
        return data
    rescaled = (data - lowest) / span
    # keep NaN positions untouched; only real values take the rescaled result
    return np.where(np.isnan(data), data, rescaled)
def one_hot_encoding(df, enc_col):
    ''' One hot encodes the specified column and appends the result to df.

    Parameters:
        df: input DataFrame; it is not modified.
        enc_col: name of the column in df to encode.

    Returns:
        A new DataFrame holding all columns of df followed by one dummy
        column per distinct value of df[enc_col]. The index of df is kept.
    '''
    # get_dummies keeps the index of df[enc_col], so the dummy columns line up
    # row-for-row with df. The index must NOT be reset on the dummies alone:
    # concat(axis=1) aligns on index labels, and a mismatch would yield extra
    # NaN-filled rows whenever df does not use a default 0..n-1 index.
    enc_df = pd.get_dummies(df[enc_col])
    return pd.concat([df, enc_df], axis = 1)
def cosine_sim(v1, v2):
    ''' Calculates the cosine similarity between two vectors.

    Parameters:
        v1, v2: array-likes of equal length.

    Returns:
        dot(v1, v2) / (norm(v1) * norm(v2)), or 0.0 when the product of the
        norms is zero (at least one vector has zero length).
    '''
    denominator = norm(v1) * norm(v2)
    if denominator == 0:
        # The similarity is undefined for a zero-length vector; return 0.0
        # instead of dividing by zero, which would give NaN and a warning.
        return 0.0
    return dot(v1, v2) / denominator
def show_welcome_text():
    ''' Prints the welcome banner of the supplier recommendation system '''
    print(''' ################################## # SUPPLIER RECOMMENDATION SYSTEM # ################################## ''')
def get_combinations(path='parameters.json'):
    ''' Computes all possible combinations of the params in a JSON file.

    Parameters:
        path: location of the JSON file; defaults to 'parameters.json' in
            the current working directory. The file must contain an object
            whose values are iterables of candidate values.

    Returns:
        A list of dicts, one per element of the cartesian product of the
        values, each mapping every key to one of its candidate values.
    '''
    # JSON text is UTF-8; do not depend on the platform's default encoding
    with open(path, 'r', encoding='utf-8') as file:
        params = json.load(file)
    keys = list(params)
    return [dict(zip(keys, values)) for values in itertools.product(*params.values())]
def preprocess(df):
    ''' One hot encodes the categorical columns of df and drops unused ones.

    Each of 'volume', 'product_type', 'CO2_report' and 'transport_mode' that
    is present is one hot encoded via one_hot_encoding. Afterwards those
    columns, together with 'supplier_name' and 'rating', are removed if
    present. The frame is printed before and after processing.

    Parameters:
        df: input DataFrame; the caller's object is not modified.

    Returns:
        The processed DataFrame.
    '''
    print("\nDataFrame Merged with Input Order:\n", df)
    # encode categorical features as a one-hot numeric array
    for enc_col in ('volume', 'product_type', 'CO2_report', 'transport_mode'):
        if enc_col in df.columns:
            df = one_hot_encoding(df = df, enc_col = enc_col)
    # drop columns that are no longer used
    cols = ['supplier_name', 'rating', 'product_type', 'volume', 'CO2_report','transport_mode']
    # filter out the columns that do not exist
    cols_to_drop = [col for col in cols if col in df.columns]
    # Non-mutating drop: when nothing was encoded above, df is still the
    # caller's own object, and an in-place drop would strip its columns.
    df = df.drop(columns = cols_to_drop)
    print("\nNormalized/One-Hot Encoded DataFrame:\n", df)
    return df

Graph

Train Selection