Code fragments of helper.py

Graph

# helper.py
import itertools
import json

import numpy as np
import pandas as pd
from numpy import dot
from numpy.linalg import norm


def normalize(data):
    '''
    Normalizes input data to be between 0 and 1 (min-max scaling).

    NaN entries are ignored when computing the minimum and maximum and are
    left as NaN in the result. When the minimum and maximum are equal the
    input is returned unchanged, because the scaling would divide by zero.
    '''
    min_val = np.nanmin(data)
    max_val = np.nanmax(data)
    range_val = max_val - min_val
    if range_val == 0:
        return data  # min and max are the same, nothing to scale

    # normalize only non-NaN values
    return np.where(np.isnan(data), data, (data - min_val) / range_val)


def one_hot_encoding(df, enc_col):
    '''
    One hot encodes the specified column(s) and appends the resulting
    indicator columns to a copy of the input df.

    The original column is kept; callers drop it themselves if needed.
    '''
    enc_df = pd.get_dummies(df[enc_col])
    # get_dummies keeps df's index, so concatenating along axis=1 lines the
    # rows up one-to-one. Resetting only the encoded frame's index (as was
    # done before) misaligned rows whenever df did not have a default
    # 0..n-1 index and produced extra NaN-padded rows.
    return pd.concat([df, enc_df], axis=1)


def cosine_sim(v1, v2):
    '''
    Calculates the cosine similarity between two vectors.

    Returns 0.0 when either vector has zero length, since the similarity
    is undefined there and the plain formula would divide by zero.
    '''
    denominator = norm(v1) * norm(v2)
    if denominator == 0:
        return 0.0
    return dot(v1, v2) / denominator


def show_welcome_text():
    ''' Prints the application banner to stdout '''
    text = '''
    ##################################
    # SUPPLIER RECOMMENDATION SYSTEM #
    ##################################
    '''
    print(text)


def get_combinations(path='parameters.json'):
    '''
    Computes all possible combinations of params in a JSON parameters file.

    path: location of the JSON file; defaults to 'parameters.json' in the
          current working directory. The file is expected to hold an object
          whose values are iterables (e.g. lists) of candidate values.

    Returns a list of dicts, one per element of the cartesian product of
    the values, each mapping every parameter name to one chosen value.
    '''
    with open(path, 'r', encoding='utf-8') as file:
        params = json.load(file)

    names = list(params.keys())
    return [
        dict(zip(names, combination))
        for combination in itertools.product(*params.values())
    ]


def preprocess(df):
    '''
    One-hot encodes the known categorical columns that are present in df
    and drops the columns that are not used afterwards.

    Prints the frame before and after processing. The input frame is not
    modified; a new DataFrame is returned.
    '''
    print("\nDataFrame Merged with Input Order:\n", df)

    # encode categorical features as a one-hot numeric array
    categorical_cols = ('volume', 'product_type', 'CO2_report', 'transport_mode')
    for col in categorical_cols:
        if col in df.columns:
            df = one_hot_encoding(df=df, enc_col=col)

    # drop columns that are no longer used
    cols = ['supplier_name', 'rating', 'product_type', 'volume',
            'CO2_report', 'transport_mode']
    # filter out the columns that do not exist
    cols_to_drop = [col for col in cols if col in df.columns]
    # Non-inplace drop: when no categorical column was present, df is still
    # the caller's object and must not be mutated behind their back.
    df = df.drop(columns=cols_to_drop)

    print("\nNormalized/One-Hot Encoded DataFrame:\n", df)
    return df