# -*- coding: utf-8 -*-
"""
Created on Mon Mar  8 10:38:31 2021

@author: Dijkhofmf

Prepare the demographic data and compare two patient groups.

The script reads three CSV files from ``Path``, keeps the patients whose
'Has patient completed study?' value is 'Yes', collapses the comorbidity
check-boxes into one weighted 'Comorb' score, dummy-codes the categorical
variables, and finally runs an independent t-test and a Mann-Whitney U test
per column between the 'Healthy' rows and all other rows of 'Type'.
"""

# Import stuff
import os

import matplotlib.pyplot as plt
import pandas as pd
import seaborn as sns
from scipy import stats

pd.options.mode.chained_assignment = None  # default='warn'

#%% Define filenames and path

FilenameComplete = 'Complete.csv'
FilenameDemo = 'DemoData.csv'
Filename_T0 = 'FinalDF_T0.csv'

# Raw string: the backslashes are path separators, not escape sequences.
Path = r'I:\Mike Dijkhof\Connecare MGP\Data\FinalFiles'

# Set path
os.chdir(Path)

DFComplete = pd.read_csv(FilenameComplete)
DFDemo = pd.read_csv(FilenameDemo)

# Keep only the patients that completed the study. The two files are matched
# on their default row index.
DFDemo['Complete data'] = DFComplete['Has patient completed study?']
DFDemo = DFDemo.loc[DFDemo['Complete data'] == 'Yes'].copy()

# 'ASA 2' -> 2.0
DFDemo['ASA-classification'] = (
    DFDemo['ASA-classification']
    .str.replace('ASA ', '', regex=False)
    .astype('float64')
)

# Check-box columns become 0/1 so they can be weighted and summed below.
DFDemo = DFDemo.replace({'Unchecked': 0, 'Checked': 1})

Dropcols = [
    'Year of birth',
    'Subject ID Connecare',
    'Subject ID Connecare (version 2.0)',
    'Date subject signed consent',
    'Nationality',
    'Language',
    'Former occupation',
    'Does the patient have a smartphone that they use?',
    'How many days a week is the smartphone used?',
    'Does the patient have a tablet that they use?',
    'How many days a week is the tablet used?',
    'Does the patient have a computer/pc that they use?',
    'How many days a week is the computer/pc used?',
    'Smart device at home',
    'Smart device at inclusion? (check all that apply) (choice=Fitbit)',
    'Smart device at inclusion? (check all that apply) (choice=Weight scale)',
    'Indication Surgery',
    'Comments',
    'Complete?',
    'Complete data',
]

DFDemo = DFDemo.drop(Dropcols, axis=1)
DFDemo = DFDemo.set_index('Study ID')

# Calculate CCI score
# Columns 10-28 are summed into 'Comorb': 10-19 keep weight 1, 20-25 get
# weight 2, 26 gets weight 3 and 27-28 get weight 6.
# NOTE(review): the previous code applied the x6 weight to columns 26:28, so
# column 26 ended up multiplied by 18 and column 28 was left unweighted. The
# 10/6/1/2 split presumably follows the Charlson Comorbidity Index weights -
# confirm against the column order of DemoData.csv.
DFDemo.iloc[:, 20:26] = DFDemo.iloc[:, 20:26] * 2
DFDemo.iloc[:, 26] = DFDemo.iloc[:, 26] * 3
DFDemo.iloc[:, 27:29] = DFDemo.iloc[:, 27:29] * 6

ColMask = DFDemo.columns[10:29]
DFDemo['Comorb'] = DFDemo[ColMask].sum(axis=1)
DFDemo = DFDemo.drop(ColMask, axis=1)

#%%

DF_T0 = pd.read_csv(Filename_T0)
DF_T0 = DF_T0.set_index('Study ID')

# Aligned on 'Study ID'; IDs that are absent from DF_T0 get NaN.
DFDemo['Type'] = DF_T0['Pt Type']

#%% code variables

DFDemo['Gender'] = DFDemo['Gender'].replace({'Female': 0, 'Male': 1})

# NOTE(review): the dummy columns are named after the raw category values
# (no prefix). If two variables share a category label the concat below
# produces duplicate column names - confirm this cannot happen.
Housing = pd.get_dummies(DFDemo['Housing'], drop_first=True)
Education = pd.get_dummies(DFDemo['Education'], drop_first=True)
Smoking = pd.get_dummies(DFDemo['Smoking'], drop_first=True)
Med_Dif = pd.get_dummies(DFDemo['Difficulty preparing medication?'], drop_first=True)
Loc_Tu = pd.get_dummies(DFDemo['Location tumour'], drop_first=True)
Prim_Mal = pd.get_dummies(DFDemo['Primary Malignancy'], drop_first=True)

DFDemo['Recurrent disease?'] = DFDemo['Recurrent disease?'].replace({'No': 0, 'Yes': 1})

DFDemo = DFDemo.drop(
    [
        'Marital State',
        'Housing',
        'Education',
        'Tumour Stage',
        'Smoking',
        'Difficulty preparing medication?',
        'Location tumour',
        'Primary Malignancy',
    ],
    axis=1,
)

#%%

DFDemo = pd.concat([DFDemo, Housing, Education, Smoking, Med_Dif, Loc_Tu, Prim_Mal], axis=1)

#%% Create Neoadjuvant therapy variable

NeoChemo = 'Neo-adjuvant therapy (choice=Chemotherapy)'
NeoRadio = 'Neo-adjuvant therapy (choice=Radiotherapy)'
NeoImmuno = 'Neo-adjuvant therapy (choice=Immunotherapy)'
NeoTarget = 'Neo-adjuvant therapy (choice=Targeted Therapy)'
NeoNone = 'Neo-adjuvant therapy (choice=None)'

# (condition, code) pairs in priority order: the first matching rule wins.
NeoRules = [
    ((DFDemo[NeoChemo] == 1) & (DFDemo[NeoRadio] == 1), 1),
    (DFDemo[NeoChemo] == 1, 2),
    (DFDemo[NeoImmuno] == 1, 3),
    (DFDemo[NeoRadio] == 1, 4),
    (DFDemo[NeoTarget] == 1, 5),
    (DFDemo[NeoNone] == 1, 0),
]

# Rows matching no rule stay NaN. The rules are applied from lowest to
# highest priority so that a higher-priority rule overwrites a lower one.
DFDemo['Neo'] = float('nan')
for NeoMask, NeoCode in reversed(NeoRules):
    DFDemo.loc[NeoMask, 'Neo'] = NeoCode

Neo = pd.get_dummies(DFDemo['Neo'], drop_first=True)

NeoDrop = [NeoChemo, NeoImmuno, NeoRadio, NeoNone, NeoTarget, 'Neo']

DFDemo = DFDemo.drop(NeoDrop, axis=1)
DFDemo = pd.concat([DFDemo, Neo], axis=1)

#%%

# displot is a figure-level function and opens its own figure, so no
# plt.figure() call is needed (it would only leave an empty window behind).
sns.displot(DFDemo['Age (years)'])

#%%

# Rows whose 'Type' is missing (NaN) also end up in DemoComp, because the
# split is "not equal to 'Healthy'" versus "equal to 'Healthy'".
DemoComp = DFDemo[DFDemo['Type'] != 'Healthy']
DemoComp = DemoComp.drop('Type', axis=1)

DemoNoComp = DFDemo[DFDemo['Type'] == 'Healthy']
DemoNoComp = DemoNoComp.drop('Type', axis=1)

# Independent t-test for all columns at once; keep only the p-values.
outcomeT = stats.ttest_ind(DemoNoComp, DemoComp, nan_policy='omit')
OutcomeT = outcomeT[1].tolist()

# Mann-Whitney U test per column; keep only the p-values.
OutcomeMW = []

for column in DemoComp:
    print(column)
    outcomeMW = stats.mannwhitneyu(DemoNoComp[column], DemoComp[column])
    OutcomeMW.append(outcomeMW[1])

#DFDemo.to_csv('FinalDemo.csv')