the rest of the scripts

2021-07-01 15:22:36 +02:00
parent bb4d0014b6
commit 2d50faeff9
5 changed files with 895 additions and 0 deletions
--- a/DatesParser.py
+++ b/DatesParser.py
@@ -0,0 +1,54 @@
+# -*- coding: utf-8 -*-
+"""
+Created on Fri Apr  2 10:57:40 2021
+
+@author: Dijkhofmf
+"""
+
+import os
+import pandas as pd
+
+pd.options.mode.chained_assignment = None  # default='warn'
+
+# All input files are read from, and the result is written to, this folder.
+Path = r'I:\Mike Dijkhof\Connecare MGP\Data\FinalFiles'
+os.chdir(Path)
+
+FilenameOutc = 'SurgAdmComp.csv'
+FilenameSACM = 'DataSACM.csv'
+FilenameComplet = 'Complete.csv'
+
+
+def _read_by_study_id(filename):
+    """Read a CSV from the working directory and index it by 'Study ID'."""
+    return pd.DataFrame(pd.read_csv(filename)).set_index('Study ID')
+
+
+def _to_dates(column):
+    """Parse a column of date strings into plain date values (no time part)."""
+    return pd.to_datetime(column).dt.date
+
+
+DFComp = _read_by_study_id(FilenameOutc)
+DFSACM = _read_by_study_id(FilenameSACM)
+DFComplet = _read_by_study_id(FilenameComplet)
+
+Startdate = _to_dates(DFSACM['Start date Fitbit'])
+Enddate = _to_dates(DFSACM['End date Fitbit'])
+
+# One row per patient of the surgery/admission file; every column assigned
+# below is aligned on 'Study ID'.
+DFDates = pd.DataFrame({'Study ID': DFComp.index}).set_index('Study ID')
+
+DFDates['Start'] = Startdate
+DFDates['Surgery'] = _to_dates(DFComp['Date of surgery'])
+DFDates['Preop'] = DFDates['Surgery'] - DFDates['Start']
+DFDates['Discharge'] = _to_dates(DFComp['Date of hospital discharge'])
+DFDates['LOS'] = DFDates['Discharge'] - DFDates['Surgery']
+DFDates['St2Dis'] = DFDates['Discharge'] - DFDates['Start']
+
+# Post-discharge events: the event date followed by the time from discharge
+# to that event, in the column order the output file uses.
+for EventCol, SourceCol, DeltaCol in (
+        ('First Comp', 'Date first complication at home', 'T2C'),
+        ('First Read', 'Date (first) readmission', 'T2R'),
+        ('Sec Read', 'Date second readmission', 'T2SR')):
+    DFDates[EventCol] = _to_dates(DFComp[SourceCol])
+    DFDates[DeltaCol] = DFDates[EventCol] - DFDates['Discharge']
+
+DFDates['End'] = Enddate
+DFDates['Length'] = DFDates['End'] - DFDates['Start']
+
+# Keep only the patients marked as having completed the study.
+CompletedMask = DFComplet['Has patient completed study?'] == 'Yes'
+DFDates = DFDates[CompletedMask]
+
+DFDates.to_csv('Dates.csv')
--- a/DemoParser.py
+++ b/DemoParser.py
@@ -0,0 +1,136 @@
+# -*- coding: utf-8 -*-
+"""
+Created on Mon Mar  8 10:38:31 2021
+
+@author: Dijkhofmf
+"""
+
+# Import stuff
+import os
+import pandas as pd
+import seaborn as sns
+import matplotlib.pyplot as plt
+import seaborn as sns
+
+
+pd.options.mode.chained_assignment = None  # default='warn'
+
+#%% Define filenames and path
+
+FilenameComplete = 'Complete.csv'
+FilenameDemo = 'DemoData.csv'
+Filename_T0 = 'FinalDF_T0.csv'
+
+# Raw string: the backslashes of a Windows path must not be read as escape
+# sequences ('\M', '\C', '\D' and '\F' are invalid escapes in a normal string).
+Path = r'I:\Mike Dijkhof\Connecare MGP\Data\FinalFiles'
+
+# Set path
+os.chdir(Path)
+
+DFComplete = pd.read_csv(FilenameComplete)
+DFDemo = pd.read_csv(FilenameDemo)
+
+# Keep only patients that completed the study. Both files are matched on their
+# default row index, i.e. row by row.
+DFDemo['Complete data'] = DFComplete['Has patient completed study?']
+DFDemo = DFDemo.drop(DFDemo[DFDemo['Complete data'] != 'Yes'].index)
+
+# 'ASA 2' -> 2.0
+DFDemo['ASA-classification'] = DFDemo['ASA-classification'].str.replace('ASA ', '').astype('float64')
+
+# Checkbox fields become 0/1 indicators.
+DFDemo = DFDemo.replace({'Unchecked': 0, 'Checked': 1})
+
+# Columns that are not used in the demographic analysis.
+Dropcols = [
+    'Year of birth',
+    'Subject ID Connecare',
+    'Subject ID Connecare (version 2.0)',
+    'Date subject signed consent',
+    'Nationality',
+    'Language',
+    'Former occupation',
+    'Does the patient have a smartphone that they use?',
+    'How many days a week is the smartphone used?',
+    'Does the patient have a tablet that they use?',
+    'How many days a week is the tablet used?',
+    'Does the patient have a computer/pc that they use?',
+    'How many days a week is the computer/pc used?',
+    'Smart device at  home',
+    'Smart device at inclusion? (check all that apply) (choice=Fitbit)',
+    'Smart device at inclusion? (check all that apply) (choice=Weight scale)',
+    'Indication Surgery',
+    'Comments',
+    'Complete?',
+    'Complete data',
+]
+DFDemo = DFDemo.drop(Dropcols, axis=1)
+DFDemo = DFDemo.set_index('Study ID')
+
+# Calculate CCI score
+# The 19 comorbidity indicator columns sit at positions 10-28 and are summed
+# into one weighted score: positions 10-19 count once, 20-25 twice,
+# 26 three times and 27-28 six times.
+# The 6x slice used to be 26:28, which weighted column 26 a second time
+# (3 * 6 = 18) and left column 28 at weight 1.
+# NOTE(review): assumes the columns are ordered by weight as described above;
+# confirm against the DemoData.csv header.
+DFDemo.iloc[:,20:26] = DFDemo.iloc[:,20:26]*2
+DFDemo.iloc[:,26] = DFDemo.iloc[:,26]*3
+DFDemo.iloc[:,27:29] = DFDemo.iloc[:,27:29]*6
+
+ColMask = DFDemo.columns[10:29]
+DFDemo['Comorb'] = DFDemo[ColMask].sum(axis=1)
+DFDemo = DFDemo.drop(ColMask, axis=1)
+
+#%%
+
+# Attach the patient type stored in the baseline (T0) output file; rows are
+# aligned on 'Study ID'.
+DF_T0 = pd.DataFrame(pd.read_csv(Filename_T0))
+DF_T0 = DF_T0.set_index('Study ID')
+
+DFDemo['Type'] = DF_T0['Pt Type']
+
+ #%% code variables
+
+# Binary variables are coded 0/1 in place.
+DFDemo['Gender'] = DFDemo['Gender'].replace('Female', 0)
+DFDemo['Gender'] = DFDemo['Gender'].replace('Male', 1)
+
+# Categorical variables become dummy columns; drop_first removes the first
+# category of each variable.
+Housing = pd.get_dummies(DFDemo['Housing'], drop_first=True)
+Education = pd.get_dummies(DFDemo['Education'], drop_first=True)
+Smoking = pd.get_dummies(DFDemo['Smoking'], drop_first=True)
+Med_Dif = pd.get_dummies(DFDemo['Difficulty preparing medication?'], drop_first=True)
+Loc_Tu = pd.get_dummies(DFDemo['Location tumour'], drop_first=True)
+Prim_Mal = pd.get_dummies(DFDemo['Primary Malignancy'], drop_first=True)
+
+DFDemo['Recurrent disease?'] = DFDemo['Recurrent disease?'].replace('No', 0)
+DFDemo['Recurrent disease?'] = DFDemo['Recurrent disease?'].replace('Yes', 1)
+
+# The original categorical columns are removed. 'Marital State' and
+# 'Tumour Stage' are dropped without being dummy-coded.
+DFDemo = DFDemo.drop(['Marital State', 'Housing', 'Education', 'Tumour Stage', 'Smoking', 'Difficulty preparing medication?',
+                     'Location tumour', 'Primary Malignancy'], axis=1)
+
+
+#%%
+# NOTE(review): Loc_Tu and Prim_Mal are built above but are not part of this
+# concat -- confirm that leaving them out is intended.
+DFDemo = pd.concat([DFDemo, Housing, Education, Smoking, Med_Dif, Loc_Tu, Prim_Mal], axis=1)
+
+#%% Create Neoadjuvant therapy variable
+
+# Collapse the therapy checkboxes into a single code per patient:
+#   1 = chemotherapy and radiotherapy, 2 = chemotherapy, 3 = immunotherapy,
+#   4 = radiotherapy, 5 = targeted therapy, 0 = none.
+# The first matching branch wins; a row that matches no branch keeps NaN.
+for i,r in DFDemo.iterrows():
+    if (DFDemo.loc[i,'Neo-adjuvant therapy (choice=Chemotherapy)'] == 1) & (DFDemo.loc[i,'Neo-adjuvant therapy (choice=Radiotherapy)'] == 1):
+        DFDemo.loc[i,'Neo'] = 1
+    elif DFDemo.loc[i, 'Neo-adjuvant therapy (choice=Chemotherapy)'] == 1:
+        DFDemo.loc[i,'Neo'] = 2
+    elif DFDemo.loc[i,'Neo-adjuvant therapy (choice=Immunotherapy)'] == 1:
+        DFDemo.loc[i,'Neo'] = 3
+    elif DFDemo.loc[i,'Neo-adjuvant therapy (choice=Radiotherapy)'] == 1:
+        DFDemo.loc[i,'Neo'] = 4
+    elif DFDemo.loc[i,'Neo-adjuvant therapy (choice=Targeted Therapy)'] == 1:
+        DFDemo.loc[i,'Neo'] = 5
+    elif DFDemo.loc[i,'Neo-adjuvant therapy (choice=None)'] == 1:
+      DFDemo.loc[i,'Neo'] = 0
+
+Neo = pd.get_dummies(DFDemo['Neo'], drop_first=True)
+
+# NOTE(review): the chemotherapy column is listed twice in NeoDrop.
+NeoDrop = ['Neo-adjuvant therapy (choice=Chemotherapy)','Neo-adjuvant therapy (choice=Chemotherapy)','Neo-adjuvant therapy (choice=Immunotherapy)', 
+            'Neo-adjuvant therapy (choice=Radiotherapy)', 'Neo-adjuvant therapy (choice=None)', 'Neo-adjuvant therapy (choice=Targeted Therapy)', 'Neo']
+
+DFDemo = DFDemo.drop(NeoDrop, axis=1)
+
+DFDemo = pd.concat([DFDemo, Neo], axis=1)
+    
+#%%
+# displot is a figure-level function that creates its own figure, so no
+# plt.figure() call precedes it (that call only left an empty figure behind).
+sns.displot(DFDemo['Age (years)'])
+
+#%% Compare patients with and without complications
+
+# Every patient whose type is not 'Healthy' is put in the complication group.
+# NOTE(review): that includes patients with a missing 'Type' -- confirm.
+DemoComp = DFDemo[DFDemo['Type'] != 'Healthy'].drop('Type', axis=1)
+DemoNoComp = DFDemo[DFDemo['Type'] == 'Healthy'].drop('Type', axis=1)
+
+from scipy import stats
+
+#outcome = pd.DataFrame(index=['stat', 'p-value'])
+# Independent-samples t-test for every column at once; missing values are
+# ignored. Only the p-values (element 1 of the result) are kept.
+outcomeT = stats.ttest_ind(DemoNoComp, DemoComp, nan_policy='omit')
+
+OutcomeT = outcomeT[1].tolist()
+
+# Mann-Whitney U test per column; again only the p-value is kept.
+OutcomeMW = []
+for column in DemoComp:
+    print(column)
+    outcomeMW = stats.mannwhitneyu(DemoNoComp[column], DemoComp[column])
+    OutcomeMW.append(outcomeMW[1])
+
+
+#DFDemo.to_csv('FinalDemo.csv')
--- a/FinalDF_Parser.py
+++ b/FinalDF_Parser.py
@@ -0,0 +1,217 @@
+# -*- coding: utf-8 -*-
+"""
+Created on Thu Feb 25 11:13:35 2021
+
+@author: Dijkhofmf
+"""
+# Import stuff
+import os
+import pandas as pd
+import numpy as np
+#import seaborn as sns
+#import matplotlib.pyplot as plt
+
+pd.options.mode.chained_assignment = None  # default='warn'
+
+#%% Define filenames and path
+
+Filename_T0 = 'BaselineT0.csv'
+Filename_T1 = 'DischargeT1.csv'
+#Filename_T2 = 'FollowUpT2Data.csv'
+FilenameOutc = 'SurgAdmComp.csv'
+FilenameComplete = 'Complete.csv'
+
+# Raw string: the backslashes of a Windows path must not be read as escape
+# sequences ('\M', '\C', '\D' and '\F' are invalid escapes in a normal string).
+Path = r'I:\Mike Dijkhof\Connecare MGP\Data\FinalFiles'
+
+# Set path
+os.chdir(Path)
+
+DFT0 = pd.read_csv(Filename_T0)
+DFT1 = pd.read_csv(Filename_T1)
+#DFT2 = pd.read_csv(Filename_T2)
+DFComplete = pd.read_csv(FilenameComplete)
+DFCompl = pd.read_csv(FilenameOutc)
+
+#%%
+
+def _keep_completed(DF, as_text):
+    """Keep the rows of patients that completed the study, indexed by 'Study ID'.
+
+    The completion flag is copied from DFComplete row by row (both frames still
+    carry their default row index at this point) into a 'Complete' column, which
+    is also added to the frame that was passed in. With as_text=True every
+    value is converted to str before the index is set.
+    """
+    DF['Complete'] = DFComplete['Has patient completed study?']
+    DF = DF.drop(DF[DF['Complete'] != 'Yes'].index)
+    if as_text:
+        DF = DF.astype('str')
+    return DF.set_index(['Study ID'])
+
+# The questionnaire files are converted to text because the decimal commas are
+# replaced before the numeric conversion further down.
+DFT0 = _keep_completed(DFT0, as_text=True)
+DFT1 = _keep_completed(DFT1, as_text=True)
+#DFT2 = _keep_completed(DFT2, as_text=True)
+
+DFCompl = _keep_completed(DFCompl, as_text=False)
+
+#%%
+# Decimal commas -> decimal points, so the text values can be parsed as floats.
+DFT0 = DFT0.apply(lambda x: x.str.replace(',','.'), axis=1)
+DFT1 = DFT1.apply(lambda x: x.str.replace(',','.'), axis=1)
+#DFT2 = DFT2.apply(lambda x: x.str.replace(',','.'), axis=1)
+
+#%%
+FinalDF_T0 = pd.DataFrame()
+FinalDF_T0[['BMI','GFI', 'HADS_A', 'HADS_D', 'ADL', 'iADL']] = DFT0[['BMI', 'Groningen Frailty Index', 'Anxiety - Hospital Anxiety Depression Scale', 'Depression - Hospital Anxiety Depression Scale', 'ADL', 'iADL']].astype('float64')
+
+FinalDF_T1 = pd.DataFrame()
+FinalDF_T1[['GFI', 'HADS_A', 'HADS_D', 'ADL', 'iADL']] = DFT1[['Groningen Frailty Index', 'Anxiety - Hospital Anxiety Depression Scale', 'Depression - Hospital Anxiety Depression Scale', 'ADL', 'iADL']].astype('float64')
+
+FinalDF_T2 = pd.DataFrame()
+#FinalDF_T2[['GFI', 'HADS_A', 'HADS_D', 'ADL', 'iADL']] = DFT2[['Groningen Frailty Index', 'Anxiety - Hospital Anxiety Depression Scale', 'Depression - Hospital Anxiety Depression Scale', 'ADL', 'iADL']].astype('float64')
+
+#%% TUG_T0
+# A missing attempt is coded 0 for the calculation: with two attempts the score
+# is their mean, with one attempt it is that attempt, and with none it stays 0
+# and is turned back into NaN afterwards.
+TUG1_T0 = DFT0['Timed to Up&Go - attempt 1 (sec)'].astype('float64').fillna(0)
+TUG2_T0 = DFT0['Timed to Up&Go - attempt 2 (sec)'].astype('float64').fillna(0)
+
+BothAttempts = (TUG1_T0 != 0) & (TUG2_T0 != 0)
+TUGSum_T0 = TUG1_T0 + TUG2_T0
+FinalDF_T0['TUGTot'] = TUGSum_T0.where(~BothAttempts, TUGSum_T0/2).replace(0, np.nan)
+
+# TUG_T1 Assuming that all missing data were due to physical disabilities --> NaNs to 30 seconds
+TUG1_T1 = DFT1['Timed to Up&Go - attempt 1 (sec)'].astype('float64').fillna(30)
+TUG2_T1 = DFT1['Timed to Up&Go - attempt 2 (sec)'].astype('float64').fillna(30)
+
+FinalDF_T1['TUGTot'] = (TUG1_T1 + TUG2_T1)/2
+
+
+# TUG_T2
+#FinalDF_T2['TUG1'] = DFT2['Timed to Up&Go - attempt 1 (sec)'].astype('float64').fillna(0)
+#FinalDF_T2['TUG2'] = DFT2['Timed to Up&Go - attempt 2 (sec)'].astype('float64').fillna(0)
+
+# for i, r in FinalDF_T2.iterrows():
+#     if FinalDF_T2.loc[i,'TUG1'] != 0 and FinalDF_T2.loc[i,'TUG2'] != 0:
+#         FinalDF_T2.loc[i,'TUGTot'] = (FinalDF_T2.loc[i,'TUG1']+FinalDF_T2.loc[i,'TUG2'])/2
+#     else:
+#         FinalDF_T2.loc[i,'TUGTot'] = (FinalDF_T2.loc[i,'TUG1']+FinalDF_T2.loc[i,'TUG2'])/1
+      
+# FinalDF_T2['TUG1'] = FinalDF_T2['TUG1'].replace(0, np.nan)
+# FinalDF_T2['TUG2'] = FinalDF_T2['TUG2'].replace(0, np.nan)
+# FinalDF_T2['TUGTot'] = FinalDF_T2['TUGTot'].replace(0, np.nan)
+
+#%%
+# Handgrip strength (HGS): three attempts per hand at T0 and T1 are averaged
+# per hand, then reduced to one value for the dominant hand.
+# The source column labels are reproduced exactly as the script reads them,
+# including the 'rigth' spelling of the first two right-hand attempts.
+HGSRightCols = ['Handgrip Strength test Attempt 1 rigth','Handgrip Strength test Attempt 2 rigth','Handgrip Strength test Attempt 3 right']
+HGSLeftCols = ['Handgrip Strength test Attempt 1 left', 'Handgrip Strength test Attempt 2 left', 'Handgrip Strength test Attempt 3 left']
+HGSWorkCols = ['HGSR1', 'HGSR2', 'HGSR3', 'HGSRAvg','HGSL1', 'HGSL2', 'HGSL3', 'HGSLAvg']
+
+for Source, Target in ((DFT0, FinalDF_T0), (DFT1, FinalDF_T1)):
+    Target[['HGSR1','HGSR2','HGSR3']] = Source[HGSRightCols].astype('float64')
+    Target['HGSRAvg'] = (Target['HGSR1'] + Target['HGSR2'] + Target['HGSR3'])/3
+    Target[['HGSL1','HGSL2','HGSL3']] = Source[HGSLeftCols].astype('float64')
+    Target['HGSLAvg'] = (Target['HGSL1'] + Target['HGSL2'] + Target['HGSL3'])/3
+
+FinalDF_T0['Dominance'] = DFT0['Hand dominance']
+
+# Patients for whom no handgrip test was done at T1 get a strength of 0.
+NoGripTest = DFT1['Handgrip Strength Test'] == 'No'
+FinalDF_T1.loc[NoGripTest, HGSWorkCols] = 0
+
+# Accept the correct spelling next to the 'Rigth' spelling the script compared
+# against, so right-handed patients are not averaged over both hands when the
+# data is spelled correctly.
+RightLabels = ('Right', 'Rigth')
+
+for index in FinalDF_T1.index:
+    Dominance = FinalDF_T0.loc[index, 'Dominance']
+    for Frame in (FinalDF_T0, FinalDF_T1):
+        if Dominance in RightLabels:
+            Frame.loc[index, 'HGSDom'] = Frame.loc[index,'HGSRAvg']
+        elif Dominance == 'Left':
+            Frame.loc[index, 'HGSDom'] = Frame.loc[index,'HGSLAvg']
+        else:
+            # Any other value: use the mean of both hands.
+            Frame.loc[index, 'HGSDom'] = (Frame.loc[index,'HGSRAvg']+Frame.loc[index,'HGSLAvg'])/2
+
+# Only the dominant-hand value is kept in the final tables.
+FinalDF_T0 = FinalDF_T0.drop(HGSWorkCols + ['Dominance'], axis=1)
+FinalDF_T1 = FinalDF_T1.drop(HGSWorkCols, axis=1)
+
+# FinalDF_T2[['HGSR1','HGSR2','HGSR3']] = DFT2[['Handgrip Strength test Attempt 1 right','Handgrip Strength test Attempt 2 right','Handgrip Strength test Attempt 3 right']].astype('float64')
+# FinalDF_T2['HGSRAvg'] = (FinalDF_T2['HGSR1'] + FinalDF_T2['HGSR2'] + FinalDF_T2['HGSR3'])/3
+# FinalDF_T2[['HGSL1','HGSL2','HGSL3']] = DFT2[['Handgrip Strength test Attempt 1 left', 'Handgrip Strength test Attempt 2 left', 'Handgrip Strength test Attempt 3 left']].astype('float64')
+# FinalDF_T2['HGSLAvg'] = (FinalDF_T2['HGSL1'] + FinalDF_T2['HGSL2'] + FinalDF_T2['HGSL3'])/3
+
+# if FinalDF_T0['Dominance'] == 'Right':
+#     FinalDF_T2['HSGDom'] = FinalDF_T0['HGSRAvg']
+# elif FinalDF_T0['Dominance'] == 'Left':
+#     FinalDF_T2['HSGDom'] = FinalDF_T0['HGSLAvg']
+# else: 
+#     FinalDF_T2['HGSDom'] = (FinalDF_T0['HGSRAvg']+FinalDF_T0['HGSLAvg'])/2
+
+#%%
+# Columns 15-58 of the baseline table are passed on as the EORTC items.
+EORTCCols = DFT0.columns[15:59].tolist()
+EORTCScoresT0 = DFT0[EORTCCols]
+#EORTCScoresT2 = DFT2[EORTCCols]
+
+
+#%%
+# The EORTC and SQUASH helper modules are imported after switching to the
+# folder below, and the working directory is switched back to the data folder
+# afterwards. NOTE(review): this relies on the current directory being on the
+# import path -- confirm for the way the script is run.
+# Raw string: '\M' and '\P' are invalid escape sequences in a normal string.
+os.chdir(r'I:\Mike Dijkhof\Python')
+
+import EORTC as eor
+import SQUASH as sq
+
+NewEORTCScoresT0 = eor.EORTCCalculator(EORTCScoresT0, EORTCCols)
+#NewEORTCScoresT2 = eor.EORTCCalculator(EORTCScoresT2, EORTCCols)
+
+# NOTE(review): EORTCScore receives EORTCScoresT0 rather than the
+# NewEORTCScoresT0 returned just above -- confirm which one is intended.
+EORTCT0 = eor.EORTCScore(EORTCScoresT0)
+#EORTCT2 = eor.EORTCScore(EORTCScoresT2)
+
+os.chdir(Path)
+
+#%% Figures
+
+# for index, row in EORTC.iterrows():
+#     plt.figure(figsize=(20,8))
+#     plt.title('EORTC preoperative outcomes pt ' + str(index))
+#     sns.barplot(x=EORTC.columns, y=EORTC.loc[index,:])
+
+#%%
+
+# Extract the SQUASH questionnaire items from the baseline table with the
+# project-local SQUASH module.
+SQUASHScoresT0 = sq.SQUASHParse(DFT0)
+#SQUASHScoresT2 = sq.SQUASHParse(DFT2)
+
+# These columns are removed before the remaining ones are converted to floats.
+ColsToDrop = ['SQUASH baseline afgenomen?', 'Woon werkverkeer?', 'Werk?', 'Huishoudelijk werk?']
+
+SQUASHScoresT0 = SQUASHScoresT0.drop(ColsToDrop, axis=1)
+SQUASHScoresT0 = SQUASHScoresT0.astype('float64')
+
+#SQUASHScoresT2 = SQUASHScoresT2.drop(ColsToDrop, axis=1)
+#SQUASHScoresT2 = SQUASHScoresT2.astype('float64')
+
+# Compute the SQUASH score from the numeric items.
+SQUASHT0 = sq.SQUASHScore(SQUASHScoresT0)
+#SQUASHT2 = sq.SQUASHScore(SQUASHScoresT2)
+
+#%%
+# Label every patient by whether a complication occurred at home. The values
+# are copied by position (not by 'Study ID'), so the row order of DFCompl has
+# to match the row order of the final tables.
+PtTypeValues = DFCompl.loc[:,'Complications at home during monitoring ? '].values
+
+for Frame in (FinalDF_T0, FinalDF_T1):
+    Frame['Pt Type'] = PtTypeValues
+    Relabelled = Frame['Pt Type'].str.replace('Yes', 'Complication')
+    Frame['Pt Type'] = Relabelled.str.replace('No', 'Healthy')
+
+# The T2 (follow-up) table is not produced by this script at the moment.
+
+
+#%% Save FinalDF to .csv file
+
+for Frame, OutputName in ((FinalDF_T0, 'FinalDF_T0.csv'), (FinalDF_T1, 'FinalDF_T1.csv')):
+    Frame.to_csv(OutputName)
+#FinalDF_T2.to_csv('FinalDF_T2.csv')
--- a/PAParser.py
+++ b/PAParser.py
@@ -0,0 +1,415 @@
+# -*- coding: utf-8 -*-
+"""
+Script for parsing the Fitbit data into graphs.
+@author M.F. Dijkhof
+"""
+# Import stuff
+import os
+import pandas as pd
+import seaborn as sns
+import numpy as np
+import matplotlib.pyplot as plt
+
+# Disable copy overwrite warning
+pd.options.mode.chained_assignment = None  # default='warn'
+
+#%% Define filenames and path
+
+FilenameComp = 'SurgeryAndAdmission2.csv' #Surg and Adm + Complications
+FilenamePA = 'PA_Data.csv'
+FilenameSteps = 'StepData.csv'
+FilenameComplete = 'Complete.csv'
+FilenameOutcome = 'Complications.csv'
+
+# Raw string: the backslashes of a Windows path must not be read as escape
+# sequences ('\M', '\C' and '\D' are invalid escapes in a normal string).
+Path = r'I:\Mike Dijkhof\Connecare MGP\Data'
+
+# Set path
+os.chdir(Path)
+
+#%% Create DF from files
+
+# Every table is indexed by 'Study ID' so the tables can be aligned per patient.
+DFComp = pd.read_csv(FilenameComp).set_index('Study ID')
+DFPA = pd.read_csv(FilenamePA).set_index('Study ID')
+DFSteps = pd.read_csv(FilenameSteps).set_index('Study ID')
+DFComplete = pd.read_csv(FilenameComplete).set_index('Study ID')
+DFOutcome = pd.read_csv(FilenameOutcome).set_index('Study ID')
+
+#%%
+# Remove all incomplete cases
+CompleteCheck = DFComplete['Has patient completed study?'] == 'Yes'
+
+DFComp = DFComp[CompleteCheck]
+DFPA = DFPA[CompleteCheck]
+DFOutcome = DFOutcome[CompleteCheck]
+DFSteps = DFSteps[CompleteCheck]
+
+
+# Bring the PA data into the working format: one row per patient, without the
+# 'Complete?' column.
+# This used to rebuild the table one row at a time with DataFrame.append, which
+# no longer exists in pandas 2.0 and only reproduced the rows and columns DFPA
+# already has. NOTE(review): the rebuilt table probably held object-dtype
+# columns, while this keeps the dtypes read from the CSV -- confirm nothing
+# downstream depends on that.
+NewDF = DFPA.drop(['Complete?'], axis=1)
+
+# Do the same for Step data
+NewStepDF = DFSteps.drop(['Complete?'], axis=1)
+
+#%% Create DF with important dates
+
+# Select the event-date columns of the surgery/admission table.
+DFDates = DFComp [['Date of surgery','Date of hospital discharge', 
+'Date first complication at home', 'Date (first) readmission', 
+'Date discharge after first readmission', 'Date second readmission',
+'Date discharge second readmission']]
+
+# Parse every selected column into plain date values (no time part).
+for i in DFDates:
+    DFDates[i] = pd.to_datetime(DFDates[i]).dt.date
+    
+# All intervals below are measured from the date of surgery.
+DFDates['LOS'] = DFDates['Date of hospital discharge'] - DFDates['Date of surgery'] #LOS = Length of stay
+DFDates['TTC'] = DFDates['Date first complication at home'] - DFDates['Date of surgery'] #TTC = Time to complication
+DFDates['TTR'] = DFDates['Date (first) readmission'] - DFDates['Date of surgery'] #TTR = Time to readmission
+DFDates['TT2R'] = DFDates['Date second readmission'] - DFDates['Date of surgery'] #TT2R = Time to second readmission
+
+#%% Create coordinates from the dates  for the plots 
+
+# One row per patient holding the four intervals as a number of days; these are
+# used as x-positions of the vertical event lines in the plots.
+AXVcoord = pd.DataFrame(columns= ['LOS', 'TTC', 'TTR', 'TT2R'])
+
+# Note: iterrows() yields (index label, row), so 'rows' holds the Study ID
+# here and 'index' holds the row data.
+for rows, index in DFDates.iterrows():
+    AXVcoord.loc[rows, 'LOS'] = DFDates['LOS'].loc[rows].days
+    AXVcoord.loc[rows, 'TTC'] = DFDates['TTC'].loc[rows].days
+    AXVcoord.loc[rows, 'TTR'] = DFDates['TTR'].loc[rows].days
+    AXVcoord.loc[rows, 'TT2R'] = DFDates['TT2R'].loc[rows].days
+
+# The same values as a positional list of rows and as an array; the Study ID
+# labels are not carried over into these two.
+AXVcomb = AXVcoord.values.tolist()
+AXVArray = np.array(AXVcomb)
+
+
+#%% Create DFs for each PA level
+
+# Each slice selects a contiguous run of columns by label (both ends included).
+# NoActDF has no start label, so it also keeps every column that precedes the
+# 'No activity' columns in NewDF.
+# NOTE(review): the 'Low' and 'High' start labels end with a trailing space
+# while the 'Medium' one does not -- presumably this mirrors the CSV header;
+# confirm.
+NoActDF = NewDF.loc[:, :'No activity After Surgery: 90']
+LowActDF = NewDF.loc[:, 'Low activity Before Surgery: -1 ':'Low activity After Surgery: 90']
+MedActDF = NewDF.loc[:, 'Medium activity Before Surgery: -1':'Medium activity After Surgery: 90']
+HighActDF = NewDF.loc[:, 'High activity Before Surgery: -1 ':'High activity After Surgery: 90']
+
+def MakeStepDF(NewDF):
+    """Return the daily step counts of NewDF as a float table.
+
+    Takes the columns at positions 321-426, removes the
+    'Days Fitbit prescribed after surgery' column, turns every "not available"
+    marker into NaN and converts the remaining values to float64.
+
+    Blank cells ('' and ' ') are treated as missing as well. A single space
+    used to be replaced by an empty string, which made the float conversion
+    fail.
+    """
+    StepDF = NewDF.iloc[:,321:427]
+    StepDF = StepDF.drop('Days Fitbit prescribed after surgery', axis=1)
+    # Blank cells plus every spelling of "not available" the script handles.
+    MissingTokens = ['', ' ', 'N.A.', 'N.A. ', 'NA.', 'n.a.', 'N.A', 'NaN']
+    StepDF = StepDF.replace(MissingTokens, np.nan)
+    StepDF = StepDF.astype('float64')
+    return StepDF
+
+StepDF = MakeStepDF(NewStepDF)
+
+#%% Day -14 to surgery were in the wrong order so we have to flip the first 14 days 
+
+def DayFlipper(DF):
+    """Return DF with its first 14 columns in reverse order.
+
+    All columns after the first 14 keep their position; the frame that was
+    passed in is not modified.
+    """
+    Columns = list(DF.columns)
+    Reordered = list(reversed(Columns[:14])) + Columns[14:]
+    return DF[Reordered]
+
+# Put the first 14 columns of every table in reverse order and print the
+# resulting column order as a visual check.
+NoActDF = DayFlipper(NoActDF)
+print(NoActDF.columns)
+LowActDF = DayFlipper(LowActDF)
+print(LowActDF.columns)
+MedActDF = DayFlipper(MedActDF)
+print(MedActDF.columns)
+HighActDF = DayFlipper(HighActDF)
+print(HighActDF.columns)
+StepDF = DayFlipper(StepDF)
+print(StepDF.columns)
+
+#%%
+# Relabel the day columns as integers -14 .. 90 (105 columns).
+# NoActDF is not relabelled and keeps its original column names.
+OldColumns = LowActDF.columns
+NewColumns = range(-14, 91)
+
+LowActDF.columns = NewColumns
+MedActDF.columns = NewColumns
+HighActDF.columns = NewColumns
+StepDF.columns = NewColumns
+
+# Set NaN to zeroes in order to calculate the total amount of activity
+LowActDFZeroes = LowActDF.fillna(0)
+MedActDFZeroes = MedActDF.fillna(0)
+HighActDFZeroes = HighActDF.fillna(0)
+StepDFZeroes = StepDF.fillna(0)
+
+# TotActDF is NaN on any day where one of the three levels is missing;
+# TotActDFZeroes counts a missing level as 0 minutes.
+TotActDF = LowActDF + MedActDF + HighActDF
+TotActDFZeroes = LowActDFZeroes + MedActDFZeroes + HighActDFZeroes
+
+# Remove pts that reported less than threshold PA days
+# NOTE(review): NaNRowDrop keeps the patients with fewer than Threshold missing
+# days. With 105 day columns and Threshold = 200 the mask is always True, so no
+# patient is removed here -- confirm that this is intended.
+Threshold = 200
+
+NaNCount = LowActDF.isnull().sum(axis=1) # Count days without data per patient
+NaNRowDrop = (LowActDF.isnull().sum(axis=1)) < Threshold
+
+# The cleaned activity tables use the zero-filled values; NoActDFClean keeps
+# its NaNs.
+NoActDFClean = NoActDF[NaNRowDrop] 
+LowActDFClean = LowActDFZeroes[NaNRowDrop]
+MedActDFClean = MedActDFZeroes[NaNRowDrop]
+HighActDFClean = HighActDFZeroes[NaNRowDrop]
+TotActDFClean = TotActDFZeroes[NaNRowDrop]
+
+#%%
+
+# NoActDFClean['Group'] = 'Complication'
+# LowActDFClean['Group'] = 'Complication'
+# MedActDFClean['Group'] = 'Complication'
+# HighActDFClean['Group'] = 'Complication'
+# TotActDFClean['Group'] = 'Complication'
+# StepDF['Group'] = 'Complication'
+
+def Grouper(DF):
+    """Add a 'Group' column with the complication status and return DF.
+
+    A patient is labelled 'Complication' when DFOutcome reports 'Yes' for
+    complications at home, and 'No Comp' in every other case (including
+    patients without an entry in DFOutcome). Rows are matched on the index.
+    The column is added to the frame that was passed in.
+    """
+    HadComplication = DFOutcome['Complications at home during monitoring ? '] == 'Yes'
+    Labels = pd.Series('Complication', index=DF.index)
+    DF['Group'] = Labels.where(HadComplication, other='No Comp')
+    return DF
+
+NoActDFClean = Grouper(NoActDFClean)
+LowActDFClean = Grouper(LowActDFClean)
+MedActDFClean = Grouper(MedActDFClean)
+HighActDFClean = Grouper(HighActDFClean)
+TotActDFClean = Grouper(TotActDFClean)
+StepDF = Grouper(StepDF)
+
+# #%% Divide Comps, Non-comps and Unknown-Comps
+
+# LowActComp = LowActDFClean.loc[NewDF['Complications at Home'] == 'Yes']
+# MedActComp = MedActDFClean.loc[NewDF['Complications at Home'] == 'Yes']
+# HighActComp = HighActDFClean.loc[NewDF['Complications at Home'] == 'Yes']
+# TotActComp = TotActDFClean.loc[NewDF['Complications at Home'] == 'Yes']
+
+# LowActNoComp = LowActDFClean.loc[NewDF['Complications at Home'] == 'No']
+# MedActNoComp = MedActDFClean.loc[NewDF['Complications at Home'] == 'No']
+# HighActNoComp = HighActDFClean.loc[NewDF['Complications at Home'] == 'No']
+# TotActNoComp = TotActDFClean.loc[NewDF['Complications at Home'] == 'No']
+
+# LowActUnk = LowActDFClean.loc[(NewDF['Complications at Home'] != 'Yes') & (NewDF['Complications at Home'] != 'No')]
+# MedActUnk = MedActDFClean.loc[(NewDF['Complications at Home'] != 'Yes') & (NewDF['Complications at Home'] != 'No')]
+# HighActUnk = HighActDFClean.loc[(NewDF['Complications at Home'] != 'Yes') & (NewDF['Complications at Home'] != 'No')]
+# TotActUnk = TotActDFClean.loc[(NewDF['Complications at Home'] != 'Yes') & (NewDF['Complications at Home'] != 'No')]
+
+#%% Plot comps, non-comps amd unknown patient data with event-dates 
+
+colors = ['k','c','r', 'r'] # k=discharge, c=complication, r=readmissions
+
+def _day_values(DF, index):
+    """Return one patient's daily values as floats, without the 'Group' label."""
+    return DF.loc[index].drop('Group', errors='ignore').astype('float64')
+
+def _event_days(AXV, index):
+    """Return the event days (discharge, complication, readmissions) of a patient.
+
+    A table indexed by Study ID is looked up by label. A plain list keeps the
+    positional lookup (Study ID - 1), which is only right when the Study IDs
+    are 1, 2, 3, ... without gaps.
+    """
+    if hasattr(AXV, 'loc'):
+        return AXV.loc[index].tolist()
+    return AXV[index-1]
+
+def PAPlotter(Low, Med, High, Tot, Step, AXV):
+    """Draw one figure per patient of Tot with the PA minutes and step counts.
+
+    Low, Med, High and Tot hold the minutes of activity per day and are drawn
+    on the left axis; Step holds the steps per day and is drawn on the right
+    axis. All five are indexed by Study ID. AXV supplies, per patient, the day
+    numbers of the events that are marked with dotted vertical lines (colours
+    from the module-level 'colors'); the day of surgery (0) is marked with a
+    dashed line.
+
+    AXV may be a table indexed by Study ID (preferred: incomplete patients are
+    filtered out earlier in the script, so a positional lookup can point at
+    another patient) or a list ordered by Study ID - 1.
+    """
+    for index in Tot.index:
+
+        fig, ax1 = plt.subplots(figsize=(20,8))
+
+        ax1.plot(_day_values(Low, index), 'b:')
+        ax1.plot(_day_values(Med, index), 'r:')
+        ax1.plot(_day_values(High, index), 'y:')
+        ax1.plot(_day_values(Tot, index))
+        ax1.set_ylabel('Minutes of PA')
+        ax1.set_xlabel('Days')
+        plt.ylim(0,1440)
+
+        plt.vlines(x=0, ymin=0, ymax=1440, linestyle='dashed')
+        plt.vlines(_event_days(AXV, index), ymin= 0, ymax= 1440, colors=colors, linestyle='dotted')
+
+        # Second y-axis for the step counts.
+        ax2 = ax1.twinx()
+        ax2.plot(_day_values(Step, index), 'k')
+        ax2.set_ylabel('Steps per day')
+
+        plt.title('PA levels comp pt' + str(index))
+        plt.ylim(0,25000)
+
+# AXVcoord is indexed by Study ID, so every patient gets his or her own events.
+PAPlotter(LowActDFClean, MedActDFClean, HighActDFClean, TotActDFClean, StepDF, AXVcoord)
+#PAPlotter(LowActNoComp, MedActNoComp, HighActNoComp,TotActNoComp,  StepDF, AXVcomb, 'No Complication')
+#PAPlotter(LowActUnk, MedActUnk, HighActUnk, TotActUnk, StepDF, AXVcomb, 'Unknown Complication')
+
+    
+#%% Calculate differences between comp PA and no comp PA
+
+def PAStats(DF, group):
+    """Print summary statistics of the daily PA minutes for one patient group.
+
+    DF has one row per patient and one column per day, labelled with the day
+    number relative to surgery. For the whole period, the 14 preoperative days
+    and the first 30, 60 and 90 postoperative days, the mean of the per-day
+    means is printed together with the standard deviation of the per-day
+    standard deviations. Nothing is returned.
+    """
+    Periods = [
+        ('Total Mean min PA =', DF),
+        ('Preoperative Mean min PA =', DF.loc[:,-14:-1]),
+        ('30 days Postop. Mean min PA =', DF.loc[:,0:30]),
+        ('60 days Postop. Mean min PA =', DF.loc[:,0:60]),
+        ('90 days Postop. Mean min PA =', DF.loc[:,0:90]),
+    ]
+    # Compute everything first, so nothing is printed when a period fails.
+    Summary = []
+    for Label, Part in Periods:
+        Mean = Part.mean().mean()
+        Std = Part.std().std()
+        Summary.append((Label, Mean, Std))
+
+    print('Stats '+ group + ':', '\n')
+    for Label, Mean, Std in Summary[:-1]:
+        print(Label + str(Mean), 'Std=' + str(Std))
+    Label, Mean, Std = Summary[-1]
+    print(Label + str(Mean), 'Std=' + str(Std),'\n')
+
+# Split the cleaned total-activity table by complication status.
+# TotActComp, TotActNoComp and TotActUnk were only defined in commented-out
+# code, so the calls below failed with a NameError. The 'Group' label column is
+# left out because the statistics need numeric day columns only.
+CompStatus = DFOutcome['Complications at home during monitoring ? '].reindex(TotActDFClean.index)
+TotActDays = TotActDFClean.drop(columns='Group', errors='ignore')
+
+TotActComp = TotActDays[CompStatus == 'Yes']
+TotActNoComp = TotActDays[CompStatus == 'No']
+TotActUnk = TotActDays[~CompStatus.isin(['Yes', 'No'])]
+
+PAStats(TotActComp, 'complication')
+PAStats(TotActNoComp, 'no complication')
+PAStats(TotActUnk, 'unkown')
+
+
+#%% Plot histogram number of missing values
+# Number of days without data per patient, split by complication status.
+# The status is read from DFOutcome: this script defines no DFCompl, and
+# DFOutcome is the table that holds this column.
+CountDF = pd.DataFrame(NaNCount)
+CountDF['Complication'] = DFOutcome['Complications at home during monitoring ? ']
+CountDF.columns = ['Count', 'Complication']
+
+# The palette is passed to the plot itself; calling sns.color_palette() after
+# plotting only returns a palette and does not change the figure.
+sns.displot(CountDF, x='Count', bins=[10, 20, 30, 40, 50, 60, 70, 80, 90], hue='Complication', palette='colorblind')
+
+#%%
+
+def RollingAvAct(DF, windowsize):
+    """Return the moving average over the days for every patient (row) of DF.
+
+    For each row the mean of the current and up to windowsize - 1 preceding
+    values is taken; the first values of a row are averaged over the values
+    available so far. The result has the same row and column labels as DF.
+
+    The rows are collected in a list and combined once, because
+    DataFrame.append no longer exists in pandas 2.0.
+    """
+    Rows = [row.rolling(windowsize, min_periods=1).mean() for _, row in DF.iterrows()]
+    return pd.DataFrame(Rows)
+
+AvTotActComp =pd.DataFrame(RollingAvAct(TotActComp, 3))
+AvTotActNoComp = pd.DataFrame(RollingAvAct(TotActNoComp, 3))
+
+    
+#%%
+def Trendliner(DF, Dates, group):
+    """Fit and plot three linear trend lines per patient of DF.
+
+    DF has one row per patient and one column per day, labelled with the day
+    number relative to surgery. A first-degree polynomial is fitted to the
+    preoperative days (-14 .. 0), to the hospital stay (day 0 up to the
+    discharge day) and to the days from discharge onwards. Every patient gets
+    a figure with the activity curve, the three fitted lines and the event
+    lines taken from Dates.
+
+    Returns a dict with the keys 'Pre', 'LOS' and 'Post'; each value is a
+    table with the columns 'Slope', 'Int' and 'Group' and one row per patient.
+
+    NOTE(review): the discharge day and the event days are looked up by
+    position (index - 1) in the module-level AXVArray and in Dates. That
+    presumably assumes the Study IDs are 1, 2, 3, ... without gaps, in the
+    same order as those lists -- confirm.
+    """
+    newPASlopePre = pd.DataFrame(columns=['Slope', 'Int', 'Group'])
+    newPASlopeLOS = pd.DataFrame(columns=['Slope', 'Int', 'Group'])
+    newPASlopePost = pd.DataFrame(columns=['Slope', 'Int', 'Group'])
+    
+    for index, row in DF.iterrows():
+        
+        counter = index-1
+        # Discharge day of this patient (first column of AXVArray).
+        DisDay = int(AXVArray[counter,0])    
+        # Column position just past the discharge day: the first column is
+        # day -14, so day d sits at position d + 14.
+        DisDay2 = int(DisDay+15)
+        # Column position of the discharge day itself.
+        DisDay3 = int(DisDay2-1) 
+        
+        # Calculate trendline pre-op
+        Xpre = DF.columns[0:15]
+        Ypre = DF.loc[index,-14:0]
+        z_pre = np.polyfit(Xpre, Ypre, 1)
+        p_pre = np.poly1d(z_pre)
+        newPASlopePre.loc[index,'Slope'] = z_pre[0]
+        newPASlopePre.loc[index,'Int'] = z_pre[1]
+        newPASlopePre.loc[index, 'Group'] = group
+        
+        # Calculate trendline LOS
+        # X is selected by position and Y by label; both cover day 0 up to and
+        # including the discharge day.
+        Xlos = DF.columns[14:DisDay2]
+        Ylos = DF.loc[index,0:DisDay]
+        z_los = np.polyfit(Xlos, Ylos, 1)
+        p_los = np.poly1d(z_los)
+        newPASlopeLOS.loc[index,'Slope'] = z_los[0]
+        newPASlopeLOS.loc[index,'Int'] = z_los[1]
+        newPASlopeLOS.loc[index, 'Group'] = group
+        
+       # Calculate trendline post-op
+        Xpost = DF.columns[DisDay3:]
+        Ypost = DF.loc[index,DisDay:]
+        z_post = np.polyfit(Xpost, Ypost, 1)
+        p_post = np.poly1d(z_post)
+        newPASlopePost.loc[index,'Slope'] = z_post[0]
+        newPASlopePost.loc[index,'Int'] = z_post[1]
+        newPASlopePost.loc[index, 'Group'] = group
+        
+        # Plot figures
+        plt.figure(figsize=(24,8))
+        plt.plot(DF.loc[index])
+        plt.plot(Xpost,p_post(Xpost),'r--')
+        plt.plot(Xpre, p_pre(Xpre), 'b--')
+        plt.plot(Xlos, p_los(Xlos), 'k--')
+        plt.vlines(x=0, ymin=0, ymax=1440, linestyle='dashed')
+        plt.vlines(Dates[counter], ymin= 0, ymax= 1440, colors=colors, linestyle='dotted')
+        plt.xlim(-14,105)
+        plt.ylim(0,1440)
+        plt.ylabel('Minutes of PA')
+        plt.xlabel('Days')
+        plt.title('Mov Avg PA levels pt' + str(index) + '_' + group)
+        
+    d = {'Pre': newPASlopePre, 'LOS':newPASlopeLOS, 'Post': newPASlopePost}
+       
+    return(d)
+
+# Trend lines on the 3-day moving averages of both patient groups.
+TrendDictComp = Trendliner(AvTotActComp, AXVcomb, 'complication')
+TrendDictNoComp= Trendliner(AvTotActNoComp, AXVcomb, 'no complication')
+
+
+#%%
+
+# def SlopeStats(SlopeDict, group):
+#     MeanSlopePre, MeanIntPre = SlopeDict['Pre'].mean()
+#     StdSlopePre, StdIntPre = SlopeDict['Pre'].std()
+#     MeanSlopeLOS, MeanIntLOS = SlopeDict['LOS'].mean()
+#     StdSlopeLOS, StdIntLOS = SlopeDict['LOS'].std()
+#     MeanSlopePost, MeanIntPost = SlopeDict['Post'].mean()
+#     StdSlopePost, StdIntPost = SlopeDict['Post'].std()
+   
+#     print('Stats '+ group + ':', '\n')
+#     print('Mean slope PA Pre-op = '+ str(MeanSlopePre),'Std= ' + str(StdSlopePre))
+#     print('Mean slope PA hospitalization = '+ str(MeanSlopeLOS),'Std= ' + str(StdSlopeLOS))
+#     print('Mean slope PA Post-op = '+ str(MeanSlopePost),'Std= ' + str(StdSlopePost))
+#     print('Mean intersept PA Pre-op = '+ str(MeanIntPre),'Std= ' + str(StdIntPre))
+#     print('Mean intercept PA hospitalization = '+ str(MeanIntLOS),'Std= ' + str(StdIntLOS))
+#     print('Mean intercept PA Post-op = '+ str(MeanIntPre),'Std= ' + str(StdIntPre), '\n')
+   
+#     return(MeanSlopePre, StdSlopePre, MeanSlopeLOS, StdSlopeLOS, MeanSlopePost, StdSlopePost)
+
+# MeanSlopePreComp, StdSlopePreComp, MeanSlopeLOSComp, StdSlopLOSComp, MeanSlopePostComp, StdSlopeComp,  = SlopeStats(TrendDictComp, 'complications')
+# MeanSlopePreNoComp, StdSlopePreNoComp, MeanSlopeLOSNoComp, StdSlopLOSNoComp, MeanSlopePostNoComp, StdSlopeNoComp = SlopeStats(TrendDictNoComp, 'no complications')
+
+
+#%%
+
+# SlopeIntPreComp = pd.DataFrame(TrendDictComp['Pre'])
+# SlopeIntPreComp['Period'] = 'Pre'
+# SlopeIntPreNoComp= pd.DataFrame(TrendDictNoComp['Pre'])
+# SlopeIntPreNoComp['Period'] = 'Pre'
+# SlopeIntLOSComp = pd.DataFrame(TrendDictComp['LOS'])
+# SlopeIntLOSComp['Period'] = 'LOS'
+# SlopeIntLOSNoComp= pd.DataFrame(TrendDictNoComp['LOS'])
+# SlopeIntLOSNoComp['Period'] = 'LOS'
+
+# SlopeIntPostComp = pd.DataFrame(TrendDictComp['Post'])
+# SlopeIntPostComp['Period'] = 'Post'
+# SlopeIntPostNoComp= pd.DataFrame(TrendDictNoComp['Post'])
+# SlopeIntPostNoComp['Period'] = 'Post'
+
+# Slope = pd.DataFrame()
+# Slope = Slope.append([SlopeIntPreComp, SlopeIntPreNoComp, SlopeIntLOSComp, SlopeIntLOSNoComp, SlopeIntPostComp, SlopeIntPostNoComp])
+# Slope['Slope'] = Slope['Slope'].astype('float64')
+# Slope['Int'] = Slope['Int'].astype('float64')
+
+#%%
+# plt.figure(figsize=(12,8))
+# sns.set_theme(style="darkgrid")
+# sns.violinplot(x=Slope['Period'], y=Slope['Slope'],hue=Slope['Group'], palette="muted", split=True)
+# plt.legend(bbox_to_anchor=(1.05, 1), loc=2, borderaxespad=0.)
+
+# plt.figure(figsize=(12,8))
+# sns.set_theme(style="darkgrid")
+# sns.violinplot(x=Slope['Period'], y=Slope['Int'],hue=Slope['Group'], palette="muted", split=True)
+# plt.legend(bbox_to_anchor=(1.05, 1), loc=2, borderaxespad=0.)
+
+#%%
+#fig, axes = plt.subplots(1,2, sharey=True)
+#sns.violinplot(data=newPASlopeComp['Intercept'], ax=axes[0], color='b')
+#sns.violinplot(data=newPASlopeNoComp['Intercept'], ax=axes[1], color='r')
--- a/ScatterBoxplotter.py
+++ b/ScatterBoxplotter.py
@@ -0,0 +1,73 @@
+# -*- coding: utf-8 -*-
+"""
+Created on Fri May 14 09:18:32 2021
+
+@author: Dijkhofmf
+"""
+
+# Import stuff
+import os
+import pandas as pd
+import matplotlib.pyplot as plt
+import seaborn as sns
+from sklearn import preprocessing
+
+#%%  Import data and path
+
+# Raw string: the backslashes of a Windows path must not be read as escape
+# sequences ('\M', '\C', '\D' and '\F' are invalid escapes in a normal string).
+Path = r'I:\Mike Dijkhof\Connecare MGP\Data\FinalFiles'
+
+# Set path
+os.chdir(Path)
+
+#%% Create DF
+FinalDF = pd.read_csv('FinalDataset.csv')
+
+X = pd.DataFrame(FinalDF)
+
+ID = X['Study ID']
+y = X['Pt Type']
+y = y.replace('Healthy', 'No-complication')
+X = X.drop(['Pt Type', 'Study ID'], axis=1)
+
+#%%
+# Standardise every variable, so all plots share a comparable scale.
+X1 = pd.DataFrame(preprocessing.scale(X), columns=X.columns)
+
+X1['Pt Type'] = y
+# set_index returns a new frame; the result has to be assigned to take effect.
+X1 = X1.set_index(ID)
+
+#%%
+
+# Display names for the plots; the order has to match the columns of the
+# dataset, with 'Pt Type' last.
+X1.columns = ['Age (years)', 'Gender', 'Daily alcohol use', 'Medication',
+              'ASA-classification', 'Recurrent disease?', 'Comorb',
+              'Independent, with others', 'Smokes cigarettes/sigar', 'BMI', 'GFI',
+              'HADS_A', 'HADS Depression', 'ADL', 'iADL', 'TUG', 'Handgrip strength',
+              'Avg. Steps/day', 'Avg. MVPA/day', 'Pt Type']
+
+#%%
+import matplotlib.pylab as pylab
+
+params = {'legend.fontsize': 'x-large',
+             'axes.labelsize': 'x-large',
+             'axes.titlesize':'x-large',
+             'xtick.labelsize':'x-large',
+             'ytick.labelsize':'x-large'}
+
+pylab.rcParams.update(params)
+
+# Variables to plot: everything after the first column, except the grouping
+# variable itself (plotting 'Pt Type' against 'Pt Type' is meaningless).
+# NOTE(review): the first column ('Age (years)') is skipped, as it was by the
+# original [1:] slice -- confirm that this is intended.
+plots = [name for name in X1.columns[1:] if name != 'Pt Type']
+
+for name in plots:
+    plt.figure(dpi=720)
+    sns.boxplot(x='Pt Type', y=name, data=X1, boxprops=dict(alpha=0.5))
+    sns.swarmplot(x='Pt Type', y=name, data=X1)
+    plt.title('Swarm-boxplot ' + name)
+
+