136 lines
5.0 KiB
Python
136 lines
5.0 KiB
Python
# -*- coding: utf-8 -*-
|
|
"""
|
|
Created on Mon Mar 8 10:38:31 2021
|
|
|
|
@author: Dijkhofmf
|
|
"""
|
|
|
|
# Import stuff
|
|
import os
|
|
import pandas as pd
|
|
import seaborn as sns
|
|
import matplotlib.pyplot as plt
|
|
import seaborn as sns
|
|
|
|
|
|
# Switch off pandas' chained-assignment (SettingWithCopy) warning for the
# whole script.
pd.options.mode.chained_assignment = None  # default='warn'

#%% Define filenames and path

# CSV files that are read from the data directory below.
FilenameComplete = 'Complete.csv'
FilenameDemo = 'DemoData.csv'
Filename_T0 = 'FinalDF_T0.csv'

# Raw string: the Windows path contains backslash sequences (\M, \C, \D, \F)
# that are invalid escapes in a normal string literal and raise a
# DeprecationWarning/SyntaxWarning on current Python versions.  The runtime
# value is unchanged.
Path = r'I:\Mike Dijkhof\Connecare MGP\Data\FinalFiles'
# Set path
# Work from the data directory so the bare file names resolve.
os.chdir(Path)

# Load the study-completion overview and the demographic export.
DFComplete = pd.read_csv(FilenameComplete)
DFDemo = pd.read_csv(FilenameDemo)

# Keep only patients whose completion flag is 'Yes'.
# NOTE(review): the flag is copied over by default row label, which presumes
# both files list the patients in the same order - confirm.
DFDemo['Complete data'] = DFComplete['Has patient completed study?']
is_incomplete = DFDemo['Complete data'] != 'Yes'
DFDemo = DFDemo.drop(index=DFDemo[is_incomplete].index)

# Strip the 'ASA ' prefix so the classification can be stored as a float.
asa_class = DFDemo['ASA-classification'].str.replace('ASA ', '')
DFDemo['ASA-classification'] = asa_class.astype('float64')

# Encode check-box answers as 0 (unchecked) / 1 (checked).
DFDemo = DFDemo.replace('Unchecked', 0).replace('Checked', 1)

# Columns removed before the analysis.
Dropcols = [
    'Year of birth',
    'Subject ID Connecare',
    'Subject ID Connecare (version 2.0)',
    'Date subject signed consent',
    'Nationality',
    'Language',
    'Former occupation',
    'Does the patient have a smartphone that they use?',
    'How many days a week is the smartphone used?',
    'Does the patient have a tablet that they use?',
    'How many days a week is the tablet used?',
    'Does the patient have a computer/pc that they use?',
    'How many days a week is the computer/pc used?',
    'Smart device at home',
    'Smart device at inclusion? (check all that apply) (choice=Fitbit)',
    'Smart device at inclusion? (check all that apply) (choice=Weight scale)',
    'Indication Surgery',
    'Comments',
    'Complete?',
    'Complete data',
]

# From here on the frame is indexed by study ID.
DFDemo = DFDemo.drop(columns=Dropcols).set_index('Study ID')
# Calculate CCI score
# Weight the comorbidity indicator columns in place before summing them:
# positions 10-19 keep weight 1, 20-25 get weight 2, 26 gets weight 3 and
# 27-28 get weight 6, so every column in the summed range (10-28) is scaled
# exactly once.  The previous 26:28 slice overlapped position 26 (scaling it
# by 3 * 6 = 18) and left position 28 unweighted.
# NOTE(review): assumes the indicator columns are ordered by increasing
# weight (10 x 1, 6 x 2, 1 x 3, 2 x 6) - confirm against the DemoData.csv
# column layout.
DFDemo.iloc[:, 20:26] = DFDemo.iloc[:, 20:26] * 2
DFDemo.iloc[:, 26] = DFDemo.iloc[:, 26] * 3
DFDemo.iloc[:, 27:29] = DFDemo.iloc[:, 27:29] * 6

# Sum the weighted indicators into one score per patient and drop the
# individual indicator columns afterwards.
ColMask = DFDemo.columns[10:29]
DFDemo['Comorb'] = DFDemo[ColMask].sum(axis=1)
DFDemo = DFDemo.drop(ColMask, axis=1)
#%%

# Load the T0 data set and index it by study ID.
DF_T0 = pd.read_csv(Filename_T0).set_index('Study ID')

# Copy the patient type into the demographic frame; pandas aligns the
# assigned values on the index labels.
DFDemo['Type'] = DF_T0['Pt Type']
#%% code variables

# Two-level variables are recoded to 0/1.
DFDemo['Gender'] = DFDemo['Gender'].replace('Female', 0).replace('Male', 1)

# One-hot encode the categorical variables; drop_first leaves out the first
# level of each variable.
Housing, Education, Smoking, Med_Dif, Loc_Tu, Prim_Mal = (
    pd.get_dummies(DFDemo[source_col], drop_first=True)
    for source_col in (
        'Housing',
        'Education',
        'Smoking',
        'Difficulty preparing medication?',
        'Location tumour',
        'Primary Malignancy',
    )
)

DFDemo['Recurrent disease?'] = (
    DFDemo['Recurrent disease?'].replace('No', 0).replace('Yes', 1)
)

# Remove the raw categorical columns, plus 'Marital State' and
# 'Tumour Stage', which are dropped without being encoded.
DFDemo = DFDemo.drop(
    columns=[
        'Marital State',
        'Housing',
        'Education',
        'Tumour Stage',
        'Smoking',
        'Difficulty preparing medication?',
        'Location tumour',
        'Primary Malignancy',
    ]
)

#%%
# Append the dummy columns to the demographic frame.
DFDemo = pd.concat(
    [DFDemo, Housing, Education, Smoking, Med_Dif, Loc_Tu, Prim_Mal],
    axis=1,
)
#%% Create Neoadjuvant therapy variable


def _code_neoadjuvant(frame):
    """Return one neo-adjuvant therapy code per row of *frame*.

    Codes: 1 = chemotherapy and radiotherapy, 2 = chemotherapy,
    3 = immunotherapy, 4 = radiotherapy, 5 = targeted therapy, 0 = none.
    The rules are checked in that order and the first match wins, exactly
    like the former if/elif chain.  Rows that match no rule stay NaN.

    The result is a float Series on the index of *frame*.  It is always
    returned in full, so the 'Neo' column exists even when no row matches
    (the row-wise loop only created it on the first match).
    """
    chemo = frame['Neo-adjuvant therapy (choice=Chemotherapy)'] == 1
    radio = frame['Neo-adjuvant therapy (choice=Radiotherapy)'] == 1
    immuno = frame['Neo-adjuvant therapy (choice=Immunotherapy)'] == 1
    targeted = frame['Neo-adjuvant therapy (choice=Targeted Therapy)'] == 1
    no_therapy = frame['Neo-adjuvant therapy (choice=None)'] == 1

    rules = [
        (chemo & radio, 1),
        (chemo, 2),
        (immuno, 3),
        (radio, 4),
        (targeted, 5),
        (no_therapy, 0),
    ]

    codes = pd.Series(float('nan'), index=frame.index)
    for matches, code in rules:
        # Only fill rows that no higher-priority rule has coded yet.
        codes = codes.mask(matches & codes.isna(), code)
    return codes


DFDemo['Neo'] = _code_neoadjuvant(DFDemo)

# One-hot encode the therapy code; drop_first leaves out the lowest code.
Neo = pd.get_dummies(DFDemo['Neo'], drop_first=True)

# Source check-box columns and the intermediate code column (the chemotherapy
# label used to be listed twice).
NeoDrop = [
    'Neo-adjuvant therapy (choice=Chemotherapy)',
    'Neo-adjuvant therapy (choice=Immunotherapy)',
    'Neo-adjuvant therapy (choice=Radiotherapy)',
    'Neo-adjuvant therapy (choice=None)',
    'Neo-adjuvant therapy (choice=Targeted Therapy)',
    'Neo',
]

DFDemo = DFDemo.drop(NeoDrop, axis=1)
DFDemo = pd.concat([DFDemo, Neo], axis=1)
#%%
# Distribution of patient age.  sns.displot is a figure-level function that
# creates its own figure, so no plt.figure() call precedes it; the former
# call only left an extra, empty figure behind.
sns.displot(DFDemo['Age (years)'])
#%%

from scipy import stats

# Split the patients on 'Type': DemoNoComp holds the rows labelled 'Healthy',
# DemoComp holds all other rows.  The label column itself is removed from
# both groups.
DemoComp = DFDemo.loc[DFDemo['Type'] != 'Healthy'].drop(columns='Type')
DemoNoComp = DFDemo.loc[DFDemo['Type'] == 'Healthy'].drop(columns='Type')

#outcome = pd.DataFrame(index=['stat', 'p-value'])

# Independent-samples t-test between the two groups, ignoring NaNs; only the
# p-values are kept as a plain list.
outcomeT = stats.ttest_ind(DemoNoComp, DemoComp, nan_policy='omit')
OutcomeT = outcomeT.pvalue.tolist()

# Mann-Whitney U test per column, collecting the p-values in column order.
OutcomeMW = []
for column in DemoComp.columns:
    print(column)
    _, mw_pvalue = stats.mannwhitneyu(DemoNoComp[column], DemoComp[column])
    OutcomeMW.append(mw_pvalue)

# Export is currently disabled.
#DFDemo.to_csv('FinalDemo.csv')