# -*- coding: utf-8 -*-
"""
Script for parsing the Fitbit data into graphs.

Reads the surgery/admission, physical-activity (PA), step, completion and
complication CSV files, keeps the patients that completed the study, and plots
per-patient PA and step curves together with the clinical event days
(discharge, complication, readmissions) relative to the day of surgery.

@author M.F. Dijkhof
"""

# Import stuff
import os

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns

# Disable copy overwrite warning
pd.options.mode.chained_assignment = None  # default='warn'

#%% Define filenames and path
FilenameComp = 'SurgeryAndAdmission2.csv'  # Surg and Adm + Complications
FilenamePA = 'PA_Data.csv'
FilenameSteps = 'StepData.csv'
FilenameComplete = 'Complete.csv'
FilenameOutcome = 'Complications.csv'

# Raw string: the backslashes are path separators, not escape sequences.
Path = r'I:\Mike Dijkhof\Connecare MGP\Data'

# Set path
os.chdir(Path)

#%% Create DF from files (all indexed on the study ID)
DFComp = pd.read_csv(FilenameComp).set_index('Study ID')
DFPA = pd.read_csv(FilenamePA).set_index('Study ID')
DFSteps = pd.read_csv(FilenameSteps).set_index('Study ID')
DFComplete = pd.read_csv(FilenameComplete).set_index('Study ID')
DFOutcome = pd.read_csv(FilenameOutcome).set_index('Study ID')

#%%
# Clear all uncomplete cases
CompleteCheck = DFComplete['Has patient completed study?'] == 'Yes'

DFComp = DFComp[CompleteCheck]
DFPA = DFPA[CompleteCheck]
DFOutcome = DFOutcome[CompleteCheck]
DFSteps = DFSteps[CompleteCheck]

# Column of DFOutcome holding the complication status (trailing space is part
# of the CSV header).
OutcomeColumn = 'Complications at home during monitoring ? '

# PA data: one row per patient. The former row-by-row DataFrame.append loop
# only rebuilt the same rows (and DataFrame.append no longer exists in
# pandas >= 2.0), so dropping the bookkeeping column is all that is needed.
NewDF = DFPA.drop(['Complete?'], axis=1)

# Do the same for Step data
NewStepDF = DFSteps.drop(['Complete?'], axis=1)

#%% Create DF with important dates
DateColumns = [
    'Date of surgery',
    'Date of hospital discharge',
    'Date first complication at home',
    'Date (first) readmission',
    'Date discharge after first readmission',
    'Date second readmission',
    'Date discharge second readmission',
]

DFDates = DFComp[DateColumns].copy()

# Keep the dates as datetime64 (time of day removed) so that differences with
# missing dates give NaT instead of failing on plain date objects.
for DateCol in DateColumns:
    DFDates[DateCol] = pd.to_datetime(DFDates[DateCol]).dt.normalize()

DFDates['LOS'] = DFDates['Date of hospital discharge'] - DFDates['Date of surgery']  # LOS = Length of stay
DFDates['TTC'] = DFDates['Date first complication at home'] - DFDates['Date of surgery']  # TTC = Time to complication
DFDates['TTR'] = DFDates['Date (first) readmission'] - DFDates['Date of surgery']  # TTR = Time to readmission
DFDates['TT2R'] = DFDates['Date second readmission'] - DFDates['Date of surgery']  # TT2R = Time to second readmission

#%% Create coordinates from the dates for the plots
# Whole days since surgery per patient (NaN when the event did not occur),
# indexed on the study ID.
AXVcoord = DFDates[['LOS', 'TTC', 'TTR', 'TT2R']].apply(lambda Col: Col.dt.days)

AXVcomb = AXVcoord.values.tolist()
AXVArray = np.array(AXVcomb)

#%% Create DFs for each PA level
NoActDF = NewDF.loc[:, :'No activity After Surgery: 90']
LowActDF = NewDF.loc[:, 'Low activity Before Surgery: -1 ':'Low activity After Surgery: 90']
MedActDF = NewDF.loc[:, 'Medium activity Before Surgery: -1':'Medium activity After Surgery: 90']
HighActDF = NewDF.loc[:, 'High activity Before Surgery: -1 ':'High activity After Surgery: 90']


def MakeStepDF(NewDF):
    """Return the daily step counts of *NewDF* as a float64 DataFrame.

    The step columns are taken by position (columns 321 up to 427), the
    'Days Fitbit prescribed after surgery' column is removed, and blank cells
    and the various spellings of "not available" are turned into NaN. Any other
    non-numeric cell still raises on the final conversion.
    """
    StepDF = NewDF.iloc[:, 321:427]
    StepDF = StepDF.drop('Days Fitbit prescribed after surgery', axis=1)

    # Strip surrounding whitespace so ' ' and 'N.A. ' reduce to '' and 'N.A.'.
    StepDF = StepDF.apply(
        lambda Col: Col.map(lambda Val: Val.strip() if isinstance(Val, str) else Val)
    )
    StepDF = StepDF.replace(['', 'N.A.', 'NA.', 'n.a.', 'N.A', 'NaN'], np.nan)

    return StepDF.astype('float64')


StepDF = MakeStepDF(NewStepDF)

#%% Day -14 to surgery were in the wrong order so we have to flip the first 14 days


def DayFlipper(DF):
    """Return *DF* with the order of its first 14 columns reversed."""
    ListCol = DF.columns.tolist()
    ListCol[0:14] = ListCol[0:14][::-1]
    return DF[ListCol]


NoActDF = DayFlipper(NoActDF)
print(NoActDF.columns)
LowActDF = DayFlipper(LowActDF)
print(LowActDF.columns)
MedActDF = DayFlipper(MedActDF)
print(MedActDF.columns)
HighActDF = DayFlipper(HighActDF)
print(HighActDF.columns)
StepDF = DayFlipper(StepDF)
print(StepDF.columns)

#%%
OldColumns = LowActDF.columns
NewColumns = range(-14, 91)  # day number relative to surgery (day 0)

LowActDF.columns = NewColumns
MedActDF.columns = NewColumns
HighActDF.columns = NewColumns
StepDF.columns = NewColumns

# Set NaN to zeroes in order to calculate the total amount of activity
LowActDFZeroes = LowActDF.fillna(0)
MedActDFZeroes = MedActDF.fillna(0)
HighActDFZeroes = HighActDF.fillna(0)
StepDFZeroes = StepDF.fillna(0)

TotActDF = LowActDF + MedActDF + HighActDF
TotActDFZeroes = LowActDFZeroes + MedActDFZeroes + HighActDFZeroes

# Remove pts that reported less than threshold PA days
# NOTE(review): only 105 day columns exist (-14..90), so a threshold of 200
# missing days never removes anybody - confirm the intended value.
Threshold = 200
NaNCount = LowActDF.isnull().sum(axis=1)  # Count days without data per patient
NaNRowDrop = NaNCount < Threshold

NoActDFClean = NoActDF[NaNRowDrop]
LowActDFClean = LowActDFZeroes[NaNRowDrop]
MedActDFClean = MedActDFZeroes[NaNRowDrop]
HighActDFClean = HighActDFZeroes[NaNRowDrop]
TotActDFClean = TotActDFZeroes[NaNRowDrop]

#%% Divide Comps, Non-comps and Unknown-Comps
# Purely numeric frames (no 'Group' column) for the statistics and trendlines.
OutcomeAtHome = DFOutcome[OutcomeColumn].reindex(TotActDFClean.index)

TotActComp = TotActDFClean[OutcomeAtHome == 'Yes']
TotActNoComp = TotActDFClean[OutcomeAtHome == 'No']
TotActUnk = TotActDFClean[~OutcomeAtHome.isin(['Yes', 'No'])]

#%% Label every patient with its complication group


def Grouper(DF):
    """Return a copy of *DF* with a 'Group' column added.

    'Group' is 'Complication' for patients whose outcome column in DFOutcome
    equals 'Yes' and 'No Comp' for everybody else (including patients that are
    missing from DFOutcome). The input frame is not modified.
    """
    DF = DF.copy()
    DF['Group'] = 'Complication'
    DF['Group'] = DF['Group'].where(DFOutcome[OutcomeColumn] == 'Yes', other='No Comp')
    return DF


NoActDFClean = Grouper(NoActDFClean)
LowActDFClean = Grouper(LowActDFClean)
MedActDFClean = Grouper(MedActDFClean)
HighActDFClean = Grouper(HighActDFClean)
TotActDFClean = Grouper(TotActDFClean)
StepDF = Grouper(StepDF)

#%% Plot comps, non-comps amd unknown patient data with event-dates
colors = ['k', 'c', 'r', 'r']  # k=discharge, c=complication, r=readmissions


def EventDays(AXV, index):
    """Return the event days (LOS, TTC, TTR, TT2R) of patient *index*.

    AXV is either a DataFrame indexed on the study ID (looked up by label) or,
    for backward compatibility, a list in which position ``index - 1`` belongs
    to patient *index*.
    """
    if isinstance(AXV, pd.DataFrame):
        return AXV.loc[index].to_numpy(dtype='float64')
    return np.asarray(AXV[index - 1], dtype='float64')


def DayValues(Row):
    """Return the per-day values of one patient row as a float Series.

    The 'Group' label, when present, is removed so that only the numeric day
    columns are plotted.
    """
    Row = Row.drop('Group', errors='ignore')
    return pd.Series(Row.to_numpy(dtype='float64'), index=Row.index.astype('int64'))


def PAPlotter(Low, Med, High, Tot, Step, AXV):
    """Plot PA minutes and steps per day for every patient in *Tot*.

    Low, Med, High and Tot hold the minutes of PA per day and Step the steps
    per day, all indexed on the study ID. AXV supplies the event days that are
    drawn as dotted vertical lines (see EventDays). One figure is created per
    patient; the dashed line at day 0 marks the surgery.
    """
    for index in Tot.index:
        fig, ax1 = plt.subplots(figsize=(20, 8))

        ax1.plot(DayValues(Low.loc[index]), 'b:')
        ax1.plot(DayValues(Med.loc[index]), 'r:')
        ax1.plot(DayValues(High.loc[index]), 'y:')
        ax1.plot(DayValues(Tot.loc[index]))
        ax1.set_ylabel('Minutes of PA')
        ax1.set_xlabel('Days')

        plt.ylim(0, 1440)
        plt.vlines(x=0, ymin=0, ymax=1440, linestyle='dashed')
        plt.vlines(EventDays(AXV, index), ymin=0, ymax=1440, colors=colors, linestyle='dotted')

        # Steps share the x-axis but get their own y-axis.
        ax2 = ax1.twinx()
        ax2.plot(DayValues(Step.loc[index]), 'k')
        ax2.set_ylabel('Steps per day')

        plt.title('PA levels comp pt' + str(index))
        plt.ylim(0, 25000)


PAPlotter(LowActDFClean, MedActDFClean, HighActDFClean, TotActDFClean, StepDF, AXVcoord)

#%% Calculate differences between comp PA and no comp PA


def PAStats(DF, group):
    """Print mean and spread of the PA minutes in *DF* for several periods.

    DF holds numeric day columns labelled -14..90; *group* is only used in the
    printed header. The reported 'Std' is the standard deviation of the
    per-day standard deviations.
    """
    MeanTotPA = DF.mean().mean()
    StdTotPA = DF.std().std()

    PreMean = DF.loc[:, -14:-1].mean().mean()
    PreStd = DF.loc[:, -14:-1].std().std()

    Post30Mean = DF.loc[:, 0:30].mean().mean()
    Post30Std = DF.loc[:, 0:30].std().std()

    Post60Mean = DF.loc[:, 0:60].mean().mean()
    Post60Std = DF.loc[:, 0:60].std().std()

    Post90Mean = DF.loc[:, 0:90].mean().mean()
    Post90Std = DF.loc[:, 0:90].std().std()

    print('Stats ' + group + ':', '\n')
    print('Total Mean min PA =' + str(MeanTotPA), 'Std=' + str(StdTotPA))
    print('Preoperative Mean min PA =' + str(PreMean), 'Std=' + str(PreStd))
    print('30 days Postop. Mean min PA =' + str(Post30Mean), 'Std=' + str(Post30Std))
    print('60 days Postop. Mean min PA =' + str(Post60Mean), 'Std=' + str(Post60Std))
    print('90 days Postop. Mean min PA =' + str(Post90Mean), 'Std=' + str(Post90Std), '\n')


PAStats(TotActComp, 'complication')
PAStats(TotActNoComp, 'no complication')
PAStats(TotActUnk, 'unkown')

#%% Plot histogram number of missing values
CountDF = pd.DataFrame(NaNCount)
CountDF['Complication'] = DFOutcome[OutcomeColumn]
CountDF.columns = ['Count', 'Complication']

# The palette has to be handed to the plot call; calling sns.color_palette()
# afterwards only returns a palette and changes nothing.
sns.displot(CountDF, x='Count', bins=[10, 20, 30, 40, 50, 60, 70, 80, 90],
            hue='Complication', palette='colorblind')

#%%


def RollingAvAct(DF, windowsize):
    """Return the per-patient moving average of *DF* along the day columns.

    Every row is smoothed with a trailing window of *windowsize* days; the
    first days use the shorter window that is available (min_periods=1).
    """
    # rolling() works down the rows, so transpose to roll along the days.
    return DF.T.rolling(windowsize, min_periods=1).mean().T


AvTotActComp = RollingAvAct(TotActComp, 3)
AvTotActNoComp = RollingAvAct(TotActNoComp, 3)

#%%


def FitLine(X, Y):
    """Return (slope, intercept) of a least-squares line, or NaNs if < 2 points."""
    if len(X) < 2:
        return (np.nan, np.nan)
    Slope, Intercept = np.polyfit(X, Y, 1)
    return (Slope, Intercept)


def Trendliner(DF, Dates, group):
    """Fit and plot pre-op, in-hospital and post-op trendlines per patient.

    DF holds (smoothed) PA minutes with numeric day columns, indexed on the
    study ID. Dates supplies the event days per patient (see EventDays); its
    first value is the discharge day. *group* is stored in the result and used
    in the figure title.

    Three straight lines are fitted per patient: days up to and including
    surgery (day 0), day 0 up to and including discharge, and discharge
    onwards. Patients without a known discharge day are skipped.

    Returns a dict with keys 'Pre', 'LOS' and 'Post', each a DataFrame with the
    columns 'Slope', 'Int' and 'Group', indexed on the study ID.
    """
    ResultColumns = ['Slope', 'Int', 'Group']
    Fitted = {'Pre': [], 'LOS': [], 'Post': []}
    Patients = []

    Days = DF.columns.to_numpy(dtype='float64')

    for index, row in DF.iterrows():
        Events = EventDays(Dates, index)

        if np.isnan(Events[0]):
            print('No discharge day for pt' + str(index) + ', trendlines skipped')
            continue

        DisDay = int(Events[0])
        Values = row.to_numpy(dtype='float64')

        # Period masks; surgery day and discharge day belong to both
        # neighbouring periods.
        Periods = {
            'Pre': Days <= 0,
            'LOS': (Days >= 0) & (Days <= DisDay),
            'Post': Days >= DisDay,
        }

        Lines = {}
        for Period, Mask in Periods.items():
            Slope, Intercept = FitLine(Days[Mask], Values[Mask])
            Lines[Period] = (Slope, Intercept)
            Fitted[Period].append((Slope, Intercept, group))
        Patients.append(index)

        # Plot figures
        plt.figure(figsize=(24, 8))
        plt.plot(Days, Values)
        for Period, Style in (('Post', 'r--'), ('Pre', 'b--'), ('LOS', 'k--')):
            Slope, Intercept = Lines[Period]
            if not np.isnan(Slope):
                Xfit = Days[Periods[Period]]
                plt.plot(Xfit, Slope * Xfit + Intercept, Style)
        plt.vlines(x=0, ymin=0, ymax=1440, linestyle='dashed')
        plt.vlines(Events, ymin=0, ymax=1440, colors=colors, linestyle='dotted')
        plt.xlim(-14, 105)
        plt.ylim(0, 1440)
        plt.ylabel('Minutes of PA')
        plt.xlabel('Days')
        plt.title('Mov Avg PA levels pt' + str(index) + '_' + group)

    d = {
        Period: pd.DataFrame(Rows, index=Patients, columns=ResultColumns)
        for Period, Rows in Fitted.items()
    }
    return d


TrendDictComp = Trendliner(AvTotActComp, AXVcoord, 'complication')
TrendDictNoComp = Trendliner(AvTotActNoComp, AXVcoord, 'no complication')

#%%
# def SlopeStats(SlopeDict, group):
#     MeanSlopePre, MeanIntPre = SlopeDict['Pre'].mean()
#     StdSlopePre, StdIntPre = SlopeDict['Pre'].std()
#     MeanSlopeLOS, MeanIntLOS = SlopeDict['LOS'].mean()
#     StdSlopeLOS, StdIntLOS = SlopeDict['LOS'].std()
#     MeanSlopePost, MeanIntPost = SlopeDict['Post'].mean()
#     StdSlopePost, StdIntPost = SlopeDict['Post'].std()
#     print('Stats '+ group + ':', '\n')
#     print('Mean slope PA Pre-op = '+ str(MeanSlopePre),'Std= ' + str(StdSlopePre))
#     print('Mean slope PA hospitalization = '+ str(MeanSlopeLOS),'Std= ' + str(StdSlopeLOS))
#     print('Mean slope PA Post-op = '+ str(MeanSlopePost),'Std= ' + str(StdSlopePost))
#     print('Mean intersept PA Pre-op = '+ str(MeanIntPre),'Std= ' + str(StdIntPre))
#     print('Mean intercept PA hospitalization = '+ str(MeanIntLOS),'Std= ' + str(StdIntLOS))
#     print('Mean intercept PA Post-op = '+ str(MeanIntPre),'Std= ' + str(StdIntPre), '\n')
#     return(MeanSlopePre, StdSlopePre, MeanSlopeLOS, StdSlopeLOS, MeanSlopePost, StdSlopePost)

# MeanSlopePreComp, StdSlopePreComp, MeanSlopeLOSComp, StdSlopLOSComp, MeanSlopePostComp, StdSlopeComp, = SlopeStats(TrendDictComp, 'complications')
# MeanSlopePreNoComp, StdSlopePreNoComp, MeanSlopeLOSNoComp, StdSlopLOSNoComp, MeanSlopePostNoComp, StdSlopeNoComp = SlopeStats(TrendDictNoComp, 'no complications')

#%%
# SlopeIntPreComp = pd.DataFrame(TrendDictComp['Pre'])
# SlopeIntPreComp['Period'] = 'Pre'
# SlopeIntPreNoComp= pd.DataFrame(TrendDictNoComp['Pre'])
# SlopeIntPreNoComp['Period'] = 'Pre'
# SlopeIntLOSComp = pd.DataFrame(TrendDictComp['LOS'])
# SlopeIntLOSComp['Period'] = 'LOS'
# SlopeIntLOSNoComp= pd.DataFrame(TrendDictNoComp['LOS'])
# SlopeIntLOSNoComp['Period'] = 'LOS'
# SlopeIntPostComp = pd.DataFrame(TrendDictComp['Post'])
# SlopeIntPostComp['Period'] = 'Post'
# SlopeIntPostNoComp= pd.DataFrame(TrendDictNoComp['Post'])
# SlopeIntPostNoComp['Period'] = 'Post'

# Slope = pd.DataFrame()
# Slope = Slope.append([SlopeIntPreComp, SlopeIntPreNoComp, SlopeIntLOSComp, SlopeIntLOSNoComp, SlopeIntPostComp, SlopeIntPostNoComp])
# Slope['Slope'] = Slope['Slope'].astype('float64')
# Slope['Int'] = Slope['Int'].astype('float64')

#%%
# plt.figure(figsize=(12,8))
# sns.set_theme(style="darkgrid")
# sns.violinplot(x=Slope['Period'], y=Slope['Slope'],hue=Slope['Group'], palette="muted", split=True)
# plt.legend(bbox_to_anchor=(1.05, 1), loc=2, borderaxespad=0.)

# plt.figure(figsize=(12,8))
# sns.set_theme(style="darkgrid")
# sns.violinplot(x=Slope['Period'], y=Slope['Int'],hue=Slope['Group'], palette="muted", split=True)
# plt.legend(bbox_to_anchor=(1.05, 1), loc=2, borderaxespad=0.)

#%%
#fig, axes = plt.subplots(1,2, sharey=True)
#sns.violinplot(data=newPASlopeComp['Intercept'], ax=axes[0], color='b')
#sns.violinplot(data=newPASlopeNoComp['Intercept'], ax=axes[1], color='r')