Below is the research poster I presented at the annual RISE symposium. This poster details all aspects of my research.
Below is the main part of the code I used to develop and test my new index equations for STRIPE and PRISM.
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, roc_auc_score, confusion_matrix
def run_improved_index_model(csv_path):
    """Build, print and evaluate the hand-tuned "improved index".

    Steps:
      1. Load the CSV and drop rows missing any model feature or
         ``Diagnosis``.
      2. Split the rows 70/30 (``random_state=42``).
      3. Standardize the features and fit a logistic regression using the
         training rows only; its coefficients and intercept form the
         linear part of the index.
      4. Add the fixed, hand-chosen extra terms (two unit-weight symptom
         terms and three pairwise interactions) to get the index for
         every row.
      5. Print the index equation, the Pearson correlations between the
         index, ``UPDRS`` and ``Diagnosis``, and the accuracy / ROC AUC of
         a one-feature logistic regression (index -> Diagnosis) on the
         held-out rows.

    Args:
        csv_path: Path (or any input ``pandas.read_csv`` accepts) of a
            table holding the feature columns listed below plus
            ``Diagnosis`` and ``UPDRS``. ``Diagnosis`` is cast to ``int``
            and used as the classification target.

    Returns:
        dict with keys ``weights`` (feature -> fitted coefficient),
        ``intercept``, ``improved_index`` (1-D array, one value per kept
        row), ``corr_index_updrs``, ``corr_index_diagnosis``,
        ``corr_updrs_diagnosis``, ``accuracy`` and ``auc``.

    Raises:
        KeyError: if a required column is absent from the CSV.
    """
    # Each feature appears exactly once. A repeated name would feed a
    # duplicated column to the model and make the name -> weight mapping
    # ambiguous.
    features = [
        'PosturalInstability', 'Tremor', 'Bradykinesia', 'Rigidity', 'Age',
        'Depression', 'FamilyHistoryParkinsons', 'Diabetes', 'Stroke',
        'SpeechProblems', 'SleepDisorders',
    ]

    # Hand-chosen terms added on top of the fitted linear part:
    # (weight, names of the standardized features multiplied together).
    # This single list drives BOTH the computed index and the printed
    # equation, so the two cannot drift apart.
    # NOTE(review): the two unit-weight terms were present in the computed
    # index but missing from the printed equation; confirm they are intended.
    extra_terms = [
        (1.0, ('Bradykinesia',)),
        (1.0, ('PosturalInstability',)),
        (-0.2, ('Tremor', 'Bradykinesia')),
        (-0.4, ('Tremor', 'PosturalInstability')),
        (-0.2, ('Rigidity', 'Bradykinesia')),
    ]

    df = pd.read_csv(csv_path)

    # Fail early, before any fitting, with a message naming what is missing.
    required = features + ['Diagnosis', 'UPDRS']
    missing = [col for col in required if col not in df.columns]
    if missing:
        raise KeyError(f"Missing required column(s) in {csv_path!r}: {missing}")

    # Drop incomplete rows BEFORE the int cast: astype(int) raises on NaN.
    df = df.dropna(subset=features + ['Diagnosis']).copy()
    df['Diagnosis'] = df['Diagnosis'].astype(int)
    y = df['Diagnosis'].to_numpy()

    # Split first so the scaler and the weight-producing regression never
    # see the held-out rows (otherwise the test metrics are optimistic).
    row_ids = np.arange(len(df))
    train_rows, test_rows = train_test_split(
        row_ids, test_size=0.3, random_state=42
    )

    feature_frame = df[features]
    scaler = StandardScaler()
    scaler.fit(feature_frame.iloc[train_rows])
    X_scaled = scaler.transform(feature_frame)

    logreg = LogisticRegression(max_iter=1000)
    logreg.fit(X_scaled[train_rows], y[train_rows])
    coefs = logreg.coef_[0]
    b = logreg.intercept_[0]
    weights = dict(zip(features, coefs))

    # Plain lookups replace the former exec/eval/globals() name injection,
    # which does not reliably create function locals and polluted the
    # module namespace.
    x = {name: X_scaled[:, idx] for idx, name in enumerate(features)}

    improved_index = X_scaled @ coefs + b
    for weight, names in extra_terms:
        improved_index = improved_index + weight * np.prod(
            [x[name] for name in names], axis=0
        )

    # Human-readable equation, in the same order the index is assembled.
    equation_terms = [
        f"({weights[feature]:.3f}) * x_{feature}" for feature in features
    ]
    for weight, names in extra_terms:
        product = " * ".join(f"x_{name}" for name in names)
        equation_terms.append(f"({weight:.3f}) * {product}")
    equation_terms.append(f"{b:.3f}")
    equation_str = " +\n ".join(equation_terms)
    print("improved_index =\n", equation_str)

    # UPDRS is not part of the dropna subset above, so mask its missing
    # values here instead of letting NaN propagate through corrcoef.
    updrs = df['UPDRS'].to_numpy(dtype=float)
    scored = ~np.isnan(updrs)

    # The original computed index-vs-Diagnosis but labelled it as
    # index-vs-UPDRS; report both, each under its correct label.
    corr_index_updrs = np.corrcoef(improved_index[scored], updrs[scored])[0, 1]
    print(f"Pearson correlation between improved index and UPDRS: {corr_index_updrs:.4f}")
    corr_index_diagnosis = np.corrcoef(improved_index, y)[0, 1]
    print(f"Pearson correlation between improved index and Diagnosis: {corr_index_diagnosis:.4f}")
    corr_updrs_diagnosis = np.corrcoef(updrs[scored], y[scored])[0, 1]
    print(f"Pearson correlation between UPDRS and Diagnosis: {corr_updrs_diagnosis:.4f}")

    # Second stage: how well does the scalar index alone separate the
    # classes on rows that played no part in fitting it?
    X_index = improved_index.reshape(-1, 1)
    model = LogisticRegression()
    model.fit(X_index[train_rows], y[train_rows])
    y_test = y[test_rows]
    y_pred = model.predict(X_index[test_rows])
    y_pred_prob = model.predict_proba(X_index[test_rows])[:, 1]
    acc = accuracy_score(y_test, y_pred)
    auc = roc_auc_score(y_test, y_pred_prob)
    print(f"Accuracy on test set: {acc:.3f}")
    print(f"AUC on test set: {auc:.3f}")

    return {
        'weights': weights,
        'intercept': b,
        'improved_index': improved_index,
        'corr_index_updrs': corr_index_updrs,
        'corr_index_diagnosis': corr_index_diagnosis,
        'corr_updrs_diagnosis': corr_updrs_diagnosis,
        'accuracy': acc,
        'auc': auc,
    }
if __name__ == "__main__":
    # Script entry point: runs only when the file is executed directly,
    # not when it is imported. The dataset is expected in the current
    # working directory.
    run_improved_index_model(csv_path='parkinsons_disease_data.csv')