Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
348 changes: 163 additions & 185 deletions computable_phenotypes/script.sql

Large diffs are not rendered by default.

55 changes: 28 additions & 27 deletions computable_phenotypes/utils/Patients.py
Original file line number Diff line number Diff line change
@@ -1,18 +1,16 @@
from datetime import datetime
import pprint

from computable_phenotypes.utils.db import execute


class Patients:
def __init__(self, db=None, verbose=False):
def __init__(self, database_name=None, verbose=False):
self.patients = {}
self.loaded = False
self.verbose = verbose
self.db = db
if db is None:
self.write = False
else:
self.write = True
self.database_name = database_name


def add_patient(self, birthday, race=None, sex=None, hisp=None, pat_id=None):
"""
Expand All @@ -32,19 +30,21 @@ def add_patient(self, birthday, race=None, sex=None, hisp=None, pat_id=None):
if base.get(pat_id) is not None:
return pat_id
# Add Patient
date_obj=datetime.strptime(birthday, "%m/%d/%Y")
birthdate=date_obj.strftime("\'%Y-%m-%d\'")
patient_info = {
"pat_id": pat_id,
"birthdate": date_parser(birthday),
"birthdate": birthdate,
"race": race,
"sex": sex,
"hispanic": hisp,
"encounters": {},
}
base[pat_id] = patient_info
if self.write:
pat_instr = f"""insert into dbo.Demographic values({patient_info['pat_id']},{patient_info['birthdate']},{patient_info['sex']},{patient_info['hispanic']},{patient_info['race']});"""
self.log(pat_instr)
execute(self.db, pat_instr)

pat_instr = f"""insert into Demographic values({patient_info['pat_id']},{patient_info['birthdate']},{patient_info['sex']},{patient_info['hispanic']},{patient_info['race']});"""
self.log(pat_instr)
execute(self.database_name, pat_instr)
return pat_id

def add_enc(
Expand All @@ -70,19 +70,23 @@ def add_enc(
if base.get(enc_id) is not None:
return enc_id
# Add Enc
date_obj=datetime.strptime(admit_date, "%m/%d/%Y")
admitdate=date_obj.strftime("\'%Y-%m-%d\'")
date_obj=datetime.strptime(dis_date, "%m/%d/%Y")
disdate=date_obj.strftime("\'%Y-%m-%d\'")
enc_info = {
"enc_id": enc_id,
"admit_date": date_parser(admit_date),
"discharge_date": date_parser(dis_date),
"admit_date": admitdate,
"discharge_date": disdate,
"enc_type": nullify(enc_type),
"raw_enc_type": nullify(raw_enc_type),
"diagnosisList": {},
}
base[enc_id] = enc_info
if self.write:
enc_instr = f"""insert into dbo.Encounter values({pat_id},{enc_info['enc_id']},{enc_info['admit_date']},{enc_info['enc_type']},{enc_info['raw_enc_type']},{enc_info['discharge_date']});"""
self.log(enc_instr)
execute(self.db, enc_instr)

enc_instr = f"""insert into Encounter values({pat_id},{enc_info['enc_id']},{enc_info['admit_date']},{enc_info['enc_type']},{enc_info['raw_enc_type']},{enc_info['discharge_date']});"""
self.log(enc_instr)
execute(self.database_name, enc_instr)
return enc_id

def add_dx(self, pat_id, enc_id, code, dx_id=None, dx_source=None, db=None):
Expand All @@ -109,10 +113,10 @@ def add_dx(self, pat_id, enc_id, code, dx_id=None, dx_source=None, db=None):
"dx_source": nullify(dx_source),
}
base[dx_id] = dx_info
if self.write:
dx_instr = f"""insert into dbo.Diagnosis values({pat_id},{dx_info['dx_id']},{dx_info['dx']},{dx_info['dx_type']},{dx_info['dx_source']},{enc_id});"""
self.log(dx_instr)
execute(self.db, dx_instr)

dx_instr = f"""insert into Diagnosis values({pat_id},{dx_info['dx_id']},{dx_info['dx']},{dx_info['dx_type']},{dx_info['dx_source']},{enc_id});"""
self.log(dx_instr)
execute(self.database_name, dx_instr)
return dx_id

def print(self):
Expand All @@ -132,15 +136,15 @@ def batch_load(self, db_connection):

for patid, pat in self.patients:
# load patient
pat_instr = f"""insert into dbo.Demographic values({pat['pat_id']},{pat['birthdate']},{pat['sex']},{pat['hispanic']},{pat['race']});"""
pat_instr = f"""insert into Demographic values({pat['pat_id']},{pat['birthdate']},{pat['sex']},{pat['hispanic']},{pat['race']});"""
self.log(pat_instr)
execute(db_connection, pat_instr)
for enc_id, encs in pat["encounters"]:
enc_instr = f"""insert into dbo.Encounter values({pat['pat_id']},{encs['enc_id']},{encs['admit_date']},{encs['enc_type']},{encs['raw_enc_type']},{encs['discharge_date']});"""
enc_instr = f"""insert into Encounter values({pat['pat_id']},{encs['enc_id']},{encs['admit_date']},{encs['enc_type']},{encs['raw_enc_type']},{encs['discharge_date']});"""
self.log(enc_instr)
execute(db_connection, enc_instr)
for dx_id, dx in encs["diagnosis_list"]:
dx_instr = f"""insert into dbo.Diagnosis values({pat['pat_id']},{dx['dx_id']},{dx['dx']},{dx['dx_type']},{dx['dx_source']},{encs['enc_id']});"""
dx_instr = f"""insert into Diagnosis values({pat['pat_id']},{dx['dx_id']},{dx['dx']},{dx['dx_type']},{dx['dx_source']},{encs['enc_id']});"""
self.log(dx_instr)
execute(db_connection, dx_instr)

Expand All @@ -160,6 +164,3 @@ def nullify(val):
return string_wrap(val)


def date_parser(date):
split = date.split("/")
return f'\'{"-".join(split)}\''
150 changes: 75 additions & 75 deletions computable_phenotypes/utils/db.py
Original file line number Diff line number Diff line change
@@ -1,89 +1,89 @@
import json
import os
import urllib
from subprocess import PIPE, run

import pyodbc
import sqlite3
from dotenv import load_dotenv
from sqlalchemy import create_engine, text


def create_database(connection, database_name):
connection.execution_options(isolation_level="AUTOCOMMIT").execute(
text(f"CREATE DATABASE {database_name};")
)
def create_database(database_name):
connection = sqlite3.connect(database_name)
cursor = connection.cursor()

return cursor

def delete_database(connection, database_name):
execute(
connection,
f"""IF DB_ID('{database_name}') IS NOT NULL
DROP DATABASE {database_name};""",
allow_transaction=False, # Disable transaction for this command
)

def delete_database(database_name):
if os.path.exists(database_name):
os.remove(database_name)

def execute(connection, command, allow_transaction=True):
if allow_transaction:
# For normal commands, use transactions
with connection.begin():
connection.execute(text(command))
else:
# For commands like CREATE DATABASE or DROP DATABASE, use AUTOCOMMIT
connection.execution_options(isolation_level="AUTOCOMMIT").execute(
text(command)
)


def run_script(sql_script, db, output):
host, user, password = load_env()
process = run(
[f"sqlcmd -C -U {user} -P {password} -d {db} -i {sql_script} "],
stderr=PIPE,
stdout=PIPE,
stdin=PIPE,
shell=True,
) # -o {output}
# process.stdin.write(password)
# print(process.stderr)
# print(process.stdout)

def execute(database_name, command):
connection = sqlite3.connect(database_name)
cursor = connection.cursor()
cursor.execute(command)
connection.commit()
def run_script(sql_script, database_name, output):
connection = sqlite3.connect(database_name)
cursor = connection.cursor()
with open(sql_script, 'r') as file:
sql_script_content = file.read()
cursor.executescript(sql_script_content)
connection.commit()
connection.close()

def connect(database=None):
host, user, password = load_env()
database_conn = ""
if database:
database_conn = ";database=" + database
# host, user, password = load_env()
# database_conn = ""
# if database:
# database_conn = ";database=" + database

conn_str = f"DRIVER={{ODBC Driver 18 for SQL Server}};SERVER={host};UID={user};PWD={password};TrustServerCertificate=yes{database_conn}"
params = urllib.parse.quote_plus(conn_str)
engine = create_engine(f"mssql+pyodbc:///?odbc_connect={params}")

return engine


def load_env():
pyodbc.pooling = False
load_dotenv()
sql_host = os.getenv("MSSQL_HOST")
sql_username = os.getenv("MSSQL_USERNAME")
sql_password = os.getenv("MSSQL_PASSWORD")
return sql_host, sql_username, sql_password


def remove_table(db_conn, table_name):
execute(
db_conn,
f"""if OBJECT_ID('{table_name}', 'U') is not NULL
drop table {table_name};""",
)


def create_tables(db_conn):
remove_table(db_conn, "dbo.Encounter")
remove_table(db_conn, "dbo.Diagnosis")
remove_table(db_conn, "dbo.Demographic")
remove_table(db_conn, "dbo.PCOR_Encounters")
encounter = """CREATE TABLE dbo.Encounter (
# conn_str = f"DRIVER={{ODBC Driver 18 for SQL Server}};SERVER={host};UID={user};PWD={password};TrustServerCertificate=yes{database_conn}"
# params = urllib.parse.quote_plus(conn_str)
# engine = create_engine(f"mssql+pyodbc:///?odbc_connect={params}")
connection = sqlite3.connect(database)
cursor = connection.cursor()

return cursor

def fetch(database_name, query):
connection = sqlite3.connect(database_name)
cursor = connection.cursor()

# Run the SELECT query
query = query
cursor.execute(query)

# Fetch the results and column names
rows = cursor.fetchall()
column_names = [description[0] for description in cursor.description]

# Convert to a list of dictionaries
data = [dict(zip(column_names, row)) for row in rows]

# Convert to JSON
json_data = json.dumps(data, indent=4)

# Close the database connection
connection.close()
return json_data

def remove_table(database_name, table_name):
connection = sqlite3.connect(database_name)
cursor = connection.cursor()
cursor.execute("DROP TABLE IF EXISTS " + table_name )

connection.commit()


def create_tables(database_name):
remove_table(database_name, "Encounter")
remove_table(database_name, "Diagnosis")
remove_table(database_name, "Demographic")
remove_table(database_name, "PCOR_Encounters")
encounter = """CREATE TABLE Encounter (
PATID int,
ENCOUNTERID nvarchar(30) NULL,
ADMIT_DATE datetime NULL,
Expand All @@ -92,7 +92,7 @@ def create_tables(db_conn):
DISCHARGE_DATE datetime NULL
);"""
diagnosis = """
CREATE TABLE dbo.Diagnosis
CREATE TABLE Diagnosis
(
PATID int,
DIAGNOSISID nvarchar(10) NULL,
Expand All @@ -102,14 +102,14 @@ def create_tables(db_conn):
ENCOUNTERID nvarchar(30) NULL
);"""
demo = """
CREATE TABLE dbo.Demographic
CREATE TABLE Demographic
(
PATID int,
BIRTH_DATE datetime,
SEX nvarchar(2),
HISPANIC nvarchar(2),
RACE nvarchar(2)
);"""
execute(db_conn, encounter)
execute(db_conn, diagnosis)
execute(db_conn, demo)
execute(database_name, encounter)
execute(database_name, diagnosis)
execute(database_name, demo)
48 changes: 21 additions & 27 deletions computable_phenotypes/utils/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,12 +10,13 @@
create_tables,
delete_database,
run_script,
fetch
)
from computable_phenotypes.utils.Patients import Patients


def read_json(patients_list, connection):
patients = Patients(db=connection)
def read_json(patients_list, database_name):
patients = Patients(database_name=database_name)

for pat in patients_list:
try:
Expand All @@ -37,42 +38,35 @@ def read_json(patients_list, connection):


def process_json(patients_list: list[dict]):
db_engine = connect()
database_name = "test"
# connection.autocommit=True

try:
logger.info("Deleting DB")
with db_engine.connect() as conn:
delete_database(conn, database_name)
delete_database(database_name)

logger.info("Creating DB")
with db_engine.connect() as conn:
create_database(conn, database_name)
create_database(database_name)

create_tables(database_name)
logger.info("Reading Input")
read_json(patients_list, database_name)
# tables_conn.commit()
logger.info("Running Script")
run_script(
"./computable_phenotypes/script.sql",
database_name,
"./script_output.txt",
)


logger.info("Creating tables")
tables_engine = connect(database_name)
with tables_engine.connect() as tables_conn:
create_tables(tables_conn)
logger.info("Reading Input")
read_json(patients_list, tables_conn)
# tables_conn.commit()
logger.info("Running Script")
run_script(
"./computable_phenotypes/script.sql",
database_name,
"./script_output.txt",
)

res = pd.read_sql_query(
"select * from dbo.NS_Final_Inclusions", tables_conn
)
output = res.to_json(orient="records", indent=2, date_format="iso")
tables_engine.dispose()
output = fetch(database_name,"select * from NS_Final_Inclusions")
except Exception as e:
print(f"An error occurred: {e}")
finally:
logger.info("Deleting DB")
with db_engine.connect() as drop_conn:
delete_database(drop_conn, database_name)
delete_database(database_name)

return output

Expand Down
Binary file added test
Binary file not shown.
Binary file added test.db
Binary file not shown.