-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathmoving_to_sql.py
More file actions
147 lines (108 loc) · 4.19 KB
/
Copy pathmoving_to_sql.py
File metadata and controls
147 lines (108 loc) · 4.19 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
# Vikram Anantha
# Aug 6 2021
# Moving Students from Google Sheets to SQL Database
# HELM Learning
# utility to import Flint Subscriptions from Melissa's SmartSheet into the database for analysis
#db-schema: ex: HELM_Test_Database
#database: helmlearningdatabase-1
### IMPORTS AND SETUP
import numpy
import mysql.connector
from mysql.connector import errorcode
import pandas as pd
# set up port forwarding to your database via a terminal window
# ssh -L 3306:helmlearningdatabase-1.rds-account-id.us-east-1.rds.amazonaws.com:3306 ec2-user@52.21.172.100
#
# replace db-name, rds-account-id, ip-address-for-ec2 with your own account info
# set up database configuration (set the db-username, db-password and db-name for your database)
config = {
'user': 'helmlearning',
'password': 'H3lml3arning',
'host': '127.0.0.1', #52.21.172.100:22
'port': '3306',
'database': 'HELM_Test_Database'
}
### ESTABLISHING CONNECTION
def create_connection():
"""
Returns a database connection using mysql.connector
"""
# open database connection
global cnx
try:
cnx = mysql.connector.connect(**config)
return cnx
except mysql.connector.Error as err:
if err.errno == errorcode.ER_ACCESS_DENIED_ERROR:
print("Something is wrong with your user name or password")
elif err.errno == errorcode.ER_BAD_DB_ERROR:
print("Database does not exist")
else:
print(err)
raise
# Export data to a CSV file, and then change the header row to match the column names in your database table (if they are different)
# Also, make sure that the date fields are formatted as "YYYY-MM-DD"
# change file path to match your CSV file location
filepath = "/Users/vikramanantha/Downloads/Class Responses - Arduino (1).csv"
db = "HELM_Database"
class_name = '"Fun with Arduino and Circuits"'
student_name_col = "Student_Name"
email_col = "Email_Address"
parent_col = "Parent_Email"
# get column names
cnx = create_connection()
cursor = cnx.cursor(buffered=True)
# get all the columns from your database table (replace db-schema and db-table to match your table)
#cursor.execute("show columns from HELM_Test_Database.exercise")
cursor.execute("show columns from {}.students".format(db))
arr = [column[0] for column in cursor.fetchall()]
arr = arr[1:]
# create parameter placeholders
params = "%s," * len(arr)
params = params[:-1] # remove trailing comma
# generate the insert query SQL string
cols = ','.join(arr)
print(cols)
print(params)
sql = "INSERT IGNORE INTO {}.students ({}) VALUES ({});".format(db, cols, params)
print(sql)
sql2 = 'SELECT id FROM {}.students WHERE {} = {} AND {} = {}'
sql3 = 'SELECT id FROM {}.classes WHERE name = {};'
sql4 = 'INSERT INTO {}.classes_to_students (timestamp, class_id, student_id) VALUES ({}, {}, {})'
### GETTING THE DATA
# read the CSV file into a DataFrame
df = pd.read_csv(filepath)
# only take columns from the dataframe that match columns in the database table
df2 = df[arr]
# if there are cells in the dataframe that are NaN, replace them with empty strings
df2 = df2.fillna('')
# loop through all the rows in the dataframe and insert them into the table
# commit the data every 100 rows
### PUTTING THE DATA IN
for i in df2.index:
timestamp = '"' + df2.iloc[i].values[0] + '"'
name = '"' + df2.iloc[i].values[1] + '"'
email = '"' + df2.iloc[i].values[2] + '"'
parent_email = '"' + df2.iloc[i].values[3] + '"'
print(df2.iloc[i].values.tolist())
cursor.execute(sql, df2.iloc[i].values.astype(str).tolist())
cursor.execute(sql2.format(db, student_name_col, name, email_col, email, parent_col, parent_email))
#print(sql2.format(db, student_name_col, name, email_col, email))
student_id = cursor.fetchall()[0][0]
print(student_id)
cursor.execute(sql3.format(db, class_name))
#print(sql3.format(db, class_name))
class_id = cursor.fetchall()[0][0]
#print(class_id)
#print("------------")
cursor.execute(sql4.format(db, timestamp, class_id, student_id))
if i % 100 == 99:
print('row ', i)
input("Commit?")
cnx.commit()
# do a final commit
input("Commit?")
cnx.commit()
# close the cursor and database connection
cursor.close()
cnx.close()