-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathmoving_classes_to_sql.py
More file actions
135 lines (102 loc) · 3.39 KB
/
Copy pathmoving_classes_to_sql.py
File metadata and controls
135 lines (102 loc) · 3.39 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
# Vikram Anantha
# Sept 6 2020
# Moving classes from Google Sheets to SQL Database
# HELM Learning
# utility to import Flint Subscriptions from Melissa's SmartSheet into the database for analysis
#db-schema: ex: HELM_Test_Database
#database: helmlearningdatabase-1
### IMPORTS AND SETUP
# import numpy
import mysql.connector
from mysql.connector import errorcode
import pandas as pd
# set up port forwarding to your database via a terminal window
# ssh -L 3306:helmlearningdatabase-1.rds-account-id.us-east-1.rds.amazonaws.com:3306 ec2-user@52.21.172.100
#
# replace db-name, rds-account-id, ip-address-for-ec2 with your own account info
# set up database configuration (set the db-username, db-password and db-name for your database)
config = {
'user': '[SECRET]',
'password': '[SECRET]',
'host': '[SECRET]', #52.21.172.100
'port': '[SECRET]',
'database': '[SECRET]'
}
### ESTABLISHING CONNECTION
def create_connection():
"""
Returns a database connection using mysql.connector
"""
# open database connection
global cnx
try:
cnx = mysql.connector.connect(**config)
return cnx
except mysql.connector.Error as err:
if err.errno == errorcode.ER_ACCESS_DENIED_ERROR:
print("Something is wrong with your user name or password")
elif err.errno == errorcode.ER_BAD_DB_ERROR:
print("Database does not exist")
else:
print(err)
raise
# Export data to a CSV file, and then change the header row to match the column names in your database table (if they are different)
# Also, make sure that the date fields are formatted as "YYYY-MM-DD"
# change file path to match your CSV file location
filepath = "/Users/vikramanantha/Downloads/classes_helm.csv"
db = "HELM_Database"
# get column names
cnx = create_connection()
cursor = cnx.cursor(buffered=True)
# get all the columns from your database table (replace db-schema and db-table to match your table)
#cursor.execute("show columns from HELM_Test_Database.exercise")
cursor.execute("show columns from {}.classes".format(db))
arr = [column[0] for column in cursor.fetchall()]
arr = arr[1:]
print(arr)
# create parameter placeholders
params = "%s," * len(arr)
params = params[:-1] # remove trailing comma
# generate the insert query SQL string
cols = ','.join(arr)
print(cols)
print(params)
sql = "INSERT INTO {}.classes ({}) VALUES ({});".format(db, cols, params)
print(sql)
### GETTING THE DATA
# print(sql)
# read the CSV file into a DataFrame
df = pd.read_csv(filepath, error_bad_lines=False)
print(df)
# only take columns from the dataframe that match columns in the database table
df2 = df[arr]
# if there are cells in the dataframe that are NaN, replace them with empty strings
df2 = df2.fillna('')
# loop through all the rows in the dataframe and insert them into the table
# commit the data every 100 rows
### PUTTING THE DATA IN
print("OK SO\n\n")
print(df2)
print(filepath)
print(sql)
print(arr)
print(cols)
print(params)
input("We doin this? ")
for i in df2.index:
print(df2.iloc[i].values.tolist())
array = df2.iloc[i].values.astype(str).tolist()
newlist = []
for d in array:
# d = d[1:-1]
print(d)
newlist.append(d)
cursor.execute(sql, newlist)
if i % 100 == 0:
print('row ', i)
cnx.commit()
# do a final commit
cnx.commit()
# close the cursor and database connection
cursor.close()
cnx.close()