forked from h4sohail/feedback-analyzer
-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathfetcher.py
More file actions
119 lines (95 loc) · 4.05 KB
/
Copy pathfetcher.py
File metadata and controls
119 lines (95 loc) · 4.05 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
# -*- coding: utf-8 -*-
import sys, argparse, time
import tweepy
import mysql.connector
import unicodedata
from unidecode import unidecode
def string_cleaner(input_string):
    """Return a copy of ``input_string`` with non-ASCII characters replaced.

    ASCII characters are copied through unchanged. Every other character is
    replaced by the first of the following that applies:

    1. the result of ``unidecode`` for that character, when it is non-empty;
    2. the character's Unicode name wrapped in square brackets;
    3. the literal placeholder ``[x]`` when the character has no Unicode
       name (``unicodedata.name`` raises ``ValueError``).

    Args:
        input_string: The text to clean (a ``str``).

    Returns:
        The cleaned string.
    """
    pieces = []
    for character in input_string:
        # Code points below 128 are exactly the ones that encode to ASCII.
        if ord(character) < 128:
            pieces.append(character)
            continue

        replaced = unidecode(character)
        if replaced != '':
            pieces.append(replaced)
            continue

        try:
            pieces.append("[" + unicodedata.name(character) + "]")
        except ValueError:
            # No transliteration and no Unicode name: fall back to a marker.
            pieces.append("[x]")
    # Join once instead of growing a string with += for every character.
    return "".join(pieces)
def main():
    """Poll Twitter for tweets mentioning @AskRBC and store them in MySQL.

    Command-line arguments (all positional):
        c_api_key: Twitter consumer API key.
        c_api_secret: Twitter consumer API secret key.
        sql_password: Password for the MySQL ``root`` user.

    The function runs until interrupted. In each pass it repeatedly calls
    the search API, each time asking for tweets with ids below the last
    tweet of the previous batch, and inserts every returned tweet (screen
    name, profile image URL, text, creation date, id) into the ``tweets``
    table. When a search returns nothing the pass is complete; the function
    then sleeps 300 seconds and starts a new pass restricted to tweets newer
    than the newest one already stored.

    A ``tweepy.TweepError`` is printed and ends the current batch loop; the
    search is retried from the same position after the 300 second sleep.
    Database errors are not caught and propagate to the caller.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument('c_api_key', help='Twitter consumer API keys')
    parser.add_argument('c_api_secret', help='Twitter consumer API secret key')
    # NOTE(review): a password passed on the command line is visible in the
    # process list; consider an environment variable or a prompt instead.
    parser.add_argument('sql_password', help='SQL database password')
    args = parser.parse_args()

    # NOTE(review): host, user and database are hard-coded; confirm this is
    # intended before running against another environment.
    mydb = mysql.connector.connect(
        host='15.222.147.65',
        user='root',
        passwd=args.sql_password,
        database='admin_prod',
    )
    mycursor = mydb.cursor()

    auth = tweepy.AppAuthHandler(args.c_api_key, args.c_api_secret)
    auth.secure = True
    api = tweepy.API(auth, wait_on_rate_limit=True,
                     wait_on_rate_limit_notify=True)

    # Search operators are space separated. The original built this string
    # without the space and then never passed it to the API, so retweets
    # were not actually excluded.
    search_query = '@AskRBC'
    retweet_filter = '-filter:retweets'
    query = search_query + ' ' + retweet_filter

    tweets_per_query = 100
    max_tweets = 10000000
    insert_sql = ('INSERT INTO tweets (username, user_avatar, tweet_content, '
                  'date_posted, tweet_id) VALUES (%s, %s, %s, %s, %s)')

    since_id = None    # lower bound (exclusive) for the current pass
    max_id = -1        # id of the last tweet in the previous batch; <= 0 means "start from the top"
    newest_id = None   # highest tweet id stored so far
    tweet_count = 0

    try:
        while True:
            pass_finished = False
            while tweet_count < max_tweets:
                search_kwargs = {'q': query, 'count': tweets_per_query}
                if since_id:
                    search_kwargs['since_id'] = since_id
                if max_id > 0:
                    search_kwargs['max_id'] = str(max_id - 1)

                try:
                    new_tweets = api.search(**search_kwargs)
                except tweepy.TweepError as e:
                    # Give up on this attempt; max_id is kept so the search
                    # resumes from the same position after the sleep below.
                    print('some error : ' + str(e))
                    break

                if not new_tweets:
                    pass_finished = True
                    break

                for tweet in new_tweets:
                    tweet_count += 1
                    payload = tweet._json
                    tweet_text = payload['text']
                    try:
                        tweet_text.encode('latin1')
                    except UnicodeEncodeError:
                        # Text that does not fit latin1 is cleaned before
                        # it is stored.
                        tweet_text = string_cleaner(tweet_text)
                    val = (
                        payload['user']['screen_name'],
                        payload['user']['profile_image_url'],
                        tweet_text,
                        payload['created_at'],
                        payload['id_str'],
                    )
                    mycursor.execute(insert_sql, val)
                    mydb.commit()

                batch_newest = max(tweet.id for tweet in new_tweets)
                if newest_id is None or batch_newest > newest_id:
                    newest_id = batch_newest
                max_id = new_tweets[-1].id

            if pass_finished:
                # Start the next pass from the top, limited to tweets newer
                # than anything already stored. Without this reset every
                # later pass would keep asking for tweets older than the
                # oldest one seen and never find new ones.
                if newest_id is not None:
                    since_id = newest_id
                max_id = -1

            time.sleep(300)
    finally:
        # Release database resources when the loop is interrupted or fails.
        mycursor.close()
        mydb.close()
if __name__ == '__main__':
    # Run the fetcher only when this file is executed as a script,
    # not when it is imported as a module.
    main()