From d6e6019026ced5caeb05caa774cd2ce0a5a7a0c5 Mon Sep 17 00:00:00 2001 From: shakir544 Date: Sat, 19 Jul 2014 20:09:22 -0500 Subject: [PATCH 1/2] Update tweet_sentiment.py --- tweet_sentiment.py | 23 ++++++++++++----------- 1 file changed, 12 insertions(+), 11 deletions(-) diff --git a/tweet_sentiment.py b/tweet_sentiment.py index 9f6c6ba..a09b675 100644 --- a/tweet_sentiment.py +++ b/tweet_sentiment.py @@ -39,20 +39,21 @@ def main(): score, based on the sent_dict. ''' for index in range(len(tweets)): - tweet_word = tweets[index]["text"].split() - sent_score = 0 - for word in tweet_word: - word = word.rstrip('?:!.,;"!@') - word = word.replace("\n", "") + if index and 'text' in tweets[index]: + tweet_word = tweets[index]["text"].split() + sent_score = 0 + for word in tweet_word: + word = word.rstrip('?:!.,;"!@') + word = word.replace("\n", "") - if not (word.encode('utf-8', 'ignore') == ""): - if word.encode('utf-8') in sentiment.keys(): - sent_score = sent_score + float(sentiment[word]) + if not (word.encode('utf-8', 'ignore') == ""): + if word.encode('utf-8') in sentiment.keys(): + sent_score = sent_score + float(sentiment[word]) - else: - sent_score = sent_score + else: + sent_score = sent_score - print float(sent_score) + print float(sent_score) From d271856ff2ce3b0e954503eebc56160605558ceb Mon Sep 17 00:00:00 2001 From: shakir544 Date: Sat, 19 Jul 2014 20:13:12 -0500 Subject: [PATCH 2/2] Update tweet_sentiment.py --- tweet_sentiment.py | 59 +++++++++++++++++----------------------------- 1 file changed, 21 insertions(+), 38 deletions(-) diff --git a/tweet_sentiment.py b/tweet_sentiment.py index a09b675..c324b22 100644 --- a/tweet_sentiment.py +++ b/tweet_sentiment.py @@ -1,3 +1,4 @@ +#!/usr/bin/python import sys import json @@ -5,61 +6,43 @@ twitterData = sys.argv[2] #output.txt def tweet_dict(twitterData): - ''' (file) -> list of dictionaries - This method should take your output.txt - file and create a list of dictionaries. - ''' + twitter_list_dict = [] twitterfile = open(twitterData) for line in twitterfile: - #twitter_list_dict.append(json.loads(line.decode('utf-8-sig'))) - twitter_list_dict.append(json.loads(line)) + twitter_list_dict.append(json.loads(line)) return twitter_list_dict - #return data_read["text"] + def sentiment_dict(sentimentData): - ''' (file) -> dictionary - This method should take your sentiment file - and create a dictionary in the form {word: value} - ''' + afinnfile = open(sentimentData) - scores = {} # initialize an empty dictionary + scores = {} for line in afinnfile: - term, score = line.split("\t") # The file is tab-delimited. "\t" means "tab character" - scores[term] = float(score) # Convert the score to an integer. + term, score = line.split("\t") + scores[term] = int(score) - return scores # Print every (term, score) pair in the dictionary + return scores def main(): tweets = tweet_dict(twitterData) sentiment = sentiment_dict(sentimentData) - - '''Create a method below that loops through each tweet in your - twees_list. For each individual tweet it should add up you sentiment - score, based on the sent_dict. - ''' for index in range(len(tweets)): if index and 'text' in tweets[index]: - tweet_word = tweets[index]["text"].split() - sent_score = 0 - for word in tweet_word: - word = word.rstrip('?:!.,;"!@') - word = word.replace("\n", "") + tweet_word = tweets[index]['text'].split() + sent_score = 0 + for word in tweet_word: + word = word.rstrip('?:!.,;"!@') + word = word.replace("\n", "") - if not (word.encode('utf-8', 'ignore') == ""): - if word.encode('utf-8') in sentiment.keys(): - sent_score = sent_score + float(sentiment[word]) + if not (word.encode('utf-8', 'ignore') == ""): + if word.encode('utf-8') in sentiment.keys(): + sent_score = sent_score + float(sentiment[word]) - else: - sent_score = sent_score - - print float(sent_score) - - + else: + sent_score = sent_score - - - - + print float(sent_score) + if __name__ == '__main__': main()