diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..55952c1 --- /dev/null +++ b/.gitignore @@ -0,0 +1,7 @@ +.coverage +database.ini +my-project-env +__pycache__ +__init__.py +htmlcov/ +Dockerfile \ No newline at end of file diff --git a/README.md b/README.md index 1b99589..e01b2ed 100644 --- a/README.md +++ b/README.md @@ -28,3 +28,136 @@ Const,Your Rating,Date Rated,Title,URL,Title Type,IMDb Rating,Runtime (mins),Yea * Every commit should be GPG signed * Create a Pull Request with updated code +########################################### + +this repo contains the following folders + +data: +---- + data consists of ratings.csv file + +config: +------ + read_queries.json // json file that consists of SQL queries + panda_queries.json // json file that consists of pandas commands + +utils: +----- + + dbconfig.py // python file used for configuration of postgreSQL server + +src: +---- +src folder consists of python programs that retrieve and manipulate values in csv file using +pandas and postgreSQL + +src->v1 +------- +1)csv2pandas.py ---- this file has a set of instructions that converts the csv file to a pandas dataframe and creates an instance of the class Ratings to retrieve and update user rating + +2)ratings.py ----- this python class contains dataframe values as attributes and also consists of methods get_rating(), set_rating() to update user rating along with current date + +src->v2 +------ +1)csv2db.py ---- converts csv file to postgreSQL using pandas, creating instance for the class Ratings to manipulate columns in the + postgreSQL table + +2)ratings.py ---- consists of python class Ratings, involves different methods + to create a table and also perform CRUD(create, read, update, delete) operations on the table + +src->v3 +------ +1)pandasql.py ---- converts csv file to pandas Dataframe, creating instance for the class Ratings to manipulate columns in the Dataframe + +2)ratings.py ---- consists of python class Ratings, involves different methods + to create a 
table and also perform CRUD(create, read, update, delete) operations on the Dataframe + +tests: +------ + +this folder contains unittest files to check functionality of python codes existing in src folder, and also contains json files containing test cases. + +tests->config +------------- + +this folder consists of json files which have parameters for the test cases + +setting up github repository: +---------------------------- + +Before you start working on the project, create your own github repository and generate SSH, GPG keys for authentication. +for more information, refer below: + + +https://docs.github.com/en/get-started/quickstart/create-a-repo + +https://docs.github.com/en/authentication/connecting-to-github-with-ssh + +https://docs.github.com/en/authentication/managing-commit-signature-verification + + +create python virtual environment in your linux system +----------------------------------------------------- + +run python -V (if version is not displayed run sudo apt install python3) + +after installing python, run "python -m venv my-project-env" + +then virtual environment named my-project-env will be created. + +run "source my-project-env/bin/activate" to activate + +run "pip install requests" & "python -c "import requests"" only for the first time. 
+ +to close the virtual environment, type "deactivate" + + +install postgres in linux: +-------------------------- + +type the following command + +"sudo apt-get install postgresql" + +after successful installation, connect to postgresql using +"sudo -i -u postgres" + +the user will now be switched to postgres; type "psql" to connect to the database server, or to return to the regular user type "exit" or press ctrl+d + +postgres@user:~$ psql + +postgres=# // here you can create different databases and manage tables + +to create a database +postgres=# "create database my_database;" + +connect to database "my_database" +postgres=# \c my_database + +to exit type \q or ctrl+d + + +setup: +----- +install required dependencies in requirements.txt file + +"pip install -r requirements.txt" + +create a new directory using "mkdir dir_name" + +to navigate into directory use "cd path/to/dir_name" + +create a new file (say python file) use "touch file.py" + +to execute a python script, use command "python file.py" or "python path/to/file.py" + +Generate test coverage: +----------------------- + +type the following commands in the terminal to generate test coverage report + +"coverage run -m unittest discover" + +"coverage report" + +"coverage html" \ No newline at end of file diff --git a/config/panda_queries.json b/config/panda_queries.json new file mode 100644 index 0000000..9818682 --- /dev/null +++ b/config/panda_queries.json @@ -0,0 +1,11 @@ +{ + "List number of rows in a dataframe": "print(self.ratings.shape[0])", + "List number of rows by title type": "print(self.ratings['Title Type'].value_counts())", + "List number of rows by year of rating": "print(pd.DatetimeIndex(self.ratings['Date Rated']).year.value_counts())", + "List number of rows by year of release": "print(self.ratings['Year'].value_counts())", + "List top 10 rated titles": "print(self.ratings[['Title', 'Your Rating']].sort_values(by='Your Rating', ascending=False)[:10])", + "List bottom 10 rated titles": 
"print(self.ratings[['Title', 'Your Rating']].sort_values(by='Your Rating')[:10])", + "Fetch histogram of ratings": "print(self.ratings['Your Rating'].value_counts())", + "List genres by their average ratings, sorted decrementally: ": "self.ratings['Genres']=self.ratings['Genres'].str.split(', ')\nprint(self.ratings.explode('Genres').groupby('Genres')[['Genres', 'Your Rating']].mean().round(decimals=2).sort_values(by='Your Rating', ascending=False))", + "Filter rows using const id": "print(self.ratings.loc[self.ratings.Const==id])" +} \ No newline at end of file diff --git a/config/pyspark_queries.json b/config/pyspark_queries.json new file mode 100644 index 0000000..030b9a1 --- /dev/null +++ b/config/pyspark_queries.json @@ -0,0 +1,11 @@ +{ + "List number of rows in a dataframe": "print(ratings.count())", + "List number of rows by title type": "ratings.groupBy(ratings['Title Type']).count().sort(desc('count')).show()", + "List number of rows by year of rating": "ratings.withColumn('Year Rated', year(to_date(col('Date Rated'), 'MM/dd/yyyy'))).groupBy('Year Rated').count().sort(desc('count')).show()", + "List number of rows by year of release": "ratings.groupBy(ratings.Year).count().sort(asc('Year')).show()", + "List top 10 rated titles": "ratings.select([ratings['Title'], ratings['Your Rating']]).sort(desc('Your Rating')).show(10)", + "List bottom 10 rated titles": "ratings.select([ratings['Title'], ratings['Your Rating']]).sort(asc('Your Rating')).show(10)", + "Fetch histogram of ratings": "ratings.groupBy(ratings['Your rating']).count().sort(asc('Your Rating')).show()", + "List genres by their average ratings, sorted decrementally: ": "ratings.withColumn('Genres', explode_outer(split('Genres', ', ')).alias('Genres')).groupBy('Genres').agg(round(mean('Your Rating'), 2).alias('Average Rating')).sort(desc('Average Rating')).show(26)", + "Filter rows using const id": "ratings.where(ratings.Const==input('Enter Const id: ')).show()" +} \ No newline at end of file diff 
--git a/config/read_queries.json b/config/read_queries.json new file mode 100644 index 0000000..0cb1227 --- /dev/null +++ b/config/read_queries.json @@ -0,0 +1,12 @@ +{ + "List total number of records in table: ": "SELECT COUNT(*) FROM ratings;", + "List number of records by title type: ": "SELECT title_type, COUNT(*) FROM ratings GROUP BY title_type; " , + "List number of records by year of rating: ": "SELECT EXTRACT(YEAR FROM date_rated) AS year_of_rating, COUNT(*) FROM ratings GROUP BY 1 ORDER BY 1;", + "List number of records by year of release: ": "SELECT year, COUNT(*) FROM ratings GROUP BY 1 ORDER BY 1;", + "List the top 10 rated titles: ": "SELECT title, your_rating FROM ratings ORDER BY your_rating DESC LIMIT 10;", + "List the bottom 10 rated titles: ": "SELECT title, your_rating FROM ratings ORDER BY your_rating LIMIT 10;", + "List genres by their average ratings, sorted decrementally: ": "SELECT genre, CAST(AVG(your_rating) AS DECIMAL(5,2)) AS average_rating FROM (SELECT DISTINCT(UNNEST(STRING_TO_ARRAY(genres, ', '))) AS genre, your_rating FROM ratings) AS temp GROUP BY genre ORDER BY average_rating DESC;", + "Fetch histogram of ratings: ": "SELECT your_rating, COUNT(*) FROM ratings GROUP BY 1 ORDER BY 1;", + "Filter Records from table using Const": "SELECT * FROM ratings WHERE Const=%s" + +} \ No newline at end of file diff --git a/pyspark_sql/csv2pyspark.py b/pyspark_sql/csv2pyspark.py new file mode 100644 index 0000000..1965761 --- /dev/null +++ b/pyspark_sql/csv2pyspark.py @@ -0,0 +1,15 @@ +import json +from pyspark.sql import SparkSession +from pyspark.sql.functions import * + +spark=SparkSession.builder.appName("pyspark_sql").getOrCreate() +spark.sql("set spark.sql.legacy.timeParserPolicy=LEGACY") +ratings=spark.read.csv('../data/ratings.csv', header=True, inferSchema=True) +pyspark_queries=open('../config/pyspark_queries.json') +query_data=json.load(pyspark_queries) + +# executing pyspark queries one by one + +for query, pdtxt in 
class Ratings:
    '''
    One title from the ratings data together with the user's rating of it.

    attributes:
    -----------
    const           //unique id that represents the title
    your_rating     //user rating for the title
    date_rated      //date on which the user rating was last updated
    title           //title of the movie
    url             //link to the title's page
    title_type      //type of title (eg: movie, short)
    imdb_rating     //average rating of the title
    runtime         //total duration
    year            //year released
    genres          //category of film
    num_votes       //number of votes given
    release_date
    directors       //list of directors

    methods defined here:
    ---------------------
    get_rating(self)
        returns your_rating

    set_rating(self, new_rating)
        stores new_rating in your_rating and the current date in date_rated,
        provided new_rating converts to a float between 0 and 10 (inclusive);
        returns -1 and changes nothing otherwise
    '''

    # Message printed for every rejected rating (out of range or not a number).
    _INVALID_MSG = "\ninvalid input!! must be in between 0 and 10!! \nrating not updated\n"

    def __init__(self, const, your_rating, date_rated, title, url, title_type, imdb_rating, runtime, year, genres, num_votes, release_date, directors):
        '''
        Store the 13 column values of one ratings row on the instance.

        parameters:
        -----------
        const, your_rating, date_rated, title, url, title_type, imdb_rating,
        runtime, year, genres, num_votes, release_date, directors
            each value is stored unchanged in the attribute of the same name
        '''
        self.const = const
        self.your_rating = your_rating
        self.date_rated = date_rated
        self.title = title
        self.url = url
        self.title_type = title_type
        self.imdb_rating = imdb_rating
        self.runtime = runtime
        self.year = year
        self.genres = genres
        self.num_votes = num_votes
        self.release_date = release_date
        self.directors = directors

    def get_rating(self):
        '''
        Return the current user rating (your_rating).
        '''
        return self.your_rating

    def set_rating(self, new_rating):
        '''
        Update the user rating and stamp date_rated with today's date.

        parameters:
        ----------
        new_rating  //anything float() accepts, e.g. 6.2 or "10"

        returns:
        --------
        None when the rating was stored (your_rating becomes a float and
        date_rated becomes the current date formatted as "%d/%m/%Y")
        -1   when new_rating is not a number or lies outside 0..10;
             your_rating and date_rated are left untouched
        '''
        # Only the errors float() can raise are treated as bad input; a bare
        # except would also swallow KeyboardInterrupt / SystemExit.
        try:
            rating = float(new_rating)
        except (TypeError, ValueError, OverflowError):
            print(self._INVALID_MSG)
            return -1

        # NaN fails both comparisons and is therefore rejected as well.
        if not (rating >= 0 and rating <= 10.0):
            print(self._INVALID_MSG)
            return -1

        self.your_rating = rating
        # NOTE(review): the sample rows elsewhere in this change use
        # month-first dates (e.g. '3/18/2017'); this day-first format is kept
        # as documented -- confirm which one downstream code expects.
        self.date_rated = datetime.now().strftime("%d/%m/%Y")
        print("rating updated sucessfully for title {0}\n".format(self.title))
class Ratings:
    '''
    Manage the records of the PostgreSQL table "ratings".

    Attributes:
    ----------
    conn    //psycopg2 connection opened with the parameters from dbconfig()
    cursor  //cursor on conn used for every statement

    Methods defined here:
    ---------------------
    insert(self, row)
        inserts the 13 values of row into the ratings table

    read(self, query, *params)
        executes query with params and prints the fetched rows

    update(self, const, new_rating)
        sets Your_Rating to new_rating and Date_Rated to the current date
        for the record whose Const matches const

    delete(self, const)
        deletes the record whose Const matches const

    close(self)
        closes the cursor and the connection

    insert, read, update and delete return None on success and -1 on failure.
    '''

    def __init__(self):
        '''
        Connect to the database and create the ratings table if it is missing.

        The table has 13 columns:
        Const (primary key), Your_Rating, Date_Rated, Title, URL, Title_Type,
        IMDb_Rating, Runtime_mins, Year, Genres, Num_Votes, Release_Date,
        Directors
        '''
        params = dbconfig()
        self.conn = ps.connect(**params)
        # With autocommit on, every statement is committed as it executes.
        self.conn.autocommit = True
        self.cursor = self.conn.cursor()

        create_table = '''CREATE TABLE IF NOT EXISTS ratings(
                            Const VARCHAR(50) NOT NULL PRIMARY KEY,
                            Your_Rating FLOAT,
                            Date_Rated DATE NOT NULL,
                            Title VARCHAR(255) NOT NULL,
                            URL VARCHAR(50) NOT NULL,
                            Title_Type VARCHAR(50) NOT NULL,
                            IMDb_Rating FLOAT NOT NULL,
                            Runtime_mins FLOAT NOT NULL,
                            Year INT NOT NULL,
                            Genres VARCHAR(255) NOT NULL,
                            Num_Votes INT NOT NULL,
                            Release_Date VARCHAR(50) NOT NULL,
                            Directors VARCHAR(255) NOT NULL
                            )'''
        self.cursor.execute(create_table)

    def insert(self, row):
        '''
        Insert one record into the ratings table.

        parameters:
        ----------
        row  //mapping (dict or pandas row) with the keys 'Const',
               'Your Rating', 'Date Rated', 'Title', 'URL', 'Title Type',
               'IMDb Rating', 'Runtime (mins)', 'Year', 'Genres',
               'Num Votes', 'Release Date', 'Directors'

        A record whose Const already exists is left unchanged
        (ON CONFLICT DO NOTHING).

        returns None on success; prints the error and returns -1 when row
        cannot be indexed by those keys or the database rejects the values
        '''
        try:
            columns = ("""INSERT INTO ratings (Const, Your_Rating, Date_Rated, Title, URL, Title_type, IMDb_Rating, Runtime_mins,
                         Year, Genres, Num_Votes, Release_Date, Directors)
                         VALUES(%s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s)
                         ON CONFLICT (Const) DO NOTHING;""")
            values = [row['Const'], row['Your Rating'], row['Date Rated'], row['Title'], row['URL'],
                      row['Title Type'], row['IMDb Rating'], row['Runtime (mins)'],
                      row['Year'], row['Genres'], row['Num Votes'], row['Release Date'], row['Directors']]
            self.cursor.execute(columns, values)
            self.conn.commit()
        # Exception already covers the psycopg2 errors as well as the
        # TypeError/KeyError raised when row is not a complete mapping.
        except Exception as error:
            print(error)
            return -1

    def read(self, query, *params):
        '''
        Execute a query and print the rows it returns.

        parameters:
        ----------
        query   //sql text, may contain %s placeholders
        params  //values bound to the placeholders, in order

        returns None on success; prints the error and returns -1 when the
        statement fails or produces no result set
        '''
        try:
            self.cursor.execute(query, list(params))
            result = self.cursor.fetchall()
            print(result)
            print("\n")
        except Exception as error:
            print(error)
            return -1

    def update(self, const, new_rating):
        '''
        Set a new user rating and stamp the record with the current date.

        parameters:
        -----------
        const       //primary key of the record to change
        new_rating  //number between 0 and 10 (inclusive); strings such as
                      "10" are rejected

        returns None when the UPDATE statement ran (also when no record
        matches const); returns -1 when new_rating is not a number in range
        or the database reports an error
        '''
        # Values that cannot be compared with a float (str, None, ...) are
        # invalid input, not a crash.
        try:
            in_range = new_rating >= 0.0 and new_rating <= 10.0
        except (TypeError, ValueError):
            in_range = False
        if not in_range:
            print("enter a valid value between 0 and 10!!\n")
            return -1

        # Pass a date object rather than a "dd/mm/YYYY" string: with
        # PostgreSQL's default MDY DateStyle such a string is rejected for
        # days above 12 and read with day and month swapped otherwise.
        day = datetime.now().date()
        try:
            query = '''UPDATE ratings SET Your_Rating = %s, Date_Rated = %s
                    WHERE Const = %s;'''
            values = [new_rating, day, const]
            self.cursor.execute(query, values)
            self.conn.commit()
            print("rating updated in the database successfully...\n")
        except Exception as error:
            print(error)
            return -1

    def delete(self, const):
        '''
        Delete the record whose Const matches const.

        parameters:
        -----------
        const  //primary key of the record to delete

        returns None when the DELETE statement ran (also when no record
        matches const); prints the error and returns -1 when the database
        reports an error
        '''
        try:
            query = '''DELETE FROM ratings WHERE Const = %s;'''
            values = [const]
            self.cursor.execute(query, values)
            print("record deleted successfully...\n")
        except Exception as error:
            print(error)
            return -1

    def close(self):
        '''
        Close the cursor and the database connection.

        Call this once the instance is no longer needed; nothing else in the
        class releases the connection.
        '''
        self.cursor.close()
        self.conn.close()
class Ratings:
    '''
    Manage the rows of a pandas Dataframe of ratings.

    Attributes:
    ----------
    ratings  //the Dataframe handed to the constructor

    Methods defined here:
    ---------------------
    update(self, const, new_rating)
        sets 'Your Rating' and 'Date Rated' on the rows whose Const matches

    read(self, query)
        executes a pandas statement given as text

    delete(self, const)
        removes the rows whose Const matches

    insert(self, row)
        appends the given row(s) to the Dataframe

    Every method returns None on success and -1 on failure.
    '''

    def __init__(self, ratings):
        '''
        Keep a reference to the Dataframe to work on.

        Parameters:
        ----------
        ratings  //pandas Dataframe; the methods below use its columns
                   'Const', 'Your Rating', 'Date Rated' and 'Title'
        '''
        self.ratings = ratings

    def update(self, const, new_rating):
        '''
        Set a new user rating and stamp the row with the current date.

        parameters:
        -----------
        const       //value of the Const column identifying the row(s)
        new_rating  //anything float() accepts, between 0 and 10 (inclusive)

        'Date Rated' is written as month/day/year without zero padding.

        returns None when the rating is valid (also when no row matches
        const); returns -1 when new_rating is not a number in range or the
        Dataframe cannot be updated
        '''
        try:
            rating = float(new_rating)
            if not (rating >= 0 and rating <= 10.0):
                print("\ninvalid input!! must be in between 0 and 10!! \nrating not updated\n")
                return -1

            now = datetime.now()
            # Built by hand because the "%-m/%-d" strftime directives are a
            # glibc extension and fail on other platforms.
            date = "{0}/{1}/{2}".format(now.month, now.day, now.year)

            mask = self.ratings.Const == const
            # One assignment per column keeps the broadcast unambiguous
            # however many rows match.
            self.ratings.loc[mask, 'Your Rating'] = rating
            self.ratings.loc[mask, 'Date Rated'] = date

            # Print the matched title(s) as text, not the repr of a Series.
            titles = ", ".join(str(title) for title in self.ratings.loc[mask, 'Title'])
            print("rating updated sucessfully for {0}\n".format(titles))
        except Exception:
            print("\ninvalid input!! must be in between 0 and 10!! \nrating not updated\n")
            return -1

    def read(self, query):
        '''
        Execute a pandas statement given as text.

        parameters:
        ----------
        query  //python source; it may refer to self, pd and, when the text
                 contains "id", to id (the Const id typed in by the user)

        returns None on success; prints "invalid input" and returns -1 when
        the statement raises
        '''
        try:
            # The statement sees the module globals plus self (and id), all in
            # one namespace so that nested scopes inside the statement can
            # reach them too.
            namespace = dict(globals())
            namespace["self"] = self
            if "id" in query:
                namespace["id"] = input("Enter Const id: \n")
            # SECURITY: exec runs arbitrary code. The callers in this change
            # feed it text from a local config file; never pass untrusted
            # text here.
            exec(query, namespace)
        except Exception:
            print("invalid input\n")
            return -1

    def delete(self, const):
        '''
        Remove the row(s) whose Const matches const, in place.

        parameters:
        -----------
        const  //value of the Const column identifying the row(s)

        returns None when at least one row was removed; prints
        "invalid input!" and returns -1 when no row matches or the
        Dataframe cannot be searched
        '''
        try:
            # Labels of the matching rows; rows are dropped by label, which
            # relies on the index labels being unique.
            matches = self.ratings.index[self.ratings.Const == const]
            if len(matches) == 0:
                print("invalid input!")
                return -1
            self.ratings.drop(index=matches, inplace=True)
            print("row deleted from dataframe sucessfully...\n")
        except Exception:
            print("invalid input!")
            return -1

    def insert(self, row):
        '''
        Append one or more rows to the Dataframe.

        parameters:
        ----------
        row  //anything pd.DataFrame() accepts, e.g. a dictionary mapping
               each column name to a one-element list

        The index is renumbered 0..n-1 afterwards so labels stay unique.

        returns None on success; prints "invalid input data!" and returns -1
        when row cannot be turned into a Dataframe
        '''
        try:
            new_rows = pd.DataFrame(row)
            # ignore_index avoids duplicate labels (the new row would
            # otherwise reuse label 0), which label-based operations such as
            # drop would then treat as one.
            self.ratings = pd.concat([self.ratings, new_rows], axis=0, ignore_index=True)
            print("row inserted into dataframe successfully...")
        except Exception:
            print("invalid input data!")
            return -1
updating user rating to negative value": [-2, -1], + "assertion when updating user rating to invalid type (str-zero)": ["zero", -1] + } + +} \ No newline at end of file diff --git a/tests/config/v2params.json b/tests/config/v2params.json new file mode 100644 index 0000000..8f77eef --- /dev/null +++ b/tests/config/v2params.json @@ -0,0 +1,39 @@ +{ + "insert": + { + "assertion when inserting record into table with valid values": [{"Const": "tt1001526", "Your Rating": 6, "Date Rated": "2017-03-18 00:00:00", "Title": "Megamind", "URL": "https://www.imdb.com/title/tt1001526/", "Title Type": "movie", "IMDb Rating": 7.3, "Runtime (mins)": 95.0, "Year": 2010, "Genres": "Animation, Action, Comedy, Family, Sci-Fi", "Num Votes": 208264, "Release Date": "10/28/2010", "Directors": "Tom McGrath"}, null], + "assertion when inserting record into table with valid values and null rating": [{"Const": "tt1790454", "Your Rating": null, "Date Rated": "2017-03-18 00:00:00", "Title": "Tom and jerry", "URL": "https://www.imdb.com/title/tt1001526/", "Title Type": "movie", "IMDb Rating": 7.3, "Runtime (mins)": 95.0, "Year": 2010, "Genres": "Animation, Action, Comedy, Family, Sci-Fi", "Num Votes": 208264, "Release Date": "10/28/2010", "Directors": "Tom McGrath"}, null], + "assertion when inserting record into table with null values for all columns": [{"Const": null, "Your_Rating": null, "Date Rated": null, "Title": null, "URL": null, "Title Type": null, "IMDb Rating": null, "Runtime (mins)": null, "Year": null, "Genres": null, "Num Votes": null, "Release Date": null, "Directors": null}, -1], + "assertion when inserting record into table with title only": ["Megamind", -1], + "assertion when inserting record into table with const id only": ["tt1001526", -1], + "assertion when inserting record into table by giving column count": [13, -1], + "assertion when inserting record into tale using 13 values": [[1,2,3,4,5,6,7,8,9,10,11,12,13], -1] + + }, + "read": + { + "assertion when reading a record 
from table that matches title": ["SELECT * FROM ratings WHERE Title=%s;", "Megamind", null], + "assertion when reading columns in table with nonexistent const id": ["SELECT Your_Rating, Title FROM ratings WHERE Const=tt;", null, -1], + "assertion when reading a column that does not exist in the table": ["SELECT Country FROM ratings WHERE Const=%s;", "tt101526", -1], + "assertion when reading records from a nonexistent table": ["SELECT * FROM movies;", null, -1], + "assertion when reading records by giving const id to invalid column": ["SELECT * FROM ratings WHERE Country=%s", "tt101526", -1], + "assertion when no sql query is given": [null, null, -1] + }, + "update": + { + "assertion when updating user rating to valid value": ["tt1001526", 6.2, null], + "assertion when updating user rating to valid value but with nonexistent const id": ["tt1", 0, null], + "assertion when updating user rating to invalid value": ["tt1001526", -2, -1], + "assertion when updating user rating to invalid type (str- my_rating)": ["tt1001526", "my_rating", -1], + "assertion when updating user rating to valid value but invalid type (str-10)": ["tt1001526", "10", -1], + "assertion when updating user rating to valid value but which invalid const id": [1234, 6, -1] + }, + "delete": + { + "assertion when deleting user rating using existing const id": ["tt1001526", null], + "assertion when deleting user rating using const id as %s": ["%s", null], + "assertion when deleting user rating using nonexistent const id": ["tt", null], + "assertion when deleting user rating using an integer": [1234, -1], + "assertion when trying to delete a rating using the rating itself": [6, -1] + } +} \ No newline at end of file diff --git a/tests/config/v3params.json b/tests/config/v3params.json new file mode 100644 index 0000000..02829c0 --- /dev/null +++ b/tests/config/v3params.json @@ -0,0 +1,39 @@ +{ + "insert": + { + "assertion when inserting row into dataframe with valid values": [{"Const": ["tt1001526"], "Your 
Rating": [6], "Date Rated": ["2017-03-18 00:00:00"], "Title": ["Megamind"], "URL": ["https://www.imdb.com/title/tt1001526/"], "Title Type": ["movie"], "IMDb Rating": [7.3], "Runtime (mins)": [95.0], "Year": [2010], "Genres": ["Animation, Action, Comedy, Family, Sci-Fi"], "Num Votes": [208264], "Release Date": ["10/28/2010"], "Directors": ["Tom McGrath"]}, null], + "assertion when inserting row into dataframe with valid values and null rating": [{"Const": ["tt1790454"], "Your Rating": [null], "Date Rated": ["2017-03-18 00:00:00"], "Title": ["Tom and jerry"], "URL": ["https://www.imdb.com/title/tt1001526/"], "Title Type": ["movie"], "IMDb Rating": [7.3], "Runtime (mins)": [95.0], "Year": [2010], "Genres": ["Animation, Action, Comedy, Family, Sci-Fi"], "Num Votes": [208264], "Release Date": ["10/28/2010"], "Directors": ["Tom McGrath"]}, null], + "assertion when inserting row into dataframe using 13 values": [[1,2,3,4,5,6,7,8,9,10,11,12,13], null], + "assertion when inserting row into dataframe with title only": ["Megamind", -1], + "assertion when inserting row into dataframe with const id only": ["tt1001526", -1], + "assertion when inserting row into dataframe by giving column count": [13, -1] + + }, + "read": + { + "assertion when reading a row from dataframe that matches title": ["print(self.ratings.loc[self.ratings.Title=='Megamind'])", null], + "assertion when reading columns in dataframe with nonexistent const id": ["print(self.ratings.loc[self.ratings.Const=='tt'][['Your Rating', 'Title']])", null], + "assertion when reading a column that does not exist": ["print(self.ratings.loc[self.ratings.Const=='tt1001526']['Country'])", -1], + "assertion when reading rows from a nonexistent dataframe": ["print(movies)", -1], + "assertion when reading rows by giving const id to invalid column": ["print(self.ratings.loc[self.ratings.Country=='tt1001526'])", -1], + "assertion when query is given": [null, -1], + "Filter rows using const id": 
class TestRating(unittest.TestCase):
    '''
    Data-driven tests for get_rating() and set_rating() of the v1 Ratings class.

    The cases come from tests/config/v1params.json. Each entry maps a
    description to a list: "set" entries are [new rating, expected return
    value] and "get" entries are [expected rating].
    '''
    # NOTE(review): the path is relative to the current working directory,
    # so the tests have to be started from inside tests/v1.
    with open('../../tests/config/v1params.json') as file_json:
        data = json.load(file_json)

    def setUp(self):
        # A fresh object per test keeps test_get_rating independent of the
        # ratings written by test_set_rating.
        self.instance = Ratings(
            "tt0100802",
            3,
            # Quoted on purpose: unquoted, 4/19/2015 is arithmetic division.
            "4/19/2015",
            "Total Recall",
            "https://www.imdb.com/title/tt0100802/",
            "movie",
            7.5,
            113.0,
            1990,
            # NOTE(review): this argument repeats the URL; confirm which
            # value the Ratings constructor expects in this position.
            "https://www.imdb.com/title/tt0100802/",
            278834,
            "5/31/1990",
            "Paul Verhoeven",
        )

    def test_set_rating(self):
        for query, text in self.data['set'].items():
            # subTest reports every failing case instead of stopping at the first.
            with self.subTest(case=query):
                print(query+"\n")
                self.assertEqual(self.instance.set_rating(text[0]), text[1])

    def test_get_rating(self):
        for query, text in self.data['get'].items():
            with self.subTest(case=query):
                print(query+'\n')
                self.assertEqual(self.instance.get_rating(), text[0])
class Testrecords(unittest.TestCase):
    '''
    Data-driven tests for insert(), read(), update() and delete() of the
    v2 Ratings class.

    The cases come from tests/config/v2params.json. Each entry maps a
    description to a list whose last element is the expected return value
    and whose preceding elements are the positional arguments of the call.
    '''
    # NOTE(review): the path is relative to the current working directory,
    # so the tests have to be started from inside tests/v2.
    with open('../../tests/config/v2params.json') as file_json:
        data = json.load(file_json)

    @classmethod
    def setUpClass(cls):
        # Built once for the whole class, but only when the tests actually
        # run, so importing this module no longer calls the constructor.
        cls.instance = Ratings()

    def _check_cases(self, section, method):
        # Run every case of one json section through the given bound method.
        for query, text in self.data[section].items():
            *arguments, expected = text
            # subTest reports every failing case instead of stopping at the first.
            with self.subTest(case=query):
                print(query+"\n")
                self.assertEqual(method(*arguments), expected)

    def test_insert(self):
        self._check_cases('insert', self.instance.insert)

    def test_read(self):
        self._check_cases('read', self.instance.read)

    def test_update(self):
        self._check_cases('update', self.instance.update)

    def test_delete(self):
        self._check_cases('delete', self.instance.delete)
def dbconfig(name='../../database.ini', section='postgresql'):
    """Return the options of one section of an ini file as a dict.

    parameters:
    ----------
    name
        path of the ini file to read
    section
        name of the section whose options are returned

    returns:
    -------
    dict mapping each option name in the section to its string value

    raises:
    ------
    Exception when the section is not present after reading the file
    """
    config = ConfigParser()
    config.read(name)

    # Guard clause: without the section there is nothing to return.
    if not config.has_section(section):
        raise Exception('Section {0} not found in the {1} file'.format(section, name))

    return {option: value for option, value in config.items(section)}