-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathutility_funcs.py
More file actions
293 lines (238 loc) · 9.84 KB
/
utility_funcs.py
File metadata and controls
293 lines (238 loc) · 9.84 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
import datetime as dt
import json
import os
import sys
from dataclasses import dataclass, field
from functools import reduce
from typing import List

import numpy as np
import pandas as pd
import requests
from yahoo_fin import stock_info as si
if __name__ == "__main__":
    # This is a helper module and is not meant to be executed directly.
    print("Run app.py instead")
    raise SystemExit
# Folder containing this module; all data paths are resolved relative to it.
CURRENT_FOLDER = os.path.dirname(os.path.abspath(__file__))

# JSON fields for each stock
FIELDS = [
    "Name",
    "Ticker",
    "Currency",
    "Date Bought",
    "Date Sold",
    "Holding",
    "Book Cost",
    "Commission",
    "FX Charge",
    "Exchange",
]

# relevant file names
files = [
    "data/currency_cache.json",
    "data/config.json",
    "data/portfolio.json",
    "data/stock_cache.csv",
]
# resolve each file name to its correct absolute path
CURRENCY_CACHE_FILE, CONFIG_FILE, PORTFOLIO_FILE, STOCK_CACHE_FILE = (
    os.path.join(CURRENT_FOLDER, name) for name in files
)

# cached currency conversion rates, keyed by currency then by date string
with open(CURRENCY_CACHE_FILE, "r") as f:
    CURRENCY_DATA = json.load(f)

# user configuration (base currency, verbosity, exchange opening times, ...)
with open(CONFIG_FILE, "r") as f:
    config_data = json.load(f)
BASE_CURRENCY = config_data["BASE_CURRENCY"]
VERBOSE = config_data["VERBOSE"]
@dataclass
class Stock:
    """Dataclass that holds all relevant information about a stock.

    After loading, the `data` dataframe is populated with the value of the
    stock over the required time period (a "value" column plus a mirrored
    "time" column, indexed by timestamp).
    """

    name: str
    ticker: str
    currency: str
    # date fields arrive as "YYYY-MM-DD" strings from the portfolio JSON and
    # are parsed in __post_init__ — TODO confirm annotation against callers
    date_bought: dt.datetime
    holding: float
    book_cost: float  # in GBP
    commission: float
    fx_charge: float
    exchange: str  # either LSE, NASDAQ or CRYPTO
    # default_factory so "today" is evaluated per instance; a plain
    # `dt.date.today()` default would be frozen once at import time
    date_sold: dt.datetime = field(default_factory=dt.date.today)
    data: pd.DataFrame = None  # cached price history; fetched when absent
    gained: bool = False

    def __post_init__(self):
        if self.data is None:
            # no data for this stock was present in cache, so fetch new data
            if VERBOSE: print("getting values")
            self.data = get_values(
                parse_date(self.date_bought),
                parse_date(self.date_sold),
                self.ticker,
                exchange=self.exchange,
            )
            # apply currency conversion (if required):
            if self.currency != BASE_CURRENCY:
                self.data["value"] = self.data.apply(
                    lambda row: convert_currency(
                        row["value"], row["time"], self.currency
                    )
                    * 100,
                    axis=1,
                )
            # apply commission/fx charge using book price
            self.data.loc[(self.data.index[0], "value")] = (
                self.book_cost * 100 / self.holding
            )  # equivalent to self.data.iloc[0]["value"], but prevents SettingWithCopyWarning
            if VERBOSE: print(self.data)
        else:
            # data was cached, but is not fully up to date
            last_date = self.data.index[-1].date()
            # to avoid confusion/out of date data, remove all data generated on this date
            self.data = self.data[self.data["time"].dt.date != last_date]
            new_data = get_values(
                last_date,
                parse_date(self.date_sold),
                self.ticker,
                exchange=self.exchange,
            )
            new_data.drop_duplicates(inplace=True)
            if self.currency != BASE_CURRENCY:
                new_data["value"] = new_data.apply(
                    lambda row: convert_currency(
                        row["value"], row["time"], self.currency
                    )
                    * 100,
                    axis=1,
                )
            self.data = pd.concat([self.data, new_data])
def load_portfolio(file: str = PORTFOLIO_FILE) -> List[Stock]:
    """Load the portfolio JSON and return a list of Stock objects.

    Each Stock contains all the information about the stock from the json,
    plus a dataframe showing prices between start date and end date. Cached
    prices are read from STOCK_CACHE_FILE and the refreshed data is written
    back to it (best-effort) before returning.
    """
    with open(file, "r") as f:
        stock_list = json.load(f)
    # load in cached data
    try:
        imported_data = pd.read_csv(STOCK_CACHE_FILE)
        imported_data["time"] = pd.to_datetime(imported_data["time"])
    except pd.errors.EmptyDataError:
        # handle case where no data in file
        imported_data = pd.DataFrame(columns=["time", *stock_list])
    rep = []
    # create stock objects
    for stock in stock_list:
        name = stock["name"]
        # check if stock has any data cached, and if it does, assign it to the new stock
        if name in imported_data.columns:
            stock["data"] = imported_data[["time", name]].ffill()
            stock["data"].columns = ["time", "value"]
            stock["data"].index = stock["data"]["time"]
        rep.append(Stock(**stock))
    # once all stocks have been created and __post_init__() has run, save to cache.
    # the last row in each df is dropped as it holds a real-time value that may
    # not be applicable in future
    dataframes: List[pd.DataFrame] = [
        stock.data.drop("time", axis=1).drop(stock.data.index[-1]) for stock in rep
    ]
    # to concatenate, we require that all frames have the same index, so fill
    # any missing index values with NaN
    all_timestamps = np.unique(np.concatenate([df.index.values for df in dataframes]))
    for i, df in enumerate(dataframes):
        missing_indices = np.setdiff1d(all_timestamps, df.index.values)
        # np.full, NOT np.empty(...).fill(...): ndarray.fill() mutates in place
        # and returns None, so the original only worked by pandas broadcasting
        # that None to NaN
        nan_rows = pd.DataFrame(
            {"value": np.full(len(missing_indices), np.nan)},
            index=missing_indices,
            dtype=np.float64,
        )
        dataframes[i] = pd.concat([df, nan_rows]).sort_index()
    try:
        to_cache = pd.concat(
            dataframes,
            axis=1,
        )
        to_cache.columns = [stock.name for stock in rep]
        to_cache.to_csv(STOCK_CACHE_FILE, index_label="time")
    except Exception as e:
        # caching is best-effort: for some reason this is usually a temporary
        # problem that seems to sort itself out when code is run a few days later
        print(f"Caching has failed. Error message: \n{e}")
    return rep
def get_values(
    start: dt.datetime, end: dt.datetime, ticker: str, exchange: str = "LSE"
) -> pd.DataFrame:
    """Return the values of a stock between a start and end date in a DataFrame.

    The result has a "value" column (and a mirrored "time" column), indexed by
    timestamp: one row at the exchange's open and one at its close per trading
    day, with the most recent timestamp restamped to "now".

    Raises AssertionError (re-raised from yahoo_fin) if the ticker is invalid.
    """
    times = config_data["EXCHANGE_TIMES"][exchange]  # open times of various exchanges
    # collect the raw data from Yahoo Finance, take only the open and close columns
    if VERBOSE: print("Fetching data:")
    try:
        if VERBOSE: print(ticker)
        raw = si.get_data(
            ticker,
            start.strftime("%m/%d/%y"),
            (end + dt.timedelta(days=1)).strftime("%m/%d/%y"),
        )[["open", "close"]]
    except KeyError:
        # some weird quirk with the yahoo_fin module: fall back to an all-NaN frame
        index = pd.date_range(start, end, freq="1D")
        raw = pd.DataFrame(np.nan, columns=["open", "close"], index=index)
    except AssertionError as e:
        print("Assertion error. Ticker likely does not exist")
        print(e)
        # bare raise keeps the original exception's message and traceback
        # (raising a fresh AssertionError would discard both)
        raise
    # turn into data frame with one column (value) and forward fill any missing values
    rep = pd.concat([raw["open"], raw["close"]]).to_frame().ffill()
    rep.columns = ["value"]
    # add open and close times to index, and return sorted dataframe
    rep.index = pd.concat(
        [
            raw.index.to_series() + dt.timedelta(hours=times["open"]),
            raw.index.to_series() + dt.timedelta(hours=times["close"]),
        ]
    )
    # the most recent close has not happened yet, so restamp it as "now";
    # drop it instead if that would move it before the previous timestamp
    rep: pd.DataFrame = rep.rename(index={rep.index[-1]: dt.datetime.now()})
    if len(rep.index) > 1 and rep.index[-1] < rep.index[-2]:
        rep.drop(rep.index[-1], inplace=True)
    # crypto assets need their currency converted to pence
    if exchange == "CRYPTO":
        rep["value"] = rep["value"] * 100.0
    return rep.sort_index().assign(time=rep.index.values)
def merge_portfolio(portfolio: "List[Stock]") -> pd.DataFrame:
    """Merge all stocks in portfolio into one DataFrame.

    Returns a frame indexed by date with columns: value (summed daily-average
    holding value), book_cost, actual_change and percent_change.
    """
    daily_average_dfs = []  # one DataFrame per stock: average value per day
    for stock in portfolio:
        df = stock.data.copy().ffill()
        # get mean for each day; select ["value"] explicitly so only the
        # numeric column is averaged (modern pandas would otherwise also try
        # to aggregate the datetime "time" column and break the multiply)
        df = df.groupby(df["time"].dt.date)[["value"]].mean() * stock.holding
        df["book_cost"] = stock.book_cost * 100.0
        # if stock does not have recorded value for this day, set book cost to 0
        df.loc[np.isnan(df["value"]), "book_cost"] = 0
        daily_average_dfs.append(df)
    # combine dataframes, and add actual change and percentage change columns
    rep = reduce(lambda a, b: a.add(b, fill_value=0), daily_average_dfs)
    rep["actual_change"] = rep["value"] - rep["book_cost"]
    rep["percent_change"] = rep["actual_change"] * 100 / rep["book_cost"]
    return rep
def convert_currency(
    value: float, date: dt.datetime = None, c_from: str = "USD"
) -> float:
    """Convert `value` at `date` from `c_from` to the global BASE_CURRENCY.

    Rates are looked up in the CURRENCY_DATA cache; on a miss the rate is
    fetched from the exchangerate.host API and the cache file is updated.
    `date` defaults to today.
    """
    if date is None:
        # resolved per call; a default of dt.date.today() in the signature
        # would be evaluated once and frozen at import time
        date = dt.date.today()
    date_str = date.strftime("%Y-%m-%d")
    try:
        # the cache (loaded from JSON) is keyed by date STRINGS, so look up
        # date_str — indexing with the date object always missed the cache
        return value * CURRENCY_DATA[c_from][date_str]
    except KeyError:
        # cache miss: fetch the rate from the web API
        rate = requests.get(
            f"https://api.exchangerate.host/convert?from={c_from}&to={BASE_CURRENCY}&date={date_str}"
        ).json()["info"]["rate"]
        # store in the cache (creating the per-currency dict on first use
        # instead of re-raising KeyError) and persist to disk
        CURRENCY_DATA.setdefault(c_from, {})[date_str] = rate
        with open(CURRENCY_CACHE_FILE, "w") as f:
            json.dump(CURRENCY_DATA, f)
        return value * rate
def get_current_price(ticker: str) -> np.float64:
    """Fetch the most recent live price for `ticker` from Yahoo Finance."""
    live_price = si.get_live_price(ticker)
    return live_price
def parse_date(date_string: str) -> dt.datetime:
    """Parse a YYYY-MM-DD string into a datetime object.

    Falls back to the current date when `date_string` is empty/falsy.
    """
    if date_string:
        return dt.datetime.strptime(date_string, "%Y-%m-%d")
    return dt.date.today()
def add_new_stock_to_file(new_data: tuple):
with open(PORTFOLIO_FILE, "r") as f:
portfolio = json.load(f)
new_data = list(new_data)
new_data[5:9] = [int(i) for i in new_data[5:9]]
print(new_data)