import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
from pandas_datareader import data as pdr
import yfinance as yf
from datetime import datetime, timedelta
import tensorflow as tf
import xgboost as xgb
from sklearn.metrics import mean_squared_error
# Ensure yfinance overrides pandas_datareader's Yahoo downloader
yf.pdr_override()
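# Note: pdr_override() is deprecated in recent yfinance releases. If the fetch
# below fails, the plain yfinance call is an equivalent alternative:
# df = yf.download('AMD', start='2022-04-01', end=datetime.now())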
# Get the stock quote
df = pdr.get_data_yahoo('AMD', start='2022-04-01', end=datetime.now())
# Create a new dataframe with only the 'Close' column
data = df.filter(['Close'])
# Convert the dataframe to a numpy array
dataset = data.values
# Get the number of rows to train the model on
training_data_len = int(np.ceil(len(dataset) * .95))
# Scale the data using a TensorFlow Normalization layer
normalizer = tf.keras.layers.Normalization()
normalizer.adapt(dataset)
scaled_data = normalizer(dataset)
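# Sanity check (an added assertion, not in the original): the Normalization
# layer computes z = (x - mean) / sqrt(variance), so the inverse used later is
# x = mean + z * sqrt(variance).
assert np.allclose(scaled_data.numpy(),
                   (dataset - normalizer.mean.numpy()) / normalizer.variance.numpy() ** 0.5,
                   atol=1e-4)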
# Create the training data set
train_data = scaled_data[:training_data_len]
# Create the testing data set, keeping the last 60 training rows as lookback
test_data = scaled_data[training_data_len - 60:]
# Split the data into x_train and y_train data sets
x_train = []
y_train = []
for i in range(60, len(train_data)):
    x_train.append(train_data[i-60:i])
    y_train.append(train_data[i])
# Convert the x_train and y_train to numpy arrays
x_train, y_train = np.array(x_train), np.array(y_train)
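# Shape check: each sample is a 60-step window of the scaled close price,
# so x_train should be (n_samples, 60, 1) and y_train (n_samples, 1)
print('x_train:', x_train.shape, 'y_train:', y_train.shape)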
# Split the data into x_test and y_test data sets
x_test = []
y_test = dataset[training_data_len:]
for i in range(60, len(test_data)):
    x_test.append(test_data[i-60:i])
# Convert the x_test to a numpy array
x_test = np.array(x_test)
# Flatten each 60x1 window into a row of 60 lag features, since XGBoost expects 2-D tabular input
x_train = x_train.reshape((x_train.shape[0], x_train.shape[1]))
x_test = x_test.reshape((x_test.shape[0], x_test.shape[1]))
# Train the XGBoost model
model = xgb.XGBRegressor(objective='reg:squarederror', n_estimators=1000)
model.fit(x_train, y_train)
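# A possible overfitting guard (hedged; the parameter's placement varies by
# xgboost version): combine an eval_set with early_stopping_rounds, passed to
# the XGBRegressor constructor in xgboost >= 1.6 or to .fit() in older
# releases, so boosting stops once validation error plateaus.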
# Get the model's predicted price values
predictions = model.predict(x_test)
predictions = predictions.reshape(-1, 1)
# Undo the scaling: x = mean + z * sqrt(variance)
predictions = normalizer.mean.numpy() + predictions * normalizer.variance.numpy() ** 0.5
# Get the root mean squared error (RMSE)
rmse = np.sqrt(mean_squared_error(y_test, predictions))
print('RMSE:', rmse)
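# Context (an added baseline, not in the original): a naive forecast that
# repeats the previous day's close is a useful yardstick for the RMSE above.
naive = dataset[training_data_len - 1:-1]
print('Naive previous-close RMSE:', np.sqrt(mean_squared_error(y_test, naive)))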
# Predict the next 10 days
last_60_days = scaled_data[-60:].numpy() # Convert to numpy array
next_10_days = []
for i in range(10):
    X_test = last_60_days[-60:].reshape(1, -1)
    pred_scaled = model.predict(X_test)
    # Invert the scaling for the reported price, but keep the scaled value
    # for the rolling window so input scales stay consistent
    pred_price = normalizer.mean.numpy() + pred_scaled * normalizer.variance.numpy() ** 0.5
    next_10_days.append(pred_price[0])
    last_60_days = np.append(last_60_days, pred_scaled).reshape(-1, 1)[-60:]
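# Note: this recursive scheme feeds each prediction back into the input window,
# so errors compound over the 10-day horizon; treat far-out values with caution.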
# Create a dataframe for the next 10 days
last_date = df.index[-1]
future_dates = [last_date + timedelta(days=i) for i in range(1, 11)]
future_predictions = pd.DataFrame(data={'Date': future_dates, 'Close': next_10_days})
future_predictions.set_index(‘Date’, inplace=True)
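# Caveat: timedelta(days=i) yields calendar days, so some forecast dates land
# on weekends/holidays when markets are closed. A business-day alternative
# (an adjustment, not in the original) is:
# future_dates = pd.bdate_range(start=last_date + timedelta(days=1), periods=10)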
# Create dataframes for plotting and saving
train = data[:training_data_len]
valid = data[training_data_len:].copy()  # .copy() avoids a SettingWithCopyWarning on the next line
valid['Predictions'] = predictions
# Save to Excel
with pd.ExcelWriter('stock_predictions_xgboost.xlsx') as writer:
    train.to_excel(writer, sheet_name='Train')
    valid.to_excel(writer, sheet_name='Validation')
    future_predictions.to_excel(writer, sheet_name='Future')
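# pd.ExcelWriter needs an Excel engine installed, e.g. `pip install openpyxl`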
# Plot the data
plt.figure(figsize=(16, 6))
plt.title('Model')
plt.xlabel('Date', fontsize=18)
plt.ylabel('Close Price USD ($)', fontsize=18)
plt.plot(train['Close'])
plt.plot(valid[['Close', 'Predictions']])
plt.plot(future_predictions['Close'], marker='o', linestyle='dashed', color='red')
plt.legend(['Train', 'Val', 'Predictions', 'Future'], loc='lower right')
plt.show()